├── data
    ├── l_weights
    │   └── .gitignore
    └── test_imgs
    │   ├── sample_input
    │       └── .gitignore
    │   └── sample_output
    │       └── .gitignore
├── requirements.txt
├── .gitignore
├── model
    ├── image_handler.py
    ├── loss.py
    ├── wmts_handler.py
    ├── iiif_handler.py
    ├── save_localheight_original_txt_fastzk.py
    ├── predict_annotations.py
    └── mymodel.py
├── Dockerfile
└── README.md


/data/l_weights/.gitignore:
--------------------------------------------------------------------------------
1 | *
2 | !.gitignore


--------------------------------------------------------------------------------
/data/test_imgs/sample_input/.gitignore:
--------------------------------------------------------------------------------
1 | *
2 | !.gitignore


--------------------------------------------------------------------------------
/data/test_imgs/sample_output/.gitignore:
--------------------------------------------------------------------------------
1 | *
2 | !.gitignore


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | xmltodict
2 | requests
3 | shapely
4 | rasterio


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | __pycache__
2 | .idea
3 | model/debug.py
4 | model/debug.txt


--------------------------------------------------------------------------------
/model/image_handler.py:
--------------------------------------------------------------------------------
 1 | import cv2
 2 | import numpy as np
 3 | import rasterio
 4 | 
 5 | 
 6 | class ImageHandler:
 7 |     def __init__(self):
 8 |         self.img = None
 9 |         print("ImageHandler")
10 | 
11 |     def process_img(self, args):
12 |         print(f"ImageHandler: {args}")
13 | 
14 | 


--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
 1 | FROM zekunli/zekun-keras-gpu
 2 | 
 3 | WORKDIR = /map-kurator
 4 | 
 5 | # Install GDAL for Rasterio
 6 | RUN add-apt-repository -y ppa:ubuntugis/ppa \
 7 |  && apt-get update -y \
 8 |  && apt-get install -y python-numpy gdal-bin libgdal-dev
 9 | 
10 | COPY requirements.txt requirements.txt
11 | 
12 | RUN pip3 install -r requirements.txt


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # map-kurator
  2 | Wrapper around Zekun's model to detect map text labels. 
  3 | 
  4 | [**UPDATE 2022/10**]: A more comprehensive pipeline that performs detection, recogntion, img-to-geocoordinate conversion and postOCR is available here: https://github.com/knowledge-computing/mapkurator-system
  5 | 
  6 | 
  7 | ## Installation
  8 | ### 1. Installing Docker
  9 | If the machine doesn't have Docker installed, you can follow instructions (for e.g., Ubuntu) here: https://docs.docker.com/engine/install/ubuntu/
 10 | 
 11 | In particular, here are the commands I ran to install Docker on Azure VM: 
 12 | ```shell
 13 | # 1. Install prerequisites 
 14 | sudo apt-get update
 15 | 
 16 | sudo apt-get install -y \
 17 |     apt-transport-https \
 18 |     ca-certificates \
 19 |     curl \
 20 |     gnupg \
 21 |     lsb-release
 22 | 
 23 | 
 24 | # 2. Add Docker’s official GPG key:
 25 | curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo gpg --dearmor -o /usr/share/keyrings/docker-archive-keyring.gpg
 26 | 
 27 | # 3. Set up repo
 28 | echo \
 29 |   "deb [arch=amd64 signed-by=/usr/share/keyrings/docker-archive-keyring.gpg] https://download.docker.com/linux/ubuntu \
 30 |   $(lsb_release -cs) stable" | sudo tee /etc/apt/sources.list.d/docker.list > /dev/null
 31 |   
 32 | # 4. Install Docker
 33 | sudo apt-get update
 34 | sudo apt-get install -y docker-ce docker-ce-cli containerd.io
 35 | 
 36 | # 5. Verify that everything works
 37 | sudo docker run hello-world
 38 | 
 39 | # 6. Add mrm user to docker's group to allow running without sudo
 40 | usermod -a -G docker mrm
 41 | ```
 42 | 
 43 | ### 2. Download map-kurator
 44 | 
 45 | 1. Clone this repository: 
 46 | ```
 47 | git clone https://github.com/machines-reading-maps/map-kurator.git
 48 | ```
 49 | 2. `cd map-kurator/`
 50 | 
 51 | 3. Build docker image, if you haven't already. 
 52 | ```shell
 53 | docker build -t map-kurator .
 54 | ```
 55 | This command should build the image from `Dockerfile` file in the current directory (`.`) and name the image `map-kurator`
 56 | 
 57 | 4. **IMPORTANT** make sure the file with the model weights is available:
 58 | ```shell
 59 | ls -lah data/l_weights/finetune_map_model_map_w1e50_bsize8_w1_spe200_ep50.hdf5 
 60 | #> -rwxrwxr-x 1 danf danf 183M Jul  5 18:48 data/l_weights/finetune_map_model_map_w1e50_bsize8_w1_spe200_ep50.hdf5
 61 | ```
 62 | This file is over the size limit to be stored on github, hence you need to download it from [here](https://drive.google.com/file/d/1PW_wPZO54Cr5wPk44Uf8g5_gEN7UGReA/view?usp=sharing) and put it under `data/l_weights` folder.
 63 | 
 64 | If you are trying to run map-kurator locally and you have access to the Turing VM (and the VM is running), you can download it to your machine:
 65 | ```shell
 66 | scp {USER}@{VM_HOST}:~/finetune_map_model_map_w1e50_bsize8_w1_spe200_ep50.hdf5 data/l_weights/finetune_map_model_map_w1e50_bsize8_w1_spe200_ep50.hdf5 
 67 | 
 68 | ```
 69 | 
 70 | ## Usage
 71 | 
 72 | ### Input
 73 | 
 74 | #### WMTS
 75 | 
 76 | ```shell
 77 | docker run -it -v $(pwd)/data/:/map-kurator/data -v $(pwd)/model:/map-kurator/model --rm  --workdir=/map-kurator map-kurator python model/predict_annotations.py wmts --url='https://wmts.maptiler.com/aHR0cDovL3dtdHMubWFwdGlsZXIuY29tL2FIUjBjSE02THk5dFlYQnpaWEpwWlhNdGRHbHNaWE5sZEhNdWN6TXVZVzFoZW05dVlYZHpMbU52YlM4eU5WOXBibU5vTDNsdmNtdHphR2x5WlM5dFpYUmhaR0YwWVM1cWMyOXUvanNvbg/wmts' --boundary='{"type":"Feature","properties":{},"geometry":{"type":"Polygon","coordinates":[[[-1.1248,53.9711],[-1.0592,53.9711],[-1.0592,53.9569],[-1.1248,53.9569],[-1.1248,53.9711]]]}}' --zoom=16 --dst=data/test_imgs/sample_output/ --filename=sample_filename
 78 | ```
 79 | 
 80 | For WMTS, you can also choose to return the predicted polygons in the EPSG4326 coordinate system (lat, lng) by adding `--coord epsg4326` at the end of the above command.
 81 | 
 82 | #### IIIF
 83 | 
 84 | ```shell
 85 | docker run -it -v $(pwd)/data/:/map-kurator/data -v $(pwd)/model:/map-kurator/model --rm --workdir=/map-kurator map-kurator python model/predict_annotations.py iiif --url='https://map-view.nls.uk/iiif/2/12563%2F125635459/info.json' --dst=data/test_imgs/sample_output/ --filename=sample_filename
 86 | ```
 87 | 
 88 | #### Regular File
 89 | ```shell
 90 | docker run -it -v $(pwd)/data/:/map-kurator/data -v $(pwd)/model:/map-kurator/model --rm --workdir=/map-kurator map-kurator python model/predict_annotations.py file --src={PATH_TO_INPUT_FILE} --dst=data/test_imgs/sample_output/ --filename=sample_filename
 91 | ```
 92 | 
 93 | ### Output
 94 | 
 95 | Assuming output directory is `--dst=$OUT_DIR` and (optional) `--filename=my_filename`, if either of the above commands ran successfully, `$OUT_DIR` will have the following files:
 96 | 
 97 | - `my_filename_stitched.jpg`: image that was passed to the model
 98 |   
 99 | - `my_filename_predictions.jpg`: text regions detected by the model
100 |   
101 | - `my_filename_annotations.json`: detected text region outlines represented as polygons (using [Web Annotation](https://www.w3.org/TR/annotation-model/) format)
102 | 
103 | If `--filename` is not provided, it will be generated automatically as a unique `uuid4()`
104 | 


--------------------------------------------------------------------------------
/model/loss.py:
--------------------------------------------------------------------------------
 1 | from keras import backend as K    
 2 | 
 3 | # https://gist.github.com/wassname/ce364fddfc8a025bfab4348cf5de852d
 4 | def weighted_categorical_crossentropy(weights):
 5 |     """
 6 |     A weighted version of keras.objectives.categorical_crossentropy
 7 |     
 8 |     Variables:
 9 |         weights: numpy array of shape (C,) where C is the number of classes
10 |     
11 |     Usage:
12 |         weights = np.array([0.5,2,10]) # Class one at 0.5, class 2 twice the normal weights, class 3 10x.
13 |         loss = weighted_categorical_crossentropy(weights)
14 |         model.compile(loss=loss,optimizer='adam')
15 |     """
16 |     
17 |     weights = K.variable(weights)
18 |         
19 |     def loss(y_true, y_pred):
20 |         # scale predictions so that the class probas of each sample sum to 1
21 |         y_pred /= K.sum(y_pred, axis=-1, keepdims=True)
22 |         # clip to prevent NaN's and Inf's
23 |         y_pred = K.clip(y_pred, K.epsilon(), 1 - K.epsilon())
24 |         # calc
25 |         loss = y_true * K.log(y_pred) * weights
26 |         loss = -K.sum(loss, -1)
27 |         return loss
28 |     
29 |     return loss
30 | 
31 | def mean_squared_error_mask(y_true, y_pred):
32 |     y_mask = y_true[:,:,:,0] #bsize, h, w, 5(m,x,y,sin,cos)
33 |     y_mask = K.expand_dims(y_mask, axis = -1)
34 |     #print y_true.shape, y_pred.shape
35 |     y_true = y_true[:,:,:,1:]
36 |     #y_pred = y_pred[:,:,:,1:]
37 |     return K.sum(K.square((y_pred - y_true)*y_mask),  axis= -1) # mse at each pixel location
38 | 
39 | def mean_absolute_error_mask(y_true, y_pred):                                                                                                                                                               
40 |     y_mask = y_true[:,:,:,0] #bsize, h, w, 5(m,x,y,sin,cos)                                                                                                                                                 
41 |     y_mask = K.expand_dims(y_mask, axis = -1)                                                                                                                                                               
42 |     #print y_true.shape, y_pred.shape                                                                                                                                                                       
43 |     y_true = y_true[:,:,:,1:]                                                                                                                                                                               
44 |                                                                                                                                                                                                             
45 |     return K.sum(K.abs((y_pred - y_true)*y_mask), axis=-1)                                                                                                                                                  
46 |                                                                                                                                                                                                             
47 |                                                                                                                                                                                                             
48 | def mean_absolute_percentage_error_mask(y_true, y_pred):                                                                                                                                                    
49 |     y_mask = y_true[:,:,:,0] #bsize, h, w, 5(m,x,y,sin,cos)                                                                                                                                                 
50 |     y_mask = K.expand_dims(y_mask, axis = -1)                                                                                                                                                               
51 |     #print y_true.shape, y_pred.shape                                                                                                                                                                       
52 |     y_true = y_true[:,:,:,1:]                                                                                                                                                                               
53 |                                                                                                                                                                                                             
54 |     diff = K.abs(((y_true - y_pred))*y_mask / K.clip(K.abs(y_true * y_mask),                                                                                                                                
55 |                                             K.epsilon(),                                                                                                                                                    
56 |                                             None))                                                                                                                                                          
57 |     return 100. * K.sum(diff, axis=-1)             
58 |     


--------------------------------------------------------------------------------
/model/wmts_handler.py:
--------------------------------------------------------------------------------
  1 | import requests
  2 | import xmltodict
  3 | import json
  4 | import shapely.geometry
  5 | import math
  6 | import cv2
  7 | import numpy as np
  8 | import os
  9 | 
 10 | 
 11 | class WMTSHandler:
 12 |     def __init__(self, url, bounds, zoom, output_dir, img_filename):
 13 |         self.url = url
 14 |         self.tile_info = {}
 15 |         self.bounds = json.loads(bounds)
 16 |         self.zoom = zoom
 17 |         self.output_dir = output_dir
 18 |         self.img_filename = img_filename
 19 | 
 20 |     def _tile_idxs_in_poly(self, poly: shapely.geometry.Polygon):
 21 |         min_lon, min_lat, max_lon, max_lat = poly.bounds
 22 |         (min_x, max_y), (max_x, min_y) = self._latlon2tile(min_lat, min_lon), self._latlon2tile(max_lat, max_lon)
 23 | 
 24 |         tile_idxs = []
 25 | 
 26 |         for x in range(int(min_x), int(max_x) + 1):
 27 |             for y in range(int(min_y), int(max_y) + 1):
 28 |                 nw_pt = self._tile2latlon(x, y)[::-1]  # poly is defined in geojson form
 29 |                 ne_pt = self._tile2latlon(x + 1, y)[::-1]  # poly is defined in geojson form
 30 |                 sw_pt = self._tile2latlon(x, y + 1)[::-1]  # poly is defined in geojson form
 31 |                 se_pt = self._tile2latlon(x + 1, y + 1)[::-1]  # poly is defined in geojson form
 32 | 
 33 |                 bbox = shapely.geometry.Polygon([nw_pt, ne_pt, sw_pt, se_pt])
 34 | 
 35 |                 # print(f"{x}-{y}; {nw_pt} {ne_pt} {sw_pt} {se_pt}")
 36 |                 # if any(map(lambda pt: shapely.geometry.Point(pt).within(poly), (nw_pt, ne_pt, sw_pt, se_pt))):
 37 |                 if poly.intersects(bbox):
 38 |                     tile_idxs.append((x, y))
 39 | 
 40 |         return tile_idxs, int(max_x + 1) - int(min_x), int(max_y + 1) - int(min_y), int(min_x), int(min_y)
 41 | 
 42 |     def _generate_tile_info(self, tile_idxs, min_x, min_y, url_template):
 43 |         zoom_level = str(self.zoom)
 44 |         tile_info = {
 45 |             'zoom_level': zoom_level,
 46 |             'tile_idxs': {}
 47 |         }
 48 | 
 49 |         for (x, y) in tile_idxs:
 50 |             # tile_col = str(x)
 51 |             # tile_row = str(y)
 52 | 
 53 |             url = url_template.replace('{TileMatrix}', zoom_level).replace('{TileCol}', str(x)).replace('{TileRow}',
 54 |                                                                                                         str(y))
 55 |             tile_info['tile_idxs'][(x - min_x, y - min_y)] = {'url': url}
 56 | 
 57 |         return tile_info
 58 | 
 59 |     def process_wmts(self):
 60 |         # print(args)
 61 |         # zoom_level = 18 # ~45min to download and predict; similar results to zoom=16; stitched png ~100Mb
 62 |         # zoom_level = 16 # ~2 min to download to predict; decent results; stitched png ~7Mb
 63 |         # zoom_level = 14 # too small for the model to detect text
 64 | 
 65 |         r = requests.get(self.url)
 66 |         # print(r.status_code)
 67 |         # print(str(r.headers))
 68 |         # print(json.dumps(xmltodict.parse(r.content)))
 69 |         response_dict = xmltodict.parse(r.content)
 70 |         wmts_capabilities = response_dict['Capabilities']
 71 |         # print(list(wmts_capabilities.keys()))
 72 |         url_template = wmts_capabilities['Contents']['Layer']['ResourceURL']['@template']
 73 | 
 74 |         poly = shapely.geometry.shape(self.bounds['geometry'])
 75 | 
 76 |         tile_idxs, num_tiles_w, num_tiles_h, min_x, min_y = self._tile_idxs_in_poly(poly)
 77 | 
 78 |         # print(f"num_tiles: {len(tile_idxs)}")
 79 |         tile_info = self._generate_tile_info(tile_idxs, min_x, min_y, url_template)
 80 |         tile_info['num_tiles_w'] = num_tiles_w
 81 |         tile_info['num_tiles_h'] = num_tiles_h
 82 |         tile_info['min_x'] = min_x 
 83 |         tile_info['min_y'] = min_y
 84 | 
 85 |         tile_info = self._download_tiles(tile_info)
 86 | 
 87 |         map_path = self._generate_img(tile_info)
 88 | 
 89 |         # update self.tile_info
 90 |         self.tile_info = tile_info
 91 | 
 92 |         return map_path
 93 | 
 94 |     def _download_tiles(self, tile_info):
 95 | 
 96 |         for tile_idx in list(tile_info['tile_idxs'].keys()):
 97 |             url = tile_info['tile_idxs'][tile_idx]['url']
 98 | 
 99 |             print(f"downloading for key {str(tile_idx)} - {url}")
100 | 
101 |             resp = requests.get(url)
102 |             img = np.asarray(bytearray(resp.content), dtype=np.uint8)
103 |             img = cv2.imdecode(img, cv2.IMREAD_COLOR)
104 | 
105 |             tile_info['tile_idxs'][tile_idx]['img'] = img
106 | 
107 |         # return the images
108 |         return tile_info
109 | 
110 |     def _generate_img(self, tile_info):
111 |         num_tiles_w = tile_info['num_tiles_w']
112 |         num_tiles_h = tile_info['num_tiles_h']
113 | 
114 |         shift_size = 256
115 | 
116 |         enlarged_width = int(shift_size * num_tiles_w)
117 |         enlarged_height = int(shift_size * num_tiles_h)
118 | 
119 |         # paste the original map to the enlarged map
120 |         enlarged_map = np.zeros((enlarged_height, enlarged_width, 3)).astype(np.uint8)
121 | 
122 |         # process tile by tile
123 |         for idx in range(0, max(1,num_tiles_w)):
124 |             # paste the predicted probabilty maps to the output image
125 |             for jdx in range(0, max(1,num_tiles_h)):
126 |                 img = tile_info['tile_idxs'][(idx, jdx)]['img']
127 |                 enlarged_map[jdx * shift_size:(jdx + 1) * shift_size, idx * shift_size:(idx + 1) * shift_size, :] = img
128 | 
129 |         map_path = os.path.join(self.output_dir, self.img_filename)
130 | 
131 |         cv2.imwrite(map_path, enlarged_map)
132 |         return map_path
133 | 
134 |     def _stitch_tiles(self):
135 |         # needs input path with (cached) image tiles
136 |         # needs output path
137 |         return True
138 | 
139 |     # from OSM Slippy Tile definitions & https://github.com/Caged/tile-stitch
140 |     def _latlon2tile(self, lat, lon):
141 |         lat_radians = lat * math.pi / 180.0
142 |         n = 1 << self.zoom
143 |         return (
144 |             n * ((lon + 180.0) / 360.0),
145 |             n * (1 - (math.log(math.tan(lat_radians) + 1 / math.cos(lat_radians)) / math.pi)) / 2.0
146 |         )
147 | 
148 |     # from OSM Slippy Tile definitions & https://github.com/Caged/tile-stitch
149 |     def _tile2latlon(self, x, y):
150 |         n = 1 << self.zoom
151 |         lat_radians = math.atan(math.sinh(math.pi * (1.0 - 2.0 * y / n)))
152 |         lat = lat_radians * 180 / math.pi
153 |         lon = 360 * x / n - 180.0
154 |         return (lat, lon)
155 | 
156 |     def _tile2latlon_list(self, x_list, y_list):
157 |         n = 1 << self.zoom
158 |         x_list, y_list = np.array(x_list), np.array(y_list)
159 |         lat_radians_list = np.arctan(np.sinh(np.pi * (1.0 - 2.0 * y_list / n)))
160 |         lat_list = lat_radians_list * 180 / math.pi
161 |         lon_list = 360 * x_list / n - 180.0
162 |         return (lat_list, lon_list)
163 | 


--------------------------------------------------------------------------------
/model/iiif_handler.py:
--------------------------------------------------------------------------------
  1 | import requests
  2 | import cv2
  3 | import numpy as np
  4 | import json
  5 | import pprint as pp
  6 | import math
  7 | from urllib.parse import urlparse, unquote
  8 | import os
  9 | import uuid
 10 | 
 11 | 
 12 | class IIIFHandler:
 13 |     def __init__(self, manifest_url, output_dir, img_filename):
 14 |         self.tile_info = {'tile_idxs': {}, 'num_tiles_w': 0, 'num_tiles_h': 0}
 15 |         # self.url = "https://map-view.nls.uk/iiif/2/12563%2F125635459/12288,8192,4096,3242/512,/0/default.jpg"
 16 |         # self.url = "https://map-view.nls.uk/iiif/2/12563%2F125635459/full/max/0/default.jpg"
 17 |         self.tile_width = None
 18 |         self.tile_height = None
 19 |         self.img_width = None
 20 |         self.img_height = None
 21 |         self.url_prefix = None
 22 |         # self.manifest_url = "https://map-view.nls.uk/iiif/2/12563%2F125635459/info.json"
 23 |         self.manifest_url = manifest_url
 24 |         self.output_dir = output_dir
 25 |         self.rotation = 0
 26 |         self.tile_size = "full"
 27 |         self.quality = "default"
 28 |         self.img_format = "jpg"
 29 |         self.img_filename = img_filename
 30 | 
 31 |     def process_url(self):
 32 |         r = requests.get(self.manifest_url)
 33 |         # print(r.status_code)
 34 |         # print(str(r.headers))
 35 | 
 36 |         response_dict = r.json()
 37 |         print(json.dumps(response_dict, indent=2))
 38 | 
 39 |         self.url_prefix = response_dict['@id']
 40 |         # self.img_filename = unquote(urlparse(self.url_prefix).path).split("/")[-1]
 41 | 
 42 |         self.img_width = response_dict['width']
 43 |         self.img_height = response_dict['height']
 44 | 
 45 | 
 46 |         if response_dict['profile'] is not None:
 47 |             profile_list = response_dict['profile']
 48 |             if type(profile_list) == list and len(profile_list) > 1:
 49 |                 profile_info = profile_list[1]
 50 |                 if 'qualities' in profile_info:
 51 |                     if 'native' in profile_info['qualities']:
 52 |                         self.quality = 'native'
 53 |                         print('set to native')
 54 | 
 55 |         if response_dict['tiles'] is not None:
 56 |             #assert response_dict['tiles'][0]['width'] == response_dict['tiles'][0]['height']
 57 |             #tile_size = response_dict['tiles'][0]['width']
 58 |             #self.tile_size = str(tile_size) + ','
 59 | 
 60 |             tile_info = response_dict['tiles'][0]
 61 |             self.tile_width = tile_info['width']
 62 |             # hack for sanborn maps
 63 |             if 'height' in tile_info:
 64 |                 self.tile_height = tile_info['height']
 65 |             else:
 66 |                 self.tile_height = tile_info['width']
 67 | 
 68 | 
 69 |             assert self.tile_height == self.tile_width
 70 | 
 71 |             # hack for david rumsey maps
 72 |             try:
 73 |                 # probe once to decide the url format
 74 |                 probe_bbox_str = ",".join([str(0), str(0), str(self.tile_width), str(self.tile_height)]) 
 75 |                 probe_url = self.url_prefix + f"/{probe_bbox_str}/{self.tile_size}/{self.rotation}/{self.quality}.{self.img_format}"
 76 |                 probe_resp = requests.get(probe_url)
 77 |                 probe_img = np.asarray(bytearray(probe_resp.content), dtype=np.uint8)
 78 |                 _,_,_ = prob_img.shape # DO NOT delete this line. This line would cause an error and trigger the execption branch if url format is incorrect
 79 |             except:
 80 |                 
 81 |                 self.tile_size = str(self.tile_height) + ','
 82 | 
 83 | 
 84 |             self._generate_tile_info()
 85 |             # pp.pprint(self.tile_info)
 86 |             self._download_tiles()
 87 |             map_path = self._generate_img()
 88 |             return map_path
 89 | 
 90 | 
 91 | 
 92 |     # generate a list of unique urls for each tile to download the entire image in pieces
 93 |     # https://iiif.io/api/image/2.1/#appendices
 94 |     def _generate_tile_info(self):
 95 |         row_idx = 0
 96 |         col_idx = 0
 97 | 
 98 |         max_col_idx = math.ceil(self.img_width / self.tile_width)
 99 |         max_row_idx = math.ceil(self.img_height / self.tile_height)
100 | 
101 |         current_region_x = col_idx * self.tile_width
102 |         current_region_w = self.tile_width
103 |         current_region_y = row_idx * self.tile_height
104 |         current_region_h = self.tile_height
105 | 
106 |         while col_idx < max_col_idx:
107 |             row_idx = 0 # always start outer loop from new row
108 |             current_region_x = col_idx * self.tile_width
109 |             current_region_w = self.tile_width
110 |             if current_region_x + current_region_w > self.img_width:
111 |                 current_region_w = self.img_width - current_region_x
112 | 
113 |             while row_idx < max_row_idx:
114 |                 current_region_y = row_idx * self.tile_height
115 |                 current_region_h = self.tile_height
116 | 
117 |                 if current_region_y + current_region_h > self.img_height:
118 |                     current_region_h = self.img_height - current_region_y
119 | 
120 |                 url = self._generate_url(current_region_x, current_region_y, current_region_w, current_region_h)
121 |                 self.tile_info['tile_idxs'][(col_idx, row_idx)] = {'url': url}
122 | 
123 |                 row_idx += 1
124 | 
125 |             col_idx += 1
126 | 
127 |         url = self._generate_url(current_region_x, current_region_y, current_region_w, current_region_h)
128 |         self.tile_info['tile_idxs'][(col_idx, row_idx)] = {'url': url}
129 | 
130 |         self.tile_info['num_tiles_w'] = max_col_idx
131 |         self.tile_info['num_tiles_h'] = max_row_idx
132 | 
133 |     def _download_tiles(self):
134 | 
135 |         for tile_idx in list(self.tile_info['tile_idxs'].keys()):
136 |             url = self.tile_info['tile_idxs'][tile_idx]['url']
137 | 
138 |             print(f"downloading for key {str(tile_idx)} - {url}")
139 | 
140 |             resp = requests.get(url)
141 |             #print(url)
142 |             img = np.asarray(bytearray(resp.content), dtype=np.uint8)
143 | 
144 |             if img.shape[0] == 0: # empty image
145 |                 continue 
146 |         
147 |             try:
148 |                 img = cv2.imdecode(img, cv2.IMREAD_COLOR)
149 |                 img_height, img_width, img_depth = img.shape
150 |                 print(img.shape)
151 | 
152 |             except:
153 |                 print('Tile might be empty, skipped', url)
154 |                 #exit(-1)
155 | 
156 |             try:
157 |                 # Pad width and height to multiples of self.tile_width and self.tile_height
158 |                 d_height = self.tile_height - img_height
159 |                 d_width = self.tile_width - img_width
160 |                 top = 0
161 |                 bottom = d_height
162 |                 left = 0
163 |                 right = d_width
164 | 
165 |                 img = cv2.copyMakeBorder(img.copy(), top, bottom, left, right, cv2.BORDER_CONSTANT, value=[0, 0, 0])
166 | 
167 |                 self.tile_info['tile_idxs'][tile_idx]['img'] = img
168 |             except:
169 |                 print('Error making border, skipped', url)
170 | 
171 | 
172 |     def _generate_img(self):
173 |         num_tiles_w = self.tile_info['num_tiles_w']
174 |         num_tiles_h = self.tile_info['num_tiles_h']
175 | 
176 |         enlarged_width = int(self.tile_width * num_tiles_w)
177 |         enlarged_height = int(self.tile_height * num_tiles_h)
178 |         print(f"ntw, nth: {num_tiles_h}, {num_tiles_w}")
179 |         print(f"ew, eh: {enlarged_width}, {enlarged_height}")
180 | 
181 |         # print("BLAGALHAGLAHGA:")
182 |         # print(f"{width}-{num_tiles_w}, {height}-{num_tiles_h}, {enlarged_width}, {enlarged_height}")
183 |         # paste the original map to the enlarged map
184 |         enlarged_map = np.zeros((enlarged_height, enlarged_width, 3)).astype(np.uint8)
185 | 
186 |         # process tile by tile
187 |         for idx in range(0, num_tiles_w):
188 |             # paste the predicted probabilty maps to the output image
189 |             for jdx in range(0, num_tiles_h):
190 |                 if 'img' not in self.tile_info['tile_idxs'][(idx, jdx)]:
191 |                     continue 
192 | 
193 |                 img = self.tile_info['tile_idxs'][(idx, jdx)]['img']
194 |         
195 |                 # print(f"img shape for ({idx}, {jdx}) - {img.shape}")
196 |                 enlarged_map[jdx * self.tile_width:(jdx + 1) * self.tile_width, idx * self.tile_height:(idx + 1) * self.tile_height, :] = img
197 | 
198 |         map_path = os.path.join(self.output_dir, self.img_filename)
199 |         cv2.imwrite(map_path, enlarged_map)
200 | 
201 |         return map_path
202 | 
203 |     def _generate_url(self, x, y, w, h):
204 | 
205 |         bbox_str = ",".join([str(x), str(y), str(w), str(h)])
206 |         return_url = self.url_prefix + f"/{bbox_str}/{self.tile_size}/{self.rotation}/{self.quality}.{self.img_format}"
207 |         #print(return_url)
208 |         return return_url
209 | 
210 | 


--------------------------------------------------------------------------------
/model/save_localheight_original_txt_fastzk.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | import glob
  3 | import json
  4 | 
  5 | import cv2
  6 | import math
  7 | import numpy as np
  8 | import os
  9 | 
 10 | os.environ['KERAS_BACKEND'] = 'tensorflow'
 11 | import sys
 12 | import tensorflow as tf
 13 | 
 14 | print(tf.__file__)
 15 | print(tf.__version__)
 16 | 
 17 | # gpus = tf.config.list_physical_devices('GPU')
 18 | # if gpus:
 19 | #   # Restrict TensorFlow to only allocate 1GB of memory on the first GPU
 20 | #   try:
 21 | #     tf.config.experimental.set_virtual_device_configuration(
 22 | #         gpus[0],
 23 | #         [tf.config.experimental.VirtualDeviceConfiguration(memory_limit=4096)])
 24 | #     logical_gpus = tf.config.experimental.list_logical_devices('GPU')
 25 | #     print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")
 26 | #   except RuntimeError as e:
 27 | #     # Virtual devices must be set before GPUs have been initialized
 28 | #     print(e)
 29 | 
 30 | 
 31 | os.environ['CUDA_VISIBLE_DEVICES'] = ""
 32 | import keras
 33 | from keras.models import load_model
 34 | from keras.models import Sequential
 35 | from keras.layers import Dense, Dropout, Flatten, Activation
 36 | from keras.layers import Conv2D, MaxPooling2D
 37 | from keras import backend as K
 38 | from keras.callbacks import Callback
 39 | from keras.layers import Lambda, Input, Dense, Concatenate, Conv2DTranspose
 40 | from keras.layers import LeakyReLU, BatchNormalization, AveragePooling2D, Reshape
 41 | from keras.layers import UpSampling2D, ZeroPadding2D
 42 | from keras.losses import mse, binary_crossentropy
 43 | from keras.models import Model
 44 | from keras.layers import Lambda, TimeDistributed
 45 | from keras import layers
 46 | 
 47 | import numpy as np
 48 | import cv2
 49 | import argparse
 50 | import glob
 51 | 
 52 | 
 53 | from loss import weighted_categorical_crossentropy, mean_squared_error_mask
 54 | from loss import mean_absolute_error_mask, mean_absolute_percentage_error_mask
 55 | from mymodel import model_U_VGG_Centerline_Localheight
 56 | 
 57 | 
 58 | map_images = glob.glob('./data/test_imgs/sample_input/101201496_h10w3.jpg')
 59 | # map_images = glob.glob('./data/*.png')
 60 | 
 61 | # print(globals()['map_images'])
 62 | 
 63 | print("-----")
 64 | print(locals()['map_images'])
 65 | 
 66 | output_path = './data/test_imgs/sample_output/'
 67 | 
 68 | saved_weights = './data/l_weights/finetune_map_model_map_w1e50_bsize8_w1_spe200_ep50.hdf5'
 69 | model = model_U_VGG_Centerline_Localheight()
 70 | model.load_weights(saved_weights)
 71 | 
 72 | if not os.path.isdir(output_path):
 73 |     os.makedirs(output_path)
 74 | 
 75 | 
 76 | shift_size = 512
 77 | 
 78 | for map_path in map_images:
 79 | 
 80 |     base_name = os.path.basename(map_path)
 81 | 
 82 |     txt_name = output_path + base_name[0:len(base_name) - 4] + '.txt'
 83 | 
 84 |     f = open(txt_name, 'w+')
 85 | 
 86 |     print(map_path)
 87 | 
 88 |     map_img = cv2.imread(map_path)
 89 | 
 90 |     width = map_img.shape[1]  # dimension2
 91 |     height = map_img.shape[0]  # dimension1
 92 | 
 93 |     in_map_img = map_img / 255.
 94 |     
 95 |     # pad the image to the size divisible by shift-size
 96 |     num_tiles_w = int(np.ceil(1. * width/shift_size))
 97 |     num_tiles_h = int(np.ceil(1. * height/shift_size))
 98 |     enlarged_width = int(shift_size * num_tiles_w)
 99 |     enlarged_height = int(shift_size * num_tiles_h)
100 |     print("BLAGALHAGLAHGA:")
101 |     print(f"{width}-{num_tiles_w}, {height}-{num_tiles_h}, {enlarged_width}, {enlarged_height}")
102 |     # paste the original map to the enlarged map
103 |     enlarged_map = np.zeros((enlarged_height, enlarged_width, 3)).astype(np.float32)
104 |     enlarged_map[0:height, 0:width, :] = in_map_img
105 |     
106 |     # define the output probability maps
107 |     localheight_map_o = np.zeros((enlarged_height, enlarged_width, 1), np.float32)
108 |     center_map_o = np.zeros((enlarged_height, enlarged_width, 2), np.float32)
109 |     prob_map_o = np.zeros((enlarged_height, enlarged_width, 3), np.float32)
110 |     
111 |     # process tile by tile
112 |     for idx in range(0, num_tiles_h):
113 |         # pack several tiles in a batch and feed the batch to the model
114 |         test_batch = []
115 |         for jdx in range(0, num_tiles_w):
116 |             img_clip = enlarged_map[idx*shift_size:(idx+1)*shift_size, jdx*shift_size:(jdx+1)*shift_size, :]
117 |             test_batch.append(img_clip)
118 |         test_batch = np.array(test_batch).astype(np.float32)
119 |         
120 |         # use the pretrained model to predict
121 |         batch_out = model.predict(test_batch)
122 |         
123 |         # get predictions
124 |         prob_map_batch = batch_out[0]
125 |         center_map_batch = batch_out[1]
126 |         localheight_map_batch = batch_out[2]
127 |         
128 |         # paste the predicted probabilty maps to the output image
129 |         for jdx in range(0, num_tiles_w):
130 |             localheight_map_o[idx*shift_size:(idx+1)*shift_size, jdx*shift_size:(jdx+1)*shift_size, :] = localheight_map_batch[jdx]
131 |             center_map_o[idx*shift_size:(idx+1)*shift_size, jdx*shift_size:(jdx+1)*shift_size, :] = center_map_batch[jdx]
132 |             prob_map_o[idx*shift_size:(idx+1)*shift_size, jdx*shift_size:(jdx+1)*shift_size, :] = prob_map_batch[jdx]
133 |     
134 |     
135 |     # convert from 0-1? to 0-255 range
136 |     prob_map_o = (prob_map_o * 255).astype(np.uint8)
137 |     center_map_o = (center_map_o[:, :, 1] * 255).astype(np.uint8)
138 |     #localheight_map = (localheight_map_o * 255).astype(np.uint8)
139 |     
140 |     prob_map_o = prob_map_o[0:height, 0:width, :]
141 |     center_map_o = center_map_o[0:height, 0:width]
142 |     localheight_map_o = localheight_map_o[0:height, 0:width, :]
143 |     
144 | 
145 | 
146 |     num_c, connected_map = cv2.connectedComponents(center_map_o)
147 |     print('num_c:', num_c)
148 |     
149 |     # process component by component
150 |     for cur_cc_idx in range(1, num_c):  # index_0 is the background
151 |         
152 |         if cur_cc_idx % 100 == 0:
153 |             print('processed', str(cur_cc_idx))
154 |             
155 |         centerline_indices = np.where(connected_map == cur_cc_idx)
156 | 
157 |         centerPoints=[]
158 |         for i, j in zip(centerline_indices[0], centerline_indices[1]):
159 |             if localheight_map_o[i, j, 0] > 0:
160 |                 centerPoints.append([i, j])
161 | 
162 |         if len(centerPoints) == 0:
163 |             continue 
164 | 
165 |         mini, minj = np.min(centerPoints, axis=0)
166 |         maxi, maxj = np.max(centerPoints, axis=0)
167 | 
168 |         localheight_result_o = np.zeros((maxi-mini+100, maxj-minj+100, 3), np.uint8)
169 | 
170 |         for i, j in centerPoints:
171 |             cv2.circle(localheight_result_o, (j-minj+50, i-mini+50), int(localheight_map_o[i][j]*0.4), (0, 0, 255), -1)
172 | 
173 |         img_gray = cv2.cvtColor(localheight_result_o, cv2.COLOR_BGR2GRAY)
174 | 
175 |         contours, hierarchy = cv2.findContours(img_gray, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
176 | 
177 |         new_context = ''
178 | 
179 |         if len(contours) == 0:
180 |             continue
181 | 
182 |         for i in range(0, len(contours[0])):
183 |             if i < len(contours[0]) - 1:
184 |                 new_context = new_context + str(contours[0][i][0][0].item()+minj-50) + ',' + str(contours[0][i][0][1].item()+mini-50) + ','
185 |             else:
186 |                 new_context = new_context + str(contours[0][i][0][0].item()+minj-50) + ',' + str(contours[0][i][0][1].item()+mini-50)
187 | 
188 |         new_context = new_context + '\n'
189 | 
190 |         f.writelines(new_context)
191 | 
192 |     cv2.imwrite(output_path+'prob_' + base_name[0:len(base_name) - 4] + '.jpg', prob_map_o)
193 |     cv2.imwrite(output_path+'cent_' + base_name[0:len(base_name) - 4] + '.jpg', center_map_o)
194 |     cv2.imwrite(output_path+'localheight_map_' + base_name[0:len(base_name) - 4] + '.jpg', localheight_map_o)
195 | 
196 |     f.close()
197 | 
198 | 
199 |     #txt parse
200 |     with open(txt_name, 'r') as f:
201 |         data = f.readlines()
202 | 
203 |     polyList = []
204 | 
205 |     for line in data:
206 |         polyStr = line.split(',')
207 |         poly = []
208 |         for i in range(0, len(polyStr)):
209 |             if i % 2 == 0:
210 |                 poly.append([int(polyStr[i]), int(polyStr[i+1])])
211 | 
212 |         polyList.append(poly)
213 | 
214 | 
215 |     for i in range(0,len(polyList)):
216 |         polyPoints = np.array([polyList[i]], dtype=np.int32)
217 |         cv2.polylines(map_img, polyPoints, True, (0, 0, 255), 3)
218 | 
219 | 
220 |     cv2.imwrite(output_path+'parse_result_'+base_name[0:len(base_name) - 4] + '.jpg',map_img)
221 | 
222 | 
223 |     # Generate web annotations: https://www.w3.org/TR/annotation-model/
224 |     annotations = []
225 |     for polygon in polyList:
226 |         svg_polygon_coords = ' '.join([f"{x},{y}" for x, y in polygon])
227 |         annotation = {
228 |             "@context": "http://www.w3.org/ns/anno.jsonld",
229 |             "id": "",
230 |             "body": [{
231 |                 "type": "TextualBody",
232 |                 "purpose": "tagging",
233 |                 "value": "null"
234 |             }],
235 |             "target": {
236 |                 "selector": [{
237 |                     "type": "SvgSelector",
238 |                     "value": f"<svg><polygon points='{svg_polygon_coords}'></polygon></svg>"
239 |                 }]
240 |             }
241 |         }
242 |         annotations.append(annotation)
243 | 
244 |     with open(output_path+'web_annotations'+base_name[0:len(base_name) - 4] + '.json', 'w') as f:
245 |         f.write(json.dumps(annotations, indent=2))
246 |     # print(f"{polyList}")
247 | 
248 | print('done processing')
249 | 
250 | 
251 | 


--------------------------------------------------------------------------------
/model/predict_annotations.py:
--------------------------------------------------------------------------------
  1 | import argparse
  2 | import os
  3 | import uuid
  4 | 
  5 | from wmts_handler import WMTSHandler
  6 | from image_handler import ImageHandler
  7 | from iiif_handler import IIIFHandler
  8 | from mymodel import model_U_VGG_Centerline_Localheight
  9 | 
 10 | import cv2
 11 | import numpy as np
 12 | import json
 13 | from shapely.geometry import Polygon
 14 | 
 15 | os.environ['KERAS_BACKEND'] = 'tensorflow'
 16 | os.environ['CUDA_VISIBLE_DEVICES'] = ""
 17 | 
 18 | import sys
 19 | import tensorflow as tf
 20 | 
 21 | import time
 22 | 
 23 | print(tf.__file__)
 24 | print(tf.__version__)
 25 | 
 26 | # basically copy-pasted from the original implementation in save_localheight_original_txt_fastzk.py
 27 | def run_model(map_id, map_path, output_dir):
 28 |     saved_weights = './data/l_weights/finetune_map_model_map_w1e50_bsize8_w1_spe200_ep50.hdf5'
 29 |     model = model_U_VGG_Centerline_Localheight()
 30 |     model.load_weights(saved_weights)
 31 | 
 32 |     map_img = cv2.imread(map_path)
 33 |     #print(map_path)
 34 |     shift_size = 512
 35 | 
 36 |     base_name = os.path.basename(map_path)
 37 | 
 38 |     width = map_img.shape[1]  # dimension2
 39 |     height = map_img.shape[0]  # dimension1
 40 | 
 41 |     in_map_img = map_img / 255.
 42 | 
 43 |     # pad the image to the size divisible by shift-size
 44 |     num_tiles_w = int(np.ceil(1. * width / shift_size))
 45 |     num_tiles_h = int(np.ceil(1. * height / shift_size))
 46 |     enlarged_width = int(shift_size * num_tiles_w)
 47 |     enlarged_height = int(shift_size * num_tiles_h)
 48 |     # print(f"{width}-{num_tiles_w}, {height}-{num_tiles_h}, {enlarged_width}, {enlarged_height}")
 49 |     # paste the original map to the enlarged map
 50 |     enlarged_map = np.zeros((enlarged_height, enlarged_width, 3)).astype(np.float32)
 51 |     enlarged_map[0:height, 0:width, :] = in_map_img
 52 | 
 53 |     # define the output probability maps
 54 |     localheight_map_o = np.zeros((enlarged_height, enlarged_width, 1), np.float32)
 55 |     center_map_o = np.zeros((enlarged_height, enlarged_width, 2), np.float32)
 56 |     prob_map_o = np.zeros((enlarged_height, enlarged_width, 3), np.float32)
 57 | 
 58 |     # process tile by tile
 59 |     for idx in range(0, num_tiles_h):
 60 |         # pack several tiles in a batch and feed the batch to the model
 61 |         test_batch = []
 62 |         for jdx in range(0, num_tiles_w):
 63 |             img_clip = enlarged_map[idx * shift_size:(idx + 1) * shift_size, jdx * shift_size:(jdx + 1) * shift_size, :]
 64 |             test_batch.append(img_clip)
 65 |         test_batch = np.array(test_batch).astype(np.float32)
 66 | 
 67 |         # use the pretrained model to predict
 68 |         batch_out = model.predict(test_batch)
 69 | 
 70 |         # get predictions
 71 |         prob_map_batch = batch_out[0]
 72 |         center_map_batch = batch_out[1]
 73 |         localheight_map_batch = batch_out[2]
 74 | 
 75 |         # paste the predicted probabilty maps to the output image
 76 |         for jdx in range(0, num_tiles_w):
 77 |             localheight_map_o[idx * shift_size:(idx + 1) * shift_size, jdx * shift_size:(jdx + 1) * shift_size, :] = \
 78 |             localheight_map_batch[jdx]
 79 |             center_map_o[idx * shift_size:(idx + 1) * shift_size, jdx * shift_size:(jdx + 1) * shift_size, :] = \
 80 |             center_map_batch[jdx]
 81 |             prob_map_o[idx * shift_size:(idx + 1) * shift_size, jdx * shift_size:(jdx + 1) * shift_size, :] = \
 82 |             prob_map_batch[jdx]
 83 | 
 84 |     # convert from 0-1? to 0-255 range
 85 |     prob_map_o = (prob_map_o * 255).astype(np.uint8)
 86 |     center_map_o = (center_map_o[:, :, 1] * 255).astype(np.uint8)
 87 |     # localheight_map = (localheight_map_o * 255).astype(np.uint8)
 88 | 
 89 |     prob_map_o = prob_map_o[0:height, 0:width, :]
 90 |     center_map_o = center_map_o[0:height, 0:width]
 91 |     localheight_map_o = localheight_map_o[0:height, 0:width, :]
 92 | 
 93 |     num_c, connected_map = cv2.connectedComponents(center_map_o)
 94 |     print('num_c:', num_c)
 95 | 
 96 |     poly_list = []
 97 |     # process component by component
 98 |     for cur_cc_idx in range(1, num_c):  # index_0 is the background
 99 | 
100 |         if cur_cc_idx % 100 == 0:
101 |             print('processed', str(cur_cc_idx))
102 | 
103 |         centerline_indices = np.where(connected_map == cur_cc_idx)
104 | 
105 |         centerPoints = []
106 |         for i, j in zip(centerline_indices[0], centerline_indices[1]):
107 |             if localheight_map_o[i, j, 0] > 0:
108 |                 centerPoints.append([i, j])
109 | 
110 |         if len(centerPoints) == 0:
111 |             continue
112 | 
113 |         mini, minj = np.min(centerPoints, axis=0)
114 |         maxi, maxj = np.max(centerPoints, axis=0)
115 | 
116 |         localheight_result_o = np.zeros((maxi - mini + 100, maxj - minj + 100, 3), np.uint8)
117 | 
118 |         for i, j in centerPoints:
119 |             cv2.circle(localheight_result_o, (j - minj + 50, i - mini + 50), int(localheight_map_o[i][j] * 0.5),
120 |                        (0, 0, 255), -1)
121 | 
122 |         img_gray = cv2.cvtColor(localheight_result_o, cv2.COLOR_BGR2GRAY)
123 | 
124 |         contours, hierarchy = cv2.findContours(img_gray, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
125 | 
126 |         new_context = ''
127 | 
128 |         if len(contours) == 0:
129 |             continue
130 | 
131 |         for i in range(0, len(contours[0])):
132 |             if i < len(contours[0]) - 1:
133 |                 new_context = new_context + str(contours[0][i][0][0].item() + minj - 50) + ',' + str(
134 |                     contours[0][i][0][1].item() + mini - 50) + ','
135 |             else:
136 |                 new_context = new_context + str(contours[0][i][0][0].item() + minj - 50) + ',' + str(
137 |                     contours[0][i][0][1].item() + mini - 50)
138 | 
139 |         # new_context = new_context + '\n'
140 |         poly_str = new_context.split(',')
141 |         poly = []
142 |         for i in range(0, len(poly_str)):
143 |             if i % 2 == 0:
144 |                 poly.append((int(poly_str[i]), int(poly_str[i + 1])))
145 | 
146 |         try:
147 |             simple_poly = Polygon(poly).simplify(tolerance = 5, preserve_topology=False).exterior.coords[:] # tolerance is a hyper-param. Larger tolerance leads to fewer points
148 |             #poly_list.append(poly)
149 |             poly_list.append(simple_poly)
150 |         except:
151 |             poly_list.append(poly)
152 | 
153 |         # cv2.imwrite(output_path + 'prob_' + base_name[0:len(base_name) - 4] + '.jpg', prob_map_o)
154 |         # cv2.imwrite(output_path + 'cent_' + base_name[0:len(base_name) - 4] + '.jpg', center_map_o)
155 |         # cv2.imwrite(output_path + 'localheight_map_' + base_name[0:len(base_name) - 4] + '.jpg', localheight_map_o)
156 | 
157 | 
158 |     for i in range(0,len(poly_list)):
159 |         poly_points = np.array([poly_list[i]], dtype=np.int32)
160 |         cv2.polylines(map_img, poly_points, True, (0, 0, 255), 3)
161 | 
162 |     predictions_file = os.path.join(output_dir, map_id + '_predictions.jpg')
163 |     cv2.imwrite(predictions_file, map_img)
164 | 
165 | 
166 |     return poly_list
167 | 
168 | def write_annotation(map_id, output_dir, poly_list, handler = None):
169 | 
170 | 
171 |     if handler is not None: 
172 |         # perform this operation for WMTS tiles only
173 |         # based on the tile info, convert from image coordinate system to EPSG：4326
174 |         # assumes that the tilesize = 256x256
175 | 
176 |         tile_info = handler.tile_info
177 | 
178 |         min_tile_x = tile_info['min_x']
179 |         min_tile_y = tile_info['min_y']
180 | 
181 |         latlon_poly_list = []
182 |         for polygon in poly_list:
183 |             
184 |             if np.array(polygon).shape[0] == 0:
185 |                 continue
186 |                 
187 |             # process each polygon 
188 |             poly_x_list , poly_y_list = np.array(polygon)[:,0], np.array(polygon)[:,1] 
189 | 
190 |             # get corresponding tile index in the current map, i.e. tile shift range from min_tile_x ,min_tile_y
191 |             temp_tile_x_list, temp_tile_y_list = np.floor(poly_x_list/ 256.),  np.floor(poly_y_list/256.)
192 | 
193 |             # compute the starting tile idx that the polygon point lies in
194 |             tile_x_list, tile_y_list = min_tile_x + temp_tile_x_list , min_tile_y + temp_tile_y_list
195 | 
196 |             # get polygon point pixel location in its current tile
197 |             remainder_x_list, remainder_y_list = poly_x_list/256. - temp_tile_x_list , poly_y_list/256. - temp_tile_y_list
198 | 
199 |             # final position in EPSG:3857? 
200 |             tile_x_list, tile_y_list = tile_x_list + remainder_x_list, tile_y_list + remainder_y_list  
201 | 
202 |             # convert to EPSG:4326
203 |             lat_list, lon_list = handler._tile2latlon_list(tile_x_list, tile_y_list)
204 | 
205 |             # x=long, y = lat. so need to flip 
206 |             #latlon_poly = [[x,y] for x,y in zip(lon_list, lat_list)]
207 |             latlon_poly = [["{:.6f}".format(x),"{:.6f}".format(y)] for x,y in zip(lon_list, lat_list)]
208 | 
209 | 
210 |             latlon_poly_list.append(latlon_poly)
211 | 
212 |         poly_list = latlon_poly_list
213 |         # reassign latlon_poly_list to poly_list for consistency
214 | 
215 | 
216 |     # Generate web annotations: https://www.w3.org/TR/annotation-model/
217 |     annotations = []
218 |     for polygon in poly_list:
219 |         svg_polygon_coords = ' '.join([f"{x},{y}" for x, y in polygon])
220 |         annotation = {
221 |             "@context": "http://www.w3.org/ns/anno.jsonld",
222 |             "id": "",
223 |             #"body": [{
224 |             #    "type": "TextualBody",
225 |             #    "purpose": "tagging",
226 |             #    "value": "null"
227 |             #}],
228 |             "target": {
229 |                 "selector": [{
230 |                     "type": "SvgSelector",
231 |                     "value": f"<svg><polygon points='{svg_polygon_coords}'></polygon></svg>"
232 |                 }]
233 |             }
234 |         }
235 |         annotations.append(annotation)
236 | 
237 |     annotation_file = os.path.join(output_dir, map_id + '_annotations.json')
238 |     with open(annotation_file, 'w') as f:
239 |         f.write(json.dumps(annotations, indent=2))
240 | 
241 |     return annotation_file
242 |     # print(f"{polyList}")
243 | 
244 | 
245 | if __name__ == "__main__":
246 |     parser = argparse.ArgumentParser()
247 | 
248 |     arg_parser_common = argparse.ArgumentParser(add_help=False)
249 |     arg_parser_common.add_argument('--dst', required=True, type=str, help='path to output annotations file')
250 |     arg_parser_common.add_argument('--filename', required=False, type=str, help='output filename prefix')
251 |     arg_parser_common.add_argument('--coord', default = 'img_coord', required=False, type=str, choices = ['img_coord' ,'epsg4326'], help='return annotation in image coord or EPSG:4326')
252 | 
253 |     # parser.add_argument("input_type", choices=["wmts", "iiif", "tiff", "jpeg", "png"])
254 |     subparsers = parser.add_subparsers(dest='subcommand')
255 | 
256 |     arg_parser_wmts = subparsers.add_parser('wmts', parents=[arg_parser_common],
257 |                                             help='generate annotations for wmts input type')
258 |     arg_parser_wmts.add_argument('--url', required=True, type=str, help='getCapabilities url')
259 |     arg_parser_wmts.add_argument('--boundary', required=True, type=str, help='desired region boundary in GeoJSON')
260 |     arg_parser_wmts.add_argument('--zoom', default=14, type=int, help='desired zoom level')
261 | 
262 |     arg_parser_iiif = subparsers.add_parser('iiif', parents=[arg_parser_common],
263 |                                             help='generate annotations for iiif input type')
264 |     arg_parser_iiif.add_argument('--url', required=True, type=str, help='IIIF manifest url')
265 | 
266 |     arg_parser_raw_input = subparsers.add_parser('file', parents=[arg_parser_common])
267 |     arg_parser_raw_input.add_argument('--src', required=True, type=str, help='path to input image')
268 | 
269 |     args = parser.parse_args()
270 | 
271 |     map_path = None
272 |     output_dir = args.dst
273 | 
274 |     if args.filename is not None:
275 |         img_id = args.filename
276 |     else:
277 |         img_id = str(uuid.uuid4())
278 | 
279 |     if not os.path.isdir(output_dir):
280 |         os.makedirs(output_dir)
281 | 
282 | 
283 |     if args.coord == 'epsg4326':
284 |         assert args.subcommand == 'wmts'
285 | 
286 | 
287 |     if args.subcommand == 'wmts':
288 |         '''
289 | time docker run -it -v $(pwd)/data/:/map-kurator/data -v $(pwd)/model:/map-kurator/model --rm --runtime=nvidia --gpus all  --workdir=/map-kurator map-kurator python model/predict_annotations.py wmts --url='https://wmts.maptiler.com/aHR0cDovL3dtdHMubWFwdGlsZXIuY29tL2FIUjBjSE02THk5dFlYQnpaWEpwWlhNdGRHbHNaWE5sZEhNdWN6TXVZVzFoZW05dVlYZHpMbU52YlM4eU5WOXBibU5vTDNsdmNtdHphR2x5WlM5dFpYUmhaR0YwWVM1cWMyOXUvanNvbg/wmts' --boundary='{"type":"Feature","properties":{},"geometry":{"type":"Polygon","coordinates":[[[-1.1248,53.9711],[-1.0592,53.9711],[-1.0592,53.9569],[-1.1248,53.9569],[-1.1248,53.9711]]]}}' --zoom=16 --dst=data/test_imgs/sample_output/
290 |         '''
291 | 
292 |         wmts_handler = WMTSHandler(url=args.url, bounds=args.boundary, zoom=args.zoom, output_dir=output_dir, img_filename=img_id + '_stitched.jpg')
293 |         map_path = wmts_handler.process_wmts()
294 | 
295 |         poly_list = run_model(img_id, map_path, output_dir)
296 |         if args.coord == 'img_coord':
297 |             annotation_file = write_annotation(img_id, output_dir, poly_list)
298 |         else:
299 |             annotation_file = write_annotation(img_id, output_dir, poly_list, handler = wmts_handler)
300 | 
301 |     if args.subcommand == 'iiif':
302 |         '''
303 | time docker run -it -v $(pwd)/data/:/map-kurator/data -v $(pwd)/model:/map-kurator/model --rm --runtime=nvidia --gpus all  --workdir=/map-kurator map-kurator python model/predict_annotations.py iiif --url='https://map-view.nls.uk/iiif/2/12563%2F125635459/info.json' --dst=data/test_imgs/sample_output/
304 |         '''
305 |         start_download = time.time()
306 |         iiif_handler = IIIFHandler(args.url, output_dir, img_filename=img_id + '_stitched.jpg')
307 |         map_path = iiif_handler.process_url()
308 | 
309 |         end_download = time.time()
310 | 
311 |         poly_list = run_model(img_id, map_path, output_dir)
312 |         annotation_file = write_annotation(img_id, output_dir, poly_list)
313 | 
314 |         end_detection = time.time()
315 | 
316 |         print('download time: ', end_download - start_download)
317 |         print('detection time: ', end_detection - end_download)
318 | 
319 | 
320 |     if args.subcommand == 'file':
321 |         '''
322 | time docker run -it -v $(pwd)/data/:/map-kurator/data -v $(pwd)/model:/map-kurator/model --rm --runtime=nvidia --gpus all  --workdir=/map-kurator map-kurator python model/predict_annotations.py file --src=data/test_imgs/sample_input/101201496_h10w3.jpg --dst=data/test_imgs/sample_output/
323 |         '''
324 |         map_path = args.src
325 | 
326 |         poly_list = run_model(img_id, map_path, output_dir)
327 |         annotation_file = write_annotation(img_id, output_dir, poly_list)
328 | 
329 | 
330 |     
331 | 
332 |     print("done")
333 |     print(annotation_file)
334 | 


--------------------------------------------------------------------------------
/model/mymodel.py:
--------------------------------------------------------------------------------
  1 | import keras                              
  2 | from keras.models import Sequential  
  3 | from keras.layers import Dense, Dropout, Flatten , Activation
  4 | from keras.layers import Conv2D, MaxPooling2D    
  5 | from keras import backend as K                   
  6 | from keras.callbacks import Callback             
  7 | from keras.layers import Lambda, Input, Dense, Concatenate ,Conv2DTranspose 
  8 | from keras.layers import LeakyReLU,BatchNormalization,AveragePooling2D,Reshape 
  9 | from keras.layers import UpSampling2D,ZeroPadding2D
 10 | from keras.losses import mse, binary_crossentropy                           
 11 | from keras.models import Model        
 12 | from keras.layers import Lambda,TimeDistributed
 13 | from keras import layers
 14 | 
 15 | def UNET(pretrained_weights = None,input_size = (256,256,3)):
 16 |     inputs = Input(input_size)
 17 |     conv1 = Conv2D(16, 3, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal', name = 'conv1-1')(inputs)
 18 |     conv1 = Conv2D(16, 3, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal', name = 'conv1-2')(conv1)
 19 |     pool1 = MaxPooling2D(pool_size=(2, 2))(conv1)
 20 |     
 21 |     conv2 = Conv2D(32, 3, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal', name = 'conv2-1')(pool1)
 22 |     conv2 = Conv2D(32, 3, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal', name = 'conv2-2')(conv2)
 23 |     pool2 = MaxPooling2D(pool_size=(2, 2))(conv2)
 24 |     
 25 |     conv3 = Conv2D(64, 3, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal', name = 'conv3-1')(pool2)
 26 |     conv3 = Conv2D(64, 3, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal', name = 'conv3-2')(conv3)
 27 |     pool3 = MaxPooling2D(pool_size=(2, 2))(conv3)
 28 |     
 29 |     conv4 = Conv2D(128, 3, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal', name = 'conv4-1')(pool3)
 30 |     conv4 = Conv2D(128, 3, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal', name = 'conv4-2')(conv4)
 31 |     drop4 = Dropout(0.5)(conv4)
 32 |     pool4 = MaxPooling2D(pool_size=(2, 2))(drop4)
 33 | 
 34 |     conv5 = Conv2D(256, 3, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal', name = 'conv5-1')(pool4)
 35 |     conv5 = Conv2D(256, 3, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal', name = 'conv5-2')(conv5)
 36 |     drop5 = Dropout(0.5)(conv5)
 37 |     pool5 = MaxPooling2D(pool_size=(2, 2))(drop5)
 38 |     
 39 |     conv6 = Conv2D(256, 3, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal', name = 'conv6-1')(pool5)
 40 |     conv6 = Conv2D(256, 3, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal', name = 'conv6-2')(conv6)
 41 |     drop6 = Dropout(0.5)(conv6)
 42 |     
 43 |     #conv7 = Conv2D(256, 3, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal', name = 'conv7-1')(pool6)
 44 |     #conv7 = Conv2D(256, 3, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal', name = 'conv7-2')(conv7)
 45 |     #drop7 = Dropout(0.5)(conv7)
 46 |     
 47 |     #//////////////////////////////////////////////////////////
 48 | 
 49 |     up6 = Conv2D(512, 2, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal', name = 'conv6u-0')(UpSampling2D(size = (2,2))(conv6))
 50 |     #merge6 = concatenate([drop4,up6], axis = 3)
 51 |     conv6u = Conv2D(512, 3, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal', name = 'conv6u-1')(up6)
 52 |     conv6u = Conv2D(512, 3, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal', name = 'conv6u-2')(conv6u)
 53 | 
 54 |     up7 = Conv2D(256, 2, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal', name = 'conv7u-0')(UpSampling2D(size = (2,2))(conv6u))
 55 |     #merge7 = concatenate([conv3,up7], axis = 3)
 56 |     conv7u = Conv2D(256, 3, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal', name = 'conv7u-1')(up7)
 57 |     conv7u = Conv2D(256, 3, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal', name = 'conv7u-2')(conv7u)
 58 | 
 59 |     up8 = Conv2D(128, 2, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal', name = 'conv8u-0')(UpSampling2D(size = (2,2))(conv7u))
 60 |     #merge8 = concatenate([conv2,up8], axis = 3)
 61 |     conv8u = Conv2D(128, 3, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal', name = 'conv8u-1')(up8)
 62 |     conv8u = Conv2D(128, 3, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal', name = 'conv8u-2')(conv8u)
 63 | 
 64 |     up9 = Conv2D(64, 2, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal', name = 'conv9u-0')(UpSampling2D(size = (2,2))(conv8u))
 65 |     #merge9 = concatenate([conv1,up9], axis = 3)
 66 |     conv9u = Conv2D(64, 3, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal', name = 'conv9u-1')(up9)
 67 |     conv9u = Conv2D(64, 3, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal', name = 'conv9u-2')(conv9u)
 68 |     
 69 |     
 70 |     up10 = Conv2D(64, 2, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal', name = 'conv10u-0')(UpSampling2D(size = (2,2))(conv9u))
 71 |     #merge9 = concatenate([conv1,up9], axis = 3)
 72 |     conv10u = Conv2D(64, 3, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal', name = 'conv10u-1')(up10)
 73 |     conv10u = Conv2D(64, 3, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal', name = 'conv10u-2')(conv10u)    
 74 |     conv10u = Conv2D(3, 3, activation = 'sigmoid', padding = 'same', kernel_initializer = 'he_normal', name = 'conv10u-3')(conv10u)
 75 | 
 76 |     model = Model(inputs, conv10u)
 77 | 
 78 |     #model.compile(optimizer = Adam(lr = 1e-4), loss = 'binary_crossentropy', metrics = ['accuracy'])
 79 | 
 80 |     if(pretrained_weights):
 81 |         model.load_weights(pretrained_weights)
 82 | 
 83 |     return model
 84 | 
 85 | def model_U_VGG():
 86 |     #input_shape = (720, 1280, 3)
 87 |     #input_shape = (512,512,3)
 88 |     input_shape = (None,None,3)
 89 |     inputs = Input(shape=input_shape, name='input') 
 90 | 
 91 | 
 92 |     # Block 1
 93 |     x0 = layers.Conv2D(64, (3, 3),
 94 |                       activation='relu',
 95 |                       padding='same',
 96 |                       name='block1_conv1')(inputs)
 97 |     x0 = layers.Conv2D(64, (3, 3),
 98 |                       activation='relu',
 99 |                       padding='same',
100 |                       name='block1_conv2')(x0)
101 |     x0 = layers.MaxPooling2D((2, 2), strides=(2, 2), name='block1_pool')(x0)
102 | 
103 |     # Block 2
104 |     x1 = layers.Conv2D(128, (3, 3),
105 |                       activation='relu',
106 |                       padding='same',
107 |                       name='block2_conv1')(x0)
108 |     x1 = layers.Conv2D(128, (3, 3),
109 |                       activation='relu',
110 |                       padding='same',
111 |                       name='block2_conv2')(x1)
112 |     x1 = layers.MaxPooling2D((2, 2), strides=(2, 2), name='block2_pool')(x1)
113 | 
114 |     # Block 3
115 |     x2 = layers.Conv2D(256, (3, 3),
116 |                       activation='relu',
117 |                       padding='same',
118 |                       name='block3_conv1')(x1)
119 |     x2 = layers.Conv2D(256, (3, 3),
120 |                       activation='relu',
121 |                       padding='same',
122 |                       name='block3_conv2')(x2)
123 |     x2_take = layers.Conv2D(256, (3, 3),
124 |                       activation='relu',
125 |                       padding='same',
126 |                       name='block3_conv3')(x2)
127 |     x2 = layers.MaxPooling2D((2, 2), strides=(2, 2), name='block3_pool')(x2_take)
128 | 
129 |     # Block 4
130 |     x3 = layers.Conv2D(512, (3, 3),
131 |                       activation='relu',
132 |                       padding='same',
133 |                       name='block4_conv1')(x2)
134 |     x3 = layers.Conv2D(512, (3, 3),
135 |                       activation='relu',
136 |                       padding='same',
137 |                       name='block4_conv2')(x3)
138 |     x3_take = layers.Conv2D(512, (3, 3),
139 |                       activation='relu',
140 |                       padding='same',
141 |                       name='block4_conv3')(x3)
142 |     x3 = layers.MaxPooling2D((2, 2), strides=(2, 2), name='block4_pool')(x3_take)
143 | 
144 |     # Block 5
145 |     x4 = layers.Conv2D(512, (3, 3),
146 |                       activation='relu',
147 |                       padding='same',
148 |                       name='block5_conv1')(x3)
149 |     x4 = layers.Conv2D(512, (3, 3),
150 |                       activation='relu',
151 |                       padding='same',
152 |                       name='block5_conv2')(x4)
153 |     x4_take = layers.Conv2D(512, (3, 3),
154 |                       activation='relu',
155 |                       padding='same',
156 |                       name='block5_conv3')(x4)
157 |     x4 = layers.MaxPooling2D((2, 2), strides=(2, 2), name='block5_pool')(x4_take)
158 | 
159 |     #f1 = UpSampling2D((2,2))(x4) 
160 |     #if TASK_4:
161 |     #    f1 = ZeroPadding2D(padding=((1,0), (0,0)), name = 'f1')(f1)
162 |     f1 = x4_take
163 |     f2 = x3
164 |     h1 = Concatenate()([f2, f1])
165 |     h1 = layers.Conv2D(128, (1, 1),
166 |                       activation='relu',
167 |                       padding='same',
168 |                       name='up1_1')(h1)
169 | 
170 |     h1 = layers.Conv2D(128, (3, 3),
171 |                       activation='relu',
172 |                       padding='same',
173 |                       name='up1_2')(h1)
174 | 
175 | 
176 |     h2 = Concatenate()([x2, UpSampling2D((2,2))(h1)])
177 |     h2 = layers.Conv2D(64, (1,1),
178 |                     activation = 'relu',
179 |                     padding = 'same',
180 |                     name = 'up2_1')(h2)
181 |     h2 = layers.Conv2D(64, (3,3),
182 |                     activation = 'relu',
183 |                     padding = 'same',
184 |                     name = 'up2_2')(h2)
185 | 
186 |     h3 = Concatenate()([x1, UpSampling2D((2,2))(h2)])
187 |     h3 = layers.Conv2D(32, (1,1),
188 |                     activation = 'relu',
189 |                     padding = 'same',
190 |                     name = 'up3_1')(h3)
191 |     h3 = layers.Conv2D(32, (3,3),
192 |                     activation = 'relu',
193 |                     padding = 'same',
194 |                     name = 'up3_2')(h3)
195 | 
196 |     h4 = Concatenate()([x0, UpSampling2D((2,2))(h3)])
197 |     h4 = layers.Conv2D(32, (1,1),
198 |                     activation = 'relu',
199 |                     padding = 'same',
200 |                     name = 'up4_1')(h4)
201 |     h4 = layers.Conv2D(32, (3,3),
202 |                     activation = 'relu',
203 |                     padding = 'same',
204 |                     name = 'up4_2')(h4)
205 | 
206 |     h5 =  Concatenate()([inputs, UpSampling2D((2,2))(h4)])
207 |     h5 = layers.Conv2D(16, (1,1),
208 |                     activation = 'relu',
209 |                     padding = 'same',
210 |                     name = 'up5_1')(h5)
211 |     ################## output for TEXT/NON-TEXT ############
212 |     
213 |     o1 = layers.Conv2D(3, (3,3),
214 |                     activation = 'softmax',
215 |                     padding = 'same',
216 |                     name = 'up5_2')(h5)
217 |     
218 |     ################ Regression ###########################
219 |     b1 = Concatenate(name = 'agg_feat-1')([x4_take, h1]) # block_conv3, up1_2 # 32,32,630
220 |     b1 = layers.Conv2DTranspose(128, (3,3), strides=(2, 2), padding='same', 
221 |                                 activation = 'relu', name = 'agg_feat-2')(b1) # 64,64,128
222 |    
223 |     #------ xy regression -------
224 |     o2 = layers.Conv2DTranspose(64, (3,3), strides = (2,2),padding = 'same',
225 |                                 activation = 'relu', name = 'regress-1-1')(b1) # 128,128, 32
226 |     o2 = layers.Conv2DTranspose(32, (3,3), strides = (1,1),padding = 'same',
227 |                                 activation = 'relu', name = 'regress-1-2')(o2) # 128,128, 32
228 |     o2 = layers.Conv2DTranspose(16, (3,3),strides = (2,2), padding = 'same', 
229 |                                 activation = 'relu',name = 'regress-1-3')(o2) # 256,256, 8
230 |     o2 = layers.Conv2DTranspose(8, (3,3),strides = (1,1), padding = 'same', 
231 |                                 activation = 'relu',name = 'regress-1-4')(o2) # 256,256, 8
232 |     o2 = layers.Conv2DTranspose(4, (3,3),strides = (2,2), padding = 'same',
233 |                                 activation = 'relu', name = 'regress-1-5')(o2) # 512,512, 2
234 |     o2 = layers.Conv2DTranspose(2, (3,3),strides = (1,1), padding = 'same',
235 |                                 activation = 'tanh', name = 'regress-1-6')(o2) # 512,512, 2
236 |     
237 |     #------ wh regression -------
238 |     o4 = layers.Conv2DTranspose(64, (3,3), strides = (2,2),padding = 'same', 
239 |                                 activation = 'relu',name = 'regress-3-1')(b1) # 128,128, 32
240 |     o4 = layers.Conv2DTranspose(32, (3,3), strides = (1,1),padding = 'same', 
241 |                                 activation = 'relu',name = 'regress-3-2')(o4) # 128,128, 32
242 |     o4 = layers.Conv2DTranspose(16, (3,3),strides = (2,2), padding = 'same', 
243 |                                 activation = 'relu', name = 'regress-3-3')(o4) # 256,256, 8
244 |     o4 = layers.Conv2DTranspose(8, (3,3),strides = (1,1), padding = 'same', 
245 |                                 activation = 'relu', name = 'regress-3-4')(o4) # 256,256, 8
246 |     o4 = layers.Conv2DTranspose(4, (3,3),strides = (2,2), padding = 'same', 
247 |                                 activation = 'relu', name = 'regress-3-5')(o4) # 256,256, 8
248 |     o4 = layers.Conv2DTranspose(2, (3,3),strides = (1,1), padding = 'same', 
249 |                                 activation = 'sigmoid',name = 'regress-3-6')(o4) # 512,512, 2
250 |     
251 |     # ------ sin/cos regression -------
252 |     b2 = Concatenate()([x3_take, b1]) # block4_conv3, agg_feat-2 # 64,64,630
253 |     b2 = layers.Conv2DTranspose(128, (3,3), strides=(2, 2), padding='same', 
254 |                                 activation = 'relu', name = 'regress-2-1')(b2) # 128, 128, 128
255 |     o3 = Concatenate()([x2_take, b2 ]) # block3_conv3, agg_feat-3 # 128, 128, (256+128)
256 |     o3 = layers.Conv2DTranspose(32, (3,3),strides = (2,2),padding = 'same',
257 |                                 activation = 'relu', name = 'regress-2-2')(o3) # 256,256, 32
258 |     o3 = layers.Conv2DTranspose(2, (3,3),strides = (2,2),padding = 'same',
259 |                                 activation = 'tanh', name = 'regress-2-3')(o3) # 512,512,2
260 |     
261 | 
262 |     model =  Model(inputs, [o1,o2,o3,o4], name = 'U-VGG-model')
263 |     
264 |     return model
265 | 
266 | 
267 | 
268 | def model_U_VGG_Centerline():
269 |     #input_shape = (720, 1280, 3)
270 |     #input_shape = (512,512,3)
271 |     input_shape = (None,None,3)
272 |     inputs = Input(shape=input_shape, name='input') 
273 | 
274 | 
275 |     # Block 1
276 |     x0 = layers.Conv2D(64, (3, 3),
277 |                       activation='relu',
278 |                       padding='same',
279 |                       name='block1_conv1')(inputs)
280 |     x0 = layers.Conv2D(64, (3, 3),
281 |                       activation='relu',
282 |                       padding='same',
283 |                       name='block1_conv2')(x0)
284 |     x0 = layers.MaxPooling2D((2, 2), strides=(2, 2), name='block1_pool')(x0)
285 | 
286 |     # Block 2
287 |     x1 = layers.Conv2D(128, (3, 3),
288 |                       activation='relu',
289 |                       padding='same',
290 |                       name='block2_conv1')(x0)
291 |     x1 = layers.Conv2D(128, (3, 3),
292 |                       activation='relu',
293 |                       padding='same',
294 |                       name='block2_conv2')(x1)
295 |     x1 = layers.MaxPooling2D((2, 2), strides=(2, 2), name='block2_pool')(x1)
296 | 
297 |     # Block 3
298 |     x2 = layers.Conv2D(256, (3, 3),
299 |                       activation='relu',
300 |                       padding='same',
301 |                       name='block3_conv1')(x1)
302 |     x2 = layers.Conv2D(256, (3, 3),
303 |                       activation='relu',
304 |                       padding='same',
305 |                       name='block3_conv2')(x2)
306 |     x2_take = layers.Conv2D(256, (3, 3),
307 |                       activation='relu',
308 |                       padding='same',
309 |                       name='block3_conv3')(x2)
310 |     x2 = layers.MaxPooling2D((2, 2), strides=(2, 2), name='block3_pool')(x2_take)
311 | 
312 |     # Block 4
313 |     x3 = layers.Conv2D(512, (3, 3),
314 |                       activation='relu',
315 |                       padding='same',
316 |                       name='block4_conv1')(x2)
317 |     x3 = layers.Conv2D(512, (3, 3),
318 |                       activation='relu',
319 |                       padding='same',
320 |                       name='block4_conv2')(x3)
321 |     x3_take = layers.Conv2D(512, (3, 3),
322 |                       activation='relu',
323 |                       padding='same',
324 |                       name='block4_conv3')(x3)
325 |     x3 = layers.MaxPooling2D((2, 2), strides=(2, 2), name='block4_pool')(x3_take)
326 | 
327 |     # Block 5
328 |     x4 = layers.Conv2D(512, (3, 3),
329 |                       activation='relu',
330 |                       padding='same',
331 |                       name='block5_conv1')(x3)
332 |     x4 = layers.Conv2D(512, (3, 3),
333 |                       activation='relu',
334 |                       padding='same',
335 |                       name='block5_conv2')(x4)
336 |     x4_take = layers.Conv2D(512, (3, 3),
337 |                       activation='relu',
338 |                       padding='same',
339 |                       name='block5_conv3')(x4)
340 |     x4 = layers.MaxPooling2D((2, 2), strides=(2, 2), name='block5_pool')(x4_take)
341 | 
342 |     #f1 = UpSampling2D((2,2))(x4) 
343 |     #if TASK_4:
344 |     #    f1 = ZeroPadding2D(padding=((1,0), (0,0)), name = 'f1')(f1)
345 |     f1 = x4_take
346 |     f2 = x3
347 |     h1 = Concatenate()([f2, f1])
348 |     h1 = layers.Conv2D(128, (1, 1),
349 |                       activation='relu',
350 |                       padding='same',
351 |                       name='up1_1')(h1)
352 | 
353 |     h1 = layers.Conv2D(128, (3, 3),
354 |                       activation='relu',
355 |                       padding='same',
356 |                       name='up1_2')(h1)
357 | 
358 | 
359 |     h2 = Concatenate()([x2, UpSampling2D((2,2))(h1)])
360 |     h2 = layers.Conv2D(64, (1,1),
361 |                     activation = 'relu',
362 |                     padding = 'same',
363 |                     name = 'up2_1')(h2)
364 |     h2 = layers.Conv2D(64, (3,3),
365 |                     activation = 'relu',
366 |                     padding = 'same',
367 |                     name = 'up2_2')(h2)
368 | 
369 |     h3 = Concatenate()([x1, UpSampling2D((2,2))(h2)])
370 |     h3 = layers.Conv2D(32, (1,1),
371 |                     activation = 'relu',
372 |                     padding = 'same',
373 |                     name = 'up3_1')(h3)
374 |     h3 = layers.Conv2D(32, (3,3),
375 |                     activation = 'relu',
376 |                     padding = 'same',
377 |                     name = 'up3_2')(h3)
378 | 
379 |     h4_take = Concatenate()([x0, UpSampling2D((2,2))(h3)])
380 | 
381 |     h4 = layers.Conv2D(32, (1,1),
382 |                     activation = 'relu',
383 |                     padding = 'same',
384 |                     name = 'up4_1')(h4_take)
385 |     h4 = layers.Conv2D(32, (3,3),
386 |                     activation = 'relu',
387 |                     padding = 'same',
388 |                     name = 'up4_2')(h4)
389 | 
390 |     h5 =  Concatenate()([inputs, UpSampling2D((2,2))(h4)])
391 |     h5 = layers.Conv2D(16, (1,1),
392 |                     activation = 'relu',
393 |                     padding = 'same',
394 |                     name = 'up5_1')(h5)
395 |     ################## output for TEXT/NON-TEXT ############
396 |     
397 |     o1 = layers.Conv2D(3, (3,3),
398 |                     activation = 'softmax',
399 |                     padding = 'same',
400 |                     name = 'up5_2')(h5)
401 |     ################## output for centerline /other ###########
402 |     h41 = layers.Conv2D(32, (1,1),
403 |                     activation = 'relu',
404 |                     padding = 'same',
405 |                     name = 'up41_1')(h4_take)
406 |     h41 = layers.Conv2D(32, (3,3),
407 |                     activation = 'relu',
408 |                     padding = 'same',
409 |                     name = 'up41_2')(h41)
410 | 
411 |     h51 =  Concatenate()([inputs, UpSampling2D((2,2))(h41)])
412 |     h51 = layers.Conv2D(16, (1,1),
413 |                     activation = 'relu',
414 |                     padding = 'same',
415 |                     name = 'up51_1')(h51)
416 |     
417 |     o11 = layers.Conv2D(2, (3,3),
418 |                     activation = 'softmax',
419 |                     padding = 'same',
420 |                     name = 'up51_2')(h51)
421 |     
422 |     ################ Regression ###########################
423 |     b1 = Concatenate(name = 'agg_feat-1')([x4_take, h1]) # block_conv3, up1_2 # 32,32,630
424 |     b1 = layers.Conv2DTranspose(128, (3,3), strides=(2, 2), padding='same', 
425 |                                 activation = 'relu', name = 'agg_feat-2')(b1) # 64,64,128
426 |    
427 |     #------ xy regression -------
428 |     o2 = layers.Conv2DTranspose(64, (3,3), strides = (2,2),padding = 'same',
429 |                                 activation = 'relu', name = 'regress-1-1')(b1) # 128,128, 32
430 |     o2 = layers.Conv2DTranspose(32, (3,3), strides = (1,1),padding = 'same',
431 |                                 activation = 'relu', name = 'regress-1-2')(o2) # 128,128, 32
432 |     o2 = layers.Conv2DTranspose(16, (3,3),strides = (2,2), padding = 'same', 
433 |                                 activation = 'relu',name = 'regress-1-3')(o2) # 256,256, 8
434 |     o2 = layers.Conv2DTranspose(8, (3,3),strides = (1,1), padding = 'same', 
435 |                                 activation = 'relu',name = 'regress-1-4')(o2) # 256,256, 8
436 |     o2 = layers.Conv2DTranspose(4, (3,3),strides = (2,2), padding = 'same',
437 |                                 activation = 'relu', name = 'regress-1-5')(o2) # 512,512, 2
438 |     o2 = layers.Conv2DTranspose(2, (3,3),strides = (1,1), padding = 'same',
439 |                                 activation = 'tanh', name = 'regress-1-6')(o2) # 512,512, 2
440 |     
441 |     #------ wh regression -------
442 |     o4 = layers.Conv2DTranspose(64, (3,3), strides = (2,2),padding = 'same', 
443 |                                 activation = 'relu',name = 'regress-3-1')(b1) # 128,128, 32
444 |     o4 = layers.Conv2DTranspose(32, (3,3), strides = (1,1),padding = 'same', 
445 |                                 activation = 'relu',name = 'regress-3-2')(o4) # 128,128, 32
446 |     o4 = layers.Conv2DTranspose(16, (3,3),strides = (2,2), padding = 'same', 
447 |                                 activation = 'relu', name = 'regress-3-3')(o4) # 256,256, 8
448 |     o4 = layers.Conv2DTranspose(8, (3,3),strides = (1,1), padding = 'same', 
449 |                                 activation = 'relu', name = 'regress-3-4')(o4) # 256,256, 8
450 |     o4 = layers.Conv2DTranspose(4, (3,3),strides = (2,2), padding = 'same', 
451 |                                 activation = 'relu', name = 'regress-3-5')(o4) # 256,256, 8
452 |     o4 = layers.Conv2DTranspose(2, (3,3),strides = (1,1), padding = 'same', 
453 |                                 activation = 'sigmoid',name = 'regress-3-6')(o4) # 512,512, 2
454 |     
455 |     # ------ sin/cos regression -------
456 |     b2 = Concatenate()([x3_take, b1]) # block4_conv3, agg_feat-2 # 64,64,630
457 |     b2 = layers.Conv2DTranspose(128, (3,3), strides=(2, 2), padding='same', 
458 |                                 activation = 'relu', name = 'regress-2-1')(b2) # 128, 128, 128
459 |     o3 = Concatenate()([x2_take, b2 ]) # block3_conv3, agg_feat-3 # 128, 128, (256+128)
460 |     o3 = layers.Conv2DTranspose(32, (3,3),strides = (2,2),padding = 'same',
461 |                                 activation = 'relu', name = 'regress-2-2')(o3) # 256,256, 32
462 |     o3 = layers.Conv2DTranspose(2, (3,3),strides = (2,2),padding = 'same',
463 |                                 activation = 'tanh', name = 'regress-2-3')(o3) # 512,512,2
464 | 
465 | 
466 | 
467 |     #o1: t/nt, o11:centerline, o2:x,y, o3:sin,cos, o4:bounding box width,height
468 |     model =  Model(inputs, [o1,o11, o2,o3,o4], name = 'U-VGG-model')
469 | 
470 |     
471 |     return model
472 | 
473 | 
474 | def model_U_VGG_Centerline_Localheight():
475 |     # input_shape = (720, 1280, 3)
476 |     # input_shape = (512,512,3)
477 |     input_shape = (None, None, 3)
478 |     inputs = Input(shape=input_shape, name='input')
479 | 
480 |     # Block 1
481 |     x0 = layers.Conv2D(64, (3, 3),
482 |                        activation='relu',
483 |                        padding='same',
484 |                        name='block1_conv1')(inputs)
485 |     x0 = layers.Conv2D(64, (3, 3),
486 |                        activation='relu',
487 |                        padding='same',
488 |                        name='block1_conv2')(x0)
489 |     x0 = layers.MaxPooling2D((2, 2), strides=(2, 2), name='block1_pool')(x0)
490 | 
491 |     # Block 2
492 |     x1 = layers.Conv2D(128, (3, 3),
493 |                        activation='relu',
494 |                        padding='same',
495 |                        name='block2_conv1')(x0)
496 |     x1 = layers.Conv2D(128, (3, 3),
497 |                        activation='relu',
498 |                        padding='same',
499 |                        name='block2_conv2')(x1)
500 |     x1 = layers.MaxPooling2D((2, 2), strides=(2, 2), name='block2_pool')(x1)
501 | 
502 |     # Block 3
503 |     x2 = layers.Conv2D(256, (3, 3),
504 |                        activation='relu',
505 |                        padding='same',
506 |                        name='block3_conv1')(x1)
507 |     x2 = layers.Conv2D(256, (3, 3),
508 |                        activation='relu',
509 |                        padding='same',
510 |                        name='block3_conv2')(x2)
511 |     x2_take = layers.Conv2D(256, (3, 3),
512 |                             activation='relu',
513 |                             padding='same',
514 |                             name='block3_conv3')(x2)
515 |     x2 = layers.MaxPooling2D((2, 2), strides=(2, 2), name='block3_pool')(x2_take)
516 | 
517 |     # Block 4
518 |     x3 = layers.Conv2D(512, (3, 3),
519 |                        activation='relu',
520 |                        padding='same',
521 |                        name='block4_conv1')(x2)
522 |     x3 = layers.Conv2D(512, (3, 3),
523 |                        activation='relu',
524 |                        padding='same',
525 |                        name='block4_conv2')(x3)
526 |     x3_take = layers.Conv2D(512, (3, 3),
527 |                             activation='relu',
528 |                             padding='same',
529 |                             name='block4_conv3')(x3)
530 |     x3 = layers.MaxPooling2D((2, 2), strides=(2, 2), name='block4_pool')(x3_take)
531 | 
532 |     # Block 5
533 |     x4 = layers.Conv2D(512, (3, 3),
534 |                        activation='relu',
535 |                        padding='same',
536 |                        name='block5_conv1')(x3)
537 |     x4 = layers.Conv2D(512, (3, 3),
538 |                        activation='relu',
539 |                        padding='same',
540 |                        name='block5_conv2')(x4)
541 |     x4_take = layers.Conv2D(512, (3, 3),
542 |                             activation='relu',
543 |                             padding='same',
544 |                             name='block5_conv3')(x4)
545 |     x4 = layers.MaxPooling2D((2, 2), strides=(2, 2), name='block5_pool')(x4_take)
546 | 
547 |     # f1 = UpSampling2D((2,2))(x4)
548 |     # if TASK_4:
549 |     #    f1 = ZeroPadding2D(padding=((1,0), (0,0)), name = 'f1')(f1)
550 |     f1 = x4_take
551 |     f2 = x3
552 |     h1 = Concatenate()([f2, f1])
553 |     h1 = layers.Conv2D(128, (1, 1),
554 |                        activation='relu',
555 |                        padding='same',
556 |                        name='up1_1')(h1)
557 | 
558 |     h1 = layers.Conv2D(128, (3, 3),
559 |                        activation='relu',
560 |                        padding='same',
561 |                        name='up1_2')(h1)
562 | 
563 |     h2 = Concatenate()([x2, UpSampling2D((2, 2))(h1)])
564 |     h2 = layers.Conv2D(64, (1, 1),
565 |                        activation='relu',
566 |                        padding='same',
567 |                        name='up2_1')(h2)
568 |     h2 = layers.Conv2D(64, (3, 3),
569 |                        activation='relu',
570 |                        padding='same',
571 |                        name='up2_2')(h2)
572 | 
573 |     h3 = Concatenate()([x1, UpSampling2D((2, 2))(h2)])
574 |     h3 = layers.Conv2D(32, (1, 1),
575 |                        activation='relu',
576 |                        padding='same',
577 |                        name='up3_1')(h3)
578 |     h3 = layers.Conv2D(32, (3, 3),
579 |                        activation='relu',
580 |                        padding='same',
581 |                        name='up3_2')(h3)
582 | 
583 |     h4_take = Concatenate()([x0, UpSampling2D((2, 2))(h3)])
584 | 
585 |     h4 = layers.Conv2D(32, (1, 1),
586 |                        activation='relu',
587 |                        padding='same',
588 |                        name='up4_1')(h4_take)
589 |     h4 = layers.Conv2D(32, (3, 3),
590 |                        activation='relu',
591 |                        padding='same',
592 |                        name='up4_2')(h4)
593 | 
594 |     h5 = Concatenate()([inputs, UpSampling2D((2, 2))(h4)])
595 |     h5 = layers.Conv2D(16, (1, 1),
596 |                        activation='relu',
597 |                        padding='same',
598 |                        name='up5_1')(h5)
599 |     ################## output for TEXT/NON-TEXT ############
600 | 
601 |     o1 = layers.Conv2D(3, (3, 3),
602 |                        activation='softmax',
603 |                        padding='same',
604 |                        name='up5_2')(h5)
605 |     ################## output for centerline /other ###########
606 |     h41 = layers.Conv2D(32, (1, 1),
607 |                         activation='relu',
608 |                         padding='same',
609 |                         name='up41_1')(h4_take)
610 |     h41 = layers.Conv2D(32, (3, 3),
611 |                         activation='relu',
612 |                         padding='same',
613 |                         name='up41_2')(h41)
614 | 
615 |     h51 = Concatenate()([inputs, UpSampling2D((2, 2))(h41)])
616 |     h51 = layers.Conv2D(16, (1, 1),
617 |                         activation='relu',
618 |                         padding='same',
619 |                         name='up51_1')(h51)
620 | 
621 |     o11 = layers.Conv2D(2, (3, 3),
622 |                         activation='softmax',
623 |                         padding='same',
624 |                         name='up51_2')(h51)
625 | 
626 |     ################ Regression ###########################
627 |     b1 = Concatenate(name='agg_feat-1')([x4_take, h1])  # block_conv3, up1_2 # 32,32,630
628 |     b1 = layers.Conv2DTranspose(128, (3, 3), strides=(2, 2), padding='same',
629 |                                 activation='relu', name='agg_feat-2')(b1)  # 64,64,128
630 | 
631 |     # ------ xy regression -------
632 |     o2 = layers.Conv2DTranspose(64, (3, 3), strides=(2, 2), padding='same',
633 |                                 activation='relu', name='regress-1-1')(b1)  # 128,128, 32
634 |     o2 = layers.Conv2DTranspose(32, (3, 3), strides=(1, 1), padding='same',
635 |                                 activation='relu', name='regress-1-2')(o2)  # 128,128, 32
636 |     o2 = layers.Conv2DTranspose(16, (3, 3), strides=(2, 2), padding='same',
637 |                                 activation='relu', name='regress-1-3')(o2)  # 256,256, 8
638 |     o2 = layers.Conv2DTranspose(8, (3, 3), strides=(1, 1), padding='same',
639 |                                 activation='relu', name='regress-1-4')(o2)  # 256,256, 8
640 |     o2 = layers.Conv2DTranspose(4, (3, 3), strides=(2, 2), padding='same',
641 |                                 activation='relu', name='regress-1-5')(o2)  # 512,512, 2
642 |     o2 = layers.Conv2DTranspose(2, (3, 3), strides=(1, 1), padding='same',
643 |                                 activation='tanh', name='regress-1-6')(o2)  # 512,512, 2
644 | 
645 |     # ------ wh regression -------
646 |     o4 = layers.Conv2DTranspose(64, (3, 3), strides=(2, 2), padding='same',
647 |                                 activation='relu', name='regress-3-1')(b1)  # 128,128, 32
648 |     o4 = layers.Conv2DTranspose(32, (3, 3), strides=(1, 1), padding='same',
649 |                                 activation='relu', name='regress-3-2')(o4)  # 128,128, 32
650 |     o4 = layers.Conv2DTranspose(16, (3, 3), strides=(2, 2), padding='same',
651 |                                 activation='relu', name='regress-3-3')(o4)  # 256,256, 8
652 |     o4 = layers.Conv2DTranspose(8, (3, 3), strides=(1, 1), padding='same',
653 |                                 activation='relu', name='regress-3-4')(o4)  # 256,256, 8
654 |     o4 = layers.Conv2DTranspose(4, (3, 3), strides=(2, 2), padding='same',
655 |                                 activation='relu', name='regress-3-5')(o4)  # 256,256, 8
656 |     o4 = layers.Conv2DTranspose(2, (3, 3), strides=(1, 1), padding='same',
657 |                                 activation='sigmoid', name='regress-3-6')(o4)  # 512,512, 2
658 | 
659 |     # ------ sin/cos regression -------
660 |     b2 = Concatenate()([x3_take, b1])  # block4_conv3, agg_feat-2 # 64,64,630
661 |     b2 = layers.Conv2DTranspose(128, (3, 3), strides=(2, 2), padding='same',
662 |                                 activation='relu', name='regress-2-1')(b2)  # 128, 128, 128
663 |     o3 = Concatenate()([x2_take, b2])  # block3_conv3, agg_feat-3 # 128, 128, (256+128)
664 |     o3 = layers.Conv2DTranspose(32, (3, 3), strides=(2, 2), padding='same',
665 |                                 activation='relu', name='regress-2-2')(o3)  # 256,256, 32
666 |     o3 = layers.Conv2DTranspose(2, (3, 3), strides=(2, 2), padding='same',
667 |                                 activation='tanh', name='regress-2-3')(o3)  # 512,512,2
668 | 
669 |     # ------ local height regression ------
670 |     o5 = layers.Conv2DTranspose(64, (3, 3), strides=(2, 2), padding='same',
671 |                                 activation='relu', name='regress-4-1')(b1)  # 128,128, 32
672 |     o5 = layers.Conv2DTranspose(32, (3, 3), strides=(1, 1), padding='same',
673 |                                 activation='relu', name='regress-4-2')(o5)  # 128,128, 32
674 |     o5 = layers.Conv2DTranspose(16, (3, 3), strides=(2, 2), padding='same',
675 |                                 activation='relu', name='regress-4-3')(o5)  # 256,256, 8
676 |     o5 = layers.Conv2DTranspose(8, (3, 3), strides=(1, 1), padding='same',
677 |                                 activation='relu', name='regress-4-4')(o5)  # 256,256, 8
678 |     o5 = layers.Conv2DTranspose(4, (3, 3), strides=(2, 2), padding='same',
679 |                                 activation='relu', name='regress-4-5')(o5)  # 256,256, 8
680 |     o5 = layers.Conv2DTranspose(2, (3, 3), strides=(1, 1), padding='same',
681 |                                 activation='relu', name='regress-4-6')(o5)  # 512,512, 2
682 |     o5 = layers.Conv2DTranspose(1, (3, 3), strides=(1, 1), padding='same',
683 |                                 activation='relu', name='regress-4-7')(o5)  # 512,512, 1
684 | 
685 |     # o1: t/nt, o11:centerline, o2:x,y, o3:sin,cos, o4:bounding box width,height, o5:localheight
686 |     # model =  Model(inputs, [o1,o11, o2,o3,o4], name = 'U-VGG-model')
687 |     model = Model(inputs, [o1, o11, o5], name='U-VGG-model-Localheight')
688 | 
689 |     return model
690 | 
691 | 


--------------------------------------------------------------------------------