├── data
│   ├── l_weights
│   │   └── .gitignore
│   └── test_imgs
│       ├── sample_input
│       │   └── .gitignore
│       └── sample_output
│           └── .gitignore
├── requirements.txt
├── .gitignore
├── model
│   ├── image_handler.py
│   ├── loss.py
│   ├── wmts_handler.py
│   ├── iiif_handler.py
│   ├── save_localheight_original_txt_fastzk.py
│   ├── predict_annotations.py
│   └── mymodel.py
├── Dockerfile
└── README.md
/data/l_weights/.gitignore:
--------------------------------------------------------------------------------
1 | *
2 | !.gitignore
--------------------------------------------------------------------------------
/data/test_imgs/sample_input/.gitignore:
--------------------------------------------------------------------------------
1 | *
2 | !.gitignore
--------------------------------------------------------------------------------
/data/test_imgs/sample_output/.gitignore:
--------------------------------------------------------------------------------
1 | *
2 | !.gitignore
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | xmltodict
2 | requests
3 | shapely
4 | rasterio
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | __pycache__
2 | .idea
3 | model/debug.py
4 | model/debug.txt
--------------------------------------------------------------------------------
/model/image_handler.py:
--------------------------------------------------------------------------------
1 | import cv2
2 | import numpy as np
3 | import rasterio
4 |
5 |
6 | class ImageHandler:  # placeholder handler for plain image-file input; currently only logs its calls
7 | def __init__(self):
8 | self.img = None
9 | print("ImageHandler")
10 |
11 | def process_img(self, args):
12 | print(f"ImageHandler: {args}")
13 |
14 |
--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM zekunli/zekun-keras-gpu
2 |
3 | WORKDIR /map-kurator
4 |
5 | # Install GDAL for Rasterio
6 | RUN add-apt-repository -y ppa:ubuntugis/ppa \
7 | && apt-get update -y \
8 | && apt-get install -y python-numpy gdal-bin libgdal-dev
9 |
10 | COPY requirements.txt requirements.txt
11 |
12 | RUN pip3 install -r requirements.txt
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # map-kurator
2 | Wrapper around Zekun's model to detect map text labels.
3 |
4 | [**UPDATE 2022/10**]: A more comprehensive pipeline that performs detection, recognition, image-to-geocoordinate conversion, and post-OCR processing is available here: https://github.com/knowledge-computing/mapkurator-system
5 |
6 |
7 | ## Installation
8 | ### 1. Installing Docker
9 | If the machine doesn't have Docker installed, you can follow the instructions (e.g., for Ubuntu) here: https://docs.docker.com/engine/install/ubuntu/
10 |
11 | In particular, here are the commands I ran to install Docker on an Azure VM:
12 | ```shell
13 | # 1. Install prerequisites
14 | sudo apt-get update
15 |
16 | sudo apt-get install -y \
17 | apt-transport-https \
18 | ca-certificates \
19 | curl \
20 | gnupg \
21 | lsb-release
22 |
23 |
24 | # 2. Add Docker’s official GPG key:
25 | curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo gpg --dearmor -o /usr/share/keyrings/docker-archive-keyring.gpg
26 |
27 | # 3. Set up repo
28 | echo \
29 | "deb [arch=amd64 signed-by=/usr/share/keyrings/docker-archive-keyring.gpg] https://download.docker.com/linux/ubuntu \
30 | $(lsb_release -cs) stable" | sudo tee /etc/apt/sources.list.d/docker.list > /dev/null
31 |
32 | # 4. Install Docker
33 | sudo apt-get update
34 | sudo apt-get install -y docker-ce docker-ce-cli containerd.io
35 |
36 | # 5. Verify that everything works
37 | sudo docker run hello-world
38 |
39 | # 6. Add the mrm user to the docker group to allow running without sudo
40 | sudo usermod -aG docker mrm
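# (log out and back in, or run newgrp docker, for the group change to take effect)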
41 | ```
42 |
43 | ### 2. Download map-kurator
44 |
45 | 1. Clone this repository:
46 | ```shell
47 | git clone https://github.com/machines-reading-maps/map-kurator.git
48 | ```
49 | 2. `cd map-kurator/`
50 |
51 | 3. Build the Docker image, if you haven't already:
52 | ```shell
53 | docker build -t map-kurator .
54 | ```
55 | This command builds the image from the `Dockerfile` in the current directory (`.`) and tags it `map-kurator`.
56 |
57 | 4. **IMPORTANT**: make sure the file with the model weights is available:
58 | ```shell
59 | ls -lah data/l_weights/finetune_map_model_map_w1e50_bsize8_w1_spe200_ep50.hdf5
60 | #> -rwxrwxr-x 1 danf danf 183M Jul 5 18:48 data/l_weights/finetune_map_model_map_w1e50_bsize8_w1_spe200_ep50.hdf5
61 | ```
62 | This file is over GitHub's size limit, so you need to download it from [here](https://drive.google.com/file/d/1PW_wPZO54Cr5wPk44Uf8g5_gEN7UGReA/view?usp=sharing) and put it under the `data/l_weights` folder.
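One way to fetch it from the command line (a sketch, assuming the `gdown` package is installed; the file id comes from the share link above):
```shell
pip install gdown
gdown 'https://drive.google.com/uc?id=1PW_wPZO54Cr5wPk44Uf8g5_gEN7UGReA' -O data/l_weights/finetune_map_model_map_w1e50_bsize8_w1_spe200_ep50.hdf5
```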
63 |
64 | If you are trying to run map-kurator locally and you have access to the Turing VM (and the VM is running), you can download it to your machine:
65 | ```shell
66 | scp {USER}@{VM_HOST}:~/finetune_map_model_map_w1e50_bsize8_w1_spe200_ep50.hdf5 data/l_weights/finetune_map_model_map_w1e50_bsize8_w1_spe200_ep50.hdf5
67 |
68 | ```
69 |
70 | ## Usage
71 |
72 | ### Input
73 |
74 | #### WMTS
75 |
76 | ```shell
77 | docker run -it -v $(pwd)/data/:/map-kurator/data -v $(pwd)/model:/map-kurator/model --rm --workdir=/map-kurator map-kurator python model/predict_annotations.py wmts --url='https://wmts.maptiler.com/aHR0cDovL3dtdHMubWFwdGlsZXIuY29tL2FIUjBjSE02THk5dFlYQnpaWEpwWlhNdGRHbHNaWE5sZEhNdWN6TXVZVzFoZW05dVlYZHpMbU52YlM4eU5WOXBibU5vTDNsdmNtdHphR2x5WlM5dFpYUmhaR0YwWVM1cWMyOXUvanNvbg/wmts' --boundary='{"type":"Feature","properties":{},"geometry":{"type":"Polygon","coordinates":[[[-1.1248,53.9711],[-1.0592,53.9711],[-1.0592,53.9569],[-1.1248,53.9569],[-1.1248,53.9711]]]}}' --zoom=16 --dst=data/test_imgs/sample_output/ --filename=sample_filename
78 | ```
79 |
80 | For WMTS, you can also choose to return the predicted polygons in the EPSG:4326 coordinate system (lat, lng) by adding `--coord epsg4326` at the end of the above command.
81 |
82 | #### IIIF
83 |
84 | ```shell
85 | docker run -it -v $(pwd)/data/:/map-kurator/data -v $(pwd)/model:/map-kurator/model --rm --workdir=/map-kurator map-kurator python model/predict_annotations.py iiif --url='https://map-view.nls.uk/iiif/2/12563%2F125635459/info.json' --dst=data/test_imgs/sample_output/ --filename=sample_filename
86 | ```
87 |
88 | #### Regular File
89 | ```shell
90 | docker run -it -v $(pwd)/data/:/map-kurator/data -v $(pwd)/model:/map-kurator/model --rm --workdir=/map-kurator map-kurator python model/predict_annotations.py file --src={PATH_TO_INPUT_FILE} --dst=data/test_imgs/sample_output/ --filename=sample_filename
91 | ```
92 |
93 | ### Output
94 |
95 | Assuming the output directory is `--dst=$OUT_DIR` and (optionally) `--filename=my_filename`, if any of the above commands ran successfully, `$OUT_DIR` will contain the following files:
96 |
97 | - `my_filename_stitched.jpg`: image that was passed to the model
98 |
99 | - `my_filename_predictions.jpg`: text regions detected by the model
100 |
101 | - `my_filename_annotations.json`: detected text region outlines represented as polygons (using [Web Annotation](https://www.w3.org/TR/annotation-model/) format)
102 |
103 | If `--filename` is not provided, it is generated automatically as a unique `uuid4()` string.
104 |
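For reference, each entry in the annotations file has roughly this shape (field layout taken from `model/predict_annotations.py`; the polygon coordinates here are illustrative):
```json
{
  "@context": "http://www.w3.org/ns/anno.jsonld",
  "id": "",
  "target": {
    "selector": [{
      "type": "SvgSelector",
      "value": "<svg><polygon points='12,34 56,34 56,78 12,78' /></svg>"
    }]
  }
}
```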
--------------------------------------------------------------------------------
/model/loss.py:
--------------------------------------------------------------------------------
1 | from keras import backend as K
2 |
3 | # https://gist.github.com/wassname/ce364fddfc8a025bfab4348cf5de852d
4 | def weighted_categorical_crossentropy(weights):
5 | """
6 | A weighted version of keras.objectives.categorical_crossentropy
7 |
8 | Variables:
9 | weights: numpy array of shape (C,) where C is the number of classes
10 |
11 | Usage:
12 |         weights = np.array([0.5, 2, 10]) # class 1 weighted 0.5x, class 2 weighted 2x, class 3 weighted 10x
13 | loss = weighted_categorical_crossentropy(weights)
14 | model.compile(loss=loss,optimizer='adam')
15 | """
16 |
17 | weights = K.variable(weights)
18 |
19 | def loss(y_true, y_pred):
20 | # scale predictions so that the class probas of each sample sum to 1
21 | y_pred /= K.sum(y_pred, axis=-1, keepdims=True)
22 | # clip to prevent NaN's and Inf's
23 | y_pred = K.clip(y_pred, K.epsilon(), 1 - K.epsilon())
24 | # calc
25 | loss = y_true * K.log(y_pred) * weights
26 | loss = -K.sum(loss, -1)
27 | return loss
28 |
29 | return loss
30 |
31 | def mean_squared_error_mask(y_true, y_pred):
32 | y_mask = y_true[:,:,:,0] #bsize, h, w, 5(m,x,y,sin,cos)
33 | y_mask = K.expand_dims(y_mask, axis = -1)
34 | #print y_true.shape, y_pred.shape
35 | y_true = y_true[:,:,:,1:]
36 | #y_pred = y_pred[:,:,:,1:]
37 | return K.sum(K.square((y_pred - y_true)*y_mask), axis= -1) # mse at each pixel location
38 |
39 | def mean_absolute_error_mask(y_true, y_pred):
40 | y_mask = y_true[:,:,:,0] #bsize, h, w, 5(m,x,y,sin,cos)
41 | y_mask = K.expand_dims(y_mask, axis = -1)
42 | #print y_true.shape, y_pred.shape
43 | y_true = y_true[:,:,:,1:]
44 |
45 | return K.sum(K.abs((y_pred - y_true)*y_mask), axis=-1)
46 |
47 |
48 | def mean_absolute_percentage_error_mask(y_true, y_pred):
49 | y_mask = y_true[:,:,:,0] #bsize, h, w, 5(m,x,y,sin,cos)
50 | y_mask = K.expand_dims(y_mask, axis = -1)
51 | #print y_true.shape, y_pred.shape
52 | y_true = y_true[:,:,:,1:]
53 |
54 | diff = K.abs(((y_true - y_pred))*y_mask / K.clip(K.abs(y_true * y_mask),
55 | K.epsilon(),
56 | None))
57 | return 100. * K.sum(diff, axis=-1)
58 |
--------------------------------------------------------------------------------
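A minimal usage sketch for the masked losses above (an assumption based on the in-code shape comments, not part of the original file): `y_true` packs a 0/1 mask in channel 0 followed by the four regression targets `(x, y, sin, cos)`, while `y_pred` carries only the four regression channels.

```python
import numpy as np

# Hypothetical tensors (names are illustrative): y_true has shape
# (batch, h, w, 5) laid out as (mask, x, y, sin, cos).
batch, h, w = 2, 64, 64
mask = np.random.randint(0, 2, (batch, h, w, 1)).astype("float32")
targets = np.random.rand(batch, h, w, 4).astype("float32")

y_true = np.concatenate([mask, targets], axis=-1)  # channel 0 is the mask
y_pred = np.random.rand(batch, h, w, 4).astype("float32")  # regression channels only

# mean_squared_error_mask(y_true, y_pred) then returns a (batch, h, w) map of
# per-pixel squared errors that is zero wherever mask == 0, so unlabeled
# pixels contribute no gradient.
```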
/model/wmts_handler.py:
--------------------------------------------------------------------------------
1 | import requests
2 | import xmltodict
3 | import json
4 | import shapely.geometry
5 | import math
6 | import cv2
7 | import numpy as np
8 | import os
9 |
10 |
11 | class WMTSHandler:
12 | def __init__(self, url, bounds, zoom, output_dir, img_filename):
13 | self.url = url
14 | self.tile_info = {}
15 | self.bounds = json.loads(bounds)
16 | self.zoom = zoom
17 | self.output_dir = output_dir
18 | self.img_filename = img_filename
19 |
20 | def _tile_idxs_in_poly(self, poly: shapely.geometry.Polygon):
21 | min_lon, min_lat, max_lon, max_lat = poly.bounds
22 | (min_x, max_y), (max_x, min_y) = self._latlon2tile(min_lat, min_lon), self._latlon2tile(max_lat, max_lon)
23 |
24 | tile_idxs = []
25 |
26 | for x in range(int(min_x), int(max_x) + 1):
27 | for y in range(int(min_y), int(max_y) + 1):
28 | nw_pt = self._tile2latlon(x, y)[::-1] # poly is defined in geojson form
29 | ne_pt = self._tile2latlon(x + 1, y)[::-1] # poly is defined in geojson form
30 | sw_pt = self._tile2latlon(x, y + 1)[::-1] # poly is defined in geojson form
31 | se_pt = self._tile2latlon(x + 1, y + 1)[::-1] # poly is defined in geojson form
32 |
33 |                 bbox = shapely.geometry.Polygon([nw_pt, ne_pt, se_pt, sw_pt])  # corners in ring order so the box is a valid polygon
34 |
35 | # print(f"{x}-{y}; {nw_pt} {ne_pt} {sw_pt} {se_pt}")
36 | # if any(map(lambda pt: shapely.geometry.Point(pt).within(poly), (nw_pt, ne_pt, sw_pt, se_pt))):
37 | if poly.intersects(bbox):
38 | tile_idxs.append((x, y))
39 |
40 | return tile_idxs, int(max_x + 1) - int(min_x), int(max_y + 1) - int(min_y), int(min_x), int(min_y)
41 |
42 | def _generate_tile_info(self, tile_idxs, min_x, min_y, url_template):
43 | zoom_level = str(self.zoom)
44 | tile_info = {
45 | 'zoom_level': zoom_level,
46 | 'tile_idxs': {}
47 | }
48 |
49 | for (x, y) in tile_idxs:
50 | # tile_col = str(x)
51 | # tile_row = str(y)
52 |
53 | url = url_template.replace('{TileMatrix}', zoom_level).replace('{TileCol}', str(x)).replace('{TileRow}',
54 | str(y))
55 | tile_info['tile_idxs'][(x - min_x, y - min_y)] = {'url': url}
56 |
57 | return tile_info
58 |
59 | def process_wmts(self):
60 | # print(args)
61 | # zoom_level = 18 # ~45min to download and predict; similar results to zoom=16; stitched png ~100Mb
62 | # zoom_level = 16 # ~2 min to download to predict; decent results; stitched png ~7Mb
63 | # zoom_level = 14 # too small for the model to detect text
64 |
65 | r = requests.get(self.url)
66 | # print(r.status_code)
67 | # print(str(r.headers))
68 | # print(json.dumps(xmltodict.parse(r.content)))
69 | response_dict = xmltodict.parse(r.content)
70 | wmts_capabilities = response_dict['Capabilities']
71 | # print(list(wmts_capabilities.keys()))
72 | url_template = wmts_capabilities['Contents']['Layer']['ResourceURL']['@template']
73 |
74 | poly = shapely.geometry.shape(self.bounds['geometry'])
75 |
76 | tile_idxs, num_tiles_w, num_tiles_h, min_x, min_y = self._tile_idxs_in_poly(poly)
77 |
78 | # print(f"num_tiles: {len(tile_idxs)}")
79 | tile_info = self._generate_tile_info(tile_idxs, min_x, min_y, url_template)
80 | tile_info['num_tiles_w'] = num_tiles_w
81 | tile_info['num_tiles_h'] = num_tiles_h
82 | tile_info['min_x'] = min_x
83 | tile_info['min_y'] = min_y
84 |
85 | tile_info = self._download_tiles(tile_info)
86 |
87 | map_path = self._generate_img(tile_info)
88 |
89 | # update self.tile_info
90 | self.tile_info = tile_info
91 |
92 | return map_path
93 |
94 | def _download_tiles(self, tile_info):
95 |
96 | for tile_idx in list(tile_info['tile_idxs'].keys()):
97 | url = tile_info['tile_idxs'][tile_idx]['url']
98 |
99 | print(f"downloading for key {str(tile_idx)} - {url}")
100 |
101 | resp = requests.get(url)
102 | img = np.asarray(bytearray(resp.content), dtype=np.uint8)
103 | img = cv2.imdecode(img, cv2.IMREAD_COLOR)
104 |
105 | tile_info['tile_idxs'][tile_idx]['img'] = img
106 |
107 | # return the images
108 | return tile_info
109 |
110 | def _generate_img(self, tile_info):
111 | num_tiles_w = tile_info['num_tiles_w']
112 | num_tiles_h = tile_info['num_tiles_h']
113 |
114 | shift_size = 256
115 |
116 | enlarged_width = int(shift_size * num_tiles_w)
117 | enlarged_height = int(shift_size * num_tiles_h)
118 |
119 | # paste the original map to the enlarged map
120 | enlarged_map = np.zeros((enlarged_height, enlarged_width, 3)).astype(np.uint8)
121 |
122 |         # process tile by tile
123 |         for idx in range(0, max(1, num_tiles_w)):
124 |             # paste each downloaded tile into the enlarged map
125 |             for jdx in range(0, max(1, num_tiles_h)):
126 |                 if (idx, jdx) not in tile_info['tile_idxs']: continue  # tile outside the boundary polygon
127 |                 enlarged_map[jdx * shift_size:(jdx + 1) * shift_size, idx * shift_size:(idx + 1) * shift_size, :] = tile_info['tile_idxs'][(idx, jdx)]['img']
128 |
129 | map_path = os.path.join(self.output_dir, self.img_filename)
130 |
131 | cv2.imwrite(map_path, enlarged_map)
132 | return map_path
133 |
134 | def _stitch_tiles(self):
135 | # needs input path with (cached) image tiles
136 | # needs output path
137 | return True
138 |
139 | # from OSM Slippy Tile definitions & https://github.com/Caged/tile-stitch
140 | def _latlon2tile(self, lat, lon):
141 | lat_radians = lat * math.pi / 180.0
142 | n = 1 << self.zoom
143 | return (
144 | n * ((lon + 180.0) / 360.0),
145 | n * (1 - (math.log(math.tan(lat_radians) + 1 / math.cos(lat_radians)) / math.pi)) / 2.0
146 | )
147 |
148 | # from OSM Slippy Tile definitions & https://github.com/Caged/tile-stitch
149 | def _tile2latlon(self, x, y):
150 | n = 1 << self.zoom
151 | lat_radians = math.atan(math.sinh(math.pi * (1.0 - 2.0 * y / n)))
152 | lat = lat_radians * 180 / math.pi
153 | lon = 360 * x / n - 180.0
154 | return (lat, lon)
155 |
156 | def _tile2latlon_list(self, x_list, y_list):
157 | n = 1 << self.zoom
158 | x_list, y_list = np.array(x_list), np.array(y_list)
159 | lat_radians_list = np.arctan(np.sinh(np.pi * (1.0 - 2.0 * y_list / n)))
160 | lat_list = lat_radians_list * 180 / math.pi
161 | lon_list = 360 * x_list / n - 180.0
162 | return (lat_list, lon_list)
163 |
--------------------------------------------------------------------------------
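The two slippy-map helpers at the bottom of `wmts_handler.py` are exact inverses of each other; a standalone round-trip check of the same formulas (zoom 16 assumed, corner point taken from the README's sample boundary):

```python
import math

zoom = 16
n = 1 << zoom

def latlon2tile(lat, lon):
    lat_r = math.radians(lat)
    return (n * (lon + 180.0) / 360.0,
            n * (1 - math.log(math.tan(lat_r) + 1 / math.cos(lat_r)) / math.pi) / 2.0)

def tile2latlon(x, y):
    lat_r = math.atan(math.sinh(math.pi * (1.0 - 2.0 * y / n)))
    return (math.degrees(lat_r), 360.0 * x / n - 180.0)

# corner of the sample boundary used in the README
x, y = latlon2tile(53.9711, -1.1248)
lat, lon = tile2latlon(x, y)
assert abs(lat - 53.9711) < 1e-9 and abs(lon - (-1.1248)) < 1e-9
```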
/model/iiif_handler.py:
--------------------------------------------------------------------------------
1 | import requests
2 | import cv2
3 | import numpy as np
4 | import json
5 | import pprint as pp
6 | import math
7 | from urllib.parse import urlparse, unquote
8 | import os
9 | import uuid
10 |
11 |
12 | class IIIFHandler:
13 | def __init__(self, manifest_url, output_dir, img_filename):
14 | self.tile_info = {'tile_idxs': {}, 'num_tiles_w': 0, 'num_tiles_h': 0}
15 | # self.url = "https://map-view.nls.uk/iiif/2/12563%2F125635459/12288,8192,4096,3242/512,/0/default.jpg"
16 | # self.url = "https://map-view.nls.uk/iiif/2/12563%2F125635459/full/max/0/default.jpg"
17 | self.tile_width = None
18 | self.tile_height = None
19 | self.img_width = None
20 | self.img_height = None
21 | self.url_prefix = None
22 | # self.manifest_url = "https://map-view.nls.uk/iiif/2/12563%2F125635459/info.json"
23 | self.manifest_url = manifest_url
24 | self.output_dir = output_dir
25 | self.rotation = 0
26 | self.tile_size = "full"
27 | self.quality = "default"
28 | self.img_format = "jpg"
29 | self.img_filename = img_filename
30 |
31 | def process_url(self):
32 | r = requests.get(self.manifest_url)
33 | # print(r.status_code)
34 | # print(str(r.headers))
35 |
36 | response_dict = r.json()
37 | print(json.dumps(response_dict, indent=2))
38 |
39 | self.url_prefix = response_dict['@id']
40 | # self.img_filename = unquote(urlparse(self.url_prefix).path).split("/")[-1]
41 |
42 | self.img_width = response_dict['width']
43 | self.img_height = response_dict['height']
44 |
45 |
46 | if response_dict['profile'] is not None:
47 | profile_list = response_dict['profile']
48 | if type(profile_list) == list and len(profile_list) > 1:
49 | profile_info = profile_list[1]
50 | if 'qualities' in profile_info:
51 | if 'native' in profile_info['qualities']:
52 | self.quality = 'native'
53 | print('set to native')
54 |
55 | if response_dict['tiles'] is not None:
56 | #assert response_dict['tiles'][0]['width'] == response_dict['tiles'][0]['height']
57 | #tile_size = response_dict['tiles'][0]['width']
58 | #self.tile_size = str(tile_size) + ','
59 |
60 | tile_info = response_dict['tiles'][0]
61 | self.tile_width = tile_info['width']
62 | # hack for sanborn maps
63 | if 'height' in tile_info:
64 | self.tile_height = tile_info['height']
65 | else:
66 | self.tile_height = tile_info['width']
67 |
68 |
69 | assert self.tile_height == self.tile_width
70 |
71 | # hack for david rumsey maps
72 | try:
73 | # probe once to decide the url format
74 | probe_bbox_str = ",".join([str(0), str(0), str(self.tile_width), str(self.tile_height)])
75 | probe_url = self.url_prefix + f"/{probe_bbox_str}/{self.tile_size}/{self.rotation}/{self.quality}.{self.img_format}"
76 | probe_resp = requests.get(probe_url)
77 | probe_img = np.asarray(bytearray(probe_resp.content), dtype=np.uint8)
78 |             _, _, _ = cv2.imdecode(probe_img, cv2.IMREAD_COLOR).shape  # raises (and triggers the except branch below) if the url format is incorrect
79 | except:
80 |
81 | self.tile_size = str(self.tile_height) + ','
82 |
83 |
84 | self._generate_tile_info()
85 | # pp.pprint(self.tile_info)
86 | self._download_tiles()
87 | map_path = self._generate_img()
88 | return map_path
89 |
90 |
91 |
92 | # generate a list of unique urls for each tile to download the entire image in pieces
93 | # https://iiif.io/api/image/2.1/#appendices
94 | def _generate_tile_info(self):
95 | row_idx = 0
96 | col_idx = 0
97 |
98 | max_col_idx = math.ceil(self.img_width / self.tile_width)
99 | max_row_idx = math.ceil(self.img_height / self.tile_height)
100 |
101 | current_region_x = col_idx * self.tile_width
102 | current_region_w = self.tile_width
103 | current_region_y = row_idx * self.tile_height
104 | current_region_h = self.tile_height
105 |
106 | while col_idx < max_col_idx:
107 | row_idx = 0 # always start outer loop from new row
108 | current_region_x = col_idx * self.tile_width
109 | current_region_w = self.tile_width
110 | if current_region_x + current_region_w > self.img_width:
111 | current_region_w = self.img_width - current_region_x
112 |
113 | while row_idx < max_row_idx:
114 | current_region_y = row_idx * self.tile_height
115 | current_region_h = self.tile_height
116 |
117 | if current_region_y + current_region_h > self.img_height:
118 | current_region_h = self.img_height - current_region_y
119 |
120 | url = self._generate_url(current_region_x, current_region_y, current_region_w, current_region_h)
121 | self.tile_info['tile_idxs'][(col_idx, row_idx)] = {'url': url}
122 |
123 | row_idx += 1
124 |
125 | col_idx += 1
126 |
129 |
130 | self.tile_info['num_tiles_w'] = max_col_idx
131 | self.tile_info['num_tiles_h'] = max_row_idx
132 |
133 | def _download_tiles(self):
134 |
135 | for tile_idx in list(self.tile_info['tile_idxs'].keys()):
136 | url = self.tile_info['tile_idxs'][tile_idx]['url']
137 |
138 | print(f"downloading for key {str(tile_idx)} - {url}")
139 |
140 | resp = requests.get(url)
141 | #print(url)
142 | img = np.asarray(bytearray(resp.content), dtype=np.uint8)
143 |
144 | if img.shape[0] == 0: # empty image
145 | continue
146 |
147 | try:
148 | img = cv2.imdecode(img, cv2.IMREAD_COLOR)
149 | img_height, img_width, img_depth = img.shape
150 | print(img.shape)
151 |
152 | except:
153 |                 print('Tile might be empty, skipped', url)
154 |                 continue
155 |
156 | try:
157 | # Pad width and height to multiples of self.tile_width and self.tile_height
158 | d_height = self.tile_height - img_height
159 | d_width = self.tile_width - img_width
160 | top = 0
161 | bottom = d_height
162 | left = 0
163 | right = d_width
164 |
165 | img = cv2.copyMakeBorder(img.copy(), top, bottom, left, right, cv2.BORDER_CONSTANT, value=[0, 0, 0])
166 |
167 | self.tile_info['tile_idxs'][tile_idx]['img'] = img
168 | except:
169 | print('Error making border, skipped', url)
170 |
171 |
172 | def _generate_img(self):
173 | num_tiles_w = self.tile_info['num_tiles_w']
174 | num_tiles_h = self.tile_info['num_tiles_h']
175 |
176 | enlarged_width = int(self.tile_width * num_tiles_w)
177 | enlarged_height = int(self.tile_height * num_tiles_h)
178 | print(f"ntw, nth: {num_tiles_h}, {num_tiles_w}")
179 | print(f"ew, eh: {enlarged_width}, {enlarged_height}")
180 |
181 | # print("BLAGALHAGLAHGA:")
182 | # print(f"{width}-{num_tiles_w}, {height}-{num_tiles_h}, {enlarged_width}, {enlarged_height}")
183 | # paste the original map to the enlarged map
184 | enlarged_map = np.zeros((enlarged_height, enlarged_width, 3)).astype(np.uint8)
185 |
186 | # process tile by tile
187 | for idx in range(0, num_tiles_w):
188 | # paste the predicted probabilty maps to the output image
189 | for jdx in range(0, num_tiles_h):
190 | if 'img' not in self.tile_info['tile_idxs'][(idx, jdx)]:
191 | continue
192 |
193 | img = self.tile_info['tile_idxs'][(idx, jdx)]['img']
194 |
195 | # print(f"img shape for ({idx}, {jdx}) - {img.shape}")
196 |                 enlarged_map[jdx * self.tile_height:(jdx + 1) * self.tile_height, idx * self.tile_width:(idx + 1) * self.tile_width, :] = img  # rows span tile_height, cols span tile_width
197 |
198 | map_path = os.path.join(self.output_dir, self.img_filename)
199 | cv2.imwrite(map_path, enlarged_map)
200 |
201 | return map_path
202 |
203 | def _generate_url(self, x, y, w, h):
204 |
205 | bbox_str = ",".join([str(x), str(y), str(w), str(h)])
206 | return_url = self.url_prefix + f"/{bbox_str}/{self.tile_size}/{self.rotation}/{self.quality}.{self.img_format}"
207 | #print(return_url)
208 | return return_url
209 |
210 |
--------------------------------------------------------------------------------
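`_generate_url` assembles IIIF Image API requests of the form `{prefix}/{x,y,w,h}/{size}/{rotation}/{quality}.{format}`; a quick sketch reproducing the commented-out NLS example URL from the top of the file:

```python
# values copied from the commented-out example in iiif_handler.py
url_prefix = "https://map-view.nls.uk/iiif/2/12563%2F125635459"
x, y, w, h = 12288, 8192, 4096, 3242
tile_size, rotation, quality, img_format = "512,", 0, "default", "jpg"

url = f"{url_prefix}/{x},{y},{w},{h}/{tile_size}/{rotation}/{quality}.{img_format}"
print(url)
# https://map-view.nls.uk/iiif/2/12563%2F125635459/12288,8192,4096,3242/512,/0/default.jpg
```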
/model/save_localheight_original_txt_fastzk.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | import glob
3 | import json
4 |
5 | import cv2
6 | import math
7 | import numpy as np
8 | import os
9 |
10 | os.environ['KERAS_BACKEND'] = 'tensorflow'
11 | import sys
12 | import tensorflow as tf
13 |
14 | print(tf.__file__)
15 | print(tf.__version__)
16 |
17 | # gpus = tf.config.list_physical_devices('GPU')
18 | # if gpus:
19 | # # Restrict TensorFlow to only allocate 1GB of memory on the first GPU
20 | # try:
21 | # tf.config.experimental.set_virtual_device_configuration(
22 | # gpus[0],
23 | # [tf.config.experimental.VirtualDeviceConfiguration(memory_limit=4096)])
24 | # logical_gpus = tf.config.experimental.list_logical_devices('GPU')
25 | # print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")
26 | # except RuntimeError as e:
27 | # # Virtual devices must be set before GPUs have been initialized
28 | # print(e)
29 |
30 |
31 | os.environ['CUDA_VISIBLE_DEVICES'] = ""
32 | import keras
33 | from keras.models import load_model
34 | from keras.models import Sequential
35 | from keras.layers import Dense, Dropout, Flatten, Activation
36 | from keras.layers import Conv2D, MaxPooling2D
37 | from keras import backend as K
38 | from keras.callbacks import Callback
39 | from keras.layers import Lambda, Input, Dense, Concatenate, Conv2DTranspose
40 | from keras.layers import LeakyReLU, BatchNormalization, AveragePooling2D, Reshape
41 | from keras.layers import UpSampling2D, ZeroPadding2D
42 | from keras.losses import mse, binary_crossentropy
43 | from keras.models import Model
44 | from keras.layers import Lambda, TimeDistributed
45 | from keras import layers
46 |
47 | import numpy as np
48 | import cv2
49 | import argparse
50 | import glob
51 |
52 |
53 | from loss import weighted_categorical_crossentropy, mean_squared_error_mask
54 | from loss import mean_absolute_error_mask, mean_absolute_percentage_error_mask
55 | from mymodel import model_U_VGG_Centerline_Localheight
56 |
57 |
58 | map_images = glob.glob('./data/test_imgs/sample_input/101201496_h10w3.jpg')
59 | # map_images = glob.glob('./data/*.png')
60 |
62 |
63 | print("-----")
64 | print(map_images)
65 |
66 | output_path = './data/test_imgs/sample_output/'
67 |
68 | saved_weights = './data/l_weights/finetune_map_model_map_w1e50_bsize8_w1_spe200_ep50.hdf5'
69 | model = model_U_VGG_Centerline_Localheight()
70 | model.load_weights(saved_weights)
71 |
72 | if not os.path.isdir(output_path):
73 | os.makedirs(output_path)
74 |
75 |
76 | shift_size = 512
77 |
78 | for map_path in map_images:
79 |
80 | base_name = os.path.basename(map_path)
81 |
82 | txt_name = output_path + base_name[0:len(base_name) - 4] + '.txt'
83 |
84 | f = open(txt_name, 'w+')
85 |
86 | print(map_path)
87 |
88 | map_img = cv2.imread(map_path)
89 |
90 | width = map_img.shape[1] # dimension2
91 | height = map_img.shape[0] # dimension1
92 |
93 | in_map_img = map_img / 255.
94 |
95 | # pad the image to the size divisible by shift-size
96 | num_tiles_w = int(np.ceil(1. * width/shift_size))
97 | num_tiles_h = int(np.ceil(1. * height/shift_size))
98 | enlarged_width = int(shift_size * num_tiles_w)
99 | enlarged_height = int(shift_size * num_tiles_h)
100 | print("BLAGALHAGLAHGA:")
101 | print(f"{width}-{num_tiles_w}, {height}-{num_tiles_h}, {enlarged_width}, {enlarged_height}")
102 | # paste the original map to the enlarged map
103 | enlarged_map = np.zeros((enlarged_height, enlarged_width, 3)).astype(np.float32)
104 | enlarged_map[0:height, 0:width, :] = in_map_img
105 |
106 | # define the output probability maps
107 | localheight_map_o = np.zeros((enlarged_height, enlarged_width, 1), np.float32)
108 | center_map_o = np.zeros((enlarged_height, enlarged_width, 2), np.float32)
109 | prob_map_o = np.zeros((enlarged_height, enlarged_width, 3), np.float32)
110 |
111 | # process tile by tile
112 | for idx in range(0, num_tiles_h):
113 | # pack several tiles in a batch and feed the batch to the model
114 | test_batch = []
115 | for jdx in range(0, num_tiles_w):
116 | img_clip = enlarged_map[idx*shift_size:(idx+1)*shift_size, jdx*shift_size:(jdx+1)*shift_size, :]
117 | test_batch.append(img_clip)
118 | test_batch = np.array(test_batch).astype(np.float32)
119 |
120 | # use the pretrained model to predict
121 | batch_out = model.predict(test_batch)
122 |
123 | # get predictions
124 | prob_map_batch = batch_out[0]
125 | center_map_batch = batch_out[1]
126 | localheight_map_batch = batch_out[2]
127 |
128 | # paste the predicted probabilty maps to the output image
129 | for jdx in range(0, num_tiles_w):
130 | localheight_map_o[idx*shift_size:(idx+1)*shift_size, jdx*shift_size:(jdx+1)*shift_size, :] = localheight_map_batch[jdx]
131 | center_map_o[idx*shift_size:(idx+1)*shift_size, jdx*shift_size:(jdx+1)*shift_size, :] = center_map_batch[jdx]
132 | prob_map_o[idx*shift_size:(idx+1)*shift_size, jdx*shift_size:(jdx+1)*shift_size, :] = prob_map_batch[jdx]
133 |
134 |
135 |     # convert from the 0-1 range to the 0-255 range
136 | prob_map_o = (prob_map_o * 255).astype(np.uint8)
137 | center_map_o = (center_map_o[:, :, 1] * 255).astype(np.uint8)
138 | #localheight_map = (localheight_map_o * 255).astype(np.uint8)
139 |
140 | prob_map_o = prob_map_o[0:height, 0:width, :]
141 | center_map_o = center_map_o[0:height, 0:width]
142 | localheight_map_o = localheight_map_o[0:height, 0:width, :]
143 |
144 |
145 |
146 | num_c, connected_map = cv2.connectedComponents(center_map_o)
147 | print('num_c:', num_c)
148 |
149 | # process component by component
150 | for cur_cc_idx in range(1, num_c): # index_0 is the background
151 |
152 | if cur_cc_idx % 100 == 0:
153 | print('processed', str(cur_cc_idx))
154 |
155 | centerline_indices = np.where(connected_map == cur_cc_idx)
156 |
157 | centerPoints=[]
158 | for i, j in zip(centerline_indices[0], centerline_indices[1]):
159 | if localheight_map_o[i, j, 0] > 0:
160 | centerPoints.append([i, j])
161 |
162 | if len(centerPoints) == 0:
163 | continue
164 |
165 | mini, minj = np.min(centerPoints, axis=0)
166 | maxi, maxj = np.max(centerPoints, axis=0)
167 |
168 | localheight_result_o = np.zeros((maxi-mini+100, maxj-minj+100, 3), np.uint8)
169 |
170 | for i, j in centerPoints:
171 | cv2.circle(localheight_result_o, (j-minj+50, i-mini+50), int(localheight_map_o[i][j]*0.4), (0, 0, 255), -1)
172 |
173 | img_gray = cv2.cvtColor(localheight_result_o, cv2.COLOR_BGR2GRAY)
174 |
175 | contours, hierarchy = cv2.findContours(img_gray, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
176 |
177 | new_context = ''
178 |
179 | if len(contours) == 0:
180 | continue
181 |
182 | for i in range(0, len(contours[0])):
183 | if i < len(contours[0]) - 1:
184 | new_context = new_context + str(contours[0][i][0][0].item()+minj-50) + ',' + str(contours[0][i][0][1].item()+mini-50) + ','
185 | else:
186 | new_context = new_context + str(contours[0][i][0][0].item()+minj-50) + ',' + str(contours[0][i][0][1].item()+mini-50)
187 |
188 | new_context = new_context + '\n'
189 |
190 | f.writelines(new_context)
191 |
192 | cv2.imwrite(output_path+'prob_' + base_name[0:len(base_name) - 4] + '.jpg', prob_map_o)
193 | cv2.imwrite(output_path+'cent_' + base_name[0:len(base_name) - 4] + '.jpg', center_map_o)
194 | cv2.imwrite(output_path+'localheight_map_' + base_name[0:len(base_name) - 4] + '.jpg', localheight_map_o)
195 |
196 | f.close()
197 |
198 |
199 | #txt parse
200 | with open(txt_name, 'r') as f:
201 | data = f.readlines()
202 |
203 | polyList = []
204 |
205 | for line in data:
206 | polyStr = line.split(',')
207 | poly = []
208 | for i in range(0, len(polyStr)):
209 | if i % 2 == 0:
210 | poly.append([int(polyStr[i]), int(polyStr[i+1])])
211 |
212 | polyList.append(poly)
213 |
214 |
215 | for i in range(0,len(polyList)):
216 | polyPoints = np.array([polyList[i]], dtype=np.int32)
217 | cv2.polylines(map_img, polyPoints, True, (0, 0, 255), 3)
218 |
219 |
220 | cv2.imwrite(output_path+'parse_result_'+base_name[0:len(base_name) - 4] + '.jpg',map_img)
221 |
222 |
223 | # Generate web annotations: https://www.w3.org/TR/annotation-model/
224 | annotations = []
225 | for polygon in polyList:
226 | svg_polygon_coords = ' '.join([f"{x},{y}" for x, y in polygon])
227 | annotation = {
228 | "@context": "http://www.w3.org/ns/anno.jsonld",
229 | "id": "",
230 | "body": [{
231 | "type": "TextualBody",
232 | "purpose": "tagging",
233 | "value": "null"
234 | }],
235 | "target": {
236 | "selector": [{
237 | "type": "SvgSelector",
238 | "value": f""
239 | }]
240 | }
241 | }
242 | annotations.append(annotation)
243 |
244 |     with open(output_path+'web_annotations_'+base_name[0:len(base_name) - 4] + '.json', 'w') as f:
245 | f.write(json.dumps(annotations, indent=2))
246 | # print(f"{polyList}")
247 |
248 | print('done processing')
249 |
250 |
251 |
--------------------------------------------------------------------------------
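Each line of the `.txt` the script emits is one detected text polygon as a flat comma-separated list of `x,y` vertices; a tiny sketch of the parsing step (the sample line is hypothetical):

```python
# hypothetical line from the emitted .txt: one polygon as x1,y1,x2,y2,...
line = "120,45,180,45,180,70,120,70"

vals = line.split(',')
poly = [[int(vals[i]), int(vals[i + 1])] for i in range(0, len(vals), 2)]
print(poly)  # [[120, 45], [180, 45], [180, 70], [120, 70]]
```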
/model/predict_annotations.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import os
3 | import uuid
4 |
5 | from wmts_handler import WMTSHandler
6 | from image_handler import ImageHandler
7 | from iiif_handler import IIIFHandler
8 | from mymodel import model_U_VGG_Centerline_Localheight
9 |
10 | import cv2
11 | import numpy as np
12 | import json
13 | from shapely.geometry import Polygon
14 |
15 | os.environ['KERAS_BACKEND'] = 'tensorflow'
16 | os.environ['CUDA_VISIBLE_DEVICES'] = ""
17 |
18 | import sys
19 | import tensorflow as tf
20 |
21 | import time
22 |
23 | print(tf.__file__)
24 | print(tf.__version__)
25 |
26 | # basically copy-pasted from the original implementation in save_localheight_original_txt_fastzk.py
27 | def run_model(map_id, map_path, output_dir):
28 | saved_weights = './data/l_weights/finetune_map_model_map_w1e50_bsize8_w1_spe200_ep50.hdf5'
29 | model = model_U_VGG_Centerline_Localheight()
30 | model.load_weights(saved_weights)
31 |
32 | map_img = cv2.imread(map_path)
33 | #print(map_path)
34 | shift_size = 512
35 |
36 | base_name = os.path.basename(map_path)
37 |
38 | width = map_img.shape[1] # dimension2
39 | height = map_img.shape[0] # dimension1
40 |
41 | in_map_img = map_img / 255.
42 |
43 | # pad the image to the size divisible by shift-size
44 | num_tiles_w = int(np.ceil(1. * width / shift_size))
45 | num_tiles_h = int(np.ceil(1. * height / shift_size))
46 | enlarged_width = int(shift_size * num_tiles_w)
47 | enlarged_height = int(shift_size * num_tiles_h)
48 | # print(f"{width}-{num_tiles_w}, {height}-{num_tiles_h}, {enlarged_width}, {enlarged_height}")
49 | # paste the original map to the enlarged map
50 | enlarged_map = np.zeros((enlarged_height, enlarged_width, 3)).astype(np.float32)
51 | enlarged_map[0:height, 0:width, :] = in_map_img
52 |
53 | # define the output probability maps
54 | localheight_map_o = np.zeros((enlarged_height, enlarged_width, 1), np.float32)
55 | center_map_o = np.zeros((enlarged_height, enlarged_width, 2), np.float32)
56 | prob_map_o = np.zeros((enlarged_height, enlarged_width, 3), np.float32)
57 |
58 | # process tile by tile
59 | for idx in range(0, num_tiles_h):
60 | # pack several tiles in a batch and feed the batch to the model
61 | test_batch = []
62 | for jdx in range(0, num_tiles_w):
63 | img_clip = enlarged_map[idx * shift_size:(idx + 1) * shift_size, jdx * shift_size:(jdx + 1) * shift_size, :]
64 | test_batch.append(img_clip)
65 | test_batch = np.array(test_batch).astype(np.float32)
66 |
67 | # use the pretrained model to predict
68 | batch_out = model.predict(test_batch)
69 |
70 | # get predictions
71 | prob_map_batch = batch_out[0]
72 | center_map_batch = batch_out[1]
73 | localheight_map_batch = batch_out[2]
74 |
75 | # paste the predicted probabilty maps to the output image
76 | for jdx in range(0, num_tiles_w):
77 | localheight_map_o[idx * shift_size:(idx + 1) * shift_size, jdx * shift_size:(jdx + 1) * shift_size, :] = \
78 | localheight_map_batch[jdx]
79 | center_map_o[idx * shift_size:(idx + 1) * shift_size, jdx * shift_size:(jdx + 1) * shift_size, :] = \
80 | center_map_batch[jdx]
81 | prob_map_o[idx * shift_size:(idx + 1) * shift_size, jdx * shift_size:(jdx + 1) * shift_size, :] = \
82 | prob_map_batch[jdx]
83 |
84 |     # convert from the 0-1 range to the 0-255 range
85 | prob_map_o = (prob_map_o * 255).astype(np.uint8)
86 | center_map_o = (center_map_o[:, :, 1] * 255).astype(np.uint8)
87 | # localheight_map = (localheight_map_o * 255).astype(np.uint8)
88 |
89 | prob_map_o = prob_map_o[0:height, 0:width, :]
90 | center_map_o = center_map_o[0:height, 0:width]
91 | localheight_map_o = localheight_map_o[0:height, 0:width, :]
92 |
93 | num_c, connected_map = cv2.connectedComponents(center_map_o)
94 | print('num_c:', num_c)
95 |
96 | poly_list = []
97 | # process component by component
98 | for cur_cc_idx in range(1, num_c): # index_0 is the background
99 |
100 | if cur_cc_idx % 100 == 0:
101 | print('processed', str(cur_cc_idx))
102 |
103 | centerline_indices = np.where(connected_map == cur_cc_idx)
104 |
105 | centerPoints = []
106 | for i, j in zip(centerline_indices[0], centerline_indices[1]):
107 | if localheight_map_o[i, j, 0] > 0:
108 | centerPoints.append([i, j])
109 |
110 | if len(centerPoints) == 0:
111 | continue
112 |
113 | mini, minj = np.min(centerPoints, axis=0)
114 | maxi, maxj = np.max(centerPoints, axis=0)
115 |
116 | localheight_result_o = np.zeros((maxi - mini + 100, maxj - minj + 100, 3), np.uint8)
117 |
118 | for i, j in centerPoints:
119 | cv2.circle(localheight_result_o, (j - minj + 50, i - mini + 50), int(localheight_map_o[i][j] * 0.5),
120 | (0, 0, 255), -1)
121 |
122 | img_gray = cv2.cvtColor(localheight_result_o, cv2.COLOR_BGR2GRAY)
123 |
124 | contours, hierarchy = cv2.findContours(img_gray, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
125 |
126 | new_context = ''
127 |
128 | if len(contours) == 0:
129 | continue
130 |
131 | for i in range(0, len(contours[0])):
132 | if i < len(contours[0]) - 1:
133 | new_context = new_context + str(contours[0][i][0][0].item() + minj - 50) + ',' + str(
134 | contours[0][i][0][1].item() + mini - 50) + ','
135 | else:
136 | new_context = new_context + str(contours[0][i][0][0].item() + minj - 50) + ',' + str(
137 | contours[0][i][0][1].item() + mini - 50)
138 |
139 | # new_context = new_context + '\n'
140 | poly_str = new_context.split(',')
141 | poly = []
142 | for i in range(0, len(poly_str)):
143 | if i % 2 == 0:
144 | poly.append((int(poly_str[i]), int(poly_str[i + 1])))
145 |
146 | try:
147 | simple_poly = Polygon(poly).simplify(tolerance = 5, preserve_topology=False).exterior.coords[:] # tolerance is a hyper-param. Larger tolerance leads to fewer points
148 | #poly_list.append(poly)
149 | poly_list.append(simple_poly)
150 | except:
151 | poly_list.append(poly)
152 |
153 | # cv2.imwrite(output_path + 'prob_' + base_name[0:len(base_name) - 4] + '.jpg', prob_map_o)
154 | # cv2.imwrite(output_path + 'cent_' + base_name[0:len(base_name) - 4] + '.jpg', center_map_o)
155 | # cv2.imwrite(output_path + 'localheight_map_' + base_name[0:len(base_name) - 4] + '.jpg', localheight_map_o)
156 |
157 |
158 | for i in range(0,len(poly_list)):
159 | poly_points = np.array([poly_list[i]], dtype=np.int32)
160 | cv2.polylines(map_img, poly_points, True, (0, 0, 255), 3)
161 |
162 | predictions_file = os.path.join(output_dir, map_id + '_predictions.jpg')
163 | cv2.imwrite(predictions_file, map_img)
164 |
165 |
166 | return poly_list
167 |
168 | def write_annotation(map_id, output_dir, poly_list, handler = None):
169 |
170 |
171 | if handler is not None:
172 | # perform this operation for WMTS tiles only
173 | # based on the tile info, convert from image coordinate system to EPSG:4326
174 | # assumes that the tilesize = 256x256
175 |
176 | tile_info = handler.tile_info
177 |
178 | min_tile_x = tile_info['min_x']
179 | min_tile_y = tile_info['min_y']
180 |
181 | latlon_poly_list = []
182 | for polygon in poly_list:
183 |
184 | if np.array(polygon).shape[0] == 0:
185 | continue
186 |
187 | # process each polygon
188 | poly_x_list , poly_y_list = np.array(polygon)[:,0], np.array(polygon)[:,1]
189 |
190 | # get corresponding tile index in the current map, i.e. tile shift range from min_tile_x ,min_tile_y
191 | temp_tile_x_list, temp_tile_y_list = np.floor(poly_x_list/ 256.), np.floor(poly_y_list/256.)
192 |
193 | # compute the starting tile idx that the polygon point lies in
194 | tile_x_list, tile_y_list = min_tile_x + temp_tile_x_list , min_tile_y + temp_tile_y_list
195 |
196 | # get polygon point pixel location in its current tile
197 | remainder_x_list, remainder_y_list = poly_x_list/256. - temp_tile_x_list , poly_y_list/256. - temp_tile_y_list
198 |
199 |             # absolute (fractional) tile coordinates
200 | tile_x_list, tile_y_list = tile_x_list + remainder_x_list, tile_y_list + remainder_y_list
201 |
202 | # convert to EPSG:4326
203 | lat_list, lon_list = handler._tile2latlon_list(tile_x_list, tile_y_list)
204 |
205 | # x=long, y = lat. so need to flip
206 | #latlon_poly = [[x,y] for x,y in zip(lon_list, lat_list)]
207 | latlon_poly = [["{:.6f}".format(x),"{:.6f}".format(y)] for x,y in zip(lon_list, lat_list)]
208 |
209 |
210 | latlon_poly_list.append(latlon_poly)
211 |
212 |         # reassign latlon_poly_list to poly_list for consistency downstream
213 |         poly_list = latlon_poly_list
214 |
215 |
216 | # Generate web annotations: https://www.w3.org/TR/annotation-model/
217 | annotations = []
218 | for polygon in poly_list:
219 | svg_polygon_coords = ' '.join([f"{x},{y}" for x, y in polygon])
220 | annotation = {
221 | "@context": "http://www.w3.org/ns/anno.jsonld",
222 | "id": "",
223 | #"body": [{
224 | # "type": "TextualBody",
225 | # "purpose": "tagging",
226 | # "value": "null"
227 | #}],
228 | "target": {
229 | "selector": [{
230 | "type": "SvgSelector",
231 | "value": f""
232 | }]
233 | }
234 | }
235 | annotations.append(annotation)
236 |
237 | annotation_file = os.path.join(output_dir, map_id + '_annotations.json')
238 | with open(annotation_file, 'w') as f:
239 | f.write(json.dumps(annotations, indent=2))
240 |
241 | return annotation_file
242 | # print(f"{polyList}")
243 |
244 |
245 | if __name__ == "__main__":
246 | parser = argparse.ArgumentParser()
247 |
248 | arg_parser_common = argparse.ArgumentParser(add_help=False)
249 | arg_parser_common.add_argument('--dst', required=True, type=str, help='path to output annotations file')
250 | arg_parser_common.add_argument('--filename', required=False, type=str, help='output filename prefix')
251 | arg_parser_common.add_argument('--coord', default = 'img_coord', required=False, type=str, choices = ['img_coord' ,'epsg4326'], help='return annotation in image coord or EPSG:4326')
252 |
253 | # parser.add_argument("input_type", choices=["wmts", "iiif", "tiff", "jpeg", "png"])
254 | subparsers = parser.add_subparsers(dest='subcommand')
255 |
256 | arg_parser_wmts = subparsers.add_parser('wmts', parents=[arg_parser_common],
257 | help='generate annotations for wmts input type')
258 | arg_parser_wmts.add_argument('--url', required=True, type=str, help='getCapabilities url')
259 | arg_parser_wmts.add_argument('--boundary', required=True, type=str, help='desired region boundary in GeoJSON')
260 | arg_parser_wmts.add_argument('--zoom', default=14, type=int, help='desired zoom level')
261 |
262 | arg_parser_iiif = subparsers.add_parser('iiif', parents=[arg_parser_common],
263 | help='generate annotations for iiif input type')
264 | arg_parser_iiif.add_argument('--url', required=True, type=str, help='IIIF manifest url')
265 |
266 | arg_parser_raw_input = subparsers.add_parser('file', parents=[arg_parser_common])
267 | arg_parser_raw_input.add_argument('--src', required=True, type=str, help='path to input image')
268 |
269 | args = parser.parse_args()
270 |
271 | map_path = None
272 | output_dir = args.dst
273 |
274 | if args.filename is not None:
275 | img_id = args.filename
276 | else:
277 | img_id = str(uuid.uuid4())
278 |
279 | if not os.path.isdir(output_dir):
280 | os.makedirs(output_dir)
281 |
282 |
283 | if args.coord == 'epsg4326':
284 | assert args.subcommand == 'wmts'
285 |
286 |
287 | if args.subcommand == 'wmts':
288 | '''
289 | time docker run -it -v $(pwd)/data/:/map-kurator/data -v $(pwd)/model:/map-kurator/model --rm --runtime=nvidia --gpus all --workdir=/map-kurator map-kurator python model/predict_annotations.py wmts --url='https://wmts.maptiler.com/aHR0cDovL3dtdHMubWFwdGlsZXIuY29tL2FIUjBjSE02THk5dFlYQnpaWEpwWlhNdGRHbHNaWE5sZEhNdWN6TXVZVzFoZW05dVlYZHpMbU52YlM4eU5WOXBibU5vTDNsdmNtdHphR2x5WlM5dFpYUmhaR0YwWVM1cWMyOXUvanNvbg/wmts' --boundary='{"type":"Feature","properties":{},"geometry":{"type":"Polygon","coordinates":[[[-1.1248,53.9711],[-1.0592,53.9711],[-1.0592,53.9569],[-1.1248,53.9569],[-1.1248,53.9711]]]}}' --zoom=16 --dst=data/test_imgs/sample_output/
290 | '''
291 |
292 | wmts_handler = WMTSHandler(url=args.url, bounds=args.boundary, zoom=args.zoom, output_dir=output_dir, img_filename=img_id + '_stitched.jpg')
293 | map_path = wmts_handler.process_wmts()
294 |
295 | poly_list = run_model(img_id, map_path, output_dir)
296 | if args.coord == 'img_coord':
297 | annotation_file = write_annotation(img_id, output_dir, poly_list)
298 | else:
299 | annotation_file = write_annotation(img_id, output_dir, poly_list, handler = wmts_handler)
300 |
301 | if args.subcommand == 'iiif':
302 | '''
303 | time docker run -it -v $(pwd)/data/:/map-kurator/data -v $(pwd)/model:/map-kurator/model --rm --runtime=nvidia --gpus all --workdir=/map-kurator map-kurator python model/predict_annotations.py iiif --url='https://map-view.nls.uk/iiif/2/12563%2F125635459/info.json' --dst=data/test_imgs/sample_output/
304 | '''
305 | start_download = time.time()
306 | iiif_handler = IIIFHandler(args.url, output_dir, img_filename=img_id + '_stitched.jpg')
307 | map_path = iiif_handler.process_url()
308 |
309 | end_download = time.time()
310 |
311 | poly_list = run_model(img_id, map_path, output_dir)
312 | annotation_file = write_annotation(img_id, output_dir, poly_list)
313 |
314 | end_detection = time.time()
315 |
316 | print('download time: ', end_download - start_download)
317 | print('detection time: ', end_detection - end_download)
318 |
319 |
320 | if args.subcommand == 'file':
321 | '''
322 | time docker run -it -v $(pwd)/data/:/map-kurator/data -v $(pwd)/model:/map-kurator/model --rm --runtime=nvidia --gpus all --workdir=/map-kurator map-kurator python model/predict_annotations.py file --src=data/test_imgs/sample_input/101201496_h10w3.jpg --dst=data/test_imgs/sample_output/
323 | '''
324 | map_path = args.src
325 |
326 | poly_list = run_model(img_id, map_path, output_dir)
327 | annotation_file = write_annotation(img_id, output_dir, poly_list)
328 |
329 |
330 |
331 |
332 | print("done")
333 | print(annotation_file)
334 |
--------------------------------------------------------------------------------
/model/mymodel.py:
--------------------------------------------------------------------------------
1 | import keras
2 | from keras.models import Sequential
3 | from keras.layers import Dense, Dropout, Flatten , Activation
4 | from keras.layers import Conv2D, MaxPooling2D
5 | from keras import backend as K
6 | from keras.callbacks import Callback
7 | from keras.layers import Lambda, Input, Dense, Concatenate ,Conv2DTranspose
8 | from keras.layers import LeakyReLU,BatchNormalization,AveragePooling2D,Reshape
9 | from keras.layers import UpSampling2D,ZeroPadding2D
10 | from keras.losses import mse, binary_crossentropy
11 | from keras.models import Model
12 | from keras.layers import Lambda,TimeDistributed
13 | from keras import layers
14 |
15 | def UNET(pretrained_weights = None,input_size = (256,256,3)):
16 | inputs = Input(input_size)
17 | conv1 = Conv2D(16, 3, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal', name = 'conv1-1')(inputs)
18 | conv1 = Conv2D(16, 3, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal', name = 'conv1-2')(conv1)
19 | pool1 = MaxPooling2D(pool_size=(2, 2))(conv1)
20 |
21 | conv2 = Conv2D(32, 3, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal', name = 'conv2-1')(pool1)
22 | conv2 = Conv2D(32, 3, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal', name = 'conv2-2')(conv2)
23 | pool2 = MaxPooling2D(pool_size=(2, 2))(conv2)
24 |
25 | conv3 = Conv2D(64, 3, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal', name = 'conv3-1')(pool2)
26 | conv3 = Conv2D(64, 3, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal', name = 'conv3-2')(conv3)
27 | pool3 = MaxPooling2D(pool_size=(2, 2))(conv3)
28 |
29 | conv4 = Conv2D(128, 3, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal', name = 'conv4-1')(pool3)
30 | conv4 = Conv2D(128, 3, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal', name = 'conv4-2')(conv4)
31 | drop4 = Dropout(0.5)(conv4)
32 | pool4 = MaxPooling2D(pool_size=(2, 2))(drop4)
33 |
34 | conv5 = Conv2D(256, 3, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal', name = 'conv5-1')(pool4)
35 | conv5 = Conv2D(256, 3, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal', name = 'conv5-2')(conv5)
36 | drop5 = Dropout(0.5)(conv5)
37 | pool5 = MaxPooling2D(pool_size=(2, 2))(drop5)
38 |
39 | conv6 = Conv2D(256, 3, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal', name = 'conv6-1')(pool5)
40 | conv6 = Conv2D(256, 3, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal', name = 'conv6-2')(conv6)
41 | drop6 = Dropout(0.5)(conv6)
42 |
43 | #conv7 = Conv2D(256, 3, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal', name = 'conv7-1')(pool6)
44 | #conv7 = Conv2D(256, 3, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal', name = 'conv7-2')(conv7)
45 | #drop7 = Dropout(0.5)(conv7)
46 |
47 | #//////////////////////////////////////////////////////////
48 |
49 | up6 = Conv2D(512, 2, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal', name = 'conv6u-0')(UpSampling2D(size = (2,2))(conv6))
50 | #merge6 = concatenate([drop4,up6], axis = 3)
51 | conv6u = Conv2D(512, 3, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal', name = 'conv6u-1')(up6)
52 | conv6u = Conv2D(512, 3, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal', name = 'conv6u-2')(conv6u)
53 |
54 | up7 = Conv2D(256, 2, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal', name = 'conv7u-0')(UpSampling2D(size = (2,2))(conv6u))
55 | #merge7 = concatenate([conv3,up7], axis = 3)
56 | conv7u = Conv2D(256, 3, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal', name = 'conv7u-1')(up7)
57 | conv7u = Conv2D(256, 3, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal', name = 'conv7u-2')(conv7u)
58 |
59 | up8 = Conv2D(128, 2, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal', name = 'conv8u-0')(UpSampling2D(size = (2,2))(conv7u))
60 | #merge8 = concatenate([conv2,up8], axis = 3)
61 | conv8u = Conv2D(128, 3, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal', name = 'conv8u-1')(up8)
62 | conv8u = Conv2D(128, 3, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal', name = 'conv8u-2')(conv8u)
63 |
64 | up9 = Conv2D(64, 2, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal', name = 'conv9u-0')(UpSampling2D(size = (2,2))(conv8u))
65 | #merge9 = concatenate([conv1,up9], axis = 3)
66 | conv9u = Conv2D(64, 3, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal', name = 'conv9u-1')(up9)
67 | conv9u = Conv2D(64, 3, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal', name = 'conv9u-2')(conv9u)
68 |
69 |
70 | up10 = Conv2D(64, 2, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal', name = 'conv10u-0')(UpSampling2D(size = (2,2))(conv9u))
71 | #merge9 = concatenate([conv1,up9], axis = 3)
72 | conv10u = Conv2D(64, 3, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal', name = 'conv10u-1')(up10)
73 | conv10u = Conv2D(64, 3, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal', name = 'conv10u-2')(conv10u)
74 | conv10u = Conv2D(3, 3, activation = 'sigmoid', padding = 'same', kernel_initializer = 'he_normal', name = 'conv10u-3')(conv10u)
75 |
76 | model = Model(inputs, conv10u)
77 |
78 | #model.compile(optimizer = Adam(lr = 1e-4), loss = 'binary_crossentropy', metrics = ['accuracy'])
79 |
80 | if(pretrained_weights):
81 | model.load_weights(pretrained_weights)
82 |
83 | return model
84 |
85 | def model_U_VGG():
86 | #input_shape = (720, 1280, 3)
87 | #input_shape = (512,512,3)
88 | input_shape = (None,None,3)
89 | inputs = Input(shape=input_shape, name='input')
90 |
91 |
92 | # Block 1
93 | x0 = layers.Conv2D(64, (3, 3),
94 | activation='relu',
95 | padding='same',
96 | name='block1_conv1')(inputs)
97 | x0 = layers.Conv2D(64, (3, 3),
98 | activation='relu',
99 | padding='same',
100 | name='block1_conv2')(x0)
101 | x0 = layers.MaxPooling2D((2, 2), strides=(2, 2), name='block1_pool')(x0)
102 |
103 | # Block 2
104 | x1 = layers.Conv2D(128, (3, 3),
105 | activation='relu',
106 | padding='same',
107 | name='block2_conv1')(x0)
108 | x1 = layers.Conv2D(128, (3, 3),
109 | activation='relu',
110 | padding='same',
111 | name='block2_conv2')(x1)
112 | x1 = layers.MaxPooling2D((2, 2), strides=(2, 2), name='block2_pool')(x1)
113 |
114 | # Block 3
115 | x2 = layers.Conv2D(256, (3, 3),
116 | activation='relu',
117 | padding='same',
118 | name='block3_conv1')(x1)
119 | x2 = layers.Conv2D(256, (3, 3),
120 | activation='relu',
121 | padding='same',
122 | name='block3_conv2')(x2)
123 | x2_take = layers.Conv2D(256, (3, 3),
124 | activation='relu',
125 | padding='same',
126 | name='block3_conv3')(x2)
127 | x2 = layers.MaxPooling2D((2, 2), strides=(2, 2), name='block3_pool')(x2_take)
128 |
129 | # Block 4
130 | x3 = layers.Conv2D(512, (3, 3),
131 | activation='relu',
132 | padding='same',
133 | name='block4_conv1')(x2)
134 | x3 = layers.Conv2D(512, (3, 3),
135 | activation='relu',
136 | padding='same',
137 | name='block4_conv2')(x3)
138 | x3_take = layers.Conv2D(512, (3, 3),
139 | activation='relu',
140 | padding='same',
141 | name='block4_conv3')(x3)
142 | x3 = layers.MaxPooling2D((2, 2), strides=(2, 2), name='block4_pool')(x3_take)
143 |
144 | # Block 5
145 | x4 = layers.Conv2D(512, (3, 3),
146 | activation='relu',
147 | padding='same',
148 | name='block5_conv1')(x3)
149 | x4 = layers.Conv2D(512, (3, 3),
150 | activation='relu',
151 | padding='same',
152 | name='block5_conv2')(x4)
153 | x4_take = layers.Conv2D(512, (3, 3),
154 | activation='relu',
155 | padding='same',
156 | name='block5_conv3')(x4)
157 | x4 = layers.MaxPooling2D((2, 2), strides=(2, 2), name='block5_pool')(x4_take)
158 |
159 | #f1 = UpSampling2D((2,2))(x4)
160 | #if TASK_4:
161 | # f1 = ZeroPadding2D(padding=((1,0), (0,0)), name = 'f1')(f1)
162 | f1 = x4_take
163 | f2 = x3
164 | h1 = Concatenate()([f2, f1])
165 | h1 = layers.Conv2D(128, (1, 1),
166 | activation='relu',
167 | padding='same',
168 | name='up1_1')(h1)
169 |
170 | h1 = layers.Conv2D(128, (3, 3),
171 | activation='relu',
172 | padding='same',
173 | name='up1_2')(h1)
174 |
175 |
176 | h2 = Concatenate()([x2, UpSampling2D((2,2))(h1)])
177 | h2 = layers.Conv2D(64, (1,1),
178 | activation = 'relu',
179 | padding = 'same',
180 | name = 'up2_1')(h2)
181 | h2 = layers.Conv2D(64, (3,3),
182 | activation = 'relu',
183 | padding = 'same',
184 | name = 'up2_2')(h2)
185 |
186 | h3 = Concatenate()([x1, UpSampling2D((2,2))(h2)])
187 | h3 = layers.Conv2D(32, (1,1),
188 | activation = 'relu',
189 | padding = 'same',
190 | name = 'up3_1')(h3)
191 | h3 = layers.Conv2D(32, (3,3),
192 | activation = 'relu',
193 | padding = 'same',
194 | name = 'up3_2')(h3)
195 |
196 | h4 = Concatenate()([x0, UpSampling2D((2,2))(h3)])
197 | h4 = layers.Conv2D(32, (1,1),
198 | activation = 'relu',
199 | padding = 'same',
200 | name = 'up4_1')(h4)
201 | h4 = layers.Conv2D(32, (3,3),
202 | activation = 'relu',
203 | padding = 'same',
204 | name = 'up4_2')(h4)
205 |
206 | h5 = Concatenate()([inputs, UpSampling2D((2,2))(h4)])
207 | h5 = layers.Conv2D(16, (1,1),
208 | activation = 'relu',
209 | padding = 'same',
210 | name = 'up5_1')(h5)
211 | ################## output for TEXT/NON-TEXT ############
212 |
213 | o1 = layers.Conv2D(3, (3,3),
214 | activation = 'softmax',
215 | padding = 'same',
216 | name = 'up5_2')(h5)
217 |
218 | ################ Regression ###########################
219 |     b1 = Concatenate(name='agg_feat-1')([x4_take, h1])  # block5_conv3, up1_2  # 32,32,640
220 |     b1 = layers.Conv2DTranspose(128, (3, 3), strides=(2, 2), padding='same',
221 |                                 activation='relu', name='agg_feat-2')(b1)  # 64,64,128
222 | 
223 |     # ------ xy regression -------
224 |     o2 = layers.Conv2DTranspose(64, (3, 3), strides=(2, 2), padding='same',
225 |                                 activation='relu', name='regress-1-1')(b1)  # 128,128, 64
226 |     o2 = layers.Conv2DTranspose(32, (3, 3), strides=(1, 1), padding='same',
227 |                                 activation='relu', name='regress-1-2')(o2)  # 128,128, 32
228 |     o2 = layers.Conv2DTranspose(16, (3, 3), strides=(2, 2), padding='same',
229 |                                 activation='relu', name='regress-1-3')(o2)  # 256,256, 16
230 |     o2 = layers.Conv2DTranspose(8, (3, 3), strides=(1, 1), padding='same',
231 |                                 activation='relu', name='regress-1-4')(o2)  # 256,256, 8
232 |     o2 = layers.Conv2DTranspose(4, (3, 3), strides=(2, 2), padding='same',
233 |                                 activation='relu', name='regress-1-5')(o2)  # 512,512, 4
234 |     o2 = layers.Conv2DTranspose(2, (3, 3), strides=(1, 1), padding='same',
235 |                                 activation='tanh', name='regress-1-6')(o2)  # 512,512, 2; tanh keeps xy offsets in [-1, 1]
236 |
237 | #------ wh regression -------
238 |     o4 = layers.Conv2DTranspose(64, (3, 3), strides=(2, 2), padding='same',
239 |                                 activation='relu', name='regress-3-1')(b1)  # 128,128, 64
240 |     o4 = layers.Conv2DTranspose(32, (3, 3), strides=(1, 1), padding='same',
241 |                                 activation='relu', name='regress-3-2')(o4)  # 128,128, 32
242 |     o4 = layers.Conv2DTranspose(16, (3, 3), strides=(2, 2), padding='same',
243 |                                 activation='relu', name='regress-3-3')(o4)  # 256,256, 16
244 |     o4 = layers.Conv2DTranspose(8, (3, 3), strides=(1, 1), padding='same',
245 |                                 activation='relu', name='regress-3-4')(o4)  # 256,256, 8
246 |     o4 = layers.Conv2DTranspose(4, (3, 3), strides=(2, 2), padding='same',
247 |                                 activation='relu', name='regress-3-5')(o4)  # 512,512, 4
248 |     o4 = layers.Conv2DTranspose(2, (3, 3), strides=(1, 1), padding='same',
249 |                                 activation='sigmoid', name='regress-3-6')(o4)  # 512,512, 2; sigmoid keeps w,h in [0, 1]
250 |
251 | # ------ sin/cos regression -------
252 |     b2 = Concatenate()([x3_take, b1])  # block4_conv3, agg_feat-2  # 64,64,640
253 |     b2 = layers.Conv2DTranspose(128, (3, 3), strides=(2, 2), padding='same',
254 |                                 activation='relu', name='regress-2-1')(b2)  # 128, 128, 128
255 |     o3 = Concatenate()([x2_take, b2])  # block3_conv3, regress-2-1  # 128, 128, (256+128)
256 |     o3 = layers.Conv2DTranspose(32, (3, 3), strides=(2, 2), padding='same',
257 |                                 activation='relu', name='regress-2-2')(o3)  # 256,256, 32
258 |     o3 = layers.Conv2DTranspose(2, (3, 3), strides=(2, 2), padding='same',
259 |                                 activation='tanh', name='regress-2-3')(o3)  # 512,512, 2; tanh keeps sin/cos in [-1, 1]
260 |
261 |
262 |     model = Model(inputs, [o1, o2, o3, o4], name='U-VGG-model')  # o1: text/non-text, o2: xy, o3: sin/cos, o4: box w,h
263 |
264 | return model
265 |
266 |
267 |
268 | def model_U_VGG_Centerline():
269 | #input_shape = (720, 1280, 3)
270 | #input_shape = (512,512,3)
271 |     input_shape = (None, None, 3)
272 | inputs = Input(shape=input_shape, name='input')
273 |
274 |
275 | # Block 1
276 | x0 = layers.Conv2D(64, (3, 3),
277 | activation='relu',
278 | padding='same',
279 | name='block1_conv1')(inputs)
280 | x0 = layers.Conv2D(64, (3, 3),
281 | activation='relu',
282 | padding='same',
283 | name='block1_conv2')(x0)
284 | x0 = layers.MaxPooling2D((2, 2), strides=(2, 2), name='block1_pool')(x0)
285 |
286 | # Block 2
287 | x1 = layers.Conv2D(128, (3, 3),
288 | activation='relu',
289 | padding='same',
290 | name='block2_conv1')(x0)
291 | x1 = layers.Conv2D(128, (3, 3),
292 | activation='relu',
293 | padding='same',
294 | name='block2_conv2')(x1)
295 | x1 = layers.MaxPooling2D((2, 2), strides=(2, 2), name='block2_pool')(x1)
296 |
297 | # Block 3
298 | x2 = layers.Conv2D(256, (3, 3),
299 | activation='relu',
300 | padding='same',
301 | name='block3_conv1')(x1)
302 | x2 = layers.Conv2D(256, (3, 3),
303 | activation='relu',
304 | padding='same',
305 | name='block3_conv2')(x2)
306 | x2_take = layers.Conv2D(256, (3, 3),
307 | activation='relu',
308 | padding='same',
309 | name='block3_conv3')(x2)
310 | x2 = layers.MaxPooling2D((2, 2), strides=(2, 2), name='block3_pool')(x2_take)
311 |
312 | # Block 4
313 | x3 = layers.Conv2D(512, (3, 3),
314 | activation='relu',
315 | padding='same',
316 | name='block4_conv1')(x2)
317 | x3 = layers.Conv2D(512, (3, 3),
318 | activation='relu',
319 | padding='same',
320 | name='block4_conv2')(x3)
321 | x3_take = layers.Conv2D(512, (3, 3),
322 | activation='relu',
323 | padding='same',
324 | name='block4_conv3')(x3)
325 | x3 = layers.MaxPooling2D((2, 2), strides=(2, 2), name='block4_pool')(x3_take)
326 |
327 | # Block 5
328 | x4 = layers.Conv2D(512, (3, 3),
329 | activation='relu',
330 | padding='same',
331 | name='block5_conv1')(x3)
332 | x4 = layers.Conv2D(512, (3, 3),
333 | activation='relu',
334 | padding='same',
335 | name='block5_conv2')(x4)
336 | x4_take = layers.Conv2D(512, (3, 3),
337 | activation='relu',
338 | padding='same',
339 | name='block5_conv3')(x4)
340 | x4 = layers.MaxPooling2D((2, 2), strides=(2, 2), name='block5_pool')(x4_take)
341 |
342 | #f1 = UpSampling2D((2,2))(x4)
343 | #if TASK_4:
344 | # f1 = ZeroPadding2D(padding=((1,0), (0,0)), name = 'f1')(f1)
345 | f1 = x4_take
346 | f2 = x3
347 | h1 = Concatenate()([f2, f1])
348 | h1 = layers.Conv2D(128, (1, 1),
349 | activation='relu',
350 | padding='same',
351 | name='up1_1')(h1)
352 |
353 | h1 = layers.Conv2D(128, (3, 3),
354 | activation='relu',
355 | padding='same',
356 | name='up1_2')(h1)
357 |
358 |
359 |     h2 = Concatenate()([x2, UpSampling2D((2, 2))(h1)])
360 |     h2 = layers.Conv2D(64, (1, 1),
361 |                        activation='relu',
362 |                        padding='same',
363 |                        name='up2_1')(h2)
364 |     h2 = layers.Conv2D(64, (3, 3),
365 |                        activation='relu',
366 |                        padding='same',
367 |                        name='up2_2')(h2)
368 | 
369 |     h3 = Concatenate()([x1, UpSampling2D((2, 2))(h2)])
370 |     h3 = layers.Conv2D(32, (1, 1),
371 |                        activation='relu',
372 |                        padding='same',
373 |                        name='up3_1')(h3)
374 |     h3 = layers.Conv2D(32, (3, 3),
375 |                        activation='relu',
376 |                        padding='same',
377 |                        name='up3_2')(h3)
378 | 
379 |     h4_take = Concatenate()([x0, UpSampling2D((2, 2))(h3)])
380 | 
381 |     h4 = layers.Conv2D(32, (1, 1),
382 |                        activation='relu',
383 |                        padding='same',
384 |                        name='up4_1')(h4_take)
385 |     h4 = layers.Conv2D(32, (3, 3),
386 |                        activation='relu',
387 |                        padding='same',
388 |                        name='up4_2')(h4)
389 | 
390 |     h5 = Concatenate()([inputs, UpSampling2D((2, 2))(h4)])
391 |     h5 = layers.Conv2D(16, (1, 1),
392 |                        activation='relu',
393 |                        padding='same',
394 |                        name='up5_1')(h5)
395 |     ################## output for TEXT/NON-TEXT ############
396 | 
397 |     o1 = layers.Conv2D(3, (3, 3),
398 |                        activation='softmax',
399 |                        padding='same',
400 |                        name='up5_2')(h5)
401 |     ################## output for centerline / other ###########
402 |     h41 = layers.Conv2D(32, (1, 1),
403 |                         activation='relu',
404 |                         padding='same',
405 |                         name='up41_1')(h4_take)
406 |     h41 = layers.Conv2D(32, (3, 3),
407 |                         activation='relu',
408 |                         padding='same',
409 |                         name='up41_2')(h41)
410 | 
411 |     h51 = Concatenate()([inputs, UpSampling2D((2, 2))(h41)])
412 |     h51 = layers.Conv2D(16, (1, 1),
413 |                         activation='relu',
414 |                         padding='same',
415 |                         name='up51_1')(h51)
416 | 
417 |     o11 = layers.Conv2D(2, (3, 3),
418 |                         activation='softmax',
419 |                         padding='same',
420 |                         name='up51_2')(h51)
421 |
422 | ################ Regression ###########################
423 |     b1 = Concatenate(name='agg_feat-1')([x4_take, h1])  # block5_conv3, up1_2  # 32,32,640
424 |     b1 = layers.Conv2DTranspose(128, (3, 3), strides=(2, 2), padding='same',
425 |                                 activation='relu', name='agg_feat-2')(b1)  # 64,64,128
426 | 
427 |     # ------ xy regression -------
428 |     o2 = layers.Conv2DTranspose(64, (3, 3), strides=(2, 2), padding='same',
429 |                                 activation='relu', name='regress-1-1')(b1)  # 128,128, 64
430 |     o2 = layers.Conv2DTranspose(32, (3, 3), strides=(1, 1), padding='same',
431 |                                 activation='relu', name='regress-1-2')(o2)  # 128,128, 32
432 |     o2 = layers.Conv2DTranspose(16, (3, 3), strides=(2, 2), padding='same',
433 |                                 activation='relu', name='regress-1-3')(o2)  # 256,256, 16
434 |     o2 = layers.Conv2DTranspose(8, (3, 3), strides=(1, 1), padding='same',
435 |                                 activation='relu', name='regress-1-4')(o2)  # 256,256, 8
436 |     o2 = layers.Conv2DTranspose(4, (3, 3), strides=(2, 2), padding='same',
437 |                                 activation='relu', name='regress-1-5')(o2)  # 512,512, 4
438 |     o2 = layers.Conv2DTranspose(2, (3, 3), strides=(1, 1), padding='same',
439 |                                 activation='tanh', name='regress-1-6')(o2)  # 512,512, 2; tanh keeps xy offsets in [-1, 1]
440 |
441 | #------ wh regression -------
442 |     o4 = layers.Conv2DTranspose(64, (3, 3), strides=(2, 2), padding='same',
443 |                                 activation='relu', name='regress-3-1')(b1)  # 128,128, 64
444 |     o4 = layers.Conv2DTranspose(32, (3, 3), strides=(1, 1), padding='same',
445 |                                 activation='relu', name='regress-3-2')(o4)  # 128,128, 32
446 |     o4 = layers.Conv2DTranspose(16, (3, 3), strides=(2, 2), padding='same',
447 |                                 activation='relu', name='regress-3-3')(o4)  # 256,256, 16
448 |     o4 = layers.Conv2DTranspose(8, (3, 3), strides=(1, 1), padding='same',
449 |                                 activation='relu', name='regress-3-4')(o4)  # 256,256, 8
450 |     o4 = layers.Conv2DTranspose(4, (3, 3), strides=(2, 2), padding='same',
451 |                                 activation='relu', name='regress-3-5')(o4)  # 512,512, 4
452 |     o4 = layers.Conv2DTranspose(2, (3, 3), strides=(1, 1), padding='same',
453 |                                 activation='sigmoid', name='regress-3-6')(o4)  # 512,512, 2; sigmoid keeps w,h in [0, 1]
454 |
455 | # ------ sin/cos regression -------
456 |     b2 = Concatenate()([x3_take, b1])  # block4_conv3, agg_feat-2  # 64,64,640
457 |     b2 = layers.Conv2DTranspose(128, (3, 3), strides=(2, 2), padding='same',
458 |                                 activation='relu', name='regress-2-1')(b2)  # 128, 128, 128
459 |     o3 = Concatenate()([x2_take, b2])  # block3_conv3, regress-2-1  # 128, 128, (256+128)
460 |     o3 = layers.Conv2DTranspose(32, (3, 3), strides=(2, 2), padding='same',
461 |                                 activation='relu', name='regress-2-2')(o3)  # 256,256, 32
462 |     o3 = layers.Conv2DTranspose(2, (3, 3), strides=(2, 2), padding='same',
463 |                                 activation='tanh', name='regress-2-3')(o3)  # 512,512, 2; tanh keeps sin/cos in [-1, 1]
464 |
465 |
466 |
467 |     # o1: text/non-text, o11: centerline, o2: x,y, o3: sin,cos, o4: bounding box width,height
468 |     model = Model(inputs, [o1, o11, o2, o3, o4], name='U-VGG-model')
469 |
470 |
471 | return model
472 |
473 |
474 | def model_U_VGG_Centerline_Localheight():
475 | # input_shape = (720, 1280, 3)
476 | # input_shape = (512,512,3)
477 | input_shape = (None, None, 3)
478 | inputs = Input(shape=input_shape, name='input')
479 |
480 | # Block 1
481 | x0 = layers.Conv2D(64, (3, 3),
482 | activation='relu',
483 | padding='same',
484 | name='block1_conv1')(inputs)
485 | x0 = layers.Conv2D(64, (3, 3),
486 | activation='relu',
487 | padding='same',
488 | name='block1_conv2')(x0)
489 | x0 = layers.MaxPooling2D((2, 2), strides=(2, 2), name='block1_pool')(x0)
490 |
491 | # Block 2
492 | x1 = layers.Conv2D(128, (3, 3),
493 | activation='relu',
494 | padding='same',
495 | name='block2_conv1')(x0)
496 | x1 = layers.Conv2D(128, (3, 3),
497 | activation='relu',
498 | padding='same',
499 | name='block2_conv2')(x1)
500 | x1 = layers.MaxPooling2D((2, 2), strides=(2, 2), name='block2_pool')(x1)
501 |
502 | # Block 3
503 | x2 = layers.Conv2D(256, (3, 3),
504 | activation='relu',
505 | padding='same',
506 | name='block3_conv1')(x1)
507 | x2 = layers.Conv2D(256, (3, 3),
508 | activation='relu',
509 | padding='same',
510 | name='block3_conv2')(x2)
511 | x2_take = layers.Conv2D(256, (3, 3),
512 | activation='relu',
513 | padding='same',
514 | name='block3_conv3')(x2)
515 | x2 = layers.MaxPooling2D((2, 2), strides=(2, 2), name='block3_pool')(x2_take)
516 |
517 | # Block 4
518 | x3 = layers.Conv2D(512, (3, 3),
519 | activation='relu',
520 | padding='same',
521 | name='block4_conv1')(x2)
522 | x3 = layers.Conv2D(512, (3, 3),
523 | activation='relu',
524 | padding='same',
525 | name='block4_conv2')(x3)
526 | x3_take = layers.Conv2D(512, (3, 3),
527 | activation='relu',
528 | padding='same',
529 | name='block4_conv3')(x3)
530 | x3 = layers.MaxPooling2D((2, 2), strides=(2, 2), name='block4_pool')(x3_take)
531 |
532 | # Block 5
533 | x4 = layers.Conv2D(512, (3, 3),
534 | activation='relu',
535 | padding='same',
536 | name='block5_conv1')(x3)
537 | x4 = layers.Conv2D(512, (3, 3),
538 | activation='relu',
539 | padding='same',
540 | name='block5_conv2')(x4)
541 | x4_take = layers.Conv2D(512, (3, 3),
542 | activation='relu',
543 | padding='same',
544 | name='block5_conv3')(x4)
545 | x4 = layers.MaxPooling2D((2, 2), strides=(2, 2), name='block5_pool')(x4_take)
546 |
547 | # f1 = UpSampling2D((2,2))(x4)
548 | # if TASK_4:
549 | # f1 = ZeroPadding2D(padding=((1,0), (0,0)), name = 'f1')(f1)
550 | f1 = x4_take
551 | f2 = x3
552 | h1 = Concatenate()([f2, f1])
553 | h1 = layers.Conv2D(128, (1, 1),
554 | activation='relu',
555 | padding='same',
556 | name='up1_1')(h1)
557 |
558 | h1 = layers.Conv2D(128, (3, 3),
559 | activation='relu',
560 | padding='same',
561 | name='up1_2')(h1)
562 |
563 | h2 = Concatenate()([x2, UpSampling2D((2, 2))(h1)])
564 | h2 = layers.Conv2D(64, (1, 1),
565 | activation='relu',
566 | padding='same',
567 | name='up2_1')(h2)
568 | h2 = layers.Conv2D(64, (3, 3),
569 | activation='relu',
570 | padding='same',
571 | name='up2_2')(h2)
572 |
573 | h3 = Concatenate()([x1, UpSampling2D((2, 2))(h2)])
574 | h3 = layers.Conv2D(32, (1, 1),
575 | activation='relu',
576 | padding='same',
577 | name='up3_1')(h3)
578 | h3 = layers.Conv2D(32, (3, 3),
579 | activation='relu',
580 | padding='same',
581 | name='up3_2')(h3)
582 |
583 | h4_take = Concatenate()([x0, UpSampling2D((2, 2))(h3)])
584 |
585 | h4 = layers.Conv2D(32, (1, 1),
586 | activation='relu',
587 | padding='same',
588 | name='up4_1')(h4_take)
589 | h4 = layers.Conv2D(32, (3, 3),
590 | activation='relu',
591 | padding='same',
592 | name='up4_2')(h4)
593 |
594 | h5 = Concatenate()([inputs, UpSampling2D((2, 2))(h4)])
595 | h5 = layers.Conv2D(16, (1, 1),
596 | activation='relu',
597 | padding='same',
598 | name='up5_1')(h5)
599 | ################## output for TEXT/NON-TEXT ############
600 |
601 | o1 = layers.Conv2D(3, (3, 3),
602 | activation='softmax',
603 | padding='same',
604 | name='up5_2')(h5)
605 | ################## output for centerline /other ###########
606 | h41 = layers.Conv2D(32, (1, 1),
607 | activation='relu',
608 | padding='same',
609 | name='up41_1')(h4_take)
610 | h41 = layers.Conv2D(32, (3, 3),
611 | activation='relu',
612 | padding='same',
613 | name='up41_2')(h41)
614 |
615 | h51 = Concatenate()([inputs, UpSampling2D((2, 2))(h41)])
616 | h51 = layers.Conv2D(16, (1, 1),
617 | activation='relu',
618 | padding='same',
619 | name='up51_1')(h51)
620 |
621 | o11 = layers.Conv2D(2, (3, 3),
622 | activation='softmax',
623 | padding='same',
624 | name='up51_2')(h51)
625 |
626 | ################ Regression ###########################
627 |     b1 = Concatenate(name='agg_feat-1')([x4_take, h1])  # block5_conv3, up1_2  # 32,32,640
628 | b1 = layers.Conv2DTranspose(128, (3, 3), strides=(2, 2), padding='same',
629 | activation='relu', name='agg_feat-2')(b1) # 64,64,128
630 |
631 | # ------ xy regression -------
632 |     o2 = layers.Conv2DTranspose(64, (3, 3), strides=(2, 2), padding='same',
633 |                                 activation='relu', name='regress-1-1')(b1)  # 128,128, 64
634 |     o2 = layers.Conv2DTranspose(32, (3, 3), strides=(1, 1), padding='same',
635 |                                 activation='relu', name='regress-1-2')(o2)  # 128,128, 32
636 |     o2 = layers.Conv2DTranspose(16, (3, 3), strides=(2, 2), padding='same',
637 |                                 activation='relu', name='regress-1-3')(o2)  # 256,256, 16
638 |     o2 = layers.Conv2DTranspose(8, (3, 3), strides=(1, 1), padding='same',
639 |                                 activation='relu', name='regress-1-4')(o2)  # 256,256, 8
640 |     o2 = layers.Conv2DTranspose(4, (3, 3), strides=(2, 2), padding='same',
641 |                                 activation='relu', name='regress-1-5')(o2)  # 512,512, 4
642 |     o2 = layers.Conv2DTranspose(2, (3, 3), strides=(1, 1), padding='same',
643 |                                 activation='tanh', name='regress-1-6')(o2)  # 512,512, 2; tanh keeps xy offsets in [-1, 1]
644 |
645 | # ------ wh regression -------
646 |     o4 = layers.Conv2DTranspose(64, (3, 3), strides=(2, 2), padding='same',
647 |                                 activation='relu', name='regress-3-1')(b1)  # 128,128, 64
648 |     o4 = layers.Conv2DTranspose(32, (3, 3), strides=(1, 1), padding='same',
649 |                                 activation='relu', name='regress-3-2')(o4)  # 128,128, 32
650 |     o4 = layers.Conv2DTranspose(16, (3, 3), strides=(2, 2), padding='same',
651 |                                 activation='relu', name='regress-3-3')(o4)  # 256,256, 16
652 |     o4 = layers.Conv2DTranspose(8, (3, 3), strides=(1, 1), padding='same',
653 |                                 activation='relu', name='regress-3-4')(o4)  # 256,256, 8
654 |     o4 = layers.Conv2DTranspose(4, (3, 3), strides=(2, 2), padding='same',
655 |                                 activation='relu', name='regress-3-5')(o4)  # 512,512, 4
656 |     o4 = layers.Conv2DTranspose(2, (3, 3), strides=(1, 1), padding='same',
657 |                                 activation='sigmoid', name='regress-3-6')(o4)  # 512,512, 2; sigmoid keeps w,h in [0, 1]
658 |
659 | # ------ sin/cos regression -------
660 |     b2 = Concatenate()([x3_take, b1])  # block4_conv3, agg_feat-2  # 64,64,640
661 |     b2 = layers.Conv2DTranspose(128, (3, 3), strides=(2, 2), padding='same',
662 |                                 activation='relu', name='regress-2-1')(b2)  # 128, 128, 128
663 |     o3 = Concatenate()([x2_take, b2])  # block3_conv3, regress-2-1  # 128, 128, (256+128)
664 |     o3 = layers.Conv2DTranspose(32, (3, 3), strides=(2, 2), padding='same',
665 |                                 activation='relu', name='regress-2-2')(o3)  # 256,256, 32
666 |     o3 = layers.Conv2DTranspose(2, (3, 3), strides=(2, 2), padding='same',
667 |                                 activation='tanh', name='regress-2-3')(o3)  # 512,512, 2; tanh keeps sin/cos in [-1, 1]
668 |
669 | # ------ local height regression ------
670 |     o5 = layers.Conv2DTranspose(64, (3, 3), strides=(2, 2), padding='same',
671 |                                 activation='relu', name='regress-4-1')(b1)  # 128,128, 64
672 |     o5 = layers.Conv2DTranspose(32, (3, 3), strides=(1, 1), padding='same',
673 |                                 activation='relu', name='regress-4-2')(o5)  # 128,128, 32
674 |     o5 = layers.Conv2DTranspose(16, (3, 3), strides=(2, 2), padding='same',
675 |                                 activation='relu', name='regress-4-3')(o5)  # 256,256, 16
676 |     o5 = layers.Conv2DTranspose(8, (3, 3), strides=(1, 1), padding='same',
677 |                                 activation='relu', name='regress-4-4')(o5)  # 256,256, 8
678 |     o5 = layers.Conv2DTranspose(4, (3, 3), strides=(2, 2), padding='same',
679 |                                 activation='relu', name='regress-4-5')(o5)  # 512,512, 4
680 |     o5 = layers.Conv2DTranspose(2, (3, 3), strides=(1, 1), padding='same',
681 |                                 activation='relu', name='regress-4-6')(o5)  # 512,512, 2
682 |     o5 = layers.Conv2DTranspose(1, (3, 3), strides=(1, 1), padding='same',
683 |                                 activation='relu', name='regress-4-7')(o5)  # 512,512, 1; ReLU keeps the height non-negative
684 |
685 |     # o1: text/non-text, o11: centerline, o5: local height (o2/o3/o4 are built above but not returned by this variant)
686 |     # model = Model(inputs, [o1, o11, o2, o3, o4], name='U-VGG-model')
687 | model = Model(inputs, [o1, o11, o5], name='U-VGG-model-Localheight')
688 |
689 | return model
690 |
691 |
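692 | 
693 | if __name__ == '__main__':
694 |     # Minimal smoke test -- an illustrative sketch, not part of the original pipeline.
695 |     # It assumes the Keras imports used above (Input, layers, Model, Concatenate,
696 |     # UpSampling2D) are at the top of this module, and that input height/width are
697 |     # multiples of 32 (the encoder applies five 2x2 poolings before the skip concats).
698 |     import numpy as np
699 | 
700 |     model = model_U_VGG_Centerline_Localheight()
701 |     dummy = np.zeros((1, 512, 512, 3), dtype=np.float32)
702 |     o1, o11, o5 = model.predict(dummy)
703 |     # Expected shapes: (1, 512, 512, 3) text/non-text, (1, 512, 512, 2) centerline,
704 |     # (1, 512, 512, 1) local height.
705 |     print(o1.shape, o11.shape, o5.shape)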
--------------------------------------------------------------------------------