├── .gitignore
├── LICENSE
├── README.md
├── configs.py
├── img
│   └── Vision-LSTM.png
├── modules
│   ├── __init__.py
│   ├── datasets.py
│   ├── models.py
│   ├── trainUtils.py
│   └── utils.py
├── preprocess.py
├── preprocessing
│   ├── __init__.py
│   ├── build_dataset.py
│   ├── gen_grids.py
│   ├── split_geotiff.py
│   ├── sv_process.py
│   └── taxi_process.py
└── train.py

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
**/data
**/logs
**/tensorboard
**/weights
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
.pybuilder/
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version

# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock

# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock

# pdm
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
#pdm.lock
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
# in version control.
# https://pdm.fming.dev/#use-with-ide
.pdm.toml

# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/

# pytype static type analyzer
.pytype/

# Cython debug symbols
cython_debug/

# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/

--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
MIT License

Copyright (c) 2023 Yingjing Huang

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Vision-LSTM 🚀
Authors: Yingjing Huang, Fan Zhang, Yong Gao, Wei Tu, Fabio Duarte, Carlo Ratti, Diansheng Guo, Yu Liu

[Paper link](https://www.sciencedirect.com/science/article/pii/S0198971523001060)

## Abstract 🌍
Street-level imagery has emerged as a valuable tool for observing large-scale urban spaces with unprecedented detail. However, previous studies have been limited to analyzing individual street-level images. This approach falls short in representing the characteristics of a spatial unit, such as a street or grid, which may contain a varying number of street view images, ranging from several to hundreds. As a result, a more comprehensive and representative approach is required to capture the complexity and diversity of urban environments at different spatial scales. To address this issue, this study proposes a deep learning-based module called **Vision-LSTM**, which can effectively **obtain vector representations from varying numbers of street-level images in spatial units**. The effectiveness of the module is validated through experiments on urban village recognition, achieving reliable results (overall accuracy: 91.6%) through multimodal learning that combines street-level imagery with remote sensing imagery and social sensing data. Compared with existing image fusion methods, Vision-LSTM is significantly more effective at capturing the associations between street-level images. The proposed module can provide a more comprehensive understanding of urban space, enhancing the research value of street-level imagery and facilitating multimodal learning-based urban research. Our models are available at https://github.com/yingjinghuang/Vision-LSTM.

![Framework of Vision-LSTM](./img/Vision-LSTM.png)

## Citation 📚
**If you find this resource useful, please cite the following:**
```bibtex
@article{HUANG2023102043,
title = {Comprehensive urban space representation with varying numbers of street-level images},
journal = {Computers, Environment and Urban Systems},
volume = {106},
pages = {102043},
year = {2023},
issn = {0198-9715},
doi = {https://doi.org/10.1016/j.compenvurbsys.2023.102043},
url = {https://www.sciencedirect.com/science/article/pii/S0198971523001060},
author = {Yingjing Huang and Fan Zhang and Yong Gao and Wei Tu and Fabio Duarte and Carlo Ratti and Diansheng Guo and Yu Liu},
keywords = {Street-level imagery, Urban space representation, Multimodal data fusion, Deep learning, Urban village recognition}
}
```

## Highlights ✨
- Representing regional features by capturing associations among street-level images.
- The proposed Vision-LSTM extracts features from varying numbers of images.
- A multimodal model that fuses satellite imagery 🌍, street-level imagery 🚗, and mobility data 📊.
- Both visual and dynamic mobility information are crucial for urban village recognition.
- The framework achieved 91.6% accuracy in identifying urban villages 🏙️.

## Results 📈
For our urban village recognition case, the results are summarized in the following table.

| Method | OA(%) | Kappa | F1 |
|------------------------------------------|-----------|-----------|-----------|
| No fusion (random image) | 88.1 | 0.634 | 0.708 |
| Average Pooling | 89.1 | 0.656 | 0.727 |
| Maximum Pooling | 79.3 | 0.461 | 0.588 |
| Element-wise Sum | 77.4 | 0.432 | 0.566 |
| **Vision-LSTM (proposed in this study)** | **91.6** | **0.720** | **0.773** |

## Usage 🛠️
**Step 1**. Prepare your own datasets.

**Step 2**. Run [preprocess.py](preprocess.py) to preprocess the data.
```bash
python preprocess.py
```

**Step 3**. Revise the configs in [configs.py](configs.py).

**Step 4**. Train your own model.
```bash
python train.py
```
--------------------------------------------------------------------------------
/configs.py:
--------------------------------------------------------------------------------
import os

configs = {
    # Parameters
    # model name for folders of weights and tensorboard
    "model_name" : "m3_1000_single",
    # GPU id(s) to use
    # Format: a comma-delimited list, e.g. "0,1,2,3"
    "gpu" : "0",
    # batch size
    "batch_size" : 128,
    # num of epochs
    "epochs" : 100,
    # initial learning rate
    "lr" : 0.1,
    # warmup learning rate
    "lr_warm": 0.0035,
    # initial weight decay
    "wd" : 1e-4,
    # number of workers
    "workers" : 1,
    # the mode used to aggregate street view images
    # Format: single, mean, max, sum, lstm
    "mode" : "single",

    "modality_count": 3,
    "modalities": ["remote", "sv", "mobility"],

    # Paths
    "weights_folder": "Vision-LSTM/weights",
    "tensorboard_folder": "Vision-LSTM/tensorboard",
    "log_folder": "Vision-LSTM/logs",
    "model_data_path": "Vision-LSTM/data/grids1000/model_data.pkl",
    "rs_path": "Vision-LSTM/data/grids1000/rs_tiles"
}
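Since `train.py` is not included in this snapshot, here is a minimal sketch of how this dict is typically consumed; the exact wiring inside the training script is an assumption.

```python
# Sketch only: how configs.py is usually consumed (train.py is not part of
# this dump, so this wiring is an assumption).
import os
from configs import configs

# Restrict training to the GPU id(s) listed in the config.
os.environ["CUDA_VISIBLE_DEVICES"] = configs["gpu"]

# Derive a per-run output folder from the model name.
weights_dir = os.path.join(configs["weights_folder"], configs["model_name"])
os.makedirs(weights_dir, exist_ok=True)
```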
--------------------------------------------------------------------------------
/img/Vision-LSTM.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yingjinghuang/Vision-LSTM/430cd96f74b7fba855903a40838c43ea6992bdc3/img/Vision-LSTM.png

--------------------------------------------------------------------------------
/modules/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yingjinghuang/Vision-LSTM/430cd96f74b7fba855903a40838c43ea6992bdc3/modules/__init__.py

--------------------------------------------------------------------------------
/modules/datasets.py:
--------------------------------------------------------------------------------
import torch
from torch.utils.data import Dataset
from torchvision import transforms
from PIL import Image
import os, sys
import numpy as np

Image.MAX_IMAGE_PIXELS = 2300000000

# Subclass PyTorch's Dataset to build custom datasets
class RemoteData(Dataset):
    def __init__(self, img_list, label_list, file_path, resize_size=(224,224), mode='train'):
        self.img_list = img_list
        self.label_list = label_list
        self.file_path = file_path
        self.resize_size = resize_size
        self.mode = mode

    def __getitem__(self, index):
        # Get the file path for this index from img_list
        img_path = self.img_list[index][0]

        # Read the image file
        image = Image.open(os.path.join(self.file_path, img_path + ".tif"))

        # Set up the transforms; normalization and similar steps are included here
        if self.mode == 'train':
            transform = transforms.Compose([
                transforms.Resize(self.resize_size),
                transforms.RandomHorizontalFlip(p=0.5),  # random horizontal flip with the given probability
                transforms.RandomVerticalFlip(p=0.5),  # random vertical flip with the given probability
                transforms.RandomRotation(degrees=180),  # random rotation
                transforms.ToTensor(),
                transforms.Normalize([0.4914, 0.4822, 0.4465],
                                     [0.2023, 0.1994, 0.2010])  # normalize with fixed channel statistics
            ])
        else:
            # No data augmentation for test/validation
            transform = transforms.Compose([
                transforms.Resize(self.resize_size),
                transforms.ToTensor(),
                transforms.Normalize([0.4914, 0.4822, 0.4465],
                                     [0.2023, 0.1994, 0.2010])  # normalize with fixed channel statistics
            ])

        image = transform(image)

        label = self.label_list[index]
        sample = (image, label)
        return sample

    def __len__(self):
        return len(self.img_list)

class TaxiDataset(Dataset):
    def __init__(self, taxi_list, label_list, mode='train'):
        self.taxi_list = taxi_list
        self.label_list = label_list
        self.mode = mode

    def __getitem__(self, index):
        ## Process the taxi time series
        taxi_attrs = self.taxi_list[index]
        taxi_attrs = torch.FloatTensor(taxi_attrs)

        label = self.label_list[index]
        sample = (taxi_attrs, label)
        return sample

    def __len__(self):
        return len(self.label_list)

class SVFeatureDataset(Dataset):
    def __init__(self, feature_list, label_list, mode='train'):
        self.feature_list = feature_list
        self.label_list = label_list
        self.mode = mode

    def __getitem__(self, index):
        attrs = self.feature_list[index][0]
        # Nudge exact zeros to a small epsilon so real values can later be
        # distinguished from zero padding
        attrs = [float(x) if float(x)!=0 else 0.0000001 for x in attrs.split(",")]
        attrs = torch.FloatTensor(attrs)
        attrs = attrs.view(-1, 512)

        label = self.label_list[index]
        sample = (attrs, label)
        return sample

    def __len__(self):
        return len(self.label_list)

class SVDataset(Dataset):
    def __init__(self, img_list, label_list, resize_size=(224,224), mode='train'):
        self.img_list = img_list
        self.label_list = label_list
        self.resize_size = resize_size
        self.mode = mode

    def __getitem__(self, index):
        # Get the file path for this index from img_list
        img_path = self.img_list[index]

        # Read the image file
        image = Image.open(img_path)

        # Set up the transforms; normalization and similar steps are included here
        if self.mode == 'train':
            transform = transforms.Compose([
                # transforms.Resize(self.resize_size),
                transforms.RandomResizedCrop(224),
                transforms.RandomHorizontalFlip(),
                transforms.ToTensor(),
                transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
            ])
        else:
            # No data augmentation for test/validation
            transform = transforms.Compose([
                transforms.Resize(256),
                transforms.CenterCrop(224),
                transforms.ToTensor(),
                transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
            ])

        image = transform(image)
        # Nudge exact zeros so real pixels survive the zero-padding mask downstream
        image = torch.where(image==0, 0.0000001, image)

        label = self.label_list[index]
        sample = (image, label)
        return sample

    def __len__(self):
        return len(self.img_list)

class MultiDataset(Dataset):
    def __init__(self, img_list, sv_list, taxi_list, label_list, image_dir_path, resize_size=(224, 224), mode='train'):
        self.img_list = img_list
        self.sv_list = sv_list
        self.taxi_list = taxi_list
        self.label_list = label_list
        self.image_dir_path = image_dir_path
        self.resize_size = resize_size
        self.mode = mode

    def __getitem__(self, index):
        ## Process the remote sensing tile
        img_path = self.img_list[index]
        # Read the image file
        image = Image.open(os.path.join(self.image_dir_path, img_path + ".tif"))
        # Data augmentation
        if self.mode == 'train':
            transform = transforms.Compose([
                transforms.Resize(self.resize_size),
                transforms.RandomHorizontalFlip(p=0.5),  # random horizontal flip
                transforms.RandomVerticalFlip(p=0.5),  # random vertical flip
                transforms.RandomRotation(degrees=180),  # random rotation
                transforms.ToTensor(),
                transforms.Normalize([0.4914, 0.4822, 0.4465],
                                     [0.2023, 0.1994, 0.2010])  # normalize with fixed channel statistics
            ])
        else:
            transform = transforms.Compose([
                transforms.Resize(self.resize_size),
                transforms.ToTensor(),
                transforms.Normalize([0.4914, 0.4822, 0.4465],
                                     [0.2023, 0.1994, 0.2010])  # normalize with fixed channel statistics
            ])
        image = transform(image)

        ## Process the taxi time series
        taxi_attrs = np.array(self.taxi_list[index])
        taxi_attrs = torch.FloatTensor(taxi_attrs.astype(np.float32))

        ## Process the street view features
        sv_attrs = self.sv_list[index]
        # sv_attrs = [float(x) if float(x)!=0 else 0.0000001 for x in sv_attrs.split(",")]
        sv_attrs = torch.FloatTensor(sv_attrs)
        sv_attrs = sv_attrs.view(-1, 512)

        # label = self.label_list[index].astype(np.int64)
        label = self.label_list[index]

        sample = (
            image,
            sv_attrs,
            taxi_attrs,
            label)
        return sample

    def __len__(self):
        return len(self.img_list)

class MultiDataset1SV(Dataset):
    def __init__(self, img_list, sv_list, taxi_list, label_list, image_dir_path, resize_size=(224, 224), mode='train'):
        self.img_list = img_list
        self.sv_list = sv_list
        self.taxi_list = taxi_list
        self.label_list = label_list
        self.image_dir_path = image_dir_path
        self.resize_size = resize_size
        self.mode = mode

    def __getitem__(self, index):
        ## Process the remote sensing tile
        img_path = self.img_list[index]
        # Read the image file
        image = Image.open(os.path.join(self.image_dir_path, img_path + ".tif"))
        # Data augmentation
        if self.mode == 'train':
            transform = transforms.Compose([
                transforms.Resize(self.resize_size),
                transforms.RandomHorizontalFlip(p=0.5),  # random horizontal flip
                transforms.RandomVerticalFlip(p=0.5),  # random vertical flip
                transforms.RandomRotation(degrees=180),  # random rotation
                transforms.ToTensor(),
                transforms.Normalize([0.4914, 0.4822, 0.4465],
                                     [0.2023, 0.1994, 0.2010])  # normalize with fixed channel statistics
            ])
        else:
            transform = transforms.Compose([
                transforms.Resize(self.resize_size),
                transforms.ToTensor(),
                transforms.Normalize([0.4914, 0.4822, 0.4465],
                                     [0.2023, 0.1994, 0.2010])  # normalize with fixed channel statistics
            ])
        image = transform(image)

        ## Process the taxi time series
        taxi_attrs = np.array(self.taxi_list[index])
        taxi_attrs = torch.FloatTensor(taxi_attrs.astype(np.float32))

        ## Process the street view features: randomly pick one feature vector
        sv_path = self.sv_list[index]
        idx = np.random.randint(sv_path.shape[0], size=1)
        sv = sv_path[idx, :]
        sv = torch.FloatTensor(sv.astype(np.float32))

        # label = self.label_list[index].astype(np.int64)
        label = self.label_list[index]

        sample = (image, sv, taxi_attrs, label)
        return sample

    def __len__(self):
        return len(self.img_list)

class TwoDataset(Dataset):
    def __init__(self, img_list, sv_list, taxi_list, label_list, image_dir_path, modal, resize_size=(224, 224), mode='train'):
        self.modal = modal
        self.img_list = img_list
        self.sv_list = sv_list
        self.taxi_list = taxi_list
        self.label_list = label_list
        self.image_dir_path = image_dir_path
        self.resize_size = resize_size
        self.mode = mode

    def __getitem__(self, index):
        if "remote" in self.modal:
            ## Process the remote sensing tile
            img_path = self.img_list[index]
            # Read the image file
            image = Image.open(os.path.join(self.image_dir_path, img_path + ".tif"))
            # Data augmentation
            if self.mode == 'train':
                transform = transforms.Compose([
                    transforms.Resize(self.resize_size),
                    transforms.RandomHorizontalFlip(p=0.5),  # random horizontal flip
                    transforms.RandomVerticalFlip(p=0.5),  # random vertical flip
                    transforms.RandomRotation(degrees=180),  # random rotation
                    transforms.ToTensor(),
                    transforms.Normalize([0.4914, 0.4822, 0.4465],
                                         [0.2023, 0.1994, 0.2010])  # normalize with fixed channel statistics
                ])
            else:
                transform = transforms.Compose([
                    transforms.Resize(self.resize_size),
                    transforms.ToTensor(),
                    transforms.Normalize([0.4914, 0.4822, 0.4465],
                                         [0.2023, 0.1994, 0.2010])  # normalize with fixed channel statistics
                ])
            f1 = transform(image)

            if "sv" in self.modal:
                ## Process the street view features
                sv_attrs = self.sv_list[index]
                sv_attrs = [float(x) if float(x)!=0 else 0.0000001 for x in sv_attrs.split(",")]
                sv_attrs = torch.FloatTensor(sv_attrs)
                f2 = sv_attrs.view(-1, 512)
            else:
                ## Process the taxi time series
                taxi_attrs = np.array(self.taxi_list[index])
                f2 = torch.FloatTensor(taxi_attrs.astype(np.float32))
        else:
            ## Process the street view features
            sv_attrs = self.sv_list[index]
            sv_attrs = [float(x) if float(x)!=0 else 0.0000001 for x in sv_attrs.split(",")]
            sv_attrs = torch.FloatTensor(sv_attrs)
            f1 = sv_attrs.view(-1, 512)

            ## Process the taxi time series
            taxi_attrs = np.array(self.taxi_list[index])
            f2 = torch.FloatTensor(taxi_attrs.astype(np.float32))

        # label = self.label_list[index].astype(np.int64)
        label = self.label_list[index]

        sample = (f1, f2, label)
        return sample

    def __len__(self):
        return len(self.img_list)

class SVEnd2EndDataset(Dataset):
    def __init__(self, sv_list, label_list, dir, resize_size=(224, 224), mode='train'):
        self.sv_list = sv_list
        self.label_list = label_list
        self.resize_size = resize_size
        self.dir = dir
        self.mode = mode

    def __getitem__(self, index):
        ## Process the street view images
        sv_paths = self.sv_list[index][0]
        # print(sv_paths)
        try:
            sv_paths = sv_paths.split(",")
        except AttributeError:
            print(sv_paths)
            sys.exit(1)
        sv_list = []
        for sv_path in sv_paths:
            # Read the image file
            sv_img = Image.open(os.path.join(self.dir, sv_path))
            # Set up the transforms; normalization and similar steps are included here
            if self.mode == 'train':
                transform = transforms.Compose([
                    # transforms.Resize(self.resize_size),
                    transforms.RandomResizedCrop(224),
                    transforms.RandomHorizontalFlip(),
                    transforms.ColorJitter(brightness=(0.6,1.4), contrast=(0.6,1.4), saturation=(0.6,1.4), hue=0),
                    transforms.ToTensor(),
                    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
                ])
            else:
                # No data augmentation for test/validation
                transform = transforms.Compose([
                    transforms.Resize(self.resize_size),
                    transforms.ToTensor(),
                    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
                ])

            sv_img = transform(sv_img)
            sv_list.append(sv_img)
        sv_attrs = torch.stack(sv_list, 0)
        # Nudge exact zeros so real pixels survive the zero-padding mask downstream
        sv_attrs[sv_attrs == 0] = 0.0000001

        label = self.label_list[index]

        sample = (sv_attrs, label)
        return sample

    def __len__(self):
        return len(self.label_list)

class End2EndDataset(Dataset):
    def __init__(self, img_list, sv_list, taxi_list, label_list, image_dir_path, resize_size=(224, 224), mode='train'):
        self.img_list = img_list
        self.sv_list = sv_list
        self.taxi_list = taxi_list
        self.label_list = label_list
        self.image_dir_path = image_dir_path
        self.resize_size = resize_size
        self.mode = mode

    def __getitem__(self, index):
        ## Process the remote sensing tile
        img_path = self.img_list[index]
        # Read the image file
        image = Image.open(os.path.join(self.image_dir_path, img_path + ".tif"))
        # Data augmentation
        if self.mode == 'train':
            transform = transforms.Compose([
                transforms.Resize(self.resize_size),
                transforms.RandomHorizontalFlip(p=0.5),  # random horizontal flip
                transforms.RandomVerticalFlip(p=0.5),  # random vertical flip
                transforms.RandomRotation(degrees=180),  # random rotation
                transforms.ToTensor(),
                transforms.Normalize([0.4914, 0.4822, 0.4465],
                                     [0.2023, 0.1994, 0.2010])  # normalize with fixed channel statistics
            ])
        else:
            transform = transforms.Compose([
                transforms.Resize(self.resize_size),
                transforms.ToTensor(),
                transforms.Normalize([0.4914, 0.4822, 0.4465],
                                     [0.2023, 0.1994, 0.2010])  # normalize with fixed channel statistics
            ])
        image = transform(image)

        ## Process the taxi time series
        taxi_attrs = np.array(self.taxi_list[index])
        taxi_attrs = torch.FloatTensor(taxi_attrs.astype(np.float32))

        ## Process the street view images
        sv_paths = self.sv_list[index]
        sv_paths = sv_paths.split(",")
        sv_list = []
        for sv_path in sv_paths:
            # Read the image file
            sv_img = Image.open(sv_path)
            # Set up the transforms; normalization and similar steps are included here
            if self.mode == 'train':
                transform = transforms.Compose([
                    transforms.RandomResizedCrop(224),
                    transforms.RandomHorizontalFlip(),
                    transforms.ColorJitter(brightness=(0.6,1.4), contrast=(0.6,1.4), saturation=(0.6,1.4), hue=0),
                    transforms.ToTensor(),
                    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
                ])
            else:
                # No data augmentation for test/validation
                transform = transforms.Compose([
                    transforms.Resize(self.resize_size),
                    transforms.ToTensor(),
                    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
                ])

            sv_img = transform(sv_img)
            sv_list.append(sv_img)
        sv_attrs = torch.stack(sv_list, 0)
        # print("sv attrs shape: ", sv_attrs.shape)

        label = self.label_list[index].astype(np.int64)

        sample = (image, sv_attrs, taxi_attrs, label)
        return sample

    def __len__(self):
        return len(self.img_list)
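A minimal sketch of wiring `SVFeatureDataset` into a `DataLoader`. The pickle layout (column names `"sv_feature"` and `"label"`) is an assumption based on how `preprocess.py` builds `model_data.pkl`; adjust the names to your own data. `collate_fn_sv` (defined in `modules/utils.py` below) pads the variable-length feature stacks.

```python
# Sketch only: dataset + loader wiring; column names are assumptions.
import pandas as pd
from torch.utils.data import DataLoader
from modules.datasets import SVFeatureDataset
from modules.utils import collate_fn_sv

data = pd.read_pickle("Vision-LSTM/data/grids1000/model_data.pkl")
train_df = data[data["mode"] == "train"]

train_set = SVFeatureDataset(
    feature_list=train_df[["sv_feature"]].values,  # each row: one comma-separated feature string
    label_list=train_df["label"].values,
    mode="train",
)
train_loader = DataLoader(train_set, batch_size=128, shuffle=True,
                          num_workers=1, collate_fn=collate_fn_sv)  # pads variable-length stacks
```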
--------------------------------------------------------------------------------
/modules/models.py:
--------------------------------------------------------------------------------
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.models as models
import math

from torch import Tensor

################################################################################
################################ TAXI Part #####################################
################################################################################

class BlockLSTM(nn.Module):
    def __init__(self, time_steps, num_layers, lstm_hs, dropout=0.8, attention=False):
        super().__init__()
        self.lstm = nn.LSTM(input_size=time_steps, hidden_size=lstm_hs, num_layers=num_layers)
        self.dropout = nn.Dropout(p=dropout)
    def forward(self, x):
        # input is of the form (batch_size, num_layers, time_steps), e.g. (128, 1, 512)
        x = torch.transpose(x, 0, 1)
        # lstm layer expects (num_layers, batch_size, time_steps)
        x, (h_n, c_n) = self.lstm(x)
        # dropout layer input shape (Sequence Length, Batch Size, Hidden Size * Num Directions)
        y = self.dropout(x)
        # output shape is the same as the dropout input
        return y

class BlockFCNConv(nn.Module):
    def __init__(self, in_channel=1, out_channel=128, kernel_size=8, momentum=0.99, epsilon=0.001, squeeze=False):
        super().__init__()
        self.conv = nn.Conv1d(in_channel, out_channel, kernel_size=kernel_size)
        self.batch_norm = nn.BatchNorm1d(num_features=out_channel, eps=epsilon, momentum=momentum)
        self.relu = nn.ReLU()
    def forward(self, x):
        # input (batch_size, num_variables, time_steps), e.g. (128, 1, 512)
        x = self.conv(x)
        # input (batch_size, out_channel, L_out)
        x = self.batch_norm(x)
        # same shape as input
        y = self.relu(x)
        return y

class BlockFCN(nn.Module):
    def __init__(self, time_steps, channels=[1, 128, 256, 256], kernels=[8, 5, 3], mom=0.99, eps=0.001):
        super().__init__()
        self.conv1 = BlockFCNConv(channels[0], channels[1], kernels[0], momentum=mom, epsilon=eps, squeeze=True)
        self.conv2 = BlockFCNConv(channels[1], channels[2], kernels[1], momentum=mom, epsilon=eps, squeeze=True)
        self.conv3 = BlockFCNConv(channels[2], channels[3], kernels[2], momentum=mom, epsilon=eps)
        output_size = time_steps - sum(kernels) + len(kernels)
        self.global_pooling = nn.AvgPool1d(kernel_size=output_size)
    def forward(self, x):
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.conv3(x)
        # apply Global Average Pooling 1D
        y = self.global_pooling(x)
        return y

class LSTMFCNBlock(nn.Module):
    def __init__(self, time_steps, num_variables=1, lstm_hs=256, channels=[1, 128, 256, 256]):
        super(LSTMFCNBlock, self).__init__()
        # self.lstm_block = BlockLSTM(time_steps, 1, lstm_hs)
        self.lstm_block = BlockLSTM(int(time_steps/2), 2, int(lstm_hs/2))
        self.fcn_block = BlockFCN(time_steps, channels=channels)

    def forward(self, x):
        # input is (batch_size, time_steps); it has to become (batch_size, 1, time_steps)
        x1 = x
        x = x.unsqueeze(1)
        # pass the input through the LSTM block
        # x1 = x
        x1_1, x1_2 = x1.split(170, dim=-1)
        x1 = torch.stack([x1_1, x1_2], dim=1)  # (batch_size, 2, 170)

        x1 = self.lstm_block(x1)
        # x1 = self.lstm_block(x)
        # x1 = torch.squeeze(x1)
        x1 = torch.transpose(x1, 0, 1)
        x1 = torch.flatten(x1, start_dim=1, end_dim=2)
        # pass the input through the FCN block
        x2 = self.fcn_block(x)
        x2 = torch.squeeze(x2, dim=-1)
        # concatenate the two blocks' outputs
        x = torch.cat([x1, x2], 1)
        return x

class TaxiNet(nn.Module):
    def __init__(self):
        super(TaxiNet, self).__init__()
        # LSTMFCNBlock requires time_steps; 340 matches the taxi series length
        # used throughout this repo (forward splits it into two 170-step halves)
        self.backbone = LSTMFCNBlock(time_steps=340)

        self.fc = nn.Sequential(
            # nn.BatchNorm1d(512),
            nn.ReLU(),
            nn.Linear(512, 2)
        )

    def forward(self, x):
        x = self.backbone(x)
        x = self.fc(x)

        return x
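The shape comments above are easiest to verify with a quick smoke test. This is a sketch with untrained weights; the 340-step input length is an assumption that matches the hard-coded 170/170 split in `LSTMFCNBlock`.

```python
# Smoke test for the taxi branch (shapes only; weights are untrained).
import torch
from modules.models import TaxiNet

net = TaxiNet()
x = torch.randn(8, 340)   # (batch_size, time_steps)
logits = net(x)           # LSTM path -> (8, 256), FCN path -> (8, 256), concat -> (8, 512)
print(logits.shape)       # torch.Size([8, 2])
```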
################################################################################
################################ SV Part   #####################################
################################################################################

class DensenetEncoder(nn.Module):
    def __init__(self):
        super(DensenetEncoder, self).__init__()
        densenet = models.densenet121(pretrained=True)
        self.feature = densenet.features
        self.classifier = nn.Sequential(*list(densenet.classifier.children())[:-1])
        pretrained_dict = densenet.state_dict()
        model_dict = self.classifier.state_dict()
        pretrained_dict = {k: v for k, v in pretrained_dict.items() if k in model_dict}
        model_dict.update(pretrained_dict)
        self.classifier.load_state_dict(model_dict)
        self.avg = nn.AvgPool2d(7, stride=1)

    def forward(self, x):
        output = self.feature(x)
        output = self.avg(output)
        return output

class VSFCnet(nn.Module):
    def __init__(self):
        super(VSFCnet, self).__init__()
        self.hidden1 = nn.Sequential(
            nn.Linear(in_features=512, out_features=256, bias=True),
            nn.ReLU())
        self.hidden2 = nn.Sequential(
            nn.Linear(in_features=256, out_features=64, bias=True),
            nn.ReLU())
        self.hidden3 = nn.Linear(64,2)

    def forward(self, x):
        x = self.hidden1(x)
        x = self.hidden2(x)
        x = self.hidden3(x)
        return x

class SVEnd2endBlock(nn.Module):
    def __init__(self, mode='mean'):
        super(SVEnd2endBlock, self).__init__()
        self.mode = mode

        model = models.__dict__["resnet18"](num_classes=365)
        checkpoint = torch.load('/home/huangyj/urban_village/utils/resnet18_places365.pth.tar')  # map_location=lambda storage, loc:storage
        state_dict = {str.replace(k, 'module.', ''): v for k, v in checkpoint["state_dict"].items()}
        model.load_state_dict(state_dict)

        # layer = model._modules.get("avgpool")
        # Keep the feature part and drop the fc layer to extract 512-d features
        model = torch.nn.Sequential(*(list(model.children())[:-1]))
        self.feature_extractor = model

        if mode == 'lstm':
            self.lstm = nn.LSTM(512, 512, 1, batch_first=True)

    def forward(self, x):
        b, n, c, h, w = x.size()  # (batch, num, channel, height, width)

        x_list = []
        for sample in range(b):  # unpack the batch dimension
            # b,n,c,h,w -> n,c,h,w
            x_tmp = x[sample]
            # n,c,h,w -> n_valid,c,h,w
            x_tmp = x_tmp[x_tmp.nonzero(as_tuple=True)].view(-1, x_tmp.shape[1], x_tmp.shape[2], x_tmp.shape[3])  # drop the zero padding
            x_tmp = self.feature_extractor(x_tmp)  # yields (n_valid, 512, 1, 1)
            x_tmp = x_tmp.view(-1, 512)

            if self.mode == "mean":
                x_tmp = torch.mean(x_tmp, dim=0)  # column-wise mean, yielding (512,)
            elif self.mode == "pca":
                x_tmp, s, v = torch.pca_lowrank(x_tmp.T, 1)  # PCA reduction, yielding (512, 1)
                x_tmp = torch.squeeze(x_tmp)  # yields (512,)
            elif self.mode == "max":
                x_tmp = torch.max(x_tmp, dim=0).values  # column-wise max
            elif self.mode == "lstm":
                x_tmp, (h_n, c_n) = self.lstm(x_tmp.view(1, -1, 512))  # the LSTM input needs an explicit batch dimension
                x_tmp = x_tmp[:,-1,:]  # keep only the last step's output  # (1,1,512)
                x_tmp = torch.squeeze(x_tmp)  # yields (512,)
            elif self.mode == "vit":
                # NOTE: self.vit is never defined in this module, so the "vit"
                # mode is unusable as written
                x_tmp = self.vit(x_tmp.view(1, -1, 512))  # the input needs an explicit batch dimension
                x_tmp = torch.squeeze(x_tmp)  # yields (512,)
            else:
                pass

            x_list.append(x_tmp)

        x = torch.stack(x_list)  # stack back to (batch, 512)
        return x

class SVFeatureBlock(nn.Module):
    def __init__(self, mode='mean'):
        super(SVFeatureBlock, self).__init__()
        self.mode = mode

        if mode == 'lstm':
            self.lstm = nn.LSTM(512, 512, 1, batch_first=True)

    def forward(self, x):
        b, c, f = x.size()  # batch, channel, feature

        x_list = []
        for sample in range(b):  # unpack the batch dimension
            # b,c,f -> c,f
            x_tmp = x[sample]
            # c,f -> c_valid, f
            # Get row sums
            row_sums = torch.sum(x_tmp, dim=1)

            # Find rows with non-zero sums
            non_zero_rows = (row_sums != 0).nonzero().squeeze()

            # Extract rows with non-zero sums
            x_tmp = x_tmp[non_zero_rows]
            # x_tmp = x_tmp[x_tmp.nonzero(as_tuple=True)].view(-1, 512)  # drop the zero padding

            if self.mode == "mean":
                if x_tmp.dim() != 1:
                    x_tmp = torch.mean(x_tmp, dim=0)  # column-wise mean, yielding (512,)
            elif self.mode == "pca":
                x_tmp, s, v = torch.pca_lowrank(x_tmp.T, 1)  # PCA reduction, yielding (512, 1)
                x_tmp = torch.squeeze(x_tmp)  # yields (512,)
            elif self.mode == "sum":
                if x_tmp.dim() != 1:
                    x_tmp = torch.sum(x_tmp, dim=0)  # column-wise sum
            elif self.mode == "max":
                if x_tmp.dim() != 1:
                    x_tmp = torch.max(x_tmp, dim=0).values  # column-wise max
            elif self.mode == "lstm":
                x_tmp, (h_n, c_n) = self.lstm(x_tmp.view(1, -1, 512))  # the LSTM input needs an explicit batch dimension
                x_tmp = x_tmp[:,-1,:]  # keep only the last step's output  # (1,1,512)
                x_tmp = torch.squeeze(x_tmp)  # yields (512,)
            else:
                pass

            # print(x[sample].shape, x_tmp.shape)

            x_list.append(x_tmp)

        x = torch.stack(x_list)  # stack back to (batch, 512)
        return x

class SVEnd2end(nn.Module):
    def __init__(self, mode='mean'):
        super(SVEnd2end, self).__init__()
        self.backbone = SVEnd2endBlock(mode)

        self.fc = nn.Sequential(
            # nn.BatchNorm1d(512),
            nn.ReLU(),
            nn.Linear(512, 2)
        )

    def forward(self, x):
        x = self.backbone(x)
        x = self.fc(x)

        return x

class SVFeature(nn.Module):
    def __init__(self, mode='mean'):
        super(SVFeature, self).__init__()
        self.backbone = SVFeatureBlock(mode)

        self.fc = nn.Sequential(
            # nn.BatchNorm1d(512),
            nn.ReLU(),
            nn.Linear(512, 2)
        )

    def forward(self, x):
        x = self.backbone(x)
        x = self.fc(x)

        return x

################################################################################
############################### Remote Part ####################################
################################################################################

class RemoteNet(nn.Module):
    def __init__(self, ratio=0.5, model_name='densenet'):
        super(RemoteNet, self).__init__()
        self.model_name = model_name
        if model_name == "densenet":
            self.backbone = DensenetEncoder()
            self.fc = nn.Sequential(
                nn.Linear(1024,512),
                nn.ReLU(True),
                nn.Dropout(ratio),
                nn.Linear(512, 2)
            )
        else:
            self.backbone = models.resnet18(pretrained=False)
            # Keep the feature part and drop the fc layer
            self.backbone = nn.Sequential(*(list(self.backbone.children())[:-1]))
            self.fc = nn.Sequential(
                nn.Linear(512,256),
                nn.ReLU(True),
                nn.Dropout(ratio),
                nn.Linear(256, 2)
            )

    def forward(self,x):
        x = self.backbone(x)
        x = torch.squeeze(x)
        if self.model_name == "densenet":
            x = x.view(-1, 1024)
        else:
            x = x.view(-1, 512)
        x = self.fc(x)
        return x

################################################################################
############################### Remote Part ####################################
################################################################################

################################################################################
############################### Multi Part  ####################################
################################################################################

class MultiFeature1SV(nn.Module):
    def __init__(self):
        super(MultiFeature1SV, self).__init__()
        # Remote branch
        model1 = models.resnet18(num_classes=2)
        self.remote_backbone = torch.nn.Sequential(*(list(model1.children())[:-1]))  # extract 512-d features

        # Taxi branch
        self.taxi_backbone = LSTMFCNBlock(time_steps=340, num_variables=2)

        self.fc = nn.Sequential(
            # nn.BatchNorm1d(512),
            nn.ReLU(),
            nn.Linear(512 + 512 + 512, 2)
        )

    def forward(self, image, sv, taxi):
        # Remote branch
        x1 = self.remote_backbone(image)  # yields (batch, 512, 1, 1)
        x1 = x1.view(-1, 512)

        # SV branch
        x2 = sv
        x2 = x2.view(-1, 512)

        # taxi branch
        x3 = self.taxi_backbone(taxi)

        x = torch.cat((x1, x2, x3), dim=1)
        x = self.fc(x)
        return x

class MultiFeature(nn.Module):
    def __init__(self, mode='mean'):
        super(MultiFeature, self).__init__()
        # Remote branch
        model1 = models.resnet18(num_classes=2)
        self.remote_backbone = torch.nn.Sequential(*(list(model1.children())[:-1]))  # extract 512-d features

        # SV branch
        self.sv_backbone = SVFeatureBlock(mode)

        # Taxi branch
        self.taxi_backbone = LSTMFCNBlock(time_steps=340, num_variables=2)

        self.fc = nn.Sequential(
            # nn.BatchNorm1d(512),
            nn.ReLU(),
            nn.Linear(512 + 512 + 512, 2)
        )

    def forward(self, image, sv, taxi):
        # Remote branch
        x1 = self.remote_backbone(image)  # yields (batch, 512, 1, 1)
        x1 = x1.view(-1, 512)

        # SV branch
        x2 = self.sv_backbone(sv)

        # taxi branch
        x3 = self.taxi_backbone(taxi)

        x = torch.cat((x1, x2, x3), dim=1)
        x = self.fc(x)
        return x


################################################################################
############################### Multi Part  ####################################
################################################################################

################################################################################
########################## two modal Part  #####################################
################################################################################

class TwoFeature(nn.Module):
    def __init__(self, mode='mean', modal=["remote", "sv"]):
        super(TwoFeature, self).__init__()

        self.modal = modal

        # Remote branch
        model1 = models.resnet18(num_classes=2)
        self.remote_backbone = torch.nn.Sequential(*(list(model1.children())[:-1]))  # extract 512-d features

        # SV branch
        self.sv_backbone = SVFeatureBlock(mode)

        # Taxi branch
        self.taxi_backbone = LSTMFCNBlock(time_steps=340, num_variables=2)

        self.fc = nn.Sequential(
            # nn.BatchNorm1d(512),
            nn.ReLU(),
            nn.Linear(512 + 512, 2)
        )

    def forward(self, f1, f2):
        if "remote" in self.modal:
            # Remote branch
            x1 = self.remote_backbone(f1)  # yields (batch, 512, 1, 1)
            x1 = x1.view(-1, 512)

            if "sv" in self.modal:
                # SV branch
                x2 = self.sv_backbone(f2)
            else:
                # taxi branch
                x2 = self.taxi_backbone(f2)
        else:
            # SV branch
            x1 = self.sv_backbone(f1)

            # taxi branch
            x2 = self.taxi_backbone(f2)

        x = torch.cat((x1, x2), dim=1)
        x = self.fc(x)
        return x
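A quick smoke test of the three-branch fusion model. The street view input is a padded `(batch, max_imgs, 512)` tensor, as produced by collate-time padding; all-zero rows are treated as padding and dropped inside `SVFeatureBlock`. Shapes and dimensions follow the code above; the batch contents are synthetic.

```python
# Smoke test for the three-modality model (shapes only; weights are untrained).
import torch
from modules.models import MultiFeature

model = MultiFeature(mode="lstm")     # "lstm" selects the Vision-LSTM aggregation
image = torch.randn(4, 3, 224, 224)   # remote sensing tiles
sv = torch.randn(4, 7, 512)           # up to 7 street view feature vectors per grid
sv[0, 5:] = 0                         # simulate padding for a grid with only 5 images
taxi = torch.randn(4, 340)            # taxi time series
logits = model(image, sv, taxi)
print(logits.shape)                   # torch.Size([4, 2])
```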
--------------------------------------------------------------------------------
/modules/trainUtils.py:
--------------------------------------------------------------------------------


# from utils.utils import *
# from utils.distributed_utils import reduce_value, is_main_process

import time
import torch
import numpy as np
from tqdm import tqdm
from sklearn.metrics import confusion_matrix, f1_score, cohen_kappa_score

class AverageMeter(object):
    """Computes and stores the average and current value"""
    def __init__(self, name, fmt=':f'):
        self.name = name
        self.fmt = fmt
        self.reset()

    def reset(self):
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, n=1):
        if not np.isnan(val):
            self.val = val
            self.sum += val * n
            self.count += n
            self.avg = self.sum / self.count

def cal_metrics(labels, predicts):
    # Confusion matrix for this batch
    cm = np.array(confusion_matrix(labels, predicts, labels=[0, 1]))
    # Recall of class 1
    if 1 in labels:
        if cm.shape[0] > 1:
            recall_rate = round(cm[1,1] / (cm[1,0] + cm[1,1]), 5)
        else:
            recall_rate = 1.00
    else:
        recall_rate = np.nan

    # Cohen's kappa
    kappa = cohen_kappa_score(labels, predicts)
    # F1 score
    f1 = f1_score(labels, predicts)
    return recall_rate, kappa, f1, cm
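The semantics of `cal_metrics` are easiest to see on a tiny batch; here is a hand-checked example (the only assumption is that `modules` is on the import path).

```python
# Example: cal_metrics on a small batch of binary predictions.
import numpy as np
from modules.trainUtils import cal_metrics

labels   = np.array([1, 0, 1, 1, 0])
predicts = np.array([1, 0, 0, 1, 0])
recall, kappa, f1, cm = cal_metrics(labels, predicts)
print(recall)   # 0.66667 -> 2 of the 3 true positives were recovered
print(f1)       # 0.8     -> precision 1.0, recall 2/3
print(cm)       # [[2 0]
                #  [1 2]]
```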
def train_one_epoch_1m(model, criterion, optimizer, dataloader, device):
    batch_time = AverageMeter('Time', ':6.3f')
    data_time = AverageMeter('Data', ':6.3f')
    losses = AverageMeter('loss', ':.4e')
    accs = AverageMeter('acc', ':.4e')
    recalls = AverageMeter('recall_rate', ':.4e')
    f1s = AverageMeter('f1', ':.4e')
    kappas = AverageMeter('kappa', ':.4e')

    # Make sure the model is in training mode
    model.train()

    # These meters track statistics during training
    end = time.time()
    for features, labels in tqdm(dataloader, desc="Train"):
        # measure data loading time
        data_time.update(time.time() - end)

        # Forward the data (make sure data and model are on the same device)
        logits = model(features.to(device))
        # Compute the cross-entropy loss.
        # No softmax is needed before the loss; it is applied internally
        loss = criterion(logits, labels.to(device))

        # Update the metrics
        accs.update((logits.argmax(dim=-1) == labels.to(device)).float().mean().cpu().item())
        losses.update(loss.cpu().item())
        recall_rate, kappa, f1, cm = cal_metrics(labels.cpu(), logits.argmax(dim=-1).cpu())
        recalls.update(recall_rate)
        f1s.update(f1)
        kappas.update(kappa)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

    # Average loss and accuracy over the whole training epoch
    metrics = {
        losses.name: losses.avg,
        accs.name: accs.avg,
        recalls.name: recalls.avg,
        f1s.name: f1s.avg,
        kappas.name: kappas.avg
    }
    print(f"Train batch time: {batch_time.avg:.4f}, Train data time: {data_time.avg:.4f}")
    print(metrics)

    return metrics


def validate_1m(model, criterion, dataloader, device):
    batch_time = AverageMeter('Time', ':6.3f')
    data_time = AverageMeter('Data', ':6.3f')
    losses = AverageMeter('loss', ':.4e')
    accs = AverageMeter('acc', ':.4e')
    recalls = AverageMeter('recall_rate', ':.4e')
    f1s = AverageMeter('f1', ':.4e')
    kappas = AverageMeter('kappa', ':.4e')
    # Put the model in eval mode so modules such as dropout are disabled
    model.eval()
    # model.cuda()

    # No gradients are needed during validation;
    # torch.no_grad() speeds up the forward pass
    end = time.time()
    with torch.no_grad():
        for features, labels in tqdm(dataloader, desc="Val"):
            # measure data loading time
            data_time.update(time.time() - end)

            logits = model(features.to(device))
            # The loss can still be computed (just without gradients)
            loss = criterion(logits, labels.to(device))

            # Update the metrics
            accs.update((logits.argmax(dim=-1) == labels.to(device)).float().mean().cpu().item())
            losses.update(loss.cpu().item())
            recall_rate, kappa, f1, cm = cal_metrics(labels.cpu(), logits.argmax(dim=-1).cpu())
            recalls.update(recall_rate)
            f1s.update(f1)
            kappas.update(kappa)
            print(cm)

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

    # Average loss and accuracy over the whole epoch
    metrics = {
        losses.name: losses.avg,
        accs.name: accs.avg,
        recalls.name: recalls.avg,
        f1s.name: f1s.avg,
        kappas.name: kappas.avg
    }

    print(f"Val batch time: {batch_time.avg:.4f}, Val data time: {data_time.avg:.4f}")
    print(metrics)

    return metrics

def train_one_epoch_3m(model, criterion, optimizer, dataloader, device):
    batch_time = AverageMeter('Time', ':6.3f')
    data_time = AverageMeter('Data', ':6.3f')
    losses = AverageMeter('loss', ':.4e')
    accs = AverageMeter('acc', ':.4e')
    recalls = AverageMeter('recall_rate', ':.4e')
    f1s = AverageMeter('f1', ':.4e')
    kappas = AverageMeter('kappa', ':.4e')

    # Make sure the model is in training mode
    model.train()

    # These meters track statistics during training
    end = time.time()
    for images, sv_attrs, taxi_attrs, labels in tqdm(dataloader, desc="Train"):
        # measure data loading time
        data_time.update(time.time() - end)

        logits = model(images.to(device), sv_attrs.to(device), taxi_attrs.to(device))

        # Compute the cross-entropy loss.
        # No softmax is needed before the loss; it is applied internally
        loss = criterion(logits, labels.to(device))

        # Update the metrics
        accs.update((logits.argmax(dim=-1) == labels.to(device)).float().mean().cpu().item())
        losses.update(loss.cpu().item())
        recall_rate, kappa, f1, cm = cal_metrics(labels.cpu(), logits.argmax(dim=-1).cpu())
        recalls.update(recall_rate)
        f1s.update(f1)
        kappas.update(kappa)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

    # Average loss and accuracy over the whole training epoch
    metrics = {
        losses.name: losses.avg,
        accs.name: accs.avg,
        recalls.name: recalls.avg,
        f1s.name: f1s.avg,
        kappas.name: kappas.avg
    }
    print(f"Train batch time: {batch_time.avg:.4f}, Train data time: {data_time.avg:.4f}")
    print(metrics)

    return metrics


def validate_3m(model, criterion, dataloader, device):
    batch_time = AverageMeter('Time', ':6.3f')
    data_time = AverageMeter('Data', ':6.3f')
    losses = AverageMeter('loss', ':.4e')
    accs = AverageMeter('acc', ':.4e')
    recalls = AverageMeter('recall_rate', ':.4e')
    f1s = AverageMeter('f1', ':.4e')
    kappas = AverageMeter('kappa', ':.4e')
    # Put the model in eval mode so modules such as dropout are disabled
    model.eval()
    # model.cuda()

    # No gradients are needed during validation;
    # torch.no_grad() speeds up the forward pass
    end = time.time()
    with torch.no_grad():
        for images, sv_attrs, taxi_attrs, labels in tqdm(dataloader, desc="Val"):
            # measure data loading time
            data_time.update(time.time() - end)

            logits = model(images.to(device), sv_attrs.to(device), taxi_attrs.to(device))
            # The loss can still be computed (just without gradients)
            loss = criterion(logits, labels.to(device))

            # Update the metrics
            accs.update((logits.argmax(dim=-1) == labels.to(device)).float().mean().cpu().item())
            losses.update(loss.cpu().item())
            recall_rate, kappa, f1, cm = cal_metrics(labels.cpu(), logits.argmax(dim=-1).cpu())
            recalls.update(recall_rate)
            f1s.update(f1)
            kappas.update(kappa)
            print(cm)

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

    # Average loss and accuracy over the whole epoch
    metrics = {
        losses.name: losses.avg,
        accs.name: accs.avg,
        recalls.name: recalls.avg,
        f1s.name: f1s.avg,
        kappas.name: kappas.avg
    }

    print(f"Val batch time: {batch_time.avg:.4f}, Val data time: {data_time.avg:.4f}")
    print(metrics)

    return metrics

def train_one_epoch_2m(model, criterion, optimizer, dataloader, device):
    batch_time = AverageMeter('Time', ':6.3f')
    data_time = AverageMeter('Data', ':6.3f')
    losses = AverageMeter('loss', ':.4e')
    accs = AverageMeter('acc', ':.4e')
    recalls = AverageMeter('recall_rate', ':.4e')
    f1s = AverageMeter('f1', ':.4e')
    kappas = AverageMeter('kappa', ':.4e')

    # Make sure the model is in training mode
    model.train()

    # These meters track statistics during training
    end = time.time()
    for f1, f2, labels in tqdm(dataloader, desc="Train"):
        # measure data loading time
        data_time.update(time.time() - end)

        logits = model(f1.to(device), f2.to(device))

        # Compute the cross-entropy loss.
        # No softmax is needed before the loss; it is applied internally
        loss = criterion(logits, labels.to(device))

        # Update the metrics (the F1 value is renamed to f1_metric to avoid
        # shadowing the first input modality, which is also called f1)
        accs.update((logits.argmax(dim=-1) == labels.to(device)).float().mean().cpu().item())
        losses.update(loss.cpu().item())
        recall_rate, kappa, f1_metric, cm = cal_metrics(labels.cpu(), logits.argmax(dim=-1).cpu())
        recalls.update(recall_rate)
        f1s.update(f1_metric)
        kappas.update(kappa)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

    # Average loss and accuracy over the whole training epoch
    metrics = {
        losses.name: losses.avg,
        accs.name: accs.avg,
        recalls.name: recalls.avg,
        f1s.name: f1s.avg,
        kappas.name: kappas.avg
    }
    print(f"Train batch time: {batch_time.avg:.4f}, Train data time: {data_time.avg:.4f}")
    print(metrics)

    return metrics


def validate_2m(model, criterion, dataloader, device):
    batch_time = AverageMeter('Time', ':6.3f')
    data_time = AverageMeter('Data', ':6.3f')
    losses = AverageMeter('loss', ':.4e')
    accs = AverageMeter('acc', ':.4e')
    recalls = AverageMeter('recall_rate', ':.4e')
    f1s = AverageMeter('f1', ':.4e')
    kappas = AverageMeter('kappa', ':.4e')
    # Put the model in eval mode so modules such as dropout are disabled
    model.eval()
    # model.cuda()

    # No gradients are needed during validation;
    # torch.no_grad() speeds up the forward pass
    end = time.time()
    with torch.no_grad():
        for f1, f2, labels in tqdm(dataloader, desc="Val"):
            # measure data loading time
            data_time.update(time.time() - end)

            logits = model(f1.to(device), f2.to(device))
            # The loss can still be computed (just without gradients)
            loss = criterion(logits, labels.to(device))

            # Update the metrics (f1_metric avoids shadowing the input f1)
            accs.update((logits.argmax(dim=-1) == labels.to(device)).float().mean().cpu().item())
            losses.update(loss.cpu().item())
            recall_rate, kappa, f1_metric, cm = cal_metrics(labels.cpu(), logits.argmax(dim=-1).cpu())
            recalls.update(recall_rate)
            f1s.update(f1_metric)
            kappas.update(kappa)
            print(cm)

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

    # Average loss and accuracy over the whole epoch
    metrics = {
        losses.name: losses.avg,
        accs.name: accs.avg,
        recalls.name: recalls.avg,
        f1s.name: f1s.avg,
        kappas.name: kappas.avg
    }

    print(f"Val batch time: {batch_time.avg:.4f}, Val data time: {data_time.avg:.4f}")
    print(metrics)

    return metrics
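`train.py` is not included in this snapshot, so here is a minimal sketch of how the three-modality helpers are typically wired into an epoch loop. The dataloaders, checkpoint path, and hyperparameters are assumptions (the latter mirror `configs.py`).

```python
# Sketch only: epoch loop over the 3-modality helpers. train_loader/val_loader
# are assumed to yield (image, sv_attrs, taxi_attrs, label) batches, e.g. a
# MultiDataset wrapped in a DataLoader.
import torch
import torch.nn as nn
from modules.models import MultiFeature
from modules.trainUtils import train_one_epoch_3m, validate_3m

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = MultiFeature(mode="lstm").to(device)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.1, weight_decay=1e-4)

best_f1 = 0.0
for epoch in range(100):
    train_metrics = train_one_epoch_3m(model, criterion, optimizer, train_loader, device)
    val_metrics = validate_3m(model, criterion, val_loader, device)
    if val_metrics["f1"] > best_f1:          # keep the best checkpoint
        best_f1 = val_metrics["f1"]
        torch.save(model.state_dict(), "best_model.pth")
```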
--------------------------------------------------------------------------------
/modules/utils.py:
--------------------------------------------------------------------------------
import matplotlib.pyplot as plt
import numpy as np
import itertools
import torch
import torch.nn as nn
from torch.nn.utils.rnn import pad_sequence
import os, shutil



def plot_confusion_matrix(cm, classes, normalize=False, title='Confusion matrix', cmap=plt.cm.Blues):

    plt.figure()
    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=90)
    plt.yticks(tick_marks, classes)

    plt.axis("equal")

    ax = plt.gca()
    left, right = plt.xlim()
    ax.spines['left'].set_position(('data', left))
    ax.spines['right'].set_position(('data', right))
    for edge_i in ['top', 'bottom', 'right', 'left']:
        ax.spines[edge_i].set_edgecolor("white")


    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        num = '{:.2f}'.format(cm[i, j]) if normalize else int(cm[i, j])
        # Compare the numeric cell value for the text color: num may be a
        # string when normalize=True
        plt.text(j, i, num,
                 verticalalignment='center',
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")

    plt.ylabel('True label')
    plt.xlabel('Predicted label')

    plt.tight_layout()
    # plt.savefig('res/method_2.png', transparent=True, dpi=800)

    plt.show()



def check_folder(folder, mode="create"):
    if os.path.exists(folder):
        shutil.rmtree(folder)
        print(folder, "removed!")
    if mode != "del":
        os.makedirs(folder)

class Logger():
    def __init__(self, path):
        self.path = path

    def log(self, *messages):
        with open(self.path, "a") as f:
            for message in messages:
                f.write(str(message))
                f.write(" ")
            f.write("\n")

def collate_fn_sv(batch):
    # `batch` is a list of (data, label) tuples; collate_fn turns
    # [(data, label), ...] into ([data, ...], [label, ...]).
    # Each data item is assumed to have shape (channels, length), with equal
    # length across channels.
    sv_data_list, label_list = zip(*batch)

    # Pad every sample [n, 3, 224, 224] to [m, 3, 224, 224], where m is the
    # longest sequence in the batch; the result is [batch_size, m, 3, 224, 224]
    sv_data_tensor = pad_sequence(sv_data_list, batch_first=True)
    # sv_data_tensor = sv_data_tensor.transpose(0,1)

    label_tensor = torch.LongTensor(label_list)
    data_copy = (sv_data_tensor, label_tensor)
    return data_copy
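The padding behavior is the crux of handling varying numbers of street view images per grid; a small demo of `collate_fn_sv` (zero padding is also why the datasets above nudge real zeros to `1e-7`):

```python
# Demo of collate_fn_sv: variable-length image stacks are zero-padded to the
# batch maximum.
import torch
from modules.utils import collate_fn_sv

batch = [
    (torch.ones(2, 3, 224, 224), 0),   # grid with 2 street view images
    (torch.ones(5, 3, 224, 224), 1),   # grid with 5 street view images
]
svs, labels = collate_fn_sv(batch)
print(svs.shape)   # torch.Size([2, 5, 3, 224, 224]) -- padded to the longest stack
print(labels)      # tensor([0, 1])
```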
def collate_fn_end2end(batch):
    # Same idea as collate_fn_sv, but for (remote, sv, taxi, label) tuples
    remote_data_list, sv_data_list, taxi_data_list, label_list = zip(*batch)

    # Pad every sample [n, 3, 224, 224] to [m, 3, 224, 224], where m is the
    # longest sequence in the batch; the result is [batch_size, m, 3, 224, 224]
    # (after the transpose)
    sv_data_tensor = pad_sequence(sv_data_list)
    sv_data_tensor = sv_data_tensor.transpose(0,1)

    remote_data_tensor = torch.stack(remote_data_list)
    taxi_data_tensor = torch.stack(taxi_data_list)
    label_tensor = torch.LongTensor(label_list)
    return remote_data_tensor, sv_data_tensor, taxi_data_tensor, label_tensor

def collate_fn_end2end2(batch):
    # Two-modality variant: detect which of f1/f2 is the variable-length
    # street view stack (first dim > 3) and pad only that one
    f1_list, f2_list, label_list = zip(*batch)

    if f1_list[0].shape[0]==3:
        f1_tensor = torch.stack(f1_list)

        if f2_list[0].shape[0] > 3:

            # Pad every sample [n, 3, 224, 224] to [m, 3, 224, 224], where m
            # is the longest sequence in the batch
            sv_data_tensor = pad_sequence(f2_list)
            f2_tensor = sv_data_tensor.transpose(0,1)
        else:
            f2_tensor = torch.stack(f2_list)
    else:
        sv_data_tensor = pad_sequence(f1_list)
        f1_tensor = sv_data_tensor.transpose(0,1)

        f2_tensor = torch.stack(f2_list)

    label_tensor = torch.LongTensor(label_list)
    return f1_tensor, f2_tensor, label_tensor

--------------------------------------------------------------------------------
/preprocess.py:
--------------------------------------------------------------------------------
import warnings
warnings.filterwarnings("ignore")
import os
import geopandas as gpd
import numpy as np
from shapely.geometry import Polygon

###################################################
############## Config part #######################
###################################################
# Parameters
grid_size = 500
valid_ratio = 0.2
boundary_path = r".\data\raw\sz_qu.json"
ground_truth_path = r".\data\raw\urbanVillage_sz\urbanVillage_sz_landtruth_2018.shp"
rs_tiff_path = r".\data\raw\shenzhen\shenzhen\shenzhen_大图\L19\shenzhen.tif"
taxi_db_path = r".\data\taxi_valid2.db"
sv_xy_info_path = r".\data\raw\shenzhen_all_50m_xy.csv"
sv_path_path = r".\data\raw\shenzhen_all_50m_path.csv"
sv_features_path = r".\data\sv_all_feature\*.csv"

# Outputs
output_folder = f"Vision-LSTM/data/grids{grid_size}"
## output path for grids geojson
grids_geojson_path = os.path.join(output_folder, "grids.geojson")
## output path for grids labels csv
grids_label_path = os.path.join(output_folder, "grids_label.csv")
## output folder for remote sensing tiles
rs_tiles_folder = os.path.join(output_folder, "rs_tiles")
# output path for taxi data
taxi_joined_path = os.path.join(output_folder, "taxi_valid.csv")
taxi_ts_path = os.path.join(output_folder, "taxi_ts340.csv")
# output path for street view data
valid_sv_path = os.path.join(output_folder, "sv_path_id_gid.csv")
valid_feature_path = os.path.join(output_folder, "sv_features.pkl")
# output path for model data
model_data_path = os.path.join(output_folder, "model_data.pkl")
model_data_geo_path = os.path.join(output_folder, "model_data_geo.shp")

###################################################
#################### Steps #######################
###################################################

# Step 1: Generate grids
from preprocessing.gen_grids import gen_grids
gen_grids(grid_size, boundary_path, ground_truth_path, grids_geojson_path, grids_label_path)
print("Step 1 completed!")
40 | ###################################################
41 | #################### Steps #######################
42 | ###################################################
43 | 
44 | # Step 1: Generate grids
45 | from preprocessing.gen_grids import gen_grids
46 | gen_grids(grid_size, boundary_path, ground_truth_path, grids_geojson_path, grids_label_path)
47 | print("Step 1 completed!")
48 | 
49 | # Step 2: Prepare remote sensing data - Split tiles from the huge GeoTIFF file based on the grids
50 | from preprocessing.split_geotiff import split_image_by_shapefile
51 | split_image_by_shapefile(rs_tiff_path, grids_geojson_path, rs_tiles_folder)
52 | print("Step 2 completed!")
53 | 
54 | # Step 3: Prepare taxi data - Join to grids and compute the time series
55 | from preprocessing.taxi_process import prepare_taxi
56 | prepare_taxi(taxi_db_path, grids_geojson_path, taxi_joined_path, taxi_ts_path)
57 | print("Step 3 completed!")
58 | 
59 | # Step 4: Prepare street view data - Join to grids
60 | from preprocessing.sv_process import sv_join, features_join
61 | sv_join(sv_xy_info_path, sv_path_path, grids_geojson_path, valid_sv_path)
62 | features_join(valid_sv_path, sv_features_path, valid_feature_path)
63 | print("Step 4 completed!")
64 | 
65 | # Step 5: Build datasets
66 | from preprocessing.build_dataset import build_dataset
67 | build_dataset(valid_feature_path, taxi_ts_path, rs_tiles_folder, grids_label_path, valid_ratio, grids_geojson_path, model_data_path, model_data_geo_path)
68 | print("Step 5 completed!")
--------------------------------------------------------------------------------
/preprocessing/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yingjinghuang/Vision-LSTM/430cd96f74b7fba855903a40838c43ea6992bdc3/preprocessing/__init__.py
--------------------------------------------------------------------------------
/preprocessing/build_dataset.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | import geopandas as gpd
3 | import numpy as np
4 | import os
5 | 
6 | def build_dataset(valid_feature_path, taxi_ts_path, rs_tiles_folder, grids_label_path, valid_ratio, grids_geojson_path, model_data_path, model_data_geo_path):
7 |     sv = pd.read_pickle(valid_feature_path)
8 |     taxi = pd.read_csv(taxi_ts_path, dtype = {"GID": str}, index_col=0)
9 |     taxi = taxi.reset_index()
10 |     labels = pd.read_csv(grids_label_path, dtype = {'GID': str}, index_col=0)
11 |     intersect_set = list(set(sv["GID"]).intersection(set(taxi["GID"])))
12 |     # Filter with a comprehension: calling remove() on a list while iterating over it skips elements.
13 |     intersect_set = [GID for GID in intersect_set
14 |                      if os.path.exists(os.path.join(rs_tiles_folder, GID + ".tif"))]
15 |     print("Intersect size: ", len(intersect_set))
16 | 
17 |     sv_valid = sv[sv["GID"].isin(list(intersect_set))]
18 |     taxi_valid = taxi[taxi["GID"].isin(list(intersect_set))]
19 |     labels_valid = labels[labels["GID"].isin(list(intersect_set))]
20 | 
21 |     data = pd.merge(sv_valid, taxi_valid, on = "GID")
22 |     data = pd.merge(data, labels_valid[["GID", "label"]], on = "GID")
23 | 
24 |     val_index = data.sample(frac=valid_ratio).index.tolist()
25 |     data["mode"] = "train"
26 |     data.loc[val_index, "mode"] = "val"
27 |     print(data[["label", "mode"]].value_counts())
28 |     data.to_pickle(model_data_path)
29 | 
30 |     grids = gpd.read_file(grids_geojson_path)
31 |     grids_valid = grids[grids["GID"].isin(list(intersect_set))]
32 |     grids_valid = pd.merge(grids_valid, data[["GID", "mode"]])
33 |     grids_valid = gpd.GeoDataFrame(grids_valid, geometry="geometry")
34 |     grids_valid.to_file(model_data_geo_path)
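One caveat in `build_dataset`: `data.sample(frac=valid_ratio)` draws a different validation set on every run. A minimal sketch (toy frame, hypothetical GIDs; `random_state=42` is an arbitrary choice, not repo code) of how the split could be pinned down:

```python
import pandas as pd

# Toy frame standing in for `data` inside build_dataset.
data = pd.DataFrame({"GID": [f"G{i:03d}" for i in range(10)], "label": [0, 1] * 5})
valid_ratio = 0.2

# Fixing random_state makes the train/val split reproducible across runs.
val_index = data.sample(frac=valid_ratio, random_state=42).index.tolist()
data["mode"] = "train"
data.loc[val_index, "mode"] = "val"
print(data["mode"].value_counts())  # train 8, val 2
```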
--------------------------------------------------------------------------------
/preprocessing/gen_grids.py:
--------------------------------------------------------------------------------
1 | import geopandas as gpd
2 | import warnings
3 | import numpy as np
4 | from shapely.geometry import Polygon
5 | 
6 | warnings.filterwarnings("ignore")
7 | 
8 | def cal_ratio(row, grid_size, uv_boundary):
9 |     if row["intersects"] == 1:
10 |         return gpd.clip(gpd.GeoSeries(uv_boundary), row.geometry)[0].area / pow(grid_size, 2)
11 |     else:
12 |         return 0
13 | 
14 | def gen_grids(grid_size, boundary_path, ground_truth_path, grids_geojson_path, grids_label_path, threshold=0):
15 | 
16 |     boundary = gpd.read_file(boundary_path)
17 | 
18 |     boundary = boundary.to_crs(2381)
19 | 
20 |     xmin, ymin, xmax, ymax = boundary.total_bounds
21 | 
22 |     x_range = int((xmax - xmin) / grid_size) + 1
23 |     y_range = int((ymax - ymin) / grid_size) + 1
24 | 
25 |     polygons = []
26 |     for i in range(x_range):
27 |         for j in range(y_range):
28 |             xmin_grid = xmin + i * grid_size
29 |             xmax_grid = xmin + (i + 1) * grid_size
30 |             ymin_grid = ymin + j * grid_size
31 |             ymax_grid = ymin + (j + 1) * grid_size
32 |             polygons.append(Polygon([(xmin_grid, ymin_grid),
33 |                                      (xmax_grid, ymin_grid),
34 |                                      (xmax_grid, ymax_grid),
35 |                                      (xmin_grid, ymax_grid)]))
36 | 
37 |     grids = gpd.GeoDataFrame({"geometry": polygons}, crs=2381)
38 | 
39 |     # Keep only the grids that intersect the study-area boundary
40 |     grids_valid = grids[grids.geometry.intersects(boundary.unary_union)]
41 | 
42 |     # Reassign GIDs
43 |     grids_valid["GID"] = ["G"+str(i).zfill(len(str(grids_valid.index[-1]))) for i in range(grids_valid.shape[0])]
44 |     grids_valid.reset_index(drop=True, inplace=True)
45 | 
46 |     uv = gpd.read_file(ground_truth_path)
47 |     uv = uv.to_crs(grids_valid.crs)
48 | 
49 |     uv_boundary = uv["geometry"].unary_union
50 | 
51 |     # First flag the grids that intersect any urban village
52 |     grids_valid["intersects"] = grids_valid.intersects(uv_boundary).map({True: 1, False: 0})
53 | 
54 |     # For those grids, compute the share of the grid area covered by urban villages
55 |     grids_valid["ratio"] = grids_valid.apply(cal_ratio, args=(grid_size, uv_boundary,), axis=1)
56 |     grids_valid["label"] = grids_valid["ratio"].apply(lambda x: 1 if x > threshold else 0)
57 | 
58 |     # Save
59 |     grids_valid = grids_valid.to_crs(4326)
60 |     grids_valid.to_file(grids_geojson_path, driver="GeoJSON")
61 |     grids_valid[["GID", "ratio", "label"]].to_csv(grids_label_path)
62 | 
63 | if __name__=="__main__":
64 |     boundary_path = "./data/raw/sz_qu.json"
65 |     ground_truth_path = "./data/raw/urbanVillage_sz/urbanVillage_sz_landtruth_2018.shp"
66 |     grid_size = 500
67 | 
68 |     # output path for grids geojson
69 |     grids_geojson_path = "./data/grids_all_500m/grids_all_500m.geojson"
70 |     # output path for grids labels csv
71 |     grids_label_path = "./data/grids_all_500m/grids_label_all_500m.csv"
72 | 
73 |     gen_grids(grid_size, boundary_path, ground_truth_path, grids_geojson_path, grids_label_path)
74 | 
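The tiling loop in `gen_grids` deliberately over-covers the boundary's bounding box by one cell to the right and top (the `+ 1` in `x_range`/`y_range`); non-intersecting grids are filtered out afterwards. A standalone sketch of that logic on a toy 1 km × 1 km box (values assumed for illustration):

```python
from shapely.geometry import box

grid_size = 500
xmin, ymin, xmax, ymax = 0, 0, 1000, 1000

# Same tiling as gen_grids, written with shapely's box() helper.
polygons = [
    box(xmin + i * grid_size, ymin + j * grid_size,
        xmin + (i + 1) * grid_size, ymin + (j + 1) * grid_size)
    for i in range(int((xmax - xmin) / grid_size) + 1)
    for j in range(int((ymax - ymin) / grid_size) + 1)
]
print(len(polygons))  # 9 cells: a 2x2 cover plus the padded right/top edges
```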
--------------------------------------------------------------------------------
/preprocessing/split_geotiff.py:
--------------------------------------------------------------------------------
1 | import geopandas as gpd
2 | import rasterio as rio
3 | import rasterio.mask as mask
4 | from tqdm import tqdm
5 | import os
6 | 
7 | def split_image_by_shapefile(image_path, shapefile_path, output_dir):
8 |     """
9 |     Split a GeoTIFF image into multiple smaller GeoTIFF tiles, one per feature in a shapefile.
10 | 
11 |     Parameters:
12 |     image_path (str): Path to the image file.
13 |     shapefile_path (str): Path to the shapefile.
14 |     output_dir (str): Path to the directory to save the output images.
15 | 
16 |     Returns:
17 |     None
18 |     """
19 |     # Open raster
20 |     input_raster = rio.open(image_path)
21 | 
22 |     if not os.path.exists(output_dir):
23 |         os.makedirs(output_dir)
24 | 
25 | 
26 |     # Open the shapefile using GeoPandas
27 |     # gdf = pd.read_pickle(shapefile_path)
28 |     # gdf = gpd.GeoDataFrame(gdf, geometry="geometry")
29 |     gdf = gpd.read_file(shapefile_path)
30 |     gdf = gdf.to_crs(input_raster.crs)
31 |     # print(gdf)
32 |     # gdf = gdf.set_index("panoid")
33 | 
34 |     for i in tqdm(range(gdf.shape[0])):
35 |         # if True:
36 | 
37 |         try:
38 |             # Use the feature geometry to clip the raster
39 |             clip_res = mask.mask(input_raster, [gdf.iloc[i].geometry.__geo_interface__], all_touched=False, crop=True, nodata=0)
40 |             # Save the clipped tile
41 |             out_meta = input_raster.meta.copy()
42 |             # Update the metadata; set the CRS parameters according to your own requirements
43 |             out_meta.update({"driver": "GTiff",
44 |                              "height": clip_res[0].shape[1],
45 |                              "width": clip_res[0].shape[2],
46 |                              "transform": clip_res[1]
47 |                              }
48 |                             )
49 |             # Write the file
50 |             with rio.open(os.path.join(output_dir, "{}.tif".format(gdf.iloc[i].GID)), "w", **out_meta) as dest:
51 |                 dest.write(clip_res[0])
52 |             # print("Block group {} intersects".format(i))
53 |         except Exception as e:
54 |             print(e)
55 |             # pass
56 |             # print("Block group {} does not intersect".format(i))
--------------------------------------------------------------------------------
/preprocessing/sv_process.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | import geopandas as gpd
3 | import glob, os
4 | import numpy as np
5 | 
6 | def sv_join(sv_xy_info_path, sv_path_path, grids_path, valid_sv_path):
7 | 
8 |     xy_info = pd.read_csv(sv_xy_info_path, index_col=0)
9 |     data_geo = gpd.GeoDataFrame(xy_info, geometry=gpd.points_from_xy(xy_info["lon_wgs"], xy_info["lat_wgs"]), crs=4326)
10 |     grids = gpd.read_file(grids_path)
11 |     data_geo = data_geo.to_crs(grids.crs)
12 |     # Keep only the points that fall inside the study area
13 |     sv_valid = data_geo[data_geo.intersects(grids.unary_union)]
14 | 
15 |     # Number of street-view points within the study area
16 |     print(sv_valid.shape)
17 | 
18 |     ## Spatially join the points to the grids
19 |     res = gpd.sjoin(sv_valid, grids[["GID", "geometry"]])
20 |     res = res.reset_index()
21 |     print("Valid grid count:", len(res["GID"].unique()))
22 | 
23 |     sv_valid = res[["id", "lat_wgs", "lon_wgs", "pov_exp", "heading", "GID"]]
24 | 
25 |     ## Attach the image file paths
26 |     sv_path = pd.read_csv(sv_path_path, index_col=0)
27 |     sv_path["id"] = sv_path["org_name"].str.split("_", expand=True)[0]
28 |     sv_path["new_path"] = sv_path["new_path"].str.replace("Tencent_SV/city_image/guangdong/shenzhen_all_50m", "data/raw/SV/SZTSV/shenzhen_all_50m")
29 |     sv_path.rename(columns={"new_path": "path"}, inplace=True)
30 |     sv_path = sv_path.merge(sv_valid, on="id")
31 | 
32 |     sv_path[["id", "path", "GID"]].to_csv(valid_sv_path)
33 | 
34 | def features_join(valid_sv_path, sv_features_path, valid_feature_path):
35 |     sv = pd.read_csv(valid_sv_path, dtype = {'GID': str}, index_col=0)
36 |     sv["name"] = sv["path"].apply(os.path.basename)
37 | 
38 |     tmp_list = []
39 |     for file in glob.glob(sv_features_path):
40 |         tmp = pd.read_csv(file, index_col=0)
41 |         del tmp["id"]
42 |         tmp["path"] = tmp["path"].apply(os.path.basename)
43 |         tmp_list.append(tmp)
44 |     features = pd.concat(tmp_list)
45 | 
46 |     sv = pd.merge(sv, features, how="left", left_on="name", right_on="path")
47 |     data_l = []
48 |     for name, group in sv.groupby("GID"):
49 |         tmp_features = np.array(group[[str(x) for x in list(range(512))]])
50 |         data_l.append([name, tmp_features])
51 | 
52 |     grid_features = pd.DataFrame(data_l)
53 |     grid_features.columns = ["GID", "features"]
54 |     grid_features.to_pickle(valid_feature_path)
55 | 
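What `features_join` produces is one row per grid whose `features` cell holds a variable-length [n_images, 512] matrix. A toy sketch (fake feature values, hypothetical GIDs; not repo code) of that grouping step:

```python
import numpy as np
import pandas as pd

# Three street-view images, two of them in the same grid cell.
sv = pd.DataFrame({
    "GID": ["G000", "G000", "G001"],
    **{str(i): np.random.rand(3) for i in range(512)},
})

rows = [(gid, group[[str(i) for i in range(512)]].to_numpy())
        for gid, group in sv.groupby("GID")]
grid_features = pd.DataFrame(rows, columns=["GID", "features"])
print(grid_features["features"].map(lambda a: a.shape).tolist())  # [(2, 512), (1, 512)]
```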
--------------------------------------------------------------------------------
/preprocessing/taxi_process.py:
--------------------------------------------------------------------------------
1 | import geopandas as gpd
2 | import pandas as pd
3 | import numpy as np
4 | import sqlite3
5 | 
6 | def prepare_taxi(db_path, grids_path, taxi_joined_path, taxi_ts_path, threshold = 50):
7 |     ## Spatially join the taxi OD records to the grids
8 |     grids = gpd.read_file(grids_path)
9 | 
10 |     conn = sqlite3.connect(db_path)
11 |     ods = pd.read_sql("select * from TAXI", conn)
12 |     conn.close()
13 |     print(ods.shape)
14 | 
15 |     del ods["ID"]
16 |     ods_geo = gpd.GeoDataFrame(ods, geometry=gpd.points_from_xy(ods["lon"], ods["lat"]), crs=4326)
17 |     ods_geo = ods_geo.to_crs(grids.crs)
18 | 
19 |     print("spatial join started")
20 |     res = gpd.sjoin(ods_geo, grids, how="inner")
21 |     print("spatial join end")
22 | 
23 |     res = res[["timestamp", "OD", "GID", "lon", "lat"]]
24 |     res.to_csv(taxi_joined_path)
25 | 
26 |     ## Aggregate into time series
27 |     res["hour"] = res["timestamp"].str[11:13]
28 |     res["date"] = res["timestamp"].str[:10]
29 | 
30 |     taxi_time_od_data = res.groupby(["GID", "date", "hour", "OD"]).size().unstack("OD").unstack("date").unstack("hour")
31 |     taxi_time_od_data = taxi_time_od_data.fillna(0)
32 |     taxi_time_od_data.columns = ['_'.join(col).strip() for col in taxi_time_od_data.columns.values]
33 |     # The data only cover 2017-10-22 23:00 through 2017-10-30 00:00
34 |     col_name = list(taxi_time_od_data.columns[23:193]) + list(taxi_time_od_data.columns[239:-23])
35 |     taxi_time_od_data = taxi_time_od_data.loc[:,col_name]
36 | 
37 |     # Min-max normalize each row
38 |     max_min_scaler = lambda x: (x - np.min(x)) / (np.max(x) - np.min(x))
39 |     taxi_time_od_data = taxi_time_od_data.apply(max_min_scaler, axis=1)
40 | 
41 |     taxi_time_od_data.to_csv(taxi_ts_path)
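The chained `unstack` calls in `prepare_taxi` turn per-(GID, date, hour, OD) counts into one wide time-series row per grid, which is what the taxi branch of the model consumes. A minimal sketch with toy records (not repo data):

```python
import pandas as pd

res = pd.DataFrame({
    "GID":  ["G0", "G0", "G1"],
    "date": ["2017-10-23"] * 3,
    "hour": ["08", "08", "09"],
    "OD":   ["O", "D", "O"],
})

# Count records per (GID, date, hour, OD), then pivot everything but GID into columns.
ts = res.groupby(["GID", "date", "hour", "OD"]).size() \
        .unstack("OD").unstack("date").unstack("hour").fillna(0)
ts.columns = ["_".join(col) for col in ts.columns]
print(ts)  # one row per GID, columns like D_2017-10-23_08, O_2017-10-23_09, ...
```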
--------------------------------------------------------------------------------
/train.py:
--------------------------------------------------------------------------------
1 | import os
2 | import torch
3 | import torch.nn as nn
4 | from torch.utils.data import DataLoader
5 | import torch.utils.tensorboard as tensorboard
6 | import pandas as pd
7 | from collections import Counter
8 | import imblearn
9 | import math  # used by the LR-schedule lambda below; the wildcard imports are not guaranteed to provide it
10 | from modules.utils import *
11 | from modules.datasets import *
12 | from modules.models import *
13 | from modules.trainUtils import *
14 | from configs import *
15 | 
16 | def main(configs):
17 |     print(configs)
18 |     # Select the GPUs before the CUDA device is created
19 |     os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
20 |     os.environ["CUDA_VISIBLE_DEVICES"] = configs["gpu"]
21 |     device = torch.device("cuda")
22 | 
23 |     models_path = os.path.join(configs["weights_folder"], configs["model_name"])
24 |     tensorboard_path = os.path.join(configs["tensorboard_folder"], configs["model_name"])
25 |     logs_folder = os.path.join(configs["log_folder"], configs["model_name"])
26 |     logs_path = os.path.join(logs_folder, configs["model_name"] + ".log")
27 | 
28 |     check_folder(models_path)
29 |     check_folder(tensorboard_path)
30 |     check_folder(logs_folder)
31 | 
32 |     logger = Logger(logs_path)
33 |     logger.log(configs)
34 | 
35 |     writer = tensorboard.SummaryWriter(tensorboard_path + "/")
36 | 
37 |     data_df = pd.read_pickle(configs["model_data_path"])
38 |     taxi_features_col = list(data_df.columns[2:-2])
39 |     # Columns: GID,features,[taxi_features],label,mode
40 |     logger.log("data loaded!")
41 |     logger.log(data_df[["label", "mode"]].value_counts())
42 | 
43 |     # Build the train/val splits
44 |     data_df_train = data_df[data_df["mode"] == "train"]
45 |     data_df_val = data_df[data_df["mode"] == "val"]
46 | 
47 |     x_train = data_df_train.iloc[:, :-2]
48 |     y_train = data_df_train["label"].tolist()
49 |     x_val = data_df_val.iloc[:, :-2]
50 |     y_val = data_df_val["label"].tolist()
51 | 
52 |     logger.log("data prepared done.")
53 | 
54 | 
55 |     counter = Counter(data_df_train['label'].to_list())
56 |     # Random oversampling to balance the two classes
57 |     oversampler = imblearn.over_sampling.RandomOverSampler()
58 |     x_train, y_train = oversampler.fit_resample(x_train, y_train)
59 | 
60 |     if configs["modality_count"] == 3:
61 |         if configs["mode"] == "single":
62 |             train_dataset = MultiDataset1SV(x_train["GID"].to_numpy(), x_train["features"].to_numpy(), x_train[taxi_features_col].to_numpy(), y_train, configs["rs_path"])
63 |             val_dataset = MultiDataset1SV(x_val["GID"].to_numpy(), x_val["features"].to_numpy(), x_val[taxi_features_col].to_numpy(), y_val, configs["rs_path"], mode='valid')
64 |             model = MultiFeature1SV()
65 |         else:
66 |             train_dataset = MultiDataset(x_train["GID"].to_numpy(), x_train["features"].to_numpy(), x_train[taxi_features_col].to_numpy(), y_train, configs["rs_path"])
67 |             val_dataset = MultiDataset(x_val["GID"].to_numpy(), x_val["features"].to_numpy(), x_val[taxi_features_col].to_numpy(), y_val, configs["rs_path"], mode='valid')
68 |             model = MultiFeature(mode=configs["mode"])
69 |         collate_fn = collate_fn_end2end
70 | 
71 |     elif configs["modality_count"] == 2:
72 |         train_dataset = TwoDataset(x_train["GID"].to_numpy(), x_train["features"].to_numpy(), x_train[taxi_features_col].to_numpy(), y_train, configs["rs_path"], configs["modalities"])
73 |         val_dataset = TwoDataset(x_val["GID"].to_numpy(), x_val["features"].to_numpy(), x_val[taxi_features_col].to_numpy(), y_val, configs["rs_path"], configs["modalities"], mode='valid')
74 |         if "sv" in configs["modalities"]:
75 |             collate_fn = collate_fn_end2end2
76 |         else:
77 |             collate_fn = None
78 |         model = TwoFeature(mode=configs["mode"], modal=configs["modalities"])
79 |     else:
80 |         if "remote" in configs["modalities"]:
81 |             train_dataset = RemoteData(x_train["GID"].to_numpy(), y_train, configs["rs_path"])
82 |             val_dataset = RemoteData(x_val["GID"].to_numpy(), y_val, configs["rs_path"], mode='valid')
83 |             collate_fn = None
84 |             model = RemoteNet()
85 |         elif "sv" in configs["modalities"]:
86 |             train_dataset = SVFeatureDataset(x_train["features"].to_numpy(), y_train)
87 |             val_dataset = SVFeatureDataset(x_val["features"].to_numpy(), y_val, mode='valid')
88 |             collate_fn = collate_fn_sv
89 |             model = SVFeature(mode=configs["mode"])
90 |         else:
91 |             train_dataset = TaxiDataset(x_train[taxi_features_col].to_numpy(), y_train)
92 |             val_dataset = TaxiDataset(x_val[taxi_features_col].to_numpy(), y_val, mode='valid')
93 |             collate_fn = None
94 |             model = TaxiNet()
95 | 
96 |     logger.log("dataset prepared done.")
97 |     logger.log("Train Dataset size: ", len(train_dataset), "Validation Dataset size: ", len(val_dataset))
98 | 
99 |     # Define the data loaders
100 |     if collate_fn is None:
101 |         train_loader = DataLoader(
102 |             dataset=train_dataset,
103 |             batch_size=configs["batch_size"],
104 |             shuffle=True,
105 |             num_workers=configs["workers"],
106 |             pin_memory=True
107 |         )
108 | 
109 |         val_loader = DataLoader(
110 |             dataset=val_dataset,
111 |             batch_size=configs["batch_size"],
112 |             shuffle=False,
113 |             num_workers=configs["workers"],
114 |             pin_memory=True
115 |         )
116 |     else:
117 |         train_loader = DataLoader(
118 |             dataset=train_dataset,
119 |             batch_size=configs["batch_size"],
120 |             shuffle=True,
121 |             num_workers=configs["workers"],
122 |             pin_memory=True,
123 |             collate_fn=collate_fn
124 |         )
125 | 
126 |         val_loader = DataLoader(
127 |             dataset=val_dataset,
128 |             batch_size=configs["batch_size"],
129 |             shuffle=False,
130 |             num_workers=configs["workers"],
131 |             pin_memory=True,
132 |             collate_fn=collate_fn
133 |         )
134 | 
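The criterion defined just below weights each class by its inverse frequency in the training split. A minimal CPU-only sketch (toy counts, not repo code) of what that weighting does:

```python
import torch
import torch.nn as nn
from collections import Counter

counter = Counter([0, 0, 0, 1])  # 3 negatives, 1 positive
weights = torch.FloatTensor([1 / counter[0], 1 / counter[1]])  # [0.33, 1.0]

criterion = nn.CrossEntropyLoss(weight=weights)
logits = torch.randn(4, 2)
labels = torch.tensor([0, 0, 0, 1])
print(criterion(logits, labels))  # errors on the rare class cost ~3x more
```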
135 |     print(next(iter(train_loader)))  # sanity-check one batch
136 |     model = model.to(device)
137 | 
138 |     # Weighted cross-entropy: weight each class by its inverse frequency in the training set
139 |     criterion = nn.CrossEntropyLoss(weight=torch.FloatTensor([1/counter[0], 1/counter[1]]).cuda())
140 | 
141 |     # Initialize the optimizer; hyperparameters such as the learning rate can be tuned here
142 |     # optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, weight_decay=weight_decay)
143 |     optimizer = torch.optim.SGD(model.parameters(), lr=configs["lr"], weight_decay=configs["wd"], momentum=0.9)
144 |     # Learning-rate schedule: linear warmup followed by cosine decay
145 |     t = 10  # warmup epochs
146 |     T = configs["epochs"]  # total epochs; epochs after warmup follow the cosine curve
147 |     lr_warm = configs["epochs"]  # unused
148 |     n_t = 0.5
149 |     # lf = lambda x: ((1 + math.cos(x * math.pi / args.epochs)) /2 ) * (1 - args.lr_factor) + args.lr_factor # cosine
150 |     lf = lambda epoch: (0.9 * epoch / t + 0.1) if epoch < t else 0.1 if n_t * (1 + math.cos(math.pi * (epoch - t) / (T - t)))<0.1 else n_t * (1+math.cos(math.pi*(epoch - t)/(T-t)))
151 |     scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=lf)
152 | 
153 |     best_loss = 500.0
154 | 
155 |     for epoch in range(configs["epochs"]):
156 |         # ---------- Training ----------
157 |         if configs["modality_count"] == 3:
158 |             train_metrics = train_one_epoch_3m(model, criterion, optimizer, train_loader, device)
159 |         elif configs["modality_count"] == 2:
160 |             train_metrics = train_one_epoch_2m(model, criterion, optimizer, train_loader, device)
161 |         else:
162 |             train_metrics = train_one_epoch_1m(model, criterion, optimizer, train_loader, device)
163 | 
164 |         # ---------- Validation ----------
165 |         if configs["modality_count"] == 3:
166 |             val_metrics = validate_3m(model, criterion, val_loader, device)
167 |         elif configs["modality_count"] == 2:
168 |             val_metrics = validate_2m(model, criterion, val_loader, device)
169 |         else:
170 |             val_metrics = validate_1m(model, criterion, val_loader, device)
171 | 
172 | 
173 |         scheduler.step()
174 |         # ============= tensorboard =============
175 |         writer.add_scalar('Loss/Train', train_metrics["loss"], epoch)
176 |         writer.add_scalar('Accuracy/Train', train_metrics["acc"], epoch)
177 |         writer.add_scalar('Recall_Rate/Train', train_metrics["recall_rate"], epoch)
178 |         writer.add_scalar('Kappa/Train', train_metrics["kappa"], epoch)
179 |         writer.add_scalar('F1/Train', train_metrics["f1"], epoch)
180 |         writer.add_scalar('Loss/Val', val_metrics["loss"], epoch)
181 |         writer.add_scalar('Accuracy/Val', val_metrics["acc"], epoch)
182 |         writer.add_scalar('Recall_Rate/Val', val_metrics["recall_rate"], epoch)
183 |         writer.add_scalar('Kappa/Val', val_metrics["kappa"], epoch)
184 |         writer.add_scalar('F1/Val', val_metrics["f1"], epoch)
185 | 
186 |         train_metrics_str = ", ".join([f"{k} = {v:.5f}" for k, v in train_metrics.items()])
187 |         val_metrics_str = ", ".join([f"{k} = {v:.5f}" for k, v in val_metrics.items()])
188 |         logger.log(f"[ Train | {epoch + 1:03d}/{configs['epochs']:03d} ] {train_metrics_str}\n[ Val | {epoch + 1:03d}/{configs['epochs']:03d} ] {val_metrics_str}")
189 |         print(f"[ Train | {epoch + 1:03d}/{configs['epochs']:03d} ] {train_metrics_str}\n[ Val | {epoch + 1:03d}/{configs['epochs']:03d} ] {val_metrics_str}")
190 | 
191 |         # save a checkpoint every epoch; uncomment the check below to keep only the best
192 |         # if val_metrics["loss"] < best_loss:
193 |         #     best_loss = val_metrics["loss"]
194 |         savepath = os.path.join(models_path, f'model_epoch{epoch+1:03d}_{val_metrics["acc"]:.3f}.pth.tar')
195 |         torch.save(model.state_dict(), savepath)
196 |         # logger.log(f'\n\t*** Saved checkpoint in {savepath} ***\n')
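The schedule lambda `lf` above is easier to read as linear warmup followed by cosine decay with a floor at 0.1 of the base learning rate. A standalone sketch (t and n_t taken from the values above; T=100 assumed for illustration) that reproduces it with `max`:

```python
import math

t, T, n_t = 10, 100, 0.5  # warmup epochs, total epochs, cosine amplitude

# Equivalent to lf in train.py: 0.1 -> 1.0 over t epochs, then cosine down to the 0.1 floor.
lf = lambda e: (0.9 * e / t + 0.1) if e < t else max(0.1, n_t * (1 + math.cos(math.pi * (e - t) / (T - t))))
print([round(lf(e), 3) for e in (0, 5, 10, 55, 99)])  # [0.1, 0.55, 1.0, 0.5, 0.1]
```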
197 | 
198 |     writer.close()
199 | 
200 | if __name__ == "__main__":
201 |     main(configs)
--------------------------------------------------------------------------------