├── .gitignore ├── README.md ├── datasets ├── download_datasets.py └── pf-pascal │ ├── PF-dataset-PASCAL │ └── .gitignore │ ├── download.sh │ └── image_pairs │ ├── test_pairs.csv │ ├── train_pairs.csv │ ├── val_pairs.csv │ └── val_pairs_nocoords.csv ├── dccnet_archi.jpg ├── eval_pf_pascal.py ├── eval_pf_willow.py ├── eval_tss.py ├── geotnf ├── flow.py ├── point_tnf.py └── transformation.py ├── lib ├── __init__.py ├── conv4d.py ├── dataloader.py ├── eval_util_dynamic.py ├── im_pair_dataset.py ├── modules.py ├── normalization.py ├── pf_dataset.py ├── pf_willow_dataset.py ├── plot.py ├── point_tnf_dynamic.py ├── py_util.py ├── torch_util.py ├── transformation.py └── tss_dataset.py ├── models ├── __init__.py ├── dynamic_fusion_att.py ├── loss_dynamic.py ├── model_dynamic.py ├── sce_efficient.py └── spatial_context_encoder.py ├── scripts └── train_dccnet.sh ├── train_dccnet.py └── trained_models ├── .gitignore └── download.sh /.gitignore: -------------------------------------------------------------------------------- 1 | .ipynb_checkpoints 2 | __pycache__ 3 | **/.DS_Store 4 | *.png 5 | *.txt 6 | *.zip 7 | log/ 8 | *.pyc 9 | .idea/ 10 | *.tar 11 | *.pth.tar 12 | *.jpg 13 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # DCCNet-Pytorch 2 | ![](dccnet_archi.jpg) 3 | 4 | This is the implementation of the paper: 5 | 6 | S. Huang, Q. Wang, S. Zhang, S. Yan, and X. He. Dynamic Context Correspondence Network for Semantic Alignment. ICCV 2019 [[arXiv](https://arxiv.org/abs/1909.03444)] 7 | 8 | using PyTorch. 9 | 10 | 11 | ## Getting started 12 | 13 | ### Environment 14 | Python 3.5.2 15 | PyTorch 0.3.1 16 | torchvision 0.2.1 17 | 18 | ### Getting the datasets 19 | 20 | The PF-Pascal dataset (used for training and evaluation) can be downloaded and unzipped by browsing to the `datasets/pf-pascal/` folder and running `download.sh`. 21 | 22 | The PF-Willow and TSS datasets (used for evaluation) can be downloaded by browsing to the `datasets/` folder and running `download_datasets.py`. The datasets will be placed under `datasets/proposal-flow-willow` and `datasets/tss`. 23 | 24 | 25 | 26 | ### Getting the trained models 27 | 28 | The model trained on PF-Pascal (`best_dccnet.pth.tar`) can be downloaded [[here](https://pan.baidu.com/s/1GESlvTqmLkUZmdidvDs_Qg)] (passwd:y42d). Put the pretrained model under the `trained_models/` folder for further evaluation. 29 | 30 | 31 | ## Training 32 | 33 | To train a model, run `train_dccnet.sh` under the `scripts` folder to reproduce our results. 34 | 35 | 36 | ## Evaluation 37 | 38 | Evaluation for PF-Pascal and PF-Willow is implemented in the `eval_pf_pascal.py` and `eval_pf_willow.py` files, respectively. You can run the evaluation in the following way: 39 | 40 | ```bash 41 | python eval_pf_pascal.py --checkpoint trained_models/best_dccnet.pth.tar 42 | ``` 43 | 44 | Evaluation for TSS is implemented in the `eval_tss.py` file. You can run the evaluation in the following way: 45 | 46 | ```bash 47 | python eval_tss.py --checkpoint trained_models/best_dccnet.pth.tar 48 | ``` 49 | 50 | This will generate a series of flow files in the `datasets/dccnet_results` folder that then need to be fed to the TSS evaluation Matlab code. 51 | In order to run the Matlab evaluation, you need to clone the [TSS repo](https://github.com/t-taniai/TSS_CVPR2016_EvaluationKit) and follow the corresponding instructions.
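Putting the steps above together, a typical end-to-end run looks like the following. This is only a minimal sketch assuming the default paths used in this repository; the pretrained model still has to be downloaded manually from the Baidu link above, and the TSS Matlab evaluation is run separately.

```bash
# Get the datasets (PF-Pascal for training/evaluation; PF-Willow and TSS for evaluation only)
cd datasets/pf-pascal && bash download.sh && cd ..
python download_datasets.py   # creates datasets/proposal-flow-willow and datasets/tss
cd ..

# Place best_dccnet.pth.tar under trained_models/, then evaluate
python eval_pf_pascal.py --checkpoint trained_models/best_dccnet.pth.tar
python eval_pf_willow.py --checkpoint trained_models/best_dccnet.pth.tar
python eval_tss.py --checkpoint trained_models/best_dccnet.pth.tar   # writes flow files to datasets/dccnet_results
```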
52 | 53 | ## Acknowledgement 54 | 55 | We borrow tons of code from [NC-Net](https://github.com/ignacio-rocco/ncnet) and [WeakAlign](https://github.com/ignacio-rocco/weakalign). 56 | 57 | ## BibTeX 58 | 59 | If you use this code in your project, please cite our paper: 60 | ```` 61 | @inproceedings{huang2019dynamic, 62 | title={Dynamic Context Correspondence Network for Semantic Alignment}, 63 | author={Huang, Shuaiyi and Wang, Qiuyue and Zhang, Songyang and Yan, Shipeng and He, Xuming}, 64 | booktitle={Proceedings of the IEEE International Conference on Computer Vision}, 65 | pages={2010--2019}, 66 | year={2019} 67 | } 68 | ```` 69 | 70 | 71 | -------------------------------------------------------------------------------- /datasets/download_datasets.py: -------------------------------------------------------------------------------- 1 | # download_datasets.py from WeakAlign Rocco et al. CVPR2018 2 | 3 | from os.path import exists, join, basename, dirname, splitext 4 | from os import makedirs, remove, rename 5 | from six.moves import urllib 6 | import tarfile 7 | import zipfile 8 | import requests 9 | import sys 10 | import click 11 | 12 | 13 | def download_and_uncompress(url, dest=None, chunk_size=1024, replace="ask", 14 | label="Downloading {dest_basename} ({size:.2f}MB)"): 15 | dest = dest or "./" + url.split("/")[-1] 16 | dest_dir = dirname(dest) 17 | if not exists(dest_dir): 18 | makedirs(dest_dir) 19 | if exists(dest): 20 | if (replace is False 21 | or replace == "ask" 22 | and not click.confirm("Replace {}?".format(dest))): 23 | return 24 | # download file 25 | with open(dest, "wb") as f: 26 | response = requests.get(url, stream=True) 27 | total_length = response.headers.get('content-length') 28 | 29 | if total_length is None: # no content length header 30 | f.write(response.content) 31 | else: 32 | dl = 0 33 | total_length = int(total_length) 34 | for data in response.iter_content(chunk_size=4096): 35 | dl += len(data) 36 | f.write(data) 37 | done = int(50 * dl / total_length) 38 | sys.stdout.write("\r[%s%s]" % ('=' * done, ' ' * (50 - done))) 39 | sys.stdout.write("{:.1%}".format(dl / total_length)) 40 | sys.stdout.flush() 41 | sys.stdout.write("\n") 42 | # uncompress 43 | if dest.endswith("zip"): 44 | file = zipfile.ZipFile(dest, 'r') 45 | elif dest.endswith("tar"): 46 | file = tarfile.open(dest, 'r') 47 | elif dest.endswith("tar.gz"): 48 | file = tarfile.open(dest, 'r:gz') 49 | else: 50 | return dest 51 | 52 | print("Extracting data...") 53 | file.extractall(dest_dir) 54 | file.close() 55 | 56 | return dest 57 | 58 | 59 | def download_PF_willow(dest="./proposal-flow-willow"): 60 | print("Fetching PF Willow dataset ") 61 | url = "http://www.di.ens.fr/willow/research/proposalflow/dataset/PF-dataset.zip" 62 | file_path = join(dest, basename(url)) 63 | download_and_uncompress(url, file_path) 64 | 65 | print('Downloading image pair list \n') 66 | url = "http://www.di.ens.fr/willow/research/cnngeometric/other_resources/test_pairs_pf.csv" 67 | file_path = join(dest, basename(url)) 68 | download_and_uncompress(url, file_path) 69 | 70 | def download_TSS(dest="./tss"): 71 | print("Fetching TSS dataset ") 72 | url = "http://www.hci.iis.u-tokyo.ac.jp/datasets/data/JointCorrCoseg/TSS_CVPR2016.zip" 73 | file_path = join(dest, basename(url)) 74 | download_and_uncompress(url, file_path) 75 | 76 | print('Downloading image pair list \n') 77 | url = "http://www.di.ens.fr/willow/research/cnngeometric/other_resources/test_pairs_tss.csv" 78 | file_path = join(dest, basename(url)) 79 |
download_and_uncompress(url, file_path) 80 | 81 | if __name__ == '__main__': 82 | download_PF_willow() 83 | 84 | download_TSS() -------------------------------------------------------------------------------- /datasets/pf-pascal/PF-dataset-PASCAL/.gitignore: -------------------------------------------------------------------------------- 1 | # Ignore everything in this directory 2 | * 3 | # Except this file 4 | !.gitignore 5 | -------------------------------------------------------------------------------- /datasets/pf-pascal/download.sh: -------------------------------------------------------------------------------- 1 | wget https://www.di.ens.fr/willow/research/proposalflow/dataset/PF-dataset-PASCAL.zip 2 | unzip PF-dataset-PASCAL.zip 'PF-dataset-PASCAL/JPEGImages/*' -------------------------------------------------------------------------------- /datasets/pf-pascal/image_pairs/val_pairs_nocoords.csv: -------------------------------------------------------------------------------- 1 | source_image,target_image,class,flip 2 | PF-dataset-PASCAL/JPEGImages/2008_006194.jpg,PF-dataset-PASCAL/JPEGImages/2010_001089.jpg,8,0 3 | PF-dataset-PASCAL/JPEGImages/2009_003697.jpg,PF-dataset-PASCAL/JPEGImages/2010_004224.jpg,16,0 4 | PF-dataset-PASCAL/JPEGImages/2007_006704.jpg,PF-dataset-PASCAL/JPEGImages/2009_003753.jpg,20,0 5 | PF-dataset-PASCAL/JPEGImages/2011_001964.jpg,PF-dataset-PASCAL/JPEGImages/2010_002438.jpg,6,0 6 | PF-dataset-PASCAL/JPEGImages/2010_003861.jpg,PF-dataset-PASCAL/JPEGImages/2010_004409.jpg,18,0 7 | PF-dataset-PASCAL/JPEGImages/2009_003469.jpg,PF-dataset-PASCAL/JPEGImages/2009_005154.jpg,2,0 8 | PF-dataset-PASCAL/JPEGImages/2011_001733.jpg,PF-dataset-PASCAL/JPEGImages/2009_004718.jpg,18,0 9 | PF-dataset-PASCAL/JPEGImages/2010_004865.jpg,PF-dataset-PASCAL/JPEGImages/2010_003139.jpg,7,0 10 | PF-dataset-PASCAL/JPEGImages/2009_003469.jpg,PF-dataset-PASCAL/JPEGImages/2009_005103.jpg,2,0 11 | PF-dataset-PASCAL/JPEGImages/2008_006370.jpg,PF-dataset-PASCAL/JPEGImages/2010_000256.jpg,18,0 12 | PF-dataset-PASCAL/JPEGImages/2007_009788.jpg,PF-dataset-PASCAL/JPEGImages/2010_000056.jpg,2,0 13 | PF-dataset-PASCAL/JPEGImages/2009_004295.jpg,PF-dataset-PASCAL/JPEGImages/2009_004670.jpg,6,0 14 | PF-dataset-PASCAL/JPEGImages/2009_003543.jpg,PF-dataset-PASCAL/JPEGImages/2009_005287.jpg,19,0 15 | PF-dataset-PASCAL/JPEGImages/2008_004802.jpg,PF-dataset-PASCAL/JPEGImages/2010_001583.jpg,1,0 16 | PF-dataset-PASCAL/JPEGImages/2009_000985.jpg,PF-dataset-PASCAL/JPEGImages/2010_000898.jpg,2,0 17 | PF-dataset-PASCAL/JPEGImages/2008_001631.jpg,PF-dataset-PASCAL/JPEGImages/2010_004894.jpg,14,0 18 | PF-dataset-PASCAL/JPEGImages/2009_002415.jpg,PF-dataset-PASCAL/JPEGImages/2010_003157.jpg,11,0 19 | PF-dataset-PASCAL/JPEGImages/2008_004549.jpg,PF-dataset-PASCAL/JPEGImages/2010_002452.jpg,14,0 20 | PF-dataset-PASCAL/JPEGImages/2010_002147.jpg,PF-dataset-PASCAL/JPEGImages/2010_001448.jpg,1,0 21 | PF-dataset-PASCAL/JPEGImages/2010_004957.jpg,PF-dataset-PASCAL/JPEGImages/2010_002786.jpg,12,0 22 | PF-dataset-PASCAL/JPEGImages/2008_006758.jpg,PF-dataset-PASCAL/JPEGImages/2009_003768.jpg,13,0 23 | PF-dataset-PASCAL/JPEGImages/2010_003200.jpg,PF-dataset-PASCAL/JPEGImages/2010_000255.jpg,14,0 24 | PF-dataset-PASCAL/JPEGImages/2009_004670.jpg,PF-dataset-PASCAL/JPEGImages/2009_004295.jpg,6,0 25 | PF-dataset-PASCAL/JPEGImages/2009_003519.jpg,PF-dataset-PASCAL/JPEGImages/2010_002326.jpg,2,0 26 | PF-dataset-PASCAL/JPEGImages/2009_005104.jpg,PF-dataset-PASCAL/JPEGImages/2010_001638.jpg,7,0 27 | 
PF-dataset-PASCAL/JPEGImages/2011_002811.jpg,PF-dataset-PASCAL/JPEGImages/2010_001753.jpg,2,0 28 | PF-dataset-PASCAL/JPEGImages/2010_004425.jpg,PF-dataset-PASCAL/JPEGImages/2010_001140.jpg,8,0 29 | PF-dataset-PASCAL/JPEGImages/2010_004751.jpg,PF-dataset-PASCAL/JPEGImages/2010_002729.jpg,11,0 30 | PF-dataset-PASCAL/JPEGImages/2009_005120.jpg,PF-dataset-PASCAL/JPEGImages/2010_003495.jpg,1,0 31 | PF-dataset-PASCAL/JPEGImages/2009_004117.jpg,PF-dataset-PASCAL/JPEGImages/2010_004766.jpg,14,0 32 | PF-dataset-PASCAL/JPEGImages/2009_001332.jpg,PF-dataset-PASCAL/JPEGImages/2009_003806.jpg,1,0 33 | PF-dataset-PASCAL/JPEGImages/2010_001402.jpg,PF-dataset-PASCAL/JPEGImages/2010_003223.jpg,8,0 34 | PF-dataset-PASCAL/JPEGImages/2010_004957.jpg,PF-dataset-PASCAL/JPEGImages/2010_003043.jpg,12,0 35 | PF-dataset-PASCAL/JPEGImages/2010_003677.jpg,PF-dataset-PASCAL/JPEGImages/2010_000090.jpg,9,0 36 | PF-dataset-PASCAL/JPEGImages/2010_004422.jpg,PF-dataset-PASCAL/JPEGImages/2010_001810.jpg,14,0 37 | PF-dataset-PASCAL/JPEGImages/2010_002631.jpg,PF-dataset-PASCAL/JPEGImages/2010_003826.jpg,12,0 38 | PF-dataset-PASCAL/JPEGImages/2010_005211.jpg,PF-dataset-PASCAL/JPEGImages/2009_005098.jpg,2,0 39 | PF-dataset-PASCAL/JPEGImages/2008_001479.jpg,PF-dataset-PASCAL/JPEGImages/2010_001709.jpg,12,0 40 | PF-dataset-PASCAL/JPEGImages/2008_005976.jpg,PF-dataset-PASCAL/JPEGImages/2009_002232.jpg,20,0 41 | PF-dataset-PASCAL/JPEGImages/2010_005556.jpg,PF-dataset-PASCAL/JPEGImages/2010_000342.jpg,2,0 42 | PF-dataset-PASCAL/JPEGImages/2009_004594.jpg,PF-dataset-PASCAL/JPEGImages/2010_000908.jpg,7,0 43 | PF-dataset-PASCAL/JPEGImages/2010_005484.jpg,PF-dataset-PASCAL/JPEGImages/2009_003686.jpg,13,0 44 | PF-dataset-PASCAL/JPEGImages/2009_003005.jpg,PF-dataset-PASCAL/JPEGImages/2009_005189.jpg,1,0 45 | PF-dataset-PASCAL/JPEGImages/2010_000342.jpg,PF-dataset-PASCAL/JPEGImages/2010_002475.jpg,2,0 46 | PF-dataset-PASCAL/JPEGImages/2010_001080.jpg,PF-dataset-PASCAL/JPEGImages/2010_003453.jpg,7,0 47 | PF-dataset-PASCAL/JPEGImages/2008_004347.jpg,PF-dataset-PASCAL/JPEGImages/2010_001957.jpg,8,0 48 | PF-dataset-PASCAL/JPEGImages/2011_003148.jpg,PF-dataset-PASCAL/JPEGImages/2010_002030.jpg,2,0 49 | PF-dataset-PASCAL/JPEGImages/2010_003495.jpg,PF-dataset-PASCAL/JPEGImages/2010_002065.jpg,1,0 50 | PF-dataset-PASCAL/JPEGImages/2010_002227.jpg,PF-dataset-PASCAL/JPEGImages/2009_004869.jpg,9,0 51 | PF-dataset-PASCAL/JPEGImages/2009_000130.jpg,PF-dataset-PASCAL/JPEGImages/2009_003958.jpg,5,0 52 | PF-dataset-PASCAL/JPEGImages/2008_001446.jpg,PF-dataset-PASCAL/JPEGImages/2010_003024.jpg,7,0 53 | PF-dataset-PASCAL/JPEGImages/2009_001475.jpg,PF-dataset-PASCAL/JPEGImages/2010_004283.jpg,10,0 54 | PF-dataset-PASCAL/JPEGImages/2009_003565.jpg,PF-dataset-PASCAL/JPEGImages/2010_002459.jpg,13,0 55 | PF-dataset-PASCAL/JPEGImages/2008_004301.jpg,PF-dataset-PASCAL/JPEGImages/2009_004478.jpg,20,0 56 | PF-dataset-PASCAL/JPEGImages/2010_003495.jpg,PF-dataset-PASCAL/JPEGImages/2009_005120.jpg,1,0 57 | PF-dataset-PASCAL/JPEGImages/2010_005277.jpg,PF-dataset-PASCAL/JPEGImages/2010_001808.jpg,6,0 58 | PF-dataset-PASCAL/JPEGImages/2010_004747.jpg,PF-dataset-PASCAL/JPEGImages/2010_002048.jpg,7,0 59 | PF-dataset-PASCAL/JPEGImages/2009_001044.jpg,PF-dataset-PASCAL/JPEGImages/2010_003562.jpg,6,0 60 | PF-dataset-PASCAL/JPEGImages/2011_002031.jpg,PF-dataset-PASCAL/JPEGImages/2010_000330.jpg,16,0 61 | PF-dataset-PASCAL/JPEGImages/2009_004871.jpg,PF-dataset-PASCAL/JPEGImages/2010_000376.jpg,6,0 62 | 
PF-dataset-PASCAL/JPEGImages/2011_001084.jpg,PF-dataset-PASCAL/JPEGImages/2009_005131.jpg,3,0 63 | PF-dataset-PASCAL/JPEGImages/2008_002601.jpg,PF-dataset-PASCAL/JPEGImages/2009_004718.jpg,9,0 64 | PF-dataset-PASCAL/JPEGImages/2009_000576.jpg,PF-dataset-PASCAL/JPEGImages/2009_003775.jpg,13,0 65 | PF-dataset-PASCAL/JPEGImages/2011_001741.jpg,PF-dataset-PASCAL/JPEGImages/2009_005061.jpg,19,0 66 | PF-dataset-PASCAL/JPEGImages/2008_007114.jpg,PF-dataset-PASCAL/JPEGImages/2010_000807.jpg,20,0 67 | PF-dataset-PASCAL/JPEGImages/2008_002972.jpg,PF-dataset-PASCAL/JPEGImages/2010_001923.jpg,14,0 68 | PF-dataset-PASCAL/JPEGImages/2009_003031.jpg,PF-dataset-PASCAL/JPEGImages/2010_000036.jpg,7,0 69 | PF-dataset-PASCAL/JPEGImages/2009_004091.jpg,PF-dataset-PASCAL/JPEGImages/2009_004993.jpg,18,0 70 | PF-dataset-PASCAL/JPEGImages/2009_004914.jpg,PF-dataset-PASCAL/JPEGImages/2010_002211.jpg,2,0 71 | PF-dataset-PASCAL/JPEGImages/2010_002844.jpg,PF-dataset-PASCAL/JPEGImages/2010_001360.jpg,7,0 72 | PF-dataset-PASCAL/JPEGImages/2009_002995.jpg,PF-dataset-PASCAL/JPEGImages/2009_005256.jpg,20,0 73 | PF-dataset-PASCAL/JPEGImages/2010_004597.jpg,PF-dataset-PASCAL/JPEGImages/2010_002639.jpg,11,0 74 | PF-dataset-PASCAL/JPEGImages/2010_003358.jpg,PF-dataset-PASCAL/JPEGImages/2010_001240.jpg,8,0 75 | PF-dataset-PASCAL/JPEGImages/2008_000733.jpg,PF-dataset-PASCAL/JPEGImages/2008_004522.jpg,15,0 76 | PF-dataset-PASCAL/JPEGImages/2010_005935.jpg,PF-dataset-PASCAL/JPEGImages/2010_002192.jpg,8,0 77 | PF-dataset-PASCAL/JPEGImages/2010_003293.jpg,PF-dataset-PASCAL/JPEGImages/2010_000603.jpg,6,0 78 | PF-dataset-PASCAL/JPEGImages/2008_000733.jpg,PF-dataset-PASCAL/JPEGImages/2008_003884.jpg,15,0 79 | PF-dataset-PASCAL/JPEGImages/2008_002892.jpg,PF-dataset-PASCAL/JPEGImages/2010_001785.jpg,11,0 80 | PF-dataset-PASCAL/JPEGImages/2009_004295.jpg,PF-dataset-PASCAL/JPEGImages/2009_004836.jpg,6,0 81 | PF-dataset-PASCAL/JPEGImages/2011_002811.jpg,PF-dataset-PASCAL/JPEGImages/2008_004602.jpg,15,0 82 | PF-dataset-PASCAL/JPEGImages/2008_006834.jpg,PF-dataset-PASCAL/JPEGImages/2010_000511.jpg,9,0 83 | PF-dataset-PASCAL/JPEGImages/2009_001522.jpg,PF-dataset-PASCAL/JPEGImages/2010_002891.jpg,8,0 84 | PF-dataset-PASCAL/JPEGImages/2010_004680.jpg,PF-dataset-PASCAL/JPEGImages/2010_001013.jpg,5,0 85 | PF-dataset-PASCAL/JPEGImages/2010_001089.jpg,PF-dataset-PASCAL/JPEGImages/2010_002286.jpg,8,0 86 | PF-dataset-PASCAL/JPEGImages/2010_005826.jpg,PF-dataset-PASCAL/JPEGImages/2010_002901.jpg,12,0 87 | PF-dataset-PASCAL/JPEGImages/2008_006404.jpg,PF-dataset-PASCAL/JPEGImages/2009_000830.jpg,4,0 88 | PF-dataset-PASCAL/JPEGImages/2008_006968.jpg,PF-dataset-PASCAL/JPEGImages/2010_003060.jpg,12,0 89 | PF-dataset-PASCAL/JPEGImages/2010_003293.jpg,PF-dataset-PASCAL/JPEGImages/2010_000524.jpg,6,0 90 | PF-dataset-PASCAL/JPEGImages/2008_005976.jpg,PF-dataset-PASCAL/JPEGImages/2009_004341.jpg,20,0 91 | PF-dataset-PASCAL/JPEGImages/2011_002131.jpg,PF-dataset-PASCAL/JPEGImages/2010_001698.jpg,5,0 92 | PF-dataset-PASCAL/JPEGImages/2008_000244.jpg,PF-dataset-PASCAL/JPEGImages/2009_002967.jpg,20,0 93 | PF-dataset-PASCAL/JPEGImages/2011_000065.jpg,PF-dataset-PASCAL/JPEGImages/2010_001181.jpg,11,0 94 | PF-dataset-PASCAL/JPEGImages/2010_005782.jpg,PF-dataset-PASCAL/JPEGImages/2009_004972.jpg,2,0 95 | PF-dataset-PASCAL/JPEGImages/2009_003753.jpg,PF-dataset-PASCAL/JPEGImages/2010_000053.jpg,20,0 96 | PF-dataset-PASCAL/JPEGImages/2010_002577.jpg,PF-dataset-PASCAL/JPEGImages/2010_002542.jpg,1,0 97 | 
PF-dataset-PASCAL/JPEGImages/2008_008223.jpg,PF-dataset-PASCAL/JPEGImages/2009_004559.jpg,19,0 98 | PF-dataset-PASCAL/JPEGImages/2010_003453.jpg,PF-dataset-PASCAL/JPEGImages/2010_001080.jpg,7,0 99 | PF-dataset-PASCAL/JPEGImages/2010_004493.jpg,PF-dataset-PASCAL/JPEGImages/2010_002480.jpg,12,0 100 | PF-dataset-PASCAL/JPEGImages/2008_005763.jpg,PF-dataset-PASCAL/JPEGImages/2009_005220.jpg,19,0 101 | PF-dataset-PASCAL/JPEGImages/2009_002438.jpg,PF-dataset-PASCAL/JPEGImages/2010_000557.jpg,2,0 102 | PF-dataset-PASCAL/JPEGImages/2008_008748.jpg,PF-dataset-PASCAL/JPEGImages/2010_002978.jpg,6,0 103 | PF-dataset-PASCAL/JPEGImages/2011_000651.jpg,PF-dataset-PASCAL/JPEGImages/2010_000583.jpg,14,0 104 | PF-dataset-PASCAL/JPEGImages/2011_002131.jpg,PF-dataset-PASCAL/JPEGImages/2009_004856.jpg,5,0 105 | PF-dataset-PASCAL/JPEGImages/2009_000249.jpg,PF-dataset-PASCAL/JPEGImages/2010_001292.jpg,8,0 106 | PF-dataset-PASCAL/JPEGImages/2011_002833.jpg,PF-dataset-PASCAL/JPEGImages/2010_003673.jpg,12,0 107 | PF-dataset-PASCAL/JPEGImages/2010_002459.jpg,PF-dataset-PASCAL/JPEGImages/2009_003565.jpg,13,0 108 | PF-dataset-PASCAL/JPEGImages/2008_006599.jpg,PF-dataset-PASCAL/JPEGImages/2010_001401.jpg,8,0 109 | PF-dataset-PASCAL/JPEGImages/2009_000945.jpg,PF-dataset-PASCAL/JPEGImages/2009_004962.jpg,6,0 110 | PF-dataset-PASCAL/JPEGImages/2009_001984.jpg,PF-dataset-PASCAL/JPEGImages/2010_003736.jpg,14,0 111 | PF-dataset-PASCAL/JPEGImages/2009_002616.jpg,PF-dataset-PASCAL/JPEGImages/2009_004371.jpg,19,0 112 | PF-dataset-PASCAL/JPEGImages/2008_000731.jpg,PF-dataset-PASCAL/JPEGImages/2008_004365.jpg,15,0 113 | PF-dataset-PASCAL/JPEGImages/2009_003018.jpg,PF-dataset-PASCAL/JPEGImages/2010_000069.jpg,6,0 114 | PF-dataset-PASCAL/JPEGImages/2009_001627.jpg,PF-dataset-PASCAL/JPEGImages/2010_000202.jpg,16,0 115 | PF-dataset-PASCAL/JPEGImages/2010_002941.jpg,PF-dataset-PASCAL/JPEGImages/2009_003818.jpg,5,0 116 | PF-dataset-PASCAL/JPEGImages/2009_001677.jpg,PF-dataset-PASCAL/JPEGImages/2010_000991.jpg,14,0 117 | PF-dataset-PASCAL/JPEGImages/2010_000075.jpg,PF-dataset-PASCAL/JPEGImages/2009_002262.jpg,3,0 118 | PF-dataset-PASCAL/JPEGImages/2010_003677.jpg,PF-dataset-PASCAL/JPEGImages/2009_004939.jpg,9,0 119 | PF-dataset-PASCAL/JPEGImages/2008_004739.jpg,PF-dataset-PASCAL/JPEGImages/2009_004812.jpg,1,0 120 | PF-dataset-PASCAL/JPEGImages/2008_006616.jpg,PF-dataset-PASCAL/JPEGImages/2010_000329.jpg,18,0 121 | PF-dataset-PASCAL/JPEGImages/2008_003975.jpg,PF-dataset-PASCAL/JPEGImages/2010_001644.jpg,12,0 122 | PF-dataset-PASCAL/JPEGImages/2011_001369.jpg,PF-dataset-PASCAL/JPEGImages/2009_004554.jpg,16,0 123 | PF-dataset-PASCAL/JPEGImages/2008_005875.jpg,PF-dataset-PASCAL/JPEGImages/2010_003561.jpg,7,0 124 | PF-dataset-PASCAL/JPEGImages/2009_004298.jpg,PF-dataset-PASCAL/JPEGImages/2009_004457.jpg,18,0 125 | PF-dataset-PASCAL/JPEGImages/2008_005147.jpg,PF-dataset-PASCAL/JPEGImages/2009_005064.jpg,2,0 126 | PF-dataset-PASCAL/JPEGImages/2010_002645.jpg,PF-dataset-PASCAL/JPEGImages/2010_001361.jpg,8,0 127 | PF-dataset-PASCAL/JPEGImages/2011_002713.jpg,PF-dataset-PASCAL/JPEGImages/2010_003062.jpg,2,0 128 | PF-dataset-PASCAL/JPEGImages/2010_005826.jpg,PF-dataset-PASCAL/JPEGImages/2010_001529.jpg,12,0 129 | PF-dataset-PASCAL/JPEGImages/2009_004248.jpg,PF-dataset-PASCAL/JPEGImages/2010_001054.jpg,8,0 130 | PF-dataset-PASCAL/JPEGImages/2009_004855.jpg,PF-dataset-PASCAL/JPEGImages/2010_003138.jpg,12,0 131 | PF-dataset-PASCAL/JPEGImages/2009_003818.jpg,PF-dataset-PASCAL/JPEGImages/2010_002941.jpg,5,0 132 | 
PF-dataset-PASCAL/JPEGImages/2009_002867.jpg,PF-dataset-PASCAL/JPEGImages/2009_004255.jpg,19,0 133 | PF-dataset-PASCAL/JPEGImages/2009_004444.jpg,PF-dataset-PASCAL/JPEGImages/2010_000209.jpg,2,0 134 | PF-dataset-PASCAL/JPEGImages/2008_005962.jpg,PF-dataset-PASCAL/JPEGImages/2009_002705.jpg,20,0 135 | PF-dataset-PASCAL/JPEGImages/2010_001810.jpg,PF-dataset-PASCAL/JPEGImages/2010_004422.jpg,14,0 136 | PF-dataset-PASCAL/JPEGImages/2009_003614.jpg,PF-dataset-PASCAL/JPEGImages/2010_002263.jpg,6,0 137 | PF-dataset-PASCAL/JPEGImages/2010_005909.jpg,PF-dataset-PASCAL/JPEGImages/2010_002814.jpg,2,0 138 | PF-dataset-PASCAL/JPEGImages/2009_002837.jpg,PF-dataset-PASCAL/JPEGImages/2010_001544.jpg,8,0 139 | PF-dataset-PASCAL/JPEGImages/2009_004993.jpg,PF-dataset-PASCAL/JPEGImages/2009_004091.jpg,18,0 140 | PF-dataset-PASCAL/JPEGImages/2010_006084.jpg,PF-dataset-PASCAL/JPEGImages/2009_003109.jpg,3,0 141 | PF-dataset-PASCAL/JPEGImages/2009_001333.jpg,PF-dataset-PASCAL/JPEGImages/2010_005130.jpg,19,0 142 | PF-dataset-PASCAL/JPEGImages/2009_001522.jpg,PF-dataset-PASCAL/JPEGImages/2010_002485.jpg,8,0 143 | PF-dataset-PASCAL/JPEGImages/2011_002589.jpg,PF-dataset-PASCAL/JPEGImages/2010_003162.jpg,12,0 144 | PF-dataset-PASCAL/JPEGImages/2008_002568.jpg,PF-dataset-PASCAL/JPEGImages/2008_004636.jpg,15,0 145 | PF-dataset-PASCAL/JPEGImages/2009_000341.jpg,PF-dataset-PASCAL/JPEGImages/2010_000035.jpg,14,0 146 | PF-dataset-PASCAL/JPEGImages/2010_000330.jpg,PF-dataset-PASCAL/JPEGImages/2010_004382.jpg,16,0 147 | PF-dataset-PASCAL/JPEGImages/2008_000536.jpg,PF-dataset-PASCAL/JPEGImages/2010_002976.jpg,8,0 148 | PF-dataset-PASCAL/JPEGImages/2011_000060.jpg,PF-dataset-PASCAL/JPEGImages/2010_004941.jpg,14,0 149 | PF-dataset-PASCAL/JPEGImages/2008_006409.jpg,PF-dataset-PASCAL/JPEGImages/2009_002562.jpg,20,0 150 | PF-dataset-PASCAL/JPEGImages/2008_006686.jpg,PF-dataset-PASCAL/JPEGImages/2010_003469.jpg,14,0 151 | PF-dataset-PASCAL/JPEGImages/2010_003040.jpg,PF-dataset-PASCAL/JPEGImages/2009_004867.jpg,6,0 152 | PF-dataset-PASCAL/JPEGImages/2008_000916.jpg,PF-dataset-PASCAL/JPEGImages/2010_002722.jpg,19,0 153 | PF-dataset-PASCAL/JPEGImages/2009_001502.jpg,PF-dataset-PASCAL/JPEGImages/2010_001402.jpg,8,0 154 | PF-dataset-PASCAL/JPEGImages/2009_000027.jpg,PF-dataset-PASCAL/JPEGImages/2010_002758.jpg,6,0 155 | PF-dataset-PASCAL/JPEGImages/2010_004813.jpg,PF-dataset-PASCAL/JPEGImages/2009_004536.jpg,9,0 156 | PF-dataset-PASCAL/JPEGImages/2009_000402.jpg,PF-dataset-PASCAL/JPEGImages/2010_002172.jpg,14,0 157 | PF-dataset-PASCAL/JPEGImages/2009_001988.jpg,PF-dataset-PASCAL/JPEGImages/2010_004291.jpg,7,0 158 | PF-dataset-PASCAL/JPEGImages/2010_002333.jpg,PF-dataset-PASCAL/JPEGImages/2010_002645.jpg,8,0 159 | PF-dataset-PASCAL/JPEGImages/2010_000036.jpg,PF-dataset-PASCAL/JPEGImages/2010_002903.jpg,7,0 160 | PF-dataset-PASCAL/JPEGImages/2011_001015.jpg,PF-dataset-PASCAL/JPEGImages/2009_004374.jpg,6,0 161 | PF-dataset-PASCAL/JPEGImages/2008_007858.jpg,PF-dataset-PASCAL/JPEGImages/2010_003779.jpg,5,0 162 | PF-dataset-PASCAL/JPEGImages/2008_007264.jpg,PF-dataset-PASCAL/JPEGImages/2010_000495.jpg,14,0 163 | PF-dataset-PASCAL/JPEGImages/2011_000229.jpg,PF-dataset-PASCAL/JPEGImages/2010_000842.jpg,8,0 164 | PF-dataset-PASCAL/JPEGImages/2009_000184.jpg,PF-dataset-PASCAL/JPEGImages/2010_003200.jpg,14,0 165 | PF-dataset-PASCAL/JPEGImages/2011_002811.jpg,PF-dataset-PASCAL/JPEGImages/2008_006181.jpg,15,0 166 | PF-dataset-PASCAL/JPEGImages/2008_004620.jpg,PF-dataset-PASCAL/JPEGImages/2009_000932.jpg,3,0 167 | 
PF-dataset-PASCAL/JPEGImages/2011_000397.jpg,PF-dataset-PASCAL/JPEGImages/2010_002251.jpg,14,0 168 | PF-dataset-PASCAL/JPEGImages/2010_001717.jpg,PF-dataset-PASCAL/JPEGImages/2010_003651.jpg,16,0 169 | PF-dataset-PASCAL/JPEGImages/2008_008748.jpg,PF-dataset-PASCAL/JPEGImages/2010_002102.jpg,6,0 170 | PF-dataset-PASCAL/JPEGImages/2010_002844.jpg,PF-dataset-PASCAL/JPEGImages/2010_000952.jpg,7,0 171 | PF-dataset-PASCAL/JPEGImages/2008_001715.jpg,PF-dataset-PASCAL/JPEGImages/2010_001973.jpg,9,0 172 | PF-dataset-PASCAL/JPEGImages/2009_005256.jpg,PF-dataset-PASCAL/JPEGImages/2009_002995.jpg,20,0 173 | PF-dataset-PASCAL/JPEGImages/2007_009756.jpg,PF-dataset-PASCAL/JPEGImages/2010_005100.jpg,14,0 174 | PF-dataset-PASCAL/JPEGImages/2008_008215.jpg,PF-dataset-PASCAL/JPEGImages/2010_000250.jpg,1,0 175 | PF-dataset-PASCAL/JPEGImages/2009_000590.jpg,PF-dataset-PASCAL/JPEGImages/2009_005263.jpg,9,0 176 | PF-dataset-PASCAL/JPEGImages/2009_003806.jpg,PF-dataset-PASCAL/JPEGImages/2009_003950.jpg,1,0 177 | PF-dataset-PASCAL/JPEGImages/2011_000618.jpg,PF-dataset-PASCAL/JPEGImages/2010_002747.jpg,14,0 178 | PF-dataset-PASCAL/JPEGImages/2008_003821.jpg,PF-dataset-PASCAL/JPEGImages/2009_004746.jpg,9,0 179 | PF-dataset-PASCAL/JPEGImages/2008_004314.jpg,PF-dataset-PASCAL/JPEGImages/2008_004647.jpg,15,0 180 | PF-dataset-PASCAL/JPEGImages/2011_001643.jpg,PF-dataset-PASCAL/JPEGImages/2010_001592.jpg,2,0 181 | PF-dataset-PASCAL/JPEGImages/2009_002689.jpg,PF-dataset-PASCAL/JPEGImages/2009_004468.jpg,19,0 182 | PF-dataset-PASCAL/JPEGImages/2008_008522.jpg,PF-dataset-PASCAL/JPEGImages/2010_000097.jpg,18,0 183 | PF-dataset-PASCAL/JPEGImages/2010_003879.jpg,PF-dataset-PASCAL/JPEGImages/2010_001311.jpg,12,0 184 | PF-dataset-PASCAL/JPEGImages/2009_001259.jpg,PF-dataset-PASCAL/JPEGImages/2010_001951.jpg,19,0 185 | PF-dataset-PASCAL/JPEGImages/2009_004444.jpg,PF-dataset-PASCAL/JPEGImages/2010_003251.jpg,2,0 186 | PF-dataset-PASCAL/JPEGImages/2008_007465.jpg,PF-dataset-PASCAL/JPEGImages/2009_002685.jpg,3,0 187 | PF-dataset-PASCAL/JPEGImages/2010_002814.jpg,PF-dataset-PASCAL/JPEGImages/2009_004984.jpg,2,0 188 | PF-dataset-PASCAL/JPEGImages/2007_003022.jpg,PF-dataset-PASCAL/JPEGImages/2009_003822.jpg,13,0 189 | PF-dataset-PASCAL/JPEGImages/2008_007236.jpg,PF-dataset-PASCAL/JPEGImages/2010_002248.jpg,12,0 190 | PF-dataset-PASCAL/JPEGImages/2009_002755.jpg,PF-dataset-PASCAL/JPEGImages/2009_005163.jpg,9,0 191 | PF-dataset-PASCAL/JPEGImages/2008_005446.jpg,PF-dataset-PASCAL/JPEGImages/2010_002667.jpg,19,0 192 | PF-dataset-PASCAL/JPEGImages/2009_001704.jpg,PF-dataset-PASCAL/JPEGImages/2010_000829.jpg,2,0 193 | PF-dataset-PASCAL/JPEGImages/2010_000376.jpg,PF-dataset-PASCAL/JPEGImages/2009_004871.jpg,6,0 194 | PF-dataset-PASCAL/JPEGImages/2009_004797.jpg,PF-dataset-PASCAL/JPEGImages/2010_001183.jpg,2,0 195 | PF-dataset-PASCAL/JPEGImages/2009_003694.jpg,PF-dataset-PASCAL/JPEGImages/2010_001731.jpg,9,0 196 | PF-dataset-PASCAL/JPEGImages/2008_001428.jpg,PF-dataset-PASCAL/JPEGImages/2009_003523.jpg,20,0 197 | PF-dataset-PASCAL/JPEGImages/2009_003668.jpg,PF-dataset-PASCAL/JPEGImages/2008_007710.jpg,15,0 198 | PF-dataset-PASCAL/JPEGImages/2010_005782.jpg,PF-dataset-PASCAL/JPEGImages/2009_005156.jpg,2,0 199 | PF-dataset-PASCAL/JPEGImages/2011_000521.jpg,PF-dataset-PASCAL/JPEGImages/2010_000199.jpg,19,0 200 | PF-dataset-PASCAL/JPEGImages/2010_001731.jpg,PF-dataset-PASCAL/JPEGImages/2009_003694.jpg,9,0 201 | PF-dataset-PASCAL/JPEGImages/2008_005303.jpg,PF-dataset-PASCAL/JPEGImages/2009_001683.jpg,3,0 202 | 
PF-dataset-PASCAL/JPEGImages/2008_000724.jpg,PF-dataset-PASCAL/JPEGImages/2010_001418.jpg,8,0 203 | PF-dataset-PASCAL/JPEGImages/2010_000991.jpg,PF-dataset-PASCAL/JPEGImages/2010_000027.jpg,14,0 204 | PF-dataset-PASCAL/JPEGImages/2010_004808.jpg,PF-dataset-PASCAL/JPEGImages/2010_003309.jpg,8,0 205 | PF-dataset-PASCAL/JPEGImages/2008_005374.jpg,PF-dataset-PASCAL/JPEGImages/2010_003635.jpg,14,0 206 | PF-dataset-PASCAL/JPEGImages/2010_005508.jpg,PF-dataset-PASCAL/JPEGImages/2010_001457.jpg,8,0 207 | PF-dataset-PASCAL/JPEGImages/2010_000056.jpg,PF-dataset-PASCAL/JPEGImages/2010_001514.jpg,2,0 208 | PF-dataset-PASCAL/JPEGImages/2008_001022.jpg,PF-dataset-PASCAL/JPEGImages/2010_000573.jpg,6,0 209 | PF-dataset-PASCAL/JPEGImages/2011_002097.jpg,PF-dataset-PASCAL/JPEGImages/2010_000510.jpg,14,0 210 | PF-dataset-PASCAL/JPEGImages/2008_003225.jpg,PF-dataset-PASCAL/JPEGImages/2010_003390.jpg,16,0 211 | PF-dataset-PASCAL/JPEGImages/2011_001508.jpg,PF-dataset-PASCAL/JPEGImages/2010_000744.jpg,14,0 212 | PF-dataset-PASCAL/JPEGImages/2010_001448.jpg,PF-dataset-PASCAL/JPEGImages/2010_002147.jpg,1,0 213 | PF-dataset-PASCAL/JPEGImages/2009_000990.jpg,PF-dataset-PASCAL/JPEGImages/2010_003936.jpg,6,0 214 | PF-dataset-PASCAL/JPEGImages/2009_005081.jpg,PF-dataset-PASCAL/JPEGImages/2010_004322.jpg,10,0 215 | PF-dataset-PASCAL/JPEGImages/2010_000458.jpg,PF-dataset-PASCAL/JPEGImages/2010_002479.jpg,8,0 216 | PF-dataset-PASCAL/JPEGImages/2008_008252.jpg,PF-dataset-PASCAL/JPEGImages/2010_003057.jpg,6,0 217 | PF-dataset-PASCAL/JPEGImages/2007_002618.jpg,PF-dataset-PASCAL/JPEGImages/2010_000822.jpg,17,0 218 | PF-dataset-PASCAL/JPEGImages/2008_004938.jpg,PF-dataset-PASCAL/JPEGImages/2010_005193.jpg,18,0 219 | PF-dataset-PASCAL/JPEGImages/2010_003811.jpg,PF-dataset-PASCAL/JPEGImages/2010_000401.jpg,2,0 220 | PF-dataset-PASCAL/JPEGImages/2008_001260.jpg,PF-dataset-PASCAL/JPEGImages/2009_002897.jpg,4,0 221 | PF-dataset-PASCAL/JPEGImages/2009_001746.jpg,PF-dataset-PASCAL/JPEGImages/2009_004298.jpg,18,0 222 | PF-dataset-PASCAL/JPEGImages/2008_006234.jpg,PF-dataset-PASCAL/JPEGImages/2009_005178.jpg,2,0 223 | PF-dataset-PASCAL/JPEGImages/2010_003879.jpg,PF-dataset-PASCAL/JPEGImages/2010_002824.jpg,12,0 224 | PF-dataset-PASCAL/JPEGImages/2010_004283.jpg,PF-dataset-PASCAL/JPEGImages/2010_005458.jpg,10,0 225 | PF-dataset-PASCAL/JPEGImages/2008_007375.jpg,PF-dataset-PASCAL/JPEGImages/2010_001650.jpg,6,0 226 | PF-dataset-PASCAL/JPEGImages/2008_003892.jpg,PF-dataset-PASCAL/JPEGImages/2010_000490.jpg,14,0 227 | PF-dataset-PASCAL/JPEGImages/2011_002833.jpg,PF-dataset-PASCAL/JPEGImages/2010_002299.jpg,12,0 228 | PF-dataset-PASCAL/JPEGImages/2010_002941.jpg,PF-dataset-PASCAL/JPEGImages/2010_003329.jpg,5,0 229 | PF-dataset-PASCAL/JPEGImages/2008_003524.jpg,PF-dataset-PASCAL/JPEGImages/2009_005262.jpg,3,0 230 | PF-dataset-PASCAL/JPEGImages/2009_000006.jpg,PF-dataset-PASCAL/JPEGImages/2010_002203.jpg,6,0 231 | PF-dataset-PASCAL/JPEGImages/2010_001013.jpg,PF-dataset-PASCAL/JPEGImages/2009_005150.jpg,5,0 232 | PF-dataset-PASCAL/JPEGImages/2011_001771.jpg,PF-dataset-PASCAL/JPEGImages/2009_002830.jpg,4,0 233 | PF-dataset-PASCAL/JPEGImages/2009_002144.jpg,PF-dataset-PASCAL/JPEGImages/2010_004604.jpg,19,0 234 | PF-dataset-PASCAL/JPEGImages/2008_008120.jpg,PF-dataset-PASCAL/JPEGImages/2010_003040.jpg,6,0 235 | PF-dataset-PASCAL/JPEGImages/2008_007241.jpg,PF-dataset-PASCAL/JPEGImages/2010_004355.jpg,14,0 236 | PF-dataset-PASCAL/JPEGImages/2008_004620.jpg,PF-dataset-PASCAL/JPEGImages/2009_003273.jpg,3,0 237 | 
PF-dataset-PASCAL/JPEGImages/2011_003029.jpg,PF-dataset-PASCAL/JPEGImages/2010_000160.jpg,7,0 238 | PF-dataset-PASCAL/JPEGImages/2008_003894.jpg,PF-dataset-PASCAL/JPEGImages/2009_002056.jpg,3,0 239 | PF-dataset-PASCAL/JPEGImages/2008_005976.jpg,PF-dataset-PASCAL/JPEGImages/2009_001812.jpg,20,0 240 | PF-dataset-PASCAL/JPEGImages/2010_001199.jpg,PF-dataset-PASCAL/JPEGImages/2010_002721.jpg,11,0 241 | PF-dataset-PASCAL/JPEGImages/2008_007114.jpg,PF-dataset-PASCAL/JPEGImages/2009_004719.jpg,20,0 242 | PF-dataset-PASCAL/JPEGImages/2008_002954.jpg,PF-dataset-PASCAL/JPEGImages/2008_007254.jpg,15,0 243 | PF-dataset-PASCAL/JPEGImages/2008_007264.jpg,PF-dataset-PASCAL/JPEGImages/2010_004119.jpg,14,0 244 | PF-dataset-PASCAL/JPEGImages/2008_006751.jpg,PF-dataset-PASCAL/JPEGImages/2010_001717.jpg,16,0 245 | PF-dataset-PASCAL/JPEGImages/2010_002722.jpg,PF-dataset-PASCAL/JPEGImages/2009_004211.jpg,19,0 246 | PF-dataset-PASCAL/JPEGImages/2009_001910.jpg,PF-dataset-PASCAL/JPEGImages/2010_003855.jpg,6,0 247 | PF-dataset-PASCAL/JPEGImages/2009_002299.jpg,PF-dataset-PASCAL/JPEGImages/2010_004059.jpg,7,0 248 | PF-dataset-PASCAL/JPEGImages/2011_000688.jpg,PF-dataset-PASCAL/JPEGImages/2010_002369.jpg,19,0 249 | PF-dataset-PASCAL/JPEGImages/2009_003519.jpg,PF-dataset-PASCAL/JPEGImages/2010_000113.jpg,2,0 250 | PF-dataset-PASCAL/JPEGImages/2008_005269.jpg,PF-dataset-PASCAL/JPEGImages/2010_000651.jpg,19,0 251 | PF-dataset-PASCAL/JPEGImages/2010_001292.jpg,PF-dataset-PASCAL/JPEGImages/2010_001025.jpg,8,0 252 | PF-dataset-PASCAL/JPEGImages/2008_003382.jpg,PF-dataset-PASCAL/JPEGImages/2009_005191.jpg,3,0 253 | PF-dataset-PASCAL/JPEGImages/2008_003152.jpg,PF-dataset-PASCAL/JPEGImages/2008_004314.jpg,15,0 254 | PF-dataset-PASCAL/JPEGImages/2010_006082.jpg,PF-dataset-PASCAL/JPEGImages/2010_001426.jpg,1,0 255 | PF-dataset-PASCAL/JPEGImages/2008_008606.jpg,PF-dataset-PASCAL/JPEGImages/2009_004464.jpg,18,0 256 | PF-dataset-PASCAL/JPEGImages/2009_001589.jpg,PF-dataset-PASCAL/JPEGImages/2010_003837.jpg,12,0 257 | PF-dataset-PASCAL/JPEGImages/2010_000952.jpg,PF-dataset-PASCAL/JPEGImages/2010_002844.jpg,7,0 258 | PF-dataset-PASCAL/JPEGImages/2008_005356.jpg,PF-dataset-PASCAL/JPEGImages/2009_004934.jpg,2,0 259 | PF-dataset-PASCAL/JPEGImages/2008_003701.jpg,PF-dataset-PASCAL/JPEGImages/2010_000382.jpg,4,0 260 | PF-dataset-PASCAL/JPEGImages/2009_003383.jpg,PF-dataset-PASCAL/JPEGImages/2010_004625.jpg,6,0 261 | PF-dataset-PASCAL/JPEGImages/2009_005220.jpg,PF-dataset-PASCAL/JPEGImages/2010_001748.jpg,19,0 262 | PF-dataset-PASCAL/JPEGImages/2009_002517.jpg,PF-dataset-PASCAL/JPEGImages/2010_000682.jpg,6,0 263 | PF-dataset-PASCAL/JPEGImages/2010_005664.jpg,PF-dataset-PASCAL/JPEGImages/2010_002180.jpg,2,0 264 | PF-dataset-PASCAL/JPEGImages/2010_002645.jpg,PF-dataset-PASCAL/JPEGImages/2010_002333.jpg,8,0 265 | PF-dataset-PASCAL/JPEGImages/2007_007109.jpg,PF-dataset-PASCAL/JPEGImages/2010_001503.jpg,13,0 266 | PF-dataset-PASCAL/JPEGImages/2010_004409.jpg,PF-dataset-PASCAL/JPEGImages/2010_003861.jpg,18,0 267 | PF-dataset-PASCAL/JPEGImages/2008_005976.jpg,PF-dataset-PASCAL/JPEGImages/2009_001852.jpg,20,0 268 | PF-dataset-PASCAL/JPEGImages/2009_002982.jpg,PF-dataset-PASCAL/JPEGImages/2010_004779.jpg,19,0 269 | PF-dataset-PASCAL/JPEGImages/2010_005546.jpg,PF-dataset-PASCAL/JPEGImages/2010_002132.jpg,7,0 270 | PF-dataset-PASCAL/JPEGImages/2009_000199.jpg,PF-dataset-PASCAL/JPEGImages/2010_001206.jpg,6,0 271 | PF-dataset-PASCAL/JPEGImages/2008_000719.jpg,PF-dataset-PASCAL/JPEGImages/2008_003754.jpg,15,0 272 | 
PF-dataset-PASCAL/JPEGImages/2009_004203.jpg,PF-dataset-PASCAL/JPEGImages/2010_003816.jpg,1,0 273 | PF-dataset-PASCAL/JPEGImages/2010_004903.jpg,PF-dataset-PASCAL/JPEGImages/2009_004290.jpg,6,0 274 | PF-dataset-PASCAL/JPEGImages/2010_002480.jpg,PF-dataset-PASCAL/JPEGImages/2010_004493.jpg,12,0 275 | PF-dataset-PASCAL/JPEGImages/2007_006212.jpg,PF-dataset-PASCAL/JPEGImages/2010_002577.jpg,1,0 276 | PF-dataset-PASCAL/JPEGImages/2008_007709.jpg,PF-dataset-PASCAL/JPEGImages/2009_002608.jpg,3,0 277 | PF-dataset-PASCAL/JPEGImages/2008_003691.jpg,PF-dataset-PASCAL/JPEGImages/2009_005060.jpg,6,0 278 | PF-dataset-PASCAL/JPEGImages/2011_002006.jpg,PF-dataset-PASCAL/JPEGImages/2010_003350.jpg,2,0 279 | PF-dataset-PASCAL/JPEGImages/2009_002568.jpg,PF-dataset-PASCAL/JPEGImages/2009_004766.jpg,19,0 280 | PF-dataset-PASCAL/JPEGImages/2010_002721.jpg,PF-dataset-PASCAL/JPEGImages/2010_001199.jpg,11,0 281 | PF-dataset-PASCAL/JPEGImages/2010_002824.jpg,PF-dataset-PASCAL/JPEGImages/2010_003879.jpg,12,0 282 | PF-dataset-PASCAL/JPEGImages/2008_001249.jpg,PF-dataset-PASCAL/JPEGImages/2008_004567.jpg,15,0 283 | PF-dataset-PASCAL/JPEGImages/2008_002850.jpg,PF-dataset-PASCAL/JPEGImages/2009_002173.jpg,4,0 284 | PF-dataset-PASCAL/JPEGImages/2008_006170.jpg,PF-dataset-PASCAL/JPEGImages/2010_001242.jpg,12,0 285 | PF-dataset-PASCAL/JPEGImages/2011_000901.jpg,PF-dataset-PASCAL/JPEGImages/2010_003173.jpg,6,0 286 | PF-dataset-PASCAL/JPEGImages/2011_000122.jpg,PF-dataset-PASCAL/JPEGImages/2010_002534.jpg,12,0 287 | PF-dataset-PASCAL/JPEGImages/2009_001484.jpg,PF-dataset-PASCAL/JPEGImages/2010_001555.jpg,8,0 288 | PF-dataset-PASCAL/JPEGImages/2010_000495.jpg,PF-dataset-PASCAL/JPEGImages/2010_003695.jpg,14,0 289 | PF-dataset-PASCAL/JPEGImages/2008_001399.jpg,PF-dataset-PASCAL/JPEGImages/2009_004674.jpg,18,0 290 | PF-dataset-PASCAL/JPEGImages/2008_000401.jpg,PF-dataset-PASCAL/JPEGImages/2010_001331.jpg,8,0 291 | PF-dataset-PASCAL/JPEGImages/2009_004371.jpg,PF-dataset-PASCAL/JPEGImages/2010_004669.jpg,19,0 292 | PF-dataset-PASCAL/JPEGImages/2009_004594.jpg,PF-dataset-PASCAL/JPEGImages/2010_002363.jpg,7,0 293 | PF-dataset-PASCAL/JPEGImages/2008_005215.jpg,PF-dataset-PASCAL/JPEGImages/2009_004942.jpg,13,0 294 | PF-dataset-PASCAL/JPEGImages/2008_006834.jpg,PF-dataset-PASCAL/JPEGImages/2009_004921.jpg,9,0 295 | PF-dataset-PASCAL/JPEGImages/2011_000103.jpg,PF-dataset-PASCAL/JPEGImages/2009_004203.jpg,1,0 296 | PF-dataset-PASCAL/JPEGImages/2010_003826.jpg,PF-dataset-PASCAL/JPEGImages/2010_002631.jpg,12,0 297 | PF-dataset-PASCAL/JPEGImages/2008_001937.jpg,PF-dataset-PASCAL/JPEGImages/2008_004749.jpg,15,0 298 | PF-dataset-PASCAL/JPEGImages/2008_000839.jpg,PF-dataset-PASCAL/JPEGImages/2010_001885.jpg,8,0 299 | PF-dataset-PASCAL/JPEGImages/2008_008212.jpg,PF-dataset-PASCAL/JPEGImages/2010_001077.jpg,13,0 300 | PF-dataset-PASCAL/JPEGImages/2008_007441.jpg,PF-dataset-PASCAL/JPEGImages/2009_004078.jpg,4,0 301 | PF-dataset-PASCAL/JPEGImages/2010_000524.jpg,PF-dataset-PASCAL/JPEGImages/2010_003293.jpg,6,0 302 | PF-dataset-PASCAL/JPEGImages/2011_001004.jpg,PF-dataset-PASCAL/JPEGImages/2010_000198.jpg,7,0 303 | PF-dataset-PASCAL/JPEGImages/2009_004303.jpg,PF-dataset-PASCAL/JPEGImages/2010_000685.jpg,2,0 304 | PF-dataset-PASCAL/JPEGImages/2010_000276.jpg,PF-dataset-PASCAL/JPEGImages/2010_001760.jpg,12,0 305 | PF-dataset-PASCAL/JPEGImages/2010_001731.jpg,PF-dataset-PASCAL/JPEGImages/2010_000485.jpg,9,0 306 | PF-dataset-PASCAL/JPEGImages/2010_000395.jpg,PF-dataset-PASCAL/JPEGImages/2010_001370.jpg,12,0 307 | 
PF-dataset-PASCAL/JPEGImages/2008_005374.jpg,PF-dataset-PASCAL/JPEGImages/2010_005199.jpg,14,0 308 | PF-dataset-PASCAL/JPEGImages/2011_002131.jpg,PF-dataset-PASCAL/JPEGImages/2010_000213.jpg,5,0 309 | PF-dataset-PASCAL/JPEGImages/2009_002717.jpg,PF-dataset-PASCAL/JPEGImages/2009_004449.jpg,18,0 310 | -------------------------------------------------------------------------------- /dccnet_archi.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ShuaiyiHuang/DCCNet/e4f1932896153f535ba42c92f74980e3be27cefe/dccnet_archi.jpg -------------------------------------------------------------------------------- /eval_pf_pascal.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function, division 2 | import torch 3 | 4 | from models.model_dynamic import DCCNet 5 | from lib.eval_util_dynamic import pfpascal_test_dataloader,pfdataset_pck 6 | 7 | import argparse 8 | 9 | print('DCCNet evaluation script - PF Pascal dataset') 10 | 11 | use_cuda = torch.cuda.is_available() 12 | 13 | # Argument parsing 14 | parser = argparse.ArgumentParser(description='Compute PF Pascal matches') 15 | parser.add_argument('--checkpoint', type=str, default='./trained_models/best_dccnet.pth.tar') 16 | parser.add_argument('--image_size', type=int, default=400) 17 | parser.add_argument('--eval_dataset_path', type=str, default='./datasets/pf-pascal/', help='path to PF Pascal dataset') 18 | parser.add_argument('--pck_alpha', type=float, default=0.1, help='pck alpha for evaluation') 19 | 20 | # DCCNet args 21 | parser.add_argument('--ncons_kernel_sizes', nargs='+', type=int, default=[5,5,5], help='kernels sizes in neigh. cons.') 22 | parser.add_argument('--ncons_channels', nargs='+', type=int, default=[16,16,1], help='channels in neigh. 
cons') 23 | 24 | parser.add_argument('--sce_kernel_size',type=int,default=25,help='kernel size in sce.') 25 | parser.add_argument('--sce_hidden_dim',type=int,default=1024,help='hidden dim in sce') 26 | parser.add_argument('--scaleloss_weight',type=float,default=1.0,help='whether use scale loss, if use the weight for scale loss') 27 | parser.add_argument('--att_scale_ncons_kernel_sizes', nargs='+', type=int, default=[5,5,5], help='kernels sizes in dynamic fusion net.') 28 | parser.add_argument('--att_scale_ncons_channels', nargs='+', type=int, default=[16,16,1], help='channels in dynamic fusion net') 29 | 30 | args = parser.parse_args() 31 | print(args) 32 | # Create model 33 | print('Creating CNN model...') 34 | model = DCCNet(use_cuda=use_cuda, 35 | checkpoint=args.checkpoint, 36 | ncons_kernel_sizes=args.ncons_kernel_sizes, 37 | ncons_channels=args.ncons_channels, 38 | sce_kernel_size=args.sce_kernel_size, 39 | sce_hidden_dim=args.sce_hidden_dim, 40 | att_scale_ncons_kernel_sizes=args.att_scale_ncons_kernel_sizes, 41 | att_scale_ncons_channels=args.att_scale_ncons_channels, 42 | ) 43 | 44 | # Dataset and dataloader 45 | dataloader = pfpascal_test_dataloader(image_size=args.image_size,eval_dataset_path=args.eval_dataset_path) 46 | 47 | pck = pfdataset_pck(dataloader=dataloader, model=model,verbose=True,alpha=args.pck_alpha) 48 | 49 | -------------------------------------------------------------------------------- /eval_pf_willow.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function, division 2 | import os 3 | import numpy as np 4 | import torch 5 | from torch.utils.data import DataLoader 6 | 7 | from lib.eval_util_dynamic import pck_metric 8 | from lib.point_tnf_dynamic import corr_to_matches 9 | 10 | from lib.pf_willow_dataset import PFDataset 11 | from lib.normalization import NormalizeImageDict 12 | from lib.torch_util import BatchTensorToVars 13 | from lib.dataloader import default_collate 14 | 15 | from models.model_dynamic import DCCNet 16 | import argparse 17 | 18 | print('DCCNet evaluation script - PF Willow dataset') 19 | 20 | use_cuda = torch.cuda.is_available() 21 | 22 | # Argument parsing 23 | parser = argparse.ArgumentParser(description='Compute PF Willow matches') 24 | parser.add_argument('--checkpoint', type=str, default='./trained_models/best_dccnet.pth.tar') 25 | parser.add_argument('--image_size', type=int, default=400) 26 | parser.add_argument('--eval_dataset_path', type=str, default='./datasets/proposal-flow-willow', help='path to PF Willow dataset') 27 | parser.add_argument('--pck_alpha', type=float, default=0.1, help='pck alpha for evaluation') 28 | 29 | 30 | # DCCNet args 31 | parser.add_argument('--ncons_kernel_sizes', nargs='+', type=int, default=[5,5,5], help='kernels sizes in neigh. cons.') 32 | parser.add_argument('--ncons_channels', nargs='+', type=int, default=[16,16,1], help='channels in neigh. 
cons') 33 | 34 | parser.add_argument('--sce_kernel_size',type=int,default=25,help='kernel size in sce.') 35 | parser.add_argument('--sce_hidden_dim',type=int,default=1024,help='hidden dim in sce') 36 | parser.add_argument('--scaleloss_weight',type=float,default=1.0,help='whether use scale loss, if use the weight for scale loss') 37 | parser.add_argument('--att_scale_ncons_kernel_sizes', nargs='+', type=int, default=[5,5,5], help='kernels sizes in dynamic fusion net.') 38 | parser.add_argument('--att_scale_ncons_channels', nargs='+', type=int, default=[16,16,1], help='channels in dynamic fusion net') 39 | 40 | args = parser.parse_args() 41 | print(args) 42 | # Create model 43 | print('Creating CNN model...') 44 | model = DCCNet(use_cuda=use_cuda, 45 | checkpoint=args.checkpoint, 46 | ncons_kernel_sizes=args.ncons_kernel_sizes, 47 | ncons_channels=args.ncons_channels, 48 | sce_kernel_size=args.sce_kernel_size, 49 | sce_hidden_dim=args.sce_hidden_dim, 50 | att_scale_ncons_kernel_sizes=args.att_scale_ncons_kernel_sizes, 51 | att_scale_ncons_channels=args.att_scale_ncons_channels, 52 | ) 53 | 54 | # Dataset and dataloader 55 | Dataset = PFDataset 56 | collate_fn = default_collate 57 | csv_file = 'test_pairs_pf.csv' 58 | 59 | cnn_image_size = (args.image_size, args.image_size) 60 | 61 | dataset = Dataset(csv_file=os.path.join(args.eval_dataset_path, csv_file), 62 | dataset_path=args.eval_dataset_path, 63 | transform=NormalizeImageDict(['source_image', 'target_image']), 64 | output_size=cnn_image_size) 65 | dataset.pck_procedure = 'scnet' 66 | 67 | # Only batch_size=1 is supported for evaluation 68 | batch_size = 1 69 | 70 | dataloader = DataLoader(dataset, batch_size=batch_size, 71 | shuffle=False, num_workers=0, 72 | collate_fn=collate_fn) 73 | 74 | batch_tnf = BatchTensorToVars(use_cuda=use_cuda) 75 | 76 | model.eval() 77 | 78 | # initialize vector for storing results 79 | stats = {} 80 | stats['point_tnf'] = {} 81 | stats['point_tnf']['pck'] = np.zeros((len(dataset), 1)) 82 | 83 | # Compute 84 | for i, batch in enumerate(dataloader): 85 | batch = batch_tnf(batch) 86 | batch_start_idx = batch_size * i 87 | 88 | out = model(batch) 89 | 90 | # get matches 91 | xA, yA, xB, yB, sB = corr_to_matches(out, do_softmax=True) 92 | 93 | matches = (xA, yA, xB, yB) 94 | stats = pck_metric(batch, batch_start_idx, matches, stats, args, use_cuda,alpha=args.pck_alpha) 95 | 96 | print('Batch: [{}/{} ({:.0f}%)]'.format(i, len(dataloader), 100. * i / len(dataloader))) 97 | 98 | # Print results 99 | results = stats['point_tnf']['pck'] 100 | good_idx = np.flatnonzero((results != -1) * ~np.isnan(results)) 101 | print('Total: ' + str(results.size)) 102 | print('Valid: ' + str(good_idx.size)) 103 | filtered_results = results[good_idx] 104 | print('PCK:', '{:.2%}'.format(np.mean(filtered_results))) 105 | -------------------------------------------------------------------------------- /eval_tss.py: -------------------------------------------------------------------------------- 1 | # Thanks Qiuyue Wang's involvement in TSS evaluation code. 
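# Note: this script runs DCCNet over the TSS test pairs, converts each predicted correlation map into
# dense matches (corr_to_matches) and, through flow_metrics, saves the resulting flow files under
# --flow_output_dir (default: ./datasets/dccnet_results). Those files are then scored with the official
# TSS Matlab evaluation kit (https://github.com/t-taniai/TSS_CVPR2016_EvaluationKit), as described in the README.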
2 | 3 | from __future__ import print_function, division 4 | import os 5 | import numpy as np 6 | import torch 7 | 8 | from torch.utils.data import DataLoader 9 | 10 | from lib.eval_util_dynamic import corr_to_matches 11 | from lib.eval_util_dynamic import flow_metrics 12 | 13 | from lib.normalization import NormalizeImageDict 14 | from lib.torch_util import BatchTensorToVars 15 | 16 | from lib.dataloader import default_collate 17 | from lib.tss_dataset import TSSDataset 18 | 19 | from models.model_dynamic import DCCNet 20 | import argparse 21 | 22 | print('DCCNet evaluation script - TSS dataset') 23 | 24 | use_cuda = torch.cuda.is_available() 25 | 26 | # Argument parsing 27 | parser = argparse.ArgumentParser(description='Compute TSS matches') 28 | parser.add_argument('--checkpoint', type=str, default='./trained_models/best_dccnet.pth.tar') 29 | parser.add_argument('--image_size', type=int, default=400) 30 | parser.add_argument('--eval_dataset_path', type=str, default='./datasets/tss', help='path to TSS dataset') 31 | parser.add_argument('--flow_output_dir', type=str, default='./datasets/dccnet_results') 32 | parser.add_argument('--pck_alpha', type=float, default=0.1, help='pck alpha for evaluation') 33 | 34 | # DCCNet args 35 | parser.add_argument('--ncons_kernel_sizes', nargs='+', type=int, default=[5,5,5], help='kernels sizes in neigh. cons.') 36 | parser.add_argument('--ncons_channels', nargs='+', type=int, default=[16,16,1], help='channels in neigh. cons') 37 | 38 | parser.add_argument('--sce_kernel_size',type=int,default=25,help='kernel size in sce.') 39 | parser.add_argument('--sce_hidden_dim',type=int,default=1024,help='hidden dim in sce') 40 | parser.add_argument('--scaleloss_weight',type=float,default=1.0,help='whether use scale loss, if use the weight for scale loss') 41 | parser.add_argument('--att_scale_ncons_kernel_sizes', nargs='+', type=int, default=[5,5,5], help='kernels sizes in dynamic fusion net.') 42 | parser.add_argument('--att_scale_ncons_channels', nargs='+', type=int, default=[16,16,1], help='channels in dynamic fusion net') 43 | 44 | args = parser.parse_args() 45 | print(args) 46 | # Create model 47 | print('Creating CNN model...') 48 | model = DCCNet(use_cuda=use_cuda, 49 | checkpoint=args.checkpoint, 50 | ncons_kernel_sizes=args.ncons_kernel_sizes, 51 | ncons_channels=args.ncons_channels, 52 | sce_kernel_size=args.sce_kernel_size, 53 | sce_hidden_dim=args.sce_hidden_dim, 54 | att_scale_ncons_kernel_sizes=args.att_scale_ncons_kernel_sizes, 55 | att_scale_ncons_channels=args.att_scale_ncons_channels, 56 | ) 57 | 58 | 59 | # Dataset and dataloader 60 | Dataset = TSSDataset 61 | collate_fn = default_collate 62 | csv_file = 'test_pairs_tss.csv' 63 | 64 | cnn_image_size = (args.image_size, args.image_size) 65 | 66 | dataset = Dataset(csv_file=os.path.join(args.eval_dataset_path, csv_file), 67 | dataset_path=args.eval_dataset_path, 68 | transform=NormalizeImageDict(['source_image', 'target_image']), 69 | output_size=cnn_image_size) 70 | dataset.pck_procedure = 'scnet' 71 | 72 | # Only batch_size=1 is supported for evaluation 73 | batch_size = 1 74 | 75 | dataloader = DataLoader(dataset, batch_size=batch_size, 76 | shuffle=False, num_workers=0, 77 | collate_fn=collate_fn) 78 | 79 | batch_tnf = BatchTensorToVars(use_cuda=use_cuda) 80 | 81 | model.eval() 82 | 83 | # initialize vector for storing results 84 | stats = {} 85 | stats['point_tnf'] = {} 86 | stats['point_tnf']['pck'] = np.zeros((len(dataset), 1)) 87 | 88 | # Compute 89 | for i, batch in enumerate(dataloader): 90 
| batch = batch_tnf(batch) 91 | batch_start_idx = batch_size * i 92 | 93 | out = model(batch) 94 | 95 | # get matches 96 | xA, yA, xB, yB, sB = corr_to_matches(out, do_softmax=True) 97 | 98 | matches = (xA, yA, xB, yB) 99 | # stats = pck_metric(batch, batch_start_idx, matches, stats, args, use_cuda) 100 | stats = flow_metrics(batch, batch_start_idx, matches, stats, args, use_cuda) 101 | print('Batch: [{}/{} ({:.0f}%)]'.format(i, len(dataloader), 100. * i / len(dataloader))) 102 | 103 | # Print results 104 | results = stats['point_tnf']['pck'] 105 | good_idx = np.flatnonzero((results != -1) * ~np.isnan(results)) 106 | print('Total: ' + str(results.size)) 107 | print('Valid: ' + str(good_idx.size)) 108 | filtered_results = results[good_idx] 109 | print('Flow files have been saved to '+args.flow_output_dir) 110 | -------------------------------------------------------------------------------- /geotnf/flow.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import torch.nn.functional as F 4 | from torch.autograd import Variable 5 | from geotnf.point_tnf import normalize_axis, unnormalize_axis 6 | 7 | def read_flo_file(filename,verbose=False): 8 | """ 9 | Read from .flo optical flow file (Middlebury format) 10 | :param flow_file: name of the flow file 11 | :return: optical flow data in matrix 12 | 13 | adapted from https://github.com/liruoteng/OpticalFlowToolkit/ 14 | 15 | """ 16 | f = open(filename, 'rb') 17 | magic = np.fromfile(f, np.float32, count=1) 18 | data2d = None 19 | 20 | if 202021.25 != magic: 21 | raise TypeError('Magic number incorrect. Invalid .flo file') 22 | else: 23 | w = np.fromfile(f, np.int32, count=1) 24 | h = np.fromfile(f, np.int32, count=1) 25 | if verbose: 26 | print("Reading %d x %d flow file in .flo format" % (h, w)) 27 | data2d = np.fromfile(f, np.float32, count=int(2 * w * h)) 28 | # reshape data into 3D array (columns, rows, channels) 29 | data2d = np.resize(data2d, (h[0], w[0], 2)) 30 | f.close() 31 | return data2d 32 | 33 | def write_flo_file(flow, filename): 34 | """ 35 | Write optical flow in Middlebury .flo format 36 | 37 | :param flow: optical flow map 38 | :param filename: optical flow file path to be saved 39 | :return: None 40 | 41 | from https://github.com/liruoteng/OpticalFlowToolkit/ 42 | 43 | """ 44 | # forcing conversion to float32 precision 45 | flow = flow.astype(np.float32) 46 | f = open(filename, 'wb') 47 | magic = np.array([202021.25], dtype=np.float32) 48 | (height, width) = flow.shape[0:2] 49 | w = np.array([width], dtype=np.int32) 50 | h = np.array([height], dtype=np.int32) 51 | magic.tofile(f) 52 | w.tofile(f) 53 | h.tofile(f) 54 | flow.tofile(f) 55 | f.close() 56 | 57 | 58 | def warp_image(image, flow): 59 | """ 60 | Warp image (np.ndarray, shape=[h_src,w_src,3]) with flow (np.ndarray, shape=[h_tgt,w_tgt,2]) 61 | 62 | """ 63 | h_src,w_src=image.shape[0],image.shape[1] 64 | sampling_grid_torch = np_flow_to_th_sampling_grid(flow, h_src, w_src) 65 | image_torch = Variable(torch.FloatTensor(image.astype(np.float32)).transpose(1,2).transpose(0,1).unsqueeze(0)) 66 | warped_image_torch = F.grid_sample(image_torch, sampling_grid_torch) 67 | warped_image = warped_image_torch.data.squeeze(0).transpose(0,1).transpose(1,2).numpy().astype(np.uint8) 68 | return warped_image 69 | 70 | def np_flow_to_th_sampling_grid(flow,h_src,w_src,use_cuda=False): 71 | h_tgt,w_tgt=flow.shape[0],flow.shape[1] 72 | grid_x,grid_y = np.meshgrid(range(1,w_tgt+1),range(1,h_tgt+1)) 73 | 
disp_x=flow[:,:,0] 74 | disp_y=flow[:,:,1] 75 | source_x=grid_x+disp_x 76 | source_y=grid_y+disp_y 77 | source_x_norm=normalize_axis(source_x,w_src) 78 | source_y_norm=normalize_axis(source_y,h_src) 79 | sampling_grid=np.concatenate((np.expand_dims(source_x_norm,2), 80 | np.expand_dims(source_y_norm,2)),2) 81 | sampling_grid_torch = Variable(torch.FloatTensor(sampling_grid).unsqueeze(0)) 82 | if use_cuda: 83 | sampling_grid_torch = sampling_grid_torch.cuda() 84 | return sampling_grid_torch 85 | 86 | # def th_sampling_grid_to_np_flow(source_grid,h_src,w_src): 87 | # batch_size = source_grid.size(0) 88 | # h_tgt,w_tgt=source_grid.size(1),source_grid.size(2) 89 | # source_x_norm=source_grid[:,:,:,0] 90 | # source_y_norm=source_grid[:,:,:,1] 91 | # source_x=unnormalize_axis(source_x_norm,w_src) 92 | # source_y=unnormalize_axis(source_y_norm,h_src) 93 | # source_x=source_x.data.cpu().numpy() 94 | # source_y=source_y.data.cpu().numpy() 95 | # grid_x,grid_y = np.meshgrid(range(1,w_tgt+1),range(1,h_tgt+1)) 96 | # grid_x = np.repeat(grid_x,axis=0,repeats=batch_size) 97 | # grid_y = np.repeat(grid_y,axis=0,repeats=batch_size) 98 | # disp_x=source_x-grid_x 99 | # disp_y=source_y-grid_y 100 | # flow = np.concatenate((np.expand_dims(disp_x,3),np.expand_dims(disp_y,3)),3) 101 | # return flow 102 | 103 | def th_sampling_grid_to_np_flow(source_grid,h_src,w_src): 104 | # remove batch dimension 105 | source_grid = source_grid.squeeze(0) 106 | # get mask 107 | in_bound_mask=(source_grid.data[:,:,0]>-1) & (source_grid.data[:,:,0]<1) & (source_grid.data[:,:,1]>-1) & (source_grid.data[:,:,1]<1) 108 | in_bound_mask=in_bound_mask.cpu().numpy() 109 | # convert coords 110 | h_tgt,w_tgt=source_grid.size(0),source_grid.size(1) 111 | source_x_norm=source_grid[:,:,0] 112 | source_y_norm=source_grid[:,:,1] 113 | source_x=unnormalize_axis(source_x_norm,w_src) 114 | source_y=unnormalize_axis(source_y_norm,h_src) 115 | source_x=source_x.data.cpu().numpy() 116 | source_y=source_y.data.cpu().numpy() 117 | grid_x,grid_y = np.meshgrid(range(1,w_tgt+1),range(1,h_tgt+1)) 118 | disp_x=source_x-grid_x 119 | disp_y=source_y-grid_y 120 | # apply mask 121 | disp_x = disp_x*in_bound_mask+1e10*(1-in_bound_mask) 122 | disp_y = disp_y*in_bound_mask+1e10*(1-in_bound_mask) 123 | flow = np.concatenate((np.expand_dims(disp_x,2),np.expand_dims(disp_y,2)),2) 124 | return flow 125 | 126 | -------------------------------------------------------------------------------- /geotnf/point_tnf.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.autograd import Variable 3 | import numpy as np 4 | from geotnf.transformation import TpsGridGen 5 | 6 | def normalize_axis(x,L): 7 | return (x-1-(L-1)/2)*2/(L-1) 8 | 9 | def unnormalize_axis(x,L): 10 | return x*(L-1)/2+1+(L-1)/2 11 | 12 | class PointTnf(object): 13 | """ 14 | 15 | Class with functions for transforming a set of points with affine/tps transformations 16 | 17 | """ 18 | def __init__(self, tps_grid_size=3, tps_reg_factor=0, use_cuda=True): 19 | self.use_cuda=use_cuda 20 | self.tpsTnf = TpsGridGen(grid_size=tps_grid_size, 21 | reg_factor=tps_reg_factor, 22 | use_cuda=self.use_cuda) 23 | 24 | def tpsPointTnf(self,theta,points): 25 | # points are expected in [B,2,N], where first row is X and second row is Y 26 | # reshape points for applying Tps transformation 27 | points=points.unsqueeze(3).transpose(1,3) 28 | # apply transformation 29 | warped_points = self.tpsTnf.apply_transformation(theta,points) 30 | # undo reshaping 31 | 
warped_points=warped_points.transpose(3,1).squeeze(3) 32 | return warped_points 33 | 34 | def affPointTnf(self,theta,points): 35 | theta_mat = theta.view(-1,2,3) 36 | warped_points = torch.bmm(theta_mat[:,:,:2],points) 37 | warped_points += theta_mat[:,:,2].unsqueeze(2).expand_as(warped_points) 38 | return warped_points 39 | 40 | def PointsToUnitCoords(P,im_size): 41 | h,w = im_size[:,0],im_size[:,1] 42 | P_norm = P.clone() 43 | # normalize Y 44 | P_norm[:,0,:] = normalize_axis(P[:,0,:],w.unsqueeze(1).expand_as(P[:,0,:])) 45 | # normalize X 46 | P_norm[:,1,:] = normalize_axis(P[:,1,:],h.unsqueeze(1).expand_as(P[:,1,:])) 47 | return P_norm 48 | 49 | def PointsToPixelCoords(P,im_size): 50 | h,w = im_size[:,0],im_size[:,1] 51 | P_norm = P.clone() 52 | # normalize Y 53 | P_norm[:,0,:] = unnormalize_axis(P[:,0,:],w.unsqueeze(1).expand_as(P[:,0,:])) 54 | # normalize X 55 | P_norm[:,1,:] = unnormalize_axis(P[:,1,:],h.unsqueeze(1).expand_as(P[:,1,:])) 56 | return P_norm -------------------------------------------------------------------------------- /geotnf/transformation.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function, division 2 | import os 3 | import sys 4 | from skimage import io 5 | import pandas as pd 6 | import numpy as np 7 | import torch 8 | from torch.nn.modules.module import Module 9 | from torch.utils.data import Dataset 10 | from torch.autograd import Variable 11 | import torch.nn.functional as F 12 | from lib.torch_util import expand_dim 13 | 14 | class ComposedGeometricTnf(object): 15 | """ 16 | 17 | Composed geometric transfromation (affine+tps) 18 | 19 | """ 20 | def __init__(self, tps_grid_size=3, tps_reg_factor=0, out_h=240, out_w=240, 21 | offset_factor=1.0, 22 | padding_crop_factor=None, 23 | use_cuda=True): 24 | 25 | self.padding_crop_factor=padding_crop_factor 26 | 27 | self.affTnf = GeometricTnf(out_h=out_h,out_w=out_w, 28 | geometric_model='affine', 29 | offset_factor=offset_factor if padding_crop_factor is None else padding_crop_factor, 30 | use_cuda=use_cuda) 31 | 32 | self.tpsTnf = GeometricTnf(out_h=out_h,out_w=out_w, 33 | geometric_model='tps', 34 | tps_grid_size=tps_grid_size, 35 | tps_reg_factor=tps_reg_factor, 36 | offset_factor=offset_factor if padding_crop_factor is None else 1.0, 37 | use_cuda=use_cuda) 38 | 39 | def __call__(self, image_batch, theta_aff, theta_aff_tps, use_cuda=True): 40 | 41 | sampling_grid_aff = self.affTnf(image_batch=None, 42 | theta_batch=theta_aff.view(-1,2,3), 43 | return_sampling_grid=True, 44 | return_warped_image=False) 45 | 46 | sampling_grid_aff_tps = self.tpsTnf(image_batch=None, 47 | theta_batch=theta_aff_tps, 48 | return_sampling_grid=True, 49 | return_warped_image=False) 50 | 51 | if self.padding_crop_factor is not None: 52 | sampling_grid_aff_tps = sampling_grid_aff_tps*self.padding_crop_factor; 53 | 54 | # put 1e10 value in region out of bounds of sampling_grid_aff 55 | in_bound_mask_aff = ((sampling_grid_aff[:,:,:,0]>-1) * (sampling_grid_aff[:,:,:,0]<1) * (sampling_grid_aff[:,:,:,1]>-1) * (sampling_grid_aff[:,:,:,1]<1)).unsqueeze(3) 56 | in_bound_mask_aff = in_bound_mask_aff.expand_as(sampling_grid_aff) 57 | sampling_grid_aff = torch.mul(in_bound_mask_aff.float(),sampling_grid_aff) 58 | sampling_grid_aff = torch.add((in_bound_mask_aff.float()-1)*(1e10),sampling_grid_aff) 59 | 60 | # compose transformations 61 | sampling_grid_aff_tps_comp = F.grid_sample(sampling_grid_aff.transpose(2,3).transpose(1,2), 
sampling_grid_aff_tps).transpose(1,2).transpose(2,3) 62 | 63 | # put 1e10 value in region out of bounds of sampling_grid_aff_tps_comp 64 | in_bound_mask_aff_tps=((sampling_grid_aff_tps[:,:,:,0]>-1) * (sampling_grid_aff_tps[:,:,:,0]<1) * (sampling_grid_aff_tps[:,:,:,1]>-1) * (sampling_grid_aff_tps[:,:,:,1]<1)).unsqueeze(3) 65 | in_bound_mask_aff_tps=in_bound_mask_aff_tps.expand_as(sampling_grid_aff_tps_comp) 66 | sampling_grid_aff_tps_comp=torch.mul(in_bound_mask_aff_tps.float(),sampling_grid_aff_tps_comp) 67 | sampling_grid_aff_tps_comp = torch.add((in_bound_mask_aff_tps.float()-1)*(1e10),sampling_grid_aff_tps_comp) 68 | 69 | # sample transformed image 70 | warped_image_batch = F.grid_sample(image_batch, sampling_grid_aff_tps_comp) 71 | 72 | return warped_image_batch 73 | 74 | class GeometricTnf(object): 75 | """ 76 | 77 | Geometric transfromation to an image batch (wrapped in a PyTorch Variable) 78 | ( can be used with no transformation to perform bilinear resizing ) 79 | 80 | """ 81 | def __init__(self, geometric_model='affine', tps_grid_size=3, tps_reg_factor=0, out_h=240, out_w=240, offset_factor=None, use_cuda=True): 82 | self.out_h = out_h 83 | self.out_w = out_w 84 | self.geometric_model = geometric_model 85 | self.use_cuda = use_cuda 86 | self.offset_factor = offset_factor 87 | 88 | if geometric_model=='affine' and offset_factor is None: 89 | self.gridGen = AffineGridGen(out_h=out_h, out_w=out_w, use_cuda=use_cuda) 90 | elif geometric_model=='affine' and offset_factor is not None: 91 | self.gridGen = AffineGridGenV2(out_h=out_h, out_w=out_w, use_cuda=use_cuda) 92 | elif geometric_model=='tps': 93 | self.gridGen = TpsGridGen(out_h=out_h, out_w=out_w, grid_size=tps_grid_size, 94 | reg_factor=tps_reg_factor, use_cuda=use_cuda) 95 | if offset_factor is not None: 96 | self.gridGen.grid_X=self.gridGen.grid_X/offset_factor 97 | self.gridGen.grid_Y=self.gridGen.grid_Y/offset_factor 98 | 99 | self.theta_identity = torch.Tensor(np.expand_dims(np.array([[1,0,0],[0,1,0]]),0).astype(np.float32)) 100 | if use_cuda: 101 | self.theta_identity = self.theta_identity.cuda() 102 | 103 | def __call__(self, image_batch, theta_batch=None, out_h=None, out_w=None, return_warped_image=True, return_sampling_grid=False, padding_factor=1.0, crop_factor=1.0): 104 | if image_batch is None: 105 | b=1 106 | else: 107 | b=image_batch.size(0) 108 | if theta_batch is None: 109 | theta_batch = self.theta_identity 110 | theta_batch = theta_batch.expand(b,2,3).contiguous() 111 | theta_batch = Variable(theta_batch,requires_grad=False) 112 | 113 | # check if output dimensions have been specified at call time and have changed 114 | if (out_h is not None and out_w is not None) and (out_h!=self.out_h or out_w!=self.out_w): 115 | if self.geometric_model=='affine': 116 | gridGen = AffineGridGen(out_h, out_w) 117 | elif self.geometric_model=='tps': 118 | gridGen = TpsGridGen(out_h, out_w, use_cuda=self.use_cuda) 119 | else: 120 | gridGen = self.gridGen 121 | 122 | sampling_grid = gridGen(theta_batch) 123 | 124 | # rescale grid according to crop_factor and padding_factor 125 | if padding_factor != 1 or crop_factor !=1: 126 | sampling_grid = sampling_grid*(padding_factor*crop_factor) 127 | # rescale grid according to offset_factor 128 | if self.offset_factor is not None: 129 | sampling_grid = sampling_grid*self.offset_factor 130 | 131 | if return_sampling_grid and not return_warped_image: 132 | return sampling_grid 133 | 134 | # sample transformed image 135 | warped_image_batch = F.grid_sample(image_batch, sampling_grid) 136 | 
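# sampling_grid has shape (b, out_h, out_w, 2) holding (x, y) coordinates normalized
# to [-1, 1]; F.grid_sample bilinearly samples image_batch at those locations, and
# coordinates falling outside [-1, 1] resolve to zero padding.
# Minimal usage sketch (hypothetical tensors, kept as a comment only):
#   tnf = GeometricTnf(geometric_model='affine', out_h=240, out_w=240, use_cuda=False)
#   resized = tnf(image_batch)  # theta defaults to identity, i.e. a plain bilinear resize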
137 | if return_sampling_grid and return_warped_image: 138 | return (warped_image_batch,sampling_grid) 139 | 140 | return warped_image_batch 141 | 142 | 143 | 144 | class SynthPairTnf(object): 145 | """ 146 | 147 | Generate a synthetically warped training pair using an affine transformation. 148 | 149 | """ 150 | def __init__(self, use_cuda=True, supervision='strong', geometric_model='affine', crop_factor=9/16, output_size=(240,240), padding_factor = 0.5): 151 | assert isinstance(use_cuda, (bool)) 152 | assert isinstance(crop_factor, (float)) 153 | assert isinstance(output_size, (tuple)) 154 | assert isinstance(padding_factor, (float)) 155 | self.supervision=supervision 156 | self.use_cuda=use_cuda 157 | self.crop_factor = crop_factor 158 | self.padding_factor = padding_factor 159 | self.out_h, self.out_w = output_size 160 | self.rescalingTnf = GeometricTnf('affine', out_h=self.out_h, out_w=self.out_w, 161 | use_cuda = self.use_cuda) 162 | self.geometricTnf = GeometricTnf(geometric_model, out_h=self.out_h, out_w=self.out_w, 163 | use_cuda = self.use_cuda) 164 | 165 | 166 | def __call__(self, batch): 167 | image_batch, theta_batch = batch['image'], batch['theta'] 168 | if self.use_cuda: 169 | image_batch = image_batch.cuda() 170 | theta_batch = theta_batch.cuda() 171 | 172 | b, c, h, w = image_batch.size() 173 | 174 | # generate symmetrically padded image for bigger sampling region 175 | image_batch = self.symmetricImagePad(image_batch,self.padding_factor) 176 | 177 | # convert to variables 178 | image_batch = Variable(image_batch,requires_grad=False) 179 | theta_batch = Variable(theta_batch,requires_grad=False) 180 | 181 | # get cropped image 182 | cropped_image_batch = self.rescalingTnf(image_batch=image_batch, 183 | theta_batch=None, 184 | padding_factor=self.padding_factor, 185 | crop_factor=self.crop_factor) # Identity is used as no theta given 186 | # get transformed image 187 | warped_image_batch = self.geometricTnf(image_batch=image_batch, 188 | theta_batch=theta_batch, 189 | padding_factor=self.padding_factor, 190 | crop_factor=self.crop_factor) # Identity is used as no theta given 191 | 192 | if self.supervision=='strong': 193 | return {'source_image': cropped_image_batch, 'target_image': warped_image_batch, 'theta_GT': theta_batch} 194 | 195 | elif self.supervision=='weak': 196 | pos_batch_idx = torch.LongTensor(range(int(b/2))) 197 | neg_batch_idx = torch.LongTensor(range(int(b/2),b)) 198 | if self.use_cuda: 199 | pos_batch_idx = pos_batch_idx.cuda() 200 | neg_batch_idx = neg_batch_idx.cuda() 201 | source_image = torch.cat((torch.index_select(cropped_image_batch,0,pos_batch_idx), 202 | torch.index_select(cropped_image_batch,0,pos_batch_idx)),0) 203 | target_image = torch.cat((torch.index_select(warped_image_batch,0,pos_batch_idx), 204 | torch.index_select(cropped_image_batch,0,neg_batch_idx)),0) 205 | return {'source_image': source_image, 'target_image': target_image, 'theta_GT': theta_batch} 206 | 207 | def symmetricImagePad(self, image_batch, padding_factor): 208 | b, c, h, w = image_batch.size() 209 | pad_h, pad_w = int(h*padding_factor), int(w*padding_factor) 210 | idx_pad_left = torch.LongTensor(range(pad_w-1,-1,-1)) 211 | idx_pad_right = torch.LongTensor(range(w-1,w-pad_w-1,-1)) 212 | idx_pad_top = torch.LongTensor(range(pad_h-1,-1,-1)) 213 | idx_pad_bottom = torch.LongTensor(range(h-1,h-pad_h-1,-1)) 214 | if self.use_cuda: 215 | idx_pad_left = idx_pad_left.cuda() 216 | idx_pad_right = idx_pad_right.cuda() 217 | idx_pad_top = idx_pad_top.cuda() 218 | idx_pad_bottom = 
idx_pad_bottom.cuda() 219 | image_batch = torch.cat((image_batch.index_select(3,idx_pad_left),image_batch, 220 | image_batch.index_select(3,idx_pad_right)),3) 221 | image_batch = torch.cat((image_batch.index_select(2,idx_pad_top),image_batch, 222 | image_batch.index_select(2,idx_pad_bottom)),2) 223 | return image_batch 224 | 225 | class SynthTwoStageTnf(SynthPairTnf): 226 | def __init__(self, use_cuda=True, crop_factor=9/16, output_size=(240,240), padding_factor = 0.5): 227 | super().__init__(use_cuda=use_cuda) 228 | # self.aff_reorder_idx=torch.LongTensor([3,2,5,1,0,4]) 229 | self.geometricTnf = ComposedGeometricTnf(padding_crop_factor=padding_factor*crop_factor,use_cuda=self.use_cuda) 230 | 231 | def __call__(self, batch): 232 | image_batch, theta_batch = batch['image'], batch['theta'] 233 | # theta_aff=torch.index_select(theta_batch[:,:6],1,self.aff_reorder_idx) 234 | theta_aff=theta_batch[:,:6].contiguous() 235 | theta_tps=theta_batch[:,6:] 236 | 237 | if self.use_cuda: 238 | image_batch = image_batch.cuda() 239 | theta_aff = theta_aff.cuda() 240 | theta_tps = theta_tps.cuda() 241 | 242 | b, c, h, w = image_batch.size() 243 | 244 | # generate symmetrically padded image for bigger sampling region 245 | image_batch = self.symmetricImagePad(image_batch,self.padding_factor) 246 | 247 | # convert to variables 248 | image_batch = Variable(image_batch,requires_grad=False) 249 | theta_aff = Variable(theta_aff,requires_grad=False) 250 | theta_tps = Variable(theta_tps,requires_grad=False) 251 | 252 | # get cropped image 253 | cropped_image_batch = self.rescalingTnf(image_batch=image_batch, 254 | theta_batch=None, 255 | padding_factor=self.padding_factor, 256 | crop_factor=self.crop_factor) # Identity is used as no theta given 257 | # get transformed image 258 | warped_image_batch = self.geometricTnf(image_batch=image_batch, 259 | theta_aff=theta_aff, 260 | theta_aff_tps=theta_tps) 261 | 262 | return {'source_image': cropped_image_batch, 'target_image': warped_image_batch, 'theta_GT_aff': theta_aff, 'theta_GT_tps': theta_tps} 263 | 264 | class SynthTwoStageTwoPairTnf(SynthPairTnf): 265 | def __init__(self, use_cuda=True, crop_factor=9/16, output_size=(240,240), padding_factor = 0.5): 266 | super().__init__(use_cuda=use_cuda) 267 | # self.aff_reorder_idx=torch.LongTensor([3,2,5,1,0,4]) 268 | self.geometricTnf = ComposedGeometricTnf(padding_crop_factor=padding_factor*crop_factor,use_cuda=self.use_cuda) 269 | self.affTnf = GeometricTnf(geometric_model='affine', out_h=self.out_h, out_w=self.out_w, 270 | use_cuda = self.use_cuda) 271 | self.tpsTnf = GeometricTnf(geometric_model='tps', out_h=self.out_h, out_w=self.out_w, 272 | use_cuda = self.use_cuda) 273 | 274 | def __call__(self, batch): 275 | image_batch, theta_batch = batch['image'], batch['theta'] 276 | theta_aff=theta_batch[:,:6].contiguous() 277 | # theta_aff=torch.index_select(theta_batch[:,:6],1,self.aff_reorder_idx) 278 | theta_tps=theta_batch[:,6:] 279 | 280 | if self.use_cuda: 281 | image_batch = image_batch.cuda() 282 | theta_aff = theta_aff.cuda() 283 | theta_tps = theta_tps.cuda() 284 | 285 | b, c, h, w = image_batch.size() 286 | 287 | # generate symmetrically padded image for bigger sampling region 288 | image_batch = self.symmetricImagePad(image_batch,self.padding_factor) 289 | 290 | # convert to variables 291 | image_batch = Variable(image_batch,requires_grad=False) 292 | theta_aff = Variable(theta_aff,requires_grad=False) 293 | theta_tps = Variable(theta_tps,requires_grad=False) 294 | 295 | # get cropped image 296 | 
cropped_image_batch = self.rescalingTnf(image_batch=image_batch, 297 | theta_batch=None, 298 | padding_factor=self.padding_factor, 299 | crop_factor=self.crop_factor) # Identity is used as no theta given 300 | # get transformed image 301 | target_image_tps = self.geometricTnf(image_batch=image_batch, 302 | theta_aff=theta_aff, 303 | theta_aff_tps=theta_tps) 304 | 305 | target_image_aff = self.affTnf(image_batch=image_batch, 306 | theta_batch=theta_aff, 307 | padding_factor=self.padding_factor, 308 | crop_factor=self.crop_factor) 309 | 310 | source_image_tps = self.affTnf(image_batch=cropped_image_batch, 311 | theta_batch=theta_aff, 312 | padding_factor=1.0, 313 | crop_factor=1.0) 314 | 315 | return {'source_image_aff': cropped_image_batch, 316 | 'target_image_aff': target_image_aff, 317 | 'source_image_tps': source_image_tps, 318 | 'target_image_tps': target_image_tps, 319 | 'theta_GT_aff': theta_aff, 320 | 'theta_GT_tps': theta_tps} 321 | 322 | class SynthTwoPairTnf(SynthPairTnf): 323 | def __init__(self, use_cuda=True, crop_factor=9/16, output_size=(240,240), padding_factor = 0.5): 324 | super().__init__(use_cuda=use_cuda) 325 | # self.aff_reorder_idx=torch.LongTensor([3,2,5,1,0,4]) 326 | self.affTnf = GeometricTnf(geometric_model='affine', out_h=self.out_h, out_w=self.out_w, 327 | use_cuda = self.use_cuda) 328 | self.tpsTnf = GeometricTnf(geometric_model='tps', out_h=self.out_h, out_w=self.out_w, 329 | use_cuda = self.use_cuda) 330 | 331 | def __call__(self, batch): 332 | image_batch, theta_batch = batch['image'], batch['theta'] 333 | # theta_aff=torch.index_select(theta_batch[:,:6],1,self.aff_reorder_idx) 334 | theta_aff=theta_batch[:,:6].contiguous() 335 | theta_tps=theta_batch[:,6:] 336 | 337 | if self.use_cuda: 338 | image_batch = image_batch.cuda() 339 | theta_aff = theta_aff.cuda() 340 | theta_tps = theta_tps.cuda() 341 | 342 | b, c, h, w = image_batch.size() 343 | 344 | # generate symmetrically padded image for bigger sampling region 345 | image_batch = self.symmetricImagePad(image_batch,self.padding_factor) 346 | 347 | # convert to variables 348 | image_batch = Variable(image_batch,requires_grad=False) 349 | theta_aff = Variable(theta_aff,requires_grad=False) 350 | theta_tps = Variable(theta_tps,requires_grad=False) 351 | 352 | # get cropped image 353 | cropped_image_batch = self.rescalingTnf(image_batch=image_batch, 354 | theta_batch=None, 355 | padding_factor=self.padding_factor, 356 | crop_factor=self.crop_factor) # Identity is used as no theta given 357 | # get transformed image 358 | warped_image_aff = self.affTnf(image_batch=image_batch, 359 | theta_batch=theta_aff, 360 | padding_factor=self.padding_factor, 361 | crop_factor=self.crop_factor) 362 | 363 | warped_image_tps = self.tpsTnf(image_batch=image_batch, 364 | theta_batch=theta_tps, 365 | padding_factor=self.padding_factor, 366 | crop_factor=self.crop_factor) 367 | 368 | return {'source_image': cropped_image_batch, 'target_image_aff': warped_image_aff, 'target_image_tps': warped_image_tps, 'theta_GT_aff': theta_aff, 'theta_GT_tps': theta_tps} 369 | 370 | 371 | class AffineGridGen(Module): 372 | def __init__(self, out_h=240, out_w=240, out_ch = 3, use_cuda=True): 373 | super(AffineGridGen, self).__init__() 374 | self.out_h = out_h 375 | self.out_w = out_w 376 | self.out_ch = out_ch 377 | 378 | def forward(self, theta): 379 | b=theta.size()[0] 380 | if not theta.size()==(b,2,3): 381 | theta = theta.view(-1,2,3) 382 | theta = theta.contiguous() 383 | batch_size = theta.size()[0] 384 | out_size = 
torch.Size((batch_size,self.out_ch,self.out_h,self.out_w)) 385 | return F.affine_grid(theta, out_size) 386 | 387 | class AffineGridGenV2(Module): 388 | def __init__(self, out_h=240, out_w=240, use_cuda=True): 389 | super(AffineGridGenV2, self).__init__() 390 | self.out_h, self.out_w = out_h, out_w 391 | self.use_cuda = use_cuda 392 | 393 | # create grid in numpy 394 | # self.grid = np.zeros( [self.out_h, self.out_w, 3], dtype=np.float32) 395 | # sampling grid with dim-0 coords (Y) 396 | self.grid_X,self.grid_Y = np.meshgrid(np.linspace(-1,1,out_w),np.linspace(-1,1,out_h)) 397 | # grid_X,grid_Y: size [1,H,W,1,1] 398 | self.grid_X = torch.FloatTensor(self.grid_X).unsqueeze(0).unsqueeze(3) 399 | self.grid_Y = torch.FloatTensor(self.grid_Y).unsqueeze(0).unsqueeze(3) 400 | self.grid_X = Variable(self.grid_X,requires_grad=False) 401 | self.grid_Y = Variable(self.grid_Y,requires_grad=False) 402 | if use_cuda: 403 | self.grid_X = self.grid_X.cuda() 404 | self.grid_Y = self.grid_Y.cuda() 405 | 406 | def forward(self, theta): 407 | b=theta.size(0) 408 | if not theta.size()==(b,6): 409 | theta = theta.view(b,6) 410 | theta = theta.contiguous() 411 | 412 | t0=theta[:,0].unsqueeze(1).unsqueeze(2).unsqueeze(3) 413 | t1=theta[:,1].unsqueeze(1).unsqueeze(2).unsqueeze(3) 414 | t2=theta[:,2].unsqueeze(1).unsqueeze(2).unsqueeze(3) 415 | t3=theta[:,3].unsqueeze(1).unsqueeze(2).unsqueeze(3) 416 | t4=theta[:,4].unsqueeze(1).unsqueeze(2).unsqueeze(3) 417 | t5=theta[:,5].unsqueeze(1).unsqueeze(2).unsqueeze(3) 418 | X = expand_dim(self.grid_X,0,b) 419 | Y = expand_dim(self.grid_Y,0,b) 420 | Xp = X*t0 + Y*t1 + t2 421 | Yp = X*t3 + Y*t4 + t5 422 | 423 | return torch.cat((Xp,Yp),3) 424 | 425 | class TpsGridGen(Module): 426 | def __init__(self, out_h=240, out_w=240, use_regular_grid=True, grid_size=3, reg_factor=0, use_cuda=True): 427 | super(TpsGridGen, self).__init__() 428 | self.out_h, self.out_w = out_h, out_w 429 | self.reg_factor = reg_factor 430 | self.use_cuda = use_cuda 431 | 432 | # create grid in numpy 433 | # self.grid = np.zeros( [self.out_h, self.out_w, 3], dtype=np.float32) 434 | # sampling grid with dim-0 coords (Y) 435 | self.grid_X,self.grid_Y = np.meshgrid(np.linspace(-1,1,out_w),np.linspace(-1,1,out_h)) 436 | # grid_X,grid_Y: size [1,H,W,1,1] 437 | self.grid_X = torch.FloatTensor(self.grid_X).unsqueeze(0).unsqueeze(3) 438 | self.grid_Y = torch.FloatTensor(self.grid_Y).unsqueeze(0).unsqueeze(3) 439 | self.grid_X = Variable(self.grid_X,requires_grad=False) 440 | self.grid_Y = Variable(self.grid_Y,requires_grad=False) 441 | if use_cuda: 442 | self.grid_X = self.grid_X.cuda() 443 | self.grid_Y = self.grid_Y.cuda() 444 | 445 | # initialize regular grid for control points P_i 446 | if use_regular_grid: 447 | axis_coords = np.linspace(-1,1,grid_size) 448 | self.N = grid_size*grid_size 449 | P_Y,P_X = np.meshgrid(axis_coords,axis_coords) 450 | P_X = np.reshape(P_X,(-1,1)) # size (N,1) 451 | P_Y = np.reshape(P_Y,(-1,1)) # size (N,1) 452 | P_X = torch.FloatTensor(P_X) 453 | P_Y = torch.FloatTensor(P_Y) 454 | self.Li = Variable(self.compute_L_inverse(P_X,P_Y).unsqueeze(0),requires_grad=False) 455 | self.P_X = P_X.unsqueeze(2).unsqueeze(3).unsqueeze(4).transpose(0,4) 456 | self.P_Y = P_Y.unsqueeze(2).unsqueeze(3).unsqueeze(4).transpose(0,4) 457 | self.P_X = Variable(self.P_X,requires_grad=False) 458 | self.P_Y = Variable(self.P_Y,requires_grad=False) 459 | if use_cuda: 460 | self.P_X = self.P_X.cuda() 461 | self.P_Y = self.P_Y.cuda() 462 | 463 | 464 | def forward(self, theta): 465 | warped_grid = 
self.apply_transformation(theta,torch.cat((self.grid_X,self.grid_Y),3)) 466 | 467 | return warped_grid 468 | 469 | def compute_L_inverse(self,X,Y): 470 | N = X.size()[0] # num of points (along dim 0) 471 | # construct matrix K 472 | Xmat = X.expand(N,N) 473 | Ymat = Y.expand(N,N) 474 | P_dist_squared = torch.pow(Xmat-Xmat.transpose(0,1),2)+torch.pow(Ymat-Ymat.transpose(0,1),2) 475 | P_dist_squared[P_dist_squared==0]=1 # make diagonal 1 to avoid NaN in log computation 476 | K = torch.mul(P_dist_squared,torch.log(P_dist_squared)) 477 | if self.reg_factor != 0: 478 | K+=torch.eye(K.size(0),K.size(1))*self.reg_factor 479 | # construct matrix L 480 | O = torch.FloatTensor(N,1).fill_(1) 481 | Z = torch.FloatTensor(3,3).fill_(0) 482 | P = torch.cat((O,X,Y),1) 483 | L = torch.cat((torch.cat((K,P),1),torch.cat((P.transpose(0,1),Z),1)),0) 484 | Li = torch.inverse(L) 485 | if self.use_cuda: 486 | Li = Li.cuda() 487 | return Li 488 | 489 | def apply_transformation(self,theta,points): 490 | if theta.dim()==2: 491 | theta = theta.unsqueeze(2).unsqueeze(3) 492 | # points should be in the [B,H,W,2] format, 493 | # where points[:,:,:,0] are the X coords 494 | # and points[:,:,:,1] are the Y coords 495 | 496 | # input are the corresponding control points P_i 497 | batch_size = theta.size()[0] 498 | # split theta into point coordinates 499 | Q_X=theta[:,:self.N,:,:].squeeze(3) 500 | Q_Y=theta[:,self.N:,:,:].squeeze(3) 501 | 502 | # get spatial dimensions of points 503 | points_b = points.size()[0] 504 | points_h = points.size()[1] 505 | points_w = points.size()[2] 506 | 507 | # repeat pre-defined control points along spatial dimensions of points to be transformed 508 | P_X = self.P_X.expand((1,points_h,points_w,1,self.N)) 509 | P_Y = self.P_Y.expand((1,points_h,points_w,1,self.N)) 510 | 511 | # compute weigths for non-linear part 512 | W_X = torch.bmm(self.Li[:,:self.N,:self.N].expand((batch_size,self.N,self.N)),Q_X) 513 | W_Y = torch.bmm(self.Li[:,:self.N,:self.N].expand((batch_size,self.N,self.N)),Q_Y) 514 | # reshape 515 | # W_X,W,Y: size [B,H,W,1,N] 516 | W_X = W_X.unsqueeze(3).unsqueeze(4).transpose(1,4).repeat(1,points_h,points_w,1,1) 517 | W_Y = W_Y.unsqueeze(3).unsqueeze(4).transpose(1,4).repeat(1,points_h,points_w,1,1) 518 | # compute weights for affine part 519 | A_X = torch.bmm(self.Li[:,self.N:,:self.N].expand((batch_size,3,self.N)),Q_X) 520 | A_Y = torch.bmm(self.Li[:,self.N:,:self.N].expand((batch_size,3,self.N)),Q_Y) 521 | # reshape 522 | # A_X,A,Y: size [B,H,W,1,3] 523 | A_X = A_X.unsqueeze(3).unsqueeze(4).transpose(1,4).repeat(1,points_h,points_w,1,1) 524 | A_Y = A_Y.unsqueeze(3).unsqueeze(4).transpose(1,4).repeat(1,points_h,points_w,1,1) 525 | 526 | # compute distance P_i - (grid_X,grid_Y) 527 | # grid is expanded in point dim 4, but not in batch dim 0, as points P_X,P_Y are fixed for all batch 528 | points_X_for_summation = points[:,:,:,0].unsqueeze(3).unsqueeze(4).expand(points[:,:,:,0].size()+(1,self.N)) 529 | points_Y_for_summation = points[:,:,:,1].unsqueeze(3).unsqueeze(4).expand(points[:,:,:,1].size()+(1,self.N)) 530 | 531 | if points_b==1: 532 | delta_X = points_X_for_summation-P_X 533 | delta_Y = points_Y_for_summation-P_Y 534 | else: 535 | # use expanded P_X,P_Y in batch dimension 536 | delta_X = points_X_for_summation-P_X.expand_as(points_X_for_summation) 537 | delta_Y = points_Y_for_summation-P_Y.expand_as(points_Y_for_summation) 538 | 539 | dist_squared = torch.pow(delta_X,2)+torch.pow(delta_Y,2) 540 | # U: size [1,H,W,1,N] 541 | dist_squared[dist_squared==0]=1 # avoid NaN in 
log computation 542 | U = torch.mul(dist_squared,torch.log(dist_squared)) 543 | 544 | # expand grid in batch dimension if necessary 545 | points_X_batch = points[:,:,:,0].unsqueeze(3) 546 | points_Y_batch = points[:,:,:,1].unsqueeze(3) 547 | if points_b==1: 548 | points_X_batch = points_X_batch.expand((batch_size,)+points_X_batch.size()[1:]) 549 | points_Y_batch = points_Y_batch.expand((batch_size,)+points_Y_batch.size()[1:]) 550 | 551 | points_X_prime = A_X[:,:,:,:,0]+ \ 552 | torch.mul(A_X[:,:,:,:,1],points_X_batch) + \ 553 | torch.mul(A_X[:,:,:,:,2],points_Y_batch) + \ 554 | torch.sum(torch.mul(W_X,U.expand_as(W_X)),4) 555 | 556 | points_Y_prime = A_Y[:,:,:,:,0]+ \ 557 | torch.mul(A_Y[:,:,:,:,1],points_X_batch) + \ 558 | torch.mul(A_Y[:,:,:,:,2],points_Y_batch) + \ 559 | torch.sum(torch.mul(W_Y,U.expand_as(W_Y)),4) 560 | 561 | return torch.cat((points_X_prime,points_Y_prime),3) 562 | -------------------------------------------------------------------------------- /lib/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ShuaiyiHuang/DCCNet/e4f1932896153f535ba42c92f74980e3be27cefe/lib/__init__.py -------------------------------------------------------------------------------- /lib/conv4d.py: -------------------------------------------------------------------------------- 1 | # conv4d.py file is from NC-Net Rocco et al. NIPS2018 open-source code 2 | 3 | import math 4 | import torch 5 | from torch.nn.parameter import Parameter 6 | import torch.nn.functional as F 7 | from torch.nn import Module 8 | from torch.nn.modules.conv import _ConvNd 9 | from torch.nn.modules.utils import _quadruple 10 | from torch.autograd import Variable 11 | from torch.nn import Conv2d 12 | 13 | def conv4d(data,filters,bias=None,permute_filters=True,use_half=False): 14 | b,c,h,w,d,t=data.size() 15 | 16 | data=data.permute(2,0,1,3,4,5).contiguous() # permute to avoid making contiguous inside loop 17 | 18 | # Same permutation is done with filters, unless already provided with permutation 19 | if permute_filters: 20 | filters=filters.permute(2,0,1,3,4,5).contiguous() # permute to avoid making contiguous inside loop 21 | 22 | c_out=filters.size(1) 23 | if use_half: 24 | output = Variable(torch.HalfTensor(h,b,c_out,w,d,t),requires_grad=data.requires_grad) 25 | else: 26 | output = Variable(torch.zeros(h,b,c_out,w,d,t),requires_grad=data.requires_grad) 27 | 28 | padding=filters.size(0)//2 29 | if use_half: 30 | Z=Variable(torch.zeros(padding,b,c,w,d,t).half()) 31 | else: 32 | Z=Variable(torch.zeros(padding,b,c,w,d,t)) 33 | 34 | if data.is_cuda: 35 | Z=Z.cuda(data.get_device()) 36 | output=output.cuda(data.get_device()) 37 | 38 | data_padded = torch.cat((Z,data,Z),0) 39 | 40 | 41 | for i in range(output.size(0)): # loop on first feature dimension 42 | # convolve with center channel of filter (at position=padding) 43 | output[i,:,:,:,:,:]=F.conv3d(data_padded[i+padding,:,:,:,:,:], 44 | filters[padding,:,:,:,:,:], bias=bias, stride=1, padding=padding) 45 | # convolve with upper/lower channels of filter (at postions [:padding] [padding+1:]) 46 | for p in range(1,padding+1): 47 | output[i,:,:,:,:,:]=output[i,:,:,:,:,:]+F.conv3d(data_padded[i+padding-p,:,:,:,:,:], 48 | filters[padding-p,:,:,:,:,:], bias=None, stride=1, padding=padding) 49 | output[i,:,:,:,:,:]=output[i,:,:,:,:,:]+F.conv3d(data_padded[i+padding+p,:,:,:,:,:], 50 | filters[padding+p,:,:,:,:,:], bias=None, stride=1, padding=padding) 51 | 52 | output=output.permute(1,2,0,3,4,5).contiguous() 
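# the loop above realizes the 4D convolution as a sum of 3D convolutions taken over
# the first kernel dimension; output was assembled as (h, b, c_out, w, d, t) and the
# permute restores the input layout (b, c_out, h, w, d, t).
# Shape sanity sketch (hypothetical sizes, CPU, kept as a comment only):
#   y = Conv4d(in_channels=1, out_channels=10, kernel_size=3)(Variable(torch.rand(2, 1, 5, 5, 5, 5)))
#   # y.size() == (2, 10, 5, 5, 5, 5): the zero padding added inside conv4d preserves the 4D spatial size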
53 | return output 54 | 55 | class Conv4d(_ConvNd): 56 | """Applies a 4D convolution over an input signal composed of several input 57 | planes. 58 | """ 59 | 60 | def __init__(self, in_channels, out_channels, kernel_size, bias=True, pre_permuted_filters=True): 61 | # stride, dilation and groups !=1 functionality not tested 62 | stride=1 63 | dilation=1 64 | groups=1 65 | # zero padding is added automatically in conv4d function to preserve tensor size 66 | padding = 0 67 | kernel_size = _quadruple(kernel_size) 68 | stride = _quadruple(stride) 69 | padding = _quadruple(padding) 70 | dilation = _quadruple(dilation) 71 | super(Conv4d, self).__init__( 72 | in_channels, out_channels, kernel_size, stride, padding, dilation, 73 | False, _quadruple(0), groups, bias) 74 | # weights will be sliced along one dimension during convolution loop 75 | # make the looping dimension to be the first one in the tensor, 76 | # so that we don't need to call contiguous() inside the loop 77 | self.pre_permuted_filters=pre_permuted_filters 78 | if self.pre_permuted_filters: 79 | self.weight.data=self.weight.data.permute(2,0,1,3,4,5).contiguous() 80 | self.use_half=False 81 | 82 | 83 | def forward(self, input): 84 | return conv4d(input, self.weight, bias=self.bias,permute_filters=not self.pre_permuted_filters,use_half=self.use_half) # filters pre-permuted in constructor 85 | -------------------------------------------------------------------------------- /lib/dataloader.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.multiprocessing as multiprocessing 3 | from torch.utils.data.sampler import SequentialSampler, RandomSampler, BatchSampler 4 | import collections 5 | import sys 6 | import traceback 7 | import threading 8 | import numpy as np 9 | import numpy.random 10 | 11 | #from torch._six import string_classes 12 | PY2 = sys.version_info[0] == 2 13 | PY3 = sys.version_info[0] == 3 14 | 15 | if PY2: 16 | string_classes = basestring 17 | else: 18 | string_classes = (str, bytes) 19 | 20 | 21 | if sys.version_info[0] == 2: 22 | import Queue as queue 23 | else: 24 | import queue 25 | 26 | 27 | _use_shared_memory = False 28 | """Whether to use shared memory in default_collate""" 29 | 30 | 31 | class ExceptionWrapper(object): 32 | "Wraps an exception plus traceback to communicate across threads" 33 | 34 | def __init__(self, exc_info): 35 | self.exc_type = exc_info[0] 36 | self.exc_msg = "".join(traceback.format_exception(*exc_info)) 37 | 38 | 39 | def _worker_loop(dataset, index_queue, data_queue, collate_fn, rng_seed): 40 | global _use_shared_memory 41 | _use_shared_memory = True 42 | 43 | np.random.seed(rng_seed) 44 | torch.set_num_threads(1) 45 | while True: 46 | r = index_queue.get() 47 | if r is None: 48 | data_queue.put(None) 49 | break 50 | idx, batch_indices = r 51 | try: 52 | samples = collate_fn([dataset[i] for i in batch_indices]) 53 | except Exception: 54 | data_queue.put((idx, ExceptionWrapper(sys.exc_info()))) 55 | else: 56 | data_queue.put((idx, samples)) 57 | 58 | 59 | def _pin_memory_loop(in_queue, out_queue, done_event): 60 | while True: 61 | try: 62 | r = in_queue.get() 63 | except: 64 | if done_event.is_set(): 65 | return 66 | raise 67 | if r is None: 68 | break 69 | if isinstance(r[1], ExceptionWrapper): 70 | out_queue.put(r) 71 | continue 72 | idx, batch = r 73 | try: 74 | batch = pin_memory_batch(batch) 75 | except Exception: 76 | out_queue.put((idx, ExceptionWrapper(sys.exc_info()))) 77 | else: 78 | out_queue.put((idx, batch)) 79 
| 80 | 81 | numpy_type_map = { 82 | 'float64': torch.DoubleTensor, 83 | 'float32': torch.FloatTensor, 84 | 'float16': torch.HalfTensor, 85 | 'int64': torch.LongTensor, 86 | 'int32': torch.IntTensor, 87 | 'int16': torch.ShortTensor, 88 | 'int8': torch.CharTensor, 89 | 'uint8': torch.ByteTensor, 90 | } 91 | 92 | 93 | def default_collate(batch): 94 | "Puts each data field into a tensor with outer dimension batch size" 95 | if torch.is_tensor(batch[0]): 96 | out = None 97 | if _use_shared_memory: 98 | # If we're in a background process, concatenate directly into a 99 | # shared memory tensor to avoid an extra copy 100 | numel = sum([x.numel() for x in batch]) 101 | storage = batch[0].storage()._new_shared(numel) 102 | out = batch[0].new(storage) 103 | return torch.stack(batch, 0, out=out) 104 | elif type(batch[0]).__module__ == 'numpy': 105 | elem = batch[0] 106 | if type(elem).__name__ == 'ndarray': 107 | return torch.stack([torch.from_numpy(b) for b in batch], 0) 108 | if elem.shape == (): # scalars 109 | py_type = float if elem.dtype.name.startswith('float') else int 110 | return numpy_type_map[elem.dtype.name](list(map(py_type, batch))) 111 | elif isinstance(batch[0], int): 112 | return torch.LongTensor(batch) 113 | elif isinstance(batch[0], float): 114 | return torch.DoubleTensor(batch) 115 | elif isinstance(batch[0], string_classes): 116 | return batch 117 | elif isinstance(batch[0], collections.Mapping): 118 | return {key: default_collate([d[key] for d in batch]) for key in batch[0]} 119 | elif isinstance(batch[0], collections.Sequence): 120 | transposed = zip(*batch) 121 | return [default_collate(samples) for samples in transposed] 122 | 123 | raise TypeError(("batch must contain tensors, numbers, dicts or lists; found {}" 124 | .format(type(batch[0])))) 125 | 126 | 127 | def pin_memory_batch(batch): 128 | if torch.is_tensor(batch): 129 | return batch.pin_memory() 130 | elif isinstance(batch, string_classes): 131 | return batch 132 | elif isinstance(batch, collections.Mapping): 133 | return {k: pin_memory_batch(sample) for k, sample in batch.items()} 134 | elif isinstance(batch, collections.Sequence): 135 | return [pin_memory_batch(sample) for sample in batch] 136 | else: 137 | return batch 138 | 139 | 140 | class DataLoaderIter(object): 141 | "Iterates once over the DataLoader's dataset, as specified by the sampler" 142 | 143 | def __init__(self, loader): 144 | self.dataset = loader.dataset 145 | self.collate_fn = loader.collate_fn 146 | self.batch_sampler = loader.batch_sampler 147 | self.num_workers = loader.num_workers 148 | self.pin_memory = loader.pin_memory 149 | self.done_event = threading.Event() 150 | 151 | self.sample_iter = iter(self.batch_sampler) 152 | 153 | if self.num_workers > 0: 154 | self.index_queue = multiprocessing.SimpleQueue() 155 | self.data_queue = multiprocessing.SimpleQueue() 156 | self.batches_outstanding = 0 157 | self.shutdown = False 158 | self.send_idx = 0 159 | self.rcvd_idx = 0 160 | self.reorder_dict = {} 161 | 162 | self.workers = [ 163 | multiprocessing.Process( 164 | target=_worker_loop, 165 | args=(self.dataset, self.index_queue, self.data_queue, self.collate_fn, np.random.randint(0, 4294967296, dtype='uint32'))) 166 | for _ in range(self.num_workers)] 167 | 168 | for w in self.workers: 169 | w.daemon = True # ensure that the worker exits on process exit 170 | w.start() 171 | 172 | if self.pin_memory: 173 | in_data = self.data_queue 174 | self.data_queue = queue.Queue() 175 | self.pin_thread = threading.Thread( 176 | target=_pin_memory_loop, 177 
| args=(in_data, self.data_queue, self.done_event)) 178 | self.pin_thread.daemon = True 179 | self.pin_thread.start() 180 | 181 | # prime the prefetch loop 182 | for _ in range(2 * self.num_workers): 183 | self._put_indices() 184 | 185 | def __len__(self): 186 | return len(self.batch_sampler) 187 | 188 | def __next__(self): 189 | if self.num_workers == 0: # same-process loading 190 | indices = next(self.sample_iter) # may raise StopIteration 191 | batch = self.collate_fn([self.dataset[i] for i in indices]) 192 | if self.pin_memory: 193 | batch = pin_memory_batch(batch) 194 | return batch 195 | 196 | # check if the next sample has already been generated 197 | if self.rcvd_idx in self.reorder_dict: 198 | batch = self.reorder_dict.pop(self.rcvd_idx) 199 | return self._process_next_batch(batch) 200 | 201 | if self.batches_outstanding == 0: 202 | self._shutdown_workers() 203 | raise StopIteration 204 | 205 | while True: 206 | assert (not self.shutdown and self.batches_outstanding > 0) 207 | idx, batch = self.data_queue.get() 208 | self.batches_outstanding -= 1 209 | if idx != self.rcvd_idx: 210 | # store out-of-order samples 211 | self.reorder_dict[idx] = batch 212 | continue 213 | return self._process_next_batch(batch) 214 | 215 | next = __next__ # Python 2 compatibility 216 | 217 | def __iter__(self): 218 | return self 219 | 220 | def _put_indices(self): 221 | assert self.batches_outstanding < 2 * self.num_workers 222 | indices = next(self.sample_iter, None) 223 | if indices is None: 224 | return 225 | self.index_queue.put((self.send_idx, indices)) 226 | self.batches_outstanding += 1 227 | self.send_idx += 1 228 | 229 | def _process_next_batch(self, batch): 230 | self.rcvd_idx += 1 231 | self._put_indices() 232 | if isinstance(batch, ExceptionWrapper): 233 | raise batch.exc_type(batch.exc_msg) 234 | return batch 235 | 236 | def __getstate__(self): 237 | # TODO: add limited pickling support for sharing an iterator 238 | # across multiple threads for HOGWILD. 239 | # Probably the best way to do this is by moving the sample pushing 240 | # to a separate thread and then just sharing the data queue 241 | # but signalling the end is tricky without a non-blocking API 242 | raise NotImplementedError("DataLoaderIterator cannot be pickled") 243 | 244 | def _shutdown_workers(self): 245 | if not self.shutdown: 246 | self.shutdown = True 247 | self.done_event.set() 248 | for _ in self.workers: 249 | self.index_queue.put(None) 250 | 251 | def __del__(self): 252 | if self.num_workers > 0: 253 | self._shutdown_workers() 254 | 255 | 256 | class DataLoader(object): 257 | """ 258 | Data loader. Combines a dataset and a sampler, and provides 259 | single- or multi-process iterators over the dataset. 260 | 261 | Arguments: 262 | dataset (Dataset): dataset from which to load the data. 263 | batch_size (int, optional): how many samples per batch to load 264 | (default: 1). 265 | shuffle (bool, optional): set to ``True`` to have the data reshuffled 266 | at every epoch (default: False). 267 | sampler (Sampler, optional): defines the strategy to draw samples from 268 | the dataset. If specified, ``shuffle`` must be False. 269 | batch_sampler (Sampler, optional): like sampler, but returns a batch of 270 | indices at a time. Mutually exclusive with batch_size, shuffle, 271 | sampler, and drop_last. 272 | num_workers (int, optional): how many subprocesses to use for data 273 | loading. 
0 means that the data will be loaded in the main process 274 | (default: 0) 275 | collate_fn (callable, optional): merges a list of samples to form a mini-batch. 276 | pin_memory (bool, optional): If ``True``, the data loader will copy tensors 277 | into CUDA pinned memory before returning them. 278 | drop_last (bool, optional): set to ``True`` to drop the last incomplete batch, 279 | if the dataset size is not divisible by the batch size. If False and 280 | the size of dataset is not divisible by the batch size, then the last batch 281 | will be smaller. (default: False) 282 | """ 283 | 284 | def __init__(self, dataset, batch_size=1, shuffle=False, sampler=None, batch_sampler=None, 285 | num_workers=0, collate_fn=default_collate, pin_memory=False, drop_last=False): 286 | self.dataset = dataset 287 | self.batch_size = batch_size 288 | self.num_workers = num_workers 289 | self.collate_fn = collate_fn 290 | self.pin_memory = pin_memory 291 | self.drop_last = drop_last 292 | 293 | if batch_sampler is not None: 294 | if batch_size > 1 or shuffle or sampler is not None or drop_last: 295 | raise ValueError('batch_sampler is mutually exclusive with ' 296 | 'batch_size, shuffle, sampler, and drop_last') 297 | 298 | if sampler is not None and shuffle: 299 | raise ValueError('sampler is mutually exclusive with shuffle') 300 | 301 | if batch_sampler is None: 302 | if sampler is None: 303 | if shuffle: 304 | sampler = RandomSampler(dataset) 305 | else: 306 | sampler = SequentialSampler(dataset) 307 | batch_sampler = BatchSampler(sampler, batch_size, drop_last) 308 | 309 | self.sampler = sampler 310 | self.batch_sampler = batch_sampler 311 | 312 | def __iter__(self): 313 | return DataLoaderIter(self) 314 | 315 | def __len__(self): 316 | return len(self.batch_sampler) -------------------------------------------------------------------------------- /lib/eval_util_dynamic.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn 3 | import numpy as np 4 | import os 5 | from torch.autograd import Variable 6 | from torch.utils.data import DataLoader 7 | 8 | from lib.dataloader import default_collate 9 | from lib.torch_util import BatchTensorToVars 10 | 11 | from lib.point_tnf_dynamic import corr_to_matches 12 | from lib.normalization import NormalizeImageDict 13 | 14 | # dense flow 15 | from geotnf.flow import th_sampling_grid_to_np_flow,write_flo_file 16 | from lib.py_util import create_file_path 17 | 18 | from lib.pf_dataset import PFPascalDataset 19 | from lib.point_tnf_dynamic import PointsToUnitCoords, PointsToPixelCoords, bilinearInterpPointTnf 20 | 21 | def pck(source_points,warped_points,L_pck,alpha=0.1): 22 | # compute precentage of correct keypoints 23 | batch_size=source_points.size(0) 24 | pck=torch.zeros((batch_size)) 25 | for i in range(batch_size): 26 | p_src = source_points[i,:] 27 | p_wrp = warped_points[i,:] 28 | N_pts = torch.sum(torch.ne(p_src[0,:],-1)*torch.ne(p_src[1,:],-1)) 29 | point_distance = torch.pow(torch.sum(torch.pow(p_src[:,:N_pts]-p_wrp[:,:N_pts],2),0),0.5) 30 | L_pck_mat = L_pck[i].expand_as(point_distance) 31 | correct_points = torch.le(point_distance,L_pck_mat*alpha) 32 | pck[i]=torch.mean(correct_points.float()) 33 | return pck 34 | 35 | 36 | def pck_metric(batch,batch_start_idx,matches,stats,args,use_cuda=True,alpha=0.1): 37 | 38 | source_im_size = batch['source_im_size'] 39 | target_im_size = batch['target_im_size'] 40 | 41 | source_points = batch['source_points'] 42 | target_points = 
batch['target_points'] 43 | 44 | # warp points with estimated transformations 45 | target_points_norm = PointsToUnitCoords(target_points,target_im_size) 46 | 47 | # compute points stage 1 only 48 | warped_points_norm = bilinearInterpPointTnf(matches,target_points_norm) 49 | warped_points = PointsToPixelCoords(warped_points_norm,source_im_size) 50 | 51 | L_pck = batch['L_pck'].data 52 | 53 | current_batch_size=batch['source_im_size'].size(0) 54 | indices = range(batch_start_idx,batch_start_idx+current_batch_size) 55 | 56 | # compute PCK 57 | pck_batch = pck(source_points.data, warped_points.data, L_pck,alpha=alpha) 58 | stats['point_tnf']['pck'][indices] = pck_batch.unsqueeze(1).cpu().numpy() 59 | 60 | return stats 61 | 62 | #2019/02/22 Friday Shuaiyi 63 | ''' 64 | Given dataset and model, turn it into dataloader with batchsize=1, evaluate and report pck 65 | ''' 66 | 67 | def pfpascal_test_dataloader(image_size,eval_dataset_path,csv_file = 'image_pairs/test_pairs.csv'): 68 | # Dataset and dataloader 69 | Dataset = PFPascalDataset 70 | collate_fn = default_collate 71 | 72 | cnn_image_size = (image_size, image_size) 73 | 74 | dataset = Dataset(csv_file=os.path.join(eval_dataset_path, csv_file), 75 | dataset_path=eval_dataset_path, 76 | transform=NormalizeImageDict(['source_image', 'target_image']), 77 | output_size=cnn_image_size) 78 | dataset.pck_procedure = 'scnet' 79 | 80 | # Only batch_size=1 is supported for evaluation 81 | batch_size = 1 82 | 83 | dataloader = DataLoader(dataset, batch_size=batch_size, 84 | shuffle=False, num_workers=0, 85 | collate_fn=collate_fn) 86 | 87 | return dataloader 88 | 89 | def pfdataset_pck(dataloader, model, verbose = False,alpha=0.1): 90 | model.eval() 91 | use_cuda = torch.cuda.is_available() 92 | collate_fn = default_collate 93 | batch_size = 1 94 | 95 | batch_tnf = BatchTensorToVars(use_cuda=use_cuda) 96 | 97 | model.eval() 98 | 99 | # initialize vector for storing results 100 | stats = {} 101 | stats['point_tnf'] = {} 102 | stats['point_tnf']['pck'] = np.zeros((len(dataloader.dataset), 1)) 103 | 104 | # Compute 105 | for i, batch in enumerate(dataloader): 106 | 107 | batch = batch_tnf(batch) 108 | batch_start_idx = batch_size * i 109 | 110 | # corr4d = model(batch) 111 | out = model(batch) 112 | 113 | # get matches 114 | xA, yA, xB, yB, sB = corr_to_matches(out, do_softmax=True) 115 | 116 | matches = (xA, yA, xB, yB) 117 | stats = pck_metric(batch, batch_start_idx, matches, stats, None, use_cuda,alpha=alpha) 118 | if verbose: 119 | print('Batch: [{}/{} ({:.0f}%)]'.format(i, len(dataloader), 100. 
* i / len(dataloader))) 120 | 121 | # Print results 122 | results = stats['point_tnf']['pck'] 123 | good_idx = np.flatnonzero((results != -1) * ~np.isnan(results)) 124 | if verbose: 125 | print('Total: ' + str(results.size)) 126 | print('Valid: ' + str(good_idx.size)) 127 | filtered_results = results[good_idx] 128 | 129 | if verbose: 130 | print('PCK:', '{:.2%}'.format(np.mean(filtered_results))) 131 | 132 | pck_value = np.mean(filtered_results) 133 | 134 | return pck_value 135 | 136 | ''' 137 | Given val dataset and model, turn it into dataloader with batchsize=1, evaluate and report pck 138 | ''' 139 | def pfpascal_val_dataloader(image_size,eval_dataset_path,csv_file = 'image_pairs/val_pairs.csv'): 140 | # Dataset and dataloader 141 | Dataset = PFPascalDataset 142 | collate_fn = default_collate 143 | 144 | cnn_image_size = (image_size, image_size) 145 | 146 | dataset = Dataset(csv_file=os.path.join(eval_dataset_path, csv_file), 147 | dataset_path=eval_dataset_path, 148 | transform=NormalizeImageDict(['source_image', 'target_image']), 149 | output_size=cnn_image_size) 150 | dataset.pck_procedure = 'scnet' 151 | 152 | # Only batch_size=1 is supported for evaluation todo 153 | batch_size = 1 154 | 155 | dataloader = DataLoader(dataset, batch_size=batch_size, 156 | shuffle=False, num_workers=0, 157 | collate_fn=collate_fn) 158 | 159 | return dataloader 160 | 161 | # for dense flow evaluation 162 | def flow_metrics(batch, batch_start_idx, matches, stats, args, use_cuda=True): 163 | result_path = args.flow_output_dir 164 | 165 | # pt = PointTnf(use_cuda=use_cuda) 166 | 167 | batch_size = batch['source_im_size'].size(0) 168 | for b in range(batch_size): 169 | h_src = int(batch['source_im_size'][b, 0].data.cpu().numpy()) 170 | w_src = int(batch['source_im_size'][b, 1].data.cpu().numpy()) 171 | h_tgt = int(batch['target_im_size'][b, 0].data.cpu().numpy()) 172 | w_tgt = int(batch['target_im_size'][b, 1].data.cpu().numpy()) 173 | 174 | grid_X, grid_Y = np.meshgrid(np.linspace(-1, 1, w_tgt), np.linspace(-1, 1, h_tgt)) 175 | grid_X = torch.FloatTensor(grid_X).unsqueeze(0).unsqueeze(3) 176 | grid_Y = torch.FloatTensor(grid_Y).unsqueeze(0).unsqueeze(3) 177 | grid_X = Variable(grid_X, requires_grad=False) 178 | grid_Y = Variable(grid_Y, requires_grad=False) 179 | if use_cuda: 180 | grid_X = grid_X.cuda() 181 | grid_Y = grid_Y.cuda() 182 | 183 | grid_X_vec = grid_X.view(1, 1, -1) 184 | grid_Y_vec = grid_Y.view(1, 1, -1) 185 | 186 | grid_XY_vec = torch.cat((grid_X_vec, grid_Y_vec), 1) 187 | 188 | def pointsToGrid(x, h_tgt=h_tgt, w_tgt=w_tgt): 189 | return x.contiguous().view(1, 2, h_tgt, w_tgt).transpose(1, 2).transpose(2, 3) 190 | 191 | idx = batch_start_idx + b 192 | source_im_size = batch['source_im_size'] 193 | warped_points_norm = bilinearInterpPointTnf(matches, grid_XY_vec) 194 | 195 | # warped_points = PointsToPixelCoords(warped_points_norm,source_im_size) 196 | warped_points = pointsToGrid(warped_points_norm) 197 | 198 | # grid_aff = pointsToGrid(pt.affPointTnf(theta_aff[b, :].unsqueeze(0), grid_XY_vec)) 199 | flow_aff = th_sampling_grid_to_np_flow(source_grid=warped_points, h_src=h_src, w_src=w_src) 200 | flow_aff_path = os.path.join(result_path, batch['flow_path'][b]) 201 | 202 | create_file_path(flow_aff_path) 203 | 204 | write_flo_file(flow_aff, flow_aff_path) 205 | 206 | idx = batch_start_idx + b 207 | return stats -------------------------------------------------------------------------------- /lib/im_pair_dataset.py: 
-------------------------------------------------------------------------------- 1 | from __future__ import print_function, division 2 | import os 3 | import torch 4 | from torch.autograd import Variable 5 | from torch.utils.data import Dataset 6 | from skimage import io 7 | import pandas as pd 8 | import numpy as np 9 | from lib.transformation import AffineTnf 10 | 11 | class ImagePairDataset(Dataset): 12 | 13 | """ 14 | 15 | Image pair dataset used for weak supervision 16 | 17 | 18 | Args: 19 | csv_file (string): Path to the csv file with image names and transformations. 20 | training_image_path (string): Directory with the images. 21 | output_size (2-tuple): Desired output size 22 | transform (callable): Transformation for post-processing the training pair (eg. image normalization) 23 | 24 | """ 25 | 26 | def __init__(self, dataset_csv_path, dataset_csv_file, dataset_image_path, dataset_size=0,output_size=(240,240),transform=None,random_crop=False): 27 | self.random_crop=random_crop 28 | self.out_h, self.out_w = output_size 29 | self.train_data = pd.read_csv(os.path.join(dataset_csv_path,dataset_csv_file)) 30 | if dataset_size is not None and dataset_size!=0: 31 | dataset_size = min((dataset_size,len(self.train_data))) 32 | self.train_data = self.train_data.iloc[0:dataset_size,:] 33 | self.img_A_names = self.train_data.iloc[:,0] 34 | self.img_B_names = self.train_data.iloc[:,1] 35 | # self.set = self.train_data.iloc[:,2].as_matrix() 36 | # self.flip = self.train_data.iloc[:, 3].as_matrix().astype('int') 37 | self.set = self.train_data.iloc[:,2].values 38 | self.flip = self.train_data.iloc[:, 3].values.astype('int') 39 | self.dataset_image_path = dataset_image_path 40 | self.transform = transform 41 | # no cuda as dataset is called from CPU threads in dataloader and produces confilct 42 | self.affineTnf = AffineTnf(out_h=self.out_h, out_w=self.out_w, use_cuda = False) 43 | 44 | def __len__(self): 45 | return len(self.img_A_names) 46 | 47 | def __getitem__(self, idx): 48 | # get pre-processed images 49 | image_A,im_size_A = self.get_image(self.img_A_names,idx,self.flip[idx]) 50 | image_B,im_size_B = self.get_image(self.img_B_names,idx,self.flip[idx]) 51 | 52 | image_set = self.set[idx] 53 | 54 | #sample = {'source_image': image_A, 'target_image': image_B, 'source_im_size': im_size_A, 'target_im_size': im_size_B, 'set':image_set} 55 | 56 | #todo for dataparalle, not load image_set 57 | sample = {'source_image': image_A, 'target_image': image_B, 'source_im_size': im_size_A, 58 | 'target_im_size': im_size_B,} 59 | 60 | if self.transform: 61 | sample = self.transform(sample) 62 | 63 | return sample 64 | 65 | def get_image(self,img_name_list,idx,flip): 66 | img_name = os.path.join(self.dataset_image_path, img_name_list.iloc[idx]) 67 | image = io.imread(img_name) 68 | 69 | # if grayscale convert to 3-channel image 70 | if image.ndim==2: 71 | image=np.repeat(np.expand_dims(image,2),axis=2,repeats=3) 72 | 73 | # do random crop 74 | if self.random_crop: 75 | h,w,c=image.shape 76 | top=np.random.randint(h/4) 77 | bottom=int(3*h/4+np.random.randint(h/4)) 78 | left=np.random.randint(w/4) 79 | right=int(3*w/4+np.random.randint(w/4)) 80 | image = image[top:bottom,left:right,:] 81 | 82 | # flip horizontally if needed 83 | if flip: 84 | image=np.flip(image,1) 85 | 86 | # get image size 87 | im_size = np.asarray(image.shape) 88 | 89 | # convert to torch Variable 90 | image = np.expand_dims(image.transpose((2,0,1)),0) 91 | image = torch.Tensor(image.astype(np.float32)) 92 | image_var = 
Variable(image,requires_grad=False) 93 | 94 | # Resize image using bilinear sampling with identity affine tnf 95 | image = self.affineTnf(image_var).data.squeeze(0) 96 | 97 | im_size = torch.Tensor(im_size.astype(np.float32)) 98 | 99 | return (image, im_size) 100 | 101 | -------------------------------------------------------------------------------- /lib/modules.py: -------------------------------------------------------------------------------- 1 | # modules from NC-Net Rocco et al. NIPS2018 open-source code 2 | 3 | from __future__ import print_function, division 4 | from collections import OrderedDict 5 | import torch 6 | import torch.nn as nn 7 | from torch.autograd import Variable 8 | import torchvision.models as models 9 | import numpy as np 10 | import numpy.matlib 11 | import pickle 12 | 13 | from lib.torch_util import Softmax1D 14 | from lib.conv4d import Conv4d 15 | 16 | import copy 17 | 18 | def featureL2Norm(feature): 19 | epsilon = 1e-6 20 | norm = torch.pow(torch.sum(torch.pow(feature,2),1)+epsilon,0.5).unsqueeze(1).expand_as(feature) 21 | return torch.div(feature,norm) 22 | 23 | class FeatureExtraction(torch.nn.Module): 24 | def __init__(self, train_fe=False, feature_extraction_cnn='resnet101', feature_extraction_model_file='', normalization=True, last_layer='', use_cuda=True): 25 | super(FeatureExtraction, self).__init__() 26 | self.normalization = normalization 27 | self.feature_extraction_cnn=feature_extraction_cnn 28 | if feature_extraction_cnn == 'vgg': 29 | self.model = models.vgg16(pretrained=True) 30 | # keep feature extraction network up to indicated layer 31 | vgg_feature_layers=['conv1_1','relu1_1','conv1_2','relu1_2','pool1','conv2_1', 32 | 'relu2_1','conv2_2','relu2_2','pool2','conv3_1','relu3_1', 33 | 'conv3_2','relu3_2','conv3_3','relu3_3','pool3','conv4_1', 34 | 'relu4_1','conv4_2','relu4_2','conv4_3','relu4_3','pool4', 35 | 'conv5_1','relu5_1','conv5_2','relu5_2','conv5_3','relu5_3','pool5'] 36 | if last_layer=='': 37 | last_layer = 'pool4' 38 | last_layer_idx = vgg_feature_layers.index(last_layer) 39 | self.model = nn.Sequential(*list(self.model.features.children())[:last_layer_idx+1]) 40 | # for resnet below 41 | resnet_feature_layers = ['conv1','bn1','relu','maxpool','layer1','layer2','layer3','layer4'] 42 | if feature_extraction_cnn=='resnet101': 43 | self.model = models.resnet101(pretrained=True) 44 | if last_layer=='': 45 | last_layer = 'layer3' 46 | resnet_module_list = [getattr(self.model,l) for l in resnet_feature_layers] 47 | last_layer_idx = resnet_feature_layers.index(last_layer) 48 | self.model = nn.Sequential(*resnet_module_list[:last_layer_idx+1]) 49 | 50 | if feature_extraction_cnn=='resnet101fpn': 51 | if feature_extraction_model_file!='': 52 | resnet = models.resnet101(pretrained=True) 53 | # swap stride (2,2) and (1,1) in first layers (PyTorch ResNet is slightly different to caffe2 ResNet) 54 | # this is required for compatibility with caffe2 models 55 | resnet.layer2[0].conv1.stride=(2,2) 56 | resnet.layer2[0].conv2.stride=(1,1) 57 | resnet.layer3[0].conv1.stride=(2,2) 58 | resnet.layer3[0].conv2.stride=(1,1) 59 | resnet.layer4[0].conv1.stride=(2,2) 60 | resnet.layer4[0].conv2.stride=(1,1) 61 | else: 62 | resnet = models.resnet101(pretrained=True) 63 | resnet_module_list = [getattr(resnet,l) for l in resnet_feature_layers] 64 | conv_body = nn.Sequential(*resnet_module_list) 65 | self.model = fpn_body(conv_body, 66 | resnet_feature_layers, 67 | fpn_layers=['layer1','layer2','layer3'], 68 | normalize=normalization, 69 | hypercols=True) 70 | 
if feature_extraction_model_file!='': 71 | self.model.load_pretrained_weights(feature_extraction_model_file) 72 | 73 | if feature_extraction_cnn == 'densenet201': 74 | self.model = models.densenet201(pretrained=True) 75 | # keep feature extraction network up to denseblock3 76 | # self.model = nn.Sequential(*list(self.model.features.children())[:-3]) 77 | # keep feature extraction network up to transitionlayer2 78 | self.model = nn.Sequential(*list(self.model.features.children())[:-4]) 79 | if train_fe==False: 80 | # freeze parameters 81 | for param in self.model.parameters(): 82 | param.requires_grad = False 83 | # move to GPU 84 | if use_cuda: 85 | self.model = self.model.cuda() 86 | 87 | def forward(self, image_batch): 88 | features = self.model(image_batch) 89 | if self.normalization and not self.feature_extraction_cnn=='resnet101fpn': 90 | features = featureL2Norm(features) 91 | return features 92 | 93 | class FeatureCorrelation(torch.nn.Module): 94 | def __init__(self,shape='3D',normalization=True): 95 | super(FeatureCorrelation, self).__init__() 96 | self.normalization = normalization 97 | self.shape=shape 98 | self.ReLU = nn.ReLU() 99 | 100 | def forward(self, feature_A, feature_B): 101 | if self.shape=='3D': 102 | b,c,h,w = feature_A.size() 103 | # reshape features for matrix multiplication 104 | feature_A = feature_A.transpose(2,3).contiguous().view(b,c,h*w) 105 | feature_B = feature_B.view(b,c,h*w).transpose(1,2) 106 | # perform matrix mult. 107 | feature_mul = torch.bmm(feature_B,feature_A) 108 | # indexed [batch,idx_A=row_A+h*col_A,row_B,col_B] 109 | correlation_tensor = feature_mul.view(b,h,w,h*w).transpose(2,3).transpose(1,2) 110 | elif self.shape=='4D': 111 | b,c,hA,wA = feature_A.size() 112 | b,c,hB,wB = feature_B.size() 113 | # reshape features for matrix multiplication 114 | feature_A = feature_A.view(b,c,hA*wA).transpose(1,2) # size [b,c,h*w] 115 | feature_B = feature_B.view(b,c,hB*wB) # size [b,c,h*w] 116 | # perform matrix mult. 
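# after the reshapes above, feature_A is (b, hA*wA, c) and feature_B is (b, c, hB*wB),
# so the batched product below has size (b, hA*wA, hB*wB): each entry is the dot
# product between one spatial location of A and one of B, i.e. the raw 4D correlation.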
117 | feature_mul = torch.bmm(feature_A,feature_B) 118 | # indexed [batch,row_A,col_A,row_B,col_B] 119 | correlation_tensor = feature_mul.view(b,hA,wA,hB,wB).unsqueeze(1) 120 | 121 | if self.normalization: 122 | correlation_tensor = featureL2Norm(self.ReLU(correlation_tensor)) 123 | 124 | return correlation_tensor 125 | 126 | class NeighConsensus(torch.nn.Module): 127 | def __init__(self, use_cuda=True, kernel_sizes=[3,3,3], channels=[10,10,1], symmetric_mode=True): 128 | super(NeighConsensus, self).__init__() 129 | self.symmetric_mode = symmetric_mode 130 | self.kernel_sizes = kernel_sizes 131 | self.channels = channels 132 | num_layers = len(kernel_sizes) 133 | nn_modules = list() 134 | for i in range(num_layers): 135 | if i==0: 136 | ch_in = 1 137 | else: 138 | ch_in = channels[i-1] 139 | ch_out = channels[i] 140 | k_size = kernel_sizes[i] 141 | nn_modules.append(Conv4d(in_channels=ch_in,out_channels=ch_out,kernel_size=k_size,bias=True)) 142 | nn_modules.append(nn.ReLU(inplace=True)) 143 | self.conv = nn.Sequential(*nn_modules) 144 | if use_cuda: 145 | self.conv.cuda() 146 | 147 | def forward(self, x): 148 | if self.symmetric_mode: 149 | # apply network on the input and its "transpose" (swapping A-B to B-A ordering of the correlation tensor), 150 | # this second result is "transposed back" to the A-B ordering to match the first result and be able to add together 151 | x = self.conv(x)+self.conv(x.permute(0,1,4,5,2,3)).permute(0,1,4,5,2,3) 152 | # because of the ReLU layers in between linear layers, 153 | # this operation is different than convolving a single time with the filters+filters^T 154 | # and therefore it makes sense to do this. 155 | else: 156 | x = self.conv(x) 157 | return x 158 | 159 | def MutualMatching(corr4d): 160 | # mutual matching 161 | batch_size,ch,fs1,fs2,fs3,fs4 = corr4d.size() 162 | 163 | corr4d_B=corr4d.view(batch_size,fs1*fs2,fs3,fs4) # [batch_idx,k_A,i_B,j_B] 164 | corr4d_A=corr4d.view(batch_size,fs1,fs2,fs3*fs4) 165 | 166 | # get max 167 | corr4d_B_max,_=torch.max(corr4d_B,dim=1,keepdim=True) 168 | corr4d_A_max,_=torch.max(corr4d_A,dim=3,keepdim=True) 169 | 170 | eps = 1e-5 171 | corr4d_B=corr4d_B/(corr4d_B_max+eps) 172 | corr4d_A=corr4d_A/(corr4d_A_max+eps) 173 | 174 | corr4d_B=corr4d_B.view(batch_size,1,fs1,fs2,fs3,fs4) 175 | corr4d_A=corr4d_A.view(batch_size,1,fs1,fs2,fs3,fs4) 176 | 177 | corr4d=corr4d*(corr4d_A*corr4d_B) # parenthesis are important for symmetric output 178 | 179 | return corr4d 180 | 181 | def maxpool4d(corr4d_hres,k_size=4): 182 | slices=[] 183 | for i in range(k_size): 184 | for j in range(k_size): 185 | for k in range(k_size): 186 | for l in range(k_size): 187 | slices.append(corr4d_hres[:,0,i::k_size,j::k_size,k::k_size,l::k_size].unsqueeze(0)) 188 | slices=torch.cat(tuple(slices),dim=1) 189 | corr4d,max_idx=torch.max(slices,dim=1,keepdim=True) 190 | max_l=torch.fmod(max_idx,k_size) 191 | max_k=torch.fmod(max_idx.sub(max_l).div(k_size),k_size) 192 | max_j=torch.fmod(max_idx.sub(max_l).div(k_size).sub(max_k).div(k_size),k_size) 193 | max_i=max_idx.sub(max_l).div(k_size).sub(max_k).div(k_size).sub(max_j).div(k_size) 194 | # i,j,k,l represent the *relative* coords of the max point in the box of size k_size*k_size*k_size*k_size 195 | return (corr4d,max_i,max_j,max_k,max_l) 196 | 197 | 198 | 199 | -------------------------------------------------------------------------------- /lib/normalization.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torchvision import transforms 3 | 
from torch.autograd import Variable 4 | 5 | class NormalizeImageDict(object): 6 | """ 7 | 8 | Normalizes Tensor images in dictionary 9 | 10 | Args: 11 | image_keys (list): dict. keys of the images to be normalized 12 | normalizeRange (bool): if True the image is divided by 255.0s 13 | 14 | """ 15 | 16 | def __init__(self,image_keys,normalizeRange=True): 17 | self.image_keys = image_keys 18 | self.normalizeRange=normalizeRange 19 | self.normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], 20 | std=[0.229, 0.224, 0.225]) 21 | 22 | def __call__(self, sample): 23 | for key in self.image_keys: 24 | if self.normalizeRange: 25 | sample[key] /= 255.0 26 | sample[key] = self.normalize(sample[key]) 27 | return sample 28 | 29 | def normalize_image(image, forward=True, mean=[0.485, 0.456, 0.406],std=[0.229, 0.224, 0.225]): 30 | im_size = image.size() 31 | mean=torch.FloatTensor(mean).unsqueeze(1).unsqueeze(2) 32 | std=torch.FloatTensor(std).unsqueeze(1).unsqueeze(2) 33 | if image.is_cuda: 34 | mean = mean.cuda() 35 | std = std.cuda() 36 | if isinstance(image,torch.autograd.variable.Variable): 37 | mean = Variable(mean,requires_grad=False) 38 | std = Variable(std,requires_grad=False) 39 | if forward: 40 | if len(im_size)==3: 41 | result = image.sub(mean.expand(im_size)).div(std.expand(im_size)) 42 | elif len(im_size)==4: 43 | result = image.sub(mean.unsqueeze(0).expand(im_size)).div(std.unsqueeze(0).expand(im_size)) 44 | else: 45 | if len(im_size)==3: 46 | result = image.mul(std.expand(im_size)).add(mean.expand(im_size)) 47 | elif len(im_size)==4: 48 | result = image.mul(std.unsqueeze(0).expand(im_size)).add(mean.unsqueeze(0).expand(im_size)) 49 | 50 | return result -------------------------------------------------------------------------------- /lib/pf_dataset.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function, division 2 | import os 3 | import torch 4 | from torch.autograd import Variable 5 | from skimage import io 6 | import pandas as pd 7 | import numpy as np 8 | from torch.utils.data import Dataset 9 | from lib.transformation import AffineTnf 10 | 11 | class PFPascalDataset(Dataset): 12 | 13 | """ 14 | 15 | Proposal Flow PASCAL image pair dataset 16 | 17 | 18 | Args: 19 | csv_file (string): Path to the csv file with image names and transformations. 20 | dataset_path (string): Directory with the images. 21 | output_size (2-tuple): Desired output size 22 | transform (callable): Transformation for post-processing the training pair (eg. 
image normalization) 23 | 24 | """ 25 | 26 | def __init__(self, csv_file, dataset_path, output_size=(240,240), transform=None, category=None, pck_procedure='pf'): 27 | 28 | self.category_names=['aeroplane','bicycle','bird','boat','bottle','bus','car','cat','chair','cow','diningtable','dog','horse','motorbike','person','pottedplant','sheep','sofa','train','tvmonitor'] 29 | self.out_h, self.out_w = output_size 30 | self.pairs = pd.read_csv(csv_file) 31 | # self.category = self.pairs.iloc[:,2].as_matrix().astype('float') 32 | self.category = self.pairs.iloc[:,2].values.astype('float') 33 | if category is not None: 34 | cat_idx = np.nonzero(self.category==category)[0] 35 | self.category=self.category[cat_idx] 36 | self.pairs=self.pairs.iloc[cat_idx,:] 37 | self.img_A_names = self.pairs.iloc[:,0] 38 | self.img_B_names = self.pairs.iloc[:,1] 39 | self.point_A_coords = self.pairs.iloc[:, 3:5] 40 | self.point_B_coords = self.pairs.iloc[:, 5:] 41 | self.dataset_path = dataset_path 42 | self.transform = transform 43 | # no cuda as dataset is called from CPU threads in dataloader and produces confilct 44 | self.affineTnf = AffineTnf(out_h=self.out_h, out_w=self.out_w, use_cuda = False) 45 | self.pck_procedure = pck_procedure 46 | 47 | def __len__(self): 48 | return len(self.pairs) 49 | 50 | def __getitem__(self, idx): 51 | # get pre-processed images 52 | image_A,im_size_A = self.get_image(self.img_A_names,idx) 53 | image_B,im_size_B = self.get_image(self.img_B_names,idx) 54 | 55 | # get pre-processed point coords 56 | point_A_coords = self.get_points(self.point_A_coords,idx) 57 | point_B_coords = self.get_points(self.point_B_coords,idx) 58 | 59 | # compute PCK reference length L_pck (equal to max bounding box side in image_A) 60 | #L_pck = torch.FloatTensor([torch.max(point_A_coords.max(1)[0]-point_A_coords.min(1)[0])]) 61 | N_pts = torch.sum(torch.ne(point_A_coords[0,:],-1)) 62 | 63 | if self.pck_procedure=='pf': 64 | L_pck = torch.FloatTensor([torch.max(point_A_coords[:,:N_pts].max(1)[0]-point_A_coords[:,:N_pts].min(1)[0])]) 65 | elif self.pck_procedure=='scnet': 66 | #modification to follow the evaluation procedure of SCNet 67 | point_A_coords[0,0:N_pts]=point_A_coords[0,0:N_pts]*224/im_size_A[1] 68 | point_A_coords[1,0:N_pts]=point_A_coords[1,0:N_pts]*224/im_size_A[0] 69 | 70 | point_B_coords[0,0:N_pts]=point_B_coords[0,0:N_pts]*224/im_size_B[1] 71 | point_B_coords[1,0:N_pts]=point_B_coords[1,0:N_pts]*224/im_size_B[0] 72 | 73 | im_size_A[0:2]=torch.FloatTensor([224,224]) 74 | im_size_B[0:2]=torch.FloatTensor([224,224]) 75 | 76 | L_pck = torch.FloatTensor([224.0]) 77 | 78 | #sample = {'source_image': image_A, 'target_image': image_B, 'source_im_size': im_size_A, 'target_im_size': im_size_B, 'source_points': point_A_coords, 'target_points': point_B_coords, 'L_pck': L_pck} 79 | 80 | #shuaiyi add category name 81 | category_name = self.category_names[int(self.category[idx]-1)] 82 | sample = {'source_image': image_A, 'target_image': image_B, 'source_im_size': im_size_A, 83 | 'target_im_size': im_size_B, 'source_points': point_A_coords, 'target_points': point_B_coords, 84 | 'L_pck': L_pck,'category_name':category_name} 85 | 86 | 87 | if self.transform: 88 | sample = self.transform(sample) 89 | 90 | return sample 91 | 92 | def get_image(self,img_name_list,idx): 93 | img_name = os.path.join(self.dataset_path, img_name_list.iloc[idx]) 94 | image = io.imread(img_name) 95 | 96 | # get image size 97 | im_size = np.asarray(image.shape) 98 | 99 | # convert to torch Variable 100 | image = 
np.expand_dims(image.transpose((2,0,1)),0) 101 | image = torch.Tensor(image.astype(np.float32)) 102 | image_var = Variable(image,requires_grad=False) 103 | 104 | # Resize image using bilinear sampling with identity affine tnf 105 | image = self.affineTnf(image_var).data.squeeze(0) 106 | 107 | im_size = torch.Tensor(im_size.astype(np.float32)) 108 | 109 | return (image, im_size) 110 | 111 | def get_points(self,point_coords_list,idx): 112 | X=np.fromstring(point_coords_list.iloc[idx,0],sep=';') 113 | Y=np.fromstring(point_coords_list.iloc[idx,1],sep=';') 114 | Xpad = -np.ones(20); Xpad[:len(X)]=X 115 | Ypad = -np.ones(20); Ypad[:len(X)]=Y 116 | point_coords = np.concatenate((Xpad.reshape(1,20),Ypad.reshape(1,20)),axis=0) 117 | 118 | # make arrays float tensor for subsequent processing 119 | point_coords = torch.Tensor(point_coords.astype(np.float32)) 120 | return point_coords 121 | 122 | -------------------------------------------------------------------------------- /lib/pf_willow_dataset.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function, division 2 | import os 3 | import torch 4 | from torch.autograd import Variable 5 | from skimage import io 6 | import pandas as pd 7 | import numpy as np 8 | from torch.utils.data import Dataset 9 | from geotnf.transformation import GeometricTnf 10 | 11 | 12 | class PFDataset(Dataset): 13 | """ 14 | 15 | Proposal Flow image pair dataset 16 | 17 | 18 | Args: 19 | csv_file (string): Path to the csv file with image names and transformations. 20 | dataset_path (string): Directory with the images. 21 | output_size (2-tuple): Desired output size 22 | transform (callable): Transformation for post-processing the training pair (eg. image normalization) 23 | 24 | """ 25 | 26 | def __init__(self, csv_file, dataset_path, output_size=(240, 240), transform=None): 27 | self.out_h, self.out_w = output_size 28 | self.pairs = pd.read_csv(csv_file) 29 | self.img_A_names = self.pairs.iloc[:, 0] 30 | self.img_B_names = self.pairs.iloc[:, 1] 31 | self.point_A_coords = self.pairs.iloc[:, 2:22].values.astype('float') 32 | self.point_B_coords = self.pairs.iloc[:, 22:].values.astype('float') #as_matrix() 33 | self.dataset_path = dataset_path 34 | self.transform = transform 35 | # no cuda as dataset is called from CPU threads in dataloader and produces confilct 36 | self.affineTnf = GeometricTnf(out_h=self.out_h, out_w=self.out_w, use_cuda=False) 37 | 38 | def __len__(self): 39 | return len(self.pairs) 40 | 41 | def __getitem__(self, idx): 42 | # get pre-processed images 43 | image_A, im_size_A = self.get_image(self.img_A_names, idx) 44 | image_B, im_size_B = self.get_image(self.img_B_names, idx) 45 | 46 | # get pre-processed point coords 47 | point_A_coords = self.get_points(self.point_A_coords, idx) 48 | point_B_coords = self.get_points(self.point_B_coords, idx) 49 | 50 | # compute PCK reference length L_pck (equal to max bounding box side in image_A) 51 | L_pck = torch.FloatTensor([torch.max(point_A_coords.max(1)[0] - point_A_coords.min(1)[0])]) 52 | 53 | sample = {'source_image': image_A, 'target_image': image_B, 'source_im_size': im_size_A, 54 | 'target_im_size': im_size_B, 'source_points': point_A_coords, 'target_points': point_B_coords, 55 | 'L_pck': L_pck} 56 | 57 | if self.transform: 58 | sample = self.transform(sample) 59 | 60 | return sample 61 | 62 | def get_image(self, img_name_list, idx): 63 | img_name = os.path.join(self.dataset_path, img_name_list[idx]) 64 | image = io.imread(img_name) 65 | 
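# note: the original (pre-resize) image size is recorded below and returned with the image,
# so keypoint annotations given in original pixel coordinates can still be normalized correctly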
66 | # get image size 67 | im_size = np.asarray(image.shape) 68 | 69 | # convert to torch Variable 70 | image = np.expand_dims(image.transpose((2, 0, 1)), 0) 71 | image = torch.Tensor(image.astype(np.float32)) 72 | image_var = Variable(image, requires_grad=False) 73 | 74 | # Resize image using bilinear sampling with identity affine tnf 75 | image = self.affineTnf(image_var).data.squeeze(0) 76 | 77 | im_size = torch.Tensor(im_size.astype(np.float32)) 78 | 79 | return (image, im_size) 80 | 81 | def get_points(self, point_coords_list, idx): 82 | point_coords = point_coords_list[idx, :].reshape(2, 10) 83 | 84 | # # swap X,Y coords, as the the row,col order (Y,X) is used for computations 85 | # point_coords = point_coords[[1,0],:] 86 | 87 | # make arrays float tensor for subsequent processing 88 | point_coords = torch.Tensor(point_coords.astype(np.float32)) 89 | return point_coords 90 | 91 | 92 | class PFPascalDataset(Dataset): 93 | """ 94 | 95 | Proposal Flow image pair dataset 96 | 97 | 98 | Args: 99 | csv_file (string): Path to the csv file with image names and transformations. 100 | dataset_path (string): Directory with the images. 101 | output_size (2-tuple): Desired output size 102 | transform (callable): Transformation for post-processing the training pair (eg. image normalization) 103 | 104 | """ 105 | 106 | def __init__(self, csv_file, dataset_path, output_size=(240, 240), transform=None, category=None, 107 | pck_procedure='scnet'): 108 | 109 | self.category_names = ['aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 'cat', 'chair', 'cow', 110 | 'diningtable', 'dog', 'horse', 'motorbike', 'person', 'pottedplant', 'sheep', 'sofa', 111 | 'train', 'tvmonitor'] 112 | self.out_h, self.out_w = output_size 113 | self.pairs = pd.read_csv(csv_file) 114 | self.category = self.pairs.iloc[:, 2].as_matrix().astype('float') 115 | if category is not None: 116 | cat_idx = np.nonzero(self.category == category)[0] 117 | self.category = self.category[cat_idx] 118 | self.pairs = self.pairs.iloc[cat_idx, :] 119 | self.img_A_names = self.pairs.iloc[:, 0] 120 | self.img_B_names = self.pairs.iloc[:, 1] 121 | self.point_A_coords = self.pairs.iloc[:, 3:5] 122 | self.point_B_coords = self.pairs.iloc[:, 5:] 123 | self.dataset_path = dataset_path 124 | self.transform = transform 125 | # no cuda as dataset is called from CPU threads in dataloader and produces confilct 126 | self.affineTnf = GeometricTnf(out_h=self.out_h, out_w=self.out_w, use_cuda=False) 127 | self.pck_procedure = pck_procedure 128 | 129 | def __len__(self): 130 | return len(self.pairs) 131 | 132 | def __getitem__(self, idx): 133 | # get pre-processed images 134 | image_A, im_size_A = self.get_image(self.img_A_names, idx) 135 | image_B, im_size_B = self.get_image(self.img_B_names, idx) 136 | 137 | # get pre-processed point coords 138 | point_A_coords = self.get_points(self.point_A_coords, idx) 139 | point_B_coords = self.get_points(self.point_B_coords, idx) 140 | 141 | # compute PCK reference length L_pck (equal to max bounding box side in image_A) 142 | N_pts = torch.sum(torch.ne(point_A_coords[0, :], -1)) 143 | 144 | if self.pck_procedure == 'pf': 145 | L_pck = torch.FloatTensor( 146 | [torch.max(point_A_coords[:, :N_pts].max(1)[0] - point_A_coords[:, :N_pts].min(1)[0])]) 147 | elif self.pck_procedure == 'scnet': 148 | # modification to follow the evaluation procedure of SCNet 149 | point_A_coords[0, 0:N_pts] = point_A_coords[0, 0:N_pts] * 224 / im_size_A[1] 150 | point_A_coords[1, 0:N_pts] = point_A_coords[1, 0:N_pts] * 224 / 
im_size_A[0] 151 | 152 | point_B_coords[0, 0:N_pts] = point_B_coords[0, 0:N_pts] * 224 / im_size_B[1] 153 | point_B_coords[1, 0:N_pts] = point_B_coords[1, 0:N_pts] * 224 / im_size_B[0] 154 | 155 | im_size_A[0:2] = torch.FloatTensor([224, 224]) 156 | im_size_B[0:2] = torch.FloatTensor([224, 224]) 157 | 158 | L_pck = torch.FloatTensor([224.0]) 159 | 160 | sample = {'source_image': image_A, 'target_image': image_B, 'source_im_size': im_size_A, 161 | 'target_im_size': im_size_B, 'source_points': point_A_coords, 'target_points': point_B_coords, 162 | 'L_pck': L_pck} 163 | 164 | if self.transform: 165 | sample = self.transform(sample) 166 | 167 | return sample 168 | 169 | def get_image(self, img_name_list, idx): 170 | img_name = os.path.join(self.dataset_path, img_name_list.iloc[idx]) 171 | image = io.imread(img_name) 172 | 173 | # get image size 174 | im_size = np.asarray(image.shape) 175 | 176 | # convert to torch Variable 177 | image = np.expand_dims(image.transpose((2, 0, 1)), 0) 178 | image = torch.Tensor(image.astype(np.float32)) 179 | image_var = Variable(image, requires_grad=False) 180 | 181 | # Resize image using bilinear sampling with identity affine tnf 182 | image = self.affineTnf(image_var).data.squeeze(0) 183 | 184 | im_size = torch.Tensor(im_size.astype(np.float32)) 185 | 186 | return (image, im_size) 187 | 188 | def get_points(self, point_coords_list, idx): 189 | X = np.fromstring(point_coords_list.iloc[idx, 0], sep=';') 190 | Y = np.fromstring(point_coords_list.iloc[idx, 1], sep=';') 191 | Xpad = -np.ones(20); 192 | Xpad[:len(X)] = X 193 | Ypad = -np.ones(20); 194 | Ypad[:len(X)] = Y 195 | point_coords = np.concatenate((Xpad.reshape(1, 20), Ypad.reshape(1, 20)), axis=0) 196 | 197 | # make arrays float tensor for subsequent processing 198 | point_coords = torch.Tensor(point_coords.astype(np.float32)) 199 | return point_coords 200 | 201 | -------------------------------------------------------------------------------- /lib/plot.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.autograd import Variable 3 | import matplotlib.pyplot as plt 4 | import numpy as np 5 | import os 6 | 7 | def plot_image(im,batch_idx=0,return_im=False): 8 | if im.dim()==4: 9 | im=im[batch_idx,:,:,:] 10 | mean=Variable(torch.FloatTensor([0.485, 0.456, 0.406]).view(3,1,1)) 11 | std=Variable(torch.FloatTensor([0.229, 0.224, 0.225]).view(3,1,1)) 12 | if im.is_cuda: 13 | mean=mean.cuda() 14 | std=std.cuda() 15 | im=im.mul(std).add(mean)*255.0 16 | im=im.permute(1,2,0).data.cpu().numpy().astype(np.uint8) 17 | if return_im: 18 | return im 19 | plt.imshow(im) 20 | plt.show() 21 | 22 | #2019-01-28 visualize 23 | def plot_image_debug(im,batch_idx=0,return_im=False): 24 | if im.dim()==4: 25 | im=im[batch_idx,:,:,:] 26 | mean=Variable(torch.FloatTensor([0.485, 0.456, 0.406]).view(3,1,1)) 27 | std=Variable(torch.FloatTensor([0.229, 0.224, 0.225]).view(3,1,1)) 28 | if im.is_cuda: 29 | mean=mean.cuda() 30 | std=std.cuda() 31 | im=im.mul(std).add(mean)*255.0 32 | im=im.permute(1,2,0).data.cpu().numpy().astype(np.uint8) 33 | #duplicate pair 34 | im = np.concatenate(np.stack([im,im],0)) 35 | 36 | #verbose 37 | verbose = False 38 | if verbose: 39 | print('debug imshape in plot.py',im.shape) #(800,800,3) 40 | if return_im: 41 | return im 42 | plt.imshow(im) 43 | plt.show() 44 | 45 | def save_plot(filename): 46 | plt.gca().set_axis_off() 47 | plt.subplots_adjust(top = 1, bottom = 0, right = 1, left = 0, 48 | hspace = 0, wspace = 0) 49 | plt.margins(0,0) 50 | 
plt.gca().xaxis.set_major_locator(plt.NullLocator()) 51 | plt.gca().yaxis.set_major_locator(plt.NullLocator()) 52 | plt.savefig(filename, bbox_inches = 'tight', 53 | pad_inches = 0) 54 | 55 | #2019-02-21 shuaiyi plot loss 56 | def plot_loss(train_loss, val_loss, pth, figname): 57 | 58 | N = len(train_loss) 59 | assert (len(val_loss) == N) 60 | x = np.arange(1,N+1) 61 | 62 | plt.figure() 63 | 64 | plt.plot(x,train_loss,label='Train loss') 65 | plt.plot(x, val_loss, label='Val loss') 66 | plt.title('Loss') 67 | plt.legend() 68 | 69 | plt.savefig(os.path.join(pth,figname)) 70 | 71 | return 72 | -------------------------------------------------------------------------------- /lib/point_tnf_dynamic.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn 3 | from torch.autograd import Variable 4 | import numpy as np 5 | 6 | def normalize_axis(x,L): 7 | return (x-1-(L-1)/2)*2/(L-1) 8 | 9 | def unnormalize_axis(x,L): 10 | return x*(L-1)/2+1+(L-1)/2 11 | 12 | #2019-03-12 Shuaiyi Huang 13 | #Dynamic corr_to_matches 14 | 15 | from models.loss_dynamic import score_for_single_corr4d 16 | def mergeA_mergeB_from_out(out): 17 | # corr_out_set = out['corr'] #(B,S,1,Ha,Wa,Hb,Wb) 18 | # A_scaleatts_set = out['scaleatt']['A'] #(B,S,Ha,Wa) 19 | # B_scaleatts_set = out['scaleatt']['B'] #(B,S,Hb,Wb) 20 | 21 | corr_out_set, A_scaleatts_set, B_scaleatts_set = out 22 | B,S,_,Ha,Wa,Hb,Wb = corr_out_set.shape 23 | 24 | score_pos_overscales = [] 25 | M_A_norm_overscales = [] 26 | M_B_norm_overscales = [] 27 | for si in range(S): #iterate over scales 28 | corr_out_si = corr_out_set[:,si,:,:,:,:,:] 29 | score_pos_si,M_A_norm,M_B_norm = score_for_single_corr4d(corr4d=corr_out_si) #MA_norm:(B,LB,Ha,Wa) 30 | 31 | #add 32 | score_pos_overscales.append(score_pos_si) 33 | M_A_norm_overscales.append(M_A_norm) 34 | M_B_norm_overscales.append(M_B_norm) 35 | 36 | M_A_norm_overscales = torch.stack(M_A_norm_overscales,dim=1) #(B,LB,Ha,Wa)->(B,S,LB,Ha,Wa) 37 | M_B_norm_overscales = torch.stack(M_B_norm_overscales, dim=1) #(B,LA,Hb,Wb)->(B,S,LA,Hb,Wb) 38 | 39 | #merge scoremap using atts 40 | MergedA = torch.sum(M_A_norm_overscales*A_scaleatts_set.view(B,S,1,Ha,Wa),dim=1) #(B,LB,Ha,Wa) 41 | MergedB = torch.sum(M_B_norm_overscales * B_scaleatts_set.view(B, S, 1, Hb, Wb), dim=1) #(B,LA,Hb,Wb) 42 | 43 | return MergedA,MergedB 44 | 45 | def corr_to_matches(out, delta4d=None, k_size=1, do_softmax=False, scale='centered', return_indices=False, invert_matching_direction=False): 46 | 47 | MergeA,MergeB = mergeA_mergeB_from_out(out) #MergeA (B,LB,Ha,Wa), MergeB (B,LA,Hb,Wb) 48 | 49 | # to_cuda = lambda x: x.cuda() if corr4d.is_cuda else x 50 | # batch_size,ch,fs1,fs2,fs3,fs4 = corr4d.size() 51 | 52 | to_cuda = lambda x: x.cuda() if out[0].is_cuda else x 53 | batch_size, ch,_, fs1, fs2, fs3, fs4 = out[0].size() #todo merge ch with S dimension 54 | 55 | if scale=='centered': 56 | XA,YA=np.meshgrid(np.linspace(-1,1,fs2*k_size),np.linspace(-1,1,fs1*k_size)) 57 | XB,YB=np.meshgrid(np.linspace(-1,1,fs4*k_size),np.linspace(-1,1,fs3*k_size)) 58 | elif scale=='positive': 59 | XA,YA=np.meshgrid(np.linspace(0,1,fs2*k_size),np.linspace(0,1,fs1*k_size)) 60 | XB,YB=np.meshgrid(np.linspace(0,1,fs4*k_size),np.linspace(0,1,fs3*k_size)) 61 | 62 | JA,IA=np.meshgrid(range(fs2),range(fs1)) 63 | JB,IB=np.meshgrid(range(fs4),range(fs3)) 64 | 65 | XA,YA=Variable(to_cuda(torch.FloatTensor(XA))),Variable(to_cuda(torch.FloatTensor(YA))) 66 | 
XB,YB=Variable(to_cuda(torch.FloatTensor(XB))),Variable(to_cuda(torch.FloatTensor(YB))) 67 | 68 | JA,IA=Variable(to_cuda(torch.LongTensor(JA).view(1,-1))),Variable(to_cuda(torch.LongTensor(IA).view(1,-1))) 69 | JB,IB=Variable(to_cuda(torch.LongTensor(JB).view(1,-1))),Variable(to_cuda(torch.LongTensor(IB).view(1,-1))) 70 | 71 | if invert_matching_direction: 72 | # nc_A_Bvec=corr4d.view(batch_size,fs1,fs2,fs3*fs4) #(B,Ha,Wa,LB) 73 | nc_A_Bvec = MergeA 74 | if do_softmax: 75 | nc_A_Bvec=torch.nn.functional.softmax(nc_A_Bvec,dim=1) 76 | 77 | match_A_vals,idx_A_Bvec=torch.max(nc_A_Bvec,dim=1) 78 | score=match_A_vals.view(batch_size,-1) 79 | 80 | iB=IB.view(-1)[idx_A_Bvec.view(-1)].view(batch_size,-1) 81 | jB=JB.view(-1)[idx_A_Bvec.view(-1)].view(batch_size,-1) 82 | iA=IA.expand_as(iB) 83 | jA=JA.expand_as(jB) 84 | 85 | else: 86 | # nc_B_Avec=corr4d.view(batch_size,fs1*fs2,fs3,fs4) # [batch_idx,k_A,i_B,j_B] #(B,LA,Hb,Wb) 87 | nc_B_Avec = MergeB 88 | if do_softmax: 89 | nc_B_Avec=torch.nn.functional.softmax(nc_B_Avec,dim=1) 90 | 91 | match_B_vals,idx_B_Avec=torch.max(nc_B_Avec,dim=1) 92 | score=match_B_vals.view(batch_size,-1) 93 | 94 | iA=IA.view(-1)[idx_B_Avec.view(-1)].view(batch_size,-1) 95 | jA=JA.view(-1)[idx_B_Avec.view(-1)].view(batch_size,-1) 96 | iB=IB.expand_as(iA) 97 | jB=JB.expand_as(jA) 98 | 99 | if delta4d is not None: # relocalization 100 | delta_iA,delta_jA,delta_iB,delta_jB = delta4d 101 | 102 | diA=delta_iA.squeeze(0).squeeze(0)[iA.view(-1),jA.view(-1),iB.view(-1),jB.view(-1)] 103 | djA=delta_jA.squeeze(0).squeeze(0)[iA.view(-1),jA.view(-1),iB.view(-1),jB.view(-1)] 104 | diB=delta_iB.squeeze(0).squeeze(0)[iA.view(-1),jA.view(-1),iB.view(-1),jB.view(-1)] 105 | djB=delta_jB.squeeze(0).squeeze(0)[iA.view(-1),jA.view(-1),iB.view(-1),jB.view(-1)] 106 | 107 | iA=iA*k_size+diA.expand_as(iA) 108 | jA=jA*k_size+djA.expand_as(jA) 109 | iB=iB*k_size+diB.expand_as(iB) 110 | jB=jB*k_size+djB.expand_as(jB) 111 | 112 | xA=XA[iA.view(-1),jA.view(-1)].view(batch_size,-1) 113 | yA=YA[iA.view(-1),jA.view(-1)].view(batch_size,-1) 114 | xB=XB[iB.view(-1),jB.view(-1)].view(batch_size,-1) 115 | yB=YB[iB.view(-1),jB.view(-1)].view(batch_size,-1) 116 | 117 | if return_indices: 118 | return (xA,yA,xB,yB,score,iA,jA,iB,jB) 119 | else: 120 | return (xA,yA,xB,yB,score) 121 | 122 | def nearestNeighPointTnf(matches,target_points_norm): 123 | xA,yA,xB,yB=matches 124 | 125 | # match target points to grid 126 | deltaX=target_points_norm[:,0,:].unsqueeze(1)-xB.unsqueeze(2) 127 | deltaY=target_points_norm[:,1,:].unsqueeze(1)-yB.unsqueeze(2) 128 | distB=torch.sqrt(torch.pow(deltaX,2)+torch.pow(deltaY,2)) 129 | vals,idx=torch.min(distB,dim=1) 130 | 131 | warped_points_x = xA.view(-1)[idx.view(-1)].view(1,1,-1) 132 | warped_points_y = yA.view(-1)[idx.view(-1)].view(1,1,-1) 133 | warped_points_norm = torch.cat((warped_points_x,warped_points_y),dim=1) 134 | return warped_points_norm 135 | 136 | def bilinearInterpPointTnf(matches,target_points_norm): 137 | xA,yA,xB,yB=matches 138 | 139 | feature_size=int(np.sqrt(xB.shape[-1])) 140 | 141 | b,_,N=target_points_norm.size() 142 | 143 | X_=xB.view(-1) 144 | Y_=yB.view(-1) 145 | 146 | grid = torch.FloatTensor(np.linspace(-1,1,feature_size)).unsqueeze(0).unsqueeze(2) 147 | if xB.is_cuda: 148 | grid=grid.cuda() 149 | if isinstance(xB,Variable): 150 | grid=Variable(grid) 151 | 152 | x_minus = torch.sum(((target_points_norm[:,0,:]-grid)>0).long(),dim=1,keepdim=True)-1 153 | x_minus[x_minus<0]=0 # fix edge case 154 | x_plus = x_minus+1 155 | 156 | y_minus = 
torch.sum(((target_points_norm[:,1,:]-grid)>0).long(),dim=1,keepdim=True)-1 157 | y_minus[y_minus<0]=0 # fix edge case 158 | y_plus = y_minus+1 159 | 160 | toidx = lambda x,y,L: y*L+x 161 | 162 | m_m_idx = toidx(x_minus,y_minus,feature_size) 163 | p_p_idx = toidx(x_plus,y_plus,feature_size) 164 | p_m_idx = toidx(x_plus,y_minus,feature_size) 165 | m_p_idx = toidx(x_minus,y_plus,feature_size) 166 | 167 | topoint = lambda idx, X, Y: torch.cat((X[idx.view(-1)].view(b,1,N).contiguous(), 168 | Y[idx.view(-1)].view(b,1,N).contiguous()),dim=1) 169 | 170 | P_m_m = topoint(m_m_idx,X_,Y_) 171 | P_p_p = topoint(p_p_idx,X_,Y_) 172 | P_p_m = topoint(p_m_idx,X_,Y_) 173 | P_m_p = topoint(m_p_idx,X_,Y_) 174 | 175 | multrows = lambda x: x[:,0,:]*x[:,1,:] 176 | 177 | f_p_p=multrows(torch.abs(target_points_norm-P_m_m)) 178 | f_m_m=multrows(torch.abs(target_points_norm-P_p_p)) 179 | f_m_p=multrows(torch.abs(target_points_norm-P_p_m)) 180 | f_p_m=multrows(torch.abs(target_points_norm-P_m_p)) 181 | 182 | Q_m_m = topoint(m_m_idx,xA.view(-1),yA.view(-1)) 183 | Q_p_p = topoint(p_p_idx,xA.view(-1),yA.view(-1)) 184 | Q_p_m = topoint(p_m_idx,xA.view(-1),yA.view(-1)) 185 | Q_m_p = topoint(m_p_idx,xA.view(-1),yA.view(-1)) 186 | 187 | warped_points_norm = (Q_m_m*f_m_m+Q_p_p*f_p_p+Q_m_p*f_m_p+Q_p_m*f_p_m)/(f_p_p+f_m_m+f_m_p+f_p_m) 188 | return warped_points_norm 189 | 190 | 191 | def PointsToUnitCoords(P,im_size): 192 | h,w = im_size[:,0],im_size[:,1] 193 | P_norm = P.clone() 194 | # normalize Y 195 | P_norm[:,0,:] = normalize_axis(P[:,0,:],w.unsqueeze(1).expand_as(P[:,0,:])) 196 | # normalize X 197 | P_norm[:,1,:] = normalize_axis(P[:,1,:],h.unsqueeze(1).expand_as(P[:,1,:])) 198 | return P_norm 199 | 200 | def PointsToPixelCoords(P,im_size): 201 | h,w = im_size[:,0],im_size[:,1] 202 | P_norm = P.clone() 203 | # normalize Y 204 | P_norm[:,0,:] = unnormalize_axis(P[:,0,:],w.unsqueeze(1).expand_as(P[:,0,:])) 205 | # normalize X 206 | P_norm[:,1,:] = unnormalize_axis(P[:,1,:],h.unsqueeze(1).expand_as(P[:,1,:])) 207 | return P_norm -------------------------------------------------------------------------------- /lib/py_util.py: -------------------------------------------------------------------------------- 1 | import os 2 | import errno 3 | 4 | def create_file_path(filename): 5 | if not os.path.exists(os.path.dirname(filename)): 6 | try: 7 | os.makedirs(os.path.dirname(filename)) 8 | except OSError as exc: # Guard against race condition 9 | if exc.errno != errno.EEXIST: 10 | raise -------------------------------------------------------------------------------- /lib/torch_util.py: -------------------------------------------------------------------------------- 1 | import shutil 2 | import torch 3 | from torch.autograd import Variable 4 | from os import makedirs, remove 5 | from os.path import exists, join, basename, dirname 6 | import collections 7 | from lib.dataloader import default_collate 8 | 9 | def collate_custom(batch): 10 | """ Custom collate function for the Dataset class 11 | * It doesn't convert numpy arrays to stacked-tensors, but rather combines them in a list 12 | * This is useful for processing annotations of different sizes 13 | """ 14 | # this case will occur in first pass, and will convert a 15 | # list of dictionaries (returned by the threads by sampling dataset[idx]) 16 | # to a unified dictionary of collated values 17 | if isinstance(batch[0], collections.Mapping): 18 | return {key: collate_custom([d[key] for d in batch]) for key in batch[0]} 19 | # these cases will occur in recursion 20 | elif 
torch.is_tensor(batch[0]): # for tensors, use standrard collating function 21 | return default_collate(batch) 22 | else: # for other types (i.e. lists), return as is 23 | return batch 24 | 25 | class BatchTensorToVars(object): 26 | """Convert tensors in dict batch to vars 27 | """ 28 | def __init__(self, use_cuda=True): 29 | self.use_cuda=use_cuda 30 | 31 | def __call__(self, batch): 32 | batch_var = {} 33 | for key,value in batch.items(): 34 | if isinstance(value,torch.Tensor) and not self.use_cuda: 35 | batch_var[key] = Variable(value,requires_grad=False) 36 | elif isinstance(value,torch.Tensor) and self.use_cuda: 37 | batch_var[key] = Variable(value,requires_grad=False).cuda() 38 | else: 39 | batch_var[key] = value 40 | return batch_var 41 | 42 | def Softmax1D(x,dim): 43 | x_k = torch.max(x,dim)[0].unsqueeze(dim) 44 | x -= x_k.expand_as(x) 45 | exp_x = torch.exp(x) 46 | return torch.div(exp_x,torch.sum(exp_x,dim).unsqueeze(dim).expand_as(x)) 47 | 48 | def save_checkpoint(state, is_best, file, save_all_epochs=False): 49 | model_dir = dirname(file) 50 | model_fn = basename(file) 51 | # make dir if needed (should be non-empty) 52 | if model_dir!='' and not exists(model_dir): 53 | makedirs(model_dir) 54 | if save_all_epochs: 55 | torch.save(state, join(model_dir,str(state['epoch'])+'_' + model_fn)) 56 | if is_best: 57 | shutil.copyfile(join(model_dir,str(state['epoch'])+'_' + model_fn), join(model_dir,'best_' +'epoch'+str(state['epoch'])+ model_fn)) 58 | else: 59 | if is_best: 60 | torch.save(state, file) 61 | shutil.copyfile(file, join(model_dir,'best_' +'dccnet'+'.pth.tar')) 62 | 63 | def str_to_bool(v): 64 | if v.lower() in ('yes', 'true', 't', 'y', '1'): 65 | return True 66 | elif v.lower() in ('no', 'false', 'f', 'n', '0'): 67 | return False 68 | else: 69 | raise argparse.ArgumentTypeError('Boolean value expected.') 70 | 71 | def expand_dim(tensor,dim,desired_dim_len): 72 | sz = list(tensor.size()) 73 | sz[dim]=desired_dim_len 74 | return tensor.expand(tuple(sz)) 75 | -------------------------------------------------------------------------------- /lib/transformation.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function, division 2 | import os 3 | import sys 4 | from skimage import io 5 | import pandas as pd 6 | import numpy as np 7 | import torch 8 | from torch.nn.modules.module import Module 9 | from torch.utils.data import Dataset 10 | from torch.autograd import Variable 11 | import torch.nn.functional as F 12 | 13 | from lib.torch_util import expand_dim 14 | 15 | class AffineTnf(object): 16 | def __init__(self, out_h=240, out_w=240, use_cuda=True): 17 | self.out_h = out_h 18 | self.out_w = out_w 19 | self.use_cuda = use_cuda 20 | self.gridGen = AffineGridGen(out_h=out_h, out_w=out_w, use_cuda=use_cuda) 21 | self.theta_identity = torch.Tensor(np.expand_dims(np.array([[1,0,0],[0,1,0]]),0).astype(np.float32)) 22 | if use_cuda: 23 | self.theta_identity = self.theta_identity.cuda() 24 | 25 | def __call__(self, image_batch, theta_batch=None, out_h=None, out_w=None): 26 | if image_batch is None: 27 | b=1 28 | else: 29 | b=image_batch.size(0) 30 | if theta_batch is None: 31 | theta_batch = self.theta_identity 32 | theta_batch = theta_batch.expand(b,2,3).contiguous() 33 | theta_batch = Variable(theta_batch,requires_grad=False) 34 | 35 | # check if output dimensions have been specified at call time and have changed 36 | if (out_h is not None and out_w is not None) and (out_h!=self.out_h or out_w!=self.out_w): 37 | 
gridGen = AffineGridGen(out_h, out_w) 38 | else: 39 | gridGen = self.gridGen 40 | 41 | sampling_grid = gridGen(theta_batch) 42 | 43 | # sample transformed image 44 | warped_image_batch = F.grid_sample(image_batch, sampling_grid) 45 | 46 | return warped_image_batch 47 | 48 | 49 | class AffineGridGen(Module): 50 | def __init__(self, out_h=240, out_w=240, out_ch = 3, use_cuda=True): 51 | super(AffineGridGen, self).__init__() 52 | self.out_h = out_h 53 | self.out_w = out_w 54 | self.out_ch = out_ch 55 | 56 | def forward(self, theta): 57 | b=theta.size()[0] 58 | if not theta.size()==(b,2,3): 59 | theta = theta.view(-1,2,3) 60 | theta = theta.contiguous() 61 | batch_size = theta.size()[0] 62 | out_size = torch.Size((batch_size,self.out_ch,self.out_h,self.out_w)) 63 | return F.affine_grid(theta, out_size) 64 | -------------------------------------------------------------------------------- /lib/tss_dataset.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function, division 2 | import os 3 | import torch 4 | from torch.autograd import Variable 5 | from skimage import io 6 | import pandas as pd 7 | import numpy as np 8 | from torch.utils.data import Dataset 9 | from geotnf.transformation import GeometricTnf 10 | from geotnf.flow import read_flo_file 11 | 12 | class TSSDataset(Dataset): 13 | 14 | """ 15 | 16 | TSS image pair dataset 17 | 18 | http://taniai.space/projects/cvpr16_dccs/ 19 | 20 | 21 | Args: 22 | csv_file (string): Path to the csv file with image names and annotation files. 23 | dataset_path (string): Directory with the images. 24 | output_size (2-tuple): Desired output size 25 | transform (callable): Transformation for post-processing the training pair (eg. image normalization) 26 | 27 | """ 28 | 29 | def __init__(self, csv_file, dataset_path,output_size=(240,240),transform=None): 30 | 31 | self.out_h, self.out_w = output_size 32 | self.pairs = pd.read_csv(csv_file) 33 | self.img_A_names = self.pairs.iloc[:,0] 34 | self.img_B_names = self.pairs.iloc[:,1] 35 | self.flow_direction = self.pairs.iloc[:, 2].as_matrix().astype('int') 36 | self.flip_img_A = self.pairs.iloc[:, 3].as_matrix().astype('int') 37 | self.pair_category = self.pairs.iloc[:, 4].as_matrix().astype('int') 38 | self.dataset_path = dataset_path 39 | self.transform = transform 40 | # no cuda as dataset is called from CPU threads in dataloader and produces confilct 41 | self.affineTnf = GeometricTnf(out_h=self.out_h, out_w=self.out_w, use_cuda = False) 42 | 43 | def __len__(self): 44 | return len(self.pairs) 45 | 46 | def __getitem__(self, idx): 47 | # get pre-processed images 48 | flip_img_A = self.flip_img_A[idx] 49 | image_A,im_size_A = self.get_image(self.img_A_names,idx,flip_img_A) 50 | image_B,im_size_B = self.get_image(self.img_B_names,idx) 51 | 52 | # get flow output path 53 | flow_path = self.get_GT_flow_relative_path(idx) 54 | 55 | sample = {'source_image': image_A, 'target_image': image_B, 'source_im_size': im_size_A, 'target_im_size': im_size_B, 'flow_path': flow_path} 56 | 57 | # # get ground-truth flow 58 | # flow = self.get_GT_flow(idx) 59 | 60 | # sample = {'source_image': image_A, 'target_image': image_B, 'source_im_size': im_size_A, 'target_im_size': im_size_B, 'flow_GT': flow} 61 | 62 | if self.transform: 63 | sample = self.transform(sample) 64 | 65 | return sample 66 | 67 | def get_image(self,img_name_list,idx,flip=False): 68 | img_name = os.path.join(self.dataset_path, img_name_list[idx]) 69 | image = io.imread(img_name) 70 | 71 | # if 
grayscale convert to 3-channel image 72 | if image.ndim==2: 73 | image=np.repeat(np.expand_dims(image,2),axis=2,repeats=3) 74 | 75 | # flip horizontally if needed 76 | if flip: 77 | image=np.flip(image,1) 78 | 79 | # get image size 80 | im_size = np.asarray(image.shape) 81 | 82 | # convert to torch Variable 83 | image = np.expand_dims(image.transpose((2,0,1)),0) 84 | image = torch.Tensor(image.astype(np.float32)) 85 | image_var = Variable(image,requires_grad=False) 86 | 87 | # Resize image using bilinear sampling with identity affine tnf 88 | image = self.affineTnf(image_var).data.squeeze(0) 89 | 90 | im_size = torch.Tensor(im_size.astype(np.float32)) 91 | 92 | return (image, im_size) 93 | 94 | def get_GT_flow(self,idx): 95 | img_folder = os.path.dirname(self.img_A_names[idx]) 96 | flow_dir = self.flow_direction[idx] 97 | flow_file = 'flow'+str(flow_dir)+'.flo' 98 | flow_file_path = os.path.join(self.dataset_path, img_folder , flow_file) 99 | 100 | flow = torch.FloatTensor(read_flo_file(flow_file_path)) 101 | 102 | return flow 103 | 104 | def get_GT_flow_relative_path(self,idx): 105 | img_folder = os.path.dirname(self.img_A_names[idx]) 106 | flow_dir = self.flow_direction[idx] 107 | flow_file = 'flow'+str(flow_dir)+'.flo' 108 | flow_file_path = os.path.join(img_folder , flow_file) 109 | 110 | return flow_file_path 111 | -------------------------------------------------------------------------------- /models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ShuaiyiHuang/DCCNet/e4f1932896153f535ba42c92f74980e3be27cefe/models/__init__.py -------------------------------------------------------------------------------- /models/dynamic_fusion_att.py: -------------------------------------------------------------------------------- 1 | # Shuaiyi Huang 2 | # Dynamic Fusion Network based on scale attention 3 | 4 | import torch 5 | import torch.nn as nn 6 | import torch.nn.functional as Func 7 | from lib.modules import NeighConsensus 8 | 9 | 10 | class DynamicFusionNet(nn.Module): 11 | ''' 12 | Generate attention maps to dynamically fuse S kind of 4D correlation maps based on attention mechanism 13 | Input: 14 | corr_set: tensor of shape (B,S,1,Ha,Wa,Hb,Wb), S is num of scales, S=2 by default (local and context) 15 | Output: 16 | att_maps: attention maps for Image Ia and Ib, list of length 2. att_maps[0] is tensor of shape (B,S,Ha,Wa), S attention maps for Ia. 17 | ''' 18 | 19 | def __init__(self,S = 2, att_scale_ncons_kernel_sizes = None, att_scale_ncons_channels = None): 20 | super(DynamicFusionNet, self).__init__() 21 | 22 | self.S = S # num of scales 23 | att_scale_input_dim = self.S * 25 * 25 24 | 25 | use_cuda = True 26 | 27 | self.extract_corrfeas = NeighConsensus(use_cuda=use_cuda, 28 | kernel_sizes=att_scale_ncons_kernel_sizes, 29 | channels=att_scale_ncons_channels) 30 | 31 | self.att = nn.Sequential( 32 | nn.Conv2d(att_scale_input_dim, S, kernel_size=1, padding=0),) 33 | 34 | print('verbose in DynamicFusionNet.....input_attscale_dim {}, scale S {},' 35 | 'att_scale_ncons_kernel_sizes {}, att_scale_ncons_channels {}'. 
36 | format(att_scale_input_dim, S,att_scale_ncons_kernel_sizes, att_scale_ncons_channels)) 37 | 38 | return 39 | 40 | def forward(self, corr_set): 41 | 42 | att_maps= self._forward_corr_to_att(corr_set=corr_set) 43 | 44 | return att_maps 45 | 46 | def _forward_feaextracture_by_4D(self,corr_set): 47 | ''' 48 | Feature extraction from 4D corr maps using 4D Conv 49 | Input: 50 | corr_set: tensor of shape (B,S,1,Ha,Wa,Hb,Wb) 51 | Return: 52 | corr_4dfea_set: tensor of shape (B,S,1,Ha,Wa,Hb,Wb) 53 | ''' 54 | S = self.S 55 | B, _, c, Ha, Wa, Hb, Wb = list(corr_set.shape) 56 | assert (_==S) 57 | 58 | fea_in = corr_set.view(B*S,1,Ha,Wa,Hb,Wb) 59 | fea_out = self.extract_corrfeas(fea_in) #(B*S,1,Ha,Wa,Hb,Wb) 60 | corr_4dfea_set = fea_out.view(B,S,1,Ha,Wa,Hb,Wb) 61 | 62 | return corr_4dfea_set 63 | 64 | def _forward_corr_to_att(self, corr_set): 65 | ''' 66 | Given S scales 4D correlation maps of Ia and Ib, generate attention maps for Ia and Ib. 67 | 68 | Inputs: 69 | corr_set: tensor of shape (B,S,1,Ha,Wa,Hb,Wb) 70 | Return: 71 | A_scaleatts_set: tensor of shape (B,S,H,W) with att from scale=1 to scale=S 72 | B_scaleatts_set: tensor of shape (B,S,H,W) with att from scale=1 to scale=S 73 | ''' 74 | # Num of scale 75 | S = self.S 76 | 77 | if type(corr_set) ==list: 78 | assert (len(corr_set[0].shape)==6) 79 | corr_set = torch.stack(corr_set,dim=1) #(B,S,1,Ha,Wa,Hb,Wb) 80 | 81 | # Apply 4D conv for corrmap feature extraction 82 | corr_set = self._forward_feaextracture_by_4D(corr_set=corr_set) 83 | 84 | B,_,c,Ha,Wa,Hb,Wb = list(corr_set.shape) 85 | La = Ha*Wa 86 | Lb = Hb*Wb 87 | assert (len(corr_set.shape)==7 and (_==S) and (c==1)) 88 | 89 | corr_set = corr_set.squeeze(2) #(B,S,Ha,Wa,Hb,Wb) 90 | assert (corr_set.shape==(B,S,Ha,Wa,Hb,Wb)) 91 | 92 | # prepare tensor A_att_in of shape [B,SxLB,HA,WA] 93 | A_att_in = corr_set.view(B,S,Ha,Wa,Hb*Wb).permute(0,1,4,2,3).contiguous() 94 | assert (A_att_in.shape==(B,S,Lb,Ha,Wa)) 95 | A_att_in = A_att_in.view(B,S*Lb,Ha,Wa) 96 | 97 | # prepare tensor B_att_in of shape [B,SxLA,HB,WB] 98 | B_att_in = corr_set.view(B,S,Ha*Wa,Hb,Wb) 99 | assert (B_att_in.shape==(B,S,La,Hb,Wb)) 100 | B_att_in = B_att_in.view(B,S*La,Hb,Wb) 101 | 102 | # compute att maps 103 | A_scaleatts_set = self.att.forward(A_att_in) #(B,S*Lb,Ha,Wa)->(B,S,Ha,Wa) 104 | B_scaleatts_set = self.att.forward(B_att_in) #(B,S*La,Hb,Wb)->(B,S,Hb,Wb) 105 | 106 | assert (A_scaleatts_set.shape==(B,S,Ha,Wa)) 107 | assert (B_scaleatts_set.shape==(B,S,Hb,Wb)) 108 | 109 | A_scaleatts_set = Func.softmax(A_scaleatts_set.view(B,S,Ha*Wa),dim=1).view(B,S,Ha,Wa) 110 | B_scaleatts_set = Func.softmax(B_scaleatts_set.view(B, S, Hb * Wb), dim=1).view(B, S, Hb, Wb) 111 | 112 | return A_scaleatts_set,B_scaleatts_set 113 | 114 | 115 | if __name__ == '__main__': 116 | print() -------------------------------------------------------------------------------- /models/loss_dynamic.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | # Shuaiyi Huang 4 | # Dynamic loss 5 | 6 | def weak_loss(model, batch, normalization='softmax', scaleloss_weight=None): 7 | b = batch['source_image'].size(0) 8 | 9 | #positive 10 | score_pos_merge, score_pos_overscales = weak_loss_singlebatch(model=model,batch=batch,normalization=normalization,) 11 | 12 | #negative 13 | batch['source_image'] = batch['source_image'][np.roll(np.arange(b), -1), :] # roll 14 | score_neg_merge, score_neg_overscales = weak_loss_singlebatch(model=model, batch=batch,normalization=normalization, ) 15 | 16 | 
# loss 17 | loss_merge = score_neg_merge - score_pos_merge 18 | 19 | if scaleloss_weight: 20 | loss_scales = torch.sum(torch.cat(score_neg_overscales))-torch.sum(torch.cat(score_pos_overscales)) 21 | loss = loss_merge+scaleloss_weight*loss_scales 22 | else: 23 | loss = loss_merge 24 | 25 | return loss 26 | 27 | 28 | def weak_loss_singlebatch(model,batch,normalization='softmax', alpha=30): 29 | # positive 30 | out = model(batch) 31 | 32 | # corr_out_set = out['corr'] #(B,S,1,Ha,Wa,Hb,Wb) 33 | # A_scaleatts_set = out['scaleatt']['A'] #(B,S,Ha,Wa) 34 | # B_scaleatts_set = out['scaleatt']['B'] #(B,S,Hb,Wb) 35 | 36 | corr_out_set, A_scaleatts_set, B_scaleatts_set = out 37 | 38 | B,S,_,Ha,Wa,Hb,Wb = corr_out_set.shape 39 | 40 | score_pos_overscales = [] 41 | M_A_norm_overscales = [] 42 | M_B_norm_overscales = [] 43 | for si in range(S): #iterate over scales 44 | corr_out_si = corr_out_set[:,si,:,:,:,:,:].contiguous() #todo why 45 | score_pos_si,M_A_norm,M_B_norm = score_for_single_corr4d(corr4d=corr_out_si,normalization=normalization) #MA_norm:(B,LB,Ha,Wa) 46 | 47 | #add 48 | score_pos_overscales.append(score_pos_si) 49 | M_A_norm_overscales.append(M_A_norm) 50 | M_B_norm_overscales.append(M_B_norm) 51 | 52 | M_A_norm_overscales = torch.stack(M_A_norm_overscales,dim=1) #(B,LB,Ha,Wa)->(B,S,LB,Ha,Wa) 53 | M_B_norm_overscales = torch.stack(M_B_norm_overscales, dim=1) #(B,LA,Hb,Wb)->(B,S,LA,Hb,Wb) 54 | 55 | #merge scoremap using atts 56 | MergedA = torch.sum(M_A_norm_overscales*A_scaleatts_set.view(B,S,1,Ha,Wa),dim=1) #(B,LB,Ha,Wa) 57 | MergedB = torch.sum(M_B_norm_overscales * B_scaleatts_set.view(B, S, 1, Hb, Wb), dim=1) #(B,LA,Hb,Wb) 58 | 59 | # compute matching scores 60 | scores_B_merge, _ = torch.max(MergedB, dim=1) 61 | scores_A_merge, _ = torch.max(MergedA, dim=1) 62 | score_pos_merge = torch.mean(scores_A_merge + scores_B_merge) / 2 63 | 64 | return score_pos_merge,score_pos_overscales 65 | 66 | def score_for_single_corr4d(corr4d,normalization='softmax'): 67 | if normalization is None: 68 | normalize = lambda x: x 69 | elif normalization == 'softmax': 70 | normalize = lambda x: torch.nn.functional.softmax(x, 1) 71 | elif normalization == 'l1': 72 | normalize = lambda x: x / (torch.sum(x, dim=1, keepdim=True) + 0.0001) 73 | 74 | batch_size = corr4d.size(0) 75 | feature_size = corr4d.size(2) 76 | nc_B_Avec = corr4d.view(batch_size, feature_size * feature_size, feature_size, 77 | feature_size) # [batch_idx,k_A,i_B,j_B] (B,LA,HB,WB) 78 | nc_A_Bvec = corr4d.view(batch_size, feature_size, feature_size, feature_size * feature_size).permute(0, 3, 1, 2) #(B,LB,HA,WA) 79 | 80 | #normalize 81 | nc_B_Avec = normalize(nc_B_Avec) 82 | nc_A_Bvec = normalize(nc_A_Bvec) 83 | 84 | # compute matching scores 85 | scores_B, _ = torch.max(nc_B_Avec, dim=1) 86 | scores_A, _ = torch.max(nc_A_Bvec, dim=1) 87 | score_pos = torch.mean(scores_A + scores_B) / 2 88 | 89 | return score_pos,nc_A_Bvec,nc_B_Avec 90 | -------------------------------------------------------------------------------- /models/model_dynamic.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function, division 2 | import torch 3 | import torch.nn as nn 4 | from collections import OrderedDict 5 | 6 | from lib.conv4d import Conv4d 7 | from lib.modules import FeatureExtraction,NeighConsensus,MutualMatching,FeatureCorrelation 8 | 9 | from models.spatial_context_encoder import SpatialContextEncoder 10 | from models.dynamic_fusion_att import DynamicFusionNet 11 | 12 | class 
DCCNet(nn.Module): 13 | def __init__(self, 14 | feature_extraction_cnn='resnet101', 15 | feature_extraction_last_layer='', 16 | feature_extraction_model_file=None, 17 | ncons_kernel_sizes=[3,3,3], 18 | ncons_channels=[10,10,1], 19 | normalize_features=True, 20 | train_fe=False, 21 | use_cuda=True, 22 | half_precision=False, 23 | checkpoint=None, 24 | 25 | sce_kernel_size = None, 26 | sce_hidden_dim=None, 27 | 28 | att_scale_ncons_kernel_sizes = None, #hsy 0316 29 | att_scale_ncons_channels = None, 30 | 31 | ): 32 | 33 | super(DCCNet, self).__init__() 34 | 35 | 36 | self.use_cuda = use_cuda 37 | self.normalize_features = normalize_features 38 | 39 | self.half_precision = half_precision 40 | 41 | self.FeatureExtraction = FeatureExtraction(train_fe=train_fe, 42 | feature_extraction_cnn=feature_extraction_cnn, 43 | feature_extraction_model_file=feature_extraction_model_file, 44 | last_layer=feature_extraction_last_layer, 45 | normalization=normalize_features, 46 | use_cuda=self.use_cuda) 47 | 48 | self.FeatureCorrelation = FeatureCorrelation(shape='4D', normalization=False) 49 | self.SpatialContextEncoder = SpatialContextEncoder(kernel_size=sce_kernel_size, input_dim=sce_kernel_size * sce_kernel_size + 1024, hidden_dim=sce_hidden_dim) 50 | 51 | self.NeighConsensus = NeighConsensus(use_cuda=self.use_cuda, 52 | kernel_sizes=ncons_kernel_sizes, 53 | channels=ncons_channels) 54 | 55 | self.DynamicFusionNet = DynamicFusionNet(att_scale_ncons_kernel_sizes=att_scale_ncons_kernel_sizes, att_scale_ncons_channels=att_scale_ncons_channels) 56 | self.DynamicFusionNet.cuda() 57 | 58 | ################################################# 59 | 60 | # Load weights 61 | if checkpoint is not None and checkpoint is not '': 62 | print('Loading checkpoint from{}...'.format(checkpoint)) 63 | checkpoint = torch.load(checkpoint, map_location=lambda storage, loc: storage) 64 | checkpoint['state_dict'] = OrderedDict( 65 | [(k.replace('vgg', 'model'), v) for k, v in checkpoint['state_dict'].items()]) 66 | 67 | # process dataparallel 68 | ckpt_statedict = OrderedDict() 69 | for k, v in checkpoint['state_dict'].items(): 70 | if k[:7] == 'module.': 71 | name = k[7:] # remove `module.` 72 | else: 73 | name = k 74 | 75 | ckpt_statedict[name] = v 76 | 77 | print('Copying weights...') 78 | self.load_state_dict(ckpt_statedict, strict=True) 79 | 80 | print('Done!') 81 | 82 | self.FeatureExtraction.eval() 83 | 84 | if self.half_precision: 85 | for p in self.NeighConsensus.parameters(): 86 | p.data=p.data.half() 87 | for l in self.NeighConsensus.conv: 88 | if isinstance(l,Conv4d): 89 | l.use_half=True 90 | 91 | 92 | # used only for foward pass at eval and for training with strong supervision 93 | def forward(self, tnf_batch): 94 | #Part1-a: extract features of different scales 95 | feat_set,corr_in_set = self.feat_compute_main(tnf_batch=tnf_batch) 96 | 97 | #Part2: Neighborhood concensus to produce score maps of different scales 98 | corr_out_set = self.scoremaps_compute_main(corr_in_set=corr_in_set) 99 | 100 | #Part3: Attention scale module 101 | A_scaleatts_set, B_scaleatts_set = self.scaleatt_compute_main(corr_out_set=corr_out_set) 102 | 103 | out = (corr_out_set,A_scaleatts_set,B_scaleatts_set) 104 | 105 | return out 106 | 107 | def feat_compute_main(self,tnf_batch): 108 | # scale1--local conv feature 109 | feature_A = self.FeatureExtraction(tnf_batch['source_image']) 110 | feature_B = self.FeatureExtraction(tnf_batch['target_image']) 111 | if self.half_precision: 112 | feature_A = feature_A.half() 113 | feature_B = 
feature_B.half() 114 | 115 | corr_lc = self.FeatureCorrelation(feature_A=feature_A,feature_B=feature_B) 116 | 117 | # scale2--context-aware semantic feature 118 | 119 | feature_A_embd = self.SpatialContextEncoder(feature_A) 120 | feature_B_embd = self.SpatialContextEncoder(feature_B) 121 | corr_embd = self.FeatureCorrelation(feature_A=feature_A_embd,feature_B=feature_B_embd) 122 | 123 | # output 124 | feat_scale_lc = torch.stack([feature_A,feature_B],dim=1) #(B,C1,H,W)->(B,2,C1,H,W) 125 | feat_scale_embd = torch.stack([feature_A_embd,feature_B_embd],dim=1) #(B,C2,H,W)->(B,2,C2,H,W) 126 | 127 | feat_set = [feat_scale_lc,feat_scale_embd] 128 | corr_set = [corr_lc,corr_embd] 129 | 130 | return feat_set,corr_set 131 | 132 | def scaleatt_compute_main(self,corr_out_set): 133 | A_scaleatts_set, B_scaleatts_set = self.DynamicFusionNet.forward(corr_out_set) 134 | 135 | return A_scaleatts_set, B_scaleatts_set 136 | 137 | def scoremaps_compute_main(self, corr_in_set): 138 | # Return list of 4D corrmap: [(B,1,Ha,Wa,Hb,Wb) for scale1, (B,1,Ha,Wa,Hb,Wb) for scale2,...] 139 | 140 | S = len(corr_in_set) 141 | corr_out_set = [] 142 | B,_,Ha,Wa,Hb,Wb = corr_in_set[0].shape 143 | 144 | for si in range(S): #iterate over scales 145 | corr4d_in_si = corr_in_set[si] 146 | corr4d_out_si = self.run_match_model(corr4d=corr4d_in_si) 147 | corr_out_set.append(corr4d_out_si) 148 | 149 | corr_out_set = torch.stack(corr_out_set,dim=1) #(B,S,1,Ha,Wa,Hb,Wb) 150 | assert (corr_out_set.shape==(B,S,1,Ha,Wa,Hb,Wb)),'corr_out_set shape {} is not consistent with{},{},1,{},{},{},{}'.format(corr_out_set.shape, 151 | B,S,Ha,Wa,Hb,Wb) 152 | 153 | return corr_out_set 154 | 155 | def run_match_model(self,corr4d): 156 | 157 | corr4d = MutualMatching(corr4d) 158 | 159 | corr4d = self.NeighConsensus(corr4d) 160 | 161 | corr4d = MutualMatching(corr4d) 162 | 163 | return corr4d -------------------------------------------------------------------------------- /models/sce_efficient.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Shuaiyi Huang 3 | Implement Spatial Context Encoder Efficient Version. Not used. 4 | ''' 5 | 6 | import torch 7 | import torch.nn as nn 8 | import torch.nn.functional as Func 9 | from torch.autograd import Variable 10 | from torch.nn.modules.utils import _quadruple 11 | import numpy as np 12 | 13 | def global_spatial_representation_efficient(data,kernel_size): 14 | ''' 15 | 2019-04-27 Applies self local similarity with fixed sliding window. Efficient version. 
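    Rather than visiting every spatial location separately, the zero-padded feature map is
    shifted by each (dy,dx) offset inside the window and channel-wise dot products with the
    centre crop are accumulated for all locations at once.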
16 | Args: 17 | data: featuren map, variable of shape (b,c,h,w) 18 | kernel_size: width/heigh of local window, int 19 | Returns: 20 | output: global spatial map, variable of shape (b,k^2,h,w) 21 | ''' 22 | 23 | padding = int(kernel_size//2) #5.7//2 = 2.0, 5.2//2 = 2.0 24 | b, c, h, w = data.shape 25 | p2d = _quadruple(padding) #(pad_l,pad_r,pad_t,pad_b) 26 | data_padded = Func.pad(data,p2d,'constant',0) #output variable 27 | assert data_padded.shape==(b,c,(h+2*padding),(w+2*padding)),'Error: data_padded shape{} wrong!'.format(data_padded.shape) 28 | 29 | output = Variable(torch.zeros(b,kernel_size*kernel_size,h,w),requires_grad = data.requires_grad) 30 | if data.is_cuda: 31 | output = output.cuda(data.get_device()) 32 | 33 | xs,xe = padding,w+padding 34 | ys,ye = padding,h+padding 35 | patch_center = data_padded[:,:,ys:ye,xs:xe] 36 | 37 | i = 0 38 | for dy in np.arange(-padding,padding+1): 39 | for dx in np.arange(-padding,padding+1): 40 | hs = ys+dy 41 | he = ye+dy 42 | ws = xs+dx 43 | we = xe+dx 44 | 45 | patch_neighbor = data_padded[:,:,hs:he,ws:we] #(b,c,h,w) 46 | correlation_tensor = torch.sum(patch_neighbor*patch_center,dim=1) 47 | output[:, i, :, :] = correlation_tensor 48 | i+=1 49 | 50 | return output 51 | 52 | def featureL2Norm(feature): 53 | epsilon = 1e-6 54 | norm = torch.pow(torch.sum(torch.pow(feature,2),1)+epsilon,0.5).unsqueeze(1).expand_as(feature) 55 | return torch.div(feature,norm) 56 | 57 | 58 | class SpatialContextEncoderEfficient(nn.Module): 59 | def __init__(self,kernel_size,input_dim,hidden_dim): 60 | super(SpatialContextEncoderEfficient, self).__init__() 61 | self.embeddingFea = nn.Sequential( 62 | nn.Conv2d(input_dim, hidden_dim, kernel_size=1, padding=0), 63 | nn.ReLU(inplace=True), 64 | ) 65 | 66 | self.kernel_size = kernel_size 67 | print('verbose...SpatialContextEncoderEfficientBlock with input_dim {},hidden_dim {}'.format(input_dim,hidden_dim)) 68 | 69 | def forward(self,x): 70 | kernel_size = self.kernel_size 71 | feature_gs = global_spatial_representation_efficient(x,kernel_size=kernel_size) 72 | 73 | #Add L2norm 74 | feature_gs = featureL2Norm(feature_gs) 75 | 76 | #concatenate 77 | feature_cat = torch.cat([x, feature_gs], 1) 78 | 79 | # embed 80 | feature_embd = self.embeddingFea(feature_cat) 81 | 82 | return feature_embd 83 | 84 | if __name__ == '__main__': 85 | print() 86 | import time 87 | 88 | b,c,h,w = 1,1024,25,25 89 | data_a = Variable(torch.rand(b,c,h,w)) 90 | data_b = Variable(torch.rand(b,c,h,w)) 91 | obj = SpatialContextEncoderEfficient(kernel_size=25, input_dim=1024+25*25, hidden_dim=1024) 92 | 93 | st = time.time() 94 | out = obj.forward(data_a) 95 | et= time.time() 96 | print('verbose log..', data_a.mean(), data_b.mean(), out.mean(), out.shape, 'time', et - st) -------------------------------------------------------------------------------- /models/spatial_context_encoder.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Shuaiyi Huang 3 | Implement Spatial Context Encoder. 4 | ''' 5 | 6 | import torch 7 | import torch.nn as nn 8 | import torch.nn.functional as Func 9 | from torch.autograd import Variable 10 | from torch.nn.modules.utils import _quadruple 11 | 12 | 13 | def generate_spatial_descriptor(data, kernel_size): 14 | ''' 15 | Applies self local similarity with fixed sliding window. 
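    At each location the feature vector is correlated with every feature inside a
    kernel_size x kernel_size window centred on it, giving a kernel_size**2-dimensional
    self-similarity descriptor per location.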
16 |     Args:
17 |         data: feature map, variable of shape (b,c,h,w)
18 |         kernel_size: width/height of local window, int
19 | 
20 |     Returns:
21 |         output: global spatial map, variable of shape (b,k^2,h,w)
22 |     '''
23 | 
24 |     padding = int(kernel_size//2) # e.g. kernel_size=5 -> padding=2
25 |     b, c, h, w = data.shape
26 |     p2d = _quadruple(padding) #(pad_l,pad_r,pad_t,pad_b)
27 |     data_padded = Func.pad(data,p2d,'constant',0) #output variable
28 |     assert data_padded.shape==(b,c,(h+2*padding),(w+2*padding)),'Error: data_padded shape{} wrong!'.format(data_padded.shape)
29 | 
30 |     output = Variable(torch.zeros(b,kernel_size*kernel_size,h,w),requires_grad = data.requires_grad)
31 |     if data.is_cuda:
32 |         output = output.cuda(data.get_device())
33 | 
34 |     for hi in range(h):
35 |         for wj in range(w):
36 |             q = data[:,:,hi,wj].contiguous() #(b,c)
37 |             i = hi+padding #h index in datapadded
38 |             j = wj+padding #w index in datapadded
39 | 
40 |             hs = i-padding
41 |             he = i+padding+1
42 |             ws = j-padding
43 |             we = j + padding + 1
44 |             patch = data_padded[:,:,hs:he,ws:we].contiguous() #(b,c,k,k)
45 |             assert (patch.shape==(b,c,kernel_size,kernel_size))
46 |             hk,wk = kernel_size,kernel_size
47 | 
48 |             # reshape features for matrix multiplication
49 |             feature_a =q.view(b,c,1*1).transpose(1,2) #(b,1,c) input is not contiguous
50 |             feature_b = patch.view(b,c,hk*wk) #(b,c,L)
51 | 
52 |             # perform matrix mult.
53 |             feature_mul = torch.bmm(feature_a,feature_b) #(b,1,L)
54 |             assert (feature_mul.shape==(b,1,hk*wk))
55 |             # flatten the local similarity scores for this location
56 |             correlation_tensor = feature_mul.view(b,-1) #(b,L)
57 |             output[:,:,hi,wj] = correlation_tensor
58 | 
59 |     return output
60 | 
61 | 
62 | def featureL2Norm(feature):
63 |     epsilon = 1e-6
64 |     norm = torch.pow(torch.sum(torch.pow(feature,2),1)+epsilon,0.5).unsqueeze(1).expand_as(feature)
65 |     return torch.div(feature,norm)
66 | 
67 | 
68 | class SpatialContextEncoder(torch.nn.Module):
69 |     '''
70 |     Spatial Context Encoder.
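    Minimal usage sketch (kernel size and hidden dim follow scripts/train_dccnet.sh; the 1024
    input channels are assumed; note that __init__ moves the embedding conv to the GPU, so a
    CUDA input is expected):
        sce = SpatialContextEncoder(kernel_size=25, input_dim=1024+25*25, hidden_dim=1024)
        feat = Variable(torch.rand(1, 1024, 25, 25)).cuda()
        out = sce(feat)  # x concatenated with its 625-channel self-similarity descriptor, embedded to (1,1024,25,25)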
71 |     Author: Shuaiyi Huang
72 |     Input:
73 |         x: feature of shape (b,c,h,w)
74 |     Output:
75 |         feature_embd: context-aware semantic feature of shape (b,hidden_dim,h,w), obtained by embedding the concatenation of x and its k**2-channel spatial descriptor with a 1x1 conv, where k is the kernel size of the spatial descriptor
76 |     '''
77 |     def __init__(self, kernel_size=None,input_dim = None,hidden_dim=None):
78 |         super(SpatialContextEncoder, self).__init__()
79 |         self.embeddingFea = nn.Sequential(
80 |             nn.Conv2d(input_dim, hidden_dim, kernel_size=1, padding=0),
81 |             nn.ReLU(inplace=True),
82 |         )
83 |         self.embeddingFea.cuda()
84 |         self.kernel_size = kernel_size
85 |         print('SpatialContextEncoder initialization: input_dim {},hidden_dim {}'.format(input_dim,hidden_dim))
86 | 
87 |         return
88 | 
89 |     def forward(self, x):
90 | 
91 |         kernel_size = self.kernel_size
92 |         feature_gs = generate_spatial_descriptor(x, kernel_size=kernel_size)
93 | 
94 |         #Add L2norm
95 |         feature_gs = featureL2Norm(feature_gs)
96 | 
97 |         #concatenate
98 |         feature_cat = torch.cat([x,feature_gs],1)
99 | 
100 |         #embed
101 |         feature_embd = self.embeddingFea(feature_cat)
102 | 
103 |         return feature_embd
104 | 
105 | 
106 | 
107 | if __name__ == '__main__':
108 |     print()
--------------------------------------------------------------------------------
/scripts/train_dccnet.sh:
--------------------------------------------------------------------------------
1 | cd ../
2 | python train_dccnet.py --exp_name iccv2019_dccnet --num_epochs 5 --sce_hidden_dim 1024 --sce_kernel_size 25 --scaleloss_weight 1.0
3 | 
--------------------------------------------------------------------------------
/train_dccnet.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function, division
2 | import os
3 | from os.path import exists, join, basename, dirname
4 | from os import makedirs
5 | import numpy as np
6 | import datetime
7 | import time
8 | import argparse
9 | 
10 | import torch
11 | import torch.nn as nn
12 | import torch.optim as optim
13 | 
14 | from lib.dataloader import DataLoader
15 | from lib.im_pair_dataset import ImagePairDataset
16 | from lib.normalization import NormalizeImageDict
17 | from lib.torch_util import save_checkpoint
18 | from lib.torch_util import BatchTensorToVars
19 | from lib.eval_util_dynamic import pfdataset_pck, pfpascal_val_dataloader
20 | 
21 | # import DCCNet
22 | from models.model_dynamic import DCCNet
23 | from models.loss_dynamic import weak_loss
24 | 
25 | 
26 | # Seed and CUDA
27 | use_cuda = torch.cuda.is_available()
28 | torch.manual_seed(1)
29 | if use_cuda:
30 |     torch.cuda.manual_seed(1)
31 | np.random.seed(1)
32 | 
33 | torch.backends.cudnn.deterministic = True
34 | torch.backends.cudnn.benchmark = False
35 | 
36 | print('DCCNet training script')
37 | 
38 | # Argument parsing
39 | parser = argparse.ArgumentParser(description='DCCNet training on PF-Pascal')
40 | parser.add_argument('--checkpoint', type=str, default='')
41 | parser.add_argument('--image_size', type=int, default=400)
42 | parser.add_argument('--dataset_image_path', type=str, default='datasets/pf-pascal/', help='path to PF Pascal dataset')
43 | parser.add_argument('--dataset_csv_path', type=str, default='datasets/pf-pascal/image_pairs/', help='path to PF Pascal training csv')
44 | parser.add_argument('--num_epochs', type=int, default=5, help='number of training epochs')
45 | parser.add_argument('--batch_size', type=int, default=16, help='training batch size')
46 | parser.add_argument('--lr', type=float, default=0.0005, help='learning rate')
47 | parser.add_argument('--result_model_fn', type=str,
default='checkpoint_adam', help='trained model filename') 48 | parser.add_argument('--result-model-dir', type=str, default='../model/checkpoints', help='path to trained models folder') 49 | parser.add_argument('--fe_finetune_params', type=int, default=0, help='number of layers to finetune') 50 | parser.add_argument('--exp_name', type=str, default='exp_delete', help='experiment name') 51 | 52 | # DCCNet args 53 | parser.add_argument('--ncons_kernel_sizes', nargs='+', type=int, default=[5,5,5], help='kernels sizes in neigh. cons.') 54 | parser.add_argument('--ncons_channels', nargs='+', type=int, default=[16,16,1], help='channels in neigh. cons') 55 | 56 | parser.add_argument('--sce_kernel_size',type=int,default=25,help='kernel size in sce.') 57 | parser.add_argument('--sce_hidden_dim',type=int,default=1024,help='hidden dim in sce') 58 | parser.add_argument('--scaleloss_weight',type=float,default=1.0,help='whether use scale loss, if use the weight for scale loss') 59 | parser.add_argument('--att_scale_ncons_kernel_sizes', nargs='+', type=int, default=[5,5,5], help='kernels sizes in dynamic fusion net.') 60 | parser.add_argument('--att_scale_ncons_channels', nargs='+', type=int, default=[16,16,1], help='channels in dynamic fusion net') 61 | 62 | args = parser.parse_args() 63 | print(args) 64 | 65 | # Create model 66 | print('Creating CNN model...') 67 | model = DCCNet(use_cuda=use_cuda, 68 | checkpoint=args.checkpoint, 69 | ncons_kernel_sizes=args.ncons_kernel_sizes, 70 | ncons_channels=args.ncons_channels, 71 | sce_kernel_size=args.sce_kernel_size, 72 | sce_hidden_dim=args.sce_hidden_dim, 73 | att_scale_ncons_kernel_sizes=args.att_scale_ncons_kernel_sizes, 74 | att_scale_ncons_channels=args.att_scale_ncons_channels, 75 | ) 76 | 77 | #Multi-GPU support 78 | model = nn.DataParallel(model) 79 | 80 | # Set which parts of the model to train 81 | if args.fe_finetune_params>0: 82 | for i in range(args.fe_finetune_params): 83 | for p in model.module.FeatureExtraction.model[-1][-(i+1)].parameters(): 84 | p.requires_grad=True 85 | 86 | print('Trainable parameters:') 87 | count = 0 88 | for i,param in enumerate(model.named_parameters()): 89 | name,p = param 90 | if p.requires_grad: 91 | count+=1 92 | print(str(count)+": "+name+"\t"+str(p.shape)+"\t") 93 | 94 | print(model) 95 | 96 | 97 | # Optimizer 98 | print('using Adam optimizer') 99 | optimizer = optim.Adam(filter(lambda p: p.requires_grad, model.parameters()), lr=args.lr) 100 | 101 | cnn_image_size=(args.image_size,args.image_size) 102 | 103 | Dataset = ImagePairDataset 104 | train_csv = 'train_pairs.csv' 105 | #val_pairs_nocoords.csv: for compute loss, with flip column in csv, no coordinates 106 | #val_pairs.csv: for compute pck, with coordinates 107 | val_nocoordinates_csv = 'val_pairs_nocoords.csv' 108 | val_csv = 'image_pairs/val_pairs.csv' 109 | 110 | 111 | normalization_tnf = NormalizeImageDict(['source_image','target_image']) 112 | batch_preprocessing_fn = BatchTensorToVars(use_cuda=use_cuda) 113 | 114 | # Dataset and dataloader 115 | dataset = Dataset(transform=normalization_tnf, 116 | dataset_image_path=args.dataset_image_path, 117 | dataset_csv_path=args.dataset_csv_path, 118 | dataset_csv_file = train_csv, 119 | output_size=cnn_image_size, 120 | ) 121 | 122 | dataloader = DataLoader(dataset, batch_size=args.batch_size, 123 | shuffle=True, 124 | num_workers=0) 125 | 126 | dataset_val = Dataset(transform=normalization_tnf, 127 | dataset_image_path=args.dataset_image_path, 128 | dataset_csv_path=args.dataset_csv_path, 129 | 
dataset_csv_file=val_nocoordinates_csv, 130 | output_size=cnn_image_size) 131 | 132 | # compute val loss 133 | dataloader_val = DataLoader(dataset_val, batch_size=args.batch_size, 134 | shuffle=True, num_workers=4) 135 | 136 | # compute val pck 137 | dataloader_val_pck = pfpascal_val_dataloader(image_size=args.image_size, eval_dataset_path=args.dataset_image_path, csv_file=val_csv) #load pfpascal val dataset 138 | 139 | # Define checkpoint name 140 | checkpoint_dir = os.path.join(args.result_model_dir,args.exp_name) 141 | checkpoint_name = os.path.join(args.result_model_dir,args.exp_name, 142 | datetime.datetime.now().strftime("%Y-%m-%d_%H:%M")+'_'+args.result_model_fn + '.pth.tar') 143 | log_name = os.path.join(args.result_model_dir,args.exp_name, 'logmain_'+args.exp_name+'.txt') 144 | if not exists(dirname(log_name)): 145 | makedirs(dirname(log_name)) 146 | print('Checkpoint name: '+checkpoint_name) 147 | 148 | # Train 149 | best_val_pck = float("-inf") 150 | 151 | loss_fn = lambda model,batch: weak_loss(model, batch, normalization='softmax', scaleloss_weight=args.scaleloss_weight) 152 | 153 | # define epoch function 154 | def process_epoch(mode,epoch,model,loss_fn,optimizer,dataloader,batch_preprocessing_fn,use_cuda=True,log_interval=50): 155 | epoch_loss = 0 156 | for batch_idx, batch in enumerate(dataloader): 157 | 158 | st = time.time() 159 | 160 | if mode=='train': 161 | optimizer.zero_grad() 162 | tnf_batch = batch_preprocessing_fn(batch) 163 | loss = loss_fn(model,tnf_batch) 164 | loss_np = loss.data.cpu().numpy()[0] 165 | #loss_np = loss.data.cpu().numpy() 166 | epoch_loss += loss_np 167 | if mode=='train': 168 | loss.backward() 169 | optimizer.step() 170 | else: 171 | loss=None 172 | if batch_idx % log_interval == 0: 173 | print(mode.capitalize()+' Epoch: {} [{}/{} ({:.0f}%)]\t\tLoss: {:.12f}\t\tcost time: {:.1f}'.format( 174 | epoch, batch_idx , len(dataloader), 175 | 100. 
* batch_idx / len(dataloader), loss_np,time.time()-st)) 176 | epoch_loss /= len(dataloader) 177 | print(mode.capitalize()+' set: Average loss: {:.12f}'.format(epoch_loss)) 178 | return epoch_loss 179 | 180 | train_loss = np.zeros(args.num_epochs) 181 | val_loss = np.zeros(args.num_epochs) 182 | val_pcks = np.zeros(args.num_epochs) 183 | 184 | model.module.FeatureExtraction.eval() 185 | 186 | 187 | print('Starting training...') 188 | for epoch in range(1, args.num_epochs+1): 189 | st = time.time() 190 | train_loss_curepoch = process_epoch('train',epoch,model,loss_fn,optimizer,dataloader,batch_preprocessing_fn,log_interval=1) 191 | time_train = time.time()-st 192 | 193 | st = time.time() 194 | 195 | val_loss_curepoch = process_epoch('val', epoch, model, loss_fn, optimizer, dataloader_val, batch_preprocessing_fn, log_interval=1) 196 | 197 | time_valloss = time.time()-st 198 | 199 | st = time.time() 200 | val_pck_curepoch = pfdataset_pck(dataloader=dataloader_val_pck,model=model,verbose=False) 201 | time_valpck = time.time()-st 202 | 203 | train_loss[epoch - 1] = train_loss_curepoch 204 | val_loss[epoch - 1] = val_loss_curepoch 205 | val_pcks[epoch-1] = val_pck_curepoch 206 | 207 | # remember best loss 208 | is_best = val_pcks[epoch - 1] > best_val_pck 209 | best_val_pck = max(val_pcks[epoch - 1], best_val_pck) 210 | save_checkpoint({ 211 | 'epoch': epoch, 212 | 'args': args, 213 | 'state_dict': model.state_dict(), 214 | 'optimizer' : optimizer.state_dict(), 215 | 'train_loss': train_loss, 216 | 'val_loss': val_loss, 217 | 'val_pck': val_pcks, 218 | 'best_val_pck':best_val_pck, 219 | }, is_best,checkpoint_name,save_all_epochs=False) 220 | 221 | message = 'Epoch{}\tTrain_loss{:.6f}\tcost time{:.1f}\tVal_loss{:.6f}\tcost time{:.1f}\tVal_pck{:.6f}\tcost time{:.1f}\n'.format\ 222 | (epoch, train_loss_curepoch, time_train, val_loss_curepoch, time_valloss,val_pck_curepoch,time_valpck,) 223 | print(message) 224 | with open(log_name, "a") as log_file: 225 | log_file.write('%s\n' % message) 226 | 227 | 228 | print('Done!') 229 | -------------------------------------------------------------------------------- /trained_models/.gitignore: -------------------------------------------------------------------------------- 1 | # Ignore everything in this directory 2 | * 3 | # Except this file 4 | !.gitignore 5 | !download.sh 6 | -------------------------------------------------------------------------------- /trained_models/download.sh: -------------------------------------------------------------------------------- 1 | wget https://xxx/best_dccnet.pth.tar 2 | --------------------------------------------------------------------------------