├── .gitignore
├── pytorch-CycleGAN-and-pix2pix
│   ├── .Rhistory
│   ├── scripts
│   │   ├── install_deps.sh
│   │   ├── test_colorization.sh
│   │   ├── train_colorization.sh
│   │   ├── test_cyclegan.sh
│   │   ├── train_cyclegan.sh
│   │   ├── test_pix2pix.sh
│   │   ├── test_single.sh
│   │   ├── train_pix2pix.sh
│   │   ├── conda_deps.sh
│   │   ├── eval_cityscapes
│   │   │   ├── download_fcn8s.sh
│   │   │   ├── util.py
│   │   │   ├── evaluate.py
│   │   │   └── cityscapes.py
│   │   ├── download_pix2pix_model.sh
│   │   ├── download_cyclegan_model.sh
│   │   ├── edges
│   │   │   ├── PostprocessHED.m
│   │   │   └── batch_hed.py
│   │   └── test_before_push.py
│   ├── requirements.txt
│   ├── util
│   │   ├── __init__.py
│   │   ├── image_pool.py
│   │   ├── html.py
│   │   ├── util.py
│   │   └── get_data.py
│   ├── options
│   │   ├── __init__.py
│   │   ├── test_options.py
│   │   └── train_options.py
│   ├── environment.yml
│   ├── docs
│   │   ├── Dockerfile
│   │   ├── docker.md
│   │   └── datasets.md
│   ├── .gitignore
│   ├── data
│   │   ├── single_dataset.py
│   │   ├── image_folder.py
│   │   ├── aligned_dataset.py
│   │   ├── colorization_dataset.py
│   │   ├── unaligned_dataset.py
│   │   ├── template_dataset.py
│   │   ├── __init__.py
│   │   └── base_dataset.py
│   ├── models
│   │   ├── colorization_model.py
│   │   ├── __init__.py
│   │   ├── test_model.py
│   │   ├── template_model.py
│   │   └── pix2pix_model.py
│   ├── LICENSE
│   ├── test.py
│   ├── readme.txt
│   ├── train.py
│   └── sp
├── recognize_process
│   ├── path_test.py
│   ├── Requirement_Recognize_part.txt
│   ├── test_imgs
│   │   ├── 17_7_6.jpg
│   │   ├── 17_8_5.jpg
│   │   ├── 17_女_3.jpg
│   │   ├── 17_1972_4.jpg
│   │   ├── 17_张先山_1.jpg
│   │   ├── 17_达斡尔_2.jpg
│   │   ├── 17_内江市东兴区公安局_9.jpg
│   │   ├── 17_四川省内江市东兴区柳桥镇_7.jpg
│   │   ├── 17_2017.03.28-长期_10.jpg
│   │   └── 17_51101119720807380_8.jpg
│   ├── Readme_Recognize_part.txt
│   ├── model_save
│   │   ├── recognize_model.meta
│   │   ├── recognize_model.index
│   │   └── recognize_model.data-00000-of-00001
│   ├── tools
│   │   ├── __pycache__
│   │   │   ├── test_crnn.cpython-36.pyc
│   │   │   ├── mytest_crnn.cpython-35.pyc
│   │   │   └── mytest_crnn.cpython-36.pyc
│   │   ├── test_crnn_jmz.py
│   │   └── mytest_crnn.py
│   ├── config
│   │   ├── __pycache__
│   │   │   ├── model_config.cpython-35.pyc
│   │   │   └── model_config.cpython-36.pyc
│   │   └── model_config.py
│   ├── crnn_model
│   │   ├── __pycache__
│   │   │   ├── __init__.cpython-35.pyc
│   │   │   ├── __init__.cpython-36.pyc
│   │   │   ├── crnn_model.cpython-35.pyc
│   │   │   ├── crnn_model.cpython-36.pyc
│   │   │   ├── cnn_basenet.cpython-35.pyc
│   │   │   └── cnn_basenet.cpython-36.pyc
│   │   └── crnn_model.py
│   ├── data_provider
│   │   ├── __pycache__
│   │   │   └── read_tfrecord.cpython-36.pyc
│   │   └── read_tfrecord.py
│   ├── anno_test
│   │   ├── image_list.txt
│   │   └── image_list.json
│   └── image_list.txt
├── cut_twist_process
│   ├── Requirement_Recognize_part.txt
│   ├── template
│   │   ├── fan1_new.jpg
│   │   ├── fan_new.jpg
│   │   ├── zheng_new.jpg
│   │   ├── fan_blurred_fan.jpg
│   │   ├── fan_blurred_zheng.jpg
│   │   ├── zheng_blurred_fan.jpg
│   │   └── zheng_blurred_zheng.jpg
│   ├── Readme_Recognize_part.txt
│   ├── __pycache__
│   │   ├── cut_part.cpython-35.pyc
│   │   ├── cut_part.cpython-36.pyc
│   │   ├── twist_part.cpython-35.pyc
│   │   ├── twist_part.cpython-36.pyc
│   │   ├── cut_twist_join.cpython-35.pyc
│   │   ├── cut_twist_join.cpython-36.pyc
│   │   └── cut_twist_join.cpython-37.pyc
│   └── cut_twist_join.py
├── show_imgs
│   ├── 去水印效果.png
│   ├── 系统架构.png
│   └── webwxgetmsgimg.jpg
├── .gitmodules
├── test_data
│   ├── 1ad1773d4ced4c348897826ad7268840.png
│   ├── 1c3a0f9896bf493980d69f8ddeb7a19a.png
│   ├── 1f9784999b6548b291e2a6e2bb99b052.png
│   └── 2a1935934d6c4cdaaf9b24d4cb94d162.png
├── Requirement.txt
├── data_correction_and_generate_csv_file
│   ├── data
│   │   └── repitle_address_extract.json
│   ├── template_imgs
│   │   └── template_img_2.jpg
│   ├── __pycache__
│   │   ├── __init__.cpython-35.pyc
│   │   ├── __init__.cpython-36.pyc
│   │   ├── generate_test_csv_file.cpython-35.pyc
│   │   └── generate_test_csv_file.cpython-36.pyc
│   ├── currect_tools
│   │   └── __pycache__
│   │       ├── valid_data.cpython-35.pyc
│   │       ├── valid_data.cpython-36.pyc
│   │       ├── address_correct.cpython-35.pyc
│   │       ├── address_correct.cpython-36.pyc
│   │       ├── birthday_id_number.cpython-35.pyc
│   │       └── birthday_id_number.cpython-36.pyc
│   └── readme.txt
├── .gitattributes
├── watermask_remover_and_split_data
│   ├── __pycache__
│   │   ├── __init__.cpython-35.pyc
│   │   ├── generate_train_data.cpython-36.pyc
│   │   ├── watermask_process.cpython-35.pyc
│   │   ├── watermask_process.cpython-36.pyc
│   │   └── split_img_generate_data_temp.cpython-36.pyc
│   ├── template_imgs
│   │   ├── chusai_watermask_template.jpg
│   │   ├── fusai_watermask_template.jpg
│   │   └── origin_img_location_marker_template.jpg
│   ├── tools
│   │   ├── __pycache__
│   │   │   ├── fix_img_address_unit.cpython-35.pyc
│   │   │   ├── fix_img_address_unit.cpython-36.pyc
│   │   │   ├── preprocess_for_test.cpython-35.pyc
│   │   │   ├── preprocess_for_test.cpython-36.pyc
│   │   │   ├── split_img_generate_data.cpython-35.pyc
│   │   │   ├── split_img_generate_data.cpython-36.pyc
│   │   │   ├── extract_test_img_to_txts.cpython-35.pyc
│   │   │   └── extract_test_img_to_txts.cpython-36.pyc
│   │   ├── extract_test_img_to_txts.py
│   │   ├── preprocess_for_test.py
│   │   ├── fix_img_address_unit.py
│   │   └── split_img_generate_data.py
│   └── readme
├── LICENSE
├── README.md
├── main_process.py
└── CCFTestResultFixValidData_release.csv
/.gitignore:
--------------------------------------------------------------------------------
*.pyc
data_temp/*
--------------------------------------------------------------------------------
/pytorch-CycleGAN-and-pix2pix/.Rhistory:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/pytorch-CycleGAN-and-pix2pix/scripts/install_deps.sh:
--------------------------------------------------------------------------------
set -ex
pip install visdom
pip install dominate
--------------------------------------------------------------------------------
/recognize_process/path_test.py:
--------------------------------------------------------------------------------
import sys
sys.path.append('./')


print(sys.path)
--------------------------------------------------------------------------------
/cut_twist_process/Requirement_Recognize_part.txt:
--------------------------------------------------------------------------------
python==3.6
tensorflow-gpu==1.12.0
easydict==1.9.0
--------------------------------------------------------------------------------
/recognize_process/Requirement_Recognize_part.txt:
--------------------------------------------------------------------------------
python==3.6
tensorflow-gpu==1.12.0
easydict==1.9.0
--------------------------------------------------------------------------------
/pytorch-CycleGAN-and-pix2pix/requirements.txt:
--------------------------------------------------------------------------------
torch==1.2.0
torchvision==0.2.1
dominate==2.3.1
visdom==0.1.8.3
--------------------------------------------------------------------------------
/pytorch-CycleGAN-and-pix2pix/util/__init__.py:
--------------------------------------------------------------------------------
"""This package includes a miscellaneous collection of useful helper functions."""
--------------------------------------------------------------------------------
/show_imgs/去水印效果.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mingtzge/2019-CCF-BDCI-OCR-MCZJ-OCR-IdentificationIDElement/HEAD/show_imgs/去水印效果.png
--------------------------------------------------------------------------------
/show_imgs/系统架构.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mingtzge/2019-CCF-BDCI-OCR-MCZJ-OCR-IdentificationIDElement/HEAD/show_imgs/系统架构.png
--------------------------------------------------------------------------------
/show_imgs/webwxgetmsgimg.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mingtzge/2019-CCF-BDCI-OCR-MCZJ-OCR-IdentificationIDElement/HEAD/show_imgs/webwxgetmsgimg.jpg
--------------------------------------------------------------------------------
/pytorch-CycleGAN-and-pix2pix/scripts/test_colorization.sh:
--------------------------------------------------------------------------------
set -ex
python test.py --dataroot ./datasets/colorization --name color_pix2pix --model colorization
--------------------------------------------------------------------------------
/pytorch-CycleGAN-and-pix2pix/scripts/train_colorization.sh:
--------------------------------------------------------------------------------
set -ex
python train.py --dataroot ./datasets/colorization --name color_pix2pix --model colorization
--------------------------------------------------------------------------------
/cut_twist_process/template/fan1_new.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mingtzge/2019-CCF-BDCI-OCR-MCZJ-OCR-IdentificationIDElement/HEAD/cut_twist_process/template/fan1_new.jpg
--------------------------------------------------------------------------------
/cut_twist_process/template/fan_new.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mingtzge/2019-CCF-BDCI-OCR-MCZJ-OCR-IdentificationIDElement/HEAD/cut_twist_process/template/fan_new.jpg
--------------------------------------------------------------------------------
/recognize_process/test_imgs/17_7_6.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mingtzge/2019-CCF-BDCI-OCR-MCZJ-OCR-IdentificationIDElement/HEAD/recognize_process/test_imgs/17_7_6.jpg
--------------------------------------------------------------------------------
/recognize_process/test_imgs/17_8_5.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mingtzge/2019-CCF-BDCI-OCR-MCZJ-OCR-IdentificationIDElement/HEAD/recognize_process/test_imgs/17_8_5.jpg
--------------------------------------------------------------------------------
/recognize_process/test_imgs/17_女_3.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mingtzge/2019-CCF-BDCI-OCR-MCZJ-OCR-IdentificationIDElement/HEAD/recognize_process/test_imgs/17_女_3.jpg
--------------------------------------------------------------------------------
/.gitmodules:
--------------------------------------------------------------------------------
[submodule "pytorch-CycleGAN-and-pix2pix/models_data"]
	path = pytorch-CycleGAN-and-pix2pix/models_data
	url = git@github.com:Mingtzge/models_data.git
--------------------------------------------------------------------------------
/cut_twist_process/template/zheng_new.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mingtzge/2019-CCF-BDCI-OCR-MCZJ-OCR-IdentificationIDElement/HEAD/cut_twist_process/template/zheng_new.jpg
--------------------------------------------------------------------------------
/pytorch-CycleGAN-and-pix2pix/scripts/test_cyclegan.sh:
--------------------------------------------------------------------------------
set -ex
python test.py --dataroot ./datasets/maps --name maps_cyclegan --model cycle_gan --phase test --no_dropout
--------------------------------------------------------------------------------
/recognize_process/test_imgs/17_1972_4.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mingtzge/2019-CCF-BDCI-OCR-MCZJ-OCR-IdentificationIDElement/HEAD/recognize_process/test_imgs/17_1972_4.jpg
--------------------------------------------------------------------------------
/recognize_process/test_imgs/17_张先山_1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mingtzge/2019-CCF-BDCI-OCR-MCZJ-OCR-IdentificationIDElement/HEAD/recognize_process/test_imgs/17_张先山_1.jpg
--------------------------------------------------------------------------------
/recognize_process/test_imgs/17_达斡尔_2.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mingtzge/2019-CCF-BDCI-OCR-MCZJ-OCR-IdentificationIDElement/HEAD/recognize_process/test_imgs/17_达斡尔_2.jpg
--------------------------------------------------------------------------------
/cut_twist_process/Readme_Recognize_part.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mingtzge/2019-CCF-BDCI-OCR-MCZJ-OCR-IdentificationIDElement/HEAD/cut_twist_process/Readme_Recognize_part.txt
--------------------------------------------------------------------------------
/pytorch-CycleGAN-and-pix2pix/scripts/train_cyclegan.sh:
--------------------------------------------------------------------------------
set -ex
python train.py --dataroot ./datasets/maps --name maps_cyclegan --model cycle_gan --pool_size 50 --no_dropout
--------------------------------------------------------------------------------
/recognize_process/Readme_Recognize_part.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mingtzge/2019-CCF-BDCI-OCR-MCZJ-OCR-IdentificationIDElement/HEAD/recognize_process/Readme_Recognize_part.txt
--------------------------------------------------------------------------------
/cut_twist_process/template/fan_blurred_fan.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mingtzge/2019-CCF-BDCI-OCR-MCZJ-OCR-IdentificationIDElement/HEAD/cut_twist_process/template/fan_blurred_fan.jpg
--------------------------------------------------------------------------------
/pytorch-CycleGAN-and-pix2pix/options/__init__.py:
--------------------------------------------------------------------------------
"""This package includes option modules: training options, test options, and basic options (used in both training and test)."""
--------------------------------------------------------------------------------
/recognize_process/test_imgs/17_内江市东兴区公安局_9.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mingtzge/2019-CCF-BDCI-OCR-MCZJ-OCR-IdentificationIDElement/HEAD/recognize_process/test_imgs/17_内江市东兴区公安局_9.jpg
--------------------------------------------------------------------------------
/test_data/1ad1773d4ced4c348897826ad7268840.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mingtzge/2019-CCF-BDCI-OCR-MCZJ-OCR-IdentificationIDElement/HEAD/test_data/1ad1773d4ced4c348897826ad7268840.png
--------------------------------------------------------------------------------
/test_data/1c3a0f9896bf493980d69f8ddeb7a19a.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mingtzge/2019-CCF-BDCI-OCR-MCZJ-OCR-IdentificationIDElement/HEAD/test_data/1c3a0f9896bf493980d69f8ddeb7a19a.png
--------------------------------------------------------------------------------
/test_data/1f9784999b6548b291e2a6e2bb99b052.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mingtzge/2019-CCF-BDCI-OCR-MCZJ-OCR-IdentificationIDElement/HEAD/test_data/1f9784999b6548b291e2a6e2bb99b052.png
--------------------------------------------------------------------------------
/test_data/2a1935934d6c4cdaaf9b24d4cb94d162.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mingtzge/2019-CCF-BDCI-OCR-MCZJ-OCR-IdentificationIDElement/HEAD/test_data/2a1935934d6c4cdaaf9b24d4cb94d162.png
--------------------------------------------------------------------------------
/cut_twist_process/template/fan_blurred_zheng.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mingtzge/2019-CCF-BDCI-OCR-MCZJ-OCR-IdentificationIDElement/HEAD/cut_twist_process/template/fan_blurred_zheng.jpg
--------------------------------------------------------------------------------
/cut_twist_process/template/zheng_blurred_fan.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mingtzge/2019-CCF-BDCI-OCR-MCZJ-OCR-IdentificationIDElement/HEAD/cut_twist_process/template/zheng_blurred_fan.jpg
--------------------------------------------------------------------------------
/recognize_process/model_save/recognize_model.meta:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mingtzge/2019-CCF-BDCI-OCR-MCZJ-OCR-IdentificationIDElement/HEAD/recognize_process/model_save/recognize_model.meta
--------------------------------------------------------------------------------
/recognize_process/test_imgs/17_四川省内江市东兴区柳桥镇_7.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mingtzge/2019-CCF-BDCI-OCR-MCZJ-OCR-IdentificationIDElement/HEAD/recognize_process/test_imgs/17_四川省内江市东兴区柳桥镇_7.jpg
--------------------------------------------------------------------------------
/cut_twist_process/template/zheng_blurred_zheng.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mingtzge/2019-CCF-BDCI-OCR-MCZJ-OCR-IdentificationIDElement/HEAD/cut_twist_process/template/zheng_blurred_zheng.jpg
--------------------------------------------------------------------------------
/recognize_process/model_save/recognize_model.index:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mingtzge/2019-CCF-BDCI-OCR-MCZJ-OCR-IdentificationIDElement/HEAD/recognize_process/model_save/recognize_model.index
--------------------------------------------------------------------------------
/recognize_process/test_imgs/17_2017.03.28-长期_10.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mingtzge/2019-CCF-BDCI-OCR-MCZJ-OCR-IdentificationIDElement/HEAD/recognize_process/test_imgs/17_2017.03.28-长期_10.jpg
--------------------------------------------------------------------------------
/Requirement.txt:
--------------------------------------------------------------------------------
python==3.6
tensorflow-gpu==1.12.0
easydict==1.9.0
opencv-python==4.1.0.25
torch==1.2.0
torchvision==0.4.1
dominate==2.3.1
visdom==0.1.8.3
--------------------------------------------------------------------------------
/cut_twist_process/__pycache__/cut_part.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mingtzge/2019-CCF-BDCI-OCR-MCZJ-OCR-IdentificationIDElement/HEAD/cut_twist_process/__pycache__/cut_part.cpython-35.pyc
--------------------------------------------------------------------------------
/cut_twist_process/__pycache__/cut_part.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mingtzge/2019-CCF-BDCI-OCR-MCZJ-OCR-IdentificationIDElement/HEAD/cut_twist_process/__pycache__/cut_part.cpython-36.pyc
--------------------------------------------------------------------------------
/recognize_process/test_imgs/17_51101119720807380_8.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mingtzge/2019-CCF-BDCI-OCR-MCZJ-OCR-IdentificationIDElement/HEAD/recognize_process/test_imgs/17_51101119720807380_8.jpg
--------------------------------------------------------------------------------
/cut_twist_process/__pycache__/twist_part.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mingtzge/2019-CCF-BDCI-OCR-MCZJ-OCR-IdentificationIDElement/HEAD/cut_twist_process/__pycache__/twist_part.cpython-35.pyc
--------------------------------------------------------------------------------
/cut_twist_process/__pycache__/twist_part.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mingtzge/2019-CCF-BDCI-OCR-MCZJ-OCR-IdentificationIDElement/HEAD/cut_twist_process/__pycache__/twist_part.cpython-36.pyc
--------------------------------------------------------------------------------
/cut_twist_process/__pycache__/cut_twist_join.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mingtzge/2019-CCF-BDCI-OCR-MCZJ-OCR-IdentificationIDElement/HEAD/cut_twist_process/__pycache__/cut_twist_join.cpython-35.pyc
--------------------------------------------------------------------------------
/cut_twist_process/__pycache__/cut_twist_join.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mingtzge/2019-CCF-BDCI-OCR-MCZJ-OCR-IdentificationIDElement/HEAD/cut_twist_process/__pycache__/cut_twist_join.cpython-36.pyc
--------------------------------------------------------------------------------
/cut_twist_process/__pycache__/cut_twist_join.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mingtzge/2019-CCF-BDCI-OCR-MCZJ-OCR-IdentificationIDElement/HEAD/cut_twist_process/__pycache__/cut_twist_join.cpython-37.pyc
--------------------------------------------------------------------------------
/recognize_process/model_save/recognize_model.data-00000-of-00001:
--------------------------------------------------------------------------------
version https://git-lfs.github.com/spec/v1
oid sha256:51bf737ff202d79bb53883745be01a7085cbf32d3fa1e5066a4d5fea43f57625
size 141498884
--------------------------------------------------------------------------------
/data_correction_and_generate_csv_file/data/repitle_address_extract.json:
--------------------------------------------------------------------------------
version https://git-lfs.github.com/spec/v1
oid sha256:f6021cc1b6e451a4572698eedf9a48cfad2c14fdd392e6d9fcc8644541816f00
size 26857155
--------------------------------------------------------------------------------
/recognize_process/tools/__pycache__/test_crnn.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mingtzge/2019-CCF-BDCI-OCR-MCZJ-OCR-IdentificationIDElement/HEAD/recognize_process/tools/__pycache__/test_crnn.cpython-36.pyc
--------------------------------------------------------------------------------
/pytorch-CycleGAN-and-pix2pix/scripts/test_pix2pix.sh:
--------------------------------------------------------------------------------
set -ex
python test.py --dataroot ./datasets/facades --name facades_pix2pix --model pix2pix --netG unet_256 --direction BtoA --dataset_mode aligned --norm batch
--------------------------------------------------------------------------------
/pytorch-CycleGAN-and-pix2pix/scripts/test_single.sh:
--------------------------------------------------------------------------------
set -ex
python test.py --dataroot ./datasets/facades/testB/ --name facades_pix2pix --model test --netG unet_256 --direction BtoA --dataset_mode single --norm batch
--------------------------------------------------------------------------------
/recognize_process/config/__pycache__/model_config.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mingtzge/2019-CCF-BDCI-OCR-MCZJ-OCR-IdentificationIDElement/HEAD/recognize_process/config/__pycache__/model_config.cpython-35.pyc
--------------------------------------------------------------------------------
/recognize_process/config/__pycache__/model_config.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mingtzge/2019-CCF-BDCI-OCR-MCZJ-OCR-IdentificationIDElement/HEAD/recognize_process/config/__pycache__/model_config.cpython-36.pyc
--------------------------------------------------------------------------------
/recognize_process/crnn_model/__pycache__/__init__.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mingtzge/2019-CCF-BDCI-OCR-MCZJ-OCR-IdentificationIDElement/HEAD/recognize_process/crnn_model/__pycache__/__init__.cpython-35.pyc
--------------------------------------------------------------------------------
/recognize_process/crnn_model/__pycache__/__init__.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mingtzge/2019-CCF-BDCI-OCR-MCZJ-OCR-IdentificationIDElement/HEAD/recognize_process/crnn_model/__pycache__/__init__.cpython-36.pyc
--------------------------------------------------------------------------------
/recognize_process/tools/__pycache__/mytest_crnn.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mingtzge/2019-CCF-BDCI-OCR-MCZJ-OCR-IdentificationIDElement/HEAD/recognize_process/tools/__pycache__/mytest_crnn.cpython-35.pyc
--------------------------------------------------------------------------------
/recognize_process/tools/__pycache__/mytest_crnn.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mingtzge/2019-CCF-BDCI-OCR-MCZJ-OCR-IdentificationIDElement/HEAD/recognize_process/tools/__pycache__/mytest_crnn.cpython-36.pyc
--------------------------------------------------------------------------------
/recognize_process/crnn_model/__pycache__/crnn_model.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mingtzge/2019-CCF-BDCI-OCR-MCZJ-OCR-IdentificationIDElement/HEAD/recognize_process/crnn_model/__pycache__/crnn_model.cpython-35.pyc
--------------------------------------------------------------------------------
/recognize_process/crnn_model/__pycache__/crnn_model.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mingtzge/2019-CCF-BDCI-OCR-MCZJ-OCR-IdentificationIDElement/HEAD/recognize_process/crnn_model/__pycache__/crnn_model.cpython-36.pyc
--------------------------------------------------------------------------------
/.gitattributes:
--------------------------------------------------------------------------------
recognize_process/model_save/recognize_model.data-00000-of-00001 filter=lfs diff=lfs merge=lfs -text
data_correction_and_generate_csv_file/data/repitle_address_extract.json filter=lfs diff=lfs merge=lfs -text
--------------------------------------------------------------------------------
/recognize_process/crnn_model/__pycache__/cnn_basenet.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mingtzge/2019-CCF-BDCI-OCR-MCZJ-OCR-IdentificationIDElement/HEAD/recognize_process/crnn_model/__pycache__/cnn_basenet.cpython-35.pyc
--------------------------------------------------------------------------------
/recognize_process/crnn_model/__pycache__/cnn_basenet.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mingtzge/2019-CCF-BDCI-OCR-MCZJ-OCR-IdentificationIDElement/HEAD/recognize_process/crnn_model/__pycache__/cnn_basenet.cpython-36.pyc
--------------------------------------------------------------------------------
/watermask_remover_and_split_data/__pycache__/__init__.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mingtzge/2019-CCF-BDCI-OCR-MCZJ-OCR-IdentificationIDElement/HEAD/watermask_remover_and_split_data/__pycache__/__init__.cpython-35.pyc
--------------------------------------------------------------------------------
/data_correction_and_generate_csv_file/template_imgs/template_img_2.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mingtzge/2019-CCF-BDCI-OCR-MCZJ-OCR-IdentificationIDElement/HEAD/data_correction_and_generate_csv_file/template_imgs/template_img_2.jpg
--------------------------------------------------------------------------------
/data_correction_and_generate_csv_file/__pycache__/__init__.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mingtzge/2019-CCF-BDCI-OCR-MCZJ-OCR-IdentificationIDElement/HEAD/data_correction_and_generate_csv_file/__pycache__/__init__.cpython-35.pyc
--------------------------------------------------------------------------------
/data_correction_and_generate_csv_file/__pycache__/__init__.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mingtzge/2019-CCF-BDCI-OCR-MCZJ-OCR-IdentificationIDElement/HEAD/data_correction_and_generate_csv_file/__pycache__/__init__.cpython-36.pyc
--------------------------------------------------------------------------------
/recognize_process/data_provider/__pycache__/read_tfrecord.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mingtzge/2019-CCF-BDCI-OCR-MCZJ-OCR-IdentificationIDElement/HEAD/recognize_process/data_provider/__pycache__/read_tfrecord.cpython-36.pyc
--------------------------------------------------------------------------------
/watermask_remover_and_split_data/template_imgs/chusai_watermask_template.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mingtzge/2019-CCF-BDCI-OCR-MCZJ-OCR-IdentificationIDElement/HEAD/watermask_remover_and_split_data/template_imgs/chusai_watermask_template.jpg
--------------------------------------------------------------------------------
/watermask_remover_and_split_data/template_imgs/fusai_watermask_template.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mingtzge/2019-CCF-BDCI-OCR-MCZJ-OCR-IdentificationIDElement/HEAD/watermask_remover_and_split_data/template_imgs/fusai_watermask_template.jpg
--------------------------------------------------------------------------------
/pytorch-CycleGAN-and-pix2pix/scripts/train_pix2pix.sh:
--------------------------------------------------------------------------------
set -ex
python train.py --dataroot ./datasets/facades --name facades_pix2pix --model pix2pix --netG unet_256 --direction BtoA --lambda_L1 100 --dataset_mode aligned --norm batch --pool_size 0
--------------------------------------------------------------------------------
/recognize_process/anno_test/image_list.txt:
--------------------------------------------------------------------------------
17_张先山_1.jpg
17_7_6.jpg
17_8_5.jpg
17_1972_4.jpg
17_2017.03.28-长期_10.jpg
17_51101119720807380_8.jpg
17_达斡尔_2.jpg
17_内江市东兴区公安局_9.jpg
17_女_3.jpg
17_四川省内江市东兴区柳桥镇_7.jpg
--------------------------------------------------------------------------------
/watermask_remover_and_split_data/__pycache__/generate_train_data.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mingtzge/2019-CCF-BDCI-OCR-MCZJ-OCR-IdentificationIDElement/HEAD/watermask_remover_and_split_data/__pycache__/generate_train_data.cpython-36.pyc
--------------------------------------------------------------------------------
/watermask_remover_and_split_data/__pycache__/watermask_process.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mingtzge/2019-CCF-BDCI-OCR-MCZJ-OCR-IdentificationIDElement/HEAD/watermask_remover_and_split_data/__pycache__/watermask_process.cpython-35.pyc
--------------------------------------------------------------------------------
/watermask_remover_and_split_data/__pycache__/watermask_process.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mingtzge/2019-CCF-BDCI-OCR-MCZJ-OCR-IdentificationIDElement/HEAD/watermask_remover_and_split_data/__pycache__/watermask_process.cpython-36.pyc
--------------------------------------------------------------------------------
/watermask_remover_and_split_data/template_imgs/origin_img_location_marker_template.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mingtzge/2019-CCF-BDCI-OCR-MCZJ-OCR-IdentificationIDElement/HEAD/watermask_remover_and_split_data/template_imgs/origin_img_location_marker_template.jpg
--------------------------------------------------------------------------------
/watermask_remover_and_split_data/tools/__pycache__/fix_img_address_unit.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mingtzge/2019-CCF-BDCI-OCR-MCZJ-OCR-IdentificationIDElement/HEAD/watermask_remover_and_split_data/tools/__pycache__/fix_img_address_unit.cpython-35.pyc
--------------------------------------------------------------------------------
/watermask_remover_and_split_data/tools/__pycache__/fix_img_address_unit.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mingtzge/2019-CCF-BDCI-OCR-MCZJ-OCR-IdentificationIDElement/HEAD/watermask_remover_and_split_data/tools/__pycache__/fix_img_address_unit.cpython-36.pyc
--------------------------------------------------------------------------------
/watermask_remover_and_split_data/tools/__pycache__/preprocess_for_test.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mingtzge/2019-CCF-BDCI-OCR-MCZJ-OCR-IdentificationIDElement/HEAD/watermask_remover_and_split_data/tools/__pycache__/preprocess_for_test.cpython-35.pyc
--------------------------------------------------------------------------------
/watermask_remover_and_split_data/tools/__pycache__/preprocess_for_test.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mingtzge/2019-CCF-BDCI-OCR-MCZJ-OCR-IdentificationIDElement/HEAD/watermask_remover_and_split_data/tools/__pycache__/preprocess_for_test.cpython-36.pyc
--------------------------------------------------------------------------------
/data_correction_and_generate_csv_file/__pycache__/generate_test_csv_file.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mingtzge/2019-CCF-BDCI-OCR-MCZJ-OCR-IdentificationIDElement/HEAD/data_correction_and_generate_csv_file/__pycache__/generate_test_csv_file.cpython-35.pyc
--------------------------------------------------------------------------------
/data_correction_and_generate_csv_file/__pycache__/generate_test_csv_file.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mingtzge/2019-CCF-BDCI-OCR-MCZJ-OCR-IdentificationIDElement/HEAD/data_correction_and_generate_csv_file/__pycache__/generate_test_csv_file.cpython-36.pyc
--------------------------------------------------------------------------------
/data_correction_and_generate_csv_file/currect_tools/__pycache__/valid_data.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mingtzge/2019-CCF-BDCI-OCR-MCZJ-OCR-IdentificationIDElement/HEAD/data_correction_and_generate_csv_file/currect_tools/__pycache__/valid_data.cpython-35.pyc
--------------------------------------------------------------------------------
/data_correction_and_generate_csv_file/currect_tools/__pycache__/valid_data.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mingtzge/2019-CCF-BDCI-OCR-MCZJ-OCR-IdentificationIDElement/HEAD/data_correction_and_generate_csv_file/currect_tools/__pycache__/valid_data.cpython-36.pyc
--------------------------------------------------------------------------------
/watermask_remover_and_split_data/__pycache__/split_img_generate_data_temp.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mingtzge/2019-CCF-BDCI-OCR-MCZJ-OCR-IdentificationIDElement/HEAD/watermask_remover_and_split_data/__pycache__/split_img_generate_data_temp.cpython-36.pyc
--------------------------------------------------------------------------------
/watermask_remover_and_split_data/tools/__pycache__/split_img_generate_data.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mingtzge/2019-CCF-BDCI-OCR-MCZJ-OCR-IdentificationIDElement/HEAD/watermask_remover_and_split_data/tools/__pycache__/split_img_generate_data.cpython-35.pyc
--------------------------------------------------------------------------------
/watermask_remover_and_split_data/tools/__pycache__/split_img_generate_data.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mingtzge/2019-CCF-BDCI-OCR-MCZJ-OCR-IdentificationIDElement/HEAD/watermask_remover_and_split_data/tools/__pycache__/split_img_generate_data.cpython-36.pyc
--------------------------------------------------------------------------------
/watermask_remover_and_split_data/tools/__pycache__/extract_test_img_to_txts.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mingtzge/2019-CCF-BDCI-OCR-MCZJ-OCR-IdentificationIDElement/HEAD/watermask_remover_and_split_data/tools/__pycache__/extract_test_img_to_txts.cpython-35.pyc
--------------------------------------------------------------------------------
/watermask_remover_and_split_data/tools/__pycache__/extract_test_img_to_txts.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mingtzge/2019-CCF-BDCI-OCR-MCZJ-OCR-IdentificationIDElement/HEAD/watermask_remover_and_split_data/tools/__pycache__/extract_test_img_to_txts.cpython-36.pyc
--------------------------------------------------------------------------------
/data_correction_and_generate_csv_file/currect_tools/__pycache__/address_correct.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mingtzge/2019-CCF-BDCI-OCR-MCZJ-OCR-IdentificationIDElement/HEAD/data_correction_and_generate_csv_file/currect_tools/__pycache__/address_correct.cpython-35.pyc
--------------------------------------------------------------------------------
/data_correction_and_generate_csv_file/currect_tools/__pycache__/address_correct.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mingtzge/2019-CCF-BDCI-OCR-MCZJ-OCR-IdentificationIDElement/HEAD/data_correction_and_generate_csv_file/currect_tools/__pycache__/address_correct.cpython-36.pyc
--------------------------------------------------------------------------------
/data_correction_and_generate_csv_file/currect_tools/__pycache__/birthday_id_number.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mingtzge/2019-CCF-BDCI-OCR-MCZJ-OCR-IdentificationIDElement/HEAD/data_correction_and_generate_csv_file/currect_tools/__pycache__/birthday_id_number.cpython-35.pyc
--------------------------------------------------------------------------------
/data_correction_and_generate_csv_file/currect_tools/__pycache__/birthday_id_number.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mingtzge/2019-CCF-BDCI-OCR-MCZJ-OCR-IdentificationIDElement/HEAD/data_correction_and_generate_csv_file/currect_tools/__pycache__/birthday_id_number.cpython-36.pyc
--------------------------------------------------------------------------------
/pytorch-CycleGAN-and-pix2pix/scripts/conda_deps.sh:
--------------------------------------------------------------------------------
set -ex
conda install numpy pyyaml mkl mkl-include setuptools cmake cffi typing
conda install pytorch torchvision -c pytorch  # add cuda90 if CUDA 9
conda install visdom dominate -c conda-forge  # install visdom and dominate
--------------------------------------------------------------------------------
/pytorch-CycleGAN-and-pix2pix/scripts/eval_cityscapes/download_fcn8s.sh:
--------------------------------------------------------------------------------
URL=http://people.eecs.berkeley.edu/~tinghuiz/projects/pix2pix/fcn-8s-cityscapes/fcn-8s-cityscapes.caffemodel
OUTPUT_FILE=./scripts/eval_cityscapes/caffemodel/fcn-8s-cityscapes.caffemodel
wget -N $URL -O $OUTPUT_FILE
--------------------------------------------------------------------------------
/recognize_process/image_list.txt:
--------------------------------------------------------------------------------
17_张先山_1.jpg 张先山
17_7_6.jpg 7
17_8_5.jpg 8
17_1972_4.jpg 1972
17_2017.03.28-长期_10.jpg 20170328-长期
17_51101119720807380_8.jpg 51101119720807380
17_达斡尔_2.jpg 达斡尔
17_内江市东兴区公安局_9.jpg 内江市东兴区公安局
17_女_3.jpg 女
17_四川省内江市东兴区柳桥镇_7.jpg 四川省内江市东兴区柳桥镇
--------------------------------------------------------------------------------
/pytorch-CycleGAN-and-pix2pix/environment.yml:
--------------------------------------------------------------------------------
name: pytorch-CycleGAN-and-pix2pix
channels:
- peterjc123
- defaults
dependencies:
- python=3.5.5
- pytorch=0.4.1
- scipy
- pip:
  - dominate==2.3.1
  - git+https://github.com/pytorch/vision.git
  - Pillow==5.0.0
  - numpy==1.14.1
  - visdom==0.1.7
--------------------------------------------------------------------------------
/data_correction_and_generate_csv_file/readme.txt:
--------------------------------------------------------------------------------
Function: correct the recognized data and generate the CSV file.
File overview:
currect_tools: correction toolkit, containing tools for correcting the validity period, date of birth, issuing authority, address, ID number, etc.
template_imgs: template files, containing the images used to calibrate image coordinates
data: reference databases used for correction, containing nationwide address data, issuing-authority data, and administrative division codes
generate_test_csv_file.py: contains the externally callable interface

NOTE: test data used for reproduction must follow the same format as the preliminary- and final-round data. The top-left corner of each side of the ID card must carry the watermark text "仅限DBCI比赛(复赛)使用", with the same font size, format, and position as in the preliminary and final rounds; otherwise recognition accuracy will suffer badly, and the code may even fail to run.
--------------------------------------------------------------------------------
/pytorch-CycleGAN-and-pix2pix/scripts/download_pix2pix_model.sh:
--------------------------------------------------------------------------------
FILE=$1

echo "Note: available models are edges2shoes, sat2map, map2sat, facades_label2photo, and day2night"
echo "Specified [$FILE]"

mkdir -p ./checkpoints/${FILE}_pretrained
MODEL_FILE=./checkpoints/${FILE}_pretrained/latest_net_G.pth
URL=http://efrosgans.eecs.berkeley.edu/pix2pix/models-pytorch/$FILE.pth

wget -N $URL -O $MODEL_FILE
--------------------------------------------------------------------------------
/recognize_process/anno_test/image_list.json:
--------------------------------------------------------------------------------
{"17_7_6.jpg": "7", "17_8_5.jpg": "8", "17_1972_4.jpg": "1972", "17_2017.03.28-\u957f\u671f_10.jpg": "20170328-\u957f\u671f", "17_51101119720807380_8.jpg": "51101119720807380", "17_\u8fbe\u65a1\u5c14_2.jpg": "\u8fbe\u65a1\u5c14", "17_\u5185\u6c5f\u5e02\u4e1c\u5174\u533a\u516c\u5b89\u5c40_9.jpg": "\u5185\u6c5f\u5e02\u4e1c\u5174\u533a\u516c\u5b89\u5c40", "17_\u5973_3.jpg": "\u5973", "17_\u56db\u5ddd\u7701\u5185\u6c5f\u5e02\u4e1c\u5174\u533a\u67f3\u6865\u9547_7.jpg": "\u56db\u5ddd\u7701\u5185\u6c5f\u5e02\u4e1c\u5174\u533a\u67f3\u6865\u9547"}
--------------------------------------------------------------------------------
/watermask_remover_and_split_data/readme:
--------------------------------------------------------------------------------
Functions:
1. Extract the watermarked region from an image, remove the watermark, and restore the image.
2. Split the image and extract each element of the ID card.
3. Merge the issuing authority and the address into a single line, crop the text region, and filter out blank areas.
4. Preprocess images before recognition: filtering and resizing.
5. Write the image names of all elements of each ID-card set into one txt file, to be used for recognition.
File overview:
tools:
    extract_test_img_to_txts.py: implements function 5
    fix_img_address_unit.py: implements function 3
    preprocess_for_test.py: implements function 4
    split_img_generate_data.py: implements function 2
watermask_process.py: external interface package; it calls the interfaces of the individual functions and invokes the watermark-removal module to strip the watermark from the dataset
template_imgs: template files, containing the images used to calibrate image coordinates

NOTE: test data used for reproduction must follow the same format as the preliminary- and final-round data. The top-left corner of each side of the ID card must carry the watermark text "仅限DBCI比赛(复赛)使用", with the same font size, format, and position as in the preliminary and final rounds; otherwise recognition accuracy will suffer badly, and the code may even fail to run.
--------------------------------------------------------------------------------
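For orientation, a minimal sketch of how the preprocessing step (function 4 above) can be driven through `preprocess_imgs`, whose definition appears later in this dump under tools/preprocess_for_test.py; the directory paths and pool size below are hypothetical placeholders, not values shipped with the repo.

```python
# Hypothetical driver for preprocessing step 4 above; preprocess_imgs is
# defined in watermask_remover_and_split_data/tools/preprocess_for_test.py.
# The paths are placeholders for illustration only.
from watermask_remover_and_split_data.tools.preprocess_for_test import preprocess_imgs

preprocess_imgs(
    img_path="./data_temp/split_imgs",   # element crops produced by the splitting step
    save_path="./data_temp/test_imgs",   # filtered/resized images fed to recognition
    pool_num=4,                          # >0 runs a multiprocessing Pool; 0 runs serially
)
```
--------------------------------------------------------------------------------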
/pytorch-CycleGAN-and-pix2pix/docs/Dockerfile:
--------------------------------------------------------------------------------
FROM nvidia/cuda:9.0-base

RUN apt update && apt install -y wget unzip curl bzip2 git
RUN curl -LO http://repo.continuum.io/miniconda/Miniconda-latest-Linux-x86_64.sh
RUN bash Miniconda-latest-Linux-x86_64.sh -p /miniconda -b
RUN rm Miniconda-latest-Linux-x86_64.sh
ENV PATH=/miniconda/bin:${PATH}
RUN conda update -y conda

RUN conda install -y pytorch torchvision -c pytorch
RUN mkdir /workspace/ && cd /workspace/ && git clone https://github.com/junyanz/pytorch-CycleGAN-and-pix2pix.git && cd pytorch-CycleGAN-and-pix2pix && pip install -r requirements.txt

WORKDIR /workspace
--------------------------------------------------------------------------------
/pytorch-CycleGAN-and-pix2pix/scripts/download_cyclegan_model.sh:
--------------------------------------------------------------------------------
FILE=$1

echo "Note: available models are apple2orange, orange2apple, summer2winter_yosemite, winter2summer_yosemite, horse2zebra, zebra2horse, monet2photo, style_monet, style_cezanne, style_ukiyoe, style_vangogh, sat2map, map2sat, cityscapes_photo2label, cityscapes_label2photo, facades_photo2label, facades_label2photo, iphone2dslr_flower"

echo "Specified [$FILE]"

mkdir -p ./checkpoints/${FILE}_pretrained
MODEL_FILE=./checkpoints/${FILE}_pretrained/latest_net_G.pth
URL=http://efrosgans.eecs.berkeley.edu/cyclegan/pretrained_models/$FILE.pth

wget -N $URL -O $MODEL_FILE
--------------------------------------------------------------------------------
/pytorch-CycleGAN-and-pix2pix/.gitignore:
--------------------------------------------------------------------------------
.DS_Store
debug*
datasets/
checkpoints/
results/
build/
dist/
*.png
torch.egg-info/
*/**/__pycache__
torch/version.py
torch/csrc/generic/TensorMethods.cpp
torch/lib/*.so*
torch/lib/*.dylib*
torch/lib/*.h
torch/lib/build
torch/lib/tmp_install
torch/lib/include
torch/lib/torch_shm_manager
torch/csrc/cudnn/cuDNN.cpp
torch/csrc/nn/THNN.cwrap
torch/csrc/nn/THNN.cpp
torch/csrc/nn/THCUNN.cwrap
torch/csrc/nn/THCUNN.cpp
torch/csrc/nn/THNN_generic.cwrap
torch/csrc/nn/THNN_generic.cpp
torch/csrc/nn/THNN_generic.h
docs/src/**/*
test/data/legacy_modules.t7
test/data/gpu_tensors.pt
test/htmlcov
test/.coverage
*/*.pyc
*/**/*.pyc
*/**/**/*.pyc
*/**/**/**/*.pyc
*/**/**/**/**/*.pyc
*/*.so*
*/**/*.so*
*/**/*.dylib*
test/data/legacy_serialized.pt
*~
.idea
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
MIT License

Copyright (c) 2019 MingzhiJiang

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
--------------------------------------------------------------------------------
/pytorch-CycleGAN-and-pix2pix/scripts/eval_cityscapes/util.py:
--------------------------------------------------------------------------------
# The following code is modified from https://github.com/shelhamer/clockwork-fcn
import numpy as np


def get_out_scoremap(net):
    return net.blobs['score'].data[0].argmax(axis=0).astype(np.uint8)


def feed_net(net, in_):
    """
    Load prepared input into net.
    """
    net.blobs['data'].reshape(1, *in_.shape)
    net.blobs['data'].data[...] = in_


def segrun(net, in_):
    feed_net(net, in_)
    net.forward()
    return get_out_scoremap(net)


def fast_hist(a, b, n):
    k = np.where((a >= 0) & (a < n))[0]
    bc = np.bincount(n * a[k].astype(int) + b[k], minlength=n**2)
    if len(bc) != n**2:
        # ignore this example if dimension mismatch
        return 0
    return bc.reshape(n, n)


def get_scores(hist):
    # Mean pixel accuracy
    acc = np.diag(hist).sum() / (hist.sum() + 1e-12)

    # Per class accuracy
    cl_acc = np.diag(hist) / (hist.sum(1) + 1e-12)

    # Per class IoU
    iu = np.diag(hist) / (hist.sum(1) + hist.sum(0) - np.diag(hist) + 1e-12)

    return acc, np.nanmean(cl_acc), np.nanmean(iu), cl_acc, iu
--------------------------------------------------------------------------------
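To make the bookkeeping in `fast_hist` and `get_scores` above concrete, here is a small self-contained check on synthetic labels; the four-pixel arrays are invented for illustration, and the import path assumes scripts/eval_cityscapes is on sys.path.

```python
# Synthetic sanity check for fast_hist/get_scores from util.py above.
# The label arrays are made up for illustration only.
import numpy as np

from util import fast_hist, get_scores  # assumes scripts/eval_cityscapes is on sys.path

n_classes = 3
gt = np.array([0, 1, 2, 2])    # flattened ground-truth labels
pred = np.array([0, 1, 2, 1])  # flattened predictions

hist = fast_hist(gt, pred, n_classes)  # 3x3 confusion matrix, rows = ground truth
acc, mean_cl_acc, mean_iu, cl_acc, iu = get_scores(hist)
print(acc)  # 0.75 -- three of the four pixels are labeled correctly
```
--------------------------------------------------------------------------------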
/pytorch-CycleGAN-and-pix2pix/docs/docker.md:
--------------------------------------------------------------------------------
# Docker image with pytorch-CycleGAN-and-pix2pix

We provide both a Dockerfile and a pre-built Docker container that can run this code repo.

## Prerequisite

- Install [docker-ce](https://docs.docker.com/install/linux/docker-ce/ubuntu/)
- Install [nvidia-docker](https://github.com/NVIDIA/nvidia-docker#quickstart)

## Running the pre-built Docker image

- Pull the pre-built Docker image

```bash
docker pull taesungp/pytorch-cyclegan-and-pix2pix
```

- Start an interactive Docker session. The `-p 8097:8097` option is needed if you want to run a `visdom` server on the Docker container.

```bash
nvidia-docker run -it -p 8097:8097 taesungp/pytorch-cyclegan-and-pix2pix
```

- Now you are in the Docker environment. Go to our code repo and start running things.

```bash
cd /workspace/pytorch-CycleGAN-and-pix2pix
bash datasets/download_pix2pix_dataset.sh facades
python -m visdom.server &
bash scripts/train_pix2pix.sh
```

## Running with the Dockerfile

We also posted the [Dockerfile](Dockerfile). To generate the pre-built image, download the Dockerfile in this directory and run

```bash
docker build -t [target_tag] .
```

in the directory that contains the Dockerfile.
--------------------------------------------------------------------------------
/pytorch-CycleGAN-and-pix2pix/options/test_options.py:
--------------------------------------------------------------------------------
from .base_options import BaseOptions


class TestOptions(BaseOptions):
    """This class includes test options.

    It also includes shared options defined in BaseOptions.
    """

    def initialize(self, parser):
        parser = BaseOptions.initialize(self, parser)  # define shared options
        parser.add_argument('--ntest', type=int, default=float("inf"), help='# of test examples.')
        parser.add_argument('--results_dir', type=str, default='./results/', help='saves results here.')
        parser.add_argument('--aspect_ratio', type=float, default=1.0, help='aspect ratio of result images')
        parser.add_argument('--phase', type=str, default='test', help='train, val, test, etc')
        # Dropout and BatchNorm have different behavior during training and test.
        parser.add_argument('--eval', action='store_true', help='use eval mode during test time.')
        parser.add_argument('--num_test', type=int, default=50, help='how many test images to run')
        # overwrite default values
        parser.set_defaults(model='test')
        # To avoid cropping, the load_size should be the same as crop_size
        parser.set_defaults(load_size=parser.get_default('crop_size'))
        self.isTrain = False
        return parser
--------------------------------------------------------------------------------
/watermask_remover_and_split_data/tools/extract_test_img_to_txts.py:
--------------------------------------------------------------------------------
import os
from multiprocessing import Pool


def run(idx, test_img_dst_path, test_img_names, img_names):
    fw = open(os.path.join(test_img_dst_path, "images_list" + "_" + str(idx) + ".txt"), "w")
    img_name = img_names[idx][:-5]
    for te_img in test_img_names:
        if img_name in te_img:
            fw.write(te_img + "\n")
    fw.close()


def generate_txts(origin_img_path, test_img_path, test_img_dst_path, pool_num):
    """
    :param origin_img_path: path of the original images
    :param test_img_path: path of the images used for recognition
    :param test_img_dst_path: destination path for the generated txt image lists
    :param pool_num: number of worker processes
    Description: save the image names of the 10 elements of each ID-card set
    into a single txt file, to be used for text recognition.
    """
    if not os.path.exists(test_img_dst_path):
        os.makedirs(test_img_dst_path)
    imgs = os.listdir(origin_img_path)
    test_img_names = os.listdir(test_img_path)
    img_names = [im.split("_")[0] for im in imgs if im.split("_")[1][0] == "0"]  # extract the image-set name prefixes
    epoch_count = len(img_names)
    if pool_num > 0:
        pool = Pool(pool_num)
        for idx in range(epoch_count):
            pool.apply_async(run, (idx, test_img_dst_path, test_img_names, img_names,))
        pool.close()
        pool.join()
    else:
        for idx in range(epoch_count):
            run(idx, test_img_dst_path, test_img_names, img_names)
    print("txts generation finished")
--------------------------------------------------------------------------------
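A minimal sketch of driving `generate_txts` from the module above; the directory paths and pool size are hypothetical placeholders.

```python
# Hypothetical call into generate_txts (defined above); paths are placeholders.
from watermask_remover_and_split_data.tools.extract_test_img_to_txts import generate_txts

generate_txts(
    origin_img_path="./test_data",           # original card images
    test_img_path="./data_temp/test_imgs",   # per-element crops used for recognition
    test_img_dst_path="./data_temp/txts",    # one images_list_<idx>.txt per card set
    pool_num=4,                              # >0 uses a multiprocessing Pool
)
```
--------------------------------------------------------------------------------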
/pytorch-CycleGAN-and-pix2pix/data/single_dataset.py:
--------------------------------------------------------------------------------
from data.base_dataset import BaseDataset, get_transform
from data.image_folder import make_dataset
from PIL import Image


class SingleDataset(BaseDataset):
    """This dataset class can load a set of images specified by the path --dataroot /path/to/data.

    It can be used for generating CycleGAN results only for one side with the model option '--model test'.
    """

    def __init__(self, opt):
        """Initialize this dataset class.

        Parameters:
            opt (Option class) -- stores all the experiment flags; needs to be a subclass of BaseOptions
        """
        BaseDataset.__init__(self, opt)
        self.A_paths = sorted(make_dataset(opt.dataroot, opt.max_dataset_size))
        input_nc = self.opt.output_nc if self.opt.direction == 'BtoA' else self.opt.input_nc
        self.transform = get_transform(opt, grayscale=(input_nc == 1))

    def __getitem__(self, index):
        """Return a data point and its metadata information.

        Parameters:
            index - - a random integer for data indexing

        Returns a dictionary that contains A and A_paths
            A(tensor) - - an image in one domain
            A_paths(str) - - the path of the image
        """
        A_path = self.A_paths[index]
        A_img = Image.open(A_path).convert('RGB')
        A = self.transform(A_img)
        return {'A': A, 'A_paths': A_path}

    def __len__(self):
        """Return the total number of images in the dataset."""
        return len(self.A_paths)
--------------------------------------------------------------------------------
/pytorch-CycleGAN-and-pix2pix/data/image_folder.py:
--------------------------------------------------------------------------------
"""A modified image folder class

We modify the official PyTorch image folder (https://github.com/pytorch/vision/blob/master/torchvision/datasets/folder.py)
so that this class can load images from both current directory and its subdirectories.
"""

import torch.utils.data as data

from PIL import Image
import os
import os.path

IMG_EXTENSIONS = [
    '.jpg', '.JPG', '.jpeg', '.JPEG',
    '.png', '.PNG', '.ppm', '.PPM', '.bmp', '.BMP',
]


def is_image_file(filename):
    return any(filename.endswith(extension) for extension in IMG_EXTENSIONS)


def make_dataset(dir, max_dataset_size=float("inf")):
    images = []
    assert os.path.isdir(dir), '%s is not a valid directory' % dir

    for root, _, fnames in sorted(os.walk(dir)):
        for fname in fnames:
            if is_image_file(fname):
                path = os.path.join(root, fname)
                images.append(path)
    return images[:min(max_dataset_size, len(images))]


def default_loader(path):
    return Image.open(path).convert('RGB')


class ImageFolder(data.Dataset):

    def __init__(self, root, transform=None, return_paths=False,
                 loader=default_loader):
        imgs = make_dataset(root)
        if len(imgs) == 0:
            raise(RuntimeError("Found 0 images in: " + root + "\n"
                               "Supported image extensions are: " +
                               ",".join(IMG_EXTENSIONS)))

        self.root = root
        self.imgs = imgs
        self.transform = transform
        self.return_paths = return_paths
        self.loader = loader

    def __getitem__(self, index):
        path = self.imgs[index]
        img = self.loader(path)
        if self.transform is not None:
            img = self.transform(img)
        if self.return_paths:
            return img, path
        else:
            return img

    def __len__(self):
        return len(self.imgs)
--------------------------------------------------------------------------------
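A short usage sketch for the `ImageFolder` class above; the dataset directory and the transform are illustrative assumptions, not values prescribed by the repo.

```python
# Illustrative use of ImageFolder from data/image_folder.py above.
# './datasets/facades/testB' is a placeholder directory of images.
import torchvision.transforms as transforms
from data.image_folder import ImageFolder

dataset = ImageFolder(
    root='./datasets/facades/testB',
    transform=transforms.Compose([transforms.Resize(256), transforms.ToTensor()]),
    return_paths=True,   # __getitem__ then yields (image_tensor, file_path)
)
img, path = dataset[0]   # a 3xHxW float tensor and the source file path
print(len(dataset), img.shape, path)
```
--------------------------------------------------------------------------------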
-------------------------------------------------------------------------------- /recognize_process/config/model_config.py: --------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | # @Time    : 19-11-19 23:45
4 | # @Author  : Miao Wenqiang
5 | # @Reference    : https://github.com/MaybeShewill-CV/CRNN_Tensorflow
6 | # @File    : global_config.py
7 | # @IDE: PyCharm Community Edition
8 | """
9 | Set some global configuration
10 | """
11 | from easydict import EasyDict as edict
12 | 
13 | __C = edict()
14 | # Consumers can get config by: from config import cfg
15 | 
16 | cfg = __C
17 | 
18 | __C.ARCH = edict()
19 | 
20 | # Number of units in each LSTM cell
21 | __C.ARCH.HIDDEN_UNITS = 256
22 | # Number of stacked LSTM cells
23 | __C.ARCH.HIDDEN_LAYERS = 2
24 | # Sequence length. This has to be the width of the final feature map of the CNN, which is input size width / 4
25 | __C.ARCH.SEQ_LENGTH = 100  # cn dataset
26 | # Width x height into which training / testing images are resized before feeding into the network
27 | __C.ARCH.INPUT_SIZE = (400, 32)  # input image width and height
28 | # Number of channels in images
29 | __C.ARCH.INPUT_CHANNELS = 3  # number of input channels
30 | # Number of character classes
31 | __C.ARCH.NUM_CLASSES = 6031  # number of character classes to recognize + 1 (including space)
32 | 
33 | # Train options
34 | __C.TRAIN = edict()
35 | # Set the shadownet training epochs
36 | __C.TRAIN.EPOCHS = 580000  # step at which training stops
37 | # Set the display step
38 | __C.TRAIN.DISPLAY_STEP = 200  # visualization interval (in steps) during training
39 | # Set the initial learning rate
40 | __C.TRAIN.LEARNING_RATE = 30000.0  # initial learning rate
41 | # Set the shadownet training batch size
42 | __C.TRAIN.BATCH_SIZE = 64  # batch_size
43 | # Set the learning rate decay steps
44 | __C.TRAIN.LR_DECAY_STEPS = 2000  # step interval for exponential learning-rate decay
45 | # Set the learning rate decay rate
46 | __C.TRAIN.LR_DECAY_RATE = 0.94  # decay rate
47 | # Set multi process nums
48 | __C.TRAIN.CPU_MULTI_PROCESS_NUMS = 20  # number of worker processes
49 | # Set the model-saving interval
50 | __C.TRAIN.SAVE_STEPS = 10000  # save the model every this many steps
51 | # Set the GPU resource used during training process
52 | __C.TRAIN.GPU_MEMORY_FRACTION = 0.9  # maximum fraction of GPU memory that may be occupied
53 | # Set the GPU allow growth parameter during tensorflow training process
54 | __C.TRAIN.TF_ALLOW_GROWTH = True
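A quick sanity check of these numbers, useful whenever INPUT_SIZE is changed: per the comment above, the CTC sequence length must equal the input width divided by 4 (the CNN downsamples width by a factor of 4). A minimal sketch, assuming the repository root is on sys.path:

    import sys
    sys.path.append('./')  # repo root, as in recognize_process/path_test.py
    from recognize_process.config import model_config

    CFG = model_config.cfg
    width, height = CFG.ARCH.INPUT_SIZE
    # SEQ_LENGTH must match the final CNN feature-map width (input width / 4).
    assert CFG.ARCH.SEQ_LENGTH == width // 4, 'SEQ_LENGTH must be input width / 4'
    print(CFG.ARCH.NUM_CLASSES, CFG.TRAIN.BATCH_SIZE)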
-------------------------------------------------------------------------------- /watermask_remover_and_split_data/tools/preprocess_for_test.py: --------------------------------------------------------------------------------
1 | import cv2
2 | import numpy as np
3 | import os
4 | from multiprocessing import Pool
5 | 
6 | 
7 | def _resize_image(img, dst_height):
8 |     h_old = img.shape[0]
9 |     w_old = img.shape[1]
10 |     height = dst_height
11 |     width = int(w_old * height / h_old)
12 |     resized_img = cv2.resize(img, (width, height), interpolation=cv2.INTER_CUBIC)
13 | 
14 |     return resized_img
15 | 
16 | 
17 | def preprocess_one_img(img):
18 |     resize_img = _resize_image(img, 32)  # resize the image to a height of 32, keeping the aspect ratio
19 |     # filter the image
20 |     resize_img = cv2.normalize(resize_img, dst=None, alpha=230, beta=20, norm_type=cv2.NORM_MINMAX)
21 |     resize_img = cv2.bilateralFilter(src=resize_img, d=3, sigmaColor=200, sigmaSpace=10)
22 |     resize_img = cv2.filter2D(resize_img, -1, kernel=np.array([[0, -1, 0], [-1, 5, -1], [0, -1, 0]], np.float32))
23 |     return resize_img
24 | 
25 | 
26 | def cv_imread(image_path):
27 |     cv_img = cv2.imdecode(np.fromfile(image_path, dtype=np.uint8), -1)
28 |     return cv_img
29 | 
30 | 
31 | def cv_imwrite(write_path, img):
32 |     cv2.imencode('.jpg', img, )[1].tofile(write_path)
33 |     return
34 | 
35 | 
36 | def preprocess_imgs(img_path, save_path, pool_num):
37 |     """
38 |     :param img_path: directory of the images to process
39 |     :param save_path: directory for the results
40 |     :param pool_num: number of worker processes
41 |     Description: filters the images and normalizes their size (the model constrains the input size), which improves recognition accuracy
42 |     """
43 |     if not os.path.exists(img_path):
44 |         print("not exists ", img_path, " exit...")
45 |         return
46 |     img_names = os.listdir(img_path)
47 |     if not os.path.exists(save_path):
48 |         os.makedirs(save_path)
49 |     params = []
50 |     for img_name in img_names:
51 |         params.append((img_path, save_path, img_name))
52 |     if pool_num > 0:
53 |         pool = Pool(pool_num)
54 |         pool.map(pre_run, params)
55 |         pool.close()
56 |         pool.join()
57 |     else:
58 |         for param in params:
59 |             pre_run(param)
60 | 
61 | 
62 | def pre_run(params):
63 |     run(params[0], params[1], params[2])
64 | 
65 | 
66 | def run(img_path, save_path, img_name):
67 |     img = cv_imread(os.path.join(img_path, img_name))
68 |     img_blurred = preprocess_one_img(img)
69 |     cv_imwrite(os.path.join(save_path, img_name), img_blurred)
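A minimal driver for this step (directory names and worker count are hypothetical; the multiprocessing guard matters on platforms that spawn processes):

    from watermask_remover_and_split_data.tools.preprocess_for_test import preprocess_imgs

    if __name__ == '__main__':
        # Sharpen, filter and resize every image in ./data_temp/raw to height 32,
        # writing the results to ./data_temp/clean using 4 worker processes.
        preprocess_imgs('./data_temp/raw', './data_temp/clean', pool_num=4)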
"""This class implements an image buffer that stores previously generated images. 7 | 8 | This buffer enables us to update discriminators using a history of generated images 9 | rather than the ones produced by the latest generators. 10 | """ 11 | 12 | def __init__(self, pool_size): 13 | """Initialize the ImagePool class 14 | 15 | Parameters: 16 | pool_size (int) -- the size of image buffer, if pool_size=0, no buffer will be created 17 | """ 18 | self.pool_size = pool_size 19 | if self.pool_size > 0: # create an empty pool 20 | self.num_imgs = 0 21 | self.images = [] 22 | 23 | def query(self, images): 24 | """Return an image from the pool. 25 | 26 | Parameters: 27 | images: the latest generated images from the generator 28 | 29 | Returns images from the buffer. 30 | 31 | By 50/100, the buffer will return input images. 32 | By 50/100, the buffer will return images previously stored in the buffer, 33 | and insert the current images to the buffer. 34 | """ 35 | if self.pool_size == 0: # if the buffer size is 0, do nothing 36 | return images 37 | return_images = [] 38 | for image in images: 39 | image = torch.unsqueeze(image.data, 0) 40 | if self.num_imgs < self.pool_size: # if the buffer is not full; keep inserting current images to the buffer 41 | self.num_imgs = self.num_imgs + 1 42 | self.images.append(image) 43 | return_images.append(image) 44 | else: 45 | p = random.uniform(0, 1) 46 | if p > 0.5: # by 50% chance, the buffer will return a previously stored image, and insert the current image into the buffer 47 | random_id = random.randint(0, self.pool_size - 1) # randint is inclusive 48 | tmp = self.images[random_id].clone() 49 | self.images[random_id] = image 50 | return_images.append(tmp) 51 | else: # by another 50% chance, the buffer will return the current image 52 | return_images.append(image) 53 | return_images = torch.cat(return_images, 0) # collect all the images and return 54 | return return_images 55 | -------------------------------------------------------------------------------- /pytorch-CycleGAN-and-pix2pix/scripts/test_before_push.py: -------------------------------------------------------------------------------- 1 | # Simple script to make sure basic usage 2 | # such as training, testing, saving and loading 3 | # runs without errors. 
-------------------------------------------------------------------------------- /pytorch-CycleGAN-and-pix2pix/scripts/test_before_push.py: --------------------------------------------------------------------------------
1 | # Simple script to make sure basic usage
2 | # such as training, testing, saving and loading
3 | # runs without errors.
4 | import os
5 | 
6 | 
7 | def run(command):
8 |     print(command)
9 |     exit_status = os.system(command)
10 |     if exit_status > 0:
11 |         exit(1)
12 | 
13 | 
14 | if __name__ == '__main__':
15 |     # download mini datasets
16 |     if not os.path.exists('./datasets/mini'):
17 |         run('bash ./datasets/download_cyclegan_dataset.sh mini')
18 | 
19 |     if not os.path.exists('./datasets/mini_pix2pix'):
20 |         run('bash ./datasets/download_cyclegan_dataset.sh mini_pix2pix')
21 | 
22 |     # pretrained cyclegan model
23 |     if not os.path.exists('./checkpoints/horse2zebra_pretrained/latest_net_G.pth'):
24 |         run('bash ./scripts/download_cyclegan_model.sh horse2zebra')
25 |     run('python test.py --model test --dataroot ./datasets/mini --name horse2zebra_pretrained --no_dropout --num_test 1')
26 | 
27 |     # pretrained pix2pix model
28 |     if not os.path.exists('./checkpoints/facades_label2photo_pretrained/latest_net_G.pth'):
29 |         run('bash ./scripts/download_pix2pix_model.sh facades_label2photo')
30 |     if not os.path.exists('./datasets/facades'):
31 |         run('bash ./datasets/download_pix2pix_dataset.sh facades')
32 |     run('python test.py --dataroot ./datasets/facades/ --direction BtoA --model pix2pix --name facades_label2photo_pretrained --num_test 1')
33 | 
34 |     # cyclegan train/test
35 |     run('python train.py --model cycle_gan --name temp_cyclegan --dataroot ./datasets/mini --niter 1 --niter_decay 0 --save_latest_freq 10 --print_freq 1 --display_id -1')
36 |     run('python test.py --model test --name temp_cyclegan --dataroot ./datasets/mini --num_test 1 --model_suffix "_A" --no_dropout')
37 | 
38 |     # pix2pix train/test
39 |     run('python train.py --model pix2pix --name temp_pix2pix --dataroot ./datasets/mini_pix2pix --niter 1 --niter_decay 5 --save_latest_freq 10 --display_id -1')
40 |     run('python test.py --model pix2pix --name temp_pix2pix --dataroot ./datasets/mini_pix2pix --num_test 1')
41 | 
42 |     # template train/test
43 |     run('python train.py --model template --name temp2 --dataroot ./datasets/mini_pix2pix --niter 1 --niter_decay 0 --save_latest_freq 10 --display_id -1')
44 |     run('python test.py --model template --name temp2 --dataroot ./datasets/mini_pix2pix --num_test 1')
45 | 
46 |     # colorization train/test (optional)
47 |     if not os.path.exists('./datasets/mini_colorization'):
48 |         run('bash ./datasets/download_cyclegan_dataset.sh mini_colorization')
49 | 
50 |     run('python train.py --model colorization --name temp_color --dataroot ./datasets/mini_colorization --niter 1 --niter_decay 0 --save_latest_freq 5 --display_id -1')
51 |     run('python test.py --model colorization --name temp_color --dataroot ./datasets/mini_colorization --num_test 1')
-------------------------------------------------------------------------------- /pytorch-CycleGAN-and-pix2pix/data/aligned_dataset.py: --------------------------------------------------------------------------------
1 | import os.path
2 | from data.base_dataset import BaseDataset, get_params, get_transform
3 | from data.image_folder import make_dataset
4 | import torchvision.transforms as transforms
5 | from PIL import Image
6 | 
7 | 
8 | class AlignedDataset(BaseDataset):
9 |     """A dataset class for paired image dataset.
10 | 
11 |     It assumes that the directory '/path/to/data/train' contains image pairs in the form of {A,B}.
12 |     During test time, you need to prepare a directory '/path/to/data/test'.
13 |     """
14 | 
15 |     def __init__(self, opt):
16 |         """Initialize this dataset class.
17 | 
18 |         Parameters:
19 |             opt (Option class) -- stores all the experiment flags; needs to be a subclass of BaseOptions
20 |         """
21 |         BaseDataset.__init__(self, opt)
22 |         self.dir_AB = os.path.join(opt.dataroot, opt.phase)  # get the image directory
23 |         self.AB_paths = sorted(make_dataset(self.dir_AB, opt.max_dataset_size))  # get image paths
24 |         assert(self.opt.load_size >= self.opt.crop_size)  # crop_size should be smaller than the size of loaded image
25 |         self.input_nc = self.opt.output_nc if self.opt.direction == 'BtoA' else self.opt.input_nc
26 |         self.output_nc = self.opt.input_nc if self.opt.direction == 'BtoA' else self.opt.output_nc
27 | 
28 |     def __getitem__(self, index):
29 |         """Return a data point and its metadata information.
30 | 
31 |         Parameters:
32 |             index - - a random integer for data indexing
33 | 
34 |         Returns a dictionary that contains A, B, A_paths and B_paths
35 |             A (tensor) - - an image in the input domain
36 |             B (tensor) - - its corresponding image in the target domain
37 |             A_paths (str) - - image paths
38 |             B_paths (str) - - image paths (same as A_paths)
39 |         """
40 |         # read an image given a random integer index
41 |         AB_path = self.AB_paths[index]
42 |         AB = Image.open(AB_path).convert('RGB')
43 |         # split AB image into A and B
44 |         w, h = AB.size
45 |         w2 = int(w / 2)
46 |         if self.opt.add_contrast:
47 |             # increase brightness and contrast
48 |             AB = transforms.ColorJitter(contrast=0.1, brightness=0.1)(AB)
49 | 
50 |         A = AB.crop((0, 0, w2, h))
51 |         B = AB.crop((w2, 0, w, h))
52 | 
53 |         # apply the same transform to both A and B
54 |         transform_params = get_params(self.opt, A.size)
55 |         A_transform = get_transform(self.opt, transform_params, grayscale=(self.input_nc == 1))
56 |         B_transform = get_transform(self.opt, transform_params, grayscale=(self.output_nc == 1))
57 | 
58 |         A = A_transform(A)
59 |         B = B_transform(B)
60 | 
61 |         return {'A': A, 'B': B, 'A_paths': AB_path, 'B_paths': AB_path}
62 | 
63 |     def __len__(self):
64 |         """Return the total number of images in the dataset."""
65 |         return len(self.AB_paths)
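Datasets in the {A,B} format above are produced by pasting the two domains side by side into one image; a minimal sketch of preparing such a pair (file paths hypothetical):

    from PIL import Image

    A = Image.open('marked.jpg').convert('RGB')   # e.g. a watermarked crop
    B = Image.open('clean.jpg').convert('RGB')    # its watermark-free target
    assert A.size == B.size
    AB = Image.new('RGB', (A.size[0] * 2, A.size[1]))
    AB.paste(A, (0, 0))              # left half: input domain
    AB.paste(B, (A.size[0], 0))      # right half: target domain
    AB.save('./datasets/demo/train/0001.jpg')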
-------------------------------------------------------------------------------- /pytorch-CycleGAN-and-pix2pix/data/colorization_dataset.py: --------------------------------------------------------------------------------
1 | import os.path
2 | from data.base_dataset import BaseDataset, get_transform
3 | from data.image_folder import make_dataset
4 | from skimage import color  # require skimage
5 | from PIL import Image
6 | import numpy as np
7 | import torchvision.transforms as transforms
8 | 
9 | 
10 | class ColorizationDataset(BaseDataset):
11 |     """This dataset class can load a set of natural images in RGB, and convert RGB format into (L, ab) pairs in Lab color space.
12 | 
13 |     This dataset is required by pix2pix-based colorization model ('--model colorization')
14 |     """
15 |     @staticmethod
16 |     def modify_commandline_options(parser, is_train):
17 |         """Add new dataset-specific options, and rewrite default values for existing options.
18 | 
19 |         Parameters:
20 |             parser -- original option parser
21 |             is_train (bool) -- whether training phase or test phase. You can use this flag to add training-specific or test-specific options.
22 | 
23 |         Returns:
24 |             the modified parser.
25 | 
26 |         By default, the number of channels for input image is 1 (L) and
27 |         the number of channels for output image is 2 (ab). The direction is from A to B
28 |         """
29 |         parser.set_defaults(input_nc=1, output_nc=2, direction='AtoB')
30 |         return parser
31 | 
32 |     def __init__(self, opt):
33 |         """Initialize this dataset class.
34 | 
35 |         Parameters:
36 |             opt (Option class) -- stores all the experiment flags; needs to be a subclass of BaseOptions
37 |         """
38 |         BaseDataset.__init__(self, opt)
39 |         self.dir = os.path.join(opt.dataroot, opt.phase)
40 |         self.AB_paths = sorted(make_dataset(self.dir, opt.max_dataset_size))
41 |         assert(opt.input_nc == 1 and opt.output_nc == 2 and opt.direction == 'AtoB')
42 |         self.transform = get_transform(self.opt, convert=False)
43 | 
44 |     def __getitem__(self, index):
45 |         """Return a data point and its metadata information.
46 | 
47 |         Parameters:
48 |             index - - a random integer for data indexing
49 | 
50 |         Returns a dictionary that contains A, B, A_paths and B_paths
51 |             A (tensor) - - the L channel of an image
52 |             B (tensor) - - the ab channels of the same image
53 |             A_paths (str) - - image paths
54 |             B_paths (str) - - image paths (same as A_paths)
55 |         """
56 |         path = self.AB_paths[index]
57 |         im = Image.open(path).convert('RGB')
58 |         im = self.transform(im)
59 |         im = np.array(im)
60 |         lab = color.rgb2lab(im).astype(np.float32)
61 |         lab_t = transforms.ToTensor()(lab)
62 |         A = lab_t[[0], ...] / 50.0 - 1.0
63 |         B = lab_t[[1, 2], ...] / 110.0
64 |         return {'A': A, 'B': B, 'A_paths': path, 'B_paths': path}
65 | 
66 |     def __len__(self):
67 |         """Return the total number of images in the dataset."""
68 |         return len(self.AB_paths)
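The two divisions above map both channels into roughly [-1, 1]: L lies in [0, 100], so L/50 - 1 centers it, and ab lies in about [-110, 110], so ab/110 rescales it. A small round-trip sketch of the same convention (mirrors lab2rgb in colorization_model.py below; the dummy image is illustrative):

    import numpy as np
    from skimage import color

    rgb = np.random.rand(8, 8, 3).astype(np.float32)  # dummy image in [0, 1]
    lab = color.rgb2lab(rgb)
    A = lab[..., 0] / 50.0 - 1.0      # L channel  -> [-1, 1]
    B = lab[..., 1:] / 110.0          # ab channels -> ~[-1, 1]
    # Invert the normalization and convert back to RGB:
    lab2 = np.dstack(((A + 1.0) * 50.0, B * 110.0))
    rgb2 = color.lab2rgb(lab2)
    assert np.allclose(rgb, rgb2, atol=1e-3)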
42 | """ 43 | # reuse the pix2pix model 44 | Pix2PixModel.__init__(self, opt) 45 | # specify the images to be visualized. 46 | self.visual_names = ['real_A', 'real_B_rgb', 'fake_B_rgb'] 47 | 48 | def lab2rgb(self, L, AB): 49 | """Convert an Lab tensor image to a RGB numpy output 50 | Parameters: 51 | L (1-channel tensor array): L channel images (range: [-1, 1], torch tensor array) 52 | AB (2-channel tensor array): ab channel images (range: [-1, 1], torch tensor array) 53 | 54 | Returns: 55 | rgb (RGB numpy image): rgb output images (range: [0, 255], numpy array) 56 | """ 57 | AB2 = AB * 110.0 58 | L2 = (L + 1.0) * 50.0 59 | Lab = torch.cat([L2, AB2], dim=1) 60 | Lab = Lab[0].data.cpu().float().numpy() 61 | Lab = np.transpose(Lab.astype(np.float64), (1, 2, 0)) 62 | rgb = color.lab2rgb(Lab) * 255 63 | return rgb 64 | 65 | def compute_visuals(self): 66 | """Calculate additional output images for visdom and HTML visualization""" 67 | self.real_B_rgb = self.lab2rgb(self.real_A, self.real_B) 68 | self.fake_B_rgb = self.lab2rgb(self.real_A, self.fake_B) 69 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | # 整体介绍 3 | [赛题介绍](https://www.datafountain.cn/competitions/346) 4 | 5 | 6 | 我们的队名是:鹏脱单攻略队 后面改为"天晨破晓" 最终我们团队成绩在复赛AB榜均排在第一名,识别准确率达0.996952 7 | 8 | ![官网截图](https://github.com/Mingtzge/2019-CCF-BDCI-OCR-MCZJ-OCR-IdentificationIDElement/blob/master/show_imgs/webwxgetmsgimg.jpg) 9 | 10 | 团队成绩:2019CCF-BDCI大赛 最佳创新探索奖 和 "基于OCR的身份证要素提取"单赛题冠军 11 | 12 | ### 系统处理流程图 13 | 14 | ![流程图](https://github.com/Mingtzge/2019-CCF-BDCI-OCR-MCZJ-OCR-IdentificationIDElement/blob/master/show_imgs/%E7%B3%BB%E7%BB%9F%E6%9E%B6%E6%9E%84.png) 15 | 16 | ### 方案亮点 17 | 我们采用条件生成对抗网络(CGAN)处理赛题中的水印干扰,取到了比较好的效果,展示一下效果图片: 18 | ![去水印效果](https://github.com/Mingtzge/2019-CCF-BDCI-OCR-MCZJ-OCR-IdentificationIDElement/blob/master/show_imgs/%E5%8E%BB%E6%B0%B4%E5%8D%B0%E6%95%88%E6%9E%9C.png) 19 | 20 | [生成仿真数据源码](https://github.com/Mingtzge/2019-CCF-BDCI-OCR-MCZJ-fake_data_generator),生成仿真训练数据训练去水印模型和文字识别模型 21 | 22 | [方案PPT](https://discussion.datafountain.cn/questions/2260/answers/23380) [方案论文](https://discussion.datafountain.cn/questions/2232/answers/23321) 23 | 24 | # 执行方式介绍 25 | 完整执行示例: 26 | CPU执行,单进程: 27 | python main_process.py --test_experiment_name test_example --test_data_dir ./test_data --gan_ids -1 --pool_num 0 28 | 参数详解: 29 | --test_experiment_name:实验名,将决定中间数据结果存放目录 30 | --test_data_dir: 数据目录 31 | --gan_ids: 去水印模型:如果是-1 则是cpu运行, 大于0,则是GPU 32 | --pool_num 0单进程 大于0多进程 33 | 其他参数参考main_process.py中的help 34 | 35 | # 项目整体文件结构说明: 36 | (按照处理流程介绍,具体文件介绍见文件内的readme) 37 | 38 | ## 身份证区域提取模块 cut_twist_process 39 | 剪切、翻转部分代码;用于将身份证正反面从原始图片中切分出来 40 | 41 | ## 去除水印\关键文本定位模块 watermask_remover_and_split_data 42 | 进行水印去除,身份证切割,提取文字部分,滤波 43 | 44 | ## 去水印模型 pytorch-CycleGAN-and-pix2pix 45 | 我们训练好的去除水印模型地址: 46 | ### 参考资料 47 | 去水印模型采用条件gan网络。论文[链接](https://arxiv.org/pdf/1611.07004.pdf) 48 | 49 | 参考了GitHub上gan pix2pix 项目,[链接](https://github.com/junyanz/pytorch-CycleGAN-and-pix2pix#cyclegan-and-pix2pix-in-pytorch),我们基于此项目进行了一些更改 50 | 51 | ## 文字识别模块 recognize_process 52 | 用于识别图片中的文字信息 53 | ### 参考资料 54 | 识别模型采用CRNN。论文[链接](https://arxiv.org/abs/1507.05717) 55 | 56 | 参考了GitHub上两个模型的TensorFlow实现 57 | 58 | [项目1](https://github.com/MaybeShewill-CV/CRNN_Tensorflow) 59 | 60 | [项目2](https://github.com/bai-shang/crnn_ctc_ocr.Tensorflow) 61 | 62 | ## 文本纠正模块 data_correction_and_generate_csv_file 63 | 对识别结果进行纠正,以及生成最终的csv文件 64 | 65 | ## 
65 | ## data_temp
66 | Directory for intermediate data; directory name: experiment name + date
67 | 
68 | ## CCFTestResultFixValidData_release.csv
69 | The generated result file
70 | 
71 | ## main_process.py
72 | The entry script
73 | 
74 | ## Requirement.txt
75 | Environment requirements
76 | 
77 | # Notes
78 | Watermark-removal model [address](https://github.com/Mingtzge/models_data)
79 | 
80 | This is a git submodule of the project, so add the "--recursive" flag when cloning
81 | The watermark-removal model is large and is stored with git-lfs; install git-lfs, otherwise the clone may fail or be slow
82 | 
83 | 
84 | Because we exceeded the git-lfs quota, lfs no longer works, so cloning the model files (the text-recognition and watermark-removal models) may fail. We uploaded the model files to Baidu Cloud. Since the data files contain ID-card images, the data are sensitive and the Baidu Cloud links expire easily; if you need the competition data or the model files, add me as a friend on Baidu Netdisk, see [About data download](https://github.com/Mingtzge/2019-CCF-BDCI-OCR-MCZJ-OCR-IdentificationIDElement/issues/10#issue-546035222)
85 | 
86 | !!! Note: test data must keep the same format as the preliminary and final rounds: the top-left corner of each ID-card side must carry the watermark text "仅限DBCI比赛(复赛)使用" ("for DBCI competition (final round) use only"),
87 | with the same font size, style and position as in the preliminary/final data; otherwise recognition accuracy will suffer badly and the code may even fail
88 | Reason: to recognize the individual ID-card elements, we first crop them out and then recognize them. When cropping, we use "限DBCI" as the reference:
89 | before every crop, we match this template against the image to obtain a reference coordinate, and then crop each element relative to that coordinate.
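A minimal sketch of that template-matching step with OpenCV (file names hypothetical; the project's actual matching code lives in the cut/crop modules, not shown here):

    import cv2

    img = cv2.imread('id_card_side.jpg', cv2.IMREAD_GRAYSCALE)
    tmpl = cv2.imread('watermark_template.jpg', cv2.IMREAD_GRAYSCALE)
    res = cv2.matchTemplate(img, tmpl, cv2.TM_CCOEFF_NORMED)
    _, max_val, _, max_loc = cv2.minMaxLoc(res)  # max_loc is the reference coordinate
    # Each ID-card element is then cropped at a fixed offset relative to max_loc.
    print(max_val, max_loc)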
-------------------------------------------------------------------------------- /pytorch-CycleGAN-and-pix2pix/models/__init__.py: --------------------------------------------------------------------------------
1 | """This package contains modules related to objective functions, optimizations, and network architectures.
2 | 
3 | To add a custom model class called 'dummy', you need to add a file called 'dummy_model.py' and define a subclass DummyModel inherited from BaseModel.
4 | You need to implement the following five functions:
5 |     -- <__init__>: initialize the class; first call BaseModel.__init__(self, opt).
6 |     -- <set_input>: unpack data from dataset and apply preprocessing.
7 |     -- <forward>: produce intermediate results.
8 |     -- <optimize_parameters>: calculate loss, gradients, and update network weights.
9 |     -- <modify_commandline_options>: (optionally) add model-specific options and set default options.
10 | 
11 | In the function <__init__>, you need to define four lists:
12 |     -- self.loss_names (str list): specify the training losses that you want to plot and save.
13 |     -- self.model_names (str list): define networks used in our training.
14 |     -- self.visual_names (str list): specify the images that you want to display and save.
15 |     -- self.optimizers (optimizer list): define and initialize optimizers. You can define one optimizer for each network. If two networks are updated at the same time, you can use itertools.chain to group them. See cycle_gan_model.py for a usage example.
16 | 
17 | Now you can use the model class by specifying flag '--model dummy'.
18 | See our template model class 'template_model.py' for more details.
19 | """
20 | 
21 | import importlib
22 | from models.base_model import BaseModel
23 | 
24 | 
25 | def find_model_using_name(model_name):
26 |     """Import the module "models/[model_name]_model.py".
27 | 
28 |     In the file, the class called DatasetNameModel() will
29 |     be instantiated. It has to be a subclass of BaseModel,
30 |     and it is case-insensitive.
31 |     """
32 |     model_filename = "models." + model_name + "_model"
33 |     modellib = importlib.import_module(model_filename)
34 |     model = None
35 |     target_model_name = model_name.replace('_', '') + 'model'
36 |     for name, cls in modellib.__dict__.items():
37 |         if name.lower() == target_model_name.lower() \
38 |            and issubclass(cls, BaseModel):
39 |             model = cls
40 | 
41 |     if model is None:
42 |         print("In %s.py, there should be a subclass of BaseModel with class name that matches %s in lowercase." % (model_filename, target_model_name))
43 |         exit(0)
44 | 
45 |     return model
46 | 
47 | 
48 | def get_option_setter(model_name):
49 |     """Return the static method <modify_commandline_options> of the model class."""
50 |     model_class = find_model_using_name(model_name)
51 |     return model_class.modify_commandline_options
52 | 
53 | 
54 | def create_model(opt):
55 |     """Create a model given the option.
56 | 
57 |     This function instantiates the model class found by <find_model_using_name>.
58 |     This is the main interface between this package and 'train.py'/'test.py'
59 | 
60 |     Example:
61 |         >>> from models import create_model
62 |         >>> model = create_model(opt)
63 |     """
64 |     model = find_model_using_name(opt.model)
65 |     instance = model(opt)
66 |     print("model [%s] was created" % type(instance).__name__)
67 |     return instance
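The lookup above is plain importlib reflection; a condensed, standalone sketch of the same pattern (package layout and names hypothetical):

    import importlib

    def find_class(package, name, base):
        # Import package.<name>_model and return the class whose lowercased name
        # equals e.g. 'pix2pixmodel', mirroring find_model_using_name above.
        module = importlib.import_module('%s.%s_model' % (package, name))
        target = name.replace('_', '') + 'model'
        for attr, cls in module.__dict__.items():
            if attr.lower() == target and isinstance(cls, type) and issubclass(cls, base):
                return cls
        raise ImportError('no subclass of %s named %s' % (base.__name__, target))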
-------------------------------------------------------------------------------- /pytorch-CycleGAN-and-pix2pix/models/test_model.py: --------------------------------------------------------------------------------
1 | from .base_model import BaseModel
2 | from . import networks
3 | 
4 | 
5 | class TestModel(BaseModel):
6 |     """ This TestModel can be used to generate CycleGAN results for only one direction.
7 |     This model will automatically set '--dataset_mode single', which only loads the images from one collection.
8 | 
9 |     See the test instruction for more details.
10 |     """
11 |     @staticmethod
12 |     def modify_commandline_options(parser, is_train=True):
13 |         """Add new dataset-specific options, and rewrite default values for existing options.
14 | 
15 |         Parameters:
16 |             parser -- original option parser
17 |             is_train (bool) -- whether training phase or test phase. You can use this flag to add training-specific or test-specific options.
18 | 
19 |         Returns:
20 |             the modified parser.
21 | 
22 |         The model can only be used during test time. It requires '--dataset_mode single'.
23 |         You need to specify the network using the option '--model_suffix'.
24 |         """
25 |         assert not is_train, 'TestModel cannot be used during training time'
26 |         parser.set_defaults(dataset_mode='single')
27 |         parser.add_argument('--model_suffix', type=str, default='', help='In checkpoints_dir, [epoch]_net_G[model_suffix].pth will be loaded as the generator.')
28 | 
29 |         return parser
30 | 
31 |     def __init__(self, opt):
32 |         """Initialize the TestModel class.
33 | 
34 |         Parameters:
35 |             opt (Option class)-- stores all the experiment flags; needs to be a subclass of BaseOptions
36 |         """
37 |         assert(not opt.isTrain)
38 |         BaseModel.__init__(self, opt)
39 |         # specify the training losses you want to print out. The training/test scripts will call <BaseModel.get_current_losses>
40 |         self.loss_names = []
41 |         # specify the images you want to save/display. The training/test scripts will call <BaseModel.get_current_visuals>
42 |         self.visual_names = ['real', 'fake']
43 |         # specify the models you want to save to the disk. The training/test scripts will call <BaseModel.save_networks> and <BaseModel.load_networks>
44 |         self.model_names = ['G' + opt.model_suffix]  # only generator is needed.
45 |         self.netG = networks.define_G(opt.input_nc, opt.output_nc, opt.ngf, opt.netG,
46 |                                       opt.norm, not opt.no_dropout, opt.init_type, opt.init_gain, self.gpu_ids)
47 | 
48 |         # assigns the model to self.netG_[suffix] so that it can be loaded
49 |         # please see <BaseModel.load_networks>
50 |         setattr(self, 'netG' + opt.model_suffix, self.netG)  # store netG in self.
51 | 
52 |     def set_input(self, input):
53 |         """Unpack input data from the dataloader and perform necessary pre-processing steps.
54 | 
55 |         Parameters:
56 |             input: a dictionary that contains the data itself and its metadata information.
57 | 
58 |         We need to use the 'single_dataset' dataset mode. It only loads images from one domain.
59 |         """
60 |         self.real = input['A'].to(self.device)
61 |         self.image_paths = input['A_paths']
62 | 
63 |     def forward(self):
64 |         """Run forward pass."""
65 |         self.fake = self.netG(self.real)  # G(real)
66 | 
67 |     def optimize_parameters(self):
68 |         """No optimization for test model."""
69 |         pass
-------------------------------------------------------------------------------- /pytorch-CycleGAN-and-pix2pix/scripts/eval_cityscapes/evaluate.py: --------------------------------------------------------------------------------
1 | import os
2 | import caffe
3 | import argparse
4 | import numpy as np
5 | import scipy.misc
6 | from PIL import Image
7 | from util import segrun, fast_hist, get_scores
8 | from cityscapes import cityscapes
9 | 
10 | parser = argparse.ArgumentParser()
11 | parser.add_argument("--cityscapes_dir", type=str, required=True, help="Path to the original cityscapes dataset")
12 | parser.add_argument("--result_dir", type=str, required=True, help="Path to the generated images to be evaluated")
13 | parser.add_argument("--output_dir", type=str, required=True, help="Where to save the evaluation results")
14 | parser.add_argument("--caffemodel_dir", type=str, default='./scripts/eval_cityscapes/caffemodel/', help="Where the FCN-8s caffemodel stored")
15 | parser.add_argument("--gpu_id", type=int, default=0, help="Which gpu id to use")
16 | parser.add_argument("--split", type=str, default='val', help="Data split to be evaluated")
17 | parser.add_argument("--save_output_images", type=int, default=0, help="Whether to save the FCN output images")
18 | args = parser.parse_args()
19 | 
20 | 
21 | def main():
22 |     if not os.path.isdir(args.output_dir):
23 |         os.makedirs(args.output_dir)
24 |     if args.save_output_images > 0:
25 |         output_image_dir = args.output_dir + 'image_outputs/'
26 |         if not os.path.isdir(output_image_dir):
27 |             os.makedirs(output_image_dir)
28 |     CS = cityscapes(args.cityscapes_dir)
29 |     n_cl = len(CS.classes)
30 |     label_frames = CS.list_label_frames(args.split)
31 |     caffe.set_device(args.gpu_id)
32 |     caffe.set_mode_gpu()
33 |     net = caffe.Net(args.caffemodel_dir + '/deploy.prototxt',
34 |                     args.caffemodel_dir + 'fcn-8s-cityscapes.caffemodel',
35 |                     caffe.TEST)
36 | 
37 |     hist_perframe = np.zeros((n_cl, n_cl))
38 |     for i, idx in enumerate(label_frames):
39 |         if i % 10 == 0:
40 |             print('Evaluating: %d/%d' % (i, len(label_frames)))
41 |         city = idx.split('_')[0]
42 |         # idx is city_shot_frame
43 |         label = CS.load_label(args.split, city, idx)
44 |         im_file = args.result_dir + '/' + idx + '_leftImg8bit.png'
45 |         im = np.array(Image.open(im_file))
46 |         im = scipy.misc.imresize(im, (label.shape[1], label.shape[2]))
47 |         out = segrun(net, CS.preprocess(im))
48 |         hist_perframe += fast_hist(label.flatten(), out.flatten(), n_cl)
49 |         if args.save_output_images > 0:
50 |             label_im = CS.palette(label)
51 |             pred_im = CS.palette(out)
52 |             scipy.misc.imsave(output_image_dir + '/' + str(i) + '_pred.jpg', pred_im)
53 |             scipy.misc.imsave(output_image_dir + '/' + str(i) + '_gt.jpg', label_im)
54 |             scipy.misc.imsave(output_image_dir + '/' + str(i) + '_input.jpg', im)
55 | 
56 |     mean_pixel_acc, mean_class_acc, mean_class_iou, per_class_acc, per_class_iou = get_scores(hist_perframe)
57 |     with open(args.output_dir + '/evaluation_results.txt', 'w') as f:
58 |         f.write('Mean pixel accuracy: %f\n' % mean_pixel_acc)
59 |         f.write('Mean class accuracy: %f\n' % mean_class_acc)
60 |         f.write('Mean class IoU: %f\n' % mean_class_iou)
61 |         f.write('************ Per class numbers below ************\n')
62 |         for i, cl in enumerate(CS.classes):
63 |             while len(cl) < 15:
64 |                 cl = cl + ' '
65 |             f.write('%s: acc = %f, iou = %f\n' % (cl, per_class_acc[i], per_class_iou[i]))
66 | 
67 | 
68 | main()
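The scoring helpers imported above (fast_hist, get_scores) follow the standard FCN bookkeeping: accumulate an n_cl x n_cl confusion matrix, then read accuracies and IoU off its diagonal. A self-contained sketch of those formulas (our own illustration, not the project's util.py):

    import numpy as np

    def fast_hist(a, b, n):
        # a: ground-truth labels, b: predictions, both flattened; n: number of classes
        k = (a >= 0) & (a < n)
        return np.bincount(n * a[k].astype(int) + b[k], minlength=n ** 2).reshape(n, n)

    def scores(hist):
        pixel_acc = np.diag(hist).sum() / hist.sum()
        class_acc = np.diag(hist) / hist.sum(axis=1)  # per-class accuracy
        iou = np.diag(hist) / (hist.sum(axis=1) + hist.sum(axis=0) - np.diag(hist))
        return pixel_acc, np.nanmean(class_acc), np.nanmean(iou)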
-------------------------------------------------------------------------------- /pytorch-CycleGAN-and-pix2pix/options/train_options.py: --------------------------------------------------------------------------------
1 | from .base_options import BaseOptions
2 | 
3 | 
4 | class TrainOptions(BaseOptions):
5 |     """This class includes training options.
6 | 
7 |     It also includes shared options defined in BaseOptions.
8 |     """
9 | 
10 |     def initialize(self, parser):
11 |         parser = BaseOptions.initialize(self, parser)
12 |         # visdom and HTML visualization parameters
13 |         parser.add_argument('--display_freq', type=int, default=400, help='frequency of showing training results on screen')
14 |         parser.add_argument('--display_ncols', type=int, default=4, help='if positive, display all images in a single visdom web panel with certain number of images per row.')
15 |         parser.add_argument('--display_id', type=int, default=1, help='window id of the web display')
16 |         parser.add_argument('--display_server', type=str, default="http://localhost", help='visdom server of the web display')
17 |         parser.add_argument('--display_env', type=str, default='main', help='visdom display environment name (default is "main")')
18 |         parser.add_argument('--display_port', type=int, default=8001, help='visdom port of the web display')
19 |         parser.add_argument('--update_html_freq', type=int, default=1000, help='frequency of saving training results to html')
20 |         parser.add_argument('--print_freq', type=int, default=100, help='frequency of showing training results on console')
21 |         parser.add_argument('--no_html', action='store_true', help='do not save intermediate training results to [opt.checkpoints_dir]/[opt.name]/web/')
22 |         # network saving and loading parameters
23 |         parser.add_argument('--save_latest_freq', type=int, default=5000, help='frequency of saving the latest results')
24 |         parser.add_argument('--save_epoch_freq', type=int, default=2, help='frequency of saving checkpoints at the end of epochs')
25 |         parser.add_argument('--save_by_iter', action='store_true', help='whether saves model by iteration')
26 |         parser.add_argument('--continue_train', action='store_true', help='continue training: load the latest model')
27 |         parser.add_argument('--epoch_count', type=int, default=1, help='the starting epoch count, we save the model by <epoch_count>, <epoch_count>+<save_latest_freq>, ...')
28 |         parser.add_argument('--phase', type=str, default='train', help='train, val, test, etc')
29 |         # training parameters
30 |         parser.add_argument('--niter', type=int, default=0, help='# of iter at starting learning rate')
31 |         parser.add_argument('--niter_decay', type=int, default=3, help='# of iter to linearly decay learning rate to zero')
32 |         parser.add_argument('--beta1', type=float, default=0.5, help='momentum term of adam')
33 |         parser.add_argument('--lr', type=float, default=0.003, help='initial learning rate for adam')
34 |         parser.add_argument('--gan_mode', type=str, default='lsgan', help='the type of GAN objective. [vanilla | lsgan | wgangp]. vanilla GAN loss is the cross-entropy objective used in the original GAN paper.')
35 |         parser.add_argument('--pool_size', type=int, default=50, help='the size of image buffer that stores previously generated images')
36 |         parser.add_argument('--lr_policy', type=str, default='linear', help='learning rate policy. [linear | step | plateau | cosine]')
37 |         parser.add_argument('--lr_decay_iters', type=int, default=50, help='multiply by a gamma every lr_decay_iters iterations')
38 | 
39 |         self.isTrain = True
40 |         return parser
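Under the 'linear' policy, the learning rate is held constant for --niter epochs and then decays linearly to zero over --niter_decay epochs. A minimal sketch of that schedule as a LambdaLR (our own illustration under these defaults; the project's actual scheduler lives in models/networks.py, which is not shown here):

    import torch

    net = torch.nn.Linear(2, 2)
    opt_niter, opt_niter_decay, opt_epoch_count = 0, 3, 1  # mirrors the defaults above
    optimizer = torch.optim.Adam(net.parameters(), lr=0.003, betas=(0.5, 0.999))

    def lambda_rule(epoch):
        # factor 1.0 through epoch `niter`, then linear decay to 0 over `niter_decay` epochs
        return 1.0 - max(0, epoch + opt_epoch_count - opt_niter) / float(opt_niter_decay + 1)

    scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=lambda_rule)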
-------------------------------------------------------------------------------- /pytorch-CycleGAN-and-pix2pix/util/html.py: --------------------------------------------------------------------------------
1 | import dominate
2 | from dominate.tags import meta, h3, table, tr, td, p, a, img, br
3 | import os
4 | 
5 | 
6 | class HTML:
7 |     """This HTML class allows us to save images and write texts into a single HTML file.
8 | 
9 |     It consists of functions such as <add_header> (add a text header to the HTML file),
10 |     <add_images> (add a row of images to the HTML file), and <save> (save the HTML to the disk).
11 |     It is based on Python library 'dominate', a Python library for creating and manipulating HTML documents using a DOM API.
12 |     """
13 | 
14 |     def __init__(self, web_dir, title, refresh=0):
15 |         """Initialize the HTML classes
16 | 
17 |         Parameters:
18 |             web_dir (str) -- a directory that stores the webpage. HTML file will be created at <web_dir>/index.html; images will be saved at <web_dir>/images/
19 |             title (str)   -- the webpage name
20 |             refresh (int) -- how often the website refresh itself; if 0, no refreshing
21 |         """
22 |         self.title = title
23 |         self.web_dir = web_dir
24 |         self.img_dir = os.path.join(self.web_dir, 'images')
25 |         if not os.path.exists(self.web_dir):
26 |             os.makedirs(self.web_dir)
27 |         if not os.path.exists(self.img_dir):
28 |             os.makedirs(self.img_dir)
29 | 
30 |         self.doc = dominate.document(title=title)
31 |         if refresh > 0:
32 |             with self.doc.head:
33 |                 meta(http_equiv="refresh", content=str(refresh))
34 | 
35 |     def get_image_dir(self):
36 |         """Return the directory that stores images"""
37 |         return self.img_dir
38 | 
39 |     def add_header(self, text):
40 |         """Insert a header to the HTML file
41 | 
42 |         Parameters:
43 |             text (str) -- the header text
44 |         """
45 |         with self.doc:
46 |             h3(text)
47 | 
48 |     def add_images(self, ims, txts, links, width=400):
49 |         """add images to the HTML file
50 | 
51 |         Parameters:
52 |             ims (str list)   -- a list of image paths
53 |             txts (str list)  -- a list of image names shown on the website
54 |             links (str list) -- a list of hyperref links; when you click an image, it will redirect you to a new page
55 |         """
56 |         self.t = table(border=1, style="table-layout: fixed;")  # Insert a table
57 |         self.doc.add(self.t)
58 |         with self.t:
59 |             with tr():
60 |                 for im, txt, link in zip(ims, txts, links):
61 |                     with td(style="word-wrap: break-word;", halign="center", valign="top"):
62 |                         with p():
63 |                             with a(href=os.path.join('images', link)):
64 |                                 img(style="width:%dpx" % width, src=os.path.join('images', im))
65 |                             br()
66 |                             p(txt)
67 | 
68 |     def save(self):
69 |         """save the current content to the HTML file"""
70 |         html_file = '%s/index.html' % self.web_dir
71 |         f = open(html_file, 'wt')
72 |         f.write(self.doc.render())
73 |         f.close()
74 | 
75 | 
76 | if __name__ == '__main__':  # we show an example usage here.
77 |     html = HTML('web/', 'test_html')
78 |     html.add_header('hello world')
79 | 
80 |     ims, txts, links = [], [], []
81 |     for n in range(4):
82 |         ims.append('image_%d.png' % n)
83 |         txts.append('text_%d' % n)
84 |         links.append('image_%d.png' % n)
85 |     html.add_images(ims, txts, links)
86 |     html.save()
-------------------------------------------------------------------------------- /pytorch-CycleGAN-and-pix2pix/data/unaligned_dataset.py: --------------------------------------------------------------------------------
1 | import os.path
2 | from data.base_dataset import BaseDataset, get_transform
3 | from data.image_folder import make_dataset
4 | from PIL import Image
5 | import random
6 | 
7 | 
8 | class UnalignedDataset(BaseDataset):
9 |     """
10 |     This dataset class can load unaligned/unpaired datasets.
11 | 
12 |     It requires two directories to host training images from domain A '/path/to/data/trainA'
13 |     and from domain B '/path/to/data/trainB' respectively.
14 |     You can train the model with the dataset flag '--dataroot /path/to/data'.
15 |     Similarly, you need to prepare two directories:
16 |     '/path/to/data/testA' and '/path/to/data/testB' during test time.
17 |     """
18 | 
19 |     def __init__(self, opt):
20 |         """Initialize this dataset class.
21 | 
22 |         Parameters:
23 |             opt (Option class) -- stores all the experiment flags; needs to be a subclass of BaseOptions
24 |         """
25 |         BaseDataset.__init__(self, opt)
26 |         self.dir_A = os.path.join(opt.dataroot, opt.phase + 'A')  # create a path '/path/to/data/trainA'
27 |         self.dir_B = os.path.join(opt.dataroot, opt.phase + 'B')  # create a path '/path/to/data/trainB'
28 | 
29 |         self.A_paths = sorted(make_dataset(self.dir_A, opt.max_dataset_size))  # load images from '/path/to/data/trainA'
30 |         self.B_paths = sorted(make_dataset(self.dir_B, opt.max_dataset_size))  # load images from '/path/to/data/trainB'
31 |         self.A_size = len(self.A_paths)  # get the size of dataset A
32 |         self.B_size = len(self.B_paths)  # get the size of dataset B
33 |         btoA = self.opt.direction == 'BtoA'
34 |         input_nc = self.opt.output_nc if btoA else self.opt.input_nc  # get the number of channels of input image
35 |         output_nc = self.opt.input_nc if btoA else self.opt.output_nc  # get the number of channels of output image
36 |         self.transform_A = get_transform(self.opt, grayscale=(input_nc == 1))
37 |         self.transform_B = get_transform(self.opt, grayscale=(output_nc == 1))
38 | 
39 |     def __getitem__(self, index):
40 |         """Return a data point and its metadata information.
41 | 
42 |         Parameters:
43 |             index (int) -- a random integer for data indexing
44 | 
45 |         Returns a dictionary that contains A, B, A_paths and B_paths
46 |             A (tensor)  -- an image in the input domain
47 |             B (tensor)  -- its corresponding image in the target domain
48 |             A_paths (str) -- image paths
49 |             B_paths (str) -- image paths
50 |         """
51 |         A_path = self.A_paths[index % self.A_size]  # make sure index is within the range
52 |         if self.opt.serial_batches:  # use fixed pairs of images from the two domains
53 |             index_B = index % self.B_size
54 |         else:  # randomize the index for domain B to avoid fixed pairs.
55 |             index_B = random.randint(0, self.B_size - 1)
56 |         B_path = self.B_paths[index_B]
57 |         A_img = Image.open(A_path).convert('RGB')
58 |         B_img = Image.open(B_path).convert('RGB')
59 |         # apply image transformation
60 |         A = self.transform_A(A_img)
61 |         B = self.transform_B(B_img)
62 | 
63 |         return {'A': A, 'B': B, 'A_paths': A_path, 'B_paths': B_path}
64 | 
65 |     def __len__(self):
66 |         """Return the total number of images in the dataset.
67 | 
68 |         As we have two datasets with potentially different numbers of images,
69 |         we take the maximum of the two.
70 |         """
71 |         return max(self.A_size, self.B_size)
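A toy illustration of the index arithmetic above: with unequal domain sizes, A wraps around via modulo while B is drawn at random, so one epoch of length max(A_size, B_size) touches every image at least once (sizes hypothetical):

    import random

    A_size, B_size = 5, 8
    for index in range(max(A_size, B_size)):
        index_A = index % A_size                 # wraps: 0,1,2,3,4,0,1,2
        index_B = random.randint(0, B_size - 1)  # random B avoids fixed A-B pairs
        # load A_paths[index_A] and B_paths[index_B] ...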
-------------------------------------------------------------------------------- /pytorch-CycleGAN-and-pix2pix/util/util.py: --------------------------------------------------------------------------------
1 | """This module contains simple helper functions """
2 | from __future__ import print_function
3 | import torch
4 | import numpy as np
5 | from PIL import Image
6 | import os
7 | 
8 | 
9 | def tensor2im(input_image, imtype=np.uint8):
10 |     """Converts a Tensor array into a numpy image array.
11 | 
12 |     Parameters:
13 |         input_image (tensor) -- the input image tensor array
14 |         imtype (type)        -- the desired type of the converted numpy array
15 |     """
16 |     if not isinstance(input_image, np.ndarray):
17 |         if isinstance(input_image, torch.Tensor):  # get the data from a variable
18 |             image_tensor = input_image.data
19 |         else:
20 |             return input_image
21 |         image_numpy = image_tensor[0].cpu().float().numpy()  # convert it into a numpy array
22 |         if image_numpy.shape[0] == 1:  # grayscale to RGB
23 |             image_numpy = np.tile(image_numpy, (3, 1, 1))
24 |         image_numpy = (np.transpose(image_numpy, (1, 2, 0)) + 1) / 2.0 * 255.0  # post-processing: transpose and scaling
25 |     else:  # if it is a numpy array, do nothing
26 |         image_numpy = input_image
27 |     return image_numpy.astype(imtype)
28 | 
29 | 
30 | def diagnose_network(net, name='network'):
31 |     """Calculate and print the mean of average absolute(gradients)
32 | 
33 |     Parameters:
34 |         net (torch network) -- Torch network
35 |         name (str)          -- the name of the network
36 |     """
37 |     mean = 0.0
38 |     count = 0
39 |     for param in net.parameters():
40 |         if param.grad is not None:
41 |             mean += torch.mean(torch.abs(param.grad.data))
42 |             count += 1
43 |     if count > 0:
44 |         mean = mean / count
45 |     print(name)
46 |     print(mean)
47 | 
48 | 
49 | def save_image(image_numpy, image_path, aspect_ratio=1.0):
50 |     """Save a numpy image to the disk
51 | 
52 |     Parameters:
53 |         image_numpy (numpy array) -- input numpy array
54 |         image_path (str)          -- the path of the image
55 |     """
56 | 
57 |     image_pil = Image.fromarray(image_numpy)
58 |     h, w, _ = image_numpy.shape
59 | 
60 |     if aspect_ratio > 1.0:
61 |         image_pil = image_pil.resize((h, int(w * aspect_ratio)), Image.BICUBIC)
62 |     if aspect_ratio < 1.0:
63 |         image_pil = image_pil.resize((int(h / aspect_ratio), w), Image.BICUBIC)
64 |     image_pil.save(image_path)
65 | 
66 | 
67 | def print_numpy(x, val=True, shp=False):
68 |     """Print the mean, min, max, median, std, and size of a numpy array
69 | 
70 |     Parameters:
71 |         val (bool) -- if print the values of the numpy array
72 |         shp (bool) -- if print the shape of the numpy array
73 |     """
74 |     x = x.astype(np.float64)
75 |     if shp:
76 |         print('shape,', x.shape)
77 |     if val:
78 |         x = x.flatten()
79 |         print('mean = %3.3f, min = %3.3f, max = %3.3f, median = %3.3f, std=%3.3f' % (
80 |             np.mean(x), np.min(x), np.max(x), np.median(x), np.std(x)))
81 | 
82 | 
83 | def mkdirs(paths):
84 |     """create empty directories if they don't exist
85 | 
86 |     Parameters:
87 |         paths (str list) -- a list of directory paths
88 |     """
89 |     if isinstance(paths, list) and not isinstance(paths, str):
90 |         for path in paths:
91 |             mkdir(path)
92 |     else:
93 |         mkdir(paths)
94 | 
95 | 
96 | def mkdir(path):
97 |     """create a single empty directory if it didn't exist
98 | 
99 |     Parameters:
100 |         path (str) -- a single directory path
101 |     """
102 |     if not os.path.exists(path):
103 |         os.makedirs(path)
-------------------------------------------------------------------------------- /pytorch-CycleGAN-and-pix2pix/LICENSE: --------------------------------------------------------------------------------
1 | Copyright (c) 2017, Jun-Yan Zhu and Taesung Park
2 | All rights reserved.
3 | 
4 | Redistribution and use in source and binary forms, with or without
5 | modification, are permitted provided that the following conditions are met:
6 | 
7 | * Redistributions of source code must retain the above copyright notice, this
8 | list of conditions and the following disclaimer. 
9 | 10 | * Redistributions in binary form must reproduce the above copyright notice, 11 | this list of conditions and the following disclaimer in the documentation 12 | and/or other materials provided with the distribution. 13 | 14 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 15 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 17 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 18 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 20 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 21 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 22 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 23 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 24 | 25 | 26 | --------------------------- LICENSE FOR pix2pix -------------------------------- 27 | BSD License 28 | 29 | For pix2pix software 30 | Copyright (c) 2016, Phillip Isola and Jun-Yan Zhu 31 | All rights reserved. 32 | 33 | Redistribution and use in source and binary forms, with or without 34 | modification, are permitted provided that the following conditions are met: 35 | 36 | * Redistributions of source code must retain the above copyright notice, this 37 | list of conditions and the following disclaimer. 38 | 39 | * Redistributions in binary form must reproduce the above copyright notice, 40 | this list of conditions and the following disclaimer in the documentation 41 | and/or other materials provided with the distribution. 42 | 43 | ----------------------------- LICENSE FOR DCGAN -------------------------------- 44 | BSD License 45 | 46 | For dcgan.torch software 47 | 48 | Copyright (c) 2015, Facebook, Inc. All rights reserved. 49 | 50 | Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 51 | 52 | Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 53 | 54 | Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 55 | 56 | Neither the name Facebook nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 57 | 58 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
59 | 
-------------------------------------------------------------------------------- /pytorch-CycleGAN-and-pix2pix/data/template_dataset.py: --------------------------------------------------------------------------------
1 | """Dataset class template
2 | 
3 | This module provides a template for users to implement custom datasets.
4 | You can specify '--dataset_mode template' to use this dataset.
5 | The class name should be consistent with both the filename and its dataset_mode option.
6 | The filename should be <dataset_mode>_dataset.py
7 | The class name should be <Dataset_mode>Dataset
8 | You need to implement the following functions:
9 |     -- <modify_commandline_options>: Add dataset-specific options and rewrite default values for existing options.
10 |     -- <__init__>: Initialize this dataset class.
11 |     -- <__getitem__>: Return a data point and its metadata information.
12 |     -- <__len__>: Return the number of images.
13 | """
14 | from data.base_dataset import BaseDataset, get_transform
15 | # from data.image_folder import make_dataset
16 | # from PIL import Image
17 | 
18 | 
19 | class TemplateDataset(BaseDataset):
20 |     """A template dataset class for you to implement custom datasets."""
21 |     @staticmethod
22 |     def modify_commandline_options(parser, is_train):
23 |         """Add new dataset-specific options, and rewrite default values for existing options.
24 | 
25 |         Parameters:
26 |             parser -- original option parser
27 |             is_train (bool) -- whether training phase or test phase. You can use this flag to add training-specific or test-specific options.
28 | 
29 |         Returns:
30 |             the modified parser.
31 |         """
32 |         parser.add_argument('--new_dataset_option', type=float, default=1.0, help='new dataset option')
33 |         parser.set_defaults(max_dataset_size=10, new_dataset_option=2.0)  # specify dataset-specific default values
34 |         return parser
35 | 
36 |     def __init__(self, opt):
37 |         """Initialize this dataset class.
38 | 
39 |         Parameters:
40 |             opt (Option class) -- stores all the experiment flags; needs to be a subclass of BaseOptions
41 | 
42 |         A few things can be done here.
43 |         - save the options (have been done in BaseDataset)
44 |         - get image paths and meta information of the dataset.
45 |         - define the image transformation.
46 |         """
47 |         # save the option and dataset root
48 |         BaseDataset.__init__(self, opt)
49 |         # get the image paths of your dataset;
50 |         self.image_paths = []  # You can call sorted(make_dataset(self.root, opt.max_dataset_size)) to get all the image paths under the directory self.root
51 |         # define the default transform function. You can use <base_dataset.get_transform>; You can also define your custom transform function
52 |         self.transform = get_transform(opt)
53 | 
54 |     def __getitem__(self, index):
55 |         """Return a data point and its metadata information.
56 | 
57 |         Parameters:
58 |             index -- a random integer for data indexing
59 | 
60 |         Returns:
61 |             a dictionary of data with their names. It usually contains the data itself and its metadata information.
62 | 
63 |         Step 1: get a random image path: e.g., path = self.image_paths[index]
64 |         Step 2: load your data from the disk: e.g., image = Image.open(path).convert('RGB').
65 |         Step 3: convert your data to a PyTorch tensor. You can use helper functions such as self.transform. e.g., data = self.transform(image)
66 |         Step 4: return a data point as a dictionary.
67 |         """
68 |         path = 'temp'    # needs to be a string
69 |         data_A = None    # needs to be a tensor
70 |         data_B = None    # needs to be a tensor
71 |         return {'data_A': data_A, 'data_B': data_B, 'path': path}
72 | 
73 |     def __len__(self):
74 |         """Return the total number of images."""
75 |         return len(self.image_paths)
67 | """ 68 | path = 'temp' # needs to be a string 69 | data_A = None # needs to be a tensor 70 | data_B = None # needs to be a tensor 71 | return {'data_A': data_A, 'data_B': data_B, 'path': path} 72 | 73 | def __len__(self): 74 | """Return the total number of images.""" 75 | return len(self.image_paths) 76 | -------------------------------------------------------------------------------- /recognize_process/data_provider/read_tfrecord.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | # @Time : 19-11-20 0:15 4 | # @Author : Miao Wenqiang 5 | # @Reference : https://github.com/MaybeShewill-CV/CRNN_Tensorflow 6 | # https://github.com/bai-shang/crnn_ctc_ocr.Tensorflow 7 | # @File : read_tfrecord.py 8 | # @IDE: PyCharm 9 | """ 10 | Efficient tfrecords writer interface 11 | """ 12 | 13 | 14 | import os 15 | import os.path as ops 16 | import random 17 | import glob 18 | import tensorflow as tf 19 | from recognize_process.config import model_config 20 | 21 | CFG = model_config.cfg 22 | 23 | 24 | class CrnnDataFeeder(object): 25 | 26 | def __init__(self, dataset_dir, char_dict_path, flags='train'): 27 | 28 | self._tfrecords_dir = dataset_dir 29 | if not ops.exists(self._tfrecords_dir): 30 | raise ValueError('{:s} not exist, please check again'.format(self._tfrecords_dir)) 31 | 32 | self._dataset_flags = flags.lower() 33 | if self._dataset_flags not in ['train', 'test', 'val']: 34 | raise ValueError('flags of the data feeder should be \'train\', \'test\', \'val\'') 35 | 36 | self._char_dict_path = char_dict_path 37 | 38 | def sample_counts(self): 39 | tfrecords_file_paths = glob.glob('{:s}/{:s}*.tfrecords'.format(self._tfrecords_dir, self._dataset_flags)) 40 | counts = 0 41 | 42 | for record in tfrecords_file_paths: 43 | counts += sum(1 for _ in tf.python_io.tf_record_iterator(record)) 44 | 45 | return counts 46 | 47 | def _extract_features_batch(self, serialized_batch): 48 | features = tf.parse_example( 49 | serialized_batch, 50 | features={'images': tf.FixedLenFeature([], tf.string), 51 | 'imagepaths': tf.FixedLenFeature([], tf.string), 52 | 'labels': tf.VarLenFeature(tf.int64), 53 | }) 54 | 55 | bs = features['images'].shape[0] 56 | images = tf.decode_raw(features['images'], tf.uint8) 57 | w, h = tuple(CFG.ARCH.INPUT_SIZE) 58 | images = tf.cast(x=images, dtype=tf.float32) 59 | #images = tf.subtract(tf.divide(images, 128.0), 1.0) 60 | images = tf.reshape(images, [bs, h, -1, CFG.ARCH.INPUT_CHANNELS]) 61 | 62 | labels = features['labels'] 63 | labels = tf.cast(labels, tf.int32) 64 | 65 | imagepaths = features['imagepaths'] 66 | 67 | return images, labels, imagepaths 68 | 69 | 70 | def _inputs(self, tfrecords_path, batch_size, num_threads): 71 | dataset = tf.data.TFRecordDataset(tfrecords_path) 72 | dataset = dataset.batch(batch_size, drop_remainder=True) 73 | 74 | dataset = dataset.map(map_func=self._extract_features_batch, num_parallel_calls=num_threads) 75 | 76 | if self._dataset_flags != 'test': 77 | dataset = dataset.shuffle(buffer_size=1000) 78 | dataset = dataset.repeat() 79 | 80 | iterator = dataset.make_one_shot_iterator() 81 | 82 | return iterator.get_next(name='{:s}_IteratorGetNext'.format(self._dataset_flags)) 83 | 84 | 85 | def inputs(self, batch_size): 86 | 87 | tfrecords_file_paths = glob.glob('{:s}/{:s}*.tfrecords'.format(\ 88 | self._tfrecords_dir, self._dataset_flags)) 89 | if not tfrecords_file_paths: 90 | raise ValueError('Dataset does not contain any tfrecords for {:s}'.format(\ 91 | 
self._dataset_flags)) 92 | 93 | random.shuffle(tfrecords_file_paths) 94 | 95 | return self._inputs( 96 | tfrecords_path=tfrecords_file_paths, 97 | batch_size=batch_size, 98 | num_threads=CFG.TRAIN.CPU_MULTI_PROCESS_NUMS 99 | ) 100 | -------------------------------------------------------------------------------- /pytorch-CycleGAN-and-pix2pix/scripts/edges/batch_hed.py: -------------------------------------------------------------------------------- 1 | # HED batch processing script; modified from https://github.com/s9xie/hed/blob/master/examples/hed/HED-tutorial.ipynb 2 | # Step 1: download the hed repo: https://github.com/s9xie/hed 3 | # Step 2: download the models and prototxt, and put them under {caffe_root}/examples/hed/ 4 | # Step 3: put this script under {caffe_root}/examples/hed/ 5 | # Step 4: run the following script: 6 | # python batch_hed.py --images_dir=/data/to/path/photos/ --hed_mat_dir=/data/to/path/hed_mat_files/ 7 | # The code sometimes crashes after computation is done. Error looks like "Check failed: ... driver shutting down". You can just kill the job. 8 | # For large images, it may run out of GPU memory, so resize the images before running this script. 9 | # Step 5: run the MATLAB post-processing script "PostprocessHED.m" 10 | 11 | 12 | import numpy as np 13 | from PIL import Image 14 | import os 15 | import argparse 16 | import sys 17 | import scipy.io as sio 18 | 19 | 20 | def parse_args(): 21 | parser = argparse.ArgumentParser(description='batch processing: photos->edges') 22 | parser.add_argument('--caffe_root', dest='caffe_root', help='caffe root', default='../../', type=str) 23 | parser.add_argument('--caffemodel', dest='caffemodel', help='caffemodel', default='./hed_pretrained_bsds.caffemodel', type=str) 24 | parser.add_argument('--prototxt', dest='prototxt', help='caffe prototxt file', default='./deploy.prototxt', type=str) 25 | parser.add_argument('--images_dir', dest='images_dir', help='directory to store input photos', type=str) 26 | parser.add_argument('--hed_mat_dir', dest='hed_mat_dir', help='directory to store output hed edges in mat file', type=str) 27 | parser.add_argument('--border', dest='border', help='padding border', type=int, default=128) 28 | parser.add_argument('--gpu_id', dest='gpu_id', help='gpu id', type=int, default=1) 29 | args = parser.parse_args() 30 | return args 31 | 32 | 33 | args = parse_args() 34 | for arg in vars(args): 35 | print('[%s] =' % arg, getattr(args, arg)) 36 | # Make sure that caffe is on the python path: 37 | caffe_root = args.caffe_root # this file is expected to be in {caffe_root}/examples/hed/ 38 | sys.path.insert(0, caffe_root + 'python') 39 | import caffe 40 | 41 | 42 | if not os.path.exists(args.hed_mat_dir): 43 | print('create output directory %s' % args.hed_mat_dir) 44 | os.makedirs(args.hed_mat_dir) 45 | 46 | imgList = os.listdir(args.images_dir) 47 | nImgs = len(imgList) 48 | print('#images = %d' % nImgs) 49 | 50 | caffe.set_mode_gpu() 51 | caffe.set_device(args.gpu_id) 52 | # load net 53 | net = caffe.Net(args.prototxt, args.caffemodel, caffe.TEST) 54 | # pad border 55 | border = args.border 56 | 57 | for i in range(nImgs): 58 | if i % 500 == 0: 59 | print('processing image %d/%d' % (i, nImgs)) 60 | im = Image.open(os.path.join(args.images_dir, imgList[i])) 61 | 62 | in_ = np.array(im, dtype=np.float32) 63 | in_ = np.pad(in_, ((border, border), (border, border), (0, 0)), 'reflect') 64 | 65 | in_ = in_[:, :, 0:3] 66 | in_ = in_[:, :, ::-1] 67 | in_ -= np.array((104.00698793, 116.66876762,
122.67891434)) 68 | in_ = in_.transpose((2, 0, 1)) 69 | # to test with CPU instead, remove the caffe.set_mode_gpu() and caffe.set_device() calls above 70 | 71 | # shape for input (data blob is N x C x H x W), set data 72 | net.blobs['data'].reshape(1, *in_.shape) 73 | net.blobs['data'].data[...] = in_ 74 | # run net and take argmax for prediction 75 | net.forward() 76 | fuse = net.blobs['sigmoid-fuse'].data[0][0, :, :] 77 | # get rid of the border 78 | fuse = fuse[border:-border, border:-border] 79 | # save hed file to the disk 80 | name, ext = os.path.splitext(imgList[i]) 81 | sio.savemat(os.path.join(args.hed_mat_dir, name + '.mat'), {'predict': fuse}) 82 | -------------------------------------------------------------------------------- /pytorch-CycleGAN-and-pix2pix/data/__init__.py: -------------------------------------------------------------------------------- 1 | """This package includes all the modules related to data loading and preprocessing 2 | 3 | To add a custom dataset class called 'dummy', you need to add a file called 'dummy_dataset.py' and define a subclass 'DummyDataset' inherited from BaseDataset. 4 | You need to implement four functions: 5 | -- <__init__>: initialize the class, first call BaseDataset.__init__(self, opt). 6 | -- <__len__>: return the size of dataset. 7 | -- <__getitem__>: get a data point from data loader. 8 | -- <modify_commandline_options>: (optionally) add dataset-specific options and set default options. 9 | 10 | Now you can use the dataset class by specifying flag '--dataset_mode dummy'. 11 | See our template dataset class 'template_dataset.py' for more details. 12 | """ 13 | import importlib 14 | import torch.utils.data 15 | from data.base_dataset import BaseDataset 16 | 17 | 18 | def find_dataset_using_name(dataset_name): 19 | """Import the module "data/[dataset_name]_dataset.py". 20 | 21 | In the file, the class called DatasetNameDataset() will 22 | be instantiated. It has to be a subclass of BaseDataset, 23 | and the class-name lookup is case-insensitive. 24 | """ 25 | dataset_filename = "data." + dataset_name + "_dataset" 26 | datasetlib = importlib.import_module(dataset_filename) 27 | 28 | dataset = None 29 | target_dataset_name = dataset_name.replace('_', '') + 'dataset' 30 | for name, cls in datasetlib.__dict__.items(): 31 | if name.lower() == target_dataset_name.lower() \ 32 | and issubclass(cls, BaseDataset): 33 | dataset = cls 34 | 35 | if dataset is None: 36 | raise NotImplementedError("In %s.py, there should be a subclass of BaseDataset with class name that matches %s in lowercase." % (dataset_filename, target_dataset_name)) 37 | 38 | return dataset 39 | 40 | 41 | def get_option_setter(dataset_name): 42 | """Return the static method <modify_commandline_options> of the dataset class.""" 43 | dataset_class = find_dataset_using_name(dataset_name) 44 | return dataset_class.modify_commandline_options 45 | 46 | 47 | def create_dataset(opt): 48 | """Create a dataset given the option. 49 | 50 | This function wraps the class CustomDatasetDataLoader. 51 | This is the main interface between this package and 'train.py'/'test.py' 52 | 53 | Example: 54 | >>> from data import create_dataset 55 | >>> dataset = create_dataset(opt) 56 | """ 57 | data_loader = CustomDatasetDataLoader(opt) 58 | dataset = data_loader.load_data() 59 | return dataset 60 | 61 | 62 | class CustomDatasetDataLoader(): 63 | """Wrapper class of Dataset class that performs multi-threaded data loading""" 64 | 65 | def __init__(self, opt): 66 | """Initialize this class 67 | 68 | Step 1: create a dataset instance given the name [dataset_mode] 69 | Step 2: create a multi-threaded data loader.
70 | """ 71 | self.opt = opt 72 | dataset_class = find_dataset_using_name(opt.dataset_mode) 73 | self.dataset = dataset_class(opt) 74 | print("dataset [%s] was created" % type(self.dataset).__name__) 75 | self.dataloader = torch.utils.data.DataLoader( 76 | self.dataset, 77 | batch_size=opt.batch_size, 78 | shuffle=not opt.serial_batches, 79 | num_workers=int(opt.num_threads)) 80 | 81 | def load_data(self): 82 | return self 83 | 84 | def __len__(self): 85 | """Return the number of data in the dataset""" 86 | return min(len(self.dataset), self.opt.max_dataset_size) 87 | 88 | def __iter__(self): 89 | """Return a batch of data""" 90 | for i, data in enumerate(self.dataloader): 91 | if i * self.opt.batch_size >= self.opt.max_dataset_size: 92 | break 93 | yield data 94 | -------------------------------------------------------------------------------- /pytorch-CycleGAN-and-pix2pix/test.py: -------------------------------------------------------------------------------- 1 | """General-purpose test script for image-to-image translation. 2 | 3 | Once you have trained your model with train.py, you can use this script to test the model. 4 | It will load a saved model from --checkpoints_dir and save the results to --results_dir. 5 | 6 | It first creates model and dataset given the option. It will hard-code some parameters. 7 | It then runs inference for --num_test images and save results to an HTML file. 8 | 9 | Example (You need to train models first or download pre-trained models from our website): 10 | Test a CycleGAN model (both sides): 11 | python test.py --dataroot ./datasets/maps --name maps_cyclegan --model cycle_gan 12 | 13 | Test a CycleGAN model (one side only): 14 | python test.py --dataroot datasets/horse2zebra/testA --name horse2zebra_pretrained --model test --no_dropout 15 | 16 | The option '--model test' is used for generating CycleGAN results only for one side. 17 | This option will automatically set '--dataset_mode single', which only loads the images from one set. 18 | On the contrary, using '--model cycle_gan' requires loading and generating results in both directions, 19 | which is sometimes unnecessary. The results will be saved at ./results/. 20 | Use '--results_dir ' to specify the results directory. 21 | 22 | Test a pix2pix model: 23 | python test.py --dataroot ./datasets/facades --name facades_pix2pix --model pix2pix --direction BtoA 24 | 25 | See options/base_options.py and options/test_options.py for more test options. 26 | See training and test tips at: https://github.com/junyanz/pytorch-CycleGAN-and-pix2pix/blob/master/docs/tips.md 27 | See frequently asked questions at: https://github.com/junyanz/pytorch-CycleGAN-and-pix2pix/blob/master/docs/qa.md 28 | """ 29 | import os 30 | from options.test_options import TestOptions 31 | from data import create_dataset 32 | from models import create_model 33 | from util.visualizer import save_images 34 | from util import html 35 | 36 | 37 | if __name__ == '__main__': 38 | opt = TestOptions().parse() # get test options 39 | # hard-code some parameters for test 40 | opt.num_threads = 0 # test code only supports num_threads = 1 41 | opt.batch_size = 1 # test code only supports batch_size = 1 42 | opt.serial_batches = True # disable data shuffling; comment this line if results on randomly chosen images are needed. 43 | opt.no_flip = True # no flip; comment this line if results on flipped images are needed. 44 | opt.display_id = -1 # no visdom display; the test code saves the results to a HTML file. 
45 | dataset = create_dataset(opt) # create a dataset given opt.dataset_mode and other options 46 | model = create_model(opt) # create a model given opt.model and other options 47 | model.setup(opt) # regular setup: load and print networks; create schedulers 48 | # create a website 49 | web_dir = os.path.join(opt.results_dir, opt.name, '%s_%s' % (opt.phase, opt.epoch)) # define the website directory 50 | webpage = html.HTML(web_dir, 'Experiment = %s, Phase = %s, Epoch = %s' % (opt.name, opt.phase, opt.epoch)) 51 | # test with eval mode. This only affects layers like batchnorm and dropout. 52 | # For [pix2pix]: we use batchnorm and dropout in the original pix2pix. You can experiment with and without eval() mode. 53 | # For [CycleGAN]: It should not affect CycleGAN as CycleGAN uses instancenorm without dropout. 54 | if opt.eval: 55 | model.eval() 56 | for i, data in enumerate(dataset): 57 | if i >= opt.num_test: # only apply our model to opt.num_test images. 58 | break 59 | model.set_input(data) # unpack data from data loader 60 | model.test() # run inference 61 | visuals = model.get_current_visuals() # get image results 62 | img_path = model.get_image_paths() # get image paths 63 | if i % 5 == 0: # save images to an HTML file 64 | print('processing (%04d)-th image... %s' % (i, img_path)) 65 | save_images(webpage, visuals, img_path, aspect_ratio=opt.aspect_ratio, width=opt.display_winsize) 66 | webpage.save() # save the HTML 67 | -------------------------------------------------------------------------------- /pytorch-CycleGAN-and-pix2pix/util/get_data.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import os 3 | import tarfile 4 | import requests 5 | from warnings import warn 6 | from zipfile import ZipFile 7 | from bs4 import BeautifulSoup 8 | from os.path import abspath, isdir, join, basename 9 | 10 | 11 | class GetData(object): 12 | """A Python script for downloading CycleGAN or pix2pix datasets. 13 | 14 | Parameters: 15 | technique (str) -- One of: 'cyclegan' or 'pix2pix'. 16 | verbose (bool) -- If True, print additional information. 17 | 18 | Examples: 19 | >>> from util.get_data import GetData 20 | >>> gd = GetData(technique='cyclegan') 21 | >>> new_data_path = gd.get(save_path='./datasets') # options will be displayed. 22 | 23 | Alternatively, you can use the bash scripts: './datasets/download_pix2pix_dataset.sh' 24 | and './datasets/download_cyclegan_dataset.sh'.
25 | """ 26 | 27 | def __init__(self, technique='cyclegan', verbose=True): 28 | url_dict = { 29 | 'pix2pix': 'http://efrosgans.eecs.berkeley.edu/pix2pix/datasets/', 30 | 'cyclegan': 'https://people.eecs.berkeley.edu/~taesung_park/CycleGAN/datasets' 31 | } 32 | self.url = url_dict.get(technique.lower()) 33 | self._verbose = verbose 34 | 35 | def _print(self, text): 36 | if self._verbose: 37 | print(text) 38 | 39 | @staticmethod 40 | def _get_options(r): 41 | soup = BeautifulSoup(r.text, 'lxml') 42 | options = [h.text for h in soup.find_all('a', href=True) 43 | if h.text.endswith(('.zip', 'tar.gz'))] 44 | return options 45 | 46 | def _present_options(self): 47 | r = requests.get(self.url) 48 | options = self._get_options(r) 49 | print('Options:\n') 50 | for i, o in enumerate(options): 51 | print("{0}: {1}".format(i, o)) 52 | choice = input("\nPlease enter the number of the " 53 | "dataset above you wish to download:") 54 | return options[int(choice)] 55 | 56 | def _download_data(self, dataset_url, save_path): 57 | if not isdir(save_path): 58 | os.makedirs(save_path) 59 | 60 | base = basename(dataset_url) 61 | temp_save_path = join(save_path, base) 62 | 63 | with open(temp_save_path, "wb") as f: 64 | r = requests.get(dataset_url) 65 | f.write(r.content) 66 | 67 | if base.endswith('.tar.gz'): 68 | obj = tarfile.open(temp_save_path) 69 | elif base.endswith('.zip'): 70 | obj = ZipFile(temp_save_path, 'r') 71 | else: 72 | raise ValueError("Unknown File Type: {0}.".format(base)) 73 | 74 | self._print("Unpacking Data...") 75 | obj.extractall(save_path) 76 | obj.close() 77 | os.remove(temp_save_path) 78 | 79 | def get(self, save_path, dataset=None): 80 | """ 81 | 82 | Download a dataset. 83 | 84 | Parameters: 85 | save_path (str) -- A directory to save the data to. 86 | dataset (str) -- (optional). A specific dataset to download. 87 | Note: this must include the file extension. 88 | If None, options will be presented for you 89 | to choose from. 90 | 91 | Returns: 92 | save_path_full (str) -- the absolute path to the downloaded data. 93 | 94 | """ 95 | if dataset is None: 96 | selected_dataset = self._present_options() 97 | else: 98 | selected_dataset = dataset 99 | 100 | save_path_full = join(save_path, selected_dataset.split('.')[0]) 101 | 102 | if isdir(save_path_full): 103 | warn("\n'{0}' already exists. Voiding Download.".format( 104 | save_path_full)) 105 | else: 106 | self._print('Downloading Data...') 107 | url = "{0}/{1}".format(self.url, selected_dataset) 108 | self._download_data(url, save_path=save_path) 109 | 110 | return abspath(save_path_full) 111 | -------------------------------------------------------------------------------- /pytorch-CycleGAN-and-pix2pix/readme.txt: -------------------------------------------------------------------------------- 1 | 此项目主要引用 2 | **************************************************参考资料********************************************************** 3 | *去水印模型采用gan网络。论文链接: 4 | * https://arxiv.org/pdf/1611.07004.pdf 5 | *参考了GitHub上gan pix2pix 项目,链接为: 6 | https://github.com/junyanz/pytorch-CycleGAN-and-pix2pix#cyclegan-and-pix2pix-in-pytorch 7 | ******************************************************************************************************************** 8 | 在此项目的基础上,做了些许修改,主要更改的地方:训练的时候加入了数据增强,以及一些结果展示上的修改,但是主体基本上来自于该项目 9 | 功能说明:主要用此项目的pix2pix的图片生成方法,去除初赛和复赛中的水印. 
10 | Model training: 11 | Data: the training data comes from datasets we generated ourselves 12 | Parameters: we tuned the relevant parameters for both training and testing 13 | 14 | Detailed training steps: 15 | Note: how the datasets are generated is documented in detail in the dataset-generation readme files 16 | 17 | Source code for generating the synthetic data: https://github.com/Mingtzge/2019-CCF-BDCI-OCR-MCZJ-fake_data_generator 18 | 19 | "Copy invalid" (复印无效) watermark: for data generation see ---> 2019-CCF-BDCI-OCR-MCZJ-fake_data_generator/chusai_fuyinwuxiao/readme 20 | Dataset 1: 140k images 21 | Dataset 2: 200k images 22 | Dataset 3: 300k+ images 23 | First training round: 24 | Estimated duration: 3~4 days 25 | Data dir1: dataset 1 + dataset 2 26 | python train.py --dataroot dir1 --name model_for_example_chusai --model pix2pix --direction AtoB --add_contrast --gan_mode lsgan 27 | --crop_size 512 --load_size 512 --niter 20 --niter_decay 10 --lr 0.005 28 | Command notes: 29 | see ./options/base_options.py and ./options/train_options.py for the full list of options 30 | --dataroot data directory; point it at the unpacked folder, whose subdirectories must include the three folders test, val and train 31 | --name experiment name, used to tell different models apart; change it for every new training run 32 | Second training round: 33 | Data dir2: dataset 3 34 | Estimated duration: 14 hours 35 | python train.py --dataroot dir2 --name model_for_example_chusai --model pix2pix --direction AtoB --add_contrast --gan_mode lsgan 36 | --crop_size 512 --load_size 512 --niter 0 --niter_decay 3 --lr 0.005 --continue_train 37 | Note: this round is a finetune, so the model name (--name) must be identical across the two rounds 38 | 39 | 40 | "Copying prohibited" (禁止复印) watermark: for data generation see ---> 2019-CCF-BDCI-OCR-MCZJ-fake_data_generator/rematch_jinzhifuyin/readme 41 | Dataset 4: 400k+ images 42 | First training round: 43 | Estimated duration: 1~2 days 44 | Data dir3: dataset 4 45 | python train.py --dataroot dir3 --name model_for_example_fusai --model pix2pix --direction AtoB --add_contrast --gan_mode lsgan 46 | --crop_size 256 --load_size 256 --niter 2 --niter_decay 5 --lr 0.005 47 | 48 | # Experiment procedure for the improved GAN model that removes the "copying prohibited" watermark: finetune on top of the first round, keeping the model name (--name) unchanged 49 | 50 | 1. All training stages use the command below: 51 | 52 | python3 train.py --dataroot dataset --name model_for_example_fusai --model pix2pix --direction AtoB --checkpoints_dir checkpoint_path --add_contrast --gan_mode lsgan --gpu_ids gpu_id --load_size 256 --crop_size 256 --niter iter_num --niter_decay iter_decay_num --input_nc 1 --output_nc 1 --continue_train --lr learning_rate 53 | 54 | The parameters that change between stages are described below. 55 | 56 | --dataroot: path to the training set; each training stage uses a different training set. 57 | 58 | --name: name under which this run saves its model. Because the whole procedure finetunes the previous stage, this parameter stays the same as the model name of the previous stage; after training, the model is saved in the folder of that name, overwriting the previous stage's model. 59 | 60 | --checkpoints_dir: name of the folder that stores the models above 61 | 62 | --gpu_ids: which GPU(s) to use 63 | 64 | --niter: 65 | 66 | --niter_decay: 67 | 68 | --lr: initial learning rate for this stage 69 | 70 | The per-stage values of these parameters are given below. 71 | 72 | 2. From 815 images, generate 40k images with the watermark-translation method, with the second argument passed at line 345 of the watermark-translation script set to 2. The generated data is split into train, val and test and stored in the folder dir_1 (dir_1 is only illustrative). The watermark template is roi_2.jpg in the watermark-translation script's folder; set template_file at line 332 of that script to roi_2.jpg. Finetune the previous model (called finetune_model for illustration; it is stored in the model folder at the same level as the training script). 73 | 74 | --dataroot: dir_1 75 | 76 | --name: finetune_model 77 | 78 | --checkpoints_dir: model 79 | 80 | --niter: 5 81 | 82 | --niter_decay: 5 83 | 84 | --lr: 0.005 85 | 86 | 3. From 815 images, generate 20k+ images with the watermark-translation method, with the second argument passed at line 345 set to 1. The data is split into train, val and test and stored in dir_2 (illustrative). The watermark template is roi.jpg in the watermark-translation script's folder; set template_file at line 332 to roi.jpg. Finetune the previous model (finetune_model, in the model folder). 87 | 88 | --dataroot: dir_2 89 | 90 | --name: finetune_model 91 | 92 | --checkpoints_dir: model 93 | 94 | --niter: 5 95 | 96 | --niter_decay: 5 97 | 98 | --lr: 0.001 99 | 100 | 4. From 596 cleaned images, generate 20k+ images with the watermark-translation method, with the second argument passed at line 345 set to 1. The data is split into train, val and test and stored in dir_3 (illustrative). The watermark template is roi.jpg; set template_file at line 332 to roi.jpg. Finetune the previous model (finetune_model, in the model folder). 101 | 102 | --dataroot: dir_3 103 | 104 | --name: finetune_model 105 | 106 | --checkpoints_dir: model 107 | 108 | --niter: 2 109 | 110 | --niter_decay: 1 111 | 112 | --lr: 0.001 113 | 114 | 5.
Using a different method for generating watermarked images, 140k training images were produced and stored in dir_4. Finetune the previous model (finetune_model for illustration, stored in the model folder at the same level as the training script). 115 | 116 | --dataroot: dir_4 117 | 118 | --name: finetune_model 119 | 120 | --checkpoints_dir: model 121 | 122 | --niter: 5 123 | 124 | --niter_decay: 5 125 | 126 | --lr: 0.001 127 | 128 | 6. Randomly pick 40k+ of the 140k images used in the previous stage and generate 40k+ more with the watermark-translation script, with the second argument passed at line 345 set to 1; the data is split into train, val and test and stored in dir_5 (illustrative), using template roi.jpg and setting template_file at line 332 of that script to roi.jpg. Merging the two parts gives 93k+ training images in total, stored together with val and test in dir_5. Finetune on top of the previous stage. 129 | 130 | --dataroot: dir_5 131 | 132 | --name: finetune_model 133 | 134 | --checkpoints_dir: model 135 | 136 | --niter: 5 137 | 138 | --niter_decay: 5 139 | 140 | --lr: 0.0001 141 | 142 | 143 | -------------------------------------------------------------------------------- /pytorch-CycleGAN-and-pix2pix/train.py: -------------------------------------------------------------------------------- 1 | """General-purpose training script for image-to-image translation. 2 | 3 | This script works for various models (with option '--model': e.g., pix2pix, cyclegan, colorization) and 4 | different datasets (with option '--dataset_mode': e.g., aligned, unaligned, single, colorization). 5 | You need to specify the dataset ('--dataroot'), experiment name ('--name'), and model ('--model'). 6 | 7 | It first creates model, dataset, and visualizer given the option. 8 | It then does standard network training. During the training, it also visualizes/saves the images, prints/saves the loss plot, and saves models. 9 | The script supports continue/resume training. Use '--continue_train' to resume your previous training. 10 | 11 | Example: 12 | Train a CycleGAN model: 13 | python train.py --dataroot ./datasets/maps --name maps_cyclegan --model cycle_gan 14 | Train a pix2pix model: 15 | python train.py --dataroot ./datasets/facades --name facades_pix2pix --model pix2pix --direction BtoA 16 | 17 | See options/base_options.py and options/train_options.py for more training options. 18 | See training and test tips at: https://github.com/junyanz/pytorch-CycleGAN-and-pix2pix/blob/master/docs/tips.md 19 | See frequently asked questions at: https://github.com/junyanz/pytorch-CycleGAN-and-pix2pix/blob/master/docs/qa.md 20 | """ 21 | import time 22 | from options.train_options import TrainOptions 23 | from data import create_dataset 24 | from models import create_model 25 | from util.visualizer import Visualizer 26 | 27 | if __name__ == '__main__': 28 | opt = TrainOptions().parse() # get training options 29 | dataset = create_dataset(opt) # create a dataset given opt.dataset_mode and other options 30 | dataset_size = len(dataset) # get the number of images in the dataset.
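# Note: len(dataset) goes through CustomDatasetDataLoader.__len__, which caps the count at opt.max_dataset_size, so dataset_size can be smaller than the number of image files on disk.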
31 | print('The number of training images = %d' % dataset_size) 32 | 33 | model = create_model(opt) # create a model given opt.model and other options 34 | model.setup(opt) # regular setup: load and print networks; create schedulers 35 | visualizer = Visualizer(opt) # create a visualizer that displays/saves images and plots 36 | total_iters = 0 # the total number of training iterations 37 | 38 | for epoch in range(opt.epoch_count, opt.niter + opt.niter_decay + 1): # outer loop for different epochs; we save the model by <epoch_count>, <epoch_count>+<save_latest_freq> 39 | epoch_start_time = time.time() # timer for entire epoch 40 | iter_data_time = time.time() # timer for data loading per iteration 41 | epoch_iter = 0 # the number of training iterations in current epoch, reset to 0 every epoch 42 | 43 | for i, data in enumerate(dataset): # inner loop within one epoch 44 | iter_start_time = time.time() # timer for computation per iteration 45 | if total_iters % opt.print_freq == 0: 46 | t_data = iter_start_time - iter_data_time 47 | visualizer.reset() 48 | total_iters += opt.batch_size 49 | epoch_iter += opt.batch_size 50 | model.set_input(data) # unpack data from dataset and apply preprocessing 51 | model.optimize_parameters() # calculate loss functions, get gradients, update network weights 52 | 53 | if total_iters % opt.display_freq == 0: # display images on visdom and save images to an HTML file 54 | save_result = total_iters % opt.update_html_freq == 0 55 | model.compute_visuals() 56 | #visualizer.display_current_results(model.get_current_visuals(), epoch, save_result) 57 | 58 | if total_iters % opt.print_freq == 0: # print training losses and save logging information to the disk 59 | losses = model.get_current_losses() 60 | t_comp = (time.time() - iter_start_time) / opt.batch_size 61 | visualizer.print_current_losses(epoch, epoch_iter, losses, t_comp, t_data) 62 | #if opt.display_id > 0: 63 | #visualizer.plot_current_losses(epoch, float(epoch_iter) / dataset_size, losses) 64 | 65 | if total_iters % opt.save_latest_freq == 0: # cache our latest model every <save_latest_freq> iterations 66 | print('saving the latest model (epoch %d, total_iters %d)' % (epoch, total_iters)) 67 | save_suffix = 'iter_%d' % total_iters if opt.save_by_iter else 'latest' 68 | model.save_networks(save_suffix) 69 | 70 | iter_data_time = time.time() 71 | if epoch % opt.save_epoch_freq == 0: # cache our model every <save_epoch_freq> epochs 72 | print('saving the model at the end of epoch %d, iters %d' % (epoch, total_iters)) 73 | model.save_networks('latest') 74 | model.save_networks(epoch) 75 | 76 | print('End of epoch %d / %d \t Time Taken: %d sec' % (epoch, opt.niter + opt.niter_decay, time.time() - epoch_start_time)) 77 | model.update_learning_rate() # update learning rates at the end of every epoch. 78 | -------------------------------------------------------------------------------- /pytorch-CycleGAN-and-pix2pix/docs/datasets.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | ### CycleGAN Datasets 4 | Download the CycleGAN datasets using the following script. Some of the datasets are collected by other researchers. Please cite their papers if you use the data. 5 | ```bash 6 | bash ./datasets/download_cyclegan_dataset.sh dataset_name 7 | ``` 8 | - `facades`: 400 images from the [CMP Facades dataset](http://cmp.felk.cvut.cz/~tylecr1/facade). [[Citation](../datasets/bibtex/facades.tex)] 9 | - `cityscapes`: 2975 images from the [Cityscapes training set](https://www.cityscapes-dataset.com). [[Citation](../datasets/bibtex/cityscapes.tex)].
Note: Due to license issues, we cannot directly provide the Cityscapes dataset. Please download the Cityscapes dataset from [https://cityscapes-dataset.com](https://cityscapes-dataset.com) and use the script `./datasets/prepare_cityscapes_dataset.py`. 10 | - `maps`: 1096 training images scraped from Google Maps. 11 | - `horse2zebra`: 939 horse images and 1177 zebra images downloaded from [ImageNet](http://www.image-net.org) using keywords `wild horse` and `zebra`. 12 | - `apple2orange`: 996 apple images and 1020 orange images downloaded from [ImageNet](http://www.image-net.org) using keywords `apple` and `navel orange`. 13 | - `summer2winter_yosemite`: 1273 summer Yosemite images and 854 winter Yosemite images were downloaded using the Flickr API. See more details in our paper. 14 | - `monet2photo`, `vangogh2photo`, `ukiyoe2photo`, `cezanne2photo`: The art images were downloaded from [Wikiart](https://www.wikiart.org/). The real photos were downloaded from Flickr using the combination of the tags *landscape* and *landscapephotography*. The training set size of each class is Monet:1074, Cezanne:584, Van Gogh:401, Ukiyo-e:1433, Photographs:6853. 15 | - `iphone2dslr_flower`: both classes of images were downloaded from Flickr. The training set size of each class is iPhone:1813, DSLR:3316. See more details in our paper. 16 | 17 | To train a model on your own datasets, you need to create a data folder with two subdirectories `trainA` and `trainB` that contain images from domain A and B. You can test your model on your training set by setting `--phase train` in `test.py`. You can also create subdirectories `testA` and `testB` if you have test data. 18 | 19 | You should **not** expect our method to work on just any random combination of input and output datasets (e.g. `cats<->keyboards`). From our experiments, we find it works better if two datasets share similar visual content. For example, `landscape painting<->landscape photographs` works much better than `portrait painting <-> landscape photographs`. `zebras<->horses` achieves compelling results while `cats<->dogs` completely fails. 20 | 21 | ### pix2pix datasets 22 | Download the pix2pix datasets using the following script. Some of the datasets are collected by other researchers. Please cite their papers if you use the data. 23 | ```bash 24 | bash ./datasets/download_pix2pix_dataset.sh dataset_name 25 | ``` 26 | - `facades`: 400 images from [CMP Facades dataset](http://cmp.felk.cvut.cz/~tylecr1/facade). [[Citation](../datasets/bibtex/facades.tex)] 27 | - `cityscapes`: 2975 images from the [Cityscapes training set](https://www.cityscapes-dataset.com). [[Citation](../datasets/bibtex/cityscapes.tex)] 28 | - `maps`: 1096 training images scraped from Google Maps 29 | - `edges2shoes`: 50k training images from [UT Zappos50K dataset](http://vision.cs.utexas.edu/projects/finegrained/utzap50k). Edges are computed by [HED](https://github.com/s9xie/hed) edge detector + post-processing. [[Citation](datasets/bibtex/shoes.tex)] 30 | - `edges2handbags`: 137K Amazon Handbag images from [iGAN project](https://github.com/junyanz/iGAN). Edges are computed by [HED](https://github.com/s9xie/hed) edge detector + post-processing. [[Citation](datasets/bibtex/handbags.tex)] 31 | - `night2day`: around 20K natural scene images from [Transient Attributes dataset](http://transattr.cs.brown.edu/) [[Citation](datasets/bibtex/transattr.tex)]. To train a `day2night` pix2pix model, you need to add `--direction BtoA`.
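For example, fetching the `facades` set listed above looks like this (any other dataset name from the list should work the same way):
```bash
bash ./datasets/download_pix2pix_dataset.sh facades
```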
32 | 33 | We provide a Python script to generate pix2pix training data in the form of pairs of images {A,B}, where A and B are two different depictions of the same underlying scene. For example, these might be pairs {label map, photo} or {bw image, color image}. Then we can learn to translate A to B or B to A: 34 | 35 | Create folder `/path/to/data` with subfolders `A` and `B`. `A` and `B` should each have their own subfolders `train`, `val`, `test`, etc. In `/path/to/data/A/train`, put training images in style A. In `/path/to/data/B/train`, put the corresponding images in style B. Repeat the same for the other data splits (`val`, `test`, etc). 36 | 37 | Corresponding images in a pair {A,B} must be the same size and have the same filename, e.g., `/path/to/data/A/train/1.jpg` is considered to correspond to `/path/to/data/B/train/1.jpg`. 38 | 39 | Once the data is formatted this way, call: 40 | ```bash 41 | python datasets/combine_A_and_B.py --fold_A /path/to/data/A --fold_B /path/to/data/B --fold_AB /path/to/data 42 | ``` 43 | 44 | This will combine each pair of images (A,B) into a single image file, ready for training. 45 | -------------------------------------------------------------------------------- /main_process.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Final output: 4 | CCFTestResultFixValidData_release.csv 5 | """ 6 | 7 | import argparse 8 | import sys 9 | import os 10 | import time 11 | 12 | sys.path.append('./') 13 | 14 | from cut_twist_process import cut_twist_join # preprocessing: crop the ID-card front/back out of the raw images and rotate them 15 | from recognize_process.tools import mytest_crnn, test_crnn_jmz 16 | from watermask_remover_and_split_data.watermask_process import WatermarkRemover 17 | from data_correction_and_generate_csv_file.generate_test_csv_file import generate_csv 18 | 19 | 20 | def recoginze_init_args(): 21 | """ 22 | Initialize the arguments needed by the recognition pipeline 23 | :return: the parsed command-line arguments 24 | """ 25 | parser = argparse.ArgumentParser() 26 | parser.add_argument('-rc_w', '--recognize_weights_path', type=str, 27 | help='Path to the pre-trained weights to use', 28 | default='./recognize_process/model_save/recognize_model') 29 | parser.add_argument('-rc_c', '--recognize_char_dict_path', type=str, 30 | help='Directory where character dictionaries for the dataset were stored', 31 | default='./recognize_process/char_map/char_map.json') 32 | parser.add_argument('-rc_i', '--recognize_image_path', type=str, 33 | help='Path to the image to be tested', 34 | default='./recognize_process/test_imgs/') 35 | parser.add_argument('-rc_t', '--recognize_txt_path', type=str, 36 | help='Path of the txt file that lists the images to recognize', 37 | default='./recognize_process/image_list.txt') 38 | parser.add_argument("--no_gen_data_chu", action="store_true", help="skip generating new chusai test data") 39 | parser.add_argument("--no_gen_data_fu", action="store_true", help="skip generating new fusai test data") 40 | parser.add_argument("--no_preprocessed", action="store_true", help="skip preprocessing the test data") 41 | parser.add_argument("--no_gan_test", action="store_true", help="skip running the gan model on the test data") 42 | parser.add_argument("--no_gan_test_rematch", action="store_true", help="skip running the gan model on the rematch test data") 43 | parser.add_argument("--no_rec_img", action="store_true", help="skip recovering images") 44 | parser.add_argument("--no_rec_img_rematch", action="store_true", help="skip recovering rematch images") 45 | parser.add_argument("--no_test_data", action="store_true", help="skip generating test data") 46 | parser.add_argument("--no_fix_img", action="store_true", help="skip fixing the address and issuing-authority images")
fix img of address and unit") 47 | parser.add_argument("--no_gen_txts", action="store_true", help="if txt files for recognize") 48 | parser.add_argument("--debug", action="store_true", help="if debug") 49 | parser.add_argument("--gan_chu", default="chusai_watermask_remover_model", help="model name of chusai") 50 | parser.add_argument("--gan_fu", default="fusai_watermask_remover_model", help="model name of fusai") 51 | parser.add_argument("--pool_num", default=-1, help="the number of threads for process data") 52 | parser.add_argument("--test_data_dir", required=True, help="the dir of test data") 53 | parser.add_argument("--test_experiment_name", required=True, help="the dir of test data") 54 | parser.add_argument("--gan_ids", required=True, help="-1 for cpu, 0 or 0,1.. for GPU") 55 | 56 | return parser.parse_args() 57 | 58 | 59 | if __name__ == '__main__': 60 | args = recoginze_init_args() 61 | origin_img_path = args.test_data_dir 62 | time_log = time.strftime("%y_%m_%d_%H_%M_%S") 63 | header_dir = os.path.join("./data_temp", args.test_experiment_name + "_" + time_log) 64 | if not os.path.exists(header_dir): 65 | os.makedirs(header_dir) 66 | cut_twisted_save_path = os.path.join(header_dir, 'data_cut_twist') # 切分、旋转后数据保存路径 67 | cut_twist_template_names = ['./cut_twist_process/template/fan_blurred_fan.jpg', # 0 反面反 68 | './cut_twist_process/template/fan_blurred_zheng.jpg', # 1 反面正 69 | './cut_twist_process/template/zheng_blurred_fan.jpg', # 2 正面反 70 | './cut_twist_process/template/zheng_blurred_zheng.jpg', # 3 正面正 71 | './cut_twist_process/template/zheng_new.jpg', # 4 新水印正面 72 | './cut_twist_process/template/fan_new.jpg' # 5 新水印反面 73 | ] # 模板图片路径 74 | # 切分身份证 75 | cut_twist_join.process_cut_twist_imgs(img_path=origin_img_path, template_names=cut_twist_template_names, 76 | save_path=cut_twisted_save_path, norm_parm=[0.95, 0.95, 0.7, 0.7]) 77 | # 去水印和对图片进行切割和处理 78 | watermask_handler = WatermarkRemover(args, header_dir, cut_twisted_save_path) 79 | watermask_handler.watermask_remover_run() 80 | recognize_image_path = os.path.join(header_dir, "test_data_preprocessed") 81 | recognize_txt_path = os.path.join(header_dir, "test_data_txts") 82 | test_crnn_jmz.recognize_jmz(image_path=recognize_image_path, weights_path=args.recognize_weights_path, 83 | char_dict_path=args.recognize_char_dict_path, txt_file_path=recognize_txt_path) 84 | origin_watermask_removed_img_path = os.path.join(header_dir, "recover_image_fu_dir") 85 | generate_csv(origin_watermask_removed_img_path, recognize_txt_path, "./") 86 | -------------------------------------------------------------------------------- /watermask_remover_and_split_data/tools/fix_img_address_unit.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import cv2 3 | import os 4 | from multiprocessing import Pool 5 | 6 | type_7 = [[(0, 0), (209, 21)], [(5, 21), (209, 42)], [(5, 42), (209, 63)]] 7 | type_9 = [[(0, 0), (229, 20)], [(5, 19), (229, 39)]] 8 | 9 | 10 | def preprocess_img(img, name): 11 | resize_img = cv2.resize(img, (int(2.0 * img.shape[1]), int(2.0 * img.shape[0])), interpolation=cv2.INTER_CUBIC) 12 | # 放大两倍,更容易识别 13 | resize_img = cv2.convertScaleAbs(resize_img, alpha=0.35, beta=20) 14 | resize_img = cv2.normalize(resize_img, dst=None, alpha=300, beta=10, norm_type=cv2.NORM_MINMAX) 15 | img_blurred = cv2.medianBlur(resize_img, 7) # 中值滤波 16 | img_blurred = cv2.medianBlur(img_blurred, 3) 17 | # 这里面的几个参数,alpha,beta都可以调节,目前感觉效果还行,但是应该还可以调整地更好 18 | 19 | return img_blurred 20 | 21 | 22 | def 
23 | resize_img = cv2.resize(img, (int(2.0 * img.shape[1]), int(2.0 * img.shape[0])), interpolation=cv2.INTER_CUBIC) 24 | img = preprocess_img(img, img_name) 25 | # cv2.imwrite(img_save_path + img_name + '_processed.jpg', img) 26 | gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) 27 | sobel = cv2.Sobel(gray, cv2.CV_8U, 1, 0, ksize=3) 28 | # binarize 29 | ret, binary = cv2.threshold(sobel, 0, 255, cv2.THRESH_OTSU + cv2.THRESH_BINARY) 30 | element1 = cv2.getStructuringElement(cv2.MORPH_RECT, (16, 6)) 31 | element2 = cv2.getStructuringElement(cv2.MORPH_RECT, (13, 4)) # both kernel sizes are tunable 32 | # dilate once to make the contours stand out 33 | dilation = cv2.dilate(binary, element2, iterations=1) 34 | # erode once to remove details such as table lines; note that this removes the vertical lines 35 | erosion = cv2.erode(dilation, element1, iterations=1) 36 | dilation2 = cv2.dilate(erosion, element2, iterations=2) 37 | 38 | # cv2.imwrite(img_save_path + img_name + '_dilation.jpg', dilation2) 39 | 40 | region = [] 41 | # find contours 42 | contours, hierarchy = cv2.findContours(dilation2, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) 43 | # the call above yields several contour regions, stored in a list. 44 | # filter out the small-area ones 45 | for i in range(len(contours)): 46 | cnt = contours[i] 47 | area = cv2.contourArea(cnt) 48 | if (area < 50): 49 | continue 50 | # find the minimum-area rectangle; it may be rotated 51 | rect = cv2.minAreaRect(cnt) 52 | # box holds the coordinates of the four corners 53 | box = cv2.boxPoints(rect) 54 | box = np.int0(box) 55 | # compute height and width 56 | height = abs(box[0][1] - box[2][1]) 57 | width = abs(box[0][0] - box[2][0]) 58 | # drop rectangles that are too narrow, keeping the flat ones 59 | if 25 < height < 80 and width > 25 and height < width * 1.3: 60 | region.append(box) 61 | max_x = 0 62 | for box in region: # each box holds the bottom-left, top-left, top-right, bottom-right coordinates 63 | for box_p in box: 64 | if box_p[0] > max_x: 65 | max_x = box_p[0] 66 | h, w, c = resize_img.shape 67 | return resize_img[0:h, 0:min(max_x + 50, w)] 68 | 69 | 70 | def merge_img(img_path, points): 71 | """ 72 | :param img_path: image path 73 | :param points: coordinates of the field crops 74 | :return: the merged image 75 | Description: convert a three-line address crop or a two-line issuing-authority crop into a single line 76 | """ 77 | img = cv2.imread(img_path) 78 | img_count = len(points) 79 | # split the image at the crop points and stitch the parts horizontally 80 | image3 = np.hstack([img[points[0][0][1]:points[0][1][1], points[0][0][0]:points[0][1][0]], 81 | img[points[1][0][1]:points[1][1][1], points[1][0][0]:points[1][1][0]]]) 82 | if img_count == 3: 83 | image3 = np.hstack([image3, img[points[2][0][1]:points[2][1][1], points[2][0][0]:points[2][1][0]]]) 84 | return image3 85 | 86 | 87 | def fix_address_unit(test_data_path, save_data_path, pool_num): 88 | """ 89 | :param test_data_path: test-set data after field cropping; the address and issuing-authority crops are extracted from it and stitched horizontally 90 | :param save_data_path: save path for the processed results 91 | :param pool_num: number of worker processes 92 | """ 93 | if not os.path.exists(save_data_path): 94 | os.makedirs(save_data_path) 95 | if not os.path.exists(test_data_path): 96 | print("test data does not exist, exiting...") 97 | return 98 | train_img_names = os.listdir(test_data_path) 99 | params = [] 100 | for index, train_img_name in enumerate(train_img_names): 101 | params.append((train_img_name, test_data_path, save_data_path)) 102 | if pool_num > 0: 103 | pool = Pool(pool_num) 104 | pool.map(pre_run, params) 105 | pool.close() 106 | pool.join() 107 | else: 108 | for param in params: 109 | pre_run(param) 110 | 111 | 112 | def pre_run(params): 113 | run(params[0], params[1], params[2]) 114 | 115 | 116 | def run(train_img_name, test_data_path, save_data_path): 117 | """ 118 | :param train_img_name: name of the image to process 119 | :param test_data_path: parent directory of the images 120 | :param save_data_path: backup path (stores the data as it was before processing) 121 | """ 122 | if train_img_name[-5] in ["9", "7"]: 123 | os.system( 124 | "cp %s %s" % (os.path.join(test_data_path, train_img_name), os.path.join(save_data_path, train_img_name)))
125 | p = type_9 126 | if train_img_name[-5] == "7": 127 | p = type_7 128 | try: 129 | new_img = merge_img(os.path.join(test_data_path, train_img_name), p) # split the image 130 | img = detect_fn(new_img, train_img_name, test_data_path) # process the image and trim the blank part to avoid interference 131 | cv2.imwrite(os.path.join(test_data_path, train_img_name), img) # save the processed image 132 | except Exception: 133 | print("error while fixing image --->", train_img_name) 134 | -------------------------------------------------------------------------------- /watermask_remover_and_split_data/tools/split_img_generate_data.py: -------------------------------------------------------------------------------- 1 | import os 2 | from multiprocessing import Pool 3 | import cv2 4 | 5 | # exact coordinates, width/height and index of each field on the ID card, used to crop the fields out of the image 6 | issuing_unit = { 7 | "x_d": 167, 8 | "y_d": 191, 9 | "w": 230, 10 | "h": 40, 11 | "index": 9 12 | } 13 | effective_data = { 14 | "x_d": 167, 15 | "y_d": 227, 16 | "w": 192, 17 | "h": 19, 18 | "index": 10 19 | } 20 | name = { 21 | "x_d": 85, 22 | "y_d": 39, 23 | "w": 106, 24 | "h": 24, 25 | "index": 1 26 | } 27 | gender = { 28 | "x_d": 87, 29 | "y_d": 72, 30 | "w": 24, 31 | "h": 24, 32 | "index": 3 33 | } 34 | nationality = { 35 | "x_d": 185, 36 | "y_d": 72, 37 | "w": 121, 38 | "h": 25, 39 | "index": 2 40 | } 41 | birthday_year = { 42 | "x_d": 84, 43 | "y_d": 105, 44 | "w": 47, 45 | "h": 21, 46 | "index": 4 47 | } 48 | birthday_month = { 49 | "x_d": 147, 50 | "y_d": 105, 51 | "w": 31, 52 | "h": 23, 53 | "index": 5 54 | } 55 | birthday_day = { 56 | "x_d": 198, 57 | "y_d": 105, 58 | "w": 29, 59 | "h": 22, 60 | "index": 6 61 | } 62 | address = { 63 | "x_d": 82, 64 | "y_d": 138, 65 | "w": 210, 66 | "h": 64, 67 | "index": 7 68 | } 69 | id_card = { 70 | "x_d": 131, 71 | "y_d": 221, 72 | "w": 246, 73 | "h": 24, 74 | "index": 8 75 | } 76 | 77 | 78 | def match_img(image, template, value): 79 | """ 80 | :param image: image 81 | :param template: template 82 | :param value: threshold 83 | :return: matched template coordinates 84 | Description: obtain the location of the template in this image; used to calibrate the field positions 85 | """ 86 | res = cv2.matchTemplate(image, template, cv2.TM_CCOEFF_NORMED) 87 | threshold = value 88 | min_v, max_v, min_pt, max_pt = cv2.minMaxLoc(res) 89 | if max_v < threshold: 90 | return False 91 | if not max_pt[0] in range(10, 40) or max_pt[1] > 20: 92 | return False 93 | return max_pt 94 | 95 | 96 | def crop_img(mark_point, args, ori_img, save_path, seq, label, type_c): 97 | """ 98 | :param mark_point: anchor point 99 | :param args: field parameters: coordinates, width and height 100 | :param ori_img: image 101 | :param save_path: save path for the cropped fields 102 | :param seq: sequence number 103 | :param label: label 104 | :param type_c: type (unused) 105 | :return: 106 | """ 107 | try: 108 | x_p = mark_point[0] + args["x_d"] 109 | y_p = mark_point[1] + args["y_d"] 110 | c_img = ori_img[y_p:y_p + args["h"], x_p: x_p + args["w"]] 111 | c_img_save_path = os.path.join(save_path, "%s_%s_%s.jpg" % (str(seq), label, str(args["index"]))) 112 | cv2.imwrite(c_img_save_path, c_img) 113 | except(): 114 | print("crop except") 115 | return 116 | 117 | 118 | def generate_data(ori_img_path, template, save_path, flag, thr_value, seq, label, type_c): 119 | """ 120 | :param ori_img_path: image path 121 | :param template: template 122 | :param save_path: save path 123 | :param flag: front/back flag 124 | :param thr_value: matching threshold 125 | :param seq: sequence number 126 | :param label: label 127 | :param type_c: type 128 | :return: None 129 | """ 130 | ori_img = cv2.imread(ori_img_path) 131 | ori_img = cv2.cvtColor(ori_img, cv2.COLOR_RGB2GRAY) 132 | mark_point = match_img(ori_img, template, thr_value) # get the reference coordinates for the fields 133 | if mark_point is False:
print(" failed") 135 | return 136 | mark_point = (max(0, mark_point[0] - 20), mark_point[1]) 137 | if flag == "0": 138 | # 截取背面两种元素 139 | crop_img(mark_point, issuing_unit, ori_img, save_path, seq, label, type_c) 140 | crop_img(mark_point, effective_data, ori_img, save_path, seq, label, type_c) 141 | else: 142 | # 截取正面两种元素 143 | crop_img(mark_point, name, ori_img, save_path, seq, label, type_c) 144 | crop_img(mark_point, gender, ori_img, save_path, seq, label, type_c) 145 | crop_img(mark_point, birthday_year, ori_img, save_path, seq, label, type_c) 146 | crop_img(mark_point, birthday_month, ori_img, save_path, seq, label, type_c) 147 | crop_img(mark_point, birthday_day, ori_img, save_path, seq, label, type_c) 148 | crop_img(mark_point, address, ori_img, save_path, seq, label, type_c) 149 | crop_img(mark_point, id_card, ori_img, save_path, seq, label, type_c) 150 | crop_img(mark_point, nationality, ori_img, save_path, seq, label, type_c) 151 | 152 | 153 | def run_gen_test_data(final_save_path, template_base_path, origin_img_path, pool_num): 154 | """ 155 | :param final_save_path: 保存路径 156 | :param template_base_path: 模板图片路径 157 | :param origin_img_path: 待切割图片的路径 158 | :param pool_num: 进程数量 159 | :return: 无 160 | """ 161 | template_img = cv2.imread(template_base_path, 0) 162 | img_names = os.listdir(origin_img_path) 163 | if not os.path.exists(final_save_path): 164 | os.makedirs(final_save_path) 165 | pool = Pool(1) 166 | if pool_num > 0: 167 | pool = Pool(pool_num) 168 | for count, img_name in enumerate(img_names): 169 | img_path = os.path.join(origin_img_path, img_name) 170 | names = img_name.split("_") 171 | if pool_num > 0: 172 | pool.apply_async(generate_data,(img_path, template_img, final_save_path, names[1][0], 0.2, count, img_name[:-4], "Test", )) 173 | else: 174 | generate_data(img_path, template_img, final_save_path, names[1][0], 0.2, count, img_name[:-4], "Test") 175 | pool.close() 176 | pool.join() 177 | -------------------------------------------------------------------------------- /cut_twist_process/cut_twist_join.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # @Time : 19-11-19 22:10 3 | # @Author : Miao Wenqiang 4 | # @Reference : None 5 | # @File : cut_twist_join.py 6 | # @IDE : PyCharm Community Edition 7 | """ 8 | 将身份证正反面从原始图片中切分出来,如果方向不正确则旋转。使用传统图像处理方法实现。 9 | 需要的参数有: 10 | 1.图片所在路径。 11 | 2.图片处理结果保存路径 12 | 13 | 输出结果为: 14 | 切分和旋转后的身份证正反面图片。 15 | 对于图片A.jpg,输出A_0.jpg和A_1.jpg,其中A_0.jpg表示身份证反面(国徽面),A_1.jpg表示身份证正面 16 | """ 17 | 18 | 19 | import cv2 20 | import os 21 | from cut_twist_process import cut_part 22 | from cut_twist_process import twist_part 23 | 24 | 25 | def preprecess_cut_twist_one_img(img_path, img_name, template_list, save_path, norm_parm): 26 | """ 27 | 函数用于处理单张原始图片,切分处身份证正反面并旋转,然后写入到指定目录。 28 | :param img_path: 图片所在路径 29 | :param img_name: 图片名 30 | :param template_list: 保存有模板名的list 31 | :param save_path: 结果保存路径 32 | :param norm_parm: 归一化超参数 33 | :return: None 34 | """ 35 | error_temp, res_bbox = cut_part.preprocess_cut_one_img(img_path, img_name) 36 | # 裁剪出身份证正反面,error_temp = 0,表示裁剪部分未出错,其余表示出错;res_bbox是身份证区域 37 | if error_temp == 0: 38 | flag_judge1, img_rgb_res1, mode_type1, mode_value1 = twist_part.preprocess_twist_one_img(res_bbox[0], template_list, norm_parm=norm_parm) 39 | # 旋转图片 40 | flag_judge2, img_rgb_res2, mode_type2, mode_value2 = twist_part.preprocess_twist_one_img(res_bbox[1], template_list, norm_parm=norm_parm) 41 | if mode_type1 != mode_type2: # 证明这一对图片处理的没错 42 | res_img_name1 = 
43 | res_img_name2 = img_name.split('.')[0] + '_' + str(mode_type2) + '.jpg' 44 | cv2.imwrite(os.path.join(save_path, res_img_name1), img_rgb_res1) # write to disk 45 | cv2.imwrite(os.path.join(save_path, res_img_name2), img_rgb_res2) 46 | elif mode_value1 > mode_value2: # if both images were recognized as the same side (front or back), treat the one with the larger match score as correct and the other as the opposite side 47 | # print('img {name} was wrong when twist,start correct program'.format(name=img_name)) for debugging: print the error info 48 | res_img_name1 = img_name.split('.')[0] + '_' + str(mode_type1) + '.jpg' 49 | res_img_name2 = img_name.split('.')[0] + '_' + str(abs(1 - mode_type1)) + '.jpg' 50 | cv2.imwrite(os.path.join(save_path, res_img_name1), img_rgb_res1) 51 | cv2.imwrite(os.path.join(save_path, res_img_name2), img_rgb_res2) 52 | else: 53 | # print('img {name} was wrong when twist,start correct program'.format(name=img_name)) 54 | res_img_name1 = img_name + '_' + str(1 - mode_type2) + '.jpg' 55 | res_img_name2 = img_name + '_' + str(mode_type2) + '.jpg' 56 | cv2.imwrite(os.path.join(save_path, res_img_name1), img_rgb_res1) 57 | cv2.imwrite(os.path.join(save_path, res_img_name2), img_rgb_res2) 58 | else: # on failure, give up on cropping but still make sure the result is a pair of front/back images 59 | img = cv2.imread(os.path.join(img_path, img_name)) 60 | res_img_name1 = img_name + '_0.jpg' 61 | res_img_name2 = img_name + '_1.jpg' 62 | img_rgb_res1 = cv2.resize(img, (450, 290)) # still resize to the ID-card region size 63 | img_rgb_res2 = cv2.resize(img, (490, 290)) 64 | cv2.imwrite(os.path.join(save_path, res_img_name1), img_rgb_res1) 65 | cv2.imwrite(os.path.join(save_path, res_img_name2), img_rgb_res2) 66 | 67 | return 68 | 69 | 70 | def process_cut_twist_imgs(img_path, template_names, save_path, norm_parm): 71 | """ 72 | Batch-process all raw images in a directory: crop out the ID-card front and back, rotate and save them 73 | :param img_path: path of the raw images 74 | :param template_names: list of template paths 75 | :param save_path: directory where the ID-card fronts and backs are saved 76 | :param norm_parm: normalization parameters 77 | :return: None 78 | """ 79 | if not os.path.exists(img_path): # check that the image path exists 80 | print('img path {name} does not exist, please check again!'.format(name=img_path)) 81 | return 82 | if not os.path.exists(save_path): # create the save path if it does not exist 83 | os.makedirs(save_path) 84 | 85 | img_names = os.listdir(img_path) # list all image names that need processing 86 | img_names.sort() # sorting is essential 87 | 88 | template_list = [] 89 | for template_name in template_names: # read the template images 90 | template_list.append(cv2.imread(template_name, 0)) 91 | 92 | for img_name in img_names: # process one by one 93 | preprecess_cut_twist_one_img(img_path=img_path, img_name=img_name, 94 | template_list=template_list, save_path=save_path, norm_parm=norm_parm) 95 | 96 | return 97 | 98 | 99 | 100 | 101 | if __name__ == '__main__': 102 | origin_img_path = 'E:/Python/IDCARD/data_fusai/test/' # dataset path 103 | cut_twisted_save_path = './res_fusai_test/' # save path for the results 104 | cut_twist_template_names = ['./cut_twist_process/template/fan_blurred_fan.jpg', # 0 back side, inverted 105 | './cut_twist_process/template/fan_blurred_zheng.jpg', # 1 back side, upright 106 | './cut_twist_process/template/zheng_blurred_fan.jpg', # 2 front side, inverted 107 | './cut_twist_process/template/zheng_blurred_zheng.jpg', # 3 front side, upright 108 | './cut_twist_process/template/zheng_new.jpg', # 4 new watermark, front 109 | './cut_twist_process/template/fan_new.jpg' # 5 new watermark, back 110 | ] # template image paths 111 | cut_twist_norm_prams = [0.95, 0.95, 0.7, 0.7] # hyperparameters used for normalization 112 | process_cut_twist_imgs(img_path=origin_img_path, template_names=cut_twist_template_names, 113 | save_path=cut_twisted_save_path, norm_parm=cut_twist_norm_prams) 114 | 115 | 116 | 117 | 118 | --------------------------------------------------------------------------------
/pytorch-CycleGAN-and-pix2pix/data/base_dataset.py: -------------------------------------------------------------------------------- 1 | """This module implements an abstract base class (ABC) 'BaseDataset' for datasets. 2 | 3 | It also includes common transformation functions (e.g., get_transform, __scale_width), which can be later used in subclasses. 4 | """ 5 | import random 6 | import numpy as np 7 | import torch.utils.data as data 8 | from PIL import Image 9 | import torchvision.transforms as transforms 10 | from abc import ABC, abstractmethod 11 | 12 | 13 | class BaseDataset(data.Dataset, ABC): 14 | """This class is an abstract base class (ABC) for datasets. 15 | 16 | To create a subclass, you need to implement the following four functions: 17 | -- <__init__>: initialize the class, first call BaseDataset.__init__(self, opt). 18 | -- <__len__>: return the size of dataset. 19 | -- <__getitem__>: get a data point. 20 | -- <modify_commandline_options>: (optionally) add dataset-specific options and set default options. 21 | """ 22 | 23 | def __init__(self, opt): 24 | """Initialize the class; save the options in the class 25 | 26 | Parameters: 27 | opt (Option class)-- stores all the experiment flags; needs to be a subclass of BaseOptions 28 | """ 29 | self.opt = opt 30 | self.root = opt.dataroot 31 | 32 | @staticmethod 33 | def modify_commandline_options(parser, is_train): 34 | """Add new dataset-specific options, and rewrite default values for existing options. 35 | 36 | Parameters: 37 | parser -- original option parser 38 | is_train (bool) -- whether training phase or test phase. You can use this flag to add training-specific or test-specific options. 39 | 40 | Returns: 41 | the modified parser. 42 | """ 43 | return parser 44 | 45 | @abstractmethod 46 | def __len__(self): 47 | """Return the total number of images in the dataset.""" 48 | return 0 49 | 50 | @abstractmethod 51 | def __getitem__(self, index): 52 | """Return a data point and its metadata information. 53 | 54 | Parameters: 55 | index -- a random integer for data indexing 56 | 57 | Returns: 58 | a dictionary of data with their names. It usually contains the data itself and its metadata information.
59 | """ 60 | pass 61 | 62 | 63 | def get_params(opt, size): 64 | w, h = size 65 | new_h = h 66 | new_w = w 67 | if opt.preprocess == 'resize_and_crop': 68 | new_h = new_w = opt.load_size 69 | elif opt.preprocess == 'scale_width_and_crop': 70 | new_w = opt.load_size 71 | new_h = opt.load_size * h // w 72 | 73 | x = random.randint(0, np.maximum(0, new_w - opt.crop_size)) 74 | y = random.randint(0, np.maximum(0, new_h - opt.crop_size)) 75 | 76 | flip = random.random() > 0.5 77 | 78 | return {'crop_pos': (x, y), 'flip': flip} 79 | 80 | 81 | def get_transform(opt, params=None, grayscale=False, method=Image.BICUBIC, convert=True): 82 | transform_list = [] 83 | if grayscale: 84 | transform_list.append(transforms.Grayscale(1)) 85 | if 'resize' in opt.preprocess: 86 | osize = [opt.load_size, opt.load_size] 87 | transform_list.append(transforms.Resize(osize, method)) 88 | elif 'scale_width' in opt.preprocess: 89 | transform_list.append(transforms.Lambda(lambda img: __scale_width(img, opt.load_size, method))) 90 | 91 | if 'crop' in opt.preprocess: 92 | if params is None: 93 | transform_list.append(transforms.RandomCrop(opt.crop_size)) 94 | else: 95 | transform_list.append(transforms.Lambda(lambda img: __crop(img, params['crop_pos'], opt.crop_size))) 96 | 97 | if opt.preprocess == 'none': 98 | transform_list.append(transforms.Lambda(lambda img: __make_power_2(img, base=4, method=method))) 99 | 100 | if not opt.no_flip: 101 | if params is None: 102 | transform_list.append(transforms.RandomHorizontalFlip()) 103 | elif params['flip']: 104 | transform_list.append(transforms.Lambda(lambda img: __flip(img, params['flip']))) 105 | ## 106 | if convert: 107 | transform_list += [transforms.ToTensor()] 108 | if grayscale: 109 | transform_list += [transforms.Normalize((0.5,), (0.5,))] 110 | else: 111 | transform_list += [transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))] 112 | return transforms.Compose(transform_list) 113 | 114 | 115 | def __make_power_2(img, base, method=Image.BICUBIC): 116 | ow, oh = img.size 117 | h = int(round(oh / base) * base) 118 | w = int(round(ow / base) * base) 119 | if (h == oh) and (w == ow): 120 | return img 121 | 122 | __print_size_warning(ow, oh, w, h) 123 | return img.resize((w, h), method) 124 | 125 | 126 | def __scale_width(img, target_width, method=Image.BICUBIC): 127 | ow, oh = img.size 128 | if (ow == target_width): 129 | return img 130 | w = target_width 131 | h = int(target_width * oh / ow) 132 | return img.resize((w, h), method) 133 | 134 | 135 | def __crop(img, pos, size): 136 | ow, oh = img.size 137 | x1, y1 = pos 138 | tw = th = size 139 | if (ow > tw or oh > th): 140 | return img.crop((x1, y1, x1 + tw, y1 + th)) 141 | return img 142 | 143 | 144 | def __flip(img, flip): 145 | if flip: 146 | return img.transpose(Image.FLIP_LEFT_RIGHT) 147 | return img 148 | 149 | 150 | def __print_size_warning(ow, oh, w, h): 151 | """Print warning information about image size(only print once)""" 152 | if not hasattr(__print_size_warning, 'has_printed'): 153 | print("The image size needs to be a multiple of 4. " 154 | "The loaded image size was (%d, %d), so it was adjusted to " 155 | "(%d, %d). 
156 | "whose sizes are not multiples of 4" % (ow, oh, w, h)) 157 | __print_size_warning.has_printed = True 158 | -------------------------------------------------------------------------------- /pytorch-CycleGAN-and-pix2pix/sp: -------------------------------------------------------------------------------- 1 | """This module implements an abstract base class (ABC) 'BaseDataset' for datasets. 2 | 3 | It also includes common transformation functions (e.g., get_transform, __scale_width), which can be later used in subclasses. 4 | """ 5 | import random 6 | import numpy as np 7 | import torch.utils.data as data 8 | from PIL import Image 9 | import torchvision.transforms as transforms 10 | from abc import ABC, abstractmethod 11 | 12 | 13 | class BaseDataset(data.Dataset, ABC): 14 | """This class is an abstract base class (ABC) for datasets. 15 | 16 | To create a subclass, you need to implement the following four functions: 17 | -- <__init__>: initialize the class, first call BaseDataset.__init__(self, opt). 18 | -- <__len__>: return the size of dataset. 19 | -- <__getitem__>: get a data point. 20 | -- <modify_commandline_options>: (optionally) add dataset-specific options and set default options. 21 | """ 22 | 23 | def __init__(self, opt): 24 | """Initialize the class; save the options in the class 25 | 26 | Parameters: 27 | opt (Option class)-- stores all the experiment flags; needs to be a subclass of BaseOptions 28 | """ 29 | self.opt = opt 30 | self.root = opt.dataroot 31 | 32 | @staticmethod 33 | def modify_commandline_options(parser, is_train): 34 | """Add new dataset-specific options, and rewrite default values for existing options. 35 | 36 | Parameters: 37 | parser -- original option parser 38 | is_train (bool) -- whether training phase or test phase. You can use this flag to add training-specific or test-specific options. 39 | 40 | Returns: 41 | the modified parser. 42 | """ 43 | return parser 44 | 45 | @abstractmethod 46 | def __len__(self): 47 | """Return the total number of images in the dataset.""" 48 | return 0 49 | 50 | @abstractmethod 51 | def __getitem__(self, index): 52 | """Return a data point and its metadata information. 53 | 54 | Parameters: 55 | index -- a random integer for data indexing 56 | 57 | Returns: 58 | a dictionary of data with their names. It usually contains the data itself and its metadata information.
59 | """ 60 | pass 61 | 62 | 63 | def get_params(opt, size): 64 | w, h = size 65 | new_h = h 66 | new_w = w 67 | if opt.preprocess == 'resize_and_crop': 68 | new_h = new_w = opt.load_size 69 | elif opt.preprocess == 'scale_width_and_crop': 70 | new_w = opt.load_size 71 | new_h = opt.load_size * h // w 72 | 73 | x = random.randint(0, np.maximum(0, new_w - opt.crop_size)) 74 | y = random.randint(0, np.maximum(0, new_h - opt.crop_size)) 75 | 76 | flip = random.random() > 0.5 77 | 78 | return {'crop_pos': (x, y), 'flip': flip} 79 | 80 | 81 | def get_transform(opt, params=None, grayscale=False, method=Image.BICUBIC, convert=True): 82 | transform_list = [] 83 | if grayscale: 84 | transform_list.append(transforms.Grayscale(1)) 85 | if 'resize' in opt.preprocess: 86 | osize = [opt.load_size, opt.load_size] 87 | transform_list.append(transforms.Resize(osize, method)) 88 | elif 'scale_width' in opt.preprocess: 89 | transform_list.append(transforms.Lambda(lambda img: __scale_width(img, opt.load_size, method))) 90 | 91 | if 'crop' in opt.preprocess: 92 | if params is None: 93 | transform_list.append(transforms.RandomCrop(opt.crop_size)) 94 | else: 95 | transform_list.append(transforms.Lambda(lambda img: __crop(img, params['crop_pos'], opt.crop_size))) 96 | 97 | if opt.preprocess == 'none': 98 | transform_list.append(transforms.Lambda(lambda img: __make_power_2(img, base=4, method=method))) 99 | 100 | if not opt.no_flip: 101 | if params is None: 102 | transform_list.append(transforms.RandomHorizontalFlip()) 103 | elif params['flip']: 104 | transform_list.append(transforms.Lambda(lambda img: __flip(img, params['flip']))) 105 | ## 增加亮度和对比度 106 | transform_list.append(transforms.ColorJitter(contrast=1, brightness=1)) 107 | ## 108 | if convert: 109 | transform_list += [transforms.ToTensor()] 110 | if grayscale: 111 | transform_list += [transforms.Normalize((0.5,), (0.5,))] 112 | else: 113 | transform_list += [transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))] 114 | return transforms.Compose(transform_list) 115 | 116 | 117 | def __make_power_2(img, base, method=Image.BICUBIC): 118 | ow, oh = img.size 119 | h = int(round(oh / base) * base) 120 | w = int(round(ow / base) * base) 121 | if (h == oh) and (w == ow): 122 | return img 123 | 124 | __print_size_warning(ow, oh, w, h) 125 | return img.resize((w, h), method) 126 | 127 | 128 | def __scale_width(img, target_width, method=Image.BICUBIC): 129 | ow, oh = img.size 130 | if (ow == target_width): 131 | return img 132 | w = target_width 133 | h = int(target_width * oh / ow) 134 | return img.resize((w, h), method) 135 | 136 | 137 | def __crop(img, pos, size): 138 | ow, oh = img.size 139 | x1, y1 = pos 140 | tw = th = size 141 | if (ow > tw or oh > th): 142 | return img.crop((x1, y1, x1 + tw, y1 + th)) 143 | return img 144 | 145 | 146 | def __flip(img, flip): 147 | if flip: 148 | return img.transpose(Image.FLIP_LEFT_RIGHT) 149 | return img 150 | 151 | 152 | def __print_size_warning(ow, oh, w, h): 153 | """Print warning information about image size(only print once)""" 154 | if not hasattr(__print_size_warning, 'has_printed'): 155 | print("The image size needs to be a multiple of 4. " 156 | "The loaded image size was (%d, %d), so it was adjusted to " 157 | "(%d, %d). 
This adjustment will be done to all images " 158 | "whose sizes are not multiples of 4" % (ow, oh, w, h)) 159 | __print_size_warning.has_printed = True 160 | -------------------------------------------------------------------------------- /pytorch-CycleGAN-and-pix2pix/models/template_model.py: -------------------------------------------------------------------------------- 1 | """Model class template 2 | 3 | This module provides a template for users to implement custom models. 4 | You can specify '--model template' to use this model. 5 | The class name should be consistent with both the filename and its model option. 6 | The filename should be <model>_model.py 7 | The class name should be <Model>Model 8 | It implements a simple image-to-image translation baseline based on regression loss. 9 | Given input-output pairs (data_A, data_B), it learns a network netG that can minimize the following L1 loss: 10 | min_<netG> ||netG(data_A) - data_B||_1 11 | You need to implement the following functions: 12 | <modify_commandline_options>: Add model-specific options and rewrite default values for existing options. 13 | <__init__>: Initialize this model class. 14 | <set_input>: Unpack input data and perform data pre-processing. 15 | <forward>: Run forward pass. This will be called by both <optimize_parameters> and <test>. 16 | <optimize_parameters>: Update network weights; it will be called in every training iteration. 17 | """ 18 | import torch 19 | from .base_model import BaseModel 20 | from . import networks 21 | 22 | 23 | class TemplateModel(BaseModel): 24 | @staticmethod 25 | def modify_commandline_options(parser, is_train=True): 26 | """Add new model-specific options and rewrite default values for existing options. 27 | 28 | Parameters: 29 | parser -- the option parser 30 | is_train -- whether training phase or test phase. You can use this flag to add training-specific or test-specific options. 31 | 32 | Returns: 33 | the modified parser. 34 | """ 35 | parser.set_defaults(dataset_mode='aligned') # You can rewrite default values for this model. For example, this model usually uses aligned dataset as its dataset. 36 | if is_train: 37 | parser.add_argument('--lambda_regression', type=float, default=1.0, help='weight for the regression loss') # You can define new arguments for this model. 38 | 39 | return parser 40 | 41 | def __init__(self, opt): 42 | """Initialize this model class. 43 | 44 | Parameters: 45 | opt -- training/test options 46 | 47 | A few things can be done here. 48 | - (required) call the initialization function of BaseModel 49 | - define loss function, visualization images, model names, and optimizers 50 | """ 51 | BaseModel.__init__(self, opt) # call the initialization method of BaseModel 52 | # specify the training losses you want to print out. The program will call base_model.get_current_losses to plot the losses to the console and save them to the disk. 53 | self.loss_names = ['loss_G'] 54 | # specify the images you want to save and display. The program will call base_model.get_current_visuals to save and display these images. 55 | self.visual_names = ['data_A', 'data_B', 'output'] 56 | # specify the models you want to save to the disk. The program will call base_model.save_networks and base_model.load_networks to save and load networks. 57 | # you can use opt.isTrain to specify different behaviors for training and test. For example, some networks will not be used during test, and you don't need to load them. 58 | self.model_names = ['G'] 59 | # define networks; you can use opt.isTrain to specify different behaviors for training and test. 
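# A minimal sketch of how this model class is driven, assuming the standard train.py loop of this repository:
#     model = TemplateModel(opt)
#     for data in dataset:              # each `data` is the dict documented in set_input below
#         model.set_input(data)         # unpack data from the dataloader
#         model.optimize_parameters()   # forward pass, loss, backward pass, optimizer step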
60 | self.netG = networks.define_G(opt.input_nc, opt.output_nc, opt.ngf, opt.netG, gpu_ids=self.gpu_ids) 61 | if self.isTrain: # only defined during training time 62 | # define your loss functions. You can use losses provided by torch.nn such as torch.nn.L1Loss. 63 | # We also provide a GANLoss class "networks.GANLoss". self.criterionGAN = networks.GANLoss().to(self.device) 64 | self.criterionLoss = torch.nn.L1Loss() 65 | # define and initialize optimizers. You can define one optimizer for each network. 66 | # If two networks are updated at the same time, you can use itertools.chain to group them. See cycle_gan_model.py for an example. 67 | self.optimizer = torch.optim.Adam(self.netG.parameters(), lr=opt.lr, betas=(opt.beta1, 0.999)) 68 | self.optimizers = [self.optimizer] 69 | 70 | # Our program will automatically call <model.setup> to define schedulers, load networks, and print networks 71 | 72 | def set_input(self, input): 73 | """Unpack input data from the dataloader and perform necessary pre-processing steps. 74 | 75 | Parameters: 76 | input: a dictionary that contains the data itself and its metadata information. 77 | """ 78 | AtoB = self.opt.direction == 'AtoB' # use <direction> to swap data_A and data_B 79 | self.data_A = input['A' if AtoB else 'B'].to(self.device) # get image data A 80 | self.data_B = input['B' if AtoB else 'A'].to(self.device) # get image data B 81 | self.image_paths = input['A_paths' if AtoB else 'B_paths'] # get image paths 82 | 83 | def forward(self): 84 | """Run forward pass. This will be called by both functions <optimize_parameters> and <test>.""" 85 | self.output = self.netG(self.data_A) # generate output image given the input data_A 86 | 87 | def backward(self): 88 | """Calculate losses, gradients, and update network weights; called in every training iteration""" 89 | # calculate the intermediate results if necessary; here self.output has been computed during the <forward> function 90 | # calculate loss given the input and intermediate results 91 | self.loss_G = self.criterionLoss(self.output, self.data_B) * self.opt.lambda_regression 92 | self.loss_G.backward() # calculate gradients of network G w.r.t. 
loss_G 93 | 94 | def optimize_parameters(self): 95 | """Update network weights; it will be called in every training iteration.""" 96 | self.forward() # first call forward to calculate intermediate results 97 | self.optimizer.zero_grad() # clear network G's existing gradients 98 | self.backward() # calculate gradients for network G 99 | self.optimizer.step() # update network G's weights 100 | -------------------------------------------------------------------------------- /pytorch-CycleGAN-and-pix2pix/scripts/eval_cityscapes/cityscapes.py: -------------------------------------------------------------------------------- 1 | # The following code is modified from https://github.com/shelhamer/clockwork-fcn 2 | import sys 3 | import os 4 | import glob 5 | import numpy as np 6 | from PIL import Image 7 | 8 | 9 | class cityscapes: 10 | def __init__(self, data_path): 11 | # data_path something like /data2/cityscapes 12 | self.dir = data_path 13 | self.classes = ['road', 'sidewalk', 'building', 'wall', 'fence', 14 | 'pole', 'traffic light', 'traffic sign', 'vegetation', 'terrain', 15 | 'sky', 'person', 'rider', 'car', 'truck', 16 | 'bus', 'train', 'motorcycle', 'bicycle'] 17 | self.mean = np.array((72.78044, 83.21195, 73.45286), dtype=np.float32) 18 | # import cityscapes label helper and set up label mappings 19 | sys.path.insert(0, '{}/scripts/helpers/'.format(self.dir)) 20 | labels = __import__('labels') 21 | self.id2trainId = {label.id: label.trainId for label in labels.labels} # dictionary mapping from raw IDs to train IDs 22 | self.trainId2color = {label.trainId: label.color for label in labels.labels} # dictionary mapping train IDs to colors as 3-tuples 23 | 24 | def get_dset(self, split): 25 | ''' 26 | List images as (city, id) for the specified split 27 | 28 | TODO(shelhamer) generate splits from cityscapes itself, instead of 29 | relying on these separately made text files. 30 | ''' 31 | if split == 'train': 32 | dataset = open('{}/ImageSets/segFine/train.txt'.format(self.dir)).read().splitlines() 33 | else: 34 | dataset = open('{}/ImageSets/segFine/val.txt'.format(self.dir)).read().splitlines() 35 | return [(item.split('/')[0], item.split('/')[1]) for item in dataset] 36 | 37 | def load_image(self, split, city, idx): 38 | im = Image.open('{}/leftImg8bit_sequence/{}/{}/{}_leftImg8bit.png'.format(self.dir, split, city, idx)) 39 | return im 40 | 41 | def assign_trainIds(self, label): 42 | """ 43 | Map the given label IDs to the train IDs appropriate for training 44 | Use the label mapping provided in labels.py from the cityscapes scripts 45 | """ 46 | label = np.array(label, dtype=np.float32) 47 | if sys.version_info[0] < 3: 48 | for k, v in self.id2trainId.iteritems(): 49 | label[label == k] = v 50 | else: 51 | for k, v in self.id2trainId.items(): 52 | label[label == k] = v 53 | return label 54 | 55 | def load_label(self, split, city, idx): 56 | """ 57 | Load label image as 1 x height x width integer array of label indices. 58 | The leading singleton dimension is required by the loss. 59 | """ 60 | label = Image.open('{}/gtFine/{}/{}/{}_gtFine_labelIds.png'.format(self.dir, split, city, idx)) 61 | label = self.assign_trainIds(label) # get proper labels for eval 62 | label = np.array(label, dtype=np.uint8) 63 | label = label[np.newaxis, ...] 
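# label now has shape (1, H, W); the leading singleton dimension is what the downstream loss expects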
64 | return label 65 | 66 | def preprocess(self, im): 67 | """ 68 | Preprocess loaded image (by load_image) for Caffe: 69 | - cast to float 70 | - switch channels RGB -> BGR 71 | - subtract mean 72 | - transpose to channel x height x width order 73 | """ 74 | in_ = np.array(im, dtype=np.float32) 75 | in_ = in_[:, :, ::-1] 76 | in_ -= self.mean 77 | in_ = in_.transpose((2, 0, 1)) 78 | return in_ 79 | 80 | def palette(self, label): 81 | ''' 82 | Map trainIds to colors as specified in labels.py 83 | ''' 84 | if label.ndim == 3: 85 | label = label[0] 86 | color = np.empty((label.shape[0], label.shape[1], 3)) 87 | if sys.version_info[0] < 3: 88 | for k, v in self.trainId2color.iteritems(): 89 | color[label == k, :] = v 90 | else: 91 | for k, v in self.trainId2color.items(): 92 | color[label == k, :] = v 93 | return color 94 | 95 | def make_boundaries(label, thickness=None): 96 | """ 97 | Input is an image label, output is a numpy array mask encoding the boundaries of the objects 98 | Extract pixels at the true boundary by dilation - erosion of label. 99 | Don't just pick the void label as it is not exclusive to the boundaries. 100 | """ 101 | assert(thickness is not None) 102 | import skimage.morphology as skm 103 | void = 255 104 | mask = np.logical_and(label > 0, label != void)[0] 105 | selem = skm.disk(thickness) 106 | boundaries = np.logical_xor(skm.dilation(mask, selem), 107 | skm.erosion(mask, selem)) 108 | return boundaries 109 | 110 | def list_label_frames(self, split): 111 | """ 112 | Select labeled frames from a split for evaluation 113 | collected as (city, shot, idx) tuples 114 | """ 115 | def file2idx(f): 116 | """Helper to convert file path into frame ID""" 117 | city, shot, frame = (os.path.basename(f).split('_')[:3]) 118 | return "_".join([city, shot, frame]) 119 | frames = [] 120 | cities = [os.path.basename(f) for f in glob.glob('{}/gtFine/{}/*'.format(self.dir, split))] 121 | for c in cities: 122 | files = sorted(glob.glob('{}/gtFine/{}/{}/*labelIds.png'.format(self.dir, split, c))) 123 | frames.extend([file2idx(f) for f in files]) 124 | return frames 125 | 126 | def collect_frame_sequence(self, split, idx, length): 127 | """ 128 | Collect sequence of frames preceding (and including) a labeled frame 129 | as a list of Images. 130 | 131 | Note: 19 preceding frames are provided for each labeled frame. 
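For example, a call with length=19 and a hypothetical idx of 'aachen_000000_000019' would return the 20 frames 000000 through 000019 as PIL Images.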
132 | """ 133 | SEQ_LEN = length 134 | city, shot, frame = idx.split('_') 135 | frame = int(frame) 136 | frame_seq = [] 137 | for i in range(frame - SEQ_LEN, frame + 1): 138 | frame_path = '{0}/leftImg8bit_sequence/val/{1}/{1}_{2}_{3:0>6d}_leftImg8bit.png'.format( 139 | self.dir, city, shot, i) 140 | frame_seq.append(Image.open(frame_path)) 141 | return frame_seq 142 | -------------------------------------------------------------------------------- /CCFTestResultFixValidData_release.csv: -------------------------------------------------------------------------------- 1 | 00b943aea3ca4f4d81fab2a7122a9078,林琼辉,汉,男,1965,9,14,安徽省宿州市埇桥区栏杆镇大旺村,341302196509149963,宿州市埇桥区公安局,2019.05.01-长期 2 | 0a515aebce5b46e4a8bcb62ea4801d16,伏旭,鄂温克,男,2002,5,24,河南省洛阳市涧西区长春路街道办事处,41030520020524275,洛阳市涧西区公安局,2012.05.22-2017.05.22 3 | 0b54ffb83c99410e833bf36598efd7e2,郑渝昆,汉,男,2001,5,11,西藏自治区那曲地区双湖县嘎措乡玛威荣那村,54243120010511464,那曲地区双湖县公安局,2016.05.05-2021.05.05 4 | 0a252459c77f4614a235336067e94286,郭宏毅,朝鲜,女,1973,7,24,四川省广元市苍溪县歧坪镇曙光村,510824197307244306,广元市苍溪县公安局,2015.06.15-2035.06.15 5 | 0af6a5d184cc421d9e4fcae6a308c29d,王文宪,普米,男,1978,10,14,山东省德州市宁津县杜集镇东段村,371422197810148143,德州市宁津县公安局,2014.09.25-2034.09.25 6 | 0bae313e9508400ca64356a24418c0ed,潘尚仁,回,女,1976,3,3,山东省烟台市莱阳市吕格庄镇响水沟村,37068219760303033,烟台市莱阳市公安局,2017.10.07-2037.10.07 7 | 0ac55c4bd3794d1fab562bbc1ab85928,王作伦,水,女,1995,6,1,山西省长治市黎城县黎侯镇望北村,14042619950601915,长治市黎城县公安局,2013.03.28-2023.03.28 8 | 0af2b8b3eb4e4c299d650a924aea7315,鲍国栋,哈萨克,女,2000,1,11,河北省邢台市广宗县广宗镇,13053120000111908,邢台市广宗县公安局,2013.12.31-2018.12.31 9 | 0a08613bc369497eaacb48dcfba7e459,胡忠群,苗,女,2006,4,10,陕西省安康市汉滨区早阳镇大沟河村,61090220060410317,安康市汉滨区公安局,2019.04.28-2024.04.28 10 | 0aa34d9715c144b995bfd7837bfb938c,尚慧峰,高山,男,2005,9,22,贵州省遵义市务川仡佬族苗族自治县分水镇三星村,520326200509222529,遵义市务川仡佬族苗族自治县公安局,2011.08.01-2016.08.01 11 | 0b7a8665ab05464895dc5030f0440d0b,何昌菊,黎,女,1959,5,17,浙江省金华市东阳市千祥镇高宅村,330783195905172308,金华市东阳市公安局,2015.03.05-长期 12 | 0b4f20afc9894994a025a5c87d151d3f,赵则玲,普米,女,1985,1,4,湖北省黄冈市浠水县洗马镇万当铺村,42112519850104094,黄冈市浠水县公安局,2010.12.24-2030.12.24 13 | 0a10a649a8f24276b7b0ab90ba0f814e,范晓杰,傣,女,1997,1,31,河北省沧州市泊头市富镇镇前董屯村,13098119970131285,沧州市泊头市公安局,2011.05.11-2016.05.11 14 | 0b88a52a5f144c7eb705c353120b0a52,孙明洲,汉,男,1979,6,3,河南省新乡市卫辉市柳庄乡焦浩屯村,41078119790603278,新乡市卫辉市公安局,2014.07.08-2034.07.08 15 | 0b3f9a6822de4425923752d9134e3948,余明,怒,女,2006,9,27,四川省广元市剑阁县张王乡,51082320060927697,广元市剑阁县公安局,2015.03.28-2020.03.28 16 | 0b3084c85e1444ce853e01355779b635,张煜涵,拉祜,男,1976,1,2,河北省石家庄市桥西区友谊街道办事处,130104197601027607,石家庄市桥西区公安局,2013.05.11-2033.05.11 17 | 0abb03b9408e45ef80f4210f16531590,李安芬,傈僳,男,1977,10,26,山东省青岛市胶州市胶莱镇徐家闸子村,37028119771026577,青岛市胶州市公安局,2012.07.14-2032.07.14 18 | 0bd94df97f924a83926d55f10945de9d,李道山,布朗,女,1997,8,16,浙江省台州市黄岩区上郑乡毛家村,331003199708166346,台州市黄岩区公安局,2017.10.05-2027.10.05 19 | 0b2b41964edb4cea916814351bab4dc2,郭秋明,苗,男,1975,11,21,山东省泰安市岱岳区粥店街道下旺村,37091119751121163X,泰安市岱岳区公安局,2013.01.18-2033.01.18 20 | 0a5f282593bc48e0bfc2654f21d22dab,宋庆飞,傣,女,1966,1,17,江西省吉安市泰和县螺溪镇普田村,36082619660117927,吉安市泰和县公安局,2011.10.11-长期 21 | 00abd04bd86a4dcb89daecb3753f5981,黄庆和,京,男,1967,4,7,山东省聊城市临清市松林镇马张村,371581196704077127,聊城市临清市公安局,2014.08.21-长期 22 | 0a57de87638b48dc80a67d1d6f4ef669,李锡涛,毛难,男,1997,11,15,河北省廊坊市固安县礼让店乡闫家庄村,131022199711156781,廊坊市固安县公安局,2013.11.08-2023.11.08 23 | 0b047777cc9041c28d4056a9ba6fb054,张华军,东乡,女,2006,9,15,广东省湛江市遂溪县杨柑镇新宁村,440823200609159764,湛江市遂溪县公安局,2018.08.25-2023.08.25 24 | 0a2248193a7248bab8383659d5e60118,柳晓菊,水,男,1986,4,24,河北省石家庄市行唐县上方乡许由村,130125198604246297,石家庄市行唐县公安局,2015.03.21-2035.03.21 25 | 
0b9d6a0e346e4563b173593bbd0d3f83,牛红光,白,女,1960,9,2,西藏自治区昌都市察雅县扩达乡岗卡村,540325196009028744,昌都市察雅县公安局,2015.08.28-长期 26 | 0b7fcfac6aee41ce9cdd79853bc0365f,克丽斯,傣,男,2008,3,25,江西省南昌市东湖区滕王阁街道办事处,36010220080325541,南昌市东湖区公安局,2015.11.13-2020.11.13 27 | 0a2480f08ca04692ba919f7f4d17febe,黄福志,保安,女,1973,5,31,天津市市辖区西青区中北镇,12011119730531008,市辖区西青区公安局,2016.08.17-2036.08.17 28 | 0adab4b7a8c347668f8bc1b92b5f37ed,殷宗泽,汉,男,1983,7,20,河南省濮阳市清丰县仙庄镇赵娄高村,41092219830720714,濮阳市清丰县公安局,2017.04.09-2037.04.09 29 | 0b31285aacdd4113887050ed474c0d18,卞京,京,女,2000,11,4,黑龙江省齐齐哈尔市龙江县七棵树镇前进村,230221200011042786,齐齐哈尔市龙江县公安局,2017.11.11-2027.11.11 30 | 0ad931096e774c9e97f63bba1c74e09f,崔雨佳,基诺,男,1991,3,16,云南省昆明市盘龙区青云街道办事处,53010319910316260,昆明市盘龙区公安局,2012.08.23-2022.08.23 31 | 0a4d23725e8340e78af4d40918b52185,罗晓庆,独龙,女,1976,1,22,广东省湛江市霞山区乐华街道办事处,440803197601220612,湛江市霞山区公安局,2011.01.22-2031.01.22 32 | 0a52478d14f84724a0793cb7ee9885e9,夏祥国,黎,女,1974,7,22,辽宁省抚顺市清原满族自治县南山城镇四道碱厂村,210423197407227942,抚顺市清原满族自治县公安局,2009.11.17-2029.11.17 33 | 0abac9a64efe4fea8b51ba8f70337eee,蔺琴,侗,男,1997,11,24,河北省保定市莲池区联盟街道办事处,13060619971124669,保定市莲池区公安局,2010.05.18-2015.05.18 34 | 0b4d09352d5644308e13f3b698d0a1fd,姚凤群,乌孜别克,女,1962,4,10,河北省保定市清苑区大庄镇黎沟村,13060819620410039,保定市清苑区公安局,2013.07.01-长期 35 | 0a310fbcb9da4e498e11fddbcd111d4e,金裕民,仫佬,女,1993,8,9,广东省韶关市南雄市百顺镇百顺村,440282199308091294,韶关市南雄市公安局,2018.06.06-2038.06.06 36 | 0bbeeb54d8be4ef2a6b6fdaf04465917,谢绍芬,汉,男,1995,8,26,河南省周口市太康县独塘乡十里铺村,411627199508265239,周口市太康县公安局,2016.06.07-2026.06.07 37 | 0b851db9455a4e18b534c2bc384e7173,周海玲,白,男,1987,1,11,吉林省延边朝鲜族自治州汪清县大兴沟镇东阳村,22242419870111677,延边朝鲜族自治州汪清县公安局,2016.05.28-2036.05.28 38 | 0b38af89dc0c4759af673d3126d2ebe4,姜秀秀,怒,女,1999,3,1,辽宁省铁岭市西丰县成平满族乡景贤村,21122319990301137,铁岭市西丰县公安局,2018.07.07-2028.07.07 39 | 0aaff5dc917c4e0582881ca344ed4d9c,傅一鸣,汉,男,1977,2,2,河南省信阳市光山县马畈镇柳林村,41152219770202946,信阳市光山县公安局,2017.05.14-2037.05.14 40 | 0b6679f1db824ac0a32c838bcc4af0b0,樊志起,佤,女,1994,11,15,四川省南充市仪陇县三蛟镇燕山村,51132419941115088,南充市仪陇县公安局,2011.09.05-2021.09.05 41 | 0b7624f8b4e048c5bd0ec5296c5fe8f2,田玉文,纳西,女,1973,11,21,河南省信阳市罗山县竹竿镇张老店村,41152119731121858,信阳市罗山县公安局,2010.06.24-2030.06.24 42 | 0ad8c21fefa54d96ab22a4b4992864df,陈昌郁,满,女,1986,1,30,河北省廊坊市固安县固安镇大留村,131022198601304910,廊坊市固安县公安局,2010.07.13-2020.07.13 43 | 0b4ff84580114062a90b927bf2c85db8,陆剑云,裕固,男,1959,3,23,贵州省贵阳市观山湖区金华园社区服务中心,52011519590323275,贵阳市观山湖区公安局,2010.08.16-长期 44 | 0b57e0fbe6524a849e4f7657e7739773,袁晓婕,门巴,男,1987,2,3,西藏自治区昌都市类乌齐县岗色乡岗达村,540323198702036861,昌都市类乌齐县公安局,2014.01.03-2034.01.03 45 | 0a2c730645c84d72af62b6729a282d1e,李树桥,苗,男,1984,7,24,山东省临沂市临沂高新技术产业开发区马厂湖镇古城村,37137119840724860X,临沂市临沂高新技术产业开发区公安局,2010.02.15-2030.02.15 46 | 0b1ac9bd73e64dfca46df4c1a11b1a2d,周华明,塔塔尔,女,1974,12,28,陕西省渭南市华州区下庙镇新下新型农村,61050319741228898,渭南市华州区公安局,2014.10.08-2034.10.08 47 | 0b286d76d76846e2a9f936013e68a727,沈荣根,汉,男,1977,1,4,广西壮族自治区北海市合浦县公馆镇长坡村,450521197701048811,北海市合浦县公安局,2011.11.21-2031.11.21 48 | 0a68db93167f4a6db1385c9eb3387e5b,浦宏,鄂伦春,女,1999,9,3,河南省三门峡市灵宝市西阎乡东邱村,41128219990903868,三门峡市灵宝市公安局,2011.12.04-2016.12.04 49 | 0b8dbb962d314817a86a051c2892410f,王学鹏,汉,男,1960,10,29,青海省西宁市湟中县甘河滩镇卡跃村,630122196010295155,西宁市湟中县公安局,2015.05.14-长期 50 | 0a02219e4a3d46ef8b741f9637236c85,师至洁,阿昌,女,1966,10,4,四川省自贡市贡井区五宝镇凤翔村,51030319661004051,自贡市贡井区公安局,2009.11.27-2029.11.27 51 | 0ab3a2bd34ef4291ab98c02ada272cf2,沈海洋,达斡尔,女,1999,5,16,四川省南充市南部县楠木镇楠新村,51132119990516357,南充市南部县公安局,2014.03.23-2019.03.23 52 | 0b99ef666fb04f0db5f46bd77c568596,金颂,苗,男,1981,7,24,湖南省常德市石门县壶瓶山镇九岭村,43072619810724971,常德市石门县公安局,2010.06.12-2030.06.12 53 | 
0b84c613703d49bfbe28ce8b503785d0,孙玉财,怒,男,1986,11,11,河北省保定市徐水区大因镇龙化村,13060919861111690,保定市徐水区公安局,2016.11.03-2036.11.03 54 | 0b90f526aca9448ea1503033f369124f,藏旭恒,藏,女,1963,5,15,河北省沧州市运河区南环中路街道办事处,13090319630515731,沧州市运河区公安局,2015.05.07-长期 55 | 0ae67087ca544f5dbb5beb45ccfe52e0,赵得明,鄂伦春,女,1989,2,8,江苏省泰州市姜堰区梁徐镇坡岭村,32120419890208177,泰州市姜堰区公安局,2009.12.11-2019.12.11 56 | 0abc1ec4a7ee4483b9f08a2a3587d717,谭万富,汉,女,2005,11,26,吉林省四平市公主岭市怀德镇明伦村,22038120051126422,四平市公主岭市公安局,2013.04.30-2018.04.30 57 | -------------------------------------------------------------------------------- /pytorch-CycleGAN-and-pix2pix/models/pix2pix_model.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from .base_model import BaseModel 3 | from . import networks 4 | 5 | 6 | class Pix2PixModel(BaseModel): 7 | """ This class implements the pix2pix model, for learning a mapping from input images to output images given paired data. 8 | 9 | The model training requires '--dataset_mode aligned' dataset. 10 | By default, it uses a '--netG unet256' U-Net generator, 11 | a '--netD basic' discriminator (PatchGAN), 12 | and a '--gan_mode' vanilla GAN loss (the cross-entropy objective used in the original GAN paper). 13 | 14 | pix2pix paper: https://arxiv.org/pdf/1611.07004.pdf 15 | """ 16 | @staticmethod 17 | def modify_commandline_options(parser, is_train=True): 18 | """Add new dataset-specific options, and rewrite default values for existing options. 19 | 20 | Parameters: 21 | parser -- original option parser 22 | is_train (bool) -- whether training phase or test phase. You can use this flag to add training-specific or test-specific options. 23 | 24 | Returns: 25 | the modified parser. 26 | 27 | For pix2pix, we do not use an image buffer. 28 | The training objective is: GAN Loss + lambda_L1 * ||G(A)-B||_1 29 | By default, we use vanilla GAN loss, UNet with batchnorm, and aligned datasets. 30 | """ 31 | # changing the default values to match the pix2pix paper (https://phillipi.github.io/pix2pix/) 32 | parser.set_defaults(norm='batch', netG='unet_256', dataset_mode='aligned') 33 | if is_train: 34 | parser.set_defaults(pool_size=0, gan_mode='vanilla') 35 | parser.add_argument('--lambda_L1', type=float, default=100.0, help='weight for L1 loss') 36 | 37 | return parser 38 | 39 | def __init__(self, opt): 40 | """Initialize the pix2pix class. 41 | 42 | Parameters: 43 | opt (Option class)-- stores all the experiment flags; needs to be a subclass of BaseOptions 44 | """ 45 | BaseModel.__init__(self, opt) 46 | # specify the training losses you want to print out. The training/test scripts will call <BaseModel.get_current_losses> 47 | self.loss_names = ['G_GAN', 'G_L1', 'D_real', 'D_fake'] 48 | # specify the images you want to save/display. The training/test scripts will call <BaseModel.get_current_visuals> 49 | self.visual_names = ['real_A', 'fake_B', 'real_B'] 50 | # specify the models you want to save to the disk. 
The training/test scripts will call <BaseModel.save_networks> and <BaseModel.load_networks> 51 | if self.isTrain: 52 | self.model_names = ['G', 'D'] 53 | else: # during test time, only load G 54 | self.model_names = ['G'] 55 | # define networks (both generator and discriminator) 56 | self.netG = networks.define_G(opt.input_nc, opt.output_nc, opt.ngf, opt.netG, opt.norm, 57 | not opt.no_dropout, opt.init_type, opt.init_gain, self.gpu_ids) 58 | 59 | if self.isTrain: # define a discriminator; conditional GANs need to take both input and output images; therefore, #channels for D is input_nc + output_nc 60 | self.netD = networks.define_D(opt.input_nc + opt.output_nc, opt.ndf, opt.netD, 61 | opt.n_layers_D, opt.norm, opt.init_type, opt.init_gain, self.gpu_ids) 62 | 63 | if self.isTrain: 64 | # define loss functions 65 | self.criterionGAN = networks.GANLoss(opt.gan_mode).to(self.device) 66 | self.criterionL1 = torch.nn.L1Loss() 67 | # initialize optimizers; schedulers will be automatically created by function <BaseModel.setup>. 68 | self.optimizer_G = torch.optim.Adam(self.netG.parameters(), lr=opt.lr, betas=(opt.beta1, 0.999)) 69 | self.optimizer_D = torch.optim.Adam(self.netD.parameters(), lr=opt.lr, betas=(opt.beta1, 0.999)) 70 | self.optimizers.append(self.optimizer_G) 71 | self.optimizers.append(self.optimizer_D) 72 | 73 | def set_input(self, input): 74 | """Unpack input data from the dataloader and perform necessary pre-processing steps. 75 | 76 | Parameters: 77 | input (dict): includes the data itself and its metadata information. 78 | 79 | The option 'direction' can be used to swap images in domain A and domain B. 80 | """ 81 | AtoB = self.opt.direction == 'AtoB' 82 | self.real_A = input['A' if AtoB else 'B'].to(self.device) 83 | self.real_B = input['B' if AtoB else 'A'].to(self.device) 84 | self.image_paths = input['A_paths' if AtoB else 'B_paths'] 85 | 86 | def forward(self): 87 | """Run forward pass; called by both functions <optimize_parameters> and <test>.""" 88 | self.fake_B = self.netG(self.real_A) # G(A) 89 | 90 | def backward_D(self): 91 | """Calculate GAN loss for the discriminator""" 92 | # Fake; stop backprop to the generator by detaching fake_B 93 | fake_AB = torch.cat((self.real_A, self.fake_B), 1) # we use conditional GANs; we need to feed both input and output to the discriminator 94 | pred_fake = self.netD(fake_AB.detach()) 95 | self.loss_D_fake = self.criterionGAN(pred_fake, False) 96 | # Real 97 | real_AB = torch.cat((self.real_A, self.real_B), 1) 98 | pred_real = self.netD(real_AB) 99 | self.loss_D_real = self.criterionGAN(pred_real, True) 100 | # combine loss and calculate gradients 101 | self.loss_D = (self.loss_D_fake + self.loss_D_real) * 0.5 102 | self.loss_D.backward() 103 | 104 | def backward_G(self): 105 | """Calculate GAN and L1 loss for the generator""" 106 | # First, G(A) should fake the discriminator 107 | fake_AB = torch.cat((self.real_A, self.fake_B), 1) 108 | pred_fake = self.netD(fake_AB) 109 | self.loss_G_GAN = self.criterionGAN(pred_fake, True) 110 | # Second, G(A) = B 111 | self.loss_G_L1 = self.criterionL1(self.fake_B, self.real_B) * self.opt.lambda_L1 112 | # combine loss and calculate gradients 113 | self.loss_G = self.loss_G_GAN + self.loss_G_L1 114 | self.loss_G.backward() 115 | 116 | def optimize_parameters(self): 117 | self.forward() # compute fake images: G(A) 118 | # update D 119 | self.set_requires_grad(self.netD, True) # enable backprop for D 120 | self.optimizer_D.zero_grad() # set D's gradients to zero 121 | self.backward_D() # calculate gradients for D 122 | self.optimizer_D.step() # update D's weights 123 | # update G 124 | 
self.set_requires_grad(self.netD, False) # D requires no gradients when optimizing G 125 | self.optimizer_G.zero_grad() # set G's gradients to zero 126 | self.backward_G() # calculate gradients for G 127 | self.optimizer_G.step() # update G's weights 128 | -------------------------------------------------------------------------------- /recognize_process/crnn_model/crnn_model.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # @Time : 17-9-21 6:39 PM 4 | # @Author : MaybeShewill-CV 5 | # @Site : https://github.com/MaybeShewill-CV/CRNN_Tensorflow 6 | # @File : crnn_net.py 7 | # @IDE: PyCharm Community Edition 8 | """ 9 | Implement the CRNN model described in the paper "An End-to-End Trainable Neural Network for Image-based Sequence 10 | Recognition and Its Application to Scene Text Recognition" 11 | """ 12 | import numpy as np 13 | import tensorflow as tf 14 | from tensorflow.contrib import rnn 15 | 16 | from recognize_process.crnn_model import cnn_basenet 17 | from recognize_process.config import model_config 18 | 19 | CFG = model_config.cfg 20 | 21 | 22 | class ShadowNet(cnn_basenet.CNNBaseModel): 23 | 24 | def __init__(self, phase, hidden_nums, layers_nums, num_classes): 25 | super(ShadowNet, self).__init__() 26 | if phase == 'train': 27 | self._phase = tf.constant('train', dtype=tf.string) 28 | else: 29 | self._phase = tf.constant('test', dtype=tf.string) 30 | self._hidden_nums = hidden_nums 31 | self._layers_nums = layers_nums 32 | self._num_classes = num_classes 33 | self._is_training = self._init_phase() 34 | 35 | def _init_phase(self): 36 | return tf.equal(self._phase, tf.constant('train', dtype=tf.string)) 37 | 38 | def _conv_stage(self, inputdata, out_dims, name): 39 | with tf.variable_scope(name_or_scope=name): 40 | conv = self.conv2d(inputdata=inputdata, out_channel=out_dims, \ 41 | kernel_size=3, stride=1, use_bias=True, name='conv') 42 | bn = self.layerbn(inputdata=conv, is_training=self._is_training, name='bn') 43 | relu = self.relu(inputdata=bn, name='relu') 44 | max_pool = self.maxpooling(inputdata=relu, kernel_size=2, stride=2, name='max_pool') 45 | return max_pool 46 | 47 | def _feature_sequence_extraction(self, inputdata, name): 48 | 49 | with tf.variable_scope(name_or_scope=name): 50 | conv1 = self._conv_stage(inputdata=inputdata, out_dims=64, name='conv1') 51 | conv2 = self._conv_stage(inputdata=conv1, out_dims=128, name='conv2') 52 | conv3 = self.conv2d(inputdata=conv2, out_channel=256, kernel_size=3, \ 53 | stride=1, use_bias=False, name='conv3') 54 | bn3 = self.layerbn(inputdata=conv3, is_training=self._is_training, name='bn3') 55 | relu3 = self.relu(inputdata=bn3, name='relu3') 56 | conv4 = self.conv2d(inputdata=relu3, out_channel=256, kernel_size=3, \ 57 | stride=1, use_bias=False, name='conv4') 58 | bn4 = self.layerbn(inputdata=conv4, is_training=self._is_training, name='bn4') 59 | relu4 = self.relu(inputdata=bn4, name='relu4') 60 | max_pool4 = self.maxpooling(inputdata=relu4, kernel_size=[2, 1], \ 61 | stride=[2, 1], padding='VALID', name='max_pool4') 62 | conv5 = self.conv2d(inputdata=max_pool4, out_channel=512, kernel_size=3, \ 63 | stride=1, use_bias=False, name='conv5') 64 | bn5 = self.layerbn(inputdata=conv5, is_training=self._is_training, name='bn5') 65 | relu5 = self.relu(inputdata=bn5, name='relu5') 66 | conv6 = self.conv2d(inputdata=relu5, out_channel=512, kernel_size=3, \ 67 | stride=1, use_bias=False, name='conv6') 68 | bn6 = self.layerbn(inputdata=conv6, 
is_training=self._is_training, name='bn6') 69 | relu6 = self.relu(inputdata=bn6, name='relu6') 70 | max_pool6 = self.maxpooling(inputdata=relu6, kernel_size=[2, 1], \ 71 | stride=[2, 1], name='max_pool6') 72 | conv7 = self.conv2d(inputdata=max_pool6, out_channel=512, kernel_size=2, \ 73 | stride=[2, 1], use_bias=False, name='conv7') 74 | bn7 = self.layerbn(inputdata=conv7, is_training=self._is_training, name='bn7') 75 | relu7 = self.relu(inputdata=bn7, name='relu7') 76 | 77 | return relu7 78 | 79 | def _map_to_sequence(self, inputdata, name): 80 | with tf.variable_scope(name_or_scope=name): 81 | shape = inputdata.get_shape().as_list() 82 | assert shape[1] == 1 # H of the feature map must equal 1 83 | 84 | ret = self.squeeze(inputdata=inputdata, axis=1, name='squeeze') 85 | 86 | return ret 87 | 88 | def _sequence_label(self, inputdata, name): 89 | with tf.variable_scope(name_or_scope=name): 90 | fw_cell_list = [tf.nn.rnn_cell.LSTMCell(nh, forget_bias=1.0) for 91 | nh in [self._hidden_nums] * self._layers_nums] 92 | # Backward direction cells 93 | bw_cell_list = [tf.nn.rnn_cell.LSTMCell(nh, forget_bias=1.0) for 94 | nh in [self._hidden_nums] * self._layers_nums] 95 | 96 | stack_lstm_layer, _, _ = rnn.stack_bidirectional_dynamic_rnn(fw_cell_list, 97 | bw_cell_list, inputdata, #sequence_length=CFG.ARCH.SEQ_LENGTH * np.ones(CFG.TRAIN.BATCH_SIZE), 98 | dtype=tf.float32) 99 | #stack_lstm_layer = self.dropout(inputdata=stack_lstm_layer, keep_prob=0.5,\ 100 | # is_training=self._is_training, name='sequence_drop_out') 101 | 102 | [batch_s, _, hidden_nums] = inputdata.get_shape().as_list() # [batch, width, 2*n_hidden] 103 | 104 | shape = tf.shape(stack_lstm_layer) 105 | rnn_reshaped = tf.reshape(stack_lstm_layer, [shape[0] * shape[1], shape[2]]) 106 | 107 | w = tf.get_variable(name='w', shape=[hidden_nums, self._num_classes], \ 108 | initializer=tf.truncated_normal_initializer(stddev=0.02), trainable=True) 109 | # Doing the affine projection 110 | logits = tf.matmul(rnn_reshaped, w, name='logits') 111 | logits = tf.reshape(logits, [shape[0], shape[1], self._num_classes], name='logits_reshape') 112 | raw_pred = tf.argmax(tf.nn.softmax(logits), axis=2, name='raw_prediction') 113 | # Swap the batch and time axes 114 | rnn_out = tf.transpose(logits, [1, 0, 2], name='transpose_time_major') # [width, batch, n_classes] 115 | 116 | return rnn_out, raw_pred 117 | 118 | def inference(self, inputdata, name, reuse=False): 119 | with tf.variable_scope(name_or_scope=name, reuse=reuse): 120 | # first apply the cnn feature extraction stage 121 | cnn_out = self._feature_sequence_extraction( 122 | inputdata=inputdata, name='feature_extraction_module') 123 | # second apply the map to sequence stage 124 | sequence = self._map_to_sequence( 125 | inputdata=cnn_out, name='map_to_sequence_module') 126 | # third apply the sequence label stage 127 | net_out, raw_pred = self._sequence_label( 128 | inputdata=sequence, name='sequence_rnn_module') 129 | 130 | return net_out 131 | 132 | def compute_loss(self, inputdata, labels, name, reuse): 133 | inference_ret = self.inference( 134 | inputdata=inputdata, name=name, reuse=reuse) 135 | 136 | loss = tf.reduce_mean(tf.nn.ctc_loss(labels=labels, inputs=inference_ret, sequence_length=\ 137 | CFG.ARCH.SEQ_LENGTH * np.ones(CFG.TRAIN.BATCH_SIZE), ignore_longer_outputs_than_inputs=True), name='ctc_loss') 138 | 139 | return inference_ret, loss -------------------------------------------------------------------------------- /recognize_process/tools/test_crnn_jmz.py: 
-------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # @Time : 19-11-19 23:45 4 | # @Author : Jiang Mingzhi 5 | # @Reference : https://github.com/MaybeShewill-CV/CRNN_Tensorflow 6 | # https://github.com/bai-shang/crnn_ctc_ocr.Tensorflow 7 | # @File : test_crnn_jmz.py 8 | # @IDE : PyCharm Community Edition 9 | """ 10 | Recognize the text in images. Required inputs: 11 | 1. the directory containing the images; 12 | 2. txt files listing the image names; 13 | 3. the path of the model to load. 14 | 15 | The output is: 16 | a csv file. 17 | """ 18 | import sys 19 | sys.path.append('./') 20 | #print(sys.path) 21 | import argparse 22 | import os 23 | import cv2 24 | import numpy as np 25 | import tensorflow as tf 26 | import json 27 | 28 | from recognize_process.config import model_config 29 | from recognize_process.crnn_model import crnn_model 30 | from multiprocessing import Pool 31 | CFG = model_config.cfg 32 | 33 | 34 | def init_args(): 35 | """ 36 | Initialize the command-line arguments. 37 | :return: the parsed arguments 38 | """ 39 | parser = argparse.ArgumentParser() 40 | parser.add_argument('-i', '--image_path', type=str, 41 | help='Path to the images to be tested', 42 | default='./recognize_process/test_imgs/') 43 | parser.add_argument('-w', '--weights_path', type=str, 44 | help='Path to the pre-trained weights to use', 45 | default='./recognize_process/model_save/recognize_model') 46 | parser.add_argument('-c', '--char_dict_path', type=str, 47 | help='Directory where character dictionaries for the dataset were stored', 48 | default='./recognize_process/char_map/char_map.json') 49 | parser.add_argument('-t', '--txt_path', type=str, 50 | help='Directory holding the txt files that list the image names', 51 | default='./recognize_process/anno_test/') 52 | 53 | return parser.parse_args() 54 | 55 | 56 | def _resize_image(img): 57 | """ 58 | Resize an image to the fixed input height (32). 59 | :param img: the input image 60 | :return: the image resized to the fixed height 61 | """ 62 | dst_height = CFG.ARCH.INPUT_SIZE[1] 63 | h_old, w_old, _ = img.shape 64 | height = dst_height 65 | width = int(w_old * height / h_old) 66 | resized_img = cv2.resize(img, (width, height), interpolation=cv2.INTER_CUBIC) 67 | 68 | return resized_img 69 | 70 | 71 | def _sparse_matrix_to_list(sparse_matrix, char_map_dict_path=None): 72 | """ 73 | Convert a sparse matrix into a list of strings; see: https://github.com/bai-shang/crnn_ctc_ocr.Tensorflow 74 | :param sparse_matrix: 75 | :param char_map_dict_path: 76 | :return: 77 | """ 78 | indices = sparse_matrix.indices 79 | values = sparse_matrix.values 80 | dense_shape = sparse_matrix.dense_shape 81 | 82 | # the last index in sparse_matrix is the CTC blank token 83 | char_map_dict = json.load(open(char_map_dict_path, 'r', encoding='utf-8')) 84 | if char_map_dict is None: 85 | print("error") 86 | assert isinstance(char_map_dict, dict), 'char_map_dict is not a dict' 87 | 88 | dense_matrix = len(char_map_dict.keys()) * np.ones(dense_shape, dtype=np.int32) 89 | for i, indice in enumerate(indices): 90 | dense_matrix[indice[0], indice[1]] = values[i] 91 | string_list = [] 92 | for row in dense_matrix: 93 | string = [] 94 | for val in row: 95 | string.append(_int_to_string(val, char_map_dict)) 96 | string_list.append(''.join(s for s in string if s != '*')) 97 | return string_list 98 | 99 | 100 | def _int_to_string(value, char_map_dict=None): 101 | """ 102 | Convert a recognized index into its string (character); see: https://github.com/bai-shang/crnn_ctc_ocr.Tensorflow 103 | :param value: 104 | :param char_map_dict: 105 | :return: 106 | """ 107 | if char_map_dict is None: 108 | print("error") 109 | #char_map_dict = json.load(open(FLAGS.char_map_json_file, 'r')) 110 | assert isinstance(char_map_dict, dict), 'char_map_dict is 
not a dict' 111 | 112 | for key in char_map_dict.keys(): 113 | if char_map_dict[key] == int(value): 114 | return str(key) 115 | elif len(char_map_dict.keys()) == int(value): 116 | return "" 117 | raise ValueError('char map dict does not contain value {:d}; converting index to char failed.'.format(value)) 118 | 119 | 120 | def recognize_jmz(image_path, weights_path, char_dict_path, txt_file_path): 121 | """ 122 | Recognition entry point. 123 | :param image_path: directory containing the images 124 | :param weights_path: path to the saved model 125 | :param char_dict_path: location of the character dictionary file 126 | :param txt_file_path: directory of txt files containing the image names 127 | :return: None 128 | """ 129 | files = os.listdir(txt_file_path) 130 | txt_files = [txt for txt in files if txt.endswith(".txt") and txt.split(".")[0] + ".json" not in files] # skip txt files whose result json already exists 131 | 132 | inputdata = tf.placeholder(dtype=tf.float32, shape=[1, CFG.ARCH.INPUT_SIZE[1], None, CFG.ARCH.INPUT_CHANNELS], # variable width 133 | name='input') 134 | input_sequence_length = tf.placeholder(tf.int32, shape=[1], name='input_sequence_length') 135 | 136 | net = crnn_model.ShadowNet(phase='test', hidden_nums=CFG.ARCH.HIDDEN_UNITS, 137 | layers_nums=CFG.ARCH.HIDDEN_LAYERS, num_classes=CFG.ARCH.NUM_CLASSES) 138 | 139 | inference_ret = net.inference(inputdata=inputdata, name='shadow_net', reuse=False) 140 | 141 | decodes, _ = tf.nn.ctc_beam_search_decoder(inputs=inference_ret, sequence_length=input_sequence_length, # variable sequence length 142 | merge_repeated=False, beam_width=1) 143 | 144 | # config tf saver 145 | saver = tf.train.Saver() 146 | 147 | # config tf session 148 | sess_config = tf.ConfigProto(allow_soft_placement=True) 149 | # sess_config.gpu_options.per_process_gpu_memory_fraction = CFG.TRAIN.GPU_MEMORY_FRACTION 150 | # sess_config.gpu_options.allow_growth = CFG.TRAIN.TF_ALLOW_GROWTH 151 | 152 | sess_config.gpu_options.allow_growth = True 153 | sess = tf.Session(config=sess_config) 154 | with sess.as_default(): 155 | saver.restore(sess=sess, save_path=weights_path) 156 | for idx, txt_file in enumerate(txt_files): 157 | reg_result = {} 158 | txt_path = os.path.join(txt_file_path, txt_file) 159 | with open(txt_path, 'r') as fd: 160 | image_names = [line.strip() for line in fd.readlines()] 161 | for image_name in image_names: 162 | image_paths = os.path.join(image_path, image_name) 163 | image = cv2.imread(image_paths, cv2.IMREAD_COLOR) 164 | if image is None: 165 | print(image_paths + ' does not exist') 166 | continue 167 | image = _resize_image(image) 168 | image = np.array(image, np.float32) / 127.5 - 1.0 169 | seq_len = np.array([image.shape[1] / 4], dtype=np.int32) 170 | preds = sess.run(decodes, feed_dict={inputdata: [image], input_sequence_length: seq_len}) 171 | 172 | preds = _sparse_matrix_to_list(preds[0], char_dict_path) 173 | reg_result[image_name] = preds[0] 174 | print('Predict image {:s} result: {:s}'.format(image_name, preds[0])) 175 | with open(txt_path[:-4] + ".json", "w") as fw: # better: use .split('.') here 176 | json.dump(reg_result, fw) 177 | sess.close() 178 | 179 | return 180 | 181 | 182 | if __name__ == '__main__': 183 | # init images 184 | args = init_args() 185 | # recognize images 186 | # os.environ["CUDA_VISIBLE_DEVICES"] = "2" # pin the job to a specific GPU 187 | # pool.apply_async(recognize, (args.image_path, args.weights_path, args.char_dict_path, os.path.join(args.txt_path,txt_file), )) 188 | recognize_jmz(image_path=args.image_path, weights_path=args.weights_path, 189 | char_dict_path=args.char_dict_path, txt_file_path=args.txt_path) 190 | -------------------------------------------------------------------------------- /recognize_process/tools/mytest_crnn.py: 
-------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # @Time : 19-11-19 23:45 4 | # @Author : Miao Wenqiang 5 | # @Reference : https://github.com/MaybeShewill-CV/CRNN_Tensorflow 6 | # https://github.com/bai-shang/crnn_ctc_ocr.Tensorflow 7 | # @File : test_crnn.py 8 | # @IDE : PyCharm Community Edition 9 | """ 10 | Recognize the text in images. Required inputs: 11 | 1. the directory containing the images; 12 | 2. a txt file listing the image names; 13 | 3. the path of the model to load. 14 | 15 | The output is: 16 | the recognition results 17 | """ 18 | import argparse 19 | import os 20 | import time 21 | import cv2 22 | import numpy as np 23 | import tensorflow as tf 24 | import json 25 | 26 | import sys 27 | #sys.path.append('./') 28 | #print(sys.path) 29 | 30 | from recognize_process.config import model_config 31 | from recognize_process.crnn_model import crnn_model 32 | 33 | CFG = model_config.cfg 34 | 35 | 36 | def init_args(): 37 | """ 38 | Initialize the command-line arguments. 39 | :return: the parsed arguments 40 | """ 41 | parser = argparse.ArgumentParser() 42 | parser.add_argument('-i', '--image_path', type=str, 43 | help='Path to the images to be tested', 44 | # default='./recognize_process/test_imgs/') 45 | default='./test_imgs/') 46 | parser.add_argument('-w', '--weights_path', type=str, 47 | help='Path to the pre-trained weights to use', 48 | # default='./recognize_process/model_save/recognize_model') 49 | default='./recognize_model') 50 | parser.add_argument('-c', '--char_dict_path', type=str, 51 | help='Directory where character dictionaries for the dataset were stored', 52 | # default='./recognize_process/char_map/char_map.json') 53 | default='./char_map/char_map.json') 54 | parser.add_argument('-t', '--txt_path', type=str, 55 | help='Path to the txt file that lists the image names', 56 | # default='./recognize_process/img_list.txt') 57 | default='./img_list.txt') 58 | 59 | return parser.parse_args() 60 | 61 | 62 | def _resize_image(img): 63 | """ 64 | Resize an image to the fixed input height (32). 65 | :param img: the input image 66 | :return: the image resized to the fixed height 67 | """ 68 | dst_height = CFG.ARCH.INPUT_SIZE[1] 69 | h_old, w_old, _ = img.shape 70 | height = dst_height 71 | width = int(w_old * height / h_old) 72 | resized_img = cv2.resize(img, (width, height), interpolation=cv2.INTER_CUBIC) 73 | 74 | return resized_img 75 | 76 | 77 | def _sparse_matrix_to_list(sparse_matrix, char_map_dict_path=None): 78 | """ 79 | Convert a sparse matrix into a list of strings; see: https://github.com/bai-shang/crnn_ctc_ocr.Tensorflow 80 | :param sparse_matrix: 81 | :param char_map_dict_path: 82 | :return: 83 | """ 84 | indices = sparse_matrix.indices 85 | values = sparse_matrix.values 86 | dense_shape = sparse_matrix.dense_shape 87 | 88 | # the last index in sparse_matrix is the CTC blank token 89 | char_map_dict = json.load(open(char_map_dict_path, 'r', encoding='UTF-8')) 90 | if char_map_dict is None: 91 | print("error") 92 | assert isinstance(char_map_dict, dict), 'char_map_dict is not a dict' 93 | 94 | dense_matrix = len(char_map_dict.keys()) * np.ones(dense_shape, dtype=np.int32) 95 | for i, indice in enumerate(indices): 96 | dense_matrix[indice[0], indice[1]] = values[i] 97 | string_list = [] 98 | for row in dense_matrix: 99 | string = [] 100 | for val in row: 101 | string.append(_int_to_string(val, char_map_dict)) 102 | string_list.append(''.join(s for s in string if s != '*')) 103 | return string_list 104 | 105 | 106 | def _int_to_string(value, char_map_dict=None): 107 | """ 108 | Convert a recognized index into its string (character); see: https://github.com/bai-shang/crnn_ctc_ocr.Tensorflow 109 | :param value: 110 | :param char_map_dict: 111 | :return: 112 | """ 113 | if char_map_dict is None: 114 | print("error") 
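# A possible simplification, not applied here to keep the original behavior: precompute the inverse
# mapping once, e.g. index_to_char = {v: k for k, v in char_map_dict.items()} (hypothetical name),
# instead of scanning char_map_dict on every call.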
115 | #char_map_dict = json.load(open(FLAGS.char_map_json_file, 'r')) 116 | assert isinstance(char_map_dict, dict), 'char_map_dict is not a dict' 117 | 118 | for key in char_map_dict.keys(): 119 | if char_map_dict[key] == int(value): 120 | return str(key) 121 | elif len(char_map_dict.keys()) == int(value): 122 | return "" 123 | raise ValueError('char map dict does not contain value {:d}; converting index to char failed.'.format(value)) 124 | 125 | 126 | def recognize(image_path, weights_path, char_dict_path, txt_path): 127 | """ 128 | Recognition entry point. 129 | :param image_path: directory containing the images 130 | :param weights_path: path to the saved model 131 | :param char_dict_path: location of the character dictionary file 132 | :param txt_path: txt file containing the image names 133 | :return: None 134 | """ 135 | with open(txt_path, 'r', encoding='UTF-8') as fd: 136 | # image_names = [line.split(' ')[0] for line in fd.readlines()] # images with annotations 137 | image_names = [line.strip() for line in fd.readlines()] # images without annotations 138 | #with tf.device('/gpu:0'): 139 | inputdata = tf.placeholder(dtype=tf.float32, shape=[1, CFG.ARCH.INPUT_SIZE[1], None, CFG.ARCH.INPUT_CHANNELS], # variable width 140 | name='input') 141 | 142 | input_sequence_length = tf.placeholder(tf.int32, shape=[1], name='input_sequence_length') 143 | 144 | net = crnn_model.ShadowNet(phase='test', hidden_nums=CFG.ARCH.HIDDEN_UNITS, 145 | layers_nums=CFG.ARCH.HIDDEN_LAYERS, num_classes=CFG.ARCH.NUM_CLASSES) 146 | 147 | inference_ret = net.inference(inputdata=inputdata, name='shadow_net', reuse=False) 148 | 149 | #decodes = inference_ret 150 | decodes, _ = tf.nn.ctc_beam_search_decoder(inputs=inference_ret, sequence_length=input_sequence_length, # variable sequence length 151 | merge_repeated=False, beam_width=10) 152 | #preds = _sparse_matrix_to_list(decodes[0], char_dict_path) 153 | # end of the changes; the tf.device block was commented out on 2019-11-20 154 | 155 | # config tf saver 156 | saver = tf.train.Saver() 157 | 158 | # config tf session 159 | sess_config = tf.ConfigProto(allow_soft_placement=True) #, log_device_placement=True) 160 | # allow_soft_placement=True: ops that cannot run on the GPU automatically fall back to the CPU; log_device_placement=True: log which devices are used 161 | sess_config.gpu_options.per_process_gpu_memory_fraction = CFG.TRAIN.GPU_MEMORY_FRACTION 162 | sess_config.gpu_options.allow_growth = CFG.TRAIN.TF_ALLOW_GROWTH 163 | 164 | sess = tf.Session(config=sess_config) 165 | 166 | with sess.as_default(): 167 | saver.restore(sess=sess, save_path=weights_path) 168 | 169 | for image_name in image_names: 170 | # time_start = time.time() 171 | image_paths = os.path.join(image_path, image_name) 172 | # print(image_paths) 173 | image = cv2.imread(image_paths, cv2.IMREAD_COLOR) 174 | if image is None: 175 | print(image_paths + ' does not exist') 176 | continue 177 | image = np.array(image, np.float32) / 127.5 - 1.0 178 | seq_len = np.array([image.shape[1] / 4], dtype=np.int32) 179 | # time_end_1 = time.time() 180 | preds = sess.run(decodes, feed_dict={inputdata: [image], input_sequence_length: seq_len}) 181 | # time_end_2 = time.time() 182 | preds = _sparse_matrix_to_list(preds[0], char_dict_path) 183 | # time_end_3 = time.time() 184 | # print('Predict image {:s} result: {:s} cost time:{:f}'.format(image_name, preds[0], time_end-time_start)) 185 | # print('Predict image {:s} total time:{:f} pre_process time:{:f}, run time:{:f}, convert_time:{:f}'.format(preds[0], time_end_3 - time_start, time_end_1 - time_start, time_end_2 - time_end_1, time_end_3 - time_end_2)) 186 | print('Predict image {:s} result: {:s}'.format(image_name, preds[0])) 187 | 188 | sess.close() 189 | 190 | return 191 | 192 | 193 | if __name__ == '__main__': 194 | # init images 195 | args = 
init_args() 196 | # recognize images 197 | recognize(image_path=args.image_path, weights_path=args.weights_path, 198 | char_dict_path=args.char_dict_path, txt_path=args.txt_path) 199 | --------------------------------------------------------------------------------
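A minimal usage sketch for the recognition entry point above. This is a sketch only: it assumes the script is run from the repository root with the paths laid out as in this listing (the char_map directory does not appear in the tree above, so its path is taken on trust from the script defaults):

from recognize_process.tools.mytest_crnn import recognize

recognize(image_path='./recognize_process/test_imgs/',
          weights_path='./recognize_process/model_save/recognize_model',
          char_dict_path='./recognize_process/char_map/char_map.json',
          txt_path='./recognize_process/image_list.txt')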