├── sc2bench ├── common │ ├── __init__.py │ └── config_util.py ├── __init__.py ├── models │ ├── detection │ │ └── __init__.py │ ├── segmentation │ │ └── __init__.py │ └── __init__.py ├── transforms │ ├── __init__.py │ └── collator.py └── loss.py ├── script ├── task │ ├── coco │ │ └── __init__.py │ ├── utils │ │ ├── __init__.py │ │ ├── dataset.py │ │ └── eval.py │ └── custom │ │ ├── __init__.py │ │ └── collator.py ├── software │ ├── install_vtm.sh │ └── install_bpg.sh ├── neural_input_compression │ ├── coco2017-object_detection.sh │ ├── ilsvrc2012-image_classification.sh │ ├── pascal_voc2012-semantic_segmentation.sh │ └── README.md ├── codec_input_compression │ ├── coco2017-object_detection.sh │ ├── ilsvrc2012-image_classification.sh │ ├── pascal_voc2012-semantic_segmentation.sh │ └── README.md └── README.md ├── .gitignore ├── MANIFEST.in ├── imgs ├── ilsvrc2012-overview.png └── input_vs_supervised_compression.png ├── legacy ├── README.md ├── script │ ├── software │ │ ├── install_vtm.sh │ │ └── install_bpg.sh │ ├── neural_input_compression │ │ ├── coco2017-object_detection.sh │ │ ├── ilsvrc2012-image_classification.sh │ │ ├── pascal_voc2012-semantic_segmentation.sh │ │ └── README.md │ ├── codec_input_compression │ │ ├── coco2017-object_detection.sh │ │ ├── ilsvrc2012-image_classification.sh │ │ ├── pascal_voc2012-semantic_segmentation.sh │ │ └── README.md │ └── README.md └── configs │ ├── coco2017 │ └── input_compression │ │ ├── jpeg-faster_rcnn_resnet50_fpn.yaml │ │ ├── webp-faster_rcnn_resnet50_fpn.yaml │ │ ├── bpg-faster_rcnn_resnet50_fpn.yaml │ │ ├── factorized_prior-faster_rcnn_resnet50_fpn.yaml │ │ ├── mean_scale_hyperprior-faster_rcnn_resnet50_fpn.yaml │ │ ├── scale_hyperprior-faster_rcnn_resnet50_fpn.yaml │ │ └── joint_autoregressive_hierarchical_prior-faster_rcnn_resnet50_fpn.yaml │ ├── ilsvrc2012 │ ├── input_compression │ │ ├── jpeg-tf_efficientnet_l2_ns.yaml │ │ ├── jpeg-tf_efficientnet_l2_ns_475.yaml │ │ ├── jpeg-resnet50.yaml │ │ ├── webp-resnet50.yaml 
│ │ ├── jpeg-resnet101.yaml │ │ ├── jpeg-resnet152.yaml │ │ ├── webp-resnet101.yaml │ │ ├── webp-resnet152.yaml │ │ ├── bpg-resnet50.yaml │ │ ├── bpg-resnet101.yaml │ │ ├── bpg-resnet152.yaml │ │ ├── vtm-resnet50.yaml │ │ ├── factorized_prior-resnet50.yaml │ │ ├── scale_hyperprior-resnet50.yaml │ │ ├── mean_scale_hyperprior-resnet50.yaml │ │ ├── joint_autoregressive_hierarchical_prior-resnet50.yaml │ │ ├── factorized_prior-tf_efficientnet_l2_ns.yaml │ │ ├── mean_scale_hyperprior-tf_efficientnet_l2_ns.yaml │ │ ├── scale_hyperprior-tf_efficientnet_l2_ns.yaml │ │ ├── scale_hyperprior-tf_efficientnet_l2_ns_475.yaml │ │ ├── factorized_prior-tf_efficientnet_l2_ns_475.yaml │ │ └── mean_scale_hyperprior-tf_efficientnet_l2_ns_475.yaml │ └── feature_compression │ │ ├── jpeg-resnet50.yaml │ │ └── webp-resnet50.yaml │ └── pascal_voc2012 │ └── input_compression │ ├── jpeg-deeplabv3_resnet50.yaml │ ├── webp-deeplabv3_resnet50.yaml │ ├── jpeg-deeplabv3_resnet101.yaml │ ├── webp-deeplabv3_resnet101.yaml │ ├── bpg-deeplabv3_resnet50.yaml │ ├── bpg-deeplabv3_resnet101.yaml │ ├── factorized_prior-deeplabv3_resnet50.yaml │ ├── scale_hyperprior-deeplabv3_resnet50.yaml │ ├── factorized_prior-deeplabv3_resnet101.yaml │ ├── mean_scale_hyperprior-deeplabv3_resnet101.yaml │ ├── mean_scale_hyperprior-deeplabv3_resnet50.yaml │ ├── scale_hyperprior-deeplabv3_resnet101.yaml │ ├── joint_autoregressive_hierarchical_prior-deeplabv3_resnet101.yaml │ └── joint_autoregressive_hierarchical_prior-deeplabv3_resnet50.yaml ├── setup.cfg ├── docs └── source │ ├── subpkgs │ ├── loss.rst │ ├── analysis.rst │ ├── common.rst │ ├── transform.rst │ └── models.rst │ ├── package.rst │ ├── usage.rst │ └── conf.py ├── CITATION.bib ├── Pipfile ├── .github ├── workflows │ ├── documentation.yaml │ └── python-publish.yml └── ISSUE_TEMPLATE │ └── bug-report--not-question-.md ├── setup.py ├── LICENSE └── configs ├── coco2017 └── input_compression │ ├── mean_scale_hyperprior-faster_rcnn_resnet50_fpn.yaml │ ├── 
factorized_prior-faster_rcnn_resnet50_fpn.yaml │ ├── scale_hyperprior-faster_rcnn_resnet50_fpn.yaml │ ├── joint_autoregressive_hierarchical_prior-faster_rcnn_resnet50_fpn.yaml │ ├── jpeg-faster_rcnn_resnet50_fpn.yaml │ ├── webp-faster_rcnn_resnet50_fpn.yaml │ └── bpg-faster_rcnn_resnet50_fpn.yaml ├── ilsvrc2012 ├── input_compression │ ├── jpeg-resnet50.yaml │ ├── webp-resnet50.yaml │ ├── jpeg-resnet101.yaml │ ├── jpeg-resnet152.yaml │ ├── webp-resnet101.yaml │ ├── webp-resnet152.yaml │ ├── bpg-resnet50.yaml │ ├── bpg-resnet101.yaml │ ├── bpg-resnet152.yaml │ ├── jpeg-tf_efficientnet_l2_ns.yaml │ ├── jpeg-tf_efficientnet_l2_ns_475.yaml │ ├── mean_scale_hyperprior-resnet50.yaml │ ├── factorized_prior-resnet50.yaml │ ├── scale_hyperprior-resnet50.yaml │ ├── joint_autoregressive_hierarchical_prior-resnet50.yaml │ └── vtm-resnet50.yaml └── feature_compression │ ├── jpeg-resnet50.yaml │ └── webp-resnet50.yaml └── pascal_voc2012 └── input_compression ├── jpeg-deeplabv3_resnet50.yaml ├── webp-deeplabv3_resnet101.yaml ├── webp-deeplabv3_resnet50.yaml ├── jpeg-deeplabv3_resnet101.yaml ├── bpg-deeplabv3_resnet50.yaml └── bpg-deeplabv3_resnet101.yaml /sc2bench/common/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /sc2bench/__init__.py: -------------------------------------------------------------------------------- 1 | __version__ = '0.1.1-dev' 2 | -------------------------------------------------------------------------------- /sc2bench/models/detection/__init__.py: -------------------------------------------------------------------------------- 1 | from . import rcnn 2 | -------------------------------------------------------------------------------- /script/task/coco/__init__.py: -------------------------------------------------------------------------------- 1 | from . 
import dataset, eval 2 | -------------------------------------------------------------------------------- /script/task/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from . import eval, dataset 2 | -------------------------------------------------------------------------------- /sc2bench/models/segmentation/__init__.py: -------------------------------------------------------------------------------- 1 | from . import deeplabv3 2 | -------------------------------------------------------------------------------- /script/task/custom/__init__.py: -------------------------------------------------------------------------------- 1 | from . import collator, sampler, transform 2 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .idea/ 2 | .ipynb_checkpoints/ 3 | __pycache__/ 4 | .editorconfig 5 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include *.md 2 | include LICENSE 3 | 4 | recursive-exclude * __pycache__ -------------------------------------------------------------------------------- /imgs/ilsvrc2012-overview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yoshitomo-matsubara/sc2-benchmark/HEAD/imgs/ilsvrc2012-overview.png -------------------------------------------------------------------------------- /legacy/README.md: -------------------------------------------------------------------------------- 1 | # ***legacy/*** 2 | The configurations and scripts in `legacy/` are designed for sc2bench <= v0.0.4 and torchdistill <= v0.3.3. 
3 | -------------------------------------------------------------------------------- /imgs/input_vs_supervised_compression.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yoshitomo-matsubara/sc2-benchmark/HEAD/imgs/input_vs_supervised_compression.png -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [metadata] 2 | license = MIT 3 | license_files = LICENSE 4 | 5 | [pep8] 6 | max-line-length = 120 7 | 8 | [flake8] 9 | max-line-length = 120 10 | exclude = venv 11 | -------------------------------------------------------------------------------- /docs/source/subpkgs/loss.rst: -------------------------------------------------------------------------------- 1 | sc2bench.loss 2 | ===== 3 | 4 | 5 | .. toctree:: 6 | :maxdepth: 3 7 | :caption: Contents: 8 | 9 | .. automodule:: sc2bench.loss 10 | :members: 11 | -------------------------------------------------------------------------------- /docs/source/subpkgs/analysis.rst: -------------------------------------------------------------------------------- 1 | sc2bench.analysis 2 | ===== 3 | 4 | 5 | .. toctree:: 6 | :maxdepth: 3 7 | :caption: Contents: 8 | 9 | .. automodule:: sc2bench.analysis 10 | :members: 11 | -------------------------------------------------------------------------------- /script/task/utils/dataset.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | 4 | def get_num_iterations(dataset, batch_size, world_size): 5 | num_iterations = math.ceil(len(dataset) / batch_size / world_size) 6 | return num_iterations 7 | -------------------------------------------------------------------------------- /docs/source/package.rst: -------------------------------------------------------------------------------- 1 | sc2bench API 2 | ===== 3 | 4 | 5 | .. 
toctree:: 6 | subpkgs/models 7 | subpkgs/transform 8 | subpkgs/common 9 | subpkgs/loss 10 | subpkgs/analysis 11 | :maxdepth: 2 12 | :caption: Overview -------------------------------------------------------------------------------- /script/software/install_vtm.sh: -------------------------------------------------------------------------------- 1 | mkdir ~/software 2 | cd ~/software 3 | git clone https://vcgit.hhi.fraunhofer.de/jvet/VVCSoftware_VTM.git 4 | cd VVCSoftware_VTM 5 | mkdir build 6 | cd build 7 | cmake .. -DCMAKE_BUILD_TYPE=Release 8 | make -j 9 | -------------------------------------------------------------------------------- /legacy/script/software/install_vtm.sh: -------------------------------------------------------------------------------- 1 | mkdir ~/software 2 | cd ~/software 3 | git clone https://vcgit.hhi.fraunhofer.de/jvet/VVCSoftware_VTM.git 4 | cd VVCSoftware_VTM 5 | mkdir build 6 | cd build 7 | cmake .. -DCMAKE_BUILD_TYPE=Release 8 | make -j 9 | -------------------------------------------------------------------------------- /docs/source/subpkgs/common.rst: -------------------------------------------------------------------------------- 1 | sc2bench.common 2 | ===== 3 | 4 | 5 | .. toctree:: 6 | :maxdepth: 3 7 | :caption: Contents: 8 | 9 | ---- 10 | 11 | sc2bench.common.config_util 12 | ------------ 13 | 14 | .. automodule:: sc2bench.common.config_util 15 | :members: 16 | -------------------------------------------------------------------------------- /sc2bench/transforms/__init__.py: -------------------------------------------------------------------------------- 1 | from . 
import collator 2 | from .codec import CODEC_TRANSFORM_MODULE_DICT 3 | from .misc import MISC_TRANSFORM_MODULE_DICT 4 | 5 | TRANSFORM_MODULE_DICT = dict() 6 | TRANSFORM_MODULE_DICT.update(CODEC_TRANSFORM_MODULE_DICT) 7 | TRANSFORM_MODULE_DICT.update(MISC_TRANSFORM_MODULE_DICT) 8 | -------------------------------------------------------------------------------- /CITATION.bib: -------------------------------------------------------------------------------- 1 | @article{matsubara2023sc2, 2 | title={{SC2 Benchmark: Supervised Compression for Split Computing}}, 3 | author={Matsubara, Yoshitomo and Yang, Ruihan and Levorato, Marco and Mandt, Stephan}, 4 | journal={Transactions on Machine Learning Research}, 5 | issn={2835-8856}, 6 | year={2023}, 7 | url={https://openreview.net/forum?id=p28wv4G65d} 8 | } -------------------------------------------------------------------------------- /script/software/install_bpg.sh: -------------------------------------------------------------------------------- 1 | mkdir ~/software 2 | cd ~/software 3 | wget https://bellard.org/bpg/libbpg-0.9.8.tar.gz 4 | tar -xvf libbpg-0.9.8.tar.gz 5 | cd libbpg-0.9.8/ 6 | sudo apt-get -y install libpng-dev 7 | sudo apt-get -y install libjpeg-dev 8 | sudo apt-get -y install libsdl-dev 9 | sudo apt-get -y install libsdl-image1.2-dev 10 | sudo apt-get remove libnuma-dev 11 | sudo make 12 | sudo apt-get install libnuma-dev 13 | -------------------------------------------------------------------------------- /legacy/script/software/install_bpg.sh: -------------------------------------------------------------------------------- 1 | mkdir ~/software 2 | cd ~/software 3 | wget https://bellard.org/bpg/libbpg-0.9.8.tar.gz 4 | tar -xvf libbpg-0.9.8.tar.gz 5 | cd libbpg-0.9.8/ 6 | sudo apt-get -y install libpng-dev 7 | sudo apt-get -y install libjpeg-dev 8 | sudo apt-get -y install libsdl-dev 9 | sudo apt-get -y install libsdl-image1.2-dev 10 | sudo apt-get remove libnuma-dev 11 | sudo make 12 | sudo apt-get 
install libnuma-dev 13 | -------------------------------------------------------------------------------- /Pipfile: -------------------------------------------------------------------------------- 1 | [[source]] 2 | name = "pypi" 3 | url = "https://pypi.org/simple" 4 | verify_ssl = true 5 | 6 | [dev-packages] 7 | 8 | [packages] 9 | torch = ">=2.0.0" 10 | torchvision = ">=0.15.1" 11 | numpy = "*" 12 | scipy = "*" 13 | cython = "*" 14 | pycocotools = ">=2.0.2" 15 | matplotlib = "*" 16 | seaborn = "*" 17 | torchdistill = ">=1.0.0" 18 | compressai = ">=1.2.3" 19 | timm = "*" 20 | sc2bench = {editable = true, path = "."} 21 | 22 | [requires] 23 | python_version = "3.12" 24 | -------------------------------------------------------------------------------- /docs/source/subpkgs/transform.rst: -------------------------------------------------------------------------------- 1 | sc2bench.transform 2 | ===== 3 | 4 | 5 | .. toctree:: 6 | :maxdepth: 3 7 | :caption: Contents: 8 | 9 | ---- 10 | 11 | sc2bench.transform.codec 12 | ------------ 13 | 14 | .. automodule:: sc2bench.transforms.codec 15 | :members: 16 | 17 | ---- 18 | 19 | sc2bench.transform.collator 20 | ------------ 21 | 22 | .. automodule:: sc2bench.transforms.collator 23 | :members: 24 | 25 | ---- 26 | 27 | sc2bench.transform.misc 28 | ------------ 29 | 30 | .. 
automodule:: sc2bench.transforms.misc 31 | :members: 32 | -------------------------------------------------------------------------------- /script/neural_input_compression/coco2017-object_detection.sh: -------------------------------------------------------------------------------- 1 | BASE_NAME=${1} 2 | MAX_QUALITY=${2} 3 | 4 | if [ $# -ne 2 ]; then 5 | echo "Illegal number of arguments" 6 | exit 2 7 | fi 8 | 9 | for quality in $(seq 1 1 ${MAX_QUALITY}); 10 | do 11 | json_str='{"models": {"model": {"compression_model": {"params": {"quality": ' 12 | json_str+=${quality} 13 | json_str+='}}}}}' 14 | python script/task/object_detection.py \ 15 | --config configs/coco2017/input_compression/${BASE_NAME}.yaml \ 16 | --run_log log/input_compression/${BASE_NAME}-quality${quality}.txt \ 17 | --json "${json_str}" -student_only -test_only -no_dp_eval 18 | done 19 | -------------------------------------------------------------------------------- /script/neural_input_compression/ilsvrc2012-image_classification.sh: -------------------------------------------------------------------------------- 1 | BASE_NAME=${1} 2 | MAX_QUALITY=${2} 3 | 4 | if [ $# -ne 2 ]; then 5 | echo "Illegal number of arguments" 6 | exit 2 7 | fi 8 | 9 | for quality in $(seq 1 1 ${MAX_QUALITY}); 10 | do 11 | json_str='{"models": {"model": {"compression_model": {"params": {"quality": ' 12 | json_str+=${quality} 13 | json_str+='}}}}}' 14 | python script/task/image_classification.py \ 15 | --config configs/ilsvrc2012/input_compression/${BASE_NAME}.yaml \ 16 | --run_log log/input_compression/${BASE_NAME}-quality${quality}.txt \ 17 | --json "${json_str}" -student_only -test_only -no_dp_eval 18 | done 19 | -------------------------------------------------------------------------------- /legacy/script/neural_input_compression/coco2017-object_detection.sh: -------------------------------------------------------------------------------- 1 | BASE_NAME=${1} 2 | MAX_QUALITY=${2} 3 | 4 | if [ $# -ne 2 ]; then 5 | echo 
"Illegal number of arguments" 6 | exit 2 7 | fi 8 | 9 | for quality in $(seq 1 1 ${MAX_QUALITY}); 10 | do 11 | json_str='{"models": {"model": {"compression_model": {"params": {"quality": ' 12 | json_str+=${quality} 13 | json_str+='}}}}}' 14 | python legacy/script/task/object_detection.py \ 15 | --config legacy/configs/coco2017/input_compression/${BASE_NAME}.yaml \ 16 | --log legacy/log/input_compression/${BASE_NAME}-quality${quality}.txt \ 17 | --json "${json_str}" -student_only -test_only -no_dp_eval 18 | done 19 | -------------------------------------------------------------------------------- /script/neural_input_compression/pascal_voc2012-semantic_segmentation.sh: -------------------------------------------------------------------------------- 1 | BASE_NAME=${1} 2 | MAX_QUALITY=${2} 3 | 4 | if [ $# -ne 2 ]; then 5 | echo "Illegal number of arguments" 6 | exit 2 7 | fi 8 | 9 | for quality in $(seq 1 1 ${MAX_QUALITY}); 10 | do 11 | json_str='{"models": {"model": {"compression_model": {"params": {"quality": ' 12 | json_str+=${quality} 13 | json_str+='}}}}}' 14 | python script/task/semantic_segmentation.py \ 15 | --config configs/pascal_voc2012/input_compression/${BASE_NAME}.yaml \ 16 | --run_log log/input_compression/${BASE_NAME}-quality${quality}.txt \ 17 | --json "${json_str}" -student_only -test_only -no_dp_eval 18 | done 19 | -------------------------------------------------------------------------------- /legacy/script/neural_input_compression/ilsvrc2012-image_classification.sh: -------------------------------------------------------------------------------- 1 | BASE_NAME=${1} 2 | MAX_QUALITY=${2} 3 | 4 | if [ $# -ne 2 ]; then 5 | echo "Illegal number of arguments" 6 | exit 2 7 | fi 8 | 9 | for quality in $(seq 1 1 ${MAX_QUALITY}); 10 | do 11 | json_str='{"models": {"model": {"compression_model": {"params": {"quality": ' 12 | json_str+=${quality} 13 | json_str+='}}}}}' 14 | python legacy/script/task/image_classification.py \ 15 | --config 
legacy/configs/ilsvrc2012/input_compression/${BASE_NAME}.yaml \ 16 | --log legacy/log/input_compression/${BASE_NAME}-quality${quality}.txt \ 17 | --json "${json_str}" -student_only -test_only -no_dp_eval 18 | done 19 | -------------------------------------------------------------------------------- /legacy/script/neural_input_compression/pascal_voc2012-semantic_segmentation.sh: -------------------------------------------------------------------------------- 1 | BASE_NAME=${1} 2 | MAX_QUALITY=${2} 3 | 4 | if [ $# -ne 2 ]; then 5 | echo "Illegal number of arguments" 6 | exit 2 7 | fi 8 | 9 | for quality in $(seq 1 1 ${MAX_QUALITY}); 10 | do 11 | json_str='{"models": {"model": {"compression_model": {"params": {"quality": ' 12 | json_str+=${quality} 13 | json_str+='}}}}}' 14 | python legacy/script/task/semantic_segmentation.py \ 15 | --config legacy/configs/pascal_voc2012/input_compression/${BASE_NAME}.yaml \ 16 | --log legacy/log/input_compression/${BASE_NAME}-quality${quality}.txt \ 17 | --json "${json_str}" -student_only -test_only -no_dp_eval 18 | done 19 | -------------------------------------------------------------------------------- /sc2bench/common/config_util.py: -------------------------------------------------------------------------------- 1 | def overwrite_config(org_config, sub_config): 2 | """ 3 | Overwrites a configuration. 4 | 5 | :param org_config: (nested) dictionary of configuration to be updated. 6 | :type org_config: dict 7 | :param sub_config: (nested) dictionary to be added to org_config. 
8 | :type sub_config: dict 9 | """ 10 | for sub_key, sub_value in sub_config.items(): 11 | if sub_key in org_config: 12 | if isinstance(sub_value, dict): 13 | overwrite_config(org_config[sub_key], sub_value) 14 | else: 15 | org_config[sub_key] = sub_value 16 | else: 17 | org_config[sub_key] = sub_value 18 | -------------------------------------------------------------------------------- /sc2bench/models/__init__.py: -------------------------------------------------------------------------------- 1 | from . import registry, detection, segmentation 2 | from .backbone import BACKBONE_CLASS_DICT, BACKBONE_FUNC_DICT 3 | from .detection.registry import DETECTION_MODEL_CLASS_DICT, DETECTION_MODEL_FUNC_DICT 4 | from .segmentation.registry import SEGMENTATION_MODEL_CLASS_DICT, SEGMENTATION_MODEL_FUNC_DICT 5 | from .wrapper import WRAPPER_CLASS_DICT 6 | 7 | MODEL_DICT = dict() 8 | MODEL_DICT.update(BACKBONE_CLASS_DICT) 9 | MODEL_DICT.update(BACKBONE_FUNC_DICT) 10 | MODEL_DICT.update(DETECTION_MODEL_CLASS_DICT) 11 | MODEL_DICT.update(DETECTION_MODEL_FUNC_DICT) 12 | MODEL_DICT.update(SEGMENTATION_MODEL_CLASS_DICT) 13 | MODEL_DICT.update(SEGMENTATION_MODEL_FUNC_DICT) 14 | MODEL_DICT.update(WRAPPER_CLASS_DICT) 15 | -------------------------------------------------------------------------------- /script/codec_input_compression/coco2017-object_detection.sh: -------------------------------------------------------------------------------- 1 | BASE_NAME=${1} 2 | FORMAT_NAME=${2} 3 | 4 | if [ $# -eq 5 ] 5 | then 6 | MIN_QUALITY=${3} 7 | STEP_SIZE=${4} 8 | MAX_QUALITY=${5} 9 | else 10 | MIN_QUALITY=10 11 | STEP_SIZE=10 12 | MAX_QUALITY=100 13 | fi 14 | 15 | 16 | for quality in $(seq ${MIN_QUALITY} ${STEP_SIZE} ${MAX_QUALITY}); 17 | do 18 | sed -i "s/quality:.*/quality: ${quality}/g" configs/coco2017/input_compression/${BASE_NAME}.yaml 19 | python script/task/object_detection.py \ 20 | --config configs/coco2017/input_compression/${BASE_NAME}.yaml \ 21 | --run_log 
log/${FORMAT_NAME}_compression/${BASE_NAME}-quality${quality}.txt -student_only -test_only -no_dp_eval 22 | done 23 | 24 | sed -i "s/quality:.*/quality:/g" configs/coco2017/input_compression/${BASE_NAME}.yaml 25 | -------------------------------------------------------------------------------- /script/codec_input_compression/ilsvrc2012-image_classification.sh: -------------------------------------------------------------------------------- 1 | BASE_NAME=${1} 2 | FORMAT_NAME=${2} 3 | 4 | if [ $# -eq 5 ] 5 | then 6 | MIN_QUALITY=${3} 7 | STEP_SIZE=${4} 8 | MAX_QUALITY=${5} 9 | else 10 | MIN_QUALITY=10 11 | STEP_SIZE=10 12 | MAX_QUALITY=100 13 | fi 14 | 15 | 16 | for quality in $(seq ${MIN_QUALITY} ${STEP_SIZE} ${MAX_QUALITY}); 17 | do 18 | sed -i "s/quality:.*/quality: ${quality}/g" configs/ilsvrc2012/input_compression/${BASE_NAME}.yaml 19 | python script/task/image_classification.py \ 20 | --config configs/ilsvrc2012/input_compression/${BASE_NAME}.yaml \ 21 | --run_log log/${FORMAT_NAME}_compression/${BASE_NAME}-quality${quality}.txt -student_only -test_only -no_dp_eval 22 | done 23 | 24 | sed -i "s/quality:.*/quality:/g" configs/ilsvrc2012/input_compression/${BASE_NAME}.yaml 25 | -------------------------------------------------------------------------------- /legacy/script/codec_input_compression/coco2017-object_detection.sh: -------------------------------------------------------------------------------- 1 | BASE_NAME=${1} 2 | FORMAT_NAME=${2} 3 | 4 | if [ $# -eq 5 ] 5 | then 6 | MIN_QUALITY=${3} 7 | STEP_SIZE=${4} 8 | MAX_QUALITY=${5} 9 | else 10 | MIN_QUALITY=10 11 | STEP_SIZE=10 12 | MAX_QUALITY=100 13 | fi 14 | 15 | 16 | for quality in $(seq ${MIN_QUALITY} ${STEP_SIZE} ${MAX_QUALITY}); 17 | do 18 | sed -i "s/quality:.*/quality: ${quality}/g" legacy/configs/coco2017/input_compression/${BASE_NAME}.yaml 19 | python legacy/script/task/object_detection.py \ 20 | --config legacy/configs/coco2017/input_compression/${BASE_NAME}.yaml \ 21 | --log 
legacy/log/${FORMAT_NAME}_compression/${BASE_NAME}-quality${quality}.txt -student_only -test_only -no_dp_eval 22 | done 23 | 24 | sed -i "s/quality:.*/quality:/g" legacy/configs/coco2017/input_compression/${BASE_NAME}.yaml 25 | -------------------------------------------------------------------------------- /script/codec_input_compression/pascal_voc2012-semantic_segmentation.sh: -------------------------------------------------------------------------------- 1 | BASE_NAME=${1} 2 | FORMAT_NAME=${2} 3 | 4 | if [ $# -eq 5 ] 5 | then 6 | MIN_QUALITY=${3} 7 | STEP_SIZE=${4} 8 | MAX_QUALITY=${5} 9 | else 10 | MIN_QUALITY=10 11 | STEP_SIZE=10 12 | MAX_QUALITY=100 13 | fi 14 | 15 | 16 | for quality in $(seq ${MIN_QUALITY} ${STEP_SIZE} ${MAX_QUALITY}); 17 | do 18 | sed -i "s/quality:.*/quality: ${quality}/g" configs/pascal_voc2012/input_compression/${BASE_NAME}.yaml 19 | python script/task/semantic_segmentation.py \ 20 | --config configs/pascal_voc2012/input_compression/${BASE_NAME}.yaml \ 21 | --run_log log/${FORMAT_NAME}_compression/${BASE_NAME}-quality${quality}.txt -student_only -test_only -no_dp_eval 22 | done 23 | 24 | sed -i "s/quality:.*/quality:/g" configs/pascal_voc2012/input_compression/${BASE_NAME}.yaml 25 | -------------------------------------------------------------------------------- /legacy/script/codec_input_compression/ilsvrc2012-image_classification.sh: -------------------------------------------------------------------------------- 1 | BASE_NAME=${1} 2 | FORMAT_NAME=${2} 3 | 4 | if [ $# -eq 5 ] 5 | then 6 | MIN_QUALITY=${3} 7 | STEP_SIZE=${4} 8 | MAX_QUALITY=${5} 9 | else 10 | MIN_QUALITY=10 11 | STEP_SIZE=10 12 | MAX_QUALITY=100 13 | fi 14 | 15 | 16 | for quality in $(seq ${MIN_QUALITY} ${STEP_SIZE} ${MAX_QUALITY}); 17 | do 18 | sed -i "s/quality:.*/quality: ${quality}/g" legacy/configs/ilsvrc2012/input_compression/${BASE_NAME}.yaml 19 | python legacy/script/task/image_classification.py \ 20 | --config 
legacy/configs/ilsvrc2012/input_compression/${BASE_NAME}.yaml \ 21 | --log legacy/log/${FORMAT_NAME}_compression/${BASE_NAME}-quality${quality}.txt -student_only -test_only -no_dp_eval 22 | done 23 | 24 | sed -i "s/quality:.*/quality:/g" legacy/configs/ilsvrc2012/input_compression/${BASE_NAME}.yaml 25 | -------------------------------------------------------------------------------- /docs/source/usage.rst: -------------------------------------------------------------------------------- 1 | Usage 2 | ===== 3 | 4 | 5 | .. toctree:: 6 | :maxdepth: 3 7 | :caption: Overview 8 | 9 | Installation 10 | ------------ 11 | 12 | To use `sc2bench `_, first install it using pip: 13 | 14 | .. code-block:: console 15 | 16 | $ pip install sc2bench 17 | 18 | 19 | Examples 20 | ------------ 21 | 22 | `The official repository (https://github.com/yoshitomo-matsubara/sc2-benchmark) `_ 23 | offers many example scripts, configs, and checkpoints of trained models in `sc2bench`. 24 | 25 | Currently, `example scripts `_ 26 | cover the following three tasks: 27 | 28 | - Image classification (ILSVRC 2012) 29 | - Object detection (COCO 2017) 30 | - Semantic segmentation (PASCAL VOC 2012) 31 | -------------------------------------------------------------------------------- /legacy/script/codec_input_compression/pascal_voc2012-semantic_segmentation.sh: -------------------------------------------------------------------------------- 1 | BASE_NAME=${1} 2 | FORMAT_NAME=${2} 3 | 4 | if [ $# -eq 5 ] 5 | then 6 | MIN_QUALITY=${3} 7 | STEP_SIZE=${4} 8 | MAX_QUALITY=${5} 9 | else 10 | MIN_QUALITY=10 11 | STEP_SIZE=10 12 | MAX_QUALITY=100 13 | fi 14 | 15 | 16 | for quality in $(seq ${MIN_QUALITY} ${STEP_SIZE} ${MAX_QUALITY}); 17 | do 18 | sed -i "s/quality:.*/quality: ${quality}/g" legacy/configs/pascal_voc2012/input_compression/${BASE_NAME}.yaml 19 | python legacy/script/task/semantic_segmentation.py \ 20 | --config legacy/configs/pascal_voc2012/input_compression/${BASE_NAME}.yaml \ 21 | --log 
legacy/log/${FORMAT_NAME}_compression/${BASE_NAME}-quality${quality}.txt -student_only -test_only -no_dp_eval 22 | done 23 | 24 | sed -i "s/quality:.*/quality:/g" legacy/configs/pascal_voc2012/input_compression/${BASE_NAME}.yaml 25 | -------------------------------------------------------------------------------- /.github/workflows/documentation.yaml: -------------------------------------------------------------------------------- 1 | name: Deploy Sphinx Documentation 2 | on: [push, pull_request, workflow_dispatch] 3 | permissions: 4 | contents: write 5 | jobs: 6 | docs: 7 | runs-on: ubuntu-latest 8 | steps: 9 | - uses: actions/checkout@v4 10 | - name: Set up Python 11 | uses: actions/setup-python@v5 12 | with: 13 | python-version: '3.10' 14 | - name: Install dependencies 15 | run: | 16 | pip install sphinx sphinx_rtd_theme sphinxcontrib-youtube 17 | - name: Install sc2bench 18 | run: | 19 | pip install -e . 20 | - name: Sphinx build 21 | run: | 22 | sphinx-build -b html docs/source/ docs/_build/ 23 | - name: Deploy 24 | uses: peaceiris/actions-gh-pages@v3 25 | if: ${{ github.event_name == 'push' && github.ref == 'refs/heads/main' }} 26 | with: 27 | publish_branch: gh-pages 28 | github_token: ${{ secrets.GITHUB_TOKEN }} 29 | publish_dir: docs/_build/ 30 | force_orphan: true 31 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug-report--not-question-.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report (Not question) 3 | about: Create a report to help us improve (Use Discussions to ask questions) 4 | title: "[BUG] Please use Discussions instead of Issues to ask questions" 5 | labels: bug 6 | assignees: yoshitomo-matsubara 7 | 8 | --- 9 | 10 | Please use [Discussions](https://github.com/yoshitomo-matsubara/sc2-benchmark/discussions) to ask questions. 11 | 12 | **Describe the bug** 13 | A clear and concise description of what the bug is. 
14 | 15 | **To Reproduce** 16 | Provide 17 | 1. Exact command to run your code 18 | 2. Whether or not you made any changes in Python code (if so, how you made the changes?) 19 | 3. YAML config file 20 | 4. Log file 21 | 22 | **Expected behavior** 23 | A clear and concise description of what you expected to happen. 24 | 25 | 26 | **Environment (please complete the following information):** 27 | - OS: [e.g. Ubuntu 20.04 LTS] 28 | - Python ver. [e.g. 3.8] 29 | - sc2bench and torchdistill vers. [e.g. v0.0.2 and v0.3.2] 30 | 31 | 32 | **Additional context** 33 | Add any other context about the problem here. 34 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages 2 | 3 | import sc2bench 4 | 5 | with open('README.md', 'r') as f: 6 | long_description = f.read() 7 | 8 | description = 'SC2 Benchmark: Supervised Compression for Split Computing.' 
9 | setup( 10 | name='sc2bench', 11 | version=sc2bench.__version__, 12 | author='Yoshitomo Matsubara', 13 | description=description, 14 | long_description=long_description, 15 | long_description_content_type='text/markdown', 16 | url='https://github.com/yoshitomo-matsubara/sc2-benchmark', 17 | packages=find_packages(exclude=('configs', 'resources', 'script', 'tests')), 18 | python_requires='>=3.9', 19 | install_requires=[ 20 | 'torch>=2.0.0', 21 | 'torchvision>=0.15.1', 22 | 'numpy', 23 | 'pyyaml>=6.0.0', 24 | 'scipy', 25 | 'cython', 26 | 'pycocotools>=2.0.2', 27 | 'torchdistill>=1.0.0', 28 | 'compressai>=1.2.3', 29 | 'timm>=1.0.3' 30 | ], 31 | extras_require={ 32 | 'test': ['pytest'], 33 | 'docs': ['sphinx', 'sphinx_rtd_theme', 'sphinxcontrib-youtube'] 34 | } 35 | ) 36 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 Yoshitomo Matsubara 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | 23 | -------------------------------------------------------------------------------- /.github/workflows/python-publish.yml: -------------------------------------------------------------------------------- 1 | # This workflow will upload a Python Package using Twine when a release is created 2 | # For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries 3 | 4 | # This workflow uses actions that are not certified by GitHub. 5 | # They are provided by a third-party and are governed by 6 | # separate terms of service, privacy policy, and support 7 | # documentation. 8 | 9 | name: Upload Python Package 10 | 11 | on: 12 | release: 13 | types: [published] 14 | 15 | permissions: 16 | contents: read 17 | 18 | jobs: 19 | deploy: 20 | 21 | runs-on: ubuntu-latest 22 | 23 | steps: 24 | - uses: actions/checkout@v4 25 | - name: Set up Python 26 | uses: actions/setup-python@v5 27 | with: 28 | python-version: '3.x' 29 | - name: Install dependencies 30 | run: | 31 | python -m pip install --upgrade pip 32 | pip install build 33 | - name: Build package 34 | run: python -m build 35 | - name: Publish package 36 | uses: pypa/gh-action-pypi-publish@27b31702a0e7fc50959f5ad993c78deac1bdfc29 37 | with: 38 | user: __token__ 39 | password: ${{ secrets.PYPI_API_TOKEN }} 40 | -------------------------------------------------------------------------------- /script/task/custom/collator.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from torchdistill.datasets.registry import register_collate_func 4 | 5 | 6 | @register_collate_func 7 | def 
coco_collate_fn(batch): 8 | return tuple(zip(*batch)) 9 | 10 | 11 | def _cat_list(images, fill_value=0): 12 | if len(images) == 1 and not isinstance(images[0], torch.Tensor): 13 | return images 14 | 15 | max_size = tuple(max(s) for s in zip(*[img.shape for img in images])) 16 | batch_shape = (len(images),) + max_size 17 | batched_imgs = images[0].new(*batch_shape).fill_(fill_value) 18 | for img, pad_img in zip(images, batched_imgs): 19 | pad_img[..., :img.shape[-2], :img.shape[-1]].copy_(img) 20 | return batched_imgs 21 | 22 | 23 | @register_collate_func 24 | def coco_seg_collate_fn(batch): 25 | images, targets, supp_dicts = list(zip(*batch)) 26 | batched_imgs = _cat_list(images, fill_value=0) 27 | batched_targets = _cat_list(targets, fill_value=255) 28 | return batched_imgs, batched_targets, supp_dicts 29 | 30 | 31 | @register_collate_func 32 | def coco_seg_eval_collate_fn(batch): 33 | images, targets = list(zip(*batch)) 34 | batched_imgs = _cat_list(images, fill_value=0) 35 | batched_targets = _cat_list(targets, fill_value=255) 36 | return batched_imgs, batched_targets 37 | -------------------------------------------------------------------------------- /legacy/configs/coco2017/input_compression/jpeg-faster_rcnn_resnet50_fpn.yaml: -------------------------------------------------------------------------------- 1 | datasets: 2 | coco2017: 3 | name: &dataset_name 'coco2017' 4 | type: 'cocodetect' 5 | root: &root_dir !join ['~/dataset/', *dataset_name] 6 | splits: 7 | val: 8 | dataset_id: &coco_val !join [*dataset_name, '/val'] 9 | images: !join [*root_dir, '/val2017'] 10 | annotations: !join [*root_dir, '/annotations/instances_val2017.json'] 11 | annotated_only: False 12 | 13 | models: 14 | model: 15 | name: 'InputCompressionDetectionModel' 16 | params: 17 | codec_params: 18 | - type: 'PILImageModule' 19 | params: 20 | format: 'JPEG' 21 | quality: 90 22 | returns_file_size: True 23 | analysis_config: 24 | analyzer_configs: 25 | - type: 'FileSizeAccumulator' 26 | 
params: 27 | unit: 'KB' 28 | analyzes_after_compress: True 29 | adaptive_pad_config: 30 | pre_transform_params: 31 | post_transform_params: 32 | detection_model: 33 | name: 'fasterrcnn_resnet50_fpn' 34 | params: 35 | pretrained: True 36 | progress: True 37 | pretrained_backbone: True 38 | ckpt: '' 39 | 40 | test: 41 | test_data_loader: 42 | dataset_id: *coco_val 43 | random_sample: False 44 | batch_size: 1 45 | num_workers: 4 46 | collate_fn: 'coco_collate_fn' 47 | -------------------------------------------------------------------------------- /legacy/configs/coco2017/input_compression/webp-faster_rcnn_resnet50_fpn.yaml: -------------------------------------------------------------------------------- 1 | datasets: 2 | coco2017: 3 | name: &dataset_name 'coco2017' 4 | type: 'cocodetect' 5 | root: &root_dir !join ['~/dataset/', *dataset_name] 6 | splits: 7 | val: 8 | dataset_id: &coco_val !join [*dataset_name, '/val'] 9 | images: !join [*root_dir, '/val2017'] 10 | annotations: !join [*root_dir, '/annotations/instances_val2017.json'] 11 | annotated_only: False 12 | 13 | models: 14 | model: 15 | name: 'InputCompressionDetectionModel' 16 | params: 17 | codec_params: 18 | - type: 'PILImageModule' 19 | params: 20 | format: 'WEBP' 21 | quality: 90 22 | returns_file_size: True 23 | analysis_config: 24 | analyzer_configs: 25 | - type: 'FileSizeAccumulator' 26 | params: 27 | unit: 'KB' 28 | analyzes_after_compress: True 29 | adaptive_pad_config: 30 | pre_transform_params: 31 | post_transform_params: 32 | detection_model: 33 | name: 'fasterrcnn_resnet50_fpn' 34 | params: 35 | pretrained: True 36 | progress: True 37 | pretrained_backbone: True 38 | ckpt: '' 39 | 40 | test: 41 | test_data_loader: 42 | dataset_id: *coco_val 43 | random_sample: False 44 | batch_size: 1 45 | num_workers: 4 46 | collate_fn: 'coco_collate_fn' 47 | -------------------------------------------------------------------------------- /sc2bench/loss.py: 
-------------------------------------------------------------------------------- 1 | from torch import nn 2 | from torchdistill.losses.mid_level import register_mid_level_loss 3 | 4 | 5 | @register_mid_level_loss 6 | class BppLoss(nn.Module): 7 | """ 8 | Bit-per-pixel (or rate) loss. 9 | 10 | :param entropy_module_path: entropy module path to extract its output from io_dict 11 | :type entropy_module_path: str 12 | :param reduction: reduction type ('sum', 'batchmean', or 'mean') 13 | :type reduction: str or None 14 | """ 15 | def __init__(self, entropy_module_path, reduction='mean'): 16 | super().__init__() 17 | self.entropy_module_path = entropy_module_path 18 | self.reduction = reduction 19 | 20 | def forward(self, student_io_dict, *args, **kwargs): 21 | """ 22 | Computes a rate loss. 23 | 24 | :param student_io_dict: io_dict of model to be trained 25 | :type student_io_dict: dict 26 | """ 27 | entropy_module_dict = student_io_dict[self.entropy_module_path] 28 | intermediate_features, likelihoods = entropy_module_dict['output'] 29 | n, _, h, w = intermediate_features.shape 30 | num_pixels = n * h * w 31 | if self.reduction == 'sum': 32 | bpp = -likelihoods.log2().sum() 33 | elif self.reduction == 'batchmean': 34 | bpp = -likelihoods.log2().sum() / n 35 | else: 36 | bpp = -likelihoods.log2().sum() / num_pixels 37 | return bpp 38 | -------------------------------------------------------------------------------- /legacy/configs/ilsvrc2012/input_compression/jpeg-tf_efficientnet_l2_ns.yaml: -------------------------------------------------------------------------------- 1 | datasets: 2 | ilsvrc2012: 3 | name: &dataset_name 'ilsvrc2012' 4 | type: 'ImageFolder' 5 | root: &root_dir !join ['~/dataset/', *dataset_name] 6 | splits: 7 | val: 8 | dataset_id: &imagenet_val !join [*dataset_name, '/val'] 9 | params: 10 | root: !join [*root_dir, '/val'] 11 | transform_params: 12 | - type: 'WrappedResize' 13 | params: 14 | size: 833 15 | interpolation: 'bicubic' 16 | - type: 
'CenterCrop' 17 | params: 18 | size: [800, 800] 19 | - type: 'PILImageModule' 20 | params: 21 | format: 'JPEG' 22 | quality: 90 23 | - type: 'ToTensor' 24 | params: 25 | - type: 'Normalize' 26 | params: 27 | mean: [0.485, 0.456, 0.406] 28 | std: [0.229, 0.224, 0.225] 29 | 30 | models: 31 | model: 32 | name: &model_name 'tf_efficientnet_l2_ns' 33 | repo_or_dir: 'rwightman/pytorch-image-models' 34 | params: 35 | num_classes: 1000 36 | pretrained: True 37 | experiment: &experiment !join [*dataset_name, '-', *model_name] 38 | ckpt: !join ['./imagenet/vanilla/', *experiment, '.pt'] 39 | 40 | test: 41 | test_data_loader: 42 | dataset_id: *imagenet_val 43 | random_sample: False 44 | batch_size: 1 45 | num_workers: 16 46 | -------------------------------------------------------------------------------- /legacy/configs/ilsvrc2012/input_compression/jpeg-tf_efficientnet_l2_ns_475.yaml: -------------------------------------------------------------------------------- 1 | datasets: 2 | ilsvrc2012: 3 | name: &dataset_name 'ilsvrc2012' 4 | type: 'ImageFolder' 5 | root: &root_dir !join ['~/dataset/', *dataset_name] 6 | splits: 7 | val: 8 | dataset_id: &imagenet_val !join [*dataset_name, '/val'] 9 | params: 10 | root: !join [*root_dir, '/val'] 11 | transform_params: 12 | - type: 'WrappedResize' 13 | params: 14 | size: 507 15 | interpolation: 'bicubic' 16 | - type: 'CenterCrop' 17 | params: 18 | size: [475, 475] 19 | - type: 'PILImageModule' 20 | params: 21 | format: 'JPEG' 22 | quality: 90 23 | - type: 'ToTensor' 24 | params: 25 | - type: 'Normalize' 26 | params: 27 | mean: [0.485, 0.456, 0.406] 28 | std: [0.229, 0.224, 0.225] 29 | 30 | models: 31 | model: 32 | name: &model_name 'tf_efficientnet_l2_ns_475' 33 | repo_or_dir: 'rwightman/pytorch-image-models' 34 | params: 35 | num_classes: 1000 36 | pretrained: True 37 | experiment: &experiment !join [*dataset_name, '-', *model_name] 38 | ckpt: !join ['./imagenet/vanilla/', *experiment, '.pt'] 39 | 40 | test: 41 | test_data_loader: 42 
| dataset_id: *imagenet_val 43 | random_sample: False 44 | batch_size: 1 45 | num_workers: 16 46 | -------------------------------------------------------------------------------- /legacy/configs/coco2017/input_compression/bpg-faster_rcnn_resnet50_fpn.yaml: -------------------------------------------------------------------------------- 1 | datasets: 2 | coco2017: 3 | name: &dataset_name 'coco2017' 4 | type: 'cocodetect' 5 | root: &root_dir !join ['~/dataset/', *dataset_name] 6 | splits: 7 | val: 8 | dataset_id: &coco_val !join [*dataset_name, '/val'] 9 | images: !join [*root_dir, '/val2017'] 10 | annotations: !join [*root_dir, '/annotations/instances_val2017.json'] 11 | annotated_only: False 12 | 13 | models: 14 | model: 15 | name: 'InputCompressionDetectionModel' 16 | params: 17 | codec_params: 18 | - type: 'BPGModule' 19 | params: 20 | encoder_path: '~/software/libbpg-0.9.8/bpgenc' 21 | decoder_path: '~/software/libbpg-0.9.8/bpgdec' 22 | quality: 50 23 | returns_file_size: True 24 | analysis_config: 25 | analyzer_configs: 26 | - type: 'FileSizeAccumulator' 27 | params: 28 | unit: 'KB' 29 | analyzes_after_compress: True 30 | adaptive_pad_config: 31 | pre_transform_params: 32 | post_transform_params: 33 | detection_model: 34 | name: 'fasterrcnn_resnet50_fpn' 35 | params: 36 | pretrained: True 37 | progress: True 38 | pretrained_backbone: True 39 | ckpt: '' 40 | 41 | test: 42 | test_data_loader: 43 | dataset_id: *coco_val 44 | random_sample: False 45 | batch_size: 1 46 | num_workers: 4 47 | collate_fn: 'coco_collate_fn' 48 | -------------------------------------------------------------------------------- /script/task/utils/eval.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | class SegEvaluator(object): 5 | def __init__(self, num_classes): 6 | self.num_classes = num_classes 7 | self.mat = None 8 | 9 | def update(self, a, b): 10 | n = self.num_classes 11 | if self.mat is None: 12 | self.mat = 
torch.zeros((n, n), dtype=torch.int64, device=a.device) 13 | with torch.no_grad(): 14 | k = (a >= 0) & (a < n) 15 | inds = n * a[k].to(torch.int64) + b[k] 16 | self.mat += torch.bincount(inds, minlength=n**2).reshape(n, n) 17 | 18 | def reset(self): 19 | self.mat.zero_() 20 | 21 | def compute(self): 22 | h = self.mat.float() 23 | acc_global = torch.diag(h).sum() / h.sum() * 100.0 24 | acc = torch.diag(h) / h.sum(1) * 100.0 25 | iu = torch.diag(h) / (h.sum(1) + h.sum(0) - torch.diag(h)) * 100.0 26 | return acc_global, acc, iu 27 | 28 | def reduce_from_all_processes(self): 29 | if not torch.distributed.is_available(): 30 | return 31 | if not torch.distributed.is_initialized(): 32 | return 33 | torch.distributed.barrier() 34 | torch.distributed.all_reduce(self.mat) 35 | 36 | def __str__(self): 37 | acc_global, acc, iu = self.compute() 38 | return 'mean IoU: {:.1f}, IoU: {}, Global pixelwise acc: {:.1f}, Average row correct: {}'.format( 39 | iu.mean().item(), ['{:.1f}'.format(i) for i in iu.tolist()], 40 | acc_global.item(), ['{:.1f}'.format(i) for i in acc.tolist()] 41 | ) 42 | -------------------------------------------------------------------------------- /configs/coco2017/input_compression/mean_scale_hyperprior-faster_rcnn_resnet50_fpn.yaml: -------------------------------------------------------------------------------- 1 | dependencies: 2 | - name: 'custom' 3 | 4 | datasets: 5 | &coco_val coco2017/val: !import_call 6 | _name: &dataset_name 'coco2017' 7 | _root: &root_dir !join ['~/datasets/', *dataset_name] 8 | key: 'coco.dataset.coco_dataset' 9 | init: 10 | kwargs: 11 | img_dir_path: !join [*root_dir, '/val2017'] 12 | ann_file_path: !join [*root_dir, '/annotations/instances_val2017.json'] 13 | annotated_only: False 14 | is_segment: False 15 | 16 | models: 17 | model: 18 | key: 'InputCompressionDetectionModel' 19 | kwargs: 20 | analysis_config: 21 | analyzer_configs: 22 | - key: 'FileSizeAnalyzer' 23 | kwargs: 24 | unit: 'KB' 25 | analyzes_after_compress: True 26 
| adaptive_pad_kwargs: 27 | fill: 0 28 | padding_mode: 'constant' 29 | factor: 128 30 | pre_transform: 31 | post_transform: 32 | compression_model: 33 | key: 'mbt2018_mean' 34 | kwargs: 35 | pretrained: True 36 | quality: 8 37 | metric: 'mse' 38 | detection_model: 39 | key: 'fasterrcnn_resnet50_fpn' 40 | kwargs: 41 | pretrained: True 42 | progress: True 43 | 44 | test: 45 | test_data_loader: 46 | dataset_id: *coco_val 47 | sampler: 48 | class_or_func: !import_get 49 | key: 'torch.utils.data.SequentialSampler' 50 | kwargs: 51 | collate_fn: 'coco_collate_fn' 52 | kwargs: 53 | batch_size: 1 54 | num_workers: 4 55 | -------------------------------------------------------------------------------- /configs/coco2017/input_compression/factorized_prior-faster_rcnn_resnet50_fpn.yaml: -------------------------------------------------------------------------------- 1 | dependencies: 2 | - name: 'custom' 3 | 4 | datasets: 5 | &coco_val coco2017/val: !import_call 6 | _name: &dataset_name 'coco2017' 7 | _root: &root_dir !join ['~/datasets/', *dataset_name] 8 | key: 'coco.dataset.coco_dataset' 9 | init: 10 | kwargs: 11 | img_dir_path: !join [*root_dir, '/val2017'] 12 | ann_file_path: !join [*root_dir, '/annotations/instances_val2017.json'] 13 | annotated_only: False 14 | is_segment: False 15 | 16 | models: 17 | model: 18 | key: 'InputCompressionDetectionModel' 19 | kwargs: 20 | analysis_config: 21 | analyzer_configs: 22 | - key: 'FileSizeAnalyzer' 23 | kwargs: 24 | unit: 'KB' 25 | analyzes_after_compress: True 26 | adaptive_pad_kwargs: 27 | fill: 0 28 | padding_mode: 'constant' 29 | factor: 128 30 | pre_transform: 31 | post_transform: 32 | compression_model: 33 | key: 'bmshj2018_factorized' 34 | kwargs: 35 | pretrained: True 36 | quality: 8 37 | metric: 'mse' 38 | detection_model: 39 | key: 'fasterrcnn_resnet50_fpn' 40 | kwargs: 41 | pretrained: True 42 | progress: True 43 | 44 | test: 45 | test_data_loader: 46 | dataset_id: *coco_val 47 | sampler: 48 | class_or_func: !import_get 
49 | key: 'torch.utils.data.SequentialSampler' 50 | kwargs: 51 | collate_fn: 'coco_collate_fn' 52 | kwargs: 53 | batch_size: 1 54 | num_workers: 4 55 | -------------------------------------------------------------------------------- /configs/coco2017/input_compression/scale_hyperprior-faster_rcnn_resnet50_fpn.yaml: -------------------------------------------------------------------------------- 1 | dependencies: 2 | - name: 'custom' 3 | 4 | datasets: 5 | &coco_val coco2017/val: !import_call 6 | _name: &dataset_name 'coco2017' 7 | _root: &root_dir !join ['~/datasets/', *dataset_name] 8 | key: 'coco.dataset.coco_dataset' 9 | init: 10 | kwargs: 11 | img_dir_path: !join [*root_dir, '/val2017'] 12 | ann_file_path: !join [*root_dir, '/annotations/instances_val2017.json'] 13 | annotated_only: False 14 | is_segment: False 15 | 16 | models: 17 | model: 18 | key: 'InputCompressionDetectionModel' 19 | kwargs: 20 | analysis_config: 21 | analyzer_configs: 22 | - key: 'FileSizeAnalyzer' 23 | kwargs: 24 | unit: 'KB' 25 | analyzes_after_compress: True 26 | adaptive_pad_kwargs: 27 | fill: 0 28 | padding_mode: 'constant' 29 | factor: 128 30 | pre_transform: 31 | post_transform: 32 | compression_model: 33 | key: 'bmshj2018_hyperprior' 34 | kwargs: 35 | pretrained: True 36 | quality: 8 37 | metric: 'mse' 38 | detection_model: 39 | key: 'fasterrcnn_resnet50_fpn' 40 | kwargs: 41 | pretrained: True 42 | progress: True 43 | 44 | test: 45 | test_data_loader: 46 | dataset_id: *coco_val 47 | sampler: 48 | class_or_func: !import_get 49 | key: 'torch.utils.data.SequentialSampler' 50 | kwargs: 51 | collate_fn: 'coco_collate_fn' 52 | kwargs: 53 | batch_size: 1 54 | num_workers: 4 55 | -------------------------------------------------------------------------------- /configs/coco2017/input_compression/joint_autoregressive_hierarchical_prior-faster_rcnn_resnet50_fpn.yaml: -------------------------------------------------------------------------------- 1 | dependencies: 2 | - name: 'custom' 3 | 4 | 
datasets: 5 | &coco_val coco2017/val: !import_call 6 | _name: &dataset_name 'coco2017' 7 | _root: &root_dir !join ['~/datasets/', *dataset_name] 8 | key: 'coco.dataset.coco_dataset' 9 | init: 10 | kwargs: 11 | img_dir_path: !join [*root_dir, '/val2017'] 12 | ann_file_path: !join [*root_dir, '/annotations/instances_val2017.json'] 13 | annotated_only: False 14 | is_segment: False 15 | 16 | models: 17 | model: 18 | key: 'InputCompressionDetectionModel' 19 | kwargs: 20 | analysis_config: 21 | analyzer_configs: 22 | - key: 'FileSizeAnalyzer' 23 | kwargs: 24 | unit: 'KB' 25 | analyzes_after_compress: True 26 | adaptive_pad_kwargs: 27 | fill: 0 28 | padding_mode: 'constant' 29 | factor: 128 30 | pre_transform: 31 | post_transform: 32 | uses_cpu4compression_model: True 33 | compression_model: 34 | key: 'mbt2018' 35 | kwargs: 36 | pretrained: True 37 | quality: 8 38 | metric: 'mse' 39 | detection_model: 40 | key: 'fasterrcnn_resnet50_fpn' 41 | kwargs: 42 | pretrained: True 43 | progress: True 44 | 45 | test: 46 | test_data_loader: 47 | dataset_id: *coco_val 48 | sampler: 49 | class_or_func: !import_get 50 | key: 'torch.utils.data.SequentialSampler' 51 | kwargs: 52 | collate_fn: 'coco_collate_fn' 53 | kwargs: 54 | batch_size: 1 55 | num_workers: 4 56 | -------------------------------------------------------------------------------- /legacy/configs/ilsvrc2012/input_compression/jpeg-resnet50.yaml: -------------------------------------------------------------------------------- 1 | datasets: 2 | ilsvrc2012: 3 | name: &dataset_name 'ilsvrc2012' 4 | type: 'ImageFolder' 5 | root: &root_dir !join ['~/dataset/', *dataset_name] 6 | splits: 7 | val: 8 | dataset_id: &imagenet_val !join [*dataset_name, '/val'] 9 | params: 10 | root: !join [*root_dir, '/val'] 11 | transform_params: 12 | - type: 'Resize' 13 | params: 14 | size: 256 15 | - type: 'CenterCrop' 16 | params: 17 | size: [224, 224] 18 | 19 | models: 20 | model: 21 | name: 'CodecInputCompressionClassifier' 22 | params: 23 | 
codec_params: 24 | - type: 'PILImageModule' 25 | params: 26 | format: 'JPEG' 27 | quality: 90 28 | returns_file_size: True 29 | post_transform_params: 30 | - type: 'ToTensor' 31 | params: 32 | - type: 'Normalize' 33 | params: 34 | mean: [0.485, 0.456, 0.406] 35 | std: [0.229, 0.224, 0.225] 36 | analysis_config: 37 | analyzer_configs: 38 | - type: 'FileSizeAccumulator' 39 | params: 40 | unit: 'KB' 41 | classification_model: 42 | name: &model_name 'resnet50' 43 | params: 44 | num_classes: 1000 45 | pretrained: True 46 | experiment: &experiment !join [*dataset_name, '-', *model_name] 47 | ckpt: !join ['./resource/ckpt/', *experiment, '.pt'] 48 | 49 | test: 50 | test_data_loader: 51 | dataset_id: *imagenet_val 52 | random_sample: False 53 | batch_size: 1 54 | num_workers: 16 55 | collate_fn: 'default_collate_w_pil' 56 | -------------------------------------------------------------------------------- /legacy/configs/ilsvrc2012/input_compression/webp-resnet50.yaml: -------------------------------------------------------------------------------- 1 | datasets: 2 | ilsvrc2012: 3 | name: &dataset_name 'ilsvrc2012' 4 | type: 'ImageFolder' 5 | root: &root_dir !join ['~/dataset/', *dataset_name] 6 | splits: 7 | val: 8 | dataset_id: &imagenet_val !join [*dataset_name, '/val'] 9 | params: 10 | root: !join [*root_dir, '/val'] 11 | transform_params: 12 | - type: 'Resize' 13 | params: 14 | size: 256 15 | - type: 'CenterCrop' 16 | params: 17 | size: [224, 224] 18 | 19 | models: 20 | model: 21 | name: 'CodecInputCompressionClassifier' 22 | params: 23 | codec_params: 24 | - type: 'PILImageModule' 25 | params: 26 | format: 'WEBP' 27 | quality: 90 28 | returns_file_size: True 29 | post_transform_params: 30 | - type: 'ToTensor' 31 | params: 32 | - type: 'Normalize' 33 | params: 34 | mean: [0.485, 0.456, 0.406] 35 | std: [0.229, 0.224, 0.225] 36 | analysis_config: 37 | analyzer_configs: 38 | - type: 'FileSizeAccumulator' 39 | params: 40 | unit: 'KB' 41 | classification_model: 42 | name: 
&model_name 'resnet50' 43 | params: 44 | num_classes: 1000 45 | pretrained: True 46 | experiment: &experiment !join [*dataset_name, '-', *model_name] 47 | ckpt: !join ['./resource/ckpt/', *experiment, '.pt'] 48 | 49 | test: 50 | test_data_loader: 51 | dataset_id: *imagenet_val 52 | random_sample: False 53 | batch_size: 1 54 | num_workers: 16 55 | collate_fn: 'default_collate_w_pil' 56 | -------------------------------------------------------------------------------- /legacy/configs/ilsvrc2012/input_compression/jpeg-resnet101.yaml: -------------------------------------------------------------------------------- 1 | datasets: 2 | ilsvrc2012: 3 | name: &dataset_name 'ilsvrc2012' 4 | type: 'ImageFolder' 5 | root: &root_dir !join ['~/dataset/', *dataset_name] 6 | splits: 7 | val: 8 | dataset_id: &imagenet_val !join [*dataset_name, '/val'] 9 | params: 10 | root: !join [*root_dir, '/val'] 11 | transform_params: 12 | - type: 'Resize' 13 | params: 14 | size: 256 15 | - type: 'CenterCrop' 16 | params: 17 | size: [224, 224] 18 | 19 | models: 20 | model: 21 | name: 'CodecInputCompressionClassifier' 22 | params: 23 | codec_params: 24 | - type: 'PILImageModule' 25 | params: 26 | format: 'JPEG' 27 | quality: 90 28 | returns_file_size: True 29 | post_transform_params: 30 | - type: 'ToTensor' 31 | params: 32 | - type: 'Normalize' 33 | params: 34 | mean: [0.485, 0.456, 0.406] 35 | std: [0.229, 0.224, 0.225] 36 | analysis_config: 37 | analyzer_configs: 38 | - type: 'FileSizeAccumulator' 39 | params: 40 | unit: 'KB' 41 | classification_model: 42 | name: &model_name 'resnet101' 43 | params: 44 | num_classes: 1000 45 | pretrained: True 46 | experiment: &experiment !join [*dataset_name, '-', *model_name] 47 | ckpt: !join ['./resource/ckpt/', *experiment, '.pt'] 48 | 49 | test: 50 | test_data_loader: 51 | dataset_id: *imagenet_val 52 | random_sample: False 53 | batch_size: 1 54 | num_workers: 16 55 | collate_fn: 'default_collate_w_pil' 56 | 
-------------------------------------------------------------------------------- /legacy/configs/ilsvrc2012/input_compression/jpeg-resnet152.yaml: -------------------------------------------------------------------------------- 1 | datasets: 2 | ilsvrc2012: 3 | name: &dataset_name 'ilsvrc2012' 4 | type: 'ImageFolder' 5 | root: &root_dir !join ['~/dataset/', *dataset_name] 6 | splits: 7 | val: 8 | dataset_id: &imagenet_val !join [*dataset_name, '/val'] 9 | params: 10 | root: !join [*root_dir, '/val'] 11 | transform_params: 12 | - type: 'Resize' 13 | params: 14 | size: 256 15 | - type: 'CenterCrop' 16 | params: 17 | size: [224, 224] 18 | 19 | models: 20 | model: 21 | name: 'CodecInputCompressionClassifier' 22 | params: 23 | codec_params: 24 | - type: 'PILImageModule' 25 | params: 26 | format: 'JPEG' 27 | quality: 90 28 | returns_file_size: True 29 | post_transform_params: 30 | - type: 'ToTensor' 31 | params: 32 | - type: 'Normalize' 33 | params: 34 | mean: [0.485, 0.456, 0.406] 35 | std: [0.229, 0.224, 0.225] 36 | analysis_config: 37 | analyzer_configs: 38 | - type: 'FileSizeAccumulator' 39 | params: 40 | unit: 'KB' 41 | classification_model: 42 | name: &model_name 'resnet152' 43 | params: 44 | num_classes: 1000 45 | pretrained: True 46 | experiment: &experiment !join [*dataset_name, '-', *model_name] 47 | ckpt: !join ['./resource/ckpt/', *experiment, '.pt'] 48 | 49 | test: 50 | test_data_loader: 51 | dataset_id: *imagenet_val 52 | random_sample: False 53 | batch_size: 1 54 | num_workers: 16 55 | collate_fn: 'default_collate_w_pil' 56 | -------------------------------------------------------------------------------- /legacy/configs/ilsvrc2012/input_compression/webp-resnet101.yaml: -------------------------------------------------------------------------------- 1 | datasets: 2 | ilsvrc2012: 3 | name: &dataset_name 'ilsvrc2012' 4 | type: 'ImageFolder' 5 | root: &root_dir !join ['~/dataset/', *dataset_name] 6 | splits: 7 | val: 8 | dataset_id: &imagenet_val !join 
[*dataset_name, '/val'] 9 | params: 10 | root: !join [*root_dir, '/val'] 11 | transform_params: 12 | - type: 'Resize' 13 | params: 14 | size: 256 15 | - type: 'CenterCrop' 16 | params: 17 | size: [224, 224] 18 | 19 | models: 20 | model: 21 | name: 'CodecInputCompressionClassifier' 22 | params: 23 | codec_params: 24 | - type: 'PILImageModule' 25 | params: 26 | format: 'WEBP' 27 | quality: 90 28 | returns_file_size: True 29 | post_transform_params: 30 | - type: 'ToTensor' 31 | params: 32 | - type: 'Normalize' 33 | params: 34 | mean: [0.485, 0.456, 0.406] 35 | std: [0.229, 0.224, 0.225] 36 | analysis_config: 37 | analyzer_configs: 38 | - type: 'FileSizeAccumulator' 39 | params: 40 | unit: 'KB' 41 | classification_model: 42 | name: &model_name 'resnet101' 43 | params: 44 | num_classes: 1000 45 | pretrained: True 46 | experiment: &experiment !join [*dataset_name, '-', *model_name] 47 | ckpt: !join ['./resource/ckpt/', *experiment, '.pt'] 48 | 49 | test: 50 | test_data_loader: 51 | dataset_id: *imagenet_val 52 | random_sample: False 53 | batch_size: 1 54 | num_workers: 16 55 | collate_fn: 'default_collate_w_pil' 56 | -------------------------------------------------------------------------------- /legacy/configs/ilsvrc2012/input_compression/webp-resnet152.yaml: -------------------------------------------------------------------------------- 1 | datasets: 2 | ilsvrc2012: 3 | name: &dataset_name 'ilsvrc2012' 4 | type: 'ImageFolder' 5 | root: &root_dir !join ['~/dataset/', *dataset_name] 6 | splits: 7 | val: 8 | dataset_id: &imagenet_val !join [*dataset_name, '/val'] 9 | params: 10 | root: !join [*root_dir, '/val'] 11 | transform_params: 12 | - type: 'Resize' 13 | params: 14 | size: 256 15 | - type: 'CenterCrop' 16 | params: 17 | size: [224, 224] 18 | 19 | models: 20 | model: 21 | name: 'CodecInputCompressionClassifier' 22 | params: 23 | codec_params: 24 | - type: 'PILImageModule' 25 | params: 26 | format: 'WEBP' 27 | quality: 90 28 | returns_file_size: True 29 | 
post_transform_params: 30 | - type: 'ToTensor' 31 | params: 32 | - type: 'Normalize' 33 | params: 34 | mean: [0.485, 0.456, 0.406] 35 | std: [0.229, 0.224, 0.225] 36 | analysis_config: 37 | analyzer_configs: 38 | - type: 'FileSizeAccumulator' 39 | params: 40 | unit: 'KB' 41 | classification_model: 42 | name: &model_name 'resnet152' 43 | params: 44 | num_classes: 1000 45 | pretrained: True 46 | experiment: &experiment !join [*dataset_name, '-', *model_name] 47 | ckpt: !join ['./resource/ckpt/', *experiment, '.pt'] 48 | 49 | test: 50 | test_data_loader: 51 | dataset_id: *imagenet_val 52 | random_sample: False 53 | batch_size: 1 54 | num_workers: 16 55 | collate_fn: 'default_collate_w_pil' 56 | -------------------------------------------------------------------------------- /script/codec_input_compression/README.md: -------------------------------------------------------------------------------- 1 | # Codec-based Input Compression Baselines 2 | 3 | We considered the following codec-based image compression methods: 4 | - JPEG 5 | - WebP 6 | - BPG 7 | 8 | If you want to use BPG, you will need to manually install the software 9 | ```shell 10 | bash script/software/install_bpg.sh 11 | ``` 12 | 13 | The script will place the encoder and decoder in `~/software/` 14 | 15 | ## ImageNet (ILSVRC 2012): Image Classification 16 | Codec-based input compression followed by ResNet-50 17 | 18 | ```shell 19 | bash script/codec_input_compression/ilsvrc2012-image_classification.sh jpeg-resnet50 jpeg 20 | bash script/codec_input_compression/ilsvrc2012-image_classification.sh webp-resnet50 webp 21 | bash script/codec_input_compression/ilsvrc2012-image_classification.sh bpg-resnet50 bpg 5 5 50 22 | ``` 23 | 24 | ## COCO 2017: Object Detection 25 | Codec-based input compression followed by Faster R-CNN with ResNet-50 and FPN 26 | 27 | ```shell 28 | bash script/codec_input_compression/coco2017-object_detection.sh jpeg-faster_rcnn_resnet50_fpn jpeg 29 | bash 
script/codec_input_compression/coco2017-object_detection.sh webp-faster_rcnn_resnet50_fpn webp 30 | bash script/codec_input_compression/coco2017-object_detection.sh bpg-faster_rcnn_resnet50_fpn bpg 5 5 50 31 | ``` 32 | 33 | ## PASCAL VOC 2012: Semantic Segmentation 34 | Codec-based input compression followed by DeepLabv3 with ResNet-50 35 | 36 | ```shell 37 | bash script/codec_input_compression/pascal_voc2012-semantic_segmentation.sh jpeg-deeplabv3_resnet50 jpeg 38 | bash script/codec_input_compression/pascal_voc2012-semantic_segmentation.sh webp-deeplabv3_resnet50 webp 39 | bash script/codec_input_compression/pascal_voc2012-semantic_segmentation.sh bpg-deeplabv3_resnet50 bpg 5 5 50 40 | ``` 41 | -------------------------------------------------------------------------------- /legacy/configs/ilsvrc2012/input_compression/bpg-resnet50.yaml: -------------------------------------------------------------------------------- 1 | datasets: 2 | ilsvrc2012: 3 | name: &dataset_name 'ilsvrc2012' 4 | type: 'ImageFolder' 5 | root: &root_dir !join ['~/dataset/', *dataset_name] 6 | splits: 7 | val: 8 | dataset_id: &imagenet_val !join [*dataset_name, '/val'] 9 | params: 10 | root: !join [*root_dir, '/val'] 11 | transform_params: 12 | - type: 'Resize' 13 | params: 14 | size: 256 15 | - type: 'CenterCrop' 16 | params: 17 | size: [224, 224] 18 | 19 | models: 20 | model: 21 | name: 'CodecInputCompressionClassifier' 22 | params: 23 | codec_params: 24 | - type: 'BPGModule' 25 | params: 26 | encoder_path: '~/software/libbpg-0.9.8/bpgenc' 27 | decoder_path: '~/software/libbpg-0.9.8/bpgdec' 28 | quality: 50 29 | returns_file_size: True 30 | post_transform_params: 31 | - type: 'ToTensor' 32 | params: 33 | - type: 'Normalize' 34 | params: 35 | mean: [0.485, 0.456, 0.406] 36 | std: [0.229, 0.224, 0.225] 37 | analysis_config: 38 | analyzer_configs: 39 | - type: 'FileSizeAccumulator' 40 | params: 41 | unit: 'KB' 42 | classification_model: 43 | name: &model_name 'resnet50' 44 | params: 45 | 
num_classes: 1000 46 | pretrained: True 47 | experiment: &experiment !join [*dataset_name, '-', *model_name] 48 | ckpt: !join ['./resource/ckpt/', *experiment, '.pt'] 49 | 50 | test: 51 | test_data_loader: 52 | dataset_id: *imagenet_val 53 | random_sample: False 54 | batch_size: 1 55 | num_workers: 16 56 | -------------------------------------------------------------------------------- /legacy/configs/ilsvrc2012/input_compression/bpg-resnet101.yaml: -------------------------------------------------------------------------------- 1 | datasets: 2 | ilsvrc2012: 3 | name: &dataset_name 'ilsvrc2012' 4 | type: 'ImageFolder' 5 | root: &root_dir !join ['~/dataset/', *dataset_name] 6 | splits: 7 | val: 8 | dataset_id: &imagenet_val !join [*dataset_name, '/val'] 9 | params: 10 | root: !join [*root_dir, '/val'] 11 | transform_params: 12 | - type: 'Resize' 13 | params: 14 | size: 256 15 | - type: 'CenterCrop' 16 | params: 17 | size: [224, 224] 18 | 19 | models: 20 | model: 21 | name: 'CodecInputCompressionClassifier' 22 | params: 23 | codec_params: 24 | - type: 'BPGModule' 25 | params: 26 | encoder_path: '~/software/libbpg-0.9.8/bpgenc' 27 | decoder_path: '~/software/libbpg-0.9.8/bpgdec' 28 | quality: 50 29 | returns_file_size: True 30 | post_transform_params: 31 | - type: 'ToTensor' 32 | params: 33 | - type: 'Normalize' 34 | params: 35 | mean: [0.485, 0.456, 0.406] 36 | std: [0.229, 0.224, 0.225] 37 | analysis_config: 38 | analyzer_configs: 39 | - type: 'FileSizeAccumulator' 40 | params: 41 | unit: 'KB' 42 | classification_model: 43 | name: &model_name 'resnet101' 44 | params: 45 | num_classes: 1000 46 | pretrained: True 47 | experiment: &experiment !join [*dataset_name, '-', *model_name] 48 | ckpt: !join ['./resource/ckpt/', *experiment, '.pt'] 49 | 50 | test: 51 | test_data_loader: 52 | dataset_id: *imagenet_val 53 | random_sample: False 54 | batch_size: 1 55 | num_workers: 16 56 | -------------------------------------------------------------------------------- 
/legacy/configs/ilsvrc2012/input_compression/bpg-resnet152.yaml: -------------------------------------------------------------------------------- 1 | datasets: 2 | ilsvrc2012: 3 | name: &dataset_name 'ilsvrc2012' 4 | type: 'ImageFolder' 5 | root: &root_dir !join ['~/dataset/', *dataset_name] 6 | splits: 7 | val: 8 | dataset_id: &imagenet_val !join [*dataset_name, '/val'] 9 | params: 10 | root: !join [*root_dir, '/val'] 11 | transform_params: 12 | - type: 'Resize' 13 | params: 14 | size: 256 15 | - type: 'CenterCrop' 16 | params: 17 | size: [224, 224] 18 | 19 | models: 20 | model: 21 | name: 'CodecInputCompressionClassifier' 22 | params: 23 | codec_params: 24 | - type: 'BPGModule' 25 | params: 26 | encoder_path: '~/software/libbpg-0.9.8/bpgenc' 27 | decoder_path: '~/software/libbpg-0.9.8/bpgdec' 28 | quality: 50 29 | returns_file_size: True 30 | post_transform_params: 31 | - type: 'ToTensor' 32 | params: 33 | - type: 'Normalize' 34 | params: 35 | mean: [0.485, 0.456, 0.406] 36 | std: [0.229, 0.224, 0.225] 37 | analysis_config: 38 | analyzer_configs: 39 | - type: 'FileSizeAccumulator' 40 | params: 41 | unit: 'KB' 42 | classification_model: 43 | name: &model_name 'resnet152' 44 | params: 45 | num_classes: 1000 46 | pretrained: True 47 | experiment: &experiment !join [*dataset_name, '-', *model_name] 48 | ckpt: !join ['./resource/ckpt/', *experiment, '.pt'] 49 | 50 | test: 51 | test_data_loader: 52 | dataset_id: *imagenet_val 53 | random_sample: False 54 | batch_size: 1 55 | num_workers: 16 56 | -------------------------------------------------------------------------------- /configs/coco2017/input_compression/jpeg-faster_rcnn_resnet50_fpn.yaml: -------------------------------------------------------------------------------- 1 | dependencies: 2 | - name: 'custom' 3 | 4 | datasets: 5 | &coco_val coco2017/val: !import_call 6 | _name: &dataset_name 'coco2017' 7 | _root: &root_dir !join ['~/datasets/', *dataset_name] 8 | key: 'coco.dataset.coco_dataset' 9 | init: 10 | 
kwargs: 11 | img_dir_path: !join [*root_dir, '/val2017'] 12 | ann_file_path: !join [*root_dir, '/annotations/instances_val2017.json'] 13 | annotated_only: False 14 | is_segment: False 15 | 16 | models: 17 | model: 18 | key: 'InputCompressionDetectionModel' 19 | kwargs: 20 | codec_encoder_decoder: !import_call 21 | key: 'torchvision.transforms.Compose' 22 | init: 23 | kwargs: 24 | transforms: 25 | - !import_call 26 | key: 'sc2bench.transforms.codec.PILImageModule' 27 | init: 28 | kwargs: 29 | format: 'JPEG' 30 | quality: 90 31 | returns_file_size: True 32 | analysis_config: 33 | analyzer_configs: 34 | - key: 'FileSizeAccumulator' 35 | kwargs: 36 | unit: 'KB' 37 | analyzes_after_compress: True 38 | adaptive_pad_config: 39 | pre_transform: 40 | post_transform: 41 | detection_model: 42 | key: 'fasterrcnn_resnet50_fpn' 43 | kwargs: 44 | pretrained: True 45 | progress: True 46 | 47 | test: 48 | test_data_loader: 49 | dataset_id: *coco_val 50 | sampler: 51 | class_or_func: !import_get 52 | key: 'torch.utils.data.SequentialSampler' 53 | kwargs: 54 | collate_fn: 'coco_collate_fn' 55 | kwargs: 56 | batch_size: 1 57 | num_workers: 4 58 | -------------------------------------------------------------------------------- /configs/coco2017/input_compression/webp-faster_rcnn_resnet50_fpn.yaml: -------------------------------------------------------------------------------- 1 | dependencies: 2 | - name: 'custom' 3 | 4 | datasets: 5 | &coco_val coco2017/val: !import_call 6 | _name: &dataset_name 'coco2017' 7 | _root: &root_dir !join ['~/datasets/', *dataset_name] 8 | key: 'coco.dataset.coco_dataset' 9 | init: 10 | kwargs: 11 | img_dir_path: !join [*root_dir, '/val2017'] 12 | ann_file_path: !join [*root_dir, '/annotations/instances_val2017.json'] 13 | annotated_only: False 14 | is_segment: False 15 | 16 | models: 17 | model: 18 | key: 'InputCompressionDetectionModel' 19 | kwargs: 20 | codec_encoder_decoder: !import_call 21 | key: 'torchvision.transforms.Compose' 22 | init: 23 | 
kwargs: 24 | transforms: 25 | - !import_call 26 | key: 'sc2bench.transforms.codec.PILImageModule' 27 | init: 28 | kwargs: 29 | format: 'WEBP' 30 | quality: 90 31 | returns_file_size: True 32 | analysis_config: 33 | analyzer_configs: 34 | - key: 'FileSizeAccumulator' 35 | kwargs: 36 | unit: 'KB' 37 | analyzes_after_compress: True 38 | adaptive_pad_config: 39 | pre_transform: 40 | post_transform: 41 | detection_model: 42 | key: 'fasterrcnn_resnet50_fpn' 43 | kwargs: 44 | pretrained: True 45 | progress: True 46 | 47 | test: 48 | test_data_loader: 49 | dataset_id: *coco_val 50 | sampler: 51 | class_or_func: !import_get 52 | key: 'torch.utils.data.SequentialSampler' 53 | kwargs: 54 | collate_fn: 'coco_collate_fn' 55 | kwargs: 56 | batch_size: 1 57 | num_workers: 4 58 | -------------------------------------------------------------------------------- /legacy/script/codec_input_compression/README.md: -------------------------------------------------------------------------------- 1 | # Codec-based Input Compression Baselines 2 | 3 | We considered the following codec-based image compression methods: 4 | - JPEG 5 | - WebP 6 | - BPG 7 | 8 | If you want to use BPG, you will need to manually install the software 9 | ```shell 10 | bash legacy/script/software/install_bpg.sh 11 | ``` 12 | 13 | The script will place the encoder and decoder in `~/software/` 14 | 15 | ## ImageNet (ILSVRC 2012): Image Classification 16 | Codec-based input compression followed by ResNet-50 17 | 18 | ```shell 19 | bash legacy/script/codec_input_compression/ilsvrc2012-image_classification.sh jpeg-resnet50 jpeg 20 | bash legacy/script/codec_input_compression/ilsvrc2012-image_classification.sh webp-resnet50 webp 21 | bash legacy/script/codec_input_compression/ilsvrc2012-image_classification.sh bpg-resnet50 bpg 5 5 50 22 | ``` 23 | 24 | ## COCO 2017: Object Detection 25 | Codec-based input compression followed by Faster R-CNN with ResNet-50 and FPN 26 | 27 | ```shell 28 | bash
legacy/script/codec_input_compression/coco2017-object_detection.sh jpeg-faster_rcnn_resnet50_fpn jpeg 29 | bash legacy/script/codec_input_compression/coco2017-object_detection.sh webp-faster_rcnn_resnet50_fpn webp 30 | bash legacy/script/codec_input_compression/coco2017-object_detection.sh bpg-faster_rcnn_resnet50_fpn bpg 5 5 50 31 | ``` 32 | 33 | ## PASCAL VOC 2012: Semantic Segmentation 34 | Codec-based input compression followed by DeepLabv3 with ResNet-50 35 | 36 | ```shell 37 | bash legacy/script/codec_input_compression/pascal_voc2012-semantic_segmentation.sh jpeg-deeplabv3_resnet50 jpeg 38 | bash legacy/script/codec_input_compression/pascal_voc2012-semantic_segmentation.sh webp-deeplabv3_resnet50 webp 39 | bash legacy/script/codec_input_compression/pascal_voc2012-semantic_segmentation.sh bpg-deeplabv3_resnet50 bpg 5 5 50 40 | ``` 41 | -------------------------------------------------------------------------------- /legacy/configs/coco2017/input_compression/factorized_prior-faster_rcnn_resnet50_fpn.yaml: -------------------------------------------------------------------------------- 1 | datasets: 2 | coco2017: 3 | name: &dataset_name 'coco2017' 4 | type: 'cocodetect' 5 | root: &root_dir !join ['~/dataset/', *dataset_name] 6 | splits: 7 | val: 8 | dataset_id: &coco_val !join [*dataset_name, '/val'] 9 | images: !join [*root_dir, '/val2017'] 10 | annotations: !join [*root_dir, '/annotations/instances_val2017.json'] 11 | annotated_only: False 12 | 13 | models: 14 | model: 15 | name: 'InputCompressionDetectionModel' 16 | params: 17 | codec_params: 18 | - type: 'BPGModule' 19 | params: 20 | encoder_path: '~/software/libbpg-0.9.8/bpgenc' 21 | decoder_path: '~/software/libbpg-0.9.8/bpgdec' 22 | quality: 50 23 | returns_file_size: True 24 | analysis_config: 25 | analyzer_configs: 26 | - type: 'FileSizeAnalyzer' 27 | params: 28 | unit: 'KB' 29 | analyzes_after_compress: True 30 | adaptive_pad_kwargs: 31 | fill: 0 32 | padding_mode: 'constant' 33 | factor: 128 34 | 
pre_transform_params: 35 | post_transform_params: 36 | compression_model: 37 | name: 'bmshj2018_factorized' 38 | params: 39 | pretrained: True 40 | quality: 8 41 | metric: 'mse' 42 | ckpt: './resource/ckpt/input_compression/factorized_prior.pt' 43 | detection_model: 44 | name: 'fasterrcnn_resnet50_fpn' 45 | params: 46 | pretrained: True 47 | progress: True 48 | pretrained_backbone: True 49 | ckpt: '' 50 | 51 | test: 52 | test_data_loader: 53 | dataset_id: *coco_val 54 | random_sample: False 55 | batch_size: 1 56 | num_workers: 4 57 | collate_fn: 'coco_collate_fn' 58 | -------------------------------------------------------------------------------- /legacy/configs/coco2017/input_compression/mean_scale_hyperprior-faster_rcnn_resnet50_fpn.yaml: -------------------------------------------------------------------------------- 1 | datasets: 2 | coco2017: 3 | name: &dataset_name 'coco2017' 4 | type: 'cocodetect' 5 | root: &root_dir !join ['~/dataset/', *dataset_name] 6 | splits: 7 | val: 8 | dataset_id: &coco_val !join [*dataset_name, '/val'] 9 | images: !join [*root_dir, '/val2017'] 10 | annotations: !join [*root_dir, '/annotations/instances_val2017.json'] 11 | annotated_only: False 12 | 13 | models: 14 | model: 15 | name: 'InputCompressionDetectionModel' 16 | params: 17 | codec_params: 18 | - type: 'BPGModule' 19 | params: 20 | encoder_path: '~/software/libbpg-0.9.8/bpgenc' 21 | decoder_path: '~/software/libbpg-0.9.8/bpgdec' 22 | quality: 50 23 | returns_file_size: True 24 | analysis_config: 25 | analyzer_configs: 26 | - type: 'FileSizeAnalyzer' 27 | params: 28 | unit: 'KB' 29 | analyzes_after_compress: True 30 | adaptive_pad_kwargs: 31 | fill: 0 32 | padding_mode: 'constant' 33 | factor: 128 34 | pre_transform_params: 35 | post_transform_params: 36 | compression_model: 37 | name: 'mbt2018_mean' 38 | params: 39 | pretrained: True 40 | quality: 8 41 | metric: 'mse' 42 | ckpt: './resource/ckpt/input_compression/mean_scale_hyperprior.pt' 43 | detection_model: 44 | name: 
'fasterrcnn_resnet50_fpn' 45 | params: 46 | pretrained: True 47 | progress: True 48 | pretrained_backbone: True 49 | ckpt: '' 50 | 51 | test: 52 | test_data_loader: 53 | dataset_id: *coco_val 54 | random_sample: False 55 | batch_size: 1 56 | num_workers: 4 57 | collate_fn: 'coco_collate_fn' 58 | -------------------------------------------------------------------------------- /legacy/configs/coco2017/input_compression/scale_hyperprior-faster_rcnn_resnet50_fpn.yaml: -------------------------------------------------------------------------------- 1 | datasets: 2 | coco2017: 3 | name: &dataset_name 'coco2017' 4 | type: 'cocodetect' 5 | root: &root_dir !join ['~/dataset/', *dataset_name] 6 | splits: 7 | val: 8 | dataset_id: &coco_val !join [*dataset_name, '/val'] 9 | images: !join [*root_dir, '/val2017'] 10 | annotations: !join [*root_dir, '/annotations/instances_val2017.json'] 11 | annotated_only: False 12 | 13 | models: 14 | model: 15 | name: 'InputCompressionDetectionModel' 16 | params: 17 | codec_params: 18 | - type: 'BPGModule' 19 | params: 20 | encoder_path: '~/software/libbpg-0.9.8/bpgenc' 21 | decoder_path: '~/software/libbpg-0.9.8/bpgdec' 22 | quality: 50 23 | returns_file_size: True 24 | analysis_config: 25 | analyzer_configs: 26 | - type: 'FileSizeAnalyzer' 27 | params: 28 | unit: 'KB' 29 | analyzes_after_compress: True 30 | adaptive_pad_kwargs: 31 | fill: 0 32 | padding_mode: 'constant' 33 | factor: 128 34 | pre_transform_params: 35 | post_transform_params: 36 | compression_model: 37 | name: 'bmshj2018_hyperprior' 38 | params: 39 | pretrained: True 40 | quality: 8 41 | metric: 'mse' 42 | ckpt: './resource/ckpt/input_compression/scale_hyperprior.pt' 43 | detection_model: 44 | name: 'fasterrcnn_resnet50_fpn' 45 | params: 46 | pretrained: True 47 | progress: True 48 | pretrained_backbone: True 49 | ckpt: '' 50 | 51 | test: 52 | test_data_loader: 53 | dataset_id: *coco_val 54 | random_sample: False 55 | batch_size: 1 56 | num_workers: 4 57 | collate_fn: 
'coco_collate_fn' 58 | -------------------------------------------------------------------------------- /configs/coco2017/input_compression/bpg-faster_rcnn_resnet50_fpn.yaml: -------------------------------------------------------------------------------- 1 | dependencies: 2 | - name: 'custom' 3 | 4 | datasets: 5 | &coco_val coco2017/val: !import_call 6 | _name: &dataset_name 'coco2017' 7 | _root: &root_dir !join ['~/datasets/', *dataset_name] 8 | key: 'coco.dataset.coco_dataset' 9 | init: 10 | kwargs: 11 | img_dir_path: !join [*root_dir, '/val2017'] 12 | ann_file_path: !join [*root_dir, '/annotations/instances_val2017.json'] 13 | annotated_only: False 14 | is_segment: False 15 | 16 | models: 17 | model: 18 | key: 'InputCompressionDetectionModel' 19 | kwargs: 20 | codec_encoder_decoder: !import_call 21 | key: 'torchvision.transforms.Compose' 22 | init: 23 | kwargs: 24 | transforms: 25 | - !import_call 26 | key: 'sc2bench.transforms.codec.BPGModule' 27 | init: 28 | kwargs: 29 | encoder_path: '~/software/libbpg-0.9.8/bpgenc' 30 | decoder_path: '~/software/libbpg-0.9.8/bpgdec' 31 | quality: 50 32 | returns_file_size: True 33 | analysis_config: 34 | analyzer_configs: 35 | - key: 'FileSizeAccumulator' 36 | kwargs: 37 | unit: 'KB' 38 | analyzes_after_compress: True 39 | adaptive_pad_config: 40 | pre_transform: 41 | post_transform: 42 | detection_model: 43 | key: 'fasterrcnn_resnet50_fpn' 44 | kwargs: 45 | pretrained: True 46 | progress: True 47 | 48 | test: 49 | test_data_loader: 50 | dataset_id: *coco_val 51 | sampler: 52 | class_or_func: !import_get 53 | key: 'torch.utils.data.SequentialSampler' 54 | kwargs: 55 | collate_fn: 'coco_collate_fn' 56 | kwargs: 57 | batch_size: 1 58 | num_workers: 4 59 | -------------------------------------------------------------------------------- /legacy/configs/coco2017/input_compression/joint_autoregressive_hierarchical_prior-faster_rcnn_resnet50_fpn.yaml: -------------------------------------------------------------------------------- 
1 | datasets: 2 | coco2017: 3 | name: &dataset_name 'coco2017' 4 | type: 'cocodetect' 5 | root: &root_dir !join ['~/dataset/', *dataset_name] 6 | splits: 7 | val: 8 | dataset_id: &coco_val !join [*dataset_name, '/val'] 9 | images: !join [*root_dir, '/val2017'] 10 | annotations: !join [*root_dir, '/annotations/instances_val2017.json'] 11 | annotated_only: False 12 | 13 | models: 14 | model: 15 | name: 'InputCompressionDetectionModel' 16 | params: 17 | codec_params: 18 | - type: 'BPGModule' 19 | params: 20 | encoder_path: '~/software/libbpg-0.9.8/bpgenc' 21 | decoder_path: '~/software/libbpg-0.9.8/bpgdec' 22 | quality: 50 23 | returns_file_size: True 24 | analysis_config: 25 | analyzer_configs: 26 | - type: 'FileSizeAnalyzer' 27 | params: 28 | unit: 'KB' 29 | analyzes_after_compress: True 30 | adaptive_pad_kwargs: 31 | fill: 0 32 | padding_mode: 'constant' 33 | factor: 128 34 | pre_transform_params: 35 | post_transform_params: 36 | uses_cpu4compression_model: True 37 | compression_model: 38 | name: 'mbt2018' 39 | params: 40 | pretrained: True 41 | quality: 8 42 | metric: 'mse' 43 | ckpt: './resource/ckpt/input_compression/joint_autoregressive_hierarchical_prior.pt' 44 | detection_model: 45 | name: 'fasterrcnn_resnet50_fpn' 46 | params: 47 | pretrained: True 48 | progress: True 49 | pretrained_backbone: True 50 | ckpt: '' 51 | 52 | test: 53 | test_data_loader: 54 | dataset_id: *coco_val 55 | random_sample: False 56 | batch_size: 1 57 | num_workers: 4 58 | collate_fn: 'coco_collate_fn' 59 | -------------------------------------------------------------------------------- /legacy/configs/ilsvrc2012/input_compression/vtm-resnet50.yaml: -------------------------------------------------------------------------------- 1 | datasets: 2 | ilsvrc2012: 3 | name: &dataset_name 'ilsvrc2012' 4 | type: 'ImageFolder' 5 | root: &root_dir !join ['~/dataset/', *dataset_name] 6 | splits: 7 | val: 8 | dataset_id: &imagenet_val !join [*dataset_name, '/val'] 9 | params: 10 | root: !join 
[*root_dir, '/val'] 11 | transform_params: 12 | - type: 'Resize' 13 | params: 14 | size: 256 15 | - type: 'CenterCrop' 16 | params: 17 | size: [224, 224] 18 | 19 | models: 20 | model: 21 | name: 'CodecInputCompressionClassifier' 22 | params: 23 | codec_params: 24 | - type: 'VTMModule' 25 | params: 26 | encoder_path: '~/software/VVCSoftware_VTM/bin/EncoderAppStatic' 27 | decoder_path: '~/software/VVCSoftware_VTM/bin/DecoderAppStatic' 28 | config_path: '~/software/VVCSoftware_VTM/cfg/encoder_intra_vtm.cfg' 29 | color_mode: 'ycbcr' 30 | quality: 63 31 | returns_file_size: True 32 | post_transform_params: 33 | - type: 'ToTensor' 34 | params: 35 | - type: 'Normalize' 36 | params: 37 | mean: [0.485, 0.456, 0.406] 38 | std: [0.229, 0.224, 0.225] 39 | analysis_config: 40 | analyzer_configs: 41 | - type: 'FileSizeAccumulator' 42 | params: 43 | unit: 'KB' 44 | classification_model: 45 | name: &model_name 'resnet50' 46 | params: 47 | num_classes: 1000 48 | pretrained: True 49 | experiment: &experiment !join [*dataset_name, '-', *model_name] 50 | ckpt: !join ['./resource/ckpt/', *experiment, '.pt'] 51 | 52 | test: 53 | test_data_loader: 54 | dataset_id: *imagenet_val 55 | random_sample: False 56 | batch_size: 1 57 | num_workers: 16 58 | -------------------------------------------------------------------------------- /legacy/configs/ilsvrc2012/feature_compression/jpeg-resnet50.yaml: -------------------------------------------------------------------------------- 1 | datasets: 2 | ilsvrc2012: 3 | name: &dataset_name 'ilsvrc2012' 4 | type: 'ImageFolder' 5 | root: &root_dir !join ['~/dataset/', *dataset_name] 6 | splits: 7 | val: 8 | dataset_id: &imagenet_val !join [*dataset_name, '/val'] 9 | params: 10 | root: !join [*root_dir, '/val'] 11 | transform_params: 12 | - type: 'Resize' 13 | params: 14 | size: 256 15 | - type: 'CenterCrop' 16 | params: 17 | size: [224, 224] 18 | - type: 'ToTensor' 19 | params: 20 | - type: 'Normalize' 21 | params: 22 | mean: [0.485, 0.456, 0.406] 23 | 
std: [0.229, 0.224, 0.225] 24 | 25 | models: 26 | model: 27 | name: 'CodecFeatureCompressionClassifier' 28 | params: 29 | codec_params: 30 | - type: 'PILTensorModule' 31 | params: 32 | format: 'JPEG' 33 | quality: 90 34 | returns_file_size: True 35 | encoder_config: 36 | sequential: ['conv1', 'bn1', 'relu', 'maxpool', 'layer1', 'layer2'] 37 | decoder_config: 38 | sequential: ['layer3', 'layer4', 'avgpool'] 39 | classifier_config: 40 | sequential: ['fc'] 41 | post_transform_params: 42 | analysis_config: 43 | analyzer_configs: 44 | - type: 'FileSizeAccumulator' 45 | params: 46 | unit: 'KB' 47 | classification_model: 48 | name: &model_name 'resnet50' 49 | params: 50 | num_classes: 1000 51 | pretrained: True 52 | experiment: &experiment !join [*dataset_name, '-', *model_name] 53 | ckpt: !join ['./resource/ckpt/', *experiment, '.pt'] 54 | 55 | test: 56 | test_data_loader: 57 | dataset_id: *imagenet_val 58 | random_sample: False 59 | batch_size: 1 60 | num_workers: 16 61 | collate_fn: 'default_collate_w_pil' 62 | -------------------------------------------------------------------------------- /legacy/configs/ilsvrc2012/feature_compression/webp-resnet50.yaml: -------------------------------------------------------------------------------- 1 | datasets: 2 | ilsvrc2012: 3 | name: &dataset_name 'ilsvrc2012' 4 | type: 'ImageFolder' 5 | root: &root_dir !join ['~/dataset/', *dataset_name] 6 | splits: 7 | val: 8 | dataset_id: &imagenet_val !join [*dataset_name, '/val'] 9 | params: 10 | root: !join [*root_dir, '/val'] 11 | transform_params: 12 | - type: 'Resize' 13 | params: 14 | size: 256 15 | - type: 'CenterCrop' 16 | params: 17 | size: [224, 224] 18 | - type: 'ToTensor' 19 | params: 20 | - type: 'Normalize' 21 | params: 22 | mean: [0.485, 0.456, 0.406] 23 | std: [0.229, 0.224, 0.225] 24 | 25 | models: 26 | model: 27 | name: 'CodecFeatureCompressionClassifier' 28 | params: 29 | codec_params: 30 | - type: 'PILTensorModule' 31 | params: 32 | format: 'WEBP' 33 | quality: 90 34 | 
returns_file_size: True 35 | encoder_config: 36 | sequential: ['conv1', 'bn1', 'relu', 'maxpool', 'layer1', 'layer2'] 37 | decoder_config: 38 | sequential: ['layer3', 'layer4', 'avgpool'] 39 | classifier_config: 40 | sequential: ['fc'] 41 | post_transform_params: 42 | analysis_config: 43 | analyzer_configs: 44 | - type: 'FileSizeAccumulator' 45 | params: 46 | unit: 'KB' 47 | classification_model: 48 | name: &model_name 'resnet50' 49 | params: 50 | num_classes: 1000 51 | pretrained: True 52 | experiment: &experiment !join [*dataset_name, '-', *model_name] 53 | ckpt: !join ['./resource/ckpt/', *experiment, '.pt'] 54 | 55 | test: 56 | test_data_loader: 57 | dataset_id: *imagenet_val 58 | random_sample: False 59 | batch_size: 1 60 | num_workers: 16 61 | collate_fn: 'default_collate_w_pil' 62 | -------------------------------------------------------------------------------- /legacy/configs/ilsvrc2012/input_compression/factorized_prior-resnet50.yaml: -------------------------------------------------------------------------------- 1 | datasets: 2 | ilsvrc2012: 3 | name: &dataset_name 'ilsvrc2012' 4 | type: 'ImageFolder' 5 | root: &root_dir !join ['~/dataset/', *dataset_name] 6 | splits: 7 | val: 8 | dataset_id: &imagenet_val !join [*dataset_name, '/val'] 9 | params: 10 | root: !join [*root_dir, '/val'] 11 | transform_params: 12 | - type: 'Resize' 13 | params: 14 | size: 256 15 | - type: 'CenterCrop' 16 | params: 17 | size: &input_size [224, 224] 18 | - type: 'ToTensor' 19 | params: 20 | - type: 'AdaptivePad' 21 | params: 22 | fill: 0 23 | factor: 64 24 | 25 | models: 26 | model: 27 | name: 'NeuralInputCompressionClassifier' 28 | params: 29 | post_transform_params: 30 | - type: 'CenterCrop' 31 | params: 32 | size: *input_size 33 | - type: 'Normalize' 34 | params: 35 | mean: [0.485, 0.456, 0.406] 36 | std: [0.229, 0.224, 0.225] 37 | analysis_config: 38 | analyzes_after_compress: True 39 | analyzer_configs: 40 | - type: 'FileSizeAnalyzer' 41 | params: 42 | unit: 'KB' 43 | 
compression_model: 44 | name: 'bmshj2018_factorized' 45 | params: 46 | pretrained: True 47 | quality: 8 48 | metric: 'mse' 49 | ckpt: './resource/ckpt/input_compression/factorized_prior.pt' 50 | classification_model: 51 | name: &model_name 'resnet50' 52 | params: 53 | num_classes: 1000 54 | pretrained: True 55 | experiment: &experiment !join [*dataset_name, '-', *model_name] 56 | ckpt: !join ['./resource/ckpt/', *experiment, '.pt'] 57 | 58 | test: 59 | test_data_loader: 60 | dataset_id: *imagenet_val 61 | random_sample: False 62 | batch_size: 1 63 | num_workers: 16 64 | -------------------------------------------------------------------------------- /legacy/configs/ilsvrc2012/input_compression/scale_hyperprior-resnet50.yaml: -------------------------------------------------------------------------------- 1 | datasets: 2 | ilsvrc2012: 3 | name: &dataset_name 'ilsvrc2012' 4 | type: 'ImageFolder' 5 | root: &root_dir !join ['~/dataset/', *dataset_name] 6 | splits: 7 | val: 8 | dataset_id: &imagenet_val !join [*dataset_name, '/val'] 9 | params: 10 | root: !join [*root_dir, '/val'] 11 | transform_params: 12 | - type: 'Resize' 13 | params: 14 | size: 256 15 | - type: 'CenterCrop' 16 | params: 17 | size: &input_size [224, 224] 18 | - type: 'ToTensor' 19 | params: 20 | - type: 'AdaptivePad' 21 | params: 22 | fill: 0 23 | factor: 64 24 | 25 | models: 26 | model: 27 | name: 'NeuralInputCompressionClassifier' 28 | params: 29 | post_transform_params: 30 | - type: 'CenterCrop' 31 | params: 32 | size: *input_size 33 | - type: 'Normalize' 34 | params: 35 | mean: [0.485, 0.456, 0.406] 36 | std: [0.229, 0.224, 0.225] 37 | analysis_config: 38 | analyzes_after_compress: True 39 | analyzer_configs: 40 | - type: 'FileSizeAnalyzer' 41 | params: 42 | unit: 'KB' 43 | compression_model: 44 | name: 'bmshj2018_hyperprior' 45 | params: 46 | pretrained: True 47 | quality: 8 48 | metric: 'mse' 49 | ckpt: './resource/ckpt/input_compression/scale_hyperprior.pt' 50 | classification_model: 51 | 
name: &model_name 'resnet50' 52 | params: 53 | num_classes: 1000 54 | pretrained: True 55 | experiment: &experiment !join [*dataset_name, '-', *model_name] 56 | ckpt: !join ['./resource/ckpt/', *experiment, '.pt'] 57 | 58 | test: 59 | test_data_loader: 60 | dataset_id: *imagenet_val 61 | random_sample: False 62 | batch_size: 1 63 | num_workers: 16 64 | -------------------------------------------------------------------------------- /legacy/configs/ilsvrc2012/input_compression/mean_scale_hyperprior-resnet50.yaml: -------------------------------------------------------------------------------- 1 | datasets: 2 | ilsvrc2012: 3 | name: &dataset_name 'ilsvrc2012' 4 | type: 'ImageFolder' 5 | root: &root_dir !join ['~/dataset/', *dataset_name] 6 | splits: 7 | val: 8 | dataset_id: &imagenet_val !join [*dataset_name, '/val'] 9 | params: 10 | root: !join [*root_dir, '/val'] 11 | transform_params: 12 | - type: 'Resize' 13 | params: 14 | size: 256 15 | - type: 'CenterCrop' 16 | params: 17 | size: &input_size [224, 224] 18 | - type: 'ToTensor' 19 | params: 20 | - type: 'AdaptivePad' 21 | params: 22 | fill: 0 23 | factor: 64 24 | 25 | models: 26 | model: 27 | name: 'NeuralInputCompressionClassifier' 28 | params: 29 | post_transform_params: 30 | - type: 'CenterCrop' 31 | params: 32 | size: *input_size 33 | - type: 'Normalize' 34 | params: 35 | mean: [0.485, 0.456, 0.406] 36 | std: [0.229, 0.224, 0.225] 37 | analysis_config: 38 | analyzes_after_compress: True 39 | analyzer_configs: 40 | - type: 'FileSizeAnalyzer' 41 | params: 42 | unit: 'KB' 43 | compression_model: 44 | name: 'mbt2018_mean' 45 | params: 46 | pretrained: True 47 | quality: 8 48 | metric: 'mse' 49 | ckpt: './resource/ckpt/input_compression/mean_scale_hyperprior.pt' 50 | classification_model: 51 | name: &model_name 'resnet50' 52 | params: 53 | num_classes: 1000 54 | pretrained: True 55 | experiment: &experiment !join [*dataset_name, '-', *model_name] 56 | ckpt: !join ['./resource/ckpt/', *experiment, '.pt'] 57 | 58 | 
test: 59 | test_data_loader: 60 | dataset_id: *imagenet_val 61 | random_sample: False 62 | batch_size: 1 63 | num_workers: 16 64 | -------------------------------------------------------------------------------- /legacy/configs/pascal_voc2012/input_compression/jpeg-deeplabv3_resnet50.yaml: -------------------------------------------------------------------------------- 1 | datasets: 2 | pascal_voc: 3 | name: &dataset_name 'pascal_voc2012' 4 | type: 'VOCSegmentation' 5 | root: &root_dir '~/dataset' 6 | splits: 7 | val: 8 | dataset_id: &pascal_val !join [*dataset_name, '/val'] 9 | params: 10 | root: *root_dir 11 | image_set: 'val' 12 | year: '2012' 13 | download: False 14 | transforms_compose_cls: 'CustomCompose' 15 | transforms_params: &val_transform 16 | - type: 'CustomRandomResize' 17 | params: 18 | min_size: 513 19 | max_size: 513 20 | - type: 'CustomToTensor' 21 | params: 22 | converts_sample: False 23 | converts_target: True 24 | 25 | models: 26 | model: 27 | name: 'CodecInputCompressionSegmentationModel' 28 | params: 29 | codec_params: 30 | - type: 'PILImageModule' 31 | params: 32 | format: 'JPEG' 33 | quality: 90 34 | returns_file_size: True 35 | analysis_config: 36 | analyzer_configs: 37 | - type: 'FileSizeAccumulator' 38 | params: 39 | unit: 'KB' 40 | post_transform_params: 41 | - type: 'ToTensor' 42 | params: 43 | - type: 'Normalize' 44 | params: 45 | mean: [0.485, 0.456, 0.406] 46 | std: [0.229, 0.224, 0.225] 47 | segmentation_model: 48 | name: 'deeplabv3_resnet50' 49 | params: 50 | pretrained: True 51 | pretrained_backbone: True 52 | num_classes: 21 53 | aux_loss: True 54 | ckpt: 'https://github.com/yoshitomo-matsubara/torchdistill/releases/download/v0.2.8/pascal_voc2012-deeplabv3_resnet50.pt' 55 | 56 | test: 57 | test_data_loader: 58 | dataset_id: *pascal_val 59 | random_sample: False 60 | batch_size: 1 61 | num_workers: 16 62 | collate_fn: 'pascal_seg_eval_collate_fn' 63 | -------------------------------------------------------------------------------- 
/legacy/configs/pascal_voc2012/input_compression/webp-deeplabv3_resnet50.yaml: -------------------------------------------------------------------------------- 1 | datasets: 2 | pascal_voc: 3 | name: &dataset_name 'pascal_voc2012' 4 | type: 'VOCSegmentation' 5 | root: &root_dir '~/dataset' 6 | splits: 7 | val: 8 | dataset_id: &pascal_val !join [*dataset_name, '/val'] 9 | params: 10 | root: *root_dir 11 | image_set: 'val' 12 | year: '2012' 13 | download: False 14 | transforms_compose_cls: 'CustomCompose' 15 | transforms_params: &val_transform 16 | - type: 'CustomRandomResize' 17 | params: 18 | min_size: 513 19 | max_size: 513 20 | - type: 'CustomToTensor' 21 | params: 22 | converts_sample: False 23 | converts_target: True 24 | 25 | models: 26 | model: 27 | name: 'CodecInputCompressionSegmentationModel' 28 | params: 29 | codec_params: 30 | - type: 'PILImageModule' 31 | params: 32 | format: 'WEBP' 33 | quality: 90 34 | returns_file_size: True 35 | analysis_config: 36 | analyzer_configs: 37 | - type: 'FileSizeAccumulator' 38 | params: 39 | unit: 'KB' 40 | post_transform_params: 41 | - type: 'ToTensor' 42 | params: 43 | - type: 'Normalize' 44 | params: 45 | mean: [0.485, 0.456, 0.406] 46 | std: [0.229, 0.224, 0.225] 47 | segmentation_model: 48 | name: 'deeplabv3_resnet50' 49 | params: 50 | pretrained: True 51 | pretrained_backbone: True 52 | num_classes: 21 53 | aux_loss: True 54 | ckpt: 'https://github.com/yoshitomo-matsubara/torchdistill/releases/download/v0.2.8/pascal_voc2012-deeplabv3_resnet50.pt' 55 | 56 | test: 57 | test_data_loader: 58 | dataset_id: *pascal_val 59 | random_sample: False 60 | batch_size: 1 61 | num_workers: 16 62 | collate_fn: 'pascal_seg_eval_collate_fn' 63 | -------------------------------------------------------------------------------- /legacy/configs/pascal_voc2012/input_compression/jpeg-deeplabv3_resnet101.yaml: -------------------------------------------------------------------------------- 1 | datasets: 2 | pascal_voc: 3 | name: 
&dataset_name 'pascal_voc2012' 4 | type: 'VOCSegmentation' 5 | root: &root_dir '~/dataset' 6 | splits: 7 | val: 8 | dataset_id: &pascal_val !join [*dataset_name, '/val'] 9 | params: 10 | root: *root_dir 11 | image_set: 'val' 12 | year: '2012' 13 | download: False 14 | transforms_compose_cls: 'CustomCompose' 15 | transforms_params: &val_transform 16 | - type: 'CustomRandomResize' 17 | params: 18 | min_size: 513 19 | max_size: 513 20 | - type: 'CustomToTensor' 21 | params: 22 | converts_sample: False 23 | converts_target: True 24 | 25 | models: 26 | model: 27 | name: 'CodecInputCompressionSegmentationModel' 28 | params: 29 | codec_params: 30 | - type: 'PILImageModule' 31 | params: 32 | format: 'JPEG' 33 | quality: 90 34 | returns_file_size: True 35 | analysis_config: 36 | analyzer_configs: 37 | - type: 'FileSizeAccumulator' 38 | params: 39 | unit: 'KB' 40 | post_transform_params: 41 | - type: 'ToTensor' 42 | params: 43 | - type: 'Normalize' 44 | params: 45 | mean: [0.485, 0.456, 0.406] 46 | std: [0.229, 0.224, 0.225] 47 | segmentation_model: 48 | name: 'deeplabv3_resnet101' 49 | params: 50 | pretrained: False 51 | pretrained_backbone: True 52 | num_classes: 21 53 | aux_loss: True 54 | ckpt: 'https://github.com/yoshitomo-matsubara/torchdistill/releases/download/v0.2.8/pascal_voc2012-deeplabv3_resnet101.pt' 55 | 56 | test: 57 | test_data_loader: 58 | dataset_id: *pascal_val 59 | random_sample: False 60 | batch_size: 1 61 | num_workers: 16 62 | collate_fn: 'pascal_seg_eval_collate_fn' 63 | -------------------------------------------------------------------------------- /legacy/configs/pascal_voc2012/input_compression/webp-deeplabv3_resnet101.yaml: -------------------------------------------------------------------------------- 1 | datasets: 2 | pascal_voc: 3 | name: &dataset_name 'pascal_voc2012' 4 | type: 'VOCSegmentation' 5 | root: &root_dir '~/dataset' 6 | splits: 7 | val: 8 | dataset_id: &pascal_val !join [*dataset_name, '/val'] 9 | params: 10 | root: *root_dir 11 | 
image_set: 'val' 12 | year: '2012' 13 | download: False 14 | transforms_compose_cls: 'CustomCompose' 15 | transforms_params: &val_transform 16 | - type: 'CustomRandomResize' 17 | params: 18 | min_size: 513 19 | max_size: 513 20 | - type: 'CustomToTensor' 21 | params: 22 | converts_sample: False 23 | converts_target: True 24 | 25 | models: 26 | model: 27 | name: 'CodecInputCompressionSegmentationModel' 28 | params: 29 | codec_params: 30 | - type: 'PILImageModule' 31 | params: 32 | format: 'WEBP' 33 | quality: 90 34 | returns_file_size: True 35 | analysis_config: 36 | analyzer_configs: 37 | - type: 'FileSizeAccumulator' 38 | params: 39 | unit: 'KB' 40 | post_transform_params: 41 | - type: 'ToTensor' 42 | params: 43 | - type: 'Normalize' 44 | params: 45 | mean: [0.485, 0.456, 0.406] 46 | std: [0.229, 0.224, 0.225] 47 | segmentation_model: 48 | name: 'deeplabv3_resnet101' 49 | params: 50 | pretrained: True 51 | pretrained_backbone: True 52 | num_classes: 21 53 | aux_loss: True 54 | ckpt: 'https://github.com/yoshitomo-matsubara/torchdistill/releases/download/v0.2.8/pascal_voc2012-deeplabv3_resnet101.pt' 55 | 56 | test: 57 | test_data_loader: 58 | dataset_id: *pascal_val 59 | random_sample: False 60 | batch_size: 1 61 | num_workers: 16 62 | collate_fn: 'pascal_seg_eval_collate_fn' 63 | -------------------------------------------------------------------------------- /script/neural_input_compression/README.md: -------------------------------------------------------------------------------- 1 | # Neural Input Compression Baselines 2 | 3 | We considered the following neural image compression models: 4 | - Factorized Prior 5 | - Scale Hyperprior 6 | - Mean-scale Hyperprior 7 | - Joint Autoregressive Hierarchical Prior 8 | 9 | 10 | ## ImageNet (ILSVRC 2012): Image Classification 11 | Neural input compression followed by ResNet-50 12 | 13 | ```shell 14 | bash script/neural_input_compression/ilsvrc2012-image_classification.sh factorized_prior-resnet50 8 15 | bash 
script/neural_input_compression/ilsvrc2012-image_classification.sh scale_hyperprior-resnet50 8 16 | bash script/neural_input_compression/ilsvrc2012-image_classification.sh mean_scale_hyperprior-resnet50 8 17 | bash script/neural_input_compression/ilsvrc2012-image_classification.sh joint_autoregressive_hierarchical_prior-resnet50 8 18 | ``` 19 | 20 | ## COCO 2017: Object Detection 21 | Neural input compression followed by Faster R-CNN with ResNet-50 and FPN 22 | 23 | ```shell 24 | bash script/neural_input_compression/coco2017-object_detection.sh factorized_prior-faster_rcnn_resnet50_fpn 8 25 | bash script/neural_input_compression/coco2017-object_detection.sh scale_hyperprior-faster_rcnn_resnet50_fpn 8 26 | bash script/neural_input_compression/coco2017-object_detection.sh mean_scale_hyperprior-faster_rcnn_resnet50_fpn 8 27 | bash script/neural_input_compression/coco2017-object_detection.sh joint_autoregressive_hierarchical_prior-faster_rcnn_resnet50_fpn 8 28 | ``` 29 | 30 | ## PASCAL VOC 2012: Semantic Segmentation 31 | Neural input compression followed by DeepLabv3 with ResNet-50 32 | 33 | ```shell 34 | bash script/neural_input_compression/pascal_voc2012-semantic_segmentation.sh factorized_prior-deeplabv3_resnet50 8 35 | bash script/neural_input_compression/pascal_voc2012-semantic_segmentation.sh scale_hyperprior-deeplabv3_resnet50 8 36 | bash script/neural_input_compression/pascal_voc2012-semantic_segmentation.sh mean_scale_hyperprior-deeplabv3_resnet50 8 37 | bash script/neural_input_compression/pascal_voc2012-semantic_segmentation.sh joint_autoregressive_hierarchical_prior-deeplabv3_resnet50 8 38 | ``` 39 | -------------------------------------------------------------------------------- /legacy/configs/ilsvrc2012/input_compression/joint_autoregressive_hierarchical_prior-resnet50.yaml: -------------------------------------------------------------------------------- 1 | datasets: 2 | ilsvrc2012: 3 | name: &dataset_name 'ilsvrc2012' 4 | type: 'ImageFolder' 5 | root: 
&root_dir !join ['~/dataset/', *dataset_name] 6 | splits: 7 | val: 8 | dataset_id: &imagenet_val !join [*dataset_name, '/val'] 9 | params: 10 | root: !join [*root_dir, '/val'] 11 | transform_params: 12 | - type: 'Resize' 13 | params: 14 | size: 256 15 | - type: 'CenterCrop' 16 | params: 17 | size: &input_size [224, 224] 18 | - type: 'ToTensor' 19 | params: 20 | - type: 'AdaptivePad' 21 | params: 22 | fill: 0 23 | factor: 64 24 | 25 | models: 26 | model: 27 | name: 'NeuralInputCompressionClassifier' 28 | params: 29 | post_transform_params: 30 | - type: 'CenterCrop' 31 | params: 32 | size: *input_size 33 | - type: 'Normalize' 34 | params: 35 | mean: [0.485, 0.456, 0.406] 36 | std: [0.229, 0.224, 0.225] 37 | analysis_config: 38 | analyzes_after_compress: True 39 | analyzer_configs: 40 | - type: 'FileSizeAnalyzer' 41 | params: 42 | unit: 'KB' 43 | uses_cpu4compression_model: True 44 | compression_model: 45 | name: 'mbt2018' 46 | params: 47 | pretrained: True 48 | quality: 8 49 | metric: 'mse' 50 | ckpt: './resource/ckpt/input_compression/joint_autoregressive_hierarchical_prior.pt' 51 | classification_model: 52 | name: &model_name 'resnet50' 53 | params: 54 | num_classes: 1000 55 | pretrained: True 56 | experiment: &experiment !join [*dataset_name, '-', *model_name] 57 | ckpt: !join ['./resource/ckpt/', *experiment, '.pt'] 58 | 59 | test: 60 | test_data_loader: 61 | dataset_id: *imagenet_val 62 | random_sample: False 63 | batch_size: 1 64 | num_workers: 16 65 | -------------------------------------------------------------------------------- /legacy/configs/pascal_voc2012/input_compression/bpg-deeplabv3_resnet50.yaml: -------------------------------------------------------------------------------- 1 | datasets: 2 | pascal_voc: 3 | name: &dataset_name 'pascal_voc2012' 4 | type: 'VOCSegmentation' 5 | root: &root_dir '~/dataset' 6 | splits: 7 | val: 8 | dataset_id: &pascal_val !join [*dataset_name, '/val'] 9 | params: 10 | root: *root_dir 11 | image_set: 'val' 12 | year: 
'2012' 13 | download: False 14 | transforms_compose_cls: 'CustomCompose' 15 | transforms_params: &val_transform 16 | - type: 'CustomRandomResize' 17 | params: 18 | min_size: 513 19 | max_size: 513 20 | - type: 'CustomToTensor' 21 | params: 22 | converts_sample: False 23 | converts_target: True 24 | 25 | models: 26 | model: 27 | name: 'CodecInputCompressionSegmentationModel' 28 | params: 29 | codec_params: 30 | - type: 'BPGModule' 31 | params: 32 | encoder_path: '~/software/libbpg-0.9.8/bpgenc' 33 | decoder_path: '~/software/libbpg-0.9.8/bpgdec' 34 | quality: 50 35 | returns_file_size: True 36 | analysis_config: 37 | analyzer_configs: 38 | - type: 'FileSizeAccumulator' 39 | params: 40 | unit: 'KB' 41 | post_transform_params: 42 | - type: 'ToTensor' 43 | params: 44 | - type: 'Normalize' 45 | params: 46 | mean: [0.485, 0.456, 0.406] 47 | std: [0.229, 0.224, 0.225] 48 | segmentation_model: 49 | name: 'deeplabv3_resnet50' 50 | params: 51 | pretrained: True 52 | pretrained_backbone: True 53 | num_classes: 21 54 | aux_loss: True 55 | ckpt: 'https://github.com/yoshitomo-matsubara/torchdistill/releases/download/v0.2.8/pascal_voc2012-deeplabv3_resnet50.pt' 56 | 57 | test: 58 | test_data_loader: 59 | dataset_id: *pascal_val 60 | random_sample: False 61 | batch_size: 1 62 | num_workers: 16 63 | collate_fn: 'pascal_seg_eval_collate_fn' 64 | -------------------------------------------------------------------------------- /legacy/configs/pascal_voc2012/input_compression/bpg-deeplabv3_resnet101.yaml: -------------------------------------------------------------------------------- 1 | datasets: 2 | pascal_voc: 3 | name: &dataset_name 'pascal_voc2012' 4 | type: 'VOCSegmentation' 5 | root: &root_dir '~/dataset' 6 | splits: 7 | val: 8 | dataset_id: &pascal_val !join [*dataset_name, '/val'] 9 | params: 10 | root: *root_dir 11 | image_set: 'val' 12 | year: '2012' 13 | download: False 14 | transforms_compose_cls: 'CustomCompose' 15 | transforms_params: &val_transform 16 | - type: 
'CustomRandomResize' 17 | params: 18 | min_size: 513 19 | max_size: 513 20 | - type: 'CustomToTensor' 21 | params: 22 | converts_sample: False 23 | converts_target: True 24 | 25 | models: 26 | model: 27 | name: 'CodecInputCompressionSegmentationModel' 28 | params: 29 | codec_params: 30 | - type: 'BPGModule' 31 | params: 32 | encoder_path: '~/software/libbpg-0.9.8/bpgenc' 33 | decoder_path: '~/software/libbpg-0.9.8/bpgdec' 34 | quality: 50 35 | returns_file_size: True 36 | analysis_config: 37 | analyzer_configs: 38 | - type: 'FileSizeAccumulator' 39 | params: 40 | unit: 'KB' 41 | post_transform_params: 42 | - type: 'ToTensor' 43 | params: 44 | - type: 'Normalize' 45 | params: 46 | mean: [0.485, 0.456, 0.406] 47 | std: [0.229, 0.224, 0.225] 48 | segmentation_model: 49 | name: 'deeplabv3_resnet101' 50 | params: 51 | pretrained: True 52 | pretrained_backbone: True 53 | num_classes: 21 54 | aux_loss: True 55 | ckpt: 'https://github.com/yoshitomo-matsubara/torchdistill/releases/download/v0.2.8/pascal_voc2012-deeplabv3_resnet101.pt' 56 | 57 | test: 58 | test_data_loader: 59 | dataset_id: *pascal_val 60 | random_sample: False 61 | batch_size: 1 62 | num_workers: 16 63 | collate_fn: 'pascal_seg_eval_collate_fn' 64 | -------------------------------------------------------------------------------- /legacy/configs/ilsvrc2012/input_compression/factorized_prior-tf_efficientnet_l2_ns.yaml: -------------------------------------------------------------------------------- 1 | datasets: 2 | ilsvrc2012: 3 | name: &dataset_name 'ilsvrc2012' 4 | type: 'ImageFolder' 5 | root: &root_dir !join ['~/dataset/', *dataset_name] 6 | splits: 7 | val: 8 | dataset_id: &imagenet_val !join [*dataset_name, '/val'] 9 | params: 10 | root: !join [*root_dir, '/val'] 11 | transform_params: 12 | - type: 'WrappedResize' 13 | params: 14 | size: 833 15 | interpolation: 'bicubic' 16 | - type: 'CenterCrop' 17 | params: 18 | size: &input_size [800, 800] 19 | - type: 'ToTensor' 20 | params: 21 | - type: 
'AdaptivePad' 22 | params: 23 | fill: 0 24 | factor: 64 25 | 26 | models: 27 | model: 28 | name: 'NeuralInputCompressionClassifier' 29 | params: 30 | post_transform_params: 31 | - type: 'CenterCrop' 32 | params: 33 | size: *input_size 34 | - type: 'Normalize' 35 | params: 36 | mean: [0.485, 0.456, 0.406] 37 | std: [0.229, 0.224, 0.225] 38 | analysis_config: 39 | analyzes_after_compress: True 40 | analyzer_configs: 41 | - type: 'FileSizeAnalyzer' 42 | params: 43 | unit: 'KB' 44 | compression_model: 45 | name: 'bmshj2018_factorized' 46 | params: 47 | pretrained: True 48 | quality: 8 49 | metric: 'mse' 50 | ckpt: './resource/ckpt/input_compression/factorized_prior.pt' 51 | classification_model: 52 | name: &model_name 'tf_efficientnet_l2_ns' 53 | repo_or_dir: 'rwightman/pytorch-image-models' 54 | params: 55 | num_classes: 1000 56 | pretrained: True 57 | experiment: &experiment !join [*dataset_name, '-', *model_name] 58 | ckpt: !join ['./imagenet/vanilla/', *experiment, '.pt'] 59 | 60 | test: 61 | test_data_loader: 62 | dataset_id: *imagenet_val 63 | random_sample: False 64 | batch_size: 1 65 | num_workers: 16 66 | -------------------------------------------------------------------------------- /legacy/configs/ilsvrc2012/input_compression/mean_scale_hyperprior-tf_efficientnet_l2_ns.yaml: -------------------------------------------------------------------------------- 1 | datasets: 2 | ilsvrc2012: 3 | name: &dataset_name 'ilsvrc2012' 4 | type: 'ImageFolder' 5 | root: &root_dir !join ['~/dataset/', *dataset_name] 6 | splits: 7 | val: 8 | dataset_id: &imagenet_val !join [*dataset_name, '/val'] 9 | params: 10 | root: !join [*root_dir, '/val'] 11 | transform_params: 12 | - type: 'WrappedResize' 13 | params: 14 | size: 833 15 | interpolation: 'bicubic' 16 | - type: 'CenterCrop' 17 | params: 18 | size: &input_size [800, 800] 19 | - type: 'ToTensor' 20 | params: 21 | - type: 'AdaptivePad' 22 | params: 23 | fill: 0 24 | factor: 64 25 | 26 | models: 27 | model: 28 | name: 
'NeuralInputCompressionClassifier' 29 | params: 30 | post_transform_params: 31 | - type: 'CenterCrop' 32 | params: 33 | size: *input_size 34 | - type: 'Normalize' 35 | params: 36 | mean: [0.485, 0.456, 0.406] 37 | std: [0.229, 0.224, 0.225] 38 | analysis_config: 39 | analyzes_after_compress: True 40 | analyzer_configs: 41 | - type: 'FileSizeAnalyzer' 42 | params: 43 | unit: 'KB' 44 | compression_model: 45 | name: 'mbt2018_mean' 46 | params: 47 | pretrained: True 48 | quality: 8 49 | metric: 'mse' 50 | ckpt: './resource/ckpt/input_compression/mean_scale_hyperprior.pt' 51 | classification_model: 52 | name: &model_name 'tf_efficientnet_l2_ns' 53 | repo_or_dir: 'rwightman/pytorch-image-models' 54 | params: 55 | num_classes: 1000 56 | pretrained: True 57 | experiment: &experiment !join [*dataset_name, '-', *model_name] 58 | ckpt: !join ['./imagenet/vanilla/', *experiment, '.pt'] 59 | 60 | test: 61 | test_data_loader: 62 | dataset_id: *imagenet_val 63 | random_sample: False 64 | batch_size: 1 65 | num_workers: 16 66 | -------------------------------------------------------------------------------- /legacy/configs/ilsvrc2012/input_compression/scale_hyperprior-tf_efficientnet_l2_ns.yaml: -------------------------------------------------------------------------------- 1 | datasets: 2 | ilsvrc2012: 3 | name: &dataset_name 'ilsvrc2012' 4 | type: 'ImageFolder' 5 | root: &root_dir !join ['~/dataset/', *dataset_name] 6 | splits: 7 | val: 8 | dataset_id: &imagenet_val !join [*dataset_name, '/val'] 9 | params: 10 | root: !join [*root_dir, '/val'] 11 | transform_params: 12 | - type: 'WrappedResize' 13 | params: 14 | size: 833 15 | interpolation: 'bicubic' 16 | - type: 'CenterCrop' 17 | params: 18 | size: &input_size [800, 800] 19 | - type: 'ToTensor' 20 | params: 21 | - type: 'AdaptivePad' 22 | params: 23 | fill: 0 24 | factor: 64 25 | 26 | models: 27 | model: 28 | name: 'NeuralInputCompressionClassifier' 29 | params: 30 | post_transform_params: 31 | - type: 'CenterCrop' 32 | 
params: 33 | size: *input_size 34 | - type: 'Normalize' 35 | params: 36 | mean: [0.485, 0.456, 0.406] 37 | std: [0.229, 0.224, 0.225] 38 | analysis_config: 39 | analyzes_after_compress: True 40 | analyzer_configs: 41 | - type: 'FileSizeAnalyzer' 42 | params: 43 | unit: 'KB' 44 | compression_model: 45 | name: 'bmshj2018_hyperprior' 46 | params: 47 | pretrained: True 48 | quality: 8 49 | metric: 'mse' 50 | ckpt: './resource/ckpt/input_compression/scale_hyperprior.pt' 51 | classification_model: 52 | name: &model_name 'tf_efficientnet_l2_ns' 53 | repo_or_dir: 'rwightman/pytorch-image-models' 54 | params: 55 | num_classes: 1000 56 | pretrained: True 57 | experiment: &experiment !join [*dataset_name, '-', *model_name] 58 | ckpt: !join ['./imagenet/vanilla/', *experiment, '.pt'] 59 | 60 | test: 61 | test_data_loader: 62 | dataset_id: *imagenet_val 63 | random_sample: False 64 | batch_size: 1 65 | num_workers: 16 66 | -------------------------------------------------------------------------------- /legacy/configs/ilsvrc2012/input_compression/scale_hyperprior-tf_efficientnet_l2_ns_475.yaml: -------------------------------------------------------------------------------- 1 | datasets: 2 | ilsvrc2012: 3 | name: &dataset_name 'ilsvrc2012' 4 | type: 'ImageFolder' 5 | root: &root_dir !join ['~/dataset/', *dataset_name] 6 | splits: 7 | val: 8 | dataset_id: &imagenet_val !join [*dataset_name, '/val'] 9 | params: 10 | root: !join [*root_dir, '/val'] 11 | transform_params: 12 | - type: 'WrappedResize' 13 | params: 14 | size: 507 15 | interpolation: 'bicubic' 16 | - type: 'CenterCrop' 17 | params: 18 | size: &input_size [475, 475] 19 | - type: 'ToTensor' 20 | params: 21 | - type: 'AdaptivePad' 22 | params: 23 | fill: 0 24 | factor: 64 25 | 26 | models: 27 | model: 28 | name: 'NeuralInputCompressionClassifier' 29 | params: 30 | post_transform_params: 31 | - type: 'CenterCrop' 32 | params: 33 | size: *input_size 34 | - type: 'Normalize' 35 | params: 36 | mean: [0.485, 0.456, 0.406] 37 
| std: [0.229, 0.224, 0.225] 38 | analysis_config: 39 | analyzes_after_compress: True 40 | analyzer_configs: 41 | - type: 'FileSizeAnalyzer' 42 | params: 43 | unit: 'KB' 44 | compression_model: 45 | name: 'bmshj2018_hyperprior' 46 | params: 47 | pretrained: True 48 | quality: 8 49 | metric: 'mse' 50 | ckpt: './resource/ckpt/input_compression/scale_hyperprior.pt' 51 | classification_model: 52 | name: &model_name 'tf_efficientnet_l2_ns' 53 | repo_or_dir: 'rwightman/pytorch-image-models' 54 | params: 55 | num_classes: 1000 56 | pretrained: True 57 | experiment: &experiment !join [*dataset_name, '-', *model_name] 58 | ckpt: !join ['./imagenet/vanilla/', *experiment, '.pt'] 59 | 60 | test: 61 | test_data_loader: 62 | dataset_id: *imagenet_val 63 | random_sample: False 64 | batch_size: 1 65 | num_workers: 16 66 | -------------------------------------------------------------------------------- /legacy/configs/ilsvrc2012/input_compression/factorized_prior-tf_efficientnet_l2_ns_475.yaml: -------------------------------------------------------------------------------- 1 | datasets: 2 | ilsvrc2012: 3 | name: &dataset_name 'ilsvrc2012' 4 | type: 'ImageFolder' 5 | root: &root_dir !join ['~/dataset/', *dataset_name] 6 | splits: 7 | val: 8 | dataset_id: &imagenet_val !join [*dataset_name, '/val'] 9 | params: 10 | root: !join [*root_dir, '/val'] 11 | transform_params: 12 | - type: 'WrappedResize' 13 | params: 14 | size: 507 15 | interpolation: 'bicubic' 16 | - type: 'CenterCrop' 17 | params: 18 | size: &input_size [475, 475] 19 | - type: 'ToTensor' 20 | params: 21 | - type: 'AdaptivePad' 22 | params: 23 | fill: 0 24 | factor: 64 25 | 26 | models: 27 | model: 28 | name: 'NeuralInputCompressionClassifier' 29 | params: 30 | post_transform_params: 31 | - type: 'CenterCrop' 32 | params: 33 | size: *input_size 34 | - type: 'Normalize' 35 | params: 36 | mean: [0.485, 0.456, 0.406] 37 | std: [0.229, 0.224, 0.225] 38 | analysis_config: 39 | analyzes_after_compress: True 40 | 
analyzer_configs: 41 | - type: 'FileSizeAnalyzer' 42 | params: 43 | unit: 'KB' 44 | compression_model: 45 | name: 'bmshj2018_factorized' 46 | params: 47 | pretrained: True 48 | quality: 8 49 | metric: 'mse' 50 | ckpt: './resource/ckpt/input_compression/factorized_prior.pt' 51 | classification_model: 52 | name: &model_name 'tf_efficientnet_l2_ns_475' 53 | repo_or_dir: 'rwightman/pytorch-image-models' 54 | params: 55 | num_classes: 1000 56 | pretrained: True 57 | experiment: &experiment !join [*dataset_name, '-', *model_name] 58 | ckpt: !join ['./imagenet/vanilla/', *experiment, '.pt'] 59 | 60 | test: 61 | test_data_loader: 62 | dataset_id: *imagenet_val 63 | random_sample: False 64 | batch_size: 1 65 | num_workers: 16 66 | -------------------------------------------------------------------------------- /legacy/configs/ilsvrc2012/input_compression/mean_scale_hyperprior-tf_efficientnet_l2_ns_475.yaml: -------------------------------------------------------------------------------- 1 | datasets: 2 | ilsvrc2012: 3 | name: &dataset_name 'ilsvrc2012' 4 | type: 'ImageFolder' 5 | root: &root_dir !join ['~/dataset/', *dataset_name] 6 | splits: 7 | val: 8 | dataset_id: &imagenet_val !join [*dataset_name, '/val'] 9 | params: 10 | root: !join [*root_dir, '/val'] 11 | transform_params: 12 | - type: 'WrappedResize' 13 | params: 14 | size: 507 15 | interpolation: 'bicubic' 16 | - type: 'CenterCrop' 17 | params: 18 | size: &input_size [475, 475] 19 | - type: 'ToTensor' 20 | params: 21 | - type: 'AdaptivePad' 22 | params: 23 | fill: 0 24 | factor: 64 25 | 26 | models: 27 | model: 28 | name: 'NeuralInputCompressionClassifier' 29 | params: 30 | post_transform_params: 31 | - type: 'CenterCrop' 32 | params: 33 | size: *input_size 34 | - type: 'Normalize' 35 | params: 36 | mean: [0.485, 0.456, 0.406] 37 | std: [0.229, 0.224, 0.225] 38 | analysis_config: 39 | analyzes_after_compress: True 40 | analyzer_configs: 41 | - type: 'FileSizeAnalyzer' 42 | params: 43 | unit: 'KB' 44 | 
compression_model: 45 | name: 'mbt2018_mean' 46 | params: 47 | pretrained: True 48 | quality: 8 49 | metric: 'mse' 50 | ckpt: './resource/ckpt/input_compression/mean_scale_hyperprior.pt' 51 | classification_model: 52 | name: &model_name 'tf_efficientnet_l2_ns_475' 53 | repo_or_dir: 'rwightman/pytorch-image-models' 54 | params: 55 | num_classes: 1000 56 | pretrained: True 57 | experiment: &experiment !join [*dataset_name, '-', *model_name] 58 | ckpt: !join ['./imagenet/vanilla/', *experiment, '.pt'] 59 | 60 | test: 61 | test_data_loader: 62 | dataset_id: *imagenet_val 63 | random_sample: False 64 | batch_size: 1 65 | num_workers: 16 66 | -------------------------------------------------------------------------------- /legacy/script/neural_input_compression/README.md: -------------------------------------------------------------------------------- 1 | # Neural Input Compression Baselines 2 | 3 | We considered the following neural image compression models: 4 | - Factorized Prior 5 | - Scale Hyperprior 6 | - Mean-scale Hyperprior 7 | - Joint Autoregressive Hierarchical Prior 8 | 9 | 10 | ## ImageNet (ILSVRC 2012): Image Classification 11 | Neural input compression followed by ResNet-50 12 | 13 | ```shell 14 | bash legacy/script/neural_input_compression/ilsvrc2012-image_classification.sh factorized_prior-resnet50 8 15 | bash legacy/script/neural_input_compression/ilsvrc2012-image_classification.sh scale_hyperprior-resnet50 8 16 | bash legacy/script/neural_input_compression/ilsvrc2012-image_classification.sh mean_scale_hyperprior-resnet50 8 17 | bash legacy/script/neural_input_compression/ilsvrc2012-image_classification.sh joint_autoregressive_hierarchical_prior-resnet50 8 18 | ``` 19 | 20 | ## COCO 2017: Object Detection 21 | Neural input compression followed by Faster R-CNN with ResNet-50 and FPN 22 | 23 | ```shell 24 | bash legacy/script/neural_input_compression/coco2017-object_detection.sh factorized_prior-faster_rcnn_resnet50_fpn 8 25 | bash 
legacy/script/neural_input_compression/coco2017-object_detection.sh scale_hyperprior-faster_rcnn_resnet50_fpn 8 26 | bash legacy/script/neural_input_compression/coco2017-object_detection.sh mean_scale_hyperprior-faster_rcnn_resnet50_fpn 8 27 | bash legacy/script/neural_input_compression/coco2017-object_detection.sh joint_autoregressive_hierarchical_prior-faster_rcnn_resnet50_fpn 8 28 | ``` 29 | 30 | ## PASCAL VOC 2012: Semantic Segmentation 31 | Neural input compression followed by DeepLabv3 with ResNet-50 32 | 33 | ```shell 34 | bash legacy/script/neural_input_compression/pascal_voc2012-semantic_segmentation.sh factorized_prior-deeplabv3_resnet50 8 35 | bash legacy/script/neural_input_compression/pascal_voc2012-semantic_segmentation.sh scale_hyperprior-deeplabv3_resnet50 8 36 | bash legacy/script/neural_input_compression/pascal_voc2012-semantic_segmentation.sh mean_scale_hyperprior-deeplabv3_resnet50 8 37 | bash legacy/script/neural_input_compression/pascal_voc2012-semantic_segmentation.sh joint_autoregressive_hierarchical_prior-deeplabv3_resnet50 8 38 | ``` 39 | -------------------------------------------------------------------------------- /sc2bench/transforms/collator.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torchdistill.datasets.registry import register_collate_func 3 | 4 | 5 | def cat_list(images, fill_value=0): 6 | """ 7 | Concatenates a list of images with the max size for each of heights and widths and 8 | fills empty spaces with a specified value. 
9 | 10 | :param images: list of image tensors 11 | :type images: list[torch.Tensor] 12 | :param fill_value: value to be filled 13 | :type fill_value: int 14 | :return: batched image tensor 15 | :rtype: torch.Tensor 16 | """ 17 | if len(images) == 1 and not isinstance(images[0], torch.Tensor): 18 | return images 19 | 20 | max_size = tuple(max(s) for s in zip(*[img.shape for img in images])) 21 | batch_shape = (len(images),) + max_size 22 | batched_imgs = images[0].new(*batch_shape).fill_(fill_value) 23 | for img, pad_img in zip(images, batched_imgs): 24 | pad_img[..., :img.shape[-2], :img.shape[-1]].copy_(img) 25 | return batched_imgs 26 | 27 | 28 | @register_collate_func 29 | def pascal_seg_collate_fn(batch): 30 | """ 31 | Collates input data for PASCAL VOC 2012 segmentation. 32 | 33 | :param batch: list/tuple of triplets (image, target, supp_dict), where supp_dict can be an empty dict 34 | :type batch: list or tuple 35 | :return: collated images, targets, and supplementary dicts 36 | :rtype: (torch.Tensor, torch.Tensor, list[dict]) 37 | """ 38 | images, targets, supp_dicts = list(zip(*batch)) 39 | batched_imgs = cat_list(images, fill_value=0) 40 | batched_targets = cat_list(targets, fill_value=255) 41 | return batched_imgs, batched_targets, supp_dicts 42 | 43 | 44 | @register_collate_func 45 | def pascal_seg_eval_collate_fn(batch): 46 | """ 47 | Collates input data for PASCAL VOC 2012 segmentation in evaluation. 48 | 49 | :param batch: list/tuple of tuples (image, target) 50 | :type batch: list or tuple 51 | :return: collated images and targets 52 | :rtype: (torch.Tensor, torch.Tensor) 53 | """ 54 | images, targets = list(zip(*batch)) 55 | batched_imgs = cat_list(images, fill_value=0) 56 | batched_targets = cat_list(targets, fill_value=255) 57 | return batched_imgs, batched_targets 58 | -------------------------------------------------------------------------------- /legacy/configs/pascal_voc2012/input_compression/factorized_prior-deeplabv3_resnet50.yaml:
-------------------------------------------------------------------------------- 1 | datasets: 2 | pascal_voc: 3 | name: &dataset_name 'pascal_voc2012' 4 | type: 'VOCSegmentation' 5 | root: &root_dir '~/dataset' 6 | splits: 7 | val: 8 | dataset_id: &pascal_val !join [*dataset_name, '/val'] 9 | params: 10 | root: *root_dir 11 | image_set: 'val' 12 | year: '2012' 13 | download: False 14 | transforms_compose_cls: 'CustomCompose' 15 | transforms_params: &val_transform 16 | - type: 'CustomRandomResize' 17 | params: 18 | min_size: 513 19 | max_size: 513 20 | - type: 'CustomToTensor' 21 | params: 22 | converts_sample: True 23 | converts_target: True 24 | 25 | models: 26 | model: 27 | name: 'NeuralInputCompressionSegmentationModel' 28 | params: 29 | pre_transform_params: 30 | - type: 'AdaptivePad' 31 | params: 32 | padding_position: 'right_bottom' 33 | returns_org_patch_size: True 34 | fill: 0 35 | factor: 64 36 | analysis_config: 37 | analyzes_after_compress: True 38 | analyzer_configs: 39 | - type: 'FileSizeAnalyzer' 40 | params: 41 | unit: 'KB' 42 | post_transform_params: 43 | - type: 'Normalize' 44 | params: 45 | mean: [0.485, 0.456, 0.406] 46 | std: [0.229, 0.224, 0.225] 47 | compression_model: 48 | name: 'bmshj2018_factorized' 49 | params: 50 | pretrained: True 51 | quality: 8 52 | metric: 'mse' 53 | ckpt: './resource/ckpt/input_compression/factorized_prior.pt' 54 | segmentation_model: 55 | name: 'deeplabv3_resnet50' 56 | params: 57 | pretrained: True 58 | pretrained_backbone: True 59 | num_classes: 21 60 | aux_loss: True 61 | ckpt: 'https://github.com/yoshitomo-matsubara/torchdistill/releases/download/v0.2.8/pascal_voc2012-deeplabv3_resnet50.pt' 62 | 63 | test: 64 | test_data_loader: 65 | dataset_id: *pascal_val 66 | random_sample: False 67 | batch_size: 1 68 | num_workers: 16 69 | collate_fn: 'pascal_seg_eval_collate_fn' 70 | -------------------------------------------------------------------------------- 
/legacy/configs/pascal_voc2012/input_compression/scale_hyperprior-deeplabv3_resnet50.yaml: -------------------------------------------------------------------------------- 1 | datasets: 2 | pascal_voc: 3 | name: &dataset_name 'pascal_voc2012' 4 | type: 'VOCSegmentation' 5 | root: &root_dir '~/dataset' 6 | splits: 7 | val: 8 | dataset_id: &pascal_val !join [*dataset_name, '/val'] 9 | params: 10 | root: *root_dir 11 | image_set: 'val' 12 | year: '2012' 13 | download: False 14 | transforms_compose_cls: 'CustomCompose' 15 | transforms_params: &val_transform 16 | - type: 'CustomRandomResize' 17 | params: 18 | min_size: 513 19 | max_size: 513 20 | - type: 'CustomToTensor' 21 | params: 22 | converts_sample: True 23 | converts_target: True 24 | 25 | models: 26 | model: 27 | name: 'NeuralInputCompressionSegmentationModel' 28 | params: 29 | pre_transform_params: 30 | - type: 'AdaptivePad' 31 | params: 32 | padding_position: 'right_bottom' 33 | returns_org_patch_size: True 34 | fill: 0 35 | factor: 64 36 | analysis_config: 37 | analyzes_after_compress: True 38 | analyzer_configs: 39 | - type: 'FileSizeAnalyzer' 40 | params: 41 | unit: 'KB' 42 | post_transform_params: 43 | - type: 'Normalize' 44 | params: 45 | mean: [0.485, 0.456, 0.406] 46 | std: [0.229, 0.224, 0.225] 47 | compression_model: 48 | name: 'bmshj2018_hyperprior' 49 | params: 50 | pretrained: True 51 | quality: 8 52 | metric: 'mse' 53 | ckpt: './resource/ckpt/input_compression/scale_hyperprior.pt' 54 | segmentation_model: 55 | name: 'deeplabv3_resnet50' 56 | params: 57 | pretrained: True 58 | pretrained_backbone: True 59 | num_classes: 21 60 | aux_loss: True 61 | ckpt: 'https://github.com/yoshitomo-matsubara/torchdistill/releases/download/v0.2.8/pascal_voc2012-deeplabv3_resnet50.pt' 62 | 63 | test: 64 | test_data_loader: 65 | dataset_id: *pascal_val 66 | random_sample: False 67 | batch_size: 1 68 | num_workers: 16 69 | collate_fn: 'pascal_seg_eval_collate_fn' 70 | 
-------------------------------------------------------------------------------- /legacy/configs/pascal_voc2012/input_compression/factorized_prior-deeplabv3_resnet101.yaml: -------------------------------------------------------------------------------- 1 | datasets: 2 | pascal_voc: 3 | name: &dataset_name 'pascal_voc2012' 4 | type: 'VOCSegmentation' 5 | root: &root_dir '~/dataset' 6 | splits: 7 | val: 8 | dataset_id: &pascal_val !join [*dataset_name, '/val'] 9 | params: 10 | root: *root_dir 11 | image_set: 'val' 12 | year: '2012' 13 | download: False 14 | transforms_compose_cls: 'CustomCompose' 15 | transforms_params: &val_transform 16 | - type: 'CustomRandomResize' 17 | params: 18 | min_size: 513 19 | max_size: 513 20 | - type: 'CustomToTensor' 21 | params: 22 | converts_sample: True 23 | converts_target: True 24 | 25 | models: 26 | model: 27 | name: 'NeuralInputCompressionSegmentationModel' 28 | params: 29 | pre_transform_params: 30 | - type: 'AdaptivePad' 31 | params: 32 | padding_position: 'right_bottom' 33 | returns_org_patch_size: True 34 | fill: 0 35 | factor: 64 36 | analysis_config: 37 | analyzes_after_compress: True 38 | analyzer_configs: 39 | - type: 'FileSizeAnalyzer' 40 | params: 41 | unit: 'KB' 42 | post_transform_params: 43 | - type: 'Normalize' 44 | params: 45 | mean: [0.485, 0.456, 0.406] 46 | std: [0.229, 0.224, 0.225] 47 | compression_model: 48 | name: 'bmshj2018_factorized' 49 | params: 50 | pretrained: True 51 | quality: 8 52 | metric: 'mse' 53 | ckpt: './resource/ckpt/input_compression/factorized_prior.pt' 54 | segmentation_model: 55 | name: 'deeplabv3_resnet101' 56 | params: 57 | pretrained: True 58 | pretrained_backbone: True 59 | num_classes: 21 60 | aux_loss: True 61 | ckpt: 'https://github.com/yoshitomo-matsubara/torchdistill/releases/download/v0.2.8/pascal_voc2012-deeplabv3_resnet101.pt' 62 | 63 | test: 64 | test_data_loader: 65 | dataset_id: *pascal_val 66 | random_sample: False 67 | batch_size: 1 68 | num_workers: 16 69 | collate_fn: 
'pascal_seg_eval_collate_fn' 70 | -------------------------------------------------------------------------------- /legacy/configs/pascal_voc2012/input_compression/mean_scale_hyperprior-deeplabv3_resnet101.yaml: -------------------------------------------------------------------------------- 1 | datasets: 2 | pascal_voc: 3 | name: &dataset_name 'pascal_voc2012' 4 | type: 'VOCSegmentation' 5 | root: &root_dir '~/dataset' 6 | splits: 7 | val: 8 | dataset_id: &pascal_val !join [*dataset_name, '/val'] 9 | params: 10 | root: *root_dir 11 | image_set: 'val' 12 | year: '2012' 13 | download: False 14 | transforms_compose_cls: 'CustomCompose' 15 | transforms_params: &val_transform 16 | - type: 'CustomRandomResize' 17 | params: 18 | min_size: 513 19 | max_size: 513 20 | - type: 'CustomToTensor' 21 | params: 22 | converts_sample: True 23 | converts_target: True 24 | 25 | models: 26 | model: 27 | name: 'NeuralInputCompressionSegmentationModel' 28 | params: 29 | pre_transform_params: 30 | - type: 'AdaptivePad' 31 | params: 32 | padding_position: 'right_bottom' 33 | returns_org_patch_size: True 34 | fill: 0 35 | factor: 64 36 | analysis_config: 37 | analyzes_after_compress: True 38 | analyzer_configs: 39 | - type: 'FileSizeAnalyzer' 40 | params: 41 | unit: 'KB' 42 | post_transform_params: 43 | - type: 'Normalize' 44 | params: 45 | mean: [0.485, 0.456, 0.406] 46 | std: [0.229, 0.224, 0.225] 47 | compression_model: 48 | name: 'mbt2018_mean' 49 | params: 50 | pretrained: True 51 | quality: 8 52 | metric: 'mse' 53 | ckpt: './resource/ckpt/input_compression/mean_scale_hyperprior.pt' 54 | segmentation_model: 55 | name: 'deeplabv3_resnet101' 56 | params: 57 | pretrained: True 58 | pretrained_backbone: True 59 | num_classes: 21 60 | aux_loss: True 61 | ckpt: 'https://github.com/yoshitomo-matsubara/torchdistill/releases/download/v0.2.8/pascal_voc2012-deeplabv3_resnet101.pt' 62 | 63 | test: 64 | test_data_loader: 65 | dataset_id: *pascal_val 66 | random_sample: False 67 | batch_size: 1 68 
| num_workers: 16 69 | collate_fn: 'pascal_seg_eval_collate_fn' 70 | -------------------------------------------------------------------------------- /legacy/configs/pascal_voc2012/input_compression/mean_scale_hyperprior-deeplabv3_resnet50.yaml: -------------------------------------------------------------------------------- 1 | datasets: 2 | pascal_voc: 3 | name: &dataset_name 'pascal_voc2012' 4 | type: 'VOCSegmentation' 5 | root: &root_dir '~/dataset' 6 | splits: 7 | val: 8 | dataset_id: &pascal_val !join [*dataset_name, '/val'] 9 | params: 10 | root: *root_dir 11 | image_set: 'val' 12 | year: '2012' 13 | download: False 14 | transforms_compose_cls: 'CustomCompose' 15 | transforms_params: &val_transform 16 | - type: 'CustomRandomResize' 17 | params: 18 | min_size: 513 19 | max_size: 513 20 | - type: 'CustomToTensor' 21 | params: 22 | converts_sample: True 23 | converts_target: True 24 | 25 | models: 26 | model: 27 | name: 'NeuralInputCompressionSegmentationModel' 28 | params: 29 | pre_transform_params: 30 | - type: 'AdaptivePad' 31 | params: 32 | padding_position: 'right_bottom' 33 | returns_org_patch_size: True 34 | fill: 0 35 | factor: 64 36 | analysis_config: 37 | analyzes_after_compress: True 38 | analyzer_configs: 39 | - type: 'FileSizeAnalyzer' 40 | params: 41 | unit: 'KB' 42 | post_transform_params: 43 | - type: 'Normalize' 44 | params: 45 | mean: [0.485, 0.456, 0.406] 46 | std: [0.229, 0.224, 0.225] 47 | compression_model: 48 | name: 'mbt2018_mean' 49 | params: 50 | pretrained: True 51 | quality: 8 52 | metric: 'mse' 53 | ckpt: './resource/ckpt/input_compression/mean_scale_hyperprior.pt' 54 | segmentation_model: 55 | name: 'deeplabv3_resnet50' 56 | params: 57 | pretrained: True 58 | pretrained_backbone: True 59 | num_classes: 21 60 | aux_loss: True 61 | ckpt: 'https://github.com/yoshitomo-matsubara/torchdistill/releases/download/v0.2.8/pascal_voc2012-deeplabv3_resnet50.pt' 62 | 63 | test: 64 | test_data_loader: 65 | dataset_id: *pascal_val 66 | 
random_sample: False 67 | batch_size: 1 68 | num_workers: 16 69 | collate_fn: 'pascal_seg_eval_collate_fn' 70 | -------------------------------------------------------------------------------- /legacy/configs/pascal_voc2012/input_compression/scale_hyperprior-deeplabv3_resnet101.yaml: -------------------------------------------------------------------------------- 1 | datasets: 2 | pascal_voc: 3 | name: &dataset_name 'pascal_voc2012' 4 | type: 'VOCSegmentation' 5 | root: &root_dir '~/dataset' 6 | splits: 7 | val: 8 | dataset_id: &pascal_val !join [*dataset_name, '/val'] 9 | params: 10 | root: *root_dir 11 | image_set: 'val' 12 | year: '2012' 13 | download: False 14 | transforms_compose_cls: 'CustomCompose' 15 | transforms_params: &val_transform 16 | - type: 'CustomRandomResize' 17 | params: 18 | min_size: 513 19 | max_size: 513 20 | - type: 'CustomToTensor' 21 | params: 22 | converts_sample: True 23 | converts_target: True 24 | 25 | models: 26 | model: 27 | name: 'NeuralInputCompressionSegmentationModel' 28 | params: 29 | pre_transform_params: 30 | - type: 'AdaptivePad' 31 | params: 32 | padding_position: 'right_bottom' 33 | returns_org_patch_size: True 34 | fill: 0 35 | factor: 64 36 | analysis_config: 37 | analyzes_after_compress: True 38 | analyzer_configs: 39 | - type: 'FileSizeAnalyzer' 40 | params: 41 | unit: 'KB' 42 | post_transform_params: 43 | - type: 'Normalize' 44 | params: 45 | mean: [0.485, 0.456, 0.406] 46 | std: [0.229, 0.224, 0.225] 47 | compression_model: 48 | name: 'bmshj2018_hyperprior' 49 | params: 50 | pretrained: True 51 | quality: 8 52 | metric: 'mse' 53 | ckpt: './resource/ckpt/input_compression/scale_hyperprior.pt' 54 | segmentation_model: 55 | name: 'deeplabv3_resnet101' 56 | params: 57 | pretrained: True 58 | pretrained_backbone: True 59 | num_classes: 21 60 | aux_loss: True 61 | ckpt: 'https://github.com/yoshitomo-matsubara/torchdistill/releases/download/v0.2.8/pascal_voc2012-deeplabv3_resnet101.pt' 62 | 63 | test: 64 | test_data_loader: 
65 | dataset_id: *pascal_val 66 | random_sample: False 67 | batch_size: 1 68 | num_workers: 16 69 | collate_fn: 'pascal_seg_eval_collate_fn' 70 | -------------------------------------------------------------------------------- /legacy/configs/pascal_voc2012/input_compression/joint_autoregressive_hierarchical_prior-deeplabv3_resnet101.yaml: -------------------------------------------------------------------------------- 1 | datasets: 2 | pascal_voc: 3 | name: &dataset_name 'pascal_voc2012' 4 | type: 'VOCSegmentation' 5 | root: &root_dir '~/dataset' 6 | splits: 7 | val: 8 | dataset_id: &pascal_val !join [*dataset_name, '/val'] 9 | params: 10 | root: *root_dir 11 | image_set: 'val' 12 | year: '2012' 13 | download: False 14 | transforms_compose_cls: 'CustomCompose' 15 | transforms_params: &val_transform 16 | - type: 'CustomRandomResize' 17 | params: 18 | min_size: 513 19 | max_size: 513 20 | - type: 'CustomToTensor' 21 | params: 22 | converts_sample: True 23 | converts_target: True 24 | 25 | models: 26 | model: 27 | name: 'NeuralInputCompressionSegmentationModel' 28 | params: 29 | pre_transform_params: 30 | - type: 'AdaptivePad' 31 | params: 32 | padding_position: 'right_bottom' 33 | returns_org_patch_size: True 34 | fill: 0 35 | factor: 64 36 | analysis_config: 37 | analyzes_after_compress: True 38 | analyzer_configs: 39 | - type: 'FileSizeAnalyzer' 40 | params: 41 | unit: 'KB' 42 | post_transform_params: 43 | - type: 'Normalize' 44 | params: 45 | mean: [0.485, 0.456, 0.406] 46 | std: [0.229, 0.224, 0.225] 47 | uses_cpu4compression_model: True 48 | compression_model: 49 | name: 'mbt2018' 50 | params: 51 | pretrained: True 52 | quality: 8 53 | metric: 'mse' 54 | ckpt: './resource/ckpt/input_compression/joint_autoregressive_hierarchical_prior.pt' 55 | segmentation_model: 56 | name: 'deeplabv3_resnet101' 57 | params: 58 | pretrained: True 59 | pretrained_backbone: True 60 | num_classes: 21 61 | aux_loss: True 62 | ckpt: 
'https://github.com/yoshitomo-matsubara/torchdistill/releases/download/v0.2.8/pascal_voc2012-deeplabv3_resnet101.pt' 63 | 64 | test: 65 | test_data_loader: 66 | dataset_id: *pascal_val 67 | random_sample: False 68 | batch_size: 1 69 | num_workers: 16 70 | collate_fn: 'pascal_seg_eval_collate_fn' 71 | -------------------------------------------------------------------------------- /legacy/configs/pascal_voc2012/input_compression/joint_autoregressive_hierarchical_prior-deeplabv3_resnet50.yaml: -------------------------------------------------------------------------------- 1 | datasets: 2 | pascal_voc: 3 | name: &dataset_name 'pascal_voc2012' 4 | type: 'VOCSegmentation' 5 | root: &root_dir '~/dataset' 6 | splits: 7 | val: 8 | dataset_id: &pascal_val !join [*dataset_name, '/val'] 9 | params: 10 | root: *root_dir 11 | image_set: 'val' 12 | year: '2012' 13 | download: False 14 | transforms_compose_cls: 'CustomCompose' 15 | transforms_params: &val_transform 16 | - type: 'CustomRandomResize' 17 | params: 18 | min_size: 513 19 | max_size: 513 20 | - type: 'CustomToTensor' 21 | params: 22 | converts_sample: True 23 | converts_target: True 24 | 25 | models: 26 | model: 27 | name: 'NeuralInputCompressionSegmentationModel' 28 | params: 29 | pre_transform_params: 30 | - type: 'AdaptivePad' 31 | params: 32 | padding_position: 'right_bottom' 33 | returns_org_patch_size: True 34 | fill: 0 35 | factor: 64 36 | analysis_config: 37 | analyzes_after_compress: True 38 | analyzer_configs: 39 | - type: 'FileSizeAnalyzer' 40 | params: 41 | unit: 'KB' 42 | post_transform_params: 43 | - type: 'Normalize' 44 | params: 45 | mean: [0.485, 0.456, 0.406] 46 | std: [0.229, 0.224, 0.225] 47 | uses_cpu4compression_model: True 48 | compression_model: 49 | name: 'mbt2018' 50 | params: 51 | pretrained: True 52 | quality: 8 53 | metric: 'mse' 54 | ckpt: './resource/ckpt/input_compression/joint_autoregressive_hierarchical_prior.pt' 55 | segmentation_model: 56 | name: 'deeplabv3_resnet50' 57 | params: 58 
| pretrained: True 59 | pretrained_backbone: True 60 | num_classes: 21 61 | aux_loss: True 62 | ckpt: 'https://github.com/yoshitomo-matsubara/torchdistill/releases/download/v0.2.8/pascal_voc2012-deeplabv3_resnet50.pt' 63 | 64 | test: 65 | test_data_loader: 66 | dataset_id: *pascal_val 67 | random_sample: False 68 | batch_size: 1 69 | num_workers: 16 70 | collate_fn: 'pascal_seg_eval_collate_fn' 71 | -------------------------------------------------------------------------------- /legacy/script/README.md: -------------------------------------------------------------------------------- 1 | # Datasets 2 | 3 | Download and preprocess datasets before you run experiments. 4 | Here, we provide three examples: ImageNet (ILSVRC 2012), COCO 2017, and PASCAL VOC 2012. 5 | 6 | ## 1. ImageNet (ILSVRC 2012): Image Classification 7 | ### 1.1 Download the datasets 8 | As the terms of use do not allow to distribute the URLs, you will have to create an account [here](http://image-net.org/download) to get the URLs, and replace `${TRAIN_DATASET_URL}` and `${VAL_DATASET_URL}` with them. 
9 | ```shell 10 | wget ${TRAIN_DATASET_URL} ./ 11 | wget ${VAL_DATASET_URL} ./ 12 | ``` 13 | 14 | ### 1.2 Untar and extract files 15 | ```shell 16 | # Go to the root of this repository 17 | mkdir ~/dataset/ilsvrc2012/{train,val} -p 18 | mv ILSVRC2012_img_train.tar ~/dataset/ilsvrc2012/train/ 19 | mv ILSVRC2012_img_val.tar ~/dataset/ilsvrc2012/val/ 20 | cd ~/dataset/ilsvrc2012/train/ 21 | tar -xvf ILSVRC2012_img_train.tar 22 | mv ILSVRC2012_img_train.tar ../ 23 | for f in *.tar; do 24 | d=`basename $f .tar` 25 | mkdir $d 26 | (cd $d && tar xf ../$f) 27 | done 28 | rm -r *.tar 29 | cd ../../../../ 30 | 31 | wget https://raw.githubusercontent.com/soumith/imagenetloader.torch/master/valprep.sh 32 | mv valprep.sh ~/dataset/ilsvrc2012/val/ 33 | cd ~/dataset/ilsvrc2012/val/ 34 | tar -xvf ILSVRC2012_img_val.tar 35 | mv ILSVRC2012_img_val.tar ../ 36 | sh valprep.sh 37 | mv valprep.sh ../ 38 | cd ../../../../ 39 | ``` 40 | 41 | 42 | ## 2. COCO 2017: Object Detection 43 | ### 2.1 Download the datasets 44 | ```shell 45 | wget http://images.cocodataset.org/zips/train2017.zip ./ 46 | wget http://images.cocodataset.org/zips/val2017.zip ./ 47 | wget http://images.cocodataset.org/annotations/annotations_trainval2017.zip ./ 48 | ``` 49 | 50 | ### 2.2 Unzip and extract files 51 | ```shell 52 | # Go to the root of this repository 53 | mkdir ~/dataset/coco2017/ -p 54 | mv train2017.zip ~/dataset/coco2017/ 55 | mv val2017.zip ~/dataset/coco2017/ 56 | mv annotations_trainval2017.zip ~/dataset/coco2017/ 57 | cd ~/dataset/coco2017/ 58 | unzip train2017.zip 59 | unzip val2017.zip 60 | unzip annotations_trainval2017.zip 61 | cd ../../../ 62 | ``` 63 | 64 | 65 | ## 3. PASCAL VOC 2012: Semantic Segmentation 66 | You can skip Steps 3.1 and 3.2 by replacing `download: False` in a yaml config file with `download: True`. 
67 | 68 | ### 3.1 Download the datasets 69 | ```shell 70 | wget http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar 71 | ``` 72 | 73 | ### 3.2 Untar and extract files 74 | ```shell 75 | # Go to the root of this repository 76 | mkdir ~/dataset/ -p 77 | mv VOCtrainval_11-May-2012.tar ~/dataset/ 78 | cd ~/dataset/ 79 | tar -xvf VOCtrainval_11-May-2012.tar 80 | cd ../../ 81 | ``` 82 | -------------------------------------------------------------------------------- /script/README.md: -------------------------------------------------------------------------------- 1 | # Datasets 2 | 3 | Download and preprocess datasets before you run experiments. 4 | Here, we provide three examples: ImageNet (ILSVRC 2012), COCO 2017, and PASCAL VOC 2012. 5 | 6 | ## 1. ImageNet (ILSVRC 2012): Image Classification 7 | ### 1.1 Download the datasets 8 | As the terms of use do not allow to distribute the URLs, you will have to create an account [here](http://image-net.org/download) to get the URLs, and replace `${TRAIN_DATASET_URL}` and `${VAL_DATASET_URL}` with them. 
9 | ```shell 10 | wget ${TRAIN_DATASET_URL} ./ 11 | wget ${VAL_DATASET_URL} ./ 12 | ``` 13 | 14 | ### 1.2 Untar and extract files 15 | ```shell 16 | # Go to the root of this repository 17 | mkdir ~/datasets/ilsvrc2012/{train,val} -p 18 | mv ILSVRC2012_img_train.tar ~/datasets/ilsvrc2012/train/ 19 | mv ILSVRC2012_img_val.tar ~/datasets/ilsvrc2012/val/ 20 | cd ~/datasets/ilsvrc2012/train/ 21 | tar -xvf ILSVRC2012_img_train.tar 22 | mv ILSVRC2012_img_train.tar ../ 23 | for f in *.tar; do 24 | d=`basename $f .tar` 25 | mkdir $d 26 | (cd $d && tar xf ../$f) 27 | done 28 | rm -r *.tar 29 | cd ../../../../ 30 | 31 | wget https://raw.githubusercontent.com/soumith/imagenetloader.torch/master/valprep.sh 32 | mv valprep.sh ~/datasets/ilsvrc2012/val/ 33 | cd ~/datasets/ilsvrc2012/val/ 34 | tar -xvf ILSVRC2012_img_val.tar 35 | mv ILSVRC2012_img_val.tar ../ 36 | sh valprep.sh 37 | mv valprep.sh ../ 38 | cd ../../../../ 39 | ``` 40 | 41 | 42 | ## 2. COCO 2017: Object Detection 43 | ### 2.1 Download the datasets 44 | ```shell 45 | wget http://images.cocodataset.org/zips/train2017.zip ./ 46 | wget http://images.cocodataset.org/zips/val2017.zip ./ 47 | wget http://images.cocodataset.org/annotations/annotations_trainval2017.zip ./ 48 | ``` 49 | 50 | ### 2.2 Unzip and extract files 51 | ```shell 52 | # Go to the root of this repository 53 | mkdir ~/datasets/coco2017/ -p 54 | mv train2017.zip ~/datasets/coco2017/ 55 | mv val2017.zip ~/datasets/coco2017/ 56 | mv annotations_trainval2017.zip ~/datasets/coco2017/ 57 | cd ~/datasets/coco2017/ 58 | unzip train2017.zip 59 | unzip val2017.zip 60 | unzip annotations_trainval2017.zip 61 | cd ../../../ 62 | ``` 63 | 64 | 65 | ## 3. PASCAL VOC 2012: Semantic Segmentation 66 | You can skip Steps 3.1 and 3.2 by replacing `download: False` in a yaml config file with `download: True`. 
67 | 68 | ### 3.1 Download the datasets 69 | ```shell 70 | wget http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar 71 | ``` 72 | 73 | ### 3.2 Untar and extract files 74 | ```shell 75 | # Go to the root of this repository 76 | mkdir ~/datasets/ -p 77 | mv VOCtrainval_11-May-2012.tar ~/datasets/ 78 | cd ~/datasets/ 79 | tar -xvf VOCtrainval_11-May-2012.tar 80 | cd ../../ 81 | ``` 82 | -------------------------------------------------------------------------------- /configs/ilsvrc2012/input_compression/jpeg-resnet50.yaml: -------------------------------------------------------------------------------- 1 | datasets: 2 | &imagenet_val ilsvrc2012/val: !import_call 3 | _name: &dataset_name 'ilsvrc2012' 4 | _root: &root_dir !join ['~/datasets/', *dataset_name] 5 | key: 'torchvision.datasets.ImageFolder' 6 | init: 7 | kwargs: 8 | root: !join [*root_dir, '/val'] 9 | transform: !import_call 10 | key: 'torchvision.transforms.Compose' 11 | init: 12 | kwargs: 13 | transforms: 14 | - !import_call 15 | key: 'torchvision.transforms.Resize' 16 | init: 17 | kwargs: 18 | size: 256 19 | - !import_call 20 | key: 'torchvision.transforms.CenterCrop' 21 | init: 22 | kwargs: 23 | size: [224, 224] 24 | 25 | models: 26 | model: 27 | key: 'CodecInputCompressionClassifier' 28 | kwargs: 29 | codec_encoder_decoder: !import_call 30 | key: 'torchvision.transforms.Compose' 31 | init: 32 | kwargs: 33 | transforms: 34 | - !import_call 35 | key: 'sc2bench.transforms.codec.PILImageModule' 36 | init: 37 | kwargs: 38 | format: 'JPEG' 39 | quality: 90 40 | returns_file_size: True 41 | post_transform: !import_call 42 | key: 'torchvision.transforms.Compose' 43 | init: 44 | kwargs: 45 | transforms: 46 | - !import_call 47 | key: 'torchvision.transforms.ToTensor' 48 | init: 49 | - !import_call 50 | key: 'torchvision.transforms.Normalize' 51 | init: 52 | kwargs: 53 | mean: [0.485, 0.456, 0.406] 54 | std: [0.229, 0.224, 0.225] 55 | analysis_config: 56 | analyzer_configs: 57 | - key: 
'FileSizeAccumulator' 58 | kwargs: 59 | unit: 'KB' 60 | classification_model: 61 | key: 'resnet50' 62 | _weights: &model_weights_enum !import_get 63 | key: 'torchvision.models.resnet.ResNet50_Weights' 64 | kwargs: 65 | num_classes: 1000 66 | weights: !getattr [*model_weights_enum, 'IMAGENET1K_V1'] 67 | 68 | test: 69 | test_data_loader: 70 | dataset_id: *imagenet_val 71 | collate_fn: 'default_collate_w_pil' 72 | sampler: 73 | class_or_func: !import_get 74 | key: 'torch.utils.data.SequentialSampler' 75 | kwargs: 76 | kwargs: 77 | batch_size: 1 78 | num_workers: 16 79 | drop_last: False 80 | -------------------------------------------------------------------------------- /configs/ilsvrc2012/input_compression/webp-resnet50.yaml: -------------------------------------------------------------------------------- 1 | datasets: 2 | &imagenet_val ilsvrc2012/val: !import_call 3 | _name: &dataset_name 'ilsvrc2012' 4 | _root: &root_dir !join ['~/datasets/', *dataset_name] 5 | key: 'torchvision.datasets.ImageFolder' 6 | init: 7 | kwargs: 8 | root: !join [ *root_dir, '/val' ] 9 | transform: !import_call 10 | key: 'torchvision.transforms.Compose' 11 | init: 12 | kwargs: 13 | transforms: 14 | - !import_call 15 | key: 'torchvision.transforms.Resize' 16 | init: 17 | kwargs: 18 | size: 256 19 | - !import_call 20 | key: 'torchvision.transforms.CenterCrop' 21 | init: 22 | kwargs: 23 | size: [224, 224] 24 | 25 | models: 26 | model: 27 | key: 'CodecInputCompressionClassifier' 28 | kwargs: 29 | codec_encoder_decoder: !import_call 30 | key: 'torchvision.transforms.Compose' 31 | init: 32 | kwargs: 33 | transforms: 34 | - !import_call 35 | key: 'sc2bench.transforms.codec.PILImageModule' 36 | init: 37 | kwargs: 38 | format: 'WEBP' 39 | quality: 90 40 | returns_file_size: True 41 | post_transform: !import_call 42 | key: 'torchvision.transforms.Compose' 43 | init: 44 | kwargs: 45 | transforms: 46 | - !import_call 47 | key: 'torchvision.transforms.ToTensor' 48 | init: 49 | - !import_call 50 | key: 
'torchvision.transforms.Normalize' 51 | init: 52 | kwargs: 53 | mean: [0.485, 0.456, 0.406] 54 | std: [0.229, 0.224, 0.225] 55 | analysis_config: 56 | analyzer_configs: 57 | - key: 'FileSizeAccumulator' 58 | kwargs: 59 | unit: 'KB' 60 | classification_model: 61 | key: 'resnet50' 62 | _weights: &model_weights_enum !import_get 63 | key: 'torchvision.models.resnet.ResNet50_Weights' 64 | kwargs: 65 | num_classes: 1000 66 | weights: !getattr [*model_weights_enum, 'IMAGENET1K_V1'] 67 | 68 | test: 69 | test_data_loader: 70 | dataset_id: *imagenet_val 71 | collate_fn: 'default_collate_w_pil' 72 | sampler: 73 | class_or_func: !import_get 74 | key: 'torch.utils.data.SequentialSampler' 75 | kwargs: 76 | kwargs: 77 | batch_size: 1 78 | num_workers: 16 79 | drop_last: False 80 | -------------------------------------------------------------------------------- /configs/ilsvrc2012/input_compression/jpeg-resnet101.yaml: -------------------------------------------------------------------------------- 1 | datasets: 2 | &imagenet_val ilsvrc2012/val: !import_call 3 | _name: &dataset_name 'ilsvrc2012' 4 | _root: &root_dir !join ['~/datasets/', *dataset_name] 5 | key: 'torchvision.datasets.ImageFolder' 6 | init: 7 | kwargs: 8 | root: !join [*root_dir, '/val'] 9 | transform: !import_call 10 | key: 'torchvision.transforms.Compose' 11 | init: 12 | kwargs: 13 | transforms: 14 | - !import_call 15 | key: 'torchvision.transforms.Resize' 16 | init: 17 | kwargs: 18 | size: 256 19 | - !import_call 20 | key: 'torchvision.transforms.CenterCrop' 21 | init: 22 | kwargs: 23 | size: [224, 224] 24 | 25 | models: 26 | model: 27 | key: 'CodecInputCompressionClassifier' 28 | kwargs: 29 | codec_encoder_decoder: !import_call 30 | key: 'torchvision.transforms.Compose' 31 | init: 32 | kwargs: 33 | transforms: 34 | - !import_call 35 | key: 'sc2bench.transforms.codec.PILImageModule' 36 | init: 37 | kwargs: 38 | format: 'JPEG' 39 | quality: 90 40 | returns_file_size: True 41 | post_transform: !import_call 42 | 
key: 'torchvision.transforms.Compose' 43 | init: 44 | kwargs: 45 | transforms: 46 | - !import_call 47 | key: 'torchvision.transforms.ToTensor' 48 | init: 49 | - !import_call 50 | key: 'torchvision.transforms.Normalize' 51 | init: 52 | kwargs: 53 | mean: [0.485, 0.456, 0.406] 54 | std: [0.229, 0.224, 0.225] 55 | analysis_config: 56 | analyzer_configs: 57 | - key: 'FileSizeAccumulator' 58 | kwargs: 59 | unit: 'KB' 60 | classification_model: 61 | 62 | key: 'resnet101' 63 | _weights: &model_weights_enum !import_get 64 | key: 'torchvision.models.resnet.ResNet101_Weights' 65 | kwargs: 66 | num_classes: 1000 67 | weights: !getattr [*model_weights_enum, 'IMAGENET1K_V1'] 68 | 69 | test: 70 | test_data_loader: 71 | dataset_id: *imagenet_val 72 | collate_fn: 'default_collate_w_pil' 73 | sampler: 74 | class_or_func: !import_get 75 | key: 'torch.utils.data.SequentialSampler' 76 | kwargs: 77 | kwargs: 78 | batch_size: 1 79 | num_workers: 16 80 | drop_last: False 81 | -------------------------------------------------------------------------------- /configs/ilsvrc2012/input_compression/jpeg-resnet152.yaml: -------------------------------------------------------------------------------- 1 | datasets: 2 | &imagenet_val ilsvrc2012/val: !import_call 3 | _name: &dataset_name 'ilsvrc2012' 4 | _root: &root_dir !join ['~/datasets/', *dataset_name] 5 | key: 'torchvision.datasets.ImageFolder' 6 | init: 7 | kwargs: 8 | root: !join [*root_dir, '/val'] 9 | transform: !import_call 10 | key: 'torchvision.transforms.Compose' 11 | init: 12 | kwargs: 13 | transforms: 14 | - !import_call 15 | key: 'torchvision.transforms.Resize' 16 | init: 17 | kwargs: 18 | size: 256 19 | - !import_call 20 | key: 'torchvision.transforms.CenterCrop' 21 | init: 22 | kwargs: 23 | size: [224, 224] 24 | 25 | models: 26 | model: 27 | key: 'CodecInputCompressionClassifier' 28 | kwargs: 29 | codec_encoder_decoder: !import_call 30 | key: 'torchvision.transforms.Compose' 31 | init: 32 | kwargs: 33 | transforms: 34 | - 
!import_call 35 | key: 'sc2bench.transforms.codec.PILImageModule' 36 | init: 37 | kwargs: 38 | format: 'JPEG' 39 | quality: 90 40 | returns_file_size: True 41 | post_transform: !import_call 42 | key: 'torchvision.transforms.Compose' 43 | init: 44 | kwargs: 45 | transforms: 46 | - !import_call 47 | key: 'torchvision.transforms.ToTensor' 48 | init: 49 | - !import_call 50 | key: 'torchvision.transforms.Normalize' 51 | init: 52 | kwargs: 53 | mean: [0.485, 0.456, 0.406] 54 | std: [0.229, 0.224, 0.225] 55 | analysis_config: 56 | analyzer_configs: 57 | - key: 'FileSizeAccumulator' 58 | kwargs: 59 | unit: 'KB' 60 | classification_model: 61 | 62 | key: 'resnet152' 63 | _weights: &model_weights_enum !import_get 64 | key: 'torchvision.models.resnet.ResNet152_Weights' 65 | kwargs: 66 | num_classes: 1000 67 | weights: !getattr [*model_weights_enum, 'IMAGENET1K_V1'] 68 | 69 | test: 70 | test_data_loader: 71 | dataset_id: *imagenet_val 72 | collate_fn: 'default_collate_w_pil' 73 | sampler: 74 | class_or_func: !import_get 75 | key: 'torch.utils.data.SequentialSampler' 76 | kwargs: 77 | kwargs: 78 | batch_size: 1 79 | num_workers: 16 80 | drop_last: False 81 | -------------------------------------------------------------------------------- /configs/ilsvrc2012/input_compression/webp-resnet101.yaml: -------------------------------------------------------------------------------- 1 | datasets: 2 | &imagenet_val ilsvrc2012/val: !import_call 3 | _name: &dataset_name 'ilsvrc2012' 4 | _root: &root_dir !join ['~/datasets/', *dataset_name] 5 | key: 'torchvision.datasets.ImageFolder' 6 | init: 7 | kwargs: 8 | root: !join [ *root_dir, '/val' ] 9 | transform: !import_call 10 | key: 'torchvision.transforms.Compose' 11 | init: 12 | kwargs: 13 | transforms: 14 | - !import_call 15 | key: 'torchvision.transforms.Resize' 16 | init: 17 | kwargs: 18 | size: 256 19 | - !import_call 20 | key: 'torchvision.transforms.CenterCrop' 21 | init: 22 | kwargs: 23 | size: [224, 224] 24 | 25 | models: 26 | model: 
27 | key: 'CodecInputCompressionClassifier' 28 | kwargs: 29 | codec_encoder_decoder: !import_call 30 | key: 'torchvision.transforms.Compose' 31 | init: 32 | kwargs: 33 | transforms: 34 | - !import_call 35 | key: 'sc2bench.transforms.codec.PILImageModule' 36 | init: 37 | kwargs: 38 | format: 'WEBP' 39 | quality: 90 40 | returns_file_size: True 41 | post_transform: !import_call 42 | key: 'torchvision.transforms.Compose' 43 | init: 44 | kwargs: 45 | transforms: 46 | - !import_call 47 | key: 'torchvision.transforms.ToTensor' 48 | init: 49 | - !import_call 50 | key: 'torchvision.transforms.Normalize' 51 | init: 52 | kwargs: 53 | mean: [0.485, 0.456, 0.406] 54 | std: [0.229, 0.224, 0.225] 55 | analysis_config: 56 | analyzer_configs: 57 | - key: 'FileSizeAccumulator' 58 | kwargs: 59 | unit: 'KB' 60 | classification_model: 61 | 62 | key: 'resnet101' 63 | _weights: &model_weights_enum !import_get 64 | key: 'torchvision.models.resnet.ResNet101_Weights' 65 | kwargs: 66 | num_classes: 1000 67 | weights: !getattr [*model_weights_enum, 'IMAGENET1K_V1'] 68 | 69 | test: 70 | test_data_loader: 71 | dataset_id: *imagenet_val 72 | collate_fn: 'default_collate_w_pil' 73 | sampler: 74 | class_or_func: !import_get 75 | key: 'torch.utils.data.SequentialSampler' 76 | kwargs: 77 | kwargs: 78 | batch_size: 1 79 | num_workers: 16 80 | drop_last: False 81 | -------------------------------------------------------------------------------- /configs/ilsvrc2012/input_compression/webp-resnet152.yaml: -------------------------------------------------------------------------------- 1 | datasets: 2 | &imagenet_val ilsvrc2012/val: !import_call 3 | _name: &dataset_name 'ilsvrc2012' 4 | _root: &root_dir !join ['~/datasets/', *dataset_name] 5 | key: 'torchvision.datasets.ImageFolder' 6 | init: 7 | kwargs: 8 | root: !join [ *root_dir, '/val' ] 9 | transform: !import_call 10 | key: 'torchvision.transforms.Compose' 11 | init: 12 | kwargs: 13 | transforms: 14 | - !import_call 15 | key: 
'torchvision.transforms.Resize' 16 | init: 17 | kwargs: 18 | size: 256 19 | - !import_call 20 | key: 'torchvision.transforms.CenterCrop' 21 | init: 22 | kwargs: 23 | size: [224, 224] 24 | 25 | models: 26 | model: 27 | key: 'CodecInputCompressionClassifier' 28 | kwargs: 29 | codec_encoder_decoder: !import_call 30 | key: 'torchvision.transforms.Compose' 31 | init: 32 | kwargs: 33 | transforms: 34 | - !import_call 35 | key: 'sc2bench.transforms.codec.PILImageModule' 36 | init: 37 | kwargs: 38 | format: 'WEBP' 39 | quality: 90 40 | returns_file_size: True 41 | post_transform: !import_call 42 | key: 'torchvision.transforms.Compose' 43 | init: 44 | kwargs: 45 | transforms: 46 | - !import_call 47 | key: 'torchvision.transforms.ToTensor' 48 | init: 49 | - !import_call 50 | key: 'torchvision.transforms.Normalize' 51 | init: 52 | kwargs: 53 | mean: [0.485, 0.456, 0.406] 54 | std: [0.229, 0.224, 0.225] 55 | analysis_config: 56 | analyzer_configs: 57 | - key: 'FileSizeAccumulator' 58 | kwargs: 59 | unit: 'KB' 60 | classification_model: 61 | 62 | key: 'resnet152' 63 | _weights: &model_weights_enum !import_get 64 | key: 'torchvision.models.resnet.ResNet152_Weights' 65 | kwargs: 66 | num_classes: 1000 67 | weights: !getattr [*model_weights_enum, 'IMAGENET1K_V1'] 68 | 69 | test: 70 | test_data_loader: 71 | dataset_id: *imagenet_val 72 | collate_fn: 'default_collate_w_pil' 73 | sampler: 74 | class_or_func: !import_get 75 | key: 'torch.utils.data.SequentialSampler' 76 | kwargs: 77 | kwargs: 78 | batch_size: 1 79 | num_workers: 16 80 | drop_last: False 81 | -------------------------------------------------------------------------------- /configs/ilsvrc2012/input_compression/bpg-resnet50.yaml: -------------------------------------------------------------------------------- 1 | datasets: 2 | &imagenet_val ilsvrc2012/val: !import_call 3 | _name: &dataset_name 'ilsvrc2012' 4 | _root: &root_dir !join ['~/datasets/', *dataset_name] 5 | key: 'torchvision.datasets.ImageFolder' 6 | init: 7 | 
kwargs: 8 | root: !join [ *root_dir, '/val' ] 9 | transform: !import_call 10 | key: 'torchvision.transforms.Compose' 11 | init: 12 | kwargs: 13 | transforms: 14 | - !import_call 15 | key: 'torchvision.transforms.Resize' 16 | init: 17 | kwargs: 18 | size: 256 19 | - !import_call 20 | key: 'torchvision.transforms.CenterCrop' 21 | init: 22 | kwargs: 23 | size: [224, 224] 24 | 25 | models: 26 | model: 27 | key: 'CodecInputCompressionClassifier' 28 | kwargs: 29 | codec_encoder_decoder: !import_call 30 | key: 'torchvision.transforms.Compose' 31 | init: 32 | kwargs: 33 | transforms: 34 | - !import_call 35 | key: 'sc2bench.transforms.codec.BPGModule' 36 | init: 37 | kwargs: 38 | encoder_path: '~/software/libbpg-0.9.8/bpgenc' 39 | decoder_path: '~/software/libbpg-0.9.8/bpgdec' 40 | quality: 50 41 | returns_file_size: True 42 | post_transform: !import_call 43 | key: 'torchvision.transforms.Compose' 44 | init: 45 | kwargs: 46 | transforms: 47 | - !import_call 48 | key: 'torchvision.transforms.ToTensor' 49 | init: 50 | - !import_call 51 | key: 'torchvision.transforms.Normalize' 52 | init: 53 | kwargs: 54 | mean: [0.485, 0.456, 0.406] 55 | std: [0.229, 0.224, 0.225] 56 | analysis_config: 57 | analyzer_configs: 58 | - key: 'FileSizeAccumulator' 59 | kwargs: 60 | unit: 'KB' 61 | classification_model: 62 | key: 'resnet50' 63 | _weights: &model_weights_enum !import_get 64 | key: 'torchvision.models.resnet.ResNet50_Weights' 65 | kwargs: 66 | num_classes: 1000 67 | weights: !getattr [*model_weights_enum, 'IMAGENET1K_V1'] 68 | 69 | test: 70 | test_data_loader: 71 | dataset_id: *imagenet_val 72 | sampler: 73 | class_or_func: !import_get 74 | key: 'torch.utils.data.SequentialSampler' 75 | kwargs: 76 | kwargs: 77 | batch_size: 1 78 | num_workers: 16 79 | drop_last: False 80 | -------------------------------------------------------------------------------- /configs/ilsvrc2012/input_compression/bpg-resnet101.yaml: 
-------------------------------------------------------------------------------- 1 | datasets: 2 | &imagenet_val ilsvrc2012/val: !import_call 3 | _name: &dataset_name 'ilsvrc2012' 4 | _root: &root_dir !join ['~/datasets/', *dataset_name] 5 | key: 'torchvision.datasets.ImageFolder' 6 | init: 7 | kwargs: 8 | root: !join [ *root_dir, '/val' ] 9 | transform: !import_call 10 | key: 'torchvision.transforms.Compose' 11 | init: 12 | kwargs: 13 | transforms: 14 | - !import_call 15 | key: 'torchvision.transforms.Resize' 16 | init: 17 | kwargs: 18 | size: 256 19 | - !import_call 20 | key: 'torchvision.transforms.CenterCrop' 21 | init: 22 | kwargs: 23 | size: [224, 224] 24 | 25 | models: 26 | model: 27 | key: 'CodecInputCompressionClassifier' 28 | kwargs: 29 | codec_encoder_decoder: !import_call 30 | key: 'torchvision.transforms.Compose' 31 | init: 32 | kwargs: 33 | transforms: 34 | - !import_call 35 | key: 'sc2bench.transforms.codec.BPGModule' 36 | init: 37 | kwargs: 38 | encoder_path: '~/software/libbpg-0.9.8/bpgenc' 39 | decoder_path: '~/software/libbpg-0.9.8/bpgdec' 40 | quality: 50 41 | returns_file_size: True 42 | post_transform: !import_call 43 | key: 'torchvision.transforms.Compose' 44 | init: 45 | kwargs: 46 | transforms: 47 | - !import_call 48 | key: 'torchvision.transforms.ToTensor' 49 | init: 50 | - !import_call 51 | key: 'torchvision.transforms.Normalize' 52 | init: 53 | kwargs: 54 | mean: [0.485, 0.456, 0.406] 55 | std: [0.229, 0.224, 0.225] 56 | analysis_config: 57 | analyzer_configs: 58 | - key: 'FileSizeAccumulator' 59 | kwargs: 60 | unit: 'KB' 61 | classification_model: 62 | 63 | key: 'resnet101' 64 | _weights: &model_weights_enum !import_get 65 | key: 'torchvision.models.resnet.ResNet101_Weights' 66 | kwargs: 67 | num_classes: 1000 68 | weights: !getattr [*model_weights_enum, 'IMAGENET1K_V1'] 69 | 70 | test: 71 | test_data_loader: 72 | dataset_id: *imagenet_val 73 | sampler: 74 | class_or_func: !import_get 75 | key: 'torch.utils.data.SequentialSampler' 76 | 
kwargs: 77 | kwargs: 78 | batch_size: 1 79 | num_workers: 16 80 | drop_last: False 81 | -------------------------------------------------------------------------------- /configs/ilsvrc2012/input_compression/bpg-resnet152.yaml: -------------------------------------------------------------------------------- 1 | datasets: 2 | &imagenet_val ilsvrc2012/val: !import_call 3 | _name: &dataset_name 'ilsvrc2012' 4 | _root: &root_dir !join ['~/datasets/', *dataset_name] 5 | key: 'torchvision.datasets.ImageFolder' 6 | init: 7 | kwargs: 8 | root: !join [ *root_dir, '/val' ] 9 | transform: !import_call 10 | key: 'torchvision.transforms.Compose' 11 | init: 12 | kwargs: 13 | transforms: 14 | - !import_call 15 | key: 'torchvision.transforms.Resize' 16 | init: 17 | kwargs: 18 | size: 256 19 | - !import_call 20 | key: 'torchvision.transforms.CenterCrop' 21 | init: 22 | kwargs: 23 | size: [224, 224] 24 | 25 | models: 26 | model: 27 | key: 'CodecInputCompressionClassifier' 28 | kwargs: 29 | codec_encoder_decoder: !import_call 30 | key: 'torchvision.transforms.Compose' 31 | init: 32 | kwargs: 33 | transforms: 34 | - !import_call 35 | key: 'sc2bench.transforms.codec.BPGModule' 36 | init: 37 | kwargs: 38 | encoder_path: '~/software/libbpg-0.9.8/bpgenc' 39 | decoder_path: '~/software/libbpg-0.9.8/bpgdec' 40 | quality: 50 41 | returns_file_size: True 42 | post_transform: !import_call 43 | key: 'torchvision.transforms.Compose' 44 | init: 45 | kwargs: 46 | transforms: 47 | - !import_call 48 | key: 'torchvision.transforms.ToTensor' 49 | init: 50 | - !import_call 51 | key: 'torchvision.transforms.Normalize' 52 | init: 53 | kwargs: 54 | mean: [0.485, 0.456, 0.406] 55 | std: [0.229, 0.224, 0.225] 56 | analysis_config: 57 | analyzer_configs: 58 | - key: 'FileSizeAccumulator' 59 | kwargs: 60 | unit: 'KB' 61 | classification_model: 62 | 63 | key: 'resnet152' 64 | _weights: &model_weights_enum !import_get 65 | key: 'torchvision.models.resnet.ResNet152_Weights' 66 | kwargs: 67 | num_classes: 1000 68 
| weights: !getattr [*model_weights_enum, 'IMAGENET1K_V1'] 69 | 70 | test: 71 | test_data_loader: 72 | dataset_id: *imagenet_val 73 | sampler: 74 | class_or_func: !import_get 75 | key: 'torch.utils.data.SequentialSampler' 76 | kwargs: 77 | kwargs: 78 | batch_size: 1 79 | num_workers: 16 80 | drop_last: False 81 | -------------------------------------------------------------------------------- /docs/source/subpkgs/models.rst: -------------------------------------------------------------------------------- 1 | sc2bench.models 2 | ===== 3 | 4 | 5 | .. toctree:: 6 | :maxdepth: 4 7 | :caption: Contents: 8 | 9 | ---- 10 | 11 | sc2bench.models.layer 12 | ------------ 13 | 14 | .. automodule:: sc2bench.models.layer 15 | :members: 16 | :exclude-members: forward 17 | 18 | ---- 19 | 20 | sc2bench.models.registry 21 | ------------ 22 | 23 | .. automodule:: sc2bench.models.registry 24 | :members: 25 | :exclude-members: forward 26 | 27 | ---- 28 | 29 | sc2bench.models.wrapper 30 | ------------ 31 | 32 | .. automodule:: sc2bench.models.wrapper 33 | :members: 34 | :exclude-members: forward 35 | 36 | ---- 37 | 38 | sc2bench.models.backbone 39 | ------------ 40 | 41 | .. automodule:: sc2bench.models.backbone 42 | :members: 43 | :exclude-members: forward 44 | 45 | ---- 46 | 47 | sc2bench.models.detection 48 | ------------ 49 | 50 | .. automodule:: sc2bench.models.detection 51 | :members: 52 | 53 | ---- 54 | 55 | sc2bench.models.detection.base 56 | ^^^^^^^^^^^^ 57 | 58 | .. automodule:: sc2bench.models.detection.base 59 | :members: 60 | :exclude-members: forward 61 | 62 | ---- 63 | 64 | sc2bench.models.detection.rcnn 65 | ^^^^^^^^^^^^ 66 | 67 | .. automodule:: sc2bench.models.detection.rcnn 68 | :members: 69 | :exclude-members: forward 70 | 71 | ---- 72 | 73 | sc2bench.models.detection.registry 74 | ^^^^^^^^^^^^ 75 | 76 | .. automodule:: sc2bench.models.detection.registry 77 | :members: 78 | 79 | ---- 80 | 81 | sc2bench.models.detection.transform 82 | ^^^^^^^^^^^^ 83 | 84 | .. 
automodule:: sc2bench.models.detection.transform 85 | :members: 86 | :exclude-members: forward 87 | 88 | ---- 89 | 90 | sc2bench.models.detection.wrapper 91 | ^^^^^^^^^^^^ 92 | 93 | .. automodule:: sc2bench.models.detection.wrapper 94 | :members: 95 | :exclude-members: forward 96 | 97 | 98 | sc2bench.models.segmentation 99 | ------------ 100 | 101 | .. automodule:: sc2bench.models.segmentation 102 | :members: 103 | 104 | ---- 105 | 106 | sc2bench.models.segmentation.base 107 | ^^^^^^^^^^^^ 108 | .. automodule:: sc2bench.models.segmentation.base 109 | :members: 110 | :exclude-members: forward 111 | 112 | ---- 113 | 114 | sc2bench.models.segmentation.deeplabv3 115 | ^^^^^^^^^^^^ 116 | 117 | .. automodule:: sc2bench.models.segmentation.deeplabv3 118 | :members: 119 | :exclude-members: forward 120 | 121 | ---- 122 | 123 | sc2bench.models.segmentation.registry 124 | ^^^^^^^^^^^^ 125 | 126 | .. automodule:: sc2bench.models.segmentation.registry 127 | :members: 128 | 129 | ---- 130 | 131 | sc2bench.models.segmentation.wrapper 132 | ^^^^^^^^^^^^ 133 | 134 | .. 
automodule:: sc2bench.models.segmentation.wrapper 135 | :members: 136 | :exclude-members: forward 137 | -------------------------------------------------------------------------------- /configs/ilsvrc2012/input_compression/jpeg-tf_efficientnet_l2_ns.yaml: -------------------------------------------------------------------------------- 1 | datasets: 2 | &imagenet_val ilsvrc2012/val: !import_call 3 | _name: &dataset_name 'ilsvrc2012' 4 | _root: &root_dir !join ['~/datasets/', *dataset_name] 5 | key: 'torchvision.datasets.ImageFolder' 6 | init: 7 | kwargs: 8 | root: !join [*root_dir, '/val'] 9 | transform: !import_call 10 | key: 'torchvision.transforms.Compose' 11 | init: 12 | kwargs: 13 | transforms: 14 | - !import_call 15 | key: 'torchvision.transforms.Resize' 16 | init: 17 | kwargs: 18 | size: 833 19 | interpolation: !getattr 20 | - !import_get 21 | key: 'torchvision.transforms.functional.InterpolationMode' 22 | - 'BICUBIC' 23 | - !import_call 24 | key: 'torchvision.transforms.CenterCrop' 25 | init: 26 | kwargs: 27 | size: [800, 800] 28 | 29 | models: 30 | model: 31 | key: 'CodecInputCompressionClassifier' 32 | kwargs: 33 | codec_encoder_decoder: !import_call 34 | key: 'torchvision.transforms.Compose' 35 | init: 36 | kwargs: 37 | transforms: 38 | - !import_call 39 | key: 'sc2bench.transforms.codec.PILImageModule' 40 | init: 41 | kwargs: 42 | format: 'JPEG' 43 | quality: 90 44 | returns_file_size: True 45 | post_transform: !import_call 46 | key: 'torchvision.transforms.Compose' 47 | init: 48 | kwargs: 49 | transforms: 50 | - !import_call 51 | key: 'torchvision.transforms.ToTensor' 52 | init: 53 | - !import_call 54 | key: 'torchvision.transforms.Normalize' 55 | init: 56 | kwargs: 57 | mean: [0.485, 0.456, 0.406] 58 | std: [0.229, 0.224, 0.225] 59 | analysis_config: 60 | analyzer_configs: 61 | - key: 'FileSizeAccumulator' 62 | kwargs: 63 | unit: 'KB' 64 | classification_model: 65 | key: 'tf_efficientnet_l2_ns' 66 | repo_or_dir: 'rwightman/pytorch-image-models' 67 | 
kwargs: 68 | num_classes: 1000 69 | pretrained: True 70 | 71 | test: 72 | test_data_loader: 73 | dataset_id: *imagenet_val 74 | sampler: 75 | class_or_func: !import_get 76 | key: 'torch.utils.data.SequentialSampler' 77 | kwargs: 78 | kwargs: 79 | batch_size: 1 80 | num_workers: 16 81 | drop_last: False 82 | -------------------------------------------------------------------------------- /configs/ilsvrc2012/input_compression/jpeg-tf_efficientnet_l2_ns_475.yaml: -------------------------------------------------------------------------------- 1 | datasets: 2 | &imagenet_val ilsvrc2012/val: !import_call 3 | _name: &dataset_name 'ilsvrc2012' 4 | _root: &root_dir !join ['~/datasets/', *dataset_name] 5 | key: 'torchvision.datasets.ImageFolder' 6 | init: 7 | kwargs: 8 | root: !join [*root_dir, '/val'] 9 | transform: !import_call 10 | key: 'torchvision.transforms.Compose' 11 | init: 12 | kwargs: 13 | transforms: 14 | - !import_call 15 | key: 'torchvision.transforms.Resize' 16 | init: 17 | kwargs: 18 | size: 507 19 | interpolation: !getattr 20 | - !import_get 21 | key: 'torchvision.transforms.functional.InterpolationMode' 22 | - 'BICUBIC' 23 | - !import_call 24 | key: 'torchvision.transforms.CenterCrop' 25 | init: 26 | kwargs: 27 | size: [475, 475] 28 | 29 | models: 30 | model: 31 | key: 'CodecInputCompressionClassifier' 32 | kwargs: 33 | codec_encoder_decoder: !import_call 34 | key: 'torchvision.transforms.Compose' 35 | init: 36 | kwargs: 37 | transforms: 38 | - !import_call 39 | key: 'sc2bench.transforms.codec.PILImageModule' 40 | init: 41 | kwargs: 42 | format: 'JPEG' 43 | quality: 90 44 | returns_file_size: True 45 | post_transform: !import_call 46 | key: 'torchvision.transforms.Compose' 47 | init: 48 | kwargs: 49 | transforms: 50 | - !import_call 51 | key: 'torchvision.transforms.ToTensor' 52 | init: 53 | - !import_call 54 | key: 'torchvision.transforms.Normalize' 55 | init: 56 | kwargs: 57 | mean: [0.485, 0.456, 0.406] 58 | std: [0.229, 0.224, 0.225] 59 | 
analysis_config: 60 | analyzer_configs: 61 | - key: 'FileSizeAccumulator' 62 | kwargs: 63 | unit: 'KB' 64 | classification_model: 65 | key: 'tf_efficientnet_l2_ns_475' 66 | repo_or_dir: 'rwightman/pytorch-image-models' 67 | kwargs: 68 | num_classes: 1000 69 | pretrained: True 70 | 71 | test: 72 | test_data_loader: 73 | dataset_id: *imagenet_val 74 | sampler: 75 | class_or_func: !import_get 76 | key: 'torch.utils.data.SequentialSampler' 77 | kwargs: 78 | kwargs: 79 | batch_size: 1 80 | num_workers: 16 81 | drop_last: False 82 | -------------------------------------------------------------------------------- /configs/ilsvrc2012/feature_compression/jpeg-resnet50.yaml: -------------------------------------------------------------------------------- 1 | datasets: 2 | &imagenet_val ilsvrc2012/val: !import_call 3 | _name: &dataset_name 'ilsvrc2012' 4 | _root: &root_dir !join ['~/datasets/', *dataset_name] 5 | key: 'torchvision.datasets.ImageFolder' 6 | init: 7 | kwargs: 8 | root: !join [ *root_dir, '/val' ] 9 | transform: !import_call 10 | key: 'torchvision.transforms.Compose' 11 | init: 12 | kwargs: 13 | transforms: 14 | - !import_call 15 | key: 'torchvision.transforms.Resize' 16 | init: 17 | kwargs: 18 | size: 256 19 | - !import_call 20 | key: 'torchvision.transforms.CenterCrop' 21 | init: 22 | kwargs: 23 | size: [224, 224] 24 | - !import_call 25 | key: 'torchvision.transforms.ToTensor' 26 | init: 27 | - !import_call 28 | key: 'torchvision.transforms.Normalize' 29 | init: 30 | kwargs: 31 | mean: [0.485, 0.456, 0.406] 32 | std: [0.229, 0.224, 0.225] 33 | 34 | models: 35 | model: 36 | key: 'CodecFeatureCompressionClassifier' 37 | kwargs: 38 | codec_encoder_decoder: !import_call 39 | key: 'torchvision.transforms.Compose' 40 | init: 41 | kwargs: 42 | transforms: 43 | - !import_call 44 | key: 'sc2bench.transforms.codec.PILTensorModule' 45 | init: 46 | kwargs: 47 | format: 'JPEG' 48 | quality: 90 49 | returns_file_size: True 50 | encoder_config: 51 | sequential: ['conv1', 
'bn1', 'relu', 'maxpool', 'layer1', 'layer2'] 52 | decoder_config: 53 | sequential: ['layer3', 'layer4', 'avgpool'] 54 | classifier_config: 55 | sequential: ['fc'] 56 | post_transform: 57 | analysis_config: 58 | analyzer_configs: 59 | - key: 'FileSizeAccumulator' 60 | kwargs: 61 | unit: 'KB' 62 | classification_model: 63 | key: 'resnet50' 64 | _weights: &model_weights_enum !import_get 65 | key: 'torchvision.models.resnet.ResNet50_Weights' 66 | kwargs: 67 | num_classes: 1000 68 | weights: !getattr [*model_weights_enum, 'IMAGENET1K_V1'] 69 | 70 | test: 71 | test_data_loader: 72 | dataset_id: *imagenet_val 73 | collate_fn: 'default_collate_w_pil' 74 | sampler: 75 | class_or_func: !import_get 76 | key: 'torch.utils.data.SequentialSampler' 77 | kwargs: 78 | kwargs: 79 | batch_size: 1 80 | num_workers: 16 81 | drop_last: False 82 | -------------------------------------------------------------------------------- /configs/ilsvrc2012/feature_compression/webp-resnet50.yaml: -------------------------------------------------------------------------------- 1 | datasets: 2 | &imagenet_val ilsvrc2012/val: !import_call 3 | _name: &dataset_name 'ilsvrc2012' 4 | _root: &root_dir !join ['~/datasets/', *dataset_name] 5 | key: 'torchvision.datasets.ImageFolder' 6 | init: 7 | kwargs: 8 | root: !join [ *root_dir, '/val' ] 9 | transform: !import_call 10 | key: 'torchvision.transforms.Compose' 11 | init: 12 | kwargs: 13 | transforms: 14 | - !import_call 15 | key: 'torchvision.transforms.Resize' 16 | init: 17 | kwargs: 18 | size: 256 19 | - !import_call 20 | key: 'torchvision.transforms.CenterCrop' 21 | init: 22 | kwargs: 23 | size: [224, 224] 24 | - !import_call 25 | key: 'torchvision.transforms.ToTensor' 26 | init: 27 | - !import_call 28 | key: 'torchvision.transforms.Normalize' 29 | init: 30 | kwargs: 31 | mean: [0.485, 0.456, 0.406] 32 | std: [0.229, 0.224, 0.225] 33 | 34 | models: 35 | model: 36 | key: 'CodecFeatureCompressionClassifier' 37 | kwargs: 38 | codec_encoder_decoder: 
!import_call 39 | key: 'torchvision.transforms.Compose' 40 | init: 41 | kwargs: 42 | transforms: 43 | - !import_call 44 | key: 'sc2bench.transforms.codec.PILTensorModule' 45 | init: 46 | kwargs: 47 | format: 'WEBP' 48 | quality: 90 49 | returns_file_size: True 50 | encoder_config: 51 | sequential: ['conv1', 'bn1', 'relu', 'maxpool', 'layer1', 'layer2'] 52 | decoder_config: 53 | sequential: ['layer3', 'layer4', 'avgpool'] 54 | classifier_config: 55 | sequential: ['fc'] 56 | post_transform: 57 | analysis_config: 58 | analyzer_configs: 59 | - key: 'FileSizeAccumulator' 60 | kwargs: 61 | unit: 'KB' 62 | classification_model: 63 | key: 'resnet50' 64 | _weights: &model_weights_enum !import_get 65 | key: 'torchvision.models.resnet.ResNet50_Weights' 66 | kwargs: 67 | num_classes: 1000 68 | weights: !getattr [*model_weights_enum, 'IMAGENET1K_V1'] 69 | 70 | test: 71 | test_data_loader: 72 | dataset_id: *imagenet_val 73 | collate_fn: 'default_collate_w_pil' 74 | sampler: 75 | class_or_func: !import_get 76 | key: 'torch.utils.data.SequentialSampler' 77 | kwargs: 78 | kwargs: 79 | batch_size: 1 80 | num_workers: 16 81 | drop_last: False 82 | -------------------------------------------------------------------------------- /configs/ilsvrc2012/input_compression/mean_scale_hyperprior-resnet50.yaml: -------------------------------------------------------------------------------- 1 | datasets: 2 | &imagenet_val ilsvrc2012/val: !import_call 3 | _name: &dataset_name 'ilsvrc2012' 4 | _root: &root_dir !join ['~/datasets/', *dataset_name] 5 | key: 'torchvision.datasets.ImageFolder' 6 | init: 7 | kwargs: 8 | root: !join [*root_dir, '/val'] 9 | transform: !import_call 10 | key: 'torchvision.transforms.Compose' 11 | init: 12 | kwargs: 13 | transforms: 14 | - !import_call 15 | key: 'torchvision.transforms.Resize' 16 | init: 17 | kwargs: 18 | size: 256 19 | - !import_call 20 | key: 'torchvision.transforms.CenterCrop' 21 | init: 22 | kwargs: 23 | size: &input_size [224, 224] 24 | - !import_call 
25 | key: 'torchvision.transforms.ToTensor' 26 | init: 27 | - !import_call 28 | key: 'sc2bench.transforms.misc.AdaptivePad' 29 | init: 30 | kwargs: 31 | fill: 0 32 | factor: 64 33 | 34 | models: 35 | model: 36 | key: 'NeuralInputCompressionClassifier' 37 | kwargs: 38 | post_transform: !import_call 39 | key: 'torchvision.transforms.Compose' 40 | init: 41 | kwargs: 42 | transforms: 43 | - !import_call 44 | key: 'torchvision.transforms.CenterCrop' 45 | init: 46 | kwargs: 47 | size: *input_size 48 | - !import_call 49 | key: 'torchvision.transforms.Normalize' 50 | init: 51 | kwargs: 52 | mean: [0.485, 0.456, 0.406] 53 | std: [0.229, 0.224, 0.225] 54 | analysis_config: 55 | analyzes_after_compress: True 56 | analyzer_configs: 57 | - key: 'FileSizeAnalyzer' 58 | kwargs: 59 | unit: 'KB' 60 | compression_model: 61 | key: 'mbt2018_mean' 62 | kwargs: 63 | pretrained: True 64 | quality: 8 65 | metric: 'mse' 66 | classification_model: 67 | key: 'resnet50' 68 | _weights: &model_weights_enum !import_get 69 | key: 'torchvision.models.resnet.ResNet50_Weights' 70 | kwargs: 71 | num_classes: 1000 72 | weights: !getattr [*model_weights_enum, 'IMAGENET1K_V1'] 73 | 74 | test: 75 | test_data_loader: 76 | dataset_id: *imagenet_val 77 | sampler: 78 | class_or_func: !import_get 79 | key: 'torch.utils.data.SequentialSampler' 80 | kwargs: 81 | kwargs: 82 | batch_size: 1 83 | num_workers: 16 84 | drop_last: False 85 | -------------------------------------------------------------------------------- /configs/pascal_voc2012/input_compression/jpeg-deeplabv3_resnet50.yaml: -------------------------------------------------------------------------------- 1 | datasets: 2 | &pascal_val 'pascal_voc2012/val': !import_call 3 | _name: 'pascal_voc2012' 4 | _root: &root_dir '~/datasets' 5 | key: 'torchvision.datasets.VOCSegmentation' 6 | init: 7 | kwargs: 8 | root: *root_dir 9 | image_set: 'val' 10 | year: '2012' 11 | download: True 12 | transforms: !import_call 13 | key: 'custom.transform.CustomCompose' 14 
| init: 15 | kwargs: 16 | transforms: 17 | - !import_call 18 | key: 'custom.transform.CustomRandomResize' 19 | init: 20 | kwargs: 21 | min_size: 513 22 | max_size: 513 23 | - !import_call 24 | key: 'sc2bench.transforms.misc.CustomToTensor' 25 | init: 26 | kwargs: 27 | converts_sample: False 28 | converts_target: True 29 | 30 | models: 31 | model: 32 | key: 'CodecInputCompressionSegmentationModel' 33 | kwargs: 34 | codec_encoder_decoder: !import_call 35 | key: 'torchvision.transforms.Compose' 36 | init: 37 | kwargs: 38 | transforms: 39 | - !import_call 40 | key: 'sc2bench.transforms.codec.PILImageModule' 41 | init: 42 | kwargs: 43 | format: 'JPEG' 44 | quality: 90 45 | returns_file_size: True 46 | analysis_config: 47 | analyzer_configs: 48 | - key: 'FileSizeAccumulator' 49 | kwargs: 50 | unit: 'KB' 51 | post_transform: !import_call 52 | key: 'torchvision.transforms.Compose' 53 | init: 54 | kwargs: 55 | transforms: 56 | - !import_call 57 | key: 'torchvision.transforms.ToTensor' 58 | init: 59 | - !import_call 60 | key: 'torchvision.transforms.Normalize' 61 | init: 62 | kwargs: 63 | mean: [0.485, 0.456, 0.406] 64 | std: [0.229, 0.224, 0.225] 65 | segmentation_model: 66 | key: 'deeplabv3_resnet50' 67 | kwargs: 68 | pretrained: True 69 | num_classes: 21 70 | aux_loss: True 71 | src_ckpt: 'https://github.com/yoshitomo-matsubara/torchdistill/releases/download/v0.2.8/pascal_voc2012-deeplabv3_resnet50.pt' 72 | 73 | test: 74 | test_data_loader: 75 | dataset_id: *pascal_val 76 | sampler: 77 | class_or_func: !import_get 78 | key: 'torch.utils.data.SequentialSampler' 79 | kwargs: 80 | collate_fn: 'pascal_seg_eval_collate_fn' 81 | kwargs: 82 | batch_size: 1 83 | num_workers: 16 84 | -------------------------------------------------------------------------------- /configs/pascal_voc2012/input_compression/webp-deeplabv3_resnet101.yaml: -------------------------------------------------------------------------------- 1 | datasets: 2 | &pascal_val 'pascal_voc2012/val': !import_call 3 
| _name: 'pascal_voc2012' 4 | _root: &root_dir '~/datasets' 5 | key: 'torchvision.datasets.VOCSegmentation' 6 | init: 7 | kwargs: 8 | root: *root_dir 9 | image_set: 'val' 10 | year: '2012' 11 | download: True 12 | transforms: !import_call 13 | key: 'custom.transform.CustomCompose' 14 | init: 15 | kwargs: 16 | transforms: 17 | - !import_call 18 | key: 'custom.transform.CustomRandomResize' 19 | init: 20 | kwargs: 21 | min_size: 513 22 | max_size: 513 23 | - !import_call 24 | key: 'sc2bench.transforms.misc.CustomToTensor' 25 | init: 26 | kwargs: 27 | converts_sample: False 28 | converts_target: True 29 | 30 | models: 31 | model: 32 | key: 'CodecInputCompressionSegmentationModel' 33 | kwargs: 34 | codec_encoder_decoder: !import_call 35 | key: 'torchvision.transforms.Compose' 36 | init: 37 | kwargs: 38 | transforms: 39 | - !import_call 40 | key: 'sc2bench.transforms.codec.PILImageModule' 41 | init: 42 | kwargs: 43 | format: 'WEBP' 44 | quality: 90 45 | returns_file_size: True 46 | analysis_config: 47 | analyzer_configs: 48 | - key: 'FileSizeAccumulator' 49 | kwargs: 50 | unit: 'KB' 51 | post_transform: !import_call 52 | key: 'torchvision.transforms.Compose' 53 | init: 54 | kwargs: 55 | transforms: 56 | - !import_call 57 | key: 'torchvision.transforms.ToTensor' 58 | init: 59 | - !import_call 60 | key: 'torchvision.transforms.Normalize' 61 | init: 62 | kwargs: 63 | mean: [0.485, 0.456, 0.406] 64 | std: [0.229, 0.224, 0.225] 65 | segmentation_model: 66 | key: 'deeplabv3_resnet101' 67 | kwargs: 68 | pretrained: True 69 | num_classes: 21 70 | aux_loss: True 71 | src_ckpt: 'https://github.com/yoshitomo-matsubara/torchdistill/releases/download/v0.2.8/pascal_voc2012-deeplabv3_resnet101.pt' 72 | 73 | test: 74 | test_data_loader: 75 | dataset_id: *pascal_val 76 | sampler: 77 | class_or_func: !import_get 78 | key: 'torch.utils.data.SequentialSampler' 79 | kwargs: 80 | collate_fn: 'pascal_seg_eval_collate_fn' 81 | kwargs: 82 | batch_size: 1 83 | num_workers: 16 84 | 
-------------------------------------------------------------------------------- /configs/pascal_voc2012/input_compression/webp-deeplabv3_resnet50.yaml: -------------------------------------------------------------------------------- 1 | datasets: 2 | &pascal_val 'pascal_voc2012/val': !import_call 3 | _name: 'pascal_voc2012' 4 | _root: &root_dir '~/datasets' 5 | key: 'torchvision.datasets.VOCSegmentation' 6 | init: 7 | kwargs: 8 | root: *root_dir 9 | image_set: 'val' 10 | year: '2012' 11 | download: True 12 | transforms: !import_call 13 | key: 'custom.transform.CustomCompose' 14 | init: 15 | kwargs: 16 | transforms: 17 | - !import_call 18 | key: 'custom.transform.CustomRandomResize' 19 | init: 20 | kwargs: 21 | min_size: 513 22 | max_size: 513 23 | - !import_call 24 | key: 'sc2bench.transforms.misc.CustomToTensor' 25 | init: 26 | kwargs: 27 | converts_sample: False 28 | converts_target: True 29 | 30 | models: 31 | model: 32 | key: 'CodecInputCompressionSegmentationModel' 33 | kwargs: 34 | codec_encoder_decoder: !import_call 35 | key: 'torchvision.transforms.Compose' 36 | init: 37 | kwargs: 38 | transforms: 39 | - !import_call 40 | key: 'sc2bench.transforms.codec.PILImageModule' 41 | init: 42 | kwargs: 43 | format: 'WEBP' 44 | quality: 90 45 | returns_file_size: True 46 | analysis_config: 47 | analyzer_configs: 48 | - key: 'FileSizeAccumulator' 49 | kwargs: 50 | unit: 'KB' 51 | post_transform: !import_call 52 | key: 'torchvision.transforms.Compose' 53 | init: 54 | kwargs: 55 | transforms: 56 | - !import_call 57 | key: 'torchvision.transforms.ToTensor' 58 | init: 59 | - !import_call 60 | key: 'torchvision.transforms.Normalize' 61 | init: 62 | kwargs: 63 | mean: [0.485, 0.456, 0.406] 64 | std: [0.229, 0.224, 0.225] 65 | segmentation_model: 66 | key: 'deeplabv3_resnet50' 67 | kwargs: 68 | pretrained: True 69 | num_classes: 21 70 | aux_loss: True 71 | src_ckpt: 
'https://github.com/yoshitomo-matsubara/torchdistill/releases/download/v0.2.8/pascal_voc2012-deeplabv3_resnet50.pt' 72 | 73 | test: 74 | test_data_loader: 75 | dataset_id: *pascal_val 76 | sampler: 77 | class_or_func: !import_get 78 | key: 'torch.utils.data.SequentialSampler' 79 | kwargs: 80 | collate_fn: 'pascal_seg_eval_collate_fn' 81 | kwargs: 82 | batch_size: 1 83 | num_workers: 16 84 | -------------------------------------------------------------------------------- /configs/ilsvrc2012/input_compression/factorized_prior-resnet50.yaml: -------------------------------------------------------------------------------- 1 | datasets: 2 | &imagenet_val ilsvrc2012/val: !import_call 3 | _name: &dataset_name 'ilsvrc2012' 4 | _root: &root_dir !join ['~/datasets/', *dataset_name] 5 | key: 'torchvision.datasets.ImageFolder' 6 | init: 7 | kwargs: 8 | root: !join [*root_dir, '/val'] 9 | transform: !import_call 10 | key: 'torchvision.transforms.Compose' 11 | init: 12 | kwargs: 13 | transforms: 14 | - !import_call 15 | key: 'torchvision.transforms.Resize' 16 | init: 17 | kwargs: 18 | size: 256 19 | - !import_call 20 | key: 'torchvision.transforms.CenterCrop' 21 | init: 22 | kwargs: 23 | size: &input_size [224, 224] 24 | - !import_call 25 | key: 'torchvision.transforms.ToTensor' 26 | init: 27 | - !import_call 28 | key: 'sc2bench.transforms.misc.AdaptivePad' 29 | init: 30 | kwargs: 31 | fill: 0 32 | factor: 64 33 | 34 | models: 35 | model: 36 | key: 'NeuralInputCompressionClassifier' 37 | kwargs: 38 | post_transform: !import_call 39 | key: 'torchvision.transforms.Compose' 40 | init: 41 | kwargs: 42 | transforms: 43 | - !import_call 44 | key: 'torchvision.transforms.CenterCrop' 45 | init: 46 | kwargs: 47 | size: *input_size 48 | - !import_call 49 | key: 'torchvision.transforms.Normalize' 50 | init: 51 | kwargs: 52 | mean: [0.485, 0.456, 0.406] 53 | std: [0.229, 0.224, 0.225] 54 | analysis_config: 55 | analyzes_after_compress: True 56 | analyzer_configs: 57 | - key: 
'FileSizeAnalyzer' 58 | kwargs: 59 | unit: 'KB' 60 | compression_model: 61 | key: 'bmshj2018_factorized' 62 | kwargs: 63 | pretrained: True 64 | quality: 8 65 | metric: 'mse' 66 | classification_model: 67 | key: 'resnet50' 68 | _weights: &model_weights_enum !import_get 69 | key: 'torchvision.models.resnet.ResNet50_Weights' 70 | kwargs: 71 | num_classes: 1000 72 | weights: !getattr [*model_weights_enum, 'IMAGENET1K_V1'] 73 | 74 | test: 75 | test_data_loader: 76 | dataset_id: *imagenet_val 77 | sampler: 78 | class_or_func: !import_get 79 | key: 'torch.utils.data.SequentialSampler' 80 | kwargs: 81 | kwargs: 82 | batch_size: 1 83 | num_workers: 16 84 | drop_last: False 85 | -------------------------------------------------------------------------------- /configs/ilsvrc2012/input_compression/scale_hyperprior-resnet50.yaml: -------------------------------------------------------------------------------- 1 | datasets: 2 | &imagenet_val ilsvrc2012/val: !import_call 3 | _name: &dataset_name 'ilsvrc2012' 4 | _root: &root_dir !join ['~/datasets/', *dataset_name] 5 | key: 'torchvision.datasets.ImageFolder' 6 | init: 7 | kwargs: 8 | root: !join [*root_dir, '/val'] 9 | transform: !import_call 10 | key: 'torchvision.transforms.Compose' 11 | init: 12 | kwargs: 13 | transforms: 14 | - !import_call 15 | key: 'torchvision.transforms.Resize' 16 | init: 17 | kwargs: 18 | size: 256 19 | - !import_call 20 | key: 'torchvision.transforms.CenterCrop' 21 | init: 22 | kwargs: 23 | size: &input_size [224, 224] 24 | - !import_call 25 | key: 'torchvision.transforms.ToTensor' 26 | init: 27 | - !import_call 28 | key: 'sc2bench.transforms.misc.AdaptivePad' 29 | init: 30 | kwargs: 31 | fill: 0 32 | factor: 64 33 | 34 | models: 35 | model: 36 | key: 'NeuralInputCompressionClassifier' 37 | kwargs: 38 | post_transform: !import_call 39 | key: 'torchvision.transforms.Compose' 40 | init: 41 | kwargs: 42 | transforms: 43 | - !import_call 44 | key: 'torchvision.transforms.CenterCrop' 45 | init: 46 | kwargs: 
47 | size: *input_size 48 | - !import_call 49 | key: 'torchvision.transforms.Normalize' 50 | init: 51 | kwargs: 52 | mean: [0.485, 0.456, 0.406] 53 | std: [0.229, 0.224, 0.225] 54 | analysis_config: 55 | analyzes_after_compress: True 56 | analyzer_configs: 57 | - key: 'FileSizeAnalyzer' 58 | kwargs: 59 | unit: 'KB' 60 | compression_model: 61 | key: 'bmshj2018_hyperprior' 62 | kwargs: 63 | pretrained: True 64 | quality: 8 65 | metric: 'mse' 66 | classification_model: 67 | key: 'resnet50' 68 | _weights: &model_weights_enum !import_get 69 | key: 'torchvision.models.resnet.ResNet50_Weights' 70 | kwargs: 71 | num_classes: 1000 72 | weights: !getattr [*model_weights_enum, 'IMAGENET1K_V1'] 73 | 74 | test: 75 | test_data_loader: 76 | dataset_id: *imagenet_val 77 | sampler: 78 | class_or_func: !import_get 79 | key: 'torch.utils.data.SequentialSampler' 80 | kwargs: 81 | kwargs: 82 | batch_size: 1 83 | num_workers: 16 84 | drop_last: False 85 | -------------------------------------------------------------------------------- /configs/pascal_voc2012/input_compression/jpeg-deeplabv3_resnet101.yaml: -------------------------------------------------------------------------------- 1 | datasets: 2 | &pascal_val 'pascal_voc2012/val': !import_call 3 | _name: 'pascal_voc2012' 4 | _root: &root_dir '~/datasets' 5 | key: 'torchvision.datasets.VOCSegmentation' 6 | init: 7 | kwargs: 8 | root: *root_dir 9 | image_set: 'val' 10 | year: '2012' 11 | download: True 12 | transforms: !import_call 13 | key: 'custom.transform.CustomCompose' 14 | init: 15 | kwargs: 16 | transforms: 17 | - !import_call 18 | key: 'custom.transform.CustomRandomResize' 19 | init: 20 | kwargs: 21 | min_size: 513 22 | max_size: 513 23 | - !import_call 24 | key: 'sc2bench.transforms.misc.CustomToTensor' 25 | init: 26 | kwargs: 27 | converts_sample: False 28 | converts_target: True 29 | 30 | models: 31 | model: 32 | key: 'CodecInputCompressionSegmentationModel' 33 | kwargs: 34 | codec_encoder_decoder: !import_call 35 | key: 
'torchvision.transforms.Compose' 36 | init: 37 | kwargs: 38 | transforms: 39 | - !import_call 40 | key: 'sc2bench.transforms.codec.PILImageModule' 41 | init: 42 | kwargs: 43 | format: 'JPEG' 44 | quality: 90 45 | returns_file_size: True 46 | analysis_config: 47 | analyzer_configs: 48 | - key: 'FileSizeAccumulator' 49 | kwargs: 50 | unit: 'KB' 51 | post_transform: !import_call 52 | key: 'torchvision.transforms.Compose' 53 | init: 54 | kwargs: 55 | transforms: 56 | - !import_call 57 | key: 'torchvision.transforms.ToTensor' 58 | init: 59 | - !import_call 60 | key: 'torchvision.transforms.Normalize' 61 | init: 62 | kwargs: 63 | mean: [0.485, 0.456, 0.406] 64 | std: [0.229, 0.224, 0.225] 65 | segmentation_model: 66 | key: 'deeplabv3_resnet101' 67 | kwargs: 68 | pretrained: False 69 | num_classes: 21 70 | aux_loss: True 71 | src_ckpt: 'https://github.com/yoshitomo-matsubara/torchdistill/releases/download/v0.2.8/pascal_voc2012-deeplabv3_resnet101.pt' 72 | 73 | test: 74 | test_data_loader: 75 | dataset_id: *pascal_val 76 | sampler: 77 | class_or_func: !import_get 78 | key: 'torch.utils.data.SequentialSampler' 79 | kwargs: 80 | collate_fn: 'pascal_seg_eval_collate_fn' 81 | kwargs: 82 | batch_size: 1 83 | num_workers: 16 84 | -------------------------------------------------------------------------------- /configs/ilsvrc2012/input_compression/joint_autoregressive_hierarchical_prior-resnet50.yaml: -------------------------------------------------------------------------------- 1 | datasets: 2 | &imagenet_val ilsvrc2012/val: !import_call 3 | _name: &dataset_name 'ilsvrc2012' 4 | _root: &root_dir !join ['~/datasets/', *dataset_name] 5 | key: 'torchvision.datasets.ImageFolder' 6 | init: 7 | kwargs: 8 | root: !join [*root_dir, '/val'] 9 | transform: !import_call 10 | key: 'torchvision.transforms.Compose' 11 | init: 12 | kwargs: 13 | transforms: 14 | - !import_call 15 | key: 'torchvision.transforms.Resize' 16 | init: 17 | kwargs: 18 | size: 256 19 | - !import_call 20 | key: 
'torchvision.transforms.CenterCrop' 21 | init: 22 | kwargs: 23 | size: &input_size [224, 224] 24 | - !import_call 25 | key: 'torchvision.transforms.ToTensor' 26 | init: 27 | - !import_call 28 | key: 'sc2bench.transforms.misc.AdaptivePad' 29 | init: 30 | kwargs: 31 | fill: 0 32 | factor: 64 33 | 34 | models: 35 | model: 36 | key: 'NeuralInputCompressionClassifier' 37 | kwargs: 38 | post_transform: !import_call 39 | key: 'torchvision.transforms.Compose' 40 | init: 41 | kwargs: 42 | transforms: 43 | - !import_call 44 | key: 'torchvision.transforms.CenterCrop' 45 | init: 46 | kwargs: 47 | size: *input_size 48 | - !import_call 49 | key: 'torchvision.transforms.Normalize' 50 | init: 51 | kwargs: 52 | mean: [0.485, 0.456, 0.406] 53 | std: [0.229, 0.224, 0.225] 54 | analysis_config: 55 | analyzes_after_compress: True 56 | analyzer_configs: 57 | - key: 'FileSizeAnalyzer' 58 | kwargs: 59 | unit: 'KB' 60 | uses_cpu4compression_model: True 61 | compression_model: 62 | key: 'mbt2018' 63 | kwargs: 64 | pretrained: True 65 | quality: 8 66 | metric: 'mse' 67 | classification_model: 68 | key: 'resnet50' 69 | _weights: &model_weights_enum !import_get 70 | key: 'torchvision.models.resnet.ResNet50_Weights' 71 | kwargs: 72 | num_classes: 1000 73 | weights: !getattr [*model_weights_enum, 'IMAGENET1K_V1'] 74 | 75 | test: 76 | test_data_loader: 77 | dataset_id: *imagenet_val 78 | sampler: 79 | class_or_func: !import_get 80 | key: 'torch.utils.data.SequentialSampler' 81 | kwargs: 82 | kwargs: 83 | batch_size: 1 84 | num_workers: 16 85 | drop_last: False 86 | -------------------------------------------------------------------------------- /configs/ilsvrc2012/input_compression/vtm-resnet50.yaml: -------------------------------------------------------------------------------- 1 | datasets: 2 | &imagenet_val ilsvrc2012/val: !import_call 3 | _name: &dataset_name 'ilsvrc2012' 4 | _root: &root_dir !join ['~/datasets/', *dataset_name] 5 | key: 'torchvision.datasets.ImageFolder' 6 | init: 7 | 
kwargs: 8 | root: !join [ *root_dir, '/val' ] 9 | transform: !import_call 10 | key: 'torchvision.transforms.Compose' 11 | init: 12 | kwargs: 13 | transforms: 14 | - !import_call 15 | key: 'torchvision.transforms.Resize' 16 | init: 17 | kwargs: 18 | size: 256 19 | - !import_call 20 | key: 'torchvision.transforms.CenterCrop' 21 | init: 22 | kwargs: 23 | size: [224, 224] 24 | 25 | models: 26 | model: 27 | key: 'CodecInputCompressionClassifier' 28 | kwargs: 29 | codec_encoder_decoder: !import_call 30 | key: 'torchvision.transforms.Compose' 31 | init: 32 | kwargs: 33 | transforms: 34 | - !import_call 35 | key: 'sc2bench.transforms.codec.VTMModule' 36 | init: 37 | kwargs: 38 | encoder_path: '~/software/VVCSoftware_VTM/bin/EncoderAppStatic' 39 | decoder_path: '~/software/VVCSoftware_VTM/bin/DecoderAppStatic' 40 | config_path: '~/software/VVCSoftware_VTM/cfg/encoder_intra_vtm.cfg' 41 | color_mode: 'ycbcr' 42 | quality: 63 43 | returns_file_size: True 44 | post_transform: !import_call 45 | key: 'torchvision.transforms.Compose' 46 | init: 47 | kwargs: 48 | transforms: 49 | - !import_call 50 | key: 'torchvision.transforms.ToTensor' 51 | init: 52 | - !import_call 53 | key: 'torchvision.transforms.Normalize' 54 | init: 55 | kwargs: 56 | mean: [0.485, 0.456, 0.406] 57 | std: [0.229, 0.224, 0.225] 58 | analysis_config: 59 | analyzer_configs: 60 | - key: 'FileSizeAccumulator' 61 | kwargs: 62 | unit: 'KB' 63 | classification_model: 64 | key: 'resnet50' 65 | _weights: &model_weights_enum !import_get 66 | key: 'torchvision.models.resnet.ResNet50_Weights' 67 | kwargs: 68 | num_classes: 1000 69 | weights: !getattr [*model_weights_enum, 'IMAGENET1K_V1'] 70 | 71 | test: 72 | test_data_loader: 73 | dataset_id: *imagenet_val 74 | sampler: 75 | class_or_func: !import_get 76 | key: 'torch.utils.data.SequentialSampler' 77 | kwargs: 78 | kwargs: 79 | batch_size: 1 80 | num_workers: 16 81 | drop_last: False 82 | -------------------------------------------------------------------------------- 
/docs/source/conf.py: -------------------------------------------------------------------------------- 1 | # Configuration file for the Sphinx documentation builder. 2 | # 3 | # This file only contains a selection of the most common options. For a full 4 | # list see the documentation: 5 | # https://www.sphinx-doc.org/en/master/usage/configuration.html 6 | 7 | # -- Path setup -------------------------------------------------------------- 8 | 9 | # If extensions (or modules to document with autodoc) are in another directory, 10 | # add these directories to sys.path here. If the directory is relative to the 11 | # documentation root, use os.path.abspath to make it absolute, like shown here. 12 | # 13 | import sys 14 | import pathlib 15 | 16 | sys.path.insert(0, pathlib.Path(__file__).parents[2].resolve().as_posix()) 17 | 18 | 19 | # -- Project information ----------------------------------------------------- 20 | 21 | project = 'SC2 Benchmark' 22 | copyright = '2023, Yoshitomo Matsubara' 23 | author = 'Yoshitomo Matsubara' 24 | 25 | # The full version, including alpha/beta/rc tags 26 | import sc2bench 27 | version = 'v' + sc2bench.__version__ 28 | release = version 29 | 30 | 31 | # -- General configuration --------------------------------------------------- 32 | 33 | # Add any Sphinx extension module names here, as strings. They can be 34 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 35 | # ones. 
36 | extensions = [ 37 | 'sphinx.ext.autodoc', 38 | 'sphinx.ext.autosummary', 39 | 'sphinx.ext.doctest', 40 | 'sphinx.ext.viewcode', 41 | 'sphinx_rtd_theme', 42 | 'sphinxcontrib.youtube' 43 | ] 44 | autodoc_member_order = 'bysource' 45 | highlight_language = 'python' 46 | 47 | html_show_sourcelink = False 48 | html_context = { 49 | 'display_github': True, 50 | 'github_user': 'yoshitomo-matsubara', 51 | 'github_repo': 'sc2-benchmark', 52 | 'github_version': 'main', 53 | 'conf_py_path': '/docs/source/' 54 | } 55 | 56 | import sphinx_rtd_theme 57 | html_theme = 'sphinx_rtd_theme' 58 | 59 | html_theme_options = { 60 | 'analytics_id': 'G-39T9X4DN85', 61 | 'display_version': True, 62 | 'style_external_links': True 63 | } 64 | 65 | # Add any paths that contain templates here, relative to this directory. 66 | templates_path = ['_templates'] 67 | 68 | # List of patterns, relative to source directory, that match files and 69 | # directories to ignore when looking for source files. 70 | # This pattern also affects html_static_path and html_extra_path. 71 | exclude_patterns = [] 72 | 73 | 74 | # -- Options for HTML output ------------------------------------------------- 75 | 76 | # The theme to use for HTML and HTML Help pages. See the documentation for 77 | # a list of builtin themes. 78 | # 79 | html_theme = 'sphinx_rtd_theme' 80 | 81 | # Add any paths that contain custom static files (such as style sheets) here, 82 | # relative to this directory. They are copied after the builtin static files, 83 | # so a file named "default.css" will overwrite the builtin "default.css". 
html_static_path = ['_static']
--------------------------------------------------------------------------------
/configs/pascal_voc2012/input_compression/bpg-deeplabv3_resnet50.yaml:
--------------------------------------------------------------------------------
# PASCAL VOC 2012 semantic segmentation with BPG codec-based input compression
# followed by a DeepLabv3-ResNet50 model.
datasets:
  &pascal_val 'pascal_voc2012/val': !import_call
    _name: 'pascal_voc2012'
    _root: &root_dir '~/datasets'
    key: 'torchvision.datasets.VOCSegmentation'
    init:
      kwargs:
        root: *root_dir
        image_set: 'val'
        year: '2012'
        download: True
        transforms: !import_call
          key: 'custom.transform.CustomCompose'
          init:
            kwargs:
              transforms:
                - !import_call
                  key: 'custom.transform.CustomRandomResize'
                  init:
                    kwargs:
                      # min == max: deterministic resize to 513 for evaluation
                      min_size: 513
                      max_size: 513
                - !import_call
                  key: 'sc2bench.transforms.misc.CustomToTensor'
                  init:
                    kwargs:
                      # sample stays a PIL image for the codec; target becomes a tensor
                      converts_sample: False
                      converts_target: True

models:
  model:
    key: 'CodecInputCompressionSegmentationModel'
    kwargs:
      codec_encoder_decoder: !import_call
        key: 'torchvision.transforms.Compose'
        init:
          kwargs:
            transforms:
              - !import_call
                key: 'sc2bench.transforms.codec.BPGModule'
                init:
                  kwargs:
                    encoder_path: '~/software/libbpg-0.9.8/bpgenc'
                    decoder_path: '~/software/libbpg-0.9.8/bpgdec'
                    quality: 50
                    returns_file_size: True
      analysis_config:
        analyzer_configs:
          - key: 'FileSizeAccumulator'
            kwargs:
              unit: 'KB'
      post_transform: !import_call
        key: 'torchvision.transforms.Compose'
        init:
          kwargs:
            transforms:
              - !import_call
                key: 'torchvision.transforms.ToTensor'
                init:
              - !import_call
                key: 'torchvision.transforms.Normalize'
                init:
                  kwargs:
                    mean: [0.485, 0.456, 0.406]
                    std: [0.229, 0.224, 0.225]
      segmentation_model:
        key: 'deeplabv3_resnet50'
        kwargs:
          pretrained: True
          num_classes: 21
          aux_loss: True
        src_ckpt: 'https://github.com/yoshitomo-matsubara/torchdistill/releases/download/v0.2.8/pascal_voc2012-deeplabv3_resnet50.pt'

test:
  test_data_loader:
    dataset_id: *pascal_val
    sampler:
      class_or_func: !import_get
        key: 'torch.utils.data.SequentialSampler'
      kwargs:
    collate_fn: 'pascal_seg_eval_collate_fn'
    kwargs:
      batch_size: 1
      num_workers: 16
--------------------------------------------------------------------------------
/configs/pascal_voc2012/input_compression/bpg-deeplabv3_resnet101.yaml:
--------------------------------------------------------------------------------
# Same pipeline as bpg-deeplabv3_resnet50.yaml, with a DeepLabv3-ResNet101 backbone.
datasets:
  &pascal_val 'pascal_voc2012/val': !import_call
    _name: 'pascal_voc2012'
    _root: &root_dir '~/datasets'
    key: 'torchvision.datasets.VOCSegmentation'
    init:
      kwargs:
        root: *root_dir
        image_set: 'val'
        year: '2012'
        download: True
        transforms: !import_call
          key: 'custom.transform.CustomCompose'
          init:
            kwargs:
              transforms:
                - !import_call
                  key: 'custom.transform.CustomRandomResize'
                  init:
                    kwargs:
                      # min == max: deterministic resize to 513 for evaluation
                      min_size: 513
                      max_size: 513
                - !import_call
                  key: 'sc2bench.transforms.misc.CustomToTensor'
                  init:
                    kwargs:
                      # sample stays a PIL image for the codec; target becomes a tensor
                      converts_sample: False
                      converts_target: True

models:
  model:
    key: 'CodecInputCompressionSegmentationModel'
    kwargs:
      codec_encoder_decoder: !import_call
        key: 'torchvision.transforms.Compose'
        init:
          kwargs:
            transforms:
              - !import_call
                key: 'sc2bench.transforms.codec.BPGModule'
                init:
                  kwargs:
                    encoder_path: '~/software/libbpg-0.9.8/bpgenc'
                    decoder_path: '~/software/libbpg-0.9.8/bpgdec'
                    quality: 50
                    returns_file_size: True
      analysis_config:
        analyzer_configs:
          - key: 'FileSizeAccumulator'
            kwargs:
              unit: 'KB'
      post_transform: !import_call
        key: 'torchvision.transforms.Compose'
        init:
          kwargs:
            transforms:
              - !import_call
                key: 'torchvision.transforms.ToTensor'
                init:
              - !import_call
                key: 'torchvision.transforms.Normalize'
                init:
                  kwargs:
                    mean: [0.485, 0.456, 0.406]
                    std: [0.229, 0.224, 0.225]
      segmentation_model:
        key: 'deeplabv3_resnet101'
        kwargs:
          pretrained: True
          num_classes: 21
          aux_loss: True
        src_ckpt: 'https://github.com/yoshitomo-matsubara/torchdistill/releases/download/v0.2.8/pascal_voc2012-deeplabv3_resnet101.pt'

test:
  test_data_loader:
    dataset_id: *pascal_val
    sampler:
      class_or_func: !import_get
        key: 'torch.utils.data.SequentialSampler'
      kwargs:
    collate_fn: 'pascal_seg_eval_collate_fn'
    kwargs:
      batch_size: 1
      num_workers: 16
--------------------------------------------------------------------------------