├── sc2bench
├── common
│ ├── __init__.py
│ └── config_util.py
├── __init__.py
├── models
│ ├── detection
│ │ └── __init__.py
│ ├── segmentation
│ │ └── __init__.py
│ └── __init__.py
├── transforms
│ ├── __init__.py
│ └── collator.py
└── loss.py
├── script
├── task
│ ├── coco
│ │ └── __init__.py
│ ├── utils
│ │ ├── __init__.py
│ │ ├── dataset.py
│ │ └── eval.py
│ └── custom
│ │ ├── __init__.py
│ │ └── collator.py
├── software
│ ├── install_vtm.sh
│ └── install_bpg.sh
├── neural_input_compression
│ ├── coco2017-object_detection.sh
│ ├── ilsvrc2012-image_classification.sh
│ ├── pascal_voc2012-semantic_segmentation.sh
│ └── README.md
├── codec_input_compression
│ ├── coco2017-object_detection.sh
│ ├── ilsvrc2012-image_classification.sh
│ ├── pascal_voc2012-semantic_segmentation.sh
│ └── README.md
└── README.md
├── .gitignore
├── MANIFEST.in
├── imgs
├── ilsvrc2012-overview.png
└── input_vs_supervised_compression.png
├── legacy
├── README.md
├── script
│ ├── software
│ │ ├── install_vtm.sh
│ │ └── install_bpg.sh
│ ├── neural_input_compression
│ │ ├── coco2017-object_detection.sh
│ │ ├── ilsvrc2012-image_classification.sh
│ │ ├── pascal_voc2012-semantic_segmentation.sh
│ │ └── README.md
│ ├── codec_input_compression
│ │ ├── coco2017-object_detection.sh
│ │ ├── ilsvrc2012-image_classification.sh
│ │ ├── pascal_voc2012-semantic_segmentation.sh
│ │ └── README.md
│ └── README.md
└── configs
│ ├── coco2017
│ └── input_compression
│ │ ├── jpeg-faster_rcnn_resnet50_fpn.yaml
│ │ ├── webp-faster_rcnn_resnet50_fpn.yaml
│ │ ├── bpg-faster_rcnn_resnet50_fpn.yaml
│ │ ├── factorized_prior-faster_rcnn_resnet50_fpn.yaml
│ │ ├── mean_scale_hyperprior-faster_rcnn_resnet50_fpn.yaml
│ │ ├── scale_hyperprior-faster_rcnn_resnet50_fpn.yaml
│ │ └── joint_autoregressive_hierarchical_prior-faster_rcnn_resnet50_fpn.yaml
│ ├── ilsvrc2012
│ ├── input_compression
│ │ ├── jpeg-tf_efficientnet_l2_ns.yaml
│ │ ├── jpeg-tf_efficientnet_l2_ns_475.yaml
│ │ ├── jpeg-resnet50.yaml
│ │ ├── webp-resnet50.yaml
│ │ ├── jpeg-resnet101.yaml
│ │ ├── jpeg-resnet152.yaml
│ │ ├── webp-resnet101.yaml
│ │ ├── webp-resnet152.yaml
│ │ ├── bpg-resnet50.yaml
│ │ ├── bpg-resnet101.yaml
│ │ ├── bpg-resnet152.yaml
│ │ ├── vtm-resnet50.yaml
│ │ ├── factorized_prior-resnet50.yaml
│ │ ├── scale_hyperprior-resnet50.yaml
│ │ ├── mean_scale_hyperprior-resnet50.yaml
│ │ ├── joint_autoregressive_hierarchical_prior-resnet50.yaml
│ │ ├── factorized_prior-tf_efficientnet_l2_ns.yaml
│ │ ├── mean_scale_hyperprior-tf_efficientnet_l2_ns.yaml
│ │ ├── scale_hyperprior-tf_efficientnet_l2_ns.yaml
│ │ ├── scale_hyperprior-tf_efficientnet_l2_ns_475.yaml
│ │ ├── factorized_prior-tf_efficientnet_l2_ns_475.yaml
│ │ └── mean_scale_hyperprior-tf_efficientnet_l2_ns_475.yaml
│ └── feature_compression
│ │ ├── jpeg-resnet50.yaml
│ │ └── webp-resnet50.yaml
│ └── pascal_voc2012
│ └── input_compression
│ ├── jpeg-deeplabv3_resnet50.yaml
│ ├── webp-deeplabv3_resnet50.yaml
│ ├── jpeg-deeplabv3_resnet101.yaml
│ ├── webp-deeplabv3_resnet101.yaml
│ ├── bpg-deeplabv3_resnet50.yaml
│ ├── bpg-deeplabv3_resnet101.yaml
│ ├── factorized_prior-deeplabv3_resnet50.yaml
│ ├── scale_hyperprior-deeplabv3_resnet50.yaml
│ ├── factorized_prior-deeplabv3_resnet101.yaml
│ ├── mean_scale_hyperprior-deeplabv3_resnet101.yaml
│ ├── mean_scale_hyperprior-deeplabv3_resnet50.yaml
│ ├── scale_hyperprior-deeplabv3_resnet101.yaml
│ ├── joint_autoregressive_hierarchical_prior-deeplabv3_resnet101.yaml
│ └── joint_autoregressive_hierarchical_prior-deeplabv3_resnet50.yaml
├── setup.cfg
├── docs
└── source
│ ├── subpkgs
│ ├── loss.rst
│ ├── analysis.rst
│ ├── common.rst
│ ├── transform.rst
│ └── models.rst
│ ├── package.rst
│ ├── usage.rst
│ └── conf.py
├── CITATION.bib
├── Pipfile
├── .github
├── workflows
│ ├── documentation.yaml
│ └── python-publish.yml
└── ISSUE_TEMPLATE
│ └── bug-report--not-question-.md
├── setup.py
├── LICENSE
└── configs
├── coco2017
└── input_compression
│ ├── mean_scale_hyperprior-faster_rcnn_resnet50_fpn.yaml
│ ├── factorized_prior-faster_rcnn_resnet50_fpn.yaml
│ ├── scale_hyperprior-faster_rcnn_resnet50_fpn.yaml
│ ├── joint_autoregressive_hierarchical_prior-faster_rcnn_resnet50_fpn.yaml
│ ├── jpeg-faster_rcnn_resnet50_fpn.yaml
│ ├── webp-faster_rcnn_resnet50_fpn.yaml
│ └── bpg-faster_rcnn_resnet50_fpn.yaml
├── ilsvrc2012
├── input_compression
│ ├── jpeg-resnet50.yaml
│ ├── webp-resnet50.yaml
│ ├── jpeg-resnet101.yaml
│ ├── jpeg-resnet152.yaml
│ ├── webp-resnet101.yaml
│ ├── webp-resnet152.yaml
│ ├── bpg-resnet50.yaml
│ ├── bpg-resnet101.yaml
│ ├── bpg-resnet152.yaml
│ ├── jpeg-tf_efficientnet_l2_ns.yaml
│ ├── jpeg-tf_efficientnet_l2_ns_475.yaml
│ ├── mean_scale_hyperprior-resnet50.yaml
│ ├── factorized_prior-resnet50.yaml
│ ├── scale_hyperprior-resnet50.yaml
│ ├── joint_autoregressive_hierarchical_prior-resnet50.yaml
│ └── vtm-resnet50.yaml
└── feature_compression
│ ├── jpeg-resnet50.yaml
│ └── webp-resnet50.yaml
└── pascal_voc2012
└── input_compression
├── jpeg-deeplabv3_resnet50.yaml
├── webp-deeplabv3_resnet101.yaml
├── webp-deeplabv3_resnet50.yaml
├── jpeg-deeplabv3_resnet101.yaml
├── bpg-deeplabv3_resnet50.yaml
└── bpg-deeplabv3_resnet101.yaml
/sc2bench/common/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/sc2bench/__init__.py:
--------------------------------------------------------------------------------
1 | __version__ = '0.1.1-dev'
2 |
--------------------------------------------------------------------------------
/sc2bench/models/detection/__init__.py:
--------------------------------------------------------------------------------
1 | from . import rcnn
2 |
--------------------------------------------------------------------------------
/script/task/coco/__init__.py:
--------------------------------------------------------------------------------
1 | from . import dataset, eval
2 |
--------------------------------------------------------------------------------
/script/task/utils/__init__.py:
--------------------------------------------------------------------------------
1 | from . import eval, dataset
2 |
--------------------------------------------------------------------------------
/sc2bench/models/segmentation/__init__.py:
--------------------------------------------------------------------------------
1 | from . import deeplabv3
2 |
--------------------------------------------------------------------------------
/script/task/custom/__init__.py:
--------------------------------------------------------------------------------
1 | from . import collator, sampler, transform
2 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .idea/
2 | .ipynb_checkpoints/
3 | __pycache__/
4 | .editorconfig
5 |
--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include *.md
2 | include LICENSE
3 |
4 | recursive-exclude * __pycache__
--------------------------------------------------------------------------------
/imgs/ilsvrc2012-overview.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yoshitomo-matsubara/sc2-benchmark/HEAD/imgs/ilsvrc2012-overview.png
--------------------------------------------------------------------------------
/legacy/README.md:
--------------------------------------------------------------------------------
1 | # ***legacy/***
2 | The configurations and scripts in `legacy/` are designed for sc2bench <= v0.0.4 and torchdistill <= v0.3.3.
3 |
--------------------------------------------------------------------------------
/imgs/input_vs_supervised_compression.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yoshitomo-matsubara/sc2-benchmark/HEAD/imgs/input_vs_supervised_compression.png
--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
1 | [metadata]
2 | license = MIT
3 | license_files = LICENSE
4 |
5 | [pep8]
6 | max-line-length = 120
7 |
8 | [flake8]
9 | max-line-length = 120
10 | exclude = venv
11 |
--------------------------------------------------------------------------------
/docs/source/subpkgs/loss.rst:
--------------------------------------------------------------------------------
1 | sc2bench.loss
2 | =============
3 |
4 |
5 | .. toctree::
6 | :maxdepth: 3
7 | :caption: Contents:
8 |
9 | .. automodule:: sc2bench.loss
10 | :members:
11 |
--------------------------------------------------------------------------------
/docs/source/subpkgs/analysis.rst:
--------------------------------------------------------------------------------
1 | sc2bench.analysis
2 | =================
3 |
4 |
5 | .. toctree::
6 | :maxdepth: 3
7 | :caption: Contents:
8 |
9 | .. automodule:: sc2bench.analysis
10 | :members:
11 |
--------------------------------------------------------------------------------
/script/task/utils/dataset.py:
--------------------------------------------------------------------------------
1 | import math
2 |
3 |
def get_num_iterations(dataset, batch_size, world_size):
    """
    Computes the number of iterations per epoch for a dataset split across workers.

    :param dataset: dataset with a defined length.
    :param batch_size: number of samples per batch.
    :param world_size: number of distributed workers sharing the dataset.
    :return: number of iterations (ceiling of len(dataset) / batch_size / world_size).
    """
    samples_per_step = batch_size * world_size
    return math.ceil(len(dataset) / samples_per_step)
7 |
--------------------------------------------------------------------------------
/docs/source/package.rst:
--------------------------------------------------------------------------------
1 | sc2bench API
2 | ============
3 |
4 |
5 | .. toctree::
6 |    :maxdepth: 2
7 |    :caption: Overview
8 | 
9 |    subpkgs/models
10 |    subpkgs/transform
11 |    subpkgs/common
12 |    subpkgs/loss
13 |    subpkgs/analysis
--------------------------------------------------------------------------------
/script/software/install_vtm.sh:
--------------------------------------------------------------------------------
1 | mkdir ~/software
2 | cd ~/software
3 | git clone https://vcgit.hhi.fraunhofer.de/jvet/VVCSoftware_VTM.git
4 | cd VVCSoftware_VTM
5 | mkdir build
6 | cd build
7 | cmake .. -DCMAKE_BUILD_TYPE=Release
8 | make -j
9 |
--------------------------------------------------------------------------------
/legacy/script/software/install_vtm.sh:
--------------------------------------------------------------------------------
1 | mkdir ~/software
2 | cd ~/software
3 | git clone https://vcgit.hhi.fraunhofer.de/jvet/VVCSoftware_VTM.git
4 | cd VVCSoftware_VTM
5 | mkdir build
6 | cd build
7 | cmake .. -DCMAKE_BUILD_TYPE=Release
8 | make -j
9 |
--------------------------------------------------------------------------------
/docs/source/subpkgs/common.rst:
--------------------------------------------------------------------------------
1 | sc2bench.common
2 | ===============
3 |
4 |
5 | .. toctree::
6 | :maxdepth: 3
7 | :caption: Contents:
8 |
9 | ----
10 |
11 | sc2bench.common.config_util
12 | ---------------------------
13 |
14 | .. automodule:: sc2bench.common.config_util
15 | :members:
16 |
--------------------------------------------------------------------------------
/sc2bench/transforms/__init__.py:
--------------------------------------------------------------------------------
1 | from . import collator
2 | from .codec import CODEC_TRANSFORM_MODULE_DICT
3 | from .misc import MISC_TRANSFORM_MODULE_DICT
4 |
5 | TRANSFORM_MODULE_DICT = dict()
6 | TRANSFORM_MODULE_DICT.update(CODEC_TRANSFORM_MODULE_DICT)
7 | TRANSFORM_MODULE_DICT.update(MISC_TRANSFORM_MODULE_DICT)
8 |
--------------------------------------------------------------------------------
/CITATION.bib:
--------------------------------------------------------------------------------
1 | @article{matsubara2023sc2,
2 | title={{SC2 Benchmark: Supervised Compression for Split Computing}},
3 | author={Matsubara, Yoshitomo and Yang, Ruihan and Levorato, Marco and Mandt, Stephan},
4 | journal={Transactions on Machine Learning Research},
5 | issn={2835-8856},
6 | year={2023},
7 | url={https://openreview.net/forum?id=p28wv4G65d}
8 | }
--------------------------------------------------------------------------------
/script/software/install_bpg.sh:
--------------------------------------------------------------------------------
1 | mkdir ~/software
2 | cd ~/software
3 | wget https://bellard.org/bpg/libbpg-0.9.8.tar.gz
4 | tar -xvf libbpg-0.9.8.tar.gz
5 | cd libbpg-0.9.8/
6 | sudo apt-get -y install libpng-dev
7 | sudo apt-get -y install libjpeg-dev
8 | sudo apt-get -y install libsdl-dev
9 | sudo apt-get -y install libsdl-image1.2-dev
10 | sudo apt-get remove libnuma-dev
11 | sudo make
12 | sudo apt-get install libnuma-dev
13 |
--------------------------------------------------------------------------------
/legacy/script/software/install_bpg.sh:
--------------------------------------------------------------------------------
1 | mkdir ~/software
2 | cd ~/software
3 | wget https://bellard.org/bpg/libbpg-0.9.8.tar.gz
4 | tar -xvf libbpg-0.9.8.tar.gz
5 | cd libbpg-0.9.8/
6 | sudo apt-get -y install libpng-dev
7 | sudo apt-get -y install libjpeg-dev
8 | sudo apt-get -y install libsdl-dev
9 | sudo apt-get -y install libsdl-image1.2-dev
10 | sudo apt-get remove libnuma-dev
11 | sudo make
12 | sudo apt-get install libnuma-dev
13 |
--------------------------------------------------------------------------------
/Pipfile:
--------------------------------------------------------------------------------
1 | [[source]]
2 | name = "pypi"
3 | url = "https://pypi.org/simple"
4 | verify_ssl = true
5 |
6 | [dev-packages]
7 |
8 | [packages]
9 | torch = ">=2.0.0"
10 | torchvision = ">=0.15.1"
11 | numpy = "*"
12 | scipy = "*"
13 | cython = "*"
14 | pycocotools = ">=2.0.2"
15 | matplotlib = "*"
16 | seaborn = "*"
17 | torchdistill = ">=1.0.0"
18 | compressai = ">=1.2.3"
19 | timm = "*"
20 | sc2bench = {editable = true, path = "."}
21 |
22 | [requires]
23 | python_version = "3.12"
24 |
--------------------------------------------------------------------------------
/docs/source/subpkgs/transform.rst:
--------------------------------------------------------------------------------
1 | sc2bench.transform
2 | ==================
3 |
4 |
5 | .. toctree::
6 | :maxdepth: 3
7 | :caption: Contents:
8 |
9 | ----
10 |
11 | sc2bench.transform.codec
12 | ------------------------
13 |
14 | .. automodule:: sc2bench.transforms.codec
15 | :members:
16 |
17 | ----
18 |
19 | sc2bench.transform.collator
20 | ---------------------------
21 |
22 | .. automodule:: sc2bench.transforms.collator
23 | :members:
24 |
25 | ----
26 |
27 | sc2bench.transform.misc
28 | -----------------------
29 |
30 | .. automodule:: sc2bench.transforms.misc
31 | :members:
32 |
--------------------------------------------------------------------------------
/script/neural_input_compression/coco2017-object_detection.sh:
--------------------------------------------------------------------------------
1 | BASE_NAME=${1}
2 | MAX_QUALITY=${2}
3 |
4 | if [ $# -ne 2 ]; then
5 | echo "Illegal number of arguments"
6 | exit 2
7 | fi
8 |
9 | for quality in $(seq 1 1 ${MAX_QUALITY});
10 | do
11 | json_str='{"models": {"model": {"compression_model": {"params": {"quality": '
12 | json_str+=${quality}
13 | json_str+='}}}}}'
14 | python script/task/object_detection.py \
15 | --config configs/coco2017/input_compression/${BASE_NAME}.yaml \
16 | --run_log log/input_compression/${BASE_NAME}-quality${quality}.txt \
17 | --json "${json_str}" -student_only -test_only -no_dp_eval
18 | done
19 |
--------------------------------------------------------------------------------
/script/neural_input_compression/ilsvrc2012-image_classification.sh:
--------------------------------------------------------------------------------
1 | BASE_NAME=${1}
2 | MAX_QUALITY=${2}
3 |
4 | if [ $# -ne 2 ]; then
5 | echo "Illegal number of arguments"
6 | exit 2
7 | fi
8 |
9 | for quality in $(seq 1 1 ${MAX_QUALITY});
10 | do
11 | json_str='{"models": {"model": {"compression_model": {"params": {"quality": '
12 | json_str+=${quality}
13 | json_str+='}}}}}'
14 | python script/task/image_classification.py \
15 | --config configs/ilsvrc2012/input_compression/${BASE_NAME}.yaml \
16 | --run_log log/input_compression/${BASE_NAME}-quality${quality}.txt \
17 | --json "${json_str}" -student_only -test_only -no_dp_eval
18 | done
19 |
--------------------------------------------------------------------------------
/legacy/script/neural_input_compression/coco2017-object_detection.sh:
--------------------------------------------------------------------------------
1 | BASE_NAME=${1}
2 | MAX_QUALITY=${2}
3 |
4 | if [ $# -ne 2 ]; then
5 | echo "Illegal number of arguments"
6 | exit 2
7 | fi
8 |
9 | for quality in $(seq 1 1 ${MAX_QUALITY});
10 | do
11 | json_str='{"models": {"model": {"compression_model": {"params": {"quality": '
12 | json_str+=${quality}
13 | json_str+='}}}}}'
14 | python legacy/script/task/object_detection.py \
15 | --config legacy/configs/coco2017/input_compression/${BASE_NAME}.yaml \
16 | --log legacy/log/input_compression/${BASE_NAME}-quality${quality}.txt \
17 | --json "${json_str}" -student_only -test_only -no_dp_eval
18 | done
19 |
--------------------------------------------------------------------------------
/script/neural_input_compression/pascal_voc2012-semantic_segmentation.sh:
--------------------------------------------------------------------------------
1 | BASE_NAME=${1}
2 | MAX_QUALITY=${2}
3 |
4 | if [ $# -ne 2 ]; then
5 | echo "Illegal number of arguments"
6 | exit 2
7 | fi
8 |
9 | for quality in $(seq 1 1 ${MAX_QUALITY});
10 | do
11 | json_str='{"models": {"model": {"compression_model": {"params": {"quality": '
12 | json_str+=${quality}
13 | json_str+='}}}}}'
14 | python script/task/semantic_segmentation.py \
15 | --config configs/pascal_voc2012/input_compression/${BASE_NAME}.yaml \
16 | --run_log log/input_compression/${BASE_NAME}-quality${quality}.txt \
17 | --json "${json_str}" -student_only -test_only -no_dp_eval
18 | done
19 |
--------------------------------------------------------------------------------
/legacy/script/neural_input_compression/ilsvrc2012-image_classification.sh:
--------------------------------------------------------------------------------
1 | BASE_NAME=${1}
2 | MAX_QUALITY=${2}
3 |
4 | if [ $# -ne 2 ]; then
5 | echo "Illegal number of arguments"
6 | exit 2
7 | fi
8 |
9 | for quality in $(seq 1 1 ${MAX_QUALITY});
10 | do
11 | json_str='{"models": {"model": {"compression_model": {"params": {"quality": '
12 | json_str+=${quality}
13 | json_str+='}}}}}'
14 | python legacy/script/task/image_classification.py \
15 | --config legacy/configs/ilsvrc2012/input_compression/${BASE_NAME}.yaml \
16 | --log legacy/log/input_compression/${BASE_NAME}-quality${quality}.txt \
17 | --json "${json_str}" -student_only -test_only -no_dp_eval
18 | done
19 |
--------------------------------------------------------------------------------
/legacy/script/neural_input_compression/pascal_voc2012-semantic_segmentation.sh:
--------------------------------------------------------------------------------
1 | BASE_NAME=${1}
2 | MAX_QUALITY=${2}
3 |
4 | if [ $# -ne 2 ]; then
5 | echo "Illegal number of arguments"
6 | exit 2
7 | fi
8 |
9 | for quality in $(seq 1 1 ${MAX_QUALITY});
10 | do
11 | json_str='{"models": {"model": {"compression_model": {"params": {"quality": '
12 | json_str+=${quality}
13 | json_str+='}}}}}'
14 | python legacy/script/task/semantic_segmentation.py \
15 | --config legacy/configs/pascal_voc2012/input_compression/${BASE_NAME}.yaml \
16 | --log legacy/log/input_compression/${BASE_NAME}-quality${quality}.txt \
17 | --json "${json_str}" -student_only -test_only -no_dp_eval
18 | done
19 |
--------------------------------------------------------------------------------
/sc2bench/common/config_util.py:
--------------------------------------------------------------------------------
def overwrite_config(org_config, sub_config):
    """
    Overwrites a configuration in place, merging nested dictionaries recursively.

    :param org_config: (nested) dictionary of configuration to be updated.
    :type org_config: dict
    :param sub_config: (nested) dictionary to be added to org_config.
    :type sub_config: dict
    """
    for sub_key, sub_value in sub_config.items():
        # Recurse only when BOTH sides are dicts; the original recursed whenever
        # sub_value was a dict and crashed if the existing value was not a dict
        # (e.g. None placeholder in a YAML config). Otherwise, assign/overwrite.
        if sub_key in org_config and isinstance(sub_value, dict) \
                and isinstance(org_config[sub_key], dict):
            overwrite_config(org_config[sub_key], sub_value)
        else:
            org_config[sub_key] = sub_value
18 |
--------------------------------------------------------------------------------
/sc2bench/models/__init__.py:
--------------------------------------------------------------------------------
1 | from . import registry, detection, segmentation
2 | from .backbone import BACKBONE_CLASS_DICT, BACKBONE_FUNC_DICT
3 | from .detection.registry import DETECTION_MODEL_CLASS_DICT, DETECTION_MODEL_FUNC_DICT
4 | from .segmentation.registry import SEGMENTATION_MODEL_CLASS_DICT, SEGMENTATION_MODEL_FUNC_DICT
5 | from .wrapper import WRAPPER_CLASS_DICT
6 |
7 | MODEL_DICT = dict()
8 | MODEL_DICT.update(BACKBONE_CLASS_DICT)
9 | MODEL_DICT.update(BACKBONE_FUNC_DICT)
10 | MODEL_DICT.update(DETECTION_MODEL_CLASS_DICT)
11 | MODEL_DICT.update(DETECTION_MODEL_FUNC_DICT)
12 | MODEL_DICT.update(SEGMENTATION_MODEL_CLASS_DICT)
13 | MODEL_DICT.update(SEGMENTATION_MODEL_FUNC_DICT)
14 | MODEL_DICT.update(WRAPPER_CLASS_DICT)
15 |
--------------------------------------------------------------------------------
/script/codec_input_compression/coco2017-object_detection.sh:
--------------------------------------------------------------------------------
1 | BASE_NAME=${1}
2 | FORMAT_NAME=${2}
3 |
4 | if [ $# -eq 5 ]
5 | then
6 | MIN_QUALITY=${3}
7 | STEP_SIZE=${4}
8 | MAX_QUALITY=${5}
9 | else
10 | MIN_QUALITY=10
11 | STEP_SIZE=10
12 | MAX_QUALITY=100
13 | fi
14 |
15 |
16 | for quality in $(seq ${MIN_QUALITY} ${STEP_SIZE} ${MAX_QUALITY});
17 | do
18 | sed -i "s/quality:.*/quality: ${quality}/g" configs/coco2017/input_compression/${BASE_NAME}.yaml
19 | python script/task/object_detection.py \
20 | --config configs/coco2017/input_compression/${BASE_NAME}.yaml \
21 | --run_log log/${FORMAT_NAME}_compression/${BASE_NAME}-quality${quality}.txt -student_only -test_only -no_dp_eval
22 | done
23 |
24 | sed -i "s/quality:.*/quality:/g" configs/coco2017/input_compression/${BASE_NAME}.yaml
25 |
--------------------------------------------------------------------------------
/script/codec_input_compression/ilsvrc2012-image_classification.sh:
--------------------------------------------------------------------------------
1 | BASE_NAME=${1}
2 | FORMAT_NAME=${2}
3 |
4 | if [ $# -eq 5 ]
5 | then
6 | MIN_QUALITY=${3}
7 | STEP_SIZE=${4}
8 | MAX_QUALITY=${5}
9 | else
10 | MIN_QUALITY=10
11 | STEP_SIZE=10
12 | MAX_QUALITY=100
13 | fi
14 |
15 |
16 | for quality in $(seq ${MIN_QUALITY} ${STEP_SIZE} ${MAX_QUALITY});
17 | do
18 | sed -i "s/quality:.*/quality: ${quality}/g" configs/ilsvrc2012/input_compression/${BASE_NAME}.yaml
19 | python script/task/image_classification.py \
20 | --config configs/ilsvrc2012/input_compression/${BASE_NAME}.yaml \
21 | --run_log log/${FORMAT_NAME}_compression/${BASE_NAME}-quality${quality}.txt -student_only -test_only -no_dp_eval
22 | done
23 |
24 | sed -i "s/quality:.*/quality:/g" configs/ilsvrc2012/input_compression/${BASE_NAME}.yaml
25 |
--------------------------------------------------------------------------------
/legacy/script/codec_input_compression/coco2017-object_detection.sh:
--------------------------------------------------------------------------------
1 | BASE_NAME=${1}
2 | FORMAT_NAME=${2}
3 |
4 | if [ $# -eq 5 ]
5 | then
6 | MIN_QUALITY=${3}
7 | STEP_SIZE=${4}
8 | MAX_QUALITY=${5}
9 | else
10 | MIN_QUALITY=10
11 | STEP_SIZE=10
12 | MAX_QUALITY=100
13 | fi
14 |
15 |
16 | for quality in $(seq ${MIN_QUALITY} ${STEP_SIZE} ${MAX_QUALITY});
17 | do
18 | sed -i "s/quality:.*/quality: ${quality}/g" legacy/configs/coco2017/input_compression/${BASE_NAME}.yaml
19 | python legacy/script/task/object_detection.py \
20 | --config legacy/configs/coco2017/input_compression/${BASE_NAME}.yaml \
21 | --log legacy/log/${FORMAT_NAME}_compression/${BASE_NAME}-quality${quality}.txt -student_only -test_only -no_dp_eval
22 | done
23 |
24 | sed -i "s/quality:.*/quality:/g" legacy/configs/coco2017/input_compression/${BASE_NAME}.yaml
25 |
--------------------------------------------------------------------------------
/script/codec_input_compression/pascal_voc2012-semantic_segmentation.sh:
--------------------------------------------------------------------------------
1 | BASE_NAME=${1}
2 | FORMAT_NAME=${2}
3 |
4 | if [ $# -eq 5 ]
5 | then
6 | MIN_QUALITY=${3}
7 | STEP_SIZE=${4}
8 | MAX_QUALITY=${5}
9 | else
10 | MIN_QUALITY=10
11 | STEP_SIZE=10
12 | MAX_QUALITY=100
13 | fi
14 |
15 |
16 | for quality in $(seq ${MIN_QUALITY} ${STEP_SIZE} ${MAX_QUALITY});
17 | do
18 | sed -i "s/quality:.*/quality: ${quality}/g" configs/pascal_voc2012/input_compression/${BASE_NAME}.yaml
19 | python script/task/semantic_segmentation.py \
20 | --config configs/pascal_voc2012/input_compression/${BASE_NAME}.yaml \
21 | --run_log log/${FORMAT_NAME}_compression/${BASE_NAME}-quality${quality}.txt -student_only -test_only -no_dp_eval
22 | done
23 |
24 | sed -i "s/quality:.*/quality:/g" configs/pascal_voc2012/input_compression/${BASE_NAME}.yaml
25 |
--------------------------------------------------------------------------------
/legacy/script/codec_input_compression/ilsvrc2012-image_classification.sh:
--------------------------------------------------------------------------------
1 | BASE_NAME=${1}
2 | FORMAT_NAME=${2}
3 |
4 | if [ $# -eq 5 ]
5 | then
6 | MIN_QUALITY=${3}
7 | STEP_SIZE=${4}
8 | MAX_QUALITY=${5}
9 | else
10 | MIN_QUALITY=10
11 | STEP_SIZE=10
12 | MAX_QUALITY=100
13 | fi
14 |
15 |
16 | for quality in $(seq ${MIN_QUALITY} ${STEP_SIZE} ${MAX_QUALITY});
17 | do
18 | sed -i "s/quality:.*/quality: ${quality}/g" legacy/configs/ilsvrc2012/input_compression/${BASE_NAME}.yaml
19 | python legacy/script/task/image_classification.py \
20 | --config legacy/configs/ilsvrc2012/input_compression/${BASE_NAME}.yaml \
21 | --log legacy/log/${FORMAT_NAME}_compression/${BASE_NAME}-quality${quality}.txt -student_only -test_only -no_dp_eval
22 | done
23 |
24 | sed -i "s/quality:.*/quality:/g" legacy/configs/ilsvrc2012/input_compression/${BASE_NAME}.yaml
25 |
--------------------------------------------------------------------------------
/docs/source/usage.rst:
--------------------------------------------------------------------------------
1 | Usage
2 | =====
3 |
4 |
5 | .. toctree::
6 | :maxdepth: 3
7 | :caption: Overview
8 |
9 | Installation
10 | ------------
11 |
12 | To use `sc2bench <https://pypi.org/project/sc2bench/>`_, first install it using pip:
13 |
14 | .. code-block:: console
15 |
16 | $ pip install sc2bench
17 |
18 |
19 | Examples
20 | ------------
21 |
22 | `The official repository <https://github.com/yoshitomo-matsubara/sc2-benchmark>`_
23 | offers many example scripts, configs, and checkpoints of trained models in `sc2bench`.
24 |
25 | Currently, `example scripts <https://github.com/yoshitomo-matsubara/sc2-benchmark/tree/main/script>`_
26 | cover the following three tasks:
27 |
28 | - Image classification (ILSVRC 2012)
29 | - Object detection (COCO 2017)
30 | - Semantic segmentation (PASCAL VOC 2012)
31 |
--------------------------------------------------------------------------------
/legacy/script/codec_input_compression/pascal_voc2012-semantic_segmentation.sh:
--------------------------------------------------------------------------------
1 | BASE_NAME=${1}
2 | FORMAT_NAME=${2}
3 |
4 | if [ $# -eq 5 ]
5 | then
6 | MIN_QUALITY=${3}
7 | STEP_SIZE=${4}
8 | MAX_QUALITY=${5}
9 | else
10 | MIN_QUALITY=10
11 | STEP_SIZE=10
12 | MAX_QUALITY=100
13 | fi
14 |
15 |
16 | for quality in $(seq ${MIN_QUALITY} ${STEP_SIZE} ${MAX_QUALITY});
17 | do
18 | sed -i "s/quality:.*/quality: ${quality}/g" legacy/configs/pascal_voc2012/input_compression/${BASE_NAME}.yaml
19 | python legacy/script/task/semantic_segmentation.py \
20 | --config legacy/configs/pascal_voc2012/input_compression/${BASE_NAME}.yaml \
21 | --log legacy/log/${FORMAT_NAME}_compression/${BASE_NAME}-quality${quality}.txt -student_only -test_only -no_dp_eval
22 | done
23 |
24 | sed -i "s/quality:.*/quality:/g" legacy/configs/pascal_voc2012/input_compression/${BASE_NAME}.yaml
25 |
--------------------------------------------------------------------------------
/.github/workflows/documentation.yaml:
--------------------------------------------------------------------------------
1 | name: Deploy Sphinx Documentation
2 | on: [push, pull_request, workflow_dispatch]
3 | permissions:
4 | contents: write
5 | jobs:
6 | docs:
7 | runs-on: ubuntu-latest
8 | steps:
9 | - uses: actions/checkout@v4
10 | - name: Set up Python
11 | uses: actions/setup-python@v5
12 | with:
13 | python-version: '3.10'
14 | - name: Install dependencies
15 | run: |
16 | pip install sphinx sphinx_rtd_theme sphinxcontrib-youtube
17 | - name: Install sc2bench
18 | run: |
19 | pip install -e .
20 | - name: Sphinx build
21 | run: |
22 | sphinx-build -b html docs/source/ docs/_build/
23 | - name: Deploy
24 | uses: peaceiris/actions-gh-pages@v3
25 | if: ${{ github.event_name == 'push' && github.ref == 'refs/heads/main' }}
26 | with:
27 | publish_branch: gh-pages
28 | github_token: ${{ secrets.GITHUB_TOKEN }}
29 | publish_dir: docs/_build/
30 | force_orphan: true
31 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/bug-report--not-question-.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: Bug report (Not question)
3 | about: Create a report to help us improve (Use Discussions to ask questions)
4 | title: "[BUG] Please use Discussions instead of Issues to ask questions"
5 | labels: bug
6 | assignees: yoshitomo-matsubara
7 |
8 | ---
9 |
10 | Please use [Discussions](https://github.com/yoshitomo-matsubara/sc2-benchmark/discussions) to ask questions.
11 |
12 | **Describe the bug**
13 | A clear and concise description of what the bug is.
14 |
15 | **To Reproduce**
16 | Provide
17 | 1. Exact command to run your code
18 | 2. Whether or not you made any changes in Python code (if so, how you made the changes?)
19 | 3. YAML config file
20 | 4. Log file
21 |
22 | **Expected behavior**
23 | A clear and concise description of what you expected to happen.
24 |
25 |
26 | **Environment (please complete the following information):**
27 | - OS: [e.g. Ubuntu 20.04 LTS]
28 | - Python ver. [e.g. 3.8]
29 | - sc2bench and torchdistill vers. [e.g. v0.0.2 and v0.3.2]
30 |
31 |
32 | **Additional context**
33 | Add any other context about the problem here.
34 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | from setuptools import setup, find_packages
2 |
3 | import sc2bench
4 |
5 | with open('README.md', 'r') as f:
6 | long_description = f.read()
7 |
8 | description = 'SC2 Benchmark: Supervised Compression for Split Computing.'
9 | setup(
10 | name='sc2bench',
11 | version=sc2bench.__version__,
12 | author='Yoshitomo Matsubara',
13 | description=description,
14 | long_description=long_description,
15 | long_description_content_type='text/markdown',
16 | url='https://github.com/yoshitomo-matsubara/sc2-benchmark',
17 | packages=find_packages(exclude=('configs', 'resources', 'script', 'tests')),
18 | python_requires='>=3.9',
19 | install_requires=[
20 | 'torch>=2.0.0',
21 | 'torchvision>=0.15.1',
22 | 'numpy',
23 | 'pyyaml>=6.0.0',
24 | 'scipy',
25 | 'cython',
26 | 'pycocotools>=2.0.2',
27 | 'torchdistill>=1.0.0',
28 | 'compressai>=1.2.3',
29 | 'timm>=1.0.3'
30 | ],
31 | extras_require={
32 | 'test': ['pytest'],
33 | 'docs': ['sphinx', 'sphinx_rtd_theme', 'sphinxcontrib-youtube']
34 | }
35 | )
36 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2021 Yoshitomo Matsubara
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
23 |
--------------------------------------------------------------------------------
/.github/workflows/python-publish.yml:
--------------------------------------------------------------------------------
1 | # This workflow will upload a Python Package using Twine when a release is created
2 | # For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries
3 |
4 | # This workflow uses actions that are not certified by GitHub.
5 | # They are provided by a third-party and are governed by
6 | # separate terms of service, privacy policy, and support
7 | # documentation.
8 |
9 | name: Upload Python Package
10 |
11 | on:
12 | release:
13 | types: [published]
14 |
15 | permissions:
16 | contents: read
17 |
18 | jobs:
19 | deploy:
20 |
21 | runs-on: ubuntu-latest
22 |
23 | steps:
24 | - uses: actions/checkout@v4
25 | - name: Set up Python
26 | uses: actions/setup-python@v5
27 | with:
28 | python-version: '3.x'
29 | - name: Install dependencies
30 | run: |
31 | python -m pip install --upgrade pip
32 | pip install build
33 | - name: Build package
34 | run: python -m build
35 | - name: Publish package
36 | uses: pypa/gh-action-pypi-publish@27b31702a0e7fc50959f5ad993c78deac1bdfc29
37 | with:
38 | user: __token__
39 | password: ${{ secrets.PYPI_API_TOKEN }}
40 |
--------------------------------------------------------------------------------
/script/task/custom/collator.py:
--------------------------------------------------------------------------------
1 | import torch
2 |
3 | from torchdistill.datasets.registry import register_collate_func
4 |
5 |
@register_collate_func
def coco_collate_fn(batch):
    """
    Collates a COCO detection batch: turns a list of (image, target) pairs
    into a pair of per-field tuples (images, targets).

    :param batch: list of dataset samples
    :return: tuple of per-field tuples
    """
    transposed_fields = zip(*batch)
    return tuple(transposed_fields)
9 |
10 |
def _cat_list(images, fill_value=0):
    """
    Pads a list of tensors to the per-dimension maximum size and stacks them
    into one batched tensor.

    :param images: list of tensors whose last two dims are (height, width)
    :param fill_value: value used to pad regions outside each image
    :return: batched tensor of shape (len(images), *max_size); a single
        non-tensor sample (e.g. a PIL image) is returned unchanged as a list
    """
    # Pass through a lone non-tensor sample untouched.
    if len(images) == 1 and not isinstance(images[0], torch.Tensor):
        return images

    # Maximum extent along every dimension across the batch.
    max_size = tuple(max(s) for s in zip(*[img.shape for img in images]))
    batch_shape = (len(images),) + max_size
    # new_full allocates on the same dtype/device as the first image and is
    # the modern equivalent of the legacy Tensor.new(*shape).fill_(v) idiom.
    batched_imgs = images[0].new_full(batch_shape, fill_value)
    for img, pad_img in zip(images, batched_imgs):
        # Copy each image into the top-left corner of its padded slot.
        pad_img[..., :img.shape[-2], :img.shape[-1]].copy_(img)
    return batched_imgs
21 |
22 |
@register_collate_func
def coco_seg_collate_fn(batch):
    """
    Collates a COCO segmentation training batch: pads images and target masks
    to a shared size and batches them, keeping supplementary dicts as a tuple.

    :param batch: list of (image, target, supp_dict) samples
    :return: (batched images, batched targets, tuple of supp dicts)
    """
    images, targets, supp_dicts = tuple(zip(*batch))
    # Images are zero-padded; masks are padded with 255 (ignore index).
    return _cat_list(images, fill_value=0), _cat_list(targets, fill_value=255), supp_dicts
29 |
30 |
@register_collate_func
def coco_seg_eval_collate_fn(batch):
    """
    Collates a COCO segmentation evaluation batch: pads images and target
    masks to a shared size and batches them.

    :param batch: list of (image, target) samples
    :return: (batched images, batched targets)
    """
    images, targets = tuple(zip(*batch))
    # Images are zero-padded; masks are padded with 255 (ignore index).
    return _cat_list(images, fill_value=0), _cat_list(targets, fill_value=255)
37 |
--------------------------------------------------------------------------------
/legacy/configs/coco2017/input_compression/jpeg-faster_rcnn_resnet50_fpn.yaml:
--------------------------------------------------------------------------------
1 | datasets:
2 | coco2017:
3 | name: &dataset_name 'coco2017'
4 | type: 'cocodetect'
5 | root: &root_dir !join ['~/dataset/', *dataset_name]
6 | splits:
7 | val:
8 | dataset_id: &coco_val !join [*dataset_name, '/val']
9 | images: !join [*root_dir, '/val2017']
10 | annotations: !join [*root_dir, '/annotations/instances_val2017.json']
11 | annotated_only: False
12 |
13 | models:
14 | model:
15 | name: 'InputCompressionDetectionModel'
16 | params:
17 | codec_params:
18 | - type: 'PILImageModule'
19 | params:
20 | format: 'JPEG'
21 | quality: 90
22 | returns_file_size: True
23 | analysis_config:
24 | analyzer_configs:
25 | - type: 'FileSizeAccumulator'
26 | params:
27 | unit: 'KB'
28 | analyzes_after_compress: True
29 | adaptive_pad_config:
30 | pre_transform_params:
31 | post_transform_params:
32 | detection_model:
33 | name: 'fasterrcnn_resnet50_fpn'
34 | params:
35 | pretrained: True
36 | progress: True
37 | pretrained_backbone: True
38 | ckpt: ''
39 |
40 | test:
41 | test_data_loader:
42 | dataset_id: *coco_val
43 | random_sample: False
44 | batch_size: 1
45 | num_workers: 4
46 | collate_fn: 'coco_collate_fn'
47 |
--------------------------------------------------------------------------------
/legacy/configs/coco2017/input_compression/webp-faster_rcnn_resnet50_fpn.yaml:
--------------------------------------------------------------------------------
1 | datasets:
2 | coco2017:
3 | name: &dataset_name 'coco2017'
4 | type: 'cocodetect'
5 | root: &root_dir !join ['~/dataset/', *dataset_name]
6 | splits:
7 | val:
8 | dataset_id: &coco_val !join [*dataset_name, '/val']
9 | images: !join [*root_dir, '/val2017']
10 | annotations: !join [*root_dir, '/annotations/instances_val2017.json']
11 | annotated_only: False
12 |
13 | models:
14 | model:
15 | name: 'InputCompressionDetectionModel'
16 | params:
17 | codec_params:
18 | - type: 'PILImageModule'
19 | params:
20 | format: 'WEBP'
21 | quality: 90
22 | returns_file_size: True
23 | analysis_config:
24 | analyzer_configs:
25 | - type: 'FileSizeAccumulator'
26 | params:
27 | unit: 'KB'
28 | analyzes_after_compress: True
29 | adaptive_pad_config:
30 | pre_transform_params:
31 | post_transform_params:
32 | detection_model:
33 | name: 'fasterrcnn_resnet50_fpn'
34 | params:
35 | pretrained: True
36 | progress: True
37 | pretrained_backbone: True
38 | ckpt: ''
39 |
40 | test:
41 | test_data_loader:
42 | dataset_id: *coco_val
43 | random_sample: False
44 | batch_size: 1
45 | num_workers: 4
46 | collate_fn: 'coco_collate_fn'
47 |
--------------------------------------------------------------------------------
/sc2bench/loss.py:
--------------------------------------------------------------------------------
1 | from torch import nn
2 | from torchdistill.losses.mid_level import register_mid_level_loss
3 |
4 |
@register_mid_level_loss
class BppLoss(nn.Module):
    """
    Bit-per-pixel (or rate) loss.

    :param entropy_module_path: entropy module path to extract its output from io_dict
    :type entropy_module_path: str
    :param reduction: reduction type ('sum', 'batchmean', or 'mean')
    :type reduction: str or None
    """
    def __init__(self, entropy_module_path, reduction='mean'):
        super().__init__()
        self.entropy_module_path = entropy_module_path
        self.reduction = reduction

    def forward(self, student_io_dict, *args, **kwargs):
        """
        Computes a rate loss.

        :param student_io_dict: io_dict of model to be trained
        :type student_io_dict: dict
        """
        module_io = student_io_dict[self.entropy_module_path]
        latent, likelihoods = module_io['output']
        batch_size, _, height, width = latent.shape
        # Total number of bits implied by the learned likelihoods.
        total_bits = -likelihoods.log2().sum()
        if self.reduction == 'sum':
            return total_bits
        if self.reduction == 'batchmean':
            return total_bits / batch_size
        # Default ('mean' or anything else): bits per pixel of the latent.
        return total_bits / (batch_size * height * width)
38 |
--------------------------------------------------------------------------------
/legacy/configs/ilsvrc2012/input_compression/jpeg-tf_efficientnet_l2_ns.yaml:
--------------------------------------------------------------------------------
1 | datasets:
2 | ilsvrc2012:
3 | name: &dataset_name 'ilsvrc2012'
4 | type: 'ImageFolder'
5 | root: &root_dir !join ['~/dataset/', *dataset_name]
6 | splits:
7 | val:
8 | dataset_id: &imagenet_val !join [*dataset_name, '/val']
9 | params:
10 | root: !join [*root_dir, '/val']
11 | transform_params:
12 | - type: 'WrappedResize'
13 | params:
14 | size: 833
15 | interpolation: 'bicubic'
16 | - type: 'CenterCrop'
17 | params:
18 | size: [800, 800]
19 | - type: 'PILImageModule'
20 | params:
21 | format: 'JPEG'
22 | quality: 90
23 | - type: 'ToTensor'
24 | params:
25 | - type: 'Normalize'
26 | params:
27 | mean: [0.485, 0.456, 0.406]
28 | std: [0.229, 0.224, 0.225]
29 |
30 | models:
31 | model:
32 | name: &model_name 'tf_efficientnet_l2_ns'
33 | repo_or_dir: 'rwightman/pytorch-image-models'
34 | params:
35 | num_classes: 1000
36 | pretrained: True
37 | experiment: &experiment !join [*dataset_name, '-', *model_name]
38 | ckpt: !join ['./imagenet/vanilla/', *experiment, '.pt']
39 |
40 | test:
41 | test_data_loader:
42 | dataset_id: *imagenet_val
43 | random_sample: False
44 | batch_size: 1
45 | num_workers: 16
46 |
--------------------------------------------------------------------------------
/legacy/configs/ilsvrc2012/input_compression/jpeg-tf_efficientnet_l2_ns_475.yaml:
--------------------------------------------------------------------------------
1 | datasets:
2 | ilsvrc2012:
3 | name: &dataset_name 'ilsvrc2012'
4 | type: 'ImageFolder'
5 | root: &root_dir !join ['~/dataset/', *dataset_name]
6 | splits:
7 | val:
8 | dataset_id: &imagenet_val !join [*dataset_name, '/val']
9 | params:
10 | root: !join [*root_dir, '/val']
11 | transform_params:
12 | - type: 'WrappedResize'
13 | params:
14 | size: 507
15 | interpolation: 'bicubic'
16 | - type: 'CenterCrop'
17 | params:
18 | size: [475, 475]
19 | - type: 'PILImageModule'
20 | params:
21 | format: 'JPEG'
22 | quality: 90
23 | - type: 'ToTensor'
24 | params:
25 | - type: 'Normalize'
26 | params:
27 | mean: [0.485, 0.456, 0.406]
28 | std: [0.229, 0.224, 0.225]
29 |
30 | models:
31 | model:
32 | name: &model_name 'tf_efficientnet_l2_ns_475'
33 | repo_or_dir: 'rwightman/pytorch-image-models'
34 | params:
35 | num_classes: 1000
36 | pretrained: True
37 | experiment: &experiment !join [*dataset_name, '-', *model_name]
38 | ckpt: !join ['./imagenet/vanilla/', *experiment, '.pt']
39 |
40 | test:
41 | test_data_loader:
42 | dataset_id: *imagenet_val
43 | random_sample: False
44 | batch_size: 1
45 | num_workers: 16
46 |
--------------------------------------------------------------------------------
/legacy/configs/coco2017/input_compression/bpg-faster_rcnn_resnet50_fpn.yaml:
--------------------------------------------------------------------------------
1 | datasets:
2 | coco2017:
3 | name: &dataset_name 'coco2017'
4 | type: 'cocodetect'
5 | root: &root_dir !join ['~/dataset/', *dataset_name]
6 | splits:
7 | val:
8 | dataset_id: &coco_val !join [*dataset_name, '/val']
9 | images: !join [*root_dir, '/val2017']
10 | annotations: !join [*root_dir, '/annotations/instances_val2017.json']
11 | annotated_only: False
12 |
13 | models:
14 | model:
15 | name: 'InputCompressionDetectionModel'
16 | params:
17 | codec_params:
18 | - type: 'BPGModule'
19 | params:
20 | encoder_path: '~/software/libbpg-0.9.8/bpgenc'
21 | decoder_path: '~/software/libbpg-0.9.8/bpgdec'
22 | quality: 50
23 | returns_file_size: True
24 | analysis_config:
25 | analyzer_configs:
26 | - type: 'FileSizeAccumulator'
27 | params:
28 | unit: 'KB'
29 | analyzes_after_compress: True
30 | adaptive_pad_config:
31 | pre_transform_params:
32 | post_transform_params:
33 | detection_model:
34 | name: 'fasterrcnn_resnet50_fpn'
35 | params:
36 | pretrained: True
37 | progress: True
38 | pretrained_backbone: True
39 | ckpt: ''
40 |
41 | test:
42 | test_data_loader:
43 | dataset_id: *coco_val
44 | random_sample: False
45 | batch_size: 1
46 | num_workers: 4
47 | collate_fn: 'coco_collate_fn'
48 |
--------------------------------------------------------------------------------
/script/task/utils/eval.py:
--------------------------------------------------------------------------------
1 | import torch
2 |
3 |
class SegEvaluator(object):
    """
    Confusion-matrix-based evaluator for semantic segmentation.

    Accumulates an (num_classes x num_classes) confusion matrix over
    ``update`` calls and derives global accuracy, per-class accuracy,
    and per-class IoU from it.

    :param num_classes: number of segmentation classes
    """
    def __init__(self, num_classes):
        self.num_classes = num_classes
        # Lazily allocated confusion matrix (rows: labels in a, cols: labels in b).
        self.mat = None

    def update(self, a, b):
        """
        Accumulates a batch of label tensors ``a`` (ground truth) and ``b``
        (predictions). Entries of ``a`` outside [0, num_classes) are ignored.
        """
        n = self.num_classes
        if self.mat is None:
            self.mat = torch.zeros((n, n), dtype=torch.int64, device=a.device)
        with torch.no_grad():
            k = (a >= 0) & (a < n)
            inds = n * a[k].to(torch.int64) + b[k]
            self.mat += torch.bincount(inds, minlength=n**2).reshape(n, n)

    def reset(self):
        """Clears the accumulated confusion matrix (no-op before the first update)."""
        # Guard: reset() before any update() used to raise AttributeError on None.
        if self.mat is not None:
            self.mat.zero_()

    def compute(self):
        """
        Returns (global pixel accuracy, per-class accuracy, per-class IoU),
        each expressed as a percentage.
        """
        h = self.mat.float()
        acc_global = torch.diag(h).sum() / h.sum() * 100.0
        acc = torch.diag(h) / h.sum(1) * 100.0
        iu = torch.diag(h) / (h.sum(1) + h.sum(0) - torch.diag(h)) * 100.0
        return acc_global, acc, iu

    def reduce_from_all_processes(self):
        """Sums confusion matrices across distributed processes, if initialized."""
        if not torch.distributed.is_available():
            return
        if not torch.distributed.is_initialized():
            return
        torch.distributed.barrier()
        torch.distributed.all_reduce(self.mat)

    def __str__(self):
        acc_global, acc, iu = self.compute()
        return 'mean IoU: {:.1f}, IoU: {}, Global pixelwise acc: {:.1f}, Average row correct: {}'.format(
            iu.mean().item(), ['{:.1f}'.format(i) for i in iu.tolist()],
            acc_global.item(), ['{:.1f}'.format(i) for i in acc.tolist()]
        )
42 |
--------------------------------------------------------------------------------
/configs/coco2017/input_compression/mean_scale_hyperprior-faster_rcnn_resnet50_fpn.yaml:
--------------------------------------------------------------------------------
1 | dependencies:
2 | - name: 'custom'
3 |
4 | datasets:
5 | &coco_val coco2017/val: !import_call
6 | _name: &dataset_name 'coco2017'
7 | _root: &root_dir !join ['~/datasets/', *dataset_name]
8 | key: 'coco.dataset.coco_dataset'
9 | init:
10 | kwargs:
11 | img_dir_path: !join [*root_dir, '/val2017']
12 | ann_file_path: !join [*root_dir, '/annotations/instances_val2017.json']
13 | annotated_only: False
14 | is_segment: False
15 |
16 | models:
17 | model:
18 | key: 'InputCompressionDetectionModel'
19 | kwargs:
20 | analysis_config:
21 | analyzer_configs:
22 | - key: 'FileSizeAnalyzer'
23 | kwargs:
24 | unit: 'KB'
25 | analyzes_after_compress: True
26 | adaptive_pad_kwargs:
27 | fill: 0
28 | padding_mode: 'constant'
29 | factor: 128
30 | pre_transform:
31 | post_transform:
32 | compression_model:
33 | key: 'mbt2018_mean'
34 | kwargs:
35 | pretrained: True
36 | quality: 8
37 | metric: 'mse'
38 | detection_model:
39 | key: 'fasterrcnn_resnet50_fpn'
40 | kwargs:
41 | pretrained: True
42 | progress: True
43 |
44 | test:
45 | test_data_loader:
46 | dataset_id: *coco_val
47 | sampler:
48 | class_or_func: !import_get
49 | key: 'torch.utils.data.SequentialSampler'
50 | kwargs:
51 | collate_fn: 'coco_collate_fn'
52 | kwargs:
53 | batch_size: 1
54 | num_workers: 4
55 |
--------------------------------------------------------------------------------
/configs/coco2017/input_compression/factorized_prior-faster_rcnn_resnet50_fpn.yaml:
--------------------------------------------------------------------------------
1 | dependencies:
2 | - name: 'custom'
3 |
4 | datasets:
5 | &coco_val coco2017/val: !import_call
6 | _name: &dataset_name 'coco2017'
7 | _root: &root_dir !join ['~/datasets/', *dataset_name]
8 | key: 'coco.dataset.coco_dataset'
9 | init:
10 | kwargs:
11 | img_dir_path: !join [*root_dir, '/val2017']
12 | ann_file_path: !join [*root_dir, '/annotations/instances_val2017.json']
13 | annotated_only: False
14 | is_segment: False
15 |
16 | models:
17 | model:
18 | key: 'InputCompressionDetectionModel'
19 | kwargs:
20 | analysis_config:
21 | analyzer_configs:
22 | - key: 'FileSizeAnalyzer'
23 | kwargs:
24 | unit: 'KB'
25 | analyzes_after_compress: True
26 | adaptive_pad_kwargs:
27 | fill: 0
28 | padding_mode: 'constant'
29 | factor: 128
30 | pre_transform:
31 | post_transform:
32 | compression_model:
33 | key: 'bmshj2018_factorized'
34 | kwargs:
35 | pretrained: True
36 | quality: 8
37 | metric: 'mse'
38 | detection_model:
39 | key: 'fasterrcnn_resnet50_fpn'
40 | kwargs:
41 | pretrained: True
42 | progress: True
43 |
44 | test:
45 | test_data_loader:
46 | dataset_id: *coco_val
47 | sampler:
48 | class_or_func: !import_get
49 | key: 'torch.utils.data.SequentialSampler'
50 | kwargs:
51 | collate_fn: 'coco_collate_fn'
52 | kwargs:
53 | batch_size: 1
54 | num_workers: 4
55 |
--------------------------------------------------------------------------------
/configs/coco2017/input_compression/scale_hyperprior-faster_rcnn_resnet50_fpn.yaml:
--------------------------------------------------------------------------------
1 | dependencies:
2 | - name: 'custom'
3 |
4 | datasets:
5 | &coco_val coco2017/val: !import_call
6 | _name: &dataset_name 'coco2017'
7 | _root: &root_dir !join ['~/datasets/', *dataset_name]
8 | key: 'coco.dataset.coco_dataset'
9 | init:
10 | kwargs:
11 | img_dir_path: !join [*root_dir, '/val2017']
12 | ann_file_path: !join [*root_dir, '/annotations/instances_val2017.json']
13 | annotated_only: False
14 | is_segment: False
15 |
16 | models:
17 | model:
18 | key: 'InputCompressionDetectionModel'
19 | kwargs:
20 | analysis_config:
21 | analyzer_configs:
22 | - key: 'FileSizeAnalyzer'
23 | kwargs:
24 | unit: 'KB'
25 | analyzes_after_compress: True
26 | adaptive_pad_kwargs:
27 | fill: 0
28 | padding_mode: 'constant'
29 | factor: 128
30 | pre_transform:
31 | post_transform:
32 | compression_model:
33 | key: 'bmshj2018_hyperprior'
34 | kwargs:
35 | pretrained: True
36 | quality: 8
37 | metric: 'mse'
38 | detection_model:
39 | key: 'fasterrcnn_resnet50_fpn'
40 | kwargs:
41 | pretrained: True
42 | progress: True
43 |
44 | test:
45 | test_data_loader:
46 | dataset_id: *coco_val
47 | sampler:
48 | class_or_func: !import_get
49 | key: 'torch.utils.data.SequentialSampler'
50 | kwargs:
51 | collate_fn: 'coco_collate_fn'
52 | kwargs:
53 | batch_size: 1
54 | num_workers: 4
55 |
--------------------------------------------------------------------------------
/configs/coco2017/input_compression/joint_autoregressive_hierarchical_prior-faster_rcnn_resnet50_fpn.yaml:
--------------------------------------------------------------------------------
1 | dependencies:
2 | - name: 'custom'
3 |
4 | datasets:
5 | &coco_val coco2017/val: !import_call
6 | _name: &dataset_name 'coco2017'
7 | _root: &root_dir !join ['~/datasets/', *dataset_name]
8 | key: 'coco.dataset.coco_dataset'
9 | init:
10 | kwargs:
11 | img_dir_path: !join [*root_dir, '/val2017']
12 | ann_file_path: !join [*root_dir, '/annotations/instances_val2017.json']
13 | annotated_only: False
14 | is_segment: False
15 |
16 | models:
17 | model:
18 | key: 'InputCompressionDetectionModel'
19 | kwargs:
20 | analysis_config:
21 | analyzer_configs:
22 | - key: 'FileSizeAnalyzer'
23 | kwargs:
24 | unit: 'KB'
25 | analyzes_after_compress: True
26 | adaptive_pad_kwargs:
27 | fill: 0
28 | padding_mode: 'constant'
29 | factor: 128
30 | pre_transform:
31 | post_transform:
32 | uses_cpu4compression_model: True
33 | compression_model:
34 | key: 'mbt2018'
35 | kwargs:
36 | pretrained: True
37 | quality: 8
38 | metric: 'mse'
39 | detection_model:
40 | key: 'fasterrcnn_resnet50_fpn'
41 | kwargs:
42 | pretrained: True
43 | progress: True
44 |
45 | test:
46 | test_data_loader:
47 | dataset_id: *coco_val
48 | sampler:
49 | class_or_func: !import_get
50 | key: 'torch.utils.data.SequentialSampler'
51 | kwargs:
52 | collate_fn: 'coco_collate_fn'
53 | kwargs:
54 | batch_size: 1
55 | num_workers: 4
56 |
--------------------------------------------------------------------------------
/legacy/configs/ilsvrc2012/input_compression/jpeg-resnet50.yaml:
--------------------------------------------------------------------------------
1 | datasets:
2 | ilsvrc2012:
3 | name: &dataset_name 'ilsvrc2012'
4 | type: 'ImageFolder'
5 | root: &root_dir !join ['~/dataset/', *dataset_name]
6 | splits:
7 | val:
8 | dataset_id: &imagenet_val !join [*dataset_name, '/val']
9 | params:
10 | root: !join [*root_dir, '/val']
11 | transform_params:
12 | - type: 'Resize'
13 | params:
14 | size: 256
15 | - type: 'CenterCrop'
16 | params:
17 | size: [224, 224]
18 |
19 | models:
20 | model:
21 | name: 'CodecInputCompressionClassifier'
22 | params:
23 | codec_params:
24 | - type: 'PILImageModule'
25 | params:
26 | format: 'JPEG'
27 | quality: 90
28 | returns_file_size: True
29 | post_transform_params:
30 | - type: 'ToTensor'
31 | params:
32 | - type: 'Normalize'
33 | params:
34 | mean: [0.485, 0.456, 0.406]
35 | std: [0.229, 0.224, 0.225]
36 | analysis_config:
37 | analyzer_configs:
38 | - type: 'FileSizeAccumulator'
39 | params:
40 | unit: 'KB'
41 | classification_model:
42 | name: &model_name 'resnet50'
43 | params:
44 | num_classes: 1000
45 | pretrained: True
46 | experiment: &experiment !join [*dataset_name, '-', *model_name]
47 | ckpt: !join ['./resource/ckpt/', *experiment, '.pt']
48 |
49 | test:
50 | test_data_loader:
51 | dataset_id: *imagenet_val
52 | random_sample: False
53 | batch_size: 1
54 | num_workers: 16
55 | collate_fn: 'default_collate_w_pil'
56 |
--------------------------------------------------------------------------------
/legacy/configs/ilsvrc2012/input_compression/webp-resnet50.yaml:
--------------------------------------------------------------------------------
1 | datasets:
2 | ilsvrc2012:
3 | name: &dataset_name 'ilsvrc2012'
4 | type: 'ImageFolder'
5 | root: &root_dir !join ['~/dataset/', *dataset_name]
6 | splits:
7 | val:
8 | dataset_id: &imagenet_val !join [*dataset_name, '/val']
9 | params:
10 | root: !join [*root_dir, '/val']
11 | transform_params:
12 | - type: 'Resize'
13 | params:
14 | size: 256
15 | - type: 'CenterCrop'
16 | params:
17 | size: [224, 224]
18 |
19 | models:
20 | model:
21 | name: 'CodecInputCompressionClassifier'
22 | params:
23 | codec_params:
24 | - type: 'PILImageModule'
25 | params:
26 | format: 'WEBP'
27 | quality: 90
28 | returns_file_size: True
29 | post_transform_params:
30 | - type: 'ToTensor'
31 | params:
32 | - type: 'Normalize'
33 | params:
34 | mean: [0.485, 0.456, 0.406]
35 | std: [0.229, 0.224, 0.225]
36 | analysis_config:
37 | analyzer_configs:
38 | - type: 'FileSizeAccumulator'
39 | params:
40 | unit: 'KB'
41 | classification_model:
42 | name: &model_name 'resnet50'
43 | params:
44 | num_classes: 1000
45 | pretrained: True
46 | experiment: &experiment !join [*dataset_name, '-', *model_name]
47 | ckpt: !join ['./resource/ckpt/', *experiment, '.pt']
48 |
49 | test:
50 | test_data_loader:
51 | dataset_id: *imagenet_val
52 | random_sample: False
53 | batch_size: 1
54 | num_workers: 16
55 | collate_fn: 'default_collate_w_pil'
56 |
--------------------------------------------------------------------------------
/legacy/configs/ilsvrc2012/input_compression/jpeg-resnet101.yaml:
--------------------------------------------------------------------------------
1 | datasets:
2 | ilsvrc2012:
3 | name: &dataset_name 'ilsvrc2012'
4 | type: 'ImageFolder'
5 | root: &root_dir !join ['~/dataset/', *dataset_name]
6 | splits:
7 | val:
8 | dataset_id: &imagenet_val !join [*dataset_name, '/val']
9 | params:
10 | root: !join [*root_dir, '/val']
11 | transform_params:
12 | - type: 'Resize'
13 | params:
14 | size: 256
15 | - type: 'CenterCrop'
16 | params:
17 | size: [224, 224]
18 |
19 | models:
20 | model:
21 | name: 'CodecInputCompressionClassifier'
22 | params:
23 | codec_params:
24 | - type: 'PILImageModule'
25 | params:
26 | format: 'JPEG'
27 | quality: 90
28 | returns_file_size: True
29 | post_transform_params:
30 | - type: 'ToTensor'
31 | params:
32 | - type: 'Normalize'
33 | params:
34 | mean: [0.485, 0.456, 0.406]
35 | std: [0.229, 0.224, 0.225]
36 | analysis_config:
37 | analyzer_configs:
38 | - type: 'FileSizeAccumulator'
39 | params:
40 | unit: 'KB'
41 | classification_model:
42 | name: &model_name 'resnet101'
43 | params:
44 | num_classes: 1000
45 | pretrained: True
46 | experiment: &experiment !join [*dataset_name, '-', *model_name]
47 | ckpt: !join ['./resource/ckpt/', *experiment, '.pt']
48 |
49 | test:
50 | test_data_loader:
51 | dataset_id: *imagenet_val
52 | random_sample: False
53 | batch_size: 1
54 | num_workers: 16
55 | collate_fn: 'default_collate_w_pil'
56 |
--------------------------------------------------------------------------------
/legacy/configs/ilsvrc2012/input_compression/jpeg-resnet152.yaml:
--------------------------------------------------------------------------------
1 | datasets:
2 | ilsvrc2012:
3 | name: &dataset_name 'ilsvrc2012'
4 | type: 'ImageFolder'
5 | root: &root_dir !join ['~/dataset/', *dataset_name]
6 | splits:
7 | val:
8 | dataset_id: &imagenet_val !join [*dataset_name, '/val']
9 | params:
10 | root: !join [*root_dir, '/val']
11 | transform_params:
12 | - type: 'Resize'
13 | params:
14 | size: 256
15 | - type: 'CenterCrop'
16 | params:
17 | size: [224, 224]
18 |
19 | models:
20 | model:
21 | name: 'CodecInputCompressionClassifier'
22 | params:
23 | codec_params:
24 | - type: 'PILImageModule'
25 | params:
26 | format: 'JPEG'
27 | quality: 90
28 | returns_file_size: True
29 | post_transform_params:
30 | - type: 'ToTensor'
31 | params:
32 | - type: 'Normalize'
33 | params:
34 | mean: [0.485, 0.456, 0.406]
35 | std: [0.229, 0.224, 0.225]
36 | analysis_config:
37 | analyzer_configs:
38 | - type: 'FileSizeAccumulator'
39 | params:
40 | unit: 'KB'
41 | classification_model:
42 | name: &model_name 'resnet152'
43 | params:
44 | num_classes: 1000
45 | pretrained: True
46 | experiment: &experiment !join [*dataset_name, '-', *model_name]
47 | ckpt: !join ['./resource/ckpt/', *experiment, '.pt']
48 |
49 | test:
50 | test_data_loader:
51 | dataset_id: *imagenet_val
52 | random_sample: False
53 | batch_size: 1
54 | num_workers: 16
55 | collate_fn: 'default_collate_w_pil'
56 |
--------------------------------------------------------------------------------
/legacy/configs/ilsvrc2012/input_compression/webp-resnet101.yaml:
--------------------------------------------------------------------------------
1 | datasets:
2 | ilsvrc2012:
3 | name: &dataset_name 'ilsvrc2012'
4 | type: 'ImageFolder'
5 | root: &root_dir !join ['~/dataset/', *dataset_name]
6 | splits:
7 | val:
8 | dataset_id: &imagenet_val !join [*dataset_name, '/val']
9 | params:
10 | root: !join [*root_dir, '/val']
11 | transform_params:
12 | - type: 'Resize'
13 | params:
14 | size: 256
15 | - type: 'CenterCrop'
16 | params:
17 | size: [224, 224]
18 |
19 | models:
20 | model:
21 | name: 'CodecInputCompressionClassifier'
22 | params:
23 | codec_params:
24 | - type: 'PILImageModule'
25 | params:
26 | format: 'WEBP'
27 | quality: 90
28 | returns_file_size: True
29 | post_transform_params:
30 | - type: 'ToTensor'
31 | params:
32 | - type: 'Normalize'
33 | params:
34 | mean: [0.485, 0.456, 0.406]
35 | std: [0.229, 0.224, 0.225]
36 | analysis_config:
37 | analyzer_configs:
38 | - type: 'FileSizeAccumulator'
39 | params:
40 | unit: 'KB'
41 | classification_model:
42 | name: &model_name 'resnet101'
43 | params:
44 | num_classes: 1000
45 | pretrained: True
46 | experiment: &experiment !join [*dataset_name, '-', *model_name]
47 | ckpt: !join ['./resource/ckpt/', *experiment, '.pt']
48 |
49 | test:
50 | test_data_loader:
51 | dataset_id: *imagenet_val
52 | random_sample: False
53 | batch_size: 1
54 | num_workers: 16
55 | collate_fn: 'default_collate_w_pil'
56 |
--------------------------------------------------------------------------------
/legacy/configs/ilsvrc2012/input_compression/webp-resnet152.yaml:
--------------------------------------------------------------------------------
1 | datasets:
2 | ilsvrc2012:
3 | name: &dataset_name 'ilsvrc2012'
4 | type: 'ImageFolder'
5 | root: &root_dir !join ['~/dataset/', *dataset_name]
6 | splits:
7 | val:
8 | dataset_id: &imagenet_val !join [*dataset_name, '/val']
9 | params:
10 | root: !join [*root_dir, '/val']
11 | transform_params:
12 | - type: 'Resize'
13 | params:
14 | size: 256
15 | - type: 'CenterCrop'
16 | params:
17 | size: [224, 224]
18 |
19 | models:
20 | model:
21 | name: 'CodecInputCompressionClassifier'
22 | params:
23 | codec_params:
24 | - type: 'PILImageModule'
25 | params:
26 | format: 'WEBP'
27 | quality: 90
28 | returns_file_size: True
29 | post_transform_params:
30 | - type: 'ToTensor'
31 | params:
32 | - type: 'Normalize'
33 | params:
34 | mean: [0.485, 0.456, 0.406]
35 | std: [0.229, 0.224, 0.225]
36 | analysis_config:
37 | analyzer_configs:
38 | - type: 'FileSizeAccumulator'
39 | params:
40 | unit: 'KB'
41 | classification_model:
42 | name: &model_name 'resnet152'
43 | params:
44 | num_classes: 1000
45 | pretrained: True
46 | experiment: &experiment !join [*dataset_name, '-', *model_name]
47 | ckpt: !join ['./resource/ckpt/', *experiment, '.pt']
48 |
49 | test:
50 | test_data_loader:
51 | dataset_id: *imagenet_val
52 | random_sample: False
53 | batch_size: 1
54 | num_workers: 16
55 | collate_fn: 'default_collate_w_pil'
56 |
--------------------------------------------------------------------------------
/script/codec_input_compression/README.md:
--------------------------------------------------------------------------------
1 | # Codec-based Input Compression Baselines
2 |
3 | We considered the following codec-based image compression methods:
4 | - JPEG
5 | - WebP
6 | - BPG
7 |
8 | If you want to use BPG, you will need to manually install the software:
9 | ```shell
10 | bash script/software/install_bpg.sh
11 | ```
12 |
13 | The script will place the encoder and decoder in `~/software/`.
14 |
15 | ## ImageNet (ILSVRC 2012): Image Classification
16 | Codec-based input compression followed by ResNet-50
17 |
18 | ```shell
19 | bash script/codec_input_compression/ilsvrc2012-image_classification.sh jpeg-resnet50 jpeg
20 | bash script/codec_input_compression/ilsvrc2012-image_classification.sh webp-resnet50 webp
21 | bash script/codec_input_compression/ilsvrc2012-image_classification.sh bpg-resnet50 bpg 5 5 50
22 | ```
23 |
24 | ## COCO 2017: Object Detection
25 | Codec-based input compression followed by Faster R-CNN with ResNet-50 and FPN
26 |
27 | ```shell
28 | bash script/codec_input_compression/coco2017-object_detection.sh jpeg-faster_rcnn_resnet50_fpn jpeg
29 | bash script/codec_input_compression/coco2017-object_detection.sh webp-faster_rcnn_resnet50_fpn webp
30 | bash script/codec_input_compression/coco2017-object_detection.sh bpg-faster_rcnn_resnet50_fpn bpg 5 5 50
31 | ```
32 |
33 | ## PASCAL VOC 2012: Semantic Segmentation
34 | Codec-based input compression followed by DeepLabv3 with ResNet-50
35 |
36 | ```shell
37 | bash script/codec_input_compression/pascal_voc2012-semantic_segmentation.sh jpeg-deeplabv3_resnet50 jpeg
38 | bash script/codec_input_compression/pascal_voc2012-semantic_segmentation.sh webp-deeplabv3_resnet50 webp
39 | bash script/codec_input_compression/pascal_voc2012-semantic_segmentation.sh bpg-deeplabv3_resnet50 bpg 5 5 50
40 | ```
41 |
--------------------------------------------------------------------------------
/legacy/configs/ilsvrc2012/input_compression/bpg-resnet50.yaml:
--------------------------------------------------------------------------------
1 | datasets:
2 | ilsvrc2012:
3 | name: &dataset_name 'ilsvrc2012'
4 | type: 'ImageFolder'
5 | root: &root_dir !join ['~/dataset/', *dataset_name]
6 | splits:
7 | val:
8 | dataset_id: &imagenet_val !join [*dataset_name, '/val']
9 | params:
10 | root: !join [*root_dir, '/val']
11 | transform_params:
12 | - type: 'Resize'
13 | params:
14 | size: 256
15 | - type: 'CenterCrop'
16 | params:
17 | size: [224, 224]
18 |
19 | models:
20 | model:
21 | name: 'CodecInputCompressionClassifier'
22 | params:
23 | codec_params:
24 | - type: 'BPGModule'
25 | params:
26 | encoder_path: '~/software/libbpg-0.9.8/bpgenc'
27 | decoder_path: '~/software/libbpg-0.9.8/bpgdec'
28 | quality: 50
29 | returns_file_size: True
30 | post_transform_params:
31 | - type: 'ToTensor'
32 | params:
33 | - type: 'Normalize'
34 | params:
35 | mean: [0.485, 0.456, 0.406]
36 | std: [0.229, 0.224, 0.225]
37 | analysis_config:
38 | analyzer_configs:
39 | - type: 'FileSizeAccumulator'
40 | params:
41 | unit: 'KB'
42 | classification_model:
43 | name: &model_name 'resnet50'
44 | params:
45 | num_classes: 1000
46 | pretrained: True
47 | experiment: &experiment !join [*dataset_name, '-', *model_name]
48 | ckpt: !join ['./resource/ckpt/', *experiment, '.pt']
49 |
50 | test:
51 | test_data_loader:
52 | dataset_id: *imagenet_val
53 | random_sample: False
54 | batch_size: 1
55 | num_workers: 16
56 |
--------------------------------------------------------------------------------
/legacy/configs/ilsvrc2012/input_compression/bpg-resnet101.yaml:
--------------------------------------------------------------------------------
1 | datasets:
2 | ilsvrc2012:
3 | name: &dataset_name 'ilsvrc2012'
4 | type: 'ImageFolder'
5 | root: &root_dir !join ['~/dataset/', *dataset_name]
6 | splits:
7 | val:
8 | dataset_id: &imagenet_val !join [*dataset_name, '/val']
9 | params:
10 | root: !join [*root_dir, '/val']
11 | transform_params:
12 | - type: 'Resize'
13 | params:
14 | size: 256
15 | - type: 'CenterCrop'
16 | params:
17 | size: [224, 224]
18 |
19 | models:
20 | model:
21 | name: 'CodecInputCompressionClassifier'
22 | params:
23 | codec_params:
24 | - type: 'BPGModule'
25 | params:
26 | encoder_path: '~/software/libbpg-0.9.8/bpgenc'
27 | decoder_path: '~/software/libbpg-0.9.8/bpgdec'
28 | quality: 50
29 | returns_file_size: True
30 | post_transform_params:
31 | - type: 'ToTensor'
32 | params:
33 | - type: 'Normalize'
34 | params:
35 | mean: [0.485, 0.456, 0.406]
36 | std: [0.229, 0.224, 0.225]
37 | analysis_config:
38 | analyzer_configs:
39 | - type: 'FileSizeAccumulator'
40 | params:
41 | unit: 'KB'
42 | classification_model:
43 | name: &model_name 'resnet101'
44 | params:
45 | num_classes: 1000
46 | pretrained: True
47 | experiment: &experiment !join [*dataset_name, '-', *model_name]
48 | ckpt: !join ['./resource/ckpt/', *experiment, '.pt']
49 |
50 | test:
51 | test_data_loader:
52 | dataset_id: *imagenet_val
53 | random_sample: False
54 | batch_size: 1
55 | num_workers: 16
56 |
--------------------------------------------------------------------------------
/legacy/configs/ilsvrc2012/input_compression/bpg-resnet152.yaml:
--------------------------------------------------------------------------------
1 | datasets:
2 | ilsvrc2012:
3 | name: &dataset_name 'ilsvrc2012'
4 | type: 'ImageFolder'
5 | root: &root_dir !join ['~/dataset/', *dataset_name]
6 | splits:
7 | val:
8 | dataset_id: &imagenet_val !join [*dataset_name, '/val']
9 | params:
10 | root: !join [*root_dir, '/val']
11 | transform_params:
12 | - type: 'Resize'
13 | params:
14 | size: 256
15 | - type: 'CenterCrop'
16 | params:
17 | size: [224, 224]
18 |
19 | models:
20 | model:
21 | name: 'CodecInputCompressionClassifier'
22 | params:
23 | codec_params:
24 | - type: 'BPGModule'
25 | params:
26 | encoder_path: '~/software/libbpg-0.9.8/bpgenc'
27 | decoder_path: '~/software/libbpg-0.9.8/bpgdec'
28 | quality: 50
29 | returns_file_size: True
30 | post_transform_params:
31 | - type: 'ToTensor'
32 | params:
33 | - type: 'Normalize'
34 | params:
35 | mean: [0.485, 0.456, 0.406]
36 | std: [0.229, 0.224, 0.225]
37 | analysis_config:
38 | analyzer_configs:
39 | - type: 'FileSizeAccumulator'
40 | params:
41 | unit: 'KB'
42 | classification_model:
43 | name: &model_name 'resnet152'
44 | params:
45 | num_classes: 1000
46 | pretrained: True
47 | experiment: &experiment !join [*dataset_name, '-', *model_name]
48 | ckpt: !join ['./resource/ckpt/', *experiment, '.pt']
49 |
50 | test:
51 | test_data_loader:
52 | dataset_id: *imagenet_val
53 | random_sample: False
54 | batch_size: 1
55 | num_workers: 16
56 |
--------------------------------------------------------------------------------
/configs/coco2017/input_compression/jpeg-faster_rcnn_resnet50_fpn.yaml:
--------------------------------------------------------------------------------
1 | dependencies:
2 | - name: 'custom'
3 |
4 | datasets:
5 | &coco_val coco2017/val: !import_call
6 | _name: &dataset_name 'coco2017'
7 | _root: &root_dir !join ['~/datasets/', *dataset_name]
8 | key: 'coco.dataset.coco_dataset'
9 | init:
10 | kwargs:
11 | img_dir_path: !join [*root_dir, '/val2017']
12 | ann_file_path: !join [*root_dir, '/annotations/instances_val2017.json']
13 | annotated_only: False
14 | is_segment: False
15 |
16 | models:
17 | model:
18 | key: 'InputCompressionDetectionModel'
19 | kwargs:
20 | codec_encoder_decoder: !import_call
21 | key: 'torchvision.transforms.Compose'
22 | init:
23 | kwargs:
24 | transforms:
25 | - !import_call
26 | key: 'sc2bench.transforms.codec.PILImageModule'
27 | init:
28 | kwargs:
29 | format: 'JPEG'
30 | quality: 90
31 | returns_file_size: True
32 | analysis_config:
33 | analyzer_configs:
34 | - key: 'FileSizeAccumulator'
35 | kwargs:
36 | unit: 'KB'
37 | analyzes_after_compress: True
38 | adaptive_pad_config:
39 | pre_transform:
40 | post_transform:
41 | detection_model:
42 | key: 'fasterrcnn_resnet50_fpn'
43 | kwargs:
44 | pretrained: True
45 | progress: True
46 |
47 | test:
48 | test_data_loader:
49 | dataset_id: *coco_val
50 | sampler:
51 | class_or_func: !import_get
52 | key: 'torch.utils.data.SequentialSampler'
53 | kwargs:
54 | collate_fn: 'coco_collate_fn'
55 | kwargs:
56 | batch_size: 1
57 | num_workers: 4
58 |
--------------------------------------------------------------------------------
/configs/coco2017/input_compression/webp-faster_rcnn_resnet50_fpn.yaml:
--------------------------------------------------------------------------------
1 | dependencies:
2 | - name: 'custom'
3 |
4 | datasets:
5 | &coco_val coco2017/val: !import_call
6 | _name: &dataset_name 'coco2017'
7 | _root: &root_dir !join ['~/datasets/', *dataset_name]
8 | key: 'coco.dataset.coco_dataset'
9 | init:
10 | kwargs:
11 | img_dir_path: !join [*root_dir, '/val2017']
12 | ann_file_path: !join [*root_dir, '/annotations/instances_val2017.json']
13 | annotated_only: False
14 | is_segment: False
15 |
16 | models:
17 | model:
18 | key: 'InputCompressionDetectionModel'
19 | kwargs:
20 | codec_encoder_decoder: !import_call
21 | key: 'torchvision.transforms.Compose'
22 | init:
23 | kwargs:
24 | transforms:
25 | - !import_call
26 | key: 'sc2bench.transforms.codec.PILImageModule'
27 | init:
28 | kwargs:
29 | format: 'WEBP'
30 | quality: 90
31 | returns_file_size: True
32 | analysis_config:
33 | analyzer_configs:
34 | - key: 'FileSizeAccumulator'
35 | kwargs:
36 | unit: 'KB'
37 | analyzes_after_compress: True
38 | adaptive_pad_config:
39 | pre_transform:
40 | post_transform:
41 | detection_model:
42 | key: 'fasterrcnn_resnet50_fpn'
43 | kwargs:
44 | pretrained: True
45 | progress: True
46 |
47 | test:
48 | test_data_loader:
49 | dataset_id: *coco_val
50 | sampler:
51 | class_or_func: !import_get
52 | key: 'torch.utils.data.SequentialSampler'
53 | kwargs:
54 | collate_fn: 'coco_collate_fn'
55 | kwargs:
56 | batch_size: 1
57 | num_workers: 4
58 |
--------------------------------------------------------------------------------
/legacy/script/codec_input_compression/README.md:
--------------------------------------------------------------------------------
1 | # Codec-based Input Compression Baselines
2 |
3 | We considered the following codec-based image compression methods:
4 | - JPEG
5 | - WebP
6 | - BPG
7 |
8 | If you want to use BPG, you will need to manually install the software:
9 | ```shell
10 | bash script/software/install_bpg.sh
11 | ```
12 |
13 | The script will place the encoder and decoder in `~/software/`.
14 |
15 | ## ImageNet (ILSVRC 2012): Image Classification
16 | Codec-based input compression followed by ResNet-50
17 |
18 | ```shell
19 | bash legacy/script/codec_input_compression/ilsvrc2012-image_classification.sh jpeg-resnet50 jpeg
20 | bash legacy/script/codec_input_compression/ilsvrc2012-image_classification.sh webp-resnet50 webp
21 | bash legacy/script/codec_input_compression/ilsvrc2012-image_classification.sh bpg-resnet50 bpg 5 5 50
22 | ```
23 |
24 | ## COCO 2017: Object Detection
25 | Codec-based input compression followed by Faster R-CNN with ResNet-50 and FPN
26 |
27 | ```shell
28 | bash legacy/script/codec_input_compression/coco2017-object_detection.sh jpeg-faster_rcnn_resnet50_fpn jpeg
29 | bash legacy/script/codec_input_compression/coco2017-object_detection.sh webp-faster_rcnn_resnet50_fpn webp
30 | bash legacy/script/codec_input_compression/coco2017-object_detection.sh bpg-faster_rcnn_resnet50_fpn bpg 5 5 50
31 | ```
32 |
33 | ## PASCAL VOC 2012: Semantic Segmentation
34 | Codec-based input compression followed by DeepLabv3 with ResNet-50
35 |
36 | ```shell
37 | bash legacy/script/codec_input_compression/pascal_voc2012-semantic_segmentation.sh jpeg-deeplabv3_resnet50 jpeg
38 | bash legacy/script/codec_input_compression/pascal_voc2012-semantic_segmentation.sh webp-deeplabv3_resnet50 webp
39 | bash legacy/script/codec_input_compression/pascal_voc2012-semantic_segmentation.sh bpg-deeplabv3_resnet50 bpg 5 5 50
40 | ```
41 |
--------------------------------------------------------------------------------
/legacy/configs/coco2017/input_compression/factorized_prior-faster_rcnn_resnet50_fpn.yaml:
--------------------------------------------------------------------------------
1 | datasets:
2 | coco2017:
3 | name: &dataset_name 'coco2017'
4 | type: 'cocodetect'
5 | root: &root_dir !join ['~/dataset/', *dataset_name]
6 | splits:
7 | val:
8 | dataset_id: &coco_val !join [*dataset_name, '/val']
9 | images: !join [*root_dir, '/val2017']
10 | annotations: !join [*root_dir, '/annotations/instances_val2017.json']
11 | annotated_only: False
12 |
13 | models:
14 | model:
15 | name: 'InputCompressionDetectionModel'
16 | params:
17 | codec_params:
18 | - type: 'BPGModule'
19 | params:
20 | encoder_path: '~/software/libbpg-0.9.8/bpgenc'
21 | decoder_path: '~/software/libbpg-0.9.8/bpgdec'
22 | quality: 50
23 | returns_file_size: True
24 | analysis_config:
25 | analyzer_configs:
26 | - type: 'FileSizeAnalyzer'
27 | params:
28 | unit: 'KB'
29 | analyzes_after_compress: True
30 | adaptive_pad_kwargs:
31 | fill: 0
32 | padding_mode: 'constant'
33 | factor: 128
34 | pre_transform_params:
35 | post_transform_params:
36 | compression_model:
37 | name: 'bmshj2018_factorized'
38 | params:
39 | pretrained: True
40 | quality: 8
41 | metric: 'mse'
42 | ckpt: './resource/ckpt/input_compression/factorized_prior.pt'
43 | detection_model:
44 | name: 'fasterrcnn_resnet50_fpn'
45 | params:
46 | pretrained: True
47 | progress: True
48 | pretrained_backbone: True
49 | ckpt: ''
50 |
51 | test:
52 | test_data_loader:
53 | dataset_id: *coco_val
54 | random_sample: False
55 | batch_size: 1
56 | num_workers: 4
57 | collate_fn: 'coco_collate_fn'
58 |
--------------------------------------------------------------------------------
/legacy/configs/coco2017/input_compression/mean_scale_hyperprior-faster_rcnn_resnet50_fpn.yaml:
--------------------------------------------------------------------------------
1 | datasets:
2 | coco2017:
3 | name: &dataset_name 'coco2017'
4 | type: 'cocodetect'
5 | root: &root_dir !join ['~/dataset/', *dataset_name]
6 | splits:
7 | val:
8 | dataset_id: &coco_val !join [*dataset_name, '/val']
9 | images: !join [*root_dir, '/val2017']
10 | annotations: !join [*root_dir, '/annotations/instances_val2017.json']
11 | annotated_only: False
12 |
13 | models:
14 | model:
15 | name: 'InputCompressionDetectionModel'
16 | params:
17 | codec_params:
18 | - type: 'BPGModule'
19 | params:
20 | encoder_path: '~/software/libbpg-0.9.8/bpgenc'
21 | decoder_path: '~/software/libbpg-0.9.8/bpgdec'
22 | quality: 50
23 | returns_file_size: True
24 | analysis_config:
25 | analyzer_configs:
26 | - type: 'FileSizeAnalyzer'
27 | params:
28 | unit: 'KB'
29 | analyzes_after_compress: True
30 | adaptive_pad_kwargs:
31 | fill: 0
32 | padding_mode: 'constant'
33 | factor: 128
34 | pre_transform_params:
35 | post_transform_params:
36 | compression_model:
37 | name: 'mbt2018_mean'
38 | params:
39 | pretrained: True
40 | quality: 8
41 | metric: 'mse'
42 | ckpt: './resource/ckpt/input_compression/mean_scale_hyperprior.pt'
43 | detection_model:
44 | name: 'fasterrcnn_resnet50_fpn'
45 | params:
46 | pretrained: True
47 | progress: True
48 | pretrained_backbone: True
49 | ckpt: ''
50 |
51 | test:
52 | test_data_loader:
53 | dataset_id: *coco_val
54 | random_sample: False
55 | batch_size: 1
56 | num_workers: 4
57 | collate_fn: 'coco_collate_fn'
58 |
--------------------------------------------------------------------------------
/legacy/configs/coco2017/input_compression/scale_hyperprior-faster_rcnn_resnet50_fpn.yaml:
--------------------------------------------------------------------------------
1 | datasets:
2 | coco2017:
3 | name: &dataset_name 'coco2017'
4 | type: 'cocodetect'
5 | root: &root_dir !join ['~/dataset/', *dataset_name]
6 | splits:
7 | val:
8 | dataset_id: &coco_val !join [*dataset_name, '/val']
9 | images: !join [*root_dir, '/val2017']
10 | annotations: !join [*root_dir, '/annotations/instances_val2017.json']
11 | annotated_only: False
12 |
13 | models:
14 | model:
15 | name: 'InputCompressionDetectionModel'
16 | params:
17 | codec_params:
18 | - type: 'BPGModule'
19 | params:
20 | encoder_path: '~/software/libbpg-0.9.8/bpgenc'
21 | decoder_path: '~/software/libbpg-0.9.8/bpgdec'
22 | quality: 50
23 | returns_file_size: True
24 | analysis_config:
25 | analyzer_configs:
26 | - type: 'FileSizeAnalyzer'
27 | params:
28 | unit: 'KB'
29 | analyzes_after_compress: True
30 | adaptive_pad_kwargs:
31 | fill: 0
32 | padding_mode: 'constant'
33 | factor: 128
34 | pre_transform_params:
35 | post_transform_params:
36 | compression_model:
37 | name: 'bmshj2018_hyperprior'
38 | params:
39 | pretrained: True
40 | quality: 8
41 | metric: 'mse'
42 | ckpt: './resource/ckpt/input_compression/scale_hyperprior.pt'
43 | detection_model:
44 | name: 'fasterrcnn_resnet50_fpn'
45 | params:
46 | pretrained: True
47 | progress: True
48 | pretrained_backbone: True
49 | ckpt: ''
50 |
51 | test:
52 | test_data_loader:
53 | dataset_id: *coco_val
54 | random_sample: False
55 | batch_size: 1
56 | num_workers: 4
57 | collate_fn: 'coco_collate_fn'
58 |
--------------------------------------------------------------------------------
/configs/coco2017/input_compression/bpg-faster_rcnn_resnet50_fpn.yaml:
--------------------------------------------------------------------------------
1 | dependencies:
2 | - name: 'custom'
3 |
4 | datasets:
5 | &coco_val coco2017/val: !import_call
6 | _name: &dataset_name 'coco2017'
7 | _root: &root_dir !join ['~/datasets/', *dataset_name]
8 | key: 'coco.dataset.coco_dataset'
9 | init:
10 | kwargs:
11 | img_dir_path: !join [*root_dir, '/val2017']
12 | ann_file_path: !join [*root_dir, '/annotations/instances_val2017.json']
13 | annotated_only: False
14 | is_segment: False
15 |
16 | models:
17 | model:
18 | key: 'InputCompressionDetectionModel'
19 | kwargs:
20 | codec_encoder_decoder: !import_call
21 | key: 'torchvision.transforms.Compose'
22 | init:
23 | kwargs:
24 | transforms:
25 | - !import_call
26 | key: 'sc2bench.transforms.codec.BPGModule'
27 | init:
28 | kwargs:
29 | encoder_path: '~/software/libbpg-0.9.8/bpgenc'
30 | decoder_path: '~/software/libbpg-0.9.8/bpgdec'
31 | quality: 50
32 | returns_file_size: True
33 | analysis_config:
34 | analyzer_configs:
35 | - key: 'FileSizeAccumulator'
36 | kwargs:
37 | unit: 'KB'
38 | analyzes_after_compress: True
39 | adaptive_pad_config:
40 | pre_transform:
41 | post_transform:
42 | detection_model:
43 | key: 'fasterrcnn_resnet50_fpn'
44 | kwargs:
45 | pretrained: True
46 | progress: True
47 |
48 | test:
49 | test_data_loader:
50 | dataset_id: *coco_val
51 | sampler:
52 | class_or_func: !import_get
53 | key: 'torch.utils.data.SequentialSampler'
54 | kwargs:
55 | collate_fn: 'coco_collate_fn'
56 | kwargs:
57 | batch_size: 1
58 | num_workers: 4
59 |
--------------------------------------------------------------------------------
/legacy/configs/coco2017/input_compression/joint_autoregressive_hierarchical_prior-faster_rcnn_resnet50_fpn.yaml:
--------------------------------------------------------------------------------
1 | datasets:
2 | coco2017:
3 | name: &dataset_name 'coco2017'
4 | type: 'cocodetect'
5 | root: &root_dir !join ['~/dataset/', *dataset_name]
6 | splits:
7 | val:
8 | dataset_id: &coco_val !join [*dataset_name, '/val']
9 | images: !join [*root_dir, '/val2017']
10 | annotations: !join [*root_dir, '/annotations/instances_val2017.json']
11 | annotated_only: False
12 |
13 | models:
14 | model:
15 | name: 'InputCompressionDetectionModel'
16 | params:
17 | codec_params:
18 | - type: 'BPGModule'
19 | params:
20 | encoder_path: '~/software/libbpg-0.9.8/bpgenc'
21 | decoder_path: '~/software/libbpg-0.9.8/bpgdec'
22 | quality: 50
23 | returns_file_size: True
24 | analysis_config:
25 | analyzer_configs:
26 | - type: 'FileSizeAnalyzer'
27 | params:
28 | unit: 'KB'
29 | analyzes_after_compress: True
30 | adaptive_pad_kwargs:
31 | fill: 0
32 | padding_mode: 'constant'
33 | factor: 128
34 | pre_transform_params:
35 | post_transform_params:
36 | uses_cpu4compression_model: True
37 | compression_model:
38 | name: 'mbt2018'
39 | params:
40 | pretrained: True
41 | quality: 8
42 | metric: 'mse'
43 | ckpt: './resource/ckpt/input_compression/joint_autoregressive_hierarchical_prior.pt'
44 | detection_model:
45 | name: 'fasterrcnn_resnet50_fpn'
46 | params:
47 | pretrained: True
48 | progress: True
49 | pretrained_backbone: True
50 | ckpt: ''
51 |
52 | test:
53 | test_data_loader:
54 | dataset_id: *coco_val
55 | random_sample: False
56 | batch_size: 1
57 | num_workers: 4
58 | collate_fn: 'coco_collate_fn'
59 |
--------------------------------------------------------------------------------
/legacy/configs/ilsvrc2012/input_compression/vtm-resnet50.yaml:
--------------------------------------------------------------------------------
1 | datasets:
2 | ilsvrc2012:
3 | name: &dataset_name 'ilsvrc2012'
4 | type: 'ImageFolder'
5 | root: &root_dir !join ['~/dataset/', *dataset_name]
6 | splits:
7 | val:
8 | dataset_id: &imagenet_val !join [*dataset_name, '/val']
9 | params:
10 | root: !join [*root_dir, '/val']
11 | transform_params:
12 | - type: 'Resize'
13 | params:
14 | size: 256
15 | - type: 'CenterCrop'
16 | params:
17 | size: [224, 224]
18 |
19 | models:
20 | model:
21 | name: 'CodecInputCompressionClassifier'
22 | params:
23 | codec_params:
24 | - type: 'VTMModule'
25 | params:
26 | encoder_path: '~/software/VVCSoftware_VTM/bin/EncoderAppStatic'
27 | decoder_path: '~/software/VVCSoftware_VTM/bin/DecoderAppStatic'
28 | config_path: '~/software/VVCSoftware_VTM/cfg/encoder_intra_vtm.cfg'
29 | color_mode: 'ycbcr'
30 | quality: 63
31 | returns_file_size: True
32 | post_transform_params:
33 | - type: 'ToTensor'
34 | params:
35 | - type: 'Normalize'
36 | params:
37 | mean: [0.485, 0.456, 0.406]
38 | std: [0.229, 0.224, 0.225]
39 | analysis_config:
40 | analyzer_configs:
41 | - type: 'FileSizeAccumulator'
42 | params:
43 | unit: 'KB'
44 | classification_model:
45 | name: &model_name 'resnet50'
46 | params:
47 | num_classes: 1000
48 | pretrained: True
49 | experiment: &experiment !join [*dataset_name, '-', *model_name]
50 | ckpt: !join ['./resource/ckpt/', *experiment, '.pt']
51 |
52 | test:
53 | test_data_loader:
54 | dataset_id: *imagenet_val
55 | random_sample: False
56 | batch_size: 1
57 | num_workers: 16
58 |
--------------------------------------------------------------------------------
/legacy/configs/ilsvrc2012/feature_compression/jpeg-resnet50.yaml:
--------------------------------------------------------------------------------
1 | datasets:
2 | ilsvrc2012:
3 | name: &dataset_name 'ilsvrc2012'
4 | type: 'ImageFolder'
5 | root: &root_dir !join ['~/dataset/', *dataset_name]
6 | splits:
7 | val:
8 | dataset_id: &imagenet_val !join [*dataset_name, '/val']
9 | params:
10 | root: !join [*root_dir, '/val']
11 | transform_params:
12 | - type: 'Resize'
13 | params:
14 | size: 256
15 | - type: 'CenterCrop'
16 | params:
17 | size: [224, 224]
18 | - type: 'ToTensor'
19 | params:
20 | - type: 'Normalize'
21 | params:
22 | mean: [0.485, 0.456, 0.406]
23 | std: [0.229, 0.224, 0.225]
24 |
25 | models:
26 | model:
27 | name: 'CodecFeatureCompressionClassifier'
28 | params:
29 | codec_params:
30 | - type: 'PILTensorModule'
31 | params:
32 | format: 'JPEG'
33 | quality: 90
34 | returns_file_size: True
35 | encoder_config:
36 | sequential: ['conv1', 'bn1', 'relu', 'maxpool', 'layer1', 'layer2']
37 | decoder_config:
38 | sequential: ['layer3', 'layer4', 'avgpool']
39 | classifier_config:
40 | sequential: ['fc']
41 | post_transform_params:
42 | analysis_config:
43 | analyzer_configs:
44 | - type: 'FileSizeAccumulator'
45 | params:
46 | unit: 'KB'
47 | classification_model:
48 | name: &model_name 'resnet50'
49 | params:
50 | num_classes: 1000
51 | pretrained: True
52 | experiment: &experiment !join [*dataset_name, '-', *model_name]
53 | ckpt: !join ['./resource/ckpt/', *experiment, '.pt']
54 |
55 | test:
56 | test_data_loader:
57 | dataset_id: *imagenet_val
58 | random_sample: False
59 | batch_size: 1
60 | num_workers: 16
61 | collate_fn: 'default_collate_w_pil'
62 |
--------------------------------------------------------------------------------
/legacy/configs/ilsvrc2012/feature_compression/webp-resnet50.yaml:
--------------------------------------------------------------------------------
1 | datasets:
2 | ilsvrc2012:
3 | name: &dataset_name 'ilsvrc2012'
4 | type: 'ImageFolder'
5 | root: &root_dir !join ['~/dataset/', *dataset_name]
6 | splits:
7 | val:
8 | dataset_id: &imagenet_val !join [*dataset_name, '/val']
9 | params:
10 | root: !join [*root_dir, '/val']
11 | transform_params:
12 | - type: 'Resize'
13 | params:
14 | size: 256
15 | - type: 'CenterCrop'
16 | params:
17 | size: [224, 224]
18 | - type: 'ToTensor'
19 | params:
20 | - type: 'Normalize'
21 | params:
22 | mean: [0.485, 0.456, 0.406]
23 | std: [0.229, 0.224, 0.225]
24 |
25 | models:
26 | model:
27 | name: 'CodecFeatureCompressionClassifier'
28 | params:
29 | codec_params:
30 | - type: 'PILTensorModule'
31 | params:
32 | format: 'WEBP'
33 | quality: 90
34 | returns_file_size: True
35 | encoder_config:
36 | sequential: ['conv1', 'bn1', 'relu', 'maxpool', 'layer1', 'layer2']
37 | decoder_config:
38 | sequential: ['layer3', 'layer4', 'avgpool']
39 | classifier_config:
40 | sequential: ['fc']
41 | post_transform_params:
42 | analysis_config:
43 | analyzer_configs:
44 | - type: 'FileSizeAccumulator'
45 | params:
46 | unit: 'KB'
47 | classification_model:
48 | name: &model_name 'resnet50'
49 | params:
50 | num_classes: 1000
51 | pretrained: True
52 | experiment: &experiment !join [*dataset_name, '-', *model_name]
53 | ckpt: !join ['./resource/ckpt/', *experiment, '.pt']
54 |
55 | test:
56 | test_data_loader:
57 | dataset_id: *imagenet_val
58 | random_sample: False
59 | batch_size: 1
60 | num_workers: 16
61 | collate_fn: 'default_collate_w_pil'
62 |
--------------------------------------------------------------------------------
/legacy/configs/ilsvrc2012/input_compression/factorized_prior-resnet50.yaml:
--------------------------------------------------------------------------------
1 | datasets:
2 | ilsvrc2012:
3 | name: &dataset_name 'ilsvrc2012'
4 | type: 'ImageFolder'
5 | root: &root_dir !join ['~/dataset/', *dataset_name]
6 | splits:
7 | val:
8 | dataset_id: &imagenet_val !join [*dataset_name, '/val']
9 | params:
10 | root: !join [*root_dir, '/val']
11 | transform_params:
12 | - type: 'Resize'
13 | params:
14 | size: 256
15 | - type: 'CenterCrop'
16 | params:
17 | size: &input_size [224, 224]
18 | - type: 'ToTensor'
19 | params:
20 | - type: 'AdaptivePad'
21 | params:
22 | fill: 0
23 | factor: 64
24 |
25 | models:
26 | model:
27 | name: 'NeuralInputCompressionClassifier'
28 | params:
29 | post_transform_params:
30 | - type: 'CenterCrop'
31 | params:
32 | size: *input_size
33 | - type: 'Normalize'
34 | params:
35 | mean: [0.485, 0.456, 0.406]
36 | std: [0.229, 0.224, 0.225]
37 | analysis_config:
38 | analyzes_after_compress: True
39 | analyzer_configs:
40 | - type: 'FileSizeAnalyzer'
41 | params:
42 | unit: 'KB'
43 | compression_model:
44 | name: 'bmshj2018_factorized'
45 | params:
46 | pretrained: True
47 | quality: 8
48 | metric: 'mse'
49 | ckpt: './resource/ckpt/input_compression/factorized_prior.pt'
50 | classification_model:
51 | name: &model_name 'resnet50'
52 | params:
53 | num_classes: 1000
54 | pretrained: True
55 | experiment: &experiment !join [*dataset_name, '-', *model_name]
56 | ckpt: !join ['./resource/ckpt/', *experiment, '.pt']
57 |
58 | test:
59 | test_data_loader:
60 | dataset_id: *imagenet_val
61 | random_sample: False
62 | batch_size: 1
63 | num_workers: 16
64 |
--------------------------------------------------------------------------------
/legacy/configs/ilsvrc2012/input_compression/scale_hyperprior-resnet50.yaml:
--------------------------------------------------------------------------------
1 | datasets:
2 | ilsvrc2012:
3 | name: &dataset_name 'ilsvrc2012'
4 | type: 'ImageFolder'
5 | root: &root_dir !join ['~/dataset/', *dataset_name]
6 | splits:
7 | val:
8 | dataset_id: &imagenet_val !join [*dataset_name, '/val']
9 | params:
10 | root: !join [*root_dir, '/val']
11 | transform_params:
12 | - type: 'Resize'
13 | params:
14 | size: 256
15 | - type: 'CenterCrop'
16 | params:
17 | size: &input_size [224, 224]
18 | - type: 'ToTensor'
19 | params:
20 | - type: 'AdaptivePad'
21 | params:
22 | fill: 0
23 | factor: 64
24 |
25 | models:
26 | model:
27 | name: 'NeuralInputCompressionClassifier'
28 | params:
29 | post_transform_params:
30 | - type: 'CenterCrop'
31 | params:
32 | size: *input_size
33 | - type: 'Normalize'
34 | params:
35 | mean: [0.485, 0.456, 0.406]
36 | std: [0.229, 0.224, 0.225]
37 | analysis_config:
38 | analyzes_after_compress: True
39 | analyzer_configs:
40 | - type: 'FileSizeAnalyzer'
41 | params:
42 | unit: 'KB'
43 | compression_model:
44 | name: 'bmshj2018_hyperprior'
45 | params:
46 | pretrained: True
47 | quality: 8
48 | metric: 'mse'
49 | ckpt: './resource/ckpt/input_compression/scale_hyperprior.pt'
50 | classification_model:
51 | name: &model_name 'resnet50'
52 | params:
53 | num_classes: 1000
54 | pretrained: True
55 | experiment: &experiment !join [*dataset_name, '-', *model_name]
56 | ckpt: !join ['./resource/ckpt/', *experiment, '.pt']
57 |
58 | test:
59 | test_data_loader:
60 | dataset_id: *imagenet_val
61 | random_sample: False
62 | batch_size: 1
63 | num_workers: 16
64 |
--------------------------------------------------------------------------------
/legacy/configs/ilsvrc2012/input_compression/mean_scale_hyperprior-resnet50.yaml:
--------------------------------------------------------------------------------
1 | datasets:
2 | ilsvrc2012:
3 | name: &dataset_name 'ilsvrc2012'
4 | type: 'ImageFolder'
5 | root: &root_dir !join ['~/dataset/', *dataset_name]
6 | splits:
7 | val:
8 | dataset_id: &imagenet_val !join [*dataset_name, '/val']
9 | params:
10 | root: !join [*root_dir, '/val']
11 | transform_params:
12 | - type: 'Resize'
13 | params:
14 | size: 256
15 | - type: 'CenterCrop'
16 | params:
17 | size: &input_size [224, 224]
18 | - type: 'ToTensor'
19 | params:
20 | - type: 'AdaptivePad'
21 | params:
22 | fill: 0
23 | factor: 64
24 |
25 | models:
26 | model:
27 | name: 'NeuralInputCompressionClassifier'
28 | params:
29 | post_transform_params:
30 | - type: 'CenterCrop'
31 | params:
32 | size: *input_size
33 | - type: 'Normalize'
34 | params:
35 | mean: [0.485, 0.456, 0.406]
36 | std: [0.229, 0.224, 0.225]
37 | analysis_config:
38 | analyzes_after_compress: True
39 | analyzer_configs:
40 | - type: 'FileSizeAnalyzer'
41 | params:
42 | unit: 'KB'
43 | compression_model:
44 | name: 'mbt2018_mean'
45 | params:
46 | pretrained: True
47 | quality: 8
48 | metric: 'mse'
49 | ckpt: './resource/ckpt/input_compression/mean_scale_hyperprior.pt'
50 | classification_model:
51 | name: &model_name 'resnet50'
52 | params:
53 | num_classes: 1000
54 | pretrained: True
55 | experiment: &experiment !join [*dataset_name, '-', *model_name]
56 | ckpt: !join ['./resource/ckpt/', *experiment, '.pt']
57 |
58 | test:
59 | test_data_loader:
60 | dataset_id: *imagenet_val
61 | random_sample: False
62 | batch_size: 1
63 | num_workers: 16
64 |
--------------------------------------------------------------------------------
/legacy/configs/pascal_voc2012/input_compression/jpeg-deeplabv3_resnet50.yaml:
--------------------------------------------------------------------------------
1 | datasets:
2 | pascal_voc:
3 | name: &dataset_name 'pascal_voc2012'
4 | type: 'VOCSegmentation'
5 | root: &root_dir '~/dataset'
6 | splits:
7 | val:
8 | dataset_id: &pascal_val !join [*dataset_name, '/val']
9 | params:
10 | root: *root_dir
11 | image_set: 'val'
12 | year: '2012'
13 | download: False
14 | transforms_compose_cls: 'CustomCompose'
15 | transforms_params: &val_transform
16 | - type: 'CustomRandomResize'
17 | params:
18 | min_size: 513
19 | max_size: 513
20 | - type: 'CustomToTensor'
21 | params:
22 | converts_sample: False
23 | converts_target: True
24 |
25 | models:
26 | model:
27 | name: 'CodecInputCompressionSegmentationModel'
28 | params:
29 | codec_params:
30 | - type: 'PILImageModule'
31 | params:
32 | format: 'JPEG'
33 | quality: 90
34 | returns_file_size: True
35 | analysis_config:
36 | analyzer_configs:
37 | - type: 'FileSizeAccumulator'
38 | params:
39 | unit: 'KB'
40 | post_transform_params:
41 | - type: 'ToTensor'
42 | params:
43 | - type: 'Normalize'
44 | params:
45 | mean: [0.485, 0.456, 0.406]
46 | std: [0.229, 0.224, 0.225]
47 | segmentation_model:
48 | name: 'deeplabv3_resnet50'
49 | params:
50 | pretrained: True
51 | pretrained_backbone: True
52 | num_classes: 21
53 | aux_loss: True
54 | ckpt: 'https://github.com/yoshitomo-matsubara/torchdistill/releases/download/v0.2.8/pascal_voc2012-deeplabv3_resnet50.pt'
55 |
56 | test:
57 | test_data_loader:
58 | dataset_id: *pascal_val
59 | random_sample: False
60 | batch_size: 1
61 | num_workers: 16
62 | collate_fn: 'pascal_seg_eval_collate_fn'
63 |
--------------------------------------------------------------------------------
/legacy/configs/pascal_voc2012/input_compression/webp-deeplabv3_resnet50.yaml:
--------------------------------------------------------------------------------
1 | datasets:
2 | pascal_voc:
3 | name: &dataset_name 'pascal_voc2012'
4 | type: 'VOCSegmentation'
5 | root: &root_dir '~/dataset'
6 | splits:
7 | val:
8 | dataset_id: &pascal_val !join [*dataset_name, '/val']
9 | params:
10 | root: *root_dir
11 | image_set: 'val'
12 | year: '2012'
13 | download: False
14 | transforms_compose_cls: 'CustomCompose'
15 | transforms_params: &val_transform
16 | - type: 'CustomRandomResize'
17 | params:
18 | min_size: 513
19 | max_size: 513
20 | - type: 'CustomToTensor'
21 | params:
22 | converts_sample: False
23 | converts_target: True
24 |
25 | models:
26 | model:
27 | name: 'CodecInputCompressionSegmentationModel'
28 | params:
29 | codec_params:
30 | - type: 'PILImageModule'
31 | params:
32 | format: 'WEBP'
33 | quality: 90
34 | returns_file_size: True
35 | analysis_config:
36 | analyzer_configs:
37 | - type: 'FileSizeAccumulator'
38 | params:
39 | unit: 'KB'
40 | post_transform_params:
41 | - type: 'ToTensor'
42 | params:
43 | - type: 'Normalize'
44 | params:
45 | mean: [0.485, 0.456, 0.406]
46 | std: [0.229, 0.224, 0.225]
47 | segmentation_model:
48 | name: 'deeplabv3_resnet50'
49 | params:
50 | pretrained: True
51 | pretrained_backbone: True
52 | num_classes: 21
53 | aux_loss: True
54 | ckpt: 'https://github.com/yoshitomo-matsubara/torchdistill/releases/download/v0.2.8/pascal_voc2012-deeplabv3_resnet50.pt'
55 |
56 | test:
57 | test_data_loader:
58 | dataset_id: *pascal_val
59 | random_sample: False
60 | batch_size: 1
61 | num_workers: 16
62 | collate_fn: 'pascal_seg_eval_collate_fn'
63 |
--------------------------------------------------------------------------------
/legacy/configs/pascal_voc2012/input_compression/jpeg-deeplabv3_resnet101.yaml:
--------------------------------------------------------------------------------
1 | datasets:
2 | pascal_voc:
3 | name: &dataset_name 'pascal_voc2012'
4 | type: 'VOCSegmentation'
5 | root: &root_dir '~/dataset'
6 | splits:
7 | val:
8 | dataset_id: &pascal_val !join [*dataset_name, '/val']
9 | params:
10 | root: *root_dir
11 | image_set: 'val'
12 | year: '2012'
13 | download: False
14 | transforms_compose_cls: 'CustomCompose'
15 | transforms_params: &val_transform
16 | - type: 'CustomRandomResize'
17 | params:
18 | min_size: 513
19 | max_size: 513
20 | - type: 'CustomToTensor'
21 | params:
22 | converts_sample: False
23 | converts_target: True
24 |
25 | models:
26 | model:
27 | name: 'CodecInputCompressionSegmentationModel'
28 | params:
29 | codec_params:
30 | - type: 'PILImageModule'
31 | params:
32 | format: 'JPEG'
33 | quality: 90
34 | returns_file_size: True
35 | analysis_config:
36 | analyzer_configs:
37 | - type: 'FileSizeAccumulator'
38 | params:
39 | unit: 'KB'
40 | post_transform_params:
41 | - type: 'ToTensor'
42 | params:
43 | - type: 'Normalize'
44 | params:
45 | mean: [0.485, 0.456, 0.406]
46 | std: [0.229, 0.224, 0.225]
47 | segmentation_model:
48 | name: 'deeplabv3_resnet101'
49 | params:
50 | pretrained: False
51 | pretrained_backbone: True
52 | num_classes: 21
53 | aux_loss: True
54 | ckpt: 'https://github.com/yoshitomo-matsubara/torchdistill/releases/download/v0.2.8/pascal_voc2012-deeplabv3_resnet101.pt'
55 |
56 | test:
57 | test_data_loader:
58 | dataset_id: *pascal_val
59 | random_sample: False
60 | batch_size: 1
61 | num_workers: 16
62 | collate_fn: 'pascal_seg_eval_collate_fn'
63 |
--------------------------------------------------------------------------------
/legacy/configs/pascal_voc2012/input_compression/webp-deeplabv3_resnet101.yaml:
--------------------------------------------------------------------------------
1 | datasets:
2 | pascal_voc:
3 | name: &dataset_name 'pascal_voc2012'
4 | type: 'VOCSegmentation'
5 | root: &root_dir '~/dataset'
6 | splits:
7 | val:
8 | dataset_id: &pascal_val !join [*dataset_name, '/val']
9 | params:
10 | root: *root_dir
11 | image_set: 'val'
12 | year: '2012'
13 | download: False
14 | transforms_compose_cls: 'CustomCompose'
15 | transforms_params: &val_transform
16 | - type: 'CustomRandomResize'
17 | params:
18 | min_size: 513
19 | max_size: 513
20 | - type: 'CustomToTensor'
21 | params:
22 | converts_sample: False
23 | converts_target: True
24 |
25 | models:
26 | model:
27 | name: 'CodecInputCompressionSegmentationModel'
28 | params:
29 | codec_params:
30 | - type: 'PILImageModule'
31 | params:
32 | format: 'WEBP'
33 | quality: 90
34 | returns_file_size: True
35 | analysis_config:
36 | analyzer_configs:
37 | - type: 'FileSizeAccumulator'
38 | params:
39 | unit: 'KB'
40 | post_transform_params:
41 | - type: 'ToTensor'
42 | params:
43 | - type: 'Normalize'
44 | params:
45 | mean: [0.485, 0.456, 0.406]
46 | std: [0.229, 0.224, 0.225]
47 | segmentation_model:
48 | name: 'deeplabv3_resnet101'
49 | params:
50 | pretrained: True
51 | pretrained_backbone: True
52 | num_classes: 21
53 | aux_loss: True
54 | ckpt: 'https://github.com/yoshitomo-matsubara/torchdistill/releases/download/v0.2.8/pascal_voc2012-deeplabv3_resnet101.pt'
55 |
56 | test:
57 | test_data_loader:
58 | dataset_id: *pascal_val
59 | random_sample: False
60 | batch_size: 1
61 | num_workers: 16
62 | collate_fn: 'pascal_seg_eval_collate_fn'
63 |
--------------------------------------------------------------------------------
/script/neural_input_compression/README.md:
--------------------------------------------------------------------------------
1 | # Neural Input Compression Baselines
2 |
3 | We considered the following neural image compression models:
4 | - Factorized Prior
5 | - Scale Hyperprior
6 | - Mean-scale Hyperprior
7 | - Joint Autoregressive Hierarchical Prior
8 |
9 |
10 | ## ImageNet (ILSVRC 2012): Image Classification
11 | Neural input compression followed by ResNet-50
12 |
13 | ```shell
14 | bash script/neural_input_compression/ilsvrc2012-image_classification.sh factorized_prior-resnet50 8
15 | bash script/neural_input_compression/ilsvrc2012-image_classification.sh scale_hyperprior-resnet50 8
16 | bash script/neural_input_compression/ilsvrc2012-image_classification.sh mean_scale_hyperprior-resnet50 8
17 | bash script/neural_input_compression/ilsvrc2012-image_classification.sh joint_autoregressive_hierarchical_prior-resnet50 8
18 | ```
19 |
20 | ## COCO 2017: Object Detection
21 | Neural input compression followed by Faster R-CNN with ResNet-50 and FPN
22 |
23 | ```shell
24 | bash script/neural_input_compression/coco2017-object_detection.sh factorized_prior-faster_rcnn_resnet50_fpn 8
25 | bash script/neural_input_compression/coco2017-object_detection.sh scale_hyperprior-faster_rcnn_resnet50_fpn 8
26 | bash script/neural_input_compression/coco2017-object_detection.sh mean_scale_hyperprior-faster_rcnn_resnet50_fpn 8
27 | bash script/neural_input_compression/coco2017-object_detection.sh joint_autoregressive_hierarchical_prior-faster_rcnn_resnet50_fpn 8
28 | ```
29 |
30 | ## PASCAL VOC 2012: Semantic Segmentation
31 | Neural input compression followed by DeepLabv3 with ResNet-50
32 |
33 | ```shell
34 | bash script/neural_input_compression/pascal_voc2012-semantic_segmentation.sh factorized_prior-deeplabv3_resnet50 8
35 | bash script/neural_input_compression/pascal_voc2012-semantic_segmentation.sh scale_hyperprior-deeplabv3_resnet50 8
36 | bash script/neural_input_compression/pascal_voc2012-semantic_segmentation.sh mean_scale_hyperprior-deeplabv3_resnet50 8
37 | bash script/neural_input_compression/pascal_voc2012-semantic_segmentation.sh joint_autoregressive_hierarchical_prior-deeplabv3_resnet50 8
38 | ```
39 |
--------------------------------------------------------------------------------
/legacy/configs/ilsvrc2012/input_compression/joint_autoregressive_hierarchical_prior-resnet50.yaml:
--------------------------------------------------------------------------------
1 | datasets:
2 | ilsvrc2012:
3 | name: &dataset_name 'ilsvrc2012'
4 | type: 'ImageFolder'
5 | root: &root_dir !join ['~/dataset/', *dataset_name]
6 | splits:
7 | val:
8 | dataset_id: &imagenet_val !join [*dataset_name, '/val']
9 | params:
10 | root: !join [*root_dir, '/val']
11 | transform_params:
12 | - type: 'Resize'
13 | params:
14 | size: 256
15 | - type: 'CenterCrop'
16 | params:
17 | size: &input_size [224, 224]
18 | - type: 'ToTensor'
19 | params:
20 | - type: 'AdaptivePad'
21 | params:
22 | fill: 0
23 | factor: 64
24 |
25 | models:
26 | model:
27 | name: 'NeuralInputCompressionClassifier'
28 | params:
29 | post_transform_params:
30 | - type: 'CenterCrop'
31 | params:
32 | size: *input_size
33 | - type: 'Normalize'
34 | params:
35 | mean: [0.485, 0.456, 0.406]
36 | std: [0.229, 0.224, 0.225]
37 | analysis_config:
38 | analyzes_after_compress: True
39 | analyzer_configs:
40 | - type: 'FileSizeAnalyzer'
41 | params:
42 | unit: 'KB'
43 | uses_cpu4compression_model: True
44 | compression_model:
45 | name: 'mbt2018'
46 | params:
47 | pretrained: True
48 | quality: 8
49 | metric: 'mse'
50 | ckpt: './resource/ckpt/input_compression/joint_autoregressive_hierarchical_prior.pt'
51 | classification_model:
52 | name: &model_name 'resnet50'
53 | params:
54 | num_classes: 1000
55 | pretrained: True
56 | experiment: &experiment !join [*dataset_name, '-', *model_name]
57 | ckpt: !join ['./resource/ckpt/', *experiment, '.pt']
58 |
59 | test:
60 | test_data_loader:
61 | dataset_id: *imagenet_val
62 | random_sample: False
63 | batch_size: 1
64 | num_workers: 16
65 |
--------------------------------------------------------------------------------
/legacy/configs/pascal_voc2012/input_compression/bpg-deeplabv3_resnet50.yaml:
--------------------------------------------------------------------------------
1 | datasets:
2 | pascal_voc:
3 | name: &dataset_name 'pascal_voc2012'
4 | type: 'VOCSegmentation'
5 | root: &root_dir '~/dataset'
6 | splits:
7 | val:
8 | dataset_id: &pascal_val !join [*dataset_name, '/val']
9 | params:
10 | root: *root_dir
11 | image_set: 'val'
12 | year: '2012'
13 | download: False
14 | transforms_compose_cls: 'CustomCompose'
15 | transforms_params: &val_transform
16 | - type: 'CustomRandomResize'
17 | params:
18 | min_size: 513
19 | max_size: 513
20 | - type: 'CustomToTensor'
21 | params:
22 | converts_sample: False
23 | converts_target: True
24 |
25 | models:
26 | model:
27 | name: 'CodecInputCompressionSegmentationModel'
28 | params:
29 | codec_params:
30 | - type: 'BPGModule'
31 | params:
32 | encoder_path: '~/software/libbpg-0.9.8/bpgenc'
33 | decoder_path: '~/software/libbpg-0.9.8/bpgdec'
34 | quality: 50
35 | returns_file_size: True
36 | analysis_config:
37 | analyzer_configs:
38 | - type: 'FileSizeAccumulator'
39 | params:
40 | unit: 'KB'
41 | post_transform_params:
42 | - type: 'ToTensor'
43 | params:
44 | - type: 'Normalize'
45 | params:
46 | mean: [0.485, 0.456, 0.406]
47 | std: [0.229, 0.224, 0.225]
48 | segmentation_model:
49 | name: 'deeplabv3_resnet50'
50 | params:
51 | pretrained: True
52 | pretrained_backbone: True
53 | num_classes: 21
54 | aux_loss: True
55 | ckpt: 'https://github.com/yoshitomo-matsubara/torchdistill/releases/download/v0.2.8/pascal_voc2012-deeplabv3_resnet50.pt'
56 |
57 | test:
58 | test_data_loader:
59 | dataset_id: *pascal_val
60 | random_sample: False
61 | batch_size: 1
62 | num_workers: 16
63 | collate_fn: 'pascal_seg_eval_collate_fn'
64 |
--------------------------------------------------------------------------------
/legacy/configs/pascal_voc2012/input_compression/bpg-deeplabv3_resnet101.yaml:
--------------------------------------------------------------------------------
1 | datasets:
2 | pascal_voc:
3 | name: &dataset_name 'pascal_voc2012'
4 | type: 'VOCSegmentation'
5 | root: &root_dir '~/dataset'
6 | splits:
7 | val:
8 | dataset_id: &pascal_val !join [*dataset_name, '/val']
9 | params:
10 | root: *root_dir
11 | image_set: 'val'
12 | year: '2012'
13 | download: False
14 | transforms_compose_cls: 'CustomCompose'
15 | transforms_params: &val_transform
16 | - type: 'CustomRandomResize'
17 | params:
18 | min_size: 513
19 | max_size: 513
20 | - type: 'CustomToTensor'
21 | params:
22 | converts_sample: False
23 | converts_target: True
24 |
25 | models:
26 | model:
27 | name: 'CodecInputCompressionSegmentationModel'
28 | params:
29 | codec_params:
30 | - type: 'BPGModule'
31 | params:
32 | encoder_path: '~/software/libbpg-0.9.8/bpgenc'
33 | decoder_path: '~/software/libbpg-0.9.8/bpgdec'
34 | quality: 50
35 | returns_file_size: True
36 | analysis_config:
37 | analyzer_configs:
38 | - type: 'FileSizeAccumulator'
39 | params:
40 | unit: 'KB'
41 | post_transform_params:
42 | - type: 'ToTensor'
43 | params:
44 | - type: 'Normalize'
45 | params:
46 | mean: [0.485, 0.456, 0.406]
47 | std: [0.229, 0.224, 0.225]
48 | segmentation_model:
49 | name: 'deeplabv3_resnet101'
50 | params:
51 | pretrained: True
52 | pretrained_backbone: True
53 | num_classes: 21
54 | aux_loss: True
55 | ckpt: 'https://github.com/yoshitomo-matsubara/torchdistill/releases/download/v0.2.8/pascal_voc2012-deeplabv3_resnet101.pt'
56 |
57 | test:
58 | test_data_loader:
59 | dataset_id: *pascal_val
60 | random_sample: False
61 | batch_size: 1
62 | num_workers: 16
63 | collate_fn: 'pascal_seg_eval_collate_fn'
64 |
--------------------------------------------------------------------------------
/legacy/configs/ilsvrc2012/input_compression/factorized_prior-tf_efficientnet_l2_ns.yaml:
--------------------------------------------------------------------------------
1 | datasets:
2 | ilsvrc2012:
3 | name: &dataset_name 'ilsvrc2012'
4 | type: 'ImageFolder'
5 | root: &root_dir !join ['~/dataset/', *dataset_name]
6 | splits:
7 | val:
8 | dataset_id: &imagenet_val !join [*dataset_name, '/val']
9 | params:
10 | root: !join [*root_dir, '/val']
11 | transform_params:
12 | - type: 'WrappedResize'
13 | params:
14 | size: 833
15 | interpolation: 'bicubic'
16 | - type: 'CenterCrop'
17 | params:
18 | size: &input_size [800, 800]
19 | - type: 'ToTensor'
20 | params:
21 | - type: 'AdaptivePad'
22 | params:
23 | fill: 0
24 | factor: 64
25 |
26 | models:
27 | model:
28 | name: 'NeuralInputCompressionClassifier'
29 | params:
30 | post_transform_params:
31 | - type: 'CenterCrop'
32 | params:
33 | size: *input_size
34 | - type: 'Normalize'
35 | params:
36 | mean: [0.485, 0.456, 0.406]
37 | std: [0.229, 0.224, 0.225]
38 | analysis_config:
39 | analyzes_after_compress: True
40 | analyzer_configs:
41 | - type: 'FileSizeAnalyzer'
42 | params:
43 | unit: 'KB'
44 | compression_model:
45 | name: 'bmshj2018_factorized'
46 | params:
47 | pretrained: True
48 | quality: 8
49 | metric: 'mse'
50 | ckpt: './resource/ckpt/input_compression/factorized_prior.pt'
51 | classification_model:
52 | name: &model_name 'tf_efficientnet_l2_ns'
53 | repo_or_dir: 'rwightman/pytorch-image-models'
54 | params:
55 | num_classes: 1000
56 | pretrained: True
57 | experiment: &experiment !join [*dataset_name, '-', *model_name]
58 | ckpt: !join ['./imagenet/vanilla/', *experiment, '.pt']
59 |
60 | test:
61 | test_data_loader:
62 | dataset_id: *imagenet_val
63 | random_sample: False
64 | batch_size: 1
65 | num_workers: 16
66 |
--------------------------------------------------------------------------------
/legacy/configs/ilsvrc2012/input_compression/mean_scale_hyperprior-tf_efficientnet_l2_ns.yaml:
--------------------------------------------------------------------------------
1 | datasets:
2 | ilsvrc2012:
3 | name: &dataset_name 'ilsvrc2012'
4 | type: 'ImageFolder'
5 | root: &root_dir !join ['~/dataset/', *dataset_name]
6 | splits:
7 | val:
8 | dataset_id: &imagenet_val !join [*dataset_name, '/val']
9 | params:
10 | root: !join [*root_dir, '/val']
11 | transform_params:
12 | - type: 'WrappedResize'
13 | params:
14 | size: 833
15 | interpolation: 'bicubic'
16 | - type: 'CenterCrop'
17 | params:
18 | size: &input_size [800, 800]
19 | - type: 'ToTensor'
20 | params:
21 | - type: 'AdaptivePad'
22 | params:
23 | fill: 0
24 | factor: 64
25 |
26 | models:
27 | model:
28 | name: 'NeuralInputCompressionClassifier'
29 | params:
30 | post_transform_params:
31 | - type: 'CenterCrop'
32 | params:
33 | size: *input_size
34 | - type: 'Normalize'
35 | params:
36 | mean: [0.485, 0.456, 0.406]
37 | std: [0.229, 0.224, 0.225]
38 | analysis_config:
39 | analyzes_after_compress: True
40 | analyzer_configs:
41 | - type: 'FileSizeAnalyzer'
42 | params:
43 | unit: 'KB'
44 | compression_model:
45 | name: 'mbt2018_mean'
46 | params:
47 | pretrained: True
48 | quality: 8
49 | metric: 'mse'
50 | ckpt: './resource/ckpt/input_compression/mean_scale_hyperprior.pt'
51 | classification_model:
52 | name: &model_name 'tf_efficientnet_l2_ns'
53 | repo_or_dir: 'rwightman/pytorch-image-models'
54 | params:
55 | num_classes: 1000
56 | pretrained: True
57 | experiment: &experiment !join [*dataset_name, '-', *model_name]
58 | ckpt: !join ['./imagenet/vanilla/', *experiment, '.pt']
59 |
60 | test:
61 | test_data_loader:
62 | dataset_id: *imagenet_val
63 | random_sample: False
64 | batch_size: 1
65 | num_workers: 16
66 |
--------------------------------------------------------------------------------
/legacy/configs/ilsvrc2012/input_compression/scale_hyperprior-tf_efficientnet_l2_ns.yaml:
--------------------------------------------------------------------------------
1 | datasets:
2 | ilsvrc2012:
3 | name: &dataset_name 'ilsvrc2012'
4 | type: 'ImageFolder'
5 | root: &root_dir !join ['~/dataset/', *dataset_name]
6 | splits:
7 | val:
8 | dataset_id: &imagenet_val !join [*dataset_name, '/val']
9 | params:
10 | root: !join [*root_dir, '/val']
11 | transform_params:
12 | - type: 'WrappedResize'
13 | params:
14 | size: 833
15 | interpolation: 'bicubic'
16 | - type: 'CenterCrop'
17 | params:
18 | size: &input_size [800, 800]
19 | - type: 'ToTensor'
20 | params:
21 | - type: 'AdaptivePad'
22 | params:
23 | fill: 0
24 | factor: 64
25 |
26 | models:
27 | model:
28 | name: 'NeuralInputCompressionClassifier'
29 | params:
30 | post_transform_params:
31 | - type: 'CenterCrop'
32 | params:
33 | size: *input_size
34 | - type: 'Normalize'
35 | params:
36 | mean: [0.485, 0.456, 0.406]
37 | std: [0.229, 0.224, 0.225]
38 | analysis_config:
39 | analyzes_after_compress: True
40 | analyzer_configs:
41 | - type: 'FileSizeAnalyzer'
42 | params:
43 | unit: 'KB'
44 | compression_model:
45 | name: 'bmshj2018_hyperprior'
46 | params:
47 | pretrained: True
48 | quality: 8
49 | metric: 'mse'
50 | ckpt: './resource/ckpt/input_compression/scale_hyperprior.pt'
51 | classification_model:
52 | name: &model_name 'tf_efficientnet_l2_ns'
53 | repo_or_dir: 'rwightman/pytorch-image-models'
54 | params:
55 | num_classes: 1000
56 | pretrained: True
57 | experiment: &experiment !join [*dataset_name, '-', *model_name]
58 | ckpt: !join ['./imagenet/vanilla/', *experiment, '.pt']
59 |
60 | test:
61 | test_data_loader:
62 | dataset_id: *imagenet_val
63 | random_sample: False
64 | batch_size: 1
65 | num_workers: 16
66 |
--------------------------------------------------------------------------------
/legacy/configs/ilsvrc2012/input_compression/scale_hyperprior-tf_efficientnet_l2_ns_475.yaml:
--------------------------------------------------------------------------------
1 | datasets:
2 | ilsvrc2012:
3 | name: &dataset_name 'ilsvrc2012'
4 | type: 'ImageFolder'
5 | root: &root_dir !join ['~/dataset/', *dataset_name]
6 | splits:
7 | val:
8 | dataset_id: &imagenet_val !join [*dataset_name, '/val']
9 | params:
10 | root: !join [*root_dir, '/val']
11 | transform_params:
12 | - type: 'WrappedResize'
13 | params:
14 | size: 507
15 | interpolation: 'bicubic'
16 | - type: 'CenterCrop'
17 | params:
18 | size: &input_size [475, 475]
19 | - type: 'ToTensor'
20 | params:
21 | - type: 'AdaptivePad'
22 | params:
23 | fill: 0
24 | factor: 64
25 |
26 | models:
27 | model:
28 | name: 'NeuralInputCompressionClassifier'
29 | params:
30 | post_transform_params:
31 | - type: 'CenterCrop'
32 | params:
33 | size: *input_size
34 | - type: 'Normalize'
35 | params:
36 | mean: [0.485, 0.456, 0.406]
37 | std: [0.229, 0.224, 0.225]
38 | analysis_config:
39 | analyzes_after_compress: True
40 | analyzer_configs:
41 | - type: 'FileSizeAnalyzer'
42 | params:
43 | unit: 'KB'
44 | compression_model:
45 | name: 'bmshj2018_hyperprior'
46 | params:
47 | pretrained: True
48 | quality: 8
49 | metric: 'mse'
50 | ckpt: './resource/ckpt/input_compression/scale_hyperprior.pt'
51 | classification_model:
52 | name: &model_name 'tf_efficientnet_l2_ns'
53 | repo_or_dir: 'rwightman/pytorch-image-models'
54 | params:
55 | num_classes: 1000
56 | pretrained: True
57 | experiment: &experiment !join [*dataset_name, '-', *model_name]
58 | ckpt: !join ['./imagenet/vanilla/', *experiment, '.pt']
59 |
60 | test:
61 | test_data_loader:
62 | dataset_id: *imagenet_val
63 | random_sample: False
64 | batch_size: 1
65 | num_workers: 16
66 |
--------------------------------------------------------------------------------
/legacy/configs/ilsvrc2012/input_compression/factorized_prior-tf_efficientnet_l2_ns_475.yaml:
--------------------------------------------------------------------------------
1 | datasets:
2 | ilsvrc2012:
3 | name: &dataset_name 'ilsvrc2012'
4 | type: 'ImageFolder'
5 | root: &root_dir !join ['~/dataset/', *dataset_name]
6 | splits:
7 | val:
8 | dataset_id: &imagenet_val !join [*dataset_name, '/val']
9 | params:
10 | root: !join [*root_dir, '/val']
11 | transform_params:
12 | - type: 'WrappedResize'
13 | params:
14 | size: 507
15 | interpolation: 'bicubic'
16 | - type: 'CenterCrop'
17 | params:
18 | size: &input_size [475, 475]
19 | - type: 'ToTensor'
20 | params:
21 | - type: 'AdaptivePad'
22 | params:
23 | fill: 0
24 | factor: 64
25 |
26 | models:
27 | model:
28 | name: 'NeuralInputCompressionClassifier'
29 | params:
30 | post_transform_params:
31 | - type: 'CenterCrop'
32 | params:
33 | size: *input_size
34 | - type: 'Normalize'
35 | params:
36 | mean: [0.485, 0.456, 0.406]
37 | std: [0.229, 0.224, 0.225]
38 | analysis_config:
39 | analyzes_after_compress: True
40 | analyzer_configs:
41 | - type: 'FileSizeAnalyzer'
42 | params:
43 | unit: 'KB'
44 | compression_model:
45 | name: 'bmshj2018_factorized'
46 | params:
47 | pretrained: True
48 | quality: 8
49 | metric: 'mse'
50 | ckpt: './resource/ckpt/input_compression/factorized_prior.pt'
51 | classification_model:
52 | name: &model_name 'tf_efficientnet_l2_ns_475'
53 | repo_or_dir: 'rwightman/pytorch-image-models'
54 | params:
55 | num_classes: 1000
56 | pretrained: True
57 | experiment: &experiment !join [*dataset_name, '-', *model_name]
58 | ckpt: !join ['./imagenet/vanilla/', *experiment, '.pt']
59 |
60 | test:
61 | test_data_loader:
62 | dataset_id: *imagenet_val
63 | random_sample: False
64 | batch_size: 1
65 | num_workers: 16
66 |
--------------------------------------------------------------------------------
/legacy/configs/ilsvrc2012/input_compression/mean_scale_hyperprior-tf_efficientnet_l2_ns_475.yaml:
--------------------------------------------------------------------------------
1 | datasets:
2 | ilsvrc2012:
3 | name: &dataset_name 'ilsvrc2012'
4 | type: 'ImageFolder'
5 | root: &root_dir !join ['~/dataset/', *dataset_name]
6 | splits:
7 | val:
8 | dataset_id: &imagenet_val !join [*dataset_name, '/val']
9 | params:
10 | root: !join [*root_dir, '/val']
11 | transform_params:
12 | - type: 'WrappedResize'
13 | params:
14 | size: 507
15 | interpolation: 'bicubic'
16 | - type: 'CenterCrop'
17 | params:
18 | size: &input_size [475, 475]
19 | - type: 'ToTensor'
20 | params:
21 | - type: 'AdaptivePad'
22 | params:
23 | fill: 0
24 | factor: 64
25 |
26 | models:
27 | model:
28 | name: 'NeuralInputCompressionClassifier'
29 | params:
30 | post_transform_params:
31 | - type: 'CenterCrop'
32 | params:
33 | size: *input_size
34 | - type: 'Normalize'
35 | params:
36 | mean: [0.485, 0.456, 0.406]
37 | std: [0.229, 0.224, 0.225]
38 | analysis_config:
39 | analyzes_after_compress: True
40 | analyzer_configs:
41 | - type: 'FileSizeAnalyzer'
42 | params:
43 | unit: 'KB'
44 | compression_model:
45 | name: 'mbt2018_mean'
46 | params:
47 | pretrained: True
48 | quality: 8
49 | metric: 'mse'
50 | ckpt: './resource/ckpt/input_compression/mean_scale_hyperprior.pt'
51 | classification_model:
52 | name: &model_name 'tf_efficientnet_l2_ns_475'
53 | repo_or_dir: 'rwightman/pytorch-image-models'
54 | params:
55 | num_classes: 1000
56 | pretrained: True
57 | experiment: &experiment !join [*dataset_name, '-', *model_name]
58 | ckpt: !join ['./imagenet/vanilla/', *experiment, '.pt']
59 |
60 | test:
61 | test_data_loader:
62 | dataset_id: *imagenet_val
63 | random_sample: False
64 | batch_size: 1
65 | num_workers: 16
66 |
--------------------------------------------------------------------------------
/legacy/script/neural_input_compression/README.md:
--------------------------------------------------------------------------------
1 | # Neural Input Compression Baselines
2 |
3 | We considered the following neural image compression models:
4 | - Factorized Prior
5 | - Scale Hyperprior
6 | - Mean-scale Hyperprior
7 | - Joint Autoregressive Hierarchical Prior
8 |
9 |
10 | ## ImageNet (ILSVRC 2012): Image Classification
11 | Neural input compression followed by ResNet-50
12 |
13 | ```shell
14 | bash legacy/script/neural_input_compression/ilsvrc2012-image_classification.sh factorized_prior-resnet50 8
15 | bash legacy/script/neural_input_compression/ilsvrc2012-image_classification.sh scale_hyperprior-resnet50 8
16 | bash legacy/script/neural_input_compression/ilsvrc2012-image_classification.sh mean_scale_hyperprior-resnet50 8
17 | bash legacy/script/neural_input_compression/ilsvrc2012-image_classification.sh joint_autoregressive_hierarchical_prior-resnet50 8
18 | ```
19 |
20 | ## COCO 2017: Object Detection
21 | Neural input compression followed by Faster R-CNN with ResNet-50 and FPN
22 |
23 | ```shell
24 | bash legacy/script/neural_input_compression/coco2017-object_detection.sh factorized_prior-faster_rcnn_resnet50_fpn 8
25 | bash legacy/script/neural_input_compression/coco2017-object_detection.sh scale_hyperprior-faster_rcnn_resnet50_fpn 8
26 | bash legacy/script/neural_input_compression/coco2017-object_detection.sh mean_scale_hyperprior-faster_rcnn_resnet50_fpn 8
27 | bash legacy/script/neural_input_compression/coco2017-object_detection.sh joint_autoregressive_hierarchical_prior-faster_rcnn_resnet50_fpn 8
28 | ```
29 |
30 | ## PASCAL VOC 2012: Semantic Segmentation
31 | Neural input compression followed by DeepLabv3 with ResNet-50
32 |
33 | ```shell
34 | bash legacy/script/neural_input_compression/pascal_voc2012-semantic_segmentation.sh factorized_prior-deeplabv3_resnet50 8
35 | bash legacy/script/neural_input_compression/pascal_voc2012-semantic_segmentation.sh scale_hyperprior-deeplabv3_resnet50 8
36 | bash legacy/script/neural_input_compression/pascal_voc2012-semantic_segmentation.sh mean_scale_hyperprior-deeplabv3_resnet50 8
37 | bash legacy/script/neural_input_compression/pascal_voc2012-semantic_segmentation.sh joint_autoregressive_hierarchical_prior-deeplabv3_resnet50 8
38 | ```
39 |
--------------------------------------------------------------------------------
/sc2bench/transforms/collator.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from torchdistill.datasets.registry import register_collate_func
3 |
4 |
5 | def cat_list(images, fill_value=0):
6 | """
7 | Concatenates a list of images with the max size for each of heights and widths and
8 | fills empty spaces with a specified value.
9 |
10 |     :param images: list of image tensors to batch together
11 |     :type images: list[torch.Tensor]
12 | :param fill_value: value to be filled
13 | :type fill_value: int
14 |     :return: batched image tensor (or ``images`` unchanged if it holds a single non-tensor element)
15 |     :rtype: torch.Tensor or list
16 | """
17 | if len(images) == 1 and not isinstance(images[0], torch.Tensor):
18 | return images
19 |
20 | max_size = tuple(max(s) for s in zip(*[img.shape for img in images]))
21 | batch_shape = (len(images),) + max_size
22 | batched_imgs = images[0].new(*batch_shape).fill_(fill_value)
23 | for img, pad_img in zip(images, batched_imgs):
24 | pad_img[..., :img.shape[-2], :img.shape[-1]].copy_(img)
25 | return batched_imgs
26 |
27 |
28 | @register_collate_func
29 | def pascal_seg_collate_fn(batch):
30 | """
31 | Collates input data for PASCAL VOC 2012 segmentation.
32 |
33 | :param batch: list/tuple of triplets (image, target, supp_dict), where supp_dict can be an empty dict
34 | :type batch: list or tuple
35 | :return: collated images, targets, and supplementary dicts
36 |     :rtype: (torch.Tensor, torch.Tensor, list[dict])
37 | """
38 | images, targets, supp_dicts = list(zip(*batch))
39 | batched_imgs = cat_list(images, fill_value=0)
40 | batched_targets = cat_list(targets, fill_value=255)
41 | return batched_imgs, batched_targets, supp_dicts
42 |
43 |
44 | @register_collate_func
45 | def pascal_seg_eval_collate_fn(batch):
46 | """
47 | Collates input data for PASCAL VOC 2012 segmentation in evaluation
48 |
49 | :param batch: list/tuple of tuples (image, target)
50 | :type batch: list or tuple
51 | :return: collated images and targets
52 |     :rtype: (torch.Tensor, torch.Tensor)
53 | """
54 | images, targets = list(zip(*batch))
55 | batched_imgs = cat_list(images, fill_value=0)
56 | batched_targets = cat_list(targets, fill_value=255)
57 | return batched_imgs, batched_targets
58 |
--------------------------------------------------------------------------------
/legacy/configs/pascal_voc2012/input_compression/factorized_prior-deeplabv3_resnet50.yaml:
--------------------------------------------------------------------------------
1 | datasets:
2 | pascal_voc:
3 | name: &dataset_name 'pascal_voc2012'
4 | type: 'VOCSegmentation'
5 | root: &root_dir '~/dataset'
6 | splits:
7 | val:
8 | dataset_id: &pascal_val !join [*dataset_name, '/val']
9 | params:
10 | root: *root_dir
11 | image_set: 'val'
12 | year: '2012'
13 | download: False
14 | transforms_compose_cls: 'CustomCompose'
15 | transforms_params: &val_transform
16 | - type: 'CustomRandomResize'
17 | params:
18 | min_size: 513
19 | max_size: 513
20 | - type: 'CustomToTensor'
21 | params:
22 | converts_sample: True
23 | converts_target: True
24 |
25 | models:
26 | model:
27 | name: 'NeuralInputCompressionSegmentationModel'
28 | params:
29 | pre_transform_params:
30 | - type: 'AdaptivePad'
31 | params:
32 | padding_position: 'right_bottom'
33 | returns_org_patch_size: True
34 | fill: 0
35 | factor: 64
36 | analysis_config:
37 | analyzes_after_compress: True
38 | analyzer_configs:
39 | - type: 'FileSizeAnalyzer'
40 | params:
41 | unit: 'KB'
42 | post_transform_params:
43 | - type: 'Normalize'
44 | params:
45 | mean: [0.485, 0.456, 0.406]
46 | std: [0.229, 0.224, 0.225]
47 | compression_model:
48 | name: 'bmshj2018_factorized'
49 | params:
50 | pretrained: True
51 | quality: 8
52 | metric: 'mse'
53 | ckpt: './resource/ckpt/input_compression/factorized_prior.pt'
54 | segmentation_model:
55 | name: 'deeplabv3_resnet50'
56 | params:
57 | pretrained: True
58 | pretrained_backbone: True
59 | num_classes: 21
60 | aux_loss: True
61 | ckpt: 'https://github.com/yoshitomo-matsubara/torchdistill/releases/download/v0.2.8/pascal_voc2012-deeplabv3_resnet50.pt'
62 |
63 | test:
64 | test_data_loader:
65 | dataset_id: *pascal_val
66 | random_sample: False
67 | batch_size: 1
68 | num_workers: 16
69 | collate_fn: 'pascal_seg_eval_collate_fn'
70 |
--------------------------------------------------------------------------------
/legacy/configs/pascal_voc2012/input_compression/scale_hyperprior-deeplabv3_resnet50.yaml:
--------------------------------------------------------------------------------
1 | datasets:
2 | pascal_voc:
3 | name: &dataset_name 'pascal_voc2012'
4 | type: 'VOCSegmentation'
5 | root: &root_dir '~/dataset'
6 | splits:
7 | val:
8 | dataset_id: &pascal_val !join [*dataset_name, '/val']
9 | params:
10 | root: *root_dir
11 | image_set: 'val'
12 | year: '2012'
13 | download: False
14 | transforms_compose_cls: 'CustomCompose'
15 | transforms_params: &val_transform
16 | - type: 'CustomRandomResize'
17 | params:
18 | min_size: 513
19 | max_size: 513
20 | - type: 'CustomToTensor'
21 | params:
22 | converts_sample: True
23 | converts_target: True
24 |
25 | models:
26 | model:
27 | name: 'NeuralInputCompressionSegmentationModel'
28 | params:
29 | pre_transform_params:
30 | - type: 'AdaptivePad'
31 | params:
32 | padding_position: 'right_bottom'
33 | returns_org_patch_size: True
34 | fill: 0
35 | factor: 64
36 | analysis_config:
37 | analyzes_after_compress: True
38 | analyzer_configs:
39 | - type: 'FileSizeAnalyzer'
40 | params:
41 | unit: 'KB'
42 | post_transform_params:
43 | - type: 'Normalize'
44 | params:
45 | mean: [0.485, 0.456, 0.406]
46 | std: [0.229, 0.224, 0.225]
47 | compression_model:
48 | name: 'bmshj2018_hyperprior'
49 | params:
50 | pretrained: True
51 | quality: 8
52 | metric: 'mse'
53 | ckpt: './resource/ckpt/input_compression/scale_hyperprior.pt'
54 | segmentation_model:
55 | name: 'deeplabv3_resnet50'
56 | params:
57 | pretrained: True
58 | pretrained_backbone: True
59 | num_classes: 21
60 | aux_loss: True
61 | ckpt: 'https://github.com/yoshitomo-matsubara/torchdistill/releases/download/v0.2.8/pascal_voc2012-deeplabv3_resnet50.pt'
62 |
63 | test:
64 | test_data_loader:
65 | dataset_id: *pascal_val
66 | random_sample: False
67 | batch_size: 1
68 | num_workers: 16
69 | collate_fn: 'pascal_seg_eval_collate_fn'
70 |
--------------------------------------------------------------------------------
/legacy/configs/pascal_voc2012/input_compression/factorized_prior-deeplabv3_resnet101.yaml:
--------------------------------------------------------------------------------
1 | datasets:
2 | pascal_voc:
3 | name: &dataset_name 'pascal_voc2012'
4 | type: 'VOCSegmentation'
5 | root: &root_dir '~/dataset'
6 | splits:
7 | val:
8 | dataset_id: &pascal_val !join [*dataset_name, '/val']
9 | params:
10 | root: *root_dir
11 | image_set: 'val'
12 | year: '2012'
13 | download: False
14 | transforms_compose_cls: 'CustomCompose'
15 | transforms_params: &val_transform
16 | - type: 'CustomRandomResize'
17 | params:
18 | min_size: 513
19 | max_size: 513
20 | - type: 'CustomToTensor'
21 | params:
22 | converts_sample: True
23 | converts_target: True
24 |
25 | models:
26 | model:
27 | name: 'NeuralInputCompressionSegmentationModel'
28 | params:
29 | pre_transform_params:
30 | - type: 'AdaptivePad'
31 | params:
32 | padding_position: 'right_bottom'
33 | returns_org_patch_size: True
34 | fill: 0
35 | factor: 64
36 | analysis_config:
37 | analyzes_after_compress: True
38 | analyzer_configs:
39 | - type: 'FileSizeAnalyzer'
40 | params:
41 | unit: 'KB'
42 | post_transform_params:
43 | - type: 'Normalize'
44 | params:
45 | mean: [0.485, 0.456, 0.406]
46 | std: [0.229, 0.224, 0.225]
47 | compression_model:
48 | name: 'bmshj2018_factorized'
49 | params:
50 | pretrained: True
51 | quality: 8
52 | metric: 'mse'
53 | ckpt: './resource/ckpt/input_compression/factorized_prior.pt'
54 | segmentation_model:
55 | name: 'deeplabv3_resnet101'
56 | params:
57 | pretrained: True
58 | pretrained_backbone: True
59 | num_classes: 21
60 | aux_loss: True
61 | ckpt: 'https://github.com/yoshitomo-matsubara/torchdistill/releases/download/v0.2.8/pascal_voc2012-deeplabv3_resnet101.pt'
62 |
63 | test:
64 | test_data_loader:
65 | dataset_id: *pascal_val
66 | random_sample: False
67 | batch_size: 1
68 | num_workers: 16
69 | collate_fn: 'pascal_seg_eval_collate_fn'
70 |
--------------------------------------------------------------------------------
/legacy/configs/pascal_voc2012/input_compression/mean_scale_hyperprior-deeplabv3_resnet101.yaml:
--------------------------------------------------------------------------------
1 | datasets:
2 | pascal_voc:
3 | name: &dataset_name 'pascal_voc2012'
4 | type: 'VOCSegmentation'
5 | root: &root_dir '~/dataset'
6 | splits:
7 | val:
8 | dataset_id: &pascal_val !join [*dataset_name, '/val']
9 | params:
10 | root: *root_dir
11 | image_set: 'val'
12 | year: '2012'
13 | download: False
14 | transforms_compose_cls: 'CustomCompose'
15 | transforms_params: &val_transform
16 | - type: 'CustomRandomResize'
17 | params:
18 | min_size: 513
19 | max_size: 513
20 | - type: 'CustomToTensor'
21 | params:
22 | converts_sample: True
23 | converts_target: True
24 |
25 | models:
26 | model:
27 | name: 'NeuralInputCompressionSegmentationModel'
28 | params:
29 | pre_transform_params:
30 | - type: 'AdaptivePad'
31 | params:
32 | padding_position: 'right_bottom'
33 | returns_org_patch_size: True
34 | fill: 0
35 | factor: 64
36 | analysis_config:
37 | analyzes_after_compress: True
38 | analyzer_configs:
39 | - type: 'FileSizeAnalyzer'
40 | params:
41 | unit: 'KB'
42 | post_transform_params:
43 | - type: 'Normalize'
44 | params:
45 | mean: [0.485, 0.456, 0.406]
46 | std: [0.229, 0.224, 0.225]
47 | compression_model:
48 | name: 'mbt2018_mean'
49 | params:
50 | pretrained: True
51 | quality: 8
52 | metric: 'mse'
53 | ckpt: './resource/ckpt/input_compression/mean_scale_hyperprior.pt'
54 | segmentation_model:
55 | name: 'deeplabv3_resnet101'
56 | params:
57 | pretrained: True
58 | pretrained_backbone: True
59 | num_classes: 21
60 | aux_loss: True
61 | ckpt: 'https://github.com/yoshitomo-matsubara/torchdistill/releases/download/v0.2.8/pascal_voc2012-deeplabv3_resnet101.pt'
62 |
63 | test:
64 | test_data_loader:
65 | dataset_id: *pascal_val
66 | random_sample: False
67 | batch_size: 1
68 | num_workers: 16
69 | collate_fn: 'pascal_seg_eval_collate_fn'
70 |
--------------------------------------------------------------------------------
/legacy/configs/pascal_voc2012/input_compression/mean_scale_hyperprior-deeplabv3_resnet50.yaml:
--------------------------------------------------------------------------------
1 | datasets:
2 | pascal_voc:
3 | name: &dataset_name 'pascal_voc2012'
4 | type: 'VOCSegmentation'
5 | root: &root_dir '~/dataset'
6 | splits:
7 | val:
8 | dataset_id: &pascal_val !join [*dataset_name, '/val']
9 | params:
10 | root: *root_dir
11 | image_set: 'val'
12 | year: '2012'
13 | download: False
14 | transforms_compose_cls: 'CustomCompose'
15 | transforms_params: &val_transform
16 | - type: 'CustomRandomResize'
17 | params:
18 | min_size: 513
19 | max_size: 513
20 | - type: 'CustomToTensor'
21 | params:
22 | converts_sample: True
23 | converts_target: True
24 |
25 | models:
26 | model:
27 | name: 'NeuralInputCompressionSegmentationModel'
28 | params:
29 | pre_transform_params:
30 | - type: 'AdaptivePad'
31 | params:
32 | padding_position: 'right_bottom'
33 | returns_org_patch_size: True
34 | fill: 0
35 | factor: 64
36 | analysis_config:
37 | analyzes_after_compress: True
38 | analyzer_configs:
39 | - type: 'FileSizeAnalyzer'
40 | params:
41 | unit: 'KB'
42 | post_transform_params:
43 | - type: 'Normalize'
44 | params:
45 | mean: [0.485, 0.456, 0.406]
46 | std: [0.229, 0.224, 0.225]
47 | compression_model:
48 | name: 'mbt2018_mean'
49 | params:
50 | pretrained: True
51 | quality: 8
52 | metric: 'mse'
53 | ckpt: './resource/ckpt/input_compression/mean_scale_hyperprior.pt'
54 | segmentation_model:
55 | name: 'deeplabv3_resnet50'
56 | params:
57 | pretrained: True
58 | pretrained_backbone: True
59 | num_classes: 21
60 | aux_loss: True
61 | ckpt: 'https://github.com/yoshitomo-matsubara/torchdistill/releases/download/v0.2.8/pascal_voc2012-deeplabv3_resnet50.pt'
62 |
63 | test:
64 | test_data_loader:
65 | dataset_id: *pascal_val
66 | random_sample: False
67 | batch_size: 1
68 | num_workers: 16
69 | collate_fn: 'pascal_seg_eval_collate_fn'
70 |
--------------------------------------------------------------------------------
/legacy/configs/pascal_voc2012/input_compression/scale_hyperprior-deeplabv3_resnet101.yaml:
--------------------------------------------------------------------------------
1 | datasets:
2 | pascal_voc:
3 | name: &dataset_name 'pascal_voc2012'
4 | type: 'VOCSegmentation'
5 | root: &root_dir '~/dataset'
6 | splits:
7 | val:
8 | dataset_id: &pascal_val !join [*dataset_name, '/val']
9 | params:
10 | root: *root_dir
11 | image_set: 'val'
12 | year: '2012'
13 | download: False
14 | transforms_compose_cls: 'CustomCompose'
15 | transforms_params: &val_transform
16 | - type: 'CustomRandomResize'
17 | params:
18 | min_size: 513
19 | max_size: 513
20 | - type: 'CustomToTensor'
21 | params:
22 | converts_sample: True
23 | converts_target: True
24 |
25 | models:
26 | model:
27 | name: 'NeuralInputCompressionSegmentationModel'
28 | params:
29 | pre_transform_params:
30 | - type: 'AdaptivePad'
31 | params:
32 | padding_position: 'right_bottom'
33 | returns_org_patch_size: True
34 | fill: 0
35 | factor: 64
36 | analysis_config:
37 | analyzes_after_compress: True
38 | analyzer_configs:
39 | - type: 'FileSizeAnalyzer'
40 | params:
41 | unit: 'KB'
42 | post_transform_params:
43 | - type: 'Normalize'
44 | params:
45 | mean: [0.485, 0.456, 0.406]
46 | std: [0.229, 0.224, 0.225]
47 | compression_model:
48 | name: 'bmshj2018_hyperprior'
49 | params:
50 | pretrained: True
51 | quality: 8
52 | metric: 'mse'
53 | ckpt: './resource/ckpt/input_compression/scale_hyperprior.pt'
54 | segmentation_model:
55 | name: 'deeplabv3_resnet101'
56 | params:
57 | pretrained: True
58 | pretrained_backbone: True
59 | num_classes: 21
60 | aux_loss: True
61 | ckpt: 'https://github.com/yoshitomo-matsubara/torchdistill/releases/download/v0.2.8/pascal_voc2012-deeplabv3_resnet101.pt'
62 |
63 | test:
64 | test_data_loader:
65 | dataset_id: *pascal_val
66 | random_sample: False
67 | batch_size: 1
68 | num_workers: 16
69 | collate_fn: 'pascal_seg_eval_collate_fn'
70 |
--------------------------------------------------------------------------------
/legacy/configs/pascal_voc2012/input_compression/joint_autoregressive_hierarchical_prior-deeplabv3_resnet101.yaml:
--------------------------------------------------------------------------------
1 | datasets:
2 | pascal_voc:
3 | name: &dataset_name 'pascal_voc2012'
4 | type: 'VOCSegmentation'
5 | root: &root_dir '~/dataset'
6 | splits:
7 | val:
8 | dataset_id: &pascal_val !join [*dataset_name, '/val']
9 | params:
10 | root: *root_dir
11 | image_set: 'val'
12 | year: '2012'
13 | download: False
14 | transforms_compose_cls: 'CustomCompose'
15 | transforms_params: &val_transform
16 | - type: 'CustomRandomResize'
17 | params:
18 | min_size: 513
19 | max_size: 513
20 | - type: 'CustomToTensor'
21 | params:
22 | converts_sample: True
23 | converts_target: True
24 |
25 | models:
26 | model:
27 | name: 'NeuralInputCompressionSegmentationModel'
28 | params:
29 | pre_transform_params:
30 | - type: 'AdaptivePad'
31 | params:
32 | padding_position: 'right_bottom'
33 | returns_org_patch_size: True
34 | fill: 0
35 | factor: 64
36 | analysis_config:
37 | analyzes_after_compress: True
38 | analyzer_configs:
39 | - type: 'FileSizeAnalyzer'
40 | params:
41 | unit: 'KB'
42 | post_transform_params:
43 | - type: 'Normalize'
44 | params:
45 | mean: [0.485, 0.456, 0.406]
46 | std: [0.229, 0.224, 0.225]
47 | uses_cpu4compression_model: True
48 | compression_model:
49 | name: 'mbt2018'
50 | params:
51 | pretrained: True
52 | quality: 8
53 | metric: 'mse'
54 | ckpt: './resource/ckpt/input_compression/joint_autoregressive_hierarchical_prior.pt'
55 | segmentation_model:
56 | name: 'deeplabv3_resnet101'
57 | params:
58 | pretrained: True
59 | pretrained_backbone: True
60 | num_classes: 21
61 | aux_loss: True
62 | ckpt: 'https://github.com/yoshitomo-matsubara/torchdistill/releases/download/v0.2.8/pascal_voc2012-deeplabv3_resnet101.pt'
63 |
64 | test:
65 | test_data_loader:
66 | dataset_id: *pascal_val
67 | random_sample: False
68 | batch_size: 1
69 | num_workers: 16
70 | collate_fn: 'pascal_seg_eval_collate_fn'
71 |
--------------------------------------------------------------------------------
/legacy/configs/pascal_voc2012/input_compression/joint_autoregressive_hierarchical_prior-deeplabv3_resnet50.yaml:
--------------------------------------------------------------------------------
1 | datasets:
2 | pascal_voc:
3 | name: &dataset_name 'pascal_voc2012'
4 | type: 'VOCSegmentation'
5 | root: &root_dir '~/dataset'
6 | splits:
7 | val:
8 | dataset_id: &pascal_val !join [*dataset_name, '/val']
9 | params:
10 | root: *root_dir
11 | image_set: 'val'
12 | year: '2012'
13 | download: False
14 | transforms_compose_cls: 'CustomCompose'
15 | transforms_params: &val_transform
16 | - type: 'CustomRandomResize'
17 | params:
18 | min_size: 513
19 | max_size: 513
20 | - type: 'CustomToTensor'
21 | params:
22 | converts_sample: True
23 | converts_target: True
24 |
25 | models:
26 | model:
27 | name: 'NeuralInputCompressionSegmentationModel'
28 | params:
29 | pre_transform_params:
30 | - type: 'AdaptivePad'
31 | params:
32 | padding_position: 'right_bottom'
33 | returns_org_patch_size: True
34 | fill: 0
35 | factor: 64
36 | analysis_config:
37 | analyzes_after_compress: True
38 | analyzer_configs:
39 | - type: 'FileSizeAnalyzer'
40 | params:
41 | unit: 'KB'
42 | post_transform_params:
43 | - type: 'Normalize'
44 | params:
45 | mean: [0.485, 0.456, 0.406]
46 | std: [0.229, 0.224, 0.225]
47 | uses_cpu4compression_model: True
48 | compression_model:
49 | name: 'mbt2018'
50 | params:
51 | pretrained: True
52 | quality: 8
53 | metric: 'mse'
54 | ckpt: './resource/ckpt/input_compression/joint_autoregressive_hierarchical_prior.pt'
55 | segmentation_model:
56 | name: 'deeplabv3_resnet50'
57 | params:
58 | pretrained: True
59 | pretrained_backbone: True
60 | num_classes: 21
61 | aux_loss: True
62 | ckpt: 'https://github.com/yoshitomo-matsubara/torchdistill/releases/download/v0.2.8/pascal_voc2012-deeplabv3_resnet50.pt'
63 |
64 | test:
65 | test_data_loader:
66 | dataset_id: *pascal_val
67 | random_sample: False
68 | batch_size: 1
69 | num_workers: 16
70 | collate_fn: 'pascal_seg_eval_collate_fn'
71 |
--------------------------------------------------------------------------------
/legacy/script/README.md:
--------------------------------------------------------------------------------
1 | # Datasets
2 |
3 | Download and preprocess datasets before you run experiments.
4 | Here, we provide three examples: ImageNet (ILSVRC 2012), COCO 2017, and PASCAL VOC 2012.
5 |
6 | ## 1. ImageNet (ILSVRC 2012): Image Classification
7 | ### 1.1 Download the datasets
 8 | As the terms of use do not allow us to distribute the URLs, you will have to create an account [here](http://image-net.org/download) to get the URLs, and replace `${TRAIN_DATASET_URL}` and `${VAL_DATASET_URL}` with them.
9 | ```shell
10 | wget ${TRAIN_DATASET_URL} ./
11 | wget ${VAL_DATASET_URL} ./
12 | ```
13 |
14 | ### 1.2 Untar and extract files
15 | ```shell
16 | # Go to the root of this repository
17 | mkdir ~/dataset/ilsvrc2012/{train,val} -p
18 | mv ILSVRC2012_img_train.tar ~/dataset/ilsvrc2012/train/
19 | mv ILSVRC2012_img_val.tar ~/dataset/ilsvrc2012/val/
20 | cd ~/dataset/ilsvrc2012/train/
21 | tar -xvf ILSVRC2012_img_train.tar
22 | mv ILSVRC2012_img_train.tar ../
23 | for f in *.tar; do
24 | d=`basename $f .tar`
25 | mkdir $d
26 | (cd $d && tar xf ../$f)
27 | done
28 | rm -r *.tar
29 | cd ../../../../
30 |
31 | wget https://raw.githubusercontent.com/soumith/imagenetloader.torch/master/valprep.sh
32 | mv valprep.sh ~/dataset/ilsvrc2012/val/
33 | cd ~/dataset/ilsvrc2012/val/
34 | tar -xvf ILSVRC2012_img_val.tar
35 | mv ILSVRC2012_img_val.tar ../
36 | sh valprep.sh
37 | mv valprep.sh ../
38 | cd ../../../../
39 | ```
40 |
41 |
42 | ## 2. COCO 2017: Object Detection
43 | ### 2.1 Download the datasets
44 | ```shell
45 | wget http://images.cocodataset.org/zips/train2017.zip ./
46 | wget http://images.cocodataset.org/zips/val2017.zip ./
47 | wget http://images.cocodataset.org/annotations/annotations_trainval2017.zip ./
48 | ```
49 |
50 | ### 2.2 Unzip and extract files
51 | ```shell
52 | # Go to the root of this repository
53 | mkdir ~/dataset/coco2017/ -p
54 | mv train2017.zip ~/dataset/coco2017/
55 | mv val2017.zip ~/dataset/coco2017/
56 | mv annotations_trainval2017.zip ~/dataset/coco2017/
57 | cd ~/dataset/coco2017/
58 | unzip train2017.zip
59 | unzip val2017.zip
60 | unzip annotations_trainval2017.zip
61 | cd ../../../
62 | ```
63 |
64 |
65 | ## 3. PASCAL VOC 2012: Semantic Segmentation
66 | You can skip Steps 3.1 and 3.2 by replacing `download: False` in a yaml config file with `download: True`.
67 |
68 | ### 3.1 Download the datasets
69 | ```shell
70 | wget http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar
71 | ```
72 |
73 | ### 3.2 Untar and extract files
74 | ```shell
75 | # Go to the root of this repository
76 | mkdir ~/dataset/ -p
77 | mv VOCtrainval_11-May-2012.tar ~/dataset/
78 | cd ~/dataset/
 79 | tar -xvf VOCtrainval_11-May-2012.tar
80 | cd ../../
81 | ```
82 |
--------------------------------------------------------------------------------
/script/README.md:
--------------------------------------------------------------------------------
1 | # Datasets
2 |
3 | Download and preprocess datasets before you run experiments.
4 | Here, we provide three examples: ImageNet (ILSVRC 2012), COCO 2017, and PASCAL VOC 2012.
5 |
6 | ## 1. ImageNet (ILSVRC 2012): Image Classification
7 | ### 1.1 Download the datasets
 8 | As the terms of use do not allow us to distribute the URLs, you will have to create an account [here](http://image-net.org/download) to get the URLs, and replace `${TRAIN_DATASET_URL}` and `${VAL_DATASET_URL}` with them.
9 | ```shell
10 | wget ${TRAIN_DATASET_URL} ./
11 | wget ${VAL_DATASET_URL} ./
12 | ```
13 |
14 | ### 1.2 Untar and extract files
15 | ```shell
16 | # Go to the root of this repository
17 | mkdir ~/datasets/ilsvrc2012/{train,val} -p
18 | mv ILSVRC2012_img_train.tar ~/datasets/ilsvrc2012/train/
19 | mv ILSVRC2012_img_val.tar ~/datasets/ilsvrc2012/val/
20 | cd ~/datasets/ilsvrc2012/train/
21 | tar -xvf ILSVRC2012_img_train.tar
22 | mv ILSVRC2012_img_train.tar ../
23 | for f in *.tar; do
24 | d=`basename $f .tar`
25 | mkdir $d
26 | (cd $d && tar xf ../$f)
27 | done
28 | rm -r *.tar
29 | cd ../../../../
30 |
31 | wget https://raw.githubusercontent.com/soumith/imagenetloader.torch/master/valprep.sh
32 | mv valprep.sh ~/datasets/ilsvrc2012/val/
33 | cd ~/datasets/ilsvrc2012/val/
34 | tar -xvf ILSVRC2012_img_val.tar
35 | mv ILSVRC2012_img_val.tar ../
36 | sh valprep.sh
37 | mv valprep.sh ../
38 | cd ../../../../
39 | ```
40 |
41 |
42 | ## 2. COCO 2017: Object Detection
43 | ### 2.1 Download the datasets
44 | ```shell
45 | wget http://images.cocodataset.org/zips/train2017.zip ./
46 | wget http://images.cocodataset.org/zips/val2017.zip ./
47 | wget http://images.cocodataset.org/annotations/annotations_trainval2017.zip ./
48 | ```
49 |
50 | ### 2.2 Unzip and extract files
51 | ```shell
52 | # Go to the root of this repository
53 | mkdir ~/datasets/coco2017/ -p
54 | mv train2017.zip ~/datasets/coco2017/
55 | mv val2017.zip ~/datasets/coco2017/
56 | mv annotations_trainval2017.zip ~/datasets/coco2017/
57 | cd ~/datasets/coco2017/
58 | unzip train2017.zip
59 | unzip val2017.zip
60 | unzip annotations_trainval2017.zip
61 | cd ../../../
62 | ```
63 |
64 |
65 | ## 3. PASCAL VOC 2012: Semantic Segmentation
66 | You can skip Steps 3.1 and 3.2 by replacing `download: False` in a yaml config file with `download: True`.
67 |
68 | ### 3.1 Download the datasets
69 | ```shell
70 | wget http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar
71 | ```
72 |
73 | ### 3.2 Untar and extract files
74 | ```shell
75 | # Go to the root of this repository
76 | mkdir ~/datasets/ -p
77 | mv VOCtrainval_11-May-2012.tar ~/datasets/
78 | cd ~/datasets/
 79 | tar -xvf VOCtrainval_11-May-2012.tar
80 | cd ../../
81 | ```
82 |
--------------------------------------------------------------------------------
/configs/ilsvrc2012/input_compression/jpeg-resnet50.yaml:
--------------------------------------------------------------------------------
1 | datasets:
2 | &imagenet_val ilsvrc2012/val: !import_call
3 | _name: &dataset_name 'ilsvrc2012'
4 | _root: &root_dir !join ['~/datasets/', *dataset_name]
5 | key: 'torchvision.datasets.ImageFolder'
6 | init:
7 | kwargs:
8 | root: !join [*root_dir, '/val']
9 | transform: !import_call
10 | key: 'torchvision.transforms.Compose'
11 | init:
12 | kwargs:
13 | transforms:
14 | - !import_call
15 | key: 'torchvision.transforms.Resize'
16 | init:
17 | kwargs:
18 | size: 256
19 | - !import_call
20 | key: 'torchvision.transforms.CenterCrop'
21 | init:
22 | kwargs:
23 | size: [224, 224]
24 |
25 | models:
26 | model:
27 | key: 'CodecInputCompressionClassifier'
28 | kwargs:
29 | codec_encoder_decoder: !import_call
30 | key: 'torchvision.transforms.Compose'
31 | init:
32 | kwargs:
33 | transforms:
34 | - !import_call
35 | key: 'sc2bench.transforms.codec.PILImageModule'
36 | init:
37 | kwargs:
38 | format: 'JPEG'
39 | quality: 90
40 | returns_file_size: True
41 | post_transform: !import_call
42 | key: 'torchvision.transforms.Compose'
43 | init:
44 | kwargs:
45 | transforms:
46 | - !import_call
47 | key: 'torchvision.transforms.ToTensor'
48 | init:
49 | - !import_call
50 | key: 'torchvision.transforms.Normalize'
51 | init:
52 | kwargs:
53 | mean: [0.485, 0.456, 0.406]
54 | std: [0.229, 0.224, 0.225]
55 | analysis_config:
56 | analyzer_configs:
57 | - key: 'FileSizeAccumulator'
58 | kwargs:
59 | unit: 'KB'
60 | classification_model:
61 | key: 'resnet50'
62 | _weights: &model_weights_enum !import_get
63 | key: 'torchvision.models.resnet.ResNet50_Weights'
64 | kwargs:
65 | num_classes: 1000
66 | weights: !getattr [*model_weights_enum, 'IMAGENET1K_V1']
67 |
68 | test:
69 | test_data_loader:
70 | dataset_id: *imagenet_val
71 | collate_fn: 'default_collate_w_pil'
72 | sampler:
73 | class_or_func: !import_get
74 | key: 'torch.utils.data.SequentialSampler'
75 | kwargs:
76 | kwargs:
77 | batch_size: 1
78 | num_workers: 16
79 | drop_last: False
80 |
--------------------------------------------------------------------------------
/configs/ilsvrc2012/input_compression/webp-resnet50.yaml:
--------------------------------------------------------------------------------
1 | datasets:
2 | &imagenet_val ilsvrc2012/val: !import_call
3 | _name: &dataset_name 'ilsvrc2012'
4 | _root: &root_dir !join ['~/datasets/', *dataset_name]
5 | key: 'torchvision.datasets.ImageFolder'
6 | init:
7 | kwargs:
8 | root: !join [ *root_dir, '/val' ]
9 | transform: !import_call
10 | key: 'torchvision.transforms.Compose'
11 | init:
12 | kwargs:
13 | transforms:
14 | - !import_call
15 | key: 'torchvision.transforms.Resize'
16 | init:
17 | kwargs:
18 | size: 256
19 | - !import_call
20 | key: 'torchvision.transforms.CenterCrop'
21 | init:
22 | kwargs:
23 | size: [224, 224]
24 |
25 | models:
26 | model:
27 | key: 'CodecInputCompressionClassifier'
28 | kwargs:
29 | codec_encoder_decoder: !import_call
30 | key: 'torchvision.transforms.Compose'
31 | init:
32 | kwargs:
33 | transforms:
34 | - !import_call
35 | key: 'sc2bench.transforms.codec.PILImageModule'
36 | init:
37 | kwargs:
38 | format: 'WEBP'
39 | quality: 90
40 | returns_file_size: True
41 | post_transform: !import_call
42 | key: 'torchvision.transforms.Compose'
43 | init:
44 | kwargs:
45 | transforms:
46 | - !import_call
47 | key: 'torchvision.transforms.ToTensor'
48 | init:
49 | - !import_call
50 | key: 'torchvision.transforms.Normalize'
51 | init:
52 | kwargs:
53 | mean: [0.485, 0.456, 0.406]
54 | std: [0.229, 0.224, 0.225]
55 | analysis_config:
56 | analyzer_configs:
57 | - key: 'FileSizeAccumulator'
58 | kwargs:
59 | unit: 'KB'
60 | classification_model:
61 | key: 'resnet50'
62 | _weights: &model_weights_enum !import_get
63 | key: 'torchvision.models.resnet.ResNet50_Weights'
64 | kwargs:
65 | num_classes: 1000
66 | weights: !getattr [*model_weights_enum, 'IMAGENET1K_V1']
67 |
68 | test:
69 | test_data_loader:
70 | dataset_id: *imagenet_val
71 | collate_fn: 'default_collate_w_pil'
72 | sampler:
73 | class_or_func: !import_get
74 | key: 'torch.utils.data.SequentialSampler'
75 | kwargs:
76 | kwargs:
77 | batch_size: 1
78 | num_workers: 16
79 | drop_last: False
80 |
--------------------------------------------------------------------------------
/configs/ilsvrc2012/input_compression/jpeg-resnet101.yaml:
--------------------------------------------------------------------------------
1 | datasets:
2 | &imagenet_val ilsvrc2012/val: !import_call
3 | _name: &dataset_name 'ilsvrc2012'
4 | _root: &root_dir !join ['~/datasets/', *dataset_name]
5 | key: 'torchvision.datasets.ImageFolder'
6 | init:
7 | kwargs:
8 | root: !join [*root_dir, '/val']
9 | transform: !import_call
10 | key: 'torchvision.transforms.Compose'
11 | init:
12 | kwargs:
13 | transforms:
14 | - !import_call
15 | key: 'torchvision.transforms.Resize'
16 | init:
17 | kwargs:
18 | size: 256
19 | - !import_call
20 | key: 'torchvision.transforms.CenterCrop'
21 | init:
22 | kwargs:
23 | size: [224, 224]
24 |
25 | models:
26 | model:
27 | key: 'CodecInputCompressionClassifier'
28 | kwargs:
29 | codec_encoder_decoder: !import_call
30 | key: 'torchvision.transforms.Compose'
31 | init:
32 | kwargs:
33 | transforms:
34 | - !import_call
35 | key: 'sc2bench.transforms.codec.PILImageModule'
36 | init:
37 | kwargs:
38 | format: 'JPEG'
39 | quality: 90
40 | returns_file_size: True
41 | post_transform: !import_call
42 | key: 'torchvision.transforms.Compose'
43 | init:
44 | kwargs:
45 | transforms:
46 | - !import_call
47 | key: 'torchvision.transforms.ToTensor'
48 | init:
49 | - !import_call
50 | key: 'torchvision.transforms.Normalize'
51 | init:
52 | kwargs:
53 | mean: [0.485, 0.456, 0.406]
54 | std: [0.229, 0.224, 0.225]
55 | analysis_config:
56 | analyzer_configs:
57 | - key: 'FileSizeAccumulator'
58 | kwargs:
59 | unit: 'KB'
60 | classification_model:
61 |
62 | key: 'resnet101'
63 | _weights: &model_weights_enum !import_get
64 | key: 'torchvision.models.resnet.ResNet101_Weights'
65 | kwargs:
66 | num_classes: 1000
67 | weights: !getattr [*model_weights_enum, 'IMAGENET1K_V1']
68 |
69 | test:
70 | test_data_loader:
71 | dataset_id: *imagenet_val
72 | collate_fn: 'default_collate_w_pil'
73 | sampler:
74 | class_or_func: !import_get
75 | key: 'torch.utils.data.SequentialSampler'
76 | kwargs:
77 | kwargs:
78 | batch_size: 1
79 | num_workers: 16
80 | drop_last: False
81 |
--------------------------------------------------------------------------------
/configs/ilsvrc2012/input_compression/jpeg-resnet152.yaml:
--------------------------------------------------------------------------------
1 | datasets:
2 | &imagenet_val ilsvrc2012/val: !import_call
3 | _name: &dataset_name 'ilsvrc2012'
4 | _root: &root_dir !join ['~/datasets/', *dataset_name]
5 | key: 'torchvision.datasets.ImageFolder'
6 | init:
7 | kwargs:
8 | root: !join [*root_dir, '/val']
9 | transform: !import_call
10 | key: 'torchvision.transforms.Compose'
11 | init:
12 | kwargs:
13 | transforms:
14 | - !import_call
15 | key: 'torchvision.transforms.Resize'
16 | init:
17 | kwargs:
18 | size: 256
19 | - !import_call
20 | key: 'torchvision.transforms.CenterCrop'
21 | init:
22 | kwargs:
23 | size: [224, 224]
24 |
25 | models:
26 | model:
27 | key: 'CodecInputCompressionClassifier'
28 | kwargs:
29 | codec_encoder_decoder: !import_call
30 | key: 'torchvision.transforms.Compose'
31 | init:
32 | kwargs:
33 | transforms:
34 | - !import_call
35 | key: 'sc2bench.transforms.codec.PILImageModule'
36 | init:
37 | kwargs:
38 | format: 'JPEG'
39 | quality: 90
40 | returns_file_size: True
41 | post_transform: !import_call
42 | key: 'torchvision.transforms.Compose'
43 | init:
44 | kwargs:
45 | transforms:
46 | - !import_call
47 | key: 'torchvision.transforms.ToTensor'
48 | init:
49 | - !import_call
50 | key: 'torchvision.transforms.Normalize'
51 | init:
52 | kwargs:
53 | mean: [0.485, 0.456, 0.406]
54 | std: [0.229, 0.224, 0.225]
55 | analysis_config:
56 | analyzer_configs:
57 | - key: 'FileSizeAccumulator'
58 | kwargs:
59 | unit: 'KB'
60 | classification_model:
61 |
62 | key: 'resnet152'
63 | _weights: &model_weights_enum !import_get
64 | key: 'torchvision.models.resnet.ResNet152_Weights'
65 | kwargs:
66 | num_classes: 1000
67 | weights: !getattr [*model_weights_enum, 'IMAGENET1K_V1']
68 |
69 | test:
70 | test_data_loader:
71 | dataset_id: *imagenet_val
72 | collate_fn: 'default_collate_w_pil'
73 | sampler:
74 | class_or_func: !import_get
75 | key: 'torch.utils.data.SequentialSampler'
76 | kwargs:
77 | kwargs:
78 | batch_size: 1
79 | num_workers: 16
80 | drop_last: False
81 |
--------------------------------------------------------------------------------
/configs/ilsvrc2012/input_compression/webp-resnet101.yaml:
--------------------------------------------------------------------------------
1 | datasets:
2 | &imagenet_val ilsvrc2012/val: !import_call
3 | _name: &dataset_name 'ilsvrc2012'
4 | _root: &root_dir !join ['~/datasets/', *dataset_name]
5 | key: 'torchvision.datasets.ImageFolder'
6 | init:
7 | kwargs:
8 | root: !join [ *root_dir, '/val' ]
9 | transform: !import_call
10 | key: 'torchvision.transforms.Compose'
11 | init:
12 | kwargs:
13 | transforms:
14 | - !import_call
15 | key: 'torchvision.transforms.Resize'
16 | init:
17 | kwargs:
18 | size: 256
19 | - !import_call
20 | key: 'torchvision.transforms.CenterCrop'
21 | init:
22 | kwargs:
23 | size: [224, 224]
24 |
25 | models:
26 | model:
27 | key: 'CodecInputCompressionClassifier'
28 | kwargs:
29 | codec_encoder_decoder: !import_call
30 | key: 'torchvision.transforms.Compose'
31 | init:
32 | kwargs:
33 | transforms:
34 | - !import_call
35 | key: 'sc2bench.transforms.codec.PILImageModule'
36 | init:
37 | kwargs:
38 | format: 'WEBP'
39 | quality: 90
40 | returns_file_size: True
41 | post_transform: !import_call
42 | key: 'torchvision.transforms.Compose'
43 | init:
44 | kwargs:
45 | transforms:
46 | - !import_call
47 | key: 'torchvision.transforms.ToTensor'
48 | init:
49 | - !import_call
50 | key: 'torchvision.transforms.Normalize'
51 | init:
52 | kwargs:
53 | mean: [0.485, 0.456, 0.406]
54 | std: [0.229, 0.224, 0.225]
55 | analysis_config:
56 | analyzer_configs:
57 | - key: 'FileSizeAccumulator'
58 | kwargs:
59 | unit: 'KB'
60 | classification_model:
61 |
62 | key: 'resnet101'
63 | _weights: &model_weights_enum !import_get
64 | key: 'torchvision.models.resnet.ResNet101_Weights'
65 | kwargs:
66 | num_classes: 1000
67 | weights: !getattr [*model_weights_enum, 'IMAGENET1K_V1']
68 |
69 | test:
70 | test_data_loader:
71 | dataset_id: *imagenet_val
72 | collate_fn: 'default_collate_w_pil'
73 | sampler:
74 | class_or_func: !import_get
75 | key: 'torch.utils.data.SequentialSampler'
76 | kwargs:
77 | kwargs:
78 | batch_size: 1
79 | num_workers: 16
80 | drop_last: False
81 |
--------------------------------------------------------------------------------
/configs/ilsvrc2012/input_compression/webp-resnet152.yaml:
--------------------------------------------------------------------------------
1 | datasets:
2 | &imagenet_val ilsvrc2012/val: !import_call
3 | _name: &dataset_name 'ilsvrc2012'
4 | _root: &root_dir !join ['~/datasets/', *dataset_name]
5 | key: 'torchvision.datasets.ImageFolder'
6 | init:
7 | kwargs:
8 | root: !join [ *root_dir, '/val' ]
9 | transform: !import_call
10 | key: 'torchvision.transforms.Compose'
11 | init:
12 | kwargs:
13 | transforms:
14 | - !import_call
15 | key: 'torchvision.transforms.Resize'
16 | init:
17 | kwargs:
18 | size: 256
19 | - !import_call
20 | key: 'torchvision.transforms.CenterCrop'
21 | init:
22 | kwargs:
23 | size: [224, 224]
24 |
25 | models:
26 | model:
27 | key: 'CodecInputCompressionClassifier'
28 | kwargs:
29 | codec_encoder_decoder: !import_call
30 | key: 'torchvision.transforms.Compose'
31 | init:
32 | kwargs:
33 | transforms:
34 | - !import_call
35 | key: 'sc2bench.transforms.codec.PILImageModule'
36 | init:
37 | kwargs:
38 | format: 'WEBP'
39 | quality: 90
40 | returns_file_size: True
41 | post_transform: !import_call
42 | key: 'torchvision.transforms.Compose'
43 | init:
44 | kwargs:
45 | transforms:
46 | - !import_call
47 | key: 'torchvision.transforms.ToTensor'
48 | init:
49 | - !import_call
50 | key: 'torchvision.transforms.Normalize'
51 | init:
52 | kwargs:
53 | mean: [0.485, 0.456, 0.406]
54 | std: [0.229, 0.224, 0.225]
55 | analysis_config:
56 | analyzer_configs:
57 | - key: 'FileSizeAccumulator'
58 | kwargs:
59 | unit: 'KB'
60 | classification_model:
61 |
62 | key: 'resnet152'
63 | _weights: &model_weights_enum !import_get
64 | key: 'torchvision.models.resnet.ResNet152_Weights'
65 | kwargs:
66 | num_classes: 1000
67 | weights: !getattr [*model_weights_enum, 'IMAGENET1K_V1']
68 |
69 | test:
70 | test_data_loader:
71 | dataset_id: *imagenet_val
72 | collate_fn: 'default_collate_w_pil'
73 | sampler:
74 | class_or_func: !import_get
75 | key: 'torch.utils.data.SequentialSampler'
76 | kwargs:
77 | kwargs:
78 | batch_size: 1
79 | num_workers: 16
80 | drop_last: False
81 |
--------------------------------------------------------------------------------
/configs/ilsvrc2012/input_compression/bpg-resnet50.yaml:
--------------------------------------------------------------------------------
1 | datasets:
2 | &imagenet_val ilsvrc2012/val: !import_call
3 | _name: &dataset_name 'ilsvrc2012'
4 | _root: &root_dir !join ['~/datasets/', *dataset_name]
5 | key: 'torchvision.datasets.ImageFolder'
6 | init:
7 | kwargs:
8 | root: !join [ *root_dir, '/val' ]
9 | transform: !import_call
10 | key: 'torchvision.transforms.Compose'
11 | init:
12 | kwargs:
13 | transforms:
14 | - !import_call
15 | key: 'torchvision.transforms.Resize'
16 | init:
17 | kwargs:
18 | size: 256
19 | - !import_call
20 | key: 'torchvision.transforms.CenterCrop'
21 | init:
22 | kwargs:
23 | size: [224, 224]
24 |
25 | models:
26 | model:
27 | key: 'CodecInputCompressionClassifier'
28 | kwargs:
29 | codec_encoder_decoder: !import_call
30 | key: 'torchvision.transforms.Compose'
31 | init:
32 | kwargs:
33 | transforms:
34 | - !import_call
35 | key: 'sc2bench.transforms.codec.BPGModule'
36 | init:
37 | kwargs:
38 | encoder_path: '~/software/libbpg-0.9.8/bpgenc'
39 | decoder_path: '~/software/libbpg-0.9.8/bpgdec'
40 | quality: 50
41 | returns_file_size: True
42 | post_transform: !import_call
43 | key: 'torchvision.transforms.Compose'
44 | init:
45 | kwargs:
46 | transforms:
47 | - !import_call
48 | key: 'torchvision.transforms.ToTensor'
49 | init:
50 | - !import_call
51 | key: 'torchvision.transforms.Normalize'
52 | init:
53 | kwargs:
54 | mean: [0.485, 0.456, 0.406]
55 | std: [0.229, 0.224, 0.225]
56 | analysis_config:
57 | analyzer_configs:
58 | - key: 'FileSizeAccumulator'
59 | kwargs:
60 | unit: 'KB'
61 | classification_model:
62 | key: 'resnet50'
63 | _weights: &model_weights_enum !import_get
64 | key: 'torchvision.models.resnet.ResNet50_Weights'
65 | kwargs:
66 | num_classes: 1000
67 | weights: !getattr [*model_weights_enum, 'IMAGENET1K_V1']
68 |
69 | test:
70 | test_data_loader:
71 | dataset_id: *imagenet_val
72 | sampler:
73 | class_or_func: !import_get
74 | key: 'torch.utils.data.SequentialSampler'
75 | kwargs:
76 | kwargs:
77 | batch_size: 1
78 | num_workers: 16
79 | drop_last: False
80 |
--------------------------------------------------------------------------------
/configs/ilsvrc2012/input_compression/bpg-resnet101.yaml:
--------------------------------------------------------------------------------
1 | datasets:
2 | &imagenet_val ilsvrc2012/val: !import_call
3 | _name: &dataset_name 'ilsvrc2012'
4 | _root: &root_dir !join ['~/datasets/', *dataset_name]
5 | key: 'torchvision.datasets.ImageFolder'
6 | init:
7 | kwargs:
8 | root: !join [ *root_dir, '/val' ]
9 | transform: !import_call
10 | key: 'torchvision.transforms.Compose'
11 | init:
12 | kwargs:
13 | transforms:
14 | - !import_call
15 | key: 'torchvision.transforms.Resize'
16 | init:
17 | kwargs:
18 | size: 256
19 | - !import_call
20 | key: 'torchvision.transforms.CenterCrop'
21 | init:
22 | kwargs:
23 | size: [224, 224]
24 |
25 | models:
26 | model:
27 | key: 'CodecInputCompressionClassifier'
28 | kwargs:
29 | codec_encoder_decoder: !import_call
30 | key: 'torchvision.transforms.Compose'
31 | init:
32 | kwargs:
33 | transforms:
34 | - !import_call
35 | key: 'sc2bench.transforms.codec.BPGModule'
36 | init:
37 | kwargs:
38 | encoder_path: '~/software/libbpg-0.9.8/bpgenc'
39 | decoder_path: '~/software/libbpg-0.9.8/bpgdec'
40 | quality: 50
41 | returns_file_size: True
42 | post_transform: !import_call
43 | key: 'torchvision.transforms.Compose'
44 | init:
45 | kwargs:
46 | transforms:
47 | - !import_call
48 | key: 'torchvision.transforms.ToTensor'
49 | init:
50 | - !import_call
51 | key: 'torchvision.transforms.Normalize'
52 | init:
53 | kwargs:
54 | mean: [0.485, 0.456, 0.406]
55 | std: [0.229, 0.224, 0.225]
56 | analysis_config:
57 | analyzer_configs:
58 | - key: 'FileSizeAccumulator'
59 | kwargs:
60 | unit: 'KB'
61 | classification_model:
62 |
63 | key: 'resnet101'
64 | _weights: &model_weights_enum !import_get
65 | key: 'torchvision.models.resnet.ResNet101_Weights'
66 | kwargs:
67 | num_classes: 1000
68 | weights: !getattr [*model_weights_enum, 'IMAGENET1K_V1']
69 |
70 | test:
71 | test_data_loader:
72 | dataset_id: *imagenet_val
73 | sampler:
74 | class_or_func: !import_get
75 | key: 'torch.utils.data.SequentialSampler'
76 | kwargs:
77 | kwargs:
78 | batch_size: 1
79 | num_workers: 16
80 | drop_last: False
81 |
--------------------------------------------------------------------------------
/configs/ilsvrc2012/input_compression/bpg-resnet152.yaml:
--------------------------------------------------------------------------------
1 | datasets:
2 | &imagenet_val ilsvrc2012/val: !import_call
3 | _name: &dataset_name 'ilsvrc2012'
4 | _root: &root_dir !join ['~/datasets/', *dataset_name]
5 | key: 'torchvision.datasets.ImageFolder'
6 | init:
7 | kwargs:
8 | root: !join [ *root_dir, '/val' ]
9 | transform: !import_call
10 | key: 'torchvision.transforms.Compose'
11 | init:
12 | kwargs:
13 | transforms:
14 | - !import_call
15 | key: 'torchvision.transforms.Resize'
16 | init:
17 | kwargs:
18 | size: 256
19 | - !import_call
20 | key: 'torchvision.transforms.CenterCrop'
21 | init:
22 | kwargs:
23 | size: [224, 224]
24 |
25 | models:
26 | model:
27 | key: 'CodecInputCompressionClassifier'
28 | kwargs:
29 | codec_encoder_decoder: !import_call
30 | key: 'torchvision.transforms.Compose'
31 | init:
32 | kwargs:
33 | transforms:
34 | - !import_call
35 | key: 'sc2bench.transforms.codec.BPGModule'
36 | init:
37 | kwargs:
38 | encoder_path: '~/software/libbpg-0.9.8/bpgenc'
39 | decoder_path: '~/software/libbpg-0.9.8/bpgdec'
40 | quality: 50
41 | returns_file_size: True
42 | post_transform: !import_call
43 | key: 'torchvision.transforms.Compose'
44 | init:
45 | kwargs:
46 | transforms:
47 | - !import_call
48 | key: 'torchvision.transforms.ToTensor'
49 | init:
50 | - !import_call
51 | key: 'torchvision.transforms.Normalize'
52 | init:
53 | kwargs:
54 | mean: [0.485, 0.456, 0.406]
55 | std: [0.229, 0.224, 0.225]
56 | analysis_config:
57 | analyzer_configs:
58 | - key: 'FileSizeAccumulator'
59 | kwargs:
60 | unit: 'KB'
61 | classification_model:
62 |
63 | key: 'resnet152'
64 | _weights: &model_weights_enum !import_get
65 | key: 'torchvision.models.resnet.ResNet152_Weights'
66 | kwargs:
67 | num_classes: 1000
68 | weights: !getattr [*model_weights_enum, 'IMAGENET1K_V1']
69 |
70 | test:
71 | test_data_loader:
72 | dataset_id: *imagenet_val
73 | sampler:
74 | class_or_func: !import_get
75 | key: 'torch.utils.data.SequentialSampler'
76 | kwargs:
77 | kwargs:
78 | batch_size: 1
79 | num_workers: 16
80 | drop_last: False
81 |
--------------------------------------------------------------------------------
/docs/source/subpkgs/models.rst:
--------------------------------------------------------------------------------
1 | sc2bench.models
2 | ===============
3 |
4 |
5 | .. toctree::
6 | :maxdepth: 4
7 | :caption: Contents:
8 |
9 | ----
10 |
11 | sc2bench.models.layer
12 | ---------------------
13 |
14 | .. automodule:: sc2bench.models.layer
15 | :members:
16 | :exclude-members: forward
17 |
18 | ----
19 |
20 | sc2bench.models.registry
21 | ------------------------
22 |
23 | .. automodule:: sc2bench.models.registry
24 | :members:
25 | :exclude-members: forward
26 |
27 | ----
28 |
29 | sc2bench.models.wrapper
30 | -----------------------
31 |
32 | .. automodule:: sc2bench.models.wrapper
33 | :members:
34 | :exclude-members: forward
35 |
36 | ----
37 |
38 | sc2bench.models.backbone
39 | ------------------------
40 |
41 | .. automodule:: sc2bench.models.backbone
42 | :members:
43 | :exclude-members: forward
44 |
45 | ----
46 |
47 | sc2bench.models.detection
48 | -------------------------
49 |
50 | .. automodule:: sc2bench.models.detection
51 | :members:
52 |
53 | ----
54 |
55 | sc2bench.models.detection.base
56 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
57 |
58 | .. automodule:: sc2bench.models.detection.base
59 | :members:
60 | :exclude-members: forward
61 |
62 | ----
63 |
64 | sc2bench.models.detection.rcnn
65 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
66 |
67 | .. automodule:: sc2bench.models.detection.rcnn
68 | :members:
69 | :exclude-members: forward
70 |
71 | ----
72 |
73 | sc2bench.models.detection.registry
74 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
75 |
76 | .. automodule:: sc2bench.models.detection.registry
77 | :members:
78 |
79 | ----
80 |
81 | sc2bench.models.detection.transform
82 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
83 |
84 | .. automodule:: sc2bench.models.detection.transform
85 | :members:
86 | :exclude-members: forward
87 |
88 | ----
89 |
90 | sc2bench.models.detection.wrapper
91 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
92 |
93 | .. automodule:: sc2bench.models.detection.wrapper
94 | :members:
95 | :exclude-members: forward
96 |
97 |
98 | sc2bench.models.segmentation
99 | ----------------------------
100 |
101 | .. automodule:: sc2bench.models.segmentation
102 | :members:
103 |
104 | ----
105 |
106 | sc2bench.models.segmentation.base
107 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
108 | .. automodule:: sc2bench.models.segmentation.base
109 | :members:
110 | :exclude-members: forward
111 |
112 | ----
113 |
114 | sc2bench.models.segmentation.deeplabv3
115 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
116 |
117 | .. automodule:: sc2bench.models.segmentation.deeplabv3
118 | :members:
119 | :exclude-members: forward
120 |
121 | ----
122 |
123 | sc2bench.models.segmentation.registry
124 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
125 |
126 | .. automodule:: sc2bench.models.segmentation.registry
127 | :members:
128 |
129 | ----
130 |
131 | sc2bench.models.segmentation.wrapper
132 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
133 |
134 | .. automodule:: sc2bench.models.segmentation.wrapper
135 | :members:
136 | :exclude-members: forward
137 |
--------------------------------------------------------------------------------
/configs/ilsvrc2012/input_compression/jpeg-tf_efficientnet_l2_ns.yaml:
--------------------------------------------------------------------------------
1 | datasets:
2 | &imagenet_val ilsvrc2012/val: !import_call
3 | _name: &dataset_name 'ilsvrc2012'
4 | _root: &root_dir !join ['~/datasets/', *dataset_name]
5 | key: 'torchvision.datasets.ImageFolder'
6 | init:
7 | kwargs:
8 | root: !join [*root_dir, '/val']
9 | transform: !import_call
10 | key: 'torchvision.transforms.Compose'
11 | init:
12 | kwargs:
13 | transforms:
14 | - !import_call
15 | key: 'torchvision.transforms.Resize'
16 | init:
17 | kwargs:
18 | size: 833
19 | interpolation: !getattr
20 | - !import_get
21 | key: 'torchvision.transforms.functional.InterpolationMode'
22 | - 'BICUBIC'
23 | - !import_call
24 | key: 'torchvision.transforms.CenterCrop'
25 | init:
26 | kwargs:
27 | size: [800, 800]
28 |
29 | models:
30 | model:
31 | key: 'CodecInputCompressionClassifier'
32 | kwargs:
33 | codec_encoder_decoder: !import_call
34 | key: 'torchvision.transforms.Compose'
35 | init:
36 | kwargs:
37 | transforms:
38 | - !import_call
39 | key: 'sc2bench.transforms.codec.PILImageModule'
40 | init:
41 | kwargs:
42 | format: 'JPEG'
43 | quality: 90
44 | returns_file_size: True
45 | post_transform: !import_call
46 | key: 'torchvision.transforms.Compose'
47 | init:
48 | kwargs:
49 | transforms:
50 | - !import_call
51 | key: 'torchvision.transforms.ToTensor'
52 | init:
53 | - !import_call
54 | key: 'torchvision.transforms.Normalize'
55 | init:
56 | kwargs:
57 | mean: [0.485, 0.456, 0.406]
58 | std: [0.229, 0.224, 0.225]
59 | analysis_config:
60 | analyzer_configs:
61 | - key: 'FileSizeAccumulator'
62 | kwargs:
63 | unit: 'KB'
64 | classification_model:
65 | key: 'tf_efficientnet_l2_ns'
66 | repo_or_dir: 'rwightman/pytorch-image-models'
67 | kwargs:
68 | num_classes: 1000
69 | pretrained: True
70 |
71 | test:
72 | test_data_loader:
73 | dataset_id: *imagenet_val
74 | sampler:
75 | class_or_func: !import_get
76 | key: 'torch.utils.data.SequentialSampler'
77 | kwargs:
78 | kwargs:
79 | batch_size: 1
80 | num_workers: 16
81 | drop_last: False
82 |
--------------------------------------------------------------------------------
/configs/ilsvrc2012/input_compression/jpeg-tf_efficientnet_l2_ns_475.yaml:
--------------------------------------------------------------------------------
1 | datasets:
2 | &imagenet_val ilsvrc2012/val: !import_call
3 | _name: &dataset_name 'ilsvrc2012'
4 | _root: &root_dir !join ['~/datasets/', *dataset_name]
5 | key: 'torchvision.datasets.ImageFolder'
6 | init:
7 | kwargs:
8 | root: !join [*root_dir, '/val']
9 | transform: !import_call
10 | key: 'torchvision.transforms.Compose'
11 | init:
12 | kwargs:
13 | transforms:
14 | - !import_call
15 | key: 'torchvision.transforms.Resize'
16 | init:
17 | kwargs:
18 | size: 507
19 | interpolation: !getattr
20 | - !import_get
21 | key: 'torchvision.transforms.functional.InterpolationMode'
22 | - 'BICUBIC'
23 | - !import_call
24 | key: 'torchvision.transforms.CenterCrop'
25 | init:
26 | kwargs:
27 | size: [475, 475]
28 |
29 | models:
30 | model:
31 | key: 'CodecInputCompressionClassifier'
32 | kwargs:
33 | codec_encoder_decoder: !import_call
34 | key: 'torchvision.transforms.Compose'
35 | init:
36 | kwargs:
37 | transforms:
38 | - !import_call
39 | key: 'sc2bench.transforms.codec.PILImageModule'
40 | init:
41 | kwargs:
42 | format: 'JPEG'
43 | quality: 90
44 | returns_file_size: True
45 | post_transform: !import_call
46 | key: 'torchvision.transforms.Compose'
47 | init:
48 | kwargs:
49 | transforms:
50 | - !import_call
51 | key: 'torchvision.transforms.ToTensor'
52 | init:
53 | - !import_call
54 | key: 'torchvision.transforms.Normalize'
55 | init:
56 | kwargs:
57 | mean: [0.485, 0.456, 0.406]
58 | std: [0.229, 0.224, 0.225]
59 | analysis_config:
60 | analyzer_configs:
61 | - key: 'FileSizeAccumulator'
62 | kwargs:
63 | unit: 'KB'
64 | classification_model:
65 | key: 'tf_efficientnet_l2_ns_475'
66 | repo_or_dir: 'rwightman/pytorch-image-models'
67 | kwargs:
68 | num_classes: 1000
69 | pretrained: True
70 |
71 | test:
72 | test_data_loader:
73 | dataset_id: *imagenet_val
74 | sampler:
75 | class_or_func: !import_get
76 | key: 'torch.utils.data.SequentialSampler'
77 | kwargs:
78 | kwargs:
79 | batch_size: 1
80 | num_workers: 16
81 | drop_last: False
82 |
--------------------------------------------------------------------------------
/configs/ilsvrc2012/feature_compression/jpeg-resnet50.yaml:
--------------------------------------------------------------------------------
1 | datasets:
2 | &imagenet_val ilsvrc2012/val: !import_call
3 | _name: &dataset_name 'ilsvrc2012'
4 | _root: &root_dir !join ['~/datasets/', *dataset_name]
5 | key: 'torchvision.datasets.ImageFolder'
6 | init:
7 | kwargs:
8 | root: !join [ *root_dir, '/val' ]
9 | transform: !import_call
10 | key: 'torchvision.transforms.Compose'
11 | init:
12 | kwargs:
13 | transforms:
14 | - !import_call
15 | key: 'torchvision.transforms.Resize'
16 | init:
17 | kwargs:
18 | size: 256
19 | - !import_call
20 | key: 'torchvision.transforms.CenterCrop'
21 | init:
22 | kwargs:
23 | size: [224, 224]
24 | - !import_call
25 | key: 'torchvision.transforms.ToTensor'
26 | init:
27 | - !import_call
28 | key: 'torchvision.transforms.Normalize'
29 | init:
30 | kwargs:
31 | mean: [0.485, 0.456, 0.406]
32 | std: [0.229, 0.224, 0.225]
33 |
34 | models:
35 | model:
36 | key: 'CodecFeatureCompressionClassifier'
37 | kwargs:
38 | codec_encoder_decoder: !import_call
39 | key: 'torchvision.transforms.Compose'
40 | init:
41 | kwargs:
42 | transforms:
43 | - !import_call
44 | key: 'sc2bench.transforms.codec.PILTensorModule'
45 | init:
46 | kwargs:
47 | format: 'JPEG'
48 | quality: 90
49 | returns_file_size: True
50 | encoder_config:
51 | sequential: ['conv1', 'bn1', 'relu', 'maxpool', 'layer1', 'layer2']
52 | decoder_config:
53 | sequential: ['layer3', 'layer4', 'avgpool']
54 | classifier_config:
55 | sequential: ['fc']
56 | post_transform:
57 | analysis_config:
58 | analyzer_configs:
59 | - key: 'FileSizeAccumulator'
60 | kwargs:
61 | unit: 'KB'
62 | classification_model:
63 | key: 'resnet50'
64 | _weights: &model_weights_enum !import_get
65 | key: 'torchvision.models.resnet.ResNet50_Weights'
66 | kwargs:
67 | num_classes: 1000
68 | weights: !getattr [*model_weights_enum, 'IMAGENET1K_V1']
69 |
70 | test:
71 | test_data_loader:
72 | dataset_id: *imagenet_val
73 | collate_fn: 'default_collate_w_pil'
74 | sampler:
75 | class_or_func: !import_get
76 | key: 'torch.utils.data.SequentialSampler'
77 | kwargs:
78 | kwargs:
79 | batch_size: 1
80 | num_workers: 16
81 | drop_last: False
82 |
--------------------------------------------------------------------------------
/configs/ilsvrc2012/feature_compression/webp-resnet50.yaml:
--------------------------------------------------------------------------------
1 | datasets:
2 | &imagenet_val ilsvrc2012/val: !import_call
3 | _name: &dataset_name 'ilsvrc2012'
4 | _root: &root_dir !join ['~/datasets/', *dataset_name]
5 | key: 'torchvision.datasets.ImageFolder'
6 | init:
7 | kwargs:
8 | root: !join [ *root_dir, '/val' ]
9 | transform: !import_call
10 | key: 'torchvision.transforms.Compose'
11 | init:
12 | kwargs:
13 | transforms:
14 | - !import_call
15 | key: 'torchvision.transforms.Resize'
16 | init:
17 | kwargs:
18 | size: 256
19 | - !import_call
20 | key: 'torchvision.transforms.CenterCrop'
21 | init:
22 | kwargs:
23 | size: [224, 224]
24 | - !import_call
25 | key: 'torchvision.transforms.ToTensor'
26 | init:
27 | - !import_call
28 | key: 'torchvision.transforms.Normalize'
29 | init:
30 | kwargs:
31 | mean: [0.485, 0.456, 0.406]
32 | std: [0.229, 0.224, 0.225]
33 |
34 | models:
35 | model:
36 | key: 'CodecFeatureCompressionClassifier'
37 | kwargs:
38 | codec_encoder_decoder: !import_call
39 | key: 'torchvision.transforms.Compose'
40 | init:
41 | kwargs:
42 | transforms:
43 | - !import_call
44 | key: 'sc2bench.transforms.codec.PILTensorModule'
45 | init:
46 | kwargs:
47 | format: 'WEBP'
48 | quality: 90
49 | returns_file_size: True
50 | encoder_config:
51 | sequential: ['conv1', 'bn1', 'relu', 'maxpool', 'layer1', 'layer2']
52 | decoder_config:
53 | sequential: ['layer3', 'layer4', 'avgpool']
54 | classifier_config:
55 | sequential: ['fc']
56 | post_transform:
57 | analysis_config:
58 | analyzer_configs:
59 | - key: 'FileSizeAccumulator'
60 | kwargs:
61 | unit: 'KB'
62 | classification_model:
63 | key: 'resnet50'
64 | _weights: &model_weights_enum !import_get
65 | key: 'torchvision.models.resnet.ResNet50_Weights'
66 | kwargs:
67 | num_classes: 1000
68 | weights: !getattr [*model_weights_enum, 'IMAGENET1K_V1']
69 |
70 | test:
71 | test_data_loader:
72 | dataset_id: *imagenet_val
73 | collate_fn: 'default_collate_w_pil'
74 | sampler:
75 | class_or_func: !import_get
76 | key: 'torch.utils.data.SequentialSampler'
77 | kwargs:
78 | kwargs:
79 | batch_size: 1
80 | num_workers: 16
81 | drop_last: False
82 |
--------------------------------------------------------------------------------
/configs/ilsvrc2012/input_compression/mean_scale_hyperprior-resnet50.yaml:
--------------------------------------------------------------------------------
1 | datasets:
2 | &imagenet_val ilsvrc2012/val: !import_call
3 | _name: &dataset_name 'ilsvrc2012'
4 | _root: &root_dir !join ['~/datasets/', *dataset_name]
5 | key: 'torchvision.datasets.ImageFolder'
6 | init:
7 | kwargs:
8 | root: !join [*root_dir, '/val']
9 | transform: !import_call
10 | key: 'torchvision.transforms.Compose'
11 | init:
12 | kwargs:
13 | transforms:
14 | - !import_call
15 | key: 'torchvision.transforms.Resize'
16 | init:
17 | kwargs:
18 | size: 256
19 | - !import_call
20 | key: 'torchvision.transforms.CenterCrop'
21 | init:
22 | kwargs:
23 | size: &input_size [224, 224]
24 | - !import_call
25 | key: 'torchvision.transforms.ToTensor'
26 | init:
27 | - !import_call
28 | key: 'sc2bench.transforms.misc.AdaptivePad'
29 | init:
30 | kwargs:
31 | fill: 0
32 | factor: 64
33 |
34 | models:
35 | model:
36 | key: 'NeuralInputCompressionClassifier'
37 | kwargs:
38 | post_transform: !import_call
39 | key: 'torchvision.transforms.Compose'
40 | init:
41 | kwargs:
42 | transforms:
43 | - !import_call
44 | key: 'torchvision.transforms.CenterCrop'
45 | init:
46 | kwargs:
47 | size: *input_size
48 | - !import_call
49 | key: 'torchvision.transforms.Normalize'
50 | init:
51 | kwargs:
52 | mean: [0.485, 0.456, 0.406]
53 | std: [0.229, 0.224, 0.225]
54 | analysis_config:
55 | analyzes_after_compress: True
56 | analyzer_configs:
57 | - key: 'FileSizeAnalyzer'
58 | kwargs:
59 | unit: 'KB'
60 | compression_model:
61 | key: 'mbt2018_mean'
62 | kwargs:
63 | pretrained: True
64 | quality: 8
65 | metric: 'mse'
66 | classification_model:
67 | key: 'resnet50'
68 | _weights: &model_weights_enum !import_get
69 | key: 'torchvision.models.resnet.ResNet50_Weights'
70 | kwargs:
71 | num_classes: 1000
72 | weights: !getattr [*model_weights_enum, 'IMAGENET1K_V1']
73 |
74 | test:
75 | test_data_loader:
76 | dataset_id: *imagenet_val
77 | sampler:
78 | class_or_func: !import_get
79 | key: 'torch.utils.data.SequentialSampler'
80 | kwargs:
81 | kwargs:
82 | batch_size: 1
83 | num_workers: 16
84 | drop_last: False
85 |
--------------------------------------------------------------------------------
/configs/pascal_voc2012/input_compression/jpeg-deeplabv3_resnet50.yaml:
--------------------------------------------------------------------------------
1 | datasets:
2 | &pascal_val 'pascal_voc2012/val': !import_call
3 | _name: 'pascal_voc2012'
4 | _root: &root_dir '~/datasets'
5 | key: 'torchvision.datasets.VOCSegmentation'
6 | init:
7 | kwargs:
8 | root: *root_dir
9 | image_set: 'val'
10 | year: '2012'
11 | download: True
12 | transforms: !import_call
13 | key: 'custom.transform.CustomCompose'
14 | init:
15 | kwargs:
16 | transforms:
17 | - !import_call
18 | key: 'custom.transform.CustomRandomResize'
19 | init:
20 | kwargs:
21 | min_size: 513
22 | max_size: 513
23 | - !import_call
24 | key: 'sc2bench.transforms.misc.CustomToTensor'
25 | init:
26 | kwargs:
27 | converts_sample: False
28 | converts_target: True
29 |
30 | models:
31 | model:
32 | key: 'CodecInputCompressionSegmentationModel'
33 | kwargs:
34 | codec_encoder_decoder: !import_call
35 | key: 'torchvision.transforms.Compose'
36 | init:
37 | kwargs:
38 | transforms:
39 | - !import_call
40 | key: 'sc2bench.transforms.codec.PILImageModule'
41 | init:
42 | kwargs:
43 | format: 'JPEG'
44 | quality: 90
45 | returns_file_size: True
46 | analysis_config:
47 | analyzer_configs:
48 | - key: 'FileSizeAccumulator'
49 | kwargs:
50 | unit: 'KB'
51 | post_transform: !import_call
52 | key: 'torchvision.transforms.Compose'
53 | init:
54 | kwargs:
55 | transforms:
56 | - !import_call
57 | key: 'torchvision.transforms.ToTensor'
58 | init:
59 | - !import_call
60 | key: 'torchvision.transforms.Normalize'
61 | init:
62 | kwargs:
63 | mean: [0.485, 0.456, 0.406]
64 | std: [0.229, 0.224, 0.225]
65 | segmentation_model:
66 | key: 'deeplabv3_resnet50'
67 | kwargs:
68 | pretrained: True
69 | num_classes: 21
70 | aux_loss: True
71 | src_ckpt: 'https://github.com/yoshitomo-matsubara/torchdistill/releases/download/v0.2.8/pascal_voc2012-deeplabv3_resnet50.pt'
72 |
73 | test:
74 | test_data_loader:
75 | dataset_id: *pascal_val
76 | sampler:
77 | class_or_func: !import_get
78 | key: 'torch.utils.data.SequentialSampler'
79 | kwargs:
80 | collate_fn: 'pascal_seg_eval_collate_fn'
81 | kwargs:
82 | batch_size: 1
83 | num_workers: 16
84 |
--------------------------------------------------------------------------------
/configs/pascal_voc2012/input_compression/webp-deeplabv3_resnet101.yaml:
--------------------------------------------------------------------------------
1 | datasets:
2 | &pascal_val 'pascal_voc2012/val': !import_call
3 | _name: 'pascal_voc2012'
4 | _root: &root_dir '~/datasets'
5 | key: 'torchvision.datasets.VOCSegmentation'
6 | init:
7 | kwargs:
8 | root: *root_dir
9 | image_set: 'val'
10 | year: '2012'
11 | download: True
12 | transforms: !import_call
13 | key: 'custom.transform.CustomCompose'
14 | init:
15 | kwargs:
16 | transforms:
17 | - !import_call
18 | key: 'custom.transform.CustomRandomResize'
19 | init:
20 | kwargs:
21 | min_size: 513
22 | max_size: 513
23 | - !import_call
24 | key: 'sc2bench.transforms.misc.CustomToTensor'
25 | init:
26 | kwargs:
27 | converts_sample: False
28 | converts_target: True
29 |
30 | models:
31 | model:
32 | key: 'CodecInputCompressionSegmentationModel'
33 | kwargs:
34 | codec_encoder_decoder: !import_call
35 | key: 'torchvision.transforms.Compose'
36 | init:
37 | kwargs:
38 | transforms:
39 | - !import_call
40 | key: 'sc2bench.transforms.codec.PILImageModule'
41 | init:
42 | kwargs:
43 | format: 'WEBP'
44 | quality: 90
45 | returns_file_size: True
46 | analysis_config:
47 | analyzer_configs:
48 | - key: 'FileSizeAccumulator'
49 | kwargs:
50 | unit: 'KB'
51 | post_transform: !import_call
52 | key: 'torchvision.transforms.Compose'
53 | init:
54 | kwargs:
55 | transforms:
56 | - !import_call
57 | key: 'torchvision.transforms.ToTensor'
58 | init:
59 | - !import_call
60 | key: 'torchvision.transforms.Normalize'
61 | init:
62 | kwargs:
63 | mean: [0.485, 0.456, 0.406]
64 | std: [0.229, 0.224, 0.225]
65 | segmentation_model:
66 | key: 'deeplabv3_resnet101'
67 | kwargs:
68 | pretrained: True
69 | num_classes: 21
70 | aux_loss: True
71 | src_ckpt: 'https://github.com/yoshitomo-matsubara/torchdistill/releases/download/v0.2.8/pascal_voc2012-deeplabv3_resnet101.pt'
72 |
73 | test:
74 | test_data_loader:
75 | dataset_id: *pascal_val
76 | sampler:
77 | class_or_func: !import_get
78 | key: 'torch.utils.data.SequentialSampler'
79 | kwargs:
80 | collate_fn: 'pascal_seg_eval_collate_fn'
81 | kwargs:
82 | batch_size: 1
83 | num_workers: 16
84 |
--------------------------------------------------------------------------------
/configs/pascal_voc2012/input_compression/webp-deeplabv3_resnet50.yaml:
--------------------------------------------------------------------------------
1 | datasets:
2 | &pascal_val 'pascal_voc2012/val': !import_call
3 | _name: 'pascal_voc2012'
4 | _root: &root_dir '~/datasets'
5 | key: 'torchvision.datasets.VOCSegmentation'
6 | init:
7 | kwargs:
8 | root: *root_dir
9 | image_set: 'val'
10 | year: '2012'
11 | download: True
12 | transforms: !import_call
13 | key: 'custom.transform.CustomCompose'
14 | init:
15 | kwargs:
16 | transforms:
17 | - !import_call
18 | key: 'custom.transform.CustomRandomResize'
19 | init:
20 | kwargs:
21 | min_size: 513
22 | max_size: 513
23 | - !import_call
24 | key: 'sc2bench.transforms.misc.CustomToTensor'
25 | init:
26 | kwargs:
27 | converts_sample: False
28 | converts_target: True
29 |
30 | models:
31 | model:
32 | key: 'CodecInputCompressionSegmentationModel'
33 | kwargs:
34 | codec_encoder_decoder: !import_call
35 | key: 'torchvision.transforms.Compose'
36 | init:
37 | kwargs:
38 | transforms:
39 | - !import_call
40 | key: 'sc2bench.transforms.codec.PILImageModule'
41 | init:
42 | kwargs:
43 | format: 'WEBP'
44 | quality: 90
45 | returns_file_size: True
46 | analysis_config:
47 | analyzer_configs:
48 | - key: 'FileSizeAccumulator'
49 | kwargs:
50 | unit: 'KB'
51 | post_transform: !import_call
52 | key: 'torchvision.transforms.Compose'
53 | init:
54 | kwargs:
55 | transforms:
56 | - !import_call
57 | key: 'torchvision.transforms.ToTensor'
58 | init:
59 | - !import_call
60 | key: 'torchvision.transforms.Normalize'
61 | init:
62 | kwargs:
63 | mean: [0.485, 0.456, 0.406]
64 | std: [0.229, 0.224, 0.225]
65 | segmentation_model:
66 | key: 'deeplabv3_resnet50'
67 | kwargs:
68 | pretrained: True
69 | num_classes: 21
70 | aux_loss: True
71 | src_ckpt: 'https://github.com/yoshitomo-matsubara/torchdistill/releases/download/v0.2.8/pascal_voc2012-deeplabv3_resnet50.pt'
72 |
73 | test:
74 | test_data_loader:
75 | dataset_id: *pascal_val
76 | sampler:
77 | class_or_func: !import_get
78 | key: 'torch.utils.data.SequentialSampler'
79 | kwargs:
80 | collate_fn: 'pascal_seg_eval_collate_fn'
81 | kwargs:
82 | batch_size: 1
83 | num_workers: 16
84 |
--------------------------------------------------------------------------------
/configs/ilsvrc2012/input_compression/factorized_prior-resnet50.yaml:
--------------------------------------------------------------------------------
1 | datasets:
2 | &imagenet_val ilsvrc2012/val: !import_call
3 | _name: &dataset_name 'ilsvrc2012'
4 | _root: &root_dir !join ['~/datasets/', *dataset_name]
5 | key: 'torchvision.datasets.ImageFolder'
6 | init:
7 | kwargs:
8 | root: !join [*root_dir, '/val']
9 | transform: !import_call
10 | key: 'torchvision.transforms.Compose'
11 | init:
12 | kwargs:
13 | transforms:
14 | - !import_call
15 | key: 'torchvision.transforms.Resize'
16 | init:
17 | kwargs:
18 | size: 256
19 | - !import_call
20 | key: 'torchvision.transforms.CenterCrop'
21 | init:
22 | kwargs:
23 | size: &input_size [224, 224]
24 | - !import_call
25 | key: 'torchvision.transforms.ToTensor'
26 | init:
27 | - !import_call
28 | key: 'sc2bench.transforms.misc.AdaptivePad'
29 | init:
30 | kwargs:
31 | fill: 0
32 | factor: 64
33 |
34 | models:
35 | model:
36 | key: 'NeuralInputCompressionClassifier'
37 | kwargs:
38 | post_transform: !import_call
39 | key: 'torchvision.transforms.Compose'
40 | init:
41 | kwargs:
42 | transforms:
43 | - !import_call
44 | key: 'torchvision.transforms.CenterCrop'
45 | init:
46 | kwargs:
47 | size: *input_size
48 | - !import_call
49 | key: 'torchvision.transforms.Normalize'
50 | init:
51 | kwargs:
52 | mean: [0.485, 0.456, 0.406]
53 | std: [0.229, 0.224, 0.225]
54 | analysis_config:
55 | analyzes_after_compress: True
56 | analyzer_configs:
57 | - key: 'FileSizeAnalyzer'
58 | kwargs:
59 | unit: 'KB'
60 | compression_model:
61 | key: 'bmshj2018_factorized'
62 | kwargs:
63 | pretrained: True
64 | quality: 8
65 | metric: 'mse'
66 | classification_model:
67 | key: 'resnet50'
68 | _weights: &model_weights_enum !import_get
69 | key: 'torchvision.models.resnet.ResNet50_Weights'
70 | kwargs:
71 | num_classes: 1000
72 | weights: !getattr [*model_weights_enum, 'IMAGENET1K_V1']
73 |
74 | test:
75 | test_data_loader:
76 | dataset_id: *imagenet_val
77 | sampler:
78 | class_or_func: !import_get
79 | key: 'torch.utils.data.SequentialSampler'
80 | kwargs:
81 | kwargs:
82 | batch_size: 1
83 | num_workers: 16
84 | drop_last: False
85 |
--------------------------------------------------------------------------------
/configs/ilsvrc2012/input_compression/scale_hyperprior-resnet50.yaml:
--------------------------------------------------------------------------------
1 | datasets:
2 | &imagenet_val ilsvrc2012/val: !import_call
3 | _name: &dataset_name 'ilsvrc2012'
4 | _root: &root_dir !join ['~/datasets/', *dataset_name]
5 | key: 'torchvision.datasets.ImageFolder'
6 | init:
7 | kwargs:
8 | root: !join [*root_dir, '/val']
9 | transform: !import_call
10 | key: 'torchvision.transforms.Compose'
11 | init:
12 | kwargs:
13 | transforms:
14 | - !import_call
15 | key: 'torchvision.transforms.Resize'
16 | init:
17 | kwargs:
18 | size: 256
19 | - !import_call
20 | key: 'torchvision.transforms.CenterCrop'
21 | init:
22 | kwargs:
23 | size: &input_size [224, 224]
24 | - !import_call
25 | key: 'torchvision.transforms.ToTensor'
26 | init:
27 | - !import_call
28 | key: 'sc2bench.transforms.misc.AdaptivePad'
29 | init:
30 | kwargs:
31 | fill: 0
32 | factor: 64
33 |
34 | models:
35 | model:
36 | key: 'NeuralInputCompressionClassifier'
37 | kwargs:
38 | post_transform: !import_call
39 | key: 'torchvision.transforms.Compose'
40 | init:
41 | kwargs:
42 | transforms:
43 | - !import_call
44 | key: 'torchvision.transforms.CenterCrop'
45 | init:
46 | kwargs:
47 | size: *input_size
48 | - !import_call
49 | key: 'torchvision.transforms.Normalize'
50 | init:
51 | kwargs:
52 | mean: [0.485, 0.456, 0.406]
53 | std: [0.229, 0.224, 0.225]
54 | analysis_config:
55 | analyzes_after_compress: True
56 | analyzer_configs:
57 | - key: 'FileSizeAnalyzer'
58 | kwargs:
59 | unit: 'KB'
60 | compression_model:
61 | key: 'bmshj2018_hyperprior'
62 | kwargs:
63 | pretrained: True
64 | quality: 8
65 | metric: 'mse'
66 | classification_model:
67 | key: 'resnet50'
68 | _weights: &model_weights_enum !import_get
69 | key: 'torchvision.models.resnet.ResNet50_Weights'
70 | kwargs:
71 | num_classes: 1000
72 | weights: !getattr [*model_weights_enum, 'IMAGENET1K_V1']
73 |
74 | test:
75 | test_data_loader:
76 | dataset_id: *imagenet_val
77 | sampler:
78 | class_or_func: !import_get
79 | key: 'torch.utils.data.SequentialSampler'
80 | kwargs:
81 | kwargs:
82 | batch_size: 1
83 | num_workers: 16
84 | drop_last: False
85 |
--------------------------------------------------------------------------------
/configs/pascal_voc2012/input_compression/jpeg-deeplabv3_resnet101.yaml:
--------------------------------------------------------------------------------
1 | datasets:
2 | &pascal_val 'pascal_voc2012/val': !import_call
3 | _name: 'pascal_voc2012'
4 | _root: &root_dir '~/datasets'
5 | key: 'torchvision.datasets.VOCSegmentation'
6 | init:
7 | kwargs:
8 | root: *root_dir
9 | image_set: 'val'
10 | year: '2012'
11 | download: True
12 | transforms: !import_call
13 | key: 'custom.transform.CustomCompose'
14 | init:
15 | kwargs:
16 | transforms:
17 | - !import_call
18 | key: 'custom.transform.CustomRandomResize'
19 | init:
20 | kwargs:
21 | min_size: 513
22 | max_size: 513
23 | - !import_call
24 | key: 'sc2bench.transforms.misc.CustomToTensor'
25 | init:
26 | kwargs:
27 | converts_sample: False
28 | converts_target: True
29 |
30 | models:
31 | model:
32 | key: 'CodecInputCompressionSegmentationModel'
33 | kwargs:
34 | codec_encoder_decoder: !import_call
35 | key: 'torchvision.transforms.Compose'
36 | init:
37 | kwargs:
38 | transforms:
39 | - !import_call
40 | key: 'sc2bench.transforms.codec.PILImageModule'
41 | init:
42 | kwargs:
43 | format: 'JPEG'
44 | quality: 90
45 | returns_file_size: True
46 | analysis_config:
47 | analyzer_configs:
48 | - key: 'FileSizeAccumulator'
49 | kwargs:
50 | unit: 'KB'
51 | post_transform: !import_call
52 | key: 'torchvision.transforms.Compose'
53 | init:
54 | kwargs:
55 | transforms:
56 | - !import_call
57 | key: 'torchvision.transforms.ToTensor'
58 | init:
59 | - !import_call
60 | key: 'torchvision.transforms.Normalize'
61 | init:
62 | kwargs:
63 | mean: [0.485, 0.456, 0.406]
64 | std: [0.229, 0.224, 0.225]
65 | segmentation_model:
66 | key: 'deeplabv3_resnet101'
67 | kwargs:
68 | pretrained: False
69 | num_classes: 21
70 | aux_loss: True
71 | src_ckpt: 'https://github.com/yoshitomo-matsubara/torchdistill/releases/download/v0.2.8/pascal_voc2012-deeplabv3_resnet101.pt'
72 |
73 | test:
74 | test_data_loader:
75 | dataset_id: *pascal_val
76 | sampler:
77 | class_or_func: !import_get
78 | key: 'torch.utils.data.SequentialSampler'
79 | kwargs:
80 | collate_fn: 'pascal_seg_eval_collate_fn'
81 | kwargs:
82 | batch_size: 1
83 | num_workers: 16
84 |
--------------------------------------------------------------------------------
/configs/ilsvrc2012/input_compression/joint_autoregressive_hierarchical_prior-resnet50.yaml:
--------------------------------------------------------------------------------
1 | datasets:
  2 |   &imagenet_val 'ilsvrc2012/val': !import_call
3 | _name: &dataset_name 'ilsvrc2012'
4 | _root: &root_dir !join ['~/datasets/', *dataset_name]
5 | key: 'torchvision.datasets.ImageFolder'
6 | init:
7 | kwargs:
8 | root: !join [*root_dir, '/val']
9 | transform: !import_call
10 | key: 'torchvision.transforms.Compose'
11 | init:
12 | kwargs:
13 | transforms:
14 | - !import_call
15 | key: 'torchvision.transforms.Resize'
16 | init:
17 | kwargs:
18 | size: 256
19 | - !import_call
20 | key: 'torchvision.transforms.CenterCrop'
21 | init:
22 | kwargs:
23 | size: &input_size [224, 224]
24 | - !import_call
25 | key: 'torchvision.transforms.ToTensor'
26 | init:
27 | - !import_call
28 | key: 'sc2bench.transforms.misc.AdaptivePad'
29 | init:
30 | kwargs:
31 | fill: 0
32 | factor: 64
33 |
34 | models:
35 | model:
36 | key: 'NeuralInputCompressionClassifier'
37 | kwargs:
38 | post_transform: !import_call
39 | key: 'torchvision.transforms.Compose'
40 | init:
41 | kwargs:
42 | transforms:
43 | - !import_call
44 | key: 'torchvision.transforms.CenterCrop'
45 | init:
46 | kwargs:
47 | size: *input_size
48 | - !import_call
49 | key: 'torchvision.transforms.Normalize'
50 | init:
51 | kwargs:
52 | mean: [0.485, 0.456, 0.406]
53 | std: [0.229, 0.224, 0.225]
54 | analysis_config:
55 | analyzes_after_compress: True
56 | analyzer_configs:
57 | - key: 'FileSizeAnalyzer'
58 | kwargs:
59 | unit: 'KB'
60 | uses_cpu4compression_model: True
61 | compression_model:
62 | key: 'mbt2018'
63 | kwargs:
64 | pretrained: True
65 | quality: 8
66 | metric: 'mse'
67 | classification_model:
68 | key: 'resnet50'
69 | _weights: &model_weights_enum !import_get
70 | key: 'torchvision.models.resnet.ResNet50_Weights'
71 | kwargs:
72 | num_classes: 1000
73 | weights: !getattr [*model_weights_enum, 'IMAGENET1K_V1']
74 |
75 | test:
76 | test_data_loader:
77 | dataset_id: *imagenet_val
78 | sampler:
79 | class_or_func: !import_get
80 | key: 'torch.utils.data.SequentialSampler'
81 | kwargs:
82 | kwargs:
83 | batch_size: 1
84 | num_workers: 16
85 | drop_last: False
86 |
--------------------------------------------------------------------------------
/configs/ilsvrc2012/input_compression/vtm-resnet50.yaml:
--------------------------------------------------------------------------------
1 | datasets:
  2 |   &imagenet_val 'ilsvrc2012/val': !import_call
3 | _name: &dataset_name 'ilsvrc2012'
4 | _root: &root_dir !join ['~/datasets/', *dataset_name]
5 | key: 'torchvision.datasets.ImageFolder'
6 | init:
7 | kwargs:
  8 |       root: !join [*root_dir, '/val']
9 | transform: !import_call
10 | key: 'torchvision.transforms.Compose'
11 | init:
12 | kwargs:
13 | transforms:
14 | - !import_call
15 | key: 'torchvision.transforms.Resize'
16 | init:
17 | kwargs:
18 | size: 256
19 | - !import_call
20 | key: 'torchvision.transforms.CenterCrop'
21 | init:
22 | kwargs:
23 | size: [224, 224]
24 |
25 | models:
26 | model:
27 | key: 'CodecInputCompressionClassifier'
28 | kwargs:
29 | codec_encoder_decoder: !import_call
30 | key: 'torchvision.transforms.Compose'
31 | init:
32 | kwargs:
33 | transforms:
34 | - !import_call
35 | key: 'sc2bench.transforms.codec.VTMModule'
36 | init:
37 | kwargs:
38 | encoder_path: '~/software/VVCSoftware_VTM/bin/EncoderAppStatic'
39 | decoder_path: '~/software/VVCSoftware_VTM/bin/DecoderAppStatic'
40 | config_path: '~/software/VVCSoftware_VTM/cfg/encoder_intra_vtm.cfg'
41 | color_mode: 'ycbcr'
42 | quality: 63
43 | returns_file_size: True
44 | post_transform: !import_call
45 | key: 'torchvision.transforms.Compose'
46 | init:
47 | kwargs:
48 | transforms:
49 | - !import_call
50 | key: 'torchvision.transforms.ToTensor'
51 | init:
52 | - !import_call
53 | key: 'torchvision.transforms.Normalize'
54 | init:
55 | kwargs:
56 | mean: [0.485, 0.456, 0.406]
57 | std: [0.229, 0.224, 0.225]
58 | analysis_config:
59 | analyzer_configs:
60 | - key: 'FileSizeAccumulator'
61 | kwargs:
62 | unit: 'KB'
63 | classification_model:
64 | key: 'resnet50'
65 | _weights: &model_weights_enum !import_get
66 | key: 'torchvision.models.resnet.ResNet50_Weights'
67 | kwargs:
68 | num_classes: 1000
69 | weights: !getattr [*model_weights_enum, 'IMAGENET1K_V1']
70 |
71 | test:
72 | test_data_loader:
73 | dataset_id: *imagenet_val
74 | sampler:
75 | class_or_func: !import_get
76 | key: 'torch.utils.data.SequentialSampler'
77 | kwargs:
78 | kwargs:
79 | batch_size: 1
80 | num_workers: 16
81 | drop_last: False
82 |
--------------------------------------------------------------------------------
/docs/source/conf.py:
--------------------------------------------------------------------------------
# Configuration file for the Sphinx documentation builder.
#
# This file only contains a selection of the most common options. For a full
# list see the documentation:
# https://www.sphinx-doc.org/en/master/usage/configuration.html

# -- Path setup --------------------------------------------------------------

# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
#
import pathlib
import sys

# Make the repository root (two levels above docs/source/) importable so that
# autodoc can resolve the local sc2bench package.
sys.path.insert(0, pathlib.Path(__file__).parents[2].resolve().as_posix())


# -- Project information -----------------------------------------------------

project = 'SC2 Benchmark'
copyright = '2023, Yoshitomo Matsubara'
author = 'Yoshitomo Matsubara'

# The full version, including alpha/beta/rc tags.
# NOTE: imported *after* the sys.path tweak above so the in-repo package wins.
import sc2bench
version = 'v' + sc2bench.__version__
release = version


# -- General configuration ---------------------------------------------------

# Add any Sphinx extension module names here, as strings. They can be
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
# ones.
extensions = [
    'sphinx.ext.autodoc',
    'sphinx.ext.autosummary',
    'sphinx.ext.doctest',
    'sphinx.ext.viewcode',
    'sphinx_rtd_theme',
    'sphinxcontrib.youtube'
]
autodoc_member_order = 'bysource'
highlight_language = 'python'

html_show_sourcelink = False
# Enables the "Edit on GitHub" links rendered by the RTD theme.
html_context = {
    'display_github': True,
    'github_user': 'yoshitomo-matsubara',
    'github_repo': 'sc2-benchmark',
    'github_version': 'main',
    'conf_py_path': '/docs/source/'
}

html_theme_options = {
    'analytics_id': 'G-39T9X4DN85',
    'display_version': True,
    'style_external_links': True
}

# Add any paths that contain templates here, relative to this directory.
templates_path = ['_templates']

# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
# This pattern also affects html_static_path and html_extra_path.
exclude_patterns = []


# -- Options for HTML output -------------------------------------------------

# The theme to use for HTML and HTML Help pages. See the documentation for
# a list of builtin themes.
#
# html_theme was previously assigned twice (once here, once above); the
# duplicate assignment has been removed and the single assignment kept here,
# next to the theme import.
import sphinx_rtd_theme
html_theme = 'sphinx_rtd_theme'

# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
html_static_path = ['_static']
--------------------------------------------------------------------------------
/configs/pascal_voc2012/input_compression/bpg-deeplabv3_resnet50.yaml:
--------------------------------------------------------------------------------
1 | datasets:
2 | &pascal_val 'pascal_voc2012/val': !import_call
3 | _name: 'pascal_voc2012'
4 | _root: &root_dir '~/datasets'
5 | key: 'torchvision.datasets.VOCSegmentation'
6 | init:
7 | kwargs:
8 | root: *root_dir
9 | image_set: 'val'
10 | year: '2012'
11 | download: True
12 | transforms: !import_call
13 | key: 'custom.transform.CustomCompose'
14 | init:
15 | kwargs:
16 | transforms:
17 | - !import_call
18 | key: 'custom.transform.CustomRandomResize'
19 | init:
20 | kwargs:
21 | min_size: 513
22 | max_size: 513
23 | - !import_call
24 | key: 'sc2bench.transforms.misc.CustomToTensor'
25 | init:
26 | kwargs:
27 | converts_sample: False
28 | converts_target: True
29 |
30 | models:
31 | model:
32 | key: 'CodecInputCompressionSegmentationModel'
33 | kwargs:
34 | codec_encoder_decoder: !import_call
35 | key: 'torchvision.transforms.Compose'
36 | init:
37 | kwargs:
38 | transforms:
39 | - !import_call
40 | key: 'sc2bench.transforms.codec.BPGModule'
41 | init:
42 | kwargs:
43 | encoder_path: '~/software/libbpg-0.9.8/bpgenc'
44 | decoder_path: '~/software/libbpg-0.9.8/bpgdec'
45 | quality: 50
46 | returns_file_size: True
47 | analysis_config:
48 | analyzer_configs:
49 | - key: 'FileSizeAccumulator'
50 | kwargs:
51 | unit: 'KB'
52 | post_transform: !import_call
53 | key: 'torchvision.transforms.Compose'
54 | init:
55 | kwargs:
56 | transforms:
57 | - !import_call
58 | key: 'torchvision.transforms.ToTensor'
59 | init:
60 | - !import_call
61 | key: 'torchvision.transforms.Normalize'
62 | init:
63 | kwargs:
64 | mean: [0.485, 0.456, 0.406]
65 | std: [0.229, 0.224, 0.225]
66 | segmentation_model:
67 | key: 'deeplabv3_resnet50'
68 | kwargs:
69 | pretrained: True
70 | num_classes: 21
71 | aux_loss: True
72 | src_ckpt: 'https://github.com/yoshitomo-matsubara/torchdistill/releases/download/v0.2.8/pascal_voc2012-deeplabv3_resnet50.pt'
73 |
74 | test:
75 | test_data_loader:
76 | dataset_id: *pascal_val
77 | sampler:
78 | class_or_func: !import_get
79 | key: 'torch.utils.data.SequentialSampler'
80 | kwargs:
81 | collate_fn: 'pascal_seg_eval_collate_fn'
82 | kwargs:
83 | batch_size: 1
84 | num_workers: 16
85 |
--------------------------------------------------------------------------------
/configs/pascal_voc2012/input_compression/bpg-deeplabv3_resnet101.yaml:
--------------------------------------------------------------------------------
1 | datasets:
2 | &pascal_val 'pascal_voc2012/val': !import_call
3 | _name: 'pascal_voc2012'
4 | _root: &root_dir '~/datasets'
5 | key: 'torchvision.datasets.VOCSegmentation'
6 | init:
7 | kwargs:
8 | root: *root_dir
9 | image_set: 'val'
10 | year: '2012'
11 | download: True
12 | transforms: !import_call
13 | key: 'custom.transform.CustomCompose'
14 | init:
15 | kwargs:
16 | transforms:
17 | - !import_call
18 | key: 'custom.transform.CustomRandomResize'
19 | init:
20 | kwargs:
21 | min_size: 513
22 | max_size: 513
23 | - !import_call
24 | key: 'sc2bench.transforms.misc.CustomToTensor'
25 | init:
26 | kwargs:
27 | converts_sample: False
28 | converts_target: True
29 |
30 | models:
31 | model:
32 | key: 'CodecInputCompressionSegmentationModel'
33 | kwargs:
34 | codec_encoder_decoder: !import_call
35 | key: 'torchvision.transforms.Compose'
36 | init:
37 | kwargs:
38 | transforms:
39 | - !import_call
40 | key: 'sc2bench.transforms.codec.BPGModule'
41 | init:
42 | kwargs:
43 | encoder_path: '~/software/libbpg-0.9.8/bpgenc'
44 | decoder_path: '~/software/libbpg-0.9.8/bpgdec'
45 | quality: 50
46 | returns_file_size: True
47 | analysis_config:
48 | analyzer_configs:
49 | - key: 'FileSizeAccumulator'
50 | kwargs:
51 | unit: 'KB'
52 | post_transform: !import_call
53 | key: 'torchvision.transforms.Compose'
54 | init:
55 | kwargs:
56 | transforms:
57 | - !import_call
58 | key: 'torchvision.transforms.ToTensor'
59 | init:
60 | - !import_call
61 | key: 'torchvision.transforms.Normalize'
62 | init:
63 | kwargs:
64 | mean: [0.485, 0.456, 0.406]
65 | std: [0.229, 0.224, 0.225]
66 | segmentation_model:
67 | key: 'deeplabv3_resnet101'
68 | kwargs:
69 | pretrained: True
70 | num_classes: 21
71 | aux_loss: True
72 | src_ckpt: 'https://github.com/yoshitomo-matsubara/torchdistill/releases/download/v0.2.8/pascal_voc2012-deeplabv3_resnet101.pt'
73 |
74 | test:
75 | test_data_loader:
76 | dataset_id: *pascal_val
77 | sampler:
78 | class_or_func: !import_get
79 | key: 'torch.utils.data.SequentialSampler'
80 | kwargs:
81 | collate_fn: 'pascal_seg_eval_collate_fn'
82 | kwargs:
83 | batch_size: 1
84 | num_workers: 16
85 |
--------------------------------------------------------------------------------