├── sc2bench ├── common │ ├── __init__.py │ └── config_util.py ├── __init__.py ├── models │ ├── detection │ │ └── __init__.py │ ├── segmentation │ │ └── __init__.py │ └── __init__.py ├── transforms │ ├── __init__.py │ └── collator.py └── loss.py ├── script ├── task │ ├── coco │ │ └── __init__.py │ ├── utils │ │ ├── __init__.py │ │ ├── dataset.py │ │ └── eval.py │ └── custom │ │ ├── __init__.py │ │ └── collator.py ├── software │ ├── install_vtm.sh │ └── install_bpg.sh ├── neural_input_compression │ ├── coco2017-object_detection.sh │ ├── ilsvrc2012-image_classification.sh │ ├── pascal_voc2012-semantic_segmentation.sh │ └── README.md ├── codec_input_compression │ ├── coco2017-object_detection.sh │ ├── ilsvrc2012-image_classification.sh │ ├── pascal_voc2012-semantic_segmentation.sh │ └── README.md └── README.md ├── .gitignore ├── MANIFEST.in ├── imgs ├── ilsvrc2012-overview.png └── input_vs_supervised_compression.png ├── legacy ├── README.md ├── script │ ├── software │ │ ├── install_vtm.sh │ │ └── install_bpg.sh │ ├── neural_input_compression │ │ ├── coco2017-object_detection.sh │ │ ├── ilsvrc2012-image_classification.sh │ │ ├── pascal_voc2012-semantic_segmentation.sh │ │ └── README.md │ ├── codec_input_compression │ │ ├── coco2017-object_detection.sh │ │ ├── ilsvrc2012-image_classification.sh │ │ ├── pascal_voc2012-semantic_segmentation.sh │ │ └── README.md │ └── README.md └── configs │ ├── coco2017 │ └── input_compression │ │ ├── jpeg-faster_rcnn_resnet50_fpn.yaml │ │ ├── webp-faster_rcnn_resnet50_fpn.yaml │ │ ├── bpg-faster_rcnn_resnet50_fpn.yaml │ │ ├── factorized_prior-faster_rcnn_resnet50_fpn.yaml │ │ ├── mean_scale_hyperprior-faster_rcnn_resnet50_fpn.yaml │ │ ├── scale_hyperprior-faster_rcnn_resnet50_fpn.yaml │ │ └── joint_autoregressive_hierarchical_prior-faster_rcnn_resnet50_fpn.yaml │ ├── ilsvrc2012 │ ├── input_compression │ │ ├── jpeg-tf_efficientnet_l2_ns.yaml │ │ ├── jpeg-tf_efficientnet_l2_ns_475.yaml │ │ ├── jpeg-resnet50.yaml │ │ ├── webp-resnet50.yaml 
│ │ ├── jpeg-resnet101.yaml │ │ ├── jpeg-resnet152.yaml │ │ ├── webp-resnet101.yaml │ │ ├── webp-resnet152.yaml │ │ ├── bpg-resnet50.yaml │ │ ├── bpg-resnet101.yaml │ │ ├── bpg-resnet152.yaml │ │ ├── vtm-resnet50.yaml │ │ ├── factorized_prior-resnet50.yaml │ │ ├── scale_hyperprior-resnet50.yaml │ │ ├── mean_scale_hyperprior-resnet50.yaml │ │ ├── joint_autoregressive_hierarchical_prior-resnet50.yaml │ │ ├── factorized_prior-tf_efficientnet_l2_ns.yaml │ │ ├── mean_scale_hyperprior-tf_efficientnet_l2_ns.yaml │ │ ├── scale_hyperprior-tf_efficientnet_l2_ns.yaml │ │ ├── scale_hyperprior-tf_efficientnet_l2_ns_475.yaml │ │ ├── factorized_prior-tf_efficientnet_l2_ns_475.yaml │ │ └── mean_scale_hyperprior-tf_efficientnet_l2_ns_475.yaml │ └── feature_compression │ │ ├── jpeg-resnet50.yaml │ │ └── webp-resnet50.yaml │ └── pascal_voc2012 │ └── input_compression │ ├── jpeg-deeplabv3_resnet50.yaml │ ├── webp-deeplabv3_resnet50.yaml │ ├── jpeg-deeplabv3_resnet101.yaml │ ├── webp-deeplabv3_resnet101.yaml │ ├── bpg-deeplabv3_resnet50.yaml │ ├── bpg-deeplabv3_resnet101.yaml │ ├── factorized_prior-deeplabv3_resnet50.yaml │ ├── scale_hyperprior-deeplabv3_resnet50.yaml │ ├── factorized_prior-deeplabv3_resnet101.yaml │ ├── mean_scale_hyperprior-deeplabv3_resnet101.yaml │ ├── mean_scale_hyperprior-deeplabv3_resnet50.yaml │ ├── scale_hyperprior-deeplabv3_resnet101.yaml │ ├── joint_autoregressive_hierarchical_prior-deeplabv3_resnet101.yaml │ └── joint_autoregressive_hierarchical_prior-deeplabv3_resnet50.yaml ├── setup.cfg ├── docs └── source │ ├── subpkgs │ ├── loss.rst │ ├── analysis.rst │ ├── common.rst │ ├── transform.rst │ └── models.rst │ ├── package.rst │ ├── usage.rst │ └── conf.py ├── CITATION.bib ├── Pipfile ├── .github ├── workflows │ ├── documentation.yaml │ └── python-publish.yml └── ISSUE_TEMPLATE │ └── bug-report--not-question-.md ├── setup.py ├── LICENSE └── configs ├── coco2017 └── input_compression │ ├── mean_scale_hyperprior-faster_rcnn_resnet50_fpn.yaml │ ├── 
factorized_prior-faster_rcnn_resnet50_fpn.yaml │ ├── scale_hyperprior-faster_rcnn_resnet50_fpn.yaml │ ├── joint_autoregressive_hierarchical_prior-faster_rcnn_resnet50_fpn.yaml │ ├── jpeg-faster_rcnn_resnet50_fpn.yaml │ ├── webp-faster_rcnn_resnet50_fpn.yaml │ └── bpg-faster_rcnn_resnet50_fpn.yaml ├── ilsvrc2012 ├── input_compression │ ├── jpeg-resnet50.yaml │ ├── webp-resnet50.yaml │ ├── jpeg-resnet101.yaml │ ├── jpeg-resnet152.yaml │ ├── webp-resnet101.yaml │ ├── webp-resnet152.yaml │ ├── bpg-resnet50.yaml │ ├── bpg-resnet101.yaml │ ├── bpg-resnet152.yaml │ ├── jpeg-tf_efficientnet_l2_ns.yaml │ ├── jpeg-tf_efficientnet_l2_ns_475.yaml │ ├── mean_scale_hyperprior-resnet50.yaml │ ├── factorized_prior-resnet50.yaml │ ├── scale_hyperprior-resnet50.yaml │ ├── joint_autoregressive_hierarchical_prior-resnet50.yaml │ └── vtm-resnet50.yaml └── feature_compression │ ├── jpeg-resnet50.yaml │ └── webp-resnet50.yaml └── pascal_voc2012 └── input_compression ├── jpeg-deeplabv3_resnet50.yaml ├── webp-deeplabv3_resnet101.yaml ├── webp-deeplabv3_resnet50.yaml ├── jpeg-deeplabv3_resnet101.yaml ├── bpg-deeplabv3_resnet50.yaml └── bpg-deeplabv3_resnet101.yaml /sc2bench/common/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /sc2bench/__init__.py: -------------------------------------------------------------------------------- 1 | __version__ = '0.1.1-dev' 2 | -------------------------------------------------------------------------------- /sc2bench/models/detection/__init__.py: -------------------------------------------------------------------------------- 1 | from . import rcnn 2 | -------------------------------------------------------------------------------- /script/task/coco/__init__.py: -------------------------------------------------------------------------------- 1 | from . 
import dataset, eval 2 | -------------------------------------------------------------------------------- /script/task/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from . import eval, dataset 2 | -------------------------------------------------------------------------------- /sc2bench/models/segmentation/__init__.py: -------------------------------------------------------------------------------- 1 | from . import deeplabv3 2 | -------------------------------------------------------------------------------- /script/task/custom/__init__.py: -------------------------------------------------------------------------------- 1 | from . import collator, sampler, transform 2 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .idea/ 2 | .ipynb_checkpoints/ 3 | __pycache__/ 4 | .editorconfig 5 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include *.md 2 | include LICENSE 3 | 4 | recursive-exclude * __pycache__ -------------------------------------------------------------------------------- /imgs/ilsvrc2012-overview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yoshitomo-matsubara/sc2-benchmark/HEAD/imgs/ilsvrc2012-overview.png -------------------------------------------------------------------------------- /legacy/README.md: -------------------------------------------------------------------------------- 1 | # ***legacy/*** 2 | The configurations and scripts in `legacy/` are designed for sc2bench <= v0.0.4 and torchdistill <= v0.3.3. 
3 | -------------------------------------------------------------------------------- /imgs/input_vs_supervised_compression.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yoshitomo-matsubara/sc2-benchmark/HEAD/imgs/input_vs_supervised_compression.png -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [metadata] 2 | license = MIT 3 | license_files = LICENSE 4 | 5 | [pep8] 6 | max-line-length = 120 7 | 8 | [flake8] 9 | max-line-length = 120 10 | exclude = venv 11 | -------------------------------------------------------------------------------- /docs/source/subpkgs/loss.rst: -------------------------------------------------------------------------------- 1 | sc2bench.loss 2 | ===== 3 | 4 | 5 | .. toctree:: 6 | :maxdepth: 3 7 | :caption: Contents: 8 | 9 | .. automodule:: sc2bench.loss 10 | :members: 11 | -------------------------------------------------------------------------------- /docs/source/subpkgs/analysis.rst: -------------------------------------------------------------------------------- 1 | sc2bench.analysis 2 | ===== 3 | 4 | 5 | .. toctree:: 6 | :maxdepth: 3 7 | :caption: Contents: 8 | 9 | .. automodule:: sc2bench.analysis 10 | :members: 11 | -------------------------------------------------------------------------------- /script/task/utils/dataset.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | 4 | def get_num_iterations(dataset, batch_size, world_size): 5 | num_iterations = math.ceil(len(dataset) / batch_size / world_size) 6 | return num_iterations 7 | -------------------------------------------------------------------------------- /docs/source/package.rst: -------------------------------------------------------------------------------- 1 | sc2bench API 2 | ===== 3 | 4 | 5 | .. 
toctree:: 6 | subpkgs/models 7 | subpkgs/transform 8 | subpkgs/common 9 | subpkgs/loss 10 | subpkgs/analysis 11 | :maxdepth: 2 12 | :caption: Overview -------------------------------------------------------------------------------- /script/software/install_vtm.sh: -------------------------------------------------------------------------------- 1 | mkdir ~/software 2 | cd ~/software 3 | git clone https://vcgit.hhi.fraunhofer.de/jvet/VVCSoftware_VTM.git 4 | cd VVCSoftware_VTM 5 | mkdir build 6 | cd build 7 | cmake .. -DCMAKE_BUILD_TYPE=Release 8 | make -j 9 | -------------------------------------------------------------------------------- /legacy/script/software/install_vtm.sh: -------------------------------------------------------------------------------- 1 | mkdir ~/software 2 | cd ~/software 3 | git clone https://vcgit.hhi.fraunhofer.de/jvet/VVCSoftware_VTM.git 4 | cd VVCSoftware_VTM 5 | mkdir build 6 | cd build 7 | cmake .. -DCMAKE_BUILD_TYPE=Release 8 | make -j 9 | -------------------------------------------------------------------------------- /docs/source/subpkgs/common.rst: -------------------------------------------------------------------------------- 1 | sc2bench.common 2 | ===== 3 | 4 | 5 | .. toctree:: 6 | :maxdepth: 3 7 | :caption: Contents: 8 | 9 | ---- 10 | 11 | sc2bench.common.config_util 12 | ------------ 13 | 14 | .. automodule:: sc2bench.common.config_util 15 | :members: 16 | -------------------------------------------------------------------------------- /sc2bench/transforms/__init__.py: -------------------------------------------------------------------------------- 1 | from . 
import collator 2 | from .codec import CODEC_TRANSFORM_MODULE_DICT 3 | from .misc import MISC_TRANSFORM_MODULE_DICT 4 | 5 | TRANSFORM_MODULE_DICT = dict() 6 | TRANSFORM_MODULE_DICT.update(CODEC_TRANSFORM_MODULE_DICT) 7 | TRANSFORM_MODULE_DICT.update(MISC_TRANSFORM_MODULE_DICT) 8 | -------------------------------------------------------------------------------- /CITATION.bib: -------------------------------------------------------------------------------- 1 | @article{matsubara2023sc2, 2 | title={{SC2 Benchmark: Supervised Compression for Split Computing}}, 3 | author={Matsubara, Yoshitomo and Yang, Ruihan and Levorato, Marco and Mandt, Stephan}, 4 | journal={Transactions on Machine Learning Research}, 5 | issn={2835-8856}, 6 | year={2023}, 7 | url={https://openreview.net/forum?id=p28wv4G65d} 8 | } -------------------------------------------------------------------------------- /script/software/install_bpg.sh: -------------------------------------------------------------------------------- 1 | mkdir ~/software 2 | cd ~/software 3 | wget https://bellard.org/bpg/libbpg-0.9.8.tar.gz 4 | tar -xvf libbpg-0.9.8.tar.gz 5 | cd libbpg-0.9.8/ 6 | sudo apt-get -y install libpng-dev 7 | sudo apt-get -y install libjpeg-dev 8 | sudo apt-get -y install libsdl-dev 9 | sudo apt-get -y install libsdl-image1.2-dev 10 | sudo apt-get remove libnuma-dev 11 | sudo make 12 | sudo apt-get install libnuma-dev 13 | -------------------------------------------------------------------------------- /legacy/script/software/install_bpg.sh: -------------------------------------------------------------------------------- 1 | mkdir ~/software 2 | cd ~/software 3 | wget https://bellard.org/bpg/libbpg-0.9.8.tar.gz 4 | tar -xvf libbpg-0.9.8.tar.gz 5 | cd libbpg-0.9.8/ 6 | sudo apt-get -y install libpng-dev 7 | sudo apt-get -y install libjpeg-dev 8 | sudo apt-get -y install libsdl-dev 9 | sudo apt-get -y install libsdl-image1.2-dev 10 | sudo apt-get remove libnuma-dev 11 | sudo make 12 | sudo apt-get 
install libnuma-dev 13 | -------------------------------------------------------------------------------- /Pipfile: -------------------------------------------------------------------------------- 1 | [[source]] 2 | name = "pypi" 3 | url = "https://pypi.org/simple" 4 | verify_ssl = true 5 | 6 | [dev-packages] 7 | 8 | [packages] 9 | torch = ">=2.0.0" 10 | torchvision = ">=0.15.1" 11 | numpy = "*" 12 | scipy = "*" 13 | cython = "*" 14 | pycocotools = ">=2.0.2" 15 | matplotlib = "*" 16 | seaborn = "*" 17 | torchdistill = ">=1.0.0" 18 | compressai = ">=1.2.3" 19 | timm = "*" 20 | sc2bench = {editable = true, path = "."} 21 | 22 | [requires] 23 | python_version = "3.12" 24 | -------------------------------------------------------------------------------- /docs/source/subpkgs/transform.rst: -------------------------------------------------------------------------------- 1 | sc2bench.transform 2 | ===== 3 | 4 | 5 | .. toctree:: 6 | :maxdepth: 3 7 | :caption: Contents: 8 | 9 | ---- 10 | 11 | sc2bench.transform.codec 12 | ------------ 13 | 14 | .. automodule:: sc2bench.transforms.codec 15 | :members: 16 | 17 | ---- 18 | 19 | sc2bench.transform.collator 20 | ------------ 21 | 22 | .. automodule:: sc2bench.transforms.collator 23 | :members: 24 | 25 | ---- 26 | 27 | sc2bench.transform.misc 28 | ------------ 29 | 30 | .. 
automodule:: sc2bench.transforms.misc 31 | :members: 32 | -------------------------------------------------------------------------------- /script/neural_input_compression/coco2017-object_detection.sh: -------------------------------------------------------------------------------- 1 | BASE_NAME=${1} 2 | MAX_QUALITY=${2} 3 | 4 | if [ $# -ne 2 ]; then 5 | echo "Illegal number of arguments" 6 | exit 2 7 | fi 8 | 9 | for quality in $(seq 1 1 ${MAX_QUALITY}); 10 | do 11 | json_str='{"models": {"model": {"compression_model": {"params": {"quality": ' 12 | json_str+=${quality} 13 | json_str+='}}}}}' 14 | python script/task/object_detection.py \ 15 | --config configs/coco2017/input_compression/${BASE_NAME}.yaml \ 16 | --run_log log/input_compression/${BASE_NAME}-quality${quality}.txt \ 17 | --json "${json_str}" -student_only -test_only -no_dp_eval 18 | done 19 | -------------------------------------------------------------------------------- /script/neural_input_compression/ilsvrc2012-image_classification.sh: -------------------------------------------------------------------------------- 1 | BASE_NAME=${1} 2 | MAX_QUALITY=${2} 3 | 4 | if [ $# -ne 2 ]; then 5 | echo "Illegal number of arguments" 6 | exit 2 7 | fi 8 | 9 | for quality in $(seq 1 1 ${MAX_QUALITY}); 10 | do 11 | json_str='{"models": {"model": {"compression_model": {"params": {"quality": ' 12 | json_str+=${quality} 13 | json_str+='}}}}}' 14 | python script/task/image_classification.py \ 15 | --config configs/ilsvrc2012/input_compression/${BASE_NAME}.yaml \ 16 | --run_log log/input_compression/${BASE_NAME}-quality${quality}.txt \ 17 | --json "${json_str}" -student_only -test_only -no_dp_eval 18 | done 19 | -------------------------------------------------------------------------------- /legacy/script/neural_input_compression/coco2017-object_detection.sh: -------------------------------------------------------------------------------- 1 | BASE_NAME=${1} 2 | MAX_QUALITY=${2} 3 | 4 | if [ $# -ne 2 ]; then 5 | echo 
"Illegal number of arguments" 6 | exit 2 7 | fi 8 | 9 | for quality in $(seq 1 1 ${MAX_QUALITY}); 10 | do 11 | json_str='{"models": {"model": {"compression_model": {"params": {"quality": ' 12 | json_str+=${quality} 13 | json_str+='}}}}}' 14 | python legacy/script/task/object_detection.py \ 15 | --config legacy/configs/coco2017/input_compression/${BASE_NAME}.yaml \ 16 | --log legacy/log/input_compression/${BASE_NAME}-quality${quality}.txt \ 17 | --json "${json_str}" -student_only -test_only -no_dp_eval 18 | done 19 | -------------------------------------------------------------------------------- /script/neural_input_compression/pascal_voc2012-semantic_segmentation.sh: -------------------------------------------------------------------------------- 1 | BASE_NAME=${1} 2 | MAX_QUALITY=${2} 3 | 4 | if [ $# -ne 2 ]; then 5 | echo "Illegal number of arguments" 6 | exit 2 7 | fi 8 | 9 | for quality in $(seq 1 1 ${MAX_QUALITY}); 10 | do 11 | json_str='{"models": {"model": {"compression_model": {"params": {"quality": ' 12 | json_str+=${quality} 13 | json_str+='}}}}}' 14 | python script/task/semantic_segmentation.py \ 15 | --config configs/pascal_voc2012/input_compression/${BASE_NAME}.yaml \ 16 | --run_log log/input_compression/${BASE_NAME}-quality${quality}.txt \ 17 | --json "${json_str}" -student_only -test_only -no_dp_eval 18 | done 19 | -------------------------------------------------------------------------------- /legacy/script/neural_input_compression/ilsvrc2012-image_classification.sh: -------------------------------------------------------------------------------- 1 | BASE_NAME=${1} 2 | MAX_QUALITY=${2} 3 | 4 | if [ $# -ne 2 ]; then 5 | echo "Illegal number of arguments" 6 | exit 2 7 | fi 8 | 9 | for quality in $(seq 1 1 ${MAX_QUALITY}); 10 | do 11 | json_str='{"models": {"model": {"compression_model": {"params": {"quality": ' 12 | json_str+=${quality} 13 | json_str+='}}}}}' 14 | python legacy/script/task/image_classification.py \ 15 | --config 
legacy/configs/ilsvrc2012/input_compression/${BASE_NAME}.yaml \ 16 | --log legacy/log/input_compression/${BASE_NAME}-quality${quality}.txt \ 17 | --json "${json_str}" -student_only -test_only -no_dp_eval 18 | done 19 | -------------------------------------------------------------------------------- /legacy/script/neural_input_compression/pascal_voc2012-semantic_segmentation.sh: -------------------------------------------------------------------------------- 1 | BASE_NAME=${1} 2 | MAX_QUALITY=${2} 3 | 4 | if [ $# -ne 2 ]; then 5 | echo "Illegal number of arguments" 6 | exit 2 7 | fi 8 | 9 | for quality in $(seq 1 1 ${MAX_QUALITY}); 10 | do 11 | json_str='{"models": {"model": {"compression_model": {"params": {"quality": ' 12 | json_str+=${quality} 13 | json_str+='}}}}}' 14 | python legacy/script/task/semantic_segmentation.py \ 15 | --config legacy/configs/pascal_voc2012/input_compression/${BASE_NAME}.yaml \ 16 | --log legacy/log/input_compression/${BASE_NAME}-quality${quality}.txt \ 17 | --json "${json_str}" -student_only -test_only -no_dp_eval 18 | done 19 | -------------------------------------------------------------------------------- /sc2bench/common/config_util.py: -------------------------------------------------------------------------------- 1 | def overwrite_config(org_config, sub_config): 2 | """ 3 | Overwrites a configuration. 4 | 5 | :param org_config: (nested) dictionary of configuration to be updated. 6 | :type org_config: dict 7 | :param sub_config: (nested) dictionary to be added to org_config. 
8 | :type sub_config: dict 9 | """ 10 | for sub_key, sub_value in sub_config.items(): 11 | if sub_key in org_config: 12 | if isinstance(sub_value, dict): 13 | overwrite_config(org_config[sub_key], sub_value) 14 | else: 15 | org_config[sub_key] = sub_value 16 | else: 17 | org_config[sub_key] = sub_value 18 | -------------------------------------------------------------------------------- /sc2bench/models/__init__.py: -------------------------------------------------------------------------------- 1 | from . import registry, detection, segmentation 2 | from .backbone import BACKBONE_CLASS_DICT, BACKBONE_FUNC_DICT 3 | from .detection.registry import DETECTION_MODEL_CLASS_DICT, DETECTION_MODEL_FUNC_DICT 4 | from .segmentation.registry import SEGMENTATION_MODEL_CLASS_DICT, SEGMENTATION_MODEL_FUNC_DICT 5 | from .wrapper import WRAPPER_CLASS_DICT 6 | 7 | MODEL_DICT = dict() 8 | MODEL_DICT.update(BACKBONE_CLASS_DICT) 9 | MODEL_DICT.update(BACKBONE_FUNC_DICT) 10 | MODEL_DICT.update(DETECTION_MODEL_CLASS_DICT) 11 | MODEL_DICT.update(DETECTION_MODEL_FUNC_DICT) 12 | MODEL_DICT.update(SEGMENTATION_MODEL_CLASS_DICT) 13 | MODEL_DICT.update(SEGMENTATION_MODEL_FUNC_DICT) 14 | MODEL_DICT.update(WRAPPER_CLASS_DICT) 15 | -------------------------------------------------------------------------------- /script/codec_input_compression/coco2017-object_detection.sh: -------------------------------------------------------------------------------- 1 | BASE_NAME=${1} 2 | FORMAT_NAME=${2} 3 | 4 | if [ $# -eq 5 ] 5 | then 6 | MIN_QUALITY=${3} 7 | STEP_SIZE=${4} 8 | MAX_QUALITY=${5} 9 | else 10 | MIN_QUALITY=10 11 | STEP_SIZE=10 12 | MAX_QUALITY=100 13 | fi 14 | 15 | 16 | for quality in $(seq ${MIN_QUALITY} ${STEP_SIZE} ${MAX_QUALITY}); 17 | do 18 | sed -i "s/quality:.*/quality: ${quality}/g" configs/coco2017/input_compression/${BASE_NAME}.yaml 19 | python script/task/object_detection.py \ 20 | --config configs/coco2017/input_compression/${BASE_NAME}.yaml \ 21 | --run_log 
log/${FORMAT_NAME}_compression/${BASE_NAME}-quality${quality}.txt -student_only -test_only -no_dp_eval 22 | done 23 | 24 | sed -i "s/quality:.*/quality:/g" configs/coco2017/input_compression/${BASE_NAME}.yaml 25 | -------------------------------------------------------------------------------- /script/codec_input_compression/ilsvrc2012-image_classification.sh: -------------------------------------------------------------------------------- 1 | BASE_NAME=${1} 2 | FORMAT_NAME=${2} 3 | 4 | if [ $# -eq 5 ] 5 | then 6 | MIN_QUALITY=${3} 7 | STEP_SIZE=${4} 8 | MAX_QUALITY=${5} 9 | else 10 | MIN_QUALITY=10 11 | STEP_SIZE=10 12 | MAX_QUALITY=100 13 | fi 14 | 15 | 16 | for quality in $(seq ${MIN_QUALITY} ${STEP_SIZE} ${MAX_QUALITY}); 17 | do 18 | sed -i "s/quality:.*/quality: ${quality}/g" configs/ilsvrc2012/input_compression/${BASE_NAME}.yaml 19 | python script/task/image_classification.py \ 20 | --config configs/ilsvrc2012/input_compression/${BASE_NAME}.yaml \ 21 | --run_log log/${FORMAT_NAME}_compression/${BASE_NAME}-quality${quality}.txt -student_only -test_only -no_dp_eval 22 | done 23 | 24 | sed -i "s/quality:.*/quality:/g" configs/ilsvrc2012/input_compression/${BASE_NAME}.yaml 25 | -------------------------------------------------------------------------------- /legacy/script/codec_input_compression/coco2017-object_detection.sh: -------------------------------------------------------------------------------- 1 | BASE_NAME=${1} 2 | FORMAT_NAME=${2} 3 | 4 | if [ $# -eq 5 ] 5 | then 6 | MIN_QUALITY=${3} 7 | STEP_SIZE=${4} 8 | MAX_QUALITY=${5} 9 | else 10 | MIN_QUALITY=10 11 | STEP_SIZE=10 12 | MAX_QUALITY=100 13 | fi 14 | 15 | 16 | for quality in $(seq ${MIN_QUALITY} ${STEP_SIZE} ${MAX_QUALITY}); 17 | do 18 | sed -i "s/quality:.*/quality: ${quality}/g" legacy/configs/coco2017/input_compression/${BASE_NAME}.yaml 19 | python legacy/script/task/object_detection.py \ 20 | --config legacy/configs/coco2017/input_compression/${BASE_NAME}.yaml \ 21 | --log 
legacy/log/${FORMAT_NAME}_compression/${BASE_NAME}-quality${quality}.txt -student_only -test_only -no_dp_eval 22 | done 23 | 24 | sed -i "s/quality:.*/quality:/g" legacy/configs/coco2017/input_compression/${BASE_NAME}.yaml 25 | -------------------------------------------------------------------------------- /script/codec_input_compression/pascal_voc2012-semantic_segmentation.sh: -------------------------------------------------------------------------------- 1 | BASE_NAME=${1} 2 | FORMAT_NAME=${2} 3 | 4 | if [ $# -eq 5 ] 5 | then 6 | MIN_QUALITY=${3} 7 | STEP_SIZE=${4} 8 | MAX_QUALITY=${5} 9 | else 10 | MIN_QUALITY=10 11 | STEP_SIZE=10 12 | MAX_QUALITY=100 13 | fi 14 | 15 | 16 | for quality in $(seq ${MIN_QUALITY} ${STEP_SIZE} ${MAX_QUALITY}); 17 | do 18 | sed -i "s/quality:.*/quality: ${quality}/g" configs/pascal_voc2012/input_compression/${BASE_NAME}.yaml 19 | python script/task/semantic_segmentation.py \ 20 | --config configs/pascal_voc2012/input_compression/${BASE_NAME}.yaml \ 21 | --run_log log/${FORMAT_NAME}_compression/${BASE_NAME}-quality${quality}.txt -student_only -test_only -no_dp_eval 22 | done 23 | 24 | sed -i "s/quality:.*/quality:/g" configs/pascal_voc2012/input_compression/${BASE_NAME}.yaml 25 | -------------------------------------------------------------------------------- /legacy/script/codec_input_compression/ilsvrc2012-image_classification.sh: -------------------------------------------------------------------------------- 1 | BASE_NAME=${1} 2 | FORMAT_NAME=${2} 3 | 4 | if [ $# -eq 5 ] 5 | then 6 | MIN_QUALITY=${3} 7 | STEP_SIZE=${4} 8 | MAX_QUALITY=${5} 9 | else 10 | MIN_QUALITY=10 11 | STEP_SIZE=10 12 | MAX_QUALITY=100 13 | fi 14 | 15 | 16 | for quality in $(seq ${MIN_QUALITY} ${STEP_SIZE} ${MAX_QUALITY}); 17 | do 18 | sed -i "s/quality:.*/quality: ${quality}/g" legacy/configs/ilsvrc2012/input_compression/${BASE_NAME}.yaml 19 | python legacy/script/task/image_classification.py \ 20 | --config 
legacy/configs/ilsvrc2012/input_compression/${BASE_NAME}.yaml \ 21 | --log legacy/log/${FORMAT_NAME}_compression/${BASE_NAME}-quality${quality}.txt -student_only -test_only -no_dp_eval 22 | done 23 | 24 | sed -i "s/quality:.*/quality:/g" legacy/configs/ilsvrc2012/input_compression/${BASE_NAME}.yaml 25 | -------------------------------------------------------------------------------- /docs/source/usage.rst: -------------------------------------------------------------------------------- 1 | Usage 2 | ===== 3 | 4 | 5 | .. toctree:: 6 | :maxdepth: 3 7 | :caption: Overview 8 | 9 | Installation 10 | ------------ 11 | 12 | To use `sc2bench `_, first install it using pip: 13 | 14 | .. code-block:: console 15 | 16 | $ pip install sc2bench 17 | 18 | 19 | Examples 20 | ------------ 21 | 22 | `The official repository (https://github.com/yoshitomo-matsubara/sc2-benchmark) `_ 23 | offers many example scripts, configs, and checkpoints of trained models in `sc2bench`. 24 | 25 | Currently, `example scripts `_ 26 | cover the following three tasks: 27 | 28 | - Image classification (ILSVRC 2012) 29 | - Object detection (COCO 2017) 30 | - Semantic segmentation (PASCAL VOC 2012) 31 | -------------------------------------------------------------------------------- /legacy/script/codec_input_compression/pascal_voc2012-semantic_segmentation.sh: -------------------------------------------------------------------------------- 1 | BASE_NAME=${1} 2 | FORMAT_NAME=${2} 3 | 4 | if [ $# -eq 5 ] 5 | then 6 | MIN_QUALITY=${3} 7 | STEP_SIZE=${4} 8 | MAX_QUALITY=${5} 9 | else 10 | MIN_QUALITY=10 11 | STEP_SIZE=10 12 | MAX_QUALITY=100 13 | fi 14 | 15 | 16 | for quality in $(seq ${MIN_QUALITY} ${STEP_SIZE} ${MAX_QUALITY}); 17 | do 18 | sed -i "s/quality:.*/quality: ${quality}/g" legacy/configs/pascal_voc2012/input_compression/${BASE_NAME}.yaml 19 | python legacy/script/task/semantic_segmentation.py \ 20 | --config legacy/configs/pascal_voc2012/input_compression/${BASE_NAME}.yaml \ 21 | --log 
legacy/log/${FORMAT_NAME}_compression/${BASE_NAME}-quality${quality}.txt -student_only -test_only -no_dp_eval 22 | done 23 | 24 | sed -i "s/quality:.*/quality:/g" legacy/configs/pascal_voc2012/input_compression/${BASE_NAME}.yaml 25 | -------------------------------------------------------------------------------- /.github/workflows/documentation.yaml: -------------------------------------------------------------------------------- 1 | name: Deploy Sphinx Documentation 2 | on: [push, pull_request, workflow_dispatch] 3 | permissions: 4 | contents: write 5 | jobs: 6 | docs: 7 | runs-on: ubuntu-latest 8 | steps: 9 | - uses: actions/checkout@v4 10 | - name: Set up Python 11 | uses: actions/setup-python@v5 12 | with: 13 | python-version: '3.10' 14 | - name: Install dependencies 15 | run: | 16 | pip install sphinx sphinx_rtd_theme sphinxcontrib-youtube 17 | - name: Install sc2bench 18 | run: | 19 | pip install -e . 20 | - name: Sphinx build 21 | run: | 22 | sphinx-build -b html docs/source/ docs/_build/ 23 | - name: Deploy 24 | uses: peaceiris/actions-gh-pages@v3 25 | if: ${{ github.event_name == 'push' && github.ref == 'refs/heads/main' }} 26 | with: 27 | publish_branch: gh-pages 28 | github_token: ${{ secrets.GITHUB_TOKEN }} 29 | publish_dir: docs/_build/ 30 | force_orphan: true 31 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug-report--not-question-.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report (Not question) 3 | about: Create a report to help us improve (Use Discussions to ask questions) 4 | title: "[BUG] Please use Discussions instead of Issues to ask questions" 5 | labels: bug 6 | assignees: yoshitomo-matsubara 7 | 8 | --- 9 | 10 | Please use [Discussions](https://github.com/yoshitomo-matsubara/sc2-benchmark/discussions) to ask questions. 11 | 12 | **Describe the bug** 13 | A clear and concise description of what the bug is. 
14 | 15 | **To Reproduce** 16 | Provide 17 | 1. Exact command to run your code 18 | 2. Whether or not you made any changes in Python code (if so, how you made the changes?) 19 | 3. YAML config file 20 | 4. Log file 21 | 22 | **Expected behavior** 23 | A clear and concise description of what you expected to happen. 24 | 25 | 26 | **Environment (please complete the following information):** 27 | - OS: [e.g. Ubuntu 20.04 LTS] 28 | - Python ver. [e.g. 3.8] 29 | - sc2bench and torchdistill vers. [e.g. v0.0.2 and v0.3.2] 30 | 31 | 32 | **Additional context** 33 | Add any other context about the problem here. 34 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages 2 | 3 | import sc2bench 4 | 5 | with open('README.md', 'r') as f: 6 | long_description = f.read() 7 | 8 | description = 'SC2 Benchmark: Supervised Compression for Split Computing.' 
9 | setup( 10 | name='sc2bench', 11 | version=sc2bench.__version__, 12 | author='Yoshitomo Matsubara', 13 | description=description, 14 | long_description=long_description, 15 | long_description_content_type='text/markdown', 16 | url='https://github.com/yoshitomo-matsubara/sc2-benchmark', 17 | packages=find_packages(exclude=('configs', 'resources', 'script', 'tests')), 18 | python_requires='>=3.9', 19 | install_requires=[ 20 | 'torch>=2.0.0', 21 | 'torchvision>=0.15.1', 22 | 'numpy', 23 | 'pyyaml>=6.0.0', 24 | 'scipy', 25 | 'cython', 26 | 'pycocotools>=2.0.2', 27 | 'torchdistill>=1.0.0', 28 | 'compressai>=1.2.3', 29 | 'timm>=1.0.3' 30 | ], 31 | extras_require={ 32 | 'test': ['pytest'], 33 | 'docs': ['sphinx', 'sphinx_rtd_theme', 'sphinxcontrib-youtube'] 34 | } 35 | ) 36 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 Yoshitomo Matsubara 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | 23 | -------------------------------------------------------------------------------- /.github/workflows/python-publish.yml: -------------------------------------------------------------------------------- 1 | # This workflow will upload a Python Package using Twine when a release is created 2 | # For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries 3 | 4 | # This workflow uses actions that are not certified by GitHub. 5 | # They are provided by a third-party and are governed by 6 | # separate terms of service, privacy policy, and support 7 | # documentation. 8 | 9 | name: Upload Python Package 10 | 11 | on: 12 | release: 13 | types: [published] 14 | 15 | permissions: 16 | contents: read 17 | 18 | jobs: 19 | deploy: 20 | 21 | runs-on: ubuntu-latest 22 | 23 | steps: 24 | - uses: actions/checkout@v4 25 | - name: Set up Python 26 | uses: actions/setup-python@v5 27 | with: 28 | python-version: '3.x' 29 | - name: Install dependencies 30 | run: | 31 | python -m pip install --upgrade pip 32 | pip install build 33 | - name: Build package 34 | run: python -m build 35 | - name: Publish package 36 | uses: pypa/gh-action-pypi-publish@27b31702a0e7fc50959f5ad993c78deac1bdfc29 37 | with: 38 | user: __token__ 39 | password: ${{ secrets.PYPI_API_TOKEN }} 40 | -------------------------------------------------------------------------------- /script/task/custom/collator.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from torchdistill.datasets.registry import register_collate_func 4 | 5 | 6 | @register_collate_func 7 | def 
coco_collate_fn(batch): 8 | return tuple(zip(*batch)) 9 | 10 | 11 | def _cat_list(images, fill_value=0): 12 | if len(images) == 1 and not isinstance(images[0], torch.Tensor): 13 | return images 14 | 15 | max_size = tuple(max(s) for s in zip(*[img.shape for img in images])) 16 | batch_shape = (len(images),) + max_size 17 | batched_imgs = images[0].new(*batch_shape).fill_(fill_value) 18 | for img, pad_img in zip(images, batched_imgs): 19 | pad_img[..., :img.shape[-2], :img.shape[-1]].copy_(img) 20 | return batched_imgs 21 | 22 | 23 | @register_collate_func 24 | def coco_seg_collate_fn(batch): 25 | images, targets, supp_dicts = list(zip(*batch)) 26 | batched_imgs = _cat_list(images, fill_value=0) 27 | batched_targets = _cat_list(targets, fill_value=255) 28 | return batched_imgs, batched_targets, supp_dicts 29 | 30 | 31 | @register_collate_func 32 | def coco_seg_eval_collate_fn(batch): 33 | images, targets = list(zip(*batch)) 34 | batched_imgs = _cat_list(images, fill_value=0) 35 | batched_targets = _cat_list(targets, fill_value=255) 36 | return batched_imgs, batched_targets 37 | -------------------------------------------------------------------------------- /legacy/configs/coco2017/input_compression/jpeg-faster_rcnn_resnet50_fpn.yaml: -------------------------------------------------------------------------------- 1 | datasets: 2 | coco2017: 3 | name: &dataset_name 'coco2017' 4 | type: 'cocodetect' 5 | root: &root_dir !join ['~/dataset/', *dataset_name] 6 | splits: 7 | val: 8 | dataset_id: &coco_val !join [*dataset_name, '/val'] 9 | images: !join [*root_dir, '/val2017'] 10 | annotations: !join [*root_dir, '/annotations/instances_val2017.json'] 11 | annotated_only: False 12 | 13 | models: 14 | model: 15 | name: 'InputCompressionDetectionModel' 16 | params: 17 | codec_params: 18 | - type: 'PILImageModule' 19 | params: 20 | format: 'JPEG' 21 | quality: 90 22 | returns_file_size: True 23 | analysis_config: 24 | analyzer_configs: 25 | - type: 'FileSizeAccumulator' 26 | 
params: 27 | unit: 'KB' 28 | analyzes_after_compress: True 29 | adaptive_pad_config: 30 | pre_transform_params: 31 | post_transform_params: 32 | detection_model: 33 | name: 'fasterrcnn_resnet50_fpn' 34 | params: 35 | pretrained: True 36 | progress: True 37 | pretrained_backbone: True 38 | ckpt: '' 39 | 40 | test: 41 | test_data_loader: 42 | dataset_id: *coco_val 43 | random_sample: False 44 | batch_size: 1 45 | num_workers: 4 46 | collate_fn: 'coco_collate_fn' 47 | -------------------------------------------------------------------------------- /legacy/configs/coco2017/input_compression/webp-faster_rcnn_resnet50_fpn.yaml: -------------------------------------------------------------------------------- 1 | datasets: 2 | coco2017: 3 | name: &dataset_name 'coco2017' 4 | type: 'cocodetect' 5 | root: &root_dir !join ['~/dataset/', *dataset_name] 6 | splits: 7 | val: 8 | dataset_id: &coco_val !join [*dataset_name, '/val'] 9 | images: !join [*root_dir, '/val2017'] 10 | annotations: !join [*root_dir, '/annotations/instances_val2017.json'] 11 | annotated_only: False 12 | 13 | models: 14 | model: 15 | name: 'InputCompressionDetectionModel' 16 | params: 17 | codec_params: 18 | - type: 'PILImageModule' 19 | params: 20 | format: 'WEBP' 21 | quality: 90 22 | returns_file_size: True 23 | analysis_config: 24 | analyzer_configs: 25 | - type: 'FileSizeAccumulator' 26 | params: 27 | unit: 'KB' 28 | analyzes_after_compress: True 29 | adaptive_pad_config: 30 | pre_transform_params: 31 | post_transform_params: 32 | detection_model: 33 | name: 'fasterrcnn_resnet50_fpn' 34 | params: 35 | pretrained: True 36 | progress: True 37 | pretrained_backbone: True 38 | ckpt: '' 39 | 40 | test: 41 | test_data_loader: 42 | dataset_id: *coco_val 43 | random_sample: False 44 | batch_size: 1 45 | num_workers: 4 46 | collate_fn: 'coco_collate_fn' 47 | -------------------------------------------------------------------------------- /sc2bench/loss.py: 
-------------------------------------------------------------------------------- 1 | from torch import nn 2 | from torchdistill.losses.mid_level import register_mid_level_loss 3 | 4 | 5 | @register_mid_level_loss 6 | class BppLoss(nn.Module): 7 | """ 8 | Bit-per-pixel (or rate) loss. 9 | 10 | :param entropy_module_path: entropy module path to extract its output from io_dict 11 | :type entropy_module_path: str 12 | :param reduction: reduction type ('sum', 'batchmean', or 'mean') 13 | :type reduction: str or None 14 | """ 15 | def __init__(self, entropy_module_path, reduction='mean'): 16 | super().__init__() 17 | self.entropy_module_path = entropy_module_path 18 | self.reduction = reduction 19 | 20 | def forward(self, student_io_dict, *args, **kwargs): 21 | """ 22 | Computes a rate loss. 23 | 24 | :param student_io_dict: io_dict of model to be trained 25 | :type student_io_dict: dict 26 | """ 27 | entropy_module_dict = student_io_dict[self.entropy_module_path] 28 | intermediate_features, likelihoods = entropy_module_dict['output'] 29 | n, _, h, w = intermediate_features.shape 30 | num_pixels = n * h * w 31 | if self.reduction == 'sum': 32 | bpp = -likelihoods.log2().sum() 33 | elif self.reduction == 'batchmean': 34 | bpp = -likelihoods.log2().sum() / n 35 | else: 36 | bpp = -likelihoods.log2().sum() / num_pixels 37 | return bpp 38 | -------------------------------------------------------------------------------- /legacy/configs/ilsvrc2012/input_compression/jpeg-tf_efficientnet_l2_ns.yaml: -------------------------------------------------------------------------------- 1 | datasets: 2 | ilsvrc2012: 3 | name: &dataset_name 'ilsvrc2012' 4 | type: 'ImageFolder' 5 | root: &root_dir !join ['~/dataset/', *dataset_name] 6 | splits: 7 | val: 8 | dataset_id: &imagenet_val !join [*dataset_name, '/val'] 9 | params: 10 | root: !join [*root_dir, '/val'] 11 | transform_params: 12 | - type: 'WrappedResize' 13 | params: 14 | size: 833 15 | interpolation: 'bicubic' 16 | - type: 
'CenterCrop' 17 | params: 18 | size: [800, 800] 19 | - type: 'PILImageModule' 20 | params: 21 | format: 'JPEG' 22 | quality: 90 23 | - type: 'ToTensor' 24 | params: 25 | - type: 'Normalize' 26 | params: 27 | mean: [0.485, 0.456, 0.406] 28 | std: [0.229, 0.224, 0.225] 29 | 30 | models: 31 | model: 32 | name: &model_name 'tf_efficientnet_l2_ns' 33 | repo_or_dir: 'rwightman/pytorch-image-models' 34 | params: 35 | num_classes: 1000 36 | pretrained: True 37 | experiment: &experiment !join [*dataset_name, '-', *model_name] 38 | ckpt: !join ['./imagenet/vanilla/', *experiment, '.pt'] 39 | 40 | test: 41 | test_data_loader: 42 | dataset_id: *imagenet_val 43 | random_sample: False 44 | batch_size: 1 45 | num_workers: 16 46 | -------------------------------------------------------------------------------- /legacy/configs/ilsvrc2012/input_compression/jpeg-tf_efficientnet_l2_ns_475.yaml: -------------------------------------------------------------------------------- 1 | datasets: 2 | ilsvrc2012: 3 | name: &dataset_name 'ilsvrc2012' 4 | type: 'ImageFolder' 5 | root: &root_dir !join ['~/dataset/', *dataset_name] 6 | splits: 7 | val: 8 | dataset_id: &imagenet_val !join [*dataset_name, '/val'] 9 | params: 10 | root: !join [*root_dir, '/val'] 11 | transform_params: 12 | - type: 'WrappedResize' 13 | params: 14 | size: 507 15 | interpolation: 'bicubic' 16 | - type: 'CenterCrop' 17 | params: 18 | size: [475, 475] 19 | - type: 'PILImageModule' 20 | params: 21 | format: 'JPEG' 22 | quality: 90 23 | - type: 'ToTensor' 24 | params: 25 | - type: 'Normalize' 26 | params: 27 | mean: [0.485, 0.456, 0.406] 28 | std: [0.229, 0.224, 0.225] 29 | 30 | models: 31 | model: 32 | name: &model_name 'tf_efficientnet_l2_ns_475' 33 | repo_or_dir: 'rwightman/pytorch-image-models' 34 | params: 35 | num_classes: 1000 36 | pretrained: True 37 | experiment: &experiment !join [*dataset_name, '-', *model_name] 38 | ckpt: !join ['./imagenet/vanilla/', *experiment, '.pt'] 39 | 40 | test: 41 | test_data_loader: 42 
| dataset_id: *imagenet_val 43 | random_sample: False 44 | batch_size: 1 45 | num_workers: 16 46 | -------------------------------------------------------------------------------- /legacy/configs/coco2017/input_compression/bpg-faster_rcnn_resnet50_fpn.yaml: -------------------------------------------------------------------------------- 1 | datasets: 2 | coco2017: 3 | name: &dataset_name 'coco2017' 4 | type: 'cocodetect' 5 | root: &root_dir !join ['~/dataset/', *dataset_name] 6 | splits: 7 | val: 8 | dataset_id: &coco_val !join [*dataset_name, '/val'] 9 | images: !join [*root_dir, '/val2017'] 10 | annotations: !join [*root_dir, '/annotations/instances_val2017.json'] 11 | annotated_only: False 12 | 13 | models: 14 | model: 15 | name: 'InputCompressionDetectionModel' 16 | params: 17 | codec_params: 18 | - type: 'BPGModule' 19 | params: 20 | encoder_path: '~/software/libbpg-0.9.8/bpgenc' 21 | decoder_path: '~/software/libbpg-0.9.8/bpgdec' 22 | quality: 50 23 | returns_file_size: True 24 | analysis_config: 25 | analyzer_configs: 26 | - type: 'FileSizeAccumulator' 27 | params: 28 | unit: 'KB' 29 | analyzes_after_compress: True 30 | adaptive_pad_config: 31 | pre_transform_params: 32 | post_transform_params: 33 | detection_model: 34 | name: 'fasterrcnn_resnet50_fpn' 35 | params: 36 | pretrained: True 37 | progress: True 38 | pretrained_backbone: True 39 | ckpt: '' 40 | 41 | test: 42 | test_data_loader: 43 | dataset_id: *coco_val 44 | random_sample: False 45 | batch_size: 1 46 | num_workers: 4 47 | collate_fn: 'coco_collate_fn' 48 | -------------------------------------------------------------------------------- /script/task/utils/eval.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | class SegEvaluator(object): 5 | def __init__(self, num_classes): 6 | self.num_classes = num_classes 7 | self.mat = None 8 | 9 | def update(self, a, b): 10 | n = self.num_classes 11 | if self.mat is None: 12 | self.mat = 
torch.zeros((n, n), dtype=torch.int64, device=a.device) 13 | with torch.no_grad(): 14 | k = (a >= 0) & (a < n) 15 | inds = n * a[k].to(torch.int64) + b[k] 16 | self.mat += torch.bincount(inds, minlength=n**2).reshape(n, n) 17 | 18 | def reset(self): 19 | self.mat.zero_() 20 | 21 | def compute(self): 22 | h = self.mat.float() 23 | acc_global = torch.diag(h).sum() / h.sum() * 100.0 24 | acc = torch.diag(h) / h.sum(1) * 100.0 25 | iu = torch.diag(h) / (h.sum(1) + h.sum(0) - torch.diag(h)) * 100.0 26 | return acc_global, acc, iu 27 | 28 | def reduce_from_all_processes(self): 29 | if not torch.distributed.is_available(): 30 | return 31 | if not torch.distributed.is_initialized(): 32 | return 33 | torch.distributed.barrier() 34 | torch.distributed.all_reduce(self.mat) 35 | 36 | def __str__(self): 37 | acc_global, acc, iu = self.compute() 38 | return 'mean IoU: {:.1f}, IoU: {}, Global pixelwise acc: {:.1f}, Average row correct: {}'.format( 39 | iu.mean().item(), ['{:.1f}'.format(i) for i in iu.tolist()], 40 | acc_global.item(), ['{:.1f}'.format(i) for i in acc.tolist()] 41 | ) 42 | -------------------------------------------------------------------------------- /configs/coco2017/input_compression/mean_scale_hyperprior-faster_rcnn_resnet50_fpn.yaml: -------------------------------------------------------------------------------- 1 | dependencies: 2 | - name: 'custom' 3 | 4 | datasets: 5 | &coco_val coco2017/val: !import_call 6 | _name: &dataset_name 'coco2017' 7 | _root: &root_dir !join ['~/datasets/', *dataset_name] 8 | key: 'coco.dataset.coco_dataset' 9 | init: 10 | kwargs: 11 | img_dir_path: !join [*root_dir, '/val2017'] 12 | ann_file_path: !join [*root_dir, '/annotations/instances_val2017.json'] 13 | annotated_only: False 14 | is_segment: False 15 | 16 | models: 17 | model: 18 | key: 'InputCompressionDetectionModel' 19 | kwargs: 20 | analysis_config: 21 | analyzer_configs: 22 | - key: 'FileSizeAnalyzer' 23 | kwargs: 24 | unit: 'KB' 25 | analyzes_after_compress: True 26 
| adaptive_pad_kwargs: 27 | fill: 0 28 | padding_mode: 'constant' 29 | factor: 128 30 | pre_transform: 31 | post_transform: 32 | compression_model: 33 | key: 'mbt2018_mean' 34 | kwargs: 35 | pretrained: True 36 | quality: 8 37 | metric: 'mse' 38 | detection_model: 39 | key: 'fasterrcnn_resnet50_fpn' 40 | kwargs: 41 | pretrained: True 42 | progress: True 43 | 44 | test: 45 | test_data_loader: 46 | dataset_id: *coco_val 47 | sampler: 48 | class_or_func: !import_get 49 | key: 'torch.utils.data.SequentialSampler' 50 | kwargs: 51 | collate_fn: 'coco_collate_fn' 52 | kwargs: 53 | batch_size: 1 54 | num_workers: 4 55 | -------------------------------------------------------------------------------- /configs/coco2017/input_compression/factorized_prior-faster_rcnn_resnet50_fpn.yaml: -------------------------------------------------------------------------------- 1 | dependencies: 2 | - name: 'custom' 3 | 4 | datasets: 5 | &coco_val coco2017/val: !import_call 6 | _name: &dataset_name 'coco2017' 7 | _root: &root_dir !join ['~/datasets/', *dataset_name] 8 | key: 'coco.dataset.coco_dataset' 9 | init: 10 | kwargs: 11 | img_dir_path: !join [*root_dir, '/val2017'] 12 | ann_file_path: !join [*root_dir, '/annotations/instances_val2017.json'] 13 | annotated_only: False 14 | is_segment: False 15 | 16 | models: 17 | model: 18 | key: 'InputCompressionDetectionModel' 19 | kwargs: 20 | analysis_config: 21 | analyzer_configs: 22 | - key: 'FileSizeAnalyzer' 23 | kwargs: 24 | unit: 'KB' 25 | analyzes_after_compress: True 26 | adaptive_pad_kwargs: 27 | fill: 0 28 | padding_mode: 'constant' 29 | factor: 128 30 | pre_transform: 31 | post_transform: 32 | compression_model: 33 | key: 'bmshj2018_factorized' 34 | kwargs: 35 | pretrained: True 36 | quality: 8 37 | metric: 'mse' 38 | detection_model: 39 | key: 'fasterrcnn_resnet50_fpn' 40 | kwargs: 41 | pretrained: True 42 | progress: True 43 | 44 | test: 45 | test_data_loader: 46 | dataset_id: *coco_val 47 | sampler: 48 | class_or_func: !import_get 
49 | key: 'torch.utils.data.SequentialSampler' 50 | kwargs: 51 | collate_fn: 'coco_collate_fn' 52 | kwargs: 53 | batch_size: 1 54 | num_workers: 4 55 | -------------------------------------------------------------------------------- /configs/coco2017/input_compression/scale_hyperprior-faster_rcnn_resnet50_fpn.yaml: -------------------------------------------------------------------------------- 1 | dependencies: 2 | - name: 'custom' 3 | 4 | datasets: 5 | &coco_val coco2017/val: !import_call 6 | _name: &dataset_name 'coco2017' 7 | _root: &root_dir !join ['~/datasets/', *dataset_name] 8 | key: 'coco.dataset.coco_dataset' 9 | init: 10 | kwargs: 11 | img_dir_path: !join [*root_dir, '/val2017'] 12 | ann_file_path: !join [*root_dir, '/annotations/instances_val2017.json'] 13 | annotated_only: False 14 | is_segment: False 15 | 16 | models: 17 | model: 18 | key: 'InputCompressionDetectionModel' 19 | kwargs: 20 | analysis_config: 21 | analyzer_configs: 22 | - key: 'FileSizeAnalyzer' 23 | kwargs: 24 | unit: 'KB' 25 | analyzes_after_compress: True 26 | adaptive_pad_kwargs: 27 | fill: 0 28 | padding_mode: 'constant' 29 | factor: 128 30 | pre_transform: 31 | post_transform: 32 | compression_model: 33 | key: 'bmshj2018_hyperprior' 34 | kwargs: 35 | pretrained: True 36 | quality: 8 37 | metric: 'mse' 38 | detection_model: 39 | key: 'fasterrcnn_resnet50_fpn' 40 | kwargs: 41 | pretrained: True 42 | progress: True 43 | 44 | test: 45 | test_data_loader: 46 | dataset_id: *coco_val 47 | sampler: 48 | class_or_func: !import_get 49 | key: 'torch.utils.data.SequentialSampler' 50 | kwargs: 51 | collate_fn: 'coco_collate_fn' 52 | kwargs: 53 | batch_size: 1 54 | num_workers: 4 55 | -------------------------------------------------------------------------------- /configs/coco2017/input_compression/joint_autoregressive_hierarchical_prior-faster_rcnn_resnet50_fpn.yaml: -------------------------------------------------------------------------------- 1 | dependencies: 2 | - name: 'custom' 3 | 4 | 
datasets: 5 | &coco_val coco2017/val: !import_call 6 | _name: &dataset_name 'coco2017' 7 | _root: &root_dir !join ['~/datasets/', *dataset_name] 8 | key: 'coco.dataset.coco_dataset' 9 | init: 10 | kwargs: 11 | img_dir_path: !join [*root_dir, '/val2017'] 12 | ann_file_path: !join [*root_dir, '/annotations/instances_val2017.json'] 13 | annotated_only: False 14 | is_segment: False 15 | 16 | models: 17 | model: 18 | key: 'InputCompressionDetectionModel' 19 | kwargs: 20 | analysis_config: 21 | analyzer_configs: 22 | - key: 'FileSizeAnalyzer' 23 | kwargs: 24 | unit: 'KB' 25 | analyzes_after_compress: True 26 | adaptive_pad_kwargs: 27 | fill: 0 28 | padding_mode: 'constant' 29 | factor: 128 30 | pre_transform: 31 | post_transform: 32 | uses_cpu4compression_model: True 33 | compression_model: 34 | key: 'mbt2018' 35 | kwargs: 36 | pretrained: True 37 | quality: 8 38 | metric: 'mse' 39 | detection_model: 40 | key: 'fasterrcnn_resnet50_fpn' 41 | kwargs: 42 | pretrained: True 43 | progress: True 44 | 45 | test: 46 | test_data_loader: 47 | dataset_id: *coco_val 48 | sampler: 49 | class_or_func: !import_get 50 | key: 'torch.utils.data.SequentialSampler' 51 | kwargs: 52 | collate_fn: 'coco_collate_fn' 53 | kwargs: 54 | batch_size: 1 55 | num_workers: 4 56 | -------------------------------------------------------------------------------- /legacy/configs/ilsvrc2012/input_compression/jpeg-resnet50.yaml: -------------------------------------------------------------------------------- 1 | datasets: 2 | ilsvrc2012: 3 | name: &dataset_name 'ilsvrc2012' 4 | type: 'ImageFolder' 5 | root: &root_dir !join ['~/dataset/', *dataset_name] 6 | splits: 7 | val: 8 | dataset_id: &imagenet_val !join [*dataset_name, '/val'] 9 | params: 10 | root: !join [*root_dir, '/val'] 11 | transform_params: 12 | - type: 'Resize' 13 | params: 14 | size: 256 15 | - type: 'CenterCrop' 16 | params: 17 | size: [224, 224] 18 | 19 | models: 20 | model: 21 | name: 'CodecInputCompressionClassifier' 22 | params: 23 | 
codec_params: 24 | - type: 'PILImageModule' 25 | params: 26 | format: 'JPEG' 27 | quality: 90 28 | returns_file_size: True 29 | post_transform_params: 30 | - type: 'ToTensor' 31 | params: 32 | - type: 'Normalize' 33 | params: 34 | mean: [0.485, 0.456, 0.406] 35 | std: [0.229, 0.224, 0.225] 36 | analysis_config: 37 | analyzer_configs: 38 | - type: 'FileSizeAccumulator' 39 | params: 40 | unit: 'KB' 41 | classification_model: 42 | name: &model_name 'resnet50' 43 | params: 44 | num_classes: 1000 45 | pretrained: True 46 | experiment: &experiment !join [*dataset_name, '-', *model_name] 47 | ckpt: !join ['./resource/ckpt/', *experiment, '.pt'] 48 | 49 | test: 50 | test_data_loader: 51 | dataset_id: *imagenet_val 52 | random_sample: False 53 | batch_size: 1 54 | num_workers: 16 55 | collate_fn: 'default_collate_w_pil' 56 | -------------------------------------------------------------------------------- /legacy/configs/ilsvrc2012/input_compression/webp-resnet50.yaml: -------------------------------------------------------------------------------- 1 | datasets: 2 | ilsvrc2012: 3 | name: &dataset_name 'ilsvrc2012' 4 | type: 'ImageFolder' 5 | root: &root_dir !join ['~/dataset/', *dataset_name] 6 | splits: 7 | val: 8 | dataset_id: &imagenet_val !join [*dataset_name, '/val'] 9 | params: 10 | root: !join [*root_dir, '/val'] 11 | transform_params: 12 | - type: 'Resize' 13 | params: 14 | size: 256 15 | - type: 'CenterCrop' 16 | params: 17 | size: [224, 224] 18 | 19 | models: 20 | model: 21 | name: 'CodecInputCompressionClassifier' 22 | params: 23 | codec_params: 24 | - type: 'PILImageModule' 25 | params: 26 | format: 'WEBP' 27 | quality: 90 28 | returns_file_size: True 29 | post_transform_params: 30 | - type: 'ToTensor' 31 | params: 32 | - type: 'Normalize' 33 | params: 34 | mean: [0.485, 0.456, 0.406] 35 | std: [0.229, 0.224, 0.225] 36 | analysis_config: 37 | analyzer_configs: 38 | - type: 'FileSizeAccumulator' 39 | params: 40 | unit: 'KB' 41 | classification_model: 42 | name: 
&model_name 'resnet50' 43 | params: 44 | num_classes: 1000 45 | pretrained: True 46 | experiment: &experiment !join [*dataset_name, '-', *model_name] 47 | ckpt: !join ['./resource/ckpt/', *experiment, '.pt'] 48 | 49 | test: 50 | test_data_loader: 51 | dataset_id: *imagenet_val 52 | random_sample: False 53 | batch_size: 1 54 | num_workers: 16 55 | collate_fn: 'default_collate_w_pil' 56 | -------------------------------------------------------------------------------- /legacy/configs/ilsvrc2012/input_compression/jpeg-resnet101.yaml: -------------------------------------------------------------------------------- 1 | datasets: 2 | ilsvrc2012: 3 | name: &dataset_name 'ilsvrc2012' 4 | type: 'ImageFolder' 5 | root: &root_dir !join ['~/dataset/', *dataset_name] 6 | splits: 7 | val: 8 | dataset_id: &imagenet_val !join [*dataset_name, '/val'] 9 | params: 10 | root: !join [*root_dir, '/val'] 11 | transform_params: 12 | - type: 'Resize' 13 | params: 14 | size: 256 15 | - type: 'CenterCrop' 16 | params: 17 | size: [224, 224] 18 | 19 | models: 20 | model: 21 | name: 'CodecInputCompressionClassifier' 22 | params: 23 | codec_params: 24 | - type: 'PILImageModule' 25 | params: 26 | format: 'JPEG' 27 | quality: 90 28 | returns_file_size: True 29 | post_transform_params: 30 | - type: 'ToTensor' 31 | params: 32 | - type: 'Normalize' 33 | params: 34 | mean: [0.485, 0.456, 0.406] 35 | std: [0.229, 0.224, 0.225] 36 | analysis_config: 37 | analyzer_configs: 38 | - type: 'FileSizeAccumulator' 39 | params: 40 | unit: 'KB' 41 | classification_model: 42 | name: &model_name 'resnet101' 43 | params: 44 | num_classes: 1000 45 | pretrained: True 46 | experiment: &experiment !join [*dataset_name, '-', *model_name] 47 | ckpt: !join ['./resource/ckpt/', *experiment, '.pt'] 48 | 49 | test: 50 | test_data_loader: 51 | dataset_id: *imagenet_val 52 | random_sample: False 53 | batch_size: 1 54 | num_workers: 16 55 | collate_fn: 'default_collate_w_pil' 56 | 
-------------------------------------------------------------------------------- /legacy/configs/ilsvrc2012/input_compression/jpeg-resnet152.yaml: -------------------------------------------------------------------------------- 1 | datasets: 2 | ilsvrc2012: 3 | name: &dataset_name 'ilsvrc2012' 4 | type: 'ImageFolder' 5 | root: &root_dir !join ['~/dataset/', *dataset_name] 6 | splits: 7 | val: 8 | dataset_id: &imagenet_val !join [*dataset_name, '/val'] 9 | params: 10 | root: !join [*root_dir, '/val'] 11 | transform_params: 12 | - type: 'Resize' 13 | params: 14 | size: 256 15 | - type: 'CenterCrop' 16 | params: 17 | size: [224, 224] 18 | 19 | models: 20 | model: 21 | name: 'CodecInputCompressionClassifier' 22 | params: 23 | codec_params: 24 | - type: 'PILImageModule' 25 | params: 26 | format: 'JPEG' 27 | quality: 90 28 | returns_file_size: True 29 | post_transform_params: 30 | - type: 'ToTensor' 31 | params: 32 | - type: 'Normalize' 33 | params: 34 | mean: [0.485, 0.456, 0.406] 35 | std: [0.229, 0.224, 0.225] 36 | analysis_config: 37 | analyzer_configs: 38 | - type: 'FileSizeAccumulator' 39 | params: 40 | unit: 'KB' 41 | classification_model: 42 | name: &model_name 'resnet152' 43 | params: 44 | num_classes: 1000 45 | pretrained: True 46 | experiment: &experiment !join [*dataset_name, '-', *model_name] 47 | ckpt: !join ['./resource/ckpt/', *experiment, '.pt'] 48 | 49 | test: 50 | test_data_loader: 51 | dataset_id: *imagenet_val 52 | random_sample: False 53 | batch_size: 1 54 | num_workers: 16 55 | collate_fn: 'default_collate_w_pil' 56 | -------------------------------------------------------------------------------- /legacy/configs/ilsvrc2012/input_compression/webp-resnet101.yaml: -------------------------------------------------------------------------------- 1 | datasets: 2 | ilsvrc2012: 3 | name: &dataset_name 'ilsvrc2012' 4 | type: 'ImageFolder' 5 | root: &root_dir !join ['~/dataset/', *dataset_name] 6 | splits: 7 | val: 8 | dataset_id: &imagenet_val !join 
[*dataset_name, '/val'] 9 | params: 10 | root: !join [*root_dir, '/val'] 11 | transform_params: 12 | - type: 'Resize' 13 | params: 14 | size: 256 15 | - type: 'CenterCrop' 16 | params: 17 | size: [224, 224] 18 | 19 | models: 20 | model: 21 | name: 'CodecInputCompressionClassifier' 22 | params: 23 | codec_params: 24 | - type: 'PILImageModule' 25 | params: 26 | format: 'WEBP' 27 | quality: 90 28 | returns_file_size: True 29 | post_transform_params: 30 | - type: 'ToTensor' 31 | params: 32 | - type: 'Normalize' 33 | params: 34 | mean: [0.485, 0.456, 0.406] 35 | std: [0.229, 0.224, 0.225] 36 | analysis_config: 37 | analyzer_configs: 38 | - type: 'FileSizeAccumulator' 39 | params: 40 | unit: 'KB' 41 | classification_model: 42 | name: &model_name 'resnet101' 43 | params: 44 | num_classes: 1000 45 | pretrained: True 46 | experiment: &experiment !join [*dataset_name, '-', *model_name] 47 | ckpt: !join ['./resource/ckpt/', *experiment, '.pt'] 48 | 49 | test: 50 | test_data_loader: 51 | dataset_id: *imagenet_val 52 | random_sample: False 53 | batch_size: 1 54 | num_workers: 16 55 | collate_fn: 'default_collate_w_pil' 56 | -------------------------------------------------------------------------------- /legacy/configs/ilsvrc2012/input_compression/webp-resnet152.yaml: -------------------------------------------------------------------------------- 1 | datasets: 2 | ilsvrc2012: 3 | name: &dataset_name 'ilsvrc2012' 4 | type: 'ImageFolder' 5 | root: &root_dir !join ['~/dataset/', *dataset_name] 6 | splits: 7 | val: 8 | dataset_id: &imagenet_val !join [*dataset_name, '/val'] 9 | params: 10 | root: !join [*root_dir, '/val'] 11 | transform_params: 12 | - type: 'Resize' 13 | params: 14 | size: 256 15 | - type: 'CenterCrop' 16 | params: 17 | size: [224, 224] 18 | 19 | models: 20 | model: 21 | name: 'CodecInputCompressionClassifier' 22 | params: 23 | codec_params: 24 | - type: 'PILImageModule' 25 | params: 26 | format: 'WEBP' 27 | quality: 90 28 | returns_file_size: True 29 | 
post_transform_params: 30 | - type: 'ToTensor' 31 | params: 32 | - type: 'Normalize' 33 | params: 34 | mean: [0.485, 0.456, 0.406] 35 | std: [0.229, 0.224, 0.225] 36 | analysis_config: 37 | analyzer_configs: 38 | - type: 'FileSizeAccumulator' 39 | params: 40 | unit: 'KB' 41 | classification_model: 42 | name: &model_name 'resnet152' 43 | params: 44 | num_classes: 1000 45 | pretrained: True 46 | experiment: &experiment !join [*dataset_name, '-', *model_name] 47 | ckpt: !join ['./resource/ckpt/', *experiment, '.pt'] 48 | 49 | test: 50 | test_data_loader: 51 | dataset_id: *imagenet_val 52 | random_sample: False 53 | batch_size: 1 54 | num_workers: 16 55 | collate_fn: 'default_collate_w_pil' 56 | -------------------------------------------------------------------------------- /script/codec_input_compression/README.md: -------------------------------------------------------------------------------- 1 | # Codec-based Input Compression Baselines 2 | 3 | We considered the following codec-based image compression methods: 4 | - JPEG 5 | - WebP 6 | - BPG 7 | 8 | If you want to use BPG, you will need to manually install the software 9 | ```shell 10 | bash script/software/install_bpg.sh 11 | ``` 12 | 13 | The script will place the encoder and decoder in `~/software/` 14 | 15 | ## ImageNet (ILSVRC 2012): Image Classification 16 | Codec-based input compression followed by ResNet-50 17 | 18 | ```shell 19 | bash script/codec_input_compression/ilsvrc2012-image_classification.sh jpeg-resnet50 jpeg 20 | bash script/codec_input_compression/ilsvrc2012-image_classification.sh webp-resnet50 webp 21 | bash script/codec_input_compression/ilsvrc2012-image_classification.sh bpg-resnet50 bpg 5 5 50 22 | ``` 23 | 24 | ## COCO 2017: Object Detection 25 | Codec-based input compression followed by Faster R-CNN with ResNet-50 and FPN 26 | 27 | ```shell 28 | bash script/codec_input_compression/coco2017-object_detection.sh jpeg-faster_rcnn_resnet50_fpn jpeg 29 | bash 
script/codec_input_compression/coco2017-object_detection.sh webp-faster_rcnn_resnet50_fpn webp 30 | bash script/codec_input_compression/coco2017-object_detection.sh bpg-faster_rcnn_resnet50_fpn bpg 5 5 50 31 | ``` 32 | 33 | ## PASCAL VOC 2012: Semantic Segmentation 34 | Codec-based input compression followed by DeepLabv3 with ResNet-50 35 | 36 | ```shell 37 | bash script/codec_input_compression/pascal_voc2012-semantic_segmentation.sh jpeg-deeplabv3_resnet50 jpeg 38 | bash script/codec_input_compression/pascal_voc2012-semantic_segmentation.sh webp-deeplabv3_resnet50 webp 39 | bash script/codec_input_compression/pascal_voc2012-semantic_segmentation.sh bpg-deeplabv3_resnet50 bpg 5 5 50 40 | ``` 41 | -------------------------------------------------------------------------------- /legacy/configs/ilsvrc2012/input_compression/bpg-resnet50.yaml: -------------------------------------------------------------------------------- 1 | datasets: 2 | ilsvrc2012: 3 | name: &dataset_name 'ilsvrc2012' 4 | type: 'ImageFolder' 5 | root: &root_dir !join ['~/dataset/', *dataset_name] 6 | splits: 7 | val: 8 | dataset_id: &imagenet_val !join [*dataset_name, '/val'] 9 | params: 10 | root: !join [*root_dir, '/val'] 11 | transform_params: 12 | - type: 'Resize' 13 | params: 14 | size: 256 15 | - type: 'CenterCrop' 16 | params: 17 | size: [224, 224] 18 | 19 | models: 20 | model: 21 | name: 'CodecInputCompressionClassifier' 22 | params: 23 | codec_params: 24 | - type: 'BPGModule' 25 | params: 26 | encoder_path: '~/software/libbpg-0.9.8/bpgenc' 27 | decoder_path: '~/software/libbpg-0.9.8/bpgdec' 28 | quality: 50 29 | returns_file_size: True 30 | post_transform_params: 31 | - type: 'ToTensor' 32 | params: 33 | - type: 'Normalize' 34 | params: 35 | mean: [0.485, 0.456, 0.406] 36 | std: [0.229, 0.224, 0.225] 37 | analysis_config: 38 | analyzer_configs: 39 | - type: 'FileSizeAccumulator' 40 | params: 41 | unit: 'KB' 42 | classification_model: 43 | name: &model_name 'resnet50' 44 | params: 45 | 
num_classes: 1000 46 | pretrained: True 47 | experiment: &experiment !join [*dataset_name, '-', *model_name] 48 | ckpt: !join ['./resource/ckpt/', *experiment, '.pt'] 49 | 50 | test: 51 | test_data_loader: 52 | dataset_id: *imagenet_val 53 | random_sample: False 54 | batch_size: 1 55 | num_workers: 16 56 | -------------------------------------------------------------------------------- /legacy/configs/ilsvrc2012/input_compression/bpg-resnet101.yaml: -------------------------------------------------------------------------------- 1 | datasets: 2 | ilsvrc2012: 3 | name: &dataset_name 'ilsvrc2012' 4 | type: 'ImageFolder' 5 | root: &root_dir !join ['~/dataset/', *dataset_name] 6 | splits: 7 | val: 8 | dataset_id: &imagenet_val !join [*dataset_name, '/val'] 9 | params: 10 | root: !join [*root_dir, '/val'] 11 | transform_params: 12 | - type: 'Resize' 13 | params: 14 | size: 256 15 | - type: 'CenterCrop' 16 | params: 17 | size: [224, 224] 18 | 19 | models: 20 | model: 21 | name: 'CodecInputCompressionClassifier' 22 | params: 23 | codec_params: 24 | - type: 'BPGModule' 25 | params: 26 | encoder_path: '~/software/libbpg-0.9.8/bpgenc' 27 | decoder_path: '~/software/libbpg-0.9.8/bpgdec' 28 | quality: 50 29 | returns_file_size: True 30 | post_transform_params: 31 | - type: 'ToTensor' 32 | params: 33 | - type: 'Normalize' 34 | params: 35 | mean: [0.485, 0.456, 0.406] 36 | std: [0.229, 0.224, 0.225] 37 | analysis_config: 38 | analyzer_configs: 39 | - type: 'FileSizeAccumulator' 40 | params: 41 | unit: 'KB' 42 | classification_model: 43 | name: &model_name 'resnet101' 44 | params: 45 | num_classes: 1000 46 | pretrained: True 47 | experiment: &experiment !join [*dataset_name, '-', *model_name] 48 | ckpt: !join ['./resource/ckpt/', *experiment, '.pt'] 49 | 50 | test: 51 | test_data_loader: 52 | dataset_id: *imagenet_val 53 | random_sample: False 54 | batch_size: 1 55 | num_workers: 16 56 | -------------------------------------------------------------------------------- 
/legacy/configs/ilsvrc2012/input_compression/bpg-resnet152.yaml: -------------------------------------------------------------------------------- 1 | datasets: 2 | ilsvrc2012: 3 | name: &dataset_name 'ilsvrc2012' 4 | type: 'ImageFolder' 5 | root: &root_dir !join ['~/dataset/', *dataset_name] 6 | splits: 7 | val: 8 | dataset_id: &imagenet_val !join [*dataset_name, '/val'] 9 | params: 10 | root: !join [*root_dir, '/val'] 11 | transform_params: 12 | - type: 'Resize' 13 | params: 14 | size: 256 15 | - type: 'CenterCrop' 16 | params: 17 | size: [224, 224] 18 | 19 | models: 20 | model: 21 | name: 'CodecInputCompressionClassifier' 22 | params: 23 | codec_params: 24 | - type: 'BPGModule' 25 | params: 26 | encoder_path: '~/software/libbpg-0.9.8/bpgenc' 27 | decoder_path: '~/software/libbpg-0.9.8/bpgdec' 28 | quality: 50 29 | returns_file_size: True 30 | post_transform_params: 31 | - type: 'ToTensor' 32 | params: 33 | - type: 'Normalize' 34 | params: 35 | mean: [0.485, 0.456, 0.406] 36 | std: [0.229, 0.224, 0.225] 37 | analysis_config: 38 | analyzer_configs: 39 | - type: 'FileSizeAccumulator' 40 | params: 41 | unit: 'KB' 42 | classification_model: 43 | name: &model_name 'resnet152' 44 | params: 45 | num_classes: 1000 46 | pretrained: True 47 | experiment: &experiment !join [*dataset_name, '-', *model_name] 48 | ckpt: !join ['./resource/ckpt/', *experiment, '.pt'] 49 | 50 | test: 51 | test_data_loader: 52 | dataset_id: *imagenet_val 53 | random_sample: False 54 | batch_size: 1 55 | num_workers: 16 56 | -------------------------------------------------------------------------------- /configs/coco2017/input_compression/jpeg-faster_rcnn_resnet50_fpn.yaml: -------------------------------------------------------------------------------- 1 | dependencies: 2 | - name: 'custom' 3 | 4 | datasets: 5 | &coco_val coco2017/val: !import_call 6 | _name: &dataset_name 'coco2017' 7 | _root: &root_dir !join ['~/datasets/', *dataset_name] 8 | key: 'coco.dataset.coco_dataset' 9 | init: 10 | 
kwargs: 11 | img_dir_path: !join [*root_dir, '/val2017'] 12 | ann_file_path: !join [*root_dir, '/annotations/instances_val2017.json'] 13 | annotated_only: False 14 | is_segment: False 15 | 16 | models: 17 | model: 18 | key: 'InputCompressionDetectionModel' 19 | kwargs: 20 | codec_encoder_decoder: !import_call 21 | key: 'torchvision.transforms.Compose' 22 | init: 23 | kwargs: 24 | transforms: 25 | - !import_call 26 | key: 'sc2bench.transforms.codec.PILImageModule' 27 | init: 28 | kwargs: 29 | format: 'JPEG' 30 | quality: 90 31 | returns_file_size: True 32 | analysis_config: 33 | analyzer_configs: 34 | - key: 'FileSizeAccumulator' 35 | kwargs: 36 | unit: 'KB' 37 | analyzes_after_compress: True 38 | adaptive_pad_config: 39 | pre_transform: 40 | post_transform: 41 | detection_model: 42 | key: 'fasterrcnn_resnet50_fpn' 43 | kwargs: 44 | pretrained: True 45 | progress: True 46 | 47 | test: 48 | test_data_loader: 49 | dataset_id: *coco_val 50 | sampler: 51 | class_or_func: !import_get 52 | key: 'torch.utils.data.SequentialSampler' 53 | kwargs: 54 | collate_fn: 'coco_collate_fn' 55 | kwargs: 56 | batch_size: 1 57 | num_workers: 4 58 | -------------------------------------------------------------------------------- /configs/coco2017/input_compression/webp-faster_rcnn_resnet50_fpn.yaml: -------------------------------------------------------------------------------- 1 | dependencies: 2 | - name: 'custom' 3 | 4 | datasets: 5 | &coco_val coco2017/val: !import_call 6 | _name: &dataset_name 'coco2017' 7 | _root: &root_dir !join ['~/datasets/', *dataset_name] 8 | key: 'coco.dataset.coco_dataset' 9 | init: 10 | kwargs: 11 | img_dir_path: !join [*root_dir, '/val2017'] 12 | ann_file_path: !join [*root_dir, '/annotations/instances_val2017.json'] 13 | annotated_only: False 14 | is_segment: False 15 | 16 | models: 17 | model: 18 | key: 'InputCompressionDetectionModel' 19 | kwargs: 20 | codec_encoder_decoder: !import_call 21 | key: 'torchvision.transforms.Compose' 22 | init: 23 | 
kwargs: 24 | transforms: 25 | - !import_call 26 | key: 'sc2bench.transforms.codec.PILImageModule' 27 | init: 28 | kwargs: 29 | format: 'WEBP' 30 | quality: 90 31 | returns_file_size: True 32 | analysis_config: 33 | analyzer_configs: 34 | - key: 'FileSizeAccumulator' 35 | kwargs: 36 | unit: 'KB' 37 | analyzes_after_compress: True 38 | adaptive_pad_config: 39 | pre_transform: 40 | post_transform: 41 | detection_model: 42 | key: 'fasterrcnn_resnet50_fpn' 43 | kwargs: 44 | pretrained: True 45 | progress: True 46 | 47 | test: 48 | test_data_loader: 49 | dataset_id: *coco_val 50 | sampler: 51 | class_or_func: !import_get 52 | key: 'torch.utils.data.SequentialSampler' 53 | kwargs: 54 | collate_fn: 'coco_collate_fn' 55 | kwargs: 56 | batch_size: 1 57 | num_workers: 4 58 | -------------------------------------------------------------------------------- /legacy/script/codec_input_compression/README.md: -------------------------------------------------------------------------------- 1 | # Codec-based Input Compression Baselines 2 | 3 | We considered the following codec-based image compression methods: 4 | - JPEG 5 | - WebP 6 | - BPG 7 | 8 | If you want to use BPG, you will need to manually install the software 9 | ```shell 10 | bash legacy/script/software/install_bpg.sh 11 | ``` 12 | 13 | The script will place the encoder and decoder in `~/software/` 14 | 15 | ## ImageNet (ILSVRC 2012): Image Classification 16 | Codec-based input compression followed by ResNet-50 17 | 18 | ```shell 19 | bash legacy/script/codec_input_compression/ilsvrc2012-image_classification.sh jpeg-resnet50 jpeg 20 | bash legacy/script/codec_input_compression/ilsvrc2012-image_classification.sh webp-resnet50 webp 21 | bash legacy/script/codec_input_compression/ilsvrc2012-image_classification.sh bpg-resnet50 bpg 5 5 50 22 | ``` 23 | 24 | ## COCO 2017: Object Detection 25 | Codec-based input compression followed by Faster R-CNN with ResNet-50 and FPN 26 | 27 | ```shell 28 | bash
legacy/script/codec_input_compression/coco2017-object_detection.sh jpeg-faster_rcnn_resnet50_fpn jpeg 29 | bash legacy/script/codec_input_compression/coco2017-object_detection.sh webp-faster_rcnn_resnet50_fpn webp 30 | bash legacy/script/codec_input_compression/coco2017-object_detection.sh bpg-faster_rcnn_resnet50_fpn bpg 5 5 50 31 | ``` 32 | 33 | ## PASCAL VOC 2012: Semantic Segmentation 34 | Codec-based input compression followed by DeepLabv3 with ResNet-50 35 | 36 | ```shell 37 | bash legacy/script/codec_input_compression/pascal_voc2012-semantic_segmentation.sh jpeg-deeplabv3_resnet50 jpeg 38 | bash legacy/script/codec_input_compression/pascal_voc2012-semantic_segmentation.sh webp-deeplabv3_resnet50 webp 39 | bash legacy/script/codec_input_compression/pascal_voc2012-semantic_segmentation.sh bpg-deeplabv3_resnet50 bpg 5 5 50 40 | ``` 41 | -------------------------------------------------------------------------------- /legacy/configs/coco2017/input_compression/factorized_prior-faster_rcnn_resnet50_fpn.yaml: -------------------------------------------------------------------------------- 1 | datasets: 2 | coco2017: 3 | name: &dataset_name 'coco2017' 4 | type: 'cocodetect' 5 | root: &root_dir !join ['~/dataset/', *dataset_name] 6 | splits: 7 | val: 8 | dataset_id: &coco_val !join [*dataset_name, '/val'] 9 | images: !join [*root_dir, '/val2017'] 10 | annotations: !join [*root_dir, '/annotations/instances_val2017.json'] 11 | annotated_only: False 12 | 13 | models: 14 | model: 15 | name: 'InputCompressionDetectionModel' 16 | params: 17 | codec_params: 18 | - type: 'BPGModule' 19 | params: 20 | encoder_path: '~/software/libbpg-0.9.8/bpgenc' 21 | decoder_path: '~/software/libbpg-0.9.8/bpgdec' 22 | quality: 50 23 | returns_file_size: True 24 | analysis_config: 25 | analyzer_configs: 26 | - type: 'FileSizeAnalyzer' 27 | params: 28 | unit: 'KB' 29 | analyzes_after_compress: True 30 | adaptive_pad_kwargs: 31 | fill: 0 32 | padding_mode: 'constant' 33 | factor: 128 34 | 
pre_transform_params: 35 | post_transform_params: 36 | compression_model: 37 | name: 'bmshj2018_factorized' 38 | params: 39 | pretrained: True 40 | quality: 8 41 | metric: 'mse' 42 | ckpt: './resource/ckpt/input_compression/factorized_prior.pt' 43 | detection_model: 44 | name: 'fasterrcnn_resnet50_fpn' 45 | params: 46 | pretrained: True 47 | progress: True 48 | pretrained_backbone: True 49 | ckpt: '' 50 | 51 | test: 52 | test_data_loader: 53 | dataset_id: *coco_val 54 | random_sample: False 55 | batch_size: 1 56 | num_workers: 4 57 | collate_fn: 'coco_collate_fn' 58 | -------------------------------------------------------------------------------- /legacy/configs/coco2017/input_compression/mean_scale_hyperprior-faster_rcnn_resnet50_fpn.yaml: -------------------------------------------------------------------------------- 1 | datasets: 2 | coco2017: 3 | name: &dataset_name 'coco2017' 4 | type: 'cocodetect' 5 | root: &root_dir !join ['~/dataset/', *dataset_name] 6 | splits: 7 | val: 8 | dataset_id: &coco_val !join [*dataset_name, '/val'] 9 | images: !join [*root_dir, '/val2017'] 10 | annotations: !join [*root_dir, '/annotations/instances_val2017.json'] 11 | annotated_only: False 12 | 13 | models: 14 | model: 15 | name: 'InputCompressionDetectionModel' 16 | params: 17 | codec_params: 18 | - type: 'BPGModule' 19 | params: 20 | encoder_path: '~/software/libbpg-0.9.8/bpgenc' 21 | decoder_path: '~/software/libbpg-0.9.8/bpgdec' 22 | quality: 50 23 | returns_file_size: True 24 | analysis_config: 25 | analyzer_configs: 26 | - type: 'FileSizeAnalyzer' 27 | params: 28 | unit: 'KB' 29 | analyzes_after_compress: True 30 | adaptive_pad_kwargs: 31 | fill: 0 32 | padding_mode: 'constant' 33 | factor: 128 34 | pre_transform_params: 35 | post_transform_params: 36 | compression_model: 37 | name: 'mbt2018_mean' 38 | params: 39 | pretrained: True 40 | quality: 8 41 | metric: 'mse' 42 | ckpt: './resource/ckpt/input_compression/mean_scale_hyperprior.pt' 43 | detection_model: 44 | name: 
'fasterrcnn_resnet50_fpn' 45 | params: 46 | pretrained: True 47 | progress: True 48 | pretrained_backbone: True 49 | ckpt: '' 50 | 51 | test: 52 | test_data_loader: 53 | dataset_id: *coco_val 54 | random_sample: False 55 | batch_size: 1 56 | num_workers: 4 57 | collate_fn: 'coco_collate_fn' 58 | -------------------------------------------------------------------------------- /legacy/configs/coco2017/input_compression/scale_hyperprior-faster_rcnn_resnet50_fpn.yaml: -------------------------------------------------------------------------------- 1 | datasets: 2 | coco2017: 3 | name: &dataset_name 'coco2017' 4 | type: 'cocodetect' 5 | root: &root_dir !join ['~/dataset/', *dataset_name] 6 | splits: 7 | val: 8 | dataset_id: &coco_val !join [*dataset_name, '/val'] 9 | images: !join [*root_dir, '/val2017'] 10 | annotations: !join [*root_dir, '/annotations/instances_val2017.json'] 11 | annotated_only: False 12 | 13 | models: 14 | model: 15 | name: 'InputCompressionDetectionModel' 16 | params: 17 | codec_params: 18 | - type: 'BPGModule' 19 | params: 20 | encoder_path: '~/software/libbpg-0.9.8/bpgenc' 21 | decoder_path: '~/software/libbpg-0.9.8/bpgdec' 22 | quality: 50 23 | returns_file_size: True 24 | analysis_config: 25 | analyzer_configs: 26 | - type: 'FileSizeAnalyzer' 27 | params: 28 | unit: 'KB' 29 | analyzes_after_compress: True 30 | adaptive_pad_kwargs: 31 | fill: 0 32 | padding_mode: 'constant' 33 | factor: 128 34 | pre_transform_params: 35 | post_transform_params: 36 | compression_model: 37 | name: 'bmshj2018_hyperprior' 38 | params: 39 | pretrained: True 40 | quality: 8 41 | metric: 'mse' 42 | ckpt: './resource/ckpt/input_compression/scale_hyperprior.pt' 43 | detection_model: 44 | name: 'fasterrcnn_resnet50_fpn' 45 | params: 46 | pretrained: True 47 | progress: True 48 | pretrained_backbone: True 49 | ckpt: '' 50 | 51 | test: 52 | test_data_loader: 53 | dataset_id: *coco_val 54 | random_sample: False 55 | batch_size: 1 56 | num_workers: 4 57 | collate_fn: 
'coco_collate_fn' 58 | -------------------------------------------------------------------------------- /configs/coco2017/input_compression/bpg-faster_rcnn_resnet50_fpn.yaml: -------------------------------------------------------------------------------- 1 | dependencies: 2 | - name: 'custom' 3 | 4 | datasets: 5 | &coco_val coco2017/val: !import_call 6 | _name: &dataset_name 'coco2017' 7 | _root: &root_dir !join ['~/datasets/', *dataset_name] 8 | key: 'coco.dataset.coco_dataset' 9 | init: 10 | kwargs: 11 | img_dir_path: !join [*root_dir, '/val2017'] 12 | ann_file_path: !join [*root_dir, '/annotations/instances_val2017.json'] 13 | annotated_only: False 14 | is_segment: False 15 | 16 | models: 17 | model: 18 | key: 'InputCompressionDetectionModel' 19 | kwargs: 20 | codec_encoder_decoder: !import_call 21 | key: 'torchvision.transforms.Compose' 22 | init: 23 | kwargs: 24 | transforms: 25 | - !import_call 26 | key: 'sc2bench.transforms.codec.BPGModule' 27 | init: 28 | kwargs: 29 | encoder_path: '~/software/libbpg-0.9.8/bpgenc' 30 | decoder_path: '~/software/libbpg-0.9.8/bpgdec' 31 | quality: 50 32 | returns_file_size: True 33 | analysis_config: 34 | analyzer_configs: 35 | - key: 'FileSizeAccumulator' 36 | kwargs: 37 | unit: 'KB' 38 | analyzes_after_compress: True 39 | adaptive_pad_config: 40 | pre_transform: 41 | post_transform: 42 | detection_model: 43 | key: 'fasterrcnn_resnet50_fpn' 44 | kwargs: 45 | pretrained: True 46 | progress: True 47 | 48 | test: 49 | test_data_loader: 50 | dataset_id: *coco_val 51 | sampler: 52 | class_or_func: !import_get 53 | key: 'torch.utils.data.SequentialSampler' 54 | kwargs: 55 | collate_fn: 'coco_collate_fn' 56 | kwargs: 57 | batch_size: 1 58 | num_workers: 4 59 | -------------------------------------------------------------------------------- /legacy/configs/coco2017/input_compression/joint_autoregressive_hierarchical_prior-faster_rcnn_resnet50_fpn.yaml: -------------------------------------------------------------------------------- 
1 | datasets: 2 | coco2017: 3 | name: &dataset_name 'coco2017' 4 | type: 'cocodetect' 5 | root: &root_dir !join ['~/dataset/', *dataset_name] 6 | splits: 7 | val: 8 | dataset_id: &coco_val !join [*dataset_name, '/val'] 9 | images: !join [*root_dir, '/val2017'] 10 | annotations: !join [*root_dir, '/annotations/instances_val2017.json'] 11 | annotated_only: False 12 | 13 | models: 14 | model: 15 | name: 'InputCompressionDetectionModel' 16 | params: 17 | codec_params: 18 | - type: 'BPGModule' 19 | params: 20 | encoder_path: '~/software/libbpg-0.9.8/bpgenc' 21 | decoder_path: '~/software/libbpg-0.9.8/bpgdec' 22 | quality: 50 23 | returns_file_size: True 24 | analysis_config: 25 | analyzer_configs: 26 | - type: 'FileSizeAnalyzer' 27 | params: 28 | unit: 'KB' 29 | analyzes_after_compress: True 30 | adaptive_pad_kwargs: 31 | fill: 0 32 | padding_mode: 'constant' 33 | factor: 128 34 | pre_transform_params: 35 | post_transform_params: 36 | uses_cpu4compression_model: True 37 | compression_model: 38 | name: 'mbt2018' 39 | params: 40 | pretrained: True 41 | quality: 8 42 | metric: 'mse' 43 | ckpt: './resource/ckpt/input_compression/joint_autoregressive_hierarchical_prior.pt' 44 | detection_model: 45 | name: 'fasterrcnn_resnet50_fpn' 46 | params: 47 | pretrained: True 48 | progress: True 49 | pretrained_backbone: True 50 | ckpt: '' 51 | 52 | test: 53 | test_data_loader: 54 | dataset_id: *coco_val 55 | random_sample: False 56 | batch_size: 1 57 | num_workers: 4 58 | collate_fn: 'coco_collate_fn' 59 | -------------------------------------------------------------------------------- /legacy/configs/ilsvrc2012/input_compression/vtm-resnet50.yaml: -------------------------------------------------------------------------------- 1 | datasets: 2 | ilsvrc2012: 3 | name: &dataset_name 'ilsvrc2012' 4 | type: 'ImageFolder' 5 | root: &root_dir !join ['~/dataset/', *dataset_name] 6 | splits: 7 | val: 8 | dataset_id: &imagenet_val !join [*dataset_name, '/val'] 9 | params: 10 | root: !join 
[*root_dir, '/val'] 11 | transform_params: 12 | - type: 'Resize' 13 | params: 14 | size: 256 15 | - type: 'CenterCrop' 16 | params: 17 | size: [224, 224] 18 | 19 | models: 20 | model: 21 | name: 'CodecInputCompressionClassifier' 22 | params: 23 | codec_params: 24 | - type: 'VTMModule' 25 | params: 26 | encoder_path: '~/software/VVCSoftware_VTM/bin/EncoderAppStatic' 27 | decoder_path: '~/software/VVCSoftware_VTM/bin/DecoderAppStatic' 28 | config_path: '~/software/VVCSoftware_VTM/cfg/encoder_intra_vtm.cfg' 29 | color_mode: 'ycbcr' 30 | quality: 63 31 | returns_file_size: True 32 | post_transform_params: 33 | - type: 'ToTensor' 34 | params: 35 | - type: 'Normalize' 36 | params: 37 | mean: [0.485, 0.456, 0.406] 38 | std: [0.229, 0.224, 0.225] 39 | analysis_config: 40 | analyzer_configs: 41 | - type: 'FileSizeAccumulator' 42 | params: 43 | unit: 'KB' 44 | classification_model: 45 | name: &model_name 'resnet50' 46 | params: 47 | num_classes: 1000 48 | pretrained: True 49 | experiment: &experiment !join [*dataset_name, '-', *model_name] 50 | ckpt: !join ['./resource/ckpt/', *experiment, '.pt'] 51 | 52 | test: 53 | test_data_loader: 54 | dataset_id: *imagenet_val 55 | random_sample: False 56 | batch_size: 1 57 | num_workers: 16 58 | -------------------------------------------------------------------------------- /legacy/configs/ilsvrc2012/feature_compression/jpeg-resnet50.yaml: -------------------------------------------------------------------------------- 1 | datasets: 2 | ilsvrc2012: 3 | name: &dataset_name 'ilsvrc2012' 4 | type: 'ImageFolder' 5 | root: &root_dir !join ['~/dataset/', *dataset_name] 6 | splits: 7 | val: 8 | dataset_id: &imagenet_val !join [*dataset_name, '/val'] 9 | params: 10 | root: !join [*root_dir, '/val'] 11 | transform_params: 12 | - type: 'Resize' 13 | params: 14 | size: 256 15 | - type: 'CenterCrop' 16 | params: 17 | size: [224, 224] 18 | - type: 'ToTensor' 19 | params: 20 | - type: 'Normalize' 21 | params: 22 | mean: [0.485, 0.456, 0.406] 23 | 
std: [0.229, 0.224, 0.225] 24 | 25 | models: 26 | model: 27 | name: 'CodecFeatureCompressionClassifier' 28 | params: 29 | codec_params: 30 | - type: 'PILTensorModule' 31 | params: 32 | format: 'JPEG' 33 | quality: 90 34 | returns_file_size: True 35 | encoder_config: 36 | sequential: ['conv1', 'bn1', 'relu', 'maxpool', 'layer1', 'layer2'] 37 | decoder_config: 38 | sequential: ['layer3', 'layer4', 'avgpool'] 39 | classifier_config: 40 | sequential: ['fc'] 41 | post_transform_params: 42 | analysis_config: 43 | analyzer_configs: 44 | - type: 'FileSizeAccumulator' 45 | params: 46 | unit: 'KB' 47 | classification_model: 48 | name: &model_name 'resnet50' 49 | params: 50 | num_classes: 1000 51 | pretrained: True 52 | experiment: &experiment !join [*dataset_name, '-', *model_name] 53 | ckpt: !join ['./resource/ckpt/', *experiment, '.pt'] 54 | 55 | test: 56 | test_data_loader: 57 | dataset_id: *imagenet_val 58 | random_sample: False 59 | batch_size: 1 60 | num_workers: 16 61 | collate_fn: 'default_collate_w_pil' 62 | -------------------------------------------------------------------------------- /legacy/configs/ilsvrc2012/feature_compression/webp-resnet50.yaml: -------------------------------------------------------------------------------- 1 | datasets: 2 | ilsvrc2012: 3 | name: &dataset_name 'ilsvrc2012' 4 | type: 'ImageFolder' 5 | root: &root_dir !join ['~/dataset/', *dataset_name] 6 | splits: 7 | val: 8 | dataset_id: &imagenet_val !join [*dataset_name, '/val'] 9 | params: 10 | root: !join [*root_dir, '/val'] 11 | transform_params: 12 | - type: 'Resize' 13 | params: 14 | size: 256 15 | - type: 'CenterCrop' 16 | params: 17 | size: [224, 224] 18 | - type: 'ToTensor' 19 | params: 20 | - type: 'Normalize' 21 | params: 22 | mean: [0.485, 0.456, 0.406] 23 | std: [0.229, 0.224, 0.225] 24 | 25 | models: 26 | model: 27 | name: 'CodecFeatureCompressionClassifier' 28 | params: 29 | codec_params: 30 | - type: 'PILTensorModule' 31 | params: 32 | format: 'WEBP' 33 | quality: 90 34 | 
returns_file_size: True 35 | encoder_config: 36 | sequential: ['conv1', 'bn1', 'relu', 'maxpool', 'layer1', 'layer2'] 37 | decoder_config: 38 | sequential: ['layer3', 'layer4', 'avgpool'] 39 | classifier_config: 40 | sequential: ['fc'] 41 | post_transform_params: 42 | analysis_config: 43 | analyzer_configs: 44 | - type: 'FileSizeAccumulator' 45 | params: 46 | unit: 'KB' 47 | classification_model: 48 | name: &model_name 'resnet50' 49 | params: 50 | num_classes: 1000 51 | pretrained: True 52 | experiment: &experiment !join [*dataset_name, '-', *model_name] 53 | ckpt: !join ['./resource/ckpt/', *experiment, '.pt'] 54 | 55 | test: 56 | test_data_loader: 57 | dataset_id: *imagenet_val 58 | random_sample: False 59 | batch_size: 1 60 | num_workers: 16 61 | collate_fn: 'default_collate_w_pil' 62 | -------------------------------------------------------------------------------- /legacy/configs/ilsvrc2012/input_compression/factorized_prior-resnet50.yaml: -------------------------------------------------------------------------------- 1 | datasets: 2 | ilsvrc2012: 3 | name: &dataset_name 'ilsvrc2012' 4 | type: 'ImageFolder' 5 | root: &root_dir !join ['~/dataset/', *dataset_name] 6 | splits: 7 | val: 8 | dataset_id: &imagenet_val !join [*dataset_name, '/val'] 9 | params: 10 | root: !join [*root_dir, '/val'] 11 | transform_params: 12 | - type: 'Resize' 13 | params: 14 | size: 256 15 | - type: 'CenterCrop' 16 | params: 17 | size: &input_size [224, 224] 18 | - type: 'ToTensor' 19 | params: 20 | - type: 'AdaptivePad' 21 | params: 22 | fill: 0 23 | factor: 64 24 | 25 | models: 26 | model: 27 | name: 'NeuralInputCompressionClassifier' 28 | params: 29 | post_transform_params: 30 | - type: 'CenterCrop' 31 | params: 32 | size: *input_size 33 | - type: 'Normalize' 34 | params: 35 | mean: [0.485, 0.456, 0.406] 36 | std: [0.229, 0.224, 0.225] 37 | analysis_config: 38 | analyzes_after_compress: True 39 | analyzer_configs: 40 | - type: 'FileSizeAnalyzer' 41 | params: 42 | unit: 'KB' 43 | 
compression_model: 44 | name: 'bmshj2018_factorized' 45 | params: 46 | pretrained: True 47 | quality: 8 48 | metric: 'mse' 49 | ckpt: './resource/ckpt/input_compression/factorized_prior.pt' 50 | classification_model: 51 | name: &model_name 'resnet50' 52 | params: 53 | num_classes: 1000 54 | pretrained: True 55 | experiment: &experiment !join [*dataset_name, '-', *model_name] 56 | ckpt: !join ['./resource/ckpt/', *experiment, '.pt'] 57 | 58 | test: 59 | test_data_loader: 60 | dataset_id: *imagenet_val 61 | random_sample: False 62 | batch_size: 1 63 | num_workers: 16 64 | -------------------------------------------------------------------------------- /legacy/configs/ilsvrc2012/input_compression/scale_hyperprior-resnet50.yaml: -------------------------------------------------------------------------------- 1 | datasets: 2 | ilsvrc2012: 3 | name: &dataset_name 'ilsvrc2012' 4 | type: 'ImageFolder' 5 | root: &root_dir !join ['~/dataset/', *dataset_name] 6 | splits: 7 | val: 8 | dataset_id: &imagenet_val !join [*dataset_name, '/val'] 9 | params: 10 | root: !join [*root_dir, '/val'] 11 | transform_params: 12 | - type: 'Resize' 13 | params: 14 | size: 256 15 | - type: 'CenterCrop' 16 | params: 17 | size: &input_size [224, 224] 18 | - type: 'ToTensor' 19 | params: 20 | - type: 'AdaptivePad' 21 | params: 22 | fill: 0 23 | factor: 64 24 | 25 | models: 26 | model: 27 | name: 'NeuralInputCompressionClassifier' 28 | params: 29 | post_transform_params: 30 | - type: 'CenterCrop' 31 | params: 32 | size: *input_size 33 | - type: 'Normalize' 34 | params: 35 | mean: [0.485, 0.456, 0.406] 36 | std: [0.229, 0.224, 0.225] 37 | analysis_config: 38 | analyzes_after_compress: True 39 | analyzer_configs: 40 | - type: 'FileSizeAnalyzer' 41 | params: 42 | unit: 'KB' 43 | compression_model: 44 | name: 'bmshj2018_hyperprior' 45 | params: 46 | pretrained: True 47 | quality: 8 48 | metric: 'mse' 49 | ckpt: './resource/ckpt/input_compression/scale_hyperprior.pt' 50 | classification_model: 51 | 
name: &model_name 'resnet50' 52 | params: 53 | num_classes: 1000 54 | pretrained: True 55 | experiment: &experiment !join [*dataset_name, '-', *model_name] 56 | ckpt: !join ['./resource/ckpt/', *experiment, '.pt'] 57 | 58 | test: 59 | test_data_loader: 60 | dataset_id: *imagenet_val 61 | random_sample: False 62 | batch_size: 1 63 | num_workers: 16 64 | -------------------------------------------------------------------------------- /legacy/configs/ilsvrc2012/input_compression/mean_scale_hyperprior-resnet50.yaml: -------------------------------------------------------------------------------- 1 | datasets: 2 | ilsvrc2012: 3 | name: &dataset_name 'ilsvrc2012' 4 | type: 'ImageFolder' 5 | root: &root_dir !join ['~/dataset/', *dataset_name] 6 | splits: 7 | val: 8 | dataset_id: &imagenet_val !join [*dataset_name, '/val'] 9 | params: 10 | root: !join [*root_dir, '/val'] 11 | transform_params: 12 | - type: 'Resize' 13 | params: 14 | size: 256 15 | - type: 'CenterCrop' 16 | params: 17 | size: &input_size [224, 224] 18 | - type: 'ToTensor' 19 | params: 20 | - type: 'AdaptivePad' 21 | params: 22 | fill: 0 23 | factor: 64 24 | 25 | models: 26 | model: 27 | name: 'NeuralInputCompressionClassifier' 28 | params: 29 | post_transform_params: 30 | - type: 'CenterCrop' 31 | params: 32 | size: *input_size 33 | - type: 'Normalize' 34 | params: 35 | mean: [0.485, 0.456, 0.406] 36 | std: [0.229, 0.224, 0.225] 37 | analysis_config: 38 | analyzes_after_compress: True 39 | analyzer_configs: 40 | - type: 'FileSizeAnalyzer' 41 | params: 42 | unit: 'KB' 43 | compression_model: 44 | name: 'mbt2018_mean' 45 | params: 46 | pretrained: True 47 | quality: 8 48 | metric: 'mse' 49 | ckpt: './resource/ckpt/input_compression/mean_scale_hyperprior.pt' 50 | classification_model: 51 | name: &model_name 'resnet50' 52 | params: 53 | num_classes: 1000 54 | pretrained: True 55 | experiment: &experiment !join [*dataset_name, '-', *model_name] 56 | ckpt: !join ['./resource/ckpt/', *experiment, '.pt'] 57 | 58 | 
test: 59 | test_data_loader: 60 | dataset_id: *imagenet_val 61 | random_sample: False 62 | batch_size: 1 63 | num_workers: 16 64 | -------------------------------------------------------------------------------- /legacy/configs/pascal_voc2012/input_compression/jpeg-deeplabv3_resnet50.yaml: -------------------------------------------------------------------------------- 1 | datasets: 2 | pascal_voc: 3 | name: &dataset_name 'pascal_voc2012' 4 | type: 'VOCSegmentation' 5 | root: &root_dir '~/dataset' 6 | splits: 7 | val: 8 | dataset_id: &pascal_val !join [*dataset_name, '/val'] 9 | params: 10 | root: *root_dir 11 | image_set: 'val' 12 | year: '2012' 13 | download: False 14 | transforms_compose_cls: 'CustomCompose' 15 | transforms_params: &val_transform 16 | - type: 'CustomRandomResize' 17 | params: 18 | min_size: 513 19 | max_size: 513 20 | - type: 'CustomToTensor' 21 | params: 22 | converts_sample: False 23 | converts_target: True 24 | 25 | models: 26 | model: 27 | name: 'CodecInputCompressionSegmentationModel' 28 | params: 29 | codec_params: 30 | - type: 'PILImageModule' 31 | params: 32 | format: 'JPEG' 33 | quality: 90 34 | returns_file_size: True 35 | analysis_config: 36 | analyzer_configs: 37 | - type: 'FileSizeAccumulator' 38 | params: 39 | unit: 'KB' 40 | post_transform_params: 41 | - type: 'ToTensor' 42 | params: 43 | - type: 'Normalize' 44 | params: 45 | mean: [0.485, 0.456, 0.406] 46 | std: [0.229, 0.224, 0.225] 47 | segmentation_model: 48 | name: 'deeplabv3_resnet50' 49 | params: 50 | pretrained: True 51 | pretrained_backbone: True 52 | num_classes: 21 53 | aux_loss: True 54 | ckpt: 'https://github.com/yoshitomo-matsubara/torchdistill/releases/download/v0.2.8/pascal_voc2012-deeplabv3_resnet50.pt' 55 | 56 | test: 57 | test_data_loader: 58 | dataset_id: *pascal_val 59 | random_sample: False 60 | batch_size: 1 61 | num_workers: 16 62 | collate_fn: 'pascal_seg_eval_collate_fn' 63 | -------------------------------------------------------------------------------- 
/legacy/configs/pascal_voc2012/input_compression/webp-deeplabv3_resnet50.yaml: -------------------------------------------------------------------------------- 1 | datasets: 2 | pascal_voc: 3 | name: &dataset_name 'pascal_voc2012' 4 | type: 'VOCSegmentation' 5 | root: &root_dir '~/dataset' 6 | splits: 7 | val: 8 | dataset_id: &pascal_val !join [*dataset_name, '/val'] 9 | params: 10 | root: *root_dir 11 | image_set: 'val' 12 | year: '2012' 13 | download: False 14 | transforms_compose_cls: 'CustomCompose' 15 | transforms_params: &val_transform 16 | - type: 'CustomRandomResize' 17 | params: 18 | min_size: 513 19 | max_size: 513 20 | - type: 'CustomToTensor' 21 | params: 22 | converts_sample: False 23 | converts_target: True 24 | 25 | models: 26 | model: 27 | name: 'CodecInputCompressionSegmentationModel' 28 | params: 29 | codec_params: 30 | - type: 'PILImageModule' 31 | params: 32 | format: 'WEBP' 33 | quality: 90 34 | returns_file_size: True 35 | analysis_config: 36 | analyzer_configs: 37 | - type: 'FileSizeAccumulator' 38 | params: 39 | unit: 'KB' 40 | post_transform_params: 41 | - type: 'ToTensor' 42 | params: 43 | - type: 'Normalize' 44 | params: 45 | mean: [0.485, 0.456, 0.406] 46 | std: [0.229, 0.224, 0.225] 47 | segmentation_model: 48 | name: 'deeplabv3_resnet50' 49 | params: 50 | pretrained: True 51 | pretrained_backbone: True 52 | num_classes: 21 53 | aux_loss: True 54 | ckpt: 'https://github.com/yoshitomo-matsubara/torchdistill/releases/download/v0.2.8/pascal_voc2012-deeplabv3_resnet50.pt' 55 | 56 | test: 57 | test_data_loader: 58 | dataset_id: *pascal_val 59 | random_sample: False 60 | batch_size: 1 61 | num_workers: 16 62 | collate_fn: 'pascal_seg_eval_collate_fn' 63 | -------------------------------------------------------------------------------- /legacy/configs/pascal_voc2012/input_compression/jpeg-deeplabv3_resnet101.yaml: -------------------------------------------------------------------------------- 1 | datasets: 2 | pascal_voc: 3 | name: 
&dataset_name 'pascal_voc2012' 4 | type: 'VOCSegmentation' 5 | root: &root_dir '~/dataset' 6 | splits: 7 | val: 8 | dataset_id: &pascal_val !join [*dataset_name, '/val'] 9 | params: 10 | root: *root_dir 11 | image_set: 'val' 12 | year: '2012' 13 | download: False 14 | transforms_compose_cls: 'CustomCompose' 15 | transforms_params: &val_transform 16 | - type: 'CustomRandomResize' 17 | params: 18 | min_size: 513 19 | max_size: 513 20 | - type: 'CustomToTensor' 21 | params: 22 | converts_sample: False 23 | converts_target: True 24 | 25 | models: 26 | model: 27 | name: 'CodecInputCompressionSegmentationModel' 28 | params: 29 | codec_params: 30 | - type: 'PILImageModule' 31 | params: 32 | format: 'JPEG' 33 | quality: 90 34 | returns_file_size: True 35 | analysis_config: 36 | analyzer_configs: 37 | - type: 'FileSizeAccumulator' 38 | params: 39 | unit: 'KB' 40 | post_transform_params: 41 | - type: 'ToTensor' 42 | params: 43 | - type: 'Normalize' 44 | params: 45 | mean: [0.485, 0.456, 0.406] 46 | std: [0.229, 0.224, 0.225] 47 | segmentation_model: 48 | name: 'deeplabv3_resnet101' 49 | params: 50 | pretrained: False 51 | pretrained_backbone: True 52 | num_classes: 21 53 | aux_loss: True 54 | ckpt: 'https://github.com/yoshitomo-matsubara/torchdistill/releases/download/v0.2.8/pascal_voc2012-deeplabv3_resnet101.pt' 55 | 56 | test: 57 | test_data_loader: 58 | dataset_id: *pascal_val 59 | random_sample: False 60 | batch_size: 1 61 | num_workers: 16 62 | collate_fn: 'pascal_seg_eval_collate_fn' 63 | -------------------------------------------------------------------------------- /legacy/configs/pascal_voc2012/input_compression/webp-deeplabv3_resnet101.yaml: -------------------------------------------------------------------------------- 1 | datasets: 2 | pascal_voc: 3 | name: &dataset_name 'pascal_voc2012' 4 | type: 'VOCSegmentation' 5 | root: &root_dir '~/dataset' 6 | splits: 7 | val: 8 | dataset_id: &pascal_val !join [*dataset_name, '/val'] 9 | params: 10 | root: *root_dir 11 | 
image_set: 'val' 12 | year: '2012' 13 | download: False 14 | transforms_compose_cls: 'CustomCompose' 15 | transforms_params: &val_transform 16 | - type: 'CustomRandomResize' 17 | params: 18 | min_size: 513 19 | max_size: 513 20 | - type: 'CustomToTensor' 21 | params: 22 | converts_sample: False 23 | converts_target: True 24 | 25 | models: 26 | model: 27 | name: 'CodecInputCompressionSegmentationModel' 28 | params: 29 | codec_params: 30 | - type: 'PILImageModule' 31 | params: 32 | format: 'WEBP' 33 | quality: 90 34 | returns_file_size: True 35 | analysis_config: 36 | analyzer_configs: 37 | - type: 'FileSizeAccumulator' 38 | params: 39 | unit: 'KB' 40 | post_transform_params: 41 | - type: 'ToTensor' 42 | params: 43 | - type: 'Normalize' 44 | params: 45 | mean: [0.485, 0.456, 0.406] 46 | std: [0.229, 0.224, 0.225] 47 | segmentation_model: 48 | name: 'deeplabv3_resnet101' 49 | params: 50 | pretrained: True 51 | pretrained_backbone: True 52 | num_classes: 21 53 | aux_loss: True 54 | ckpt: 'https://github.com/yoshitomo-matsubara/torchdistill/releases/download/v0.2.8/pascal_voc2012-deeplabv3_resnet101.pt' 55 | 56 | test: 57 | test_data_loader: 58 | dataset_id: *pascal_val 59 | random_sample: False 60 | batch_size: 1 61 | num_workers: 16 62 | collate_fn: 'pascal_seg_eval_collate_fn' 63 | -------------------------------------------------------------------------------- /script/neural_input_compression/README.md: -------------------------------------------------------------------------------- 1 | # Neural Input Compression Baselines 2 | 3 | We considered the following neural image compression models: 4 | - Factorized Prior 5 | - Scale Hyperprior 6 | - Mean-scale Hyperprior 7 | - Joint Autoregressive Hierarchical Prior 8 | 9 | 10 | ## ImageNet (ILSVRC 2012): Image Classification 11 | Neural input compression followed by ResNet-50 12 | 13 | ```shell 14 | bash script/neural_input_compression/ilsvrc2012-image_classification.sh factorized_prior-resnet50 8 15 | bash 
script/neural_input_compression/ilsvrc2012-image_classification.sh scale_hyperprior-resnet50 8 16 | bash script/neural_input_compression/ilsvrc2012-image_classification.sh mean_scale_hyperprior-resnet50 8 17 | bash script/neural_input_compression/ilsvrc2012-image_classification.sh joint_autoregressive_hierarchical_prior-resnet50 8 18 | ``` 19 | 20 | ## COCO 2017: Object Detection 21 | Neural input compression followed by Faster R-CNN with ResNet-50 and FPN 22 | 23 | ```shell 24 | bash script/neural_input_compression/coco2017-object_detection.sh factorized_prior-faster_rcnn_resnet50_fpn 8 25 | bash script/neural_input_compression/coco2017-object_detection.sh scale_hyperprior-faster_rcnn_resnet50_fpn 8 26 | bash script/neural_input_compression/coco2017-object_detection.sh mean_scale_hyperprior-faster_rcnn_resnet50_fpn 8 27 | bash script/neural_input_compression/coco2017-object_detection.sh joint_autoregressive_hierarchical_prior-faster_rcnn_resnet50_fpn 8 28 | ``` 29 | 30 | ## PASCAL VOC 2012: Semantic Segmentation 31 | Neural input compression followed by DeepLabv3 with ResNet-50 32 | 33 | ```shell 34 | bash script/neural_input_compression/pascal_voc2012-semantic_segmentation.sh factorized_prior-deeplabv3_resnet50 8 35 | bash script/neural_input_compression/pascal_voc2012-semantic_segmentation.sh scale_hyperprior-deeplabv3_resnet50 8 36 | bash script/neural_input_compression/pascal_voc2012-semantic_segmentation.sh mean_scale_hyperprior-deeplabv3_resnet50 8 37 | bash script/neural_input_compression/pascal_voc2012-semantic_segmentation.sh joint_autoregressive_hierarchical_prior-deeplabv3_resnet50 8 38 | ``` 39 | -------------------------------------------------------------------------------- /legacy/configs/ilsvrc2012/input_compression/joint_autoregressive_hierarchical_prior-resnet50.yaml: -------------------------------------------------------------------------------- 1 | datasets: 2 | ilsvrc2012: 3 | name: &dataset_name 'ilsvrc2012' 4 | type: 'ImageFolder' 5 | root: 
&root_dir !join ['~/dataset/', *dataset_name] 6 | splits: 7 | val: 8 | dataset_id: &imagenet_val !join [*dataset_name, '/val'] 9 | params: 10 | root: !join [*root_dir, '/val'] 11 | transform_params: 12 | - type: 'Resize' 13 | params: 14 | size: 256 15 | - type: 'CenterCrop' 16 | params: 17 | size: &input_size [224, 224] 18 | - type: 'ToTensor' 19 | params: 20 | - type: 'AdaptivePad' 21 | params: 22 | fill: 0 23 | factor: 64 24 | 25 | models: 26 | model: 27 | name: 'NeuralInputCompressionClassifier' 28 | params: 29 | post_transform_params: 30 | - type: 'CenterCrop' 31 | params: 32 | size: *input_size 33 | - type: 'Normalize' 34 | params: 35 | mean: [0.485, 0.456, 0.406] 36 | std: [0.229, 0.224, 0.225] 37 | analysis_config: 38 | analyzes_after_compress: True 39 | analyzer_configs: 40 | - type: 'FileSizeAnalyzer' 41 | params: 42 | unit: 'KB' 43 | uses_cpu4compression_model: True 44 | compression_model: 45 | name: 'mbt2018' 46 | params: 47 | pretrained: True 48 | quality: 8 49 | metric: 'mse' 50 | ckpt: './resource/ckpt/input_compression/joint_autoregressive_hierarchical_prior.pt' 51 | classification_model: 52 | name: &model_name 'resnet50' 53 | params: 54 | num_classes: 1000 55 | pretrained: True 56 | experiment: &experiment !join [*dataset_name, '-', *model_name] 57 | ckpt: !join ['./resource/ckpt/', *experiment, '.pt'] 58 | 59 | test: 60 | test_data_loader: 61 | dataset_id: *imagenet_val 62 | random_sample: False 63 | batch_size: 1 64 | num_workers: 16 65 | -------------------------------------------------------------------------------- /legacy/configs/pascal_voc2012/input_compression/bpg-deeplabv3_resnet50.yaml: -------------------------------------------------------------------------------- 1 | datasets: 2 | pascal_voc: 3 | name: &dataset_name 'pascal_voc2012' 4 | type: 'VOCSegmentation' 5 | root: &root_dir '~/dataset' 6 | splits: 7 | val: 8 | dataset_id: &pascal_val !join [*dataset_name, '/val'] 9 | params: 10 | root: *root_dir 11 | image_set: 'val' 12 | year: 
'2012' 13 | download: False 14 | transforms_compose_cls: 'CustomCompose' 15 | transforms_params: &val_transform 16 | - type: 'CustomRandomResize' 17 | params: 18 | min_size: 513 19 | max_size: 513 20 | - type: 'CustomToTensor' 21 | params: 22 | converts_sample: False 23 | converts_target: True 24 | 25 | models: 26 | model: 27 | name: 'CodecInputCompressionSegmentationModel' 28 | params: 29 | codec_params: 30 | - type: 'BPGModule' 31 | params: 32 | encoder_path: '~/software/libbpg-0.9.8/bpgenc' 33 | decoder_path: '~/software/libbpg-0.9.8/bpgdec' 34 | quality: 50 35 | returns_file_size: True 36 | analysis_config: 37 | analyzer_configs: 38 | - type: 'FileSizeAccumulator' 39 | params: 40 | unit: 'KB' 41 | post_transform_params: 42 | - type: 'ToTensor' 43 | params: 44 | - type: 'Normalize' 45 | params: 46 | mean: [0.485, 0.456, 0.406] 47 | std: [0.229, 0.224, 0.225] 48 | segmentation_model: 49 | name: 'deeplabv3_resnet50' 50 | params: 51 | pretrained: True 52 | pretrained_backbone: True 53 | num_classes: 21 54 | aux_loss: True 55 | ckpt: 'https://github.com/yoshitomo-matsubara/torchdistill/releases/download/v0.2.8/pascal_voc2012-deeplabv3_resnet50.pt' 56 | 57 | test: 58 | test_data_loader: 59 | dataset_id: *pascal_val 60 | random_sample: False 61 | batch_size: 1 62 | num_workers: 16 63 | collate_fn: 'pascal_seg_eval_collate_fn' 64 | -------------------------------------------------------------------------------- /legacy/configs/pascal_voc2012/input_compression/bpg-deeplabv3_resnet101.yaml: -------------------------------------------------------------------------------- 1 | datasets: 2 | pascal_voc: 3 | name: &dataset_name 'pascal_voc2012' 4 | type: 'VOCSegmentation' 5 | root: &root_dir '~/dataset' 6 | splits: 7 | val: 8 | dataset_id: &pascal_val !join [*dataset_name, '/val'] 9 | params: 10 | root: *root_dir 11 | image_set: 'val' 12 | year: '2012' 13 | download: False 14 | transforms_compose_cls: 'CustomCompose' 15 | transforms_params: &val_transform 16 | - type: 
'CustomRandomResize' 17 | params: 18 | min_size: 513 19 | max_size: 513 20 | - type: 'CustomToTensor' 21 | params: 22 | converts_sample: False 23 | converts_target: True 24 | 25 | models: 26 | model: 27 | name: 'CodecInputCompressionSegmentationModel' 28 | params: 29 | codec_params: 30 | - type: 'BPGModule' 31 | params: 32 | encoder_path: '~/software/libbpg-0.9.8/bpgenc' 33 | decoder_path: '~/software/libbpg-0.9.8/bpgdec' 34 | quality: 50 35 | returns_file_size: True 36 | analysis_config: 37 | analyzer_configs: 38 | - type: 'FileSizeAccumulator' 39 | params: 40 | unit: 'KB' 41 | post_transform_params: 42 | - type: 'ToTensor' 43 | params: 44 | - type: 'Normalize' 45 | params: 46 | mean: [0.485, 0.456, 0.406] 47 | std: [0.229, 0.224, 0.225] 48 | segmentation_model: 49 | name: 'deeplabv3_resnet101' 50 | params: 51 | pretrained: True 52 | pretrained_backbone: True 53 | num_classes: 21 54 | aux_loss: True 55 | ckpt: 'https://github.com/yoshitomo-matsubara/torchdistill/releases/download/v0.2.8/pascal_voc2012-deeplabv3_resnet101.pt' 56 | 57 | test: 58 | test_data_loader: 59 | dataset_id: *pascal_val 60 | random_sample: False 61 | batch_size: 1 62 | num_workers: 16 63 | collate_fn: 'pascal_seg_eval_collate_fn' 64 | -------------------------------------------------------------------------------- /legacy/configs/ilsvrc2012/input_compression/factorized_prior-tf_efficientnet_l2_ns.yaml: -------------------------------------------------------------------------------- 1 | datasets: 2 | ilsvrc2012: 3 | name: &dataset_name 'ilsvrc2012' 4 | type: 'ImageFolder' 5 | root: &root_dir !join ['~/dataset/', *dataset_name] 6 | splits: 7 | val: 8 | dataset_id: &imagenet_val !join [*dataset_name, '/val'] 9 | params: 10 | root: !join [*root_dir, '/val'] 11 | transform_params: 12 | - type: 'WrappedResize' 13 | params: 14 | size: 833 15 | interpolation: 'bicubic' 16 | - type: 'CenterCrop' 17 | params: 18 | size: &input_size [800, 800] 19 | - type: 'ToTensor' 20 | params: 21 | - type: 
'AdaptivePad' 22 | params: 23 | fill: 0 24 | factor: 64 25 | 26 | models: 27 | model: 28 | name: 'NeuralInputCompressionClassifier' 29 | params: 30 | post_transform_params: 31 | - type: 'CenterCrop' 32 | params: 33 | size: *input_size 34 | - type: 'Normalize' 35 | params: 36 | mean: [0.485, 0.456, 0.406] 37 | std: [0.229, 0.224, 0.225] 38 | analysis_config: 39 | analyzes_after_compress: True 40 | analyzer_configs: 41 | - type: 'FileSizeAnalyzer' 42 | params: 43 | unit: 'KB' 44 | compression_model: 45 | name: 'bmshj2018_factorized' 46 | params: 47 | pretrained: True 48 | quality: 8 49 | metric: 'mse' 50 | ckpt: './resource/ckpt/input_compression/factorized_prior.pt' 51 | classification_model: 52 | name: &model_name 'tf_efficientnet_l2_ns' 53 | repo_or_dir: 'rwightman/pytorch-image-models' 54 | params: 55 | num_classes: 1000 56 | pretrained: True 57 | experiment: &experiment !join [*dataset_name, '-', *model_name] 58 | ckpt: !join ['./imagenet/vanilla/', *experiment, '.pt'] 59 | 60 | test: 61 | test_data_loader: 62 | dataset_id: *imagenet_val 63 | random_sample: False 64 | batch_size: 1 65 | num_workers: 16 66 | -------------------------------------------------------------------------------- /legacy/configs/ilsvrc2012/input_compression/mean_scale_hyperprior-tf_efficientnet_l2_ns.yaml: -------------------------------------------------------------------------------- 1 | datasets: 2 | ilsvrc2012: 3 | name: &dataset_name 'ilsvrc2012' 4 | type: 'ImageFolder' 5 | root: &root_dir !join ['~/dataset/', *dataset_name] 6 | splits: 7 | val: 8 | dataset_id: &imagenet_val !join [*dataset_name, '/val'] 9 | params: 10 | root: !join [*root_dir, '/val'] 11 | transform_params: 12 | - type: 'WrappedResize' 13 | params: 14 | size: 833 15 | interpolation: 'bicubic' 16 | - type: 'CenterCrop' 17 | params: 18 | size: &input_size [800, 800] 19 | - type: 'ToTensor' 20 | params: 21 | - type: 'AdaptivePad' 22 | params: 23 | fill: 0 24 | factor: 64 25 | 26 | models: 27 | model: 28 | name: 
'NeuralInputCompressionClassifier' 29 | params: 30 | post_transform_params: 31 | - type: 'CenterCrop' 32 | params: 33 | size: *input_size 34 | - type: 'Normalize' 35 | params: 36 | mean: [0.485, 0.456, 0.406] 37 | std: [0.229, 0.224, 0.225] 38 | analysis_config: 39 | analyzes_after_compress: True 40 | analyzer_configs: 41 | - type: 'FileSizeAnalyzer' 42 | params: 43 | unit: 'KB' 44 | compression_model: 45 | name: 'mbt2018_mean' 46 | params: 47 | pretrained: True 48 | quality: 8 49 | metric: 'mse' 50 | ckpt: './resource/ckpt/input_compression/mean_scale_hyperprior.pt' 51 | classification_model: 52 | name: &model_name 'tf_efficientnet_l2_ns' 53 | repo_or_dir: 'rwightman/pytorch-image-models' 54 | params: 55 | num_classes: 1000 56 | pretrained: True 57 | experiment: &experiment !join [*dataset_name, '-', *model_name] 58 | ckpt: !join ['./imagenet/vanilla/', *experiment, '.pt'] 59 | 60 | test: 61 | test_data_loader: 62 | dataset_id: *imagenet_val 63 | random_sample: False 64 | batch_size: 1 65 | num_workers: 16 66 | -------------------------------------------------------------------------------- /legacy/configs/ilsvrc2012/input_compression/scale_hyperprior-tf_efficientnet_l2_ns.yaml: -------------------------------------------------------------------------------- 1 | datasets: 2 | ilsvrc2012: 3 | name: &dataset_name 'ilsvrc2012' 4 | type: 'ImageFolder' 5 | root: &root_dir !join ['~/dataset/', *dataset_name] 6 | splits: 7 | val: 8 | dataset_id: &imagenet_val !join [*dataset_name, '/val'] 9 | params: 10 | root: !join [*root_dir, '/val'] 11 | transform_params: 12 | - type: 'WrappedResize' 13 | params: 14 | size: 833 15 | interpolation: 'bicubic' 16 | - type: 'CenterCrop' 17 | params: 18 | size: &input_size [800, 800] 19 | - type: 'ToTensor' 20 | params: 21 | - type: 'AdaptivePad' 22 | params: 23 | fill: 0 24 | factor: 64 25 | 26 | models: 27 | model: 28 | name: 'NeuralInputCompressionClassifier' 29 | params: 30 | post_transform_params: 31 | - type: 'CenterCrop' 32 | 
params: 33 | size: *input_size 34 | - type: 'Normalize' 35 | params: 36 | mean: [0.485, 0.456, 0.406] 37 | std: [0.229, 0.224, 0.225] 38 | analysis_config: 39 | analyzes_after_compress: True 40 | analyzer_configs: 41 | - type: 'FileSizeAnalyzer' 42 | params: 43 | unit: 'KB' 44 | compression_model: 45 | name: 'bmshj2018_hyperprior' 46 | params: 47 | pretrained: True 48 | quality: 8 49 | metric: 'mse' 50 | ckpt: './resource/ckpt/input_compression/scale_hyperprior.pt' 51 | classification_model: 52 | name: &model_name 'tf_efficientnet_l2_ns' 53 | repo_or_dir: 'rwightman/pytorch-image-models' 54 | params: 55 | num_classes: 1000 56 | pretrained: True 57 | experiment: &experiment !join [*dataset_name, '-', *model_name] 58 | ckpt: !join ['./imagenet/vanilla/', *experiment, '.pt'] 59 | 60 | test: 61 | test_data_loader: 62 | dataset_id: *imagenet_val 63 | random_sample: False 64 | batch_size: 1 65 | num_workers: 16 66 | -------------------------------------------------------------------------------- /legacy/configs/ilsvrc2012/input_compression/scale_hyperprior-tf_efficientnet_l2_ns_475.yaml: -------------------------------------------------------------------------------- 1 | datasets: 2 | ilsvrc2012: 3 | name: &dataset_name 'ilsvrc2012' 4 | type: 'ImageFolder' 5 | root: &root_dir !join ['~/dataset/', *dataset_name] 6 | splits: 7 | val: 8 | dataset_id: &imagenet_val !join [*dataset_name, '/val'] 9 | params: 10 | root: !join [*root_dir, '/val'] 11 | transform_params: 12 | - type: 'WrappedResize' 13 | params: 14 | size: 507 15 | interpolation: 'bicubic' 16 | - type: 'CenterCrop' 17 | params: 18 | size: &input_size [475, 475] 19 | - type: 'ToTensor' 20 | params: 21 | - type: 'AdaptivePad' 22 | params: 23 | fill: 0 24 | factor: 64 25 | 26 | models: 27 | model: 28 | name: 'NeuralInputCompressionClassifier' 29 | params: 30 | post_transform_params: 31 | - type: 'CenterCrop' 32 | params: 33 | size: *input_size 34 | - type: 'Normalize' 35 | params: 36 | mean: [0.485, 0.456, 0.406] 37 
| std: [0.229, 0.224, 0.225] 38 | analysis_config: 39 | analyzes_after_compress: True 40 | analyzer_configs: 41 | - type: 'FileSizeAnalyzer' 42 | params: 43 | unit: 'KB' 44 | compression_model: 45 | name: 'bmshj2018_hyperprior' 46 | params: 47 | pretrained: True 48 | quality: 8 49 | metric: 'mse' 50 | ckpt: './resource/ckpt/input_compression/scale_hyperprior.pt' 51 | classification_model: 52 | name: &model_name 'tf_efficientnet_l2_ns' 53 | repo_or_dir: 'rwightman/pytorch-image-models' 54 | params: 55 | num_classes: 1000 56 | pretrained: True 57 | experiment: &experiment !join [*dataset_name, '-', *model_name] 58 | ckpt: !join ['./imagenet/vanilla/', *experiment, '.pt'] 59 | 60 | test: 61 | test_data_loader: 62 | dataset_id: *imagenet_val 63 | random_sample: False 64 | batch_size: 1 65 | num_workers: 16 66 | -------------------------------------------------------------------------------- /legacy/configs/ilsvrc2012/input_compression/factorized_prior-tf_efficientnet_l2_ns_475.yaml: -------------------------------------------------------------------------------- 1 | datasets: 2 | ilsvrc2012: 3 | name: &dataset_name 'ilsvrc2012' 4 | type: 'ImageFolder' 5 | root: &root_dir !join ['~/dataset/', *dataset_name] 6 | splits: 7 | val: 8 | dataset_id: &imagenet_val !join [*dataset_name, '/val'] 9 | params: 10 | root: !join [*root_dir, '/val'] 11 | transform_params: 12 | - type: 'WrappedResize' 13 | params: 14 | size: 507 15 | interpolation: 'bicubic' 16 | - type: 'CenterCrop' 17 | params: 18 | size: &input_size [475, 475] 19 | - type: 'ToTensor' 20 | params: 21 | - type: 'AdaptivePad' 22 | params: 23 | fill: 0 24 | factor: 64 25 | 26 | models: 27 | model: 28 | name: 'NeuralInputCompressionClassifier' 29 | params: 30 | post_transform_params: 31 | - type: 'CenterCrop' 32 | params: 33 | size: *input_size 34 | - type: 'Normalize' 35 | params: 36 | mean: [0.485, 0.456, 0.406] 37 | std: [0.229, 0.224, 0.225] 38 | analysis_config: 39 | analyzes_after_compress: True 40 | 
analyzer_configs: 41 | - type: 'FileSizeAnalyzer' 42 | params: 43 | unit: 'KB' 44 | compression_model: 45 | name: 'bmshj2018_factorized' 46 | params: 47 | pretrained: True 48 | quality: 8 49 | metric: 'mse' 50 | ckpt: './resource/ckpt/input_compression/factorized_prior.pt' 51 | classification_model: 52 | name: &model_name 'tf_efficientnet_l2_ns_475' 53 | repo_or_dir: 'rwightman/pytorch-image-models' 54 | params: 55 | num_classes: 1000 56 | pretrained: True 57 | experiment: &experiment !join [*dataset_name, '-', *model_name] 58 | ckpt: !join ['./imagenet/vanilla/', *experiment, '.pt'] 59 | 60 | test: 61 | test_data_loader: 62 | dataset_id: *imagenet_val 63 | random_sample: False 64 | batch_size: 1 65 | num_workers: 16 66 | -------------------------------------------------------------------------------- /legacy/configs/ilsvrc2012/input_compression/mean_scale_hyperprior-tf_efficientnet_l2_ns_475.yaml: -------------------------------------------------------------------------------- 1 | datasets: 2 | ilsvrc2012: 3 | name: &dataset_name 'ilsvrc2012' 4 | type: 'ImageFolder' 5 | root: &root_dir !join ['~/dataset/', *dataset_name] 6 | splits: 7 | val: 8 | dataset_id: &imagenet_val !join [*dataset_name, '/val'] 9 | params: 10 | root: !join [*root_dir, '/val'] 11 | transform_params: 12 | - type: 'WrappedResize' 13 | params: 14 | size: 507 15 | interpolation: 'bicubic' 16 | - type: 'CenterCrop' 17 | params: 18 | size: &input_size [475, 475] 19 | - type: 'ToTensor' 20 | params: 21 | - type: 'AdaptivePad' 22 | params: 23 | fill: 0 24 | factor: 64 25 | 26 | models: 27 | model: 28 | name: 'NeuralInputCompressionClassifier' 29 | params: 30 | post_transform_params: 31 | - type: 'CenterCrop' 32 | params: 33 | size: *input_size 34 | - type: 'Normalize' 35 | params: 36 | mean: [0.485, 0.456, 0.406] 37 | std: [0.229, 0.224, 0.225] 38 | analysis_config: 39 | analyzes_after_compress: True 40 | analyzer_configs: 41 | - type: 'FileSizeAnalyzer' 42 | params: 43 | unit: 'KB' 44 | 
compression_model: 45 | name: 'mbt2018_mean' 46 | params: 47 | pretrained: True 48 | quality: 8 49 | metric: 'mse' 50 | ckpt: './resource/ckpt/input_compression/mean_scale_hyperprior.pt' 51 | classification_model: 52 | name: &model_name 'tf_efficientnet_l2_ns_475' 53 | repo_or_dir: 'rwightman/pytorch-image-models' 54 | params: 55 | num_classes: 1000 56 | pretrained: True 57 | experiment: &experiment !join [*dataset_name, '-', *model_name] 58 | ckpt: !join ['./imagenet/vanilla/', *experiment, '.pt'] 59 | 60 | test: 61 | test_data_loader: 62 | dataset_id: *imagenet_val 63 | random_sample: False 64 | batch_size: 1 65 | num_workers: 16 66 | -------------------------------------------------------------------------------- /legacy/script/neural_input_compression/README.md: -------------------------------------------------------------------------------- 1 | # Neural Input Compression Baselines 2 | 3 | We considered the following neural image compression models: 4 | - Factorized Prior 5 | - Scale Hyperprior 6 | - Mean-scale Hyperprior 7 | - Joint Autoregressive Hierarchical Prior 8 | 9 | 10 | ## ImageNet (ILSVRC 2012): Image Classification 11 | Neural input compression followed by ResNet-50 12 | 13 | ```shell 14 | bash legacy/script/neural_input_compression/ilsvrc2012-image_classification.sh factorized_prior-resnet50 8 15 | bash legacy/script/neural_input_compression/ilsvrc2012-image_classification.sh scale_hyperprior-resnet50 8 16 | bash legacy/script/neural_input_compression/ilsvrc2012-image_classification.sh mean_scale_hyperprior-resnet50 8 17 | bash legacy/script/neural_input_compression/ilsvrc2012-image_classification.sh joint_autoregressive_hierarchical_prior-resnet50 8 18 | ``` 19 | 20 | ## COCO 2017: Object Detection 21 | Neural input compression followed by Faster R-CNN with ResNet-50 and FPN 22 | 23 | ```shell 24 | bash legacy/script/neural_input_compression/coco2017-object_detection.sh factorized_prior-faster_rcnn_resnet50_fpn 8 25 | bash 
legacy/script/neural_input_compression/coco2017-object_detection.sh scale_hyperprior-faster_rcnn_resnet50_fpn 8 26 | bash legacy/script/neural_input_compression/coco2017-object_detection.sh mean_scale_hyperprior-faster_rcnn_resnet50_fpn 8 27 | bash legacy/script/neural_input_compression/coco2017-object_detection.sh joint_autoregressive_hierarchical_prior-faster_rcnn_resnet50_fpn 8 28 | ``` 29 | 30 | ## PASCAL VOC 2012: Semantic Segmentation 31 | Neural input compression followed by DeepLabv3 with ResNet-50 32 | 33 | ```shell 34 | bash legacy/script/neural_input_compression/pascal_voc2012-semantic_segmentation.sh factorized_prior-deeplabv3_resnet50 8 35 | bash legacy/script/neural_input_compression/pascal_voc2012-semantic_segmentation.sh scale_hyperprior-deeplabv3_resnet50 8 36 | bash legacy/script/neural_input_compression/pascal_voc2012-semantic_segmentation.sh mean_scale_hyperprior-deeplabv3_resnet50 8 37 | bash legacy/script/neural_input_compression/pascal_voc2012-semantic_segmentation.sh joint_autoregressive_hierarchical_prior-deeplabv3_resnet50 8 38 | ``` 39 | -------------------------------------------------------------------------------- /sc2bench/transforms/collator.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torchdistill.datasets.registry import register_collate_func 3 | 4 | 5 | def cat_list(images, fill_value=0): 6 | """ 7 | Concatenates a list of images with the max size for each of heights and widths and 8 | fills empty spaces with a specified value. 
9 | 10 | :param images: list of image tensors 11 | :type images: list[torch.Tensor] 12 | :param fill_value: value to be filled 13 | :type fill_value: int 14 | :return: batched image tensor 15 | :rtype: torch.Tensor 16 | """ 17 | if len(images) == 1 and not isinstance(images[0], torch.Tensor): 18 | return images 19 | 20 | max_size = tuple(max(s) for s in zip(*[img.shape for img in images])) 21 | batch_shape = (len(images),) + max_size 22 | batched_imgs = images[0].new(*batch_shape).fill_(fill_value) 23 | for img, pad_img in zip(images, batched_imgs): 24 | pad_img[..., :img.shape[-2], :img.shape[-1]].copy_(img) 25 | return batched_imgs 26 | 27 | 28 | @register_collate_func 29 | def pascal_seg_collate_fn(batch): 30 | """ 31 | Collates input data for PASCAL VOC 2012 segmentation. 32 | 33 | :param batch: list/tuple of triplets (image, target, supp_dict), where supp_dict can be an empty dict 34 | :type batch: list or tuple 35 | :return: collated images, targets, and supplementary dicts 36 | :rtype: (torch.Tensor, torch.Tensor, list[dict]) 37 | """ 38 | images, targets, supp_dicts = list(zip(*batch)) 39 | batched_imgs = cat_list(images, fill_value=0) 40 | batched_targets = cat_list(targets, fill_value=255) 41 | return batched_imgs, batched_targets, supp_dicts 42 | 43 | 44 | @register_collate_func 45 | def pascal_seg_eval_collate_fn(batch): 46 | """ 47 | Collates input data for PASCAL VOC 2012 segmentation in evaluation. 48 | 49 | :param batch: list/tuple of tuples (image, target) 50 | :type batch: list or tuple 51 | :return: collated images and targets 52 | :rtype: (torch.Tensor, torch.Tensor) 53 | """ 54 | images, targets = list(zip(*batch)) 55 | batched_imgs = cat_list(images, fill_value=0) 56 | batched_targets = cat_list(targets, fill_value=255) 57 | return batched_imgs, batched_targets 58 | -------------------------------------------------------------------------------- /legacy/configs/pascal_voc2012/input_compression/factorized_prior-deeplabv3_resnet50.yaml:
-------------------------------------------------------------------------------- 1 | datasets: 2 | pascal_voc: 3 | name: &dataset_name 'pascal_voc2012' 4 | type: 'VOCSegmentation' 5 | root: &root_dir '~/dataset' 6 | splits: 7 | val: 8 | dataset_id: &pascal_val !join [*dataset_name, '/val'] 9 | params: 10 | root: *root_dir 11 | image_set: 'val' 12 | year: '2012' 13 | download: False 14 | transforms_compose_cls: 'CustomCompose' 15 | transforms_params: &val_transform 16 | - type: 'CustomRandomResize' 17 | params: 18 | min_size: 513 19 | max_size: 513 20 | - type: 'CustomToTensor' 21 | params: 22 | converts_sample: True 23 | converts_target: True 24 | 25 | models: 26 | model: 27 | name: 'NeuralInputCompressionSegmentationModel' 28 | params: 29 | pre_transform_params: 30 | - type: 'AdaptivePad' 31 | params: 32 | padding_position: 'right_bottom' 33 | returns_org_patch_size: True 34 | fill: 0 35 | factor: 64 36 | analysis_config: 37 | analyzes_after_compress: True 38 | analyzer_configs: 39 | - type: 'FileSizeAnalyzer' 40 | params: 41 | unit: 'KB' 42 | post_transform_params: 43 | - type: 'Normalize' 44 | params: 45 | mean: [0.485, 0.456, 0.406] 46 | std: [0.229, 0.224, 0.225] 47 | compression_model: 48 | name: 'bmshj2018_factorized' 49 | params: 50 | pretrained: True 51 | quality: 8 52 | metric: 'mse' 53 | ckpt: './resource/ckpt/input_compression/factorized_prior.pt' 54 | segmentation_model: 55 | name: 'deeplabv3_resnet50' 56 | params: 57 | pretrained: True 58 | pretrained_backbone: True 59 | num_classes: 21 60 | aux_loss: True 61 | ckpt: 'https://github.com/yoshitomo-matsubara/torchdistill/releases/download/v0.2.8/pascal_voc2012-deeplabv3_resnet50.pt' 62 | 63 | test: 64 | test_data_loader: 65 | dataset_id: *pascal_val 66 | random_sample: False 67 | batch_size: 1 68 | num_workers: 16 69 | collate_fn: 'pascal_seg_eval_collate_fn' 70 | -------------------------------------------------------------------------------- 
/legacy/configs/pascal_voc2012/input_compression/scale_hyperprior-deeplabv3_resnet50.yaml: -------------------------------------------------------------------------------- 1 | datasets: 2 | pascal_voc: 3 | name: &dataset_name 'pascal_voc2012' 4 | type: 'VOCSegmentation' 5 | root: &root_dir '~/dataset' 6 | splits: 7 | val: 8 | dataset_id: &pascal_val !join [*dataset_name, '/val'] 9 | params: 10 | root: *root_dir 11 | image_set: 'val' 12 | year: '2012' 13 | download: False 14 | transforms_compose_cls: 'CustomCompose' 15 | transforms_params: &val_transform 16 | - type: 'CustomRandomResize' 17 | params: 18 | min_size: 513 19 | max_size: 513 20 | - type: 'CustomToTensor' 21 | params: 22 | converts_sample: True 23 | converts_target: True 24 | 25 | models: 26 | model: 27 | name: 'NeuralInputCompressionSegmentationModel' 28 | params: 29 | pre_transform_params: 30 | - type: 'AdaptivePad' 31 | params: 32 | padding_position: 'right_bottom' 33 | returns_org_patch_size: True 34 | fill: 0 35 | factor: 64 36 | analysis_config: 37 | analyzes_after_compress: True 38 | analyzer_configs: 39 | - type: 'FileSizeAnalyzer' 40 | params: 41 | unit: 'KB' 42 | post_transform_params: 43 | - type: 'Normalize' 44 | params: 45 | mean: [0.485, 0.456, 0.406] 46 | std: [0.229, 0.224, 0.225] 47 | compression_model: 48 | name: 'bmshj2018_hyperprior' 49 | params: 50 | pretrained: True 51 | quality: 8 52 | metric: 'mse' 53 | ckpt: './resource/ckpt/input_compression/scale_hyperprior.pt' 54 | segmentation_model: 55 | name: 'deeplabv3_resnet50' 56 | params: 57 | pretrained: True 58 | pretrained_backbone: True 59 | num_classes: 21 60 | aux_loss: True 61 | ckpt: 'https://github.com/yoshitomo-matsubara/torchdistill/releases/download/v0.2.8/pascal_voc2012-deeplabv3_resnet50.pt' 62 | 63 | test: 64 | test_data_loader: 65 | dataset_id: *pascal_val 66 | random_sample: False 67 | batch_size: 1 68 | num_workers: 16 69 | collate_fn: 'pascal_seg_eval_collate_fn' 70 | 
-------------------------------------------------------------------------------- /legacy/configs/pascal_voc2012/input_compression/factorized_prior-deeplabv3_resnet101.yaml: -------------------------------------------------------------------------------- 1 | datasets: 2 | pascal_voc: 3 | name: &dataset_name 'pascal_voc2012' 4 | type: 'VOCSegmentation' 5 | root: &root_dir '~/dataset' 6 | splits: 7 | val: 8 | dataset_id: &pascal_val !join [*dataset_name, '/val'] 9 | params: 10 | root: *root_dir 11 | image_set: 'val' 12 | year: '2012' 13 | download: False 14 | transforms_compose_cls: 'CustomCompose' 15 | transforms_params: &val_transform 16 | - type: 'CustomRandomResize' 17 | params: 18 | min_size: 513 19 | max_size: 513 20 | - type: 'CustomToTensor' 21 | params: 22 | converts_sample: True 23 | converts_target: True 24 | 25 | models: 26 | model: 27 | name: 'NeuralInputCompressionSegmentationModel' 28 | params: 29 | pre_transform_params: 30 | - type: 'AdaptivePad' 31 | params: 32 | padding_position: 'right_bottom' 33 | returns_org_patch_size: True 34 | fill: 0 35 | factor: 64 36 | analysis_config: 37 | analyzes_after_compress: True 38 | analyzer_configs: 39 | - type: 'FileSizeAnalyzer' 40 | params: 41 | unit: 'KB' 42 | post_transform_params: 43 | - type: 'Normalize' 44 | params: 45 | mean: [0.485, 0.456, 0.406] 46 | std: [0.229, 0.224, 0.225] 47 | compression_model: 48 | name: 'bmshj2018_factorized' 49 | params: 50 | pretrained: True 51 | quality: 8 52 | metric: 'mse' 53 | ckpt: './resource/ckpt/input_compression/factorized_prior.pt' 54 | segmentation_model: 55 | name: 'deeplabv3_resnet101' 56 | params: 57 | pretrained: True 58 | pretrained_backbone: True 59 | num_classes: 21 60 | aux_loss: True 61 | ckpt: 'https://github.com/yoshitomo-matsubara/torchdistill/releases/download/v0.2.8/pascal_voc2012-deeplabv3_resnet101.pt' 62 | 63 | test: 64 | test_data_loader: 65 | dataset_id: *pascal_val 66 | random_sample: False 67 | batch_size: 1 68 | num_workers: 16 69 | collate_fn: 
'pascal_seg_eval_collate_fn' 70 | -------------------------------------------------------------------------------- /legacy/configs/pascal_voc2012/input_compression/mean_scale_hyperprior-deeplabv3_resnet101.yaml: -------------------------------------------------------------------------------- 1 | datasets: 2 | pascal_voc: 3 | name: &dataset_name 'pascal_voc2012' 4 | type: 'VOCSegmentation' 5 | root: &root_dir '~/dataset' 6 | splits: 7 | val: 8 | dataset_id: &pascal_val !join [*dataset_name, '/val'] 9 | params: 10 | root: *root_dir 11 | image_set: 'val' 12 | year: '2012' 13 | download: False 14 | transforms_compose_cls: 'CustomCompose' 15 | transforms_params: &val_transform 16 | - type: 'CustomRandomResize' 17 | params: 18 | min_size: 513 19 | max_size: 513 20 | - type: 'CustomToTensor' 21 | params: 22 | converts_sample: True 23 | converts_target: True 24 | 25 | models: 26 | model: 27 | name: 'NeuralInputCompressionSegmentationModel' 28 | params: 29 | pre_transform_params: 30 | - type: 'AdaptivePad' 31 | params: 32 | padding_position: 'right_bottom' 33 | returns_org_patch_size: True 34 | fill: 0 35 | factor: 64 36 | analysis_config: 37 | analyzes_after_compress: True 38 | analyzer_configs: 39 | - type: 'FileSizeAnalyzer' 40 | params: 41 | unit: 'KB' 42 | post_transform_params: 43 | - type: 'Normalize' 44 | params: 45 | mean: [0.485, 0.456, 0.406] 46 | std: [0.229, 0.224, 0.225] 47 | compression_model: 48 | name: 'mbt2018_mean' 49 | params: 50 | pretrained: True 51 | quality: 8 52 | metric: 'mse' 53 | ckpt: './resource/ckpt/input_compression/mean_scale_hyperprior.pt' 54 | segmentation_model: 55 | name: 'deeplabv3_resnet101' 56 | params: 57 | pretrained: True 58 | pretrained_backbone: True 59 | num_classes: 21 60 | aux_loss: True 61 | ckpt: 'https://github.com/yoshitomo-matsubara/torchdistill/releases/download/v0.2.8/pascal_voc2012-deeplabv3_resnet101.pt' 62 | 63 | test: 64 | test_data_loader: 65 | dataset_id: *pascal_val 66 | random_sample: False 67 | batch_size: 1 68 
| num_workers: 16 69 | collate_fn: 'pascal_seg_eval_collate_fn' 70 | -------------------------------------------------------------------------------- /legacy/configs/pascal_voc2012/input_compression/mean_scale_hyperprior-deeplabv3_resnet50.yaml: -------------------------------------------------------------------------------- 1 | datasets: 2 | pascal_voc: 3 | name: &dataset_name 'pascal_voc2012' 4 | type: 'VOCSegmentation' 5 | root: &root_dir '~/dataset' 6 | splits: 7 | val: 8 | dataset_id: &pascal_val !join [*dataset_name, '/val'] 9 | params: 10 | root: *root_dir 11 | image_set: 'val' 12 | year: '2012' 13 | download: False 14 | transforms_compose_cls: 'CustomCompose' 15 | transforms_params: &val_transform 16 | - type: 'CustomRandomResize' 17 | params: 18 | min_size: 513 19 | max_size: 513 20 | - type: 'CustomToTensor' 21 | params: 22 | converts_sample: True 23 | converts_target: True 24 | 25 | models: 26 | model: 27 | name: 'NeuralInputCompressionSegmentationModel' 28 | params: 29 | pre_transform_params: 30 | - type: 'AdaptivePad' 31 | params: 32 | padding_position: 'right_bottom' 33 | returns_org_patch_size: True 34 | fill: 0 35 | factor: 64 36 | analysis_config: 37 | analyzes_after_compress: True 38 | analyzer_configs: 39 | - type: 'FileSizeAnalyzer' 40 | params: 41 | unit: 'KB' 42 | post_transform_params: 43 | - type: 'Normalize' 44 | params: 45 | mean: [0.485, 0.456, 0.406] 46 | std: [0.229, 0.224, 0.225] 47 | compression_model: 48 | name: 'mbt2018_mean' 49 | params: 50 | pretrained: True 51 | quality: 8 52 | metric: 'mse' 53 | ckpt: './resource/ckpt/input_compression/mean_scale_hyperprior.pt' 54 | segmentation_model: 55 | name: 'deeplabv3_resnet50' 56 | params: 57 | pretrained: True 58 | pretrained_backbone: True 59 | num_classes: 21 60 | aux_loss: True 61 | ckpt: 'https://github.com/yoshitomo-matsubara/torchdistill/releases/download/v0.2.8/pascal_voc2012-deeplabv3_resnet50.pt' 62 | 63 | test: 64 | test_data_loader: 65 | dataset_id: *pascal_val 66 | 
random_sample: False 67 | batch_size: 1 68 | num_workers: 16 69 | collate_fn: 'pascal_seg_eval_collate_fn' 70 | -------------------------------------------------------------------------------- /legacy/configs/pascal_voc2012/input_compression/scale_hyperprior-deeplabv3_resnet101.yaml: -------------------------------------------------------------------------------- 1 | datasets: 2 | pascal_voc: 3 | name: &dataset_name 'pascal_voc2012' 4 | type: 'VOCSegmentation' 5 | root: &root_dir '~/dataset' 6 | splits: 7 | val: 8 | dataset_id: &pascal_val !join [*dataset_name, '/val'] 9 | params: 10 | root: *root_dir 11 | image_set: 'val' 12 | year: '2012' 13 | download: False 14 | transforms_compose_cls: 'CustomCompose' 15 | transforms_params: &val_transform 16 | - type: 'CustomRandomResize' 17 | params: 18 | min_size: 513 19 | max_size: 513 20 | - type: 'CustomToTensor' 21 | params: 22 | converts_sample: True 23 | converts_target: True 24 | 25 | models: 26 | model: 27 | name: 'NeuralInputCompressionSegmentationModel' 28 | params: 29 | pre_transform_params: 30 | - type: 'AdaptivePad' 31 | params: 32 | padding_position: 'right_bottom' 33 | returns_org_patch_size: True 34 | fill: 0 35 | factor: 64 36 | analysis_config: 37 | analyzes_after_compress: True 38 | analyzer_configs: 39 | - type: 'FileSizeAnalyzer' 40 | params: 41 | unit: 'KB' 42 | post_transform_params: 43 | - type: 'Normalize' 44 | params: 45 | mean: [0.485, 0.456, 0.406] 46 | std: [0.229, 0.224, 0.225] 47 | compression_model: 48 | name: 'bmshj2018_hyperprior' 49 | params: 50 | pretrained: True 51 | quality: 8 52 | metric: 'mse' 53 | ckpt: './resource/ckpt/input_compression/scale_hyperprior.pt' 54 | segmentation_model: 55 | name: 'deeplabv3_resnet101' 56 | params: 57 | pretrained: True 58 | pretrained_backbone: True 59 | num_classes: 21 60 | aux_loss: True 61 | ckpt: 'https://github.com/yoshitomo-matsubara/torchdistill/releases/download/v0.2.8/pascal_voc2012-deeplabv3_resnet101.pt' 62 | 63 | test: 64 | test_data_loader: 
65 | dataset_id: *pascal_val 66 | random_sample: False 67 | batch_size: 1 68 | num_workers: 16 69 | collate_fn: 'pascal_seg_eval_collate_fn' 70 | -------------------------------------------------------------------------------- /legacy/configs/pascal_voc2012/input_compression/joint_autoregressive_hierarchical_prior-deeplabv3_resnet101.yaml: -------------------------------------------------------------------------------- 1 | datasets: 2 | pascal_voc: 3 | name: &dataset_name 'pascal_voc2012' 4 | type: 'VOCSegmentation' 5 | root: &root_dir '~/dataset' 6 | splits: 7 | val: 8 | dataset_id: &pascal_val !join [*dataset_name, '/val'] 9 | params: 10 | root: *root_dir 11 | image_set: 'val' 12 | year: '2012' 13 | download: False 14 | transforms_compose_cls: 'CustomCompose' 15 | transforms_params: &val_transform 16 | - type: 'CustomRandomResize' 17 | params: 18 | min_size: 513 19 | max_size: 513 20 | - type: 'CustomToTensor' 21 | params: 22 | converts_sample: True 23 | converts_target: True 24 | 25 | models: 26 | model: 27 | name: 'NeuralInputCompressionSegmentationModel' 28 | params: 29 | pre_transform_params: 30 | - type: 'AdaptivePad' 31 | params: 32 | padding_position: 'right_bottom' 33 | returns_org_patch_size: True 34 | fill: 0 35 | factor: 64 36 | analysis_config: 37 | analyzes_after_compress: True 38 | analyzer_configs: 39 | - type: 'FileSizeAnalyzer' 40 | params: 41 | unit: 'KB' 42 | post_transform_params: 43 | - type: 'Normalize' 44 | params: 45 | mean: [0.485, 0.456, 0.406] 46 | std: [0.229, 0.224, 0.225] 47 | uses_cpu4compression_model: True 48 | compression_model: 49 | name: 'mbt2018' 50 | params: 51 | pretrained: True 52 | quality: 8 53 | metric: 'mse' 54 | ckpt: './resource/ckpt/input_compression/joint_autoregressive_hierarchical_prior.pt' 55 | segmentation_model: 56 | name: 'deeplabv3_resnet101' 57 | params: 58 | pretrained: True 59 | pretrained_backbone: True 60 | num_classes: 21 61 | aux_loss: True 62 | ckpt: 
'https://github.com/yoshitomo-matsubara/torchdistill/releases/download/v0.2.8/pascal_voc2012-deeplabv3_resnet101.pt' 63 | 64 | test: 65 | test_data_loader: 66 | dataset_id: *pascal_val 67 | random_sample: False 68 | batch_size: 1 69 | num_workers: 16 70 | collate_fn: 'pascal_seg_eval_collate_fn' 71 | -------------------------------------------------------------------------------- /legacy/configs/pascal_voc2012/input_compression/joint_autoregressive_hierarchical_prior-deeplabv3_resnet50.yaml: -------------------------------------------------------------------------------- 1 | datasets: 2 | pascal_voc: 3 | name: &dataset_name 'pascal_voc2012' 4 | type: 'VOCSegmentation' 5 | root: &root_dir '~/dataset' 6 | splits: 7 | val: 8 | dataset_id: &pascal_val !join [*dataset_name, '/val'] 9 | params: 10 | root: *root_dir 11 | image_set: 'val' 12 | year: '2012' 13 | download: False 14 | transforms_compose_cls: 'CustomCompose' 15 | transforms_params: &val_transform 16 | - type: 'CustomRandomResize' 17 | params: 18 | min_size: 513 19 | max_size: 513 20 | - type: 'CustomToTensor' 21 | params: 22 | converts_sample: True 23 | converts_target: True 24 | 25 | models: 26 | model: 27 | name: 'NeuralInputCompressionSegmentationModel' 28 | params: 29 | pre_transform_params: 30 | - type: 'AdaptivePad' 31 | params: 32 | padding_position: 'right_bottom' 33 | returns_org_patch_size: True 34 | fill: 0 35 | factor: 64 36 | analysis_config: 37 | analyzes_after_compress: True 38 | analyzer_configs: 39 | - type: 'FileSizeAnalyzer' 40 | params: 41 | unit: 'KB' 42 | post_transform_params: 43 | - type: 'Normalize' 44 | params: 45 | mean: [0.485, 0.456, 0.406] 46 | std: [0.229, 0.224, 0.225] 47 | uses_cpu4compression_model: True 48 | compression_model: 49 | name: 'mbt2018' 50 | params: 51 | pretrained: True 52 | quality: 8 53 | metric: 'mse' 54 | ckpt: './resource/ckpt/input_compression/joint_autoregressive_hierarchical_prior.pt' 55 | segmentation_model: 56 | name: 'deeplabv3_resnet50' 57 | params: 58 
| pretrained: True 59 | pretrained_backbone: True 60 | num_classes: 21 61 | aux_loss: True 62 | ckpt: 'https://github.com/yoshitomo-matsubara/torchdistill/releases/download/v0.2.8/pascal_voc2012-deeplabv3_resnet50.pt' 63 | 64 | test: 65 | test_data_loader: 66 | dataset_id: *pascal_val 67 | random_sample: False 68 | batch_size: 1 69 | num_workers: 16 70 | collate_fn: 'pascal_seg_eval_collate_fn' 71 | -------------------------------------------------------------------------------- /legacy/script/README.md: -------------------------------------------------------------------------------- 1 | # Datasets 2 | 3 | Download and preprocess datasets before you run experiments. 4 | Here, we provide three examples: ImageNet (ILSVRC 2012), COCO 2017, and PASCAL VOC 2012. 5 | 6 | ## 1. ImageNet (ILSVRC 2012): Image Classification 7 | ### 1.1 Download the datasets 8 | As the terms of use do not allow to distribute the URLs, you will have to create an account [here](http://image-net.org/download) to get the URLs, and replace `${TRAIN_DATASET_URL}` and `${VAL_DATASET_URL}` with them. 
9 | ```shell 10 | wget ${TRAIN_DATASET_URL} ./ 11 | wget ${VAL_DATASET_URL} ./ 12 | ``` 13 | 14 | ### 1.2 Untar and extract files 15 | ```shell 16 | # Go to the root of this repository 17 | mkdir ~/dataset/ilsvrc2012/{train,val} -p 18 | mv ILSVRC2012_img_train.tar ~/dataset/ilsvrc2012/train/ 19 | mv ILSVRC2012_img_val.tar ~/dataset/ilsvrc2012/val/ 20 | cd ~/dataset/ilsvrc2012/train/ 21 | tar -xvf ILSVRC2012_img_train.tar 22 | mv ILSVRC2012_img_train.tar ../ 23 | for f in *.tar; do 24 | d=`basename $f .tar` 25 | mkdir $d 26 | (cd $d && tar xf ../$f) 27 | done 28 | rm -r *.tar 29 | cd ../../../../ 30 | 31 | wget https://raw.githubusercontent.com/soumith/imagenetloader.torch/master/valprep.sh 32 | mv valprep.sh ~/dataset/ilsvrc2012/val/ 33 | cd ~/dataset/ilsvrc2012/val/ 34 | tar -xvf ILSVRC2012_img_val.tar 35 | mv ILSVRC2012_img_val.tar ../ 36 | sh valprep.sh 37 | mv valprep.sh ../ 38 | cd ../../../../ 39 | ``` 40 | 41 | 42 | ## 2. COCO 2017: Object Detection 43 | ### 2.1 Download the datasets 44 | ```shell 45 | wget http://images.cocodataset.org/zips/train2017.zip ./ 46 | wget http://images.cocodataset.org/zips/val2017.zip ./ 47 | wget http://images.cocodataset.org/annotations/annotations_trainval2017.zip ./ 48 | ``` 49 | 50 | ### 2.2 Unzip and extract files 51 | ```shell 52 | # Go to the root of this repository 53 | mkdir ~/dataset/coco2017/ -p 54 | mv train2017.zip ~/dataset/coco2017/ 55 | mv val2017.zip ~/dataset/coco2017/ 56 | mv annotations_trainval2017.zip ~/dataset/coco2017/ 57 | cd ~/dataset/coco2017/ 58 | unzip train2017.zip 59 | unzip val2017.zip 60 | unzip annotations_trainval2017.zip 61 | cd ../../../ 62 | ``` 63 | 64 | 65 | ## 3. PASCAL VOC 2012: Semantic Segmentation 66 | You can skip Steps 3.1 and 3.2 by replacing `download: False` in a yaml config file with `download: True`. 
67 | 68 | ### 3.1 Download the datasets 69 | ```shell 70 | wget http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar 71 | ``` 72 | 73 | ### 3.2 Untar and extract files 74 | ```shell 75 | # Go to the root of this repository 76 | mkdir ~/dataset/ -p 77 | mv VOCtrainval_11-May-2012.tar ~/dataset/ 78 | cd ~/dataset/ 79 | tar -xvf VOCtrainval_11-May-2012.tar 80 | cd ../../ 81 | ``` 82 | -------------------------------------------------------------------------------- /script/README.md: -------------------------------------------------------------------------------- 1 | # Datasets 2 | 3 | Download and preprocess datasets before you run experiments. 4 | Here, we provide three examples: ImageNet (ILSVRC 2012), COCO 2017, and PASCAL VOC 2012. 5 | 6 | ## 1. ImageNet (ILSVRC 2012): Image Classification 7 | ### 1.1 Download the datasets 8 | As the terms of use do not allow to distribute the URLs, you will have to create an account [here](http://image-net.org/download) to get the URLs, and replace `${TRAIN_DATASET_URL}` and `${VAL_DATASET_URL}` with them. 
9 | ```shell 10 | wget ${TRAIN_DATASET_URL} ./ 11 | wget ${VAL_DATASET_URL} ./ 12 | ``` 13 | 14 | ### 1.2 Untar and extract files 15 | ```shell 16 | # Go to the root of this repository 17 | mkdir ~/datasets/ilsvrc2012/{train,val} -p 18 | mv ILSVRC2012_img_train.tar ~/datasets/ilsvrc2012/train/ 19 | mv ILSVRC2012_img_val.tar ~/datasets/ilsvrc2012/val/ 20 | cd ~/datasets/ilsvrc2012/train/ 21 | tar -xvf ILSVRC2012_img_train.tar 22 | mv ILSVRC2012_img_train.tar ../ 23 | for f in *.tar; do 24 | d=`basename $f .tar` 25 | mkdir $d 26 | (cd $d && tar xf ../$f) 27 | done 28 | rm -r *.tar 29 | cd ../../../../ 30 | 31 | wget https://raw.githubusercontent.com/soumith/imagenetloader.torch/master/valprep.sh 32 | mv valprep.sh ~/datasets/ilsvrc2012/val/ 33 | cd ~/datasets/ilsvrc2012/val/ 34 | tar -xvf ILSVRC2012_img_val.tar 35 | mv ILSVRC2012_img_val.tar ../ 36 | sh valprep.sh 37 | mv valprep.sh ../ 38 | cd ../../../../ 39 | ``` 40 | 41 | 42 | ## 2. COCO 2017: Object Detection 43 | ### 2.1 Download the datasets 44 | ```shell 45 | wget http://images.cocodataset.org/zips/train2017.zip ./ 46 | wget http://images.cocodataset.org/zips/val2017.zip ./ 47 | wget http://images.cocodataset.org/annotations/annotations_trainval2017.zip ./ 48 | ``` 49 | 50 | ### 2.2 Unzip and extract files 51 | ```shell 52 | # Go to the root of this repository 53 | mkdir ~/datasets/coco2017/ -p 54 | mv train2017.zip ~/datasets/coco2017/ 55 | mv val2017.zip ~/datasets/coco2017/ 56 | mv annotations_trainval2017.zip ~/datasets/coco2017/ 57 | cd ~/datasets/coco2017/ 58 | unzip train2017.zip 59 | unzip val2017.zip 60 | unzip annotations_trainval2017.zip 61 | cd ../../../ 62 | ``` 63 | 64 | 65 | ## 3. PASCAL VOC 2012: Semantic Segmentation 66 | You can skip Steps 3.1 and 3.2 by replacing `download: False` in a yaml config file with `download: True`. 
67 | 68 | ### 3.1 Download the datasets 69 | ```shell 70 | wget http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar 71 | ``` 72 | 73 | ### 3.2 Untar and extract files 74 | ```shell 75 | # Go to the root of this repository 76 | mkdir ~/datasets/ -p 77 | mv VOCtrainval_11-May-2012.tar ~/datasets/ 78 | cd ~/datasets/ 79 | tar -xvf VOCtrainval_11-May-2012.tar 80 | cd ../../ 81 | ``` 82 | -------------------------------------------------------------------------------- /configs/ilsvrc2012/input_compression/jpeg-resnet50.yaml: -------------------------------------------------------------------------------- 1 | datasets: 2 | &imagenet_val ilsvrc2012/val: !import_call 3 | _name: &dataset_name 'ilsvrc2012' 4 | _root: &root_dir !join ['~/datasets/', *dataset_name] 5 | key: 'torchvision.datasets.ImageFolder' 6 | init: 7 | kwargs: 8 | root: !join [*root_dir, '/val'] 9 | transform: !import_call 10 | key: 'torchvision.transforms.Compose' 11 | init: 12 | kwargs: 13 | transforms: 14 | - !import_call 15 | key: 'torchvision.transforms.Resize' 16 | init: 17 | kwargs: 18 | size: 256 19 | - !import_call 20 | key: 'torchvision.transforms.CenterCrop' 21 | init: 22 | kwargs: 23 | size: [224, 224] 24 | 25 | models: 26 | model: 27 | key: 'CodecInputCompressionClassifier' 28 | kwargs: 29 | codec_encoder_decoder: !import_call 30 | key: 'torchvision.transforms.Compose' 31 | init: 32 | kwargs: 33 | transforms: 34 | - !import_call 35 | key: 'sc2bench.transforms.codec.PILImageModule' 36 | init: 37 | kwargs: 38 | format: 'JPEG' 39 | quality: 90 40 | returns_file_size: True 41 | post_transform: !import_call 42 | key: 'torchvision.transforms.Compose' 43 | init: 44 | kwargs: 45 | transforms: 46 | - !import_call 47 | key: 'torchvision.transforms.ToTensor' 48 | init: 49 | - !import_call 50 | key: 'torchvision.transforms.Normalize' 51 | init: 52 | kwargs: 53 | mean: [0.485, 0.456, 0.406] 54 | std: [0.229, 0.224, 0.225] 55 | analysis_config: 56 | analyzer_configs: 57 | - key: 
'FileSizeAccumulator' 58 | kwargs: 59 | unit: 'KB' 60 | classification_model: 61 | key: 'resnet50' 62 | _weights: &model_weights_enum !import_get 63 | key: 'torchvision.models.resnet.ResNet50_Weights' 64 | kwargs: 65 | num_classes: 1000 66 | weights: !getattr [*model_weights_enum, 'IMAGENET1K_V1'] 67 | 68 | test: 69 | test_data_loader: 70 | dataset_id: *imagenet_val 71 | collate_fn: 'default_collate_w_pil' 72 | sampler: 73 | class_or_func: !import_get 74 | key: 'torch.utils.data.SequentialSampler' 75 | kwargs: 76 | kwargs: 77 | batch_size: 1 78 | num_workers: 16 79 | drop_last: False 80 | -------------------------------------------------------------------------------- /configs/ilsvrc2012/input_compression/webp-resnet50.yaml: -------------------------------------------------------------------------------- 1 | datasets: 2 | &imagenet_val ilsvrc2012/val: !import_call 3 | _name: &dataset_name 'ilsvrc2012' 4 | _root: &root_dir !join ['~/datasets/', *dataset_name] 5 | key: 'torchvision.datasets.ImageFolder' 6 | init: 7 | kwargs: 8 | root: !join [ *root_dir, '/val' ] 9 | transform: !import_call 10 | key: 'torchvision.transforms.Compose' 11 | init: 12 | kwargs: 13 | transforms: 14 | - !import_call 15 | key: 'torchvision.transforms.Resize' 16 | init: 17 | kwargs: 18 | size: 256 19 | - !import_call 20 | key: 'torchvision.transforms.CenterCrop' 21 | init: 22 | kwargs: 23 | size: [224, 224] 24 | 25 | models: 26 | model: 27 | key: 'CodecInputCompressionClassifier' 28 | kwargs: 29 | codec_encoder_decoder: !import_call 30 | key: 'torchvision.transforms.Compose' 31 | init: 32 | kwargs: 33 | transforms: 34 | - !import_call 35 | key: 'sc2bench.transforms.codec.PILImageModule' 36 | init: 37 | kwargs: 38 | format: 'WEBP' 39 | quality: 90 40 | returns_file_size: True 41 | post_transform: !import_call 42 | key: 'torchvision.transforms.Compose' 43 | init: 44 | kwargs: 45 | transforms: 46 | - !import_call 47 | key: 'torchvision.transforms.ToTensor' 48 | init: 49 | - !import_call 50 | key: 
'torchvision.transforms.Normalize' 51 | init: 52 | kwargs: 53 | mean: [0.485, 0.456, 0.406] 54 | std: [0.229, 0.224, 0.225] 55 | analysis_config: 56 | analyzer_configs: 57 | - key: 'FileSizeAccumulator' 58 | kwargs: 59 | unit: 'KB' 60 | classification_model: 61 | key: 'resnet50' 62 | _weights: &model_weights_enum !import_get 63 | key: 'torchvision.models.resnet.ResNet50_Weights' 64 | kwargs: 65 | num_classes: 1000 66 | weights: !getattr [*model_weights_enum, 'IMAGENET1K_V1'] 67 | 68 | test: 69 | test_data_loader: 70 | dataset_id: *imagenet_val 71 | collate_fn: 'default_collate_w_pil' 72 | sampler: 73 | class_or_func: !import_get 74 | key: 'torch.utils.data.SequentialSampler' 75 | kwargs: 76 | kwargs: 77 | batch_size: 1 78 | num_workers: 16 79 | drop_last: False 80 | -------------------------------------------------------------------------------- /configs/ilsvrc2012/input_compression/jpeg-resnet101.yaml: -------------------------------------------------------------------------------- 1 | datasets: 2 | &imagenet_val ilsvrc2012/val: !import_call 3 | _name: &dataset_name 'ilsvrc2012' 4 | _root: &root_dir !join ['~/datasets/', *dataset_name] 5 | key: 'torchvision.datasets.ImageFolder' 6 | init: 7 | kwargs: 8 | root: !join [*root_dir, '/val'] 9 | transform: !import_call 10 | key: 'torchvision.transforms.Compose' 11 | init: 12 | kwargs: 13 | transforms: 14 | - !import_call 15 | key: 'torchvision.transforms.Resize' 16 | init: 17 | kwargs: 18 | size: 256 19 | - !import_call 20 | key: 'torchvision.transforms.CenterCrop' 21 | init: 22 | kwargs: 23 | size: [224, 224] 24 | 25 | models: 26 | model: 27 | key: 'CodecInputCompressionClassifier' 28 | kwargs: 29 | codec_encoder_decoder: !import_call 30 | key: 'torchvision.transforms.Compose' 31 | init: 32 | kwargs: 33 | transforms: 34 | - !import_call 35 | key: 'sc2bench.transforms.codec.PILImageModule' 36 | init: 37 | kwargs: 38 | format: 'JPEG' 39 | quality: 90 40 | returns_file_size: True 41 | post_transform: !import_call 42 | 
key: 'torchvision.transforms.Compose' 43 | init: 44 | kwargs: 45 | transforms: 46 | - !import_call 47 | key: 'torchvision.transforms.ToTensor' 48 | init: 49 | - !import_call 50 | key: 'torchvision.transforms.Normalize' 51 | init: 52 | kwargs: 53 | mean: [0.485, 0.456, 0.406] 54 | std: [0.229, 0.224, 0.225] 55 | analysis_config: 56 | analyzer_configs: 57 | - key: 'FileSizeAccumulator' 58 | kwargs: 59 | unit: 'KB' 60 | classification_model: 61 | 62 | key: 'resnet101' 63 | _weights: &model_weights_enum !import_get 64 | key: 'torchvision.models.resnet.ResNet101_Weights' 65 | kwargs: 66 | num_classes: 1000 67 | weights: !getattr [*model_weights_enum, 'IMAGENET1K_V1'] 68 | 69 | test: 70 | test_data_loader: 71 | dataset_id: *imagenet_val 72 | collate_fn: 'default_collate_w_pil' 73 | sampler: 74 | class_or_func: !import_get 75 | key: 'torch.utils.data.SequentialSampler' 76 | kwargs: 77 | kwargs: 78 | batch_size: 1 79 | num_workers: 16 80 | drop_last: False 81 | -------------------------------------------------------------------------------- /configs/ilsvrc2012/input_compression/jpeg-resnet152.yaml: -------------------------------------------------------------------------------- 1 | datasets: 2 | &imagenet_val ilsvrc2012/val: !import_call 3 | _name: &dataset_name 'ilsvrc2012' 4 | _root: &root_dir !join ['~/datasets/', *dataset_name] 5 | key: 'torchvision.datasets.ImageFolder' 6 | init: 7 | kwargs: 8 | root: !join [*root_dir, '/val'] 9 | transform: !import_call 10 | key: 'torchvision.transforms.Compose' 11 | init: 12 | kwargs: 13 | transforms: 14 | - !import_call 15 | key: 'torchvision.transforms.Resize' 16 | init: 17 | kwargs: 18 | size: 256 19 | - !import_call 20 | key: 'torchvision.transforms.CenterCrop' 21 | init: 22 | kwargs: 23 | size: [224, 224] 24 | 25 | models: 26 | model: 27 | key: 'CodecInputCompressionClassifier' 28 | kwargs: 29 | codec_encoder_decoder: !import_call 30 | key: 'torchvision.transforms.Compose' 31 | init: 32 | kwargs: 33 | transforms: 34 | - 
!import_call 35 | key: 'sc2bench.transforms.codec.PILImageModule' 36 | init: 37 | kwargs: 38 | format: 'JPEG' 39 | quality: 90 40 | returns_file_size: True 41 | post_transform: !import_call 42 | key: 'torchvision.transforms.Compose' 43 | init: 44 | kwargs: 45 | transforms: 46 | - !import_call 47 | key: 'torchvision.transforms.ToTensor' 48 | init: 49 | - !import_call 50 | key: 'torchvision.transforms.Normalize' 51 | init: 52 | kwargs: 53 | mean: [0.485, 0.456, 0.406] 54 | std: [0.229, 0.224, 0.225] 55 | analysis_config: 56 | analyzer_configs: 57 | - key: 'FileSizeAccumulator' 58 | kwargs: 59 | unit: 'KB' 60 | classification_model: 61 | 62 | key: 'resnet152' 63 | _weights: &model_weights_enum !import_get 64 | key: 'torchvision.models.resnet.ResNet152_Weights' 65 | kwargs: 66 | num_classes: 1000 67 | weights: !getattr [*model_weights_enum, 'IMAGENET1K_V1'] 68 | 69 | test: 70 | test_data_loader: 71 | dataset_id: *imagenet_val 72 | collate_fn: 'default_collate_w_pil' 73 | sampler: 74 | class_or_func: !import_get 75 | key: 'torch.utils.data.SequentialSampler' 76 | kwargs: 77 | kwargs: 78 | batch_size: 1 79 | num_workers: 16 80 | drop_last: False 81 | -------------------------------------------------------------------------------- /configs/ilsvrc2012/input_compression/webp-resnet101.yaml: -------------------------------------------------------------------------------- 1 | datasets: 2 | &imagenet_val ilsvrc2012/val: !import_call 3 | _name: &dataset_name 'ilsvrc2012' 4 | _root: &root_dir !join ['~/datasets/', *dataset_name] 5 | key: 'torchvision.datasets.ImageFolder' 6 | init: 7 | kwargs: 8 | root: !join [ *root_dir, '/val' ] 9 | transform: !import_call 10 | key: 'torchvision.transforms.Compose' 11 | init: 12 | kwargs: 13 | transforms: 14 | - !import_call 15 | key: 'torchvision.transforms.Resize' 16 | init: 17 | kwargs: 18 | size: 256 19 | - !import_call 20 | key: 'torchvision.transforms.CenterCrop' 21 | init: 22 | kwargs: 23 | size: [224, 224] 24 | 25 | models: 26 | model: 
27 | key: 'CodecInputCompressionClassifier' 28 | kwargs: 29 | codec_encoder_decoder: !import_call 30 | key: 'torchvision.transforms.Compose' 31 | init: 32 | kwargs: 33 | transforms: 34 | - !import_call 35 | key: 'sc2bench.transforms.codec.PILImageModule' 36 | init: 37 | kwargs: 38 | format: 'WEBP' 39 | quality: 90 40 | returns_file_size: True 41 | post_transform: !import_call 42 | key: 'torchvision.transforms.Compose' 43 | init: 44 | kwargs: 45 | transforms: 46 | - !import_call 47 | key: 'torchvision.transforms.ToTensor' 48 | init: 49 | - !import_call 50 | key: 'torchvision.transforms.Normalize' 51 | init: 52 | kwargs: 53 | mean: [0.485, 0.456, 0.406] 54 | std: [0.229, 0.224, 0.225] 55 | analysis_config: 56 | analyzer_configs: 57 | - key: 'FileSizeAccumulator' 58 | kwargs: 59 | unit: 'KB' 60 | classification_model: 61 | 62 | key: 'resnet101' 63 | _weights: &model_weights_enum !import_get 64 | key: 'torchvision.models.resnet.ResNet101_Weights' 65 | kwargs: 66 | num_classes: 1000 67 | weights: !getattr [*model_weights_enum, 'IMAGENET1K_V1'] 68 | 69 | test: 70 | test_data_loader: 71 | dataset_id: *imagenet_val 72 | collate_fn: 'default_collate_w_pil' 73 | sampler: 74 | class_or_func: !import_get 75 | key: 'torch.utils.data.SequentialSampler' 76 | kwargs: 77 | kwargs: 78 | batch_size: 1 79 | num_workers: 16 80 | drop_last: False 81 | -------------------------------------------------------------------------------- /configs/ilsvrc2012/input_compression/webp-resnet152.yaml: -------------------------------------------------------------------------------- 1 | datasets: 2 | &imagenet_val ilsvrc2012/val: !import_call 3 | _name: &dataset_name 'ilsvrc2012' 4 | _root: &root_dir !join ['~/datasets/', *dataset_name] 5 | key: 'torchvision.datasets.ImageFolder' 6 | init: 7 | kwargs: 8 | root: !join [ *root_dir, '/val' ] 9 | transform: !import_call 10 | key: 'torchvision.transforms.Compose' 11 | init: 12 | kwargs: 13 | transforms: 14 | - !import_call 15 | key: 
'torchvision.transforms.Resize' 16 | init: 17 | kwargs: 18 | size: 256 19 | - !import_call 20 | key: 'torchvision.transforms.CenterCrop' 21 | init: 22 | kwargs: 23 | size: [224, 224] 24 | 25 | models: 26 | model: 27 | key: 'CodecInputCompressionClassifier' 28 | kwargs: 29 | codec_encoder_decoder: !import_call 30 | key: 'torchvision.transforms.Compose' 31 | init: 32 | kwargs: 33 | transforms: 34 | - !import_call 35 | key: 'sc2bench.transforms.codec.PILImageModule' 36 | init: 37 | kwargs: 38 | format: 'WEBP' 39 | quality: 90 40 | returns_file_size: True 41 | post_transform: !import_call 42 | key: 'torchvision.transforms.Compose' 43 | init: 44 | kwargs: 45 | transforms: 46 | - !import_call 47 | key: 'torchvision.transforms.ToTensor' 48 | init: 49 | - !import_call 50 | key: 'torchvision.transforms.Normalize' 51 | init: 52 | kwargs: 53 | mean: [0.485, 0.456, 0.406] 54 | std: [0.229, 0.224, 0.225] 55 | analysis_config: 56 | analyzer_configs: 57 | - key: 'FileSizeAccumulator' 58 | kwargs: 59 | unit: 'KB' 60 | classification_model: 61 | 62 | key: 'resnet152' 63 | _weights: &model_weights_enum !import_get 64 | key: 'torchvision.models.resnet.ResNet152_Weights' 65 | kwargs: 66 | num_classes: 1000 67 | weights: !getattr [*model_weights_enum, 'IMAGENET1K_V1'] 68 | 69 | test: 70 | test_data_loader: 71 | dataset_id: *imagenet_val 72 | collate_fn: 'default_collate_w_pil' 73 | sampler: 74 | class_or_func: !import_get 75 | key: 'torch.utils.data.SequentialSampler' 76 | kwargs: 77 | kwargs: 78 | batch_size: 1 79 | num_workers: 16 80 | drop_last: False 81 | -------------------------------------------------------------------------------- /configs/ilsvrc2012/input_compression/bpg-resnet50.yaml: -------------------------------------------------------------------------------- 1 | datasets: 2 | &imagenet_val ilsvrc2012/val: !import_call 3 | _name: &dataset_name 'ilsvrc2012' 4 | _root: &root_dir !join ['~/datasets/', *dataset_name] 5 | key: 'torchvision.datasets.ImageFolder' 6 | init: 7 | 
kwargs: 8 | root: !join [ *root_dir, '/val' ] 9 | transform: !import_call 10 | key: 'torchvision.transforms.Compose' 11 | init: 12 | kwargs: 13 | transforms: 14 | - !import_call 15 | key: 'torchvision.transforms.Resize' 16 | init: 17 | kwargs: 18 | size: 256 19 | - !import_call 20 | key: 'torchvision.transforms.CenterCrop' 21 | init: 22 | kwargs: 23 | size: [224, 224] 24 | 25 | models: 26 | model: 27 | key: 'CodecInputCompressionClassifier' 28 | kwargs: 29 | codec_encoder_decoder: !import_call 30 | key: 'torchvision.transforms.Compose' 31 | init: 32 | kwargs: 33 | transforms: 34 | - !import_call 35 | key: 'sc2bench.transforms.codec.BPGModule' 36 | init: 37 | kwargs: 38 | encoder_path: '~/software/libbpg-0.9.8/bpgenc' 39 | decoder_path: '~/software/libbpg-0.9.8/bpgdec' 40 | quality: 50 41 | returns_file_size: True 42 | post_transform: !import_call 43 | key: 'torchvision.transforms.Compose' 44 | init: 45 | kwargs: 46 | transforms: 47 | - !import_call 48 | key: 'torchvision.transforms.ToTensor' 49 | init: 50 | - !import_call 51 | key: 'torchvision.transforms.Normalize' 52 | init: 53 | kwargs: 54 | mean: [0.485, 0.456, 0.406] 55 | std: [0.229, 0.224, 0.225] 56 | analysis_config: 57 | analyzer_configs: 58 | - key: 'FileSizeAccumulator' 59 | kwargs: 60 | unit: 'KB' 61 | classification_model: 62 | key: 'resnet50' 63 | _weights: &model_weights_enum !import_get 64 | key: 'torchvision.models.resnet.ResNet50_Weights' 65 | kwargs: 66 | num_classes: 1000 67 | weights: !getattr [*model_weights_enum, 'IMAGENET1K_V1'] 68 | 69 | test: 70 | test_data_loader: 71 | dataset_id: *imagenet_val 72 | sampler: 73 | class_or_func: !import_get 74 | key: 'torch.utils.data.SequentialSampler' 75 | kwargs: 76 | kwargs: 77 | batch_size: 1 78 | num_workers: 16 79 | drop_last: False 80 | -------------------------------------------------------------------------------- /configs/ilsvrc2012/input_compression/bpg-resnet101.yaml: 
-------------------------------------------------------------------------------- 1 | datasets: 2 | &imagenet_val ilsvrc2012/val: !import_call 3 | _name: &dataset_name 'ilsvrc2012' 4 | _root: &root_dir !join ['~/datasets/', *dataset_name] 5 | key: 'torchvision.datasets.ImageFolder' 6 | init: 7 | kwargs: 8 | root: !join [ *root_dir, '/val' ] 9 | transform: !import_call 10 | key: 'torchvision.transforms.Compose' 11 | init: 12 | kwargs: 13 | transforms: 14 | - !import_call 15 | key: 'torchvision.transforms.Resize' 16 | init: 17 | kwargs: 18 | size: 256 19 | - !import_call 20 | key: 'torchvision.transforms.CenterCrop' 21 | init: 22 | kwargs: 23 | size: [224, 224] 24 | 25 | models: 26 | model: 27 | key: 'CodecInputCompressionClassifier' 28 | kwargs: 29 | codec_encoder_decoder: !import_call 30 | key: 'torchvision.transforms.Compose' 31 | init: 32 | kwargs: 33 | transforms: 34 | - !import_call 35 | key: 'sc2bench.transforms.codec.BPGModule' 36 | init: 37 | kwargs: 38 | encoder_path: '~/software/libbpg-0.9.8/bpgenc' 39 | decoder_path: '~/software/libbpg-0.9.8/bpgdec' 40 | quality: 50 41 | returns_file_size: True 42 | post_transform: !import_call 43 | key: 'torchvision.transforms.Compose' 44 | init: 45 | kwargs: 46 | transforms: 47 | - !import_call 48 | key: 'torchvision.transforms.ToTensor' 49 | init: 50 | - !import_call 51 | key: 'torchvision.transforms.Normalize' 52 | init: 53 | kwargs: 54 | mean: [0.485, 0.456, 0.406] 55 | std: [0.229, 0.224, 0.225] 56 | analysis_config: 57 | analyzer_configs: 58 | - key: 'FileSizeAccumulator' 59 | kwargs: 60 | unit: 'KB' 61 | classification_model: 62 | 63 | key: 'resnet101' 64 | _weights: &model_weights_enum !import_get 65 | key: 'torchvision.models.resnet.ResNet101_Weights' 66 | kwargs: 67 | num_classes: 1000 68 | weights: !getattr [*model_weights_enum, 'IMAGENET1K_V1'] 69 | 70 | test: 71 | test_data_loader: 72 | dataset_id: *imagenet_val 73 | sampler: 74 | class_or_func: !import_get 75 | key: 'torch.utils.data.SequentialSampler' 76 | 
kwargs: 77 | kwargs: 78 | batch_size: 1 79 | num_workers: 16 80 | drop_last: False 81 | -------------------------------------------------------------------------------- /configs/ilsvrc2012/input_compression/bpg-resnet152.yaml: -------------------------------------------------------------------------------- 1 | datasets: 2 | &imagenet_val ilsvrc2012/val: !import_call 3 | _name: &dataset_name 'ilsvrc2012' 4 | _root: &root_dir !join ['~/datasets/', *dataset_name] 5 | key: 'torchvision.datasets.ImageFolder' 6 | init: 7 | kwargs: 8 | root: !join [ *root_dir, '/val' ] 9 | transform: !import_call 10 | key: 'torchvision.transforms.Compose' 11 | init: 12 | kwargs: 13 | transforms: 14 | - !import_call 15 | key: 'torchvision.transforms.Resize' 16 | init: 17 | kwargs: 18 | size: 256 19 | - !import_call 20 | key: 'torchvision.transforms.CenterCrop' 21 | init: 22 | kwargs: 23 | size: [224, 224] 24 | 25 | models: 26 | model: 27 | key: 'CodecInputCompressionClassifier' 28 | kwargs: 29 | codec_encoder_decoder: !import_call 30 | key: 'torchvision.transforms.Compose' 31 | init: 32 | kwargs: 33 | transforms: 34 | - !import_call 35 | key: 'sc2bench.transforms.codec.BPGModule' 36 | init: 37 | kwargs: 38 | encoder_path: '~/software/libbpg-0.9.8/bpgenc' 39 | decoder_path: '~/software/libbpg-0.9.8/bpgdec' 40 | quality: 50 41 | returns_file_size: True 42 | post_transform: !import_call 43 | key: 'torchvision.transforms.Compose' 44 | init: 45 | kwargs: 46 | transforms: 47 | - !import_call 48 | key: 'torchvision.transforms.ToTensor' 49 | init: 50 | - !import_call 51 | key: 'torchvision.transforms.Normalize' 52 | init: 53 | kwargs: 54 | mean: [0.485, 0.456, 0.406] 55 | std: [0.229, 0.224, 0.225] 56 | analysis_config: 57 | analyzer_configs: 58 | - key: 'FileSizeAccumulator' 59 | kwargs: 60 | unit: 'KB' 61 | classification_model: 62 | 63 | key: 'resnet152' 64 | _weights: &model_weights_enum !import_get 65 | key: 'torchvision.models.resnet.ResNet152_Weights' 66 | kwargs: 67 | num_classes: 1000 68 
| weights: !getattr [*model_weights_enum, 'IMAGENET1K_V1'] 69 | 70 | test: 71 | test_data_loader: 72 | dataset_id: *imagenet_val 73 | sampler: 74 | class_or_func: !import_get 75 | key: 'torch.utils.data.SequentialSampler' 76 | kwargs: 77 | kwargs: 78 | batch_size: 1 79 | num_workers: 16 80 | drop_last: False 81 | -------------------------------------------------------------------------------- /docs/source/subpkgs/models.rst: -------------------------------------------------------------------------------- 1 | sc2bench.models 2 | ===== 3 | 4 | 5 | .. toctree:: 6 | :maxdepth: 4 7 | :caption: Contents: 8 | 9 | ---- 10 | 11 | sc2bench.models.layer 12 | ------------ 13 | 14 | .. automodule:: sc2bench.models.layer 15 | :members: 16 | :exclude-members: forward 17 | 18 | ---- 19 | 20 | sc2bench.models.registry 21 | ------------ 22 | 23 | .. automodule:: sc2bench.models.registry 24 | :members: 25 | :exclude-members: forward 26 | 27 | ---- 28 | 29 | sc2bench.models.wrapper 30 | ------------ 31 | 32 | .. automodule:: sc2bench.models.wrapper 33 | :members: 34 | :exclude-members: forward 35 | 36 | ---- 37 | 38 | sc2bench.models.backbone 39 | ------------ 40 | 41 | .. automodule:: sc2bench.models.backbone 42 | :members: 43 | :exclude-members: forward 44 | 45 | ---- 46 | 47 | sc2bench.models.detection 48 | ------------ 49 | 50 | .. automodule:: sc2bench.models.detection 51 | :members: 52 | 53 | ---- 54 | 55 | sc2bench.models.detection.base 56 | ^^^^^^^^^^^^ 57 | 58 | .. automodule:: sc2bench.models.detection.base 59 | :members: 60 | :exclude-members: forward 61 | 62 | ---- 63 | 64 | sc2bench.models.detection.rcnn 65 | ^^^^^^^^^^^^ 66 | 67 | .. automodule:: sc2bench.models.detection.rcnn 68 | :members: 69 | :exclude-members: forward 70 | 71 | ---- 72 | 73 | sc2bench.models.detection.registry 74 | ^^^^^^^^^^^^ 75 | 76 | .. automodule:: sc2bench.models.detection.registry 77 | :members: 78 | 79 | ---- 80 | 81 | sc2bench.models.detection.transform 82 | ^^^^^^^^^^^^ 83 | 84 | .. 
automodule:: sc2bench.models.detection.transform 85 | :members: 86 | :exclude-members: forward 87 | 88 | ---- 89 | 90 | sc2bench.models.detection.wrapper 91 | ^^^^^^^^^^^^ 92 | 93 | .. automodule:: sc2bench.models.detection.wrapper 94 | :members: 95 | :exclude-members: forward 96 | 97 | 98 | sc2bench.models.segmentation 99 | ------------ 100 | 101 | .. automodule:: sc2bench.models.segmentation 102 | :members: 103 | 104 | ---- 105 | 106 | sc2bench.models.segmentation.base 107 | ^^^^^^^^^^^^ 108 | .. automodule:: sc2bench.models.segmentation.base 109 | :members: 110 | :exclude-members: forward 111 | 112 | ---- 113 | 114 | sc2bench.models.segmentation.deeplabv3 115 | ^^^^^^^^^^^^ 116 | 117 | .. automodule:: sc2bench.models.segmentation.deeplabv3 118 | :members: 119 | :exclude-members: forward 120 | 121 | ---- 122 | 123 | sc2bench.models.segmentation.registry 124 | ^^^^^^^^^^^^ 125 | 126 | .. automodule:: sc2bench.models.segmentation.registry 127 | :members: 128 | 129 | ---- 130 | 131 | sc2bench.models.segmentation.wrapper 132 | ^^^^^^^^^^^^ 133 | 134 | .. 
automodule:: sc2bench.models.segmentation.wrapper 135 | :members: 136 | :exclude-members: forward 137 | -------------------------------------------------------------------------------- /configs/ilsvrc2012/input_compression/jpeg-tf_efficientnet_l2_ns.yaml: -------------------------------------------------------------------------------- 1 | datasets: 2 | &imagenet_val ilsvrc2012/val: !import_call 3 | _name: &dataset_name 'ilsvrc2012' 4 | _root: &root_dir !join ['~/datasets/', *dataset_name] 5 | key: 'torchvision.datasets.ImageFolder' 6 | init: 7 | kwargs: 8 | root: !join [*root_dir, '/val'] 9 | transform: !import_call 10 | key: 'torchvision.transforms.Compose' 11 | init: 12 | kwargs: 13 | transforms: 14 | - !import_call 15 | key: 'torchvision.transforms.Resize' 16 | init: 17 | kwargs: 18 | size: 833 19 | interpolation: !getattr 20 | - !import_get 21 | key: 'torchvision.transforms.functional.InterpolationMode' 22 | - 'BICUBIC' 23 | - !import_call 24 | key: 'torchvision.transforms.CenterCrop' 25 | init: 26 | kwargs: 27 | size: [800, 800] 28 | 29 | models: 30 | model: 31 | key: 'CodecInputCompressionClassifier' 32 | kwargs: 33 | codec_encoder_decoder: !import_call 34 | key: 'torchvision.transforms.Compose' 35 | init: 36 | kwargs: 37 | transforms: 38 | - !import_call 39 | key: 'sc2bench.transforms.codec.PILImageModule' 40 | init: 41 | kwargs: 42 | format: 'JPEG' 43 | quality: 90 44 | returns_file_size: True 45 | post_transform: !import_call 46 | key: 'torchvision.transforms.Compose' 47 | init: 48 | kwargs: 49 | transforms: 50 | - !import_call 51 | key: 'torchvision.transforms.ToTensor' 52 | init: 53 | - !import_call 54 | key: 'torchvision.transforms.Normalize' 55 | init: 56 | kwargs: 57 | mean: [0.485, 0.456, 0.406] 58 | std: [0.229, 0.224, 0.225] 59 | analysis_config: 60 | analyzer_configs: 61 | - key: 'FileSizeAccumulator' 62 | kwargs: 63 | unit: 'KB' 64 | classification_model: 65 | key: 'tf_efficientnet_l2_ns' 66 | repo_or_dir: 'rwightman/pytorch-image-models' 67 | 
kwargs: 68 | num_classes: 1000 69 | pretrained: True 70 | 71 | test: 72 | test_data_loader: 73 | dataset_id: *imagenet_val 74 | sampler: 75 | class_or_func: !import_get 76 | key: 'torch.utils.data.SequentialSampler' 77 | kwargs: 78 | kwargs: 79 | batch_size: 1 80 | num_workers: 16 81 | drop_last: False 82 | -------------------------------------------------------------------------------- /configs/ilsvrc2012/input_compression/jpeg-tf_efficientnet_l2_ns_475.yaml: -------------------------------------------------------------------------------- 1 | datasets: 2 | &imagenet_val ilsvrc2012/val: !import_call 3 | _name: &dataset_name 'ilsvrc2012' 4 | _root: &root_dir !join ['~/datasets/', *dataset_name] 5 | key: 'torchvision.datasets.ImageFolder' 6 | init: 7 | kwargs: 8 | root: !join [*root_dir, '/val'] 9 | transform: !import_call 10 | key: 'torchvision.transforms.Compose' 11 | init: 12 | kwargs: 13 | transforms: 14 | - !import_call 15 | key: 'torchvision.transforms.Resize' 16 | init: 17 | kwargs: 18 | size: 507 19 | interpolation: !getattr 20 | - !import_get 21 | key: 'torchvision.transforms.functional.InterpolationMode' 22 | - 'BICUBIC' 23 | - !import_call 24 | key: 'torchvision.transforms.CenterCrop' 25 | init: 26 | kwargs: 27 | size: [475, 475] 28 | 29 | models: 30 | model: 31 | key: 'CodecInputCompressionClassifier' 32 | kwargs: 33 | codec_encoder_decoder: !import_call 34 | key: 'torchvision.transforms.Compose' 35 | init: 36 | kwargs: 37 | transforms: 38 | - !import_call 39 | key: 'sc2bench.transforms.codec.PILImageModule' 40 | init: 41 | kwargs: 42 | format: 'JPEG' 43 | quality: 90 44 | returns_file_size: True 45 | post_transform: !import_call 46 | key: 'torchvision.transforms.Compose' 47 | init: 48 | kwargs: 49 | transforms: 50 | - !import_call 51 | key: 'torchvision.transforms.ToTensor' 52 | init: 53 | - !import_call 54 | key: 'torchvision.transforms.Normalize' 55 | init: 56 | kwargs: 57 | mean: [0.485, 0.456, 0.406] 58 | std: [0.229, 0.224, 0.225] 59 | 
analysis_config: 60 | analyzer_configs: 61 | - key: 'FileSizeAccumulator' 62 | kwargs: 63 | unit: 'KB' 64 | classification_model: 65 | key: 'tf_efficientnet_l2_ns_475' 66 | repo_or_dir: 'rwightman/pytorch-image-models' 67 | kwargs: 68 | num_classes: 1000 69 | pretrained: True 70 | 71 | test: 72 | test_data_loader: 73 | dataset_id: *imagenet_val 74 | sampler: 75 | class_or_func: !import_get 76 | key: 'torch.utils.data.SequentialSampler' 77 | kwargs: 78 | kwargs: 79 | batch_size: 1 80 | num_workers: 16 81 | drop_last: False 82 | -------------------------------------------------------------------------------- /configs/ilsvrc2012/feature_compression/jpeg-resnet50.yaml: -------------------------------------------------------------------------------- 1 | datasets: 2 | &imagenet_val ilsvrc2012/val: !import_call 3 | _name: &dataset_name 'ilsvrc2012' 4 | _root: &root_dir !join ['~/datasets/', *dataset_name] 5 | key: 'torchvision.datasets.ImageFolder' 6 | init: 7 | kwargs: 8 | root: !join [ *root_dir, '/val' ] 9 | transform: !import_call 10 | key: 'torchvision.transforms.Compose' 11 | init: 12 | kwargs: 13 | transforms: 14 | - !import_call 15 | key: 'torchvision.transforms.Resize' 16 | init: 17 | kwargs: 18 | size: 256 19 | - !import_call 20 | key: 'torchvision.transforms.CenterCrop' 21 | init: 22 | kwargs: 23 | size: [224, 224] 24 | - !import_call 25 | key: 'torchvision.transforms.ToTensor' 26 | init: 27 | - !import_call 28 | key: 'torchvision.transforms.Normalize' 29 | init: 30 | kwargs: 31 | mean: [0.485, 0.456, 0.406] 32 | std: [0.229, 0.224, 0.225] 33 | 34 | models: 35 | model: 36 | key: 'CodecFeatureCompressionClassifier' 37 | kwargs: 38 | codec_encoder_decoder: !import_call 39 | key: 'torchvision.transforms.Compose' 40 | init: 41 | kwargs: 42 | transforms: 43 | - !import_call 44 | key: 'sc2bench.transforms.codec.PILTensorModule' 45 | init: 46 | kwargs: 47 | format: 'JPEG' 48 | quality: 90 49 | returns_file_size: True 50 | encoder_config: 51 | sequential: ['conv1', 
'bn1', 'relu', 'maxpool', 'layer1', 'layer2'] 52 | decoder_config: 53 | sequential: ['layer3', 'layer4', 'avgpool'] 54 | classifier_config: 55 | sequential: ['fc'] 56 | post_transform: 57 | analysis_config: 58 | analyzer_configs: 59 | - key: 'FileSizeAccumulator' 60 | kwargs: 61 | unit: 'KB' 62 | classification_model: 63 | key: 'resnet50' 64 | _weights: &model_weights_enum !import_get 65 | key: 'torchvision.models.resnet.ResNet50_Weights' 66 | kwargs: 67 | num_classes: 1000 68 | weights: !getattr [*model_weights_enum, 'IMAGENET1K_V1'] 69 | 70 | test: 71 | test_data_loader: 72 | dataset_id: *imagenet_val 73 | collate_fn: 'default_collate_w_pil' 74 | sampler: 75 | class_or_func: !import_get 76 | key: 'torch.utils.data.SequentialSampler' 77 | kwargs: 78 | kwargs: 79 | batch_size: 1 80 | num_workers: 16 81 | drop_last: False 82 | -------------------------------------------------------------------------------- /configs/ilsvrc2012/feature_compression/webp-resnet50.yaml: -------------------------------------------------------------------------------- 1 | datasets: 2 | &imagenet_val ilsvrc2012/val: !import_call 3 | _name: &dataset_name 'ilsvrc2012' 4 | _root: &root_dir !join ['~/datasets/', *dataset_name] 5 | key: 'torchvision.datasets.ImageFolder' 6 | init: 7 | kwargs: 8 | root: !join [ *root_dir, '/val' ] 9 | transform: !import_call 10 | key: 'torchvision.transforms.Compose' 11 | init: 12 | kwargs: 13 | transforms: 14 | - !import_call 15 | key: 'torchvision.transforms.Resize' 16 | init: 17 | kwargs: 18 | size: 256 19 | - !import_call 20 | key: 'torchvision.transforms.CenterCrop' 21 | init: 22 | kwargs: 23 | size: [224, 224] 24 | - !import_call 25 | key: 'torchvision.transforms.ToTensor' 26 | init: 27 | - !import_call 28 | key: 'torchvision.transforms.Normalize' 29 | init: 30 | kwargs: 31 | mean: [0.485, 0.456, 0.406] 32 | std: [0.229, 0.224, 0.225] 33 | 34 | models: 35 | model: 36 | key: 'CodecFeatureCompressionClassifier' 37 | kwargs: 38 | codec_encoder_decoder: 
!import_call 39 | key: 'torchvision.transforms.Compose' 40 | init: 41 | kwargs: 42 | transforms: 43 | - !import_call 44 | key: 'sc2bench.transforms.codec.PILTensorModule' 45 | init: 46 | kwargs: 47 | format: 'WEBP' 48 | quality: 90 49 | returns_file_size: True 50 | encoder_config: 51 | sequential: ['conv1', 'bn1', 'relu', 'maxpool', 'layer1', 'layer2'] 52 | decoder_config: 53 | sequential: ['layer3', 'layer4', 'avgpool'] 54 | classifier_config: 55 | sequential: ['fc'] 56 | post_transform: 57 | analysis_config: 58 | analyzer_configs: 59 | - key: 'FileSizeAccumulator' 60 | kwargs: 61 | unit: 'KB' 62 | classification_model: 63 | key: 'resnet50' 64 | _weights: &model_weights_enum !import_get 65 | key: 'torchvision.models.resnet.ResNet50_Weights' 66 | kwargs: 67 | num_classes: 1000 68 | weights: !getattr [*model_weights_enum, 'IMAGENET1K_V1'] 69 | 70 | test: 71 | test_data_loader: 72 | dataset_id: *imagenet_val 73 | collate_fn: 'default_collate_w_pil' 74 | sampler: 75 | class_or_func: !import_get 76 | key: 'torch.utils.data.SequentialSampler' 77 | kwargs: 78 | kwargs: 79 | batch_size: 1 80 | num_workers: 16 81 | drop_last: False 82 | -------------------------------------------------------------------------------- /configs/ilsvrc2012/input_compression/mean_scale_hyperprior-resnet50.yaml: -------------------------------------------------------------------------------- 1 | datasets: 2 | &imagenet_val ilsvrc2012/val: !import_call 3 | _name: &dataset_name 'ilsvrc2012' 4 | _root: &root_dir !join ['~/datasets/', *dataset_name] 5 | key: 'torchvision.datasets.ImageFolder' 6 | init: 7 | kwargs: 8 | root: !join [*root_dir, '/val'] 9 | transform: !import_call 10 | key: 'torchvision.transforms.Compose' 11 | init: 12 | kwargs: 13 | transforms: 14 | - !import_call 15 | key: 'torchvision.transforms.Resize' 16 | init: 17 | kwargs: 18 | size: 256 19 | - !import_call 20 | key: 'torchvision.transforms.CenterCrop' 21 | init: 22 | kwargs: 23 | size: &input_size [224, 224] 24 | - !import_call 
25 | key: 'torchvision.transforms.ToTensor' 26 | init: 27 | - !import_call 28 | key: 'sc2bench.transforms.misc.AdaptivePad' 29 | init: 30 | kwargs: 31 | fill: 0 32 | factor: 64 33 | 34 | models: 35 | model: 36 | key: 'NeuralInputCompressionClassifier' 37 | kwargs: 38 | post_transform: !import_call 39 | key: 'torchvision.transforms.Compose' 40 | init: 41 | kwargs: 42 | transforms: 43 | - !import_call 44 | key: 'torchvision.transforms.CenterCrop' 45 | init: 46 | kwargs: 47 | size: *input_size 48 | - !import_call 49 | key: 'torchvision.transforms.Normalize' 50 | init: 51 | kwargs: 52 | mean: [0.485, 0.456, 0.406] 53 | std: [0.229, 0.224, 0.225] 54 | analysis_config: 55 | analyzes_after_compress: True 56 | analyzer_configs: 57 | - key: 'FileSizeAnalyzer' 58 | kwargs: 59 | unit: 'KB' 60 | compression_model: 61 | key: 'mbt2018_mean' 62 | kwargs: 63 | pretrained: True 64 | quality: 8 65 | metric: 'mse' 66 | classification_model: 67 | key: 'resnet50' 68 | _weights: &model_weights_enum !import_get 69 | key: 'torchvision.models.resnet.ResNet50_Weights' 70 | kwargs: 71 | num_classes: 1000 72 | weights: !getattr [*model_weights_enum, 'IMAGENET1K_V1'] 73 | 74 | test: 75 | test_data_loader: 76 | dataset_id: *imagenet_val 77 | sampler: 78 | class_or_func: !import_get 79 | key: 'torch.utils.data.SequentialSampler' 80 | kwargs: 81 | kwargs: 82 | batch_size: 1 83 | num_workers: 16 84 | drop_last: False 85 | -------------------------------------------------------------------------------- /configs/pascal_voc2012/input_compression/jpeg-deeplabv3_resnet50.yaml: -------------------------------------------------------------------------------- 1 | datasets: 2 | &pascal_val 'pascal_voc2012/val': !import_call 3 | _name: 'pascal_voc2012' 4 | _root: &root_dir '~/datasets' 5 | key: 'torchvision.datasets.VOCSegmentation' 6 | init: 7 | kwargs: 8 | root: *root_dir 9 | image_set: 'val' 10 | year: '2012' 11 | download: True 12 | transforms: !import_call 13 | key: 'custom.transform.CustomCompose' 14 
| init: 15 | kwargs: 16 | transforms: 17 | - !import_call 18 | key: 'custom.transform.CustomRandomResize' 19 | init: 20 | kwargs: 21 | min_size: 513 22 | max_size: 513 23 | - !import_call 24 | key: 'sc2bench.transforms.misc.CustomToTensor' 25 | init: 26 | kwargs: 27 | converts_sample: False 28 | converts_target: True 29 | 30 | models: 31 | model: 32 | key: 'CodecInputCompressionSegmentationModel' 33 | kwargs: 34 | codec_encoder_decoder: !import_call 35 | key: 'torchvision.transforms.Compose' 36 | init: 37 | kwargs: 38 | transforms: 39 | - !import_call 40 | key: 'sc2bench.transforms.codec.PILImageModule' 41 | init: 42 | kwargs: 43 | format: 'JPEG' 44 | quality: 90 45 | returns_file_size: True 46 | analysis_config: 47 | analyzer_configs: 48 | - key: 'FileSizeAccumulator' 49 | kwargs: 50 | unit: 'KB' 51 | post_transform: !import_call 52 | key: 'torchvision.transforms.Compose' 53 | init: 54 | kwargs: 55 | transforms: 56 | - !import_call 57 | key: 'torchvision.transforms.ToTensor' 58 | init: 59 | - !import_call 60 | key: 'torchvision.transforms.Normalize' 61 | init: 62 | kwargs: 63 | mean: [0.485, 0.456, 0.406] 64 | std: [0.229, 0.224, 0.225] 65 | segmentation_model: 66 | key: 'deeplabv3_resnet50' 67 | kwargs: 68 | pretrained: True 69 | num_classes: 21 70 | aux_loss: True 71 | src_ckpt: 'https://github.com/yoshitomo-matsubara/torchdistill/releases/download/v0.2.8/pascal_voc2012-deeplabv3_resnet50.pt' 72 | 73 | test: 74 | test_data_loader: 75 | dataset_id: *pascal_val 76 | sampler: 77 | class_or_func: !import_get 78 | key: 'torch.utils.data.SequentialSampler' 79 | kwargs: 80 | collate_fn: 'pascal_seg_eval_collate_fn' 81 | kwargs: 82 | batch_size: 1 83 | num_workers: 16 84 | -------------------------------------------------------------------------------- /configs/pascal_voc2012/input_compression/webp-deeplabv3_resnet101.yaml: -------------------------------------------------------------------------------- 1 | datasets: 2 | &pascal_val 'pascal_voc2012/val': !import_call 3 
| _name: 'pascal_voc2012' 4 | _root: &root_dir '~/datasets' 5 | key: 'torchvision.datasets.VOCSegmentation' 6 | init: 7 | kwargs: 8 | root: *root_dir 9 | image_set: 'val' 10 | year: '2012' 11 | download: True 12 | transforms: !import_call 13 | key: 'custom.transform.CustomCompose' 14 | init: 15 | kwargs: 16 | transforms: 17 | - !import_call 18 | key: 'custom.transform.CustomRandomResize' 19 | init: 20 | kwargs: 21 | min_size: 513 22 | max_size: 513 23 | - !import_call 24 | key: 'sc2bench.transforms.misc.CustomToTensor' 25 | init: 26 | kwargs: 27 | converts_sample: False 28 | converts_target: True 29 | 30 | models: 31 | model: 32 | key: 'CodecInputCompressionSegmentationModel' 33 | kwargs: 34 | codec_encoder_decoder: !import_call 35 | key: 'torchvision.transforms.Compose' 36 | init: 37 | kwargs: 38 | transforms: 39 | - !import_call 40 | key: 'sc2bench.transforms.codec.PILImageModule' 41 | init: 42 | kwargs: 43 | format: 'WEBP' 44 | quality: 90 45 | returns_file_size: True 46 | analysis_config: 47 | analyzer_configs: 48 | - key: 'FileSizeAccumulator' 49 | kwargs: 50 | unit: 'KB' 51 | post_transform: !import_call 52 | key: 'torchvision.transforms.Compose' 53 | init: 54 | kwargs: 55 | transforms: 56 | - !import_call 57 | key: 'torchvision.transforms.ToTensor' 58 | init: 59 | - !import_call 60 | key: 'torchvision.transforms.Normalize' 61 | init: 62 | kwargs: 63 | mean: [0.485, 0.456, 0.406] 64 | std: [0.229, 0.224, 0.225] 65 | segmentation_model: 66 | key: 'deeplabv3_resnet101' 67 | kwargs: 68 | pretrained: True 69 | num_classes: 21 70 | aux_loss: True 71 | src_ckpt: 'https://github.com/yoshitomo-matsubara/torchdistill/releases/download/v0.2.8/pascal_voc2012-deeplabv3_resnet101.pt' 72 | 73 | test: 74 | test_data_loader: 75 | dataset_id: *pascal_val 76 | sampler: 77 | class_or_func: !import_get 78 | key: 'torch.utils.data.SequentialSampler' 79 | kwargs: 80 | collate_fn: 'pascal_seg_eval_collate_fn' 81 | kwargs: 82 | batch_size: 1 83 | num_workers: 16 84 | 
-------------------------------------------------------------------------------- /configs/pascal_voc2012/input_compression/webp-deeplabv3_resnet50.yaml: -------------------------------------------------------------------------------- 1 | datasets: 2 | &pascal_val 'pascal_voc2012/val': !import_call 3 | _name: 'pascal_voc2012' 4 | _root: &root_dir '~/datasets' 5 | key: 'torchvision.datasets.VOCSegmentation' 6 | init: 7 | kwargs: 8 | root: *root_dir 9 | image_set: 'val' 10 | year: '2012' 11 | download: True 12 | transforms: !import_call 13 | key: 'custom.transform.CustomCompose' 14 | init: 15 | kwargs: 16 | transforms: 17 | - !import_call 18 | key: 'custom.transform.CustomRandomResize' 19 | init: 20 | kwargs: 21 | min_size: 513 22 | max_size: 513 23 | - !import_call 24 | key: 'sc2bench.transforms.misc.CustomToTensor' 25 | init: 26 | kwargs: 27 | converts_sample: False 28 | converts_target: True 29 | 30 | models: 31 | model: 32 | key: 'CodecInputCompressionSegmentationModel' 33 | kwargs: 34 | codec_encoder_decoder: !import_call 35 | key: 'torchvision.transforms.Compose' 36 | init: 37 | kwargs: 38 | transforms: 39 | - !import_call 40 | key: 'sc2bench.transforms.codec.PILImageModule' 41 | init: 42 | kwargs: 43 | format: 'WEBP' 44 | quality: 90 45 | returns_file_size: True 46 | analysis_config: 47 | analyzer_configs: 48 | - key: 'FileSizeAccumulator' 49 | kwargs: 50 | unit: 'KB' 51 | post_transform: !import_call 52 | key: 'torchvision.transforms.Compose' 53 | init: 54 | kwargs: 55 | transforms: 56 | - !import_call 57 | key: 'torchvision.transforms.ToTensor' 58 | init: 59 | - !import_call 60 | key: 'torchvision.transforms.Normalize' 61 | init: 62 | kwargs: 63 | mean: [0.485, 0.456, 0.406] 64 | std: [0.229, 0.224, 0.225] 65 | segmentation_model: 66 | key: 'deeplabv3_resnet50' 67 | kwargs: 68 | pretrained: True 69 | num_classes: 21 70 | aux_loss: True 71 | src_ckpt: 
'https://github.com/yoshitomo-matsubara/torchdistill/releases/download/v0.2.8/pascal_voc2012-deeplabv3_resnet50.pt' 72 | 73 | test: 74 | test_data_loader: 75 | dataset_id: *pascal_val 76 | sampler: 77 | class_or_func: !import_get 78 | key: 'torch.utils.data.SequentialSampler' 79 | kwargs: 80 | collate_fn: 'pascal_seg_eval_collate_fn' 81 | kwargs: 82 | batch_size: 1 83 | num_workers: 16 84 | -------------------------------------------------------------------------------- /configs/ilsvrc2012/input_compression/factorized_prior-resnet50.yaml: -------------------------------------------------------------------------------- 1 | datasets: 2 | &imagenet_val ilsvrc2012/val: !import_call 3 | _name: &dataset_name 'ilsvrc2012' 4 | _root: &root_dir !join ['~/datasets/', *dataset_name] 5 | key: 'torchvision.datasets.ImageFolder' 6 | init: 7 | kwargs: 8 | root: !join [*root_dir, '/val'] 9 | transform: !import_call 10 | key: 'torchvision.transforms.Compose' 11 | init: 12 | kwargs: 13 | transforms: 14 | - !import_call 15 | key: 'torchvision.transforms.Resize' 16 | init: 17 | kwargs: 18 | size: 256 19 | - !import_call 20 | key: 'torchvision.transforms.CenterCrop' 21 | init: 22 | kwargs: 23 | size: &input_size [224, 224] 24 | - !import_call 25 | key: 'torchvision.transforms.ToTensor' 26 | init: 27 | - !import_call 28 | key: 'sc2bench.transforms.misc.AdaptivePad' 29 | init: 30 | kwargs: 31 | fill: 0 32 | factor: 64 33 | 34 | models: 35 | model: 36 | key: 'NeuralInputCompressionClassifier' 37 | kwargs: 38 | post_transform: !import_call 39 | key: 'torchvision.transforms.Compose' 40 | init: 41 | kwargs: 42 | transforms: 43 | - !import_call 44 | key: 'torchvision.transforms.CenterCrop' 45 | init: 46 | kwargs: 47 | size: *input_size 48 | - !import_call 49 | key: 'torchvision.transforms.Normalize' 50 | init: 51 | kwargs: 52 | mean: [0.485, 0.456, 0.406] 53 | std: [0.229, 0.224, 0.225] 54 | analysis_config: 55 | analyzes_after_compress: True 56 | analyzer_configs: 57 | - key: 
'FileSizeAnalyzer' 58 | kwargs: 59 | unit: 'KB' 60 | compression_model: 61 | key: 'bmshj2018_factorized' 62 | kwargs: 63 | pretrained: True 64 | quality: 8 65 | metric: 'mse' 66 | classification_model: 67 | key: 'resnet50' 68 | _weights: &model_weights_enum !import_get 69 | key: 'torchvision.models.resnet.ResNet50_Weights' 70 | kwargs: 71 | num_classes: 1000 72 | weights: !getattr [*model_weights_enum, 'IMAGENET1K_V1'] 73 | 74 | test: 75 | test_data_loader: 76 | dataset_id: *imagenet_val 77 | sampler: 78 | class_or_func: !import_get 79 | key: 'torch.utils.data.SequentialSampler' 80 | kwargs: 81 | kwargs: 82 | batch_size: 1 83 | num_workers: 16 84 | drop_last: False 85 | -------------------------------------------------------------------------------- /configs/ilsvrc2012/input_compression/scale_hyperprior-resnet50.yaml: -------------------------------------------------------------------------------- 1 | datasets: 2 | &imagenet_val ilsvrc2012/val: !import_call 3 | _name: &dataset_name 'ilsvrc2012' 4 | _root: &root_dir !join ['~/datasets/', *dataset_name] 5 | key: 'torchvision.datasets.ImageFolder' 6 | init: 7 | kwargs: 8 | root: !join [*root_dir, '/val'] 9 | transform: !import_call 10 | key: 'torchvision.transforms.Compose' 11 | init: 12 | kwargs: 13 | transforms: 14 | - !import_call 15 | key: 'torchvision.transforms.Resize' 16 | init: 17 | kwargs: 18 | size: 256 19 | - !import_call 20 | key: 'torchvision.transforms.CenterCrop' 21 | init: 22 | kwargs: 23 | size: &input_size [224, 224] 24 | - !import_call 25 | key: 'torchvision.transforms.ToTensor' 26 | init: 27 | - !import_call 28 | key: 'sc2bench.transforms.misc.AdaptivePad' 29 | init: 30 | kwargs: 31 | fill: 0 32 | factor: 64 33 | 34 | models: 35 | model: 36 | key: 'NeuralInputCompressionClassifier' 37 | kwargs: 38 | post_transform: !import_call 39 | key: 'torchvision.transforms.Compose' 40 | init: 41 | kwargs: 42 | transforms: 43 | - !import_call 44 | key: 'torchvision.transforms.CenterCrop' 45 | init: 46 | kwargs: 
47 | size: *input_size 48 | - !import_call 49 | key: 'torchvision.transforms.Normalize' 50 | init: 51 | kwargs: 52 | mean: [0.485, 0.456, 0.406] 53 | std: [0.229, 0.224, 0.225] 54 | analysis_config: 55 | analyzes_after_compress: True 56 | analyzer_configs: 57 | - key: 'FileSizeAnalyzer' 58 | kwargs: 59 | unit: 'KB' 60 | compression_model: 61 | key: 'bmshj2018_hyperprior' 62 | kwargs: 63 | pretrained: True 64 | quality: 8 65 | metric: 'mse' 66 | classification_model: 67 | key: 'resnet50' 68 | _weights: &model_weights_enum !import_get 69 | key: 'torchvision.models.resnet.ResNet50_Weights' 70 | kwargs: 71 | num_classes: 1000 72 | weights: !getattr [*model_weights_enum, 'IMAGENET1K_V1'] 73 | 74 | test: 75 | test_data_loader: 76 | dataset_id: *imagenet_val 77 | sampler: 78 | class_or_func: !import_get 79 | key: 'torch.utils.data.SequentialSampler' 80 | kwargs: 81 | kwargs: 82 | batch_size: 1 83 | num_workers: 16 84 | drop_last: False 85 | -------------------------------------------------------------------------------- /configs/pascal_voc2012/input_compression/jpeg-deeplabv3_resnet101.yaml: -------------------------------------------------------------------------------- 1 | datasets: 2 | &pascal_val 'pascal_voc2012/val': !import_call 3 | _name: 'pascal_voc2012' 4 | _root: &root_dir '~/datasets' 5 | key: 'torchvision.datasets.VOCSegmentation' 6 | init: 7 | kwargs: 8 | root: *root_dir 9 | image_set: 'val' 10 | year: '2012' 11 | download: True 12 | transforms: !import_call 13 | key: 'custom.transform.CustomCompose' 14 | init: 15 | kwargs: 16 | transforms: 17 | - !import_call 18 | key: 'custom.transform.CustomRandomResize' 19 | init: 20 | kwargs: 21 | min_size: 513 22 | max_size: 513 23 | - !import_call 24 | key: 'sc2bench.transforms.misc.CustomToTensor' 25 | init: 26 | kwargs: 27 | converts_sample: False 28 | converts_target: True 29 | 30 | models: 31 | model: 32 | key: 'CodecInputCompressionSegmentationModel' 33 | kwargs: 34 | codec_encoder_decoder: !import_call 35 | key: 
'torchvision.transforms.Compose' 36 | init: 37 | kwargs: 38 | transforms: 39 | - !import_call 40 | key: 'sc2bench.transforms.codec.PILImageModule' 41 | init: 42 | kwargs: 43 | format: 'JPEG' 44 | quality: 90 45 | returns_file_size: True 46 | analysis_config: 47 | analyzer_configs: 48 | - key: 'FileSizeAccumulator' 49 | kwargs: 50 | unit: 'KB' 51 | post_transform: !import_call 52 | key: 'torchvision.transforms.Compose' 53 | init: 54 | kwargs: 55 | transforms: 56 | - !import_call 57 | key: 'torchvision.transforms.ToTensor' 58 | init: 59 | - !import_call 60 | key: 'torchvision.transforms.Normalize' 61 | init: 62 | kwargs: 63 | mean: [0.485, 0.456, 0.406] 64 | std: [0.229, 0.224, 0.225] 65 | segmentation_model: 66 | key: 'deeplabv3_resnet101' 67 | kwargs: 68 | pretrained: False 69 | num_classes: 21 70 | aux_loss: True 71 | src_ckpt: 'https://github.com/yoshitomo-matsubara/torchdistill/releases/download/v0.2.8/pascal_voc2012-deeplabv3_resnet101.pt' 72 | 73 | test: 74 | test_data_loader: 75 | dataset_id: *pascal_val 76 | sampler: 77 | class_or_func: !import_get 78 | key: 'torch.utils.data.SequentialSampler' 79 | kwargs: 80 | collate_fn: 'pascal_seg_eval_collate_fn' 81 | kwargs: 82 | batch_size: 1 83 | num_workers: 16 84 | -------------------------------------------------------------------------------- /configs/ilsvrc2012/input_compression/joint_autoregressive_hierarchical_prior-resnet50.yaml: -------------------------------------------------------------------------------- 1 | datasets: 2 | &imagenet_val ilsvrc2012/val: !import_call 3 | _name: &dataset_name 'ilsvrc2012' 4 | _root: &root_dir !join ['~/datasets/', *dataset_name] 5 | key: 'torchvision.datasets.ImageFolder' 6 | init: 7 | kwargs: 8 | root: !join [*root_dir, '/val'] 9 | transform: !import_call 10 | key: 'torchvision.transforms.Compose' 11 | init: 12 | kwargs: 13 | transforms: 14 | - !import_call 15 | key: 'torchvision.transforms.Resize' 16 | init: 17 | kwargs: 18 | size: 256 19 | - !import_call 20 | key: 
'torchvision.transforms.CenterCrop' 21 | init: 22 | kwargs: 23 | size: &input_size [224, 224] 24 | - !import_call 25 | key: 'torchvision.transforms.ToTensor' 26 | init: 27 | - !import_call 28 | key: 'sc2bench.transforms.misc.AdaptivePad' 29 | init: 30 | kwargs: 31 | fill: 0 32 | factor: 64 33 | 34 | models: 35 | model: 36 | key: 'NeuralInputCompressionClassifier' 37 | kwargs: 38 | post_transform: !import_call 39 | key: 'torchvision.transforms.Compose' 40 | init: 41 | kwargs: 42 | transforms: 43 | - !import_call 44 | key: 'torchvision.transforms.CenterCrop' 45 | init: 46 | kwargs: 47 | size: *input_size 48 | - !import_call 49 | key: 'torchvision.transforms.Normalize' 50 | init: 51 | kwargs: 52 | mean: [0.485, 0.456, 0.406] 53 | std: [0.229, 0.224, 0.225] 54 | analysis_config: 55 | analyzes_after_compress: True 56 | analyzer_configs: 57 | - key: 'FileSizeAnalyzer' 58 | kwargs: 59 | unit: 'KB' 60 | uses_cpu4compression_model: True 61 | compression_model: 62 | key: 'mbt2018' 63 | kwargs: 64 | pretrained: True 65 | quality: 8 66 | metric: 'mse' 67 | classification_model: 68 | key: 'resnet50' 69 | _weights: &model_weights_enum !import_get 70 | key: 'torchvision.models.resnet.ResNet50_Weights' 71 | kwargs: 72 | num_classes: 1000 73 | weights: !getattr [*model_weights_enum, 'IMAGENET1K_V1'] 74 | 75 | test: 76 | test_data_loader: 77 | dataset_id: *imagenet_val 78 | sampler: 79 | class_or_func: !import_get 80 | key: 'torch.utils.data.SequentialSampler' 81 | kwargs: 82 | kwargs: 83 | batch_size: 1 84 | num_workers: 16 85 | drop_last: False 86 | -------------------------------------------------------------------------------- /configs/ilsvrc2012/input_compression/vtm-resnet50.yaml: -------------------------------------------------------------------------------- 1 | datasets: 2 | &imagenet_val ilsvrc2012/val: !import_call 3 | _name: &dataset_name 'ilsvrc2012' 4 | _root: &root_dir !join ['~/datasets/', *dataset_name] 5 | key: 'torchvision.datasets.ImageFolder' 6 | init: 7 | 
kwargs: 8 | root: !join [ *root_dir, '/val' ] 9 | transform: !import_call 10 | key: 'torchvision.transforms.Compose' 11 | init: 12 | kwargs: 13 | transforms: 14 | - !import_call 15 | key: 'torchvision.transforms.Resize' 16 | init: 17 | kwargs: 18 | size: 256 19 | - !import_call 20 | key: 'torchvision.transforms.CenterCrop' 21 | init: 22 | kwargs: 23 | size: [224, 224] 24 | 25 | models: 26 | model: 27 | key: 'CodecInputCompressionClassifier' 28 | kwargs: 29 | codec_encoder_decoder: !import_call 30 | key: 'torchvision.transforms.Compose' 31 | init: 32 | kwargs: 33 | transforms: 34 | - !import_call 35 | key: 'sc2bench.transforms.codec.VTMModule' 36 | init: 37 | kwargs: 38 | encoder_path: '~/software/VVCSoftware_VTM/bin/EncoderAppStatic' 39 | decoder_path: '~/software/VVCSoftware_VTM/bin/DecoderAppStatic' 40 | config_path: '~/software/VVCSoftware_VTM/cfg/encoder_intra_vtm.cfg' 41 | color_mode: 'ycbcr' 42 | quality: 63 43 | returns_file_size: True 44 | post_transform: !import_call 45 | key: 'torchvision.transforms.Compose' 46 | init: 47 | kwargs: 48 | transforms: 49 | - !import_call 50 | key: 'torchvision.transforms.ToTensor' 51 | init: 52 | - !import_call 53 | key: 'torchvision.transforms.Normalize' 54 | init: 55 | kwargs: 56 | mean: [0.485, 0.456, 0.406] 57 | std: [0.229, 0.224, 0.225] 58 | analysis_config: 59 | analyzer_configs: 60 | - key: 'FileSizeAccumulator' 61 | kwargs: 62 | unit: 'KB' 63 | classification_model: 64 | key: 'resnet50' 65 | _weights: &model_weights_enum !import_get 66 | key: 'torchvision.models.resnet.ResNet50_Weights' 67 | kwargs: 68 | num_classes: 1000 69 | weights: !getattr [*model_weights_enum, 'IMAGENET1K_V1'] 70 | 71 | test: 72 | test_data_loader: 73 | dataset_id: *imagenet_val 74 | sampler: 75 | class_or_func: !import_get 76 | key: 'torch.utils.data.SequentialSampler' 77 | kwargs: 78 | kwargs: 79 | batch_size: 1 80 | num_workers: 16 81 | drop_last: False 82 | -------------------------------------------------------------------------------- 
/docs/source/conf.py: -------------------------------------------------------------------------------- 1 | # Configuration file for the Sphinx documentation builder. 2 | # 3 | # This file only contains a selection of the most common options. For a full 4 | # list see the documentation: 5 | # https://www.sphinx-doc.org/en/master/usage/configuration.html 6 | 7 | # -- Path setup -------------------------------------------------------------- 8 | 9 | # If extensions (or modules to document with autodoc) are in another directory, 10 | # add these directories to sys.path here. If the directory is relative to the 11 | # documentation root, use os.path.abspath to make it absolute, like shown here. 12 | # 13 | import sys 14 | import pathlib 15 | 16 | sys.path.insert(0, pathlib.Path(__file__).parents[2].resolve().as_posix()) 17 | 18 | 19 | # -- Project information ----------------------------------------------------- 20 | 21 | project = 'SC2 Benchmark' 22 | copyright = '2023, Yoshitomo Matsubara' 23 | author = 'Yoshitomo Matsubara' 24 | 25 | # The full version, including alpha/beta/rc tags 26 | import sc2bench 27 | version = 'v' + sc2bench.__version__ 28 | release = version 29 | 30 | 31 | # -- General configuration --------------------------------------------------- 32 | 33 | # Add any Sphinx extension module names here, as strings. They can be 34 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 35 | # ones. 
36 | extensions = [ 37 | 'sphinx.ext.autodoc', 38 | 'sphinx.ext.autosummary', 39 | 'sphinx.ext.doctest', 40 | 'sphinx.ext.viewcode', 41 | 'sphinx_rtd_theme', 42 | 'sphinxcontrib.youtube' 43 | ] 44 | autodoc_member_order = 'bysource' 45 | highlight_language = 'python' 46 | 47 | html_show_sourcelink = False 48 | html_context = { 49 | 'display_github': True, 50 | 'github_user': 'yoshitomo-matsubara', 51 | 'github_repo': 'sc2-benchmark', 52 | 'github_version': 'main', 53 | 'conf_py_path': '/docs/source/' 54 | } 55 | 56 | import sphinx_rtd_theme 57 | html_theme = 'sphinx_rtd_theme' 58 | 59 | html_theme_options = { 60 | 'analytics_id': 'G-39T9X4DN85', 61 | 'display_version': True, 62 | 'style_external_links': True 63 | } 64 | 65 | # Add any paths that contain templates here, relative to this directory. 66 | templates_path = ['_templates'] 67 | 68 | # List of patterns, relative to source directory, that match files and 69 | # directories to ignore when looking for source files. 70 | # This pattern also affects html_static_path and html_extra_path. 71 | exclude_patterns = [] 72 | 73 | 74 | # -- Options for HTML output ------------------------------------------------- 75 | 76 | # The theme to use for HTML and HTML Help pages. See the documentation for 77 | # a list of builtin themes. 78 | # 79 | html_theme = 'sphinx_rtd_theme' 80 | 81 | # Add any paths that contain custom static files (such as style sheets) here, 82 | # relative to this directory. They are copied after the builtin static files, 83 | # so a file named "default.css" will overwrite the builtin "default.css". 
html_static_path = ['_static']
--------------------------------------------------------------------------------
/configs/pascal_voc2012/input_compression/bpg-deeplabv3_resnet50.yaml:
--------------------------------------------------------------------------------
# PASCAL VOC 2012 semantic segmentation with BPG codec-based input compression
# followed by a DeepLabv3-ResNet50 model.
datasets:
  &pascal_val 'pascal_voc2012/val': !import_call
    _name: 'pascal_voc2012'
    _root: &root_dir '~/datasets'
    key: 'torchvision.datasets.VOCSegmentation'
    init:
      kwargs:
        root: *root_dir
        image_set: 'val'
        year: '2012'
        download: True
        transforms: !import_call
          key: 'custom.transform.CustomCompose'
          init:
            kwargs:
              transforms:
                - !import_call
                  key: 'custom.transform.CustomRandomResize'
                  init:
                    kwargs:
                      # min == max: deterministic resize to 513 for evaluation
                      min_size: 513
                      max_size: 513
                - !import_call
                  key: 'sc2bench.transforms.misc.CustomToTensor'
                  init:
                    kwargs:
                      # sample stays a PIL image for the codec; target becomes a tensor
                      converts_sample: False
                      converts_target: True

models:
  model:
    key: 'CodecInputCompressionSegmentationModel'
    kwargs:
      codec_encoder_decoder: !import_call
        key: 'torchvision.transforms.Compose'
        init:
          kwargs:
            transforms:
              - !import_call
                key: 'sc2bench.transforms.codec.BPGModule'
                init:
                  kwargs:
                    encoder_path: '~/software/libbpg-0.9.8/bpgenc'
                    decoder_path: '~/software/libbpg-0.9.8/bpgdec'
                    quality: 50
                    returns_file_size: True
      analysis_config:
        analyzer_configs:
          - key: 'FileSizeAccumulator'
            kwargs:
              unit: 'KB'
      post_transform: !import_call
        key: 'torchvision.transforms.Compose'
        init:
          kwargs:
            transforms:
              - !import_call
                key: 'torchvision.transforms.ToTensor'
                init:
              - !import_call
                key: 'torchvision.transforms.Normalize'
                init:
                  kwargs:
                    mean: [0.485, 0.456, 0.406]
                    std: [0.229, 0.224, 0.225]
      segmentation_model:
        key: 'deeplabv3_resnet50'
        kwargs:
          pretrained: True
          num_classes: 21
          aux_loss: True
        src_ckpt: 'https://github.com/yoshitomo-matsubara/torchdistill/releases/download/v0.2.8/pascal_voc2012-deeplabv3_resnet50.pt'

test:
  test_data_loader:
    dataset_id: *pascal_val
    sampler:
      class_or_func: !import_get
        key: 'torch.utils.data.SequentialSampler'
      kwargs:
    collate_fn: 'pascal_seg_eval_collate_fn'
    kwargs:
      batch_size: 1
      num_workers: 16
--------------------------------------------------------------------------------
/configs/pascal_voc2012/input_compression/bpg-deeplabv3_resnet101.yaml:
--------------------------------------------------------------------------------
# Same pipeline as bpg-deeplabv3_resnet50.yaml, with a DeepLabv3-ResNet101 backbone.
datasets:
  &pascal_val 'pascal_voc2012/val': !import_call
    _name: 'pascal_voc2012'
    _root: &root_dir '~/datasets'
    key: 'torchvision.datasets.VOCSegmentation'
    init:
      kwargs:
        root: *root_dir
        image_set: 'val'
        year: '2012'
        download: True
        transforms: !import_call
          key: 'custom.transform.CustomCompose'
          init:
            kwargs:
              transforms:
                - !import_call
                  key: 'custom.transform.CustomRandomResize'
                  init:
                    kwargs:
                      # min == max: deterministic resize to 513 for evaluation
                      min_size: 513
                      max_size: 513
                - !import_call
                  key: 'sc2bench.transforms.misc.CustomToTensor'
                  init:
                    kwargs:
                      # sample stays a PIL image for the codec; target becomes a tensor
                      converts_sample: False
                      converts_target: True

models:
  model:
    key: 'CodecInputCompressionSegmentationModel'
    kwargs:
      codec_encoder_decoder: !import_call
        key: 'torchvision.transforms.Compose'
        init:
          kwargs:
            transforms:
              - !import_call
                key: 'sc2bench.transforms.codec.BPGModule'
                init:
                  kwargs:
                    encoder_path: '~/software/libbpg-0.9.8/bpgenc'
                    decoder_path: '~/software/libbpg-0.9.8/bpgdec'
                    quality: 50
                    returns_file_size: True
      analysis_config:
        analyzer_configs:
          - key: 'FileSizeAccumulator'
            kwargs:
              unit: 'KB'
      post_transform: !import_call
        key: 'torchvision.transforms.Compose'
        init:
          kwargs:
            transforms:
              - !import_call
                key: 'torchvision.transforms.ToTensor'
                init:
              - !import_call
                key: 'torchvision.transforms.Normalize'
                init:
                  kwargs:
                    mean: [0.485, 0.456, 0.406]
                    std: [0.229, 0.224, 0.225]
      segmentation_model:
        key: 'deeplabv3_resnet101'
        kwargs:
          pretrained: True
          num_classes: 21
          aux_loss: True
        src_ckpt: 'https://github.com/yoshitomo-matsubara/torchdistill/releases/download/v0.2.8/pascal_voc2012-deeplabv3_resnet101.pt'

test:
  test_data_loader:
    dataset_id: *pascal_val
    sampler:
      class_or_func: !import_get
        key: 'torch.utils.data.SequentialSampler'
      kwargs:
    collate_fn: 'pascal_seg_eval_collate_fn'
    kwargs:
      batch_size: 1
      num_workers: 16
--------------------------------------------------------------------------------