├── IQA ├── fastvqa │ ├── __init__.py │ ├── models │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── head.cpython-38.pyc │ │ │ ├── resnet.cpython-38.pyc │ │ │ ├── __init__.cpython-38.pyc │ │ │ ├── swin_v1.cpython-38.pyc │ │ │ ├── swin_v2.cpython-38.pyc │ │ │ ├── evaluator.cpython-38.pyc │ │ │ ├── conv_backbone.cpython-38.pyc │ │ │ ├── swin_backbone.cpython-38.pyc │ │ │ └── xclip_backbone.cpython-38.pyc │ │ ├── evaluator.py │ │ ├── head.py │ │ ├── swin_v1.py │ │ └── swin_v2.py │ └── __pycache__ │ │ └── __init__.cpython-38.pyc ├── options │ └── fast-sama-iqa.yml └── demo_train_iqa_baseline.py ├── method.png ├── VQA ├── fastvqa │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-37.pyc │ │ ├── __init__.cpython-38.pyc │ │ └── __init__.cpython-39.pyc │ ├── models │ │ ├── __pycache__ │ │ │ ├── head.cpython-37.pyc │ │ │ ├── head.cpython-38.pyc │ │ │ ├── head.cpython-39.pyc │ │ │ ├── __init__.cpython-37.pyc │ │ │ ├── __init__.cpython-38.pyc │ │ │ ├── __init__.cpython-39.pyc │ │ │ ├── backbone.cpython-38.pyc │ │ │ ├── swin_v1.cpython-38.pyc │ │ │ ├── evaluator.cpython-37.pyc │ │ │ ├── evaluator.cpython-38.pyc │ │ │ ├── evaluator.cpython-39.pyc │ │ │ ├── backbone_v0_1.cpython-38.pyc │ │ │ ├── conv_backbone.cpython-37.pyc │ │ │ ├── conv_backbone.cpython-38.pyc │ │ │ ├── conv_backbone.cpython-39.pyc │ │ │ ├── swin_backbone.cpython-37.pyc │ │ │ ├── swin_backbone.cpython-38.pyc │ │ │ ├── swin_backbone.cpython-39.pyc │ │ │ ├── xclip_backbone.cpython-37.pyc │ │ │ ├── xclip_backbone.cpython-38.pyc │ │ │ └── swin_backbone_scale.cpython-38.pyc │ │ ├── __init__.py │ │ ├── evaluator.py │ │ └── head.py │ ├── datasets │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-37.pyc │ │ │ ├── __init__.cpython-38.pyc │ │ │ ├── __init__.cpython-39.pyc │ │ │ ├── basic_datasets.cpython-37.pyc │ │ │ ├── basic_datasets.cpython-38.pyc │ │ │ ├── basic_datasets.cpython-39.pyc │ │ │ ├── fusion_datasets.cpython-37.pyc │ │ │ ├── fusion_datasets.cpython-38.pyc │ │ │ ├── fusion_datasets.cpython-39.pyc │ │ │ ├── inference_dataset.cpython-38.pyc │ │ │ └── fusion_datasets_TEST.cpython-38.pyc │ │ └── __init__.py │ └── version.py ├── pretrained_weights │ └── README.md ├── options │ ├── fast-SAMA-test.yml │ ├── fast-SAMA-finetune.yml │ └── fast-SAMA-train.yml ├── demo_test.py ├── demo_finetune.py ├── demo_train.py └── examplar_data_labels │ └── LIVE_VQC │ └── mylabels.txt └── README.md /IQA/fastvqa/__init__.py: -------------------------------------------------------------------------------- 1 | from .models import * 2 | -------------------------------------------------------------------------------- /method.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sissuire/SAMA/HEAD/method.png -------------------------------------------------------------------------------- /VQA/fastvqa/__init__.py: -------------------------------------------------------------------------------- 1 | from .datasets import * 2 | from .models import * 3 | -------------------------------------------------------------------------------- /IQA/fastvqa/models/__init__.py: -------------------------------------------------------------------------------- 1 | from .evaluator import IQAModel 2 | 3 | __all__ = [ 4 | "IQAModel", 5 | ] 6 | -------------------------------------------------------------------------------- /VQA/pretrained_weights/README.md: -------------------------------------------------------------------------------- 1 | put the pretrained weights in this folder, and set the configuration in 
the `.yml` file. 2 | -------------------------------------------------------------------------------- /IQA/fastvqa/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sissuire/SAMA/HEAD/IQA/fastvqa/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /VQA/fastvqa/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sissuire/SAMA/HEAD/VQA/fastvqa/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /VQA/fastvqa/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sissuire/SAMA/HEAD/VQA/fastvqa/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /VQA/fastvqa/__pycache__/__init__.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sissuire/SAMA/HEAD/VQA/fastvqa/__pycache__/__init__.cpython-39.pyc -------------------------------------------------------------------------------- /IQA/fastvqa/models/__pycache__/head.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sissuire/SAMA/HEAD/IQA/fastvqa/models/__pycache__/head.cpython-38.pyc -------------------------------------------------------------------------------- /IQA/fastvqa/models/__pycache__/resnet.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sissuire/SAMA/HEAD/IQA/fastvqa/models/__pycache__/resnet.cpython-38.pyc -------------------------------------------------------------------------------- /VQA/fastvqa/models/__pycache__/head.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sissuire/SAMA/HEAD/VQA/fastvqa/models/__pycache__/head.cpython-37.pyc -------------------------------------------------------------------------------- /VQA/fastvqa/models/__pycache__/head.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sissuire/SAMA/HEAD/VQA/fastvqa/models/__pycache__/head.cpython-38.pyc -------------------------------------------------------------------------------- /VQA/fastvqa/models/__pycache__/head.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sissuire/SAMA/HEAD/VQA/fastvqa/models/__pycache__/head.cpython-39.pyc -------------------------------------------------------------------------------- /IQA/fastvqa/models/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sissuire/SAMA/HEAD/IQA/fastvqa/models/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /IQA/fastvqa/models/__pycache__/swin_v1.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sissuire/SAMA/HEAD/IQA/fastvqa/models/__pycache__/swin_v1.cpython-38.pyc 
-------------------------------------------------------------------------------- /IQA/fastvqa/models/__pycache__/swin_v2.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sissuire/SAMA/HEAD/IQA/fastvqa/models/__pycache__/swin_v2.cpython-38.pyc -------------------------------------------------------------------------------- /VQA/fastvqa/models/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sissuire/SAMA/HEAD/VQA/fastvqa/models/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /VQA/fastvqa/models/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sissuire/SAMA/HEAD/VQA/fastvqa/models/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /VQA/fastvqa/models/__pycache__/__init__.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sissuire/SAMA/HEAD/VQA/fastvqa/models/__pycache__/__init__.cpython-39.pyc -------------------------------------------------------------------------------- /VQA/fastvqa/models/__pycache__/backbone.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sissuire/SAMA/HEAD/VQA/fastvqa/models/__pycache__/backbone.cpython-38.pyc -------------------------------------------------------------------------------- /VQA/fastvqa/models/__pycache__/swin_v1.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sissuire/SAMA/HEAD/VQA/fastvqa/models/__pycache__/swin_v1.cpython-38.pyc -------------------------------------------------------------------------------- /IQA/fastvqa/models/__pycache__/evaluator.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sissuire/SAMA/HEAD/IQA/fastvqa/models/__pycache__/evaluator.cpython-38.pyc -------------------------------------------------------------------------------- /VQA/fastvqa/datasets/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sissuire/SAMA/HEAD/VQA/fastvqa/datasets/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /VQA/fastvqa/datasets/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sissuire/SAMA/HEAD/VQA/fastvqa/datasets/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /VQA/fastvqa/datasets/__pycache__/__init__.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sissuire/SAMA/HEAD/VQA/fastvqa/datasets/__pycache__/__init__.cpython-39.pyc -------------------------------------------------------------------------------- /VQA/fastvqa/models/__pycache__/evaluator.cpython-37.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Sissuire/SAMA/HEAD/VQA/fastvqa/models/__pycache__/evaluator.cpython-37.pyc -------------------------------------------------------------------------------- /VQA/fastvqa/models/__pycache__/evaluator.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sissuire/SAMA/HEAD/VQA/fastvqa/models/__pycache__/evaluator.cpython-38.pyc -------------------------------------------------------------------------------- /VQA/fastvqa/models/__pycache__/evaluator.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sissuire/SAMA/HEAD/VQA/fastvqa/models/__pycache__/evaluator.cpython-39.pyc -------------------------------------------------------------------------------- /IQA/fastvqa/models/__pycache__/conv_backbone.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sissuire/SAMA/HEAD/IQA/fastvqa/models/__pycache__/conv_backbone.cpython-38.pyc -------------------------------------------------------------------------------- /IQA/fastvqa/models/__pycache__/swin_backbone.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sissuire/SAMA/HEAD/IQA/fastvqa/models/__pycache__/swin_backbone.cpython-38.pyc -------------------------------------------------------------------------------- /VQA/fastvqa/models/__pycache__/backbone_v0_1.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sissuire/SAMA/HEAD/VQA/fastvqa/models/__pycache__/backbone_v0_1.cpython-38.pyc -------------------------------------------------------------------------------- /VQA/fastvqa/models/__pycache__/conv_backbone.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sissuire/SAMA/HEAD/VQA/fastvqa/models/__pycache__/conv_backbone.cpython-37.pyc -------------------------------------------------------------------------------- /VQA/fastvqa/models/__pycache__/conv_backbone.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sissuire/SAMA/HEAD/VQA/fastvqa/models/__pycache__/conv_backbone.cpython-38.pyc -------------------------------------------------------------------------------- /VQA/fastvqa/models/__pycache__/conv_backbone.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sissuire/SAMA/HEAD/VQA/fastvqa/models/__pycache__/conv_backbone.cpython-39.pyc -------------------------------------------------------------------------------- /VQA/fastvqa/models/__pycache__/swin_backbone.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sissuire/SAMA/HEAD/VQA/fastvqa/models/__pycache__/swin_backbone.cpython-37.pyc -------------------------------------------------------------------------------- /VQA/fastvqa/models/__pycache__/swin_backbone.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sissuire/SAMA/HEAD/VQA/fastvqa/models/__pycache__/swin_backbone.cpython-38.pyc -------------------------------------------------------------------------------- 
/VQA/fastvqa/models/__pycache__/swin_backbone.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sissuire/SAMA/HEAD/VQA/fastvqa/models/__pycache__/swin_backbone.cpython-39.pyc -------------------------------------------------------------------------------- /IQA/fastvqa/models/__pycache__/xclip_backbone.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sissuire/SAMA/HEAD/IQA/fastvqa/models/__pycache__/xclip_backbone.cpython-38.pyc -------------------------------------------------------------------------------- /VQA/fastvqa/datasets/__pycache__/basic_datasets.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sissuire/SAMA/HEAD/VQA/fastvqa/datasets/__pycache__/basic_datasets.cpython-37.pyc -------------------------------------------------------------------------------- /VQA/fastvqa/datasets/__pycache__/basic_datasets.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sissuire/SAMA/HEAD/VQA/fastvqa/datasets/__pycache__/basic_datasets.cpython-38.pyc -------------------------------------------------------------------------------- /VQA/fastvqa/datasets/__pycache__/basic_datasets.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sissuire/SAMA/HEAD/VQA/fastvqa/datasets/__pycache__/basic_datasets.cpython-39.pyc -------------------------------------------------------------------------------- /VQA/fastvqa/models/__pycache__/xclip_backbone.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sissuire/SAMA/HEAD/VQA/fastvqa/models/__pycache__/xclip_backbone.cpython-37.pyc -------------------------------------------------------------------------------- /VQA/fastvqa/models/__pycache__/xclip_backbone.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sissuire/SAMA/HEAD/VQA/fastvqa/models/__pycache__/xclip_backbone.cpython-38.pyc -------------------------------------------------------------------------------- /VQA/fastvqa/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | from .fusion_datasets import FusionDataset, FineTuneDataset 3 | 4 | __all__ = [ 5 | "FusionDataset", 6 | "FineTuneDataset" 7 | ] -------------------------------------------------------------------------------- /VQA/fastvqa/datasets/__pycache__/fusion_datasets.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sissuire/SAMA/HEAD/VQA/fastvqa/datasets/__pycache__/fusion_datasets.cpython-37.pyc -------------------------------------------------------------------------------- /VQA/fastvqa/datasets/__pycache__/fusion_datasets.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sissuire/SAMA/HEAD/VQA/fastvqa/datasets/__pycache__/fusion_datasets.cpython-38.pyc -------------------------------------------------------------------------------- /VQA/fastvqa/datasets/__pycache__/fusion_datasets.cpython-39.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Sissuire/SAMA/HEAD/VQA/fastvqa/datasets/__pycache__/fusion_datasets.cpython-39.pyc -------------------------------------------------------------------------------- /VQA/fastvqa/datasets/__pycache__/inference_dataset.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sissuire/SAMA/HEAD/VQA/fastvqa/datasets/__pycache__/inference_dataset.cpython-38.pyc -------------------------------------------------------------------------------- /VQA/fastvqa/models/__pycache__/swin_backbone_scale.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sissuire/SAMA/HEAD/VQA/fastvqa/models/__pycache__/swin_backbone_scale.cpython-38.pyc -------------------------------------------------------------------------------- /VQA/fastvqa/datasets/__pycache__/fusion_datasets_TEST.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Sissuire/SAMA/HEAD/VQA/fastvqa/datasets/__pycache__/fusion_datasets_TEST.cpython-38.pyc -------------------------------------------------------------------------------- /VQA/fastvqa/models/__init__.py: -------------------------------------------------------------------------------- 1 | from .swin_backbone import SwinTransformer3D as VQABackbone 2 | from .swin_backbone import SwinTransformer2D as IQABackbone 3 | from .head import VQAHead, IQAHead, VARHead 4 | 5 | from .evaluator import DiViDeAddEvaluator 6 | 7 | __all__ = [ 8 | "VQABackbone", 9 | "IQABackbone", 10 | "VQAHead", 11 | "IQAHead", 12 | "VARHead", 13 | "DiViDeAddEvaluator" 14 | ] 15 | -------------------------------------------------------------------------------- /VQA/fastvqa/version.py: -------------------------------------------------------------------------------- 1 | __version__ = "3.1.0" 2 | 3 | 4 | def parse_version_info(version_str): 5 | version_info = [] 6 | for x in version_str.split("."): 7 | if x.isdigit(): 8 | version_info.append(int(x)) 9 | elif x.find("rc") != -1: 10 | patch_version = x.split("rc") 11 | version_info.append(int(patch_version[0])) 12 | version_info.append(f"rc{patch_version[1]}") 13 | return tuple(version_info) 14 | 15 | 16 | version_info = parse_version_info(__version__) 17 | -------------------------------------------------------------------------------- /IQA/fastvqa/models/evaluator.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | from .swin_v1 import SwinTransformer as ImageEncoder_v1 5 | from .swin_v2 import SwinTransformerV2 as ImageEncoder 6 | from .head import VQAHead, IQAHead, VARHead, VQAHeadMLP, HyperHead 7 | 8 | 9 | class IQAModel(nn.Module): 10 | def __init__(self): 11 | super().__init__() 12 | # self.backbone = ImageEncoder_v1() 13 | self.backbone = ImageEncoder() 14 | self.vqa_head = VQAHeadMLP() 15 | 16 | def forward(self, x): 17 | f = self.backbone(x) 18 | scores = self.vqa_head(f) 19 | return scores.flatten(1).mean(1) 20 | 21 | -------------------------------------------------------------------------------- /IQA/options/fast-sama-iqa.yml: -------------------------------------------------------------------------------- 1 | # Swin-Image-Encoder for IQA 2 | 3 | name: SAMA-IQA-sama-koniq 4 | 5 | stype: sama # [fragment, sama, sama-spm] 6 | 7 | num_epochs: 50 8 | l_num_epochs: 0 9 | warmup_epochs: 5 10 | constant_epochs: 150 11 | ema: true 
12 | save_model: true 13 | batch_size: 64 # 64 14 | num_workers: 8 15 | test_batch_size: 64 #64 16 | test_num_workers: 8 17 | num_splits: 10 18 | 19 | data: 20 | database: koniq 21 | data_info: PATH_TO_DATA/koniq/koniq10k_scores.csv 22 | data_prefix: PATH_TO_DATA/data/koniq/1024x768 23 | 24 | fwin_h: 8 25 | fwin_w: 8 26 | fsize_h: 32 27 | fsize_w: 32 28 | 29 | 30 | # data: 31 | # database: spaq 32 | # data_info: PATH_TO_DATA/spaq/spaq_info.txt 33 | # data_prefix: PATH_TO_DATA/spaq/TestImage 34 | 35 | # fwin_h: 8 36 | # fwin_w: 8 37 | # fsize_h: 32 38 | # fsize_w: 32 39 | 40 | 41 | # model: # discard 42 | # backbone_type: swin_image_v2 43 | # head_in_channels: 768 44 | # head_hidden_channels: 128 45 | 46 | optimizer: 47 | lr: !!float 1e-3 48 | backbone_lr_mult: !!float 1e-1 49 | wd: 0.05 50 | 51 | load_path: PATH_TO_MODEL/swinv2_tiny_patch4_window8_256.pth 52 | -------------------------------------------------------------------------------- /VQA/options/fast-SAMA-test.yml: -------------------------------------------------------------------------------- 1 | 2 | name: SAMA-VQA-sama 3 | test_batch_size: 1 4 | test_num_workers: 2 5 | 6 | stype: sama # [sama, sama-c, sama-mix, sama-swm, sama-spm, fragments==others] 7 | 8 | data: 9 | val-ltest: 10 | type: FusionDataset 11 | args: 12 | phase: test 13 | anno_file: ./examplar_data_labels/LSVQ/labels_mytest.txt 14 | data_prefix: PATH_TO_DATA/LSVQ 15 | sample_types: 16 | fragments: 17 | fragments_h: 7 18 | fragments_w: 7 19 | fsize_h: 32 20 | fsize_w: 32 21 | aligned: 32 22 | clip_len: 32 23 | frame_interval: 2 24 | num_clips: 4 25 | model: 26 | type: DiViDeAddEvaluator 27 | args: 28 | backbone: 29 | fragments: 30 | checkpoint: false 31 | pretrained: 32 | backbone_size: swin_tiny_grpb 33 | backbone_preserve_keys: fragments 34 | divide_head: false 35 | vqa_head: 36 | in_channels: 768 37 | hidden_channels: 64 38 | 39 | optimizer: 40 | lr: !!float 1e-3 41 | backbone_lr_mult: !!float 1e-1 42 | wd: 0.05 43 | 44 | load_path: ./pretrained_weights/SAMA-baseline_val-ltest_s_dev_v0.0.pth 45 | -------------------------------------------------------------------------------- /VQA/options/fast-SAMA-finetune.yml: -------------------------------------------------------------------------------- 1 | 2 | name: SAMA-baseline-finetune-youtube 3 | split_seed: 10 4 | 5 | num_epochs: 30 6 | l_num_epochs: 0 7 | warmup_epochs: 2.5 8 | ema: true 9 | save_model: true 10 | train_batch_size: 12 11 | train_num_workers: 6 12 | test_batch_size: 1 13 | test_num_workers: 6 14 | 15 | stype: sama # [sama, sama-c, sama-mix, sama-swm, sama-spm, fragments==others] 16 | 17 | data: 18 | # database: livevqc 19 | # type: FineTuneDataset 20 | # anno_file: ./examplar_data_labels/LIVE_VQC/mylabels.txt 21 | # data_prefix: PATH_TO_DATA/LIVE-VQC 22 | # train: 23 | # sample_types: 24 | # fragments: 25 | # fragments_h: 7 26 | # fragments_w: 7 27 | # fsize_h: 32 28 | # fsize_w: 32 29 | # aligned: 32 30 | # clip_len: 32 31 | # frame_interval: 2 32 | # num_clips: 1 33 | # test: 34 | # sample_types: 35 | # fragments: 36 | # fragments_h: 7 37 | # fragments_w: 7 38 | # fsize_h: 32 39 | # fsize_w: 32 40 | # aligned: 32 41 | # clip_len: 32 42 | # frame_interval: 2 43 | # num_clips: 4 44 | 45 | 46 | # database: kv1k 47 | # type: FineTuneDataset 48 | # anno_file: ./examplar_data_labels/KoNViD/mylabels.txt 49 | # data_prefix: PATH_TO_DATA/KoNViD 50 | # train: 51 | # sample_types: 52 | # fragments: 53 | # fragments_h: 7 54 | # fragments_w: 7 55 | # fsize_h: 32 56 | # fsize_w: 32 57 | # aligned: 32 58 | # clip_len: 
32 59 | # frame_interval: 2 60 | # num_clips: 1 61 | # test: 62 | # sample_types: 63 | # fragments: 64 | # fragments_h: 7 65 | # fragments_w: 7 66 | # fsize_h: 32 67 | # fsize_w: 32 68 | # aligned: 32 69 | # clip_len: 32 70 | # frame_interval: 2 71 | # num_clips: 4 72 | 73 | database: youtube 74 | type: FineTuneDataset 75 | anno_file: ./examplar_data_labels/YouTubeUGC/mylabels.txt 76 | data_prefix: PATH_TO_DATA/YouTube 77 | train: 78 | sample_types: 79 | fragments: 80 | fragments_h: 7 81 | fragments_w: 7 82 | fsize_h: 32 83 | fsize_w: 32 84 | aligned: 32 85 | clip_len: 32 86 | frame_interval: 2 87 | num_clips: 1 88 | test: 89 | sample_types: 90 | fragments: 91 | fragments_h: 7 92 | fragments_w: 7 93 | fsize_h: 32 94 | fsize_w: 32 95 | aligned: 32 96 | clip_len: 32 97 | frame_interval: 2 98 | num_clips: 4 99 | 100 | model: 101 | type: DiViDeAddEvaluator 102 | args: 103 | backbone: 104 | fragments: 105 | checkpoint: false 106 | pretrained: 107 | backbone_size: swin_tiny_grpb 108 | backbone_preserve_keys: fragments 109 | divide_head: false 110 | vqa_head: 111 | in_channels: 768 112 | hidden_channels: 64 113 | 114 | optimizer: 115 | lr: !!float 1e-3 116 | backbone_lr_mult: !!float 1e-1 117 | wd: 0.05 118 | 119 | load_path: PATH_TO_MODEL/pretrained_weights/SAMA-baseline_val-ltest_s_dev_v0.0.pth 120 | 121 | -------------------------------------------------------------------------------- /VQA/options/fast-SAMA-train.yml: -------------------------------------------------------------------------------- 1 | 2 | name: SAMA-VQA-sama 3 | num_epochs: 30 4 | l_num_epochs: 0 5 | warmup_epochs: 2.5 6 | ema: true 7 | save_model: true 8 | train_batch_size: 12 9 | train_num_workers: 6 10 | test_batch_size: 1 11 | test_num_workers: 6 12 | 13 | stype: sama # [sama, sama-c, sama-mix, sama-swm, sama-spm, fragments==others] 14 | 15 | data: 16 | train: 17 | type: FusionDataset 18 | args: 19 | phase: train 20 | anno_file: ./examplar_data_labels/LSVQ/labels_mytrain.txt 21 | data_prefix: PATH_TO_DATA/LSVQ 22 | sample_types: 23 | fragments: 24 | fragments_h: 7 25 | fragments_w: 7 26 | fsize_h: 32 27 | fsize_w: 32 28 | aligned: 32 29 | clip_len: 32 30 | frame_interval: 2 31 | num_clips: 1 32 | val-livevqc: 33 | type: FusionDataset 34 | args: 35 | phase: test 36 | anno_file: ./examplar_data_labels/LIVE_VQC/mylabels.txt 37 | data_prefix: PATH_TO_DATA/LIVE-VQC 38 | sample_types: 39 | fragments: 40 | fragments_h: 7 41 | fragments_w: 7 42 | fsize_h: 32 43 | fsize_w: 32 44 | aligned: 32 45 | clip_len: 32 46 | frame_interval: 2 47 | num_clips: 4 48 | val-kv1k: 49 | type: FusionDataset 50 | args: 51 | phase: test 52 | anno_file: ./examplar_data_labels/KoNViD/mylabels.txt 53 | data_prefix: PATH_TO_DATA/KoNViD 54 | sample_types: 55 | fragments: 56 | fragments_h: 7 57 | fragments_w: 7 58 | fsize_h: 32 59 | fsize_w: 32 60 | aligned: 32 61 | clip_len: 32 62 | frame_interval: 2 63 | num_clips: 4 64 | val-ltest: 65 | type: FusionDataset 66 | args: 67 | phase: test 68 | anno_file: ./examplar_data_labels/LSVQ/labels_mytest.txt 69 | data_prefix: PATH_TO_DATA/LSVQ 70 | sample_types: 71 | fragments: 72 | fragments_h: 7 73 | fragments_w: 7 74 | fsize_h: 32 75 | fsize_w: 32 76 | aligned: 32 77 | clip_len: 32 78 | frame_interval: 2 79 | num_clips: 4 80 | val-l1080p: 81 | type: FusionDataset 82 | args: 83 | phase: test 84 | anno_file: ./examplar_data_labels/LSVQ/labels_mytest_1080p.txt 85 | data_prefix: PATH_TO_DATA/LSVQ 86 | sample_types: 87 | fragments: 88 | fragments_h: 7 89 | fragments_w: 7 90 | fsize_h: 32 91 | fsize_w: 32 92 | aligned: 
32 93 | clip_len: 32 94 | frame_interval: 2 95 | num_clips: 4 96 | model: 97 | type: DiViDeAddEvaluator 98 | args: 99 | backbone: 100 | fragments: 101 | checkpoint: false 102 | pretrained: 103 | backbone_size: swin_tiny_grpb 104 | backbone_preserve_keys: fragments 105 | divide_head: false 106 | vqa_head: 107 | in_channels: 768 108 | hidden_channels: 64 109 | 110 | optimizer: 111 | lr: !!float 1e-3 112 | backbone_lr_mult: !!float 1e-1 113 | wd: 0.05 114 | 115 | load_path: PATH_TO_MODEL/swin_tiny_patch244_window877_kinetics400_1k.pth 116 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # SAMA Overview 2 | 3 | PyTorch implementation of "**Scaling and Masking: A New Paradigm of Data Sampling for Image and Video Quality Assessment**" ([arXiv](https://arxiv.org/abs/2401.02614)/[AAAI](https://ojs.aaai.org/index.php/AAAI/article/view/28170)), which has been accepted by **AAAI-2024**. 4 | 5 | This code is modified from [FAST-VQA](https://github.com/VQAssessment/FAST-VQA-and-FasterVQA). 6 | 7 | ![](method.png) 8 | 9 | ## Usage 10 | 11 | ### IQA 12 | For image quality assessment (IQA), please refer to [IQA/demo_train_iqa_baseline.py](https://github.com/Sissuire/SAMA/blob/main/IQA/demo_train_iqa_baseline.py). 13 | 14 | ### VQA 15 | For video quality assessment (VQA), please refer to [VQA/demo_train.py](https://github.com/Sissuire/SAMA/blob/main/VQA/demo_train.py) for training, and to [VQA/demo_finetune.py](https://github.com/Sissuire/SAMA/blob/main/VQA/demo_finetune.py) for finetuning. We also provide the [training log](https://github.com/Sissuire/SAMA/blob/main/VQA/log.FAST.SAMA.out) for VQA. 16 | 17 | The main idea/contribution lies in the data sampling, which can be found in [IQA](https://github.com/Sissuire/SAMA/blob/b8fdfa390999908bf6c0da284973bb1f2eb646d8/IQA/demo_train_iqa_baseline.py#L166C13-L166C13) and [VQA](https://github.com/Sissuire/SAMA/blob/b8fdfa390999908bf6c0da284973bb1f2eb646d8/VQA/fastvqa/datasets/fusion_datasets.py#L211). 18 | 19 | Make sure the configuration has been properly set in 20 | - [fast-sama-iqa.yml](https://github.com/Sissuire/SAMA/blob/main/IQA/options/fast-sama-iqa.yml) for IQA training; 21 | - [fast-SAMA-train.yml](https://github.com/Sissuire/SAMA/blob/main/VQA/options/fast-SAMA-train.yml) for VQA training on LSVQ; 22 | - and [fast-SAMA-finetune.yml](https://github.com/Sissuire/SAMA/blob/main/VQA/options/fast-SAMA-finetune.yml) for VQA finetuning. 23 | 24 | Please also prepare the pretrained models: [video-swin](https://github.com/SwinTransformer/storage/releases/download/v1.0.4/swin_tiny_patch244_window877_kinetics400_1k.pth) for VQA and [swin-v2](https://github.com/SwinTransformer/storage/releases/download/v2.0.0/swinv2_tiny_patch4_window8_256.pth) for IQA. 25 | 26 | #### Testing with pretrained model on videos 27 | 28 | We have provided the pretrained weights (trained on the LSVQ training set): [GoogleDrive](https://drive.google.com/drive/folders/1adB3aB8gBMx7c38tEfgls-i6QNZJI8nF?usp=sharing) / [BaiDu](https://pan.baidu.com/s/1KTicZ2WX8BN7GTgr9PX6ZQ?pwd=xyns) (Code:xyns). Please download the weights and put them in the `./VQA/pretrained_weights` folder.
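With the weights in place, the test script can be launched from the `VQA` folder; it reads its settings from the option file passed via `-o`/`--opt` (default: `./options/fast-SAMA-test.yml`). A minimal invocation, assuming the `data_prefix` entries in the option file have been pointed to your local data, would look like:

```
python demo_test.py -o ./options/fast-SAMA-test.yml
```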
29 | 30 | To test on your own dataset or video files, please construct the dataset annotation following the examples in `./VQA/examplar_data_labels`, and set the configuration in [fast-SAMA-test.yml](https://github.com/Sissuire/SAMA/blob/main/VQA/options/fast-SAMA-test.yml). Run [demo_test.py](https://github.com/Sissuire/SAMA/blob/main/VQA/demo_test.py) to check the details. 31 | 32 | ### Environment 33 | Different environments may induce slight fluctuations in performance. 34 | 35 | ``` 36 | Python 3.8.10 37 | PyTorch 1.7.0 38 | ``` 39 | 40 | For installation, please refer to [FAST-VQA](https://github.com/VQAssessment/FAST-VQA-and-FasterVQA). 41 | 42 | ### Citation 43 | If you are interested in the work or find the code helpful, please cite our work: 44 | ``` 45 | @article{sama2024, 46 | title={Scaling and Masking: A New Paradigm of Data Sampling for Image and Video Quality Assessment}, 47 | volume={38}, 48 | number={4}, 49 | journal={Proceedings of the AAAI Conference on Artificial Intelligence}, 50 | author={Liu, Yongxu and Quan, Yinghui and Xiao, Guoyao and Li, Aobo and Wu, Jinjian}, 51 | year={2024}, 52 | month={Mar.}, 53 | pages={3792-3801}, 54 | url={https://ojs.aaai.org/index.php/AAAI/article/view/28170}, 55 | DOI={10.1609/aaai.v38i4.28170} 56 | } 57 | ``` 58 | 59 | ### Contact 60 | 61 | Feel free to contact me via `yongxu.liu@xidian.edu.cn` if you have any questions or find any bugs. 62 | 63 | ### License 64 | 65 | Copyright (c) [2024] [Yongxu Liu] 66 | 67 | Permission to use, copy, or modify this software and its documentation for educational and research purposes only and without fee is here granted. This program shall not be used, rewritten, or adapted as the basis of a commercial software or hardware product without first obtaining permission of the authors. The authors make no representations about the suitability of this software for any purpose. It is provided "as is" without express or implied warranty.
68 | -------------------------------------------------------------------------------- /VQA/fastvqa/models/evaluator.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from functools import partial, reduce 4 | from .swin_backbone import SwinTransformer3D as VideoBackbone 5 | from .head import VQAHead, IQAHead, VARHead 6 | 7 | 8 | class DiViDeAddEvaluator(nn.Module): 9 | def __init__( 10 | self, 11 | backbone_size="divided", 12 | backbone_preserve_keys = 'fragments,resize', 13 | multi=False, 14 | layer=-1, 15 | backbone=dict(resize={"window_size": (4,4,4)}, fragments={"window_size": (4,4,4)}), 16 | divide_head=False, 17 | vqa_head=dict(in_channels=768), 18 | var=False, 19 | ): 20 | self.backbone_preserve_keys = backbone_preserve_keys.split(",") 21 | self.multi = multi 22 | self.layer = layer 23 | super().__init__() 24 | for key, hypers in backbone.items(): 25 | print(backbone_size) 26 | if key not in self.backbone_preserve_keys: 27 | continue 28 | if backbone_size=="divided": 29 | t_backbone_size = hypers["type"] 30 | else: 31 | t_backbone_size = backbone_size 32 | if t_backbone_size == 'swin_tiny_grpb': 33 | # to reproduce fast-vqa 34 | b = VideoBackbone() 35 | elif t_backbone_size == 'swin_tiny_grpb_m': 36 | # to reproduce fast-vqa-m 37 | b = VideoBackbone(window_size=(4,4,4), frag_biases=[0,0,0,0]) 38 | else: 39 | raise NotImplementedError 40 | print("Setting backbone:", key+"_backbone") 41 | setattr(self, key+"_backbone", b) 42 | if divide_head: 43 | print(divide_head) 44 | for key in backbone: 45 | if key not in self.backbone_preserve_keys: 46 | continue 47 | if var: 48 | b = VARHead(**vqa_head) 49 | print(b) 50 | else: 51 | b = VQAHead(**vqa_head) 52 | print("Setting head:", key+"_head") 53 | setattr(self, key+"_head", b) 54 | else: 55 | if var: 56 | self.vqa_head = VARHead(**vqa_head) 57 | print(b) 58 | else: 59 | self.vqa_head = VQAHead(**vqa_head) 60 | 61 | def forward(self, vclips, inference=True, return_pooled_feats=False, reduce_scores=True, pooled=False, **kwargs): 62 | if inference: 63 | self.eval() 64 | with torch.no_grad(): 65 | 66 | scores = [] 67 | feats = {} 68 | for key in vclips: 69 | feat = getattr(self, key.split("_")[0]+"_backbone")(vclips[key], multi=self.multi, layer=self.layer, **kwargs) 70 | if hasattr(self, key.split("_")[0]+"_head"): 71 | scores += [getattr(self, key.split("_")[0]+"_head")(feat)] 72 | else: 73 | scores += [getattr(self, "vqa_head")(feat)] 74 | if return_pooled_feats: 75 | feats[key] = feat.mean((-3,-2,-1)) 76 | if reduce_scores: 77 | if len(scores) > 1: 78 | scores = reduce(lambda x,y:x+y, scores) 79 | else: 80 | scores = scores[0] 81 | if pooled: 82 | scores = torch.mean(scores, (1,2,3,4)) 83 | self.train() 84 | if return_pooled_feats: 85 | return scores, feats 86 | return scores 87 | else: 88 | self.train() 89 | scores = [] 90 | feats = {} 91 | for key in vclips: 92 | feat = getattr(self, key.split("_")[0]+"_backbone")(vclips[key], multi=self.multi, layer=self.layer, **kwargs) 93 | if hasattr(self, key.split("_")[0]+"_head"): 94 | scores += [getattr(self, key.split("_")[0]+"_head")(feat)] 95 | else: 96 | scores += [getattr(self, "vqa_head")(feat)] 97 | if return_pooled_feats: 98 | feats[key] = feat.mean((-3,-2,-1)) 99 | if reduce_scores: 100 | if len(scores) > 1: 101 | scores = reduce(lambda x,y:x+y, scores) 102 | else: 103 | scores = scores[0] 104 | if pooled: 105 | print(scores.shape) 106 | scores = torch.mean(scores, (1,2,3,4)) 107 | print(scores.shape) 108 | 
109 | if return_pooled_feats: 110 | return scores, feats 111 | return scores 112 | -------------------------------------------------------------------------------- /VQA/fastvqa/models/head.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch 3 | from torchvision.ops import roi_pool, roi_align 4 | from torch.nn import functional as F 5 | import numpy as np 6 | import math 7 | 8 | class VQAHead(nn.Module): 9 | """MLP Regression Head for VQA. 10 | Args: 11 | in_channels: input channels for MLP 12 | hidden_channels: hidden channels for MLP 13 | dropout_ratio: the dropout ratio for features before the MLP (default 0.5) 14 | """ 15 | 16 | def __init__( 17 | self, in_channels=768, hidden_channels=64, dropout_ratio=0.5, **kwargs 18 | ): 19 | super().__init__() 20 | self.dropout_ratio = dropout_ratio 21 | self.in_channels = in_channels 22 | self.hidden_channels = hidden_channels 23 | if self.dropout_ratio != 0: 24 | self.dropout = nn.Dropout(p=self.dropout_ratio) 25 | else: 26 | self.dropout = None 27 | self.fc_hid = nn.Conv3d(self.in_channels, self.hidden_channels, (1, 1, 1)) 28 | self.fc_last = nn.Conv3d(self.hidden_channels, 1, (1, 1, 1)) 29 | self.gelu = nn.GELU() 30 | 31 | self.avg_pool = nn.AdaptiveAvgPool3d((1, 1, 1)) 32 | 33 | def forward(self, x, rois=None): 34 | x = self.dropout(x) 35 | qlt_score = self.fc_last(self.dropout(self.gelu(self.fc_hid(x)))) 36 | return qlt_score 37 | 38 | 39 | class VQAHead_samaw(nn.Module): 40 | """MLP Regression Head for VQA. 41 | Args: 42 | in_channels: input channels for MLP 43 | hidden_channels: hidden channels for MLP 44 | dropout_ratio: the dropout ratio for features before the MLP (default 0.5) 45 | """ 46 | 47 | def __init__( 48 | self, in_channels=768, hidden_channels=64, dropout_ratio=0.5, **kwargs 49 | ): 50 | super().__init__() 51 | self.dropout_ratio = dropout_ratio 52 | self.in_channels = in_channels 53 | self.hidden_channels = hidden_channels 54 | if self.dropout_ratio != 0: 55 | self.dropout = nn.Dropout(p=self.dropout_ratio) 56 | else: 57 | self.dropout = None 58 | self.fc_hid = nn.Conv3d(self.in_channels, self.hidden_channels, (1, 1, 1)) 59 | self.fc_last = nn.Conv3d(self.hidden_channels, 1, (1, 1, 1)) 60 | self.gelu = nn.GELU() 61 | self.fc_scale_hid = nn.Conv3d(self.in_channels, self.hidden_channels, (1, 1, 1)) 62 | self.fc_scale_last = nn.Conv3d(self.hidden_channels, 1, (1, 1, 1)) 63 | self.softmax = nn.Softmax(-3) 64 | 65 | self.avg_pool = nn.AdaptiveAvgPool3d((1, 1, 1)) 66 | 67 | def forward(self, x, rois=None): 68 | x = self.dropout(x) 69 | qlt_score = self.fc_last(self.dropout(self.gelu(self.fc_hid(x)))) 70 | 71 | x_mean = torch.mean(x, dim=(3, 4), keepdim=True) 72 | qlt_weight = self.fc_scale_last(self.dropout(self.gelu(self.fc_scale_hid(self.dropout(x_mean))))) 73 | qlt_weight = self.softmax(qlt_weight) 74 | return qlt_score * qlt_weight 75 | 76 | 77 | class VARHead(nn.Module): 78 | """MLP Regression Head for Video Action Recognition. 
79 | Args: 80 | in_channels: input channels for MLP 81 | hidden_channels: hidden channels for MLP 82 | dropout_ratio: the dropout ratio for features before the MLP (default 0.5) 83 | """ 84 | 85 | def __init__( 86 | self, in_channels=768, out_channels=400, dropout_ratio=0.5, **kwargs 87 | ): 88 | super().__init__() 89 | self.dropout_ratio = dropout_ratio 90 | self.in_channels = in_channels 91 | self.out_channels = out_channels 92 | if self.dropout_ratio != 0: 93 | self.dropout = nn.Dropout(p=self.dropout_ratio) 94 | else: 95 | self.dropout = None 96 | self.fc = nn.Conv3d(self.in_channels, self.out_channels, (1, 1, 1)) 97 | self.avg_pool = nn.AdaptiveAvgPool3d((1, 1, 1)) 98 | 99 | def forward(self, x, rois=None): 100 | x = self.dropout(x) 101 | x = self.avg_pool(x) 102 | out = self.fc(x) 103 | return out 104 | 105 | 106 | class IQAHead(nn.Module): 107 | """MLP Regression Head for IQA. 108 | Args: 109 | in_channels: input channels for MLP 110 | hidden_channels: hidden channels for MLP 111 | dropout_ratio: the dropout ratio for features before the MLP (default 0.5) 112 | """ 113 | 114 | def __init__( 115 | self, in_channels=768, hidden_channels=64, dropout_ratio=0.5, **kwargs 116 | ): 117 | super().__init__() 118 | self.dropout_ratio = dropout_ratio 119 | self.in_channels = in_channels 120 | self.hidden_channels = hidden_channels 121 | if self.dropout_ratio != 0: 122 | self.dropout = nn.Dropout(p=self.dropout_ratio) 123 | else: 124 | self.dropout = None 125 | self.fc_hid = nn.Linear(self.in_channels, self.hidden_channels) 126 | self.fc_last = nn.Linear(self.hidden_channels, 1) 127 | self.gelu = nn.GELU() 128 | 129 | def forward(self, x): 130 | x = self.dropout(x) 131 | qlt_score = self.fc_last(self.dropout(self.gelu(self.fc_hid(x)))) 132 | return qlt_score 133 | -------------------------------------------------------------------------------- /IQA/fastvqa/models/head.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch 3 | from torchvision.ops import roi_pool, roi_align 4 | from torch.nn import functional as F 5 | import numpy as np 6 | import math 7 | 8 | 9 | class VQAHeadMLP(nn.Module): 10 | """MLP Regression Head for VQA. 
11 | Args: 12 | in_channels: input channels for MLP 13 | hidden_channels: hidden channels for MLP 14 | dropout_ratio: the dropout ratio for features before the MLP (default 0.5) 15 | """ 16 | 17 | def __init__( 18 | self, in_channels=768, hidden_channels=64, target=1, dropout_ratio=0.5): 19 | super().__init__() 20 | 21 | self.dropout_ratio = dropout_ratio 22 | self.in_channels = in_channels 23 | self.hidden_channels = hidden_channels 24 | self.dropout = nn.Dropout(p=self.dropout_ratio) if dropout_ratio > 0 else nn.Identity() 25 | self.fc1 = nn.Linear(self.in_channels, self.hidden_channels) 26 | self.fc2 = nn.Linear(self.hidden_channels, target) 27 | self.gelu = nn.GELU() 28 | 29 | 30 | def forward(self, x, rois=None): 31 | x = self.dropout(x) 32 | qlt_score = self.fc2(self.dropout(self.gelu(self.fc1(x)))) 33 | return qlt_score 34 | 35 | 36 | class HyperHead(nn.Module): 37 | def __init__( 38 | self, in_channels=768, hidden_channels=64, dropout_ratio=0.5): 39 | super().__init__() 40 | 41 | self.dropout_ratio = dropout_ratio 42 | self.in_channels = in_channels 43 | self.hidden_channels = hidden_channels 44 | 45 | self.dropout = nn.Dropout(dropout_ratio) if dropout_ratio > 0 else nn.Identity() 46 | self.fc11 = nn.Linear(self.in_channels, self.hidden_channels) 47 | self.fc12 = nn.Linear(self.hidden_channels, 1) 48 | 49 | self.fc21 = nn.Linear(self.in_channels, hidden_channels) 50 | self.fc22 = nn.Linear(hidden_channels, 50) 51 | self.fc32 = nn.Linear(hidden_channels, 2) 52 | self.gelu = nn.GELU() 53 | 54 | 55 | def forward(self, x): 56 | x = self.dropout(x) 57 | relative_score = self.fc12(self.dropout(self.gelu(self.fc11(x)))) # [b, 1] 58 | 59 | f = self.dropout(self.gelu(self.fc21(x))) 60 | cls = self.fc22(f) # [b, N] 61 | 62 | wb = self.fc32(f) # [b, 2] 63 | scores = relative_score * wb[:, :1] + wb[:, 1:] 64 | return scores, cls 65 | 66 | 67 | 68 | class VQAHead(nn.Module): 69 | """MLP Regression Head for VQA. 70 | Args: 71 | in_channels: input channels for MLP 72 | hidden_channels: hidden channels for MLP 73 | dropout_ratio: the dropout ratio for features before the MLP (default 0.5) 74 | """ 75 | 76 | def __init__( 77 | self, in_channels=768, hidden_channels=64, dropout_ratio=0.5, **kwargs 78 | ): 79 | super().__init__() 80 | self.dropout_ratio = dropout_ratio 81 | self.in_channels = in_channels 82 | self.hidden_channels = hidden_channels 83 | if self.dropout_ratio != 0: 84 | self.dropout = nn.Dropout(p=self.dropout_ratio) 85 | else: 86 | self.dropout = None 87 | self.fc_hid = nn.Conv3d(self.in_channels, self.hidden_channels, (1, 1, 1)) 88 | self.fc_last = nn.Conv3d(self.hidden_channels, 1, (1, 1, 1)) 89 | self.gelu = nn.GELU() 90 | 91 | self.avg_pool = nn.AdaptiveAvgPool3d((1, 1, 1)) 92 | 93 | def forward(self, x, rois=None): 94 | x = self.dropout(x) 95 | qlt_score = self.fc_last(self.dropout(self.gelu(self.fc_hid(x)))) 96 | return qlt_score 97 | 98 | class VARHead(nn.Module): 99 | """MLP Regression Head for Video Action Recognition. 
100 | Args: 101 | in_channels: input channels for MLP 102 | hidden_channels: hidden channels for MLP 103 | dropout_ratio: the dropout ratio for features before the MLP (default 0.5) 104 | """ 105 | 106 | def __init__( 107 | self, in_channels=768, out_channels=400, dropout_ratio=0.5, **kwargs 108 | ): 109 | super().__init__() 110 | self.dropout_ratio = dropout_ratio 111 | self.in_channels = in_channels 112 | self.out_channels = out_channels 113 | if self.dropout_ratio != 0: 114 | self.dropout = nn.Dropout(p=self.dropout_ratio) 115 | else: 116 | self.dropout = None 117 | self.fc = nn.Conv3d(self.in_channels, self.out_channels, (1, 1, 1)) 118 | self.avg_pool = nn.AdaptiveAvgPool3d((1, 1, 1)) 119 | 120 | def forward(self, x, rois=None): 121 | x = self.dropout(x) 122 | x = self.avg_pool(x) 123 | out = self.fc(x) 124 | return out 125 | 126 | 127 | class IQAHead(nn.Module): 128 | """MLP Regression Head for IQA. 129 | Args: 130 | in_channels: input channels for MLP 131 | hidden_channels: hidden channels for MLP 132 | dropout_ratio: the dropout ratio for features before the MLP (default 0.5) 133 | """ 134 | 135 | def __init__( 136 | self, in_channels=768, hidden_channels=64, dropout_ratio=0.5, **kwargs 137 | ): 138 | super().__init__() 139 | self.dropout_ratio = dropout_ratio 140 | self.in_channels = in_channels 141 | self.hidden_channels = hidden_channels 142 | if self.dropout_ratio != 0: 143 | self.dropout = nn.Dropout(p=self.dropout_ratio) 144 | else: 145 | self.dropout = None 146 | self.fc_hid = nn.Linear(self.in_channels, self.hidden_channels) 147 | self.fc_last = nn.Linear(self.hidden_channels, 1) 148 | self.gelu = nn.GELU() 149 | 150 | def forward(self, x): 151 | x = self.dropout(x) 152 | qlt_score = self.fc_last(self.dropout(self.gelu(self.fc_hid(x)))) 153 | return qlt_score 154 | -------------------------------------------------------------------------------- /VQA/demo_test.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------- 2 | # SAMA, AAAI 2024 3 | # Testing code for VQA. 
4 | # This code is modified from FAST-VQA [ECCV, 2022] 5 | # ------------------------------------------------- 6 | import torch 7 | import random 8 | import os.path as osp 9 | import fastvqa.models as models 10 | import fastvqa.datasets as datasets 11 | import os 12 | import argparse 13 | import sys 14 | 15 | from scipy.stats import spearmanr, pearsonr 16 | from scipy.stats.stats import kendalltau as kendallr 17 | import numpy as np 18 | 19 | import timeit 20 | import math 21 | 22 | import yaml 23 | 24 | from functools import reduce 25 | from thop import profile 26 | import warnings 27 | 28 | warnings.filterwarnings("ignore") 29 | 30 | from torch.utils.tensorboard import SummaryWriter 31 | 32 | 33 | def rescale(pr, gt=None): 34 | if gt is None: 35 | pr = (pr - np.mean(pr)) / np.std(pr) 36 | else: 37 | pr = ((pr - np.mean(pr)) / np.std(pr)) * np.std(gt) + np.mean(gt) 38 | return pr 39 | 40 | sample_types=["resize", "fragments", "crop", "arp_resize", "arp_fragments"] 41 | 42 | 43 | 44 | def inference_set(inf_loader, model, device): 45 | 46 | results = [] 47 | 48 | tic = timeit.default_timer() 49 | gt_labels, pr_labels = [], [] 50 | 51 | for i, data in enumerate(inf_loader): 52 | result = dict() 53 | video, video_up = {}, {} 54 | for key in sample_types: 55 | if key in data: 56 | video[key] = data[key].to(device) 57 | ## Reshape into clips 58 | b, c, t, h, w = video[key].shape 59 | video[key] = video[key].reshape(b, c, data["num_clips"][key], t // data["num_clips"][key], h, w).permute(0,2,1,3,4,5).reshape(b * data["num_clips"][key], c, t // data["num_clips"][key], h, w) 60 | 61 | with torch.no_grad(): 62 | result["pr_labels"] = model(video).cpu().numpy() 63 | 64 | result["gt_label"] = data["gt_label"].item() 65 | 66 | results.append(result) 67 | 68 | ## generate the demo video for video quality localization 69 | gt_labels = [r["gt_label"] for r in results] 70 | pr_labels = [np.mean(r["pr_labels"][:]) for r in results] 71 | pr_labels = rescale(pr_labels, gt_labels) 72 | 73 | s = spearmanr(gt_labels, pr_labels)[0] 74 | p = pearsonr(gt_labels, pr_labels)[0] 75 | k = kendallr(gt_labels, pr_labels)[0] 76 | r = np.sqrt(((gt_labels - pr_labels) ** 2).mean()) 77 | 78 | torch.cuda.empty_cache() 79 | 80 | toc = timeit.default_timer() 81 | minutes = int((toc - tic) / 60) 82 | seconds = int((toc - tic) % 60) 83 | 84 | print( 85 | f"For {len(gt_labels)} videos, \nthe accuracy of the model is as follows:\n SROCC: {s:.4f} best: {best_s:.4f} \n PLCC: {p:.4f} best: {best_p:.4f} \n KROCC: {k:.4f} best: {best_k:.4f} \n RMSE: {r:.4f} best: {best_r:.4f}." 
86 | ) 87 | print('time elapsed {:02d}m {:02d}s.'.format(minutes, seconds)) 88 | 89 | return s, p, k, r 90 | 91 | 92 | 93 | def main(): 94 | 95 | parser = argparse.ArgumentParser() 96 | parser.add_argument("-o", "--opt", type=str, 97 | default="./options/fast-SAMA-test.yml", help="the option file") 98 | 99 | args = parser.parse_args() 100 | with open(args.opt, "r") as f: 101 | opt = yaml.safe_load(f) 102 | print(opt) 103 | 104 | ## adaptively choose the device 105 | device = "cuda" if torch.cuda.is_available() else "cpu" 106 | 107 | if sys.gettrace(): 108 | print('in DEBUGE mode.') 109 | opt["name"] = "DEBUG" 110 | opt['test_num_workers']=0 111 | 112 | ## defining model and loading checkpoint 113 | 114 | print('using device: {}'.format(device)) 115 | model = getattr(models, opt["model"]["type"])(**opt["model"]["args"]).to(device) 116 | 117 | 118 | stype = opt['stype'] if opt['stype'] in ['sama', 'sama-c', 'sama-mix', 'sama+spm', 'sama+swm'] else 'fragments' 119 | 120 | val_datasets = {} 121 | for key in opt["data"]: 122 | if key.startswith("val"): 123 | val_datasets[key] = getattr(datasets, opt["data"][key]["type"])(opt["data"][key]["args"], stype=stype) 124 | print('dataset=[{}], with {} samples.'.format(key, len(val_datasets[key]))) 125 | 126 | val_loaders = {} 127 | for key, val_dataset in val_datasets.items(): 128 | val_loaders[key] = torch.utils.data.DataLoader(val_dataset, 129 | batch_size=opt["test_batch_size"], 130 | num_workers=opt["test_num_workers"], 131 | pin_memory=False, 132 | shuffle=False, 133 | drop_last=False) 134 | 135 | if "load_path" in opt: 136 | state_dict = torch.load(opt["load_path"], map_location=device) 137 | print(model.load_state_dict(state_dict['state_dict'] , strict=False)) 138 | 139 | 140 | print(f"evaluation ..") 141 | 142 | bests = {} 143 | for key in val_loaders: 144 | bests[key] = inference_set( 145 | val_loaders[key], 146 | model, 147 | device 148 | ) 149 | 150 | for key in val_loaders: 151 | print( 152 | f"""For the finetuning process on {key} with {len(val_datasets[key])} videos, 153 | the best validation accuracy of the model-s is as follows: 154 | SROCC: {bests[key][0]:.4f} 155 | PLCC: {bests[key][1]:.4f} 156 | KROCC: {bests[key][2]:.4f} 157 | RMSE: {bests[key][3]:.4f}.""" 158 | ) 159 | 160 | 161 | 162 | if __name__ == "__main__": 163 | main() 164 | -------------------------------------------------------------------------------- /VQA/demo_finetune.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------- 2 | # SAMA, AAAI 2024 3 | # Finetuning code for VQA. 
4 | # This code is modified from FAST-VQA [ECCV, 2022] 5 | # ------------------------------------------------- 6 | import torch 7 | import random 8 | import os.path as osp 9 | import fastvqa.models as models 10 | import fastvqa.datasets as datasets 11 | import os 12 | import argparse 13 | import sys 14 | 15 | from scipy.stats import spearmanr, pearsonr 16 | from scipy.stats.stats import kendalltau as kendallr 17 | import numpy as np 18 | 19 | import timeit 20 | import math 21 | 22 | import yaml 23 | 24 | from functools import reduce 25 | from thop import profile 26 | import warnings 27 | 28 | warnings.filterwarnings("ignore") 29 | 30 | from torch.utils.tensorboard import SummaryWriter 31 | 32 | 33 | def train_test_split(dataset_path, ann_file, ratio=0.8, seed=42): 34 | random.seed(seed) 35 | video_infos = [] 36 | with open(ann_file, "r") as fin: 37 | for line in fin.readlines(): 38 | line_split = line.strip().split(",") 39 | filename, _, _, label = line_split 40 | label = float(label) 41 | filename = osp.join(dataset_path, filename) 42 | video_infos.append(dict(filename=filename, label=label)) 43 | random.shuffle(video_infos) 44 | return ( 45 | video_infos[: int(ratio * len(video_infos))], 46 | video_infos[int(ratio * len(video_infos)) :], 47 | ) 48 | 49 | 50 | def rank_loss(y_pred, y): 51 | ranking_loss = torch.nn.functional.relu( 52 | (y_pred - y_pred.t()) * torch.sign((y.t() - y)) 53 | ) 54 | scale = 1 + torch.max(ranking_loss) 55 | return ( 56 | torch.sum(ranking_loss) / y_pred.shape[0] / (y_pred.shape[0] - 1) / scale 57 | ).float() 58 | 59 | def plcc_loss(y_pred, y): 60 | sigma_hat, m_hat = torch.std_mean(y_pred, unbiased=False) 61 | y_pred = (y_pred - m_hat) / (sigma_hat + 1e-8) 62 | sigma, m = torch.std_mean(y, unbiased=False) 63 | y = (y - m) / (sigma + 1e-8) 64 | loss0 = torch.nn.functional.mse_loss(y_pred, y) / 4 65 | rho = torch.mean(y_pred * y) 66 | loss1 = torch.nn.functional.mse_loss(rho * y_pred, y) / 4 67 | return ((loss0 + loss1) / 2).float() 68 | 69 | def rescaled_l2_loss(y_pred, y): 70 | y_pred_rs = (y_pred - y_pred.mean()) / y_pred.std() 71 | y_rs = (y - y.mean()) / (y.std() + eps) 72 | return torch.nn.functional.mse_loss(y_pred_rs, y_rs) 73 | 74 | def rplcc_loss(y_pred, y, eps=1e-8): 75 | ## Literally (1 - PLCC) / 2 76 | cov = torch.cov(y_pred, y) 77 | std = (torch.std(y_pred) + eps) * (torch.std(y) + eps) 78 | return (1 - cov / std) / 2 79 | 80 | def self_similarity_loss(f, f_hat, f_hat_detach=False): 81 | if f_hat_detach: 82 | f_hat = f_hat.detach() 83 | return 1 - torch.nn.functional.cosine_similarity(f, f_hat, dim=1).mean() 84 | 85 | def contrastive_similarity_loss(f, f_hat, f_hat_detach=False, eps=1e-8): 86 | if f_hat_detach: 87 | f_hat = f_hat.detach() 88 | intra_similarity = torch.nn.functional.cosine_similarity(f, f_hat, dim=1).mean() 89 | cross_similarity = torch.nn.functional.cosine_similarity(f, f_hat, dim=0).mean() 90 | return (1 - intra_similarity) / (1 - cross_similarity + eps) 91 | 92 | def rescale(pr, gt=None): 93 | if gt is None: 94 | pr = (pr - np.mean(pr)) / np.std(pr) 95 | else: 96 | pr = ((pr - np.mean(pr)) / np.std(pr)) * np.std(gt) + np.mean(gt) 97 | return pr 98 | 99 | sample_types=["resize", "fragments", "crop", "arp_resize", "arp_fragments"] 100 | 101 | 102 | 103 | 104 | def finetune_epoch(ft_loader, model, model_ema, optimizer, scheduler, device, epoch=-1, writer=None, 105 | need_upsampled=True, need_feat=True, need_fused=False, need_separate_sup=False): 106 | model.train() 107 | tic = timeit.default_timer() 108 | train_labels, pred_labels = 
[], [] 109 | epoch_loss = 0 110 | 111 | for i, data in enumerate(ft_loader): 112 | optimizer.zero_grad() 113 | video = {} 114 | for key in sample_types: 115 | if key in data: 116 | video[key] = data[key].to(device) 117 | 118 | y = data["gt_label"].float().detach().to(device).unsqueeze(-1) 119 | scores = model(video, inference=False, reduce_scores=False) 120 | if len(scores) > 1: 121 | y_pred = reduce(lambda x,y:x+y, scores) 122 | else: 123 | y_pred = scores[0] 124 | y_pred = y_pred.mean((-2, -1)).sum(-1) 125 | 126 | frame_inds = data["frame_inds"] 127 | 128 | # Plain Supervised Loss 129 | p_loss, r_loss = plcc_loss(y_pred, y), rank_loss(y_pred, y) 130 | 131 | loss = p_loss + 0.3 * r_loss 132 | epoch_loss += loss.item() 133 | 134 | loss.backward() 135 | optimizer.step() 136 | scheduler.step() 137 | 138 | pred_labels.extend(list(y_pred.view(-1).detach().cpu().numpy())) 139 | train_labels.extend(list(y.view(-1).detach().cpu().numpy())) 140 | 141 | #ft_loader.dataset.refresh_hypers() 142 | 143 | 144 | if model_ema is not None: 145 | model_params = dict(model.named_parameters()) 146 | model_ema_params = dict(model_ema.named_parameters()) 147 | for k in model_params.keys(): 148 | model_ema_params[k].data.mul_(0.999).add_( 149 | model_params[k].data, alpha=1 - 0.999) 150 | 151 | 152 | train_srcc = spearmanr(train_labels, pred_labels)[0] 153 | 154 | writer.add_scalar('train_srcc', train_srcc, epoch) 155 | writer.add_scalar('train_total_loss', epoch_loss, epoch) 156 | 157 | toc = timeit.default_timer() 158 | 159 | minutes = int((toc - tic) / 60) 160 | seconds = int((toc - tic) % 60) 161 | print('Epoch-{:02d}, training SRCC={:.4f}, time elapsed {:02d}m {:02d}s.'.format(epoch, train_srcc, minutes, seconds)) 162 | print('backbone_lr = {:.2e}, head_lr = {:.2e}'.format(optimizer.state_dict()['param_groups'][0]['lr'], 163 | optimizer.state_dict()['param_groups'][-1]['lr'])) 164 | 165 | model.eval() 166 | 167 | 168 | def profile_inference(inf_set, model, device): 169 | video = {} 170 | data = inf_set[0] 171 | for key in sample_types: 172 | if key in data: 173 | video[key] = data[key].to(device).unsqueeze(0) 174 | with torch.no_grad(): 175 | flops, params = profile(model, (video, )) 176 | print(f"The FLOps of the Variant is {flops/1e9:.1f}G, with Params {params/1e6:.2f}M.") 177 | 178 | def inference_set(inf_loader, model, device, best_, epoch, writer=None, save_model=False, suffix='s', save_name="divide"): 179 | 180 | results = [] 181 | 182 | tic = timeit.default_timer() 183 | gt_labels, pr_labels = [], [] 184 | 185 | best_s, best_p, best_k, best_r = best_ 186 | 187 | for i, data in enumerate(inf_loader): 188 | result = dict() 189 | video, video_up = {}, {} 190 | for key in sample_types: 191 | if key in data: 192 | video[key] = data[key].to(device) 193 | ## Reshape into clips 194 | b, c, t, h, w = video[key].shape 195 | video[key] = video[key].reshape(b, c, data["num_clips"][key], t // data["num_clips"][key], h, w).permute(0,2,1,3,4,5).reshape(b * data["num_clips"][key], c, t // data["num_clips"][key], h, w) 196 | 197 | with torch.no_grad(): 198 | result["pr_labels"] = model(video).cpu().numpy() 199 | 200 | result["gt_label"] = data["gt_label"].item() 201 | 202 | results.append(result) 203 | 204 | ## generate the demo video for video quality localization 205 | gt_labels = [r["gt_label"] for r in results] 206 | pr_labels = [np.mean(r["pr_labels"][:]) for r in results] 207 | pr_labels = rescale(pr_labels, gt_labels) 208 | 209 | s = spearmanr(gt_labels, pr_labels)[0] 210 | p = pearsonr(gt_labels, 
pr_labels)[0] 211 | k = kendallr(gt_labels, pr_labels)[0] 212 | r = np.sqrt(((gt_labels - pr_labels) ** 2).mean()) 213 | 214 | writer.add_scalar('val_{}_srcc'.format(suffix), s, epoch) 215 | writer.add_scalar('val_{}_plcc'.format(suffix), p, epoch) 216 | writer.add_scalar('val_{}_krcc'.format(suffix), k, epoch) 217 | writer.add_scalar('val_{}_rmse'.format(suffix), r, epoch) 218 | 219 | torch.cuda.empty_cache() 220 | 221 | if s + p > best_s + best_p and save_model: 222 | state_dict = model.state_dict() 223 | torch.save( 224 | {"state_dict": state_dict, 225 | "validation_results": best_}, 226 | f"pretrained_weights/{save_name}_{suffix}_dev_v0.0.pth") 227 | 228 | best_s, best_p, best_k, best_r = ( 229 | max(best_s, s), 230 | max(best_p, p), 231 | max(best_k, k), 232 | min(best_r, r), 233 | ) 234 | 235 | 236 | writer.add_scalar('val_{}_best_srcc'.format(suffix), best_s, epoch) 237 | writer.add_scalar('val_{}_best_plcc'.format(suffix), best_p, epoch) 238 | writer.add_scalar('val_{}_best_krcc'.format(suffix), best_k, epoch) 239 | writer.add_scalar('val_{}_best_rmse'.format(suffix), best_r, epoch) 240 | 241 | toc = timeit.default_timer() 242 | minutes = int((toc - tic) / 60) 243 | seconds = int((toc - tic) % 60) 244 | 245 | print( 246 | f"For {len(gt_labels)} videos, \nthe accuracy of the model: [{suffix}] is as follows:\n SROCC: {s:.4f} best: {best_s:.4f} \n PLCC: {p:.4f} best: {best_p:.4f} \n KROCC: {k:.4f} best: {best_k:.4f} \n RMSE: {r:.4f} best: {best_r:.4f}." 247 | ) 248 | print('time elapsed {:02d}m {:02d}s.'.format(minutes, seconds)) 249 | 250 | return best_s, best_p, best_k, best_r 251 | 252 | # torch.save(results, f'{args.save_dir}/results_{dataset.lower()}_s{32}*{32}_ens{args.famount}.pkl') 253 | 254 | 255 | def main(): 256 | 257 | parser = argparse.ArgumentParser() 258 | parser.add_argument( 259 | "-o", "--opt", type=str, default="./options/fast-SAMA-finetune.yml", help="the option file" 260 | ) 261 | 262 | args = parser.parse_args() 263 | with open(args.opt, "r") as f: 264 | opt = yaml.safe_load(f) 265 | print(opt) 266 | 267 | 268 | ## adaptively choose the device 269 | 270 | # os.environ['CUDA_VISIBLE_DEVICES']='6' 271 | device = "cuda" if torch.cuda.is_available() else "cpu" 272 | 273 | if sys.gettrace(): 274 | print('in DEBUGE mode.') 275 | opt["name"] = "DEBUG" 276 | opt['train_num_workers']=0 277 | opt['test_num_workers']=0 278 | 279 | 280 | if opt.get("split_seed", -1) > 0: 281 | num_splits = 10 282 | else: 283 | num_splits = 1 284 | 285 | stype = opt['stype'] if opt['stype'] in ['sama', 'sama-c', 'sama-mix', 'sama+spm', 'sama+swm'] else 'fragments' 286 | 287 | for split in range(num_splits): 288 | print(f"""\n==================== SPLIT-{split:02d} ====================""") 289 | 290 | key = opt["data"]["database"] 291 | ann_file = opt["data"]["anno_file"] 292 | data_prefix = opt["data"]["data_prefix"] 293 | video_infos = [] 294 | with open(ann_file, "r") as fin: 295 | for line in fin: 296 | line_split = line.strip().split(",") 297 | fileid, _, _, label = line_split 298 | label = float(label) 299 | filename = osp.join(data_prefix, fileid) 300 | video_infos.append(dict(filename=filename, label=label, fileid=fileid)) 301 | video_infos = np.asarray(video_infos) 302 | 303 | index_current = np.arange(len(video_infos)) 304 | random.Random(split * 123).shuffle(index_current) # shuffle with certain seed 305 | pos_train_end = int(0.8 * len(video_infos)) 306 | trainindex = index_current[:pos_train_end] 307 | evalindex = index_current[pos_train_end:] 308 | 309 | train_datasets, 
train_loaders, val_datasets, val_loaders = {}, {}, {}, {} 310 | 311 | val_datasets[key] = getattr(datasets, opt["data"]["type"])(video_infos[evalindex], 312 | opt["data"]["test"], 313 | stype=stype, 314 | is_train=False) 315 | val_loaders[key] = torch.utils.data.DataLoader(val_datasets[key], 316 | batch_size=opt["test_batch_size"], 317 | num_workers=opt["test_num_workers"], 318 | pin_memory=False, 319 | shuffle=False, 320 | drop_last=False) 321 | 322 | train_datasets[key] = getattr(datasets, opt["data"]["type"])(video_infos[trainindex], 323 | opt["data"]["train"], 324 | stype=stype, 325 | is_train=True) 326 | train_loaders[key] = torch.utils.data.DataLoader(train_datasets[key], 327 | batch_size=opt["train_batch_size"], 328 | num_workers=opt["train_num_workers"], 329 | shuffle=True) 330 | print('dataset=[{}], with {} samples.'.format(key, len(train_datasets[key]))) 331 | print('dataset=[{}], with {} samples.'.format(key, len(val_datasets[key]))) 332 | 333 | ## defining model and loading checkpoint 334 | print('using device: {}'.format(device)) 335 | model = getattr(models, opt["model"]["type"])(**opt["model"]["args"]).to(device) 336 | if "load_path" in opt: 337 | state_dict = torch.load(opt["load_path"], map_location=device)["state_dict"] 338 | print(model.load_state_dict(state_dict, strict=True)) 339 | 340 | if opt["ema"]: 341 | from copy import deepcopy 342 | model_ema = deepcopy(model) 343 | else: 344 | model_ema = None 345 | 346 | #profile_inference(val_dataset, model, device) 347 | 348 | # finetune the model 349 | param_groups=[] 350 | 351 | for key, value in dict(model.named_children()).items(): 352 | if "backbone" in key: 353 | param_groups += [{"params": value.parameters(), "lr": opt["optimizer"]["lr"] * opt["optimizer"]["backbone_lr_mult"]}] 354 | else: 355 | param_groups += [{"params": value.parameters(), "lr": opt["optimizer"]["lr"]}] 356 | 357 | optimizer = torch.optim.AdamW(lr=opt["optimizer"]["lr"], 358 | params=param_groups, 359 | weight_decay=opt["optimizer"]["wd"]) 360 | warmup_iter = 0 361 | for train_loader in train_loaders.values(): 362 | warmup_iter += int(opt["warmup_epochs"] * len(train_loader)) 363 | max_iter = int((opt["num_epochs"] + opt["l_num_epochs"]) * len(train_loader)) 364 | lr_lambda = ( 365 | lambda cur_iter: cur_iter / warmup_iter 366 | if cur_iter <= warmup_iter 367 | else 0.5 * (1 + math.cos(math.pi * (cur_iter - warmup_iter) / max_iter)) 368 | ) 369 | 370 | scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=lr_lambda) 371 | 372 | bests = {} 373 | # bests_n = {} 374 | for key in val_loaders: 375 | bests[key] = -1,-1,-1,1000 376 | # bests_n[key] = -1,-1,-1,1000 377 | 378 | os.makedirs('./tensorboard/', exist_ok=True) 379 | os.makedirs('./pretrained_weights/', exist_ok=True) 380 | writer = SummaryWriter('./tensorboard/{}'.format(opt['name'])) 381 | 382 | for epoch in range(opt["num_epochs"]): 383 | print(f"Finetune Epoch {epoch}:") 384 | 385 | for key, train_loader in train_loaders.items(): 386 | finetune_epoch( 387 | train_loader, model, model_ema, optimizer, scheduler, device, epoch, writer, 388 | opt.get("need_upsampled", False), opt.get("need_feat", False), opt.get("need_fused", False), 389 | ) 390 | 391 | 392 | print(f"evaluation ..") 393 | 394 | for key in val_loaders: 395 | bests[key] = inference_set( 396 | val_loaders[key], 397 | model_ema if model_ema is not None else model, 398 | device, bests[key], epoch, writer, 399 | save_model=opt["save_model"], save_name=opt["name"], 400 | suffix=key+"_s", 401 | ) 402 | if opt["num_epochs"] > 
0: 403 | for key in val_loaders: 404 | print( 405 | f"""For the finetuning process on {key} with {len(val_datasets[key])} videos, 406 | the best validation accuracy of the model-s is as follows: 407 | SROCC: {bests[key][0]:.4f} 408 | PLCC: {bests[key][1]:.4f} 409 | KROCC: {bests[key][2]:.4f} 410 | RMSE: {bests[key][3]:.4f}.""" 411 | ) 412 | 413 | 414 | if __name__ == "__main__": 415 | main() 416 | -------------------------------------------------------------------------------- /VQA/demo_train.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------- 2 | # SAMA, AAAI 2024 3 | # Training code for VQA. 4 | # This code is modified from FAST-VQA [ECCV, 2022] 5 | # ------------------------------------------------- 6 | import torch 7 | import random 8 | import os.path as osp 9 | import fastvqa.models as models 10 | import fastvqa.datasets as datasets 11 | import os 12 | import argparse 13 | import sys 14 | 15 | from scipy.stats import spearmanr, pearsonr 16 | from scipy.stats.stats import kendalltau as kendallr 17 | import numpy as np 18 | 19 | import timeit 20 | import math 21 | 22 | import yaml 23 | 24 | from functools import reduce 25 | from thop import profile 26 | import warnings 27 | 28 | warnings.filterwarnings("ignore") 29 | 30 | from torch.utils.tensorboard import SummaryWriter 31 | 32 | 33 | def train_test_split(dataset_path, ann_file, ratio=0.8, seed=42): 34 | random.seed(seed) 35 | video_infos = [] 36 | with open(ann_file, "r") as fin: 37 | for line in fin.readlines(): 38 | line_split = line.strip().split(",") 39 | filename, _, _, label = line_split 40 | label = float(label) 41 | filename = osp.join(dataset_path, filename) 42 | video_infos.append(dict(filename=filename, label=label)) 43 | random.shuffle(video_infos) 44 | return ( 45 | video_infos[: int(ratio * len(video_infos))], 46 | video_infos[int(ratio * len(video_infos)) :], 47 | ) 48 | 49 | 50 | def rank_loss(y_pred, y): 51 | ranking_loss = torch.nn.functional.relu( 52 | (y_pred - y_pred.t()) * torch.sign((y.t() - y)) 53 | ) 54 | scale = 1 + torch.max(ranking_loss) 55 | return ( 56 | torch.sum(ranking_loss) / y_pred.shape[0] / (y_pred.shape[0] - 1) / scale 57 | ).float() 58 | 59 | def plcc_loss(y_pred, y): 60 | sigma_hat, m_hat = torch.std_mean(y_pred, unbiased=False) 61 | y_pred = (y_pred - m_hat) / (sigma_hat + 1e-8) 62 | sigma, m = torch.std_mean(y, unbiased=False) 63 | y = (y - m) / (sigma + 1e-8) 64 | loss0 = torch.nn.functional.mse_loss(y_pred, y) / 4 65 | rho = torch.mean(y_pred * y) 66 | loss1 = torch.nn.functional.mse_loss(rho * y_pred, y) / 4 67 | return ((loss0 + loss1) / 2).float() 68 | 69 | def rescaled_l2_loss(y_pred, y): 70 | y_pred_rs = (y_pred - y_pred.mean()) / y_pred.std() 71 | y_rs = (y - y.mean()) / (y.std() + eps) 72 | return torch.nn.functional.mse_loss(y_pred_rs, y_rs) 73 | 74 | def rplcc_loss(y_pred, y, eps=1e-8): 75 | ## Literally (1 - PLCC) / 2 76 | cov = torch.cov(y_pred, y) 77 | std = (torch.std(y_pred) + eps) * (torch.std(y) + eps) 78 | return (1 - cov / std) / 2 79 | 80 | def self_similarity_loss(f, f_hat, f_hat_detach=False): 81 | if f_hat_detach: 82 | f_hat = f_hat.detach() 83 | return 1 - torch.nn.functional.cosine_similarity(f, f_hat, dim=1).mean() 84 | 85 | def contrastive_similarity_loss(f, f_hat, f_hat_detach=False, eps=1e-8): 86 | if f_hat_detach: 87 | f_hat = f_hat.detach() 88 | intra_similarity = torch.nn.functional.cosine_similarity(f, f_hat, dim=1).mean() 89 | cross_similarity = 
torch.nn.functional.cosine_similarity(f, f_hat, dim=0).mean() 90 | return (1 - intra_similarity) / (1 - cross_similarity + eps) 91 | 92 | def rescale(pr, gt=None): 93 | if gt is None: 94 | pr = (pr - np.mean(pr)) / np.std(pr) 95 | else: 96 | pr = ((pr - np.mean(pr)) / np.std(pr)) * np.std(gt) + np.mean(gt) 97 | return pr 98 | 99 | sample_types=["resize", "fragments", "crop", "arp_resize", "arp_fragments"] 100 | 101 | 102 | 103 | 104 | def finetune_epoch(ft_loader, model, model_ema, optimizer, scheduler, device, epoch=-1, writer=None, 105 | need_upsampled=True, need_feat=True, need_fused=False, need_separate_sup=False): 106 | model.train() 107 | tic = timeit.default_timer() 108 | train_labels, pred_labels = [], [] 109 | epoch_loss = 0 110 | 111 | for i, data in enumerate(ft_loader): 112 | optimizer.zero_grad() 113 | video = {} 114 | for key in sample_types: 115 | if key in data: 116 | video[key] = data[key].to(device) 117 | 118 | y = data["gt_label"].float().detach().to(device).unsqueeze(-1) 119 | scores = model(video, inference=False, reduce_scores=False) 120 | if len(scores) > 1: 121 | y_pred = reduce(lambda x,y:x+y, scores) 122 | else: 123 | y_pred = scores[0] 124 | y_pred = y_pred.mean((-2, -1)).sum(-1) 125 | 126 | frame_inds = data["frame_inds"] 127 | 128 | # Plain Supervised Loss 129 | p_loss, r_loss = plcc_loss(y_pred, y), rank_loss(y_pred, y) 130 | 131 | loss = p_loss + 0.3 * r_loss 132 | epoch_loss += loss.item() 133 | 134 | loss.backward() 135 | optimizer.step() 136 | scheduler.step() 137 | 138 | pred_labels.extend(list(y_pred.view(-1).detach().cpu().numpy())) 139 | train_labels.extend(list(y.view(-1).detach().cpu().numpy())) 140 | 141 | #ft_loader.dataset.refresh_hypers() 142 | 143 | 144 | if model_ema is not None: 145 | model_params = dict(model.named_parameters()) 146 | model_ema_params = dict(model_ema.named_parameters()) 147 | for k in model_params.keys(): 148 | model_ema_params[k].data.mul_(0.999).add_( 149 | model_params[k].data, alpha=1 - 0.999) 150 | 151 | 152 | train_srcc = spearmanr(train_labels, pred_labels)[0] 153 | 154 | writer.add_scalar('train_srcc', train_srcc, epoch) 155 | writer.add_scalar('train_total_loss', epoch_loss, epoch) 156 | 157 | toc = timeit.default_timer() 158 | 159 | minutes = int((toc - tic) / 60) 160 | seconds = int((toc - tic) % 60) 161 | print('Epoch-{:02d}, training SRCC={:.4f}, time elapsed {:02d}m {:02d}s.'.format(epoch, train_srcc, minutes, seconds)) 162 | print('backbone_lr = {:.2e}, head_lr = {:.2e}'.format(optimizer.state_dict()['param_groups'][0]['lr'], 163 | optimizer.state_dict()['param_groups'][-1]['lr'])) 164 | 165 | model.eval() 166 | 167 | 168 | def profile_inference(inf_set, model, device): 169 | video = {} 170 | data = inf_set[0] 171 | for key in sample_types: 172 | if key in data: 173 | video[key] = data[key].to(device).unsqueeze(0) 174 | with torch.no_grad(): 175 | flops, params = profile(model, (video, )) 176 | print(f"The FLOps of the Variant is {flops/1e9:.1f}G, with Params {params/1e6:.2f}M.") 177 | 178 | 179 | def inference_set(inf_loader, model, device, best_, epoch, writer=None, save_model=False, suffix='s', save_name="divide"): 180 | 181 | results = [] 182 | 183 | tic = timeit.default_timer() 184 | gt_labels, pr_labels = [], [] 185 | 186 | best_s, best_p, best_k, best_r = best_ 187 | 188 | for i, data in enumerate(inf_loader): 189 | result = dict() 190 | video, video_up = {}, {} 191 | for key in sample_types: 192 | if key in data: 193 | video[key] = data[key].to(device) 194 | ## Reshape into clips 195 | b, c, t, h, w = 
video[key].shape 196 | video[key] = video[key].reshape(b, c, data["num_clips"][key], t // data["num_clips"][key], h, w).permute(0,2,1,3,4,5).reshape(b * data["num_clips"][key], c, t // data["num_clips"][key], h, w) 197 | 198 | with torch.no_grad(): 199 | result["pr_labels"] = model(video).cpu().numpy() 200 | 201 | result["gt_label"] = data["gt_label"].item() 202 | 203 | results.append(result) 204 | 205 | ## generate the demo video for video quality localization 206 | gt_labels = [r["gt_label"] for r in results] 207 | pr_labels = [np.mean(r["pr_labels"][:]) for r in results] 208 | pr_labels = rescale(pr_labels, gt_labels) 209 | 210 | s = spearmanr(gt_labels, pr_labels)[0] 211 | p = pearsonr(gt_labels, pr_labels)[0] 212 | k = kendallr(gt_labels, pr_labels)[0] 213 | r = np.sqrt(((gt_labels - pr_labels) ** 2).mean()) 214 | 215 | writer.add_scalar('val_{}_srcc'.format(suffix), s, epoch) 216 | writer.add_scalar('val_{}_plcc'.format(suffix), p, epoch) 217 | writer.add_scalar('val_{}_krcc'.format(suffix), k, epoch) 218 | writer.add_scalar('val_{}_rmse'.format(suffix), r, epoch) 219 | 220 | torch.cuda.empty_cache() 221 | 222 | if s + p > best_s + best_p and save_model: 223 | state_dict = model.state_dict() 224 | torch.save( 225 | {"state_dict": state_dict, 226 | "validation_results": best_}, 227 | f"pretrained_weights/{save_name}_{suffix}_dev_v0.0.pth") 228 | 229 | best_s, best_p, best_k, best_r = ( 230 | max(best_s, s), 231 | max(best_p, p), 232 | max(best_k, k), 233 | min(best_r, r), 234 | ) 235 | 236 | 237 | writer.add_scalar('val_{}_best_srcc'.format(suffix), best_s, epoch) 238 | writer.add_scalar('val_{}_best_plcc'.format(suffix), best_p, epoch) 239 | writer.add_scalar('val_{}_best_krcc'.format(suffix), best_k, epoch) 240 | writer.add_scalar('val_{}_best_rmse'.format(suffix), best_r, epoch) 241 | 242 | toc = timeit.default_timer() 243 | minutes = int((toc - tic) / 60) 244 | seconds = int((toc - tic) % 60) 245 | 246 | print( 247 | f"For {len(gt_labels)} videos, \nthe accuracy of the model: [{suffix}] is as follows:\n SROCC: {s:.4f} best: {best_s:.4f} \n PLCC: {p:.4f} best: {best_p:.4f} \n KROCC: {k:.4f} best: {best_k:.4f} \n RMSE: {r:.4f} best: {best_r:.4f}." 
248 | ) 249 | print('time elapsed {:02d}m {:02d}s.'.format(minutes, seconds)) 250 | 251 | return best_s, best_p, best_k, best_r 252 | 253 | # torch.save(results, f'{args.save_dir}/results_{dataset.lower()}_s{32}*{32}_ens{args.famount}.pkl') 254 | 255 | 256 | def main(): 257 | 258 | parser = argparse.ArgumentParser() 259 | parser.add_argument("-o", "--opt", type=str, 260 | default="./options/fast-SAMA-train.yml", help="the option file") 261 | 262 | args = parser.parse_args() 263 | with open(args.opt, "r") as f: 264 | opt = yaml.safe_load(f) 265 | print(opt) 266 | 267 | ## adaptively choose the device 268 | device = "cuda" if torch.cuda.is_available() else "cpu" 269 | 270 | if sys.gettrace(): 271 | print('in DEBUGE mode.') 272 | opt["name"] = "DEBUG" 273 | opt['train_num_workers']=0 274 | opt['test_num_workers']=0 275 | 276 | ## defining model and loading checkpoint 277 | 278 | bests_ = [] 279 | print('using device: {}'.format(device)) 280 | model = getattr(models, opt["model"]["type"])(**opt["model"]["args"]).to(device) 281 | 282 | if opt.get("split_seed", -1) > 0: 283 | num_splits = 10 284 | else: 285 | num_splits = 1 286 | 287 | stype = opt['stype'] if opt['stype'] in ['sama', 'sama-c', 'sama-mix', 'sama+spm', 'sama+swm'] else 'fragments' 288 | for split in range(num_splits): 289 | 290 | val_datasets = {} 291 | for key in opt["data"]: 292 | if key.startswith("val"): 293 | val_datasets[key] = getattr(datasets, opt["data"][key]["type"])(opt["data"][key]["args"], stype=stype) 294 | print('dataset=[{}], with {} samples.'.format(key, len(val_datasets[key]))) 295 | 296 | 297 | val_loaders = {} 298 | for key, val_dataset in val_datasets.items(): 299 | val_loaders[key] = torch.utils.data.DataLoader(val_dataset, 300 | batch_size=opt["test_batch_size"], 301 | num_workers=opt["test_num_workers"], 302 | pin_memory=False, 303 | shuffle=False, 304 | drop_last=False) 305 | 306 | train_datasets = {} 307 | for key in opt["data"]: 308 | if key.startswith("train"): 309 | train_dataset = getattr(datasets, opt["data"][key]["type"])(opt["data"][key]["args"], stype=stype) 310 | train_datasets[key] = train_dataset 311 | print('dataset=[{}], with {} samples.'.format(key, len(train_datasets[key]))) 312 | 313 | train_loaders = {} 314 | for key, train_dataset in train_datasets.items(): 315 | train_loaders[key] = torch.utils.data.DataLoader(train_dataset, 316 | batch_size=opt["train_batch_size"], 317 | num_workers=opt["train_num_workers"], 318 | shuffle=True) 319 | 320 | 321 | if "load_path_aux" in opt: 322 | state_dict = torch.load(opt["load_path"], map_location=device)["state_dict"] 323 | aux_state_dict = torch.load(opt["load_path_aux"], map_location=device)["state_dict"] 324 | 325 | from collections import OrderedDict 326 | 327 | fusion_state_dict = OrderedDict() 328 | for k, v in state_dict.items(): 329 | if "head" in k: 330 | continue 331 | if k.startswith("vqa_head"): 332 | ki = k.replace("vqa", "fragments") 333 | else: 334 | ki = k 335 | fusion_state_dict[ki] = v 336 | 337 | for k, v in aux_state_dict.items(): 338 | if "head" in k: 339 | continue 340 | if k.startswith("frag"): 341 | continue 342 | if k.startswith("vqa_head"): 343 | ki = k.replace("vqa", "resize") 344 | else: 345 | ki = k 346 | fusion_state_dict[ki] = v 347 | state_dict = fusion_state_dict 348 | print(model.load_state_dict(state_dict)) 349 | 350 | elif "load_path" in opt: 351 | state_dict = torch.load(opt["load_path"], map_location=device) 352 | 353 | if "state_dict" in state_dict: 354 | ### migrate training weights from mmaction 355 | 
state_dict = state_dict["state_dict"] 356 | from collections import OrderedDict 357 | 358 | i_state_dict = OrderedDict() 359 | for key in state_dict.keys(): 360 | if "head" in key: 361 | continue 362 | if "cls" in key: 363 | tkey = key.replace("cls", "vqa") 364 | elif "backbone" in key: 365 | i_state_dict[key] = state_dict[key] 366 | i_state_dict["fragments_"+key] = state_dict[key] 367 | i_state_dict["resize_"+key] = state_dict[key] 368 | else: 369 | i_state_dict[key] = state_dict[key] 370 | 371 | t_state_dict = model.state_dict() 372 | for key, value in t_state_dict.items(): 373 | if key in i_state_dict and i_state_dict[key].shape != value.shape: 374 | i_state_dict.pop(key) 375 | 376 | print(model.load_state_dict(i_state_dict, strict=False)) 377 | 378 | if opt["ema"]: 379 | from copy import deepcopy 380 | model_ema = deepcopy(model) 381 | else: 382 | model_ema = None 383 | 384 | #profile_inference(val_dataset, model, device) 385 | 386 | # finetune the model 387 | param_groups=[] 388 | 389 | for key, value in dict(model.named_children()).items(): 390 | if "backbone" in key: 391 | param_groups += [{"params": value.parameters(), "lr": opt["optimizer"]["lr"] * opt["optimizer"]["backbone_lr_mult"]}] 392 | else: 393 | param_groups += [{"params": value.parameters(), "lr": opt["optimizer"]["lr"]}] 394 | 395 | optimizer = torch.optim.AdamW(lr=opt["optimizer"]["lr"], 396 | params=param_groups, 397 | weight_decay=opt["optimizer"]["wd"]) 398 | warmup_iter = 0 399 | for train_loader in train_loaders.values(): 400 | warmup_iter += int(opt["warmup_epochs"] * len(train_loader)) 401 | max_iter = int((opt["num_epochs"] + opt["l_num_epochs"]) * len(train_loader)) 402 | lr_lambda = ( 403 | lambda cur_iter: cur_iter / warmup_iter 404 | if cur_iter <= warmup_iter 405 | else 0.5 * (1 + math.cos(math.pi * (cur_iter - warmup_iter) / max_iter))) 406 | 407 | scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=lr_lambda) 408 | 409 | bests = {} 410 | for key in val_loaders: 411 | bests[key] = -1,-1,-1,1000 412 | 413 | os.makedirs('./tensorboard/', exist_ok=True) 414 | os.makedirs('./pretrained_weights/', exist_ok=True) 415 | writer = SummaryWriter('./tensorboard/{}'.format(opt['name'])) 416 | 417 | for epoch in range(opt["num_epochs"]): 418 | print(f"Finetune Epoch {epoch}:") 419 | 420 | for key, train_loader in train_loaders.items(): 421 | finetune_epoch( 422 | train_loader, model, model_ema, optimizer, scheduler, device, epoch, writer, 423 | opt.get("need_upsampled", False), opt.get("need_feat", False), opt.get("need_fused", False), 424 | ) 425 | 426 | state_dict = model.state_dict() 427 | torch.save({"state_dict": state_dict}, 'pretrained_weights/{}_Epoch_{:02d}.pth'.format(opt["name"], epoch)) 428 | 429 | print(f"evaluation ..") 430 | 431 | for key in val_loaders: 432 | bests[key] = inference_set( 433 | val_loaders[key], 434 | model_ema if model_ema is not None else model, 435 | device, bests[key], epoch, writer, 436 | save_model=opt["save_model"], save_name=opt["name"], 437 | suffix=key+"_s", 438 | ) 439 | 440 | 441 | if opt["num_epochs"] > 0: 442 | for key in val_loaders: 443 | print( 444 | f"""For the finetuning process on {key} with {len(val_datasets[key])} videos, 445 | the best validation accuracy of the model-s is as follows: 446 | SROCC: {bests[key][0]:.4f} 447 | PLCC: {bests[key][1]:.4f} 448 | KROCC: {bests[key][2]:.4f} 449 | RMSE: {bests[key][3]:.4f}.""" 450 | ) 451 | 452 | 453 | 454 | if __name__ == "__main__": 455 | main() 456 | 
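Both training scripts above read the annotation file as plain comma-separated text with four fields per line, of which only the first (the file name) and the last (the MOS) are kept; the middle two fields are discarded, and each split is reshuffled reproducibly via random.Random(split * 123). A minimal sketch of that parsing with made-up example lines (the file names, middle fields, scores, and data_prefix below are placeholders, not real data):

import os.path as osp

# Hypothetical annotation lines in the layout the scripts expect; only the first and
# last comma-separated fields are used, the two middle fields are ignored by the loaders.
lines = [
    "A001.mp4,1920,1080,62.1148",
    "A002.mp4,1280,720,35.6025",
]

data_prefix = "/path/to/LIVE_VQC"   # placeholder data root
video_infos = []
for line in lines:
    fileid, _, _, label = line.strip().split(",")
    video_infos.append(dict(filename=osp.join(data_prefix, fileid),
                            label=float(label),
                            fileid=fileid))
print(video_infos[0])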
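Two of the loss helpers defined in both scripts, rescaled_l2_loss and rplcc_loss, are not called by the training loop and would fail if they were: the former reads an eps that is never defined in its scope, and torch.cov expects the two variables stacked into a single 2-D tensor rather than passed as separate arguments. A corrected, self-contained sketch, under the assumption that eps was meant to be a keyword argument defaulting to 1e-8 (torch.cov requires a reasonably recent PyTorch):

import torch

def rescaled_l2_loss(y_pred, y, eps=1e-8):
    # z-normalise both vectors before the MSE; eps guards against zero variance
    y_pred_rs = (y_pred - y_pred.mean()) / (y_pred.std() + eps)
    y_rs = (y - y.mean()) / (y.std() + eps)
    return torch.nn.functional.mse_loss(y_pred_rs, y_rs)

def rplcc_loss(y_pred, y, eps=1e-8):
    # literally (1 - PLCC) / 2; torch.cov takes the variables stacked as rows of one matrix
    cov = torch.cov(torch.stack([y_pred.flatten(), y.flatten()]))[0, 1]
    std = (torch.std(y_pred) + eps) * (torch.std(y) + eps)
    return (1 - cov / std) / 2

if __name__ == "__main__":
    y = torch.rand(16)
    y_pred = y + 0.05 * torch.randn(16)
    print(float(rescaled_l2_loss(y_pred, y)), float(rplcc_loss(y_pred, y)))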
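When ema is enabled, finetune_epoch keeps an exponential moving average of the weights with decay 0.999, and inference_set then evaluates that shadow copy instead of the live model. The same update rule in isolation (the toy Linear module is purely illustrative):

import torch
from copy import deepcopy

model = torch.nn.Linear(4, 1)        # stand-in for the VQA model, illustration only
model_ema = deepcopy(model)

decay = 0.999                        # same constant used in finetune_epoch
with torch.no_grad():
    ema_params = dict(model_ema.named_parameters())
    for name, p in model.named_parameters():
        # shadow <- decay * shadow + (1 - decay) * live weights
        ema_params[name].data.mul_(decay).add_(p.data, alpha=1 - decay)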
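During validation, inference_set receives each sampled view as one (B, C, T, H, W) tensor and folds the temporal axis into the batch so every clip is scored independently. A shape-only sketch of that reshape/permute with made-up sizes, tagging each frame so it is clear where it lands:

import torch

# Hypothetical sizes for illustration only.
b, c, t, h, w, num_clips = 2, 3, 32, 4, 4, 4
t_clip = t // num_clips

# Tag every frame with its temporal index so we can track it through the reshape.
video = torch.arange(t).float().view(1, 1, t, 1, 1).expand(b, c, t, h, w)

clips = (video.reshape(b, c, num_clips, t_clip, h, w)
              .permute(0, 2, 1, 3, 4, 5)
              .reshape(b * num_clips, c, t_clip, h, w))

# Clip k of sample n sits at index n * num_clips + k and holds frames [k*t_clip, (k+1)*t_clip).
print(clips.shape)           # torch.Size([8, 3, 8, 4, 4])
print(clips[1, 0, :, 0, 0])  # tensor([ 8.,  9., 10., 11., 12., 13., 14., 15.])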
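The optimizer in both scripts uses per-parameter-group learning rates (the backbone group gets lr * backbone_lr_mult) together with a LambdaLR multiplier that warms up linearly and then follows a cosine decay measured from the end of warmup. The scheduling rule in isolation, with made-up option values standing in for the YAML settings:

import math

# Illustrative values only; in the scripts these come from the option file.
warmup_epochs, num_epochs, l_num_epochs, iters_per_epoch = 2.5, 30, 0, 200

warmup_iter = int(warmup_epochs * iters_per_epoch)
max_iter = int((num_epochs + l_num_epochs) * iters_per_epoch)

# Same rule the scripts hand to torch.optim.lr_scheduler.LambdaLR:
# linear warmup, then cosine decay starting at the end of warmup.
lr_lambda = (lambda cur_iter: cur_iter / warmup_iter
             if cur_iter <= warmup_iter
             else 0.5 * (1 + math.cos(math.pi * (cur_iter - warmup_iter) / max_iter)))

for it in (0, warmup_iter // 2, warmup_iter, max_iter // 2, max_iter - 1):
    print(it, round(lr_lambda(it), 4))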
-------------------------------------------------------------------------------- /IQA/fastvqa/models/swin_v1.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Swin Transformer 3 | # Copyright (c) 2021 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # -------------------------------------------------------- 6 | 7 | import torch 8 | import torch.nn as nn 9 | import torch.utils.checkpoint as checkpoint 10 | from timm.models.layers import DropPath, to_2tuple, trunc_normal_ 11 | 12 | try: 13 | import os, sys 14 | 15 | kernel_path = os.path.abspath(os.path.join('..')) 16 | sys.path.append(kernel_path) 17 | from kernels.window_process.window_process import WindowProcess, WindowProcessReverse 18 | 19 | except: 20 | WindowProcess = None 21 | WindowProcessReverse = None 22 | print("[Warning] Fused window process have not been installed. Please refer to get_started.md for installation.") 23 | 24 | 25 | class Mlp(nn.Module): 26 | def __init__(self, in_features, hidden_features=None, out_features=None, act_layer=nn.GELU, drop=0.): 27 | super().__init__() 28 | out_features = out_features or in_features 29 | hidden_features = hidden_features or in_features 30 | self.fc1 = nn.Linear(in_features, hidden_features) 31 | self.act = act_layer() 32 | self.fc2 = nn.Linear(hidden_features, out_features) 33 | self.drop = nn.Dropout(drop) 34 | 35 | def forward(self, x): 36 | x = self.fc1(x) 37 | x = self.act(x) 38 | x = self.drop(x) 39 | x = self.fc2(x) 40 | x = self.drop(x) 41 | return x 42 | 43 | 44 | def window_partition(x, window_size): 45 | """ 46 | Args: 47 | x: (B, H, W, C) 48 | window_size (int): window size 49 | Returns: 50 | windows: (num_windows*B, window_size, window_size, C) 51 | """ 52 | B, H, W, C = x.shape 53 | x = x.view(B, H // window_size, window_size, W // window_size, window_size, C) 54 | windows = x.permute(0, 1, 3, 2, 4, 5).contiguous().view(-1, window_size, window_size, C) 55 | return windows 56 | 57 | 58 | def window_reverse(windows, window_size, H, W): 59 | """ 60 | Args: 61 | windows: (num_windows*B, window_size, window_size, C) 62 | window_size (int): Window size 63 | H (int): Height of image 64 | W (int): Width of image 65 | Returns: 66 | x: (B, H, W, C) 67 | """ 68 | B = int(windows.shape[0] / (H * W / window_size / window_size)) 69 | x = windows.view(B, H // window_size, W // window_size, window_size, window_size, -1) 70 | x = x.permute(0, 1, 3, 2, 4, 5).contiguous().view(B, H, W, -1) 71 | return x 72 | 73 | 74 | class WindowAttention(nn.Module): 75 | r""" Window based multi-head self attention (W-MSA) module with relative position bias. 76 | It supports both of shifted and non-shifted window. 77 | Args: 78 | dim (int): Number of input channels. 79 | window_size (tuple[int]): The height and width of the window. 80 | num_heads (int): Number of attention heads. 81 | qkv_bias (bool, optional): If True, add a learnable bias to query, key, value. Default: True 82 | qk_scale (float | None, optional): Override default qk scale of head_dim ** -0.5 if set 83 | attn_drop (float, optional): Dropout ratio of attention weight. Default: 0.0 84 | proj_drop (float, optional): Dropout ratio of output. 
Default: 0.0 85 | """ 86 | 87 | def __init__(self, dim, window_size, num_heads, qkv_bias=True, qk_scale=None, attn_drop=0., proj_drop=0.): 88 | 89 | super().__init__() 90 | self.dim = dim 91 | self.window_size = window_size # Wh, Ww 92 | self.num_heads = num_heads 93 | head_dim = dim // num_heads 94 | self.scale = qk_scale or head_dim ** -0.5 95 | 96 | # define a parameter table of relative position bias 97 | self.relative_position_bias_table = nn.Parameter( 98 | torch.zeros((2 * window_size[0] - 1) * (2 * window_size[1] - 1), num_heads)) # 2*Wh-1 * 2*Ww-1, nH 99 | 100 | # get pair-wise relative position index for each token inside the window 101 | coords_h = torch.arange(self.window_size[0]) 102 | coords_w = torch.arange(self.window_size[1]) 103 | coords = torch.stack(torch.meshgrid([coords_h, coords_w])) # 2, Wh, Ww 104 | coords_flatten = torch.flatten(coords, 1) # 2, Wh*Ww 105 | relative_coords = coords_flatten[:, :, None] - coords_flatten[:, None, :] # 2, Wh*Ww, Wh*Ww 106 | relative_coords = relative_coords.permute(1, 2, 0).contiguous() # Wh*Ww, Wh*Ww, 2 107 | relative_coords[:, :, 0] += self.window_size[0] - 1 # shift to start from 0 108 | relative_coords[:, :, 1] += self.window_size[1] - 1 109 | relative_coords[:, :, 0] *= 2 * self.window_size[1] - 1 110 | relative_position_index = relative_coords.sum(-1) # Wh*Ww, Wh*Ww 111 | self.register_buffer("relative_position_index", relative_position_index) 112 | 113 | self.qkv = nn.Linear(dim, dim * 3, bias=qkv_bias) 114 | self.attn_drop = nn.Dropout(attn_drop) 115 | self.proj = nn.Linear(dim, dim) 116 | self.proj_drop = nn.Dropout(proj_drop) 117 | 118 | trunc_normal_(self.relative_position_bias_table, std=.02) 119 | self.softmax = nn.Softmax(dim=-1) 120 | 121 | def forward(self, x, mask=None): 122 | """ 123 | Args: 124 | x: input features with shape of (num_windows*B, N, C) 125 | mask: (0/-inf) mask with shape of (num_windows, Wh*Ww, Wh*Ww) or None 126 | """ 127 | B_, N, C = x.shape 128 | qkv = self.qkv(x).reshape(B_, N, 3, self.num_heads, C // self.num_heads).permute(2, 0, 3, 1, 4) 129 | q, k, v = qkv[0], qkv[1], qkv[2] # make torchscript happy (cannot use tensor as tuple) 130 | 131 | q = q * self.scale 132 | attn = (q @ k.transpose(-2, -1)) 133 | 134 | relative_position_bias = self.relative_position_bias_table[self.relative_position_index.view(-1)].view( 135 | self.window_size[0] * self.window_size[1], self.window_size[0] * self.window_size[1], -1) # Wh*Ww,Wh*Ww,nH 136 | relative_position_bias = relative_position_bias.permute(2, 0, 1).contiguous() # nH, Wh*Ww, Wh*Ww 137 | attn = attn + relative_position_bias.unsqueeze(0) 138 | 139 | if mask is not None: 140 | nW = mask.shape[0] 141 | attn = attn.view(B_ // nW, nW, self.num_heads, N, N) + mask.unsqueeze(1).unsqueeze(0) 142 | attn = attn.view(-1, self.num_heads, N, N) 143 | attn = self.softmax(attn) 144 | else: 145 | attn = self.softmax(attn) 146 | 147 | attn = self.attn_drop(attn) 148 | 149 | x = (attn @ v).transpose(1, 2).reshape(B_, N, C) 150 | x = self.proj(x) 151 | x = self.proj_drop(x) 152 | return x 153 | 154 | def extra_repr(self) -> str: 155 | return f'dim={self.dim}, window_size={self.window_size}, num_heads={self.num_heads}' 156 | 157 | def flops(self, N): 158 | # calculate flops for 1 window with token length of N 159 | flops = 0 160 | # qkv = self.qkv(x) 161 | flops += N * self.dim * 3 * self.dim 162 | # attn = (q @ k.transpose(-2, -1)) 163 | flops += self.num_heads * N * (self.dim // self.num_heads) * N 164 | # x = (attn @ v) 165 | flops += self.num_heads * N * N * (self.dim // 
self.num_heads) 166 | # x = self.proj(x) 167 | flops += N * self.dim * self.dim 168 | return flops 169 | 170 | 171 | class SwinTransformerBlock(nn.Module): 172 | r""" Swin Transformer Block. 173 | Args: 174 | dim (int): Number of input channels. 175 | input_resolution (tuple[int]): Input resulotion. 176 | num_heads (int): Number of attention heads. 177 | window_size (int): Window size. 178 | shift_size (int): Shift size for SW-MSA. 179 | mlp_ratio (float): Ratio of mlp hidden dim to embedding dim. 180 | qkv_bias (bool, optional): If True, add a learnable bias to query, key, value. Default: True 181 | qk_scale (float | None, optional): Override default qk scale of head_dim ** -0.5 if set. 182 | drop (float, optional): Dropout rate. Default: 0.0 183 | attn_drop (float, optional): Attention dropout rate. Default: 0.0 184 | drop_path (float, optional): Stochastic depth rate. Default: 0.0 185 | act_layer (nn.Module, optional): Activation layer. Default: nn.GELU 186 | norm_layer (nn.Module, optional): Normalization layer. Default: nn.LayerNorm 187 | fused_window_process (bool, optional): If True, use one kernel to fused window shift & window partition for acceleration, similar for the reversed part. Default: False 188 | """ 189 | 190 | def __init__(self, dim, input_resolution, num_heads, window_size=7, shift_size=0, 191 | mlp_ratio=4., qkv_bias=True, qk_scale=None, drop=0., attn_drop=0., drop_path=0., 192 | act_layer=nn.GELU, norm_layer=nn.LayerNorm, 193 | fused_window_process=False): 194 | super().__init__() 195 | self.dim = dim 196 | self.input_resolution = input_resolution 197 | self.num_heads = num_heads 198 | self.window_size = window_size 199 | self.shift_size = shift_size 200 | self.mlp_ratio = mlp_ratio 201 | if min(self.input_resolution) <= self.window_size: 202 | # if window size is larger than input resolution, we don't partition windows 203 | self.shift_size = 0 204 | self.window_size = min(self.input_resolution) 205 | assert 0 <= self.shift_size < self.window_size, "shift_size must in 0-window_size" 206 | 207 | self.norm1 = norm_layer(dim) 208 | self.attn = WindowAttention( 209 | dim, window_size=to_2tuple(self.window_size), num_heads=num_heads, 210 | qkv_bias=qkv_bias, qk_scale=qk_scale, attn_drop=attn_drop, proj_drop=drop) 211 | 212 | self.drop_path = DropPath(drop_path) if drop_path > 0. 
else nn.Identity() 213 | self.norm2 = norm_layer(dim) 214 | mlp_hidden_dim = int(dim * mlp_ratio) 215 | self.mlp = Mlp(in_features=dim, hidden_features=mlp_hidden_dim, act_layer=act_layer, drop=drop) 216 | 217 | if self.shift_size > 0: 218 | # calculate attention mask for SW-MSA 219 | H, W = self.input_resolution 220 | img_mask = torch.zeros((1, H, W, 1)) # 1 H W 1 221 | h_slices = (slice(0, -self.window_size), 222 | slice(-self.window_size, -self.shift_size), 223 | slice(-self.shift_size, None)) 224 | w_slices = (slice(0, -self.window_size), 225 | slice(-self.window_size, -self.shift_size), 226 | slice(-self.shift_size, None)) 227 | cnt = 0 228 | for h in h_slices: 229 | for w in w_slices: 230 | img_mask[:, h, w, :] = cnt 231 | cnt += 1 232 | 233 | mask_windows = window_partition(img_mask, self.window_size) # nW, window_size, window_size, 1 234 | mask_windows = mask_windows.view(-1, self.window_size * self.window_size) 235 | attn_mask = mask_windows.unsqueeze(1) - mask_windows.unsqueeze(2) 236 | attn_mask = attn_mask.masked_fill(attn_mask != 0, float(-100.0)).masked_fill(attn_mask == 0, float(0.0)) 237 | else: 238 | attn_mask = None 239 | 240 | self.register_buffer("attn_mask", attn_mask) 241 | self.fused_window_process = fused_window_process 242 | 243 | def forward(self, x): 244 | H, W = self.input_resolution 245 | B, L, C = x.shape 246 | assert L == H * W, "input feature has wrong size" 247 | 248 | shortcut = x 249 | x = self.norm1(x) 250 | x = x.view(B, H, W, C) 251 | 252 | # cyclic shift 253 | if self.shift_size > 0: 254 | if not self.fused_window_process: 255 | shifted_x = torch.roll(x, shifts=(-self.shift_size, -self.shift_size), dims=(1, 2)) 256 | # partition windows 257 | x_windows = window_partition(shifted_x, self.window_size) # nW*B, window_size, window_size, C 258 | else: 259 | x_windows = WindowProcess.apply(x, B, H, W, C, -self.shift_size, self.window_size) 260 | else: 261 | shifted_x = x 262 | # partition windows 263 | x_windows = window_partition(shifted_x, self.window_size) # nW*B, window_size, window_size, C 264 | 265 | x_windows = x_windows.view(-1, self.window_size * self.window_size, C) # nW*B, window_size*window_size, C 266 | 267 | # W-MSA/SW-MSA 268 | attn_windows = self.attn(x_windows, mask=self.attn_mask) # nW*B, window_size*window_size, C 269 | 270 | # merge windows 271 | attn_windows = attn_windows.view(-1, self.window_size, self.window_size, C) 272 | 273 | # reverse cyclic shift 274 | if self.shift_size > 0: 275 | if not self.fused_window_process: 276 | shifted_x = window_reverse(attn_windows, self.window_size, H, W) # B H' W' C 277 | x = torch.roll(shifted_x, shifts=(self.shift_size, self.shift_size), dims=(1, 2)) 278 | else: 279 | x = WindowProcessReverse.apply(attn_windows, B, H, W, C, self.shift_size, self.window_size) 280 | else: 281 | shifted_x = window_reverse(attn_windows, self.window_size, H, W) # B H' W' C 282 | x = shifted_x 283 | x = x.view(B, H * W, C) 284 | x = shortcut + self.drop_path(x) 285 | 286 | # FFN 287 | x = x + self.drop_path(self.mlp(self.norm2(x))) 288 | 289 | return x 290 | 291 | def extra_repr(self) -> str: 292 | return f"dim={self.dim}, input_resolution={self.input_resolution}, num_heads={self.num_heads}, " \ 293 | f"window_size={self.window_size}, shift_size={self.shift_size}, mlp_ratio={self.mlp_ratio}" 294 | 295 | def flops(self): 296 | flops = 0 297 | H, W = self.input_resolution 298 | # norm1 299 | flops += self.dim * H * W 300 | # W-MSA/SW-MSA 301 | nW = H * W / self.window_size / self.window_size 302 | flops += nW * 
self.attn.flops(self.window_size * self.window_size) 303 | # mlp 304 | flops += 2 * H * W * self.dim * self.dim * self.mlp_ratio 305 | # norm2 306 | flops += self.dim * H * W 307 | return flops 308 | 309 | 310 | class PatchMerging(nn.Module): 311 | r""" Patch Merging Layer. 312 | Args: 313 | input_resolution (tuple[int]): Resolution of input feature. 314 | dim (int): Number of input channels. 315 | norm_layer (nn.Module, optional): Normalization layer. Default: nn.LayerNorm 316 | """ 317 | 318 | def __init__(self, input_resolution, dim, norm_layer=nn.LayerNorm): 319 | super().__init__() 320 | self.input_resolution = input_resolution 321 | self.dim = dim 322 | self.reduction = nn.Linear(4 * dim, 2 * dim, bias=False) 323 | self.norm = norm_layer(4 * dim) 324 | 325 | def forward(self, x): 326 | """ 327 | x: B, H*W, C 328 | """ 329 | H, W = self.input_resolution 330 | B, L, C = x.shape 331 | assert L == H * W, "input feature has wrong size" 332 | assert H % 2 == 0 and W % 2 == 0, f"x size ({H}*{W}) are not even." 333 | 334 | x = x.view(B, H, W, C) 335 | 336 | x0 = x[:, 0::2, 0::2, :] # B H/2 W/2 C 337 | x1 = x[:, 1::2, 0::2, :] # B H/2 W/2 C 338 | x2 = x[:, 0::2, 1::2, :] # B H/2 W/2 C 339 | x3 = x[:, 1::2, 1::2, :] # B H/2 W/2 C 340 | x = torch.cat([x0, x1, x2, x3], -1) # B H/2 W/2 4*C 341 | x = x.view(B, -1, 4 * C) # B H/2*W/2 4*C 342 | 343 | x = self.norm(x) 344 | x = self.reduction(x) 345 | 346 | return x 347 | 348 | def extra_repr(self) -> str: 349 | return f"input_resolution={self.input_resolution}, dim={self.dim}" 350 | 351 | def flops(self): 352 | H, W = self.input_resolution 353 | flops = H * W * self.dim 354 | flops += (H // 2) * (W // 2) * 4 * self.dim * 2 * self.dim 355 | return flops 356 | 357 | 358 | class BasicLayer(nn.Module): 359 | """ A basic Swin Transformer layer for one stage. 360 | Args: 361 | dim (int): Number of input channels. 362 | input_resolution (tuple[int]): Input resolution. 363 | depth (int): Number of blocks. 364 | num_heads (int): Number of attention heads. 365 | window_size (int): Local window size. 366 | mlp_ratio (float): Ratio of mlp hidden dim to embedding dim. 367 | qkv_bias (bool, optional): If True, add a learnable bias to query, key, value. Default: True 368 | qk_scale (float | None, optional): Override default qk scale of head_dim ** -0.5 if set. 369 | drop (float, optional): Dropout rate. Default: 0.0 370 | attn_drop (float, optional): Attention dropout rate. Default: 0.0 371 | drop_path (float | tuple[float], optional): Stochastic depth rate. Default: 0.0 372 | norm_layer (nn.Module, optional): Normalization layer. Default: nn.LayerNorm 373 | downsample (nn.Module | None, optional): Downsample layer at the end of the layer. Default: None 374 | use_checkpoint (bool): Whether to use checkpointing to save memory. Default: False. 375 | fused_window_process (bool, optional): If True, use one kernel to fused window shift & window partition for acceleration, similar for the reversed part. 
Default: False 376 | """ 377 | 378 | def __init__(self, dim, input_resolution, depth, num_heads, window_size, 379 | mlp_ratio=4., qkv_bias=True, qk_scale=None, drop=0., attn_drop=0., 380 | drop_path=0., norm_layer=nn.LayerNorm, downsample=None, use_checkpoint=False, 381 | fused_window_process=False): 382 | 383 | super().__init__() 384 | self.dim = dim 385 | self.input_resolution = input_resolution 386 | self.depth = depth 387 | self.use_checkpoint = use_checkpoint 388 | 389 | # build blocks 390 | self.blocks = nn.ModuleList([ 391 | SwinTransformerBlock(dim=dim, input_resolution=input_resolution, 392 | num_heads=num_heads, window_size=window_size, 393 | shift_size=0 if (i % 2 == 0) else window_size // 2, 394 | mlp_ratio=mlp_ratio, 395 | qkv_bias=qkv_bias, qk_scale=qk_scale, 396 | drop=drop, attn_drop=attn_drop, 397 | drop_path=drop_path[i] if isinstance(drop_path, list) else drop_path, 398 | norm_layer=norm_layer, 399 | fused_window_process=fused_window_process) 400 | for i in range(depth)]) 401 | 402 | # patch merging layer 403 | if downsample is not None: 404 | self.downsample = downsample(input_resolution, dim=dim, norm_layer=norm_layer) 405 | else: 406 | self.downsample = None 407 | 408 | def forward(self, x): 409 | for blk in self.blocks: 410 | if self.use_checkpoint: 411 | x = checkpoint.checkpoint(blk, x) 412 | else: 413 | x = blk(x) 414 | if self.downsample is not None: 415 | x = self.downsample(x) 416 | return x 417 | 418 | def extra_repr(self) -> str: 419 | return f"dim={self.dim}, input_resolution={self.input_resolution}, depth={self.depth}" 420 | 421 | def flops(self): 422 | flops = 0 423 | for blk in self.blocks: 424 | flops += blk.flops() 425 | if self.downsample is not None: 426 | flops += self.downsample.flops() 427 | return flops 428 | 429 | 430 | class PatchEmbed(nn.Module): 431 | r""" Image to Patch Embedding 432 | Args: 433 | img_size (int): Image size. Default: 224. 434 | patch_size (int): Patch token size. Default: 4. 435 | in_chans (int): Number of input image channels. Default: 3. 436 | embed_dim (int): Number of linear projection output channels. Default: 96. 437 | norm_layer (nn.Module, optional): Normalization layer. Default: None 438 | """ 439 | 440 | def __init__(self, img_size=224, patch_size=4, in_chans=3, embed_dim=96, norm_layer=None): 441 | super().__init__() 442 | img_size = to_2tuple(img_size) 443 | patch_size = to_2tuple(patch_size) 444 | patches_resolution = [img_size[0] // patch_size[0], img_size[1] // patch_size[1]] 445 | self.img_size = img_size 446 | self.patch_size = patch_size 447 | self.patches_resolution = patches_resolution 448 | self.num_patches = patches_resolution[0] * patches_resolution[1] 449 | 450 | self.in_chans = in_chans 451 | self.embed_dim = embed_dim 452 | 453 | self.proj = nn.Conv2d(in_chans, embed_dim, kernel_size=patch_size, stride=patch_size) 454 | if norm_layer is not None: 455 | self.norm = norm_layer(embed_dim) 456 | else: 457 | self.norm = None 458 | 459 | def forward(self, x): 460 | B, C, H, W = x.shape 461 | # FIXME look at relaxing size constraints 462 | assert H == self.img_size[0] and W == self.img_size[1], \ 463 | f"Input image size ({H}*{W}) doesn't match model ({self.img_size[0]}*{self.img_size[1]})." 
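        # The single line below is the entire patch-embedding step: a Conv2d whose kernel
        # size equals its stride cuts the image into non-overlapping patch_size x patch_size
        # patches and linearly projects each one to embed_dim channels, and
        # flatten(2).transpose(1, 2) turns the resulting (B, embed_dim, Ph, Pw) map into the
        # (B, Ph*Pw, embed_dim) token sequence consumed by the Swin layers.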
464 | x = self.proj(x).flatten(2).transpose(1, 2) # B Ph*Pw C 465 | if self.norm is not None: 466 | x = self.norm(x) 467 | return x 468 | 469 | def flops(self): 470 | Ho, Wo = self.patches_resolution 471 | flops = Ho * Wo * self.embed_dim * self.in_chans * (self.patch_size[0] * self.patch_size[1]) 472 | if self.norm is not None: 473 | flops += Ho * Wo * self.embed_dim 474 | return flops 475 | 476 | 477 | class SwinTransformer(nn.Module): 478 | r""" Swin Transformer 479 | A PyTorch impl of : `Swin Transformer: Hierarchical Vision Transformer using Shifted Windows` - 480 | https://arxiv.org/pdf/2103.14030 481 | Args: 482 | img_size (int | tuple(int)): Input image size. Default 224 483 | patch_size (int | tuple(int)): Patch size. Default: 4 484 | in_chans (int): Number of input image channels. Default: 3 485 | num_classes (int): Number of classes for classification head. Default: 1000 486 | embed_dim (int): Patch embedding dimension. Default: 96 487 | depths (tuple(int)): Depth of each Swin Transformer layer. 488 | num_heads (tuple(int)): Number of attention heads in different layers. 489 | window_size (int): Window size. Default: 7 490 | mlp_ratio (float): Ratio of mlp hidden dim to embedding dim. Default: 4 491 | qkv_bias (bool): If True, add a learnable bias to query, key, value. Default: True 492 | qk_scale (float): Override default qk scale of head_dim ** -0.5 if set. Default: None 493 | drop_rate (float): Dropout rate. Default: 0 494 | attn_drop_rate (float): Attention dropout rate. Default: 0 495 | drop_path_rate (float): Stochastic depth rate. Default: 0.1 496 | norm_layer (nn.Module): Normalization layer. Default: nn.LayerNorm. 497 | ape (bool): If True, add absolute position embedding to the patch embedding. Default: False 498 | patch_norm (bool): If True, add normalization after patch embedding. Default: True 499 | use_checkpoint (bool): Whether to use checkpointing to save memory. Default: False 500 | fused_window_process (bool, optional): If True, use one kernel to fused window shift & window partition for acceleration, similar for the reversed part. 
Default: False 501 | """ 502 | 503 | def __init__(self, img_size=224, patch_size=4, in_chans=3, num_classes=1000, 504 | embed_dim=96, depths=[2, 2, 6, 2], num_heads=[3, 6, 12, 24], 505 | window_size=7, mlp_ratio=4., qkv_bias=True, qk_scale=None, 506 | drop_rate=0., attn_drop_rate=0., drop_path_rate=0.1, 507 | norm_layer=nn.LayerNorm, ape=False, patch_norm=True, 508 | use_checkpoint=False, fused_window_process=False, **kwargs): 509 | super().__init__() 510 | 511 | self.num_classes = num_classes 512 | self.num_layers = len(depths) 513 | self.embed_dim = embed_dim 514 | self.ape = ape 515 | self.patch_norm = patch_norm 516 | self.num_features = int(embed_dim * 2 ** (self.num_layers - 1)) 517 | self.mlp_ratio = mlp_ratio 518 | 519 | # split image into non-overlapping patches 520 | self.patch_embed = PatchEmbed( 521 | img_size=img_size, patch_size=patch_size, in_chans=in_chans, embed_dim=embed_dim, 522 | norm_layer=norm_layer if self.patch_norm else None) 523 | num_patches = self.patch_embed.num_patches 524 | patches_resolution = self.patch_embed.patches_resolution 525 | self.patches_resolution = patches_resolution 526 | 527 | # absolute position embedding 528 | if self.ape: 529 | self.absolute_pos_embed = nn.Parameter(torch.zeros(1, num_patches, embed_dim)) 530 | trunc_normal_(self.absolute_pos_embed, std=.02) 531 | 532 | self.pos_drop = nn.Dropout(p=drop_rate) 533 | 534 | # stochastic depth 535 | dpr = [x.item() for x in torch.linspace(0, drop_path_rate, sum(depths))] # stochastic depth decay rule 536 | 537 | # build layers 538 | self.layers = nn.ModuleList() 539 | for i_layer in range(self.num_layers): 540 | layer = BasicLayer(dim=int(embed_dim * 2 ** i_layer), 541 | input_resolution=(patches_resolution[0] // (2 ** i_layer), 542 | patches_resolution[1] // (2 ** i_layer)), 543 | depth=depths[i_layer], 544 | num_heads=num_heads[i_layer], 545 | window_size=window_size, 546 | mlp_ratio=self.mlp_ratio, 547 | qkv_bias=qkv_bias, qk_scale=qk_scale, 548 | drop=drop_rate, attn_drop=attn_drop_rate, 549 | drop_path=dpr[sum(depths[:i_layer]):sum(depths[:i_layer + 1])], 550 | norm_layer=norm_layer, 551 | downsample=PatchMerging if (i_layer < self.num_layers - 1) else None, 552 | use_checkpoint=use_checkpoint, 553 | fused_window_process=fused_window_process) 554 | self.layers.append(layer) 555 | 556 | self.norm = norm_layer(self.num_features) 557 | self.avgpool = nn.AdaptiveAvgPool1d(1) 558 | self.head = nn.Linear(self.num_features, num_classes) if num_classes > 0 else nn.Identity() 559 | 560 | self.apply(self._init_weights) 561 | 562 | def _init_weights(self, m): 563 | if isinstance(m, nn.Linear): 564 | trunc_normal_(m.weight, std=.02) 565 | if isinstance(m, nn.Linear) and m.bias is not None: 566 | nn.init.constant_(m.bias, 0) 567 | elif isinstance(m, nn.LayerNorm): 568 | nn.init.constant_(m.bias, 0) 569 | nn.init.constant_(m.weight, 1.0) 570 | 571 | @torch.jit.ignore 572 | def no_weight_decay(self): 573 | return {'absolute_pos_embed'} 574 | 575 | @torch.jit.ignore 576 | def no_weight_decay_keywords(self): 577 | return {'relative_position_bias_table'} 578 | 579 | def forward_features(self, x): 580 | # x = self.patch_embed(x) 581 | if self.ape: 582 | x = x + self.absolute_pos_embed 583 | x = self.pos_drop(x) 584 | 585 | for i, layer in enumerate(self.layers): 586 | x = layer(x) 587 | 588 | x = self.norm(x) # B L C 589 | x = self.avgpool(x.transpose(1, 2)) # B C 1 590 | x = torch.flatten(x, 1) 591 | return x 592 | 593 | def forward(self, x): 594 | x = self.forward_features(x) 595 | # x = self.head(x) 596 | 
return x 597 | 598 | def flops(self): 599 | flops = 0 600 | flops += self.patch_embed.flops() 601 | for i, layer in enumerate(self.layers): 602 | flops += layer.flops() 603 | flops += self.num_features * self.patches_resolution[0] * self.patches_resolution[1] // (2 ** self.num_layers) 604 | flops += self.num_features * self.num_classes 605 | return flops -------------------------------------------------------------------------------- /IQA/fastvqa/models/swin_v2.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Swin Transformer V2 3 | # Copyright (c) 2022 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # -------------------------------------------------------- 6 | 7 | import torch 8 | import torch.nn as nn 9 | import torch.nn.functional as F 10 | import torch.utils.checkpoint as checkpoint 11 | from timm.models.layers import DropPath, to_2tuple, trunc_normal_ 12 | import numpy as np 13 | 14 | 15 | class Mlp(nn.Module): 16 | def __init__(self, in_features, hidden_features=None, out_features=None, act_layer=nn.GELU, drop=0.): 17 | super().__init__() 18 | out_features = out_features or in_features 19 | hidden_features = hidden_features or in_features 20 | self.fc1 = nn.Linear(in_features, hidden_features) 21 | self.act = act_layer() 22 | self.fc2 = nn.Linear(hidden_features, out_features) 23 | self.drop = nn.Dropout(drop) 24 | 25 | def forward(self, x): 26 | x = self.fc1(x) 27 | x = self.act(x) 28 | x = self.drop(x) 29 | x = self.fc2(x) 30 | x = self.drop(x) 31 | return x 32 | 33 | 34 | def window_partition(x, window_size): 35 | """ 36 | Args: 37 | x: (B, H, W, C) 38 | window_size (int): window size 39 | Returns: 40 | windows: (num_windows*B, window_size, window_size, C) 41 | """ 42 | B, H, W, C = x.shape 43 | x = x.view(B, H // window_size, window_size, W // window_size, window_size, C) 44 | windows = x.permute(0, 1, 3, 2, 4, 5).contiguous().view(-1, window_size, window_size, C) 45 | return windows 46 | 47 | 48 | def window_reverse(windows, window_size, H, W): 49 | """ 50 | Args: 51 | windows: (num_windows*B, window_size, window_size, C) 52 | window_size (int): Window size 53 | H (int): Height of image 54 | W (int): Width of image 55 | Returns: 56 | x: (B, H, W, C) 57 | """ 58 | B = int(windows.shape[0] / (H * W / window_size / window_size)) 59 | x = windows.view(B, H // window_size, W // window_size, window_size, window_size, -1) 60 | x = x.permute(0, 1, 3, 2, 4, 5).contiguous().view(B, H, W, -1) 61 | return x 62 | 63 | 64 | class WindowAttention(nn.Module): 65 | r""" Window based multi-head self attention (W-MSA) module with relative position bias. 66 | It supports both of shifted and non-shifted window. 67 | Args: 68 | dim (int): Number of input channels. 69 | window_size (tuple[int]): The height and width of the window. 70 | num_heads (int): Number of attention heads. 71 | qkv_bias (bool, optional): If True, add a learnable bias to query, key, value. Default: True 72 | attn_drop (float, optional): Dropout ratio of attention weight. Default: 0.0 73 | proj_drop (float, optional): Dropout ratio of output. Default: 0.0 74 | pretrained_window_size (tuple[int]): The height and width of the window in pre-training. 
75 | """ 76 | 77 | def __init__(self, dim, window_size, num_heads, qkv_bias=True, attn_drop=0., proj_drop=0., 78 | pretrained_window_size=[0, 0]): 79 | 80 | super().__init__() 81 | self.dim = dim 82 | self.window_size = window_size # Wh, Ww 83 | self.pretrained_window_size = pretrained_window_size 84 | self.num_heads = num_heads 85 | 86 | self.logit_scale = nn.Parameter(torch.log(10 * torch.ones((num_heads, 1, 1))), requires_grad=True) 87 | 88 | # mlp to generate continuous relative position bias 89 | self.cpb_mlp = nn.Sequential(nn.Linear(2, 512, bias=True), 90 | nn.ReLU(inplace=True), 91 | nn.Linear(512, num_heads, bias=False)) 92 | 93 | # get relative_coords_table 94 | relative_coords_h = torch.arange(-(self.window_size[0] - 1), self.window_size[0], dtype=torch.float32) 95 | relative_coords_w = torch.arange(-(self.window_size[1] - 1), self.window_size[1], dtype=torch.float32) 96 | relative_coords_table = torch.stack( 97 | torch.meshgrid([relative_coords_h, 98 | relative_coords_w])).permute(1, 2, 0).contiguous().unsqueeze(0) # 1, 2*Wh-1, 2*Ww-1, 2 99 | if pretrained_window_size[0] > 0: 100 | relative_coords_table[:, :, :, 0] /= (pretrained_window_size[0] - 1) 101 | relative_coords_table[:, :, :, 1] /= (pretrained_window_size[1] - 1) 102 | else: 103 | relative_coords_table[:, :, :, 0] /= (self.window_size[0] - 1) 104 | relative_coords_table[:, :, :, 1] /= (self.window_size[1] - 1) 105 | relative_coords_table *= 8 # normalize to -8, 8 106 | relative_coords_table = torch.sign(relative_coords_table) * torch.log2( 107 | torch.abs(relative_coords_table) + 1.0) / np.log2(8) 108 | 109 | self.register_buffer("relative_coords_table", relative_coords_table) 110 | 111 | # get pair-wise relative position index for each token inside the window 112 | coords_h = torch.arange(self.window_size[0]) 113 | coords_w = torch.arange(self.window_size[1]) 114 | coords = torch.stack(torch.meshgrid([coords_h, coords_w])) # 2, Wh, Ww 115 | coords_flatten = torch.flatten(coords, 1) # 2, Wh*Ww 116 | relative_coords = coords_flatten[:, :, None] - coords_flatten[:, None, :] # 2, Wh*Ww, Wh*Ww 117 | relative_coords = relative_coords.permute(1, 2, 0).contiguous() # Wh*Ww, Wh*Ww, 2 118 | relative_coords[:, :, 0] += self.window_size[0] - 1 # shift to start from 0 119 | relative_coords[:, :, 1] += self.window_size[1] - 1 120 | relative_coords[:, :, 0] *= 2 * self.window_size[1] - 1 121 | relative_position_index = relative_coords.sum(-1) # Wh*Ww, Wh*Ww 122 | self.register_buffer("relative_position_index", relative_position_index) 123 | 124 | self.qkv = nn.Linear(dim, dim * 3, bias=False) 125 | if qkv_bias: 126 | self.q_bias = nn.Parameter(torch.zeros(dim)) 127 | self.v_bias = nn.Parameter(torch.zeros(dim)) 128 | else: 129 | self.q_bias = None 130 | self.v_bias = None 131 | self.attn_drop = nn.Dropout(attn_drop) 132 | self.proj = nn.Linear(dim, dim) 133 | self.proj_drop = nn.Dropout(proj_drop) 134 | self.softmax = nn.Softmax(dim=-1) 135 | 136 | def forward(self, x, mask=None): 137 | """ 138 | Args: 139 | x: input features with shape of (num_windows*B, N, C) 140 | mask: (0/-inf) mask with shape of (num_windows, Wh*Ww, Wh*Ww) or None 141 | """ 142 | B_, N, C = x.shape 143 | qkv_bias = None 144 | if self.q_bias is not None: 145 | qkv_bias = torch.cat((self.q_bias, torch.zeros_like(self.v_bias, requires_grad=False), self.v_bias)) 146 | qkv = F.linear(input=x, weight=self.qkv.weight, bias=qkv_bias) 147 | qkv = qkv.reshape(B_, N, 3, self.num_heads, -1).permute(2, 0, 3, 1, 4) 148 | q, k, v = qkv[0], qkv[1], qkv[2] # make torchscript 
happy (cannot use tensor as tuple) 149 | 150 | # cosine attention 151 | attn = (F.normalize(q, dim=-1) @ F.normalize(k, dim=-1).transpose(-2, -1)) 152 | logit_scale = torch.clamp(self.logit_scale, max=torch.log(torch.tensor(1. / 0.01))).exp() 153 | attn = attn * logit_scale 154 | 155 | relative_position_bias_table = self.cpb_mlp(self.relative_coords_table).view(-1, self.num_heads) 156 | relative_position_bias = relative_position_bias_table[self.relative_position_index.view(-1)].view( 157 | self.window_size[0] * self.window_size[1], self.window_size[0] * self.window_size[1], -1) # Wh*Ww,Wh*Ww,nH 158 | relative_position_bias = relative_position_bias.permute(2, 0, 1).contiguous() # nH, Wh*Ww, Wh*Ww 159 | relative_position_bias = 16 * torch.sigmoid(relative_position_bias) 160 | attn = attn + relative_position_bias.unsqueeze(0) 161 | 162 | if mask is not None: 163 | nW = mask.shape[0] 164 | attn = attn.view(B_ // nW, nW, self.num_heads, N, N) + mask.unsqueeze(1).unsqueeze(0) 165 | attn = attn.view(-1, self.num_heads, N, N) 166 | attn = self.softmax(attn) 167 | else: 168 | attn = self.softmax(attn) 169 | 170 | attn = self.attn_drop(attn) 171 | 172 | x = (attn @ v).transpose(1, 2).reshape(B_, N, C) 173 | x = self.proj(x) 174 | x = self.proj_drop(x) 175 | return x 176 | 177 | def extra_repr(self) -> str: 178 | return f'dim={self.dim}, window_size={self.window_size}, ' \ 179 | f'pretrained_window_size={self.pretrained_window_size}, num_heads={self.num_heads}' 180 | 181 | def flops(self, N): 182 | # calculate flops for 1 window with token length of N 183 | flops = 0 184 | # qkv = self.qkv(x) 185 | flops += N * self.dim * 3 * self.dim 186 | # attn = (q @ k.transpose(-2, -1)) 187 | flops += self.num_heads * N * (self.dim // self.num_heads) * N 188 | # x = (attn @ v) 189 | flops += self.num_heads * N * N * (self.dim // self.num_heads) 190 | # x = self.proj(x) 191 | flops += N * self.dim * self.dim 192 | return flops 193 | 194 | 195 | class SwinTransformerBlock(nn.Module): 196 | r""" Swin Transformer Block. 197 | Args: 198 | dim (int): Number of input channels. 199 | input_resolution (tuple[int]): Input resulotion. 200 | num_heads (int): Number of attention heads. 201 | window_size (int): Window size. 202 | shift_size (int): Shift size for SW-MSA. 203 | mlp_ratio (float): Ratio of mlp hidden dim to embedding dim. 204 | qkv_bias (bool, optional): If True, add a learnable bias to query, key, value. Default: True 205 | drop (float, optional): Dropout rate. Default: 0.0 206 | attn_drop (float, optional): Attention dropout rate. Default: 0.0 207 | drop_path (float, optional): Stochastic depth rate. Default: 0.0 208 | act_layer (nn.Module, optional): Activation layer. Default: nn.GELU 209 | norm_layer (nn.Module, optional): Normalization layer. Default: nn.LayerNorm 210 | pretrained_window_size (int): Window size in pre-training. 
211 | """ 212 | 213 | def __init__(self, dim, input_resolution, num_heads, window_size=7, shift_size=0, 214 | mlp_ratio=4., qkv_bias=True, drop=0., attn_drop=0., drop_path=0., 215 | act_layer=nn.GELU, norm_layer=nn.LayerNorm, pretrained_window_size=0): 216 | super().__init__() 217 | self.dim = dim 218 | self.input_resolution = input_resolution 219 | self.num_heads = num_heads 220 | self.window_size = window_size 221 | self.shift_size = shift_size 222 | self.mlp_ratio = mlp_ratio 223 | if min(self.input_resolution) <= self.window_size: 224 | # if window size is larger than input resolution, we don't partition windows 225 | self.shift_size = 0 226 | self.window_size = min(self.input_resolution) 227 | assert 0 <= self.shift_size < self.window_size, "shift_size must in 0-window_size" 228 | 229 | self.norm1 = norm_layer(dim) 230 | self.attn = WindowAttention( 231 | dim, window_size=to_2tuple(self.window_size), num_heads=num_heads, 232 | qkv_bias=qkv_bias, attn_drop=attn_drop, proj_drop=drop, 233 | pretrained_window_size=to_2tuple(pretrained_window_size)) 234 | 235 | self.drop_path = DropPath(drop_path) if drop_path > 0. else nn.Identity() 236 | self.norm2 = norm_layer(dim) 237 | mlp_hidden_dim = int(dim * mlp_ratio) 238 | self.mlp = Mlp(in_features=dim, hidden_features=mlp_hidden_dim, act_layer=act_layer, drop=drop) 239 | 240 | if self.shift_size > 0: 241 | # calculate attention mask for SW-MSA 242 | H, W = self.input_resolution 243 | img_mask = torch.zeros((1, H, W, 1)) # 1 H W 1 244 | h_slices = (slice(0, -self.window_size), 245 | slice(-self.window_size, -self.shift_size), 246 | slice(-self.shift_size, None)) 247 | w_slices = (slice(0, -self.window_size), 248 | slice(-self.window_size, -self.shift_size), 249 | slice(-self.shift_size, None)) 250 | cnt = 0 251 | for h in h_slices: 252 | for w in w_slices: 253 | img_mask[:, h, w, :] = cnt 254 | cnt += 1 255 | 256 | mask_windows = window_partition(img_mask, self.window_size) # nW, window_size, window_size, 1 257 | mask_windows = mask_windows.view(-1, self.window_size * self.window_size) 258 | attn_mask = mask_windows.unsqueeze(1) - mask_windows.unsqueeze(2) 259 | attn_mask = attn_mask.masked_fill(attn_mask != 0, float(-100.0)).masked_fill(attn_mask == 0, float(0.0)) 260 | else: 261 | attn_mask = None 262 | 263 | self.register_buffer("attn_mask", attn_mask) 264 | 265 | def forward(self, x): 266 | H, W = self.input_resolution 267 | B, L, C = x.shape 268 | assert L == H * W, "input feature has wrong size" 269 | 270 | shortcut = x 271 | x = x.view(B, H, W, C) 272 | 273 | # cyclic shift 274 | if self.shift_size > 0: 275 | shifted_x = torch.roll(x, shifts=(-self.shift_size, -self.shift_size), dims=(1, 2)) 276 | else: 277 | shifted_x = x 278 | 279 | # partition windows 280 | x_windows = window_partition(shifted_x, self.window_size) # nW*B, window_size, window_size, C 281 | x_windows = x_windows.view(-1, self.window_size * self.window_size, C) # nW*B, window_size*window_size, C 282 | 283 | # W-MSA/SW-MSA 284 | attn_windows = self.attn(x_windows, mask=self.attn_mask) # nW*B, window_size*window_size, C 285 | 286 | # merge windows 287 | attn_windows = attn_windows.view(-1, self.window_size, self.window_size, C) 288 | shifted_x = window_reverse(attn_windows, self.window_size, H, W) # B H' W' C 289 | 290 | # reverse cyclic shift 291 | if self.shift_size > 0: 292 | x = torch.roll(shifted_x, shifts=(self.shift_size, self.shift_size), dims=(1, 2)) 293 | else: 294 | x = shifted_x 295 | x = x.view(B, H * W, C) 296 | x = shortcut + self.drop_path(self.norm1(x)) 
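        # res-post-norm: norm1 normalises the attention output *inside* the residual branch (post-normalization, in contrast to the pre-norm ordering of Swin-V1); together with BasicLayer._init_respostnorm(), which zero-initialises norm1/norm2, each block therefore starts out as an identity mapping.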
297 | 298 | # FFN 299 | x = x + self.drop_path(self.norm2(self.mlp(x))) 300 | 301 | return x 302 | 303 | def extra_repr(self) -> str: 304 | return f"dim={self.dim}, input_resolution={self.input_resolution}, num_heads={self.num_heads}, " \ 305 | f"window_size={self.window_size}, shift_size={self.shift_size}, mlp_ratio={self.mlp_ratio}" 306 | 307 | def flops(self): 308 | flops = 0 309 | H, W = self.input_resolution 310 | # norm1 311 | flops += self.dim * H * W 312 | # W-MSA/SW-MSA 313 | nW = H * W / self.window_size / self.window_size 314 | flops += nW * self.attn.flops(self.window_size * self.window_size) 315 | # mlp 316 | flops += 2 * H * W * self.dim * self.dim * self.mlp_ratio 317 | # norm2 318 | flops += self.dim * H * W 319 | return flops 320 | 321 | 322 | class PatchMerging(nn.Module): 323 | r""" Patch Merging Layer. 324 | Args: 325 | input_resolution (tuple[int]): Resolution of input feature. 326 | dim (int): Number of input channels. 327 | norm_layer (nn.Module, optional): Normalization layer. Default: nn.LayerNorm 328 | """ 329 | 330 | def __init__(self, input_resolution, dim, norm_layer=nn.LayerNorm): 331 | super().__init__() 332 | self.input_resolution = input_resolution 333 | self.dim = dim 334 | self.reduction = nn.Linear(4 * dim, 2 * dim, bias=False) 335 | self.norm = norm_layer(2 * dim) 336 | 337 | def forward(self, x): 338 | """ 339 | x: B, H*W, C 340 | """ 341 | H, W = self.input_resolution 342 | B, L, C = x.shape 343 | assert L == H * W, "input feature has wrong size" 344 | assert H % 2 == 0 and W % 2 == 0, f"x size ({H}*{W}) are not even." 345 | 346 | x = x.view(B, H, W, C) 347 | 348 | x0 = x[:, 0::2, 0::2, :] # B H/2 W/2 C 349 | x1 = x[:, 1::2, 0::2, :] # B H/2 W/2 C 350 | x2 = x[:, 0::2, 1::2, :] # B H/2 W/2 C 351 | x3 = x[:, 1::2, 1::2, :] # B H/2 W/2 C 352 | x = torch.cat([x0, x1, x2, x3], -1) # B H/2 W/2 4*C 353 | x = x.view(B, -1, 4 * C) # B H/2*W/2 4*C 354 | 355 | x = self.reduction(x) 356 | x = self.norm(x) 357 | 358 | return x 359 | 360 | def extra_repr(self) -> str: 361 | return f"input_resolution={self.input_resolution}, dim={self.dim}" 362 | 363 | def flops(self): 364 | H, W = self.input_resolution 365 | flops = (H // 2) * (W // 2) * 4 * self.dim * 2 * self.dim 366 | flops += H * W * self.dim // 2 367 | return flops 368 | 369 | 370 | class BasicLayer(nn.Module): 371 | """ A basic Swin Transformer layer for one stage. 372 | Args: 373 | dim (int): Number of input channels. 374 | input_resolution (tuple[int]): Input resolution. 375 | depth (int): Number of blocks. 376 | num_heads (int): Number of attention heads. 377 | window_size (int): Local window size. 378 | mlp_ratio (float): Ratio of mlp hidden dim to embedding dim. 379 | qkv_bias (bool, optional): If True, add a learnable bias to query, key, value. Default: True 380 | drop (float, optional): Dropout rate. Default: 0.0 381 | attn_drop (float, optional): Attention dropout rate. Default: 0.0 382 | drop_path (float | tuple[float], optional): Stochastic depth rate. Default: 0.0 383 | norm_layer (nn.Module, optional): Normalization layer. Default: nn.LayerNorm 384 | downsample (nn.Module | None, optional): Downsample layer at the end of the layer. Default: None 385 | use_checkpoint (bool): Whether to use checkpointing to save memory. Default: False. 386 | pretrained_window_size (int): Local window size in pre-training. 
387 | """ 388 | 389 | def __init__(self, dim, input_resolution, depth, num_heads, window_size, 390 | mlp_ratio=4., qkv_bias=True, drop=0., attn_drop=0., 391 | drop_path=0., norm_layer=nn.LayerNorm, downsample=None, use_checkpoint=False, 392 | pretrained_window_size=0): 393 | 394 | super().__init__() 395 | self.dim = dim 396 | self.input_resolution = input_resolution 397 | self.depth = depth 398 | self.use_checkpoint = use_checkpoint 399 | 400 | # build blocks 401 | self.blocks = nn.ModuleList([ 402 | SwinTransformerBlock(dim=dim, input_resolution=input_resolution, 403 | num_heads=num_heads, window_size=window_size, 404 | shift_size=0 if (i % 2 == 0) else window_size // 2, 405 | mlp_ratio=mlp_ratio, 406 | qkv_bias=qkv_bias, 407 | drop=drop, attn_drop=attn_drop, 408 | drop_path=drop_path[i] if isinstance(drop_path, list) else drop_path, 409 | norm_layer=norm_layer, 410 | pretrained_window_size=pretrained_window_size) 411 | for i in range(depth)]) 412 | 413 | # patch merging layer 414 | if downsample is not None: 415 | self.downsample = downsample(input_resolution, dim=dim, norm_layer=norm_layer) 416 | else: 417 | self.downsample = None 418 | 419 | def forward(self, x): 420 | for blk in self.blocks: 421 | if self.use_checkpoint: 422 | x = checkpoint.checkpoint(blk, x) 423 | else: 424 | x = blk(x) 425 | if self.downsample is not None: 426 | x = self.downsample(x) 427 | return x 428 | 429 | def extra_repr(self) -> str: 430 | return f"dim={self.dim}, input_resolution={self.input_resolution}, depth={self.depth}" 431 | 432 | def flops(self): 433 | flops = 0 434 | for blk in self.blocks: 435 | flops += blk.flops() 436 | if self.downsample is not None: 437 | flops += self.downsample.flops() 438 | return flops 439 | 440 | def _init_respostnorm(self): 441 | for blk in self.blocks: 442 | nn.init.constant_(blk.norm1.bias, 0) 443 | nn.init.constant_(blk.norm1.weight, 0) 444 | nn.init.constant_(blk.norm2.bias, 0) 445 | nn.init.constant_(blk.norm2.weight, 0) 446 | 447 | 448 | class PatchEmbed(nn.Module): 449 | r""" Image to Patch Embedding 450 | Args: 451 | img_size (int): Image size. Default: 224. 452 | patch_size (int): Patch token size. Default: 4. 453 | in_chans (int): Number of input image channels. Default: 3. 454 | embed_dim (int): Number of linear projection output channels. Default: 96. 455 | norm_layer (nn.Module, optional): Normalization layer. Default: None 456 | """ 457 | 458 | def __init__(self, img_size=224, patch_size=4, in_chans=3, embed_dim=96, norm_layer=None): 459 | super().__init__() 460 | img_size = to_2tuple(img_size) 461 | patch_size = to_2tuple(patch_size) 462 | patches_resolution = [img_size[0] // patch_size[0], img_size[1] // patch_size[1]] 463 | self.img_size = img_size 464 | self.patch_size = patch_size 465 | self.patches_resolution = patches_resolution 466 | self.num_patches = patches_resolution[0] * patches_resolution[1] 467 | 468 | self.in_chans = in_chans 469 | self.embed_dim = embed_dim 470 | 471 | self.proj = nn.Conv2d(in_chans, embed_dim, kernel_size=patch_size, stride=patch_size) 472 | if norm_layer is not None: 473 | self.norm = norm_layer(embed_dim) 474 | else: 475 | self.norm = None 476 | 477 | def forward(self, x): 478 | B, C, H, W = x.shape 479 | # FIXME look at relaxing size constraints 480 | assert H == self.img_size[0] and W == self.img_size[1], \ 481 | f"Input image size ({H}*{W}) doesn't match model ({self.img_size[0]}*{self.img_size[1]})." 
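        # self.proj is a Conv2d with kernel_size == stride == patch_size, so each non-overlapping patch is embedded into an embed_dim vector; flatten(2) + transpose(1, 2) below turns the feature map into a token sequence of shape (B, Ph*Pw, embed_dim).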
482 | x = self.proj(x).flatten(2).transpose(1, 2) # B Ph*Pw C 483 | if self.norm is not None: 484 | x = self.norm(x) 485 | return x 486 | 487 | def flops(self): 488 | Ho, Wo = self.patches_resolution 489 | flops = Ho * Wo * self.embed_dim * self.in_chans * (self.patch_size[0] * self.patch_size[1]) 490 | if self.norm is not None: 491 | flops += Ho * Wo * self.embed_dim 492 | return flops 493 | 494 | 495 | class SwinTransformerV2(nn.Module): 496 | r""" Swin Transformer V2 497 | A PyTorch impl of : `Swin Transformer V2: Scaling Up Capacity and Resolution` - 498 | https://arxiv.org/abs/2111.09883 499 | Args: 500 | img_size (int | tuple(int)): Input image size. Default 256 501 | patch_size (int | tuple(int)): Patch size. Default: 4 502 | in_chans (int): Number of input image channels. Default: 3 503 | num_classes (int): Number of classes for classification head. Default: 1000 504 | embed_dim (int): Patch embedding dimension. Default: 96 505 | depths (tuple(int)): Depth of each Swin Transformer layer. 506 | num_heads (tuple(int)): Number of attention heads in different layers. 507 | window_size (int): Window size. Default: 8 508 | mlp_ratio (float): Ratio of mlp hidden dim to embedding dim. Default: 4 509 | qkv_bias (bool): If True, add a learnable bias to query, key, value. Default: True 510 | drop_rate (float): Dropout rate. Default: 0 511 | attn_drop_rate (float): Attention dropout rate. Default: 0 512 | drop_path_rate (float): Stochastic depth rate. Default: 0.2 513 | norm_layer (nn.Module): Normalization layer. Default: nn.LayerNorm. 514 | ape (bool): If True, add absolute position embedding to the patch embedding. Default: False 515 | patch_norm (bool): If True, add normalization after patch embedding. Default: True 516 | use_checkpoint (bool): Whether to use checkpointing to save memory. Default: False 517 | pretrained_window_sizes (tuple(int)): Pretrained window sizes of each layer. 
518 | """ 519 | 520 | def __init__(self, img_size=256, patch_size=4, in_chans=3, num_classes=1000, 521 | embed_dim=96, depths=[2, 2, 6, 2], num_heads=[3, 6, 12, 24], 522 | window_size=8, mlp_ratio=4., qkv_bias=True, 523 | drop_rate=0., attn_drop_rate=0., drop_path_rate=0.2, 524 | norm_layer=nn.LayerNorm, ape=False, patch_norm=True, 525 | use_checkpoint=False, pretrained_window_sizes=[0, 0, 0, 0], **kwargs): 526 | super().__init__() 527 | 528 | self.num_classes = num_classes 529 | self.num_layers = len(depths) 530 | self.embed_dim = embed_dim 531 | self.ape = ape 532 | self.patch_norm = patch_norm 533 | self.num_features = int(embed_dim * 2 ** (self.num_layers - 1)) 534 | self.mlp_ratio = mlp_ratio 535 | 536 | # split image into non-overlapping patches 537 | self.patch_embed = PatchEmbed( 538 | img_size=img_size, patch_size=patch_size, in_chans=in_chans, embed_dim=embed_dim, 539 | norm_layer=norm_layer if self.patch_norm else None) 540 | num_patches = self.patch_embed.num_patches 541 | patches_resolution = self.patch_embed.patches_resolution 542 | self.patches_resolution = patches_resolution 543 | 544 | # absolute position embedding 545 | if self.ape: 546 | self.absolute_pos_embed = nn.Parameter(torch.zeros(1, num_patches, embed_dim)) 547 | trunc_normal_(self.absolute_pos_embed, std=.02) 548 | 549 | self.pos_drop = nn.Dropout(p=drop_rate) 550 | 551 | # stochastic depth 552 | dpr = [x.item() for x in torch.linspace(0, drop_path_rate, sum(depths))] # stochastic depth decay rule 553 | 554 | # build layers 555 | self.layers = nn.ModuleList() 556 | for i_layer in range(self.num_layers): 557 | layer = BasicLayer(dim=int(embed_dim * 2 ** i_layer), 558 | input_resolution=(patches_resolution[0] // (2 ** i_layer), 559 | patches_resolution[1] // (2 ** i_layer)), 560 | depth=depths[i_layer], 561 | num_heads=num_heads[i_layer], 562 | window_size=window_size, 563 | mlp_ratio=self.mlp_ratio, 564 | qkv_bias=qkv_bias, 565 | drop=drop_rate, attn_drop=attn_drop_rate, 566 | drop_path=dpr[sum(depths[:i_layer]):sum(depths[:i_layer + 1])], 567 | norm_layer=norm_layer, 568 | downsample=PatchMerging if (i_layer < self.num_layers - 1) else None, 569 | use_checkpoint=use_checkpoint, 570 | pretrained_window_size=pretrained_window_sizes[i_layer]) 571 | self.layers.append(layer) 572 | 573 | self.norm = norm_layer(self.num_features) 574 | self.avgpool = nn.AdaptiveAvgPool1d(1) 575 | self.head = nn.Linear(self.num_features, num_classes) if num_classes > 0 else nn.Identity() 576 | 577 | self.apply(self._init_weights) 578 | for bly in self.layers: 579 | bly._init_respostnorm() 580 | 581 | def _init_weights(self, m): 582 | if isinstance(m, nn.Linear): 583 | trunc_normal_(m.weight, std=.02) 584 | if isinstance(m, nn.Linear) and m.bias is not None: 585 | nn.init.constant_(m.bias, 0) 586 | elif isinstance(m, nn.LayerNorm): 587 | nn.init.constant_(m.bias, 0) 588 | nn.init.constant_(m.weight, 1.0) 589 | 590 | @torch.jit.ignore 591 | def no_weight_decay(self): 592 | return {'absolute_pos_embed'} 593 | 594 | @torch.jit.ignore 595 | def no_weight_decay_keywords(self): 596 | return {"cpb_mlp", "logit_scale", 'relative_position_bias_table'} 597 | 598 | def forward_features(self, x): 599 | x = self.patch_embed(x) 600 | if self.ape: 601 | x = x + self.absolute_pos_embed 602 | x = self.pos_drop(x) 603 | 604 | for layer in self.layers: 605 | x = layer(x) 606 | 607 | x = self.norm(x) # B L C 608 | # x = self.avgpool(x.transpose(1, 2)) # B C 1 609 | # x = torch.flatten(x, 1) 610 | return x 611 | 612 | def forward(self, x): 613 | x = 
self.forward_features(x) 614 | # x = self.head(x) 615 | return x 616 | 617 | def flops(self): 618 | flops = 0 619 | flops += self.patch_embed.flops() 620 | for i, layer in enumerate(self.layers): 621 | flops += layer.flops() 622 | flops += self.num_features * self.patches_resolution[0] * self.patches_resolution[1] // (2 ** self.num_layers) 623 | flops += self.num_features * self.num_classes 624 | return flops -------------------------------------------------------------------------------- /IQA/demo_train_iqa_baseline.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------- 2 | # SAMA, AAAI 2024 3 | # Training code for IQA. 4 | # ------------------------------------------------- 5 | import torch 6 | import random 7 | import os 8 | import os.path as osp 9 | import fastvqa.models as models 10 | import sys 11 | import argparse 12 | import torch.nn as nn 13 | 14 | from scipy.stats import spearmanr, pearsonr 15 | from scipy.stats import kendalltau as kendallr 16 | import numpy as np 17 | from torchvision import transforms 18 | 19 | import yaml 20 | import timeit 21 | from PIL import Image 22 | 23 | from thop import profile 24 | import warnings 25 | 26 | warnings.filterwarnings("ignore") 27 | 28 | from torch.utils.tensorboard import SummaryWriter 29 | 30 | 31 | class ImageDataset(torch.utils.data.Dataset): 32 | def __init__(self, files, labels, 33 | data_args={"fwin_h": 8, "fwin_w": 8, "fsize_h": 32, "fsize_w": 32}, 34 | stype="fragment", 35 | is_train=True): 36 | 37 | super().__init__() 38 | 39 | self.files = files 40 | self.labels = labels 41 | self.is_train = is_train 42 | self.length = len(files) 43 | 44 | self.fwin_h = data_args['fwin_h'] 45 | self.fwin_w = data_args['fwin_w'] 46 | self.fsize_h = data_args['fsize_h'] 47 | self.fsize_w = data_args['fsize_w'] 48 | 49 | self.minh = self.fwin_h * self.fsize_h 50 | self.minw = self.fwin_w * self.fsize_w 51 | self.minsize = max(self.minh, self.minw) 52 | 53 | self.stype = stype if stype in ["sama", "sama-spm"] else "fragment" 54 | print("processing data with [{}]".format(self.stype)) 55 | 56 | if self.is_train: 57 | self.transform = transforms.Compose([ 58 | transforms.ToTensor(), 59 | transforms.RandomHorizontalFlip(0.5), 60 | transforms.RandomRotation(45), 61 | transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)) 62 | ]) 63 | else: 64 | self.transform = transforms.Compose([ 65 | transforms.ToTensor(), 66 | transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)) 67 | ]) 68 | 69 | def get_spatial_fragments(self, img, fragments_h=8, fragments_w=8, fsize_h=32, fsize_w=32): 70 | size_h = fragments_h * fsize_h 71 | size_w = fragments_w * fsize_w 72 | 73 | res_h, res_w = img.shape[-2:] 74 | ratio = min(res_h / size_h, res_w / size_w) 75 | if ratio < 1: 76 | img = torch.nn.functional.interpolate(img.unsqueeze(0), scale_factor=1 / ratio, mode="bilinear", align_corners=False) 77 | img = img[0] 78 | size = size_h, size_w 79 | 80 | ## make sure that sampling will not run out of the picture 81 | hgrids = torch.LongTensor([min(res_h // fragments_h * i, res_h - fsize_h) for i in range(fragments_h)]) 82 | wgrids = torch.LongTensor([min(res_w // fragments_w * i, res_w - fsize_w) for i in range(fragments_w)]) 83 | hlength, wlength = res_h // fragments_h, res_w // fragments_w 84 | 85 | if self.is_train: 86 | if hlength > fsize_h: 87 | rnd_h = torch.randint(hlength - fsize_h, (len(hgrids), len(wgrids))) 88 | else: 89 | rnd_h = torch.zeros((len(hgrids), 
len(wgrids))).int() 90 | if wlength > fsize_w: 91 | rnd_w = torch.randint(wlength - fsize_w, (len(hgrids), len(wgrids))) 92 | else: 93 | rnd_w = torch.zeros((len(hgrids), len(wgrids))).int() 94 | else: 95 | rnd_h = torch.ones((len(hgrids), len(wgrids))).int() * int((hlength - fsize_h) / 2) 96 | rnd_w = torch.ones((len(hgrids), len(wgrids))).int() * int((wlength - fsize_w) / 2) 97 | 98 | t_img = torch.zeros(img.shape[:-2] + size).to(img.device) 99 | 100 | for i, hs in enumerate(hgrids): 101 | for j, ws in enumerate(wgrids): 102 | h_s, h_e = i * fsize_h, (i + 1) * fsize_h 103 | w_s, w_e = j * fsize_w, (j + 1) * fsize_w 104 | 105 | h_so, h_eo = hs + rnd_h[i][j], hs + rnd_h[i][j] + fsize_h 106 | w_so, w_eo = ws + rnd_w[i][j], ws + rnd_w[i][j] + fsize_w 107 | t_img[:, h_s:h_e, w_s:w_e] = img[:, h_so:h_eo, w_so:w_eo] 108 | return t_img 109 | 110 | 111 | def get_spatial_fragments_spm(self, img, fragments_h=8, fragments_w=8, fsize_h=32, fsize_w=32): 112 | size_h = fragments_h * fsize_h 113 | size_w = fragments_w * fsize_w 114 | 115 | res_h, res_w = img.shape[-2:] 116 | ratio = min(res_h / size_h, res_w / size_w) 117 | if ratio < 1: 118 | res_h, res_w = round(res_h / ratio), round(res_w / ratio) 119 | img = torch.nn.functional.interpolate(img.unsqueeze(0), size=(res_h, res_w), mode="bilinear", align_corners=False) 120 | img = img[0] 121 | ratio = min(res_h / size_h, res_w / size_w) 122 | size = size_h, size_w 123 | 124 | img_scale, hgrids, wgrids = [], [], [] 125 | rnd_h, rnd_w = [], [] 126 | if self.is_train: 127 | rnd_rh, rnd_rw = torch.rand((fragments_h, fragments_w)), torch.rand((fragments_h, fragments_w)) 128 | else: 129 | rnd_rh, rnd_rw = torch.ones((fragments_h, fragments_w)) * 0.5, torch.ones((fragments_h, fragments_w)) * 0.5 130 | 131 | factors = [1, 1 / ratio] 132 | for scale in factors: 133 | this_h, this_w = round(res_h * scale), round(res_w * scale) 134 | img_scale.append(torch.nn.functional.interpolate(img.unsqueeze(0), size=(this_h, this_w), mode="bilinear", align_corners=False)[0]) 135 | 136 | hgrids.append(torch.LongTensor([min(this_h // fragments_h * i, this_h - fsize_h) for i in range(fragments_h)])) 137 | wgrids.append(torch.LongTensor([min(this_w // fragments_w * i, this_w - fsize_w) for i in range(fragments_w)])) 138 | 139 | hlength, wlength = this_h // fragments_h, this_w // fragments_w 140 | rnd_h.append((rnd_rh[:, :] * (hlength - fsize_h)).int()) 141 | rnd_w.append((rnd_rw[:, :] * (wlength - fsize_w)).int()) 142 | 143 | target_imgs = torch.zeros((2, ) + img.shape[:-2] + size).to(img.device) 144 | for k, scale in enumerate(factors): 145 | for i, hs in enumerate(hgrids[k]): 146 | for j, ws in enumerate(wgrids[k]): 147 | h_s, h_e = i * fsize_h, (i + 1) * fsize_h 148 | w_s, w_e = j * fsize_w, (j + 1) * fsize_w 149 | 150 | h_so = hs + rnd_h[k][i][j] 151 | h_eo = h_so + fsize_h 152 | w_so = ws + rnd_w[k][i][j] 153 | w_eo = w_so + fsize_w 154 | target_imgs[k, :, h_s:h_e, w_s:w_e] = img_scale[k][:, h_so:h_eo, w_so:w_eo] # 32 * 32 155 | 156 | # patch-based mask [4, 4] 157 | mask = torch.zeros((1, size_h, size_w)) 158 | for i in range(size_w // 8): # patch size = 4 159 | for j in range(size_h // 8): 160 | mask[:, j*8:j*8+4, i*8:i*8+4] = 1 161 | mask[:, j*8+4:j*8+8, i*8+4:i*8+8] = 1 162 | 163 | out_img = mask * target_imgs[0] + (1 - mask) * target_imgs[1] 164 | return out_img 165 | 166 | def get_spatial_fragments_swm(self, img, fragments_h=8, fragments_w=8, fsize_h=32, fsize_w=32): 167 | size_h = fragments_h * fsize_h 168 | size_w = fragments_w * fsize_w 169 | 170 | res_h, res_w = 
img.shape[-2:] 171 | ratio = min(res_h / size_h, res_w / size_w) 172 | if ratio < 1: 173 | res_h, res_w = round(res_h / ratio), round(res_w / ratio) 174 | img = torch.nn.functional.interpolate(img.unsqueeze(0), size=(res_h, res_w), mode="bilinear", align_corners=False) 175 | img = img[0] 176 | ratio = min(res_h / size_h, res_w / size_w) 177 | size = size_h, size_w 178 | 179 | img_scale, hgrids, wgrids = [], [], [] 180 | rnd_h, rnd_w = [], [] 181 | if self.is_train: 182 | rnd_rh, rnd_rw = torch.rand((fragments_h, fragments_w)), torch.rand((fragments_h, fragments_w)) 183 | else: 184 | rnd_rh, rnd_rw = torch.ones((fragments_h, fragments_w)) * 0.5, torch.ones((fragments_h, fragments_w)) * 0.5 185 | 186 | factors = [1, 1 / ratio] 187 | for scale in factors: 188 | this_h, this_w = round(res_h * scale), round(res_w * scale) 189 | img_scale.append(torch.nn.functional.interpolate(img.unsqueeze(0), size=(this_h, this_w), mode="bilinear", align_corners=False)[0]) 190 | 191 | hgrids.append(torch.LongTensor([min(this_h // fragments_h * i, this_h - fsize_h) for i in range(fragments_h)])) 192 | wgrids.append(torch.LongTensor([min(this_w // fragments_w * i, this_w - fsize_w) for i in range(fragments_w)])) 193 | 194 | hlength, wlength = this_h // fragments_h, this_w // fragments_w 195 | rnd_h.append((rnd_rh[:, :] * (hlength - fsize_h)).int()) 196 | rnd_w.append((rnd_rw[:, :] * (wlength - fsize_w)).int()) 197 | 198 | target_imgs = torch.zeros((2, ) + img.shape[:-2] + size).to(img.device) 199 | for k, scale in enumerate(factors): 200 | for i, hs in enumerate(hgrids[k]): 201 | for j, ws in enumerate(wgrids[k]): 202 | h_s, h_e = i * fsize_h, (i + 1) * fsize_h 203 | w_s, w_e = j * fsize_w, (j + 1) * fsize_w 204 | 205 | h_so = hs + rnd_h[k][i][j] 206 | h_eo = h_so + fsize_h 207 | w_so = ws + rnd_w[k][i][j] 208 | w_eo = w_so + fsize_w 209 | target_imgs[k, :, h_s:h_e, w_s:w_e] = img_scale[k][:, h_so:h_eo, w_so:w_eo] # 32 * 32 210 | 211 | # window-based mask [32, 32] 212 | mask = torch.zeros((1, size_h, size_w)) 213 | for i in range(fragments_h): # window 214 | for j in range(fragments_w): 215 | if (i + j) % 2 == 0: 216 | mask[:, j*32:j*32+32, i*32:i*32+32] = 1 217 | 218 | out_img = mask * target_imgs[0] + (1 - mask) * target_imgs[1] 219 | return out_img 220 | 221 | def __getitem__(self, index): 222 | filename = self.files[index] 223 | label = float(self.labels[index]) 224 | 225 | img = Image.open(filename).convert('RGB') 226 | width, height = img.size 227 | 228 | if min(width, height) < self.minsize: 229 | scale_factor = self.minsize / min(width, height) 230 | img = img.resize((int(width * scale_factor), int(height * scale_factor)), Image.BILINEAR) 231 | 232 | img = self.transform(img) 233 | 234 | if self.stype == "fragment": 235 | data = self.get_spatial_fragments(img, self.fwin_h, self.fwin_w, self.fsize_h, self.fsize_w) 236 | elif self.stype == "sama-spm": 237 | data = self.get_spatial_fragments_spm(img, self.fwin_h, self.fwin_w, self.fsize_h, self.fsize_w) 238 | elif self.stype == "sama": 239 | data = self.get_spatial_fragments_swm(img, self.fwin_h, self.fwin_w, self.fsize_h, self.fsize_w) 240 | else: 241 | raise NotImplementedError 242 | 243 | return data, label 244 | 245 | def __len__(self): 246 | return self.length 247 | 248 | 249 | 250 | def train_test_split(dataset_path, ann_file, ratio=0.8, seed=42): 251 | random.seed(seed) 252 | video_infos = [] 253 | with open(ann_file, "r") as fin: 254 | for line in fin.readlines(): 255 | line_split = line.strip().split(",") 256 | filename, _, _, label = line_split 
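            # each annotation line holds four comma-separated fields; only the first (file name) and the last (quality label) are used here, the two middle fields are ignored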
257 | label = float(label) 258 | filename = osp.join(dataset_path, filename) 259 | video_infos.append(dict(filename=filename, label=label)) 260 | random.shuffle(video_infos) 261 | return ( 262 | video_infos[: int(ratio * len(video_infos))], 263 | video_infos[int(ratio * len(video_infos)) :], 264 | ) 265 | 266 | 267 | def rank_loss(y_pred, y): 268 | ranking_loss = torch.nn.functional.relu( 269 | (y_pred - y_pred.t()) * torch.sign((y.t() - y)) 270 | ) 271 | scale = 1 + torch.max(ranking_loss) 272 | return ( 273 | torch.sum(ranking_loss) / y_pred.shape[0] / (y_pred.shape[0] - 1) / scale 274 | ).float() 275 | 276 | 277 | def plcc_loss(y_pred, y): 278 | sigma_hat, m_hat = torch.std_mean(y_pred, unbiased=False) 279 | y_pred = (y_pred - m_hat) / (sigma_hat + 1e-8) 280 | sigma, m = torch.std_mean(y, unbiased=False) 281 | y = (y - m) / (sigma + 1e-8) 282 | loss0 = torch.nn.functional.mse_loss(y_pred, y) / 4 283 | rho = torch.mean(y_pred * y) 284 | loss1 = torch.nn.functional.mse_loss(rho * y_pred, y) / 4 285 | return ((loss0 + loss1) / 2).float() 286 | 287 | def rescaled_l2_loss(y_pred, y, eps=1e-8): 288 | y_pred_rs = (y_pred - y_pred.mean()) / (y_pred.std() + eps) 289 | y_rs = (y - y.mean()) / (y.std() + eps) 290 | return torch.nn.functional.mse_loss(y_pred_rs, y_rs) 291 | 292 | def rplcc_loss(y_pred, y, eps=1e-8): 293 | sigma_hat, m_hat = torch.std_mean(y_pred, unbiased=False) 294 | y_pred = (y_pred - m_hat) / (sigma_hat + 1e-8) 295 | 296 | sigma, m = torch.std_mean(y, unbiased=False) 297 | y = (y - m) / (sigma + 1e-8) 298 | 299 | rho = torch.mean(y_pred.reshape(y.shape) * y) 300 | return 1 - rho 301 | 302 | def self_similarity_loss(f, f_hat, f_hat_detach=False): 303 | if f_hat_detach: 304 | f_hat = f_hat.detach() 305 | return 1 - torch.nn.functional.cosine_similarity(f, f_hat, dim=1).mean() 306 | 307 | def contrastive_similarity_loss(f, f_hat, f_hat_detach=False, eps=1e-8): 308 | if f_hat_detach: 309 | f_hat = f_hat.detach() 310 | intra_similarity = torch.nn.functional.cosine_similarity(f, f_hat, dim=1).mean() 311 | cross_similarity = torch.nn.functional.cosine_similarity(f, f_hat, dim=0).mean() 312 | return (1 - intra_similarity) / (1 - cross_similarity + eps) 313 | 314 | def rescale(pr, gt=None): 315 | if gt is None: 316 | pr = (pr - np.mean(pr)) / np.std(pr) 317 | else: 318 | pr = ((pr - np.mean(pr)) / np.std(pr)) * np.std(gt) + np.mean(gt) 319 | return pr 320 | 321 | sample_types=["resize", "diamond_fragments", "fragments", "crop", "arp_resize", "arp_fragments"] 322 | 323 | 324 | def finetune_epoch(ft_loader, model, model_ema, optimizer, scheduler, device, epoch=-1, split=-1, writer=None): 325 | 326 | model.train() 327 | 328 | tic = timeit.default_timer() 329 | 330 | criterion = nn.SmoothL1Loss() 331 | train_labels, pred_labels = [], [] 332 | plcc_loss_total, rank_loss_total, loss_total = 0, 0, 0 333 | for i, (data, label) in enumerate(ft_loader): 334 | optimizer.zero_grad() 335 | 336 | data = data.to(device) 337 | label = label.to(device).float() 338 | 339 | scores = model(data) 340 | scores = scores.view(label.shape) 341 | 342 | # Plain Supervised Loss 343 | # p_loss, r_loss = plcc_loss(scores, label), rank_loss(scores, label) 344 | 345 | loss = criterion(scores, label) # + 0.5 * rplcc_loss(scores, label) 346 | # loss = p_loss + 0.3 * r_loss + 0.3 * criterion(scores, label) 347 | 348 | 349 | # plcc_loss_total += p_loss.item() 350 | # rank_loss_total += r_loss.item() 351 | loss_total += loss.item() 352 | 353 | loss.backward() 354 | optimizer.step() 355 | scheduler.step() 356 | 357 | 
#ft_loader.dataset.refresh_hypers() 358 | 359 | pred_labels.extend(list(scores.view(-1).detach().cpu().numpy())) 360 | train_labels.extend(list(label.view(-1).detach().cpu().numpy())) 361 | 362 | if model_ema is not None: 363 | model_params = dict(model.named_parameters()) 364 | model_ema_params = dict(model_ema.named_parameters()) 365 | for k in model_params.keys(): 366 | model_ema_params[k].data.mul_(0.999).add_( 367 | model_params[k].data, alpha=1 - 0.999) 368 | 369 | train_srcc = spearmanr(train_labels, pred_labels)[0] 370 | writer.add_scalar('train_srcc', train_srcc, epoch) 371 | 372 | writer.add_scalar('train_plcc_loss', plcc_loss_total, epoch) 373 | writer.add_scalar('train_rank_loss', rank_loss_total, epoch) 374 | writer.add_scalar('train_total_loss', loss_total, epoch) 375 | 376 | toc = timeit.default_timer() 377 | 378 | minutes = int((toc - tic) / 60) 379 | seconds = int((toc - tic) % 60) 380 | print('Epoch-{:02d}, training SRCC={:.4f}, time elapsed {:02d}m {:02d}s.'.format(epoch, train_srcc, minutes, seconds)) 381 | print('backbone_lr = {:.2e}, head_lr = {:.2e}'.format(optimizer.state_dict()['param_groups'][0]['lr'], 382 | optimizer.state_dict()['param_groups'][-1]['lr'])) 383 | model.eval() 384 | 385 | 386 | def profile_inference(inf_set, model, device): 387 | video = {} 388 | data = inf_set[0] 389 | for key in sample_types: 390 | if key in data: 391 | video[key] = data[key].to(device).unsqueeze(0) 392 | with torch.no_grad(): 393 | flops, params = profile(model, (video, )) 394 | print(f"The FLOps of the Variant is {flops/1e9:.1f}G, with Params {params/1e6:.2f}M.") 395 | 396 | 397 | def inference_set(inf_loader, model, device, best_, epoch, split=-1, save_model=False, suffix='s', save_name="divide", writer=None): 398 | 399 | model.eval() 400 | 401 | tic = timeit.default_timer() 402 | gt_labels, pr_labels = [], [] 403 | 404 | best_s, best_p, best_k, best_r = best_ 405 | 406 | with torch.no_grad(): 407 | for i, (data, label) in enumerate(inf_loader): 408 | 409 | data = data.to(device) 410 | label = label.to(device) 411 | 412 | scores = model(data) 413 | scores = scores.view(label.shape) 414 | 415 | pr_labels.extend(list(scores.cpu().numpy())) 416 | gt_labels.extend(list(label.cpu().numpy())) 417 | 418 | pr_labels = rescale(pr_labels, gt_labels) 419 | 420 | s = spearmanr(gt_labels, pr_labels)[0] 421 | p = pearsonr(gt_labels, pr_labels)[0] 422 | k = kendallr(gt_labels, pr_labels)[0] 423 | r = np.sqrt(((gt_labels - pr_labels) ** 2).mean()) 424 | 425 | writer.add_scalar('val_{}_srcc'.format(suffix), s, epoch) 426 | writer.add_scalar('val_{}_plcc'.format(suffix), p, epoch) 427 | writer.add_scalar('val_{}_krcc'.format(suffix), k, epoch) 428 | writer.add_scalar('val_{}_rmse'.format(suffix), r, epoch) 429 | 430 | # del results, result #, video, video_up 431 | torch.cuda.empty_cache() 432 | 433 | if s + p > best_s + best_p and save_model: 434 | state_dict = model.state_dict() 435 | torch.save( 436 | { 437 | "state_dict": state_dict, 438 | "validation_results": best_, 439 | }, 440 | f"pretrained_weights/{save_name}_{suffix}_dev.pth", 441 | ) 442 | 443 | best_s, best_p, best_k, best_r = ( 444 | max(best_s, s), 445 | max(best_p, p), 446 | max(best_k, k), 447 | min(best_r, r), 448 | ) 449 | 450 | writer.add_scalar('val_{}_best_srcc'.format(suffix), best_s, epoch) 451 | writer.add_scalar('val_{}_best_plcc'.format(suffix), best_p, epoch) 452 | writer.add_scalar('val_{}_best_krcc'.format(suffix), best_k, epoch) 453 | writer.add_scalar('val_{}_best_rmse'.format(suffix), best_r, epoch) 454 | 455 | 
toc = timeit.default_timer() 456 | 457 | minutes = int((toc - tic) / 60) 458 | seconds = int((toc - tic) % 60) 459 | 460 | print( 461 | f"For {len(gt_labels)} images, \nthe accuracy of the model: [{suffix}] is as follows:\n SROCC: {s:.4f} best: {best_s:.4f} \n PLCC: {p:.4f} best: {best_p:.4f} \n KROCC: {k:.4f} best: {best_k:.4f} \n RMSE: {r:.4f} best: {best_r:.4f}." 462 | ) 463 | print('time elapsed {:02d}m {:02d}s.'.format(minutes, seconds)) 464 | 465 | return best_s, best_p, best_k, best_r 466 | 467 | 468 | def read_info(info_file, prefix): 469 | name, mos = [], [] 470 | import os.path as osp 471 | if info_file[-3:] == "txt": 472 | with open(info_file, 'r') as f: 473 | for line in f: 474 | dis, score = line.split() 475 | name.append(osp.join(prefix, dis)) 476 | mos.append(float(score)) 477 | name = np.stack(name) 478 | mos = np.stack(mos).astype(np.float32) 479 | 480 | elif info_file[-3:] == "csv": 481 | import pandas as pd 482 | d = pd.read_csv(info_file) 483 | mos = np.asarray(d['MOS_zscore'].to_list()).astype(np.float32) 484 | name = d['image_name'].to_list() 485 | for i in range(len(name)): 486 | name[i] = osp.join(prefix, name[i]) 487 | name = np.asarray(name) 488 | elif info_file[-3:] == "pkl": 489 | import pickle 490 | with open(info_file, 'rb') as f: 491 | d = pickle.load(f) 492 | for i, ifile in enumerate(d['files']): 493 | name.append(osp.join(prefix, ifile)) 494 | name = np.asarray(name) 495 | mos = np.asarray(d['labels']) 496 | else: 497 | raise NotImplementedError 498 | 499 | return name, mos 500 | 501 | 502 | def main(): 503 | 504 | parser = argparse.ArgumentParser() 505 | parser.add_argument( 506 | "-o", "--opt", type=str, default="./options/fast-sama-iqa.yml", help="the option file" 507 | ) 508 | 509 | args = parser.parse_args() 510 | 511 | if sys.gettrace(): 512 | print('in DEBUG mode.') 513 | args.opt = './options/fast-sama-iqa.yml' 514 | 515 | with open(args.opt, "r") as f: 516 | opt = yaml.safe_load(f) 517 | 518 | if sys.gettrace(): 519 | opt['num_workers'] = 0 520 | opt['test_num_workers'] = 0 521 | opt['name'] = 'DEBUG' 522 | 523 | print(opt) 524 | 525 | database = opt["data"]["database"] 526 | files, labels = read_info(opt["data"]["data_info"], opt["data"]["data_prefix"]) 527 | 528 | num_samples = len(files) 529 | num_repeat = opt["num_splits"] 530 | if opt["data"]["database"] == "kadid": 531 | ref_idx = np.arange(81).repeat(5*25).reshape(-1) 532 | index_all = np.zeros((num_repeat, 81), dtype=np.int) 533 | for ii in range(num_repeat): 534 | index_current = np.arange(81) 535 | random.Random(ii * 123).shuffle(index_current) 536 | index_all[ii] = index_current 537 | else: 538 | index_all = np.zeros((num_repeat, num_samples), dtype=np.int) 539 | for ii in range(num_repeat): 540 | index_current = np.asarray(range(num_samples)) 541 | random.Random(ii * 123).shuffle(index_current) # shuffle with certain seed 542 | index_all[ii] = index_current 543 | np.savetxt('rand_index_{}.txt'.format(database), index_all, fmt='%d') 544 | 545 | # ------------------ fix seed ----------------------- 546 | seed = 44442 547 | torch.manual_seed(seed) # 548 | torch.backends.cudnn.deterministic = True 549 | torch.backends.cudnn.benchmark = False 550 | np.random.seed(seed) 551 | random.seed(seed) 552 | # --------------------------------------------------- 553 | 554 | os.makedirs('./pretrained_weights/', exist_ok=True) 555 | os.makedirs('./tf-logs/', exist_ok=True) 556 | torch.utils.backcompat.broadcast_warning.enabled = True 557 | device = "cuda" if torch.cuda.is_available() else "cpu" 558 | # 
if sys.gettrace(): 559 | # device = 'cpu' 560 | 561 | # best_eval = {'koniq': [], 'livec': []} 562 | best_eval = {database: []} 563 | for split in range(num_repeat): 564 | print(f"""\n==================== SPLIT-{split:02d} ====================""") 565 | writer = SummaryWriter('./tf-logs/{}-split-{:02d}'.format(opt['name'], split)) 566 | 567 | index = index_all[split] 568 | 569 | pos_train_end = int(0.8 * num_samples) 570 | if opt["data"]["database"] == "kadid": 571 | eval_ref_idx = index[:int(0.2 * 81)] 572 | trainindex, evalindex = [], [] 573 | for iii in range(len(files)): 574 | if ref_idx[iii] in eval_ref_idx: 575 | evalindex.append(iii) 576 | else: 577 | trainindex.append(iii) 578 | trainindex = np.asarray(trainindex) 579 | evalindex = np.asarray(evalindex) 580 | else: 581 | trainindex = index[:pos_train_end] # the first 80% 582 | evalindex = index[pos_train_end:] 583 | 584 | trainindex.sort() 585 | evalindex.sort() 586 | 587 | train_dataset = ImageDataset(files[trainindex], labels[trainindex], data_args=opt["data"], stype=opt["stype"], is_train=True) 588 | eval_datasets = {} 589 | eval_datasets[database] = ImageDataset(files[evalindex], labels[evalindex], data_args=opt["data"], stype=opt["stype"], is_train=False) 590 | # eval_datasets['livec'] = ImageDataset(files_livec, labels_livec, data_args=opt["data"], is_train=False) 591 | 592 | train_loader = torch.utils.data.DataLoader( 593 | train_dataset, batch_size=opt["batch_size"], num_workers=opt["num_workers"], shuffle=True) 594 | eval_loaders = {} 595 | for key, idataset in eval_datasets.items(): 596 | eval_loaders[key] = torch.utils.data.DataLoader( 597 | idataset, batch_size=opt["test_batch_size"], num_workers=opt["test_num_workers"], 598 | pin_memory=True, shuffle=False, drop_last=False) 599 | 600 | model = getattr(models, "IQAModel")().to(device) 601 | 602 | if "load_path" in opt: 603 | state_dict = torch.load(opt["load_path"], map_location=device) 604 | if 'pretrained_weights' in opt["load_path"] and "state_dict" in state_dict: 605 | i_state_dict = state_dict['state_dict'] 606 | 607 | elif "model" in state_dict: 608 | ### migrate training weights from swin-transformer-v1 609 | state_dict = state_dict["model"] 610 | from collections import OrderedDict 611 | 612 | i_state_dict = OrderedDict() 613 | for key in state_dict.keys(): 614 | tkey = 'backbone.' 
+ key 615 | i_state_dict[tkey] = state_dict[key] 616 | 617 | elif "state_dict" in state_dict: 618 | ### migrate training weights from mmaction 619 | state_dict = state_dict["state_dict"] 620 | from collections import OrderedDict 621 | 622 | i_state_dict = OrderedDict() 623 | for key in state_dict.keys(): 624 | if "head" in key: 625 | continue 626 | if "cls" in key: 627 | tkey = key.replace("cls", "vqa") 628 | elif "backbone" in key: 629 | i_state_dict[key] = state_dict[key] 630 | i_state_dict["fragments_"+key] = state_dict[key] 631 | i_state_dict["resize_"+key] = state_dict[key] 632 | else: 633 | i_state_dict[key] = state_dict[key] 634 | t_state_dict = model.state_dict() 635 | for key, value in t_state_dict.items(): 636 | if key in i_state_dict and i_state_dict[key].shape != value.shape: 637 | i_state_dict.pop(key) 638 | 639 | print(model.load_state_dict(i_state_dict, strict=False)) 640 | 641 | #print(model) 642 | 643 | if opt["ema"]: 644 | from copy import deepcopy 645 | model_ema = deepcopy(model) 646 | else: 647 | model_ema = None 648 | 649 | #profile_inference(val_dataset, model, device) 650 | 651 | # finetune the model 652 | 653 | param_groups=[] 654 | for key, value in dict(model.named_children()).items(): 655 | if "backbone" in key: 656 | param_groups += [{"params": value.parameters(), "lr": opt["optimizer"]["lr"] * opt["optimizer"]["backbone_lr_mult"]}] 657 | else: 658 | param_groups += [{"params": value.parameters(), "lr": opt["optimizer"]["lr"]}] 659 | 660 | optimizer = torch.optim.AdamW(lr=opt["optimizer"]["lr"], 661 | params=param_groups, 662 | weight_decay=opt["optimizer"]["wd"]) 663 | 664 | warmup_iter = int(opt["warmup_epochs"] * len(train_loader)) 665 | max_iter = int((opt["num_epochs"] + opt["l_num_epochs"]) * len(train_loader)) 666 | warmup_iter = max(1, warmup_iter) 667 | lr_lambda = ( 668 | lambda cur_iter: max(1e-2, cur_iter / warmup_iter) 669 | # lambda cur_iter: cur_iter / warmup_iter 670 | if cur_iter <= warmup_iter 671 | else 1 672 | # else max(1e-1, min(1, 1 - 0.9 * (cur_iter / len(train_loader) - opt["constant_epochs"]) / (opt["num_epochs"] - 20 - opt["constant_epochs"]))) 673 | # else 0.5 * (1 + math.cos(math.pi * (cur_iter - warmup_iter) / max_iter)) 674 | ) 675 | # lr_lambda = (lambda x: x) 676 | scheduler = torch.optim.lr_scheduler.LambdaLR( 677 | optimizer, lr_lambda=lr_lambda, 678 | ) 679 | 680 | bests = {} 681 | bests_n = {} 682 | for key in eval_loaders.keys(): 683 | bests[key] = -1,-1,-1,1000 684 | bests_n[key] = -1,-1,-1,1000 685 | 686 | for epoch in range(opt["num_epochs"]): 687 | print(f"Finetune Epoch {epoch}:") 688 | finetune_epoch( 689 | train_loader, model, model_ema, optimizer, scheduler, device, epoch, split, 690 | writer=writer) 691 | 692 | print(f"evaluation ..") 693 | # ----------------------------- reduce time consumption 694 | for key in eval_loaders: 695 | bests[key] = inference_set( 696 | eval_loaders[key], 697 | model_ema if model_ema is not None else model, 698 | device, bests[key], epoch, split, 699 | save_model=opt["save_model"], save_name=opt["name"], 700 | suffix=key+"_s", 701 | writer=writer 702 | ) 703 | 704 | 705 | if opt["num_epochs"] > 0: 706 | for key in eval_loaders: 707 | print( 708 | f"""SPLIT-{split:02d}, for the finetuning process on {key} with {len(eval_datasets[key])} images, 709 | the best validation accuracy of the model-s is as follows: 710 | SROCC: {bests[key][0]:.4f} 711 | PLCC: {bests[key][1]:.4f} 712 | KROCC: {bests[key][2]:.4f} 713 | RMSE: {bests[key][3]:.4f}.""" 714 | ) 715 | 716 | 717 | 
best_eval[key].append([bests[key][0], bests[key][1], bests[key][2], bests[key][3]]) 718 | 719 | print('\n ============================================== ') 720 | print(np.median(best_eval[database], 0)) 721 | 722 | 723 | 724 | if __name__ == "__main__": 725 | main() 726 | -------------------------------------------------------------------------------- /VQA/examplar_data_labels/LIVE_VQC/mylabels.txt: -------------------------------------------------------------------------------- 1 | all-videos/A001.mp4, 10.002367, 30.0, 80.232 2 | all-videos/A002.mp4, 10.04, 29.97002997002997, 57.3005 3 | all-videos/A003.mp4, 10.008333, 29.97002997002997, 78.1558 4 | all-videos/A004.mp4, 9.966821999999999, 30.0, 59.3179 5 | all-videos/A005.mp4, 10.033332999999999, 30.0, 47.1778 6 | all-videos/A006.mp4, 10.030721999999999, 30.0, 84.6517 7 | all-videos/A007.mp4, 10.008333, 29.97002997002997, 50.8675 8 | all-videos/A008.mp4, 10.006667, 29.97002997002997, 46.8902 9 | all-videos/A009.mp4, 10.006667, 29.97002997002997, 80.7925 10 | all-videos/A010.mp4, 10.0306, 30.0, 31.2785 11 | all-videos/A011.mp4, 10.006611, 30.0, 87.2287 12 | all-videos/A012.mp4, 10.033389, 29.916666666666668, 47.7826 13 | all-videos/A013.mp4, 10.04, 29.97002997002997, 83.4083 14 | all-videos/A014.mp4, 10.006667, 29.97002997002997, 69.9652 15 | all-videos/A015.mp4, 10.003910999999999, 30.0, 78.3957 16 | all-videos/A016.mp4, 10.000855999999999, 29.916666666666668, 37.8421 17 | all-videos/A017.mp4, 10.030332999999999, 30.0, 65.4778 18 | all-videos/A018.mp4, 10.006667, 29.97002997002997, 76.0205 19 | all-videos/A019.mp4, 10.04, 29.97002997002997, 70.843 20 | all-videos/A020.mp4, 10.005455999999999, 30.0, 77.7258 21 | all-videos/A021.mp4, 10.016733, 29.850746268656717, 37.6011 22 | all-videos/A022.mp4, 10.031666999999999, 30.0, 38.674 23 | all-videos/A023.mp4, 10.006667, 29.97002997002997, 69.0588 24 | all-videos/A024.mp4, 10.006667, 29.97002997002997, 71.962 25 | all-videos/A025.mp4, 10.024867, 30.0, 42.0856 26 | all-videos/A026.mp4, 10.008066999999999, 30.0, 74.5397 27 | all-videos/A027.mp4, 10.006667, 29.97002997002997, 56.3388 28 | all-videos/A028.mp4, 10.023678, 29.916666666666668, 41.5304 29 | all-videos/A029.mp4, 10.004999999999999, 29.97002997002997, 75.3861 30 | all-videos/A030.mp4, 10.031478, 30.0, 81.5278 31 | all-videos/A031.mp4, 10.023522, 30.0, 62.1042 32 | all-videos/A032.mp4, 10.04, 29.97002997002997, 74.3189 33 | all-videos/A033.mp4, 10.017267, 30.0, 71.7097 34 | all-videos/A034.mp4, 10.04, 29.97002997002997, 80.4271 35 | all-videos/A035.mp4, 10.04, 29.97002997002997, 85.0968 36 | all-videos/A036.mp4, 10.028767, 30.0, 46.6608 37 | all-videos/A037.mp4, 10.04, 29.97002997002997, 77.5858 38 | all-videos/A038.mp4, 10.025063, 29.925, 51.9195 39 | all-videos/A039.mp4, 10.025599999999999, 29.916666666666668, 53.006 40 | all-videos/A040.mp4, 10.032022, 30.0, 50.3763 41 | all-videos/A041.mp4, 10.004622, 30.0, 35.1412 42 | all-videos/A042.mp4, 10.030278, 30.0, 76.8312 43 | all-videos/A043.mp4, 10.031666999999999, 30.0, 38.1554 44 | all-videos/A044.mp4, 10.033332999999999, 30.0, 53.4333 45 | all-videos/A045.mp4, 10.001510999999999, 30.0, 81.9773 46 | all-videos/A046.mp4, 10.030344, 30.0, 69.1902 47 | all-videos/A047.mp4, 10.030356, 30.0, 58.4895 48 | all-videos/A048.mp4, 10.006667, 29.97002997002997, 72.3462 49 | all-videos/A049.mp4, 10.006667, 29.97002997002997, 72.2176 50 | all-videos/A050.mp4, 10.002177999999999, 30.0, 48.0519 51 | all-videos/A051.mp4, 10.008333, 29.97002997002997, 80.2807 52 | all-videos/A052.mp4, 10.006667, 
29.97002997002997, 44.8256 53 | all-videos/A053.mp4, 10.030433, 30.0, 67.0884 54 | all-videos/A054.mp4, 10.027743, 29.917, 76.1667 55 | all-videos/A055.mp4, 10.034666999999999, 30.0, 53.8092 56 | all-videos/A056.mp4, 10.0, 30.0, 52.3594 57 | all-videos/A057.mp4, 10.004999999999999, 29.97002997002997, 66.4892 58 | all-videos/A058.mp4, 10.000656, 29.850746268656717, 57.0 59 | all-videos/A059.mp4, 10.011099999999999, 30.0, 74.2873 60 | all-videos/A060.mp4, 10.008578, 30.0, 63.1839 61 | all-videos/A061.mp4, 10.002078, 30.0, 56.4521 62 | all-videos/A062.mp4, 10.026188999999999, 30.0, 47.5187 63 | all-videos/A063.mp4, 10.031378, 30.0, 44.8971 64 | all-videos/A064.mp4, 10.012844, 30.0, 49.8497 65 | all-videos/A065.mp4, 10.030622, 30.0, 81.8222 66 | all-videos/A066.mp4, 10.006667, 29.97002997002997, 74.7647 67 | all-videos/A067.mp4, 10.04, 29.97002997002997, 77.4551 68 | all-videos/A068.mp4, 10.005644, 30.0, 39.123 69 | all-videos/A069.mp4, 10.0, 30.0, 49.4444 70 | all-videos/A070.mp4, 10.006667, 29.97002997002997, 65.4608 71 | all-videos/A071.mp4, 10.006667, 29.97002997002997, 74.1071 72 | all-videos/A072.mp4, 10.08, 25.0, 20.4406 73 | all-videos/A073.mp4, 10.04, 29.97002997002997, 91.1313 74 | all-videos/A074.mp4, 10.004999999999999, 29.97002997002997, 57.3204 75 | all-videos/A075.mp4, 10.030356, 30.0, 58.1867 76 | all-videos/A076.mp4, 10.030332999999999, 30.0, 64.7835 77 | all-videos/A077.mp4, 10.04, 29.97002997002997, 74.4483 78 | all-videos/A078.mp4, 9.971478, 30.0, 38.8777 79 | all-videos/A079.mp4, 9.999977999999999, 30.0, 81.0943 80 | all-videos/A080.mp4, 10.015233, 30.0, 39.2086 81 | all-videos/A081.mp4, 10.006667, 29.97002997002997, 75.7143 82 | all-videos/A082.mp4, 10.008333, 29.97002997002997, 87.0859 83 | all-videos/A083.mp4, 10.023052999999999, 29.931, 46.3077 84 | all-videos/A084.mp4, 10.003357, 29.79, 55.3835 85 | all-videos/A085.mp4, 10.030367, 30.0, 70.8486 86 | all-videos/A086.mp4, 10.006667, 29.97002997002997, 35.2749 87 | all-videos/A087.mp4, 10.019810999999999, 29.916666666666668, 56.6415 88 | all-videos/A088.mp4, 10.027033, 29.416666666666668, 43.4892 89 | all-videos/A089.mp4, 10.019771, 29.841, 46.9444 90 | all-videos/A090.mp4, 10.006667, 29.97002997002997, 86.1709 91 | all-videos/A091.mp4, 10.021115, 29.837, 46.2238 92 | all-videos/A092.mp4, 10.026022, 29.833333333333332, 71.095 93 | all-videos/A093.mp4, 10.005956, 30.0, 66.9632 94 | all-videos/A094.mp4, 10.031467, 30.0, 52.3441 95 | all-videos/A095.mp4, 10.006667, 29.97002997002997, 71.5491 96 | all-videos/A096.mp4, 10.008333, 29.97002997002997, 66.5815 97 | all-videos/A097.mp4, 10.041667, 29.97002997002997, 66.4593 98 | all-videos/A098.mp4, 10.030356, 30.0, 85.8091 99 | all-videos/A099.mp4, 10.030510999999999, 30.0, 46.1517 100 | all-videos/A100.mp4, 10.024967, 30.0, 46.6273 101 | all-videos/A101.mp4, 10.008333, 29.97002997002997, 69.3657 102 | all-videos/A102.mp4, 10.026043999999999, 29.850746268656717, 62.987 103 | all-videos/A103.mp4, 10.016499999999999, 29.850746268656717, 71.6203 104 | all-videos/A104.mp4, 10.021355999999999, 30.0, 50.385 105 | all-videos/A105.mp4, 10.006667, 29.97002997002997, 37.8824 106 | all-videos/A106.mp4, 9.984587999999999, 29.833333333333332, 72.642 107 | all-videos/A107.mp4, 10.006667, 29.97002997002997, 81.6325 108 | all-videos/A108.mp4, 10.005556, 120.0, 35.8462 109 | all-videos/A109.mp4, 10.003021, 29.791, 36.5086 110 | all-videos/A110.mp4, 10.04, 29.97002997002997, 87.8585 111 | all-videos/B001.mp4, 10.019677999999999, 11.083333333333334, 27.2485 112 | all-videos/B002.mp4, 10.031467, 
30.0, 73.4245 113 | all-videos/B003.mp4, 10.031367, 30.0, 49.1111 114 | all-videos/B004.mp4, 10.006321999999999, 30.0, 60.6719 115 | all-videos/B005.mp4, 10.038333, 29.97002997002997, 47.2959 116 | all-videos/B006.mp4, 9.998232999999999, 30.0, 44.4845 117 | all-videos/B007.mp4, 9.998267, 30.0, 37.858 118 | all-videos/B008.mp4, 10.030766999999999, 30.0, 75.0204 119 | all-videos/B009.mp4, 9.998066999999999, 30.0, 39.4971 120 | all-videos/B010.mp4, 10.002689, 29.916666666666668, 33.9518 121 | all-videos/B011.mp4, 10.031644, 29.916666666666668, 36.0333 122 | all-videos/B012.mp4, 10.024211, 29.916666666666668, 33.9801 123 | all-videos/B013.mp4, 10.025466999999999, 29.916666666666668, 58.0795 124 | all-videos/B014.mp4, 10.030267, 30.0, 62.3243 125 | all-videos/B015.mp4, 10.033332999999999, 30.0, 56.9636 126 | all-videos/B016.mp4, 10.04, 29.97002997002997, 52.1065 127 | all-videos/B017.mp4, 10.01, 29.97002997002997, 65.3693 128 | all-videos/B018.mp4, 10.001944, 30.0, 54.024 129 | all-videos/B019.mp4, 10.021422, 30.0, 66.1658 130 | all-videos/B020.mp4, 10.033332999999999, 30.0, 69.2781 131 | all-videos/B021.mp4, 10.038333, 29.97002997002997, 84.9903 132 | all-videos/B022.mp4, 10.038333, 29.97002997002997, 84.532 133 | all-videos/B023.mp4, 10.038333, 29.97002997002997, 70.8629 134 | all-videos/B024.mp4, 10.038333, 29.97002997002997, 67.6078 135 | all-videos/B025.mp4, 10.038333, 29.97002997002997, 86.53 136 | all-videos/B026.mp4, 10.04, 29.97002997002997, 72.0794 137 | all-videos/B027.mp4, 10.04, 29.97002997002997, 89.2689 138 | all-videos/B028.mp4, 10.04, 29.97002997002997, 78.8442 139 | all-videos/B029.mp4, 10.04, 29.97002997002997, 90.8788 140 | all-videos/B030.mp4, 10.04, 29.97002997002997, 23.7704 141 | all-videos/B031.mp4, 10.04, 29.97002997002997, 69.9155 142 | all-videos/B032.mp4, 10.04, 29.97002997002997, 62.1639 143 | all-videos/B033.mp4, 10.04, 29.97002997002997, 67.869 144 | all-videos/B034.mp4, 10.04, 29.97002997002997, 71.4773 145 | all-videos/B035.mp4, 10.04, 29.97002997002997, 43.9005 146 | all-videos/B036.mp4, 10.04, 29.97002997002997, 82.8776 147 | all-videos/B037.mp4, 10.04, 29.97002997002997, 47.7402 148 | all-videos/B038.mp4, 10.04, 29.97002997002997, 67.4641 149 | all-videos/B039.mp4, 10.04, 29.97002997002997, 61.2217 150 | all-videos/B040.mp4, 10.04, 29.97002997002997, 67.0729 151 | all-videos/B041.mp4, 10.04, 29.97002997002997, 45.799 152 | all-videos/B042.mp4, 10.04, 29.97002997002997, 62.8774 153 | all-videos/B043.mp4, 10.04, 29.97002997002997, 38.3 154 | all-videos/B044.mp4, 10.04, 29.97002997002997, 87.1329 155 | all-videos/B045.mp4, 10.04, 29.97002997002997, 64.1244 156 | all-videos/B046.mp4, 10.04, 29.97002997002997, 75.0229 157 | all-videos/B047.mp4, 10.04, 29.97002997002997, 63.7714 158 | all-videos/B048.mp4, 10.04, 29.97002997002997, 65.9427 159 | all-videos/B049.mp4, 10.04, 29.97002997002997, 78.5928 160 | all-videos/B050.mp4, 10.04, 29.97002997002997, 77.2019 161 | all-videos/B051.mp4, 10.04, 29.97002997002997, 46.0611 162 | all-videos/B052.mp4, 10.04, 29.97002997002997, 58.6099 163 | all-videos/B053.mp4, 10.04, 29.97002997002997, 74.9505 164 | all-videos/B054.mp4, 10.003333, 120.0, 27.2126 165 | all-videos/B055.mp4, 10.04, 29.97002997002997, 64.193 166 | all-videos/B056.mp4, 10.038333, 29.97002997002997, 62.6744 167 | all-videos/B057.mp4, 10.04, 29.97002997002997, 59.9947 168 | all-videos/B058.mp4, 10.04, 29.97002997002997, 58.367 169 | all-videos/B059.mp4, 10.04, 29.97002997002997, 52.0 170 | all-videos/B060.mp4, 10.04, 29.97002997002997, 59.4882 171 | 
all-videos/B061.mp4, 10.04, 29.97002997002997, 63.7011 172 | all-videos/B062.mp4, 10.04, 29.97002997002997, 56.8361 173 | all-videos/B063.mp4, 10.038333, 29.97002997002997, 44.152 174 | all-videos/B064.mp4, 10.04, 29.97002997002997, 60.0559 175 | all-videos/B065.mp4, 10.04, 29.97002997002997, 58.7207 176 | all-videos/B066.mp4, 10.04, 29.97002997002997, 57.5947 177 | all-videos/B067.mp4, 10.0, 30.0, 70.6193 178 | all-videos/B068.mp4, 10.0, 30.0, 76.6719 179 | all-videos/B069.mp4, 10.0, 30.0, 68.4948 180 | all-videos/B070.mp4, 10.0, 30.0, 56.4128 181 | all-videos/B071.mp4, 10.0, 30.0, 81.3516 182 | all-videos/B072.mp4, 10.0, 30.0, 82.573 183 | all-videos/B073.mp4, 10.0, 30.0, 66.0347 184 | all-videos/B074.mp4, 10.0, 30.0, 83.0838 185 | all-videos/B075.mp4, 10.0, 30.0, 55.4301 186 | all-videos/B076.mp4, 10.0, 30.0, 85.2071 187 | all-videos/B077.mp4, 10.0, 30.0, 81.9227 188 | all-videos/B078.mp4, 10.0, 30.0, 87.6489 189 | all-videos/B079.mp4, 10.0, 30.0, 73.5323 190 | all-videos/B080.mp4, 10.0, 30.0, 78.8136 191 | all-videos/B081.mp4, 10.033332999999999, 30.0, 46.3822 192 | all-videos/B082.mp4, 10.027854999999999, 29.916666666666668, 39.3422 193 | all-videos/B083.mp4, 10.01001, 29.97, 47.1299 194 | all-videos/B084.mp4, 10.0, 30.0, 27.3091 195 | all-videos/B085.mp4, 10.033332999999999, 30.0, 78.5556 196 | all-videos/B086.mp4, 10.033332999999999, 30.0, 80.3536 197 | all-videos/B087.mp4, 10.0, 30.0, 72.178 198 | all-videos/B088.mp4, 10.0, 30.0, 52.8177 199 | all-videos/B089.mp4, 10.01, 29.97002997002997, 74.2079 200 | all-videos/B090.mp4, 10.016807, 29.75, 26.0497 201 | all-videos/B091.mp4, 10.022345999999999, 29.833333333333332, 71.7738 202 | all-videos/B092.mp4, 10.033332999999999, 30.0, 75.2238 203 | all-videos/B093.mp4, 10.016499999999999, 29.850746268656717, 70.0112 204 | all-videos/B094.mp4, 10.033332999999999, 30.0, 72.7368 205 | all-videos/B095.mp4, 10.033332999999999, 30.0, 72.7588 206 | all-videos/B096.mp4, 10.018093, 29.846, 57.6856 207 | all-videos/B097.mp4, 10.028329, 29.416666666666668, 60.4053 208 | all-videos/B098.mp4, 10.01, 29.97002997002997, 80.199 209 | all-videos/B099.mp4, 10.022345999999999, 29.833333333333332, 69.5399 210 | all-videos/B100.mp4, 10.01, 29.97002997002997, 80.2398 211 | all-videos/B101.mp4, 10.01, 29.97002997002997, 59.2431 212 | all-videos/B102.mp4, 10.01, 29.97002997002997, 88.0219 213 | all-videos/B103.mp4, 10.033332999999999, 30.0, 25.2484 214 | all-videos/B104.mp4, 10.033332999999999, 30.0, 71.3464 215 | all-videos/B105.mp4, 10.01, 29.97002997002997, 70.9 216 | all-videos/B106.mp4, 10.01, 29.97002997002997, 57.0578 217 | all-videos/B107.mp4, 10.01, 29.97002997002997, 76.7188 218 | all-videos/B108.mp4, 10.0, 30.0, 85.7418 219 | all-videos/B109.mp4, 10.01, 29.97002997002997, 79.3295 220 | all-videos/B110.mp4, 10.01, 29.97002997002997, 77.6527 221 | all-videos/B111.mp4, 10.0, 30.0, 57.7553 222 | all-videos/B112.mp4, 10.033332999999999, 30.0, 56.3086 223 | all-videos/B113.mp4, 10.033332999999999, 30.0, 73.7705 224 | all-videos/B114.mp4, 10.01, 29.97002997002997, 85.8129 225 | all-videos/B115.mp4, 10.033332999999999, 30.0, 39.1942 226 | all-videos/B116.mp4, 10.016499999999999, 29.850746268656717, 65.6578 227 | all-videos/B117.mp4, 10.0, 30.0, 70.7436 228 | all-videos/B118.mp4, 10.041667, 24.0, 71.8769 229 | all-videos/B119.mp4, 10.033332999999999, 30.0, 69.3909 230 | all-videos/B120.mp4, 10.043367, 29.97002997002997, 82.9815 231 | all-videos/B121.mp4, 10.033332999999999, 30.0, 54.3529 232 | all-videos/B122.mp4, 10.01, 29.97002997002997, 59.2645 233 | 
all-videos/B123.mp4, 10.033332999999999, 30.0, 76.6535 234 | all-videos/B124.mp4, 10.033332999999999, 30.0, 55.3564 235 | all-videos/B125.mp4, 10.033332999999999, 30.0, 52.9389 236 | all-videos/B126.mp4, 10.033332999999999, 30.0, 73.5285 237 | all-videos/B127.mp4, 10.01, 29.97002997002997, 58.382 238 | all-videos/B128.mp4, 10.01, 29.97002997002997, 59.2601 239 | all-videos/B129.mp4, 10.01, 29.97002997002997, 38.2353 240 | all-videos/B130.mp4, 10.01001, 29.97, 52.7733 241 | all-videos/B131.mp4, 10.01, 29.97002997002997, 37.062 242 | all-videos/B132.mp4, 10.036451, 20.027, 42.6774 243 | all-videos/B133.mp4, 10.01, 29.97002997002997, 81.2515 244 | all-videos/B134.mp4, 10.033332999999999, 30.0, 69.35 245 | all-videos/B135.mp4, 10.043367, 29.97002997002997, 79.775 246 | all-videos/B136.mp4, 10.01, 29.97002997002997, 71.8718 247 | all-videos/B137.mp4, 10.033332999999999, 30.0, 47.1258 248 | all-videos/B138.mp4, 10.0, 30.0, 79.3155 249 | all-videos/B139.mp4, 10.033332999999999, 30.0, 68.8052 250 | all-videos/B140.mp4, 10.043367, 29.97002997002997, 68.4787 251 | all-videos/B141.mp4, 10.01, 29.97002997002997, 86.3333 252 | all-videos/B142.mp4, 10.033332999999999, 30.0, 64.9667 253 | all-videos/B143.mp4, 10.033332999999999, 30.0, 64.0795 254 | all-videos/B144.mp4, 10.01, 29.97002997002997, 46.1283 255 | all-videos/B145.mp4, 10.043367, 29.97002997002997, 87.7442 256 | all-videos/B146.mp4, 10.089385, 29.833333333333332, 62.9054 257 | all-videos/B147.mp4, 10.022345999999999, 29.833333333333332, 73.0254 258 | all-videos/B148.mp4, 10.027854999999999, 29.916666666666668, 27.5249 259 | all-videos/B149.mp4, 10.033332999999999, 30.0, 51.5759 260 | all-videos/B150.mp4, 10.016499999999999, 29.850746268656717, 58.6421 261 | all-videos/B151.mp4, 10.027854999999999, 29.916666666666668, 71.6954 262 | all-videos/B152.mp4, 10.075641, 23.919073800308954, 70.9887 263 | all-videos/B153.mp4, 10.012811, 29.662, 62.9021 264 | all-videos/B154.mp4, 10.031096, 29.90700104493208, 49.736 265 | all-videos/B155.mp4, 10.01, 29.97002997002997, 43.4969 266 | all-videos/B156.mp4, 10.0, 30.0, 69.2067 267 | all-videos/B157.mp4, 10.0, 30.0, 75.9554 268 | all-videos/B158.mp4, 10.043367, 29.97002997002997, 76.8757 269 | all-videos/B159.mp4, 10.066666999999999, 30.0, 35.9529 270 | all-videos/B160.mp4, 10.022122, 29.834, 34.3556 271 | all-videos/B161.mp4, 9.566666999999999, 30.0, 71.6517 272 | all-videos/B162.mp4, 10.050419999999999, 29.75, 51.3829 273 | all-videos/B163.mp4, 10.005035999999999, 29.785, 45.5813 274 | all-videos/B164.mp4, 10.043367, 29.97002997002997, 83.1737 275 | all-videos/B165.mp4, 10.01, 29.97002997002997, 67.3736 276 | all-videos/B166.mp4, 10.033332999999999, 30.0, 57.8166 277 | all-videos/B167.mp4, 10.0, 30.0, 14.8662 278 | all-videos/B168.mp4, 10.01, 29.97002997002997, 56.3245 279 | all-videos/B169.mp4, 10.0, 30.0, 54.0769 280 | all-videos/B170.mp4, 10.027854999999999, 29.916666666666668, 35.8132 281 | all-videos/B171.mp4, 10.01, 29.97002997002997, 89.7447 282 | all-videos/B172.mp4, 10.01, 29.97002997002997, 75.619 283 | all-videos/B173.mp4, 10.0, 30.0, 66.0171 284 | all-videos/B174.mp4, 10.0, 30.0, 17.9012 285 | all-videos/B175.mp4, 10.027854999999999, 29.916666666666668, 38.6221 286 | all-videos/B176.mp4, 10.01, 29.97002997002997, 68.9807 287 | all-videos/B177.mp4, 10.033332999999999, 30.0, 40.7351 288 | all-videos/B178.mp4, 10.033332999999999, 30.0, 61.3542 289 | all-videos/B179.mp4, 10.033332999999999, 30.0, 56.5635 290 | all-videos/B180.mp4, 10.01, 29.97002997002997, 72.9946 291 | all-videos/B181.mp4, 10.055866, 
29.833333333333332, 38.5263 292 | all-videos/B182.mp4, 10.01, 29.97002997002997, 82.9684 293 | all-videos/B183.mp4, 10.043367, 29.97002997002997, 85.2941 294 | all-videos/B184.mp4, 10.033332999999999, 30.0, 57.0256 295 | all-videos/B185.mp4, 10.0, 30.0, 65.4143 296 | all-videos/B186.mp4, 10.01, 29.97002997002997, 64.5275 297 | all-videos/B187.mp4, 10.01, 29.97002997002997, 90.199 298 | all-videos/B188.mp4, 10.0, 30.0, 70.3474 299 | all-videos/B189.mp4, 10.043367, 29.97002997002997, 49.8683 300 | all-videos/B190.mp4, 10.01001, 29.97, 72.1381 301 | all-videos/B191.mp4, 10.043367, 29.97002997002997, 82.8227 302 | all-videos/B192.mp4, 10.066666999999999, 30.0, 55.6981 303 | all-videos/B193.mp4, 10.01, 29.97002997002997, 70.5926 304 | all-videos/B194.mp4, 10.0, 30.0, 55.5238 305 | all-videos/B195.mp4, 10.027854999999999, 29.916666666666668, 70.2275 306 | all-videos/B196.mp4, 10.033332999999999, 30.0, 50.7012 307 | all-videos/B197.mp4, 10.004363999999999, 29.787, 36.4242 308 | all-videos/B198.mp4, 10.043367, 29.97002997002997, 84.7438 309 | all-videos/B199.mp4, 10.01001, 29.97, 76.1538 310 | all-videos/B200.mp4, 10.01, 29.97002997002997, 36.2614 311 | all-videos/B201.mp4, 10.0, 30.0, 49.4186 312 | all-videos/B202.mp4, 10.01, 29.97002997002997, 78.8095 313 | all-videos/B203.mp4, 10.01, 29.97002997002997, 78.539 314 | all-videos/B204.mp4, 10.01, 29.97002997002997, 72.3743 315 | all-videos/B205.mp4, 10.0, 30.0, 63.2195 316 | all-videos/B206.mp4, 10.027854999999999, 29.916666666666668, 80.9784 317 | all-videos/B207.mp4, 10.01, 29.97002997002997, 86.5106 318 | all-videos/B208.mp4, 10.019802, 25.25, 74.1111 319 | all-videos/B209.mp4, 10.033332999999999, 30.0, 74.7753 320 | all-videos/B210.mp4, 10.01, 29.97002997002997, 29.4167 321 | all-videos/B211.mp4, 10.01, 29.97002997002997, 80.995 322 | all-videos/B212.mp4, 10.028329, 29.416666666666668, 71.6895 323 | all-videos/B213.mp4, 10.01, 29.97002997002997, 85.9296 324 | all-videos/B214.mp4, 10.01, 29.97002997002997, 45.2158 325 | all-videos/B215.mp4, 10.0, 30.0, 49.1789 326 | all-videos/B216.mp4, 10.033332999999999, 30.0, 85.8934 327 | all-videos/B217.mp4, 10.01, 29.97002997002997, 37.7778 328 | all-videos/B218.mp4, 10.043367, 29.97002997002997, 89.0446 329 | all-videos/B219.mp4, 10.043367, 29.97002997002997, 78.0149 330 | all-videos/B220.mp4, 10.01, 29.97002997002997, 70.117 331 | all-videos/B221.mp4, 10.0, 30.0, 80.4144 332 | all-videos/B222.mp4, 10.083333, 24.0, 34.2513 333 | all-videos/B223.mp4, 10.01, 29.97002997002997, 60.6067 334 | all-videos/B224.mp4, 10.01, 29.97002997002997, 81.0055 335 | all-videos/B225.mp4, 10.033332999999999, 30.0, 80.4624 336 | all-videos/B226.mp4, 10.022345999999999, 29.833333333333332, 62.5157 337 | all-videos/B227.mp4, 10.01, 29.97002997002997, 90.2448 338 | all-videos/B228.mp4, 10.033332999999999, 30.0, 71.1987 339 | all-videos/B229.mp4, 10.01, 29.97002997002997, 73.8866 340 | all-videos/B230.mp4, 10.03009, 29.91, 73.6337 341 | all-videos/B231.mp4, 10.01, 29.97002997002997, 65.0412 342 | all-videos/B232.mp4, 10.0, 29.8, 65.1549 343 | all-videos/B233.mp4, 10.016499999999999, 29.850746268656717, 50.8201 344 | all-videos/B234.mp4, 10.033332999999999, 30.0, 80.4612 345 | all-videos/B235.mp4, 10.0, 30.0, 68.4103 346 | all-videos/B236.mp4, 10.020107, 29.84, 53.3737 347 | all-videos/B237.mp4, 10.033332999999999, 30.0, 70.1209 348 | all-videos/B238.mp4, 10.016499999999999, 29.850746268656717, 60.5058 349 | all-videos/B239.mp4, 10.0, 30.0, 67.5337 350 | all-videos/B240.mp4, 10.086454999999999, 29.842, 67.1576 351 | 
all-videos/B241.mp4, 10.027854999999999, 29.916666666666668, 66.459 352 | all-videos/B242.mp4, 10.033332999999999, 30.0, 71.6724 353 | all-videos/B243.mp4, 10.016499999999999, 29.850746268656717, 47.8021 354 | all-videos/B244.mp4, 10.033332999999999, 30.0, 63.5397 355 | all-videos/B245.mp4, 10.033332999999999, 30.0, 82.8444 356 | all-videos/B246.mp4, 10.033332999999999, 30.0, 56.3813 357 | all-videos/B247.mp4, 10.033332999999999, 30.0, 76.2626 358 | all-videos/B248.mp4, 10.01, 29.97002997002997, 62.3922 359 | all-videos/B249.mp4, 10.0, 30.0, 69.7869 360 | all-videos/B250.mp4, 10.016499999999999, 29.850746268656717, 61.2128 361 | all-videos/B251.mp4, 10.01, 29.97002997002997, 82.7214 362 | all-videos/B252.mp4, 10.015691, 29.953, 73.2762 363 | all-videos/B253.mp4, 10.033332999999999, 30.0, 72.1818 364 | all-videos/B254.mp4, 10.033332999999999, 30.0, 54.569 365 | all-videos/B255.mp4, 10.022345999999999, 29.833333333333332, 62.2424 366 | all-videos/B256.mp4, 10.0, 30.0, 59.5503 367 | all-videos/B257.mp4, 10.01, 29.97002997002997, 82.2251 368 | all-videos/B258.mp4, 10.01, 29.97002997002997, 85.4278 369 | all-videos/B259.mp4, 10.01, 29.97002997002997, 84.8989 370 | all-videos/B260.mp4, 10.041667, 24.0, 6.22368 371 | all-videos/B261.mp4, 10.033332999999999, 30.0, 78.0125 372 | all-videos/B262.mp4, 10.01, 29.97002997002997, 51.9461 373 | all-videos/B263.mp4, 10.033332999999999, 30.0, 64.4931 374 | all-videos/B264.mp4, 10.033332999999999, 30.0, 78.114 375 | all-videos/B265.mp4, 9.7, 30.0, 74.4638 376 | all-videos/B266.mp4, 10.01, 29.97002997002997, 88.24 377 | all-videos/B267.mp4, 10.01, 29.97002997002997, 62.3061 378 | all-videos/B268.mp4, 10.027854999999999, 29.916666666666668, 25.0861 379 | all-videos/B269.mp4, 10.022345999999999, 29.833333333333332, 66.5404 380 | all-videos/B270.mp4, 10.043367, 29.97002997002997, 90.3281 381 | all-videos/B271.mp4, 10.0, 30.0, 84.2073 382 | all-videos/B272.mp4, 10.041667, 24.0, 49.6404 383 | all-videos/B273.mp4, 10.066666999999999, 30.0, 53.4568 384 | all-videos/B274.mp4, 10.033332999999999, 30.0, 54.9497 385 | all-videos/B275.mp4, 10.043367, 29.97002997002997, 79.0355 386 | all-videos/B276.mp4, 10.043367, 29.97002997002997, 83.5027 387 | all-videos/B277.mp4, 10.01, 29.97002997002997, 28.6053 388 | all-videos/B278.mp4, 10.01, 29.97002997002997, 70.3646 389 | all-videos/B279.mp4, 10.033332999999999, 30.0, 67.9827 390 | all-videos/B280.mp4, 10.0, 30.0, 69.5136 391 | all-videos/B281.mp4, 10.01, 29.97002997002997, 50.288 392 | all-videos/B282.mp4, 9.994429, 29.916666666666668, 70.8817 393 | all-videos/B283.mp4, 10.027009, 29.62, 64.2865 394 | all-videos/B284.mp4, 10.01, 29.97002997002997, 71.3248 395 | all-videos/B285.mp4, 10.01, 29.97002997002997, 73.6837 396 | all-videos/B286.mp4, 10.033332999999999, 30.0, 38.6012 397 | all-videos/B287.mp4, 10.0, 30.0, 70.2771 398 | all-videos/B288.mp4, 10.043367, 29.97002997002997, 82.4922 399 | all-videos/B289.mp4, 10.01, 29.97002997002997, 84.8406 400 | all-videos/B290.mp4, 10.033332999999999, 30.0, 62.4348 401 | all-videos/B291.mp4, 10.0, 30.0, 71.1256 402 | all-videos/B292.mp4, 10.033332999999999, 30.0, 81.9686 403 | all-videos/B293.mp4, 10.066666999999999, 30.0, 78.2485 404 | all-videos/B294.mp4, 10.0, 30.0, 66.3431 405 | all-videos/B295.mp4, 10.01, 29.97002997002997, 54.7539 406 | all-videos/B296.mp4, 10.021044, 29.937, 63.203 407 | all-videos/B297.mp4, 10.033332999999999, 30.0, 61.4341 408 | all-videos/B298.mp4, 10.033332999999999, 30.0, 42.3916 409 | all-videos/B299.mp4, 10.033332999999999, 30.0, 72.21 410 | 
all-videos/B300.mp4, 10.033332999999999, 30.0, 27.691 411 | all-videos/B301.mp4, 10.0, 30.0, 49.1242 412 | all-videos/B302.mp4, 10.01, 29.97002997002997, 72.25 413 | all-videos/B303.mp4, 10.041667, 24.0, 18.8824 414 | all-videos/B304.mp4, 10.043367, 29.97002997002997, 91.7312 415 | all-videos/B305.mp4, 10.01, 29.97002997002997, 66.0914 416 | all-videos/B306.mp4, 10.033332999999999, 30.0, 79.1263 417 | all-videos/B307.mp4, 10.022345999999999, 29.833333333333332, 62.3543 418 | all-videos/B308.mp4, 10.033332999999999, 30.0, 82.8098 419 | all-videos/B309.mp4, 10.0, 30.0, 75.774 420 | all-videos/B310.mp4, 10.033332999999999, 30.0, 85.1746 421 | all-videos/B311.mp4, 10.033332999999999, 30.0, 88.8258 422 | all-videos/B312.mp4, 10.033332999999999, 30.0, 61.1696 423 | all-videos/B313.mp4, 10.033332999999999, 30.0, 89.2074 424 | all-videos/B314.mp4, 10.01, 29.97002997002997, 94.2865 425 | all-videos/B315.mp4, 10.033332999999999, 30.0, 85.593 426 | all-videos/B316.mp4, 10.033332999999999, 30.0, 89.1236 427 | all-videos/C001.mp4, 10.04, 30.0, 84.2191 428 | all-videos/D001.mp4, 10.020211, 16.666666666666668, 32.7293 429 | all-videos/E001.mp4, 10.0314, 30.0, 51.6061 430 | all-videos/F001.mp4, 10.026644, 30.020013342228154, 69.3289 431 | all-videos/F002.mp4, 10.007033, 30.0, 29.0101 432 | all-videos/F003.mp4, 10.011688999999999, 30.0, 40.2153 433 | all-videos/F004.mp4, 10.001610999999999, 30.0, 18.6163 434 | all-videos/F005.mp4, 10.004222, 30.0, 56.6974 435 | all-videos/F006.mp4, 10.01, 29.97002997002997, 59.6044 436 | all-videos/F007.mp4, 10.008333, 29.97002997002997, 42.7267 437 | all-videos/G001.mp4, 10.0, 30.0, 72.0884 438 | all-videos/G002.mp4, 10.033332999999999, 30.0, 47.5172 439 | all-videos/G003.mp4, 10.033332999999999, 30.0, 73.1593 440 | all-videos/G004.mp4, 10.0, 30.0, 76.068 441 | all-videos/G005.mp4, 10.033332999999999, 30.0, 85.2933 442 | all-videos/G006.mp4, 10.0, 30.0, 72.881 443 | all-videos/G007.mp4, 10.01, 29.97002997002997, 87.2169 444 | all-videos/G008.mp4, 10.01, 29.97002997002997, 65.8113 445 | all-videos/G009.mp4, 10.01, 29.97002997002997, 72.7931 446 | all-videos/G010.mp4, 10.01, 29.97002997002997, 68.4129 447 | all-videos/G011.mp4, 10.01, 29.97002997002997, 24.7697 448 | all-videos/G012.mp4, 10.01, 29.97002997002997, 51.3923 449 | all-videos/G013.mp4, 10.036451, 20.027, 73.2679 450 | all-videos/G014.mp4, 10.01, 29.97002997002997, 56.096 451 | all-videos/G015.mp4, 10.01, 29.97002997002997, 89.2513 452 | all-videos/G016.mp4, 10.01, 29.97002997002997, 80.5959 453 | all-videos/G017.mp4, 10.0, 30.0, 75.1832 454 | all-videos/G018.mp4, 10.0, 30.0, 83.5848 455 | all-videos/G019.mp4, 10.0, 30.0, 71.6599 456 | all-videos/G020.mp4, 10.033332999999999, 30.0, 60.6335 457 | all-videos/G021.mp4, 10.0, 30.0, 41.2703 458 | all-videos/G022.mp4, 10.0, 30.0, 58.5622 459 | all-videos/G023.mp4, 10.0, 30.0, 58.9406 460 | all-videos/G024.mp4, 10.033332999999999, 30.0, 59.6292 461 | all-videos/G025.mp4, 10.0, 30.0, 40.5028 462 | all-videos/G026.mp4, 10.033332999999999, 30.0, 54.4118 463 | all-videos/G027.mp4, 10.033332999999999, 30.0, 45.7368 464 | all-videos/G028.mp4, 10.033332999999999, 30.0, 61.3883 465 | all-videos/G029.mp4, 10.033332999999999, 30.0, 50.4835 466 | all-videos/G030.mp4, 10.01, 29.97002997002997, 77.7895 467 | all-videos/G031.mp4, 10.01, 29.97002997002997, 70.6823 468 | all-videos/G032.mp4, 10.01, 29.97002997002997, 53.278 469 | all-videos/G033.mp4, 10.01, 29.97002997002997, 60.6505 470 | all-videos/G034.mp4, 10.01, 29.97002997002997, 82.4354 471 | all-videos/G035.mp4, 10.01, 
29.97002997002997, 72.2569 472 | all-videos/G036.mp4, 10.01, 29.97002997002997, 78.6898 473 | all-videos/G037.mp4, 10.01, 29.97002997002997, 78.2783 474 | all-videos/G038.mp4, 10.01, 29.97002997002997, 72.125 475 | all-videos/G039.mp4, 10.01, 29.97002997002997, 73.5789 476 | all-videos/G040.mp4, 10.01, 29.97002997002997, 80.9306 477 | all-videos/G041.mp4, 10.016499999999999, 29.850746268656717, 43.7181 478 | all-videos/G042.mp4, 10.022345999999999, 29.833333333333332, 70.191 479 | all-videos/G043.mp4, 10.022345999999999, 29.833333333333332, 77.4279 480 | all-videos/G044.mp4, 10.036432999999999, 29.791459781529294, 74.9635 481 | all-videos/G045.mp4, 10.016499999999999, 29.850746268656717, 62.4847 482 | all-videos/G046.mp4, 10.016499999999999, 29.850746268656717, 78.8212 483 | all-videos/G047.mp4, 10.066666999999999, 30.0, 70.1804 484 | all-videos/G048.mp4, 10.066666999999999, 30.0, 78.9434 485 | all-videos/G049.mp4, 9.994429, 29.916666666666668, 44.5829 486 | all-videos/G050.mp4, 10.033332999999999, 30.0, 79.9171 487 | all-videos/G051.mp4, 10.066666999999999, 30.0, 75.6131 488 | all-videos/G052.mp4, 10.066666999999999, 30.0, 67.6825 489 | all-videos/G053.mp4, 10.033332999999999, 30.0, 74.3118 490 | all-videos/G054.mp4, 10.0, 30.0, 56.0 491 | all-videos/G055.mp4, 10.066666999999999, 30.0, 63.478 492 | all-videos/G056.mp4, 10.01, 29.97002997002997, 62.0506 493 | all-videos/G057.mp4, 10.033332999999999, 30.0, 54.5354 494 | all-videos/G058.mp4, 10.033332999999999, 30.0, 65.1421 495 | all-videos/G059.mp4, 10.033332999999999, 30.0, 38.0054 496 | all-videos/G060.mp4, 10.033332999999999, 30.0, 71.4348 497 | all-videos/G061.mp4, 10.033332999999999, 30.0, 73.8883 498 | all-videos/G062.mp4, 10.041667, 24.0, 49.3832 499 | all-videos/G063.mp4, 10.115701999999999, 20.166666666666668, 29.0845 500 | all-videos/G064.mp4, 10.016529, 20.166666666666668, 11.3333 501 | all-videos/G065.mp4, 10.020619, 24.25, 68.6814 502 | all-videos/G066.mp4, 10.097999999999999, 20.2020202020202, 11.9079 503 | all-videos/G067.mp4, 10.022345999999999, 29.833333333333332, 57.2944 504 | all-videos/G068.mp4, 10.016499999999999, 29.850746268656717, 59.4638 505 | all-videos/G069.mp4, 10.022345999999999, 29.833333333333332, 66.1244 506 | all-videos/G070.mp4, 10.066666999999999, 30.0, 79.5904 507 | all-videos/G071.mp4, 10.0, 30.0, 48.3714 508 | all-videos/G072.mp4, 10.01, 29.97002997002997, 47.7432 509 | all-videos/G073.mp4, 10.0, 30.0, 18.6333 510 | all-videos/G074.mp4, 10.01, 29.97002997002997, 83.1826 511 | all-videos/G075.mp4, 10.049915, 29.851, 64.293 512 | all-videos/G076.mp4, 10.020443, 29.839, 80.0361 513 | all-videos/G077.mp4, 10.022345999999999, 29.833333333333332, 85.2383 514 | all-videos/G078.mp4, 10.022345999999999, 29.833333333333332, 81.5829 515 | all-videos/G079.mp4, 10.033332999999999, 30.0, 15.3576 516 | all-videos/G080.mp4, 10.027854999999999, 29.916666666666668, 26.2575 517 | all-videos/G081.mp4, 10.041667, 24.0, 46.7325 518 | all-videos/G082.mp4, 10.043367, 29.97002997002997, 78.019 519 | all-videos/G083.mp4, 10.01, 29.97002997002997, 78.7753 520 | all-videos/G084.mp4, 10.0, 30.0, 59.3198 521 | all-videos/G085.mp4, 10.01, 29.97002997002997, 76.4971 522 | all-videos/G086.mp4, 10.01, 29.97002997002997, 41.3556 523 | all-videos/G087.mp4, 10.01, 29.97002997002997, 71.1184 524 | all-videos/G088.mp4, 10.01, 29.97002997002997, 56.0826 525 | all-videos/G089.mp4, 10.041667, 24.0, 31.2638 526 | all-videos/G090.mp4, 10.043367, 29.97002997002997, 75.4464 527 | all-videos/G091.mp4, 10.01, 29.97002997002997, 84.2475 528 | 
all-videos/G092.mp4, 10.01, 29.97002997002997, 67.8351 529 | all-videos/G093.mp4, 10.01, 29.97002997002997, 66.1105 530 | all-videos/G094.mp4, 10.01, 29.97002997002997, 66.865 531 | all-videos/G095.mp4, 10.01, 29.97002997002997, 79.2192 532 | all-videos/G096.mp4, 10.01, 29.97002997002997, 86.6243 533 | all-videos/G097.mp4, 10.01, 29.97002997002997, 84.6976 534 | all-videos/G098.mp4, 10.01, 29.97002997002997, 35.8556 535 | all-videos/G099.mp4, 10.01, 29.97002997002997, 80.5632 536 | all-videos/G100.mp4, 10.0, 30.0, 80.0573 537 | all-videos/G101.mp4, 10.01, 29.97002997002997, 63.0345 538 | all-videos/G102.mp4, 10.01, 29.97002997002997, 67.75 539 | all-videos/G103.mp4, 10.01, 29.97002997002997, 81.3545 540 | all-videos/G104.mp4, 10.01, 29.97002997002997, 86.1474 541 | all-videos/G105.mp4, 10.033332999999999, 30.0, 45.0703 542 | all-videos/G106.mp4, 10.043367, 29.97002997002997, 23.4251 543 | all-videos/G107.mp4, 10.033332999999999, 30.0, 49.2717 544 | all-videos/G108.mp4, 10.041667, 24.0, 58.5988 545 | all-videos/G109.mp4, 10.0, 30.0, 79.2459 546 | all-videos/G110.mp4, 10.0, 30.0, 83.5217 547 | all-videos/G111.mp4, 10.033332999999999, 30.0, 55.0282 548 | all-videos/G112.mp4, 10.1, 30.0, 74.3393 549 | all-videos/G113.mp4, 10.043367, 29.97002997002997, 74.4923 550 | all-videos/G114.mp4, 10.033332999999999, 30.0, 65.0532 551 | all-videos/G115.mp4, 10.043367, 29.97002997002997, 77.8551 552 | all-videos/G116.mp4, 10.043367, 29.97002997002997, 88.4254 553 | all-videos/G117.mp4, 10.043367, 29.97002997002997, 71.7751 554 | all-videos/G118.mp4, 10.043367, 29.97002997002997, 65.9 555 | all-videos/G119.mp4, 10.043367, 29.97002997002997, 76.8795 556 | all-videos/H001.mp4, 10.0, 30.0, 63.6974 557 | all-videos/H002.mp4, 10.010714, 29.868, 69.1073 558 | all-videos/I001.mp4, 10.01, 29.97002997002997, 72.3462 559 | all-videos/I002.mp4, 10.022122, 29.834, 63.4225 560 | all-videos/I003.mp4, 10.018032, 29.946, 69.5414 561 | all-videos/J001.mp4, 10.011974, 30.064, 71.2778 562 | all-videos/J002.mp4, 10.022197, 29.734, 62.36 563 | all-videos/J003.mp4, 10.003988, 30.088, 60.4424 564 | all-videos/J004.mp4, 10.028224999999999, 30.115, 62.645 565 | all-videos/J005.mp4, 10.034182, 27.207, 24.1117 566 | all-videos/K001.mp4, 10.0, 30.0, 33.0114 567 | all-videos/K002.mp4, 10.016694, 29.95, 53.9415 568 | all-videos/L001.mp4, 10.006667, 29.97002997002997, 76.5463 569 | all-videos/M001.mp4, 10.0, 30.0, 36.9665 570 | all-videos/M002.mp4, 10.0, 30.0, 69.3081 571 | all-videos/M003.mp4, 10.0, 30.0, 21.1084 572 | all-videos/N001.mp4, 10.004999999999999, 120.0, 67.445 573 | all-videos/O001.mp4, 10.036667, 24.0, 27.8 574 | all-videos/O002.mp4, 10.04, 120.0, 31.0169 575 | all-videos/P001.mp4, 10.0, 30.0, 64.399 576 | all-videos/P002.mp4, 10.0, 30.0, 60.1489 577 | all-videos/P003.mp4, 10.0, 30.0, 61.749 578 | all-videos/P004.mp4, 10.0, 30.0, 63.4194 579 | all-videos/P005.mp4, 10.0, 30.0, 74.1421 580 | all-videos/P006.mp4, 10.0, 30.0, 54.2271 581 | all-videos/P007.mp4, 10.0, 30.0, 38.2139 582 | all-videos/P008.mp4, 10.0, 30.0, 38.4545 583 | all-videos/P009.mp4, 10.0, 30.0, 55.9663 584 | all-videos/Q001.mp4, 10.026644, 30.020013342228154, 61.1466 585 | all-videos/R001.mp4, 10.026644, 30.020013342228154, 72.4848 586 | --------------------------------------------------------------------------------
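Note on the label file above: each comma-separated row appears to hold the relative video path, the clip duration in seconds, the nominal frame rate, and the subjective quality score (MOS) used as the regression target by the demo scripts. The following is a minimal parsing sketch under that assumed column order; the `VideoLabel`/`load_labels` names are illustrative and not taken from the repository code.

# sketch_parse_labels.py -- assumes columns: path, duration (s), fps, MOS
import csv
from dataclasses import dataclass
from typing import List


@dataclass
class VideoLabel:
    path: str        # e.g. "all-videos/B003.mp4"
    duration: float  # clip length in seconds
    fps: float       # nominal frame rate
    mos: float       # mean opinion score (quality label)


def load_labels(label_file: str) -> List[VideoLabel]:
    """Read a comma-separated label file into a list of VideoLabel records."""
    records = []
    with open(label_file, newline="") as f:
        for row in csv.reader(f):
            if len(row) != 4:
                continue  # skip blank or truncated rows
            path, dur, fps, mos = (field.strip() for field in row)
            records.append(VideoLabel(path, float(dur), float(fps), float(mos)))
    return records


if __name__ == "__main__":
    # Hypothetical invocation; point this at wherever mylabels.txt lives.
    labels = load_labels("VQA/examplar_data_labels/LIVE_VQC/mylabels.txt")
    print(f"{len(labels)} videos, first MOS = {labels[0].mos:.2f}")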