├── .idea
│   ├── .gitignore
│   ├── inspectionProfiles
│   │   ├── Project_Default.xml
│   │   └── profiles_settings.xml
│   ├── lsct_phiqnet.iml
│   ├── misc.xml
│   ├── modules.xml
│   └── vcs.xml
├── LICENSE
├── README.md
├── requirements.txt
└── src
    ├── brisque
    │   ├── __init__.py
    │   ├── frame_features_video_folders_brisque.py
    │   └── test_features.py
    ├── callbacks
    │   ├── __init__.py
    │   ├── callbacks.py
    │   ├── csv_callback.py
    │   ├── evaluation_callback_generator.py
    │   ├── evaluation_vq_generator.py
    │   └── warmup_cosine_decay_scheduler.py
    ├── cnn_lstm
    │   ├── __init__.py
    │   ├── attention.py
    │   ├── attention_with_context.py
    │   ├── generate_random_split.py
    │   ├── train_cnn_lstm_params_search.py
    │   ├── train_cnn_lstm_params_search_konvid.py
    │   ├── train_cnn_lstm_params_search_konvid_oldfeatures.py
    │   ├── train_cnn_lstm_params_search_vsfa_features_no_flip.py
    │   ├── train_cnn_lstm_params_search_vsfa_features_with_flip.py
    │   └── train_cnn_lstm_params_search_vsfa_features_with_flip_attention.py
    ├── examples
    │   ├── __init__.py
    │   ├── frame_features_video.py
    │   ├── image_quality_prediction.py
    │   ├── sample_data
    │   │   ├── example_image_1 (mos=2.9).jpg
    │   │   ├── example_image_2 (mos=2.865).jpg
    │   │   └── example_video (mos=3.24).mp4
    │   └── video_quality_prediction.py
    ├── ffmpeg
    │   └── video_handler.py
    ├── lsct
    │   ├── README.md
    │   ├── __init__.py
    │   ├── ablations
    │   │   ├── __init__.py
    │   │   ├── cnn_lstm_phiqnet.py
    │   │   ├── frame_features_video_folders_resnet50.py
    │   │   ├── frame_features_video_folders_resnet50_1.py
    │   │   ├── train_lsct_clip_length_search.py
    │   │   ├── train_lsct_resnet50.py
    │   │   └── transformer_phiqnet.py
    │   ├── bin
    │   │   ├── __init__.py
    │   │   ├── train_lsct_all_databases.py
    │   │   ├── train_lsct_all_databases_10runs.py
    │   │   ├── train_lsct_all_databases_triq_features_10runs.py
    │   │   ├── train_lsct_params_search.py
    │   │   ├── train_lsct_params_search_1.py
    │   │   ├── train_lsct_single_databases.py
    │   │   └── train_lsct_test_on_live.py
    │   ├── meta_data
    │   │   ├── all_video_mos.csv
    │   │   ├── all_video_mos_Resnet50.csv
    │   │   ├── all_video_mos_Resnet50_vsfa.csv
    │   │   ├── all_video_mos_triq.csv
    │   │   ├── all_vids.pkl
    │   │   ├── ugc_chunks.pkl
    │   │   └── ugc_mos_original.xlsx
    │   ├── models
    │   │   ├── __init__.py
    │   │   ├── cnn_1d.py
    │   │   ├── cnn_lstm_model.py
    │   │   ├── lsct_phiqnet_model.py
    │   │   └── video_quality_transformer.py
    │   ├── train
    │   │   ├── __init__.py
    │   │   ├── train.py
    │   │   ├── train_cnn_lstm.py
    │   │   ├── video_clip_feature_generator.py
    │   │   ├── video_clip_feature_generator_vsfa.py
    │   │   └── video_clip_resnet_feature_generator.py
    │   └── utils
    │       ├── __init__.py
    │       ├── frame_features_video_folders.py
    │       ├── frame_features_video_folders_Resnet50.py
    │       ├── gather_video_ids.py
    │       └── ugc_chunk_generator.py
    ├── model_weights
    │   ├── LSCT.h5
    │   └── README.md
    └── phiqnet
        ├── README.md
        ├── __init__.py
        ├── backbone
        │   ├── ResNest.py
        │   ├── __init__.py
        │   ├── _common_blocks.py
        │   ├── resnet50.py
        │   ├── resnet_config.py
        │   ├── resnet_family.py
        │   ├── resnet_feature_maps.py
        │   ├── resnext50.py
        │   ├── vgg16.py
        │   └── weights.py
        ├── bin
        │   ├── __init__.py
        │   ├── train_resnet152_distribution_fpn_attention_imageaug.py
        │   ├── train_resnet152_distribution_fpn_attention_imageaug_finetune.py
        │   ├── train_resnet152v2_distribution_fpn_attention_imageaug.py
        │   ├── train_resnet50_distribution_NOfpn_attention_imageaug.py
        │   ├── train_resnet50_distribution_fpn_NOattention_imageaug.py
        │   ├── train_resnet50_distribution_fpn_attention_NOimageaug.py
        │   ├── train_resnet50_distribution_fpn_attention_imageaug.py
        │   ├── train_resnet50_distribution_fpn_attention_imageaug_NOpretrain.py
        │   ├── train_resnet50_distribution_fpn_attention_imageaug_finetune.py
        │   ├── train_resnet50_distribution_fpn_attention_imageaug_freezebackbone.py
        │   ├── train_resnet50_distribution_fpn_attention_imageaug_koniq10k.py
        │   ├── train_resnet50_distribution_fpn_attention_imageaug_koniq10k_finetune.py
        │   ├── train_resnet50_distribution_fpn_attention_imageaug_koniq_small.py
        │   ├── train_resnet50_distribution_fpn_attention_imageaug_koniq_small_finetune.py
        │   ├── train_resnet50_distribution_fpn_attention_imageaug_koniqall.py
        │   ├── train_resnet50_distribution_fpn_attention_imageaug_koniqall_finetune.py
        │   ├── train_resnet50_distribution_fpn_attention_imageaug_test.py
        │   ├── train_resnet50_mos_fpn_attention_imageaug.py
        │   ├── train_resnet50_mos_fpn_attention_imageaug_finetune.py
        │   ├── train_resnet50_mos_fpn_attention_imageaug_koniq_small.py
        │   ├── train_resnet50_mos_fpn_attention_imageaug_koniq_small_finetune.py
        │   ├── train_resnet50_mos_fpn_attention_imageaug_koniqall.py
        │   ├── train_resnet50_mos_fpn_attention_imageaug_koniqall_finetune.py
        │   └── train_vgg16_distribution_fpn_attention_imageaug.py
        ├── databases
        │   ├── README.md
        │   ├── koniq10k_images_scores.csv
        │   ├── live_mos.csv
        │   ├── random_split_imageset.py
        │   ├── test_images_koniq.csv
        │   ├── test_images_live.csv
        │   ├── train_images_koniq.csv
        │   └── train_images_live.csv
        ├── layers
        │   ├── __init__.py
        │   ├── bi_fpn.py
        │   ├── fpn.py
        │   ├── pan.py
        │   └── upsample.py
        ├── loss
        │   ├── __init__.py
        │   └── distribution_loss.py
        ├── model_evaluation
        │   ├── __init__.py
        │   ├── evaluation.py
        │   ├── validation.py
        │   └── validation_spag.py
        ├── models
        │   ├── __init__.py
        │   ├── image_quality_model.py
        │   ├── model_analysis.py
        │   └── prediction_model_contrast_sensitivity.py
        ├── pretrained_weights
        │   └── README.md
        ├── train
        │   ├── __init__.py
        │   ├── group_generator.py
        │   ├── plot_train.py
        │   └── train.py
        └── utils
            ├── __init__.py
            └── imageset_handler.py
/.idea/.gitignore:
--------------------------------------------------------------------------------
1 | # Default ignored files
2 | /shelf/
3 | /workspace.xml
4 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2020
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # LSCT-PHIQNet Implementation
2 |
3 | TF-Keras implementation of LSCT-PHIQNet as described in [Long Short-term Convolutional Transformer for No-Reference Video Quality Assessment](https://dl.acm.org/doi/abs/10.1145/3474085.3475368).
4 |
5 | There are two main modules:
6 |
7 | phiqnet is the implementation of PHIQNet for NR-IQA;
8 |
9 | lsct is the implementation of LSCT for NR-VQA based on PHIQNet features.
10 |
11 | Please see the respective README files in the individual modules.
12 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | tensorflow-gpu~=2.2.0
2 | numpy~=1.17.0
3 | six~=1.12.0
4 | scipy~=1.4.1
5 | pillow~=6.1.0
6 | matplotlib~=3.1.0
7 | sklearn
8 | scikit-learn~=0.21.3
9 | opencv-python~=4.1.1.26
10 | h5py~=2.10.0
11 | pyyaml~=5.3
12 | imgaug~=0.4.0
13 | pandas~=1.0.2
14 | future~=0.18.2
15 | munch~=2.5.0
16 | scikit-image~=0.15.0
17 | tensorflow_addons
--------------------------------------------------------------------------------
/src/brisque/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junyongyou/lsct_phiqnet/0f7c627d2e2b5ec02c09cf74d380ad2ccc2da481/src/brisque/__init__.py
--------------------------------------------------------------------------------
/src/brisque/test_features.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | f1 = np.load(r'C:\vq_datasets\BRISQUE_frame_features\live_vqc\Video\A001.npy')
4 | f2 = np.load(r'C:\vq_datasets\BRISQUE_frame_features_flipped\live_vqc\Video\A001.npy')
5 | t = 0
--------------------------------------------------------------------------------
/src/callbacks/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junyongyou/lsct_phiqnet/0f7c627d2e2b5ec02c09cf74d380ad2ccc2da481/src/callbacks/__init__.py
--------------------------------------------------------------------------------
/src/callbacks/callbacks.py:
--------------------------------------------------------------------------------
1 | import os
2 | from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, TensorBoard
3 | from callbacks.csv_callback import MyCSVLogger
4 |
5 |
6 | def create_callbacks(model_name, result_folder, other_callback=None, checkpoint=True, early_stop=True, metrics='accuracy'):
7 | """Creates callbacks for model training
8 |
9 | :param model_name: model name
10 | :param result_folder: folder to write to
11 | :param other_callback: other evaluation callbacks
12 | :param checkpoint: flag to use checkpoint or not
13 | :param early_stop: flag to use early_stop or not
14 | :param metrics: evaluation metrics for writing to checkpoint file
15 | :return: callbacks
16 | """
17 |
18 | callbacks = []
19 | if other_callback is not None:
20 | callbacks.append(other_callback)
21 | csv_log_file = os.path.join(result_folder, model_name + '.log')
22 | csv_logger = MyCSVLogger(csv_log_file, append=True, separator=';')
23 | callbacks.append(csv_logger)
24 | if early_stop:
25 | callbacks.append(EarlyStopping(monitor='plcc', min_delta=0.001, patience=40, mode='max'))
26 | if checkpoint:
27 |         if metrics is None:
28 | mcp_file = os.path.join(result_folder, '{epoch:01d}_{loss:.4f}_{val_loss:.4f}.h5')
29 | else:
30 | if metrics == 'accuracy':
31 | mcp_file = os.path.join(result_folder, '{epoch:01d}_{loss:.4f}_{accuracy:.4f}_{val_loss:.4f}_{val_accuracy:.4f}.h5')
32 | elif metrics == 'mae':
33 | mcp_file = os.path.join(result_folder, '{epoch:01d}_{loss:.4f}_{mae:.4f}_{val_loss:.4f}_{val_mae:.4f}.h5')
34 | elif metrics == 'categorical_crossentropy':
35 | mcp_file = os.path.join(result_folder, '{epoch:01d}_{loss:.4f}_{categorical_crossentropy:.4f}_{val_loss:.4f}_{val_categorical_crossentropy:.4f}.h5')
36 | elif metrics == 'distribution_loss':
37 | mcp_file = os.path.join(result_folder, '{epoch:01d}_{loss:.4f}_{distribution_loss:.4f}_{val_loss:.4f}_{val_distribution_loss:.4f}.h5')
38 | else:
39 | mcp_file = os.path.join(result_folder, '{epoch:01d}_{loss:.4f}_{val_loss:.4f}.h5')
40 | mcp = ModelCheckpoint(mcp_file, save_best_only=True, save_weights_only=True, monitor='plcc', verbose=1, mode='max')
41 | callbacks.append(mcp)
42 |
43 | # tensorboard_callback = TensorBoard(log_dir=result_folder, histogram_freq=1)
44 | # callbacks.append(tensorboard_callback)
45 |
46 | return callbacks
47 |
--------------------------------------------------------------------------------
/src/callbacks/csv_callback.py:
--------------------------------------------------------------------------------
1 | import collections
2 | import csv
3 |
4 | import numpy as np
5 | import six
6 |
7 | from tensorflow.python.util.compat import collections_abc
8 | from tensorflow.keras.callbacks import CSVLogger
9 |
10 |
11 | class MyCSVLogger(CSVLogger):
12 | """
13 | This is basically a copy of CSVLogger, the only change is that 4 decimal precision is used in loggers.
14 | """
15 | def __init__(self, filename, separator=',', append=False):
16 | super(MyCSVLogger, self).__init__(filename, separator, append)
17 |
18 | def on_epoch_end(self, epoch, logs=None):
19 | logs = logs or {}
20 |
21 | def handle_value(k):
22 | is_zero_dim_ndarray = isinstance(k, np.ndarray) and k.ndim == 0
23 | if isinstance(k, six.string_types):
24 | return k
25 | elif isinstance(k, collections_abc.Iterable) and not is_zero_dim_ndarray:
26 | return '"[%s]"' % (', '.join(map(str, k)))
27 | else:
28 | return '{:.4f}'.format(k)
29 |
30 | if self.keys is None:
31 | self.keys = sorted(logs.keys())
32 |
33 | if self.model.stop_training:
34 | # We set NA so that csv parsers do not fail for this last epoch.
35 | logs = dict([(k, logs[k]) if k in logs else (k, 'NA') for k in self.keys])
36 |
37 | if not self.writer:
38 | class CustomDialect(csv.excel):
39 | delimiter = self.sep
40 |
41 | fieldnames = ['epoch'] + self.keys
42 |
43 | self.writer = csv.DictWriter(
44 | self.csv_file,
45 | fieldnames=fieldnames,
46 | dialect=CustomDialect)
47 | if self.append_header:
48 | self.writer.writeheader()
49 |
50 | row_dict = collections.OrderedDict({'epoch': epoch})
51 | row_dict.update((key, handle_value(logs[key])) for key in self.keys)
52 | self.writer.writerow(row_dict)
53 | self.csv_file.flush()
54 |
--------------------------------------------------------------------------------
/src/callbacks/evaluation_callback_generator.py:
--------------------------------------------------------------------------------
1 | from tensorflow.keras.callbacks import Callback
2 | import numpy as np
3 | import scipy.stats
4 |
5 |
6 | class ModelEvaluationIQGenerator(Callback):
7 | """
8 | Evaluation for IQA, the main function is to calculate PLCC, SROCC, RMSE and MAD after each train epoch.
9 | """
10 | def __init__(self, val_generator, using_single_mos, evaluation_generator=None, imagenet_pretrain=False):
11 | super(ModelEvaluationIQGenerator, self).__init__()
12 | self.val_generator = val_generator
13 | self.evaluation_generator = evaluation_generator
14 | self.using_single_mos = using_single_mos
15 | self.imagenet_pretrain = imagenet_pretrain
16 | self.mos_scales = np.array([1, 2, 3, 4, 5])
17 |
18 | def __get_prediction_mos(self, image):
19 | prediction = self.model.predict(np.expand_dims(image, axis=0))
20 | return prediction[0][0]
21 |
22 | def __get_prediction_distribution(self, image):
23 | prediction = self.model.predict(np.expand_dims(image, axis=0))
24 | prediction = np.sum(np.multiply(self.mos_scales, prediction[0]))
25 | return prediction
26 |
27 | def __evaluation__(self, iq_generator):
28 | predictions = []
29 | mos_scores = []
30 |
31 | for j in range(iq_generator.__len__()):
32 | images, scores_batch = iq_generator.__getitem__(j)
33 | # mos_scores.extend(scores)
34 | if self.imagenet_pretrain:
35 |                 # ImageNet normalization
36 | images /= 127.5
37 | images -= 1.
38 |
39 | prediction_batch = self.model.predict(images)
40 | prediction = []
41 | scores = []
42 | for i in range(prediction_batch.shape[0]):
43 | prediction.append(np.sum(np.multiply(self.mos_scales, prediction_batch[i,:])))
44 | scores.append(np.sum(np.multiply(self.mos_scales, scores_batch[i, :])))
45 | predictions.extend(prediction)
46 | mos_scores.extend(scores)
47 |
48 | PLCC = scipy.stats.pearsonr(mos_scores, predictions)[0]
49 | SROCC = scipy.stats.spearmanr(mos_scores, predictions)[0]
50 | RMSE = np.sqrt(np.mean(np.subtract(predictions, mos_scores) ** 2))
51 | MAD = np.mean(np.abs(np.subtract(predictions, mos_scores)))
52 | print('\nPLCC: {}, SRCC: {}, RMSE: {}, MAD: {}'.format(PLCC, SROCC, RMSE, MAD))
53 | return PLCC, SROCC, RMSE, MAD
54 |
55 | def on_epoch_end(self, epoch, logs=None):
56 | plcc, srcc, rmse, mad = self.__evaluation__(self.val_generator)
57 |
58 | logs['plcc'] = plcc
59 | logs['srcc'] = srcc
60 | logs['rmse'] = rmse
61 |
62 | if self.evaluation_generator:
63 | if epoch % 10 == 0:
64 | plcc_10th, srcc_10th, rmse_10th, mad_10th = self.__evaluation__(self.evaluation_generator)
65 | print('\nEpoch {}: PLCC: {}, SRCC: {}, RMSE: {}, MAD: {}'.format(epoch, plcc_10th, srcc_10th, rmse_10th, mad_10th))
66 |
67 |
--------------------------------------------------------------------------------
/src/callbacks/evaluation_vq_generator.py:
--------------------------------------------------------------------------------
1 | from tensorflow.keras.callbacks import Callback
2 | import numpy as np
3 | import scipy.stats
4 |
5 |
6 | class ModelEvaluationGeneratorVQ(Callback):
7 | """
8 | Evaluation for VQA, the main function is to calculate PLCC, SROCC, RMSE and MAD after each train epoch.
9 | """
10 | def __init__(self, val_generator, evaluation_generator=None):
11 | super(ModelEvaluationGeneratorVQ, self).__init__()
12 | self.val_generator = val_generator
13 | self.evaluation_generator = evaluation_generator
14 |
15 | def __evaluation__(self, vq_generator):
16 | predictions = []
17 | mos_scores = []
18 |
19 | for i in range(vq_generator.__len__()):
20 | features, score = vq_generator.__getitem__(i)
21 | mos_scores.extend(score)
22 | prediction = self.model.predict(features)
23 | predictions.extend(np.squeeze(prediction, 1))
24 |
25 | PLCC = scipy.stats.pearsonr(mos_scores, predictions)[0]
26 | SROCC = scipy.stats.spearmanr(mos_scores, predictions)[0]
27 | RMSE = np.sqrt(np.mean(np.subtract(predictions, mos_scores) ** 2))
28 | MAD = np.mean(np.abs(np.subtract(predictions, mos_scores)))
29 | return PLCC, SROCC, RMSE, MAD
30 |
31 | def on_epoch_end(self, epoch, logs=None):
32 | plcc, srcc, rmse, mad = self.__evaluation__(self.val_generator)
33 | print('\nPLCC: {}, SRCC: {}, RMSE: {}, MAD: {}'.format(plcc, srcc, rmse, mad))
34 |
35 | logs['plcc'] = plcc
36 | logs['srcc'] = srcc
37 | logs['rmse'] = rmse
38 |
39 | if self.evaluation_generator:
40 | if epoch % 10 == 0:
41 | plcc_10th, srcc_10th, rmse_10th, mad_10th = self.__evaluation__(self.evaluation_generator)
42 | print('\nEpoch {}: PLCC: {}, SRCC: {}, RMSE: {}, MAD: {}'.format(epoch, plcc_10th, srcc_10th, rmse_10th, mad_10th))
43 |
--------------------------------------------------------------------------------
/src/callbacks/warmup_cosine_decay_scheduler.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from tensorflow import keras
3 | from tensorflow.keras import backend as K
4 |
5 |
6 | def cosine_decay_with_warmup(global_step,
7 | learning_rate_base,
8 | total_steps,
9 | warmup_learning_rate=0.0,
10 | warmup_steps=0,
11 | hold_base_rate_steps=0):
12 | """Cosine decay schedule with warm up period.
13 |
14 | Cosine annealing learning rate as described in:
15 | Loshchilov and Hutter, SGDR: Stochastic Gradient Descent with Warm Restarts.
16 | ICLR 2017. https://arxiv.org/abs/1608.03983
17 | In this schedule, the learning rate grows linearly from warmup_learning_rate
18 | to learning_rate_base for warmup_steps, then transitions to a cosine decay
19 | schedule.
20 |
21 | Arguments:
22 | global_step {int} -- global step.
23 | learning_rate_base {float} -- base learning rate.
24 | total_steps {int} -- total number of training steps.
25 |
26 | Keyword Arguments:
27 | warmup_learning_rate {float} -- initial learning rate for warm up. (default: {0.0})
28 | warmup_steps {int} -- number of warmup steps. (default: {0})
29 | hold_base_rate_steps {int} -- Optional number of steps to hold base learning rate
30 | before decaying. (default: {0})
31 | Returns:
32 | a float representing learning rate.
33 |
34 | Raises:
35 | ValueError: if warmup_learning_rate is larger than learning_rate_base,
36 | or if warmup_steps is larger than total_steps.
37 | """
38 |
39 | if total_steps < warmup_steps:
40 | raise ValueError('total_steps must be larger or equal to '
41 | 'warmup_steps.')
42 | learning_rate = 0.5 * learning_rate_base * (1 + np.cos(
43 | np.pi *
44 | (global_step - warmup_steps - hold_base_rate_steps
45 | ) / float(total_steps - warmup_steps - hold_base_rate_steps)))
46 | if hold_base_rate_steps > 0:
47 | learning_rate = np.where(global_step > warmup_steps + hold_base_rate_steps,
48 | learning_rate, learning_rate_base)
49 | if warmup_steps > 0:
50 | if learning_rate_base < warmup_learning_rate:
51 | raise ValueError('learning_rate_base must be larger or equal to '
52 | 'warmup_learning_rate.')
53 | slope = (learning_rate_base - warmup_learning_rate) / warmup_steps
54 | warmup_rate = slope * global_step + warmup_learning_rate
55 | learning_rate = np.where(global_step < warmup_steps, warmup_rate,
56 | learning_rate)
57 | return np.where(global_step > total_steps, 0.0, learning_rate)
58 |
59 |
60 | class WarmUpCosineDecayScheduler(keras.callbacks.Callback):
61 | """Cosine decay with warmup learning rate scheduler
62 | """
63 |
64 | def __init__(self,
65 | learning_rate_base,
66 | total_steps,
67 | global_step_init=0,
68 | warmup_learning_rate=0.0,
69 | warmup_steps=0,
70 | hold_base_rate_steps=80,
71 | verbose=1):
72 | """Constructor for cosine decay with warmup learning rate scheduler.
73 |
74 | Arguments:
75 | learning_rate_base {float} -- base learning rate.
76 | total_steps {int} -- total number of training steps.
77 |
78 | Keyword Arguments:
79 | global_step_init {int} -- initial global step, e.g. from previous checkpoint.
80 | warmup_learning_rate {float} -- initial learning rate for warm up. (default: {0.0})
81 | warmup_steps {int} -- number of warmup steps. (default: {0})
82 | hold_base_rate_steps {int} -- Optional number of steps to hold base learning rate
83 |                 before decaying. (default: {80})
84 |             verbose {int} -- 0: quiet, 1: update messages. (default: {1})
85 | """
86 |
87 | super(WarmUpCosineDecayScheduler, self).__init__()
88 | self.learning_rate_base = learning_rate_base
89 | self.total_steps = total_steps
90 | self.global_step = global_step_init
91 | self.warmup_learning_rate = warmup_learning_rate
92 | self.warmup_steps = warmup_steps
93 | self.hold_base_rate_steps = hold_base_rate_steps
94 | self.verbose = verbose
95 | self.learning_rates = []
96 |
97 | def on_epoch_end(self, epoch, logs=None):
98 | lr = K.get_value(self.model.optimizer.lr)
99 | if self.verbose > 0:
100 | print('\nEpoch %05d: setting learning rate to %s.' % (epoch + 1, lr))
101 |
102 | def on_batch_end(self, batch, logs=None):
103 | self.global_step = self.global_step + 1
104 | lr = K.get_value(self.model.optimizer.lr)
105 | self.learning_rates.append(lr)
106 | # if self.verbose > 0:
107 | # print('\nBatch %05d: setting learning '
108 | # 'rate to %s.' % (self.global_step + 1, lr))
109 |
110 | def on_batch_begin(self, batch, logs=None):
111 | lr = cosine_decay_with_warmup(global_step=self.global_step,
112 | learning_rate_base=self.learning_rate_base,
113 | total_steps=self.total_steps,
114 | warmup_learning_rate=self.warmup_learning_rate,
115 | warmup_steps=self.warmup_steps,
116 | hold_base_rate_steps=self.hold_base_rate_steps)
117 | K.set_value(self.model.optimizer.lr, lr)
118 | # if self.verbose > 0:
119 | # print('\nBatch %05d: setting learning '
120 | # 'rate to %s.' % (self.global_step + 1, lr))
121 |
122 |
123 | # # Create a model.
124 | # model = Sequential()
125 | # model.add(Dense(32, activation='relu', input_dim=100))
126 | # model.add(Dense(10, activation='softmax'))
127 | # model.compile(optimizer='rmsprop',
128 | # loss='categorical_crossentropy',
129 | # metrics=['accuracy'])
130 | #
131 | # # Number of training samples.
132 | # sample_count = 12
133 | #
134 | # # Total epochs to train.
135 | # epochs = 100
136 | #
137 | # # Number of warmup epochs.
138 | # warmup_epoch = 10
139 | #
140 | # # Training batch size, set small value here for demonstration purpose.
141 | # batch_size = 4
142 | #
143 | # # Base learning rate after warmup.
144 | # learning_rate_base = 0.001
145 | #
146 | # total_steps = int(epochs * sample_count / batch_size)
147 | #
148 | # # Compute the number of warmup batches.
149 | # warmup_steps = int(warmup_epoch * sample_count / batch_size)
150 | #
151 | # # Generate dummy data.
152 | # data = np.random.random((sample_count, 100))
153 | # labels = np.random.randint(10, size=(sample_count, 1))
154 | #
155 | # # Convert labels to categorical one-hot encoding.
156 | # one_hot_labels = keras.utils.to_categorical(labels, num_classes=10)
157 | #
158 | # # Compute the number of warmup batches.
159 | # warmup_batches = warmup_epoch * sample_count / batch_size
160 | #
161 | # # Create the Learning rate scheduler.
162 | # warm_up_lr = WarmUpCosineDecayScheduler(learning_rate_base=learning_rate_base,
163 | # total_steps=total_steps,
164 | # warmup_learning_rate=0.0,
165 | # warmup_steps=warmup_steps,
166 | # hold_base_rate_steps=0)
167 | #
168 | # # Train the model, iterating on the data in batches of 32 samples
169 | # model.fit(data, one_hot_labels, epochs=epochs, batch_size=batch_size,
170 | # verbose=0, callbacks=[warm_up_lr])
171 | #
172 | # import matplotlib.pyplot as plt
173 | # plt.plot(warm_up_lr.learning_rates)
174 | # plt.xlabel('Step', fontsize=20)
175 | # plt.ylabel('lr', fontsize=20)
176 | # plt.axis([0, total_steps, 0, learning_rate_base*1.1])
177 | # plt.xticks(np.arange(0, total_steps, 50))
178 | # plt.grid()
179 | # plt.title('Cosine decay with warmup', fontsize=20)
180 | # plt.show()
181 |
--------------------------------------------------------------------------------
/src/cnn_lstm/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junyongyou/lsct_phiqnet/0f7c627d2e2b5ec02c09cf74d380ad2ccc2da481/src/cnn_lstm/__init__.py
--------------------------------------------------------------------------------
/src/cnn_lstm/attention.py:
--------------------------------------------------------------------------------
1 | from tensorflow.keras.layers import Dense, Lambda, dot, Activation, concatenate
2 | from tensorflow.keras.layers import Layer
3 | import tensorflow.keras.backend as K
4 |
5 |
6 | # class Attention(Layer):
7 | #
8 | # def __init__(self, **kwargs):
9 | # super().__init__(**kwargs)
10 | #
11 | # def __call__(self, hidden_states):
12 | # """
13 | # Many-to-one attention mechanism for Keras.
14 | # @param hidden_states: 3D tensor with shape (batch_size, time_steps, input_dim).
15 | # @return: 2D tensor with shape (batch_size, 128)
16 | # @author: felixhao28.
17 | # """
18 | # hidden_size = int(hidden_states.shape[2])
19 | # # Inside dense layer
20 | # # hidden_states dot W => score_first_part
21 | # # (batch_size, time_steps, hidden_size) dot (hidden_size, hidden_size) => (batch_size, time_steps, hidden_size)
22 | # # W is the trainable weight matrix of attention Luong's multiplicative style score
23 | # score_first_part = Dense(hidden_size, use_bias=False, name='attention_score_vec')(hidden_states)
24 | # # score_first_part dot last_hidden_state => attention_weights
25 | # # (batch_size, time_steps, hidden_size) dot (batch_size, hidden_size) => (batch_size, time_steps)
26 | # h_t = Lambda(lambda x: x[:, -1, :], output_shape=(hidden_size,), name='last_hidden_state')(hidden_states)
27 | # score = dot([score_first_part, h_t], [2, 1], name='attention_score')
28 | # attention_weights = Activation('softmax', name='attention_weight')(score)
29 | # # (batch_size, time_steps, hidden_size) dot (batch_size, time_steps) => (batch_size, hidden_size)
30 | # context_vector = dot([hidden_states, attention_weights], [1, 1], name='context_vector')
31 | # return context_vector
32 | # # out = K.sum(context_vector, axis=1)
33 | # # pre_activation = concatenate([context_vector, h_t], name='attention_output')
34 | # # attention_vector = Dense(128, use_bias=False, activation='tanh', name='attention_vector')(pre_activation)
35 | # # return attention_vector
36 |
37 |
38 | class Attention(Layer):
39 |
40 | def __init__(self, return_sequences=True):
41 | self.return_sequences = return_sequences
42 | super(Attention, self).__init__()
43 |
44 | def build(self, input_shape):
45 | assert len(input_shape) == 3
46 | input_shape_list = input_shape.as_list()
47 | self.W = self.add_weight(name="att_weight", shape=(input_shape_list[-1], 1),
48 | initializer="normal")
49 | self.b = self.add_weight(name="att_bias", shape=(input_shape_list[-1], 1),
50 | initializer="zeros")
51 |
52 | super(Attention, self).build(input_shape)
53 |
54 | def call(self, x):
55 | e = K.tanh(K.dot(x, self.W) + self.b)
56 | a = K.softmax(e, axis=1)
57 | output = x * a
58 |
59 | if self.return_sequences:
60 | return output
61 |
62 | return K.sum(output, axis=1)
63 |
--------------------------------------------------------------------------------
/src/cnn_lstm/attention_with_context.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | from tensorflow.keras import initializers
3 | from tensorflow.keras import regularizers
4 | from tensorflow.keras import constraints
5 |
6 | from tensorflow.keras import activations
7 | from tensorflow.keras import backend as K
8 |
9 | from tensorflow.keras.layers import Layer, Embedding
10 |
11 |
12 | class Attention(Layer):
13 | """
14 | Attention operation, with a context/query vector, for temporal data.
15 | Supports Masking.
16 | Follows the work of Yang et al. [https://www.cs.cmu.edu/~diyiy/docs/naacl16.pdf]
17 | "Hierarchical Attention Networks for Document Classification"
18 | by using a context vector to assist the attention
19 | # Input shape
20 | 3D tensor with shape: `(samples, steps, features)`.
21 | # Output shape
22 | 2D tensor with shape: `(samples, features)`.
23 | :param kwargs:
24 | Just put it on top of an RNN Layer (GRU/LSTM/SimpleRNN) with return_sequences=True.
25 | The dimensions are inferred based on the output shape of the RNN.
26 | Example:
27 | model.add(LSTM(64, return_sequences=True))
28 | model.add(AttentionWithContext())
29 | """
30 |
31 | def __init__(self,
32 | W_regularizer=None, u_regularizer=None, b_regularizer=None,
33 | W_constraint=None, u_constraint=None, b_constraint=None,
34 | bias=True,
35 | return_attention=False, **kwargs):
36 |
37 | self.supports_masking = True
38 | self.return_attention = return_attention
39 | self.init = initializers.get('glorot_uniform')
40 |
41 | self.W_regularizer = regularizers.get(W_regularizer)
42 | # self.u_regularizer = regularizers.get(u_regularizer)
43 | self.b_regularizer = regularizers.get(b_regularizer)
44 |
45 | self.W_constraint = constraints.get(W_constraint)
46 | # self.u_constraint = constraints.get(u_constraint)
47 | self.b_constraint = constraints.get(b_constraint)
48 |
49 | self.bias = bias
50 | super(Attention, self).__init__(**kwargs)
51 |
52 | def build(self, input_shape):
53 | assert len(input_shape) == 3
54 | input_shape_list = input_shape.as_list()
55 |
56 | self.W = self.add_weight(shape=((input_shape_list[-1], input_shape_list[-1])),
57 | initializer=self.init,
58 | name='{}_W'.format(self.name),
59 | regularizer=self.W_regularizer,
60 | constraint=self.W_constraint)
61 | if self.bias:
62 | self.b = self.add_weight(shape=(input_shape_list[-1],),
63 | initializer='zero',
64 | name='{}_b'.format(self.name),
65 | regularizer=self.b_regularizer,
66 | constraint=self.b_constraint)
67 |
68 | # self.u = self.add_weight(shape=(input_shape_list[-1],),
69 | # initializer=self.init,
70 | # name='{}_u'.format(self.name),
71 | # regularizer=self.u_regularizer,
72 | # constraint=self.u_constraint)
73 |
74 | super(Attention, self).build(input_shape.as_list())
75 |
76 | def compute_mask(self, input, input_mask=None):
77 | # do not pass the mask to the next layers
78 | return None
79 |
80 | def call(self, x, mask=None):
81 | uit = tf.tensordot(x, self.W, axes=1)
82 |
83 | if self.bias:
84 | uit += self.b
85 |
86 | uit = activations.tanh(uit)
87 |
88 | a = activations.softmax(uit, axis=1)
89 | output = x * a
90 | result = K.sum(output, axis=1)
91 |
92 | return result
93 |
94 | # ait = tf.tensordot(uit, self.u, axes=1)
95 | #
96 | # a = activations.exponential(ait)
97 | #
98 | # # apply mask after the exp. will be re-normalized next
99 | # if mask is not None:
100 | # # Cast the mask to floatX to avoid float64 upcasting in theano
101 | # a *= tf.cast(mask, K.floatx())
102 | #
103 | # # in some cases especially in the early stages of training the sum may be almost zero
104 | # # and this results in NaN's. A workaround is to add a very small positive number ε to the sum.
105 | # # a /= K.cast(K.sum(a, axis=1, keepdims=True), K.floatx())
106 | # a /= tf.cast(K.sum(a, axis=1, keepdims=True) + K.epsilon(), K.floatx())
107 | #
108 | # a = K.expand_dims(a)
109 | # weighted_input = x * a
110 | # result = K.sum(weighted_input, axis=1)
111 | #
112 | # if self.return_attention:
113 | # return [result, a]
114 | # return result
115 |
116 | def compute_output_shape(self, input_shape):
117 | if self.return_attention:
118 | #TODO use TensorShape here, as done in the else statement. I'm not sure
119 | # if this is returning a single tensor, or a list of two so leaving this undone for now. Suspect this will
120 | # need to complete if using Sequential rather than Functional API
121 | return [(input_shape[0], input_shape[-1]),
122 | (input_shape[0], input_shape[1])]
123 | else:
124 | return tf.TensorShape([input_shape[0].value, input_shape[-1].value])
125 |
126 |
--------------------------------------------------------------------------------
/src/cnn_lstm/generate_random_split.py:
--------------------------------------------------------------------------------
1 | from lsct.utils.gather_video_ids import gather_all_vids
2 | from pickle import load, dump
3 |
4 |
5 | vids = r'C:\lsct_phiqnet\src\lsct\meta_data\all_vids.pkl'
6 | for i in range(10):
7 | train_vids, test_vids = gather_all_vids(all_vids_pkl=vids)
8 | dump([train_vids, test_vids], open(r'C:\vq_datasets\random_splits\split_{}.pkl'.format(i), 'wb'))
--------------------------------------------------------------------------------
/src/examples/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junyongyou/lsct_phiqnet/0f7c627d2e2b5ec02c09cf74d380ad2ccc2da481/src/examples/__init__.py
--------------------------------------------------------------------------------
/src/examples/frame_features_video.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import os
3 | import tensorflow as tf
4 |
5 | from lsct.utils.frame_features_video_folders import CalculateFrameQualityFeatures
6 | from lsct.ablations.frame_features_video_folders_resnet50 import CalculateFrameQualityFeaturesResnet50
7 |
8 | FFMPEG = r'..\\ffmpeg\ffmpeg.exe'
9 | FFPROBE = r'..\\ffmpeg\ffprobe.exe'
10 |
11 |
12 | """
13 | This script shows how to calculate PHIQNet features on all video frames, FFMPEG and FFProbe are required
14 | """
15 | def video_frame_features_PHIQNet(phinqnet_weights_path, video_path, reture_clip_features=False):
16 | frame_features_extractor = CalculateFrameQualityFeatures(phinqnet_weights_path, FFPROBE, FFMPEG)
17 | features = frame_features_extractor.__ffmpeg_frames_features__(video_path, flip=False)
18 | features = np.squeeze(np.array(features), axis=2)
19 | features = np.reshape(features, (features.shape[0], features.shape[1] * features.shape[2]))
20 |
21 | if reture_clip_features:
22 | clip_features = []
23 | clip_length = 16
24 | for j in range(features.shape[0] // clip_length):
25 | clip_features.append(features[j * clip_length: (j + 1) * clip_length, :])
26 | clip_features = np.array(clip_features)
27 | return clip_features
28 |
29 | return np.array(features)
30 |
31 |
32 | def video_frame_features_ResNet50(resnet50_weights_path, video_path, reture_clip_features=False):
33 | frame_features_extractor = CalculateFrameQualityFeaturesResnet50(resnet50_weights_path, FFPROBE, FFMPEG)
34 | features = frame_features_extractor.__ffmpeg_frames_features__(video_path, flip=False)
35 | features = np.squeeze(np.array(features), axis=1)
36 |
37 | if reture_clip_features:
38 | clip_features = []
39 | clip_length = 16
40 | for j in range(features.shape[0] // clip_length):
41 | clip_features.append(features[j * clip_length: (j + 1) * clip_length, :])
42 | clip_features = np.array(clip_features)
43 | return clip_features
44 |
45 | return np.array(features, np.float16)
46 |
47 |
48 | def video_frame_features_ResNet50_folder(resnet50_weights_path, video_folder, target_folder):
49 | frame_features_extractor = CalculateFrameQualityFeaturesResnet50(resnet50_weights_path, FFPROBE, FFMPEG)
50 |
51 | video_types = ('.mp4', '.mpg')
52 | video_paths = [f for f in os.listdir(video_folder) if f.endswith(video_types)]
53 | video_paths = video_paths[:70000]
54 | numb_videos = len(video_paths)
55 |
56 | for i, video_path in enumerate(video_paths):
57 | ext = os.path.splitext(video_path)
58 | np_file = os.path.join(target_folder, '{}.npy'.format(ext[0]))
59 | if not os.path.exists(np_file):
60 | features = frame_features_extractor.__ffmpeg_frames_features__(os.path.join(video_folder, video_path), flip=False)
61 | features = np.squeeze(np.array(features), axis=1)
62 | features = np.array(features, dtype=np.float16)
63 | np.save(np_file, features)
64 | print('{} out of {}, {} done'.format(i, numb_videos, video_path))
65 | else:
66 | print('{} out of {}, {} already exists'.format(i, numb_videos, video_path))
67 |
68 |
69 | if __name__ == '__main__':
70 | gpus = tf.config.experimental.list_physical_devices('GPU')
71 | tf.config.experimental.set_visible_devices(gpus[0], 'GPU')
72 |
73 | # phiqnet_weights_path = r'..\\model_weights\PHIQNet.h5'
74 | # video_path = r'.\\sample_data\example_video (mos=3.24).mp4'
75 | video_folder = r'K:\Faglitteratur\VQA\k150ka'
76 | # features = video_frame_features_PHIQNet(phiqnet_weights_path, video_path)
77 |
78 |     # Use None so that ResNet50 downloads the ImageNet pretrained weights, or specify the path to a local weights file
79 | resnet50_imagenet_weights = r'C:\pretrained_weights_files\resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5'
80 | # features_resnet50 = video_frame_features_ResNet50(resnet50_imagenet_weights, video_path)
81 |
82 | target_folder = r'F:\k150k_features'
83 | video_frame_features_ResNet50_folder(resnet50_imagenet_weights, video_folder, target_folder)
84 | t = 0
--------------------------------------------------------------------------------
/src/examples/image_quality_prediction.py:
--------------------------------------------------------------------------------
1 | from phiqnet.models.image_quality_model import phiq_net
2 | import numpy as np
3 | from PIL import Image
4 |
5 |
6 | def predict_image_quality(model_weights_path, image_path):
7 | image = Image.open(image_path)
8 | image = np.asarray(image, dtype=np.float32)
9 | image /= 127.5
10 | image -= 1.
11 |
12 | model = phiq_net(n_quality_levels=5)
13 | model.load_weights(model_weights_path)
14 |
15 | prediction = model.predict(np.expand_dims(image, axis=0))
16 |
17 | mos_scales = np.array([1, 2, 3, 4, 5])
18 | predicted_mos = (np.sum(np.multiply(mos_scales, prediction[0])))
19 | return predicted_mos
20 |
21 |
22 | if __name__ == '__main__':
23 | image_path = r'.\\sample_data\example_image_1 (mos=2.9).jpg'
24 | # image_path = r'.\\sample_data\example_image_2 (mos=2.865).jpg'
25 | model_weights_path = r'..\\model_weights\PHIQNet.h5'
26 | predict_mos = predict_image_quality(model_weights_path, image_path)
27 | print('Predicted MOS: {}'.format(predict_mos))
--------------------------------------------------------------------------------
/src/examples/sample_data/example_image_1 (mos=2.9).jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junyongyou/lsct_phiqnet/0f7c627d2e2b5ec02c09cf74d380ad2ccc2da481/src/examples/sample_data/example_image_1 (mos=2.9).jpg
--------------------------------------------------------------------------------
/src/examples/sample_data/example_image_2 (mos=2.865).jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junyongyou/lsct_phiqnet/0f7c627d2e2b5ec02c09cf74d380ad2ccc2da481/src/examples/sample_data/example_image_2 (mos=2.865).jpg
--------------------------------------------------------------------------------
/src/examples/sample_data/example_video (mos=3.24).mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junyongyou/lsct_phiqnet/0f7c627d2e2b5ec02c09cf74d380ad2ccc2da481/src/examples/sample_data/example_video (mos=3.24).mp4
--------------------------------------------------------------------------------
/src/examples/video_quality_prediction.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | from lsct.utils.frame_features_video_folders import CalculateFrameQualityFeatures
4 | from lsct.models.lsct_phiqnet_model import create_model
5 |
6 | FFMPEG = r'..\\ffmpeg\ffmpeg.exe'
7 | FFPROBE = r'..\\ffmpeg\ffprobe.exe'
8 |
9 |
10 | def predict_video_quality(phinqnet_weights_path, lsct_weights_path, video_path):
11 | frame_features_extractor = CalculateFrameQualityFeatures(phinqnet_weights_path, FFPROBE, FFMPEG)
12 | features = frame_features_extractor.__ffmpeg_frames_features__(video_path, flip=False)
13 | features = np.squeeze(np.array(features), axis=2)
14 | features = np.reshape(features, (features.shape[0], features.shape[1] * features.shape[2]))
15 |
16 | clip_features = []
17 | clip_length = 16
18 | for j in range(features.shape[0] // clip_length):
19 | clip_features.append(features[j * clip_length: (j + 1) * clip_length, :])
20 | clip_features = np.array(clip_features)
21 |
22 | transformer_params = [2, 64, 4, 64]
23 | dropout_rates = 0.1
24 | cnn_filters = [32, 64]
25 |
26 | feature_length = 5 * 256
27 |
28 | vq_model = create_model(clip_length,
29 | feature_length=feature_length,
30 | cnn_filters=cnn_filters,
31 | transformer_params=transformer_params,
32 | dropout_rate=dropout_rates)
33 | vq_model.summary()
34 | vq_model.load_weights(lsct_weights_path)
35 | predict_mos = vq_model.predict(np.expand_dims(clip_features, axis=0))
36 | return predict_mos[0][0]
37 |
38 |
39 | if __name__ == '__main__':
40 | phiqnet_weights_path = r'..\\model_weights\PHIQNet.h5'
41 | lsct_weights_path = r'..\\model_weights\LSCT.h5'
42 |
43 | video_path = r'.\\sample_data\example_video (mos=3.24).mp4'
44 | predict_mos = predict_video_quality(phiqnet_weights_path, lsct_weights_path, video_path)
45 | print('Predicted MOS: {}'.format(predict_mos))
46 |
--------------------------------------------------------------------------------
/src/ffmpeg/video_handler.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import subprocess as sp
3 | import json
4 |
5 | """
6 | A class to handle video using FFMPEG
7 | """
8 | class VideoHandler():
9 | def __init__(self, ffprobe_exe, ffmpeg_exe, process_frame_interval=0):
10 | self.ffprobe = ffprobe_exe
11 | self.ffmpeg = ffmpeg_exe
12 | self.process_frame_interval = process_frame_interval
13 |
14 | def get_video_meta(self, video_file):
15 | """Internal method to get video meta
16 |         :return: a list containing [video_exists, duration, frame_count, height, width, fps, bitrate], or None if no video stream is found
17 | """
18 | cmd = [self.ffprobe, '-i', video_file, '-v', 'quiet', '-print_format', 'json', '-show_streams', '-show_format']
19 | ffprobe_output = json.loads(sp.check_output(cmd).decode('utf-8'))
20 |
21 | # audio_exits = False
22 | video_exits = False
23 | duration = 0
24 | frame_count = 0
25 | height = 0
26 | width = 0
27 | fps = 0
28 | bitrate = 0
29 |
30 | stream_type = 'streams'
31 | codec_type = 'codec_type'
32 | if stream_type in ffprobe_output:
33 | for i in range(len(ffprobe_output[stream_type])):
34 | if codec_type in ffprobe_output[stream_type][i]:
35 | # if ffprobe_output[stream_type][i][codec_type] == 'audio':
36 | # audio_exits = True
37 | if ffprobe_output[stream_type][i][codec_type] == 'video':
38 | video_exits = True
39 | frame_rate = ffprobe_output[stream_type][i]['avg_frame_rate']
40 | if '/' in frame_rate:
41 | fps_temp = [float(item) for item in frame_rate.split('/')]
42 | fps = fps_temp[0] / fps_temp[1]
43 | else:
44 | fps = float(frame_rate)
45 | if 'duration' not in ffprobe_output[stream_type][i]:
46 | if 'format' in ffprobe_output:
47 | duration = float(ffprobe_output['format']['duration'])
48 | else:
49 | duration = float(ffprobe_output[stream_type][i]['duration'])
50 | frame_count = int(duration * fps)
51 | height = ffprobe_output[stream_type][i]['height']
52 | width = ffprobe_output[stream_type][i]['width']
53 | if 'bit_rate' not in ffprobe_output[stream_type][i]:
54 | if 'format' in ffprobe_output:
55 | bitrate = int(ffprobe_output['format']['bit_rate'])
56 | else:
57 | bitrate = int(ffprobe_output[stream_type][i]['bit_rate']) / 1000
58 |
59 | if not video_exits:
60 | return None
61 | return [video_exits, duration, frame_count, height, width, fps, bitrate]
62 |
63 | def get_frames(self, video_file, convert_to_gray=False):
64 | """
65 | Get video frames in a Numpy array
66 | :param video_file: video path
67 | :param convert_to_gray: flag to convert to gray or not
68 | :return: frames in an array
69 | """
70 | meta = self.get_video_meta(video_file)
71 | video_height = meta[3]
72 | video_width = meta[4]
73 | video_size = video_height * video_width * 3
74 | # print('Start reading {}'.format(video_file))
75 | if self.process_frame_interval > 0:
76 | fps = 'fps=1/' + str(self.process_frame_interval)
77 | cmd = [self.ffmpeg, '-i', video_file, '-f', 'image2pipe', '-vf', fps, '-pix_fmt', 'rgb24', '-vcodec',
78 | 'rawvideo', '-']
79 | else:
80 | cmd = [self.ffmpeg, '-i', video_file, '-f', 'image2pipe', '-pix_fmt', 'rgb24', '-hide_banner', '-loglevel',
81 | 'panic', '-vcodec', 'rawvideo', '-']
82 | pipe = sp.Popen(cmd, stdout=sp.PIPE)
83 |
84 | images = []
85 | try:
86 | while True:
87 | try:
88 | raw_image = pipe.stdout.read(video_size)
89 | if len(raw_image) != video_size:
90 | break
91 |                     image = np.frombuffer(raw_image, dtype='uint8')
92 | image = image.reshape((video_height, video_width, 3))
93 |
94 | if convert_to_gray:
95 | image = np.array(image, dtype=np.float32)
96 | image = np.dot(image, [0.2989, 0.587, 0.114])
97 |
98 | images.append(image.astype(np.uint8))
99 | except Exception as e1:
100 | print(e1)
101 | continue
102 | except Exception as e2:
103 | print(e2)
104 | pipe.stdout.flush()
105 |
106 | return images
107 |
--------------------------------------------------------------------------------
/src/lsct/README.md:
--------------------------------------------------------------------------------
1 | # LSCT Implementation
2 |
3 | TF-Keras implementation of LSCT as described in [Long Short-term Convolutional Transformer for No-Reference Video Quality Assessment](https://dl.acm.org/doi/abs/10.1145/3474085.3475368).
4 |
5 | ## Installation
6 |
7 | 1) Clone this repository.
8 | 2) Install the required Python packages. The code was developed with PyCharm in Python 3.7. The requirements.txt file was generated by PyCharm, and the code should also run with the latest versions of the packages.
9 |
10 | ## Training a model
11 | Examples of training LSCT and its variants can be seen in lsct/bin.
12 | Argparse could be used, but the authors prefer to define the parameters in a dictionary; it is easy to convert the scripts to take command-line arguments (see the sketch after the parameter list below).
13 | In principle, the following parameters can be defined:
14 |
15 | args = {}
16 | args['multi_gpu'] = 0 # gpu setting, set to 1 for using multiple GPUs
17 | args['gpu'] = 0 # If having multiple GPUs, specify which GPU to use
18 |
19 | args['result_folder'] = r'..\databases\experiments' # Define result path
20 | args['vids_meta'] = r'..\\meta_data\all_vids.pkl'
21 | args['meta_file'] = r'..\\meta_data\all_video_mos.csv'
22 |
23 | # if ugc_chunk_pickle is used, then the folders containing PHIQNet features of UGC chunks must be specified
24 | args['ugc_chunk_pickle'] = r'..\\meta_data\ugc_chunks.pkl' # this file contains information about the YouTube-UGC chunks, if set to None, then chunks are not included in training data
25 | args['ugc_chunk_folder'] = r'.\frame_features\ugc_chunks' # folder containing PHIQNet features of chunk frames; if ugc_chunk_pickle=None, this argument is not used
26 | args['ugc_chunk_folder_flipped'] = r'.\frame_features_flipped\ugc_chunks' # folder containing PHIQNet features of flipped chunk frames; if ugc_chunk_pickle=None, this argument is not used
27 |
28 | args['database'] = ['live', 'konvid', 'ugc'] # specify which database will be included in the training data
29 |
30 | args['model_name'] = 'lsct' # model name to be used in recording training result (e.g., logs)
31 |
32 | args['transformer_params'] = [2, 64, 4, 64]
33 | args['dropout_rate'] = 0.1
34 | args['cnn_filters'] = [32, 64]
35 | # args['pooling_sizes'] = [4, 4]
36 | args['clip_length'] = 16
37 |
38 | args['lr_base'] = 1e-3 # Define the base learning rate in the warmup and rate decay approach
39 | args['batch_size'] = 32 # Batch size, should be chosen to fit in the GPU memory
40 | args['epochs'] = 120 # Maximum number of epochs; early stopping can be enabled in the callbacks or not
41 | args['lr_schedule'] = True # Choose between True and False, indicating whether the learning rate schedule should be used
42 |
43 | args['validation'] = 'validation' # Choose between 'validation' and 'test'. If 'validation', the model will be trained on the training set and validated on the test set, which are randomly split from the databases.
44 | # If 'test', the model will be trained on the entire 'KonViD-1k' and 'YouTube-UGC' databases, and validated on the entire 'LIVE-VQC' database
45 |
46 | args['do_finetune'] = False # specify whether fine-tuning using SGD with a smaller learning rate is performed
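
As a minimal sketch (not part of the repository), the parameter dictionary above could be exposed through argparse as follows; only a subset of the keys is mirrored here, the defaults are illustrative, and the remaining keys would need to be added in the same way:

```
import argparse

from lsct.train.train import train_main


def parse_args():
    parser = argparse.ArgumentParser(description='Train LSCT')
    # Only a few of the dictionary keys above are mirrored here for brevity
    parser.add_argument('--result_folder', default=r'..\databases\experiments')
    parser.add_argument('--vids_meta', default=r'..\meta_data\all_vids.pkl')
    parser.add_argument('--meta_file', default=r'..\meta_data\all_video_mos.csv')
    parser.add_argument('--clip_length', type=int, default=16)
    parser.add_argument('--batch_size', type=int, default=32)
    parser.add_argument('--epochs', type=int, default=120)
    parser.add_argument('--lr_base', type=float, default=1e-3)
    # Returns a plain dict, matching what train_main(args) expects
    return vars(parser.parse_args())


if __name__ == '__main__':
    train_main(parse_args())
```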
47 |
48 | ## Predict video quality using the trained model
49 | After LSCT has been trained and the weights have been stored in an h5 file, it can be used to predict the quality of videos with arbitrary resolutions.
50 | In the "examples" folder, the script examples\video_quality_prediction.py predicts the quality of an example video using the pretrained weights.
51 |
52 | In order to predict video quality, both the PHIQNet and LSCT weights are required; FFMPEG (including FFProbe) is also needed to read video frames.
53 | The pretrained weights of PHIQNet and LSCT can be found in the model_weights folder.
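
For reference, a minimal sketch of calling the example script's function directly (assuming the working directory is src so that the examples and lsct packages are importable, the weights files have been downloaded to model_weights, and the FFMPEG/FFProbe executables are reachable at the paths configured in the script; all paths below are illustrative):

```
from examples.video_quality_prediction import predict_video_quality

phiqnet_weights = r'model_weights\PHIQNet.h5'  # downloaded PHIQNet weights
lsct_weights = r'model_weights\LSCT.h5'        # LSCT weights shipped in model_weights
video_file = r'examples\sample_data\example_video (mos=3.24).mp4'

mos = predict_video_quality(phiqnet_weights, lsct_weights, video_file)
print('Predicted MOS: {}'.format(mos))
```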
54 |
55 | ## Prepare datasets for model training
56 | This work uses three publicly available databases: KonViD-1k [The Konstanz natural video database (KoNViD-1k)](https://ieeexplore.ieee.org/document/7965673) by V. Hosu, F. Hahn, M. Jenadeleh, H. Lin, H. Men, T. Sziranyi, S. Li, D. Saupe;
57 | YouTube-UGC [YouTube UGC dataset for video compression research](https://ieeexplore.ieee.org/document/8901772) by Y. Wang, S. Inguva, and B. Adsumilli;
58 | and LIVE-VQC [Large-scale study of perceptual video quality](https://ieeexplore.ieee.org/document/8463581) by Z. Sinno, and A.C. Bovik
59 |
60 | 1) The three databases can be used individually or merged, and then randomly split into training and testing sets.
61 |
62 | 2) Calculate PHIQNet features on all video frames. A script lsct\utils\frame_features_video_folders.py can be used to calculate PHIQNet features in a list of video folders.
63 | An example script examples\frame_features_video.py also shows how to calculate PHIQNet features on video frames.
64 | Please download the PHIQNet weights file [here](https://drive.google.com/file/d/1ymy2oL0r-XNzjqk_kE-lcNkI2FhSu95h/view?usp=sharing), and store it in model_weights.
65 |
66 | 3) The frame features are best stored as NumPy NPY files in the target folders defined in lsct\utils\frame_features_video_folders.py. It is noted that flipped frames are also used for data augmentation. By default, the frame features are stored in target_folder\frame_features, and the flipped features are stored in target_folder\frame_features_flipped.
67 |
68 | 4) Make a meta file containing the feature file paths and MOS values; an example file is provided in lsct\meta_data\all_video_mos.csv:
69 | ```
70 | C:\vq_datasets\frame_features\live_vqc\Video\A001.npy,4.20928
71 | C:\vq_datasets\frame_features\live_vqc\Video\A002.npy,3.29202
72 | C:\vq_datasets\frame_features\live_vqc\Video\A004.npy,3.372716
73 | C:\vq_datasets\frame_features\live_vqc\Video\A005.npy,2.887112
74 | C:\vq_datasets\frame_features\live_vqc\Video\A006.npy,4.386068
75 | C:\vq_datasets\frame_features\live_vqc\Video\A007.npy,3.0347
76 | ```
77 | If the features are stored in other folders, please update this file. The file lists only the features of unflipped frames; the scripts assume the flipped features can be reached by replacing 'frame_features' with 'frame_features_flipped' in each path, so please store the features of flipped frames accordingly.
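
As a small illustration of this convention (a sketch, not repository code; the meta file path is illustrative), the meta file can be read and the flipped-feature path derived per entry:

```
import csv

meta_file = r'..\meta_data\all_video_mos.csv'  # illustrative path to the meta file above

with open(meta_file, newline='') as f:
    for row in csv.reader(f):
        if not row:
            continue  # skip blank lines
        feature_path, mos = row[0], float(row[1])
        # Flipped features live in the parallel frame_features_flipped tree
        flipped_path = feature_path.replace('frame_features', 'frame_features_flipped')
        print(feature_path, flipped_path, mos)
```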
78 |
79 | 5) Make a dumped pickle file containing a list of video IDs, which can be easily used to locate train and test videos. This can be done with lsct\utils\gather_video_ids.py.
80 | A video ID is formatted as database_video, e.g., live_A001.
81 | An example file is provided in lsct\meta_data\all_vids.pkl.
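
A sketch of the kind of list such a pickle could hold (the actual content is produced by gather_video_ids.py, and its exact structure may differ; the konvid/ugc IDs below are hypothetical placeholders):

```
import pickle

# 'live_A001' follows the example above; the other two IDs are made-up placeholders
video_ids = ['live_A001', 'konvid_<video_id>', 'ugc_<video_name>']

with open('all_vids.pkl', 'wb') as f:
    pickle.dump(video_ids, f)
```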
82 |
83 | 6) If using YouTube-UGC chunks, then extract the PHIQNet features for the individual chunks. This can be done with lsct\utils\ugc_chunk_generator.py, which can also dump ugc_chunks.pkl.
84 | ugc_chunks.pkl contains a dictionary of: {UGC video name: [full MOS, chunk0 MOS, chunk1 MOS, ...]}.
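
A short sketch (assuming the dictionary layout described above; the pickle path is illustrative) of loading and unpacking ugc_chunks.pkl:

```
import pickle

with open(r'..\meta_data\ugc_chunks.pkl', 'rb') as f:  # illustrative path
    ugc_chunks = pickle.load(f)

for video_name, mos_values in ugc_chunks.items():
    # First entry is the MOS of the full video, the rest are per-chunk MOS values
    full_mos, chunk_mos = mos_values[0], mos_values[1:]
    print(video_name, full_mos, chunk_mos)
```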
85 |
86 | 7) The meta files together with the paths to chunk features (if used) should be provided for training, see lsct\bin\train_lsct_all_databases.py for an example.
87 |
88 | ## State-of-the-art models
89 | Other NR-VQA models are also included in the work. The original implementations of metrics are employed, and they can be found below.
90 |
91 | V-BLIINDS: paper [Blind prediction of natural video quality](https://ieeexplore.ieee.org/document/6705673) by M. A. Saad, A. C. Bovik, and C. Charrier, and [implementation](http://live.ece.utexas.edu/research/Quality/VideoBLIINDS_Code_MicheleSaad.zip).
92 |
93 | ST-3DDCT: paper [Spatiotemporal statistics for video quality assessment](https://ieeexplore.ieee.org/document/7469872) by X. Li, Q. Guo, and X. Lu, and [implementation](https://github.com/scikit-video/scikit-video/tree/master/skvideo/measure).
94 |
95 | TLVQM: paper [Two-level approach for no-reference consumer video quality assessment](https://ieeexplore.ieee.org/document/8742797) by J. Korhonen, and [implementation](https://github.com/jarikorhonen/nr-vqa-consumervideo).
96 |
97 | VSFA: paper [Quality assessment of in-the-wild videos](https://dl.acm.org/doi/10.1145/3343031.3351028) by D. Li, T. Jiang, and M. Jiang, and [implementation](https://github.com/lidq92/VSFA).
98 |
99 | 3D-CNN-LSTM: paper [Deep neural networks for no-reference video quality assessment](https://ieeexplore.ieee.org/document/8803395) by J. You, and J. Korhonen.
100 |
101 | VIDEVAL: paper [UGC-VQA: Benchmarking blind video quality assessment for user generated content](https://arxiv.org/abs/2005.14354) by Z. Tu, Y. Wang, N. Birkbeck, B. Adsumilli, and A. C. Bovik, and [implementation](https://github.com/tu184044109/VIDEVAL_release).
102 |
103 | ## FAQ
104 | * To be added
105 |
--------------------------------------------------------------------------------
/src/lsct/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junyongyou/lsct_phiqnet/0f7c627d2e2b5ec02c09cf74d380ad2ccc2da481/src/lsct/__init__.py
--------------------------------------------------------------------------------
/src/lsct/ablations/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junyongyou/lsct_phiqnet/0f7c627d2e2b5ec02c09cf74d380ad2ccc2da481/src/lsct/ablations/__init__.py
--------------------------------------------------------------------------------
/src/lsct/ablations/train_lsct_clip_length_search.py:
--------------------------------------------------------------------------------
1 | from lsct.train.train import train_main
2 |
3 |
4 | """
5 | Search for the best clip length for LSCT-PHIQNet.
6 | Note that the max pooling sizes are adapted to the clip length (see the comment after pooling_sizes_range below).
7 | """
8 | if __name__ == '__main__':
9 | args = {}
10 | args['result_folder'] = r'C:\vq_datasets\results\tmp'
11 | args['vids_meta'] = r'..\\meta_data\all_vids.pkl'
12 | args['meta_file'] = r'..\\meta_data\all_video_mos.csv'
13 |
14 | # if ugc_chunk_pickle is used, then the folders containing PHIQNet features of UGC chunks must be specified
15 | args['ugc_chunk_pickle'] = None # r'..\\meta_data\ugc_chunks.pkl'
16 | args['ugc_chunk_folder'] = r'.\frame_features\ugc_chunks'
17 | args['ugc_chunk_folder_flipped'] = r'.\frame_features_flipped\ugc_chunks'
18 |
19 | args['database'] = ['live', 'konvid', 'ugc']
20 | # args['database'] = ['konvid']
21 |
22 | args['transformer_params'] = [2, 64, 4, 64]
23 | args['dropout_rate'] = 0.1
24 | args['cnn_filters'] = [32, 64]
25 |
26 | clip_length_range = [8, 16, 24, 32, 64]
27 | pooling_sizes_range = [[4, 2],
28 | [4, 4],
29 | [6, 4],
30 | [8, 4],
31 | [8, 8]]
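    # Note: each pooling_sizes entry is paired with the clip length above so that the product of the
    # pooling sizes equals the clip length (4*2=8, 4*4=16, 6*4=24, 8*4=32, 8*8=64), letting the 1D CNN
    # reduce every clip to a single feature vector before the Transformer.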
32 |
33 | args['batch_size'] = 32
34 | args['lr_base'] = 1e-3
35 | args['epochs'] = 140
36 |
37 | args['multi_gpu'] = 0
38 | args['gpu'] = 0
39 |
40 | args['validation'] = 'validation'
41 |
42 | args['do_finetune'] = False
43 |
44 | for clip_length, pooling_sizes in zip(clip_length_range, pooling_sizes_range):
45 | print('Clip length: {}'.format(clip_length))
46 | args['clip_length'] = clip_length
47 | args['pooling_sizes'] = pooling_sizes
48 | train_main(args)
49 |
--------------------------------------------------------------------------------
/src/lsct/ablations/train_lsct_resnet50.py:
--------------------------------------------------------------------------------
1 | from lsct.train.train import train_main
2 |
3 |
4 | #
5 | # Training script for LSCT-Resnet50 on all three databases; it is the same as LSCT-PHIQNet training, but uses Resnet50 features instead of PHIQNet features.
6 | # Use lsct\ablations\frame_features_video_folders_resnet50.py to calculate the Resnet50 features.
7 | #
8 | if __name__ == '__main__':
9 | args = {}
10 | args['result_folder'] = r'C:\vq_datasets\results\lsct'
11 | args['vids_meta'] = r'..\\meta_data\all_vids.pkl'
12 |
13 | # The feature file paths must be changed to Resnet50 feature files
14 | args['meta_file'] = r'..\\meta_data\all_video_mos.csv'
15 |
16 | # if ugc_chunk_pickle is used, then the folders containing PHIQNet features of UGC chunks must be specified
17 | args['ugc_chunk_pickle'] = None # r'..\\meta_data\ugc_chunks.pkl'
18 | args['ugc_chunk_folder'] = r'.\frame_features_resnet50\ugc_chunks'
19 | args['ugc_chunk_folder_flipped'] = r'.\frame_features_flipped_resnet50\ugc_chunks'
20 |
21 | args['database'] = ['live', 'konvid', 'ugc']
22 |
23 | args['model_name'] = 'lsct'
24 |
25 | args['transformer_params'] = [2, 64, 4, 64]
26 | args['dropout_rate'] = 0.1
27 | args['cnn_filters'] = [32, 64]
28 | # args['pooling_sizes'] = [4, 4]
29 | args['clip_length'] = 16
30 |
31 | args['batch_size'] = 32
32 |
33 | args['lr_base'] = 1e-3
34 | args['epochs'] = 400
35 |
36 | args['multi_gpu'] = 0
37 | args['gpu'] = 1
38 |
39 | args['validation'] = 'validation'
40 |
41 | args['do_finetune'] = False
42 |
43 | train_main(args)
44 |
--------------------------------------------------------------------------------
/src/lsct/bin/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junyongyou/lsct_phiqnet/0f7c627d2e2b5ec02c09cf74d380ad2ccc2da481/src/lsct/bin/__init__.py
--------------------------------------------------------------------------------
/src/lsct/bin/train_lsct_all_databases.py:
--------------------------------------------------------------------------------
1 | from lsct.train.train import train_main
2 |
3 |
4 | """
5 | General training script for LSCT-PHIQNet on all three databases
6 | """
7 | if __name__ == '__main__':
8 | args = {}
9 | args['result_folder'] = r'C:\vq_datasets\results\lsct'
10 | args['vids_meta'] = r'..\\meta_data\all_vids.pkl'
11 | args['meta_file'] = r'..\\meta_data\all_video_mos.csv'
12 |
13 | # if ugc_chunk_pickle is used, then the folders containing PHIQNet features of UGC chunks must be specified
14 | args['ugc_chunk_pickle'] = None # r'..\\meta_data\ugc_chunks.pkl'
15 | args['ugc_chunk_folder'] = r'.\frame_features\ugc_chunks'
16 | args['ugc_chunk_folder_flipped'] = r'.\frame_features_flipped\ugc_chunks'
17 |
18 | args['database'] = ['live', 'konvid', 'ugc']
19 |
20 | args['model_name'] = 'lsct'
21 |
22 | args['transformer_params'] = [2, 64, 4, 64]
23 | args['dropout_rate'] = 0.1
24 | args['cnn_filters'] = [32, 64]
25 |
26 | # No need to define pooling sizes for 1D CNN, which will be defined in check_args() in train
27 | # args['pooling_sizes'] = [4, 4]
28 |
29 | args['clip_length'] = 16
30 |
31 | args['batch_size'] = 32
32 |
33 | args['lr_base'] = 1e-3
34 | args['epochs'] = 400
35 |
36 | args['multi_gpu'] = 0
37 | args['gpu'] = 1
38 |
39 | args['validation'] = 'validation'
40 |
41 | args['do_finetune'] = False
42 |
43 | train_main(args)
44 |
--------------------------------------------------------------------------------
/src/lsct/bin/train_lsct_all_databases_10runs.py:
--------------------------------------------------------------------------------
1 | from lsct.train.train import train_main
2 |
3 |
4 | """
5 | Run the training of LSCT-PHIQNet 10 times with randomly split train and test sets
6 | """
7 | if __name__ == '__main__':
8 | args = {}
9 | args['result_folder'] = r'C:\vq_datasets\results\lsct'
10 | args['vids_meta'] = r'..\\meta_data\all_vids.pkl'
11 | args['meta_file'] = r'..\\meta_data\all_video_mos.csv'
12 |
13 | # if ugc_chunk_pickle is used, then the folders containing PHIQNet features of UGC chunks must be specified
14 | args['ugc_chunk_pickle'] = None # r'..\\meta_data\ugc_chunks.pkl'
15 | args['ugc_chunk_folder'] = r'.\frame_features\ugc_chunks'
16 | args['ugc_chunk_folder_flipped'] = r'.\frame_features_flipped\ugc_chunks'
17 |
18 | args['database'] = ['live', 'konvid', 'ugc']
19 |
20 | args['model_name'] = 'lsct'
21 |
22 | args['transformer_params'] = [2, 64, 4, 64]
23 | args['dropout_rate'] = 0.1
24 | args['cnn_filters'] = [32, 64]
25 | args['pooling_sizes'] = [4, 4]
26 | args['clip_length'] = 16
27 |
28 | args['batch_size'] = 32
29 |
30 | args['lr_base'] = 1e-3
31 | args['epochs'] = 400
32 |
33 | args['multi_gpu'] = 0
34 | args['gpu'] = 1
35 |
36 | args['validation'] = 'validation'
37 |
38 | args['do_finetune'] = True
39 |
40 | for _ in range(10):
41 | train_main(args)
42 |
--------------------------------------------------------------------------------
/src/lsct/bin/train_lsct_all_databases_triq_features_10runs.py:
--------------------------------------------------------------------------------
1 | from lsct.train.train import train_main
2 | from pickle import load
3 |
4 |
5 | """
6 | Run the training of LSCT with TRIQ frame features 10 times using pre-generated random train/test splits
7 | """
8 | if __name__ == '__main__':
9 | args = {}
10 | args['result_folder'] = r'C:\vq_datasets\results\lsct_triq_features'
11 | # args['vids_meta'] = r'..\\meta_data\all_vids.pkl'
12 | args['meta_file'] = r'C:\lsct_phiqnet\src\lsct\meta_data\all_video_mos_triq.csv'
13 |
14 | # if ugc_chunk_pickle is used, then the folders containing PHIQNet features of UGC chunks must be specified
15 | args['ugc_chunk_pickle'] = None # r'..\\meta_data\ugc_chunks.pkl'
16 | # args['ugc_chunk_folder'] = r'.\frame_features\ugc_chunks'
17 | # args['ugc_chunk_folder_flipped'] = r'.\frame_features_flipped\ugc_chunks'
18 |
19 | args['database'] = ['live', 'konvid', 'ugc']
20 |
21 | args['model_name'] = 'lsct_triq'
22 |
23 | args['transformer_params'] = [2, 32, 8, 64]
24 | args['dropout_rate'] = 0.1
25 | args['cnn_filters'] = [32, 64]
26 | args['pooling_sizes'] = [4, 4]
27 | args['clip_length'] = 16
28 |
29 | args['batch_size'] = 32
30 |
31 | args['lr_base'] = 1e-3
32 | args['epochs'] = 200
33 |
34 | args['multi_gpu'] = 1
35 | args['gpu'] = 1
36 |
37 | args['validation'] = 'validation'
38 |
39 | args['do_finetune'] = True
40 |
41 | for m in range(10):
42 | train_vids, test_vids = load(open(r'C:\vq_datasets\random_splits\split_{}.pkl'.format(m), 'rb'))
43 | train_main(args, train_vids, test_vids)
44 |
--------------------------------------------------------------------------------
/src/lsct/bin/train_lsct_params_search.py:
--------------------------------------------------------------------------------
1 | from lsct.train.train import train_main
2 | import numpy as np
3 | import os
4 | from lsct.utils.gather_video_ids import gather_all_vids
5 |
6 | """
7 | Search for the best hyper-parameters of LSCT-PHIQNet
8 | """
9 | if __name__ == '__main__':
10 | args = {}
11 | args['result_folder'] = r'C:\vq_datasets\results\tmp'
12 | args['vids_meta'] = r'..\\meta_data\all_vids.pkl'
13 | args['meta_file'] = r'..\\meta_data\all_video_mos.csv'
14 |
15 | # if ugc_chunk_pickle is used, then the folders containing PHIQNet features of UGC chunks must be specified
16 | args['ugc_chunk_pickle'] = None # r'..\\meta_data\ugc_chunks.pkl'
17 | args['ugc_chunk_folder'] = r'.\frame_features\ugc_chunks'
18 | args['ugc_chunk_folder_flipped'] = r'.\frame_features_flipped\ugc_chunks'
19 |
20 | # args['database'] = ['live', 'konvid', 'ugc']
21 | args['database'] = ['ugc']
22 |
23 | cnn_filters_range = [
24 | [16, 32],
25 | [32, 64],
26 | [32, 64, 128],
27 | [32, 64, 128, 256]
28 | ]
29 | transformer_params_range = [
30 | [2, 16, 2, 32],
31 | [2, 16, 4, 32],
32 | [2, 32, 4, 64],
33 | [2, 64, 4, 64],
34 | [4, 32, 4, 64],
35 | [4, 64, 4, 64],
36 | [4, 64, 4, 128],
37 | [4, 64, 8, 128],
38 | [4, 64, 8, 256],
39 | [4, 128, 8, 256],
40 | [8, 256, 8, 512]
41 | ]
42 |
43 | args['dropout_rate'] = 0.1
44 | args['clip_length'] = 16
45 |
46 | args['batch_size'] = 32
47 |
48 | args['lr_base'] = 1e-3/2
49 | args['epochs'] = 300
50 |
51 | args['multi_gpu'] = 0
52 | args['gpu'] = 1
53 |
54 | args['validation'] = 'validation'
55 |
56 | args['do_finetune'] = True
57 |
58 | result_record_file = os.path.join(args['result_folder'], 'ugc_nochunks.csv')
59 | runs = 4
60 | all_plcc = np.zeros((runs, len(cnn_filters_range), len(transformer_params_range)))
61 |
62 | for k in range(runs):
63 | train_vids, test_vids = gather_all_vids(all_vids_pkl=args['vids_meta'])
64 |
65 | for i, cnn_filters in enumerate(cnn_filters_range):
66 | for j, transformer_params in enumerate(transformer_params_range):
67 | if i == 0 and j < 5:
68 | break
69 | if not os.path.exists(result_record_file):
70 | record_file = open(result_record_file, 'w+')
71 | else:
72 | record_file = open(result_record_file, 'a')
73 |
74 | args['cnn_filters'] = cnn_filters
75 | # No need to define pooling sizes for 1D CNN, which will be defined in check_args() in train
76 |
77 | args['transformer_params'] = transformer_params
78 | args['model_name'] = 'lsct_{}_{}'.format(cnn_filters, transformer_params)
79 |
80 | plcc = train_main(args, train_vids, test_vids)
81 |
82 | record_file.write('Run: {}, CNN: {}, Transformer: {}, plcc: {}\n'.format(k, cnn_filters, transformer_params, plcc))
83 |
84 | all_plcc[k, i, j] = plcc
85 | print('Run: {}, CNN: {}, Transformer: {}, plcc: {}\n'.format(k + 1, cnn_filters, transformer_params, plcc))
86 | record_file.flush()
87 | record_file.close()
88 | print(np.mean(np.array(all_plcc), axis=0))
89 |
--------------------------------------------------------------------------------
/src/lsct/bin/train_lsct_params_search_1.py:
--------------------------------------------------------------------------------
1 | from lsct.train.train import train_main
2 | import numpy as np
3 | import os
4 | from lsct.utils.gather_video_ids import gather_all_vids
5 |
6 | """
7 | Search for the best hyper-parameters of LSCT-PHIQNet
8 | """
9 | if __name__ == '__main__':
10 | args = {}
11 | args['result_folder'] = r'C:\vq_datasets\results\tmp'
12 | args['vids_meta'] = r'..\\meta_data\all_vids.pkl'
13 | args['meta_file'] = r'..\\meta_data\all_video_mos.csv'
14 |
15 | # if ugc_chunk_pickle is used, then the folders containing PHIQNet features of UGC chunks must be specified
16 | args['ugc_chunk_pickle'] = None # r'..\\meta_data\ugc_chunks.pkl'
17 | args['ugc_chunk_folder'] = r'.\frame_features\ugc_chunks'
18 | args['ugc_chunk_folder_flipped'] = r'.\frame_features_flipped\ugc_chunks'
19 |
20 | # args['database'] = ['live', 'konvid', 'ugc']
21 | args['database'] = ['konvid']
22 |
23 | cnn_filters_range = [
24 | [16, 32],
25 | # [32, 64],
26 | # [32, 64, 128],
27 | # [32, 64, 128, 256]
28 | ]
29 | transformer_params_range = [
30 | [2, 16, 2, 32],
31 | # [2, 16, 4, 32],
32 | # [2, 32, 4, 64],
33 | # [2, 64, 4, 64],
34 | # [4, 32, 4, 64],
35 | # [4, 64, 4, 64],
36 | # [4, 64, 4, 128],
37 | # [4, 64, 8, 128],
38 | # [4, 64, 8, 256],
39 | # [4, 128, 8, 256],
40 | # [8, 256, 8, 512]
41 | ]
42 |
43 | args['dropout_rate'] = 0.1
44 | args['clip_length'] = 16
45 |
46 | args['batch_size'] = 32
47 |
48 | args['lr_base'] = 1e-3/2
49 | args['epochs'] = 300
50 |
51 | args['multi_gpu'] = 0
52 | args['gpu'] = 0
53 |
54 | args['validation'] = 'validation'
55 |
56 | args['do_finetune'] = True
57 |
58 | result_record_file = os.path.join(args['result_folder'], 'konvid_nochunks.csv')
59 | runs = 5
60 | all_plcc = np.zeros((runs, len(cnn_filters_range), len(transformer_params_range)))
61 |
62 | for k in range(runs):
63 | train_vids, test_vids = gather_all_vids(all_vids_pkl=args['vids_meta'])
64 |
65 | for i, cnn_filters in enumerate(cnn_filters_range):
66 | for j, transformer_params in enumerate(transformer_params_range):
67 | if not os.path.exists(result_record_file):
68 | record_file = open(result_record_file, 'w+')
69 | else:
70 | record_file = open(result_record_file, 'a')
71 |
72 | args['cnn_filters'] = cnn_filters
73 | # No need to define pooling sizes for 1D CNN, which will be defined in check_args() in train
74 |
75 | args['transformer_params'] = transformer_params
76 | args['model_name'] = 'lsct_{}_{}'.format(cnn_filters, transformer_params)
77 |
78 | plcc = train_main(args, train_vids, test_vids)
79 |
80 | record_file.write('Run: {}, CNN: {}, Transformer: {}, plcc: {}\n'.format(k, cnn_filters, transformer_params, plcc))
81 |
82 | all_plcc[k, i, j] = plcc
83 | print('Run: {}, CNN: {}, Transformer: {}, plcc: {}\n'.format(k, cnn_filters, transformer_params, plcc))
84 | record_file.flush()
85 | record_file.close()
86 | print(np.mean(np.array(all_plcc), axis=0))
87 |
--------------------------------------------------------------------------------
/src/lsct/bin/train_lsct_single_databases.py:
--------------------------------------------------------------------------------
1 | from lsct.train.train import train_main
2 |
3 |
4 | """
5 | General training script for LSCT-PHIQNet on a single database or on two databases
6 | """
7 | if __name__ == '__main__':
8 | args = {}
9 | args['result_folder'] = r'C:\vq_datasets\results\lsct'
10 | args['vids_meta'] = r'..\\meta_data\all_vids.pkl'
11 | args['meta_file'] = r'..\\meta_data\all_video_mos.csv'
12 |
13 | # if ugc_chunk_pickle is used, then the folders containing PHIQNet features of UGC chunks must be specified
14 | args['ugc_chunk_pickle'] = None # r'..\\meta_data\ugc_chunks.pkl'
15 | args['ugc_chunk_folder'] = r'.\frame_features\ugc_chunks'
16 | args['ugc_chunk_folder_flipped'] = r'.\frame_features_flipped\ugc_chunks'
17 |
18 | args['database'] = ['konvid', 'ugc']
19 | # args['database'] = ['konvid']
20 | # args['database'] = ['ugc']
21 |
22 | args['model_name'] = 'lsct'
23 |
24 | args['transformer_params'] = [2, 64, 4, 64]
25 | args['dropout_rate'] = 0.1
26 | args['cnn_filters'] = [32, 64]
27 | # args['pooling_sizes'] = [4, 4]
28 | args['clip_length'] = 16
29 |
30 | args['batch_size'] = 32
31 |
32 | args['lr_base'] = 1e-3
33 | args['epochs'] = 400
34 |
35 | args['multi_gpu'] = 0
36 | args['gpu'] = 1
37 |
38 | args['validation'] = 'validation'
39 |
40 | args['do_finetune'] = False
41 |
42 | train_main(args)
43 |
--------------------------------------------------------------------------------
/src/lsct/bin/train_lsct_test_on_live.py:
--------------------------------------------------------------------------------
1 | from lsct.train.train import train_main
2 |
3 |
4 | """
5 | By setting args['validation'] = 'test', the model is trained on the entire KonViD-1k and YouTube-UGC databases and tested on LIVE-VQC
6 | """
7 | if __name__ == '__main__':
8 | args = {}
9 | args['result_folder'] = r'C:\vq_datasets\results\lsct'
10 | args['vids_meta'] = r'..\\meta_data\all_vids.pkl'
11 | args['meta_file'] = r'..\\meta_data\all_video_mos.csv'
12 |
13 | # if ugc_chunk_pickle is used, then the folders containing PHIQNet features of UGC chunks must be specified
14 | args['ugc_chunk_pickle'] = None # r'..\\meta_data\ugc_chunks.pkl'
15 | args['ugc_chunk_folder'] = r'.\frame_features\ugc_chunks'
16 | args['ugc_chunk_folder_flipped'] = r'.\frame_features_flipped\ugc_chunks'
17 |
18 | args['database'] = ['live', 'konvid', 'ugc']
19 |
20 | args['model_name'] = 'lsct'
21 |
22 | args['transformer_params'] = [2, 64, 4, 64]
23 | args['dropout_rate'] = 0.1
24 | args['cnn_filters'] = [32, 64]
25 | # args['pooling_sizes'] = [4, 4]
26 | args['clip_length'] = 16
27 |
28 | args['batch_size'] = 32
29 | args['lr_base'] = 1e-3
30 | args['epochs'] = 400
31 |
32 | args['multi_gpu'] = 0
33 | args['gpu'] = 1
34 |
35 | args['validation'] = 'test'
36 |
37 | args['do_finetune'] = False
38 |
39 | train_main(args)
40 |
--------------------------------------------------------------------------------
/src/lsct/meta_data/all_vids.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junyongyou/lsct_phiqnet/0f7c627d2e2b5ec02c09cf74d380ad2ccc2da481/src/lsct/meta_data/all_vids.pkl
--------------------------------------------------------------------------------
/src/lsct/meta_data/ugc_chunks.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junyongyou/lsct_phiqnet/0f7c627d2e2b5ec02c09cf74d380ad2ccc2da481/src/lsct/meta_data/ugc_chunks.pkl
--------------------------------------------------------------------------------
/src/lsct/meta_data/ugc_mos_original.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junyongyou/lsct_phiqnet/0f7c627d2e2b5ec02c09cf74d380ad2ccc2da481/src/lsct/meta_data/ugc_mos_original.xlsx
--------------------------------------------------------------------------------
/src/lsct/models/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junyongyou/lsct_phiqnet/0f7c627d2e2b5ec02c09cf74d380ad2ccc2da481/src/lsct/models/__init__.py
--------------------------------------------------------------------------------
/src/lsct/models/cnn_1d.py:
--------------------------------------------------------------------------------
1 | from tensorflow.keras.layers import Layer, Conv1D, Input, Dropout, MaxPool1D, Masking
2 | import tensorflow.keras.backend as K
3 | from tensorflow.keras import Model
4 | import tensorflow as tf
5 |
6 |
7 | class CNN1D(Layer):
8 | def __init__(self, filters=(32, 64), pooling_sizes=(4, 4), kernel_size=3, stride_size=1, using_dropout=True,
9 | using_bias=False, dropout_rate=0.1, **kwargs):
10 | """
11 | 1D CNN model
12 | :param filters: filter numbers in the CNN blocks
13 | :param pooling_sizes: max pooling size in each block
14 | :param kernel_size: kernel size of CNN layer
15 | :param stride_size: stride of CNN layer
16 | :param using_dropout: flag to use dropout or not
17 | :param using_bias: flag to use bias in CNN or not
18 | :param dropout_rate: dropout rate if using it
19 | :param kwargs: other config params
20 | """
21 | self.filters = filters
22 | self.kernel_size = kernel_size
23 | self.stride_size = stride_size
24 | self.using_dropout = using_dropout
25 | self.conv1d = []
26 | self.pooling = []
27 | self.dropout = []
28 | for i, s_filter in enumerate(filters):
29 | self.conv1d.append(Conv1D(s_filter,
30 | kernel_size,
31 | padding='same',
32 | strides=stride_size,
33 | use_bias=using_bias,
34 | name='conv{}'.format(i)
35 | ))
36 | self.pooling.append(MaxPool1D(pool_size=pooling_sizes[i], name='pool{}'.format(i)))
37 | if using_dropout:
38 | self.dropout = Dropout(rate=dropout_rate)
39 |
40 | super(CNN1D, self).__init__(**kwargs)
41 |
42 | def build(self, input_shape):
43 | super(CNN1D, self).build(input_shape)
44 |
45 | def call(self, x, mask=None):
46 | for i in range(len(self.conv1d)):
47 | x = self.conv1d[i](x)
48 | x = self.pooling[i](x)
49 | if self.using_dropout:
50 | x = self.dropout(x)
51 | x = K.squeeze(x, axis=-2)
52 | return x
53 |
54 | def compute_output_shape(self, input_shape):
55 | return 1, self.filters[-1]
56 |
57 |
58 | if __name__ == '__main__':
59 | input_shape = (16, 5 * 256)
60 | filters = [32, 64, 128, 256]
61 | pooling_sizes = [2, 2, 2, 2]
62 | inputs = Input(shape=input_shape)
63 | x = CNN1D(filters=filters, pooling_sizes=pooling_sizes)(inputs)
64 | model = Model(inputs=inputs, outputs=x)
65 | model.summary()
66 |
--------------------------------------------------------------------------------
/src/lsct/models/cnn_lstm_model.py:
--------------------------------------------------------------------------------
1 | from tensorflow.keras.layers import LSTM, Dense, TimeDistributed, Masking, BatchNormalization, Dropout, Input, \
2 | Bidirectional, ConvLSTM2D, Attention
3 | from tensorflow.keras.models import Model
4 |
5 | from lsct.models.cnn_1d import CNN1D
6 | from cnn_lstm.attention_with_context import Attention
7 |
8 |
9 | def create_cnn_lstm_model(clip_length, feature_length=4096, cnn_filters=(32, 64), pooling_sizes=(4, 4),
10 | lstm_filters=(32, 64), mlp_filters=(64, 32, 8), using_dropout=True, using_bidirectional=False,
11 | using_cnn=True, using_attention=False, dropout_rate=0.1):
12 | """
13 | Create CNN-LSTM model for VQA
14 | :param clip_length: clip length
15 | :param feature_length: feature length
16 | :param cnn_filters: filters in 1D CNN
17 | :param pooling_sizes: pooling sizes in 1D CNN
18 | :param lstm_filters: filters in LSTM
19 | :param mlp_filters: filters in the MLP head
20 | :param using_dropout: flag to use dropout or not
21 | :param using_bidirectional: flag to use bidirectional LSTM or not
22 | :param using_cnn: flag to use 1D CNN or not
:param using_attention: flag to use an attention layer after the last LSTM or not
23 | :param dropout_rate: dropout rate
24 | :return: CNN-LSTM model
25 | """
26 | if using_cnn:
27 | cnn_model = CNN1D(filters=cnn_filters, pooling_sizes=pooling_sizes, using_dropout=using_dropout,
28 | dropout_rate=dropout_rate)
29 | input_shape = (None, clip_length, feature_length)
30 | else:
31 | input_shape = (None, clip_length)
32 | inputs = Input(shape=input_shape)
33 | if using_cnn:
34 | x = TimeDistributed(cnn_model)(inputs)
35 | else:
36 | x = inputs
37 | x = Masking(mask_value=0.)(x)
38 | for i, lstm_filter in enumerate(lstm_filters):
39 | if i < len(lstm_filters) - 1:
40 | if using_bidirectional:
41 | x = Bidirectional(LSTM(lstm_filter, return_sequences=True))(x)
42 | else:
43 | x = LSTM(lstm_filter, return_sequences=True)(x)
44 | else:
45 | if using_attention:
46 | if using_bidirectional:
47 | x = Bidirectional(LSTM(lstm_filter, return_sequences=True))(x)
48 | else:
49 | x = LSTM(lstm_filter, return_sequences=True)(x)
50 | else:
51 | if using_bidirectional:
52 | x = Bidirectional(LSTM(lstm_filter))(x)
53 | else:
54 | x = LSTM(lstm_filter)(x)
55 |
56 | if using_attention:
57 | x = Attention()(x)
58 |
59 | for mlp_filter in mlp_filters:
60 | x = Dense(mlp_filter)(x)
61 | if using_dropout:
62 | x = Dropout(dropout_rate)(x)
63 |
64 | outputs = Dense(1, activation='linear')(x)
65 | model = Model(inputs=inputs, outputs=outputs)
66 | model.summary()
67 |
68 | return model
69 |
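
# Minimal usage sketch (not part of the original file): build the default CNN-LSTM model for
# 16-frame clips of 4096-D frame features; create_cnn_lstm_model() prints the model summary itself.
if __name__ == '__main__':
    create_cnn_lstm_model(clip_length=16, feature_length=4096)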
--------------------------------------------------------------------------------
/src/lsct/models/lsct_phiqnet_model.py:
--------------------------------------------------------------------------------
1 | from tensorflow.keras.layers import Input, TimeDistributed
2 | from tensorflow.keras.models import Model
3 |
4 | from lsct.models.cnn_1d import CNN1D
5 | from lsct.models.video_quality_transformer import VideoQualityTransformer
6 |
7 |
8 | def create_model(clip_length=16, feature_length=1280, cnn_filters=(32, 64), pooling_sizes=(4, 4),
9 | transformer_params=(2, 64, 4, 64), strides=1, dropout_rate=0.1):
10 | """
11 | Create the LSCT-PHIQNet model for NR-VQA
12 | :param clip_length: clip length
13 | :param feature_length: length of frame PHIQNet features, default is 1280=5*256
14 | :param cnn_filters: CNN filters for the 1D CNN
15 | :param pooling_sizes: Pooling sizes for the 1D CNN
16 | :param transformer_params: Transformer parameters
17 | :param strides: stride in 1D CNN
18 | :param dropout_rate: dropout rate for both 1D CNN and Transformer
19 | :return: the LSCT-PHIQNet model
20 | """
21 | using_dropout = dropout_rate > 0
22 | cnn_model = CNN1D(filters=cnn_filters, pooling_sizes=pooling_sizes, stride_size=strides, using_dropout=using_dropout,
23 | dropout_rate=dropout_rate)
24 | input_shape = (None, clip_length, feature_length)
25 |
26 | inputs = Input(shape=input_shape)
27 | x = TimeDistributed(cnn_model)(inputs)
28 |
29 | transformer = VideoQualityTransformer(
30 | num_layers=transformer_params[0],
31 | d_model=transformer_params[1],
32 | num_heads=transformer_params[2],
33 | mlp_dim=transformer_params[3],
34 | dropout=dropout_rate,
35 | )
36 | x = transformer(x)
37 |
38 | model = Model(inputs=inputs, outputs=x)
39 |
40 | return model
41 |
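
# Minimal usage sketch (not part of the original file): build the LSCT-PHIQNet model with its
# default hyper-parameters (16-frame clips, 1280-D PHIQNet frame features) and print its architecture.
if __name__ == '__main__':
    model = create_model(clip_length=16, feature_length=1280)
    model.summary()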
--------------------------------------------------------------------------------
/src/lsct/models/video_quality_transformer.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | from tensorflow.keras import Model
3 | import tensorflow_addons as tfa
4 | from tensorflow.keras.layers import Dense, Dropout, LayerNormalization, Layer
5 |
6 |
7 | def create_padding_mask(input):
8 | """
9 | Creates the padding mask for the Transformer input: a position is masked when the mean of its elements equals 0; an extra unmasked position is prepended for the quality token
10 | :param input: input sequence
11 | :return: mask
12 | """
13 | input = tf.pad(input, paddings=[[0, 0], [1, 0], [0, 0]], constant_values=1)
14 | input = tf.cast(tf.math.equal(tf.keras.backend.mean(input, axis=-1), 0), tf.float32)
15 |
16 | # add extra dimensions to add the padding to the attention logits.
17 | return input[:, tf.newaxis, tf.newaxis, :] # (batch_size, 1, 1, seq_len)
18 |
19 |
20 | class MultiHeadAttention(Layer):
21 | """
22 | This is the standard multi-head attention layer
23 | """
24 | def __init__(self, d_model, num_heads=8):
25 | super(MultiHeadAttention, self).__init__()
26 | self.d_model = d_model
27 | self.num_heads = num_heads
28 | if d_model % num_heads != 0:
29 | raise ValueError(
30 | f'embedding dimension = {d_model} should be divisible by number of heads = {num_heads}'
31 | )
32 | self.depth = d_model // num_heads
33 |
34 | self.wq = Dense(d_model)
35 | self.wk = Dense(d_model)
36 | self.wv = Dense(d_model)
37 |
38 | self.dense = Dense(d_model)
39 |
40 | def split_heads(self, x, batch_size):
41 | x = tf.reshape(
42 | x, (batch_size, -1, self.num_heads, self.depth)
43 | )
44 | return tf.transpose(x, perm=[0, 2, 1, 3])
45 |
46 | def scaled_dot_product_attention(self, query, key, value, mask):
47 | matmul_qk = tf.matmul(query, key, transpose_b=True)
48 | dim_key = tf.cast(tf.shape(key)[-1], tf.float32)
49 | scaled_score = matmul_qk / tf.math.sqrt(dim_key)
50 | if mask is not None:
51 | scaled_score += (mask * -1e9)
52 | weights = tf.nn.softmax(scaled_score, axis=-1)
53 | output = tf.matmul(weights, value)
54 | return output, weights
55 |
56 | def call(self, inputs, mask):
57 | batch_size = tf.shape(inputs)[0]
58 |
59 | query = self.wq(inputs)
60 | key = self.wk(inputs)
61 | value = self.wv(inputs)
62 |
63 | query = self.split_heads(query, batch_size)
64 | key = self.split_heads(key, batch_size)
65 | value = self.split_heads(value, batch_size)
66 |
67 | attention, weights = self.scaled_dot_product_attention(query, key, value, mask)
68 | attention = tf.transpose(attention, perm=[0, 2, 1, 3])
69 | concat_attention = tf.reshape(
70 | attention, (batch_size, -1, self.d_model)
71 | )
72 | output = self.dense(concat_attention)
73 | return output, weights
74 |
75 |
76 | class TransformerBlock(Layer):
77 | """
78 | This is the standard Transformer block
79 | """
80 | def __init__(self, d_model, num_heads, dff, dropout=0.1):
81 | super(TransformerBlock, self).__init__()
82 | self.mha = MultiHeadAttention(d_model, num_heads)
83 | self.ffn = tf.keras.Sequential(
84 | [Dense(dff, activation="relu"),
85 | Dense(d_model),]
86 | )
87 |
88 | self.layernorm1 = LayerNormalization(epsilon=1e-6)
89 | self.layernorm2 = LayerNormalization(epsilon=1e-6)
90 |
91 | self.dropout1 = Dropout(dropout)
92 | self.dropout2 = Dropout(dropout)
93 |
94 | def call(self, x, training, mask):
95 | attn_output, attention_weigths = self.mha(x, mask)
96 | attn_output = self.dropout1(attn_output, training=training)
97 | out1 = self.layernorm1(x + attn_output)
98 | ffn_output = self.ffn(out1)
99 | ffn_output = self.dropout2(ffn_output, training=training)
100 | out2 = self.layernorm2(out1 + ffn_output)
101 | return out2
102 |
103 |
104 | class VideoQualityTransformer(Model):
105 | """
106 | Transformer for video quality assessment using the standard Transformer,
107 | the maximum_position_encoding should cover the maximal clip number in the databases
108 | """
109 | def __init__(
110 | self,
111 | num_layers,
112 | d_model,
113 | num_heads,
114 | mlp_dim,
115 | dropout=0.1,
116 | maximum_position_encoding=6000
117 | ):
118 | super(VideoQualityTransformer, self).__init__()
119 |
120 | self.d_model = d_model
121 | self.num_layers = num_layers
122 |
123 | # positional embedding is predefined with a sufficient length
124 | self.pos_emb = self.add_weight('pos_emb', shape=(1, maximum_position_encoding, d_model))
125 |
126 | # add video quality token
127 | self.quality_emb = self.add_weight('quality_emb', shape=(1, 1, d_model))
128 |
129 | # normal Transformer architecture
130 | self.feature_proj = Dense(d_model)
131 | self.dropout = Dropout(dropout)
132 | self.enc_layers = [
133 | TransformerBlock(d_model, num_heads, mlp_dim, dropout)
134 | for _ in range(num_layers)
135 | ]
136 |
137 | # MLP head
138 | self.mlp_head = tf.keras.Sequential(
139 | [
140 | Dense(mlp_dim, activation=tfa.activations.gelu),
141 | Dropout(dropout),
142 | Dense(1),
143 | ]
144 | )
145 |
146 | def call(self, x, training):
147 | batch_size = tf.shape(x)[0]
148 | mask = create_padding_mask(x)
149 |
150 | frame_length = tf.shape(x)[1]
151 | x = self.feature_proj(x)
152 |
153 | quality_emb = tf.broadcast_to(self.quality_emb, [batch_size, 1, self.d_model])
154 | x = tf.concat([quality_emb, x], axis=1)
155 |
156 | # truncate the positional embedding for shorter videos
157 | x = x + self.pos_emb[:, : frame_length + 1, :]
158 |
159 | x = self.dropout(x, training=training)
160 |
161 | for layer in self.enc_layers:
162 | x = layer(x, training, mask)
163 |
164 | # The first token (the quality token, analogous to CLS) is used for quality prediction
165 | x = self.mlp_head(x[:, 0])
166 | return x
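

# Minimal usage sketch (not part of the original file): run the transformer on a dummy batch of
# clip-level features; the output is a single quality score per video.
if __name__ == '__main__':
    import numpy as np
    transformer = VideoQualityTransformer(num_layers=2, d_model=64, num_heads=4, mlp_dim=64)
    dummy_clip_features = np.random.rand(2, 10, 64).astype('float32')  # (batch, clips, feature dim)
    print(transformer(dummy_clip_features, training=False).shape)  # -> (2, 1)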
--------------------------------------------------------------------------------
/src/lsct/train/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junyongyou/lsct_phiqnet/0f7c627d2e2b5ec02c09cf74d380ad2ccc2da481/src/lsct/train/__init__.py
--------------------------------------------------------------------------------
/src/lsct/utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junyongyou/lsct_phiqnet/0f7c627d2e2b5ec02c09cf74d380ad2ccc2da481/src/lsct/utils/__init__.py
--------------------------------------------------------------------------------
/src/lsct/utils/frame_features_video_folders.py:
--------------------------------------------------------------------------------
1 | """
2 | This class is to calculate PHIQNet features on video frames in a list of video folders, FFMPEG is required
3 | """
4 | import numpy as np
5 | import subprocess as sp
6 | import json
7 | import os
8 | import tensorflow as tf
9 | from phiqnet.models.model_analysis import phiq_subnet
10 |
11 |
12 | class CalculateFrameQualityFeatures():
13 | def __init__(self, model_weights, ffprobe_exe=None, ffmpeg_exe=None, process_frame_interval=0):
14 | """
15 | Frame PHIQNet feature computer
16 | :param model_weights: PHIQNet model_weights file
17 | :param ffprobe_exe: FFProbe exe file
18 | :param ffmpeg_exe: FFMPEG exe file
19 | :param process_frame_interval: parameter of frame processing interval, 0 means all frames will be used
20 | """
21 | self.ffmpeg = ffmpeg_exe
22 | self.ffprobe = ffprobe_exe
23 | self.process_frame_interval = process_frame_interval
24 | self.mos_scales = np.array([1, 2, 3, 4, 5])
25 | self.get_feature_model(model_weights)
26 |
27 | def get_feature_model(self, model_weights):
28 | self.feature_model = phiq_subnet(n_quality_levels=5, return_backbone_maps=False, return_feature_maps=True,
29 | return_features=True)
30 | self.feature_model.load_weights(model_weights, by_name=True)
31 |
32 | def get_video_meta(self, video_file):
33 | """Internal method to get video meta
34 | :return: a list containing [video_exists, duration, frame_count, height, width, fps, bitrate], or None if no video stream is found
35 | """
36 | cmd = [self.ffprobe, '-i', video_file, '-v', 'quiet', '-print_format', 'json', '-show_streams', '-show_format']
37 | ffprobe_output = json.loads(sp.check_output(cmd).decode('utf-8'))
38 |
39 | # audio_exits = False
40 | video_exits = False
41 | duration = 0
42 | frame_count = 0
43 | height = 0
44 | width = 0
45 | fps = 0
46 | bitrate = 0
47 |
48 | stream_type = 'streams'
49 | codec_type = 'codec_type'
50 | if stream_type in ffprobe_output:
51 | for i in range(len(ffprobe_output[stream_type])):
52 | if codec_type in ffprobe_output[stream_type][i]:
53 | # if ffprobe_output[stream_type][i][codec_type] == 'audio':
54 | # audio_exits = True
55 | if ffprobe_output[stream_type][i][codec_type] == 'video':
56 | video_exits = True
57 | frame_rate = ffprobe_output[stream_type][i]['avg_frame_rate']
58 | if '/' in frame_rate:
59 | fps_temp = [float(item) for item in frame_rate.split('/')]
60 | fps = fps_temp[0] / fps_temp[1]
61 | else:
62 | fps = float(frame_rate)
63 | if 'duration' not in ffprobe_output[stream_type][i]:
64 | if 'format' in ffprobe_output:
65 | duration = float(ffprobe_output['format']['duration'])
66 | else:
67 | duration = float(ffprobe_output[stream_type][i]['duration'])
68 | frame_count = int(duration * fps)
69 | height = ffprobe_output[stream_type][i]['height']
70 | width = ffprobe_output[stream_type][i]['width']
71 | if 'bit_rate' not in ffprobe_output[stream_type][i]:
72 | if 'format' in ffprobe_output:
73 | bitrate = int(ffprobe_output['format']['bit_rate'])
74 | else:
75 | bitrate = int(ffprobe_output[stream_type][i]['bit_rate']) / 1000
76 |
77 | if not video_exits:
78 | return None
79 | return [video_exits, duration, frame_count, height, width, fps, bitrate]
80 |
81 | def video_features(self, video_folders, feature_folder):
82 | """
83 | :param video_folders: a list of folders of all video files
84 | :param feature_folder: target folder to store the features files in NPY format
85 | :return: None
86 | """
87 | for video_folder in video_folders:
88 | video_files = os.listdir(video_folder)
89 | for video_file in video_files:
90 | try:
91 | if video_file.endswith(('.mkv', '.mp4')):  # Only mkv and mp4 files are contained in the KonViD-1k, LIVE-VQC and YouTube-UGC databases
92 | video_path = os.path.join(video_folder, video_file)
93 | video_name = os.path.splitext(os.path.basename(video_file))[0]
94 |
95 | # Path to store the PHIQNet features of a frame and a flipped frame must be defined
96 | npy_file_features = r''
97 | npy_file_features_flipped = r''
98 |
99 | if not os.path.exists(os.path.dirname(npy_file_features)):
100 | os.makedirs(os.path.dirname(npy_file_features))
101 | if not os.path.exists(os.path.dirname(npy_file_features_flipped)):
102 | os.makedirs(os.path.dirname(npy_file_features_flipped))
103 | frame_features, features_flipped = self.__ffmpeg_frames_features__(
104 | os.path.join(video_folder, video_file), flip=True)
105 | np.save(npy_file_features, np.asarray(frame_features, dtype=np.float16))
106 | np.save(npy_file_features_flipped, np.asarray(features_flipped, dtype=np.float16))
107 | except Exception:
108 | print('{} excep'.format(video_file))
109 |
110 | def __cal_features__(self, image):
111 | image /= 127.5
112 | image -= 1.
113 | return self.feature_model.predict(np.expand_dims(image, axis=0))
114 |
115 | def __ffmpeg_frames_features__(self, video_file, flip=True):
116 | meta = self.get_video_meta(video_file)
117 | video_height = meta[3]
118 | video_width = meta[4]
119 | video_size = video_height * video_width * 3
120 | # print('Start reading {}'.format(video_file))
121 | if self.process_frame_interval > 0:
122 | fps = 'fps=1/' + str(self.process_frame_interval)
123 | cmd = [self.ffmpeg, '-i', video_file, '-f', 'image2pipe', '-vf', fps, '-pix_fmt', 'rgb24', '-vcodec',
124 | 'rawvideo', '-']
125 | else:
126 | cmd = [self.ffmpeg, '-i', video_file, '-f', 'image2pipe', '-pix_fmt', 'rgb24', '-hide_banner', '-loglevel',
127 | 'panic', '-vcodec', 'rawvideo', '-']
128 | pipe = sp.Popen(cmd, stdout=sp.PIPE)
129 |
130 | features = []
131 | if flip:
132 | features_flipped = []
133 | try:
134 | while True:
135 | try:
136 | raw_image = pipe.stdout.read(video_size)
137 | if len(raw_image) != video_size:
138 | break
139 | image = np.frombuffer(raw_image, dtype='uint8')
140 | image = image.reshape((video_height, video_width, 3))
141 | image = np.asarray(image, dtype=np.float32)
142 | flipped_image = np.fliplr(image)
143 | frame_feature = self.__cal_features__(image)
144 | features.append(np.asarray(frame_feature))
145 | if flip:
146 | flipped_frame_features = self.__cal_features__(flipped_image)
147 | features_flipped.append(np.array(flipped_frame_features))
148 |
149 | except Exception as e1:
150 | print(e1)
151 | continue
152 | except Exception as e2:
153 | print(e2)
154 | pipe.stdout.flush()
155 |
156 | if flip:
157 | return features, features_flipped
158 | else:
159 | return features
160 |
161 |
162 | if __name__ == '__main__':
163 | ffmpeg_exe = r'...\\ffmpeg\ffmpeg.exe'
164 | ffprobe_exe = r'...\\ffmpeg\ffprobe.exe'
165 | model_weights_file = r'..\\model_weights\PHIQNet.h5'
166 |
167 | feature_folder = r'...\\model_weights\frame_features'
168 | video_frame_features = CalculateFrameQualityFeatures(model_weights=model_weights_file,
169 | ffmpeg_exe=ffmpeg_exe,
170 | ffprobe_exe=ffprobe_exe)
171 | video_folders = [
172 | r'.\live_vqc_video',
173 | r'.\ugc_test',
174 | r'.\ugc_train',
175 | r'.\ugc_validation',
176 | r'.\KoNViD_1k_videos'
177 | ]
178 | video_frame_features.video_features(video_folders, feature_folder)
179 |
180 |
--------------------------------------------------------------------------------
/src/lsct/utils/frame_features_video_folders_Resnet50.py:
--------------------------------------------------------------------------------
1 | """
2 | This class is to calculate PHIQNet features on video frames in a list of video folders, FFMPEG is required
3 | """
4 | import numpy as np
5 | import subprocess as sp
6 | import json
7 | import os
8 | import tensorflow as tf
9 | from phiqnet.models.model_analysis import phiq_subnet
10 |
11 |
12 | class CalculateFrameQualityFeatures():
13 | def __init__(self, model_weights, ffprobe_exe=None, ffmpeg_exe=None, process_frame_interval=0):
14 | """
15 | Frame PHIQNet feature computer
16 | :param model_weights: PHIQNet model_weights file
17 | :param ffprobe_exe: FFProbe exe file
18 | :param ffmpeg_exe: FFMPEG exe file
19 | :param process_frame_interval: parameter of frame processing interval, 0 means all frames will be used
20 | """
21 | self.ffmpeg = ffmpeg_exe
22 | self.ffprobe = ffprobe_exe
23 | self.process_frame_interval = process_frame_interval
24 | self.get_feature_model(model_weights)
25 |
26 | def get_feature_model(self, model_weights):
27 | self.feature_model = phiq_subnet(n_quality_levels=5, return_backbone_maps=False, return_feature_maps=False,
28 | return_features=True)
29 | self.feature_model.load_weights(model_weights, by_name=True)
30 |
31 | def get_video_meta(self, video_file):
32 | """Internal method to get video meta
33 | :return: a list containing [video_exists, duration, frame_count, height, width, fps, bitrate], or None if no video stream is found
34 | """
35 | cmd = [self.ffprobe, '-i', video_file, '-v', 'quiet', '-print_format', 'json', '-show_streams', '-show_format']
36 | ffprobe_output = json.loads(sp.check_output(cmd).decode('utf-8'))
37 |
38 | # audio_exits = False
39 | video_exits = False
40 | duration = 0
41 | frame_count = 0
42 | height = 0
43 | width = 0
44 | fps = 0
45 | bitrate = 0
46 |
47 | stream_type = 'streams'
48 | codec_type = 'codec_type'
49 | if stream_type in ffprobe_output:
50 | for i in range(len(ffprobe_output[stream_type])):
51 | if codec_type in ffprobe_output[stream_type][i]:
52 | # if ffprobe_output[stream_type][i][codec_type] == 'audio':
53 | # audio_exits = True
54 | if ffprobe_output[stream_type][i][codec_type] == 'video':
55 | video_exits = True
56 | frame_rate = ffprobe_output[stream_type][i]['avg_frame_rate']
57 | if '/' in frame_rate:
58 | fps_temp = [float(item) for item in frame_rate.split('/')]
59 | fps = fps_temp[0] / fps_temp[1]
60 | else:
61 | fps = float(frame_rate)
62 | if 'duration' not in ffprobe_output[stream_type][i]:
63 | if 'format' in ffprobe_output:
64 | duration = float(ffprobe_output['format']['duration'])
65 | else:
66 | duration = float(ffprobe_output[stream_type][i]['duration'])
67 | frame_count = int(duration * fps)
68 | height = ffprobe_output[stream_type][i]['height']
69 | width = ffprobe_output[stream_type][i]['width']
70 | if 'bit_rate' not in ffprobe_output[stream_type][i]:
71 | if 'format' in ffprobe_output:
72 | bitrate = int(ffprobe_output['format']['bit_rate'])
73 | else:
74 | bitrate = int(ffprobe_output[stream_type][i]['bit_rate']) / 1000
75 |
76 | if not video_exits:
77 | return None
78 | return [video_exits, duration, frame_count, height, width, fps, bitrate]
79 |
80 | def video_features(self, video_folders, feature_folder):
81 | """
82 | :param video_folders: a list of folders of all video files
83 | :param feature_folder: target folder to store the features files in NPY format
84 | :return: None
85 | """
86 | for video_folder in video_folders:
87 | video_files = os.listdir(video_folder)
88 | for video_file in video_files:
89 | try:
90 | if video_file.endswith(('.mkv', '.mp4')):  # Only mkv and mp4 files are contained in the KonViD-1k, LIVE-VQC and YouTube-UGC databases
91 | video_path = os.path.join(video_folder, video_file)
92 | video_name = os.path.splitext(os.path.basename(video_file))[0]
93 |
94 | # Path to store the PHIQNet features of a frame and a flipped frame must be defined
95 | npy_file_features = r''
96 | npy_file_features_flipped = r''
97 |
98 | if not os.path.exists(os.path.dirname(npy_file_features)):
99 | os.makedirs(os.path.dirname(npy_file_features))
100 | if not os.path.exists(os.path.dirname(npy_file_features_flipped)):
101 | os.makedirs(os.path.dirname(npy_file_features_flipped))
102 | frame_features, features_flipped = self.__ffmpeg_frames_features__(
103 | os.path.join(video_folder, video_file), flip=True)
104 | np.save(npy_file_features, np.asarray(frame_features, dtype=np.float16))
105 | np.save(npy_file_features_flipped, np.asarray(features_flipped, dtype=np.float16))
106 | except Exception:
107 | print('{} excep'.format(video_file))
108 |
109 | def __cal_features__(self, image):
110 | image /= 127.5
111 | image -= 1.
112 | return self.feature_model.predict(np.expand_dims(image, axis=0))
113 |
114 | def __ffmpeg_frames_features__(self, video_file, flip=True):
115 | meta = self.get_video_meta(video_file)
116 | video_height = meta[3]
117 | video_width = meta[4]
118 | video_size = video_height * video_width * 3
119 | # print('Start reading {}'.format(video_file))
120 | if self.process_frame_interval > 0:
121 | fps = 'fps=1/' + str(self.process_frame_interval)
122 | cmd = [self.ffmpeg, '-i', video_file, '-f', 'image2pipe', '-vf', fps, '-pix_fmt', 'rgb24', '-vcodec',
123 | 'rawvideo', '-']
124 | else:
125 | cmd = [self.ffmpeg, '-i', video_file, '-f', 'image2pipe', '-pix_fmt', 'rgb24', '-hide_banner', '-loglevel',
126 | 'panic', '-vcodec', 'rawvideo', '-']
127 | pipe = sp.Popen(cmd, stdout=sp.PIPE)
128 |
129 | features = []
130 | if flip:
131 | features_flipped = []
132 | try:
133 | while True:
134 | try:
135 | raw_image = pipe.stdout.read(video_size)
136 | if len(raw_image) != video_size:
137 | break
138 | image = np.frombuffer(raw_image, dtype='uint8')
139 | image = image.reshape((video_height, video_width, 3))
140 | image = np.asarray(image, dtype=np.float32)
141 | flipped_image = np.fliplr(image)
142 | frame_feature = self.__cal_features__(image)
143 | features.append(np.asarray(frame_feature))
144 | if flip:
145 | flipped_frame_features = self.__cal_features__(flipped_image)
146 | features_flipped.append(np.array(flipped_frame_features))
147 |
148 | except Exception as e1:
149 | print(e1)
150 | continue
151 | except Exception as e2:
152 | print(e2)
153 | pipe.stdout.flush()
154 |
155 | if flip:
156 | return features, features_flipped
157 | else:
158 | return features
159 |
160 |
161 | if __name__ == '__main__':
162 | ffmpeg_exe = r'...\\ffmpeg\ffmpeg.exe'
163 | ffprobe_exe = r'...\\ffmpeg\ffprobe.exe'
164 | model_weights_file = r'..\\model_weights\PHIQNet.h5'
165 |
166 | feature_folder = r'...\\model_weights\frame_features'
167 | video_frame_features = CalculateFrameQualityFeatures(model_weights=model_weights_file,
168 | ffmpeg_exe=ffmpeg_exe,
169 | ffprobe_exe=ffprobe_exe)
170 | video_folders = [
171 | r'.\live_vqc_video',
172 | r'.\ugc_test',
173 | r'.\ugc_train',
174 | r'.\ugc_validation',
175 | r'.\KoNViD_1k_videos'
176 | ]
177 | video_frame_features.video_features(video_folders, feature_folder)
178 |
179 |
--------------------------------------------------------------------------------
/src/lsct/utils/gather_video_ids.py:
--------------------------------------------------------------------------------
1 | """
2 | This script is to collect all video IDs and possibly to dump them
3 | a video ID is: database name_video name
4 | """
5 | import os
6 | import glob
7 | from pickle import load, dump
8 | from sklearn.model_selection import train_test_split
9 | from random import shuffle
10 |
11 |
12 | def gather_live_konvid_vids(video_folder, database):
13 | """
14 | LIVE-VQC and KonViD-1k video IDs
15 | :param video_folder: folder containing the database's video files
16 | :param database: database name used as the video ID prefix, e.g., 'live' or 'konvid'
17 | :return: list of video IDs in the form database_videoname
18 | """
19 | vids = []
20 | for file in glob.glob(os.path.join(video_folder, '*.mp4')):
21 | vid = os.path.splitext(os.path.basename(file))[0]
22 | vids.append('{}_{}'.format(database, vid))
23 | return vids
24 |
25 |
26 | def gather_ugc_vids(video_folders):
27 | """
28 | YouTube-UGC video IDs
29 | :param video_folders: list of folders of YouTube-UGC video
30 | :return: video IDs
31 | """
32 | ugc_vids = []
33 | for video_folder in video_folders:
34 | files = glob.glob(os.path.join(video_folder, '*.mkv'))
35 | for file in files:
36 | vid = os.path.splitext(os.path.basename(file))[0]
37 | ugc_vids.append('ugc_{}'.format(vid))
38 | return ugc_vids
39 |
40 |
41 | def gather_all_vids(all_vids_pkl=None, test_ratio=0.2, random_state=None):
42 | if all_vids_pkl:
43 | all_vids = load(open(all_vids_pkl, 'rb'))
44 | else:
45 | live_vids = gather_live_konvid_vids(r'.\live_vqc_Video', 'live')
46 | konvid_vids = gather_live_konvid_vids(r'.\KoNViD_1k_videos', 'konvid')
47 | ugc_vids = gather_ugc_vids([r'.\ugc_test', r'.\ugc_train', r'.\ugc_validation'])
48 | all_vids = live_vids + konvid_vids + ugc_vids
49 |
50 | # the video IDs can be dumped here, for later use in training
51 | dump(all_vids, open(r'.\all_vids.pkl', 'wb'))
52 | shuffle(all_vids)
53 | train_vids, test_vids = train_test_split(all_vids, test_size=test_ratio, random_state=random_state)
54 | return train_vids, test_vids
55 |
56 |
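# Typical usage sketch (comment only, not original code): create or reuse a random 80/20 train/test split.
#   train_vids, test_vids = gather_all_vids(all_vids_pkl=r'..\\meta_data\all_vids.pkl', test_ratio=0.2)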
57 | if __name__ == '__main__':
58 | # live_video_folder = r'.\live_vqc_Video'
59 | # konvid_video_folder = r'.\KoNViD_1k_videos'
60 | # live_vids, live_fps = gather_live_konvid_vids(live_video_folder, 'live')
61 | # konvid_vids, konvid_fps = gather_live_konvid_vids(konvid_video_folder, 'konvid')
62 | #
63 | # ugc_video_folders = [r'.\ugc_test', r'.\ugc_train', r'.\ugc_validation']
64 | # gather_ugc_vids(ugc_video_folders)
65 |
66 | info = load(open(r'..\\meta_data\ugc_chunks.pkl', 'rb'))
67 | t = 0
68 |
--------------------------------------------------------------------------------
/src/lsct/utils/ugc_chunk_generator.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | import numpy as np
3 | import os
4 | from ffmpeg.video_handler import VideoHandler
5 | from pickle import load, dump
6 |
7 |
8 | def get_video_handler():
9 | ffmpeg_exe = r'..\\ffmpeg\ffmpeg.exe'
10 | ffprobe_exe = r'..\\ffmpeg\ffprobe.exe'
11 | video_handler = VideoHandler(ffprobe_exe, ffmpeg_exe)
12 | return video_handler
13 |
14 |
15 | def get_video_path(vid):
16 | # Video folders of YouTube-UGC database must be specified
17 | video_folders = [
18 | r'.\ugc_test',
19 | r'.\ugc_train',
20 | r'.\ugc_validation'
21 | ]
22 | for video_folder in video_folders:
23 | if os.path.exists(os.path.join(video_folder, vid + '.mkv')):
24 | return os.path.join(video_folder, vid + '.mkv')
25 |
26 | return None
27 |
28 |
29 | def get_chunk_mos_Resnet():
30 | """
31 | Extract Resnet50 frame features of individual chunks and store them in NumPy npy files
32 | :return: dictionary mapping video id to [full MOS, chunk0 MOS, chunk1 MOS, ...]
33 | """
34 | chunk_mos_dict = dict()
35 | ugc_mos_file = r'C:\vq_datasets\ugc_mos_original.xlsx'
36 | ugc_mos = pd.read_excel(ugc_mos_file)
37 |
38 | frame_feature_folder = r'C:\vq_datasets\VSFA\UGC'
39 | chunk_feature_folder = r'C:\vq_datasets\VSFA\UGC_CHUNKS'
40 |
41 | video_handler = get_video_handler()
42 |
43 | for index, row in ugc_mos.iterrows():
44 | vid = row['vid']
45 | video_path = get_video_path(vid)
46 |
47 | if video_path:
48 | video_meta = video_handler.get_video_meta(video_path)
49 | fps = round(video_meta[-2])
50 | mos_chunk_0 = row['MOS chunk00']
51 | mos_chunk_1 = row['MOS chunk05']
52 | mos_chunk_2 = row['MOS chunk10']
53 |
54 | chunk_mos = []
55 | chunk_mos.append(row['MOS full'])
56 |
57 | frame_features = np.load(os.path.join(frame_feature_folder, vid + '_resnet-50_res5c.npy'))
58 | if not np.isnan(mos_chunk_0):
59 | chunk_mos.append(mos_chunk_0)
60 | frame_features_chunk_0 = frame_features[0 : 10 * fps, :]
61 | np.save(os.path.join(chunk_feature_folder, vid + '_resnet-50_res5c_chunk_0.npy'), frame_features_chunk_0)
62 |
63 | if not np.isnan(mos_chunk_1):
64 | chunk_mos.append(mos_chunk_1)
65 | frame_features_chunk_1 = frame_features[5 * fps: 15 * fps, :]
66 | np.save(os.path.join(chunk_feature_folder, vid + '_resnet-50_res5c_chunk_1.npy'), frame_features_chunk_1)
67 |
68 | if not np.isnan(mos_chunk_2):
69 | chunk_mos.append(mos_chunk_2)
70 | frame_features_chunk_2 = frame_features[10 * fps:, :]
71 | np.save(os.path.join(chunk_feature_folder, vid + '_resnet-50_res5c_chunk_2.npy'), frame_features_chunk_2)
72 |
73 | chunk_mos_dict[vid] = chunk_mos
74 |
75 | return chunk_mos_dict
76 |
77 |
78 | def get_chunk_features_mos():
79 | """
80 | Extract frame features of individual chunks and store them in NumPy npy files
81 | :return: dictionary mapping video id to [full MOS, chunk0 MOS, chunk1 MOS, ...]
82 | """
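    # Chunk time windows, as implemented by the slicing below: chunk00 covers 0-10 s,
    # chunk05 covers 5-15 s, and chunk10 covers 10 s to the end of the video.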
83 | chunk_mos_dict = dict()
84 | ugc_mos_file = r'..\\meta_data\ugc_mos_original.xlsx'
85 | ugc_mos = pd.read_excel(ugc_mos_file)
86 |
87 | # Frame feature files of YouTube-UGC videos must be specified
88 | frame_feature_folder = r'.\frame_features\ugc'
89 |
90 | # Target folder to store the frame features of chunks
91 | chunk_feature_folder = r'.\frame_features\ugc_chunks'
92 |
93 | video_handler = get_video_handler()
94 |
95 | for index, row in ugc_mos.iterrows():
96 | vid = row['vid']
97 | video_path = get_video_path(vid)
98 |
99 | if video_path:
100 | mos_chunk_0 = row['MOS chunk00']
101 | mos_chunk_1 = row['MOS chunk05']
102 | mos_chunk_2 = row['MOS chunk10']
103 |
104 | video_meta = video_handler.get_video_meta(video_path)
105 | fps = round(video_meta[-2])
106 |
107 | chunk_mos = []
108 | chunk_mos.append(row['MOS full'])
109 |
110 | frame_features = np.load(os.path.join(frame_feature_folder, vid + '.npy'))
111 | if not np.isnan(mos_chunk_0):
112 | chunk_mos.append(mos_chunk_0)
113 | frame_features_chunk_0 = frame_features[0 : 10 * fps, :, :, :]
114 | np.save(os.path.join(chunk_feature_folder, vid + '_chunk_0.npy'), frame_features_chunk_0)
115 |
116 | if not np.isnan(mos_chunk_1):
117 | chunk_mos.append(mos_chunk_1)
118 | frame_features_chunk_1 = frame_features[5 * fps: 15 * fps, :, :, :]
119 | np.save(os.path.join(chunk_feature_folder, vid + '_chunk_1.npy'), frame_features_chunk_1)
120 |
121 | if not np.isnan(mos_chunk_2):
122 | chunk_mos.append(mos_chunk_2)
123 | frame_features_chunk_2 = frame_features[10 * fps:, :, :, :]
124 | np.save(os.path.join(chunk_feature_folder, vid + '_chunk_2.npy'), frame_features_chunk_2)
125 |
126 | chunk_mos_dict[vid] = chunk_mos
127 |
128 | return chunk_mos_dict
129 |
130 |
131 | if __name__ == '__main__':
132 | chunk_mos_dict = get_chunk_features_mos()
133 | chunk_mos_dict_resnet50s = get_chunk_mos_Resnet()
134 |
135 | # The chunk MOS values can be dumped
136 | # dump(chunk_mos_dict, open(r'..\\meta_data\ugc_chunks.pkl', 'wb'))
137 |
--------------------------------------------------------------------------------
/src/model_weights/LSCT.h5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junyongyou/lsct_phiqnet/0f7c627d2e2b5ec02c09cf74d380ad2ccc2da481/src/model_weights/LSCT.h5
--------------------------------------------------------------------------------
/src/model_weights/README.md:
--------------------------------------------------------------------------------
1 | # Trained weights
2 |
3 | The trained weights are supplied here.
4 |
5 | PHIQNet: please download the trained weights [here](https://drive.google.com/file/d/1ymy2oL0r-XNzjqk_kE-lcNkI2FhSu95h/view?usp=sharing); they are required to calculate frame quality features for VQA.
6 |
7 | LSCT.h5: contains the weights for the LSCT model trained on KonViD-1k, YouTube-UGC and LIVE-VQC databases.
--------------------------------------------------------------------------------
/src/phiqnet/README.md:
--------------------------------------------------------------------------------
1 | # PHIQNet Implementation
2 |
3 | TF-Keras implementation of PHIQNet as described in [Perceptual Hierarchical Networks for No-Reference Image Quality Assessment].
4 |
5 | ## Installation
6 |
7 | 1) Clone this repository.
8 | 2) Install the required Python packages. The code was developed with PyCharm in Python 3.7. The requirements.txt file was generated by PyCharm, and the code should also run with the latest versions of the packages.
9 |
10 | ## Training a model
11 | Many examples of training PHIQNet and its variants can be seen in phiqnet/bin.
12 | Argparse could be used, but the authors prefer to define the parameters in a dictionary; it is easy to convert the scripts to take command-line arguments.
13 | In principle, the following parameters can be defined:
14 |
15 | args = {}
16 | args['multi_gpu'] = 0 # gpu setting, set to 1 for using multiple GPUs
17 | args['gpu'] = 0 # If having multiple GPUs, specify which GPU to use
18 |
19 | args['result_folder'] = r'..\databases\experiments' # Define result path
20 | args['n_quality_levels'] = 5 # Choose between 1 (MOS prediction) and 5 (distribution prediction)
21 |
22 | args['train_folders'] = # Define folders containing training images
23 | [
24 | r'..\databases\train\koniq_normal',
25 | r'..\databases\train\koniq_small',
26 | r'..\databases\train\live'
27 | ]
28 | args['val_folders'] = # Define folders containing testing images
29 | [
30 | r'..\databases\val\koniq_normal',
31 | r'..\databases\val\koniq_small',
32 | r'..\databases\val\live'
33 | ]
34 | args['koniq_mos_file'] = r'..\databases\koniq10k_images_scores.csv' # MOS (distribution of scores) file for KonIQ database
35 | args['live_mos_file'] = r'..\databases\live_mos.csv' # MOS (standard distribution of scores) file for LIVE-wild database
36 |
37 | args['naive_backbone'] = False # Choose between True and False, indicating using backbone network only or neck + head as well
38 | args['backbone'] = 'resnet50' # Choose from ['resnet18', 'resnet50', 'resnet152', 'resnet152v2', 'vgg16', 'resnest50']
39 | args['weights'] = r'..\pretrained_weights\resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5' # Define the path of ImageNet pretrained weights
40 | args['initial_epoch'] = 0 # Define initial epoch for use in fine-tune
41 |
42 | args['lr_base'] = 1e-4 / 2 # Define the base learning rate for the warmup and rate-decay schedule
43 | args['lr_schedule'] = True # Choose between True and False, indicating if learning rate schedule should be used or not
44 | args['batch_size'] = 4 # Batch size, chosen to fit in the GPU memory
45 | args['epochs'] = 120 # Maximal number of epochs; early stopping can optionally be set in the callbacks
46 |
47 | args['fpn_type'] = 'fpn' # FPN type, choose from ['fpn', 'bifpn', 'pan', 'no_fpn']; note that if 'bifpn' is chosen, the image resolution must be a power of 2, otherwise a shape-mismatch error will be thrown
48 | args['attention_module'] = True # Choose between True and False, indicating if attention module should be used or not
49 |
50 | args['image_aug'] = True # Choose between True and False, indicating if image augmentation should be used or not
51 |
52 | ## Predict image quality using the trained model
53 | After PHIQNet has been trained and the weights have been stored in an h5 file, it can be used to predict the quality of images of arbitrary sizes:
54 |
55 | ```python
56 | args = {}
57 | args['n_quality_levels'] = 5
58 | args['naive_backbone'] = False
59 | args['backbone'] = 'resnet50'
60 | args['fpn_type'] = 'fpn'
61 | args['weights'] = r'..\model_weights\PHIQNet.h5'
62 | model = phiq_net(n_quality_levels=args['n_quality_levels'],
63 | naive_backbone=args['naive_backbone'],
64 | backbone=args['backbone'],
65 | fpn_type=args['fpn_type'])
66 | model.load_weights(args['weights'])
67 | ```
68 | Then use ModelEvaluation to predict the quality of an image set.
69 |
70 | In the "examples" folder, an example script examples\image_quality_prediction.py is provided to use the trained weights to predict quality of example images.
71 |
72 | ## Prepare datasets for model training
73 | This work uses two publicly available databases: KonIQ-10k [KonIQ-10k: An ecologically valid database for deep learning of blind image quality assessment](https://ieeexplore.ieee.org/document/8968750) by V. Hosu, H. Lin, T. Sziranyi, and D. Saupe;
74 | and LIVE-wild [Massive online crowdsourced study of subjective and objective picture quality](https://ieeexplore.ieee.org/document/7327186) by D. Ghadiyaram and A.C. Bovik.
75 |
76 | 1) The two databases were merged and then split into training and testing sets. Please see the README in phiqnet/databases for details.
77 | 2) Make MOS files (note: do NOT include a header line):
78 |
79 | For databases with the score distribution available, the MOS file looks like this (koniq format):
80 | ```
81 | image path, number of votes for quality level 1, number of votes for quality level 2, number of votes for quality level 3, number of votes for quality level 4, number of votes for quality level 5, MOS or Z-score
82 | 10004473376.jpg,0,0,25,73,7,3.828571429
83 | 10007357496.jpg,0,3,45,47,1,3.479166667
84 | 10007903636.jpg,1,0,20,73,2,3.78125
85 | 10009096245.jpg,0,0,21,75,13,3.926605505
86 | ```
87 |
88 | For databases with only the standard deviation available, the MOS file looks like this (live format):
89 | ```
90 | image path, standard deviation, MOS or Z-score
91 | t1.bmp,18.3762,63.9634
92 | t2.bmp,13.6514,25.3353
93 | t3.bmp,18.9246,48.9366
94 | t4.bmp,18.2414,35.8863
95 | ```
96 |
97 | The format of the MOS file ('koniq' or 'live') and the score type ('mos' or 'z_score') should also be specified in phiqnet/utils/imageset_handler/get_image_scores; a small parsing sketch is given after this list.
98 | 3) In the train scripts in phiqnet/bin, the folders containing the training and testing images are provided.
99 | 4) Pretrained ImageNet weights can be downloaded (see the README in phiqnet/pretrained_weights) and pointed to in the train scripts in phiqnet/bin.
100 |
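As a quick sanity check of the koniq format above (a standalone sketch, not code from the repository), the MOS in the last column is simply the vote-weighted average of the quality levels 1..5:

```python
# Parse one koniq-format row and recompute the MOS from the vote counts.
line = '10004473376.jpg,0,0,25,73,7,3.828571429'
fields = line.split(',')
image_name, votes, stored_mos = fields[0], [float(v) for v in fields[1:6]], float(fields[6])
mos_from_votes = sum(level * v for level, v in zip(range(1, 6), votes)) / sum(votes)
print(image_name, mos_from_votes, stored_mos)  # the recomputed MOS matches the stored value (~3.8286)
```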
101 | ## Trained PHIQNet weights
102 | PHIQNet has been trained on the KonIQ-10k and LIVE-wild databases, and the weights file can be downloaded [here](https://drive.google.com/file/d/1ymy2oL0r-XNzjqk_kE-lcNkI2FhSu95h/view?usp=sharing).
103 | Due to the file size limitation of the CVPR submission, the weights file cannot be included here. Please download the weights file and store it in model_weights.
104 |
105 | ## State-of-the-art models
106 | Three other models are also included in this work. The original implementations of these metrics are employed; they can be found below.
107 |
108 | Koncept512 [KonIQ-10k: An ecologically valid database for deep learning of blind image quality assessment](https://github.com/subpic/koniq)
109 |
110 | SGDNet [SGDNet: An end-to-end saliency-guided deep neural network for no-reference image quality assessment](https://github.com/ysyscool/SGDNet)
111 |
112 | CaHDC [End-to-end blind image quality prediction with cascaded deep neural network](https://web.xidian.edu.cn/wjj/files/20190620_152557.zip)
113 |
114 | ## FAQ
115 | * To be added
116 |
--------------------------------------------------------------------------------
/src/phiqnet/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junyongyou/lsct_phiqnet/0f7c627d2e2b5ec02c09cf74d380ad2ccc2da481/src/phiqnet/__init__.py
--------------------------------------------------------------------------------
/src/phiqnet/backbone/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junyongyou/lsct_phiqnet/0f7c627d2e2b5ec02c09cf74d380ad2ccc2da481/src/phiqnet/backbone/__init__.py
--------------------------------------------------------------------------------
/src/phiqnet/backbone/_common_blocks.py:
--------------------------------------------------------------------------------
1 | from backbone.resnet_config import get_submodules_from_kwargs
2 |
3 |
4 | def slice_tensor(x, start, stop, axis):
5 | if axis == 3:
6 | return x[:, :, :, start:stop]
7 | elif axis == 1:
8 | return x[:, start:stop, :, :]
9 | else:
10 | raise ValueError("Slice axis should be in (1, 3), got {}.".format(axis))
11 |
12 |
13 | def GroupConv2D(filters,
14 | kernel_size,
15 | strides=(1, 1),
16 | groups=32,
17 | kernel_initializer='he_uniform',
18 | use_bias=True,
19 | activation='linear',
20 | padding='valid',
21 | **kwargs):
22 | """
23 | Grouped Convolution Layer implemented as a Slice,
24 | Conv2D and Concatenate layers. Split filters to groups, apply Conv2D and concatenate back.
25 |
26 | Args:
27 | filters: Integer, the dimensionality of the output space
28 | (i.e. the number of output filters in the convolution).
29 | kernel_size: An integer or tuple/list of 2 integers,
30 | specifying the height and width of the 2D convolution window.
31 | strides: An integer or tuple/list of 2 integers, specifying the strides
32 | of the convolution along the height and width.
33 | groups: Integer, number of groups to split input filters to.
34 | kernel_initializer: Initializer for the kernel weights matrix.
35 | use_bias: Boolean, whether the layer uses a bias vector.
36 | activation: Activation function to use (see activations).
37 | If you don't specify anything, no activation is applied (ie. "linear" activation: a(x) = x).
38 | padding: one of "valid" or "same" (case-insensitive).
39 |
40 | Input shape:
41 | 4D tensor with shape: (batch, rows, cols, channels) if data_format is "channels_last".
42 |
43 | Output shape:
44 | 4D tensor with shape: (batch, new_rows, new_cols, filters) if data_format is "channels_last".
45 | rows and cols values might have changed due to padding.
46 |
47 | """
48 |
49 | backend, layers, models, keras_utils = get_submodules_from_kwargs(kwargs)
50 | slice_axis = 3 if backend.image_data_format() == 'channels_last' else 1
51 |
52 | def layer(input_tensor):
53 | inp_ch = int(backend.int_shape(input_tensor)[-1] // groups) # input grouped channels
54 | out_ch = int(filters // groups) # output grouped channels
55 |
56 | blocks = []
57 | for c in range(groups):
58 | slice_arguments = {
59 | 'start': c * inp_ch,
60 | 'stop': (c + 1) * inp_ch,
61 | 'axis': slice_axis,
62 | }
63 | x = layers.Lambda(slice_tensor, arguments=slice_arguments)(input_tensor)
64 | x = layers.Conv2D(out_ch,
65 | kernel_size,
66 | strides=strides,
67 | kernel_initializer=kernel_initializer,
68 | use_bias=use_bias,
69 | activation=activation,
70 | padding=padding)(x)
71 | blocks.append(x)
72 |
73 | x = layers.Concatenate(axis=slice_axis)(blocks)
74 | return x
75 |
76 | return layer
77 |
78 |
79 | def expand_dims(x, channels_axis):
80 | if channels_axis == 3:
81 | return x[:, None, None, :]
82 | elif channels_axis == 1:
83 | return x[:, :, None, None]
84 | else:
85 | raise ValueError("Slice axis should be in (1, 3), got {}.".format(channels_axis))
86 |
87 |
88 | def ChannelSE(reduction=16, **kwargs):
89 | """
90 | Squeeze and Excitation block, reimplementation inspired by
91 | https://github.com/Cadene/pretrained-models.pytorch/blob/master/pretrainedmodels/models/senet.py
92 |
93 | Args:
94 | reduction: channels squeeze factor
95 |
96 | """
97 | backend, layers, models, keras_utils = get_submodules_from_kwargs(kwargs)
98 | channels_axis = 3 if backend.image_data_format() == 'channels_last' else 1
99 |
100 | def layer(input_tensor):
101 | # get number of channels/filters
102 | channels = backend.int_shape(input_tensor)[channels_axis]
103 |
104 | x = input_tensor
105 |
106 | # squeeze and excitation block in PyTorch style, using 1x1 convolutions as the fully-connected layers
107 | x = layers.GlobalAveragePooling2D()(x)
108 | x = layers.Lambda(expand_dims, arguments={'channels_axis': channels_axis})(x)
109 | x = layers.Conv2D(channels // reduction, (1, 1), kernel_initializer='he_uniform')(x)
110 | x = layers.Activation('relu')(x)
111 | x = layers.Conv2D(channels, (1, 1), kernel_initializer='he_uniform')(x)
112 | x = layers.Activation('sigmoid')(x)
113 |
114 | # apply attention
115 | x = layers.Multiply()([input_tensor, x])
116 |
117 | return x
118 |
119 | return layer
120 |
--------------------------------------------------------------------------------
/src/phiqnet/backbone/resnet_config.py:
--------------------------------------------------------------------------------
1 | import keras_applications as ka
2 |
3 |
4 | def get_submodules_from_kwargs(kwargs):
5 | backend = kwargs.get('backend', ka._KERAS_BACKEND)
6 | layers = kwargs.get('layers', ka._KERAS_LAYERS)
7 | models = kwargs.get('models', ka._KERAS_MODELS)
8 | utils = kwargs.get('utils', ka._KERAS_UTILS)
9 | return backend, layers, models, utils
10 |
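if __name__ == '__main__':
    # Usage sketch (an assumption, not code from the repository): callers such as GroupConv2D and
    # ChannelSE in _common_blocks.py forward their **kwargs here, so the Keras submodules can be
    # supplied explicitly instead of relying on the keras_applications globals.
    from tensorflow.keras import backend, layers, models, utils
    print(get_submodules_from_kwargs({'backend': backend, 'layers': layers, 'models': models, 'utils': utils}))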
--------------------------------------------------------------------------------
/src/phiqnet/backbone/vgg16.py:
--------------------------------------------------------------------------------
1 | # Copyright 2015 The TensorFlow Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ==============================================================================
15 | # pylint: disable=invalid-name
16 | """VGG16 model for Keras."""
17 | from __future__ import absolute_import
18 | from __future__ import division
19 | from __future__ import print_function
20 |
21 | from tensorflow.python.keras import layers
22 | from tensorflow.python.keras.engine import training
23 |
24 |
25 | def VGG16(inputs):
26 | """Instantiates the VGG16 model.
27 |
28 | This variant takes an input tensor and returns a model whose outputs are the feature
29 | maps after blocks 2-5, for use as a multi-scale backbone; it does not load any weights itself.
30 |
31 | This model can be built both with 'channels_first' data format
32 | (channels, height, width) or 'channels_last' data format
33 | (height, width, channels).
34 |
35 | The default input size for this model is 224x224.
36 |
37 | Caution: Be sure to properly pre-process your inputs to the application.
38 | Please see `applications.vgg16.preprocess_input` for an example.
39 |
40 | Arguments (kept from the original Keras VGG16 application; only the input tensor is used in this variant):
41 | include_top: whether to include the 3 fully-connected
42 | layers at the top of the network.
43 | weights: one of `None` (random initialization),
44 | 'imagenet' (pre-training on ImageNet),
45 | or the path to the weights file to be loaded.
46 | input_tensor: optional Keras tensor
47 | (i.e. output of `layers.Input()`)
48 | to use as image input for the model.
49 | input_shape: optional shape tuple, only to be specified
50 | if `include_top` is False (otherwise the input shape
51 | has to be `(224, 224, 3)`
52 | (with `channels_last` data format)
53 | or `(3, 224, 224)` (with `channels_first` data format).
54 | It should have exactly 3 input channels,
55 | and width and height should be no smaller than 32.
56 | E.g. `(200, 200, 3)` would be one valid value.
57 | pooling: Optional pooling mode for feature extraction
58 | when `include_top` is `False`.
59 | - `None` means that the output of the model will be
60 | the 4D tensor output of the
61 | last convolutional block.
62 | - `avg` means that global average pooling
63 | will be applied to the output of the
64 | last convolutional block, and thus
65 | the output of the model will be a 2D tensor.
66 | - `max` means that global max pooling will
67 | be applied.
68 | classes: optional number of classes to classify images
69 | into, only to be specified if `include_top` is True, and
70 | if no `weights` argument is specified.
71 | classifier_activation: A `str` or callable. The activation function to use
72 | on the "top" layer. Ignored unless `include_top=True`. Set
73 | `classifier_activation=None` to return the logits of the "top" layer.
74 |
75 | Returns:
76 | A `keras.Model` instance.
77 |
78 | Raises:
79 | ValueError: in case of invalid argument for `weights`,
80 | or invalid input shape.
81 | ValueError: if `classifier_activation` is not `softmax` or `None` when
82 | using a pretrained top layer.
83 | """
84 |
85 | # Block 1
86 | x = layers.Conv2D(
87 | 64, (3, 3), activation='relu', padding='same', name='block1_conv1')(
88 | inputs)
89 | x = layers.Conv2D(
90 | 64, (3, 3), activation='relu', padding='same', name='block1_conv2')(x)
91 | x = layers.MaxPooling2D((2, 2), strides=(2, 2), name='block1_pool')(x)
92 |
93 | outputs = []
94 | # Block 2
95 | x = layers.Conv2D(
96 | 128, (3, 3), activation='relu', padding='same', name='block2_conv1')(x)
97 | x = layers.Conv2D(
98 | 128, (3, 3), activation='relu', padding='same', name='block2_conv2')(x)
99 | x = layers.MaxPooling2D((2, 2), strides=(2, 2), name='block2_pool')(x)
100 | outputs.append(x)
101 |
102 | # Block 3
103 | x = layers.Conv2D(
104 | 256, (3, 3), activation='relu', padding='same', name='block3_conv1')(x)
105 | x = layers.Conv2D(
106 | 256, (3, 3), activation='relu', padding='same', name='block3_conv2')(x)
107 | x = layers.Conv2D(
108 | 256, (3, 3), activation='relu', padding='same', name='block3_conv3')(x)
109 | x = layers.MaxPooling2D((2, 2), strides=(2, 2), name='block3_pool')(x)
110 | outputs.append(x)
111 |
112 | # Block 4
113 | x = layers.Conv2D(
114 | 512, (3, 3), activation='relu', padding='same', name='block4_conv1')(x)
115 | x = layers.Conv2D(
116 | 512, (3, 3), activation='relu', padding='same', name='block4_conv2')(x)
117 | x = layers.Conv2D(
118 | 512, (3, 3), activation='relu', padding='same', name='block4_conv3')(x)
119 | x = layers.MaxPooling2D((2, 2), strides=(2, 2), name='block4_pool')(x)
120 | outputs.append(x)
121 |
122 | # Block 5
123 | x = layers.Conv2D(
124 | 512, (3, 3), activation='relu', padding='same', name='block5_conv1')(x)
125 | x = layers.Conv2D(
126 | 512, (3, 3), activation='relu', padding='same', name='block5_conv2')(x)
127 | x = layers.Conv2D(
128 | 512, (3, 3), activation='relu', padding='same', name='block5_conv3')(x)
129 | x = layers.MaxPooling2D((2, 2), strides=(2, 2), name='block5_pool')(x)
130 | outputs.append(x)
131 |
132 | # Create model.
133 | model = training.Model(inputs, outputs, name='vgg16')
134 |
135 | return model
136 |
137 |
138 | if __name__ == '__main__':
139 | model = VGG16(layers.Input(shape=(None, None, 3)))
140 |
--------------------------------------------------------------------------------
/src/phiqnet/bin/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junyongyou/lsct_phiqnet/0f7c627d2e2b5ec02c09cf74d380ad2ccc2da481/src/phiqnet/bin/__init__.py
--------------------------------------------------------------------------------
/src/phiqnet/bin/train_resnet152_distribution_fpn_attention_imageaug.py:
--------------------------------------------------------------------------------
1 | from phiqnet.train.train import train_main
2 |
3 | if __name__ == '__main__':
4 | args = {}
5 | args['multi_gpu'] = 0
6 | args['gpu'] = 1
7 |
8 | args['result_folder'] = r'..\databases\experiments'
9 | args['n_quality_levels'] = 5
10 |
11 | args['train_folders'] = [r'..\databases\train\koniq_normal',
12 | r'..\databases\train\koniq_small',
13 | r'..\databases\train\live']
14 | args['val_folders'] = [r'..\databases\val\koniq_normal',
15 | r'..\databases\val\koniq_small',
16 | r'..\databases\val\live']
17 | args['koniq_mos_file'] = r'..\databases\koniq10k_images_scores.csv'
18 | args['live_mos_file'] = r'..\databases\live_mos.csv'
19 |
20 | args['naive_backbone'] = False
21 | args['backbone'] = 'resnet152'
22 | args['model_weights'] = r'..\pretrained_weights\resnet152_weights_tf_dim_ordering_tf_kernels_notop.h5'
23 | args['initial_epoch'] = 0
24 |
25 | args['lr_base'] = 1e-4 / 2
26 | args['lr_schedule'] = True
27 | args['batch_size'] = 4
28 | args['epochs'] = 120
29 |
30 | args['fpn_type'] = 'fpn'
31 | args['attention_module'] = True
32 |
33 | args['image_aug'] = True
34 |
35 | train_main(args)
36 |
--------------------------------------------------------------------------------
/src/phiqnet/bin/train_resnet152_distribution_fpn_attention_imageaug_finetune.py:
--------------------------------------------------------------------------------
1 | from phiqnet.train.train import train_main
2 |
3 | if __name__ == '__main__':
4 | args = {}
5 | args['multi_gpu'] = 0
6 | args['gpu'] = 1
7 |
8 | args['result_folder'] = r'..\databases\experiments'
9 | args['n_quality_levels'] = 5
10 |
11 | args['train_folders'] = [r'..\databases\train\koniq_normal',
12 | r'..\databases\train\koniq_small',
13 | r'..\databases\train\live']
14 | args['val_folders'] = [r'..\databases\val\koniq_normal',
15 | r'..\databases\val\koniq_small',
16 | r'..\databases\val\live']
17 | args['koniq_mos_file'] = r'..\databases\koniq10k_images_scores.csv'
18 | args['live_mos_file'] = r'..\databases\live_mos.csv'
19 |
20 | args['naive_backbone'] = False
21 | args['backbone'] = 'resnet152'
22 | args['model_weights'] = r'..\databases\experiments\resnet152_distribution_attention_fpn\116_0.8542_1.0227.h5'
23 | args['initial_epoch'] = 0
24 |
25 | args['lr_base'] = 5e-7
26 | args['lr_schedule'] = True
27 | args['batch_size'] = 4
28 | args['epochs'] = 120
29 |
30 | args['fpn_type'] = 'fpn'
31 | args['attention_module'] = True
32 |
33 | args['image_aug'] = True
34 |
35 | train_main(args)
36 |
--------------------------------------------------------------------------------
/src/phiqnet/bin/train_resnet152v2_distribution_fpn_attention_imageaug.py:
--------------------------------------------------------------------------------
1 | from phiqnet.train.train import train_main
2 |
3 | if __name__ == '__main__':
4 | args = {}
5 | args['multi_gpu'] = 0
6 | args['gpu'] = 1
7 |
8 | args['result_folder'] = r'..\databases\experiments'
9 | args['n_quality_levels'] = 5
10 |
11 | args['train_folders'] = [r'..\databases\train\koniq_normal',
12 | r'..\databases\train\koniq_small',
13 | r'..\databases\train\live']
14 | args['val_folders'] = [r'..\databases\val\koniq_normal',
15 | r'..\databases\val\koniq_small',
16 | r'..\databases\val\live']
17 | args['koniq_mos_file'] = r'..\databases\koniq10k_images_scores.csv'
18 | args['live_mos_file'] = r'..\databases\live_mos.csv'
19 |
20 | args['naive_backbone'] = False
21 | args['backbone'] = 'resnet152v2'
22 | args['model_weights'] = r'..\pretrained_weights\resnet152v2_weights_tf_dim_ordering_tf_kernels_notop.h5'
23 | args['initial_epoch'] = 0
24 |
25 | args['lr_base'] = 1e-4 / 2
26 | args['lr_schedule'] = True
27 | args['batch_size'] = 1
28 | args['epochs'] = 80
29 |
30 | args['fpn_type'] = 'fpn'
31 | args['attention_module'] = True
32 |
33 | args['image_aug'] = True
34 |
35 | train_main(args)
36 |
--------------------------------------------------------------------------------
/src/phiqnet/bin/train_resnet50_distribution_NOfpn_attention_imageaug.py:
--------------------------------------------------------------------------------
1 | from phiqnet.train.train import train_main
2 |
3 | if __name__ == '__main__':
4 | args = {}
5 | args['multi_gpu'] = 0
6 | args['gpu'] = 1
7 |
8 | args['result_folder'] = r'..\databases\experiments'
9 | args['n_quality_levels'] = 5
10 |
11 | args['train_folders'] = [r'..\databases\train\koniq_normal',
12 | r'..\databases\train\koniq_small',
13 | r'..\databases\train\live']
14 | args['val_folders'] = [r'..\databases\val\koniq_normal',
15 | r'..\databases\val\koniq_small',
16 | r'..\databases\val\live']
17 | args['koniq_mos_file'] = r'..\databases\koniq10k_images_scores.csv'
18 | args['live_mos_file'] = r'..\databases\live_mos.csv'
19 |
20 | args['naive_backbone'] = False
21 | args['backbone'] = 'resnet50'
22 | args['model_weights'] = r'..\pretrained_weights\resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5'
23 | args['initial_epoch'] = 0
24 |
25 | args['lr_base'] = 1e-4 / 2
26 | args['lr_schedule'] = True
27 | args['batch_size'] = 4
28 | args['epochs'] = 120
29 |
30 | args['fpn_type'] = 'no_fpn'
31 | args['attention_module'] = True
32 |
33 | args['image_aug'] = True
34 |
35 | train_main(args)
36 |
--------------------------------------------------------------------------------
/src/phiqnet/bin/train_resnet50_distribution_fpn_NOattention_imageaug.py:
--------------------------------------------------------------------------------
1 | from phiqnet.train.train import train_main
2 |
3 | if __name__ == '__main__':
4 | args = {}
5 | args['multi_gpu'] = 0
6 | args['gpu'] = 1
7 |
8 | args['result_folder'] = r'..\databases\experiments'
9 | args['n_quality_levels'] = 5
10 |
11 | args['train_folders'] = [r'..\databases\train\koniq_normal',
12 | r'..\databases\train\koniq_small',
13 | r'..\databases\train\live']
14 | args['val_folders'] = [r'..\databases\val\koniq_normal',
15 | r'..\databases\val\koniq_small',
16 | r'..\databases\val\live']
17 | args['koniq_mos_file'] = r'..\databases\koniq10k_images_scores.csv'
18 | args['live_mos_file'] = r'..\databases\live_mos.csv'
19 |
20 | args['naive_backbone'] = False
21 | args['backbone'] = 'resnet50'
22 | args['model_weights'] = r'..\pretrained_weights\resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5'
23 | args['initial_epoch'] = 0
24 |
25 | args['lr_base'] = 1e-4 / 2
26 | args['lr_schedule'] = True
27 | args['batch_size'] = 4
28 | args['epochs'] = 120
29 |
30 | args['fpn_type'] = 'fpn'
31 | args['attention_module'] = False
32 |
33 | args['image_aug'] = True
34 |
35 | train_main(args)
36 |
--------------------------------------------------------------------------------
/src/phiqnet/bin/train_resnet50_distribution_fpn_attention_NOimageaug.py:
--------------------------------------------------------------------------------
1 | from phiqnet.train.train import train_main
2 |
3 | if __name__ == '__main__':
4 | args = {}
5 | args['multi_gpu'] = 0
6 | args['gpu'] = 1
7 |
8 | args['result_folder'] = r'..\databases\experiments'
9 | args['n_quality_levels'] = 5
10 |
11 | args['train_folders'] = [
12 | # r'..\databases\train\koniq_normal',
13 | r'..\databases\train\koniq_small',]
14 | # r'..\databases\train\live']
15 | args['val_folders'] = [
16 | # r'..\databases\val\koniq_normal',
17 | r'..\databases\val\koniq_small',]
18 | # r'..\databases\val\live']
19 | args['koniq_mos_file'] = r'..\databases\koniq10k_images_scores.csv'
20 | args['live_mos_file'] = r'..\databases\live_mos.csv'
21 |
22 | args['naive_backbone'] = False
23 | args['backbone'] = 'resnet50'
24 | args['model_weights'] = r'..\pretrained_weights\resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5'
25 | args['initial_epoch'] = 0
26 |
27 | args['lr_base'] = 1e-4 / 2
28 | args['lr_schedule'] = True
29 | args['batch_size'] = 4
30 | args['epochs'] = 120
31 |
32 | args['fpn_type'] = 'fpn'
33 | args['attention_module'] = True
34 |
35 | args['image_aug'] = False
36 |
37 | train_main(args)
38 |
--------------------------------------------------------------------------------
/src/phiqnet/bin/train_resnet50_distribution_fpn_attention_imageaug.py:
--------------------------------------------------------------------------------
1 | from phiqnet.train.train import train_main
2 |
3 | if __name__ == '__main__':
4 | args = {}
5 | args['multi_gpu'] = 0
6 | args['gpu'] = 1
7 |
8 | args['result_folder'] = r'..\databases\experiments'
9 | args['n_quality_levels'] = 5
10 |
11 | args['train_folders'] = [
12 | r'..\databases\train\koniq_normal',
13 | r'..\databases\train\koniq_small',
14 | r'..\databases\train\live']
15 | args['val_folders'] = [
16 | r'..\databases\val\koniq_normal',
17 | r'..\databases\val\koniq_small',
18 | r'..\databases\val\live']
19 | args['koniq_mos_file'] = r'..\databases\koniq10k_images_scores.csv'
20 | args['live_mos_file'] = r'..\databases\live_mos.csv'
21 |
22 | args['naive_backbone'] = False
23 | args['backbone'] = 'resnet50'
24 | args['model_weights'] = r'..\pretrained_weights\resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5'
25 | args['initial_epoch'] = 0
26 |
27 | args['lr_base'] = 1e-4 / 2
28 | args['lr_schedule'] = True
29 | args['batch_size'] = 4
30 | args['epochs'] = 120
31 |
32 | args['fpn_type'] = 'fpn'
33 | args['attention_module'] = True
34 |
35 | args['image_aug'] = True
36 |
37 | train_main(args)
38 |
--------------------------------------------------------------------------------
/src/phiqnet/bin/train_resnet50_distribution_fpn_attention_imageaug_NOpretrain.py:
--------------------------------------------------------------------------------
1 | from phiqnet.train.train import train_main
2 |
3 | if __name__ == '__main__':
4 | args = {}
5 | args['multi_gpu'] = 0
6 | args['gpu'] = 0
7 |
8 | args['result_folder'] = r'..\databases\experiments'
9 | args['n_quality_levels'] = 5
10 |
11 | args['train_folders'] = [r'..\databases\train\koniq_normal',
12 | r'..\databases\train\koniq_small',
13 | r'..\databases\train\live']
14 | args['val_folders'] = [r'..\databases\val\koniq_normal',
15 | r'..\databases\val\koniq_small',
16 | r'..\databases\val\live']
17 | args['koniq_mos_file'] = r'..\databases\koniq10k_images_scores.csv'
18 | args['live_mos_file'] = r'..\databases\live_mos.csv'
19 |
20 | args['naive_backbone'] = False
21 | args['backbone'] = 'resnet50'
22 | args['model_weights'] = None
23 | args['initial_epoch'] = 0
24 |
25 | args['lr_base'] = 1e-4 / 2
26 | args['lr_schedule'] = True
27 | args['batch_size'] = 4
28 | args['epochs'] = 120
29 |
30 | args['fpn_type'] = 'fpn'
31 | args['attention_module'] = True
32 |
33 | args['image_aug'] = True
34 |
35 | train_main(args)
36 |
--------------------------------------------------------------------------------
/src/phiqnet/bin/train_resnet50_distribution_fpn_attention_imageaug_finetune.py:
--------------------------------------------------------------------------------
1 | from phiqnet.train.train import train_main
2 |
3 | if __name__ == '__main__':
4 | args = {}
5 | args['multi_gpu'] = 0
6 | args['gpu'] = 0
7 |
8 | args['result_folder'] = r'..\databases\experiments'
9 | args['n_quality_levels'] = 5
10 |
11 | args['train_folders'] = [r'..\databases\train\koniq_normal',
12 | r'..\databases\train\koniq_small',
13 | r'..\databases\train\live']
14 | args['val_folders'] = [r'..\databases\val\koniq_normal',
15 | r'..\databases\val\koniq_small',
16 | r'..\databases\val\live']
17 | args['koniq_mos_file'] = r'..\databases\koniq10k_images_scores.csv'
18 | args['live_mos_file'] = r'..\databases\live_mos.csv'
19 |
20 | args['naive_backbone'] = False
21 | args['backbone'] = 'resnet50'
22 | args['model_weights'] = r'..\databases\results_distribution_loss\resnet50_entropy_distribution_fpn_lr\113_0.8534_1.0183.h5'
23 | args['initial_epoch'] = 0
24 |
25 | args['lr_base'] = 1e-6
26 | args['lr_schedule'] = True
27 | args['batch_size'] = 4
28 | args['epochs'] = 120
29 |
30 | args['fpn_type'] = 'fpn'
31 | args['attention_module'] = True
32 |
33 | args['image_aug'] = True
34 |
35 | train_main(args)
36 |
--------------------------------------------------------------------------------
/src/phiqnet/bin/train_resnet50_distribution_fpn_attention_imageaug_freezebackbone.py:
--------------------------------------------------------------------------------
1 | from phiqnet.train.train import train_main
2 |
3 | if __name__ == '__main__':
4 | args = {}
5 | args['multi_gpu'] = 0
6 | args['gpu'] = 1
7 |
8 | args['result_folder'] = r'..\databases\experiments'
9 | args['n_quality_levels'] = 5
10 |
11 | args['train_folders'] = [r'..\databases\train\koniq_normal',
12 | r'..\databases\train\koniq_small',
13 | r'..\databases\train\live']
14 | args['val_folders'] = [r'..\databases\val\koniq_normal',
15 | r'..\databases\val\koniq_small',
16 | r'..\databases\val\live']
17 | args['koniq_mos_file'] = r'..\databases\koniq10k_images_scores.csv'
18 | args['live_mos_file'] = r'..\databases\live_mos.csv'
19 |
20 | args['naive_backbone'] = False
21 | args['backbone'] = 'resnet50'
22 | args['model_weights'] = r'..\pretrained_weights\resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5'
23 | args['initial_epoch'] = 0
24 |
25 | args['lr_base'] = 1e-4 / 2
26 | args['lr_schedule'] = True
27 | args['batch_size'] = 4
28 | args['epochs'] = 120
29 |
30 | args['fpn_type'] = 'fpn'
31 | args['attention_module'] = True
32 | args['freeze_backbone'] = True
33 |
34 | args['image_aug'] = True
35 |
36 | train_main(args)
37 |
--------------------------------------------------------------------------------
/src/phiqnet/bin/train_resnet50_distribution_fpn_attention_imageaug_koniq10k.py:
--------------------------------------------------------------------------------
1 | from phiqnet.train.train import train_main
2 |
3 | if __name__ == '__main__':
4 | args = {}
5 | args['multi_gpu'] = 0
6 | args['gpu'] = 1
7 |
8 | args['result_folder'] = r'..\databases\experiments\koniq_normal'
9 | args['n_quality_levels'] = 5
10 |
11 | args['train_folders'] = [r'..\databases\train\koniq_normal']
12 | args['val_folders'] = [r'..\databases\val\koniq_normal']
13 | args['koniq_mos_file'] = r'..\databases\koniq10k_images_scores.csv'
14 | args['live_mos_file'] = r'..\databases\live_mos.csv'
15 |
16 | args['naive_backbone'] = False
17 | args['backbone'] = 'resnet50'
18 | args['model_weights'] = r'..\pretrained_weights\resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5'
19 | args['initial_epoch'] = 0
20 |
21 | args['lr_base'] = 1e-4 / 2
22 | args['lr_schedule'] = True
23 | args['batch_size'] = 4
24 | args['epochs'] = 120
25 |
26 | args['fpn_type'] = 'fpn'
27 | args['attention_module'] = True
28 |
29 | args['image_aug'] = True
30 |
31 | train_main(args)
32 |
--------------------------------------------------------------------------------
/src/phiqnet/bin/train_resnet50_distribution_fpn_attention_imageaug_koniq10k_finetune.py:
--------------------------------------------------------------------------------
1 | from phiqnet.train.train import train_main
2 |
3 | if __name__ == '__main__':
4 | args = {}
5 | args['multi_gpu'] = 0
6 | args['gpu'] = 1
7 |
8 | args['result_folder'] = r'..\databases\experiments\koniq_normal'
9 | args['n_quality_levels'] = 5
10 |
11 | args['train_folders'] = [r'..\databases\train\koniq_normal']
12 | args['val_folders'] = [r'..\databases\val\koniq_normal']
13 | args['koniq_mos_file'] = r'..\databases\koniq10k_images_scores.csv'
14 | args['live_mos_file'] = r'..\databases\live_mos.csv'
15 |
16 | args['naive_backbone'] = False
17 | args['backbone'] = 'resnet50'
18 | args['model_weights'] = r'..\databases\experiments\koniq_normal\resnet50_distribution_attention_fpn\80_0.8503_0.9293.h5'
19 | args['initial_epoch'] = 0
20 |
21 | args['lr_base'] = 1e-7
22 | args['lr_schedule'] = True
23 | args['batch_size'] = 4
24 | args['epochs'] = 120
25 |
26 | args['fpn_type'] = 'fpn'
27 | args['attention_module'] = True
28 |
29 | args['image_aug'] = True
30 |
31 | train_main(args)
32 |
--------------------------------------------------------------------------------
/src/phiqnet/bin/train_resnet50_distribution_fpn_attention_imageaug_koniq_small.py:
--------------------------------------------------------------------------------
1 | from phiqnet.train.train import train_main
2 |
3 | if __name__ == '__main__':
4 | args = {}
5 | args['multi_gpu'] = 0
6 | args['gpu'] = 0
7 |
8 | args['result_folder'] = r'..\databases\experiments\koniq_small'
9 | args['n_quality_levels'] = 5
10 |
11 | args['train_folders'] = [#r'..\databases\train\koniq_normal',
12 | r'..\databases\train\koniq_small',]
13 | # r'..\databases\train\live']
14 | args['val_folders'] = [#r'..\databases\val\koniq_normal',
15 | r'..\databases\val\koniq_small',]
16 | # r'..\databases\val\live']
17 | args['koniq_mos_file'] = r'..\databases\koniq10k_images_scores.csv'
18 | args['live_mos_file'] = r'..\databases\live_mos.csv'
19 |
20 | args['naive_backbone'] = False
21 | args['backbone'] = 'resnet50'
22 | args['model_weights'] = r'..\pretrained_weights\resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5'
23 | args['initial_epoch'] = 0
24 |
25 | args['lr_base'] = 1e-4 / 2
26 | args['lr_schedule'] = True
27 | args['batch_size'] = 8
28 | args['epochs'] = 120
29 |
30 | args['fpn_type'] = 'fpn'
31 | args['attention_module'] = True
32 |
33 | args['image_aug'] = True
34 |
35 | train_main(args)
36 |
--------------------------------------------------------------------------------
/src/phiqnet/bin/train_resnet50_distribution_fpn_attention_imageaug_koniq_small_finetune.py:
--------------------------------------------------------------------------------
1 | from phiqnet.train.train import train_main
2 |
3 | if __name__ == '__main__':
4 | args = {}
5 | args['multi_gpu'] = 0
6 | args['gpu'] = 0
7 |
8 | args['result_folder'] = r'..\databases\experiments\koniq_small'
9 | args['n_quality_levels'] = 5
10 |
11 | args['train_folders'] = [#r'..\databases\train\koniq_normal',
12 | r'..\databases\train\koniq_small',]
13 | # r'..\databases\train\live']
14 | args['val_folders'] = [#r'..\databases\val\koniq_normal',
15 | r'..\databases\val\koniq_small',]
16 | # r'..\databases\val\live']
17 | args['koniq_mos_file'] = r'..\databases\koniq10k_images_scores.csv'
18 | args['live_mos_file'] = r'..\databases\live_mos.csv'
19 |
20 | args['naive_backbone'] = False
21 | args['backbone'] = 'resnet50'
22 | args['model_weights'] = r'..\databases\experiments\koniq_small\resnet50_distribution_attention_fpn_finetune\16_0.8542_0.9240.h5'
23 | args['initial_epoch'] = 0
24 |
25 | args['lr_base'] = 1e-5
26 | args['lr_schedule'] = True
27 | args['batch_size'] = 8
28 | args['epochs'] = 120
29 |
30 | args['fpn_type'] = 'fpn'
31 | args['attention_module'] = True
32 |
33 | args['image_aug'] = True
34 |
35 | train_main(args)
36 |
--------------------------------------------------------------------------------
/src/phiqnet/bin/train_resnet50_distribution_fpn_attention_imageaug_koniqall.py:
--------------------------------------------------------------------------------
1 | from phiqnet.train.train import train_main
2 |
3 | if __name__ == '__main__':
4 | args = {}
5 | args['multi_gpu'] = 0
6 | args['gpu'] = 1
7 |
8 | args['result_folder'] = r'..\databases\experiments'
9 | args['n_quality_levels'] = 5
10 |
11 | args['train_folders'] = [r'..\databases\train\koniq_normal',
12 | # r'..\databases\train\koniq_small',]
13 | r'..\databases\train\live']
14 | args['val_folders'] = [r'..\databases\val\koniq_normal',
15 | # r'..\databases\val\koniq_small',]
16 | r'..\databases\val\live']
17 | args['koniq_mos_file'] = r'..\databases\koniq10k_images_scores.csv'
18 | args['live_mos_file'] = r'..\databases\live_mos.csv'
19 |
20 | args['naive_backbone'] = False
21 | args['backbone'] = 'resnet50'
22 | args['model_weights'] = r'..\pretrained_weights\resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5'
23 | args['initial_epoch'] = 0
24 |
25 | args['lr_base'] = 1e-4 / 2
26 | args['lr_schedule'] = True
27 | args['batch_size'] = 4
28 | args['epochs'] = 120
29 |
30 | args['fpn_type'] = 'fpn'
31 | args['attention_module'] = True
32 |
33 | args['image_aug'] = True
34 |
35 | train_main(args)
36 |
--------------------------------------------------------------------------------
/src/phiqnet/bin/train_resnet50_distribution_fpn_attention_imageaug_koniqall_finetune.py:
--------------------------------------------------------------------------------
1 | from phiqnet.train.train import train_main
2 |
3 | if __name__ == '__main__':
4 | args = {}
5 | args['multi_gpu'] = 0
6 | args['gpu'] = 0
7 |
8 | args['result_folder'] = r'..\databases\experiments\koniq_all'
9 | args['n_quality_levels'] = 5
10 |
11 | args['train_folders'] = [r'..\databases\train\koniq_normal',
12 | r'..\databases\train\koniq_small']
13 | # r'..\databases\train\live']
14 | args['val_folders'] = [r'..\databases\val\koniq_normal',
15 | r'..\databases\val\koniq_small']
16 | # r'..\databases\val\live']
17 | args['koniq_mos_file'] = r'..\databases\koniq10k_images_scores.csv'
18 | args['live_mos_file'] = r'..\databases\live_mos.csv'
19 |
20 | args['naive_backbone'] = False
21 | args['backbone'] = 'resnet50'
22 | args['model_weights'] = r'..\databases\experiments\koniq_all\resnet50_distribution_attention_fpn\82_0.8477_0.9715.h5'
23 | args['initial_epoch'] = 0
24 |
25 | args['lr_base'] = 1e-7
26 | args['lr_schedule'] = True
27 | args['batch_size'] = 4
28 | args['epochs'] = 120
29 |
30 | args['fpn_type'] = 'fpn'
31 | args['attention_module'] = True
32 |
33 | args['image_aug'] = True
34 |
35 | train_main(args)
36 |
--------------------------------------------------------------------------------
/src/phiqnet/bin/train_resnet50_distribution_fpn_attention_imageaug_test.py:
--------------------------------------------------------------------------------
1 | from phiqnet.train.train import train_main
2 |
3 | if __name__ == '__main__':
4 | args = {}
5 | args['multi_gpu'] = 0
6 | args['gpu'] = 0
7 |
8 | args['result_folder'] = r'..\databases\experiments_fixed_shape'
9 | args['n_quality_levels'] = 5
10 |
11 | args['train_folders'] = [#r'..\databases\train\koniq_normal',
12 | r'..\databases\train\koniq_small',]
13 | # r'..\databases\train\live']
14 | args['val_folders'] = [#r'..\databases\val\koniq_normal',
15 | r'..\databases\val\koniq_small',]
16 | # r'..\databases\val\live']
17 | args['koniq_mos_file'] = r'..\databases\koniq10k_images_scores.csv'
18 | args['live_mos_file'] = r'..\databases\live_mos.csv'
19 |
20 | args['naive_backbone'] = False
21 | args['backbone'] = 'resnet50'
22 | args['model_weights'] = r'..\databases\experiments_fixed_shape\resnet50_distribution_attention_fpn\21_0.8670_0.9262.h5'
23 | args['initial_epoch'] = 0
24 |
25 | args['lr_base'] = 1e-7
26 | args['lr_schedule'] = True
27 | args['batch_size'] = 16
28 | args['epochs'] = 120
29 |
30 | args['fpn_type'] = 'fpn'
31 | args['attention_module'] = True
32 |
33 | args['image_aug'] = True
34 | args['lr_schedule'] = True
35 |
36 | train_main(args)
37 |
--------------------------------------------------------------------------------
/src/phiqnet/bin/train_resnet50_mos_fpn_attention_imageaug.py:
--------------------------------------------------------------------------------
1 | from phiqnet.train.train import train_main
2 |
3 | if __name__ == '__main__':
4 | args = {}
5 | args['multi_gpu'] = 0
6 | args['gpu'] = 0
7 |
8 | args['result_folder'] = r'..\databases\experiments\koniq_all'
9 | args['n_quality_levels'] = 1
10 |
11 | args['train_folders'] = [r'..\databases\train\koniq_normal',
12 | r'..\databases\train\koniq_small',]
13 | # r'..\databases\train\live']
14 | args['val_folders'] = [r'..\databases\val\koniq_normal',
15 | r'..\databases\val\koniq_small',]
16 | # r'..\databases\val\live']
17 | args['koniq_mos_file'] = r'..\databases\koniq10k_images_scores.csv'
18 | args['live_mos_file'] = r'..\databases\live_mos.csv'
19 |
20 | args['naive_backbone'] = False
21 | args['backbone'] = 'resnet50'
22 | args['model_weights'] = r'..\pretrained_weights\resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5'
23 | args['initial_epoch'] = 0
24 |
25 | args['lr_base'] = 1e-4 / 2
26 | args['lr_schedule'] = True
27 | args['batch_size'] = 4
28 | args['epochs'] = 120
29 |
30 | args['fpn_type'] = 'fpn'
31 | args['attention_module'] = True
32 |
33 | args['image_aug'] = True
34 |
35 | train_main(args)
36 |
--------------------------------------------------------------------------------
/src/phiqnet/bin/train_resnet50_mos_fpn_attention_imageaug_finetune.py:
--------------------------------------------------------------------------------
1 | from phiqnet.train.train import train_main
2 |
3 | if __name__ == '__main__':
4 | args = {}
5 | args['multi_gpu'] = 0
6 | args['gpu'] = 0
7 |
8 | args['result_folder'] = r'..\databases\experiments'
9 | args['n_quality_levels'] = 1
10 |
11 | args['train_folders'] = [r'..\databases\train\koniq_normal',
12 | r'..\databases\train\koniq_small',
13 | r'..\databases\train\live']
14 | args['val_folders'] = [r'..\databases\val\koniq_normal',
15 | r'..\databases\val\koniq_small',
16 | r'..\databases\val\live']
17 | args['koniq_mos_file'] = r'..\databases\koniq10k_images_scores.csv'
18 | args['live_mos_file'] = r'..\databases\live_mos.csv'
19 |
20 | args['naive_backbone'] = False
21 | args['backbone'] = 'resnet50'
22 | args['model_weights'] = r'..\databases\experiments\resnet50_mos_attention_fpn\119_0.0005_0.0990.h5'
23 | args['initial_epoch'] = 0
24 |
25 | args['lr_base'] = 2e-8
26 | args['lr_schedule'] = True
27 | args['batch_size'] = 4
28 | args['epochs'] = 120
29 |
30 | args['fpn_type'] = 'fpn'
31 | args['attention_module'] = True
32 |
33 | args['image_aug'] = True
34 |
35 | train_main(args)
36 |
--------------------------------------------------------------------------------
/src/phiqnet/bin/train_resnet50_mos_fpn_attention_imageaug_koniq_small.py:
--------------------------------------------------------------------------------
1 | from phiqnet.train.train import train_main
2 |
3 | if __name__ == '__main__':
4 | args = {}
5 | args['multi_gpu'] = 0
6 | args['gpu'] = 0
7 |
8 | args['result_folder'] = r'..\databases\experiments\koniq_small'
9 | args['n_quality_levels'] = 1
10 |
11 | args['train_folders'] = [#r'..\databases\train\koniq_normal',
12 | r'..\databases\train\koniq_small',]
13 | # r'..\databases\train\live']
14 | args['val_folders'] = [#r'..\databases\val\koniq_normal',
15 | r'..\databases\val\koniq_small',]
16 | # r'..\databases\val\live']
17 | args['koniq_mos_file'] = r'..\databases\koniq10k_images_scores.csv'
18 | args['live_mos_file'] = r'..\databases\live_mos.csv'
19 |
20 | args['naive_backbone'] = False
21 | args['backbone'] = 'resnet50'
22 | args['model_weights'] = r'..\pretrained_weights\resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5'
23 | args['initial_epoch'] = 0
24 |
25 | args['lr_base'] = 1e-4 / 2
26 | args['lr_schedule'] = True
27 | args['batch_size'] = 8
28 | args['epochs'] = 120
29 |
30 | args['fpn_type'] = 'fpn'
31 | args['attention_module'] = True
32 |
33 | args['image_aug'] = True
34 |
35 | train_main(args)
36 |
--------------------------------------------------------------------------------
/src/phiqnet/bin/train_resnet50_mos_fpn_attention_imageaug_koniq_small_finetune.py:
--------------------------------------------------------------------------------
1 | from phiqnet.train.train import train_main
2 |
3 | if __name__ == '__main__':
4 | args = {}
5 | args['multi_gpu'] = 0
6 | args['gpu'] = 0
7 |
8 | args['result_folder'] = r'..\databases\experiments\koniq_small'
9 | args['n_quality_levels'] = 1
10 |
11 | args['train_folders'] = [#r'..\databases\train\koniq_normal',
12 | r'..\databases\train\koniq_small',]
13 | # r'..\databases\train\live']
14 | args['val_folders'] = [#r'..\databases\val\koniq_normal',
15 | r'..\databases\val\koniq_small',]
16 | # r'..\databases\val\live']
17 | args['koniq_mos_file'] = r'..\databases\koniq10k_images_scores.csv'
18 | args['live_mos_file'] = r'..\databases\live_mos.csv'
19 |
20 | args['naive_backbone'] = False
21 | args['backbone'] = 'resnet50'
22 | args['model_weights'] = r'..\databases\experiments\koniq_small\resnet50_mos_attention_fpn\44_0.0094_0.0473.h5'
23 | args['initial_epoch'] = 0
24 |
25 | args['lr_base'] = 1e-6
26 | args['lr_schedule'] = True
27 | args['batch_size'] = 8
28 | args['epochs'] = 120
29 |
30 | args['fpn_type'] = 'fpn'
31 | args['attention_module'] = True
32 |
33 | args['image_aug'] = True
34 |
35 | train_main(args)
36 |
--------------------------------------------------------------------------------
/src/phiqnet/bin/train_resnet50_mos_fpn_attention_imageaug_koniqall.py:
--------------------------------------------------------------------------------
1 | from phiqnet.train.train import train_main
2 |
3 | if __name__ == '__main__':
4 | args = {}
5 | args['multi_gpu'] = 0
6 | args['gpu'] = 1
7 |
8 | args['result_folder'] = r'..\databases\experiments\koniq_all'
9 | args['n_quality_levels'] = 1
10 |
11 | args['train_folders'] = [r'..\databases\train\koniq_normal',
12 | r'..\databases\train\koniq_small',]
13 | # r'..\databases\train\live']
14 | args['val_folders'] = [r'..\databases\val\koniq_normal',
15 | r'..\databases\val\koniq_small',]
16 | # r'..\databases\val\live']
17 | args['koniq_mos_file'] = r'..\databases\koniq10k_images_scores.csv'
18 | args['live_mos_file'] = r'..\databases\live_mos.csv'
19 |
20 | args['naive_backbone'] = False
21 | args['backbone'] = 'resnet50'
22 | args['model_weights'] = r'..\pretrained_weights\resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5'
23 | args['initial_epoch'] = 0
24 |
25 | args['lr_base'] = 1e-4 / 2
26 | args['lr_schedule'] = True
27 | args['batch_size'] = 4
28 | args['epochs'] = 120
29 |
30 | args['fpn_type'] = 'fpn'
31 | args['attention_module'] = True
32 |
33 | args['image_aug'] = True
34 |
35 | train_main(args)
36 |
--------------------------------------------------------------------------------
/src/phiqnet/bin/train_resnet50_mos_fpn_attention_imageaug_koniqall_finetune.py:
--------------------------------------------------------------------------------
1 | from phiqnet.train.train import train_main
2 |
3 | if __name__ == '__main__':
4 | args = {}
5 | args['multi_gpu'] = 0
6 | args['gpu'] = 1
7 |
8 | args['result_folder'] = r'..\databases\experiments\koniq_all'
9 | args['n_quality_levels'] = 1
10 |
11 | args['train_folders'] = [r'..\databases\train\koniq_normal',
12 | r'..\databases\train\koniq_small',]
13 | # r'..\databases\train\live']
14 | args['val_folders'] = [r'..\databases\val\koniq_normal',
15 | r'..\databases\val\koniq_small',]
16 | # r'..\databases\val\live']
17 | args['koniq_mos_file'] = r'..\databases\koniq10k_images_scores.csv'
18 | args['live_mos_file'] = r'..\databases\live_mos.csv'
19 |
20 | args['naive_backbone'] = False
21 | args['backbone'] = 'resnet50'
22 | args['model_weights'] = r'..\databases\experiments\koniq_all\resnet50_mos_attention_fpn\120_0.0008_0.0853.h5'
23 | args['initial_epoch'] = 0
24 |
25 | args['lr_base'] = 1e-7
26 | args['lr_schedule'] = True
27 | args['batch_size'] = 4
28 | args['epochs'] = 120
29 |
30 | args['fpn_type'] = 'fpn'
31 | args['attention_module'] = True
32 |
33 | args['image_aug'] = True
34 |
35 | train_main(args)
36 |
--------------------------------------------------------------------------------
/src/phiqnet/bin/train_vgg16_distribution_fpn_attention_imageaug.py:
--------------------------------------------------------------------------------
1 | from phiqnet.train.train import train_main
2 |
3 | if __name__ == '__main__':
4 | args = {}
5 | args['multi_gpu'] = 0
6 | args['gpu'] = 1
7 |
8 | args['result_folder'] = r'..\databases\experiments'
9 | args['n_quality_levels'] = 5
10 |
11 | args['train_folders'] = [
12 | # r'..\databases\train\koniq_normal',
13 | r'..\databases\train\koniq_small',]
14 | # r'..\databases\train\live']
15 | args['val_folders'] = [
16 | # r'..\databases\val\koniq_normal',
17 | r'..\databases\val\koniq_small',]
18 | # r'..\databases\val\live']
19 | args['koniq_mos_file'] = r'..\databases\koniq10k_images_scores.csv'
20 | args['live_mos_file'] = r'..\databases\live_mos.csv'
21 |
22 | args['naive_backbone'] = False
23 | args['backbone'] = 'vgg16'
24 | args['model_weights'] = r'..\pretrained_weights\vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5'
25 | args['initial_epoch'] = 0
26 |
27 | args['lr_base'] = 1e-4 / 2
28 | args['lr_schedule'] = True
29 | args['batch_size'] = 4
30 | args['epochs'] = 120
31 |
32 | args['fpn_type'] = 'fpn'
33 | args['attention_module'] = True
34 |
35 | args['image_aug'] = True
36 |
37 | train_main(args)
38 |
--------------------------------------------------------------------------------
/src/phiqnet/databases/README.md:
--------------------------------------------------------------------------------
1 | # Databases for training PHIQNet
2 |
3 | This work uses two publicly available databases: KonIQ-10k [KonIQ-10k: An ecologically valid database for deep learning of blind image quality assessment](https://ieeexplore.ieee.org/document/8968750) by V. Hosu, H. Lin, T. Sziranyi, and D. Saupe;
4 | and LIVE-wild [Massive online crowdsourced study of subjective and objective picture quality](https://ieeexplore.ieee.org/document/7327186) by D. Ghadiyaram and A.C. Bovik.
5 |
6 | The train_images_koniq(live) and test_images_koniq(live) files list the images in the training and testing sets, which were randomly chosen from the two databases with respect to SI (spatial information, a measure of image complexity) and MOS.
7 |
8 | Multiple experiments can also be run with the training and testing images randomly re-split each time; this can be done by running phiqnet\databases\random_split_imageset.py.
--------------------------------------------------------------------------------
/src/phiqnet/databases/random_split_imageset.py:
--------------------------------------------------------------------------------
1 | import os
2 | from sklearn.model_selection import train_test_split
3 | import shutil
4 | from PIL import Image
5 |
6 |
7 | def do_image_resize(image_files, original_image_folder, target_image_folder):
8 | """
9 | Downscale KonIQ-10k images to half resolution (512x384)
10 | :param image_files: list of image file names to resize
11 | :param original_image_folder: folder containing the original images
12 | :param target_image_folder: folder to which the resized images are saved
13 | :return: None
14 | """
15 | for image_file in image_files:
16 | image = Image.open(os.path.join(original_image_folder, image_file)).resize((512, 384))
17 | image.save(os.path.join(target_image_folder, image_file))
18 |
19 |
20 | def do_split(original_image_folder, target_image_folder, mos_file, database='live'):
21 | all_files = []
22 |
23 | train_image_folder = os.path.join(target_image_folder, 'train', database)
24 | val_image_folder = os.path.join(target_image_folder, 'val', database)
25 | if not os.path.exists(train_image_folder):
26 | os.makedirs(train_image_folder)
27 | if not os.path.exists(val_image_folder):
28 | os.makedirs(val_image_folder)
29 |
30 | with open(mos_file) as mf:
31 | lines = mf.readlines()
32 | for line in lines:
33 | content = line.split(',')
34 | all_files.append(content[0])
35 |
36 | train_images, val_images = train_test_split(all_files, test_size=0.2, random_state=None)
37 |
38 | if database == 'live' or database == 'koniq_normal':
39 | for train_image in train_images:
40 | shutil.copy(os.path.join(original_image_folder, train_image),
41 | os.path.join(train_image_folder, train_image))
42 | for val_image in val_images:
43 | shutil.copy(os.path.join(original_image_folder, val_image),
44 | os.path.join(val_image_folder, val_image))
45 | else:
46 | do_image_resize(train_images, original_image_folder, train_image_folder)
47 | do_image_resize(val_images, original_image_folder, val_image_folder)
48 |
49 |
50 | def random_split():
51 | # Specify the image folders for the KonIQ-10k and LIVE-wild databases, assuming they are stored separately.
52 | koniq_image_folder = r''
53 | live_image_folder = r''
54 |
55 | # Specify the MOS files for the KonIQ-10k and LIVE-wild databases, respectively.
56 | # The split image files will be written to the current databases folder and can then be used in model training.
57 | live_mos = r'.\live_mos.csv'
58 | live_koniq = r'.\koniq10k_images_scores.csv'
59 |
60 | target_image_folder = r'.\\'
61 |
62 | do_split(live_image_folder, target_image_folder, live_mos)
63 | do_split(koniq_image_folder, target_image_folder, live_koniq, database='koniq_normal')
64 | do_split(koniq_image_folder, target_image_folder, live_koniq, database='koniq_small')
65 |
66 |
67 | if __name__ == '__main__':
68 | random_split()
--------------------------------------------------------------------------------
/src/phiqnet/databases/test_images_live.csv:
--------------------------------------------------------------------------------
1 | 100.bmp
2 | 1001.JPG
3 | 1003.JPG
4 | 1006.JPG
5 | 1008.JPG
6 | 1009.JPG
7 | 101.bmp
8 | 1012.JPG
9 | 1013.JPG
10 | 1014.JPG
11 | 1017.JPG
12 | 1018.JPG
13 | 102.bmp
14 | 1020.JPG
15 | 1022.JPG
16 | 1023.JPG
17 | 1024.JPG
18 | 1025.JPG
19 | 1026.JPG
20 | 1027.JPG
21 | 103.bmp
22 | 1035.JPG
23 | 1036.JPG
24 | 1037.JPG
25 | 1038.JPG
26 | 1039.JPG
27 | 1040.JPG
28 | 1041.JPG
29 | 1042.JPG
30 | 1043.JPG
31 | 1044.JPG
32 | 1046.JPG
33 | 1047.JPG
34 | 105.bmp
35 | 1050.JPG
36 | 1051.JPG
37 | 1052.JPG
38 | 1053.JPG
39 | 1055.JPG
40 | 1058.JPG
41 | 1059.JPG
42 | 1060.JPG
43 | 1061.JPG
44 | 1062.JPG
45 | 1065.JPG
46 | 1066.JPG
47 | 1069.JPG
48 | 1072.JPG
49 | 1075.JPG
50 | 1076.JPG
51 | 1077.JPG
52 | 1078.JPG
53 | 1081.JPG
54 | 1082.JPG
55 | 1085.JPG
56 | 1090.JPG
57 | 1091.JPG
58 | 1092.JPG
59 | 1093.JPG
60 | 1094.JPG
61 | 1095.JPG
62 | 1096.JPG
63 | 1098.JPG
64 | 1099.JPG
65 | 1100.JPG
66 | 1104.JPG
67 | 1105.JPG
68 | 1106.JPG
69 | 1110.JPG
70 | 1111.JPG
71 | 1113.JPG
72 | 1114.JPG
73 | 112.bmp
74 | 1125.JPG
75 | 1126.JPG
76 | 1134.JPG
77 | 1139.JPG
78 | 114.bmp
79 | 1140.JPG
80 | 1141.JPG
81 | 1142.JPG
82 | 1143.JPG
83 | 1144.JPG
84 | 1145.JPG
85 | 1146.JPG
86 | 1147.JPG
87 | 1148.JPG
88 | 1149.JPG
89 | 115.bmp
90 | 1156.JPG
91 | 1157.JPG
92 | 116.bmp
93 | 1161.bmp
94 | 1163.bmp
95 | 117.bmp
96 | 118.bmp
97 | 119.bmp
98 | 12.bmp
99 | 120.bmp
100 | 126.bmp
101 | 127.bmp
102 | 128.bmp
103 | 129.bmp
104 | 13.bmp
105 | 132.bmp
106 | 135.bmp
107 | 136.bmp
108 | 137.bmp
109 | 138.bmp
110 | 139.bmp
111 | 140.bmp
112 | 141.bmp
113 | 142.bmp
114 | 143.bmp
115 | 144.bmp
116 | 148.bmp
117 | 149.bmp
118 | 151.bmp
119 | 152.bmp
120 | 153.bmp
121 | 155.bmp
122 | 156.bmp
123 | 158.bmp
124 | 159.bmp
125 | 161.bmp
126 | 164.bmp
127 | 169.bmp
128 | 17.bmp
129 | 170.bmp
130 | 172.bmp
131 | 174.bmp
132 | 177.bmp
133 | 18.bmp
134 | 182.bmp
135 | 183.bmp
136 | 187.bmp
137 | 189.bmp
138 | 191.bmp
139 | 192.bmp
140 | 194.bmp
141 | 195.bmp
142 | 198.bmp
143 | 199.bmp
144 | 20.bmp
145 | 200.bmp
146 | 202.bmp
147 | 203.bmp
148 | 204.bmp
149 | 205.bmp
150 | 208.bmp
151 | 209.bmp
152 | 21.bmp
153 | 210.bmp
154 | 213.bmp
155 | 214.bmp
156 | 217.bmp
157 | 219.bmp
158 | 220.bmp
159 | 222.bmp
160 | 225.bmp
161 | 231.bmp
162 | 233.bmp
163 | 234.bmp
164 | 236.bmp
165 | 237.bmp
166 | 238.bmp
167 | 241.bmp
168 | 243.bmp
169 | 245.bmp
170 | 246.bmp
171 | 249.bmp
172 | 250.bmp
173 | 253.bmp
174 | 254.bmp
175 | 255.bmp
176 | 257.bmp
177 | 259.bmp
178 | 26.bmp
179 | 261.bmp
180 | 262.bmp
181 | 264.bmp
182 | 265.bmp
183 | 269.bmp
184 | 27.bmp
185 | 270.bmp
186 | 271.bmp
187 | 274.bmp
188 | 276.bmp
189 | 278.bmp
190 | 279.bmp
191 | 28.bmp
192 | 283.bmp
193 | 284.bmp
194 | 286.bmp
195 | 287.bmp
196 | 29.bmp
197 | 290.bmp
198 | 291.bmp
199 | 292.bmp
200 | 293.bmp
201 | 295.bmp
202 | 296.bmp
203 | 3.bmp
204 | 30.bmp
205 | 305.bmp
206 | 306.bmp
207 | 307.bmp
208 | 31.bmp
209 | 310.bmp
210 | 313.bmp
211 | 314.bmp
212 | 315.bmp
213 | 316.bmp
214 | 318.bmp
215 | 319.bmp
216 | 32.bmp
217 | 322.bmp
218 | 325.bmp
219 | 329.bmp
220 | 331.bmp
221 | 332.bmp
222 | 334.bmp
223 | 338.bmp
224 | 343.bmp
225 | 347.bmp
226 | 348.bmp
227 | 349.bmp
228 | 35.bmp
229 | 351.bmp
230 | 352.bmp
231 | 356.bmp
232 | 357.bmp
233 | 358.bmp
234 | 360.bmp
235 | 362.bmp
236 | 363.bmp
237 | 365.bmp
238 | 366.bmp
239 | 368.bmp
240 | 369.bmp
241 | 371.bmp
242 | 373.bmp
243 | 377.bmp
244 | 381.bmp
245 | 382.bmp
246 | 385.bmp
247 | 387.bmp
248 | 389.bmp
249 | 39.bmp
250 | 390.bmp
251 | 392.bmp
252 | 396.bmp
253 | 40.bmp
254 | 400.bmp
255 | 405.bmp
256 | 406.bmp
257 | 407.bmp
258 | 409.bmp
259 | 410.bmp
260 | 414.bmp
261 | 415.bmp
262 | 416.bmp
263 | 417.bmp
264 | 418.bmp
265 | 422.bmp
266 | 423.bmp
267 | 425.bmp
268 | 426.bmp
269 | 428.bmp
270 | 429.bmp
271 | 43.bmp
272 | 430.bmp
273 | 431.bmp
274 | 432.bmp
275 | 434.bmp
276 | 435.bmp
277 | 436.bmp
278 | 437.bmp
279 | 44.bmp
280 | 441.bmp
281 | 443.bmp
282 | 444.bmp
283 | 446.bmp
284 | 447.bmp
285 | 448.bmp
286 | 449.bmp
287 | 45.bmp
288 | 450.bmp
289 | 452.bmp
290 | 454.bmp
291 | 456.bmp
292 | 457.bmp
293 | 458.bmp
294 | 46.bmp
295 | 465.bmp
296 | 466.bmp
297 | 467.bmp
298 | 468.bmp
299 | 469.bmp
300 | 47.bmp
301 | 470.bmp
302 | 471.bmp
303 | 473.bmp
304 | 475.bmp
305 | 478.bmp
306 | 48.bmp
307 | 480.bmp
308 | 481.bmp
309 | 482.bmp
310 | 483.bmp
311 | 484.bmp
312 | 486.bmp
313 | 49.bmp
314 | 493.bmp
315 | 494.bmp
316 | 495.bmp
317 | 497.bmp
318 | 498.bmp
319 | 500.bmp
320 | 503.bmp
321 | 504.bmp
322 | 507.bmp
323 | 508.bmp
324 | 509.bmp
325 | 51.bmp
326 | 510.bmp
327 | 513.bmp
328 | 514.bmp
329 | 517.bmp
330 | 518.bmp
331 | 519.bmp
332 | 52.bmp
333 | 520.bmp
334 | 523.bmp
335 | 524.bmp
336 | 525.bmp
337 | 527.bmp
338 | 530.bmp
339 | 531.bmp
340 | 533.JPG
341 | 536.JPG
342 | 537.JPG
343 | 539.JPG
344 | 54.bmp
345 | 543.JPG
346 | 544.JPG
347 | 545.JPG
348 | 550.JPG
349 | 552.JPG
350 | 555.JPG
351 | 558.JPG
352 | 560.JPG
353 | 564.JPG
354 | 565.JPG
355 | 566.JPG
356 | 567.JPG
357 | 568.JPG
358 | 569.JPG
359 | 571.JPG
360 | 583.JPG
361 | 584.JPG
362 | 588.JPG
363 | 592.JPG
364 | 593.JPG
365 | 596.JPG
366 | 597.JPG
367 | 598.JPG
368 | 6.bmp
369 | 60.bmp
370 | 600.JPG
371 | 601.JPG
372 | 602.JPG
373 | 603.JPG
374 | 605.JPG
375 | 607.JPG
376 | 609.JPG
377 | 61.bmp
378 | 610.JPG
379 | 611.JPG
380 | 612.JPG
381 | 613.JPG
382 | 614.JPG
383 | 617.JPG
384 | 618.JPG
385 | 619.JPG
386 | 620.JPG
387 | 621.JPG
388 | 627.JPG
389 | 629.JPG
390 | 632.JPG
391 | 633.JPG
392 | 638.JPG
393 | 639.JPG
394 | 64.bmp
395 | 640.JPG
396 | 642.JPG
397 | 644.JPG
398 | 645.JPG
399 | 646.JPG
400 | 649.JPG
401 | 65.bmp
402 | 652.JPG
403 | 654.JPG
404 | 657.JPG
405 | 659.JPG
406 | 66.bmp
407 | 665.JPG
408 | 666.JPG
409 | 667.JPG
410 | 670.JPG
411 | 671.JPG
412 | 676.JPG
413 | 680.JPG
414 | 681.JPG
415 | 683.JPG
416 | 686.JPG
417 | 688.JPG
418 | 69.bmp
419 | 693.JPG
420 | 694.JPG
421 | 696.JPG
422 | 697.JPG
423 | 698.JPG
424 | 70.bmp
425 | 701.JPG
426 | 702.JPG
427 | 706.JPG
428 | 707.JPG
429 | 709.JPG
430 | 710.JPG
431 | 711.JPG
432 | 712.JPG
433 | 714.JPG
434 | 716.JPG
435 | 718.JPG
436 | 719.JPG
437 | 720.JPG
438 | 729.JPG
439 | 732.JPG
440 | 733.JPG
441 | 740.JPG
442 | 745.JPG
443 | 746.JPG
444 | 747.JPG
445 | 748.JPG
446 | 750.JPG
447 | 752.JPG
448 | 753.JPG
449 | 757.JPG
450 | 759.JPG
451 | 76.bmp
452 | 761.JPG
453 | 764.JPG
454 | 770.JPG
455 | 771.JPG
456 | 773.JPG
457 | 774.JPG
458 | 775.JPG
459 | 776.JPG
460 | 777.JPG
461 | 778.JPG
462 | 779.JPG
463 | 781.JPG
464 | 783.JPG
465 | 784.JPG
466 | 785.JPG
467 | 786.JPG
468 | 788.JPG
469 | 79.bmp
470 | 791.JPG
471 | 795.JPG
472 | 798.JPG
473 | 8.bmp
474 | 80.bmp
475 | 801.JPG
476 | 804.JPG
477 | 806.JPG
478 | 811.JPG
479 | 814.JPG
480 | 815.JPG
481 | 816.JPG
482 | 82.bmp
483 | 821.JPG
484 | 823.JPG
485 | 828.JPG
486 | 83.bmp
487 | 832.JPG
488 | 833.JPG
489 | 838.JPG
490 | 839.JPG
491 | 840.JPG
492 | 842.JPG
493 | 844.JPG
494 | 845.JPG
495 | 849.JPG
496 | 85.bmp
497 | 850.JPG
498 | 852.JPG
499 | 854.JPG
500 | 855.JPG
501 | 856.JPG
502 | 857.JPG
503 | 858.JPG
504 | 86.bmp
505 | 862.JPG
506 | 865.JPG
507 | 866.JPG
508 | 867.JPG
509 | 868.JPG
510 | 869.JPG
511 | 870.JPG
512 | 871.JPG
513 | 873.JPG
514 | 874.JPG
515 | 875.JPG
516 | 877.JPG
517 | 878.JPG
518 | 879.JPG
519 | 88.bmp
520 | 880.JPG
521 | 881.JPG
522 | 882.JPG
523 | 887.JPG
524 | 889.JPG
525 | 890.JPG
526 | 893.JPG
527 | 894.JPG
528 | 895.JPG
529 | 898.JPG
530 | 9.bmp
531 | 90.bmp
532 | 901.JPG
533 | 902.JPG
534 | 903.JPG
535 | 905.JPG
536 | 906.JPG
537 | 907.JPG
538 | 908.JPG
539 | 909.JPG
540 | 911.JPG
541 | 912.JPG
542 | 915.JPG
543 | 919.JPG
544 | 922.JPG
545 | 923.JPG
546 | 924.JPG
547 | 925.JPG
548 | 926.JPG
549 | 93.bmp
550 | 931.JPG
551 | 932.JPG
552 | 938.JPG
553 | 947.JPG
554 | 949.JPG
555 | 95.bmp
556 | 952.JPG
557 | 953.JPG
558 | 954.JPG
559 | 955.JPG
560 | 960.JPG
561 | 962.JPG
562 | 967.JPG
563 | 968.JPG
564 | 97.bmp
565 | 971.JPG
566 | 972.JPG
567 | 973.JPG
568 | 975.JPG
569 | 976.JPG
570 | 977.JPG
571 | 978.JPG
572 | 98.bmp
573 | 986.JPG
574 | 99.bmp
575 | 990.JPG
576 | 992.JPG
577 | 993.JPG
578 | 994.JPG
579 | 995.JPG
580 | 996.JPG
581 | 998.JPG
582 | 999.JPG
583 | t3.bmp
584 | t5.bmp
585 | t7.bmp
586 |
--------------------------------------------------------------------------------
/src/phiqnet/databases/train_images_live.csv:
--------------------------------------------------------------------------------
1 | 10.bmp
2 | 1000.JPG
3 | 1002.JPG
4 | 1004.JPG
5 | 1005.JPG
6 | 1007.JPG
7 | 1010.JPG
8 | 1011.JPG
9 | 1015.JPG
10 | 1016.JPG
11 | 1019.JPG
12 | 1021.JPG
13 | 1028.JPG
14 | 1029.JPG
15 | 1030.JPG
16 | 1031.JPG
17 | 1032.JPG
18 | 1033.JPG
19 | 1034.JPG
20 | 104.bmp
21 | 1045.JPG
22 | 1048.JPG
23 | 1049.JPG
24 | 1054.JPG
25 | 1056.JPG
26 | 1057.JPG
27 | 106.bmp
28 | 1063.JPG
29 | 1064.JPG
30 | 1067.JPG
31 | 1068.JPG
32 | 107.bmp
33 | 1070.JPG
34 | 1071.JPG
35 | 1073.JPG
36 | 1074.JPG
37 | 1079.JPG
38 | 108.bmp
39 | 1080.JPG
40 | 1083.JPG
41 | 1084.JPG
42 | 1086.JPG
43 | 1087.JPG
44 | 1088.JPG
45 | 1089.JPG
46 | 109.bmp
47 | 1097.JPG
48 | 11.bmp
49 | 110.bmp
50 | 1101.JPG
51 | 1102.JPG
52 | 1103.JPG
53 | 1107.JPG
54 | 1108.JPG
55 | 1109.JPG
56 | 111.bmp
57 | 1112.JPG
58 | 1115.JPG
59 | 1116.JPG
60 | 1117.JPG
61 | 1118.JPG
62 | 1119.JPG
63 | 1120.JPG
64 | 1121.JPG
65 | 1122.JPG
66 | 1123.JPG
67 | 1124.JPG
68 | 1127.JPG
69 | 1128.JPG
70 | 1129.JPG
71 | 113.bmp
72 | 1130.JPG
73 | 1131.JPG
74 | 1132.JPG
75 | 1133.JPG
76 | 1135.JPG
77 | 1136.JPG
78 | 1137.JPG
79 | 1138.JPG
80 | 1150.JPG
81 | 1151.JPG
82 | 1152.JPG
83 | 1153.JPG
84 | 1154.JPG
85 | 1155.JPG
86 | 1158.JPG
87 | 1159.JPG
88 | 1160.bmp
89 | 1162.bmp
90 | 1164.bmp
91 | 121.bmp
92 | 122.bmp
93 | 123.bmp
94 | 124.bmp
95 | 125.bmp
96 | 130.bmp
97 | 131.bmp
98 | 133.bmp
99 | 134.bmp
100 | 14.bmp
101 | 145.bmp
102 | 146.bmp
103 | 147.bmp
104 | 15.bmp
105 | 150.bmp
106 | 154.bmp
107 | 157.bmp
108 | 16.bmp
109 | 160.bmp
110 | 162.bmp
111 | 163.bmp
112 | 165.bmp
113 | 166.bmp
114 | 167.bmp
115 | 168.bmp
116 | 171.bmp
117 | 173.bmp
118 | 175.bmp
119 | 176.bmp
120 | 178.bmp
121 | 179.bmp
122 | 180.bmp
123 | 181.bmp
124 | 184.bmp
125 | 185.bmp
126 | 186.bmp
127 | 188.bmp
128 | 19.bmp
129 | 190.bmp
130 | 193.bmp
131 | 196.bmp
132 | 197.bmp
133 | 201.bmp
134 | 206.bmp
135 | 207.bmp
136 | 211.bmp
137 | 212.bmp
138 | 215.bmp
139 | 216.bmp
140 | 218.bmp
141 | 22.bmp
142 | 221.bmp
143 | 223.bmp
144 | 224.bmp
145 | 226.bmp
146 | 227.bmp
147 | 228.bmp
148 | 229.bmp
149 | 23.bmp
150 | 230.bmp
151 | 232.bmp
152 | 235.bmp
153 | 239.bmp
154 | 24.bmp
155 | 240.bmp
156 | 242.bmp
157 | 244.bmp
158 | 247.bmp
159 | 248.bmp
160 | 25.bmp
161 | 251.bmp
162 | 252.bmp
163 | 256.bmp
164 | 258.bmp
165 | 260.bmp
166 | 263.bmp
167 | 266.bmp
168 | 267.bmp
169 | 268.bmp
170 | 272.bmp
171 | 273.bmp
172 | 275.bmp
173 | 277.bmp
174 | 280.bmp
175 | 281.bmp
176 | 282.bmp
177 | 285.bmp
178 | 288.bmp
179 | 289.bmp
180 | 294.bmp
181 | 297.bmp
182 | 298.bmp
183 | 299.bmp
184 | 300.bmp
185 | 301.bmp
186 | 302.bmp
187 | 303.bmp
188 | 304.bmp
189 | 308.bmp
190 | 309.bmp
191 | 311.bmp
192 | 312.bmp
193 | 317.bmp
194 | 320.bmp
195 | 321.bmp
196 | 323.bmp
197 | 324.bmp
198 | 326.bmp
199 | 327.bmp
200 | 328.bmp
201 | 33.bmp
202 | 330.bmp
203 | 333.bmp
204 | 335.bmp
205 | 336.bmp
206 | 337.bmp
207 | 339.bmp
208 | 34.bmp
209 | 340.bmp
210 | 341.bmp
211 | 342.bmp
212 | 344.bmp
213 | 345.bmp
214 | 346.bmp
215 | 350.bmp
216 | 353.bmp
217 | 354.bmp
218 | 355.bmp
219 | 359.bmp
220 | 36.bmp
221 | 361.bmp
222 | 364.bmp
223 | 367.bmp
224 | 37.bmp
225 | 370.bmp
226 | 372.bmp
227 | 374.bmp
228 | 375.bmp
229 | 376.bmp
230 | 378.bmp
231 | 379.bmp
232 | 38.bmp
233 | 380.bmp
234 | 383.bmp
235 | 384.bmp
236 | 386.bmp
237 | 388.bmp
238 | 391.bmp
239 | 393.bmp
240 | 394.bmp
241 | 395.bmp
242 | 397.bmp
243 | 398.bmp
244 | 399.bmp
245 | 4.bmp
246 | 401.bmp
247 | 402.bmp
248 | 403.bmp
249 | 404.bmp
250 | 408.bmp
251 | 41.bmp
252 | 411.bmp
253 | 412.bmp
254 | 413.bmp
255 | 419.bmp
256 | 42.bmp
257 | 420.bmp
258 | 421.bmp
259 | 424.bmp
260 | 427.bmp
261 | 433.bmp
262 | 438.bmp
263 | 439.bmp
264 | 440.bmp
265 | 442.bmp
266 | 445.bmp
267 | 451.bmp
268 | 453.bmp
269 | 455.bmp
270 | 459.bmp
271 | 460.bmp
272 | 461.bmp
273 | 462.bmp
274 | 463.bmp
275 | 464.bmp
276 | 472.bmp
277 | 474.bmp
278 | 476.bmp
279 | 477.bmp
280 | 479.bmp
281 | 485.bmp
282 | 487.bmp
283 | 488.bmp
284 | 489.bmp
285 | 490.bmp
286 | 491.bmp
287 | 492.bmp
288 | 496.bmp
289 | 499.bmp
290 | 5.bmp
291 | 50.bmp
292 | 501.bmp
293 | 502.bmp
294 | 505.bmp
295 | 506.bmp
296 | 511.bmp
297 | 512.bmp
298 | 515.bmp
299 | 516.bmp
300 | 521.bmp
301 | 522.bmp
302 | 526.bmp
303 | 528.bmp
304 | 529.bmp
305 | 53.bmp
306 | 532.bmp
307 | 534.JPG
308 | 535.JPG
309 | 538.JPG
310 | 540.JPG
311 | 541.JPG
312 | 542.JPG
313 | 546.JPG
314 | 547.JPG
315 | 548.JPG
316 | 549.JPG
317 | 55.bmp
318 | 551.JPG
319 | 553.JPG
320 | 554.JPG
321 | 556.JPG
322 | 557.JPG
323 | 559.JPG
324 | 56.bmp
325 | 561.JPG
326 | 562.JPG
327 | 563.JPG
328 | 57.bmp
329 | 570.JPG
330 | 572.JPG
331 | 573.JPG
332 | 574.JPG
333 | 575.JPG
334 | 576.JPG
335 | 577.JPG
336 | 578.JPG
337 | 579.JPG
338 | 58.bmp
339 | 580.JPG
340 | 581.JPG
341 | 582.JPG
342 | 585.JPG
343 | 586.JPG
344 | 587.JPG
345 | 589.JPG
346 | 59.bmp
347 | 590.JPG
348 | 591.JPG
349 | 594.JPG
350 | 595.JPG
351 | 599.JPG
352 | 604.JPG
353 | 606.JPG
354 | 608.JPG
355 | 615.JPG
356 | 616.JPG
357 | 62.bmp
358 | 622.JPG
359 | 623.JPG
360 | 624.JPG
361 | 625.JPG
362 | 626.JPG
363 | 628.JPG
364 | 63.bmp
365 | 630.JPG
366 | 631.JPG
367 | 634.JPG
368 | 635.JPG
369 | 636.JPG
370 | 637.JPG
371 | 641.JPG
372 | 643.JPG
373 | 647.JPG
374 | 648.JPG
375 | 650.JPG
376 | 651.JPG
377 | 653.JPG
378 | 655.JPG
379 | 656.JPG
380 | 658.JPG
381 | 660.JPG
382 | 661.JPG
383 | 662.JPG
384 | 663.JPG
385 | 664.JPG
386 | 668.JPG
387 | 669.JPG
388 | 67.bmp
389 | 672.JPG
390 | 673.JPG
391 | 674.JPG
392 | 675.JPG
393 | 677.JPG
394 | 678.JPG
395 | 679.JPG
396 | 68.bmp
397 | 682.JPG
398 | 684.JPG
399 | 685.JPG
400 | 687.JPG
401 | 689.JPG
402 | 690.JPG
403 | 691.JPG
404 | 692.JPG
405 | 695.JPG
406 | 699.JPG
407 | 7.bmp
408 | 700.JPG
409 | 703.JPG
410 | 704.JPG
411 | 705.JPG
412 | 708.JPG
413 | 71.bmp
414 | 713.JPG
415 | 715.JPG
416 | 717.JPG
417 | 72.bmp
418 | 721.JPG
419 | 722.JPG
420 | 723.JPG
421 | 724.JPG
422 | 725.JPG
423 | 726.JPG
424 | 727.JPG
425 | 728.JPG
426 | 73.bmp
427 | 730.JPG
428 | 731.JPG
429 | 734.JPG
430 | 735.JPG
431 | 736.JPG
432 | 737.JPG
433 | 738.JPG
434 | 739.JPG
435 | 74.bmp
436 | 741.JPG
437 | 742.JPG
438 | 743.JPG
439 | 744.JPG
440 | 749.JPG
441 | 75.bmp
442 | 751.JPG
443 | 754.JPG
444 | 755.JPG
445 | 756.JPG
446 | 758.JPG
447 | 760.JPG
448 | 762.JPG
449 | 763.JPG
450 | 765.JPG
451 | 766.JPG
452 | 767.JPG
453 | 768.JPG
454 | 769.JPG
455 | 77.bmp
456 | 772.JPG
457 | 78.bmp
458 | 780.JPG
459 | 782.JPG
460 | 787.JPG
461 | 789.JPG
462 | 790.JPG
463 | 792.JPG
464 | 793.JPG
465 | 794.JPG
466 | 796.JPG
467 | 797.JPG
468 | 799.JPG
469 | 800.JPG
470 | 802.JPG
471 | 803.JPG
472 | 805.JPG
473 | 807.JPG
474 | 808.JPG
475 | 809.JPG
476 | 81.bmp
477 | 810.JPG
478 | 812.JPG
479 | 813.JPG
480 | 817.JPG
481 | 818.JPG
482 | 819.JPG
483 | 820.JPG
484 | 822.JPG
485 | 824.JPG
486 | 825.JPG
487 | 826.JPG
488 | 827.JPG
489 | 829.JPG
490 | 830.JPG
491 | 831.JPG
492 | 834.JPG
493 | 835.JPG
494 | 836.JPG
495 | 837.JPG
496 | 84.bmp
497 | 841.JPG
498 | 843.JPG
499 | 846.JPG
500 | 847.JPG
501 | 848.JPG
502 | 851.JPG
503 | 853.JPG
504 | 859.JPG
505 | 860.JPG
506 | 861.JPG
507 | 863.JPG
508 | 864.JPG
509 | 87.bmp
510 | 872.JPG
511 | 876.JPG
512 | 883.JPG
513 | 884.JPG
514 | 885.JPG
515 | 886.JPG
516 | 888.JPG
517 | 89.bmp
518 | 891.JPG
519 | 892.JPG
520 | 896.JPG
521 | 897.JPG
522 | 899.JPG
523 | 900.JPG
524 | 904.JPG
525 | 91.bmp
526 | 910.JPG
527 | 913.JPG
528 | 914.JPG
529 | 916.JPG
530 | 917.JPG
531 | 918.JPG
532 | 92.bmp
533 | 920.JPG
534 | 921.JPG
535 | 927.JPG
536 | 928.JPG
537 | 929.JPG
538 | 930.JPG
539 | 933.JPG
540 | 934.JPG
541 | 935.JPG
542 | 936.JPG
543 | 937.JPG
544 | 939.JPG
545 | 94.bmp
546 | 940.JPG
547 | 941.JPG
548 | 942.JPG
549 | 943.JPG
550 | 944.JPG
551 | 945.JPG
552 | 946.JPG
553 | 948.JPG
554 | 950.JPG
555 | 951.JPG
556 | 956.JPG
557 | 957.JPG
558 | 958.JPG
559 | 959.JPG
560 | 96.bmp
561 | 961.JPG
562 | 963.JPG
563 | 964.JPG
564 | 965.JPG
565 | 966.JPG
566 | 969.JPG
567 | 970.JPG
568 | 974.JPG
569 | 979.JPG
570 | 980.JPG
571 | 981.JPG
572 | 982.JPG
573 | 983.JPG
574 | 984.JPG
575 | 985.JPG
576 | 987.JPG
577 | 988.JPG
578 | 989.JPG
579 | 991.JPG
580 | 997.JPG
581 | t1.bmp
582 | t2.bmp
583 | t4.bmp
584 | t6.bmp
585 |
--------------------------------------------------------------------------------
/src/phiqnet/layers/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junyongyou/lsct_phiqnet/0f7c627d2e2b5ec02c09cf74d380ad2ccc2da481/src/phiqnet/layers/__init__.py
--------------------------------------------------------------------------------
/src/phiqnet/layers/fpn.py:
--------------------------------------------------------------------------------
1 | """
2 | Reference: Feature pyramid networks for object detection, CVPR'17.
3 | """
4 | from tensorflow.keras.layers import Conv2D, Add
5 | from phiqnet.layers.upsample import Upsample
6 |
7 |
8 | def build_fpn(C2, C3, C4, C5, feature_size=256, name='fpn_'):
9 | """
10 | Create the FPN layers on top of the backbone features
11 | :param C2: Feature stage C2 from the backbone
12 | :param C3: Feature stage C3 from the backbone
13 | :param C4: Feature stage C4 from the backbone
14 | :param C5: Feature stage C5 from the backbone
15 | :param feature_size: number of channels of the resulting feature levels, set to the lowest channel dimension of the backbone feature maps, i.e., 256
16 | :param name: prefix used to name the FPN layers
17 | :return: pyramidal feature maps [P2, P3, P4, P5, P6]
18 | """
19 |
20 | # upsample C5 to get P5 from the FPN paper
21 | P5 = Conv2D(feature_size, kernel_size=1, strides=1, padding='same', name=name + 'C5_reduced')(C5)
22 | P5_upsampled = Upsample(name=name + 'P5_upsampled')([P5, C4])
23 | P5 = Conv2D(feature_size, kernel_size=3, strides=1, padding='same', name=name + 'P5')(P5)
24 |
25 | # add P5 elementwise to C4
26 | P4 = Conv2D(feature_size, kernel_size=1, strides=1, padding='same', name=name + 'C4_reduced')(C4)
27 | P4 = Add(name=name + 'P4_merged')([P5_upsampled, P4])
28 | P4_upsampled = Upsample(name=name + 'P4_upsampled')([P4, C3])
29 | P4 = Conv2D(feature_size, kernel_size=3, strides=1, padding='same', name=name + 'P4')(P4)
30 |
31 | # add P4 elementwise to C3
32 | P3 = Conv2D(feature_size, kernel_size=1, strides=1, padding='same', name=name + 'C3_reduced')(C3)
33 | P3 = Add(name=name + 'P3_merged')([P4_upsampled, P3])
34 | P3_upsampled = Upsample(name=name + 'P3_upsampled')([P3, C2])
35 | P3 = Conv2D(feature_size, kernel_size=3, strides=1, padding='same', name=name + 'P3')(P3)
36 |
37 | P2 = Conv2D(feature_size, kernel_size=1, strides=1, padding='same', name=name + 'C2_reduced')(C2)
38 | P2 = Add(name=name + 'P2_merged')([P3_upsampled, P2])
39 | P2 = Conv2D(feature_size, kernel_size=3, strides=1, padding='same', name=name + 'P2')(P2)
40 |
41 | # "P6 is obtained via a 3x3 stride-2 conv on C5"
42 | P6 = Conv2D(feature_size, kernel_size=3, strides=2, padding='same', name=name + 'P6')(C5)
43 |
44 | return [P2, P3, P4, P5, P6]
45 |
46 |
47 | def build_non_fpn(C2, C3, C4, C5, feature_size=256):
48 | """
49 | If no FPN is used, use a bottleneck layer to change the channel dimension to 256
50 | :param C2: Feature stage C2 from the backbone
51 | :param C3: Feature stage C3 from the backbone
52 | :param C4: Feature stage C4 from the backbone
53 | :param C5: Feature stage C5 from the backbone
54 | :param feature_size: number of channels of the resulting feature levels, set to the lowest channel dimension of the backbone feature maps, i.e., 256
55 | :return: pyramidal feature maps [P2, P3, P4, P5, P6]
56 | """
57 | P2 = Conv2D(feature_size, kernel_size=1, strides=1, padding='same', name='P2_bottleneck')(C2)
58 | P3 = Conv2D(feature_size, kernel_size=1, strides=1, padding='same', name='P3_bottleneck')(C3)
59 | P4 = Conv2D(feature_size, kernel_size=1, strides=1, padding='same', name='P4_bottleneck')(C4)
60 | P5 = Conv2D(feature_size, kernel_size=1, strides=1, padding='same', name='P5_bottleneck')(C5)
61 | P6 = Conv2D(feature_size, kernel_size=1, strides=2, padding='same', name='P6_bottleneck')(C5)
62 | return [P2, P3, P4, P5, P6]
63 |
64 |
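A minimal usage sketch for build_fpn, assuming hypothetical backbone feature maps with strides 4/8/16/32 and ResNet50-like channel widths (the shapes below are illustrative only):

from tensorflow.keras.layers import Input
from tensorflow.keras.models import Model
from phiqnet.layers.fpn import build_fpn

# Stand-ins for the backbone outputs C2..C5 (e.g., from a 224x224 input)
C2 = Input(shape=(56, 56, 256))
C3 = Input(shape=(28, 28, 512))
C4 = Input(shape=(14, 14, 1024))
C5 = Input(shape=(7, 7, 2048))

P2, P3, P4, P5, P6 = build_fpn(C2, C3, C4, C5, feature_size=256)
fpn_model = Model(inputs=[C2, C3, C4, C5], outputs=[P2, P3, P4, P5, P6])
fpn_model.summary()  # every output level has 256 channels; P6 is computed from C5 with stride 2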
--------------------------------------------------------------------------------
/src/phiqnet/layers/pan.py:
--------------------------------------------------------------------------------
1 | """
2 | Reference: Path Aggregation Network for Instance Segmentation., CVPR'18.
3 | """
4 | from tensorflow.keras.layers import Conv2D, Add
5 | from phiqnet.layers.upsample import Upsample
6 |
7 |
8 | def build_pan(C2, C3, C4, C5, feature_size=256, name='pan_', conv_on_P=False):
9 | """
10 | Create the PAN layers on top of the backbone features
11 | :param C2: Feature stage C2 from the backbone
12 | :param C3: Feature stage C3 from the backbone
13 | :param C4: Feature stage C4 from the backbone
14 | :param C5: Feature stage C5 from the backbone
15 | :param feature_size: number of channels of the resulting feature levels, set to the lowest channel dimension of the backbone feature maps, i.e., 256
16 | :param name: prefix used to name the PAN layers
17 | :param conv_on_P: flag to apply an additional 3x3 conv layer on the P feature maps
18 | :return: path-aggregated feature maps [N2, N3, N4, N5]
19 | """
20 | P5 = Conv2D(feature_size, kernel_size=1, strides=1, padding='same', name=name + 'C5_reduced')(C5)
21 | P5_upsampled = Upsample(name=name + 'P5_upsampled')([P5, C4])
22 |
23 | P4 = Conv2D(feature_size, kernel_size=1, strides=1, padding='same', name=name + 'C4_reduced')(C4)
24 | P4 = Add(name=name + 'P4_merged')([P5_upsampled, P4])
25 | P4_upsampled = Upsample(name=name + 'P4_upsampled')([P4, C3])
26 |
27 | P3 = Conv2D(feature_size, kernel_size=1, strides=1, padding='same', name=name + 'C3_reduced')(C3)
28 | P3 = Add(name=name + 'P3_merged')([P4_upsampled, P3])
29 | P3_upsampled = Upsample(name=name + 'P3_upsampled')([P3, C2])
30 |
31 | P2 = Conv2D(feature_size, kernel_size=1, strides=1, padding='same', name=name + 'C2_reduced')(C2)
32 | P2 = Add(name=name + 'P2_merged')([P3_upsampled, P2])
33 |
34 | if conv_on_P:
35 | P5 = Conv2D(feature_size, kernel_size=3, strides=1, padding='same', name=name + 'P5')(P5)
36 | P4 = Conv2D(feature_size, kernel_size=3, strides=1, padding='same', name=name + 'P4')(P4)
37 | P3 = Conv2D(feature_size, kernel_size=3, strides=1, padding='same', name=name + 'P3')(P3)
38 | P2 = Conv2D(feature_size, kernel_size=3, strides=1, padding='same', name=name + 'P2')(P2)
39 |
40 | N2 = P2
41 |
42 | N2_reduced = Conv2D(feature_size, kernel_size=3, strides=2, padding='same', name='N2_reduced')(N2)
43 | N3 = Add(name=name + 'N3_merged')([N2_reduced, P3])
44 | N3 = Conv2D(feature_size, kernel_size=3, strides=1, activation='relu', padding='same', name=name + 'N3')(N3)
45 |
46 | N3_reduced = Conv2D(feature_size, kernel_size=3, strides=2, padding='same', name='N3_reduced')(N3)
47 | N4 = Add(name=name + 'N4_merged')([N3_reduced, P4])
48 | N4 = Conv2D(feature_size, kernel_size=3, strides=1, activation='relu', padding='same', name=name + 'N4')(N4)
49 |
50 | N4_reduced = Conv2D(feature_size, kernel_size=3, strides=2, padding='same', name='N4_reduced')(N4)
51 | N5 = Add(name=name + 'N5_merged')([N4_reduced, P5])
52 | N5 = Conv2D(feature_size, kernel_size=3, strides=1, activation='relu', padding='same', name=name + 'N5')(N5)
53 |
54 | return [N2, N3, N4, N5]
--------------------------------------------------------------------------------
/src/phiqnet/layers/upsample.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | from tensorflow.keras.layers import Layer
3 |
4 |
5 | class Upsample(Layer):
6 | """ Keras layer for upsampling a Tensor to be the same shape as another Tensor.
7 | """
8 |
9 | def call(self, inputs, **kwargs):
10 | """Upsamples a tensor
11 | :param inputs: List of [source, target] tensors
12 | :param kwargs:
13 | :return: Upsampled tensor
14 | """
15 | source, target = inputs
16 | target_shape = tf.keras.backend.shape(target)
17 | if tf.keras.backend.image_data_format() == 'channels_first':
18 | source = tf.transpose(source, (0, 2, 3, 1))
19 | output = tf.image.resize(source, (target_shape[2], target_shape[3]),
20 | method=tf.image.ResizeMethod.NEAREST_NEIGHBOR)
21 | output = tf.transpose(output, (0, 3, 1, 2))
22 | return output
23 | else:
24 | return tf.image.resize(source, (target_shape[1], target_shape[2]),
25 | method=tf.image.ResizeMethod.NEAREST_NEIGHBOR)
26 |
27 | def compute_output_shape(self, input_shape):
28 | """
29 | Compute the output shapes given the input shapes
30 | :param input_shape: List of input shapes [source_shape, target_shape]
31 | :return: Tuple representing the output shapes
32 | """
33 | if tf.keras.backend.image_data_format() == 'channels_first':
34 | return (input_shape[0][0], input_shape[0][1]) + input_shape[1][2:4]
35 | else:
36 | return (input_shape[0][0],) + input_shape[1][1:3] + (input_shape[0][-1],)
37 |
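A quick eager-mode check of the Upsample layer with random tensors (shapes are illustrative): the output takes its spatial size from the target and its channel count from the source.

import tensorflow as tf
from phiqnet.layers.upsample import Upsample

source = tf.random.uniform((1, 7, 7, 256))    # low-resolution feature map
target = tf.random.uniform((1, 14, 14, 512))  # only its spatial size is used
output = Upsample()([source, target])
print(output.shape)  # (1, 14, 14, 256) under the default channels_last format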
--------------------------------------------------------------------------------
/src/phiqnet/loss/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junyongyou/lsct_phiqnet/0f7c627d2e2b5ec02c09cf74d380ad2ccc2da481/src/phiqnet/loss/__init__.py
--------------------------------------------------------------------------------
/src/phiqnet/loss/distribution_loss.py:
--------------------------------------------------------------------------------
1 | """
2 | Two loss functions that might be used in PHIQNet.
3 | """
4 | from tensorflow.keras import backend as K
5 | import numpy as np
6 | from tensorflow.keras.losses import categorical_crossentropy
7 |
8 |
9 | def distribution_loss(y_true, y_pred):
10 | """
11 | Loss on quality score distributions
12 | :param y_true: y_true
13 | :param y_pred: y_pred
14 | :return: loss
15 | """
16 | mos_scales = np.array([1, 2, 3, 4, 5])
17 | return K.mean(K.square((y_pred - y_true) * mos_scales)) # MSE
18 |
19 |
20 | def ordinal_loss(y_true, y_pred):
21 | """
22 | A simple ordinal loss based on quality score distributions
23 | :param y_true: y_true
24 | :param y_pred: y_pred
25 | :return: loss
26 | """
27 | weights = K.cast(K.abs(K.argmax(y_true, axis=1) - K.argmax(y_pred, axis=1))/(K.int_shape(y_pred)[1] - 1), dtype='float32')
28 | # return (1.0 + weights) * sigmoid_focal_crossentropy(y_true, y_pred)
29 | return (1.0 + weights) * categorical_crossentropy(y_true, y_pred)
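A small sanity check of both losses on hypothetical 5-level score distributions (values are illustrative only):

import tensorflow as tf
from phiqnet.loss.distribution_loss import distribution_loss, ordinal_loss

y_true = tf.constant([[0.0, 0.1, 0.6, 0.3, 0.0]])  # ground-truth distribution
y_pred = tf.constant([[0.0, 0.3, 0.5, 0.2, 0.0]])  # predicted distribution
print(distribution_loss(y_true, y_pred).numpy())   # MSE of the scale-weighted error
print(ordinal_loss(y_true, y_pred).numpy())        # crossentropy weighted by the rank distance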
--------------------------------------------------------------------------------
/src/phiqnet/model_evaluation/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junyongyou/lsct_phiqnet/0f7c627d2e2b5ec02c09cf74d380ad2ccc2da481/src/phiqnet/model_evaluation/__init__.py
--------------------------------------------------------------------------------
/src/phiqnet/model_evaluation/evaluation.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from PIL import Image
3 | import scipy.stats
4 |
5 |
6 | class ModelEvaluation:
7 | """
8 | Evaluate the model; this script is essentially copied from the evaluation callback.
9 | """
10 | def __init__(self, model, image_files, scores, using_single_mos, imagenet_pretrain=False):
11 | self.model = model
12 | self.image_files = image_files
13 | self.scores = scores
14 | self.using_single_mos = using_single_mos
15 | self.imagenet_pretrain = imagenet_pretrain
16 | self.mos_scales = np.array([1, 2, 3, 4, 5])
17 |
18 | def __get_prediction_mos(self, image):
19 | prediction = self.model.predict(np.expand_dims(image, axis=0))
20 | return prediction[0][0]
21 |
22 | def __get_prediction_distribution(self, image):
23 | # debug_model = Model(inputs=self.model.inputs, outputs=self.model.get_layer('fpn_concatenate').output)
24 | # debug_results = debug_model.predict(np.expand_dims(image, axis=0))
25 |
26 | prediction = self.model.predict(np.expand_dims(image, axis=0))
27 | prediction = np.sum(np.multiply(self.mos_scales, prediction[0]))
28 | return prediction
29 |
30 | def __evaluation__(self, result_file=None):
31 | predictions = []
32 | mos_scores = []
33 | if result_file is not None:
34 | rf = open(result_file, 'w+')
35 |
36 | for image_file, score in zip(self.image_files, self.scores):
37 | image = Image.open(image_file)
38 | image = np.asarray(image, dtype=np.float32)
39 | if self.imagenet_pretrain: # image normalization using TF approach
40 | image /= 127.5
41 | image -= 1.
42 | else: # Image normalization by subtracting mean and dividing std
43 | image[:, :, 0] -= 117.27205081970828
44 | image[:, :, 1] -= 106.23294835284031
45 | image[:, :, 2] -= 94.40750328714887
46 | image[:, :, 0] /= 59.112836751661085
47 | image[:, :, 1] /= 55.65498543815568
48 | image[:, :, 2] /= 54.9486100975773
49 |
50 | if self.using_single_mos:
51 | prediction = self.__get_prediction_mos(image)
52 | else:
53 | score = np.sum(np.multiply(self.mos_scales, score))
54 | prediction = self.__get_prediction_distribution(image)
55 |
56 | mos_scores.append(score)
57 |
58 | predictions.append(prediction)
59 | print('Real score: {}, predicted: {}'.format(score, prediction))
60 |
61 | if result_file is not None:
62 | rf.write('{},{},{}\n'.format(image_file, score, prediction))
63 |
64 | PLCC = scipy.stats.pearsonr(mos_scores, predictions)[0]
65 | SRCC = scipy.stats.spearmanr(mos_scores, predictions)[0]
66 | RMSE = np.sqrt(np.mean(np.subtract(predictions, mos_scores) ** 2))
67 | MAD = np.mean(np.abs(np.subtract(predictions, mos_scores)))
68 | print('\nPLCC: {}, SRCC: {}, RMSE: {}, MAD: {}'.format(PLCC, SRCC, RMSE, MAD))
69 |
70 | if result_file is not None:
71 | rf.close()
72 | return PLCC, SRCC, RMSE
73 |
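The distribution branch above reduces a predicted 5-bin distribution to a single MOS by taking its expected value; a hypothetical example of that reduction:

import numpy as np

mos_scales = np.array([1, 2, 3, 4, 5])
distribution = np.array([0.05, 0.15, 0.40, 0.30, 0.10])  # softmax output for one image
mos = np.sum(np.multiply(mos_scales, distribution))      # expected quality score
print(mos)  # ~3.25 (up to floating-point rounding)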
--------------------------------------------------------------------------------
/src/phiqnet/model_evaluation/validation.py:
--------------------------------------------------------------------------------
1 | """
2 | This script evaluates individual test sets (computing the evaluation criteria PLCC, SROCC, RMSE).
3 | PHIQNet should first be built, and then the weights file is loaded.
4 | """
5 | import tensorflow as tf
6 | from phiqnet.models.image_quality_model import phiq_net
7 | from phiqnet.utils.imageset_handler import get_image_scores, get_image_score_from_groups
8 | from phiqnet.model_evaluation.evaluation import ModelEvaluation
9 |
10 |
11 | def val_main(args):
12 | if args['n_quality_levels'] > 1:
13 | using_single_mos = False
14 | else:
15 | using_single_mos = True
16 |
17 | if args['model_weights'] is not None and ('resnet' in args['backbone'] or args['backbone'] == 'inception'):
18 | imagenet_pretrain = True
19 | else:
20 | imagenet_pretrain = False
21 |
22 | val_folders = [
23 | # r'..\databases\val\koniq_normal',]
24 | r'..\databases\val\koniq_small',]
25 | # r'..\databases\train\live',
26 | # r'..\databases\val\live']
27 |
28 | koniq_mos_file = r'..\databases\koniq10k_images_scores.csv'
29 | live_mos_file = r'..\databases\live_mos.csv'
30 |
31 | image_scores = get_image_scores(koniq_mos_file, live_mos_file, using_single_mos=using_single_mos)
32 | test_image_file_groups, test_score_groups = get_image_score_from_groups(val_folders, image_scores)
33 |
34 | test_image_files = []
35 | test_scores = []
36 | for test_image_file_group, test_score_group in zip(test_image_file_groups, test_score_groups):
37 | test_image_files.extend(test_image_file_group)
38 | test_scores.extend(test_score_group)
39 |
40 | model = phiq_net(n_quality_levels=args['n_quality_levels'],
41 | naive_backbone=args['naive_backbone'],
42 | backbone=args['backbone'],
43 | fpn_type=args['fpn_type'])
44 | model.load_weights(args['model_weights'])
45 |
46 | # model1 = phiq_net(n_quality_levels=args['n_quality_levels'],
47 | # naive_backbone=args['naive_backbone'],
48 | # backbone=args['backbone'],
49 | # fpn_type=args['fpn_type'])
50 | # model1.load_weights(r'..\\model_weights\PHIQNet.h5', by_name=True)
51 | # model.load_weights(args['model_weights'])
52 | # for i in range(250):
53 | # extracted_weights = model1.layers[i].get_weights()
54 | # model.layers[i].set_weights(extracted_weights)
55 |
56 | evaluation = ModelEvaluation(model, test_image_files, test_scores, using_single_mos,
57 | imagenet_pretrain=imagenet_pretrain)
58 | plcc, srcc, rmse = evaluation.__evaluation__()
59 |
60 |
61 | if __name__ == '__main__':
62 | gpus = tf.config.experimental.list_physical_devices('GPU')
63 | tf.config.experimental.set_visible_devices(gpus[1], 'GPU')
64 |
65 | args = {}
66 | args['n_quality_levels'] = 5
67 | args['naive_backbone'] = False
68 | args['backbone'] = 'resnet50'
69 | args['fpn_type'] = 'fpn'
70 | # args['model_weights'] = r'..\databases\results\resnet50_mos_attention_fpn\38_0.0008_0.0208_0.0998_0.2286.h5'
71 | # args['model_weights'] = r'..\databases\results\resnet50_mos_attention_fpn_lr\96_0.0040_0.0488_0.1023_0.2326.h5'
72 | # args['model_weights'] = r'..\databases\results\resnet50_mos_attention_bifpn_lr\65_0.0080_0.0699_0.0621_0.1871.h5'
73 | # args['model_weights'] = r'..\databases\results\resnet50_distribution_attention_fpn_lr\61_0.8988_0.1192_1.0691_0.2386.h5'
74 | # args['model_weights'] = r'..\databases\results_distribution_loss\resnet50_distribution_attention_bifpn_lr\107_0.0269_0.8673_0.1975_1.0134.h5'
75 | # args['model_weights'] = r'..\databases\results_distribution_loss\resnet50_distribution_attention_fpn_lr_avg\117_0.0183_0.8621_0.2032_1.0449.h5'
76 | # args['model_weights'] = r'..\databases\results_distribution_loss\\resnet50_distribution_fpn_lr_avg\118_0.0255_0.8632_0.2084_1.0571.h5'
77 | # args['model_weights'] = r'..\databases\results_distribution_loss\resnet50_distribution_fpn_lr_avg_cbam_finetune\32_0.0792_0.8892_0.2181_1.0748.h5'
78 | # args['model_weights'] = r'..\databases\experiments\resnet50_distribution_attention_fpn_finetune\117_0.8532_1.0189.h5'
79 | # args['model_weights'] = r'..\databases\experiments\resnet50_mos_attention_fpn\74_0.0027_0.1180.h5'
80 | # args['model_weights'] = r'..\databases\experiments\resnet50_distribution_attention_fpn_no_imageaug\91_0.8545_1.0103.h5'
81 | # args['model_weights'] = r'..\databases\experiments\resnet50_mos_attention_fpn_finetune\45_0.0003_0.0950.h5'
82 | # args['model_weights'] = r'..\databases\experiments\koniq_normal\resnet50_mos_attention_fpn\37_0.0102_0.0499.h5'
83 | args['model_weights'] = r'..\databases\experiments\koniq_normal\resnet50_distribution_attention_fpn_finetune\09_0.8493_0.9294.h5'
84 |
85 | val_main(args)
--------------------------------------------------------------------------------
/src/phiqnet/model_evaluation/validation_spag.py:
--------------------------------------------------------------------------------
1 | from phiqnet.models.image_quality_model import phiq_net
2 | from phiqnet.utils.imageset_handler import get_image_score_from_groups
3 | from phiqnet.model_evaluation.evaluation import ModelEvaluation
4 |
5 |
6 | def get_image_scores(mos_file):
7 | image_files = {}
8 | with open(mos_file, 'r+') as f:
9 | lines = f.readlines()
10 | for line in lines:
11 | content = line.split(',')
12 | image_file = content[0]
13 | score = float(content[1]) / 25. + 1
14 | image_files[image_file] = score
15 |
16 | return image_files
17 |
18 |
19 | def val_main(args):
20 | if args['n_quality_levels'] > 1:
21 | using_single_mos = False
22 | else:
23 | using_single_mos = True
24 |
25 | if args['model_weights'] is not None and ('resnet' in args['backbone'] or args['backbone'] == 'inception'):
26 | imagenet_pretrain = True
27 | else:
28 | imagenet_pretrain = False
29 |
30 | val_folders = [r'F:\SPAG_image_quality_dataset\TestImage']
31 | spag_mos_file = r'..\databases\spag\image_mos.csv'
32 | image_scores = get_image_scores(spag_mos_file)
33 | test_image_file_groups, test_score_groups = get_image_score_from_groups(val_folders, image_scores)
34 |
35 | test_image_files = []
36 | test_scores = []
37 | for test_image_file_group, test_score_group in zip(test_image_file_groups, test_score_groups):
38 | test_image_files.extend(test_image_file_group)
39 | test_scores.extend(test_score_group)
40 |
41 | model = phiq_net(n_quality_levels=args['n_quality_levels'],
42 | naive_backbone=args['naive_backbone'],
43 | backbone=args['backbone'],
44 | fpn_type=args['fpn_type'])
45 | model.load_weights(args['model_weights'])
46 |
47 | evaluation = ModelEvaluation(model, test_image_files, test_scores, using_single_mos,
48 | imagenet_pretrain=imagenet_pretrain)
49 | result_file = r'..\databases\spag\result.csv'
50 | plcc, srcc, rmse = evaluation.__evaluation__(result_file)
51 |
52 |
53 | if __name__ == '__main__':
54 | # gpus = tf.config.experimental.list_physical_devices('GPU')
55 | # tf.config.experimental.set_visible_devices(gpus[1], 'GPU')
56 |
57 | args = {}
58 | # args['result_folder'] = r'..\databases\results'
59 | args['n_quality_levels'] = 5
60 | args['naive_backbone'] = False
61 | args['backbone'] = 'resnet50'
62 | args['fpn_type'] = 'fpn'
63 | args['model_weights'] = r'..\\model_weights\PHIQNet.h5'
64 |
65 | val_main(args)
--------------------------------------------------------------------------------
/src/phiqnet/models/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junyongyou/lsct_phiqnet/0f7c627d2e2b5ec02c09cf74d380ad2ccc2da481/src/phiqnet/models/__init__.py
--------------------------------------------------------------------------------
/src/phiqnet/models/image_quality_model.py:
--------------------------------------------------------------------------------
1 | """
2 | Main function to build PHIQNet.
3 | """
4 | from phiqnet.layers.fpn import build_fpn, build_non_fpn
5 | from phiqnet.layers.bi_fpn import build_bifpn
6 | from phiqnet.layers.pan import build_pan
7 | from phiqnet.backbone.ResNest import ResNest
8 | from tensorflow.keras.layers import Input, Dense, Average, GlobalAveragePooling2D
9 | from tensorflow.keras.models import Model
10 | from phiqnet.models.prediction_model_contrast_sensitivity import channel_spatial_attention
11 | from phiqnet.backbone.resnet50 import ResNet50
12 | from phiqnet.backbone.resnet_family import ResNet18
13 | from phiqnet.backbone.resnet_feature_maps import ResNet152v2, ResNet152
14 | from phiqnet.backbone.vgg16 import VGG16
15 |
16 |
17 | def phiq_net(n_quality_levels, input_shape=(None, None, 3), naive_backbone=False, backbone='resnet50', fpn_type='fpn',
18 | attention_module=True):
19 | """
20 | Build PHIQNet
21 | :param n_quality_levels: 1 for MOS prediction and 5 for score distribution
22 | :param input_shape: image input shape, kept unspecified so arbitrary resolutions can be fed
23 | :param naive_backbone: if set to True, use the backbone only, i.e., without neck and head
24 | :param backbone: backbone network (resnet50/18/152v2, resnest, vgg16, etc.)
25 | :param fpn_type: chosen from 'fpn', 'bifpn' or 'pan'
26 | :param attention_module: flag to use the attention module or not
27 | :return: PHIQNet model
28 | """
29 | inputs = Input(shape=input_shape)
30 | n_classes = None
31 | return_feature_maps = True
32 | if naive_backbone:
33 | n_classes = 1
34 | return_feature_maps = False
35 | fc_activation = None
36 | verbose = False
37 | if backbone == 'resnest50':
38 | backbone_model = ResNest(verbose=verbose,
39 | n_classes=n_classes, dropout_rate=0, fc_activation=fc_activation,
40 | blocks_set=[3, 4, 6, 3], radix=2, groups=1, bottleneck_width=64, deep_stem=True,
41 | stem_width=32, avg_down=True, avd=True, avd_first=False,
42 | return_feature_maps=return_feature_maps).build(inputs)
43 | elif backbone == 'resnest34':
44 | backbone_model = ResNest(verbose=verbose,
45 | n_classes=n_classes, dropout_rate=0, fc_activation=fc_activation,
46 | blocks_set=[3, 4, 6, 3], radix=2, groups=1, bottleneck_width=64, deep_stem=True,
47 | stem_width=16, avg_down=True, avd=True, avd_first=False, using_basic_block=True,
48 | return_feature_maps=return_feature_maps).build(inputs)
49 | elif backbone == 'resnest18':
50 | backbone_model = ResNest(verbose=verbose,
51 | n_classes=n_classes, dropout_rate=0, fc_activation=fc_activation,
52 | blocks_set=[2, 2, 2, 2], radix=2, groups=1, bottleneck_width=64, deep_stem=True,
53 | stem_width=16, avg_down=True, avd=True, avd_first=False, using_basic_block=True,
54 | return_feature_maps=return_feature_maps).build(inputs)
55 | elif backbone == 'resnet50':
56 | backbone_model = ResNet50(inputs,
57 | return_feature_maps=return_feature_maps)
58 | elif backbone == 'resnet18':
59 | backbone_model = ResNet18(input_tensor=inputs,
60 | weights=None,
61 | include_top=False)
62 | elif backbone == 'resnet152v2':
63 | backbone_model = ResNet152v2(inputs)
64 | elif backbone == 'resnet152':
65 | backbone_model = ResNet152(inputs)
66 | elif backbone == 'vgg16':
67 | backbone_model = VGG16(inputs)
68 | else:
69 | raise NotImplementedError
70 |
71 | if naive_backbone:
72 | backbone_model.summary()
73 | return backbone_model
74 |
75 | C2, C3, C4, C5 = backbone_model.outputs
76 | pyramid_feature_size = 256
77 | if fpn_type == 'fpn':
78 | fpn_features = build_fpn(C2, C3, C4, C5, feature_size=pyramid_feature_size)
79 | elif fpn_type == 'pan':
80 | fpn_features = build_pan(C2, C3, C4, C5, feature_size=pyramid_feature_size)
81 | elif fpn_type == 'bifpn':
82 | for i in range(3):
83 | if i == 0:
84 | fpn_features = [C3, C4, C5]
85 | fpn_features = build_bifpn(fpn_features, pyramid_feature_size, i)
86 | else:
87 | fpn_features = build_non_fpn(C2, C3, C4, C5, feature_size=pyramid_feature_size)
88 |
89 | PF = []
90 | for i, P in enumerate(fpn_features):
91 | if attention_module:
92 | PF.append(channel_spatial_attention(P, n_quality_levels, 'P{}'.format(i)))
93 | else:
94 | outputs = GlobalAveragePooling2D(name='avg_pool_{}'.format(i))(P)
95 | outputs = Dense(n_quality_levels, activation='softmax', name='fc_prediction_{}'.format(i))(outputs)
96 | PF.append(outputs)
97 | outputs = Average(name='PF_average')(PF)
98 |
99 | # pyramids = Concatenate(axis=1)(PF)
100 | # outputs = Dense(1, activation='linear', name='final_fc', use_bias=True)(pyramids)
101 |
102 | model = Model(inputs=inputs, outputs=outputs)
103 | model.summary()
104 | return model
105 |
106 |
107 | if __name__ == '__main__':
108 | input_shape = [None, None, 3]
109 | # model = phiq_net(n_quality_levels=5, input_shape=input_shape, backbone='resnet152v2')
110 | # model = phiq_net(n_quality_levels=5, input_shape=input_shape, backbone='resnet50')
111 | model = phiq_net(n_quality_levels=5, input_shape=input_shape, backbone='vgg16')
112 |
--------------------------------------------------------------------------------
/src/phiqnet/models/prediction_model_contrast_sensitivity.py:
--------------------------------------------------------------------------------
1 | from tensorflow.keras.layers import GlobalAveragePooling2D, GlobalMaxPooling2D, Dense, Reshape, Average, \
2 | multiply, Lambda, Conv2D, Concatenate
3 | from tensorflow.keras import backend as K
4 |
5 |
6 | def channel_spatial_attention(input_feature, n_quality_levels, name, return_feature_map=False, return_features=False):
7 | """
8 | Attention model for implementing channel and spatial attention in IQA
9 | :param input_feature: feature maps from FPN or backbone
10 | :param n_quality_levels: 1 for MOS prediction and 5 for score distribution
11 | :param name: name of individual layers
12 | :param return_feature_map: flag to return feature map or not
13 | :param return_features: flag to return feature vector or not
14 | :return: output of attention module
15 | """
16 | channel_input = input_feature
17 | spatial_input = input_feature
18 |
19 | channel = input_feature.shape[-1]
20 | shared_dense_layer = Dense(channel,
21 | kernel_initializer='he_normal',
22 | use_bias=True,
23 | bias_initializer='zeros',
24 | activation='sigmoid'
25 | )
26 |
27 | avg_pool_channel = GlobalAveragePooling2D()(channel_input)
28 | avg_pool_channel = Reshape((1, channel))(avg_pool_channel)
29 | avg_pool_channel = shared_dense_layer(avg_pool_channel)
30 |
31 | max_pool_channel = GlobalMaxPooling2D()(channel_input)
32 | max_pool_channel = Reshape((1, channel))(max_pool_channel)
33 | max_pool_channel = shared_dense_layer(max_pool_channel)
34 |
35 | channel_weights = Average()([avg_pool_channel, max_pool_channel])
36 |
37 | avg_pool_spatial = Lambda(lambda x: K.mean(x, axis=3, keepdims=True))(spatial_input)
38 | max_pool_spatial = Lambda(lambda x: K.max(x, axis=3, keepdims=True))(spatial_input)
39 | spatial_weights = Concatenate(axis=3)([avg_pool_spatial, max_pool_spatial])
40 | spatial_weights = Conv2D(filters=1,
41 | kernel_size=7,
42 | strides=1,
43 | padding='same',
44 | activation='sigmoid',
45 | kernel_initializer='he_normal',
46 | use_bias=False)(spatial_weights)
47 |
48 | outputs = multiply([multiply([input_feature, channel_weights]), spatial_weights])
49 |
50 | if return_feature_map:
51 | return outputs
52 |
53 | outputs = GlobalAveragePooling2D(name=name + '_avg_pool')(outputs)
54 | if return_features:
55 | return outputs
56 |
57 | if n_quality_levels > 1:
58 | outputs = Dense(n_quality_levels, activation='softmax', name=name + '_fc_prediction')(outputs)
59 | else:
60 | outputs = Dense(n_quality_levels, activation='linear', name=name + 'fc_prediction')(outputs)
61 |
62 | return outputs
63 |
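A minimal sketch of attaching this attention head to a single pyramid level (shape and name are illustrative):

from tensorflow.keras.layers import Input
from tensorflow.keras.models import Model
from phiqnet.models.prediction_model_contrast_sensitivity import channel_spatial_attention

P3 = Input(shape=(28, 28, 256))  # one hypothetical FPN level
head = channel_spatial_attention(P3, n_quality_levels=5, name='P3')
Model(inputs=P3, outputs=head).summary()  # ends in a 5-bin softmax prediction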
--------------------------------------------------------------------------------
/src/phiqnet/pretrained_weights/README.md:
--------------------------------------------------------------------------------
1 | # Pretrained ImageNet weights
2 |
3 | Pretrained ImageNet weights for different backbone networks should be put here, and the weights can be downloaded from:
4 |
5 | [VGG16](https://storage.googleapis.com/tensorflow/keras-applications/vgg16/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5)
6 |
7 | [ResNet50](https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5)
8 |
9 | [ResNet152](https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet152_weights_tf_dim_ordering_tf_kernels_notop.h5)
10 |
11 | [ResNet152V2](https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet152v2_weights_tf_dim_ordering_tf_kernels_notop.h5)
12 |
13 | [InceptionResNetV2](https://storage.googleapis.com/tensorflow/keras-applications/inception_resnet_v2/inception_resnet_v2_weights_tf_dim_ordering_tf_kernels_notop.h5)
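As a convenience, a minimal download sketch using tf.keras.utils.get_file; the relative target path below is an assumption based on the repository layout, so adjust it as needed:

import tensorflow as tf

# Assumed destination: ./src/phiqnet/pretrained_weights/<file name>
URL = ('https://storage.googleapis.com/tensorflow/keras-applications/resnet/'
       'resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5')
tf.keras.utils.get_file(fname='resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5',
                        origin=URL,
                        cache_dir='.',
                        cache_subdir='src/phiqnet/pretrained_weights')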
--------------------------------------------------------------------------------
/src/phiqnet/train/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junyongyou/lsct_phiqnet/0f7c627d2e2b5ec02c09cf74d380ad2ccc2da481/src/phiqnet/train/__init__.py
--------------------------------------------------------------------------------
/src/phiqnet/train/group_generator.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import random
3 | from PIL import Image
4 | from tensorflow.keras.utils import Sequence
5 | from imgaug import augmenters as iaa
6 |
7 |
8 | class GroupGenerator(Sequence):
9 | """
10 | Generator supplying image data in groups; each dataset should go into its own group because datasets can have different resolutions
11 | """
12 | def __init__(self, image_file_groups, score_groups, batch_size=16, image_aug=True, shuffle=True, imagenet_pretrain=False):
13 | self.image_file_groups = image_file_groups
14 | self.score_groups = score_groups
15 | self.batch_size = batch_size
16 | self.shuffle = shuffle
17 | self.imagenet_pretrain = imagenet_pretrain
18 | if image_aug:
19 | # do image augmentation by left-right flip
20 | self.seq = iaa.Sequential([iaa.Fliplr(0.5)])
21 | self.image_aug = image_aug
22 | self.on_epoch_end()
23 |
24 | def __len__(self):
25 | return sum(self.group_length)
26 |
27 | def on_epoch_end(self):
28 | if self.shuffle:
29 | # shuffle both group orders and image orders in each group
30 | images_scores = list(zip(self.image_file_groups, self.score_groups))
31 | random.shuffle(images_scores)
32 | self.image_file_groups, self.score_groups = zip(*images_scores)
33 |
34 | self.index_groups = []
35 | self.group_length = []
36 | for i in range(len(self.image_file_groups)):
37 | self.index_groups.append(np.arange(len(self.image_file_groups[i])))
38 | self.group_length.append(len(self.image_file_groups[i]) // self.batch_size)
39 |
40 | for i in range(len(self.index_groups)):
41 | np.random.shuffle(self.index_groups[i])
42 |
43 | def __getitem__(self, item):
44 | lens = 0
45 | idx_0 = len(self.group_length) - 1
46 | for i, data_len in enumerate(self.group_length):
47 | lens += data_len
48 | if item < lens:
49 | idx_0 = i
50 | break
51 | item -= (lens - self.group_length[idx_0])
52 |
53 | images = []
54 | y_scores = []
55 |
56 | for idx_1 in self.index_groups[idx_0][item * self.batch_size: (item + 1) * self.batch_size]:
57 | image = np.asarray(Image.open(self.image_file_groups[idx_0][idx_1]), dtype=np.float32)
58 | if self.imagenet_pretrain:
59 | # ImageNet normalization
60 | image /= 127.5
61 | image -= 1.
62 | else:
63 | # Normalization based on the combined database consisting of KonIQ-10k and LIVE-Wild datasets
64 | image[:, :, 0] -= 117.27205081970828
65 | image[:, :, 1] -= 106.23294835284031
66 | image[:, :, 2] -= 94.40750328714887
67 | image[:, :, 0] /= 59.112836751661085
68 | image[:, :, 1] /= 55.65498543815568
69 | image[:, :, 2] /= 54.9486100975773
70 | images.append(image)
71 | y_scores.append(self.score_groups[idx_0][idx_1])
72 |
73 | if self.image_aug:
74 | images_aug = self.seq(images=images)
75 | return np.array(images_aug), np.array(y_scores)
76 | else:
77 | return np.array(images), np.array(y_scores)
78 |
79 |
80 |
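A minimal usage sketch with hypothetical file paths and 5-bin score distributions; one group is used per dataset so that every batch contains images of a single resolution:

from phiqnet.train.group_generator import GroupGenerator

image_file_groups = [['koniq/1.jpg', 'koniq/2.jpg'],  # hypothetical KonIQ-10k group
                     ['live/3.bmp', 'live/4.bmp']]    # hypothetical LIVE-Wild group
score_groups = [[[0.0, 0.1, 0.6, 0.3, 0.0], [0.0, 0.0, 0.2, 0.5, 0.3]],
                [[0.1, 0.4, 0.4, 0.1, 0.0], [0.0, 0.2, 0.6, 0.2, 0.0]]]

train_generator = GroupGenerator(image_file_groups, score_groups,
                                 batch_size=2, image_aug=True, imagenet_pretrain=True)
# model.fit(train_generator, epochs=..., validation_data=...)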
--------------------------------------------------------------------------------
/src/phiqnet/train/plot_train.py:
--------------------------------------------------------------------------------
1 | import matplotlib.pyplot as plt
2 | import os
3 |
4 |
5 | def get_all_metrics(history):
6 | metrics = set()
7 | for metric in history.history:
8 | if 'val_' in metric:
9 | metric = metric.replace('val_', '')  # strip the 'val_' prefix to collect base metric names
10 | metrics.add(metric)
11 | return metrics
12 |
13 |
14 | def plot_history(history, result_dir, prefix):
15 | """
16 | Plots the model training history in each epoch
17 | :param history: generated during model training
18 | :param result_dir: save the training history in this folder
19 | :return: None
20 | """
21 | try:
22 | metrics = get_all_metrics(history)
23 | for metric in metrics:
24 | loss_metric = 'val_' + metric
25 | if metric in history.history and loss_metric in history.history:
26 | plt.plot(history.history[metric], 'g.-')
27 | plt.plot(history.history[loss_metric], 'r.-')
28 | plt.title(metric)
29 | plt.xlabel('epoch')
30 | plt.ylabel(metric)
31 | plt.legend(['train', 'val'])
32 | plt.savefig(os.path.join(result_dir, prefix + '_' + metric + '.png'))
33 | plt.close()
34 | except Exception as e:
35 | print(e)
36 |
--------------------------------------------------------------------------------
/src/phiqnet/utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/junyongyou/lsct_phiqnet/0f7c627d2e2b5ec02c09cf74d380ad2ccc2da481/src/phiqnet/utils/__init__.py
--------------------------------------------------------------------------------