├── .idea ├── .gitignore ├── inspectionProfiles │ ├── Project_Default.xml │ └── profiles_settings.xml ├── lsct_phiqnet.iml ├── misc.xml ├── modules.xml └── vcs.xml ├── LICENSE ├── README.md ├── requirements.txt └── src ├── brisque ├── __init__.py ├── frame_features_video_folders_brisque.py └── test_features.py ├── callbacks ├── __init__.py ├── callbacks.py ├── csv_callback.py ├── evaluation_callback_generator.py ├── evaluation_vq_generator.py └── warmup_cosine_decay_scheduler.py ├── cnn_lstm ├── __init__.py ├── attention.py ├── attention_with_context.py ├── generate_random_split.py ├── train_cnn_lstm_params_search.py ├── train_cnn_lstm_params_search_konvid.py ├── train_cnn_lstm_params_search_konvid_oldfeatures.py ├── train_cnn_lstm_params_search_vsfa_features_no_flip.py ├── train_cnn_lstm_params_search_vsfa_features_with_flip.py └── train_cnn_lstm_params_search_vsfa_features_with_flip_attention.py ├── examples ├── __init__.py ├── frame_features_video.py ├── image_quality_prediction.py ├── sample_data │ ├── example_image_1 (mos=2.9).jpg │ ├── example_image_2 (mos=2.865).jpg │ └── example_video (mos=3.24).mp4 └── video_quality_prediction.py ├── ffmpeg └── video_handler.py ├── lsct ├── README.md ├── __init__.py ├── ablations │ ├── __init__.py │ ├── cnn_lstm_phiqnet.py │ ├── frame_features_video_folders_resnet50.py │ ├── frame_features_video_folders_resnet50_1.py │ ├── train_lsct_clip_length_search.py │ ├── train_lsct_resnet50.py │ └── transformer_phiqnet.py ├── bin │ ├── __init__.py │ ├── train_lsct_all_databases.py │ ├── train_lsct_all_databases_10runs.py │ ├── train_lsct_all_databases_triq_features_10runs.py │ ├── train_lsct_params_search.py │ ├── train_lsct_params_search_1.py │ ├── train_lsct_single_databases.py │ └── train_lsct_test_on_live.py ├── meta_data │ ├── all_video_mos.csv │ ├── all_video_mos_Resnet50.csv │ ├── all_video_mos_Resnet50_vsfa.csv │ ├── all_video_mos_triq.csv │ ├── all_vids.pkl │ ├── ugc_chunks.pkl │ └── ugc_mos_original.xlsx ├── models │ ├── __init__.py │ ├── cnn_1d.py │ ├── cnn_lstm_model.py │ ├── lsct_phiqnet_model.py │ └── video_quality_transformer.py ├── train │ ├── __init__.py │ ├── train.py │ ├── train_cnn_lstm.py │ ├── video_clip_feature_generator.py │ ├── video_clip_feature_generator_vsfa.py │ └── video_clip_resnet_feature_generator.py └── utils │ ├── __init__.py │ ├── frame_features_video_folders.py │ ├── frame_features_video_folders_Resnet50.py │ ├── gather_video_ids.py │ └── ugc_chunk_generator.py ├── model_weights ├── LSCT.h5 └── README.md └── phiqnet ├── README.md ├── __init__.py ├── backbone ├── ResNest.py ├── __init__.py ├── _common_blocks.py ├── resnet50.py ├── resnet_config.py ├── resnet_family.py ├── resnet_feature_maps.py ├── resnext50.py ├── vgg16.py └── weights.py ├── bin ├── __init__.py ├── train_resnet152_distribution_fpn_attention_imageaug.py ├── train_resnet152_distribution_fpn_attention_imageaug_finetune.py ├── train_resnet152v2_distribution_fpn_attention_imageaug.py ├── train_resnet50_distribution_NOfpn_attention_imageaug.py ├── train_resnet50_distribution_fpn_NOattention_imageaug.py ├── train_resnet50_distribution_fpn_attention_NOimageaug.py ├── train_resnet50_distribution_fpn_attention_imageaug.py ├── train_resnet50_distribution_fpn_attention_imageaug_NOpretrain.py ├── train_resnet50_distribution_fpn_attention_imageaug_finetune.py ├── train_resnet50_distribution_fpn_attention_imageaug_freezebackbone.py ├── train_resnet50_distribution_fpn_attention_imageaug_koniq10k.py ├── 
train_resnet50_distribution_fpn_attention_imageaug_koniq10k_finetune.py ├── train_resnet50_distribution_fpn_attention_imageaug_koniq_small.py ├── train_resnet50_distribution_fpn_attention_imageaug_koniq_small_finetune.py ├── train_resnet50_distribution_fpn_attention_imageaug_koniqall.py ├── train_resnet50_distribution_fpn_attention_imageaug_koniqall_finetune.py ├── train_resnet50_distribution_fpn_attention_imageaug_test.py ├── train_resnet50_mos_fpn_attention_imageaug.py ├── train_resnet50_mos_fpn_attention_imageaug_finetune.py ├── train_resnet50_mos_fpn_attention_imageaug_koniq_small.py ├── train_resnet50_mos_fpn_attention_imageaug_koniq_small_finetune.py ├── train_resnet50_mos_fpn_attention_imageaug_koniqall.py ├── train_resnet50_mos_fpn_attention_imageaug_koniqall_finetune.py └── train_vgg16_distribution_fpn_attention_imageaug.py ├── databases ├── README.md ├── koniq10k_images_scores.csv ├── live_mos.csv ├── random_split_imageset.py ├── test_images_koniq.csv ├── test_images_live.csv ├── train_images_koniq.csv └── train_images_live.csv ├── layers ├── __init__.py ├── bi_fpn.py ├── fpn.py ├── pan.py └── upsample.py ├── loss ├── __init__.py └── distribution_loss.py ├── model_evaluation ├── __init__.py ├── evaluation.py ├── validation.py └── validation_spag.py ├── models ├── __init__.py ├── image_quality_model.py ├── model_analysis.py └── prediction_model_contrast_sensitivity.py ├── pretrained_weights └── README.md ├── train ├── __init__.py ├── group_generator.py ├── plot_train.py └── train.py └── utils ├── __init__.py └── imageset_handler.py /.idea/.gitignore: -------------------------------------------------------------------------------- 1 | # Default ignored files 2 | /shelf/ 3 | /workspace.xml 4 | -------------------------------------------------------------------------------- /.idea/inspectionProfiles/Project_Default.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 14 | -------------------------------------------------------------------------------- /.idea/inspectionProfiles/profiles_settings.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 6 | -------------------------------------------------------------------------------- /.idea/lsct_phiqnet.iml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | -------------------------------------------------------------------------------- /.idea/misc.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | -------------------------------------------------------------------------------- /.idea/modules.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /.idea/vcs.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, 
and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # LSCT-PHIQNet Implementation 2 | 3 | TF-Keras implementation of LSCT-PHIQNet as described in [Long Short-term Convolutional Transformer for No-Reference Video Quality Assessment](https://dl.acm.org/doi/abs/10.1145/3474085.3475368). 4 | 5 | There are two main modules: 6 | 7 | phiqnet is the implementation of PHIQNet for NR-IQA; 8 | 9 | lsct is the implementation of LSCT for NR-VQA based on PHIQNet features. 10 | 11 | Please see respective README in individual modules. 12 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | tensorflow-gpu~=2.2.0 2 | numpy~=1.17.0 3 | six~=1.12.0 4 | scipy~=1.4.1 5 | pillow~=6.1.0 6 | matplotlib~=3.1.0 7 | sklearn 8 | scikit-learn~=0.21.3 9 | opencv-python~=4.1.1.26 10 | h5py~=2.10.0 11 | pyyaml~=5.3 12 | imgaug~=0.4.0 13 | pandas~=1.0.2 14 | future~=0.18.2 15 | munch~=2.5.0 16 | scikit-image~=0.15.0 17 | tensorflow_addons -------------------------------------------------------------------------------- /src/brisque/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junyongyou/lsct_phiqnet/0f7c627d2e2b5ec02c09cf74d380ad2ccc2da481/src/brisque/__init__.py -------------------------------------------------------------------------------- /src/brisque/test_features.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | f1 = np.load(r'C:\vq_datasets\BRISQUE_frame_features\live_vqc\Video\A001.npy') 4 | f2 = np.load(r'C:\vq_datasets\BRISQUE_frame_features_flipped\live_vqc\Video\A001.npy') 5 | t = 0 -------------------------------------------------------------------------------- /src/callbacks/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junyongyou/lsct_phiqnet/0f7c627d2e2b5ec02c09cf74d380ad2ccc2da481/src/callbacks/__init__.py -------------------------------------------------------------------------------- /src/callbacks/callbacks.py: -------------------------------------------------------------------------------- 1 | import os 2 | from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, TensorBoard 3 | from callbacks.csv_callback import MyCSVLogger 4 | 5 | 6 | def create_callbacks(model_name, result_folder, other_callback=None, checkpoint=True, early_stop=True, metrics='accuracy'): 7 | """Creates callbacks for model training 8 | 9 | :param model_name: 
model name 10 | :param result_folder: folder to write to 11 | :param other_callback: other evaluation callbacks 12 | :param checkpoint: flag to use checkpoint or not 13 | :param early_stop: flag to use early_stop or not 14 | :param metrics: evaluation metrics for writing to checkpoint file 15 | :return: callbacks 16 | """ 17 | 18 | callbacks = [] 19 | if other_callback is not None: 20 | callbacks.append(other_callback) 21 | csv_log_file = os.path.join(result_folder, model_name + '.log') 22 | csv_logger = MyCSVLogger(csv_log_file, append=True, separator=';') 23 | callbacks.append(csv_logger) 24 | if early_stop: 25 | callbacks.append(EarlyStopping(monitor='plcc', min_delta=0.001, patience=40, mode='max')) 26 | if checkpoint: 27 | if metrics == None: 28 | mcp_file = os.path.join(result_folder, '{epoch:01d}_{loss:.4f}_{val_loss:.4f}.h5') 29 | else: 30 | if metrics == 'accuracy': 31 | mcp_file = os.path.join(result_folder, '{epoch:01d}_{loss:.4f}_{accuracy:.4f}_{val_loss:.4f}_{val_accuracy:.4f}.h5') 32 | elif metrics == 'mae': 33 | mcp_file = os.path.join(result_folder, '{epoch:01d}_{loss:.4f}_{mae:.4f}_{val_loss:.4f}_{val_mae:.4f}.h5') 34 | elif metrics == 'categorical_crossentropy': 35 | mcp_file = os.path.join(result_folder, '{epoch:01d}_{loss:.4f}_{categorical_crossentropy:.4f}_{val_loss:.4f}_{val_categorical_crossentropy:.4f}.h5') 36 | elif metrics == 'distribution_loss': 37 | mcp_file = os.path.join(result_folder, '{epoch:01d}_{loss:.4f}_{distribution_loss:.4f}_{val_loss:.4f}_{val_distribution_loss:.4f}.h5') 38 | else: 39 | mcp_file = os.path.join(result_folder, '{epoch:01d}_{loss:.4f}_{val_loss:.4f}.h5') 40 | mcp = ModelCheckpoint(mcp_file, save_best_only=True, save_weights_only=True, monitor='plcc', verbose=1, mode='max') 41 | callbacks.append(mcp) 42 | 43 | # tensorboard_callback = TensorBoard(log_dir=result_folder, histogram_freq=1) 44 | # callbacks.append(tensorboard_callback) 45 | 46 | return callbacks 47 | -------------------------------------------------------------------------------- /src/callbacks/csv_callback.py: -------------------------------------------------------------------------------- 1 | import collections 2 | import csv 3 | 4 | import numpy as np 5 | import six 6 | 7 | from tensorflow.python.util.compat import collections_abc 8 | from tensorflow.keras.callbacks import CSVLogger 9 | 10 | 11 | class MyCSVLogger(CSVLogger): 12 | """ 13 | This is basically a copy of CSVLogger, the only change is that 4 decimal precision is used in loggers. 14 | """ 15 | def __init__(self, filename, separator=',', append=False): 16 | super(MyCSVLogger, self).__init__(filename, separator, append) 17 | 18 | def on_epoch_end(self, epoch, logs=None): 19 | logs = logs or {} 20 | 21 | def handle_value(k): 22 | is_zero_dim_ndarray = isinstance(k, np.ndarray) and k.ndim == 0 23 | if isinstance(k, six.string_types): 24 | return k 25 | elif isinstance(k, collections_abc.Iterable) and not is_zero_dim_ndarray: 26 | return '"[%s]"' % (', '.join(map(str, k))) 27 | else: 28 | return '{:.4f}'.format(k) 29 | 30 | if self.keys is None: 31 | self.keys = sorted(logs.keys()) 32 | 33 | if self.model.stop_training: 34 | # We set NA so that csv parsers do not fail for this last epoch. 
35 | logs = dict([(k, logs[k]) if k in logs else (k, 'NA') for k in self.keys]) 36 | 37 | if not self.writer: 38 | class CustomDialect(csv.excel): 39 | delimiter = self.sep 40 | 41 | fieldnames = ['epoch'] + self.keys 42 | 43 | self.writer = csv.DictWriter( 44 | self.csv_file, 45 | fieldnames=fieldnames, 46 | dialect=CustomDialect) 47 | if self.append_header: 48 | self.writer.writeheader() 49 | 50 | row_dict = collections.OrderedDict({'epoch': epoch}) 51 | row_dict.update((key, handle_value(logs[key])) for key in self.keys) 52 | self.writer.writerow(row_dict) 53 | self.csv_file.flush() 54 | -------------------------------------------------------------------------------- /src/callbacks/evaluation_callback_generator.py: -------------------------------------------------------------------------------- 1 | from tensorflow.keras.callbacks import Callback 2 | import numpy as np 3 | import scipy.stats 4 | 5 | 6 | class ModelEvaluationIQGenerator(Callback): 7 | """ 8 | Evaluation for IQA, the main function is to calculate PLCC, SROCC, RMSE and MAD after each train epoch. 9 | """ 10 | def __init__(self, val_generator, using_single_mos, evaluation_generator=None, imagenet_pretrain=False): 11 | super(ModelEvaluationIQGenerator, self).__init__() 12 | self.val_generator = val_generator 13 | self.evaluation_generator = evaluation_generator 14 | self.using_single_mos = using_single_mos 15 | self.imagenet_pretrain = imagenet_pretrain 16 | self.mos_scales = np.array([1, 2, 3, 4, 5]) 17 | 18 | def __get_prediction_mos(self, image): 19 | prediction = self.model.predict(np.expand_dims(image, axis=0)) 20 | return prediction[0][0] 21 | 22 | def __get_prediction_distribution(self, image): 23 | prediction = self.model.predict(np.expand_dims(image, axis=0)) 24 | prediction = np.sum(np.multiply(self.mos_scales, prediction[0])) 25 | return prediction 26 | 27 | def __evaluation__(self, iq_generator): 28 | predictions = [] 29 | mos_scores = [] 30 | 31 | for j in range(iq_generator.__len__()): 32 | images, scores_batch = iq_generator.__getitem__(j) 33 | # mos_scores.extend(scores) 34 | if self.imagenet_pretrain: 35 | # ImageNnet normalization 36 | images /= 127.5 37 | images -= 1. 
38 | 39 | prediction_batch = self.model.predict(images) 40 | prediction = [] 41 | scores = [] 42 | for i in range(prediction_batch.shape[0]): 43 | prediction.append(np.sum(np.multiply(self.mos_scales, prediction_batch[i,:]))) 44 | scores.append(np.sum(np.multiply(self.mos_scales, scores_batch[i, :]))) 45 | predictions.extend(prediction) 46 | mos_scores.extend(scores) 47 | 48 | PLCC = scipy.stats.pearsonr(mos_scores, predictions)[0] 49 | SROCC = scipy.stats.spearmanr(mos_scores, predictions)[0] 50 | RMSE = np.sqrt(np.mean(np.subtract(predictions, mos_scores) ** 2)) 51 | MAD = np.mean(np.abs(np.subtract(predictions, mos_scores))) 52 | print('\nPLCC: {}, SRCC: {}, RMSE: {}, MAD: {}'.format(PLCC, SROCC, RMSE, MAD)) 53 | return PLCC, SROCC, RMSE, MAD 54 | 55 | def on_epoch_end(self, epoch, logs=None): 56 | plcc, srcc, rmse, mad = self.__evaluation__(self.val_generator) 57 | 58 | logs['plcc'] = plcc 59 | logs['srcc'] = srcc 60 | logs['rmse'] = rmse 61 | 62 | if self.evaluation_generator: 63 | if epoch % 10 == 0: 64 | plcc_10th, srcc_10th, rmse_10th, mad_10th = self.__evaluation__(self.evaluation_generator) 65 | print('\nEpoch {}: PLCC: {}, SRCC: {}, RMSE: {}, MAD: {}'.format(epoch, plcc_10th, srcc_10th, rmse_10th, mad_10th)) 66 | 67 | -------------------------------------------------------------------------------- /src/callbacks/evaluation_vq_generator.py: -------------------------------------------------------------------------------- 1 | from tensorflow.keras.callbacks import Callback 2 | import numpy as np 3 | import scipy.stats 4 | 5 | 6 | class ModelEvaluationGeneratorVQ(Callback): 7 | """ 8 | Evaluation for VQA, the main function is to calculate PLCC, SROCC, RMSE and MAD after each train epoch. 9 | """ 10 | def __init__(self, val_generator, evaluation_generator=None): 11 | super(ModelEvaluationGeneratorVQ, self).__init__() 12 | self.val_generator = val_generator 13 | self.evaluation_generator = evaluation_generator 14 | 15 | def __evaluation__(self, vq_generator): 16 | predictions = [] 17 | mos_scores = [] 18 | 19 | for i in range(vq_generator.__len__()): 20 | features, score = vq_generator.__getitem__(i) 21 | mos_scores.extend(score) 22 | prediction = self.model.predict(features) 23 | predictions.extend(np.squeeze(prediction, 1)) 24 | 25 | PLCC = scipy.stats.pearsonr(mos_scores, predictions)[0] 26 | SROCC = scipy.stats.spearmanr(mos_scores, predictions)[0] 27 | RMSE = np.sqrt(np.mean(np.subtract(predictions, mos_scores) ** 2)) 28 | MAD = np.mean(np.abs(np.subtract(predictions, mos_scores))) 29 | return PLCC, SROCC, RMSE, MAD 30 | 31 | def on_epoch_end(self, epoch, logs=None): 32 | plcc, srcc, rmse, mad = self.__evaluation__(self.val_generator) 33 | print('\nPLCC: {}, SRCC: {}, RMSE: {}, MAD: {}'.format(plcc, srcc, rmse, mad)) 34 | 35 | logs['plcc'] = plcc 36 | logs['srcc'] = srcc 37 | logs['rmse'] = rmse 38 | 39 | if self.evaluation_generator: 40 | if epoch % 10 == 0: 41 | plcc_10th, srcc_10th, rmse_10th, mad_10th = self.__evaluation__(self.evaluation_generator) 42 | print('\nEpoch {}: PLCC: {}, SRCC: {}, RMSE: {}, MAD: {}'.format(epoch, plcc_10th, srcc_10th, rmse_10th, mad_10th)) 43 | -------------------------------------------------------------------------------- /src/callbacks/warmup_cosine_decay_scheduler.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from tensorflow import keras 3 | from tensorflow.keras import backend as K 4 | 5 | 6 | def cosine_decay_with_warmup(global_step, 7 | learning_rate_base, 8 | total_steps, 9 
| warmup_learning_rate=0.0, 10 | warmup_steps=0, 11 | hold_base_rate_steps=0): 12 | """Cosine decay schedule with warm up period. 13 | 14 | Cosine annealing learning rate as described in: 15 | Loshchilov and Hutter, SGDR: Stochastic Gradient Descent with Warm Restarts. 16 | ICLR 2017. https://arxiv.org/abs/1608.03983 17 | In this schedule, the learning rate grows linearly from warmup_learning_rate 18 | to learning_rate_base for warmup_steps, then transitions to a cosine decay 19 | schedule. 20 | 21 | Arguments: 22 | global_step {int} -- global step. 23 | learning_rate_base {float} -- base learning rate. 24 | total_steps {int} -- total number of training steps. 25 | 26 | Keyword Arguments: 27 | warmup_learning_rate {float} -- initial learning rate for warm up. (default: {0.0}) 28 | warmup_steps {int} -- number of warmup steps. (default: {0}) 29 | hold_base_rate_steps {int} -- Optional number of steps to hold base learning rate 30 | before decaying. (default: {0}) 31 | Returns: 32 | a float representing learning rate. 33 | 34 | Raises: 35 | ValueError: if warmup_learning_rate is larger than learning_rate_base, 36 | or if warmup_steps is larger than total_steps. 37 | """ 38 | 39 | if total_steps < warmup_steps: 40 | raise ValueError('total_steps must be larger or equal to ' 41 | 'warmup_steps.') 42 | learning_rate = 0.5 * learning_rate_base * (1 + np.cos( 43 | np.pi * 44 | (global_step - warmup_steps - hold_base_rate_steps 45 | ) / float(total_steps - warmup_steps - hold_base_rate_steps))) 46 | if hold_base_rate_steps > 0: 47 | learning_rate = np.where(global_step > warmup_steps + hold_base_rate_steps, 48 | learning_rate, learning_rate_base) 49 | if warmup_steps > 0: 50 | if learning_rate_base < warmup_learning_rate: 51 | raise ValueError('learning_rate_base must be larger or equal to ' 52 | 'warmup_learning_rate.') 53 | slope = (learning_rate_base - warmup_learning_rate) / warmup_steps 54 | warmup_rate = slope * global_step + warmup_learning_rate 55 | learning_rate = np.where(global_step < warmup_steps, warmup_rate, 56 | learning_rate) 57 | return np.where(global_step > total_steps, 0.0, learning_rate) 58 | 59 | 60 | class WarmUpCosineDecayScheduler(keras.callbacks.Callback): 61 | """Cosine decay with warmup learning rate scheduler 62 | """ 63 | 64 | def __init__(self, 65 | learning_rate_base, 66 | total_steps, 67 | global_step_init=0, 68 | warmup_learning_rate=0.0, 69 | warmup_steps=0, 70 | hold_base_rate_steps=80, 71 | verbose=1): 72 | """Constructor for cosine decay with warmup learning rate scheduler. 73 | 74 | Arguments: 75 | learning_rate_base {float} -- base learning rate. 76 | total_steps {int} -- total number of training steps. 77 | 78 | Keyword Arguments: 79 | global_step_init {int} -- initial global step, e.g. from previous checkpoint. 80 | warmup_learning_rate {float} -- initial learning rate for warm up. (default: {0.0}) 81 | warmup_steps {int} -- number of warmup steps. (default: {0}) 82 | hold_base_rate_steps {int} -- Optional number of steps to hold base learning rate 83 | before decaying. (default: {0}) 84 | verbose {int} -- 0: quiet, 1: update messages. 
(default: {0}) 85 | """ 86 | 87 | super(WarmUpCosineDecayScheduler, self).__init__() 88 | self.learning_rate_base = learning_rate_base 89 | self.total_steps = total_steps 90 | self.global_step = global_step_init 91 | self.warmup_learning_rate = warmup_learning_rate 92 | self.warmup_steps = warmup_steps 93 | self.hold_base_rate_steps = hold_base_rate_steps 94 | self.verbose = verbose 95 | self.learning_rates = [] 96 | 97 | def on_epoch_end(self, epoch, logs=None): 98 | lr = K.get_value(self.model.optimizer.lr) 99 | if self.verbose > 0: 100 | print('\nEpoch %05d: setting learning rate to %s.' % (epoch + 1, lr)) 101 | 102 | def on_batch_end(self, batch, logs=None): 103 | self.global_step = self.global_step + 1 104 | lr = K.get_value(self.model.optimizer.lr) 105 | self.learning_rates.append(lr) 106 | # if self.verbose > 0: 107 | # print('\nBatch %05d: setting learning ' 108 | # 'rate to %s.' % (self.global_step + 1, lr)) 109 | 110 | def on_batch_begin(self, batch, logs=None): 111 | lr = cosine_decay_with_warmup(global_step=self.global_step, 112 | learning_rate_base=self.learning_rate_base, 113 | total_steps=self.total_steps, 114 | warmup_learning_rate=self.warmup_learning_rate, 115 | warmup_steps=self.warmup_steps, 116 | hold_base_rate_steps=self.hold_base_rate_steps) 117 | K.set_value(self.model.optimizer.lr, lr) 118 | # if self.verbose > 0: 119 | # print('\nBatch %05d: setting learning ' 120 | # 'rate to %s.' % (self.global_step + 1, lr)) 121 | 122 | 123 | # # Create a model. 124 | # model = Sequential() 125 | # model.add(Dense(32, activation='relu', input_dim=100)) 126 | # model.add(Dense(10, activation='softmax')) 127 | # model.compile(optimizer='rmsprop', 128 | # loss='categorical_crossentropy', 129 | # metrics=['accuracy']) 130 | # 131 | # # Number of training samples. 132 | # sample_count = 12 133 | # 134 | # # Total epochs to train. 135 | # epochs = 100 136 | # 137 | # # Number of warmup epochs. 138 | # warmup_epoch = 10 139 | # 140 | # # Training batch size, set small value here for demonstration purpose. 141 | # batch_size = 4 142 | # 143 | # # Base learning rate after warmup. 144 | # learning_rate_base = 0.001 145 | # 146 | # total_steps = int(epochs * sample_count / batch_size) 147 | # 148 | # # Compute the number of warmup batches. 149 | # warmup_steps = int(warmup_epoch * sample_count / batch_size) 150 | # 151 | # # Generate dummy data. 152 | # data = np.random.random((sample_count, 100)) 153 | # labels = np.random.randint(10, size=(sample_count, 1)) 154 | # 155 | # # Convert labels to categorical one-hot encoding. 156 | # one_hot_labels = keras.utils.to_categorical(labels, num_classes=10) 157 | # 158 | # # Compute the number of warmup batches. 159 | # warmup_batches = warmup_epoch * sample_count / batch_size 160 | # 161 | # # Create the Learning rate scheduler. 
162 | # warm_up_lr = WarmUpCosineDecayScheduler(learning_rate_base=learning_rate_base, 163 | # total_steps=total_steps, 164 | # warmup_learning_rate=0.0, 165 | # warmup_steps=warmup_steps, 166 | # hold_base_rate_steps=0) 167 | # 168 | # # Train the model, iterating on the data in batches of 32 samples 169 | # model.fit(data, one_hot_labels, epochs=epochs, batch_size=batch_size, 170 | # verbose=0, callbacks=[warm_up_lr]) 171 | # 172 | # import matplotlib.pyplot as plt 173 | # plt.plot(warm_up_lr.learning_rates) 174 | # plt.xlabel('Step', fontsize=20) 175 | # plt.ylabel('lr', fontsize=20) 176 | # plt.axis([0, total_steps, 0, learning_rate_base*1.1]) 177 | # plt.xticks(np.arange(0, total_steps, 50)) 178 | # plt.grid() 179 | # plt.title('Cosine decay with warmup', fontsize=20) 180 | # plt.show() 181 | -------------------------------------------------------------------------------- /src/cnn_lstm/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junyongyou/lsct_phiqnet/0f7c627d2e2b5ec02c09cf74d380ad2ccc2da481/src/cnn_lstm/__init__.py -------------------------------------------------------------------------------- /src/cnn_lstm/attention.py: -------------------------------------------------------------------------------- 1 | from tensorflow.keras.layers import Dense, Lambda, dot, Activation, concatenate 2 | from tensorflow.keras.layers import Layer 3 | import tensorflow.keras.backend as K 4 | 5 | 6 | # class Attention(Layer): 7 | # 8 | # def __init__(self, **kwargs): 9 | # super().__init__(**kwargs) 10 | # 11 | # def __call__(self, hidden_states): 12 | # """ 13 | # Many-to-one attention mechanism for Keras. 14 | # @param hidden_states: 3D tensor with shape (batch_size, time_steps, input_dim). 15 | # @return: 2D tensor with shape (batch_size, 128) 16 | # @author: felixhao28. 
17 | # """ 18 | # hidden_size = int(hidden_states.shape[2]) 19 | # # Inside dense layer 20 | # # hidden_states dot W => score_first_part 21 | # # (batch_size, time_steps, hidden_size) dot (hidden_size, hidden_size) => (batch_size, time_steps, hidden_size) 22 | # # W is the trainable weight matrix of attention Luong's multiplicative style score 23 | # score_first_part = Dense(hidden_size, use_bias=False, name='attention_score_vec')(hidden_states) 24 | # # score_first_part dot last_hidden_state => attention_weights 25 | # # (batch_size, time_steps, hidden_size) dot (batch_size, hidden_size) => (batch_size, time_steps) 26 | # h_t = Lambda(lambda x: x[:, -1, :], output_shape=(hidden_size,), name='last_hidden_state')(hidden_states) 27 | # score = dot([score_first_part, h_t], [2, 1], name='attention_score') 28 | # attention_weights = Activation('softmax', name='attention_weight')(score) 29 | # # (batch_size, time_steps, hidden_size) dot (batch_size, time_steps) => (batch_size, hidden_size) 30 | # context_vector = dot([hidden_states, attention_weights], [1, 1], name='context_vector') 31 | # return context_vector 32 | # # out = K.sum(context_vector, axis=1) 33 | # # pre_activation = concatenate([context_vector, h_t], name='attention_output') 34 | # # attention_vector = Dense(128, use_bias=False, activation='tanh', name='attention_vector')(pre_activation) 35 | # # return attention_vector 36 | 37 | 38 | class Attention(Layer): 39 | 40 | def __init__(self, return_sequences=True): 41 | self.return_sequences = return_sequences 42 | super(Attention, self).__init__() 43 | 44 | def build(self, input_shape): 45 | assert len(input_shape) == 3 46 | input_shape_list = input_shape.as_list() 47 | self.W = self.add_weight(name="att_weight", shape=(input_shape_list[-1], 1), 48 | initializer="normal") 49 | self.b = self.add_weight(name="att_bias", shape=(input_shape_list[-1], 1), 50 | initializer="zeros") 51 | 52 | super(Attention, self).build(input_shape) 53 | 54 | def call(self, x): 55 | e = K.tanh(K.dot(x, self.W) + self.b) 56 | a = K.softmax(e, axis=1) 57 | output = x * a 58 | 59 | if self.return_sequences: 60 | return output 61 | 62 | return K.sum(output, axis=1) 63 | -------------------------------------------------------------------------------- /src/cnn_lstm/attention_with_context.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from tensorflow.keras import initializers 3 | from tensorflow.keras import regularizers 4 | from tensorflow.keras import constraints 5 | 6 | from tensorflow.keras import activations 7 | from tensorflow.keras import backend as K 8 | 9 | from tensorflow.keras.layers import Layer, Embedding 10 | 11 | 12 | class Attention(Layer): 13 | """ 14 | Attention operation, with a context/query vector, for temporal data. 15 | Supports Masking. 16 | Follows the work of Yang et al. [https://www.cs.cmu.edu/~diyiy/docs/naacl16.pdf] 17 | "Hierarchical Attention Networks for Document Classification" 18 | by using a context vector to assist the attention 19 | # Input shape 20 | 3D tensor with shape: `(samples, steps, features)`. 21 | # Output shape 22 | 2D tensor with shape: `(samples, features)`. 23 | :param kwargs: 24 | Just put it on top of an RNN Layer (GRU/LSTM/SimpleRNN) with return_sequences=True. 25 | The dimensions are inferred based on the output shape of the RNN. 
26 | Example: 27 | model.add(LSTM(64, return_sequences=True)) 28 | model.add(AttentionWithContext()) 29 | """ 30 | 31 | def __init__(self, 32 | W_regularizer=None, u_regularizer=None, b_regularizer=None, 33 | W_constraint=None, u_constraint=None, b_constraint=None, 34 | bias=True, 35 | return_attention=False, **kwargs): 36 | 37 | self.supports_masking = True 38 | self.return_attention = return_attention 39 | self.init = initializers.get('glorot_uniform') 40 | 41 | self.W_regularizer = regularizers.get(W_regularizer) 42 | # self.u_regularizer = regularizers.get(u_regularizer) 43 | self.b_regularizer = regularizers.get(b_regularizer) 44 | 45 | self.W_constraint = constraints.get(W_constraint) 46 | # self.u_constraint = constraints.get(u_constraint) 47 | self.b_constraint = constraints.get(b_constraint) 48 | 49 | self.bias = bias 50 | super(Attention, self).__init__(**kwargs) 51 | 52 | def build(self, input_shape): 53 | assert len(input_shape) == 3 54 | input_shape_list = input_shape.as_list() 55 | 56 | self.W = self.add_weight(shape=((input_shape_list[-1], input_shape_list[-1])), 57 | initializer=self.init, 58 | name='{}_W'.format(self.name), 59 | regularizer=self.W_regularizer, 60 | constraint=self.W_constraint) 61 | if self.bias: 62 | self.b = self.add_weight(shape=(input_shape_list[-1],), 63 | initializer='zero', 64 | name='{}_b'.format(self.name), 65 | regularizer=self.b_regularizer, 66 | constraint=self.b_constraint) 67 | 68 | # self.u = self.add_weight(shape=(input_shape_list[-1],), 69 | # initializer=self.init, 70 | # name='{}_u'.format(self.name), 71 | # regularizer=self.u_regularizer, 72 | # constraint=self.u_constraint) 73 | 74 | super(Attention, self).build(input_shape.as_list()) 75 | 76 | def compute_mask(self, input, input_mask=None): 77 | # do not pass the mask to the next layers 78 | return None 79 | 80 | def call(self, x, mask=None): 81 | uit = tf.tensordot(x, self.W, axes=1) 82 | 83 | if self.bias: 84 | uit += self.b 85 | 86 | uit = activations.tanh(uit) 87 | 88 | a = activations.softmax(uit, axis=1) 89 | output = x * a 90 | result = K.sum(output, axis=1) 91 | 92 | return result 93 | 94 | # ait = tf.tensordot(uit, self.u, axes=1) 95 | # 96 | # a = activations.exponential(ait) 97 | # 98 | # # apply mask after the exp. will be re-normalized next 99 | # if mask is not None: 100 | # # Cast the mask to floatX to avoid float64 upcasting in theano 101 | # a *= tf.cast(mask, K.floatx()) 102 | # 103 | # # in some cases especially in the early stages of training the sum may be almost zero 104 | # # and this results in NaN's. A workaround is to add a very small positive number ε to the sum. 105 | # # a /= K.cast(K.sum(a, axis=1, keepdims=True), K.floatx()) 106 | # a /= tf.cast(K.sum(a, axis=1, keepdims=True) + K.epsilon(), K.floatx()) 107 | # 108 | # a = K.expand_dims(a) 109 | # weighted_input = x * a 110 | # result = K.sum(weighted_input, axis=1) 111 | # 112 | # if self.return_attention: 113 | # return [result, a] 114 | # return result 115 | 116 | def compute_output_shape(self, input_shape): 117 | if self.return_attention: 118 | #TODO use TensorShape here, as done in the else statement. I'm not sure 119 | # if this is returning a single tensor, or a list of two so leaving this undone for now. 
Suspect this will 120 | # need to complete if using Sequential rather than Functional API 121 | return [(input_shape[0], input_shape[-1]), 122 | (input_shape[0], input_shape[1])] 123 | else: 124 | return tf.TensorShape([input_shape[0].value, input_shape[-1].value]) 125 | 126 | -------------------------------------------------------------------------------- /src/cnn_lstm/generate_random_split.py: -------------------------------------------------------------------------------- 1 | from lsct.utils.gather_video_ids import gather_all_vids 2 | from pickle import load, dump 3 | 4 | 5 | vids = r'C:\lsct_phiqnet\src\lsct\meta_data\all_vids.pkl' 6 | for i in range(10): 7 | train_vids, test_vids = gather_all_vids(all_vids_pkl=vids) 8 | dump([train_vids, test_vids], open(r'C:\vq_datasets\random_splits\split_{}.pkl'.format(i), 'wb')) -------------------------------------------------------------------------------- /src/examples/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junyongyou/lsct_phiqnet/0f7c627d2e2b5ec02c09cf74d380ad2ccc2da481/src/examples/__init__.py -------------------------------------------------------------------------------- /src/examples/frame_features_video.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import os 3 | import tensorflow as tf 4 | 5 | from lsct.utils.frame_features_video_folders import CalculateFrameQualityFeatures 6 | from lsct.ablations.frame_features_video_folders_resnet50 import CalculateFrameQualityFeaturesResnet50 7 | 8 | FFMPEG = r'..\\ffmpeg\ffmpeg.exe' 9 | FFPROBE = r'..\\ffmpeg\ffprobe.exe' 10 | 11 | 12 | """ 13 | This script shows how to calculate PHIQNet features on all video frames, FFMPEG and FFProbe are required 14 | """ 15 | def video_frame_features_PHIQNet(phinqnet_weights_path, video_path, reture_clip_features=False): 16 | frame_features_extractor = CalculateFrameQualityFeatures(phinqnet_weights_path, FFPROBE, FFMPEG) 17 | features = frame_features_extractor.__ffmpeg_frames_features__(video_path, flip=False) 18 | features = np.squeeze(np.array(features), axis=2) 19 | features = np.reshape(features, (features.shape[0], features.shape[1] * features.shape[2])) 20 | 21 | if reture_clip_features: 22 | clip_features = [] 23 | clip_length = 16 24 | for j in range(features.shape[0] // clip_length): 25 | clip_features.append(features[j * clip_length: (j + 1) * clip_length, :]) 26 | clip_features = np.array(clip_features) 27 | return clip_features 28 | 29 | return np.array(features) 30 | 31 | 32 | def video_frame_features_ResNet50(resnet50_weights_path, video_path, reture_clip_features=False): 33 | frame_features_extractor = CalculateFrameQualityFeaturesResnet50(resnet50_weights_path, FFPROBE, FFMPEG) 34 | features = frame_features_extractor.__ffmpeg_frames_features__(video_path, flip=False) 35 | features = np.squeeze(np.array(features), axis=1) 36 | 37 | if reture_clip_features: 38 | clip_features = [] 39 | clip_length = 16 40 | for j in range(features.shape[0] // clip_length): 41 | clip_features.append(features[j * clip_length: (j + 1) * clip_length, :]) 42 | clip_features = np.array(clip_features) 43 | return clip_features 44 | 45 | return np.array(features, np.float16) 46 | 47 | 48 | def video_frame_features_ResNet50_folder(resnet50_weights_path, video_folder, target_folder): 49 | frame_features_extractor = CalculateFrameQualityFeaturesResnet50(resnet50_weights_path, FFPROBE, FFMPEG) 50 | 51 | video_types = ('.mp4', 
'.mpg') 52 | video_paths = [f for f in os.listdir(video_folder) if f.endswith(video_types)] 53 | video_paths = video_paths[:70000] 54 | numb_videos = len(video_paths) 55 | 56 | for i, video_path in enumerate(video_paths): 57 | ext = os.path.splitext(video_path) 58 | np_file = os.path.join(target_folder, '{}.npy'.format(ext[0])) 59 | if not os.path.exists(np_file): 60 | features = frame_features_extractor.__ffmpeg_frames_features__(os.path.join(video_folder, video_path), flip=False) 61 | features = np.squeeze(np.array(features), axis=1) 62 | features = np.array(features, dtype=np.float16) 63 | np.save(np_file, features) 64 | print('{} out of {}, {} done'.format(i, numb_videos, video_path)) 65 | else: 66 | print('{} out of {}, {} already exists'.format(i, numb_videos, video_path)) 67 | 68 | 69 | if __name__ == '__main__': 70 | gpus = tf.config.experimental.list_physical_devices('GPU') 71 | tf.config.experimental.set_visible_devices(gpus[0], 'GPU') 72 | 73 | # phiqnet_weights_path = r'..\\model_weights\PHIQNet.h5' 74 | # video_path = r'.\\sample_data\example_video (mos=3.24).mp4' 75 | video_folder = r'K:\Faglitteratur\VQA\k150ka' 76 | # features = video_frame_features_PHIQNet(phiqnet_weights_path, video_path) 77 | 78 | # Use None that ResNet50 will download ImageNet Pretrained weights or specify the weight path 79 | resnet50_imagenet_weights = r'C:\pretrained_weights_files\resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5' 80 | # features_resnet50 = video_frame_features_ResNet50(resnet50_imagenet_weights, video_path) 81 | 82 | target_folder = r'F:\k150k_features' 83 | video_frame_features_ResNet50_folder(resnet50_imagenet_weights, video_folder, target_folder) 84 | t = 0 -------------------------------------------------------------------------------- /src/examples/image_quality_prediction.py: -------------------------------------------------------------------------------- 1 | from phiqnet.models.image_quality_model import phiq_net 2 | import numpy as np 3 | from PIL import Image 4 | 5 | 6 | def predict_image_quality(model_weights_path, image_path): 7 | image = Image.open(image_path) 8 | image = np.asarray(image, dtype=np.float32) 9 | image /= 127.5 10 | image -= 1. 
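# The two lines above rescale pixel values from [0, 255] to [-1, 1], the same normalization the evaluation callbacks apply for ImageNet-pretrained inputs.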
11 | 12 | model = phiq_net(n_quality_levels=5) 13 | model.load_weights(model_weights_path) 14 | 15 | prediction = model.predict(np.expand_dims(image, axis=0)) 16 | 17 | mos_scales = np.array([1, 2, 3, 4, 5]) 18 | predicted_mos = (np.sum(np.multiply(mos_scales, prediction[0]))) 19 | return predicted_mos 20 | 21 | 22 | if __name__ == '__main__': 23 | image_path = r'.\\sample_data\example_image_1 (mos=2.9).jpg' 24 | # image_path = r'.\\sample_data\example_image_2 (mos=2.865).jpg' 25 | model_weights_path = r'..\\model_weights\PHIQNet.h5' 26 | predict_mos = predict_image_quality(model_weights_path, image_path) 27 | print('Predicted MOS: {}'.format(predict_mos)) -------------------------------------------------------------------------------- /src/examples/sample_data/example_image_1 (mos=2.9).jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junyongyou/lsct_phiqnet/0f7c627d2e2b5ec02c09cf74d380ad2ccc2da481/src/examples/sample_data/example_image_1 (mos=2.9).jpg -------------------------------------------------------------------------------- /src/examples/sample_data/example_image_2 (mos=2.865).jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junyongyou/lsct_phiqnet/0f7c627d2e2b5ec02c09cf74d380ad2ccc2da481/src/examples/sample_data/example_image_2 (mos=2.865).jpg -------------------------------------------------------------------------------- /src/examples/sample_data/example_video (mos=3.24).mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junyongyou/lsct_phiqnet/0f7c627d2e2b5ec02c09cf74d380ad2ccc2da481/src/examples/sample_data/example_video (mos=3.24).mp4 -------------------------------------------------------------------------------- /src/examples/video_quality_prediction.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from lsct.utils.frame_features_video_folders import CalculateFrameQualityFeatures 4 | from lsct.models.lsct_phiqnet_model import create_model 5 | 6 | FFMPEG = r'..\\ffmpeg\ffmpeg.exe' 7 | FFPROBE = r'..\\ffmpeg\ffprobe.exe' 8 | 9 | 10 | def predict_video_quality(phinqnet_weights_path, lsct_weights_path, video_path): 11 | frame_features_extractor = CalculateFrameQualityFeatures(phinqnet_weights_path, FFPROBE, FFMPEG) 12 | features = frame_features_extractor.__ffmpeg_frames_features__(video_path, flip=False) 13 | features = np.squeeze(np.array(features), axis=2) 14 | features = np.reshape(features, (features.shape[0], features.shape[1] * features.shape[2])) 15 | 16 | clip_features = [] 17 | clip_length = 16 18 | for j in range(features.shape[0] // clip_length): 19 | clip_features.append(features[j * clip_length: (j + 1) * clip_length, :]) 20 | clip_features = np.array(clip_features) 21 | 22 | transformer_params = [2, 64, 4, 64] 23 | dropout_rates = 0.1 24 | cnn_filters = [32, 64] 25 | 26 | feature_length = 5 * 256 27 | 28 | vq_model = create_model(clip_length, 29 | feature_length=feature_length, 30 | cnn_filters=cnn_filters, 31 | transformer_params=transformer_params, 32 | dropout_rate=dropout_rates) 33 | vq_model.summary() 34 | vq_model.load_weights(lsct_weights_path) 35 | predict_mos = vq_model.predict(np.expand_dims(clip_features, axis=0)) 36 | return predict_mos[0][0] 37 | 38 | 39 | if __name__ == '__main__': 40 | phiqnet_weights_path = r'..\\model_weights\PHIQNet.h5' 41 | lsct_weights_path = 
r'..\\model_weights\LSCT.h5' 42 | 43 | video_path = r'.\\sample_data\example_video (mos=3.24).mp4' 44 | predict_mos = predict_video_quality(phiqnet_weights_path, lsct_weights_path, video_path) 45 | print('Predicted MOS: {}'.format(predict_mos)) 46 | -------------------------------------------------------------------------------- /src/ffmpeg/video_handler.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import subprocess as sp 3 | import json 4 | 5 | """ 6 | A class to handle video using FFMPEG 7 | """ 8 | class VideoHandler(): 9 | def __init__(self, ffprobe_exe, ffmpeg_exe, process_frame_interval=0): 10 | self.ffprobe = ffprobe_exe 11 | self.ffmpeg = ffmpeg_exe 12 | self.process_frame_interval = process_frame_interval 13 | 14 | def get_video_meta(self, video_file): 15 | """Internal method to get video meta 16 | :return: a list containing [audio_exit, video_exit, duration, frame_count, height, width, fps] 17 | """ 18 | cmd = [self.ffprobe, '-i', video_file, '-v', 'quiet', '-print_format', 'json', '-show_streams', '-show_format'] 19 | ffprobe_output = json.loads(sp.check_output(cmd).decode('utf-8')) 20 | 21 | # audio_exits = False 22 | video_exits = False 23 | duration = 0 24 | frame_count = 0 25 | height = 0 26 | width = 0 27 | fps = 0 28 | bitrate = 0 29 | 30 | stream_type = 'streams' 31 | codec_type = 'codec_type' 32 | if stream_type in ffprobe_output: 33 | for i in range(len(ffprobe_output[stream_type])): 34 | if codec_type in ffprobe_output[stream_type][i]: 35 | # if ffprobe_output[stream_type][i][codec_type] == 'audio': 36 | # audio_exits = True 37 | if ffprobe_output[stream_type][i][codec_type] == 'video': 38 | video_exits = True 39 | frame_rate = ffprobe_output[stream_type][i]['avg_frame_rate'] 40 | if '/' in frame_rate: 41 | fps_temp = [float(item) for item in frame_rate.split('/')] 42 | fps = fps_temp[0] / fps_temp[1] 43 | else: 44 | fps = float(frame_rate) 45 | if 'duration' not in ffprobe_output[stream_type][i]: 46 | if 'format' in ffprobe_output: 47 | duration = float(ffprobe_output['format']['duration']) 48 | else: 49 | duration = float(ffprobe_output[stream_type][i]['duration']) 50 | frame_count = int(duration * fps) 51 | height = ffprobe_output[stream_type][i]['height'] 52 | width = ffprobe_output[stream_type][i]['width'] 53 | if 'bit_rate' not in ffprobe_output[stream_type][i]: 54 | if 'format' in ffprobe_output: 55 | bitrate = int(ffprobe_output['format']['bit_rate']) 56 | else: 57 | bitrate = int(ffprobe_output[stream_type][i]['bit_rate']) / 1000 58 | 59 | if not video_exits: 60 | return None 61 | return [video_exits, duration, frame_count, height, width, fps, bitrate] 62 | 63 | def get_frames(self, video_file, convert_to_gray=False): 64 | """ 65 | Get video frames in a Numpy array 66 | :param video_file: video path 67 | :param convert_to_gray: flag to convert to gray or not 68 | :return: frames in an array 69 | """ 70 | meta = self.get_video_meta(video_file) 71 | video_height = meta[3] 72 | video_width = meta[4] 73 | video_size = video_height * video_width * 3 74 | # print('Start reading {}'.format(video_file)) 75 | if self.process_frame_interval > 0: 76 | fps = 'fps=1/' + str(self.process_frame_interval) 77 | cmd = [self.ffmpeg, '-i', video_file, '-f', 'image2pipe', '-vf', fps, '-pix_fmt', 'rgb24', '-vcodec', 78 | 'rawvideo', '-'] 79 | else: 80 | cmd = [self.ffmpeg, '-i', video_file, '-f', 'image2pipe', '-pix_fmt', 'rgb24', '-hide_banner', '-loglevel', 81 | 'panic', '-vcodec', 'rawvideo', '-'] 82 | pipe = 
sp.Popen(cmd, stdout=sp.PIPE) 83 | 84 | images = [] 85 | try: 86 | while True: 87 | try: 88 | raw_image = pipe.stdout.read(video_size) 89 | if len(raw_image) != video_size: 90 | break 91 | image = np.fromstring(raw_image, dtype='uint8') 92 | image = image.reshape((video_height, video_width, 3)) 93 | 94 | if convert_to_gray: 95 | image = np.array(image, dtype=np.float32) 96 | image = np.dot(image, [0.2989, 0.587, 0.114]) 97 | 98 | images.append(image.astype(np.uint8)) 99 | except Exception as e1: 100 | print(e1) 101 | continue 102 | except Exception as e2: 103 | print(e2) 104 | pipe.stdout.flush() 105 | 106 | return images 107 | -------------------------------------------------------------------------------- /src/lsct/README.md: -------------------------------------------------------------------------------- 1 | # LSCT Implementation 2 | 3 | TF-Keras implementation of LSCT as described in [Long Short-term Convolutional Transformer for No-Reference Video Quality Assessment](https://dl.acm.org/doi/abs/10.1145/3474085.3475368). 4 | 5 | ## Installation 6 | 7 | 1) Clone this repository. 8 | 2) Install the required Python packages. The code was developed with PyCharm in Python 3.7. The requirements.txt file was generated by PyCharm, and the code should also run with the latest versions of the packages. 9 | 10 | ## Training a model 11 | Examples of training LSCT and its variants can be seen in lsct/bin. 12 | An argument parser could be used, but the authors prefer a dictionary with the parameters defined explicitly; it is easy to convert the scripts to take command-line arguments. 13 | In principle, the following parameters can be defined: 14 | 15 | args = {} 16 | args['multi_gpu'] = 0 # gpu setting, set to 1 for using multiple GPUs 17 | args['gpu'] = 0 # If having multiple GPUs, specify which GPU to use 18 | 19 | args['result_folder'] = r'..\databases\experiments' # Define result path 20 | args['vids_meta'] = r'..\\meta_data\all_vids.pkl' 21 | args['meta_file'] = r'..\\meta_data\all_video_mos.csv' 22 | 23 | # if ugc_chunk_pickle is used, then the folders containing PHIQNet features of UGC chunks must be specified 24 | args['ugc_chunk_pickle'] = r'..\\meta_data\ugc_chunks.pkl' # this file contains information about the YouTube-UGC chunks; if set to None, chunks are not included in the training data 25 | args['ugc_chunk_folder'] = r'.\frame_features\ugc_chunks' # folder containing PHIQNet features of chunk frames; if ugc_chunk_pickle=None, this argument is not used 26 | args['ugc_chunk_folder_flipped'] = r'.\frame_features_flipped\ugc_chunks' # folder containing PHIQNet features of flipped chunk frames; if ugc_chunk_pickle=None, this argument is not used 27 | 28 | args['database'] = ['live', 'konvid', 'ugc'] # specify which databases will be included in the training data 29 | 30 | args['model_name'] = 'lsct' # model name used when recording training results (e.g., logs) 31 | 32 | args['transformer_params'] = [2, 64, 4, 64] 33 | args['dropout_rate'] = 0.1 34 | args['cnn_filters'] = [32, 64] 35 | # args['pooling_sizes'] = [4, 4] 36 | args['clip_length'] = 16 37 | 38 | args['lr_base'] = 1e-3 # Define the base learning rate for the warmup and rate-decay schedule 39 | args['batch_size'] = 32 # Batch size; choose a value that fits in GPU memory 40 | args['epochs'] = 120 # Maximal number of epochs; early stopping can be enabled in the callbacks 41 | args['lr_schedule'] = True # Choose between True and False, indicating whether the learning rate schedule should be used 42 | 43 | args['validation'] = 'validation' # Choose between 'validation' and 'test'. 
If 'validation', the model will be trained on the train set and validated on the test set, which are randomly split from the databases. 44 | # If 'test', the model will be trained on the entire 'KonViD-1k' and 'YouTube-UGC' databases, and validated on the entire 'LIVE-VQC' database 45 | 46 | args['do_finetune'] = False # specify whether fine-tuning using SGD with a smaller learning rate is performed 47 | 48 | ## Predict video quality using the trained model 49 | After LSCT has been trained and the weights have been stored in an h5 file, it can be used to predict the quality of videos with arbitrary resolutions. 50 | In the "examples" folder, the script examples\video_quality_prediction.py predicts the quality of an example video using the pretrained weights. 51 | 52 | In order to predict video quality, both the PHIQNet and LSCT weights are required, and FFMPEG (including FFProbe) is also needed to read video frames. 53 | The pretrained weights of PHIQNet and LSCT can be found in the model_weights folder. 54 | 55 | ## Prepare datasets for model training 56 | This work uses three publicly available databases: KonViD-1k [The Konstanz natural video database (KoNViD-1k)](https://ieeexplore.ieee.org/document/7965673) by V. Hosu, F. Hahn, M. Jenadeleh, H. Lin, H. Men, T. Sziranyi, S. Li, and D. Saupe; 57 | YouTube-UGC [YouTube UGC dataset for video compression research](https://ieeexplore.ieee.org/document/8901772) by Y. Wang, S. Inguva, and B. Adsumilli; 58 | and LIVE-VQC [Large-scale study of perceptual video quality](https://ieeexplore.ieee.org/document/8463581) by Z. Sinno and A. C. Bovik. 59 | 60 | 1) The three databases can be used individually and also merged, and then randomly split into training and testing sets. 61 | 62 | 2) Calculate PHIQNet features on all video frames. The script lsct\utils\frame_features_video_folders.py can be used to calculate PHIQNet features for a list of video folders. 63 | An example script examples\frame_features_video.py also shows how to calculate PHIQNet features on video frames. 64 | Please download the PHIQNet weights file [here](https://drive.google.com/file/d/1ymy2oL0r-XNzjqk_kE-lcNkI2FhSu95h/view?usp=sharing), and store it in model_weights. 65 | 66 | 3) The frame features are best stored as NumPy NPY files in the target folders defined in lsct\utils\frame_features_video_folders.py. Note that flipped frames are also used for data augmentation. By default, the frame features are stored in target_folder\frame_features, and the flipped features are stored in target_folder\frame_features_flipped. 67 | 68 | 4) Make a meta file containing the feature file paths and MOS values; an example file is provided in lsct\meta_data\all_video_mos.csv: 69 | ``` 70 | C:\vq_datasets\frame_features\live_vqc\Video\A001.npy,4.20928 71 | C:\vq_datasets\frame_features\live_vqc\Video\A002.npy,3.29202 72 | C:\vq_datasets\frame_features\live_vqc\Video\A004.npy,3.372716 73 | C:\vq_datasets\frame_features\live_vqc\Video\A005.npy,2.887112 74 | C:\vq_datasets\frame_features\live_vqc\Video\A006.npy,4.386068 75 | C:\vq_datasets\frame_features\live_vqc\Video\A007.npy,3.0347 76 | ``` 77 | If the features are stored in other folders, please update this file. This file contains only frame features without flipping, and the script assumes that the flipped features can be accessed by replacing 'frame_features' with 'frame_features_flipped' in the feature paths. So please store the features of flipped frames accordingly. 
78 | 79 | 5) Make a dumped pickle file containing a list of video IDs, which can be easily used to locate train and test videos. This can be done by lsct\utils\gather_video_ids.py. 80 | Video IDs are formatted as: database_video, e.g., live_A001. 81 | An example file is provided in lsct\meta_data\all_vids.pkl. 82 | 83 | 6) If using YouTube-UGC chunks, then extract the PHIQNet features for individual chunks. This can be done by lsct\utils\ugc_chunk_generator.py, in which ugc_chunks.pkl can also be dumped. 84 | ugc_chunks.pkl contains a dictionary of: {UGC video name: [full MOS, chunk0 MOS, chunk1 MOS, ...]}. 85 | 86 | 7) The meta files together with the paths to chunk features (if used) should be provided for training; see lsct\bin\train_lsct_all_databases.py for an example. 87 | 88 | ## State-of-the-art models 89 | Other NR-VQA models are also included in the work. The original implementations of these metrics are used, and they can be found below. 90 | 91 | V-BLIINDS: paper [Blind prediction of natural video quality](https://ieeexplore.ieee.org/document/6705673) by M. A. Saad, A. C. Bovik, and C. Charrier, and [implementation](http://live.ece.utexas.edu/research/Quality/VideoBLIINDS_Code_MicheleSaad.zip). 92 | 93 | ST-3DDCT: paper [Spatiotemporal statistics for video quality assessment](https://ieeexplore.ieee.org/document/7469872) by X. Li, Q. Guo, and X. Lu, and [implementation](https://github.com/scikit-video/scikit-video/tree/master/skvideo/measure). 94 | 95 | TLVQM: paper [Two-level approach for no-reference consumer video quality assessment](https://ieeexplore.ieee.org/document/8742797) by J. Korhonen, and [implementation](https://github.com/jarikorhonen/nr-vqa-consumervideo). 96 | 97 | VSFA: paper [Quality assessment of in-the-wild videos](https://dl.acm.org/doi/10.1145/3343031.3351028) by D. Li, T. Jiang, and M. Jiang, and [implementation](https://github.com/lidq92/VSFA). 98 | 99 | 3D-CNN-LSTM: paper [Deep neural networks for no-reference video quality assessment](https://ieeexplore.ieee.org/document/8803395) by J. You and J. Korhonen. 100 | 101 | VIDEVAL: paper [UGC-VQA: Benchmarking blind video quality assessment for user generated content](https://arxiv.org/abs/2005.14354) by Z. Tu, Y. Wang, N. Birkbeck, B. Adsumilli, and A. C. Bovik, and [implementation](https://github.com/tu184044109/VIDEVAL_release). 102 | 103 | ## FAQ 104 | * To be added 105 | -------------------------------------------------------------------------------- /src/lsct/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junyongyou/lsct_phiqnet/0f7c627d2e2b5ec02c09cf74d380ad2ccc2da481/src/lsct/__init__.py -------------------------------------------------------------------------------- /src/lsct/ablations/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junyongyou/lsct_phiqnet/0f7c627d2e2b5ec02c09cf74d380ad2ccc2da481/src/lsct/ablations/__init__.py -------------------------------------------------------------------------------- /src/lsct/ablations/train_lsct_clip_length_search.py: -------------------------------------------------------------------------------- 1 | from lsct.train.train import train_main 2 | 3 | 4 | """ 5 | Search for the best clip length for LSCT-PHIQNet. 6 | It is noted that max pooling sizes are adaptive to the clip length. 
7 | """ 8 | if __name__ == '__main__': 9 | args = {} 10 | args['result_folder'] = r'C:\vq_datasets\results\tmp' 11 | args['vids_meta'] = r'..\\meta_data\all_vids.pkl' 12 | args['meta_file'] = r'..\\meta_data\all_video_mos.csv' 13 | 14 | # if ugc_chunk_pickle is used, then the folders containing PHIQNet features of UGC chunks must be specified 15 | args['ugc_chunk_pickle'] = None # r'..\\meta_data\ugc_chunks.pkl' 16 | args['ugc_chunk_folder'] = r'.\frame_features\ugc_chunks' 17 | args['ugc_chunk_folder_flipped'] = r'.\frame_features_flipped\ugc_chunks' 18 | 19 | args['database'] = ['live', 'konvid', 'ugc'] 20 | # args['database'] = ['konvid'] 21 | 22 | args['transformer_params'] = [2, 64, 4, 64] 23 | args['dropout_rate'] = 0.1 24 | args['cnn_filters'] = [32, 64] 25 | 26 | clip_length_range = [8, 16, 24, 32, 64] 27 | pooling_sizes_range = [[4, 2], 28 | [4, 4], 29 | [6, 4], 30 | [8, 4], 31 | [8, 8]] 32 | 33 | args['batch_size'] = 32 34 | args['lr_base'] = 1e-3 35 | args['epochs'] = 140 36 | 37 | args['multi_gpu'] = 0 38 | args['gpu'] = 0 39 | 40 | args['validation'] = 'validation' 41 | 42 | args['do_finetune'] = False 43 | 44 | for clip_length, pooling_sizes in zip(clip_length_range, pooling_sizes_range): 45 | print('Clip length: {}'.format(clip_length)) 46 | args['clip_length'] = clip_length 47 | args['pooling_sizes'] = pooling_sizes 48 | train_main(args) 49 | -------------------------------------------------------------------------------- /src/lsct/ablations/train_lsct_resnet50.py: -------------------------------------------------------------------------------- 1 | from lsct.train.train import train_main 2 | 3 | 4 | # 5 | # Train script of LSCT-Resnet50 on all the three databases, it is same as LSCT-PHIQNet training, but using Resnet50 features to replace PHIQNet features. 
6 | # Use lsct\ablations\frame_features_video_folders_resnet50.py to calculate Resnet50 features 7 | # 8 | if __name__ == '__main__': 9 | args = {} 10 | args['result_folder'] = r'C:\vq_datasets\results\lsct' 11 | args['vids_meta'] = r'..\\meta_data\all_vids.pkl' 12 | 13 | # The feature file paths must be changed to Resnet50 feature files 14 | args['meta_file'] = r'..\\meta_data\all_video_mos.csv' 15 | 16 | # if ugc_chunk_pickle is used, then the folders containing PHIQNet features of UGC chunks must be specified 17 | args['ugc_chunk_pickle'] = None # r'..\\meta_data\ugc_chunks.pkl' 18 | args['ugc_chunk_folder'] = r'.\frame_features_resnet50\ugc_chunks' 19 | args['ugc_chunk_folder_flipped'] = r'.\frame_features_flipped_resnet50\ugc_chunks' 20 | 21 | args['database'] = ['live', 'konvid', 'ugc'] 22 | 23 | args['model_name'] = 'lsct' 24 | 25 | args['transformer_params'] = [2, 64, 4, 64] 26 | args['dropout_rate'] = 0.1 27 | args['cnn_filters'] = [32, 64] 28 | # args['pooling_sizes'] = [4, 4] 29 | args['clip_length'] = 16 30 | 31 | args['batch_size'] = 32 32 | 33 | args['lr_base'] = 1e-3 34 | args['epochs'] = 400 35 | 36 | args['multi_gpu'] = 0 37 | args['gpu'] = 1 38 | 39 | args['validation'] = 'validation' 40 | 41 | args['do_finetune'] = False 42 | 43 | train_main(args) 44 | -------------------------------------------------------------------------------- /src/lsct/bin/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junyongyou/lsct_phiqnet/0f7c627d2e2b5ec02c09cf74d380ad2ccc2da481/src/lsct/bin/__init__.py -------------------------------------------------------------------------------- /src/lsct/bin/train_lsct_all_databases.py: -------------------------------------------------------------------------------- 1 | from lsct.train.train import train_main 2 | 3 | 4 | """ 5 | General train script of LSCT-PHIQNet on all the three databases 6 | """ 7 | if __name__ == '__main__': 8 | args = {} 9 | args['result_folder'] = r'C:\vq_datasets\results\lsct' 10 | args['vids_meta'] = r'..\\meta_data\all_vids.pkl' 11 | args['meta_file'] = r'..\\meta_data\all_video_mos.csv' 12 | 13 | # if ugc_chunk_pickle is used, then the folders containing PHIQNet features of UGC chunks must be specified 14 | args['ugc_chunk_pickle'] = None # r'..\\meta_data\ugc_chunks.pkl' 15 | args['ugc_chunk_folder'] = r'.\frame_features\ugc_chunks' 16 | args['ugc_chunk_folder_flipped'] = r'.\frame_features_flipped\ugc_chunks' 17 | 18 | args['database'] = ['live', 'konvid', 'ugc'] 19 | 20 | args['model_name'] = 'lsct' 21 | 22 | args['transformer_params'] = [2, 64, 4, 64] 23 | args['dropout_rate'] = 0.1 24 | args['cnn_filters'] = [32, 64] 25 | 26 | # No need to define pooling sizes for 1D CNN, which will be defined in check_args() in train 27 | # args['pooling_sizes'] = [4, 4] 28 | 29 | args['clip_length'] = 16 30 | 31 | args['batch_size'] = 32 32 | 33 | args['lr_base'] = 1e-3 34 | args['epochs'] = 400 35 | 36 | args['multi_gpu'] = 0 37 | args['gpu'] = 1 38 | 39 | args['validation'] = 'validation' 40 | 41 | args['do_finetune'] = False 42 | 43 | train_main(args) 44 | -------------------------------------------------------------------------------- /src/lsct/bin/train_lsct_all_databases_10runs.py: -------------------------------------------------------------------------------- 1 | from lsct.train.train import train_main 2 | 3 | 4 | """ 5 | Run the training of LSCT-PHIQNet for 10 times with randomly split train and test sets 6 | """ 7 | if __name__ == '__main__': 8 | 
args = {} 9 | args['result_folder'] = r'C:\vq_datasets\results\lsct' 10 | args['vids_meta'] = r'..\\meta_data\all_vids.pkl' 11 | args['meta_file'] = r'..\\meta_data\all_video_mos.csv' 12 | 13 | # if ugc_chunk_pickle is used, then the folders containing PHIQNet features of UGC chunks must be specified 14 | args['ugc_chunk_pickle'] = None # r'..\\meta_data\ugc_chunks.pkl' 15 | args['ugc_chunk_folder'] = r'.\frame_features\ugc_chunks' 16 | args['ugc_chunk_folder_flipped'] = r'.\frame_features_flipped\ugc_chunks' 17 | 18 | args['database'] = ['live', 'konvid', 'ugc'] 19 | 20 | args['model_name'] = 'lsct' 21 | 22 | args['transformer_params'] = [2, 64, 4, 64] 23 | args['dropout_rate'] = 0.1 24 | args['cnn_filters'] = [32, 64] 25 | args['pooling_sizes'] = [4, 4] 26 | args['clip_length'] = 16 27 | 28 | args['batch_size'] = 32 29 | 30 | args['lr_base'] = 1e-3 31 | args['epochs'] = 400 32 | 33 | args['multi_gpu'] = 0 34 | args['gpu'] = 1 35 | 36 | args['validation'] = 'validation' 37 | 38 | args['do_finetune'] = True 39 | 40 | for _ in range(10): 41 | train_main(args) 42 | -------------------------------------------------------------------------------- /src/lsct/bin/train_lsct_all_databases_triq_features_10runs.py: -------------------------------------------------------------------------------- 1 | from lsct.train.train import train_main 2 | from pickle import load 3 | 4 | 5 | """ 6 | Run the training of LSCT-PHIQNet for 10 times with randomly split train and test sets 7 | """ 8 | if __name__ == '__main__': 9 | args = {} 10 | args['result_folder'] = r'C:\vq_datasets\results\lsct_triq_features' 11 | # args['vids_meta'] = r'..\\meta_data\all_vids.pkl' 12 | args['meta_file'] = r'C:\lsct_phiqnet\src\lsct\meta_data\all_video_mos_triq.csv' 13 | 14 | # if ugc_chunk_pickle is used, then the folders containing PHIQNet features of UGC chunks must be specified 15 | args['ugc_chunk_pickle'] = None # r'..\\meta_data\ugc_chunks.pkl' 16 | # args['ugc_chunk_folder'] = r'.\frame_features\ugc_chunks' 17 | # args['ugc_chunk_folder_flipped'] = r'.\frame_features_flipped\ugc_chunks' 18 | 19 | args['database'] = ['live', 'konvid', 'ugc'] 20 | 21 | args['model_name'] = 'lsct_triq' 22 | 23 | args['transformer_params'] = [2, 32, 8, 64] 24 | args['dropout_rate'] = 0.1 25 | args['cnn_filters'] = [32, 64] 26 | args['pooling_sizes'] = [4, 4] 27 | args['clip_length'] = 16 28 | 29 | args['batch_size'] = 32 30 | 31 | args['lr_base'] = 1e-3 32 | args['epochs'] = 200 33 | 34 | args['multi_gpu'] = 1 35 | args['gpu'] = 1 36 | 37 | args['validation'] = 'validation' 38 | 39 | args['do_finetune'] = True 40 | 41 | for m in range(10): 42 | train_vids, test_vids = load(open(r'C:\vq_datasets\random_splits\split_{}.pkl'.format(m), 'rb')) 43 | train_main(args, train_vids, test_vids) 44 | -------------------------------------------------------------------------------- /src/lsct/bin/train_lsct_params_search.py: -------------------------------------------------------------------------------- 1 | from lsct.train.train import train_main 2 | import numpy as np 3 | import os 4 | from lsct.utils.gather_video_ids import gather_all_vids 5 | 6 | """ 7 | Search for best hyper-parameters of LSCT-PHIQNet 8 | """ 9 | if __name__ == '__main__': 10 | args = {} 11 | args['result_folder'] = r'C:\vq_datasets\results\tmp' 12 | args['vids_meta'] = r'..\\meta_data\all_vids.pkl' 13 | args['meta_file'] = r'..\\meta_data\all_video_mos.csv' 14 | 15 | # if ugc_chunk_pickle is used, then the folders containing PHIQNet features of UGC chunks must be specified 16 | 
args['ugc_chunk_pickle'] = None # r'..\\meta_data\ugc_chunks.pkl' 17 | args['ugc_chunk_folder'] = r'.\frame_features\ugc_chunks' 18 | args['ugc_chunk_folder_flipped'] = r'.\frame_features_flipped\ugc_chunks' 19 | 20 | # args['database'] = ['live', 'konvid', 'ugc'] 21 | args['database'] = ['ugc'] 22 | 23 | cnn_filters_range = [ 24 | [16, 32], 25 | [32, 64], 26 | [32, 64, 128], 27 | [32, 64, 128, 256] 28 | ] 29 | transformer_params_range = [ 30 | [2, 16, 2, 32], 31 | [2, 16, 4, 32], 32 | [2, 32, 4, 64], 33 | [2, 64, 4, 64], 34 | [4, 32, 4, 64], 35 | [4, 64, 4, 64], 36 | [4, 64, 4, 128], 37 | [4, 64, 8, 128], 38 | [4, 64, 8, 256], 39 | [4, 128, 8, 256], 40 | [8, 256, 8, 512] 41 | ] 42 | 43 | args['dropout_rate'] = 0.1 44 | args['clip_length'] = 16 45 | 46 | args['batch_size'] = 32 47 | 48 | args['lr_base'] = 1e-3/2 49 | args['epochs'] = 300 50 | 51 | args['multi_gpu'] = 0 52 | args['gpu'] = 1 53 | 54 | args['validation'] = 'validation' 55 | 56 | args['do_finetune'] = True 57 | 58 | result_record_file = os.path.join(args['result_folder'], 'ugc_nochunks.csv') 59 | runs = 4 60 | all_plcc = np.zeros((runs, len(cnn_filters_range), len(transformer_params_range))) 61 | 62 | for k in range(runs): 63 | train_vids, test_vids = gather_all_vids(all_vids_pkl=args['vids_meta']) 64 | 65 | for i, cnn_filters in enumerate(cnn_filters_range): 66 | for j, transformer_params in enumerate(transformer_params_range): 67 | if i == 0 and j < 5: 68 | break 69 | if not os.path.exists(result_record_file): 70 | record_file = open(result_record_file, 'w+') 71 | else: 72 | record_file = open(result_record_file, 'a') 73 | 74 | args['cnn_filters'] = cnn_filters 75 | # No need to define pooling sizes for 1D CNN, which will be defined in check_args() in train 76 | 77 | args['transformer_params'] = transformer_params 78 | args['model_name'] = 'lsct_{}_{}'.format(cnn_filters, transformer_params) 79 | 80 | plcc = train_main(args, train_vids, test_vids) 81 | 82 | record_file.write('Run: {}, CNN: {}, Transformer: {}, plcc: {}\n'.format(k, cnn_filters, transformer_params, plcc)) 83 | 84 | all_plcc[k, i, j] = plcc 85 | print('Run: {}, CNN: {}, Transformer: {}, plcc: {}\n'.format(k + 1, cnn_filters, transformer_params, plcc)) 86 | record_file.flush() 87 | record_file.close() 88 | print(np.mean(np.array(all_plcc), axis=0)) 89 | -------------------------------------------------------------------------------- /src/lsct/bin/train_lsct_params_search_1.py: -------------------------------------------------------------------------------- 1 | from lsct.train.train import train_main 2 | import numpy as np 3 | import os 4 | from lsct.utils.gather_video_ids import gather_all_vids 5 | 6 | """ 7 | Search for best hyper-parameters of LSCT-PHIQNet 8 | """ 9 | if __name__ == '__main__': 10 | args = {} 11 | args['result_folder'] = r'C:\vq_datasets\results\tmp' 12 | args['vids_meta'] = r'..\\meta_data\all_vids.pkl' 13 | args['meta_file'] = r'..\\meta_data\all_video_mos.csv' 14 | 15 | # if ugc_chunk_pickle is used, then the folders containing PHIQNet features of UGC chunks must be specified 16 | args['ugc_chunk_pickle'] = None # r'..\\meta_data\ugc_chunks.pkl' 17 | args['ugc_chunk_folder'] = r'.\frame_features\ugc_chunks' 18 | args['ugc_chunk_folder_flipped'] = r'.\frame_features_flipped\ugc_chunks' 19 | 20 | # args['database'] = ['live', 'konvid', 'ugc'] 21 | args['database'] = ['konvid'] 22 | 23 | cnn_filters_range = [ 24 | [16, 32], 25 | # [32, 64], 26 | # [32, 64, 128], 27 | # [32, 64, 128, 256] 28 | ] 29 | transformer_params_range = [ 30 | [2, 16, 
2, 32], 31 | # [2, 16, 4, 32], 32 | # [2, 32, 4, 64], 33 | # [2, 64, 4, 64], 34 | # [4, 32, 4, 64], 35 | # [4, 64, 4, 64], 36 | # [4, 64, 4, 128], 37 | # [4, 64, 8, 128], 38 | # [4, 64, 8, 256], 39 | # [4, 128, 8, 256], 40 | # [8, 256, 8, 512] 41 | ] 42 | 43 | args['dropout_rate'] = 0.1 44 | args['clip_length'] = 16 45 | 46 | args['batch_size'] = 32 47 | 48 | args['lr_base'] = 1e-3/2 49 | args['epochs'] = 300 50 | 51 | args['multi_gpu'] = 0 52 | args['gpu'] = 0 53 | 54 | args['validation'] = 'validation' 55 | 56 | args['do_finetune'] = True 57 | 58 | result_record_file = os.path.join(args['result_folder'], 'konvid_nochunks.csv') 59 | runs = 5 60 | all_plcc = np.zeros((runs, len(cnn_filters_range), len(transformer_params_range))) 61 | 62 | for k in range(runs): 63 | train_vids, test_vids = gather_all_vids(all_vids_pkl=args['vids_meta']) 64 | 65 | for i, cnn_filters in enumerate(cnn_filters_range): 66 | for j, transformer_params in enumerate(transformer_params_range): 67 | if not os.path.exists(result_record_file): 68 | record_file = open(result_record_file, 'w+') 69 | else: 70 | record_file = open(result_record_file, 'a') 71 | 72 | args['cnn_filters'] = cnn_filters 73 | # No need to define pooling sizes for 1D CNN, which will be defined in check_args() in train 74 | 75 | args['transformer_params'] = transformer_params 76 | args['model_name'] = 'lsct_{}_{}'.format(cnn_filters, transformer_params) 77 | 78 | plcc = train_main(args, train_vids, test_vids) 79 | 80 | record_file.write('Run: {}, CNN: {}, Transformer: {}, plcc: {}\n'.format(k, cnn_filters, transformer_params, plcc)) 81 | 82 | all_plcc[k, i, j] = plcc 83 | print('Run: {}, CNN: {}, Transformer: {}, plcc: {}\n'.format(k, cnn_filters, transformer_params, plcc)) 84 | record_file.flush() 85 | record_file.close() 86 | print(np.mean(np.array(all_plcc), axis=0)) 87 | -------------------------------------------------------------------------------- /src/lsct/bin/train_lsct_single_databases.py: -------------------------------------------------------------------------------- 1 | from lsct.train.train import train_main 2 | 3 | 4 | """ 5 | General train script of LSCT-PHIQNet on single one or two databases 6 | """ 7 | if __name__ == '__main__': 8 | args = {} 9 | args['result_folder'] = r'C:\vq_datasets\results\lsct' 10 | args['vids_meta'] = r'..\\meta_data\all_vids.pkl' 11 | args['meta_file'] = r'..\\meta_data\all_video_mos.csv' 12 | 13 | # if ugc_chunk_pickle is used, then the folders containing PHIQNet features of UGC chunks must be specified 14 | args['ugc_chunk_pickle'] = None # r'..\\meta_data\ugc_chunks.pkl' 15 | args['ugc_chunk_folder'] = r'.\frame_features\ugc_chunks' 16 | args['ugc_chunk_folder_flipped'] = r'.\frame_features_flipped\ugc_chunks' 17 | 18 | args['database'] = ['konvid', 'ugc'] 19 | # args['database'] = ['konvid'] 20 | # args['database'] = ['ugc'] 21 | 22 | args['model_name'] = 'lsct' 23 | 24 | args['transformer_params'] = [2, 64, 4, 64] 25 | args['dropout_rate'] = 0.1 26 | args['cnn_filters'] = [32, 64] 27 | # args['pooling_sizes'] = [4, 4] 28 | args['clip_length'] = 16 29 | 30 | args['batch_size'] = 32 31 | 32 | args['lr_base'] = 1e-3 33 | args['epochs'] = 400 34 | 35 | args['multi_gpu'] = 0 36 | args['gpu'] = 1 37 | 38 | args['validation'] = 'validation' 39 | 40 | args['do_finetune'] = False 41 | 42 | train_main(args) 43 | -------------------------------------------------------------------------------- /src/lsct/bin/train_lsct_test_on_live.py: 
-------------------------------------------------------------------------------- 1 | from lsct.train.train import train_main 2 | 3 | 4 | """ 5 | By setting args['validation'] = 'test', the model is trained on entire KonViD-1k and YouTube-UGC databases, and tested on LIVE-VQC 6 | """ 7 | if __name__ == '__main__': 8 | args = {} 9 | args['result_folder'] = r'C:\vq_datasets\results\lsct' 10 | args['vids_meta'] = r'..\\meta_data\all_vids.pkl' 11 | args['meta_file'] = r'..\\meta_data\all_video_mos.csv' 12 | 13 | # if ugc_chunk_pickle is used, then the folders containing PHIQNet features of UGC chunks must be specified 14 | args['ugc_chunk_pickle'] = None # r'..\\meta_data\ugc_chunks.pkl' 15 | args['ugc_chunk_folder'] = r'.\frame_features\ugc_chunks' 16 | args['ugc_chunk_folder_flipped'] = r'.\frame_features_flipped\ugc_chunks' 17 | 18 | args['database'] = ['live', 'konvid', 'ugc'] 19 | 20 | args['model_name'] = 'lsct' 21 | 22 | args['transformer_params'] = [2, 64, 4, 64] 23 | args['dropout_rate'] = 0.1 24 | args['cnn_filters'] = [32, 64] 25 | # args['pooling_sizes'] = [4, 4] 26 | args['clip_length'] = 16 27 | 28 | args['batch_size'] = 32 29 | args['lr_base'] = 1e-3 30 | args['epochs'] = 400 31 | 32 | args['multi_gpu'] = 0 33 | args['gpu'] = 1 34 | 35 | args['validation'] = 'test' 36 | 37 | args['do_finetune'] = False 38 | 39 | train_main(args) 40 | -------------------------------------------------------------------------------- /src/lsct/meta_data/all_vids.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junyongyou/lsct_phiqnet/0f7c627d2e2b5ec02c09cf74d380ad2ccc2da481/src/lsct/meta_data/all_vids.pkl -------------------------------------------------------------------------------- /src/lsct/meta_data/ugc_chunks.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junyongyou/lsct_phiqnet/0f7c627d2e2b5ec02c09cf74d380ad2ccc2da481/src/lsct/meta_data/ugc_chunks.pkl -------------------------------------------------------------------------------- /src/lsct/meta_data/ugc_mos_original.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junyongyou/lsct_phiqnet/0f7c627d2e2b5ec02c09cf74d380ad2ccc2da481/src/lsct/meta_data/ugc_mos_original.xlsx -------------------------------------------------------------------------------- /src/lsct/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junyongyou/lsct_phiqnet/0f7c627d2e2b5ec02c09cf74d380ad2ccc2da481/src/lsct/models/__init__.py -------------------------------------------------------------------------------- /src/lsct/models/cnn_1d.py: -------------------------------------------------------------------------------- 1 | from tensorflow.keras.layers import Layer, Conv1D, Input, Dropout, MaxPool1D, Masking 2 | import tensorflow.keras.backend as K 3 | from tensorflow.keras import Model 4 | import tensorflow as tf 5 | 6 | 7 | class CNN1D(Layer): 8 | def __init__(self, filters=(32, 64), pooling_sizes=(4, 4), kernel_size=3, stride_size=1, using_dropout=True, 9 | using_bias=False, dropout_rate=0.1, **kwargs): 10 | """ 11 | 1D CNN model 12 | :param filters: filter numbers in the CNN blocks 13 | :param pooling_sizes: max pooling size in each block 14 | :param kernel_size: kernel size of CNN layer 15 | :param stride_size: stride of CNN layer 16 | :param using_dropout: flag to use dropout 
or not 17 | :param using_bias: flag to use bias in CNN or not 18 | :param dropout_rate: dropout rate if using it 19 | :param kwargs: other config prams 20 | """ 21 | self.filters = filters 22 | self.kernel_size = kernel_size 23 | self.stride_size = stride_size 24 | self.using_dropout = using_dropout 25 | self.conv1d = [] 26 | self.pooling = [] 27 | self.dropout = [] 28 | for i, s_filter in enumerate(filters): 29 | self.conv1d.append(Conv1D(s_filter, 30 | kernel_size, 31 | padding='same', 32 | strides=stride_size, 33 | use_bias=using_bias, 34 | name='conv{}'.format(i) 35 | )) 36 | self.pooling.append(MaxPool1D(pool_size=pooling_sizes[i], name='pool{}'.format(i))) 37 | if using_dropout: 38 | self.dropout = Dropout(rate=dropout_rate) 39 | 40 | super(CNN1D, self).__init__(**kwargs) 41 | 42 | def build(self, input_shape): 43 | super(CNN1D, self).build(input_shape) 44 | 45 | def call(self, x, mask=None): 46 | for i in range(len(self.conv1d)): 47 | x = self.conv1d[i](x) 48 | x = self.pooling[i](x) 49 | if self.using_dropout: 50 | x = self.dropout(x) 51 | x = K.squeeze(x, axis=-2) 52 | return x 53 | 54 | def compute_output_shape(self, input_shape): 55 | return 1, self.filters[-1] 56 | 57 | 58 | if __name__ == '__main__': 59 | input_shape = (16, 5 * 256) 60 | filters = [32, 64, 128, 256] 61 | pooling_sizes = [2, 2, 2, 2] 62 | inputs = Input(shape=input_shape) 63 | x = CNN1D(filters=filters, pooling_sizes=pooling_sizes)(inputs) 64 | model = Model(inputs=inputs, outputs=x) 65 | model.summary() 66 | -------------------------------------------------------------------------------- /src/lsct/models/cnn_lstm_model.py: -------------------------------------------------------------------------------- 1 | from tensorflow.keras.layers import LSTM, Dense, TimeDistributed, Masking, BatchNormalization, Dropout, Input, \ 2 | Bidirectional, ConvLSTM2D, Attention 3 | from tensorflow.keras.models import Model 4 | 5 | from lsct.models.cnn_1d import CNN1D 6 | from cnn_lstm.attention_with_context import Attention 7 | 8 | 9 | def create_cnn_lstm_model(clip_length, feature_length=4096, cnn_filters=(32, 64), pooling_sizes=(4, 4), 10 | lstm_filters=(32, 64), mlp_filters=(64, 32, 8), using_dropout=True, using_bidirectional=False, 11 | using_cnn=True, using_attention=False, dropout_rate=0.1): 12 | """ 13 | Create CNN-LSTM model for VQA 14 | :param clip_length: clip length 15 | :param feature_length: feature length 16 | :param cnn_filters: filters in 1D CNN 17 | :param pooling_sizes: pooling sizes in 1D CNN 18 | :param lstm_filters: filters in LSTM 19 | :param mlp_filters: filters in the MLP head 20 | :param using_dropout: flag to use dropout or not 21 | :param using_bidirectional: flag to use bidirectional LSTM or not 22 | :param using_cnn: flag to use 1D CNN or not 23 | :param dropout_rate: dropout rate 24 | :return: CNN-LSTM model 25 | """ 26 | if using_cnn: 27 | cnn_model = CNN1D(filters=cnn_filters, pooling_sizes=pooling_sizes, using_dropout=using_dropout, 28 | dropout_rate=dropout_rate) 29 | input_shape = (None, clip_length, feature_length) 30 | else: 31 | input_shape = (None, clip_length) 32 | inputs = Input(shape=input_shape) 33 | if using_cnn: 34 | x = TimeDistributed(cnn_model)(inputs) 35 | else: 36 | x = inputs 37 | x = Masking(mask_value=0.)(x) 38 | for i, lstm_filter in enumerate(lstm_filters): 39 | if i < len(lstm_filters) - 1: 40 | if using_bidirectional: 41 | x = Bidirectional(LSTM(lstm_filter, return_sequences=True))(x) 42 | else: 43 | x = LSTM(lstm_filter, return_sequences=True)(x) 44 | else: 45 | if 
using_attention: 46 | if using_bidirectional: 47 | x = Bidirectional(LSTM(lstm_filter, return_sequences=True))(x) 48 | else: 49 | x = LSTM(lstm_filter, return_sequences=True)(x) 50 | else: 51 | if using_bidirectional: 52 | x = Bidirectional(LSTM(lstm_filter))(x) 53 | else: 54 | x = LSTM(lstm_filter)(x) 55 | 56 | if using_attention: 57 | x = Attention()(x) 58 | 59 | for mlp_filter in mlp_filters: 60 | x = Dense(mlp_filter)(x) 61 | if using_dropout: 62 | x = Dropout(dropout_rate)(x) 63 | 64 | outputs = Dense(1, activation='linear')(x) 65 | model = Model(inputs=inputs, outputs=outputs) 66 | model.summary() 67 | 68 | return model 69 | -------------------------------------------------------------------------------- /src/lsct/models/lsct_phiqnet_model.py: -------------------------------------------------------------------------------- 1 | from tensorflow.keras.layers import Input, TimeDistributed 2 | from tensorflow.keras.models import Model 3 | 4 | from lsct.models.cnn_1d import CNN1D 5 | from lsct.models.video_quality_transformer import VideoQualityTransformer 6 | 7 | 8 | def create_model(clip_length=16, feature_length=1280, cnn_filters=(32, 64), pooling_sizes=(4, 4), 9 | transformer_params=(2, 64, 4, 64), strides=1, dropout_rate=0.1): 10 | """ 11 | Create the LSCT-PHIQNet model for NR-VQA 12 | :param clip_length: clip length 13 | :param feature_length: length of frame PHIQNet features, default is 1280=5*256 14 | :param cnn_filters: CNN filters for the 1D CNN 15 | :param pooling_sizes: Pooling sizes for the 1D CNN 16 | :param transformer_params: Transformer parameters 17 | :param strides: stride in 1D CNN 18 | :param dropout_rate: dropout rate for both 1D CNN and Transformer 19 | :return: the LSCT-PHIQNet model 20 | """ 21 | using_dropout = dropout_rate > 0 22 | cnn_model = CNN1D(filters=cnn_filters, pooling_sizes=pooling_sizes, stride_size=strides, using_dropout=using_dropout, 23 | dropout_rate=dropout_rate) 24 | input_shape = (None, clip_length, feature_length) 25 | 26 | inputs = Input(shape=input_shape) 27 | x = TimeDistributed(cnn_model)(inputs) 28 | 29 | transformer = VideoQualityTransformer( 30 | num_layers=transformer_params[0], 31 | d_model=transformer_params[1], 32 | num_heads=transformer_params[2], 33 | mlp_dim=transformer_params[3], 34 | dropout=dropout_rate, 35 | ) 36 | x = transformer(x) 37 | 38 | model = Model(inputs=inputs, outputs=x) 39 | 40 | return model 41 | -------------------------------------------------------------------------------- /src/lsct/models/video_quality_transformer.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from tensorflow.keras import Model 3 | import tensorflow_addons as tfa 4 | from tensorflow.keras.layers import Dense, Dropout, LayerNormalization, Layer 5 | 6 | 7 | def create_padding_mask(input): 8 | """ 9 | Creates mask for input to Transformer based on the average of all elements = 0 10 | :param input: input sequence 11 | :return: mask 12 | """ 13 | input = tf.pad(input, paddings=[[0, 0], [1, 0], [0, 0]], constant_values=1) 14 | input = tf.cast(tf.math.equal(tf.keras.backend.mean(input, axis=-1), 0), tf.float32) 15 | 16 | # add extra dimensions to add the padding to the attention logits. 
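    # The all-ones position padded at the front of the sequence corresponds to the quality (CLS)
    # token that VideoQualityTransformer.call prepends, so that position is never masked.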
17 | return input[:, tf.newaxis, tf.newaxis, :] # (batch_size, 1, 1, seq_len) 18 | 19 | 20 | class MultiHeadAttention(Layer): 21 | """ 22 | This is the standard multi-head attention layer 23 | """ 24 | def __init__(self, d_model, num_heads=8): 25 | super(MultiHeadAttention, self).__init__() 26 | self.d_model = d_model 27 | self.num_heads = num_heads 28 | if d_model % num_heads != 0: 29 | raise ValueError( 30 | f'embedding dimension = {d_model} should be divisible by number of heads = {num_heads}' 31 | ) 32 | self.depth = d_model // num_heads 33 | 34 | self.wq = Dense(d_model) 35 | self.wk = Dense(d_model) 36 | self.wv = Dense(d_model) 37 | 38 | self.dense = Dense(d_model) 39 | 40 | def split_heads(self, x, batch_size): 41 | x = tf.reshape( 42 | x, (batch_size, -1, self.num_heads, self.depth) 43 | ) 44 | return tf.transpose(x, perm=[0, 2, 1, 3]) 45 | 46 | def scaled_dot_product_attention(self, query, key, value, mask): 47 | matmul_qk = tf.matmul(query, key, transpose_b=True) 48 | dim_key = tf.cast(tf.shape(key)[-1], tf.float32) 49 | scaled_score = matmul_qk / tf.math.sqrt(dim_key) 50 | if mask is not None: 51 | scaled_score += (mask * -1e9) 52 | weights = tf.nn.softmax(scaled_score, axis=-1) 53 | output = tf.matmul(weights, value) 54 | return output, weights 55 | 56 | def call(self, inputs, mask): 57 | batch_size = tf.shape(inputs)[0] 58 | 59 | query = self.wq(inputs) 60 | key = self.wk(inputs) 61 | value = self.wv(inputs) 62 | 63 | query = self.split_heads(query, batch_size) 64 | key = self.split_heads(key, batch_size) 65 | value = self.split_heads(value, batch_size) 66 | 67 | attention, weights = self.scaled_dot_product_attention(query, key, value, mask) 68 | attention = tf.transpose(attention, perm=[0, 2, 1, 3]) 69 | concat_attention = tf.reshape( 70 | attention, (batch_size, -1, self.d_model) 71 | ) 72 | output = self.dense(concat_attention) 73 | return output, weights 74 | 75 | 76 | class TransformerBlock(Layer): 77 | """ 78 | This is the standard Transformer block 79 | """ 80 | def __init__(self, d_model, num_heads, dff, dropout=0.1): 81 | super(TransformerBlock, self).__init__() 82 | self.mha = MultiHeadAttention(d_model, num_heads) 83 | self.ffn = tf.keras.Sequential( 84 | [Dense(dff, activation="relu"), 85 | Dense(d_model),] 86 | ) 87 | 88 | self.layernorm1 = LayerNormalization(epsilon=1e-6) 89 | self.layernorm2 = LayerNormalization(epsilon=1e-6) 90 | 91 | self.dropout1 = Dropout(dropout) 92 | self.dropout2 = Dropout(dropout) 93 | 94 | def call(self, x, training, mask): 95 | attn_output, attention_weigths = self.mha(x, mask) 96 | attn_output = self.dropout1(attn_output, training=training) 97 | out1 = self.layernorm1(x + attn_output) 98 | ffn_output = self.ffn(out1) 99 | ffn_output = self.dropout2(ffn_output, training=training) 100 | out2 = self.layernorm2(out1 + ffn_output) 101 | return out2 102 | 103 | 104 | class VideoQualityTransformer(Model): 105 | """ 106 | Transformer for video quality assessment using the standard Transformer, 107 | the maximum_position_encoding should cover the maximal clip number in the databases 108 | """ 109 | def __init__( 110 | self, 111 | num_layers, 112 | d_model, 113 | num_heads, 114 | mlp_dim, 115 | dropout=0.1, 116 | maximum_position_encoding=6000 117 | ): 118 | super(VideoQualityTransformer, self).__init__() 119 | 120 | self.d_model = d_model 121 | self.num_layers = num_layers 122 | 123 | # positional embedding is predefined with a sufficient length 124 | self.pos_emb = self.add_weight('pos_emb', shape=(1, maximum_position_encoding, d_model)) 125 
| 126 | # add video quality token 127 | self.quality_emb = self.add_weight('quality_emb', shape=(1, 1, d_model)) 128 | 129 | # normal Transformer architecture 130 | self.feature_proj = Dense(d_model) 131 | self.dropout = Dropout(dropout) 132 | self.enc_layers = [ 133 | TransformerBlock(d_model, num_heads, mlp_dim, dropout) 134 | for _ in range(num_layers) 135 | ] 136 | 137 | # MLP head 138 | self.mlp_head = tf.keras.Sequential( 139 | [ 140 | Dense(mlp_dim, activation=tfa.activations.gelu), 141 | Dropout(dropout), 142 | Dense(1), 143 | ] 144 | ) 145 | 146 | def call(self, x, training): 147 | batch_size = tf.shape(x)[0] 148 | mask = create_padding_mask(x) 149 | 150 | frame_length = tf.shape(x)[1] 151 | x = self.feature_proj(x) 152 | 153 | quality_emb = tf.broadcast_to(self.quality_emb, [batch_size, 1, self.d_model]) 154 | x = tf.concat([quality_emb, x], axis=1) 155 | 156 | # truncate the positional embedding for shorter videos 157 | x = x + self.pos_emb[:, : frame_length + 1, :] 158 | 159 | x = self.dropout(x, training=training) 160 | 161 | for layer in self.enc_layers: 162 | x = layer(x, training, mask) 163 | 164 | # First (CLS) is used for VQA 165 | x = self.mlp_head(x[:, 0]) 166 | return x -------------------------------------------------------------------------------- /src/lsct/train/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junyongyou/lsct_phiqnet/0f7c627d2e2b5ec02c09cf74d380ad2ccc2da481/src/lsct/train/__init__.py -------------------------------------------------------------------------------- /src/lsct/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junyongyou/lsct_phiqnet/0f7c627d2e2b5ec02c09cf74d380ad2ccc2da481/src/lsct/utils/__init__.py -------------------------------------------------------------------------------- /src/lsct/utils/frame_features_video_folders.py: -------------------------------------------------------------------------------- 1 | """ 2 | This class is to calculate PHIQNet features on video frames in a list of video folders, FFMPEG is required 3 | """ 4 | import numpy as np 5 | import subprocess as sp 6 | import json 7 | import os 8 | import tensorflow as tf 9 | from phiqnet.models.model_analysis import phiq_subnet 10 | 11 | 12 | class CalculateFrameQualityFeatures(): 13 | def __init__(self, model_weights, ffprobe_exe=None, ffmpeg_exe=None, process_frame_interval=0): 14 | """ 15 | Frame PHIQNet feature computer 16 | :param model_weights: PHIQNet model_weights file 17 | :param ffprobe_exe: FFProbe exe file 18 | :param ffmpeg_exe: FFMPEG exe file 19 | :param process_frame_interval: parameter of frame processing interval, 0 means all frames will be used 20 | """ 21 | self.ffmpeg = ffmpeg_exe 22 | self.ffprobe = ffprobe_exe 23 | self.process_frame_interval = process_frame_interval 24 | self.mos_scales = np.array([1, 2, 3, 4, 5]) 25 | self.get_feature_model(model_weights) 26 | 27 | def get_feature_model(self, model_weights): 28 | self.feature_model = phiq_subnet(n_quality_levels=5, return_backbone_maps=False, return_feature_maps=True, 29 | return_features=True) 30 | self.feature_model.load_weights(model_weights, by_name=True) 31 | 32 | def get_video_meta(self, video_file): 33 | """Internal method to get video meta 34 | :return: a list containing [audio_exit, video_exit, duration, frame_count, height, width, fps] 35 | """ 36 | cmd = [self.ffprobe, '-i', video_file, '-v', 'quiet', '-print_format', 
'json', '-show_streams', '-show_format'] 37 | ffprobe_output = json.loads(sp.check_output(cmd).decode('utf-8')) 38 | 39 | # audio_exits = False 40 | video_exits = False 41 | duration = 0 42 | frame_count = 0 43 | height = 0 44 | width = 0 45 | fps = 0 46 | bitrate = 0 47 | 48 | stream_type = 'streams' 49 | codec_type = 'codec_type' 50 | if stream_type in ffprobe_output: 51 | for i in range(len(ffprobe_output[stream_type])): 52 | if codec_type in ffprobe_output[stream_type][i]: 53 | # if ffprobe_output[stream_type][i][codec_type] == 'audio': 54 | # audio_exits = True 55 | if ffprobe_output[stream_type][i][codec_type] == 'video': 56 | video_exits = True 57 | frame_rate = ffprobe_output[stream_type][i]['avg_frame_rate'] 58 | if '/' in frame_rate: 59 | fps_temp = [float(item) for item in frame_rate.split('/')] 60 | fps = fps_temp[0] / fps_temp[1] 61 | else: 62 | fps = float(frame_rate) 63 | if 'duration' not in ffprobe_output[stream_type][i]: 64 | if 'format' in ffprobe_output: 65 | duration = float(ffprobe_output['format']['duration']) 66 | else: 67 | duration = float(ffprobe_output[stream_type][i]['duration']) 68 | frame_count = int(duration * fps) 69 | height = ffprobe_output[stream_type][i]['height'] 70 | width = ffprobe_output[stream_type][i]['width'] 71 | if 'bit_rate' not in ffprobe_output[stream_type][i]: 72 | if 'format' in ffprobe_output: 73 | bitrate = int(ffprobe_output['format']['bit_rate']) 74 | else: 75 | bitrate = int(ffprobe_output[stream_type][i]['bit_rate']) / 1000 76 | 77 | if not video_exits: 78 | return None 79 | return [video_exits, duration, frame_count, height, width, fps, bitrate] 80 | 81 | def video_features(self, video_folders, feature_folder): 82 | """ 83 | :param video_folders: a list of folders of all video files 84 | :param feature_folder: target folder to store the features files in NPY format 85 | :return: None 86 | """ 87 | for video_folder in video_folders: 88 | video_files = os.listdir(video_folder) 89 | for video_file in video_files: 90 | try: 91 | if video_file.endswith(('.mkv', '.mp4')): # Only mkv and mps contained in KonViD-1k, LIVE-VQC and YouTube-UGC databases 92 | video_path = os.path.join(video_folder, video_file) 93 | video_name = os.path.splitext(os.path.basename(video_file))[0] 94 | 95 | # Path to store the PHIQNet features of a frame and a flipped frame must be defined 96 | npy_file_features = r'' 97 | npy_file_features_flipped = r'' 98 | 99 | if not os.path.exists(os.path.dirname(npy_file_features)): 100 | os.makedirs(os.path.dirname(npy_file_features)) 101 | if not os.path.exists(os.path.dirname(npy_file_features_flipped)): 102 | os.makedirs(os.path.dirname(npy_file_features_flipped)) 103 | frame_features, features_flipped = self.__ffmpeg_frames_features__( 104 | os.path.join(video_folder, video_file), flip=True) 105 | np.save(npy_file_features, np.asarray(frame_features, dtype=np.float16)) 106 | np.save(npy_file_features_flipped, np.asarray(features_flipped, dtype=np.float16)) 107 | except Exception: 108 | print('{} excep'.format(video_file)) 109 | 110 | def __cal_features__(self, image): 111 | image /= 127.5 112 | image -= 1. 
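        # Scale pixel values from [0, 255] to [-1, 1] before feeding the frame to the feature model.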
113 | return self.feature_model.predict(np.expand_dims(image, axis=0)) 114 | 115 | def __ffmpeg_frames_features__(self, video_file, flip=True): 116 | meta = self.get_video_meta(video_file) 117 | video_height = meta[3] 118 | video_width = meta[4] 119 | video_size = video_height * video_width * 3 120 | # print('Start reading {}'.format(video_file)) 121 | if self.process_frame_interval > 0: 122 | fps = 'fps=1/' + str(self.process_frame_interval) 123 | cmd = [self.ffmpeg, '-i', video_file, '-f', 'image2pipe', '-vf', fps, '-pix_fmt', 'rgb24', '-vcodec', 124 | 'rawvideo', '-'] 125 | else: 126 | cmd = [self.ffmpeg, '-i', video_file, '-f', 'image2pipe', '-pix_fmt', 'rgb24', '-hide_banner', '-loglevel', 127 | 'panic', '-vcodec', 'rawvideo', '-'] 128 | pipe = sp.Popen(cmd, stdout=sp.PIPE) 129 | 130 | features = [] 131 | if flip: 132 | features_flipped = [] 133 | try: 134 | while True: 135 | try: 136 | raw_image = pipe.stdout.read(video_size) 137 | if len(raw_image) != video_size: 138 | break 139 | image = np.fromstring(raw_image, dtype='uint8') 140 | image = image.reshape((video_height, video_width, 3)) 141 | image = np.asarray(image, dtype=np.float32) 142 | flipped_image = np.fliplr(image) 143 | frame_feature = self.__cal_features__(image) 144 | features.append(np.asarray(frame_feature)) 145 | if flip: 146 | flipped_frame_features = self.__cal_features__(flipped_image) 147 | features_flipped.append(np.array(flipped_frame_features)) 148 | 149 | except Exception as e1: 150 | print(e1) 151 | continue 152 | except Exception as e2: 153 | print(e2) 154 | pipe.stdout.flush() 155 | 156 | if flip: 157 | return features, features_flipped 158 | else: 159 | return features 160 | 161 | 162 | if __name__ == '__main__': 163 | ffmpeg_exe = r'...\\ffmpeg\ffmpeg.exe' 164 | ffprobe_exe = r'...\\ffmpeg\ffprobe.exe' 165 | model_weights_file = r'..\\model_weights\PHIQNet.h5' 166 | 167 | feature_folder = r'...\\model_weights\frame_features' 168 | video_frame_features = CalculateFrameQualityFeatures(model_weights=model_weights_file, 169 | ffmpeg_exe=ffmpeg_exe, 170 | ffprobe_exe=ffprobe_exe) 171 | video_folders = [ 172 | r'.\live_vqc_video', 173 | r'.\ugc_test', 174 | r'.\ugc_train', 175 | r'.\ugc_validation', 176 | r'.\KoNViD_1k_videos' 177 | ] 178 | video_frame_features.video_features(video_folders, feature_folder) 179 | 180 | -------------------------------------------------------------------------------- /src/lsct/utils/frame_features_video_folders_Resnet50.py: -------------------------------------------------------------------------------- 1 | """ 2 | This class is to calculate PHIQNet features on video frames in a list of video folders, FFMPEG is required 3 | """ 4 | import numpy as np 5 | import subprocess as sp 6 | import json 7 | import os 8 | import tensorflow as tf 9 | from phiqnet.models.model_analysis import phiq_subnet 10 | 11 | 12 | class CalculateFrameQualityFeatures(): 13 | def __init__(self, model_weights, ffprobe_exe=None, ffmpeg_exe=None, process_frame_interval=0): 14 | """ 15 | Frame PHIQNet feature computer 16 | :param model_weights: PHIQNet model_weights file 17 | :param ffprobe_exe: FFProbe exe file 18 | :param ffmpeg_exe: FFMPEG exe file 19 | :param process_frame_interval: parameter of frame processing interval, 0 means all frames will be used 20 | """ 21 | self.ffmpeg = ffmpeg_exe 22 | self.ffprobe = ffprobe_exe 23 | self.process_frame_interval = process_frame_interval 24 | self.get_feature_model(model_weights) 25 | 26 | def get_feature_model(self, model_weights): 27 | self.feature_model = 
phiq_subnet(n_quality_levels=5, return_backbone_maps=False, return_feature_maps=False, 28 | return_features=True) 29 | self.feature_model.load_weights(model_weights, by_name=True) 30 | 31 | def get_video_meta(self, video_file): 32 | """Internal method to get video meta 33 | :return: a list containing [audio_exit, video_exit, duration, frame_count, height, width, fps] 34 | """ 35 | cmd = [self.ffprobe, '-i', video_file, '-v', 'quiet', '-print_format', 'json', '-show_streams', '-show_format'] 36 | ffprobe_output = json.loads(sp.check_output(cmd).decode('utf-8')) 37 | 38 | # audio_exits = False 39 | video_exits = False 40 | duration = 0 41 | frame_count = 0 42 | height = 0 43 | width = 0 44 | fps = 0 45 | bitrate = 0 46 | 47 | stream_type = 'streams' 48 | codec_type = 'codec_type' 49 | if stream_type in ffprobe_output: 50 | for i in range(len(ffprobe_output[stream_type])): 51 | if codec_type in ffprobe_output[stream_type][i]: 52 | # if ffprobe_output[stream_type][i][codec_type] == 'audio': 53 | # audio_exits = True 54 | if ffprobe_output[stream_type][i][codec_type] == 'video': 55 | video_exits = True 56 | frame_rate = ffprobe_output[stream_type][i]['avg_frame_rate'] 57 | if '/' in frame_rate: 58 | fps_temp = [float(item) for item in frame_rate.split('/')] 59 | fps = fps_temp[0] / fps_temp[1] 60 | else: 61 | fps = float(frame_rate) 62 | if 'duration' not in ffprobe_output[stream_type][i]: 63 | if 'format' in ffprobe_output: 64 | duration = float(ffprobe_output['format']['duration']) 65 | else: 66 | duration = float(ffprobe_output[stream_type][i]['duration']) 67 | frame_count = int(duration * fps) 68 | height = ffprobe_output[stream_type][i]['height'] 69 | width = ffprobe_output[stream_type][i]['width'] 70 | if 'bit_rate' not in ffprobe_output[stream_type][i]: 71 | if 'format' in ffprobe_output: 72 | bitrate = int(ffprobe_output['format']['bit_rate']) 73 | else: 74 | bitrate = int(ffprobe_output[stream_type][i]['bit_rate']) / 1000 75 | 76 | if not video_exits: 77 | return None 78 | return [video_exits, duration, frame_count, height, width, fps, bitrate] 79 | 80 | def video_features(self, video_folders, feature_folder): 81 | """ 82 | :param video_folders: a list of folders of all video files 83 | :param feature_folder: target folder to store the features files in NPY format 84 | :return: None 85 | """ 86 | for video_folder in video_folders: 87 | video_files = os.listdir(video_folder) 88 | for video_file in video_files: 89 | try: 90 | if video_file.endswith(('.mkv', '.mp4')): # Only mkv and mps contained in KonViD-1k, LIVE-VQC and YouTube-UGC databases 91 | video_path = os.path.join(video_folder, video_file) 92 | video_name = os.path.splitext(os.path.basename(video_file))[0] 93 | 94 | # Path to store the PHIQNet features of a frame and a flipped frame must be defined 95 | npy_file_features = r'' 96 | npy_file_features_flipped = r'' 97 | 98 | if not os.path.exists(os.path.dirname(npy_file_features)): 99 | os.makedirs(os.path.dirname(npy_file_features)) 100 | if not os.path.exists(os.path.dirname(npy_file_features_flipped)): 101 | os.makedirs(os.path.dirname(npy_file_features_flipped)) 102 | frame_features, features_flipped = self.__ffmpeg_frames_features__( 103 | os.path.join(video_folder, video_file), flip=True) 104 | np.save(npy_file_features, np.asarray(frame_features, dtype=np.float16)) 105 | np.save(npy_file_features_flipped, np.asarray(features_flipped, dtype=np.float16)) 106 | except Exception: 107 | print('{} excep'.format(video_file)) 108 | 109 | def __cal_features__(self, image): 110 | 
image /= 127.5 111 | image -= 1. 112 | return self.feature_model.predict(np.expand_dims(image, axis=0)) 113 | 114 | def __ffmpeg_frames_features__(self, video_file, flip=True): 115 | meta = self.get_video_meta(video_file) 116 | video_height = meta[3] 117 | video_width = meta[4] 118 | video_size = video_height * video_width * 3 119 | # print('Start reading {}'.format(video_file)) 120 | if self.process_frame_interval > 0: 121 | fps = 'fps=1/' + str(self.process_frame_interval) 122 | cmd = [self.ffmpeg, '-i', video_file, '-f', 'image2pipe', '-vf', fps, '-pix_fmt', 'rgb24', '-vcodec', 123 | 'rawvideo', '-'] 124 | else: 125 | cmd = [self.ffmpeg, '-i', video_file, '-f', 'image2pipe', '-pix_fmt', 'rgb24', '-hide_banner', '-loglevel', 126 | 'panic', '-vcodec', 'rawvideo', '-'] 127 | pipe = sp.Popen(cmd, stdout=sp.PIPE) 128 | 129 | features = [] 130 | if flip: 131 | features_flipped = [] 132 | try: 133 | while True: 134 | try: 135 | raw_image = pipe.stdout.read(video_size) 136 | if len(raw_image) != video_size: 137 | break 138 | image = np.fromstring(raw_image, dtype='uint8') 139 | image = image.reshape((video_height, video_width, 3)) 140 | image = np.asarray(image, dtype=np.float32) 141 | flipped_image = np.fliplr(image) 142 | frame_feature = self.__cal_features__(image) 143 | features.append(np.asarray(frame_feature)) 144 | if flip: 145 | flipped_frame_features = self.__cal_features__(flipped_image) 146 | features_flipped.append(np.array(flipped_frame_features)) 147 | 148 | except Exception as e1: 149 | print(e1) 150 | continue 151 | except Exception as e2: 152 | print(e2) 153 | pipe.stdout.flush() 154 | 155 | if flip: 156 | return features, features_flipped 157 | else: 158 | return features 159 | 160 | 161 | if __name__ == '__main__': 162 | ffmpeg_exe = r'...\\ffmpeg\ffmpeg.exe' 163 | ffprobe_exe = r'...\\ffmpeg\ffprobe.exe' 164 | model_weights_file = r'..\\model_weights\PHIQNet.h5' 165 | 166 | feature_folder = r'...\\model_weights\frame_features' 167 | video_frame_features = CalculateFrameQualityFeatures(model_weights=model_weights_file, 168 | ffmpeg_exe=ffmpeg_exe, 169 | ffprobe_exe=ffprobe_exe) 170 | video_folders = [ 171 | r'.\live_vqc_video', 172 | r'.\ugc_test', 173 | r'.\ugc_train', 174 | r'.\ugc_validation', 175 | r'.\KoNViD_1k_videos' 176 | ] 177 | video_frame_features.video_features(video_folders, feature_folder) 178 | 179 | -------------------------------------------------------------------------------- /src/lsct/utils/gather_video_ids.py: -------------------------------------------------------------------------------- 1 | """ 2 | This script is to collect all video IDs and possibly to dump them 3 | a video ID is: database name_video name 4 | """ 5 | import os 6 | import glob 7 | from pickle import load, dump 8 | from sklearn.model_selection import train_test_split 9 | from random import shuffle 10 | 11 | 12 | def gather_live_konvid_vids(video_folder, database): 13 | """ 14 | LIVE-VQC and KonViD-1k video IDs 15 | :param video_folder: 16 | :param database: 17 | :return: 18 | """ 19 | vids = [] 20 | for file in glob.glob(os.path.join(video_folder, '*.mp4')): 21 | vid = os.path.splitext(os.path.basename(file))[0] 22 | vids.append('{}_{}'.format(database, vid)) 23 | return vids 24 | 25 | 26 | def gather_ugc_vids(video_folders): 27 | """ 28 | YouTube-UGC video IDs 29 | :param video_folders: list of folders of YouTube-UGC video 30 | :return: video IDs 31 | """ 32 | ugc_vids = [] 33 | for video_folder in video_folders: 34 | files = glob.glob(os.path.join(video_folder, '*.mkv')) 35 | for file 
in files: 36 | vid = os.path.splitext(os.path.basename(file))[0] 37 | ugc_vids.append('ugc_{}'.format(vid)) 38 | return ugc_vids 39 | 40 | 41 | def gather_all_vids(all_vids_pkl=None, test_ratio=0.2, random_state=None): 42 | if all_vids_pkl: 43 | all_vids = load(open(all_vids_pkl, 'rb')) 44 | else: 45 | live_vids = gather_live_konvid_vids(r'.\live_vqc_Video', 'live') 46 | konvid_vids = gather_live_konvid_vids(r'.\KoNViD_1k_videos', 'konvid') 47 | ugc_vids = gather_ugc_vids([r'.\ugc_test', r'.\ugc_train', r'.\ugc_validation']) 48 | all_vids = live_vids + konvid_vids + ugc_vids 49 | 50 | # the video IDs can be dumped here, for later use in training 51 | dump(all_vids, open(r'.\all_vids.pkl', 'wb')) 52 | shuffle(all_vids) 53 | train_vids, test_vids = train_test_split(all_vids, test_size=test_ratio, random_state=random_state) 54 | return train_vids, test_vids 55 | 56 | 57 | if __name__ == '__main__': 58 | # live_video_folder = r'.\live_vqc_Video' 59 | # konvid_video_folder = r'.\KoNViD_1k_videos' 60 | # live_vids, live_fps = gather_live_konvid_vids(live_video_folder, 'live') 61 | # konvid_vids, konvid_fps = gather_live_konvid_vids(konvid_video_folder, 'konvid') 62 | # 63 | # ugc_video_folders = [r'.\ugc_test', r'.\ugc_train', r'.\ugc_validation'] 64 | # gather_ugc_vids(ugc_video_folders) 65 | 66 | info = load(open(r'..\\meta_data\ugc_chunks.pkl', 'rb')) 67 | t = 0 68 | -------------------------------------------------------------------------------- /src/lsct/utils/ugc_chunk_generator.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | import os 4 | from ffmpeg.video_handler import VideoHandler 5 | from pickle import load, dump 6 | 7 | 8 | def get_video_handler(): 9 | ffmpeg_exe = r'..\\ffmpeg\ffmpeg.exe' 10 | ffprobe_exe = r'..\\ffmpeg\ffprobe.exe' 11 | video_handler = VideoHandler(ffprobe_exe, ffmpeg_exe) 12 | return video_handler 13 | 14 | 15 | def get_video_path(vid): 16 | # Video folders of YouTube-UGC database must be specified 17 | video_folders = [ 18 | r'.\ugc_test', 19 | r'.\ugc_train', 20 | r'.\ugc_validation' 21 | ] 22 | for video_folder in video_folders: 23 | if os.path.exists(os.path.join(video_folder, vid + '.mkv')): 24 | return os.path.join(video_folder, vid + '.mkv') 25 | 26 | return None 27 | 28 | 29 | def get_chunk_mos_Resnet(): 30 | """ 31 | Extract frame features from Resnet50 of individual chunks and stored in Numpy npy files 32 | :return: Dictionary containing video id, full MOS, chunk1 MOS, chunk2 MOS, chunk3 MOS 33 | """ 34 | chunk_mos_dict = dict() 35 | ugc_mos_file = r'C:\vq_datasets\ugc_mos_original.xlsx' 36 | ugc_mos = pd.read_excel(ugc_mos_file) 37 | 38 | frame_feature_folder = r'C:\vq_datasets\VSFA\UGC' 39 | chunk_feature_folder = r'C:\vq_datasets\VSFA\UGC_CHUNKS' 40 | 41 | video_handler = get_video_handler() 42 | 43 | for index, row in ugc_mos.iterrows(): 44 | vid = row['vid'] 45 | video_path = get_video_path(vid) 46 | 47 | if video_path: 48 | video_meta = video_handler.get_video_meta(video_path) 49 | fps = round(video_meta[-2]) 50 | mos_chunk_0 = row['MOS chunk00'] 51 | mos_chunk_1 = row['MOS chunk05'] 52 | mos_chunk_2 = row['MOS chunk10'] 53 | 54 | chunk_mos = [] 55 | chunk_mos.append(row['MOS full']) 56 | 57 | frame_features = np.load(os.path.join(frame_feature_folder, vid + '_resnet-50_res5c.npy')) 58 | if not np.isnan(mos_chunk_0): 59 | chunk_mos.append(mos_chunk_0) 60 | frame_features_chunk_0 = frame_features[0 : 10 * fps, :] 61 | np.save(os.path.join(chunk_feature_folder, 
vid + '_resnet-50_res5c_chunk_0.npy'), frame_features_chunk_0) 62 | 63 | if not np.isnan(mos_chunk_1): 64 | chunk_mos.append(mos_chunk_1) 65 | frame_features_chunk_1 = frame_features[5 * fps: 15 * fps, :] 66 | np.save(os.path.join(chunk_feature_folder, vid + '_resnet-50_res5c_chunk_1.npy'), frame_features_chunk_1) 67 | 68 | if not np.isnan(mos_chunk_2): 69 | chunk_mos.append(mos_chunk_2) 70 | frame_features_chunk_2 = frame_features[10 * fps:, :] 71 | np.save(os.path.join(chunk_feature_folder, vid + '_resnet-50_res5c_chunk_2.npy'), frame_features_chunk_2) 72 | 73 | chunk_mos_dict[vid] = chunk_mos 74 | 75 | return chunk_mos_dict 76 | 77 | 78 | def get_chunk_features_mos(): 79 | """ 80 | Extract frame features of individual chunks and stored in Numpy npy files 81 | :return: Dictionary containing video id, full MOS, chunk1 MOS, chunk2 MOS, chunk3 MOS 82 | """ 83 | chunk_mos_dict = dict() 84 | ugc_mos_file = r'..\\meta_data\ugc_mos_original.xlsx' 85 | ugc_mos = pd.read_excel(ugc_mos_file) 86 | 87 | # Frame feature files of YouTube-UGC videos must be specified 88 | frame_feature_folder = r'.\frame_features\ugc' 89 | 90 | # Target folder to store the frame features of chunks 91 | chunk_feature_folder = r'.\frame_features\ugc_chunks' 92 | 93 | video_handler = get_video_handler() 94 | 95 | for index, row in ugc_mos.iterrows(): 96 | vid = row['vid'] 97 | video_path = get_video_path(vid) 98 | 99 | if video_path: 100 | mos_chunk_0 = row['MOS chunk00'] 101 | mos_chunk_1 = row['MOS chunk05'] 102 | mos_chunk_2 = row['MOS chunk10'] 103 | 104 | video_meta = video_handler.get_video_meta(video_path) 105 | fps = round(video_meta[-2]) 106 | 107 | chunk_mos = [] 108 | chunk_mos.append(row['MOS full']) 109 | 110 | frame_features = np.load(os.path.join(frame_feature_folder, vid + '.npy')) 111 | if not np.isnan(mos_chunk_0): 112 | chunk_mos.append(mos_chunk_0) 113 | frame_features_chunk_0 = frame_features[0 : 10 * fps, :, :, :] 114 | np.save(os.path.join(chunk_feature_folder, vid + '_chunk_0.npy'), frame_features_chunk_0) 115 | 116 | if not np.isnan(mos_chunk_1): 117 | chunk_mos.append(mos_chunk_1) 118 | frame_features_chunk_1 = frame_features[5 * fps: 15 * fps, :, :, :] 119 | np.save(os.path.join(chunk_feature_folder, vid + '_chunk_1.npy'), frame_features_chunk_1) 120 | 121 | if not np.isnan(mos_chunk_2): 122 | chunk_mos.append(mos_chunk_2) 123 | frame_features_chunk_2 = frame_features[10 * fps:, :, :, :] 124 | np.save(os.path.join(chunk_feature_folder, vid + '_chunk_2.npy'), frame_features_chunk_2) 125 | 126 | chunk_mos_dict[vid] = chunk_mos 127 | 128 | return chunk_mos_dict 129 | 130 | 131 | if __name__ == '__main__': 132 | chunk_mos_dict = get_chunk_features_mos() 133 | chunk_mos_dict_resnet50s = get_chunk_mos_Resnet() 134 | 135 | # The chunk MOS values can be dumped 136 | # dump(chunk_mos_dict, open(r'..\\meta_data\ugc_chunks.pkl', 'wb')) 137 | -------------------------------------------------------------------------------- /src/model_weights/LSCT.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junyongyou/lsct_phiqnet/0f7c627d2e2b5ec02c09cf74d380ad2ccc2da481/src/model_weights/LSCT.h5 -------------------------------------------------------------------------------- /src/model_weights/README.md: -------------------------------------------------------------------------------- 1 | # Trained weights 2 | 3 | The trained weights are supplied here. 
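For reference, a minimal sketch of restoring the LSCT weights described below; it assumes the released LSCT.h5 matches the default hyper-parameters used in lsct/bin/train_lsct_all_databases.py (clip length 16, CNN filters (32, 64), Transformer parameters (2, 64, 4, 64)), and the weight path is only an example. See examples/video_quality_prediction.py for the complete prediction pipeline.
```
from lsct.models.lsct_phiqnet_model import create_model

# Assumption: the released weights were trained with the default hyper-parameters.
model = create_model(clip_length=16,
                     feature_length=1280,
                     cnn_filters=(32, 64),
                     pooling_sizes=(4, 4),
                     transformer_params=(2, 64, 4, 64))
model.load_weights(r'..\model_weights\LSCT.h5')  # adjust the path to where LSCT.h5 is stored
```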
4 | 5 | PHIQNet: please download the trained weights [here](https://drive.google.com/file/d/1ymy2oL0r-XNzjqk_kE-lcNkI2FhSu95h/view?usp=sharing), which are required to calculate frame quality features for VQA. 6 | 7 | LSCT.h5: contains the weights for the LSCT model trained on the KonViD-1k, YouTube-UGC and LIVE-VQC databases. -------------------------------------------------------------------------------- /src/phiqnet/README.md: -------------------------------------------------------------------------------- 1 | # PHIQNet Implementation 2 | 3 | TF-Keras implementation of PHIQNet as described in [Perceptual Hierarchical Networks for No-Reference Image Quality Assessment]. 4 | 5 | ## Installation 6 | 7 | 1) Clone this repository. 8 | 2) Install the required Python packages. The code was developed with PyCharm in Python 3.7; requirements.txt was generated by PyCharm, and the code should also run with the latest versions of the packages. 9 | 10 | ## Training a model 11 | Many examples of training PHIQNet and its variants can be seen in phiqnet/bin. 12 | Argparse could be used, but the authors prefer a dictionary with explicitly defined parameters; it is easy to convert the scripts to take command-line arguments instead. 13 | In principle, the following parameters can be defined: 14 | 15 | args = {} 16 | args['multi_gpu'] = 0 # GPU setting, set to 1 to use multiple GPUs 17 | args['gpu'] = 0 # If multiple GPUs are available, specify which GPU to use 18 | 19 | args['result_folder'] = r'..\databases\experiments' # Define result path 20 | args['n_quality_levels'] = 5 # Choose between 1 (MOS prediction) and 5 (distribution prediction) 21 | 22 | args['train_folders'] = # Define folders containing training images 23 | [ 24 | r'..\databases\train\koniq_normal', 25 | r'..\databases\train\koniq_small', 26 | r'..\databases\train\live' 27 | ] 28 | args['val_folders'] = # Define folders containing testing images 29 | [ 30 | r'..\databases\val\koniq_normal', 31 | r'..\databases\val\koniq_small', 32 | r'..\databases\val\live' 33 | ] 34 | args['koniq_mos_file'] = r'..\databases\koniq10k_images_scores.csv' # MOS (distribution of scores) file for the KonIQ database 35 | args['live_mos_file'] = r'..\databases\live_mos.csv' # MOS (standard deviation of scores) file for the LIVE-wild database 36 | 37 | args['naive_backbone'] = False # Choose between True and False, indicating whether to use the backbone network only or the neck + head as well 38 | args['backbone'] = 'resnet50' # Choose from ['resnet18', 'resnet50', 'resnet152', 'resnet152v2', 'vgg16', 'resnest50'] 39 | args['weights'] = r'..\pretrained_weights\resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5' # Define the path of the ImageNet pretrained weights 40 | args['initial_epoch'] = 0 # Define the initial epoch, used for fine-tuning 41 | 42 | args['lr_base'] = 1e-4 / 2 # Define the base learning rate in the warmup and rate decay approach 43 | args['lr_schedule'] = True # Choose between True and False, indicating if the learning rate schedule should be used or not 44 | args['batch_size'] = 4 # Batch size, should be chosen to fit in the GPU memory 45 | args['epochs'] = 120 # Maximal epoch number; early stopping can be set in the callback or not 46 | 47 | args['fpn_type'] = 'fpn' # FPN type, choose from ['fpn', 'bifpn', 'pan', 'no_fpn']; it is noted that if 'bifpn' is chosen, the image resolutions must be powers of 2, otherwise a shape mismatch will be thrown 48 | args['attention_module'] = True # Choose between True and False, indicating if the attention module should be used or not 49 | 50 | args['image_aug'] = True # Choose between True and False,
51 | 52 | ## Predict image quality using the trained model 53 | After PHIQNet has been trained and the weights have been stored in an h5 file, it can be used to predict the quality of images of arbitrary sizes: 54 | 55 | ```python 56 | args = {} 57 | args['n_quality_levels'] = 5 58 | args['naive_backbone'] = False 59 | args['backbone'] = 'resnet50' 60 | args['fpn_type'] = 'fpn' 61 | args['weights'] = r'..\model_weights\PHIQNet.h5' 62 | model = phiq_net(n_quality_levels=args['n_quality_levels'], 63 | naive_backbone=args['naive_backbone'], 64 | backbone=args['backbone'], 65 | fpn_type=args['fpn_type']) 66 | model.load_weights(args['weights']) 67 | ``` 68 | Then use ModelEvaluation to predict the quality of an image set. 69 | 70 | In the "examples" folder, an example script examples\image_quality_prediction.py is provided that uses the trained weights to predict the quality of the example images. 71 | 72 | ## Prepare datasets for model training 73 | This work uses two publicly available databases: KonIQ-10k [KonIQ-10k: An ecologically valid database for deep learning of blind image quality assessment](https://ieeexplore.ieee.org/document/8968750) by V. Hosu, H. Lin, T. Sziranyi, and D. Saupe; 74 | and LIVE-wild [Massive online crowdsourced study of subjective and objective picture quality](https://ieeexplore.ieee.org/document/7327186) by D. Ghadiyaram and A.C. Bovik. 75 | 76 | 1) The two databases were merged and then split into training and testing sets. Please see the README in phiqnet/databases for details. 77 | 2) Make the MOS files (note: do NOT include a header line); a minimal parsing sketch is given after this list: 78 | 79 | For a database with the score distribution available, the MOS file looks like this (koniq format): 80 | ``` 81 | image path, voter number of quality scale 1, voter number of quality scale 2, voter number of quality scale 3, voter number of quality scale 4, voter number of quality scale 5, MOS or Z-score 82 | 10004473376.jpg,0,0,25,73,7,3.828571429 83 | 10007357496.jpg,0,3,45,47,1,3.479166667 84 | 10007903636.jpg,1,0,20,73,2,3.78125 85 | 10009096245.jpg,0,0,21,75,13,3.926605505 86 | ``` 87 | 88 | For a database with the standard deviation available, the MOS file looks like this (live format): 89 | ``` 90 | image path, standard deviation, MOS or Z-score 91 | t1.bmp,18.3762,63.9634 92 | t2.bmp,13.6514,25.3353 93 | t3.bmp,18.9246,48.9366 94 | t4.bmp,18.2414,35.8863 95 | ``` 96 | 97 | The MOS file format ('koniq' or 'live') and the score format ('mos' or 'z_score') should also be specified in phiqnet/utils/imageset_handler/get_image_scores. 98 | 3) In the train script in phiqnet/bin, the folders containing training and testing images are specified. 99 | 4) Pretrained ImageNet weights can be downloaded (see the README in phiqnet/pretrained_weights) and pointed to in the train script in phiqnet/bin. 100 |
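As a minimal, illustrative sketch of how a koniq-format row such as the samples above can be turned into training targets (this is not the actual implementation in phiqnet/utils/imageset_handler/get_image_scores): the five vote counts can be normalized into the score distribution used when n_quality_levels = 5, and the MOS is then the expectation of the quality levels 1..5 under that distribution; the same reduction can presumably be applied to a predicted distribution to obtain a single quality score.

```python
import numpy as np


def parse_koniq_row(line):
    """Parse one row of a koniq-format MOS file (no header line)."""
    parts = line.strip().split(',')
    image_name = parts[0]
    votes = np.array(parts[1:6], dtype=np.float32)
    distribution = votes / votes.sum()                  # 5-level target when n_quality_levels = 5
    mos = float(np.dot(distribution, np.arange(1, 6)))  # expectation over quality levels 1..5
    return image_name, distribution, mos


# First sample row from above: the recomputed MOS matches the value in the last column (~3.8286)
name, dist, mos = parse_koniq_row('10004473376.jpg,0,0,25,73,7,3.828571429')
print(name, dist.round(3), round(mos, 3))
```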
101 | ## Trained PHIQNet weights 102 | PHIQNet has been trained on the KonIQ-10k and LIVE-wild databases, and the weights file can be downloaded [here](https://drive.google.com/file/d/1ymy2oL0r-XNzjqk_kE-lcNkI2FhSu95h/view?usp=sharing). 103 | Due to the file size limitation of the CVPR submission, the weights file could not be submitted. Please download the weights file and store it in model_weights. 104 | 105 | ## State-of-the-art models 106 | Three other models are also included in this work. The original implementations of these metrics are employed; they can be found below. 107 | 108 | Koncept512 [KonIQ-10k: An ecologically valid database for deep learning of blind image quality assessment](https://github.com/subpic/koniq) 109 | 110 | SGDNet [SGDNet: An end-to-end saliency-guided deep neural network for no-reference image quality assessment](https://github.com/ysyscool/SGDNet) 111 | 112 | CaHDC [End-to-end blind image quality prediction with cascaded deep neural network](https://web.xidian.edu.cn/wjj/files/20190620_152557.zip) 113 | 114 | ## FAQ 115 | * To be added 116 | -------------------------------------------------------------------------------- /src/phiqnet/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junyongyou/lsct_phiqnet/0f7c627d2e2b5ec02c09cf74d380ad2ccc2da481/src/phiqnet/__init__.py -------------------------------------------------------------------------------- /src/phiqnet/backbone/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junyongyou/lsct_phiqnet/0f7c627d2e2b5ec02c09cf74d380ad2ccc2da481/src/phiqnet/backbone/__init__.py -------------------------------------------------------------------------------- /src/phiqnet/backbone/_common_blocks.py: -------------------------------------------------------------------------------- 1 | from backbone.resnet_config import get_submodules_from_kwargs 2 | 3 | 4 | def slice_tensor(x, start, stop, axis): 5 | if axis == 3: 6 | return x[:, :, :, start:stop] 7 | elif axis == 1: 8 | return x[:, start:stop, :, :] 9 | else: 10 | raise ValueError("Slice axis should be in (1, 3), got {}.".format(axis)) 11 | 12 | 13 | def GroupConv2D(filters, 14 | kernel_size, 15 | strides=(1, 1), 16 | groups=32, 17 | kernel_initializer='he_uniform', 18 | use_bias=True, 19 | activation='linear', 20 | padding='valid', 21 | **kwargs): 22 | """ 23 | Grouped convolution layer implemented as Slice, 24 | Conv2D and Concatenate layers: the input channels are split into groups, Conv2D is applied to each group, and the outputs are concatenated back. 25 | 26 | Args: 27 | filters: Integer, the dimensionality of the output space 28 | (i.e. the number of output filters in the convolution). 29 | kernel_size: An integer or tuple/list of 2 integers, 30 | specifying the height and width of the 2D convolution window. 31 | strides: An integer or tuple/list of 2 integers, specifying the strides 32 | of the convolution along the height and width. 33 | groups: Integer, number of groups to split the input filters into. 34 | kernel_initializer: Initializer for the kernel weights matrix. 35 | use_bias: Boolean, whether the layer uses a bias vector. 36 | activation: Activation function to use (see activations). 37 | If you don't specify anything, no activation is applied (ie. "linear" activation: a(x) = x). 38 | padding: one of "valid" or "same" (case-insensitive). 39 | 40 | Input shape: 41 | 4D tensor with shape: (batch, rows, cols, channels) if data_format is "channels_last". 42 | 43 | Output shape: 44 | 4D tensor with shape: (batch, new_rows, new_cols, filters) if data_format is "channels_last". 45 | rows and cols values might have changed due to padding.
46 | 47 | """ 48 | 49 | backend, layers, models, keras_utils = get_submodules_from_kwargs(kwargs) 50 | slice_axis = 3 if backend.image_data_format() == 'channels_last' else 1 51 | 52 | def layer(input_tensor): 53 | inp_ch = int(backend.int_shape(input_tensor)[-1] // groups) # input grouped channels 54 | out_ch = int(filters // groups) # output grouped channels 55 | 56 | blocks = [] 57 | for c in range(groups): 58 | slice_arguments = { 59 | 'start': c * inp_ch, 60 | 'stop': (c + 1) * inp_ch, 61 | 'axis': slice_axis, 62 | } 63 | x = layers.Lambda(slice_tensor, arguments=slice_arguments)(input_tensor) 64 | x = layers.Conv2D(out_ch, 65 | kernel_size, 66 | strides=strides, 67 | kernel_initializer=kernel_initializer, 68 | use_bias=use_bias, 69 | activation=activation, 70 | padding=padding)(x) 71 | blocks.append(x) 72 | 73 | x = layers.Concatenate(axis=slice_axis)(blocks) 74 | return x 75 | 76 | return layer 77 | 78 | 79 | def expand_dims(x, channels_axis): 80 | if channels_axis == 3: 81 | return x[:, None, None, :] 82 | elif channels_axis == 1: 83 | return x[:, :, None, None] 84 | else: 85 | raise ValueError("Slice axis should be in (1, 3), got {}.".format(channels_axis)) 86 | 87 | 88 | def ChannelSE(reduction=16, **kwargs): 89 | """ 90 | Squeeze and Excitation block, reimplementation inspired by 91 | https://github.com/Cadene/pretrained-models.pytorch/blob/master/pretrainedmodels/models/senet.py 92 | 93 | Args: 94 | reduction: channels squeeze factor 95 | 96 | """ 97 | backend, layers, models, keras_utils = get_submodules_from_kwargs(kwargs) 98 | channels_axis = 3 if backend.image_data_format() == 'channels_last' else 1 99 | 100 | def layer(input_tensor): 101 | # get number of channels/filters 102 | channels = backend.int_shape(input_tensor)[channels_axis] 103 | 104 | x = input_tensor 105 | 106 | # squeeze and excitation block in PyTorch style with 107 | x = layers.GlobalAveragePooling2D()(x) 108 | x = layers.Lambda(expand_dims, arguments={'channels_axis': channels_axis})(x) 109 | x = layers.Conv2D(channels // reduction, (1, 1), kernel_initializer='he_uniform')(x) 110 | x = layers.Activation('relu')(x) 111 | x = layers.Conv2D(channels, (1, 1), kernel_initializer='he_uniform')(x) 112 | x = layers.Activation('sigmoid')(x) 113 | 114 | # apply attention 115 | x = layers.Multiply()([input_tensor, x]) 116 | 117 | return x 118 | 119 | return layer 120 | -------------------------------------------------------------------------------- /src/phiqnet/backbone/resnet_config.py: -------------------------------------------------------------------------------- 1 | import keras_applications as ka 2 | 3 | 4 | def get_submodules_from_kwargs(kwargs): 5 | backend = kwargs.get('backend', ka._KERAS_BACKEND) 6 | layers = kwargs.get('layers', ka._KERAS_LAYERS) 7 | models = kwargs.get('models', ka._KERAS_MODELS) 8 | utils = kwargs.get('utils', ka._KERAS_UTILS) 9 | return backend, layers, models, utils 10 | -------------------------------------------------------------------------------- /src/phiqnet/backbone/vgg16.py: -------------------------------------------------------------------------------- 1 | # Copyright 2015 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | # pylint: disable=invalid-name 16 | """VGG16 model for Keras.""" 17 | from __future__ import absolute_import 18 | from __future__ import division 19 | from __future__ import print_function 20 | 21 | from tensorflow.python.keras import layers 22 | from tensorflow.python.keras.engine import training 23 | 24 | 25 | def VGG16(inputs): 26 | """Instantiates the VGG16 model. 27 | 28 | By default, it loads model_weights pre-trained on ImageNet. Check 'model_weights' for 29 | other options. 30 | 31 | This model can be built both with 'channels_first' data format 32 | (channels, height, width) or 'channels_last' data format 33 | (height, width, channels). 34 | 35 | The default input size for this model is 224x224. 36 | 37 | Caution: Be sure to properly pre-process your inputs to the application. 38 | Please see `applications.vgg16.preprocess_input` for an example. 39 | 40 | Arguments: 41 | include_top: whether to include the 3 fully-connected 42 | layers at the top of the network. 43 | model_weights: one of `None` (random initialization), 44 | 'imagenet' (pre-training on ImageNet), 45 | or the path to the model_weights file to be loaded. 46 | input_tensor: optional Keras tensor 47 | (i.e. output of `layers.Input()`) 48 | to use as image input for the model. 49 | input_shape: optional shape tuple, only to be specified 50 | if `include_top` is False (otherwise the input shape 51 | has to be `(224, 224, 3)` 52 | (with `channels_last` data format) 53 | or `(3, 224, 224)` (with `channels_first` data format). 54 | It should have exactly 3 input channels, 55 | and width and height should be no smaller than 32. 56 | E.g. `(200, 200, 3)` would be one valid value. 57 | pooling: Optional pooling mode for feature extraction 58 | when `include_top` is `False`. 59 | - `None` means that the output of the model will be 60 | the 4D tensor output of the 61 | last convolutional block. 62 | - `avg` means that global average pooling 63 | will be applied to the output of the 64 | last convolutional block, and thus 65 | the output of the model will be a 2D tensor. 66 | - `max` means that global max pooling will 67 | be applied. 68 | classes: optional number of classes to classify images 69 | into, only to be specified if `include_top` is True, and 70 | if no `model_weights` argument is specified. 71 | classifier_activation: A `str` or callable. The activation function to use 72 | on the "top" layer. Ignored unless `include_top=True`. Set 73 | `classifier_activation=None` to return the logits of the "top" layer. 74 | 75 | Returns: 76 | A `keras.Model` instance. 77 | 78 | Raises: 79 | ValueError: in case of invalid argument for `model_weights`, 80 | or invalid input shape. 81 | ValueError: if `classifier_activation` is not `softmax` or `None` when 82 | using a pretrained top layer. 
83 | """ 84 | 85 | # Block 1 86 | x = layers.Conv2D( 87 | 64, (3, 3), activation='relu', padding='same', name='block1_conv1')( 88 | inputs) 89 | x = layers.Conv2D( 90 | 64, (3, 3), activation='relu', padding='same', name='block1_conv2')(x) 91 | x = layers.MaxPooling2D((2, 2), strides=(2, 2), name='block1_pool')(x) 92 | 93 | outputs = [] 94 | # Block 2 95 | x = layers.Conv2D( 96 | 128, (3, 3), activation='relu', padding='same', name='block2_conv1')(x) 97 | x = layers.Conv2D( 98 | 128, (3, 3), activation='relu', padding='same', name='block2_conv2')(x) 99 | x = layers.MaxPooling2D((2, 2), strides=(2, 2), name='block2_pool')(x) 100 | outputs.append(x) 101 | 102 | # Block 3 103 | x = layers.Conv2D( 104 | 256, (3, 3), activation='relu', padding='same', name='block3_conv1')(x) 105 | x = layers.Conv2D( 106 | 256, (3, 3), activation='relu', padding='same', name='block3_conv2')(x) 107 | x = layers.Conv2D( 108 | 256, (3, 3), activation='relu', padding='same', name='block3_conv3')(x) 109 | x = layers.MaxPooling2D((2, 2), strides=(2, 2), name='block3_pool')(x) 110 | outputs.append(x) 111 | 112 | # Block 4 113 | x = layers.Conv2D( 114 | 512, (3, 3), activation='relu', padding='same', name='block4_conv1')(x) 115 | x = layers.Conv2D( 116 | 512, (3, 3), activation='relu', padding='same', name='block4_conv2')(x) 117 | x = layers.Conv2D( 118 | 512, (3, 3), activation='relu', padding='same', name='block4_conv3')(x) 119 | x = layers.MaxPooling2D((2, 2), strides=(2, 2), name='block4_pool')(x) 120 | outputs.append(x) 121 | 122 | # Block 5 123 | x = layers.Conv2D( 124 | 512, (3, 3), activation='relu', padding='same', name='block5_conv1')(x) 125 | x = layers.Conv2D( 126 | 512, (3, 3), activation='relu', padding='same', name='block5_conv2')(x) 127 | x = layers.Conv2D( 128 | 512, (3, 3), activation='relu', padding='same', name='block5_conv3')(x) 129 | x = layers.MaxPooling2D((2, 2), strides=(2, 2), name='block5_pool')(x) 130 | outputs.append(x) 131 | 132 | # Create model. 
133 | model = training.Model(inputs, outputs, name='vgg16') 134 | 135 | return model 136 | 137 | 138 | if __name__ == '__main__': 139 | model = VGG16(None) 140 | -------------------------------------------------------------------------------- /src/phiqnet/bin/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junyongyou/lsct_phiqnet/0f7c627d2e2b5ec02c09cf74d380ad2ccc2da481/src/phiqnet/bin/__init__.py -------------------------------------------------------------------------------- /src/phiqnet/bin/train_resnet152_distribution_fpn_attention_imageaug.py: -------------------------------------------------------------------------------- 1 | from phiqnet.train.train import train_main 2 | 3 | if __name__ == '__main__': 4 | args = {} 5 | args['multi_gpu'] = 0 6 | args['gpu'] = 1 7 | 8 | args['result_folder'] = r'..\databases\experiments' 9 | args['n_quality_levels'] = 5 10 | 11 | args['train_folders'] = [r'..\databases\train\koniq_normal', 12 | r'..\databases\train\koniq_small', 13 | r'..\databases\train\live'] 14 | args['val_folders'] = [r'..\databases\val\koniq_normal', 15 | r'..\databases\val\koniq_small', 16 | r'..\databases\val\live'] 17 | args['koniq_mos_file'] = r'..\databases\koniq10k_images_scores.csv' 18 | args['live_mos_file'] = r'..\databases\live_mos.csv' 19 | 20 | args['naive_backbone'] = False 21 | args['backbone'] = 'resnet152' 22 | args['model_weights'] = r'..\pretrained_weights\resnet152_weights_tf_dim_ordering_tf_kernels_notop.h5' 23 | args['initial_epoch'] = 0 24 | 25 | args['lr_base'] = 1e-4 / 2 26 | args['lr_schedule'] = True 27 | args['batch_size'] = 4 28 | args['epochs'] = 120 29 | 30 | args['fpn_type'] = 'fpn' 31 | args['attention_module'] = True 32 | 33 | args['image_aug'] = True 34 | 35 | train_main(args) 36 | -------------------------------------------------------------------------------- /src/phiqnet/bin/train_resnet152_distribution_fpn_attention_imageaug_finetune.py: -------------------------------------------------------------------------------- 1 | from phiqnet.train.train import train_main 2 | 3 | if __name__ == '__main__': 4 | args = {} 5 | args['multi_gpu'] = 0 6 | args['gpu'] = 1 7 | 8 | args['result_folder'] = r'..\databases\experiments' 9 | args['n_quality_levels'] = 5 10 | 11 | args['train_folders'] = [r'..\databases\train\koniq_normal', 12 | r'..\databases\train\koniq_small', 13 | r'..\databases\train\live'] 14 | args['val_folders'] = [r'..\databases\val\koniq_normal', 15 | r'..\databases\val\koniq_small', 16 | r'..\databases\val\live'] 17 | args['koniq_mos_file'] = r'..\databases\koniq10k_images_scores.csv' 18 | args['live_mos_file'] = r'..\databases\live_mos.csv' 19 | 20 | args['naive_backbone'] = False 21 | args['backbone'] = 'resnet152' 22 | args['model_weights'] = r'..\databases\experiments\resnet152_distribution_attention_fpn\116_0.8542_1.0227.h5' 23 | args['initial_epoch'] = 0 24 | 25 | args['lr_base'] = 5e-7 26 | args['lr_schedule'] = True 27 | args['batch_size'] = 4 28 | args['epochs'] = 120 29 | 30 | args['fpn_type'] = 'fpn' 31 | args['attention_module'] = True 32 | 33 | args['image_aug'] = True 34 | 35 | train_main(args) 36 | -------------------------------------------------------------------------------- /src/phiqnet/bin/train_resnet152v2_distribution_fpn_attention_imageaug.py: -------------------------------------------------------------------------------- 1 | from phiqnet.train.train import train_main 2 | 3 | if __name__ == '__main__': 4 | args = {} 5 | 
args['multi_gpu'] = 0 6 | args['gpu'] = 1 7 | 8 | args['result_folder'] = r'..\databases\experiments' 9 | args['n_quality_levels'] = 5 10 | 11 | args['train_folders'] = [r'..\databases\train\koniq_normal', 12 | r'..\databases\train\koniq_small', 13 | r'..\databases\train\live'] 14 | args['val_folders'] = [r'..\databases\val\koniq_normal', 15 | r'..\databases\val\koniq_small', 16 | r'..\databases\val\live'] 17 | args['koniq_mos_file'] = r'..\databases\koniq10k_images_scores.csv' 18 | args['live_mos_file'] = r'..\databases\live_mos.csv' 19 | 20 | args['naive_backbone'] = False 21 | args['backbone'] = 'resnet152v2' 22 | args['model_weights'] = r'..\pretrained_weights\resnet152v2_weights_tf_dim_ordering_tf_kernels_notop.h5' 23 | args['initial_epoch'] = 0 24 | 25 | args['lr_base'] = 1e-4 / 2 26 | args['lr_schedule'] = True 27 | args['batch_size'] = 1 28 | args['epochs'] = 80 29 | 30 | args['fpn_type'] = 'fpn' 31 | args['attention_module'] = True 32 | 33 | args['image_aug'] = True 34 | 35 | train_main(args) 36 | -------------------------------------------------------------------------------- /src/phiqnet/bin/train_resnet50_distribution_NOfpn_attention_imageaug.py: -------------------------------------------------------------------------------- 1 | from phiqnet.train.train import train_main 2 | 3 | if __name__ == '__main__': 4 | args = {} 5 | args['multi_gpu'] = 0 6 | args['gpu'] = 1 7 | 8 | args['result_folder'] = r'..\databases\experiments' 9 | args['n_quality_levels'] = 5 10 | 11 | args['train_folders'] = [r'..\databases\train\koniq_normal', 12 | r'..\databases\train\koniq_small', 13 | r'..\databases\train\live'] 14 | args['val_folders'] = [r'..\databases\val\koniq_normal', 15 | r'..\databases\val\koniq_small', 16 | r'..\databases\val\live'] 17 | args['koniq_mos_file'] = r'..\databases\koniq10k_images_scores.csv' 18 | args['live_mos_file'] = r'..\databases\live_mos.csv' 19 | 20 | args['naive_backbone'] = False 21 | args['backbone'] = 'resnet50' 22 | args['model_weights'] = r'..\pretrained_weights\resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5' 23 | args['initial_epoch'] = 0 24 | 25 | args['lr_base'] = 1e-4 / 2 26 | args['lr_schedule'] = True 27 | args['batch_size'] = 4 28 | args['epochs'] = 120 29 | 30 | args['fpn_type'] = 'no_fpn' 31 | args['attention_module'] = True 32 | 33 | args['image_aug'] = True 34 | 35 | train_main(args) 36 | -------------------------------------------------------------------------------- /src/phiqnet/bin/train_resnet50_distribution_fpn_NOattention_imageaug.py: -------------------------------------------------------------------------------- 1 | from phiqnet.train.train import train_main 2 | 3 | if __name__ == '__main__': 4 | args = {} 5 | args['multi_gpu'] = 0 6 | args['gpu'] = 1 7 | 8 | args['result_folder'] = r'..\databases\experiments' 9 | args['n_quality_levels'] = 5 10 | 11 | args['train_folders'] = [r'..\databases\train\koniq_normal', 12 | r'..\databases\train\koniq_small', 13 | r'..\databases\train\live'] 14 | args['val_folders'] = [r'..\databases\val\koniq_normal', 15 | r'..\databases\val\koniq_small', 16 | r'..\databases\val\live'] 17 | args['koniq_mos_file'] = r'..\databases\koniq10k_images_scores.csv' 18 | args['live_mos_file'] = r'..\databases\live_mos.csv' 19 | 20 | args['naive_backbone'] = False 21 | args['backbone'] = 'resnet50' 22 | args['model_weights'] = r'..\pretrained_weights\resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5' 23 | args['initial_epoch'] = 0 24 | 25 | args['lr_base'] = 1e-4 / 2 26 | args['lr_schedule'] = True 27 | 
args['batch_size'] = 4 28 | args['epochs'] = 120 29 | 30 | args['fpn_type'] = 'fpn' 31 | args['attention_module'] = False 32 | 33 | args['image_aug'] = True 34 | 35 | train_main(args) 36 | -------------------------------------------------------------------------------- /src/phiqnet/bin/train_resnet50_distribution_fpn_attention_NOimageaug.py: -------------------------------------------------------------------------------- 1 | from phiqnet.train.train import train_main 2 | 3 | if __name__ == '__main__': 4 | args = {} 5 | args['multi_gpu'] = 0 6 | args['gpu'] = 1 7 | 8 | args['result_folder'] = r'..\databases\experiments' 9 | args['n_quality_levels'] = 5 10 | 11 | args['train_folders'] = [ 12 | # r'..\databases\train\koniq_normal', 13 | r'..\databases\train\koniq_small',] 14 | # r'..\databases\train\live'] 15 | args['val_folders'] = [ 16 | # r'..\databases\val\koniq_normal', 17 | r'..\databases\val\koniq_small',] 18 | # r'..\databases\val\live'] 19 | args['koniq_mos_file'] = r'..\databases\koniq10k_images_scores.csv' 20 | args['live_mos_file'] = r'..\databases\live_mos.csv' 21 | 22 | args['naive_backbone'] = False 23 | args['backbone'] = 'resnet50' 24 | args['model_weights'] = r'..\pretrained_weights\resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5' 25 | args['initial_epoch'] = 0 26 | 27 | args['lr_base'] = 1e-4 / 2 28 | args['lr_schedule'] = True 29 | args['batch_size'] = 4 30 | args['epochs'] = 120 31 | 32 | args['fpn_type'] = 'fpn' 33 | args['attention_module'] = True 34 | 35 | args['image_aug'] = False 36 | 37 | train_main(args) 38 | -------------------------------------------------------------------------------- /src/phiqnet/bin/train_resnet50_distribution_fpn_attention_imageaug.py: -------------------------------------------------------------------------------- 1 | from phiqnet.train.train import train_main 2 | 3 | if __name__ == '__main__': 4 | args = {} 5 | args['multi_gpu'] = 0 6 | args['gpu'] = 1 7 | 8 | args['result_folder'] = r'..\databases\experiments' 9 | args['n_quality_levels'] = 5 10 | 11 | args['train_folders'] = [ 12 | r'..\databases\train\koniq_normal', 13 | r'..\databases\train\koniq_small', 14 | r'..\databases\train\live'] 15 | args['val_folders'] = [ 16 | r'..\databases\val\koniq_normal', 17 | r'..\databases\val\koniq_small', 18 | r'..\databases\val\live'] 19 | args['koniq_mos_file'] = r'..\databases\koniq10k_images_scores.csv' 20 | args['live_mos_file'] = r'..\databases\live_mos.csv' 21 | 22 | args['naive_backbone'] = False 23 | args['backbone'] = 'resnet50' 24 | args['model_weights'] = r'..\pretrained_weights\resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5' 25 | args['initial_epoch'] = 0 26 | 27 | args['lr_base'] = 1e-4 / 2 28 | args['lr_schedule'] = True 29 | args['batch_size'] = 4 30 | args['epochs'] = 120 31 | 32 | args['fpn_type'] = 'fpn' 33 | args['attention_module'] = True 34 | 35 | args['image_aug'] = True 36 | 37 | train_main(args) 38 | -------------------------------------------------------------------------------- /src/phiqnet/bin/train_resnet50_distribution_fpn_attention_imageaug_NOpretrain.py: -------------------------------------------------------------------------------- 1 | from phiqnet.train.train import train_main 2 | 3 | if __name__ == '__main__': 4 | args = {} 5 | args['multi_gpu'] = 0 6 | args['gpu'] = 0 7 | 8 | args['result_folder'] = r'..\databases\experiments' 9 | args['n_quality_levels'] = 5 10 | 11 | args['train_folders'] = [r'..\databases\train\koniq_normal', 12 | r'..\databases\train\koniq_small', 13 | r'..\databases\train\live'] 14 
| args['val_folders'] = [r'..\databases\val\koniq_normal', 15 | r'..\databases\val\koniq_small', 16 | r'..\databases\val\live'] 17 | args['koniq_mos_file'] = r'..\databases\koniq10k_images_scores.csv' 18 | args['live_mos_file'] = r'..\databases\live_mos.csv' 19 | 20 | args['naive_backbone'] = False 21 | args['backbone'] = 'resnet50' 22 | args['model_weights'] = None 23 | args['initial_epoch'] = 0 24 | 25 | args['lr_base'] = 1e-4 / 2 26 | args['lr_schedule'] = True 27 | args['batch_size'] = 4 28 | args['epochs'] = 120 29 | 30 | args['fpn_type'] = 'fpn' 31 | args['attention_module'] = True 32 | 33 | args['image_aug'] = True 34 | 35 | train_main(args) 36 | -------------------------------------------------------------------------------- /src/phiqnet/bin/train_resnet50_distribution_fpn_attention_imageaug_finetune.py: -------------------------------------------------------------------------------- 1 | from phiqnet.train.train import train_main 2 | 3 | if __name__ == '__main__': 4 | args = {} 5 | args['multi_gpu'] = 0 6 | args['gpu'] = 0 7 | 8 | args['result_folder'] = r'..\databases\experiments' 9 | args['n_quality_levels'] = 5 10 | 11 | args['train_folders'] = [r'..\databases\train\koniq_normal', 12 | r'..\databases\train\koniq_small', 13 | r'..\databases\train\live'] 14 | args['val_folders'] = [r'..\databases\val\koniq_normal', 15 | r'..\databases\val\koniq_small', 16 | r'..\databases\val\live'] 17 | args['koniq_mos_file'] = r'..\databases\koniq10k_images_scores.csv' 18 | args['live_mos_file'] = r'..\databases\live_mos.csv' 19 | 20 | args['naive_backbone'] = False 21 | args['backbone'] = 'resnet50' 22 | args['model_weights'] = r'..\databases\results_distribution_loss\resnet50_entropy_distribution_fpn_lr\113_0.8534_1.0183.h5' 23 | args['initial_epoch'] = 0 24 | 25 | args['lr_base'] = 1e-6 26 | args['lr_schedule'] = True 27 | args['batch_size'] = 4 28 | args['epochs'] = 120 29 | 30 | args['fpn_type'] = 'fpn' 31 | args['attention_module'] = True 32 | 33 | args['image_aug'] = True 34 | 35 | train_main(args) 36 | -------------------------------------------------------------------------------- /src/phiqnet/bin/train_resnet50_distribution_fpn_attention_imageaug_freezebackbone.py: -------------------------------------------------------------------------------- 1 | from phiqnet.train.train import train_main 2 | 3 | if __name__ == '__main__': 4 | args = {} 5 | args['multi_gpu'] = 0 6 | args['gpu'] = 1 7 | 8 | args['result_folder'] = r'..\databases\experiments' 9 | args['n_quality_levels'] = 5 10 | 11 | args['train_folders'] = [r'..\databases\train\koniq_normal', 12 | r'..\databases\train\koniq_small', 13 | r'..\databases\train\live'] 14 | args['val_folders'] = [r'..\databases\val\koniq_normal', 15 | r'..\databases\val\koniq_small', 16 | r'..\databases\val\live'] 17 | args['koniq_mos_file'] = r'..\databases\koniq10k_images_scores.csv' 18 | args['live_mos_file'] = r'..\databases\live_mos.csv' 19 | 20 | args['naive_backbone'] = False 21 | args['backbone'] = 'resnet50' 22 | args['model_weights'] = r'..\pretrained_weights\resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5' 23 | args['initial_epoch'] = 0 24 | 25 | args['lr_base'] = 1e-4 / 2 26 | args['lr_schedule'] = True 27 | args['batch_size'] = 4 28 | args['epochs'] = 120 29 | 30 | args['fpn_type'] = 'fpn' 31 | args['attention_module'] = True 32 | args['freeze_backbone'] = True 33 | 34 | args['image_aug'] = True 35 | 36 | train_main(args) 37 | -------------------------------------------------------------------------------- 
/src/phiqnet/bin/train_resnet50_distribution_fpn_attention_imageaug_koniq10k.py: -------------------------------------------------------------------------------- 1 | from phiqnet.train.train import train_main 2 | 3 | if __name__ == '__main__': 4 | args = {} 5 | args['multi_gpu'] = 0 6 | args['gpu'] = 1 7 | 8 | args['result_folder'] = r'..\databases\experiments\koniq_normal' 9 | args['n_quality_levels'] = 5 10 | 11 | args['train_folders'] = [r'..\databases\train\koniq_normal'] 12 | args['val_folders'] = [r'..\databases\val\koniq_normal'] 13 | args['koniq_mos_file'] = r'..\databases\koniq10k_images_scores.csv' 14 | args['live_mos_file'] = r'..\databases\live_mos.csv' 15 | 16 | args['naive_backbone'] = False 17 | args['backbone'] = 'resnet50' 18 | args['model_weights'] = r'..\pretrained_weights\resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5' 19 | args['initial_epoch'] = 0 20 | 21 | args['lr_base'] = 1e-4 / 2 22 | args['lr_schedule'] = True 23 | args['batch_size'] = 4 24 | args['epochs'] = 120 25 | 26 | args['fpn_type'] = 'fpn' 27 | args['attention_module'] = True 28 | 29 | args['image_aug'] = True 30 | 31 | train_main(args) 32 | -------------------------------------------------------------------------------- /src/phiqnet/bin/train_resnet50_distribution_fpn_attention_imageaug_koniq10k_finetune.py: -------------------------------------------------------------------------------- 1 | from phiqnet.train.train import train_main 2 | 3 | if __name__ == '__main__': 4 | args = {} 5 | args['multi_gpu'] = 0 6 | args['gpu'] = 1 7 | 8 | args['result_folder'] = r'..\databases\experiments\koniq_normal' 9 | args['n_quality_levels'] = 5 10 | 11 | args['train_folders'] = [r'..\databases\train\koniq_normal'] 12 | args['val_folders'] = [r'..\databases\val\koniq_normal'] 13 | args['koniq_mos_file'] = r'..\databases\koniq10k_images_scores.csv' 14 | args['live_mos_file'] = r'..\databases\live_mos.csv' 15 | 16 | args['naive_backbone'] = False 17 | args['backbone'] = 'resnet50' 18 | args['model_weights'] = r'..\databases\experiments\koniq_normal\resnet50_distribution_attention_fpn\80_0.8503_0.9293.h5' 19 | args['initial_epoch'] = 0 20 | 21 | args['lr_base'] = 1e-7 22 | args['lr_schedule'] = True 23 | args['batch_size'] = 4 24 | args['epochs'] = 120 25 | 26 | args['fpn_type'] = 'fpn' 27 | args['attention_module'] = True 28 | 29 | args['image_aug'] = True 30 | 31 | train_main(args) 32 | -------------------------------------------------------------------------------- /src/phiqnet/bin/train_resnet50_distribution_fpn_attention_imageaug_koniq_small.py: -------------------------------------------------------------------------------- 1 | from phiqnet.train.train import train_main 2 | 3 | if __name__ == '__main__': 4 | args = {} 5 | args['multi_gpu'] = 0 6 | args['gpu'] = 0 7 | 8 | args['result_folder'] = r'..\databases\experiments\koniq_small' 9 | args['n_quality_levels'] = 5 10 | 11 | args['train_folders'] = [#r'..\databases\train\koniq_normal', 12 | r'..\databases\train\koniq_small',] 13 | # r'..\databases\train\live'] 14 | args['val_folders'] = [#r'..\databases\val\koniq_normal', 15 | r'..\databases\val\koniq_small',] 16 | # r'..\databases\val\live'] 17 | args['koniq_mos_file'] = r'..\databases\koniq10k_images_scores.csv' 18 | args['live_mos_file'] = r'..\databases\live_mos.csv' 19 | 20 | args['naive_backbone'] = False 21 | args['backbone'] = 'resnet50' 22 | args['model_weights'] = r'..\pretrained_weights\resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5' 23 | args['initial_epoch'] = 0 24 | 25 | args['lr_base'] = 
1e-4 / 2 26 | args['lr_schedule'] = True 27 | args['batch_size'] = 8 28 | args['epochs'] = 120 29 | 30 | args['fpn_type'] = 'fpn' 31 | args['attention_module'] = True 32 | 33 | args['image_aug'] = True 34 | 35 | train_main(args) 36 | -------------------------------------------------------------------------------- /src/phiqnet/bin/train_resnet50_distribution_fpn_attention_imageaug_koniq_small_finetune.py: -------------------------------------------------------------------------------- 1 | from phiqnet.train.train import train_main 2 | 3 | if __name__ == '__main__': 4 | args = {} 5 | args['multi_gpu'] = 0 6 | args['gpu'] = 0 7 | 8 | args['result_folder'] = r'..\databases\experiments\koniq_small' 9 | args['n_quality_levels'] = 5 10 | 11 | args['train_folders'] = [#r'..\databases\train\koniq_normal', 12 | r'..\databases\train\koniq_small',] 13 | # r'..\databases\train\live'] 14 | args['val_folders'] = [#r'..\databases\val\koniq_normal', 15 | r'..\databases\val\koniq_small',] 16 | # r'..\databases\val\live'] 17 | args['koniq_mos_file'] = r'..\databases\koniq10k_images_scores.csv' 18 | args['live_mos_file'] = r'..\databases\live_mos.csv' 19 | 20 | args['naive_backbone'] = False 21 | args['backbone'] = 'resnet50' 22 | args['model_weights'] = r'..\databases\experiments\koniq_small\resnet50_distribution_attention_fpn_finetune\16_0.8542_0.9240.h5' 23 | args['initial_epoch'] = 0 24 | 25 | args['lr_base'] = 1e-5 26 | args['lr_schedule'] = True 27 | args['batch_size'] = 8 28 | args['epochs'] = 120 29 | 30 | args['fpn_type'] = 'fpn' 31 | args['attention_module'] = True 32 | 33 | args['image_aug'] = True 34 | 35 | train_main(args) 36 | -------------------------------------------------------------------------------- /src/phiqnet/bin/train_resnet50_distribution_fpn_attention_imageaug_koniqall.py: -------------------------------------------------------------------------------- 1 | from phiqnet.train.train import train_main 2 | 3 | if __name__ == '__main__': 4 | args = {} 5 | args['multi_gpu'] = 0 6 | args['gpu'] = 1 7 | 8 | args['result_folder'] = r'..\databases\experiments' 9 | args['n_quality_levels'] = 5 10 | 11 | args['train_folders'] = [r'..\databases\train\koniq_normal', 12 | # r'..\databases\train\koniq_small',] 13 | r'..\databases\train\live'] 14 | args['val_folders'] = [r'..\databases\val\koniq_normal', 15 | # r'..\databases\val\koniq_small',] 16 | r'..\databases\val\live'] 17 | args['koniq_mos_file'] = r'..\databases\koniq10k_images_scores.csv' 18 | args['live_mos_file'] = r'..\databases\live_mos.csv' 19 | 20 | args['naive_backbone'] = False 21 | args['backbone'] = 'resnet50' 22 | args['model_weights'] = r'..\pretrained_weights\resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5' 23 | args['initial_epoch'] = 0 24 | 25 | args['lr_base'] = 1e-4 / 2 26 | args['lr_schedule'] = True 27 | args['batch_size'] = 4 28 | args['epochs'] = 120 29 | 30 | args['fpn_type'] = 'fpn' 31 | args['attention_module'] = True 32 | 33 | args['image_aug'] = True 34 | 35 | train_main(args) 36 | -------------------------------------------------------------------------------- /src/phiqnet/bin/train_resnet50_distribution_fpn_attention_imageaug_koniqall_finetune.py: -------------------------------------------------------------------------------- 1 | from phiqnet.train.train import train_main 2 | 3 | if __name__ == '__main__': 4 | args = {} 5 | args['multi_gpu'] = 0 6 | args['gpu'] = 0 7 | 8 | args['result_folder'] = r'..\databases\experiments\koniq_all' 9 | args['n_quality_levels'] = 5 10 | 11 | args['train_folders'] = 
[r'..\databases\train\koniq_normal', 12 | r'..\databases\train\koniq_small'] 13 | # r'..\databases\train\live'] 14 | args['val_folders'] = [r'..\databases\val\koniq_normal', 15 | r'..\databases\val\koniq_small'] 16 | # r'..\databases\val\live'] 17 | args['koniq_mos_file'] = r'..\databases\koniq10k_images_scores.csv' 18 | args['live_mos_file'] = r'..\databases\live_mos.csv' 19 | 20 | args['naive_backbone'] = False 21 | args['backbone'] = 'resnet50' 22 | args['model_weights'] = r'..\databases\experiments\koniq_all\resnet50_distribution_attention_fpn\82_0.8477_0.9715.h5' 23 | args['initial_epoch'] = 0 24 | 25 | args['lr_base'] = 1e-7 26 | args['lr_schedule'] = True 27 | args['batch_size'] = 4 28 | args['epochs'] = 120 29 | 30 | args['fpn_type'] = 'fpn' 31 | args['attention_module'] = True 32 | 33 | args['image_aug'] = True 34 | 35 | train_main(args) 36 | -------------------------------------------------------------------------------- /src/phiqnet/bin/train_resnet50_distribution_fpn_attention_imageaug_test.py: -------------------------------------------------------------------------------- 1 | from phiqnet.train.train import train_main 2 | 3 | if __name__ == '__main__': 4 | args = {} 5 | args['multi_gpu'] = 0 6 | args['gpu'] = 0 7 | 8 | args['result_folder'] = r'..\databases\experiments_fixed_shape' 9 | args['n_quality_levels'] = 5 10 | 11 | args['train_folders'] = [#r'..\databases\train\koniq_normal', 12 | r'..\databases\train\koniq_small',] 13 | # r'..\databases\train\live'] 14 | args['val_folders'] = [#r'..\databases\val\koniq_normal', 15 | r'..\databases\val\koniq_small',] 16 | # r'..\databases\val\live'] 17 | args['koniq_mos_file'] = r'..\databases\koniq10k_images_scores.csv' 18 | args['live_mos_file'] = r'..\databases\live_mos.csv' 19 | 20 | args['naive_backbone'] = False 21 | args['backbone'] = 'resnet50' 22 | args['model_weights'] = r'..\databases\experiments_fixed_shape\resnet50_distribution_attention_fpn\21_0.8670_0.9262.h5' 23 | args['initial_epoch'] = 0 24 | 25 | args['lr_base'] = 1e-7 26 | args['lr_schedule'] = True 27 | args['batch_size'] = 16 28 | args['epochs'] = 120 29 | 30 | args['fpn_type'] = 'fpn' 31 | args['attention_module'] = True 32 | 33 | args['image_aug'] = True 34 | args['lr_schedule'] = True 35 | 36 | train_main(args) 37 | -------------------------------------------------------------------------------- /src/phiqnet/bin/train_resnet50_mos_fpn_attention_imageaug.py: -------------------------------------------------------------------------------- 1 | from phiqnet.train.train import train_main 2 | 3 | if __name__ == '__main__': 4 | args = {} 5 | args['multi_gpu'] = 0 6 | args['gpu'] = 0 7 | 8 | args['result_folder'] = r'..\databases\experiments\koniq_all' 9 | args['n_quality_levels'] = 1 10 | 11 | args['train_folders'] = [r'..\databases\train\koniq_normal', 12 | r'..\databases\train\koniq_small',] 13 | # r'..\databases\train\live'] 14 | args['val_folders'] = [r'..\databases\val\koniq_normal', 15 | r'..\databases\val\koniq_small',] 16 | # r'..\databases\val\live'] 17 | args['koniq_mos_file'] = r'..\databases\koniq10k_images_scores.csv' 18 | args['live_mos_file'] = r'..\databases\live_mos.csv' 19 | 20 | args['naive_backbone'] = False 21 | args['backbone'] = 'resnet50' 22 | args['model_weights'] = r'..\pretrained_weights\resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5' 23 | args['initial_epoch'] = 0 24 | 25 | args['lr_base'] = 1e-4 / 2 26 | args['lr_schedule'] = True 27 | args['batch_size'] = 4 28 | args['epochs'] = 120 29 | 30 | args['fpn_type'] = 'fpn' 31 | 
args['attention_module'] = True 32 | 33 | args['image_aug'] = True 34 | 35 | train_main(args) 36 | -------------------------------------------------------------------------------- /src/phiqnet/bin/train_resnet50_mos_fpn_attention_imageaug_finetune.py: -------------------------------------------------------------------------------- 1 | from phiqnet.train.train import train_main 2 | 3 | if __name__ == '__main__': 4 | args = {} 5 | args['multi_gpu'] = 0 6 | args['gpu'] = 0 7 | 8 | args['result_folder'] = r'..\databases\experiments' 9 | args['n_quality_levels'] = 1 10 | 11 | args['train_folders'] = [r'..\databases\train\koniq_normal', 12 | r'..\databases\train\koniq_small', 13 | r'..\databases\train\live'] 14 | args['val_folders'] = [r'..\databases\val\koniq_normal', 15 | r'..\databases\val\koniq_small', 16 | r'..\databases\val\live'] 17 | args['koniq_mos_file'] = r'..\databases\koniq10k_images_scores.csv' 18 | args['live_mos_file'] = r'..\databases\live_mos.csv' 19 | 20 | args['naive_backbone'] = False 21 | args['backbone'] = 'resnet50' 22 | args['model_weights'] = r'..\databases\experiments\resnet50_mos_attention_fpn\119_0.0005_0.0990.h5' 23 | args['initial_epoch'] = 0 24 | 25 | args['lr_base'] = 2e-8 26 | args['lr_schedule'] = True 27 | args['batch_size'] = 4 28 | args['epochs'] = 120 29 | 30 | args['fpn_type'] = 'fpn' 31 | args['attention_module'] = True 32 | 33 | args['image_aug'] = True 34 | 35 | train_main(args) 36 | -------------------------------------------------------------------------------- /src/phiqnet/bin/train_resnet50_mos_fpn_attention_imageaug_koniq_small.py: -------------------------------------------------------------------------------- 1 | from phiqnet.train.train import train_main 2 | 3 | if __name__ == '__main__': 4 | args = {} 5 | args['multi_gpu'] = 0 6 | args['gpu'] = 0 7 | 8 | args['result_folder'] = r'..\databases\experiments\koniq_small' 9 | args['n_quality_levels'] = 1 10 | 11 | args['train_folders'] = [#r'..\databases\train\koniq_normal', 12 | r'..\databases\train\koniq_small',] 13 | # r'..\databases\train\live'] 14 | args['val_folders'] = [#r'..\databases\val\koniq_normal', 15 | r'..\databases\val\koniq_small',] 16 | # r'..\databases\val\live'] 17 | args['koniq_mos_file'] = r'..\databases\koniq10k_images_scores.csv' 18 | args['live_mos_file'] = r'..\databases\live_mos.csv' 19 | 20 | args['naive_backbone'] = False 21 | args['backbone'] = 'resnet50' 22 | args['model_weights'] = r'..\pretrained_weights\resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5' 23 | args['initial_epoch'] = 0 24 | 25 | args['lr_base'] = 1e-4 / 2 26 | args['lr_schedule'] = True 27 | args['batch_size'] = 8 28 | args['epochs'] = 120 29 | 30 | args['fpn_type'] = 'fpn' 31 | args['attention_module'] = True 32 | 33 | args['image_aug'] = True 34 | 35 | train_main(args) 36 | -------------------------------------------------------------------------------- /src/phiqnet/bin/train_resnet50_mos_fpn_attention_imageaug_koniq_small_finetune.py: -------------------------------------------------------------------------------- 1 | from phiqnet.train.train import train_main 2 | 3 | if __name__ == '__main__': 4 | args = {} 5 | args['multi_gpu'] = 0 6 | args['gpu'] = 0 7 | 8 | args['result_folder'] = r'..\databases\experiments\koniq_small' 9 | args['n_quality_levels'] = 1 10 | 11 | args['train_folders'] = [#r'..\databases\train\koniq_normal', 12 | r'..\databases\train\koniq_small',] 13 | # r'..\databases\train\live'] 14 | args['val_folders'] = [#r'..\databases\val\koniq_normal', 15 | 
r'..\databases\val\koniq_small',] 16 | # r'..\databases\val\live'] 17 | args['koniq_mos_file'] = r'..\databases\koniq10k_images_scores.csv' 18 | args['live_mos_file'] = r'..\databases\live_mos.csv' 19 | 20 | args['naive_backbone'] = False 21 | args['backbone'] = 'resnet50' 22 | args['model_weights'] = r'..\databases\experiments\koniq_small\resnet50_mos_attention_fpn\44_0.0094_0.0473.h5' 23 | args['initial_epoch'] = 0 24 | 25 | args['lr_base'] = 1e-6 26 | args['lr_schedule'] = True 27 | args['batch_size'] = 8 28 | args['epochs'] = 120 29 | 30 | args['fpn_type'] = 'fpn' 31 | args['attention_module'] = True 32 | 33 | args['image_aug'] = True 34 | 35 | train_main(args) 36 | -------------------------------------------------------------------------------- /src/phiqnet/bin/train_resnet50_mos_fpn_attention_imageaug_koniqall.py: -------------------------------------------------------------------------------- 1 | from phiqnet.train.train import train_main 2 | 3 | if __name__ == '__main__': 4 | args = {} 5 | args['multi_gpu'] = 0 6 | args['gpu'] = 1 7 | 8 | args['result_folder'] = r'..\databases\experiments\koniq_all' 9 | args['n_quality_levels'] = 1 10 | 11 | args['train_folders'] = [r'..\databases\train\koniq_normal', 12 | r'..\databases\train\koniq_small',] 13 | # r'..\databases\train\live'] 14 | args['val_folders'] = [r'..\databases\val\koniq_normal', 15 | r'..\databases\val\koniq_small',] 16 | # r'..\databases\val\live'] 17 | args['koniq_mos_file'] = r'..\databases\koniq10k_images_scores.csv' 18 | args['live_mos_file'] = r'..\databases\live_mos.csv' 19 | 20 | args['naive_backbone'] = False 21 | args['backbone'] = 'resnet50' 22 | args['model_weights'] = r'..\pretrained_weights\resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5' 23 | args['initial_epoch'] = 0 24 | 25 | args['lr_base'] = 1e-4 / 2 26 | args['lr_schedule'] = True 27 | args['batch_size'] = 4 28 | args['epochs'] = 120 29 | 30 | args['fpn_type'] = 'fpn' 31 | args['attention_module'] = True 32 | 33 | args['image_aug'] = True 34 | 35 | train_main(args) 36 | -------------------------------------------------------------------------------- /src/phiqnet/bin/train_resnet50_mos_fpn_attention_imageaug_koniqall_finetune.py: -------------------------------------------------------------------------------- 1 | from phiqnet.train.train import train_main 2 | 3 | if __name__ == '__main__': 4 | args = {} 5 | args['multi_gpu'] = 0 6 | args['gpu'] = 1 7 | 8 | args['result_folder'] = r'..\databases\experiments\koniq_all' 9 | args['n_quality_levels'] = 1 10 | 11 | args['train_folders'] = [r'..\databases\train\koniq_normal', 12 | r'..\databases\train\koniq_small',] 13 | # r'..\databases\train\live'] 14 | args['val_folders'] = [r'..\databases\val\koniq_normal', 15 | r'..\databases\val\koniq_small',] 16 | # r'..\databases\val\live'] 17 | args['koniq_mos_file'] = r'..\databases\koniq10k_images_scores.csv' 18 | args['live_mos_file'] = r'..\databases\live_mos.csv' 19 | 20 | args['naive_backbone'] = False 21 | args['backbone'] = 'resnet50' 22 | args['model_weights'] = r'..\databases\experiments\koniq_all\resnet50_mos_attention_fpn\120_0.0008_0.0853.h5' 23 | args['initial_epoch'] = 0 24 | 25 | args['lr_base'] = 1e-7 26 | args['lr_schedule'] = True 27 | args['batch_size'] = 4 28 | args['epochs'] = 120 29 | 30 | args['fpn_type'] = 'fpn' 31 | args['attention_module'] = True 32 | 33 | args['image_aug'] = True 34 | 35 | train_main(args) 36 | -------------------------------------------------------------------------------- 
/src/phiqnet/bin/train_vgg16_distribution_fpn_attention_imageaug.py: -------------------------------------------------------------------------------- 1 | from phiqnet.train.train import train_main 2 | 3 | if __name__ == '__main__': 4 | args = {} 5 | args['multi_gpu'] = 0 6 | args['gpu'] = 1 7 | 8 | args['result_folder'] = r'..\databases\experiments' 9 | args['n_quality_levels'] = 5 10 | 11 | args['train_folders'] = [ 12 | # r'..\databases\train\koniq_normal', 13 | r'..\databases\train\koniq_small',] 14 | # r'..\databases\train\live'] 15 | args['val_folders'] = [ 16 | # r'..\databases\val\koniq_normal', 17 | r'..\databases\val\koniq_small',] 18 | # r'..\databases\val\live'] 19 | args['koniq_mos_file'] = r'..\databases\koniq10k_images_scores.csv' 20 | args['live_mos_file'] = r'..\databases\live_mos.csv' 21 | 22 | args['naive_backbone'] = False 23 | args['backbone'] = 'vgg16' 24 | args['model_weights'] = r'..\pretrained_weights\vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5' 25 | args['initial_epoch'] = 0 26 | 27 | args['lr_base'] = 1e-4 / 2 28 | args['lr_schedule'] = True 29 | args['batch_size'] = 4 30 | args['epochs'] = 120 31 | 32 | args['fpn_type'] = 'fpn' 33 | args['attention_module'] = True 34 | 35 | args['image_aug'] = True 36 | 37 | train_main(args) 38 | -------------------------------------------------------------------------------- /src/phiqnet/databases/README.md: -------------------------------------------------------------------------------- 1 | # Databases for training PHIQNet 2 | 3 | This work uses two publicly available databases: KonIQ-10k [KonIQ-10k: An ecologically valid database for deep learning of blind image quality assessment](https://ieeexplore.ieee.org/document/8968750) by V. Hosu, H. Lin, T. Sziranyi, and D. Saupe; 4 | and LIVE-wild [Massive online crowdsourced study of subjective and objective picture quality](https://ieeexplore.ieee.org/document/7327186) by D. Ghadiyaram and A.C. Bovik. 5 | 6 | The train_images_koniq(live) and test_images_koniq(live) files list the images in the training and testing sets, which were randomly chosen from the two databases taking SI (image complexity) and MOS into account. 7 | 8 | It is also possible to run multiple experiments with a new random split of train and test images each time, which can be done by running phiqnet\databases\random_split_imageset.py.
-------------------------------------------------------------------------------- /src/phiqnet/databases/random_split_imageset.py: -------------------------------------------------------------------------------- 1 | import os 2 | from sklearn.model_selection import train_test_split 3 | import shutil 4 | from PIL import Image 5 | 6 | 7 | def do_image_resize(image_files, original_image_folder, target_image_folder): 8 | """ 9 | Halves the images in KonIQ-10k database 10 | :param image_files: 11 | :param original_image_folder: 12 | :param target_image_folder: 13 | :return: 14 | """ 15 | for image_file in image_files: 16 | image = Image.open(os.path.join(original_image_folder, image_file)).resize((512, 384)) 17 | image.save(os.path.join(target_image_folder, image_file)) 18 | 19 | 20 | def do_split(original_image_folder, target_image_folder, mos_file, database='live'): 21 | all_files = [] 22 | 23 | train_image_folder = os.path.join(target_image_folder, 'train', database) 24 | val_image_folder = os.path.join(target_image_folder, 'val', database) 25 | if not os.path.exists(train_image_folder): 26 | os.makedirs(train_image_folder) 27 | if not os.path.exists(val_image_folder): 28 | os.makedirs(val_image_folder) 29 | 30 | with open(mos_file) as mf: 31 | lines = mf.readlines() 32 | for line in lines: 33 | content = line.split(',') 34 | all_files.append(content[0]) 35 | 36 | train_images, val_images = train_test_split(all_files, test_size=0.2, random_state=None) 37 | 38 | if database == 'live' or database == 'koniq_normal': 39 | for train_image in train_images: 40 | shutil.copy(os.path.join(original_image_folder, train_image), 41 | os.path.join(train_image_folder, train_image)) 42 | for val_image in val_images: 43 | shutil.copy(os.path.join(original_image_folder, val_image), 44 | os.path.join(val_image_folder, val_image)) 45 | else: 46 | do_image_resize(train_images, original_image_folder, train_image_folder) 47 | do_image_resize(val_images, original_image_folder, val_image_folder) 48 | 49 | 50 | def random_split(): 51 | # Specify the image folders for KonIQ-10 database and LIVE-wild database, suppose they are stored separately. 52 | koniq_image_folder = r'' 53 | live_image_folder = r'' 54 | 55 | # Specify the MOS files for KonIQ-10 database and LIVE-wild database, respectively. 
56 | # Now the image files will be written to the current database folder, then can be used in model training 57 | live_mos = r'.\live_mos.csv' 58 | live_koniq = r'.\koniq10k_images_scores.csv' 59 | 60 | target_image_folder = r'.\\' 61 | 62 | do_split(live_image_folder, target_image_folder, live_mos) 63 | do_split(koniq_image_folder, target_image_folder, live_koniq, database='koniq_normal') 64 | do_split(koniq_image_folder, target_image_folder, live_koniq, database='koniq_small') 65 | 66 | 67 | if __name__ == '__main__': 68 | random_split() -------------------------------------------------------------------------------- /src/phiqnet/databases/test_images_live.csv: -------------------------------------------------------------------------------- 1 | 100.bmp 2 | 1001.JPG 3 | 1003.JPG 4 | 1006.JPG 5 | 1008.JPG 6 | 1009.JPG 7 | 101.bmp 8 | 1012.JPG 9 | 1013.JPG 10 | 1014.JPG 11 | 1017.JPG 12 | 1018.JPG 13 | 102.bmp 14 | 1020.JPG 15 | 1022.JPG 16 | 1023.JPG 17 | 1024.JPG 18 | 1025.JPG 19 | 1026.JPG 20 | 1027.JPG 21 | 103.bmp 22 | 1035.JPG 23 | 1036.JPG 24 | 1037.JPG 25 | 1038.JPG 26 | 1039.JPG 27 | 1040.JPG 28 | 1041.JPG 29 | 1042.JPG 30 | 1043.JPG 31 | 1044.JPG 32 | 1046.JPG 33 | 1047.JPG 34 | 105.bmp 35 | 1050.JPG 36 | 1051.JPG 37 | 1052.JPG 38 | 1053.JPG 39 | 1055.JPG 40 | 1058.JPG 41 | 1059.JPG 42 | 1060.JPG 43 | 1061.JPG 44 | 1062.JPG 45 | 1065.JPG 46 | 1066.JPG 47 | 1069.JPG 48 | 1072.JPG 49 | 1075.JPG 50 | 1076.JPG 51 | 1077.JPG 52 | 1078.JPG 53 | 1081.JPG 54 | 1082.JPG 55 | 1085.JPG 56 | 1090.JPG 57 | 1091.JPG 58 | 1092.JPG 59 | 1093.JPG 60 | 1094.JPG 61 | 1095.JPG 62 | 1096.JPG 63 | 1098.JPG 64 | 1099.JPG 65 | 1100.JPG 66 | 1104.JPG 67 | 1105.JPG 68 | 1106.JPG 69 | 1110.JPG 70 | 1111.JPG 71 | 1113.JPG 72 | 1114.JPG 73 | 112.bmp 74 | 1125.JPG 75 | 1126.JPG 76 | 1134.JPG 77 | 1139.JPG 78 | 114.bmp 79 | 1140.JPG 80 | 1141.JPG 81 | 1142.JPG 82 | 1143.JPG 83 | 1144.JPG 84 | 1145.JPG 85 | 1146.JPG 86 | 1147.JPG 87 | 1148.JPG 88 | 1149.JPG 89 | 115.bmp 90 | 1156.JPG 91 | 1157.JPG 92 | 116.bmp 93 | 1161.bmp 94 | 1163.bmp 95 | 117.bmp 96 | 118.bmp 97 | 119.bmp 98 | 12.bmp 99 | 120.bmp 100 | 126.bmp 101 | 127.bmp 102 | 128.bmp 103 | 129.bmp 104 | 13.bmp 105 | 132.bmp 106 | 135.bmp 107 | 136.bmp 108 | 137.bmp 109 | 138.bmp 110 | 139.bmp 111 | 140.bmp 112 | 141.bmp 113 | 142.bmp 114 | 143.bmp 115 | 144.bmp 116 | 148.bmp 117 | 149.bmp 118 | 151.bmp 119 | 152.bmp 120 | 153.bmp 121 | 155.bmp 122 | 156.bmp 123 | 158.bmp 124 | 159.bmp 125 | 161.bmp 126 | 164.bmp 127 | 169.bmp 128 | 17.bmp 129 | 170.bmp 130 | 172.bmp 131 | 174.bmp 132 | 177.bmp 133 | 18.bmp 134 | 182.bmp 135 | 183.bmp 136 | 187.bmp 137 | 189.bmp 138 | 191.bmp 139 | 192.bmp 140 | 194.bmp 141 | 195.bmp 142 | 198.bmp 143 | 199.bmp 144 | 20.bmp 145 | 200.bmp 146 | 202.bmp 147 | 203.bmp 148 | 204.bmp 149 | 205.bmp 150 | 208.bmp 151 | 209.bmp 152 | 21.bmp 153 | 210.bmp 154 | 213.bmp 155 | 214.bmp 156 | 217.bmp 157 | 219.bmp 158 | 220.bmp 159 | 222.bmp 160 | 225.bmp 161 | 231.bmp 162 | 233.bmp 163 | 234.bmp 164 | 236.bmp 165 | 237.bmp 166 | 238.bmp 167 | 241.bmp 168 | 243.bmp 169 | 245.bmp 170 | 246.bmp 171 | 249.bmp 172 | 250.bmp 173 | 253.bmp 174 | 254.bmp 175 | 255.bmp 176 | 257.bmp 177 | 259.bmp 178 | 26.bmp 179 | 261.bmp 180 | 262.bmp 181 | 264.bmp 182 | 265.bmp 183 | 269.bmp 184 | 27.bmp 185 | 270.bmp 186 | 271.bmp 187 | 274.bmp 188 | 276.bmp 189 | 278.bmp 190 | 279.bmp 191 | 28.bmp 192 | 283.bmp 193 | 284.bmp 194 | 286.bmp 195 | 287.bmp 196 | 29.bmp 197 | 290.bmp 198 | 291.bmp 199 | 292.bmp 200 | 293.bmp 201 | 295.bmp 202 | 296.bmp 203 | 
3.bmp 204 | 30.bmp 205 | 305.bmp 206 | 306.bmp 207 | 307.bmp 208 | 31.bmp 209 | 310.bmp 210 | 313.bmp 211 | 314.bmp 212 | 315.bmp 213 | 316.bmp 214 | 318.bmp 215 | 319.bmp 216 | 32.bmp 217 | 322.bmp 218 | 325.bmp 219 | 329.bmp 220 | 331.bmp 221 | 332.bmp 222 | 334.bmp 223 | 338.bmp 224 | 343.bmp 225 | 347.bmp 226 | 348.bmp 227 | 349.bmp 228 | 35.bmp 229 | 351.bmp 230 | 352.bmp 231 | 356.bmp 232 | 357.bmp 233 | 358.bmp 234 | 360.bmp 235 | 362.bmp 236 | 363.bmp 237 | 365.bmp 238 | 366.bmp 239 | 368.bmp 240 | 369.bmp 241 | 371.bmp 242 | 373.bmp 243 | 377.bmp 244 | 381.bmp 245 | 382.bmp 246 | 385.bmp 247 | 387.bmp 248 | 389.bmp 249 | 39.bmp 250 | 390.bmp 251 | 392.bmp 252 | 396.bmp 253 | 40.bmp 254 | 400.bmp 255 | 405.bmp 256 | 406.bmp 257 | 407.bmp 258 | 409.bmp 259 | 410.bmp 260 | 414.bmp 261 | 415.bmp 262 | 416.bmp 263 | 417.bmp 264 | 418.bmp 265 | 422.bmp 266 | 423.bmp 267 | 425.bmp 268 | 426.bmp 269 | 428.bmp 270 | 429.bmp 271 | 43.bmp 272 | 430.bmp 273 | 431.bmp 274 | 432.bmp 275 | 434.bmp 276 | 435.bmp 277 | 436.bmp 278 | 437.bmp 279 | 44.bmp 280 | 441.bmp 281 | 443.bmp 282 | 444.bmp 283 | 446.bmp 284 | 447.bmp 285 | 448.bmp 286 | 449.bmp 287 | 45.bmp 288 | 450.bmp 289 | 452.bmp 290 | 454.bmp 291 | 456.bmp 292 | 457.bmp 293 | 458.bmp 294 | 46.bmp 295 | 465.bmp 296 | 466.bmp 297 | 467.bmp 298 | 468.bmp 299 | 469.bmp 300 | 47.bmp 301 | 470.bmp 302 | 471.bmp 303 | 473.bmp 304 | 475.bmp 305 | 478.bmp 306 | 48.bmp 307 | 480.bmp 308 | 481.bmp 309 | 482.bmp 310 | 483.bmp 311 | 484.bmp 312 | 486.bmp 313 | 49.bmp 314 | 493.bmp 315 | 494.bmp 316 | 495.bmp 317 | 497.bmp 318 | 498.bmp 319 | 500.bmp 320 | 503.bmp 321 | 504.bmp 322 | 507.bmp 323 | 508.bmp 324 | 509.bmp 325 | 51.bmp 326 | 510.bmp 327 | 513.bmp 328 | 514.bmp 329 | 517.bmp 330 | 518.bmp 331 | 519.bmp 332 | 52.bmp 333 | 520.bmp 334 | 523.bmp 335 | 524.bmp 336 | 525.bmp 337 | 527.bmp 338 | 530.bmp 339 | 531.bmp 340 | 533.JPG 341 | 536.JPG 342 | 537.JPG 343 | 539.JPG 344 | 54.bmp 345 | 543.JPG 346 | 544.JPG 347 | 545.JPG 348 | 550.JPG 349 | 552.JPG 350 | 555.JPG 351 | 558.JPG 352 | 560.JPG 353 | 564.JPG 354 | 565.JPG 355 | 566.JPG 356 | 567.JPG 357 | 568.JPG 358 | 569.JPG 359 | 571.JPG 360 | 583.JPG 361 | 584.JPG 362 | 588.JPG 363 | 592.JPG 364 | 593.JPG 365 | 596.JPG 366 | 597.JPG 367 | 598.JPG 368 | 6.bmp 369 | 60.bmp 370 | 600.JPG 371 | 601.JPG 372 | 602.JPG 373 | 603.JPG 374 | 605.JPG 375 | 607.JPG 376 | 609.JPG 377 | 61.bmp 378 | 610.JPG 379 | 611.JPG 380 | 612.JPG 381 | 613.JPG 382 | 614.JPG 383 | 617.JPG 384 | 618.JPG 385 | 619.JPG 386 | 620.JPG 387 | 621.JPG 388 | 627.JPG 389 | 629.JPG 390 | 632.JPG 391 | 633.JPG 392 | 638.JPG 393 | 639.JPG 394 | 64.bmp 395 | 640.JPG 396 | 642.JPG 397 | 644.JPG 398 | 645.JPG 399 | 646.JPG 400 | 649.JPG 401 | 65.bmp 402 | 652.JPG 403 | 654.JPG 404 | 657.JPG 405 | 659.JPG 406 | 66.bmp 407 | 665.JPG 408 | 666.JPG 409 | 667.JPG 410 | 670.JPG 411 | 671.JPG 412 | 676.JPG 413 | 680.JPG 414 | 681.JPG 415 | 683.JPG 416 | 686.JPG 417 | 688.JPG 418 | 69.bmp 419 | 693.JPG 420 | 694.JPG 421 | 696.JPG 422 | 697.JPG 423 | 698.JPG 424 | 70.bmp 425 | 701.JPG 426 | 702.JPG 427 | 706.JPG 428 | 707.JPG 429 | 709.JPG 430 | 710.JPG 431 | 711.JPG 432 | 712.JPG 433 | 714.JPG 434 | 716.JPG 435 | 718.JPG 436 | 719.JPG 437 | 720.JPG 438 | 729.JPG 439 | 732.JPG 440 | 733.JPG 441 | 740.JPG 442 | 745.JPG 443 | 746.JPG 444 | 747.JPG 445 | 748.JPG 446 | 750.JPG 447 | 752.JPG 448 | 753.JPG 449 | 757.JPG 450 | 759.JPG 451 | 76.bmp 452 | 761.JPG 453 | 764.JPG 454 | 770.JPG 455 | 771.JPG 456 | 773.JPG 457 | 774.JPG 458 | 775.JPG 459 
| 776.JPG 460 | 777.JPG 461 | 778.JPG 462 | 779.JPG 463 | 781.JPG 464 | 783.JPG 465 | 784.JPG 466 | 785.JPG 467 | 786.JPG 468 | 788.JPG 469 | 79.bmp 470 | 791.JPG 471 | 795.JPG 472 | 798.JPG 473 | 8.bmp 474 | 80.bmp 475 | 801.JPG 476 | 804.JPG 477 | 806.JPG 478 | 811.JPG 479 | 814.JPG 480 | 815.JPG 481 | 816.JPG 482 | 82.bmp 483 | 821.JPG 484 | 823.JPG 485 | 828.JPG 486 | 83.bmp 487 | 832.JPG 488 | 833.JPG 489 | 838.JPG 490 | 839.JPG 491 | 840.JPG 492 | 842.JPG 493 | 844.JPG 494 | 845.JPG 495 | 849.JPG 496 | 85.bmp 497 | 850.JPG 498 | 852.JPG 499 | 854.JPG 500 | 855.JPG 501 | 856.JPG 502 | 857.JPG 503 | 858.JPG 504 | 86.bmp 505 | 862.JPG 506 | 865.JPG 507 | 866.JPG 508 | 867.JPG 509 | 868.JPG 510 | 869.JPG 511 | 870.JPG 512 | 871.JPG 513 | 873.JPG 514 | 874.JPG 515 | 875.JPG 516 | 877.JPG 517 | 878.JPG 518 | 879.JPG 519 | 88.bmp 520 | 880.JPG 521 | 881.JPG 522 | 882.JPG 523 | 887.JPG 524 | 889.JPG 525 | 890.JPG 526 | 893.JPG 527 | 894.JPG 528 | 895.JPG 529 | 898.JPG 530 | 9.bmp 531 | 90.bmp 532 | 901.JPG 533 | 902.JPG 534 | 903.JPG 535 | 905.JPG 536 | 906.JPG 537 | 907.JPG 538 | 908.JPG 539 | 909.JPG 540 | 911.JPG 541 | 912.JPG 542 | 915.JPG 543 | 919.JPG 544 | 922.JPG 545 | 923.JPG 546 | 924.JPG 547 | 925.JPG 548 | 926.JPG 549 | 93.bmp 550 | 931.JPG 551 | 932.JPG 552 | 938.JPG 553 | 947.JPG 554 | 949.JPG 555 | 95.bmp 556 | 952.JPG 557 | 953.JPG 558 | 954.JPG 559 | 955.JPG 560 | 960.JPG 561 | 962.JPG 562 | 967.JPG 563 | 968.JPG 564 | 97.bmp 565 | 971.JPG 566 | 972.JPG 567 | 973.JPG 568 | 975.JPG 569 | 976.JPG 570 | 977.JPG 571 | 978.JPG 572 | 98.bmp 573 | 986.JPG 574 | 99.bmp 575 | 990.JPG 576 | 992.JPG 577 | 993.JPG 578 | 994.JPG 579 | 995.JPG 580 | 996.JPG 581 | 998.JPG 582 | 999.JPG 583 | t3.bmp 584 | t5.bmp 585 | t7.bmp 586 | -------------------------------------------------------------------------------- /src/phiqnet/databases/train_images_live.csv: -------------------------------------------------------------------------------- 1 | 10.bmp 2 | 1000.JPG 3 | 1002.JPG 4 | 1004.JPG 5 | 1005.JPG 6 | 1007.JPG 7 | 1010.JPG 8 | 1011.JPG 9 | 1015.JPG 10 | 1016.JPG 11 | 1019.JPG 12 | 1021.JPG 13 | 1028.JPG 14 | 1029.JPG 15 | 1030.JPG 16 | 1031.JPG 17 | 1032.JPG 18 | 1033.JPG 19 | 1034.JPG 20 | 104.bmp 21 | 1045.JPG 22 | 1048.JPG 23 | 1049.JPG 24 | 1054.JPG 25 | 1056.JPG 26 | 1057.JPG 27 | 106.bmp 28 | 1063.JPG 29 | 1064.JPG 30 | 1067.JPG 31 | 1068.JPG 32 | 107.bmp 33 | 1070.JPG 34 | 1071.JPG 35 | 1073.JPG 36 | 1074.JPG 37 | 1079.JPG 38 | 108.bmp 39 | 1080.JPG 40 | 1083.JPG 41 | 1084.JPG 42 | 1086.JPG 43 | 1087.JPG 44 | 1088.JPG 45 | 1089.JPG 46 | 109.bmp 47 | 1097.JPG 48 | 11.bmp 49 | 110.bmp 50 | 1101.JPG 51 | 1102.JPG 52 | 1103.JPG 53 | 1107.JPG 54 | 1108.JPG 55 | 1109.JPG 56 | 111.bmp 57 | 1112.JPG 58 | 1115.JPG 59 | 1116.JPG 60 | 1117.JPG 61 | 1118.JPG 62 | 1119.JPG 63 | 1120.JPG 64 | 1121.JPG 65 | 1122.JPG 66 | 1123.JPG 67 | 1124.JPG 68 | 1127.JPG 69 | 1128.JPG 70 | 1129.JPG 71 | 113.bmp 72 | 1130.JPG 73 | 1131.JPG 74 | 1132.JPG 75 | 1133.JPG 76 | 1135.JPG 77 | 1136.JPG 78 | 1137.JPG 79 | 1138.JPG 80 | 1150.JPG 81 | 1151.JPG 82 | 1152.JPG 83 | 1153.JPG 84 | 1154.JPG 85 | 1155.JPG 86 | 1158.JPG 87 | 1159.JPG 88 | 1160.bmp 89 | 1162.bmp 90 | 1164.bmp 91 | 121.bmp 92 | 122.bmp 93 | 123.bmp 94 | 124.bmp 95 | 125.bmp 96 | 130.bmp 97 | 131.bmp 98 | 133.bmp 99 | 134.bmp 100 | 14.bmp 101 | 145.bmp 102 | 146.bmp 103 | 147.bmp 104 | 15.bmp 105 | 150.bmp 106 | 154.bmp 107 | 157.bmp 108 | 16.bmp 109 | 160.bmp 110 | 162.bmp 111 | 163.bmp 112 | 165.bmp 113 | 166.bmp 114 | 167.bmp 115 | 168.bmp 116 | 
171.bmp 117 | 173.bmp 118 | 175.bmp 119 | 176.bmp 120 | 178.bmp 121 | 179.bmp 122 | 180.bmp 123 | 181.bmp 124 | 184.bmp 125 | 185.bmp 126 | 186.bmp 127 | 188.bmp 128 | 19.bmp 129 | 190.bmp 130 | 193.bmp 131 | 196.bmp 132 | 197.bmp 133 | 201.bmp 134 | 206.bmp 135 | 207.bmp 136 | 211.bmp 137 | 212.bmp 138 | 215.bmp 139 | 216.bmp 140 | 218.bmp 141 | 22.bmp 142 | 221.bmp 143 | 223.bmp 144 | 224.bmp 145 | 226.bmp 146 | 227.bmp 147 | 228.bmp 148 | 229.bmp 149 | 23.bmp 150 | 230.bmp 151 | 232.bmp 152 | 235.bmp 153 | 239.bmp 154 | 24.bmp 155 | 240.bmp 156 | 242.bmp 157 | 244.bmp 158 | 247.bmp 159 | 248.bmp 160 | 25.bmp 161 | 251.bmp 162 | 252.bmp 163 | 256.bmp 164 | 258.bmp 165 | 260.bmp 166 | 263.bmp 167 | 266.bmp 168 | 267.bmp 169 | 268.bmp 170 | 272.bmp 171 | 273.bmp 172 | 275.bmp 173 | 277.bmp 174 | 280.bmp 175 | 281.bmp 176 | 282.bmp 177 | 285.bmp 178 | 288.bmp 179 | 289.bmp 180 | 294.bmp 181 | 297.bmp 182 | 298.bmp 183 | 299.bmp 184 | 300.bmp 185 | 301.bmp 186 | 302.bmp 187 | 303.bmp 188 | 304.bmp 189 | 308.bmp 190 | 309.bmp 191 | 311.bmp 192 | 312.bmp 193 | 317.bmp 194 | 320.bmp 195 | 321.bmp 196 | 323.bmp 197 | 324.bmp 198 | 326.bmp 199 | 327.bmp 200 | 328.bmp 201 | 33.bmp 202 | 330.bmp 203 | 333.bmp 204 | 335.bmp 205 | 336.bmp 206 | 337.bmp 207 | 339.bmp 208 | 34.bmp 209 | 340.bmp 210 | 341.bmp 211 | 342.bmp 212 | 344.bmp 213 | 345.bmp 214 | 346.bmp 215 | 350.bmp 216 | 353.bmp 217 | 354.bmp 218 | 355.bmp 219 | 359.bmp 220 | 36.bmp 221 | 361.bmp 222 | 364.bmp 223 | 367.bmp 224 | 37.bmp 225 | 370.bmp 226 | 372.bmp 227 | 374.bmp 228 | 375.bmp 229 | 376.bmp 230 | 378.bmp 231 | 379.bmp 232 | 38.bmp 233 | 380.bmp 234 | 383.bmp 235 | 384.bmp 236 | 386.bmp 237 | 388.bmp 238 | 391.bmp 239 | 393.bmp 240 | 394.bmp 241 | 395.bmp 242 | 397.bmp 243 | 398.bmp 244 | 399.bmp 245 | 4.bmp 246 | 401.bmp 247 | 402.bmp 248 | 403.bmp 249 | 404.bmp 250 | 408.bmp 251 | 41.bmp 252 | 411.bmp 253 | 412.bmp 254 | 413.bmp 255 | 419.bmp 256 | 42.bmp 257 | 420.bmp 258 | 421.bmp 259 | 424.bmp 260 | 427.bmp 261 | 433.bmp 262 | 438.bmp 263 | 439.bmp 264 | 440.bmp 265 | 442.bmp 266 | 445.bmp 267 | 451.bmp 268 | 453.bmp 269 | 455.bmp 270 | 459.bmp 271 | 460.bmp 272 | 461.bmp 273 | 462.bmp 274 | 463.bmp 275 | 464.bmp 276 | 472.bmp 277 | 474.bmp 278 | 476.bmp 279 | 477.bmp 280 | 479.bmp 281 | 485.bmp 282 | 487.bmp 283 | 488.bmp 284 | 489.bmp 285 | 490.bmp 286 | 491.bmp 287 | 492.bmp 288 | 496.bmp 289 | 499.bmp 290 | 5.bmp 291 | 50.bmp 292 | 501.bmp 293 | 502.bmp 294 | 505.bmp 295 | 506.bmp 296 | 511.bmp 297 | 512.bmp 298 | 515.bmp 299 | 516.bmp 300 | 521.bmp 301 | 522.bmp 302 | 526.bmp 303 | 528.bmp 304 | 529.bmp 305 | 53.bmp 306 | 532.bmp 307 | 534.JPG 308 | 535.JPG 309 | 538.JPG 310 | 540.JPG 311 | 541.JPG 312 | 542.JPG 313 | 546.JPG 314 | 547.JPG 315 | 548.JPG 316 | 549.JPG 317 | 55.bmp 318 | 551.JPG 319 | 553.JPG 320 | 554.JPG 321 | 556.JPG 322 | 557.JPG 323 | 559.JPG 324 | 56.bmp 325 | 561.JPG 326 | 562.JPG 327 | 563.JPG 328 | 57.bmp 329 | 570.JPG 330 | 572.JPG 331 | 573.JPG 332 | 574.JPG 333 | 575.JPG 334 | 576.JPG 335 | 577.JPG 336 | 578.JPG 337 | 579.JPG 338 | 58.bmp 339 | 580.JPG 340 | 581.JPG 341 | 582.JPG 342 | 585.JPG 343 | 586.JPG 344 | 587.JPG 345 | 589.JPG 346 | 59.bmp 347 | 590.JPG 348 | 591.JPG 349 | 594.JPG 350 | 595.JPG 351 | 599.JPG 352 | 604.JPG 353 | 606.JPG 354 | 608.JPG 355 | 615.JPG 356 | 616.JPG 357 | 62.bmp 358 | 622.JPG 359 | 623.JPG 360 | 624.JPG 361 | 625.JPG 362 | 626.JPG 363 | 628.JPG 364 | 63.bmp 365 | 630.JPG 366 | 631.JPG 367 | 634.JPG 368 | 635.JPG 369 | 636.JPG 370 | 637.JPG 371 | 641.JPG 
372 | 643.JPG 373 | 647.JPG 374 | 648.JPG 375 | 650.JPG 376 | 651.JPG 377 | 653.JPG 378 | 655.JPG 379 | 656.JPG 380 | 658.JPG 381 | 660.JPG 382 | 661.JPG 383 | 662.JPG 384 | 663.JPG 385 | 664.JPG 386 | 668.JPG 387 | 669.JPG 388 | 67.bmp 389 | 672.JPG 390 | 673.JPG 391 | 674.JPG 392 | 675.JPG 393 | 677.JPG 394 | 678.JPG 395 | 679.JPG 396 | 68.bmp 397 | 682.JPG 398 | 684.JPG 399 | 685.JPG 400 | 687.JPG 401 | 689.JPG 402 | 690.JPG 403 | 691.JPG 404 | 692.JPG 405 | 695.JPG 406 | 699.JPG 407 | 7.bmp 408 | 700.JPG 409 | 703.JPG 410 | 704.JPG 411 | 705.JPG 412 | 708.JPG 413 | 71.bmp 414 | 713.JPG 415 | 715.JPG 416 | 717.JPG 417 | 72.bmp 418 | 721.JPG 419 | 722.JPG 420 | 723.JPG 421 | 724.JPG 422 | 725.JPG 423 | 726.JPG 424 | 727.JPG 425 | 728.JPG 426 | 73.bmp 427 | 730.JPG 428 | 731.JPG 429 | 734.JPG 430 | 735.JPG 431 | 736.JPG 432 | 737.JPG 433 | 738.JPG 434 | 739.JPG 435 | 74.bmp 436 | 741.JPG 437 | 742.JPG 438 | 743.JPG 439 | 744.JPG 440 | 749.JPG 441 | 75.bmp 442 | 751.JPG 443 | 754.JPG 444 | 755.JPG 445 | 756.JPG 446 | 758.JPG 447 | 760.JPG 448 | 762.JPG 449 | 763.JPG 450 | 765.JPG 451 | 766.JPG 452 | 767.JPG 453 | 768.JPG 454 | 769.JPG 455 | 77.bmp 456 | 772.JPG 457 | 78.bmp 458 | 780.JPG 459 | 782.JPG 460 | 787.JPG 461 | 789.JPG 462 | 790.JPG 463 | 792.JPG 464 | 793.JPG 465 | 794.JPG 466 | 796.JPG 467 | 797.JPG 468 | 799.JPG 469 | 800.JPG 470 | 802.JPG 471 | 803.JPG 472 | 805.JPG 473 | 807.JPG 474 | 808.JPG 475 | 809.JPG 476 | 81.bmp 477 | 810.JPG 478 | 812.JPG 479 | 813.JPG 480 | 817.JPG 481 | 818.JPG 482 | 819.JPG 483 | 820.JPG 484 | 822.JPG 485 | 824.JPG 486 | 825.JPG 487 | 826.JPG 488 | 827.JPG 489 | 829.JPG 490 | 830.JPG 491 | 831.JPG 492 | 834.JPG 493 | 835.JPG 494 | 836.JPG 495 | 837.JPG 496 | 84.bmp 497 | 841.JPG 498 | 843.JPG 499 | 846.JPG 500 | 847.JPG 501 | 848.JPG 502 | 851.JPG 503 | 853.JPG 504 | 859.JPG 505 | 860.JPG 506 | 861.JPG 507 | 863.JPG 508 | 864.JPG 509 | 87.bmp 510 | 872.JPG 511 | 876.JPG 512 | 883.JPG 513 | 884.JPG 514 | 885.JPG 515 | 886.JPG 516 | 888.JPG 517 | 89.bmp 518 | 891.JPG 519 | 892.JPG 520 | 896.JPG 521 | 897.JPG 522 | 899.JPG 523 | 900.JPG 524 | 904.JPG 525 | 91.bmp 526 | 910.JPG 527 | 913.JPG 528 | 914.JPG 529 | 916.JPG 530 | 917.JPG 531 | 918.JPG 532 | 92.bmp 533 | 920.JPG 534 | 921.JPG 535 | 927.JPG 536 | 928.JPG 537 | 929.JPG 538 | 930.JPG 539 | 933.JPG 540 | 934.JPG 541 | 935.JPG 542 | 936.JPG 543 | 937.JPG 544 | 939.JPG 545 | 94.bmp 546 | 940.JPG 547 | 941.JPG 548 | 942.JPG 549 | 943.JPG 550 | 944.JPG 551 | 945.JPG 552 | 946.JPG 553 | 948.JPG 554 | 950.JPG 555 | 951.JPG 556 | 956.JPG 557 | 957.JPG 558 | 958.JPG 559 | 959.JPG 560 | 96.bmp 561 | 961.JPG 562 | 963.JPG 563 | 964.JPG 564 | 965.JPG 565 | 966.JPG 566 | 969.JPG 567 | 970.JPG 568 | 974.JPG 569 | 979.JPG 570 | 980.JPG 571 | 981.JPG 572 | 982.JPG 573 | 983.JPG 574 | 984.JPG 575 | 985.JPG 576 | 987.JPG 577 | 988.JPG 578 | 989.JPG 579 | 991.JPG 580 | 997.JPG 581 | t1.bmp 582 | t2.bmp 583 | t4.bmp 584 | t6.bmp 585 | -------------------------------------------------------------------------------- /src/phiqnet/layers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junyongyou/lsct_phiqnet/0f7c627d2e2b5ec02c09cf74d380ad2ccc2da481/src/phiqnet/layers/__init__.py -------------------------------------------------------------------------------- /src/phiqnet/layers/fpn.py: -------------------------------------------------------------------------------- 1 | """ 2 | Reference: Feature pyramid networks for object detection, 
CVPR'17. 3 | """ 4 | from tensorflow.keras.layers import Conv2D, Add 5 | from phiqnet.layers.upsample import Upsample 6 | 7 | 8 | def build_fpn(C2, C3, C4, C5, feature_size=256, name='fpn_'): 9 | """ 10 | Create the FPN layers on top of the backbone features 11 | :param C2: Feature stage C2 from the backbone 12 | :param C3: Feature stage C3 from the backbone 13 | :param C4: Feature stage C4 from the backbone 14 | :param C5: Feature stage C5 from the backbone 15 | :param feature_size: feature size to use for the resulting feature levels, set as the lowest channel dimension in the feature maps, i.e., 256 16 | :param name: name for naming the layer 17 | :return: pyramidical feature maps [P2, P3, P4, P5, P6] 18 | """ 19 | 20 | # upsample C5 to get P5 from the FPN paper 21 | P5 = Conv2D(feature_size, kernel_size=1, strides=1, padding='same', name=name + 'C5_reduced')(C5) 22 | P5_upsampled = Upsample(name=name + 'P5_upsampled')([P5, C4]) 23 | P5 = Conv2D(feature_size, kernel_size=3, strides=1, padding='same', name=name + 'P5')(P5) 24 | 25 | # add P5 elementwise to C4 26 | P4 = Conv2D(feature_size, kernel_size=1, strides=1, padding='same', name=name + 'C4_reduced')(C4) 27 | P4 = Add(name=name + 'P4_merged')([P5_upsampled, P4]) 28 | P4_upsampled = Upsample(name=name + 'P4_upsampled')([P4, C3]) 29 | P4 = Conv2D(feature_size, kernel_size=3, strides=1, padding='same', name=name + 'P4')(P4) 30 | 31 | # add P4 elementwise to C3 32 | P3 = Conv2D(feature_size, kernel_size=1, strides=1, padding='same', name=name + 'C3_reduced')(C3) 33 | P3 = Add(name=name + 'P3_merged')([P4_upsampled, P3]) 34 | P3_upsampled = Upsample(name=name + 'P3_upsampled')([P3, C2]) 35 | P3 = Conv2D(feature_size, kernel_size=3, strides=1, padding='same', name=name + 'P3')(P3) 36 | 37 | P2 = Conv2D(feature_size, kernel_size=1, strides=1, padding='same', name=name + 'C2_reduced')(C2) 38 | P2 = Add(name=name + 'P2_merged')([P3_upsampled, P2]) 39 | P2 = Conv2D(feature_size, kernel_size=3, strides=1, padding='same', name=name + 'P2')(P2) 40 | 41 | # "P6 is obtained via a 3x3 stride-2 conv on C5" 42 | P6 = Conv2D(feature_size, kernel_size=3, strides=2, padding='same', name=name + 'P6')(C5) 43 | 44 | return [P2, P3, P4, P5, P6] 45 | 46 | 47 | def build_non_fpn(C2, C3, C4, C5, feature_size=256): 48 | """ 49 | If no FPN is used, then use a bottleneck layer to change the channel dimension to 256 50 | :param C2: Feature stage C2 from the backbone 51 | :param C3: Feature stage C3 from the backbone 52 | :param C4: Feature stage C4 from the backbone 53 | :param C5: Feature stage C5 from the backbone 54 | :param feature_size: feature size to use for the resulting feature levels, set as the lowest channel dimension in the feature maps, i.e., 256 55 | :return: pyramidical feature maps [P2, P3, P4, P5, P6] 56 | """ 57 | P2 = Conv2D(feature_size, kernel_size=1, strides=1, padding='same', name='P2_bottleneck')(C2) 58 | P3 = Conv2D(feature_size, kernel_size=1, strides=1, padding='same', name='P3_bottleneck')(C3) 59 | P4 = Conv2D(feature_size, kernel_size=1, strides=1, padding='same', name='P4_bottleneck')(C4) 60 | P5 = Conv2D(feature_size, kernel_size=1, strides=1, padding='same', name='P5_bottleneck')(C5) 61 | P6 = Conv2D(feature_size, kernel_size=1, strides=2, padding='same', name='P6_bottleneck')(C5) 62 | return [P2, P3, P4, P5, P6] 63 | 64 | -------------------------------------------------------------------------------- /src/phiqnet/layers/pan.py: -------------------------------------------------------------------------------- 1 | """ 2 | 
Reference: Path Aggregation Network for Instance Segmentation., CVPR'18. 3 | """ 4 | from tensorflow.keras.layers import Conv2D, Add 5 | from phiqnet.layers.upsample import Upsample 6 | 7 | 8 | def build_pan(C2, C3, C4, C5, feature_size=256, name='pan_', conv_on_P=False): 9 | """ 10 | Create the PAN layers on top of the backbone features 11 | :param C2: Feature stage C2 from the backbone 12 | :param C3: Feature stage C3 from the backbone 13 | :param C4: Feature stage C4 from the backbone 14 | :param C5: Feature stage C5 from the backbone 15 | :param feature_size: feature size to use for the resulting feature levels, set as the lowest channel dimension in the feature maps, i.e., 256 16 | :param name: name for naming the layer 17 | :param conv_on_P: flag to use or not another conv-layer on feature maps 18 | :return: pyramidical feature maps [N2, N3, N4, N5] 19 | """ 20 | P5 = Conv2D(feature_size, kernel_size=1, strides=1, padding='same', name=name + 'C5_reduced')(C5) 21 | P5_upsampled = Upsample(name=name + 'P5_upsampled')([P5, C4]) 22 | 23 | P4 = Conv2D(feature_size, kernel_size=1, strides=1, padding='same', name=name + 'C4_reduced')(C4) 24 | P4 = Add(name=name + 'P4_merged')([P5_upsampled, P4]) 25 | P4_upsampled = Upsample(name=name + 'P4_upsampled')([P4, C3]) 26 | 27 | P3 = Conv2D(feature_size, kernel_size=1, strides=1, padding='same', name=name + 'C3_reduced')(C3) 28 | P3 = Add(name=name + 'P3_merged')([P4_upsampled, P3]) 29 | P3_upsampled = Upsample(name=name + 'P3_upsampled')([P3, C2]) 30 | 31 | P2 = Conv2D(feature_size, kernel_size=1, strides=1, padding='same', name=name + 'C2_reduced')(C2) 32 | P2 = Add(name=name + 'P2_merged')([P3_upsampled, P2]) 33 | 34 | if conv_on_P: 35 | P5 = Conv2D(feature_size, kernel_size=3, strides=1, padding='same', name=name + 'P5')(P5) 36 | P4 = Conv2D(feature_size, kernel_size=3, strides=1, padding='same', name=name + 'P4')(P4) 37 | P3 = Conv2D(feature_size, kernel_size=3, strides=1, padding='same', name=name + 'P3')(P3) 38 | P2 = Conv2D(feature_size, kernel_size=3, strides=1, padding='same', name=name + 'P2')(P2) 39 | 40 | N2 = P2 41 | 42 | N2_reduced = Conv2D(feature_size, kernel_size=3, strides=2, padding='same', name='N2_reduced')(N2) 43 | N3 = Add(name=name + 'N3_merged')([N2_reduced, P3]) 44 | N3 = Conv2D(feature_size, kernel_size=3, strides=1, activation='relu', padding='same', name=name + 'N3')(N3) 45 | 46 | N3_reduced = Conv2D(feature_size, kernel_size=3, strides=2, padding='same', name='N3_reduced')(N3) 47 | N4 = Add(name=name + 'N4_merged')([N3_reduced, P4]) 48 | N4 = Conv2D(feature_size, kernel_size=3, strides=1, activation='relu', padding='same', name=name + 'N4')(N4) 49 | 50 | N4_reduced = Conv2D(feature_size, kernel_size=3, strides=2, padding='same', name='N4_reduced')(N4) 51 | N5 = Add(name=name + 'N5_merged')([N4_reduced, P5]) 52 | N5 = Conv2D(feature_size, kernel_size=3, strides=1, activation='relu', padding='same', name=name + 'N5')(N5) 53 | 54 | return [N2, N3, N4, N5] -------------------------------------------------------------------------------- /src/phiqnet/layers/upsample.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from tensorflow.keras.layers import Layer 3 | 4 | 5 | class Upsample(Layer): 6 | """ Keras layer for upsampling a Tensor to be the same shape as another Tensor. 
7 | """ 8 | 9 | def call(self, inputs, **kwargs): 10 | """Upsamples a tensor 11 | :param inputs: List of [source, target] tensors 12 | :param kwargs: 13 | :return: Upsampled tensor 14 | """ 15 | source, target = inputs 16 | target_shape = tf.keras.backend.shape(target) 17 | if tf.keras.backend.image_data_format() == 'channels_first': 18 | source = tf.transpose(source, (0, 2, 3, 1)) 19 | output = tf.image.resize(source, (target_shape[2], target_shape[3]), 20 | method=tf.image.ResizeMethod.NEAREST_NEIGHBOR) 21 | output = tf.transpose(output, (0, 3, 1, 2)) 22 | return output 23 | else: 24 | return tf.image.resize(source, (target_shape[1], target_shape[2]), 25 | method=tf.image.ResizeMethod.NEAREST_NEIGHBOR) 26 | 27 | def compute_output_shape(self, input_shape): 28 | """ 29 | Compute the output shapes given the input shapes 30 | :param input_shape: List of input shapes [boxes, classification, other[0], other[1], ...] 31 | :return: Tuple representing the output shapes 32 | """ 33 | if tf.keras.backend.image_data_format() == 'channels_first': 34 | return (input_shape[0][0], input_shape[0][1]) + input_shape[1][2:4] 35 | else: 36 | return (input_shape[0][0],) + input_shape[1][1:3] + (input_shape[0][-1],) 37 | -------------------------------------------------------------------------------- /src/phiqnet/loss/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junyongyou/lsct_phiqnet/0f7c627d2e2b5ec02c09cf74d380ad2ccc2da481/src/phiqnet/loss/__init__.py -------------------------------------------------------------------------------- /src/phiqnet/loss/distribution_loss.py: -------------------------------------------------------------------------------- 1 | """ 2 | Two loss functions that might be used in PHIQNet. 3 | """ 4 | from tensorflow.keras import backend as K 5 | import numpy as np 6 | from tensorflow.keras.losses import categorical_crossentropy 7 | 8 | 9 | def distribution_loss(y_true, y_pred): 10 | """ 11 | Loss on quality score distributions 12 | :param y_true: y_true 13 | :param y_pred: y_pred 14 | :return: loss 15 | """ 16 | mos_scales = np.array([1, 2, 3, 4, 5]) 17 | return K.mean(K.square((y_pred - y_true) * mos_scales)) # MSE 18 | 19 | 20 | def ordinal_loss(y_true, y_pred): 21 | """ 22 | A simple ordinal loss based on quality score distributions 23 | :param y_true: y_true 24 | :param y_pred: y_pred 25 | :return: loss 26 | """ 27 | weights = K.cast(K.abs(K.argmax(y_true, axis=1) - K.argmax(y_pred, axis=1))/(K.int_shape(y_pred)[1] - 1), dtype='float32') 28 | # return (1.0 + weights) * sigmoid_focal_crossentropy(y_true, y_pred) 29 | return (1.0 + weights) * categorical_crossentropy(y_true, y_pred) -------------------------------------------------------------------------------- /src/phiqnet/model_evaluation/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junyongyou/lsct_phiqnet/0f7c627d2e2b5ec02c09cf74d380ad2ccc2da481/src/phiqnet/model_evaluation/__init__.py -------------------------------------------------------------------------------- /src/phiqnet/model_evaluation/evaluation.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from PIL import Image 3 | import scipy.stats 4 | 5 | 6 | class ModelEvaluation: 7 | """ 8 | Evaluates the model; this script is essentially a copy of the evaluation callback. 
9 | """ 10 | def __init__(self, model, image_files, scores, using_single_mos, imagenet_pretrain=False): 11 | self.model = model 12 | self.image_files = image_files 13 | self.scores = scores 14 | self.using_single_mos = using_single_mos 15 | self.imagenet_pretrain = imagenet_pretrain 16 | self.mos_scales = np.array([1, 2, 3, 4, 5]) 17 | 18 | def __get_prediction_mos(self, image): 19 | prediction = self.model.predict(np.expand_dims(image, axis=0)) 20 | return prediction[0][0] 21 | 22 | def __get_prediction_distribution(self, image): 23 | # debug_model = Model(inputs=self.model.inputs, outputs=self.model.get_layer('fpn_concatenate').output) 24 | # debug_results = debug_model.predict(np.expand_dims(image, axis=0)) 25 | 26 | prediction = self.model.predict(np.expand_dims(image, axis=0)) 27 | prediction = np.sum(np.multiply(self.mos_scales, prediction[0])) 28 | return prediction 29 | 30 | def __evaluation__(self, result_file=None): 31 | predictions = [] 32 | mos_scores = [] 33 | if result_file is not None: 34 | rf = open(result_file, 'w+') 35 | 36 | for image_file, score in zip(self.image_files, self.scores): 37 | image = Image.open(image_file) 38 | image = np.asarray(image, dtype=np.float32) 39 | if self.imagenet_pretrain: # image normalization using TF approach 40 | image /= 127.5 41 | image -= 1. 42 | else: # Image normalization by subtracting mean and dividing std 43 | image[:, :, 0] -= 117.27205081970828 44 | image[:, :, 1] -= 106.23294835284031 45 | image[:, :, 2] -= 94.40750328714887 46 | image[:, :, 0] /= 59.112836751661085 47 | image[:, :, 1] /= 55.65498543815568 48 | image[:, :, 2] /= 54.9486100975773 49 | 50 | if self.using_single_mos: 51 | prediction = self.__get_prediction_mos(image) 52 | else: 53 | score = np.sum(np.multiply(self.mos_scales, score)) 54 | prediction = self.__get_prediction_distribution(image) 55 | 56 | mos_scores.append(score) 57 | 58 | predictions.append(prediction) 59 | print('Real score: {}, predicted: {}'.format(score, prediction)) 60 | 61 | if result_file is not None: 62 | rf.write('{},{},{}\n'.format(image_file, score, prediction)) 63 | 64 | PLCC = scipy.stats.pearsonr(mos_scores, predictions)[0] 65 | SRCC = scipy.stats.spearmanr(mos_scores, predictions)[0] 66 | RMSE = np.sqrt(np.mean(np.subtract(predictions, mos_scores) ** 2)) 67 | MAD = np.mean(np.abs(np.subtract(predictions, mos_scores))) 68 | print('\nPLCC: {}, SRCC: {}, RMSE: {}, MAD: {}'.format(PLCC, SRCC, RMSE, MAD)) 69 | 70 | if result_file is not None: 71 | rf.close() 72 | return PLCC, SRCC, RMSE 73 | -------------------------------------------------------------------------------- /src/phiqnet/model_evaluation/validation.py: -------------------------------------------------------------------------------- 1 | """ 2 | This script is to evaluate (calculate the evaluation criteria PLCC, SROCC, RMSE) on individual testing sets. 3 | PHIQNet should be first generated, and then the model_weights file is loaded. 
4 | """ 5 | import tensorflow as tf 6 | from phiqnet.models.image_quality_model import phiq_net 7 | from phiqnet.utils.imageset_handler import get_image_scores, get_image_score_from_groups 8 | from phiqnet.model_evaluation.evaluation import ModelEvaluation 9 | 10 | 11 | def val_main(args): 12 | if args['n_quality_levels'] > 1: 13 | using_single_mos = False 14 | else: 15 | using_single_mos = True 16 | 17 | if args['model_weights'] is not None and ('resnet' in args['backbone'] or args['backbone'] == 'inception'): 18 | imagenet_pretrain = True 19 | else: 20 | imagenet_pretrain = False 21 | 22 | val_folders = [ 23 | # r'..\databases\val\koniq_normal',] 24 | r'..\databases\val\koniq_small',] 25 | # r'..\databases\train\live', 26 | # r'..\databases\val\live'] 27 | 28 | koniq_mos_file = r'..\databases\koniq10k_images_scores.csv' 29 | live_mos_file = r'..\databases\live_mos.csv' 30 | 31 | image_scores = get_image_scores(koniq_mos_file, live_mos_file, using_single_mos=using_single_mos) 32 | test_image_file_groups, test_score_groups = get_image_score_from_groups(val_folders, image_scores) 33 | 34 | test_image_files = [] 35 | test_scores = [] 36 | for test_image_file_group, test_score_group in zip(test_image_file_groups, test_score_groups): 37 | test_image_files.extend(test_image_file_group) 38 | test_scores.extend(test_score_group) 39 | 40 | model = phiq_net(n_quality_levels=args['n_quality_levels'], 41 | naive_backbone=args['naive_backbone'], 42 | backbone=args['backbone'], 43 | fpn_type=args['fpn_type']) 44 | model.load_weights(args['model_weights']) 45 | 46 | # model1 = phiq_net(n_quality_levels=args['n_quality_levels'], 47 | # naive_backbone=args['naive_backbone'], 48 | # backbone=args['backbone'], 49 | # fpn_type=args['fpn_type']) 50 | # model1.load_weights(r'..\\model_weights\PHIQNet.h5', by_name=True) 51 | # model.load_weights(args['model_weights']) 52 | # for i in range(250): 53 | # extracted_weights = model1.layers[i].get_weights() 54 | # model.layers[i].set_weights(extracted_weights) 55 | 56 | evaluation = ModelEvaluation(model, test_image_files, test_scores, using_single_mos, 57 | imagenet_pretrain=imagenet_pretrain) 58 | plcc, srcc, rmse = evaluation.__evaluation__() 59 | 60 | 61 | if __name__ == '__main__': 62 | gpus = tf.config.experimental.list_physical_devices('GPU') 63 | tf.config.experimental.set_visible_devices(gpus[1], 'GPU') 64 | 65 | args = {} 66 | args['n_quality_levels'] = 5 67 | args['naive_backbone'] = False 68 | args['backbone'] = 'resnet50' 69 | args['fpn_type'] = 'fpn' 70 | # args['model_weights'] = r'..\databases\results\resnet50_mos_attention_fpn\38_0.0008_0.0208_0.0998_0.2286.h5' 71 | # args['model_weights'] = r'..\databases\results\resnet50_mos_attention_fpn_lr\96_0.0040_0.0488_0.1023_0.2326.h5' 72 | # args['model_weights'] = r'..\databases\results\resnet50_mos_attention_bifpn_lr\65_0.0080_0.0699_0.0621_0.1871.h5' 73 | # args['model_weights'] = r'..\databases\results\resnet50_distribution_attention_fpn_lr\61_0.8988_0.1192_1.0691_0.2386.h5' 74 | # args['model_weights'] = r'..\databases\results_distribution_loss\resnet50_distribution_attention_bifpn_lr\107_0.0269_0.8673_0.1975_1.0134.h5' 75 | # args['model_weights'] = r'..\databases\results_distribution_loss\resnet50_distribution_attention_fpn_lr_avg\117_0.0183_0.8621_0.2032_1.0449.h5' 76 | # args['model_weights'] = r'..\databases\results_distribution_loss\\resnet50_distribution_fpn_lr_avg\118_0.0255_0.8632_0.2084_1.0571.h5' 77 | # args['model_weights'] = 
r'..\databases\results_distribution_loss\resnet50_distribution_fpn_lr_avg_cbam_finetune\32_0.0792_0.8892_0.2181_1.0748.h5' 78 | # args['model_weights'] = r'..\databases\experiments\resnet50_distribution_attention_fpn_finetune\117_0.8532_1.0189.h5' 79 | # args['model_weights'] = r'..\databases\experiments\resnet50_mos_attention_fpn\74_0.0027_0.1180.h5' 80 | # args['model_weights'] = r'..\databases\experiments\resnet50_distribution_attention_fpn_no_imageaug\91_0.8545_1.0103.h5' 81 | # args['model_weights'] = r'..\databases\experiments\resnet50_mos_attention_fpn_finetune\45_0.0003_0.0950.h5' 82 | # args['model_weights'] = r'..\databases\experiments\koniq_normal\resnet50_mos_attention_fpn\37_0.0102_0.0499.h5' 83 | args['model_weights'] = r'..\databases\experiments\koniq_normal\resnet50_distribution_attention_fpn_finetune\09_0.8493_0.9294.h5' 84 | 85 | val_main(args) -------------------------------------------------------------------------------- /src/phiqnet/model_evaluation/validation_spag.py: -------------------------------------------------------------------------------- 1 | from phiqnet.models.image_quality_model import phiq_net 2 | from phiqnet.utils.imageset_handler import get_image_score_from_groups 3 | from phiqnet.model_evaluation.evaluation import ModelEvaluation 4 | 5 | 6 | def get_image_scores(mos_file): 7 | image_files = {} 8 | with open(mos_file, 'r+') as f: 9 | lines = f.readlines() 10 | for line in lines: 11 | content = line.split(',') 12 | image_file = content[0] 13 | score = float(content[1]) / 25. + 1 14 | image_files[image_file] = score 15 | 16 | return image_files 17 | 18 | 19 | def val_main(args): 20 | if args['n_quality_levels'] > 1: 21 | using_single_mos = False 22 | else: 23 | using_single_mos = True 24 | 25 | if args['model_weights'] is not None and ('resnet' in args['backbone'] or args['backbone'] == 'inception'): 26 | imagenet_pretrain = True 27 | else: 28 | imagenet_pretrain = False 29 | 30 | val_folders = [r'F:\SPAG_image_quality_dataset\TestImage'] 31 | spag_mos_file = r'..\databases\spag\image_mos.csv' 32 | image_scores = get_image_scores(spag_mos_file) 33 | test_image_file_groups, test_score_groups = get_image_score_from_groups(val_folders, image_scores) 34 | 35 | test_image_files = [] 36 | test_scores = [] 37 | for test_image_file_group, test_score_group in zip(test_image_file_groups, test_score_groups): 38 | test_image_files.extend(test_image_file_group) 39 | test_scores.extend(test_score_group) 40 | 41 | model = phiq_net(n_quality_levels=args['n_quality_levels'], 42 | naive_backbone=args['naive_backbone'], 43 | backbone=args['backbone'], 44 | fpn_type=args['fpn_type']) 45 | model.load_weights(args['model_weights']) 46 | 47 | evaluation = ModelEvaluation(model, test_image_files, test_scores, using_single_mos, 48 | imagenet_pretrain=imagenet_pretrain) 49 | result_file = r'..\databases\spag\result.csv' 50 | plcc, srcc, rmse = evaluation.__evaluation__(result_file) 51 | 52 | 53 | if __name__ == '__main__': 54 | # gpus = tf.config.experimental.list_physical_devices('GPU') 55 | # tf.config.experimental.set_visible_devices(gpus[1], 'GPU') 56 | 57 | args = {} 58 | # args['result_folder'] = r'..\databases\results' 59 | args['n_quality_levels'] = 5 60 | args['naive_backbone'] = False 61 | args['backbone'] = 'resnet50' 62 | args['fpn_type'] = 'fpn' 63 | args['model_weights'] = r'..\\model_weights\PHIQNet.h5' 64 | 65 | val_main(args) -------------------------------------------------------------------------------- /src/phiqnet/models/__init__.py: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/junyongyou/lsct_phiqnet/0f7c627d2e2b5ec02c09cf74d380ad2ccc2da481/src/phiqnet/models/__init__.py -------------------------------------------------------------------------------- /src/phiqnet/models/image_quality_model.py: -------------------------------------------------------------------------------- 1 | """ 2 | Main function to build PHIQNet. 3 | """ 4 | from phiqnet.layers.fpn import build_fpn, build_non_fpn 5 | from phiqnet.layers.bi_fpn import build_bifpn 6 | from phiqnet.layers.pan import build_pan 7 | from phiqnet.backbone.ResNest import ResNest 8 | from tensorflow.keras.layers import Input, Dense, Average, GlobalAveragePooling2D 9 | from tensorflow.keras.models import Model 10 | from phiqnet.models.prediction_model_contrast_sensitivity import channel_spatial_attention 11 | from phiqnet.backbone.resnet50 import ResNet50 12 | from phiqnet.backbone.resnet_family import ResNet18 13 | from phiqnet.backbone.resnet_feature_maps import ResNet152v2, ResNet152 14 | from phiqnet.backbone.vgg16 import VGG16 15 | 16 | 17 | def phiq_net(n_quality_levels, input_shape=(None, None, 3), naive_backbone=False, backbone='resnet50', fpn_type='fpn', 18 | attention_module=True): 19 | """ 20 | Build PHIQNet 21 | :param n_quality_levels: 1 for MOS prediction and 5 for score distribution 22 | :param input_shape: image input shape, keep as unspecified to allow arbitrary resolutions 23 | :param naive_backbone: flag to use backbone only, i.e., without neck and head, if set to True 24 | :param backbone: backbone networks (resnet50/18/152v2, resnest, vgg16, etc.) 25 | :param fpn_type: chosen from 'fpn', 'bifpn' or 'pan'; any other value uses the plain bottleneck layers from build_non_fpn 26 | :param attention_module: flag to use or not attention module 27 | :return: PHIQNet model 28 | """ 29 | inputs = Input(shape=input_shape) 30 | n_classes = None 31 | return_feature_maps = True 32 | if naive_backbone: 33 | n_classes = 1 34 | return_feature_maps = False 35 | fc_activation = None 36 | verbose = False 37 | if backbone == 'resnest50': 38 | backbone_model = ResNest(verbose=verbose, 39 | n_classes=n_classes, dropout_rate=0, fc_activation=fc_activation, 40 | blocks_set=[3, 4, 6, 3], radix=2, groups=1, bottleneck_width=64, deep_stem=True, 41 | stem_width=32, avg_down=True, avd=True, avd_first=False, 42 | return_feature_maps=return_feature_maps).build(inputs) 43 | elif backbone == 'resnest34': 44 | backbone_model = ResNest(verbose=verbose, 45 | n_classes=n_classes, dropout_rate=0, fc_activation=fc_activation, 46 | blocks_set=[3, 4, 6, 3], radix=2, groups=1, bottleneck_width=64, deep_stem=True, 47 | stem_width=16, avg_down=True, avd=True, avd_first=False, using_basic_block=True, 48 | return_feature_maps=return_feature_maps).build(inputs) 49 | elif backbone == 'resnest18': 50 | backbone_model = ResNest(verbose=verbose, 51 | n_classes=n_classes, dropout_rate=0, fc_activation=fc_activation, 52 | blocks_set=[2, 2, 2, 2], radix=2, groups=1, bottleneck_width=64, deep_stem=True, 53 | stem_width=16, avg_down=True, avd=True, avd_first=False, using_basic_block=True, 54 | return_feature_maps=return_feature_maps).build(inputs) 55 | elif backbone == 'resnet50': 56 | backbone_model = ResNet50(inputs, 57 | return_feature_maps=return_feature_maps) 58 | elif backbone == 'resnet18': 59 | backbone_model = ResNet18(input_tensor=inputs, 60 | weights=None, 61 | include_top=False) 62 | elif backbone == 'resnet152v2': 63 | backbone_model = ResNet152v2(inputs) 64 | elif backbone == 'resnet152': 65 | backbone_model = 
ResNet152(inputs) 66 | elif backbone == 'vgg16': 67 | backbone_model = VGG16(inputs) 68 | else: 69 | raise NotImplementedError 70 | 71 | if naive_backbone: 72 | backbone_model.summary() 73 | return backbone_model 74 | 75 | C2, C3, C4, C5 = backbone_model.outputs 76 | pyramid_feature_size = 256 77 | if fpn_type == 'fpn': 78 | fpn_features = build_fpn(C2, C3, C4, C5, feature_size=pyramid_feature_size) 79 | elif fpn_type == 'pan': 80 | fpn_features = build_pan(C2, C3, C4, C5, feature_size=pyramid_feature_size) 81 | elif fpn_type == 'bifpn': 82 | for i in range(3): 83 | if i == 0: 84 | fpn_features = [C3, C4, C5] 85 | fpn_features = build_bifpn(fpn_features, pyramid_feature_size, i) 86 | else: 87 | fpn_features = build_non_fpn(C2, C3, C4, C5, feature_size=pyramid_feature_size) 88 | 89 | PF = [] 90 | for i, P in enumerate(fpn_features): 91 | if attention_module: 92 | PF.append(channel_spatial_attention(P, n_quality_levels, 'P{}'.format(i))) 93 | else: 94 | outputs = GlobalAveragePooling2D(name='avg_pool_{}'.format(i))(P) 95 | outputs = Dense(n_quality_levels, activation='softmax', name='fc_prediction_{}'.format(i))(outputs) 96 | PF.append(outputs) 97 | outputs = Average(name='PF_average')(PF) 98 | 99 | # pyramids = Concatenate(axis=1)(PF) 100 | # outputs = Dense(1, activation='linear', name='final_fc', use_bias=True)(pyramids) 101 | 102 | model = Model(inputs=inputs, outputs=outputs) 103 | model.summary() 104 | return model 105 | 106 | 107 | if __name__ == '__main__': 108 | input_shape = [None, None, 3] 109 | # model = phiq_net(n_quality_levels=5, input_shape=input_shape, backbone='resnet152v2') 110 | # model = phiq_net(n_quality_levels=5, input_shape=input_shape, backbone='resnet50') 111 | model = phiq_net(n_quality_levels=5, input_shape=input_shape, backbone='vgg16') 112 | -------------------------------------------------------------------------------- /src/phiqnet/models/prediction_model_contrast_sensitivity.py: -------------------------------------------------------------------------------- 1 | from tensorflow.keras.layers import GlobalAveragePooling2D, GlobalMaxPooling2D, Dense, Reshape, Average, \ 2 | multiply, Lambda, Conv2D, Concatenate 3 | from tensorflow.keras import backend as K 4 | 5 | 6 | def channel_spatial_attention(input_feature, n_quality_levels, name, return_feature_map=False, return_features=False): 7 | """ 8 | Attention model for implementing channel and spatial attention in IQA 9 | :param input_feature: feature maps from FPN or backbone 10 | :param n_quality_levels: 1 for MOS prediction and 5 for score distribution 11 | :param name: name of individual layers 12 | :param return_feature_map: flag to return feature map or not 13 | :param return_features: flag to return feature vector or not 14 | :return: output of attention module 15 | """ 16 | channel_input = input_feature 17 | spatial_input = input_feature 18 | 19 | channel = input_feature.shape[-1] 20 | shared_dense_layer = Dense(channel, 21 | kernel_initializer='he_normal', 22 | use_bias=True, 23 | bias_initializer='zeros', 24 | activation='sigmoid' 25 | ) 26 | 27 | avg_pool_channel = GlobalAveragePooling2D()(channel_input) 28 | avg_pool_channel = Reshape((1, channel))(avg_pool_channel) 29 | avg_pool_channel = shared_dense_layer(avg_pool_channel) 30 | 31 | max_pool_channel = GlobalMaxPooling2D()(channel_input) 32 | max_pool_channel = Reshape((1, channel))(max_pool_channel) 33 | max_pool_channel = shared_dense_layer(max_pool_channel) 34 | 35 | channel_weights = Average()([avg_pool_channel, max_pool_channel]) 36 | 37 | 
avg_pool_spatial = Lambda(lambda x: K.mean(x, axis=3, keepdims=True))(spatial_input) 38 | max_pool_spatial = Lambda(lambda x: K.max(x, axis=3, keepdims=True))(spatial_input) 39 | spatial_weights = Concatenate(axis=3)([avg_pool_spatial, max_pool_spatial]) 40 | spatial_weights = Conv2D(filters=1, 41 | kernel_size=7, 42 | strides=1, 43 | padding='same', 44 | activation='sigmoid', 45 | kernel_initializer='he_normal', 46 | use_bias=False)(spatial_weights) 47 | 48 | outputs = multiply([multiply([input_feature, channel_weights]), spatial_weights]) 49 | 50 | if return_feature_map: 51 | return outputs 52 | 53 | outputs = GlobalAveragePooling2D(name=name + '_avg_pool')(outputs) 54 | if return_features: 55 | return outputs 56 | 57 | if n_quality_levels > 1: 58 | outputs = Dense(n_quality_levels, activation='softmax', name=name + '_fc_prediction')(outputs) 59 | else: 60 | outputs = Dense(n_quality_levels, activation='linear', name=name + 'fc_prediction')(outputs) 61 | 62 | return outputs 63 | -------------------------------------------------------------------------------- /src/phiqnet/pretrained_weights/README.md: -------------------------------------------------------------------------------- 1 | # Pretrained ImageNet weights 2 | 3 | Pretrained ImageNet weights for different backbone networks should be put here, and the weights can be downloaded from: 4 | 5 | [VGG16](https://storage.googleapis.com/tensorflow/keras-applications/vgg16/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5) 6 | 7 | [ResNet50](https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5) 8 | 9 | [ResNet152](https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet152_weights_tf_dim_ordering_tf_kernels_notop.h5) 10 | 11 | [ResNet152V2](https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet152v2_weights_tf_dim_ordering_tf_kernels_notop.h5) 12 | 13 | [InceptionResNetV2](https://storage.googleapis.com/tensorflow/keras-applications/inception_resnet_v2/inception_resnet_v2_weights_tf_dim_ordering_tf_kernels_notop.h5) -------------------------------------------------------------------------------- /src/phiqnet/train/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junyongyou/lsct_phiqnet/0f7c627d2e2b5ec02c09cf74d380ad2ccc2da481/src/phiqnet/train/__init__.py -------------------------------------------------------------------------------- /src/phiqnet/train/group_generator.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import random 3 | from PIL import Image 4 | from tensorflow.keras.utils import Sequence 5 | from imgaug import augmenters as iaa 6 | 7 | 8 | class GroupGenerator(Sequence): 9 | """ 10 | Generator to supply group image data, individual dataset should go to individual group because they can have different resolutions 11 | """ 12 | def __init__(self, image_file_groups, score_groups, batch_size=16, image_aug=True, shuffle=True, imagenet_pretrain=False): 13 | self.image_file_groups = image_file_groups 14 | self.score_groups = score_groups 15 | self.batch_size = batch_size 16 | self.shuffle = shuffle 17 | self.imagenet_pretrain = imagenet_pretrain 18 | if image_aug: 19 | # do image augmentation by left-right flip 20 | self.seq = iaa.Sequential([iaa.Fliplr(0.5)]) 21 | self.image_aug = image_aug 22 | self.on_epoch_end() 23 | 24 | def __len__(self): 25 | return sum(self.group_length) 26 | 27 | 
def on_epoch_end(self): 28 | if self.shuffle: 29 | # shuffle both group orders and image orders in each group 30 | images_scores = list(zip(self.image_file_groups, self.score_groups)) 31 | random.shuffle(images_scores) 32 | self.image_file_groups, self.score_groups = zip(*images_scores) 33 | 34 | self.index_groups = [] 35 | self.group_length = [] 36 | for i in range(len(self.image_file_groups)): 37 | self.index_groups.append(np.arange(len(self.image_file_groups[i]))) 38 | self.group_length.append(len(self.image_file_groups[i]) // self.batch_size) 39 | 40 | for i in range(len(self.index_groups)): 41 | np.random.shuffle(self.index_groups[i]) 42 | 43 | def __getitem__(self, item): 44 | lens = 0 45 | idx_0 = len(self.group_length) - 1 46 | for i, data_len in enumerate(self.group_length): 47 | lens += data_len 48 | if item < lens: 49 | idx_0 = i 50 | break 51 | item -= (lens - self.group_length[idx_0]) 52 | 53 | images = [] 54 | y_scores = [] 55 | 56 | for idx_1 in self.index_groups[idx_0][item * self.batch_size: (item + 1) * self.batch_size]: 57 | image = np.asarray(Image.open(self.image_file_groups[idx_0][idx_1]), dtype=np.float32) 58 | if self.imagenet_pretrain: 59 | # ImageNet normalization 60 | image /= 127.5 61 | image -= 1. 62 | else: 63 | # Normalization based on the combined database consisting of KonIQ-10k and LIVE-Wild datasets 64 | image[:, :, 0] -= 117.27205081970828 65 | image[:, :, 1] -= 106.23294835284031 66 | image[:, :, 2] -= 94.40750328714887 67 | image[:, :, 0] /= 59.112836751661085 68 | image[:, :, 1] /= 55.65498543815568 69 | image[:, :, 2] /= 54.9486100975773 70 | images.append(image) 71 | y_scores.append(self.score_groups[idx_0][idx_1]) 72 | 73 | if self.image_aug: 74 | images_aug = self.seq(images=images) 75 | return np.array(images_aug), np.array(y_scores) 76 | else: 77 | return np.array(images), np.array(y_scores) 78 | 79 | 80 | -------------------------------------------------------------------------------- /src/phiqnet/train/plot_train.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import os 3 | 4 | 5 | def get_all_metrics(history): 6 | metrics = set() 7 | for metric in history.history: 8 | if 'val_' in metric: 9 | metric = metric.replace('val_', '')  # strip the 'val_' prefix to recover the base metric name 10 | metrics.add(metric) 11 | return metrics 12 | 13 | 14 | def plot_history(history, result_dir, prefix): 15 | """ 16 | Plots the model training history in each epoch 17 | :param history: generated during model training 18 | :param result_dir: save the training history in this folder 19 | :return: None 20 | """ 21 | try: 22 | metrics = get_all_metrics(history) 23 | for metric in metrics: 24 | loss_metric = 'val_' + metric 25 | if metric in history.history and loss_metric in history.history: 26 | plt.plot(history.history[metric], 'g.-') 27 | plt.plot(history.history[loss_metric], 'r.-') 28 | plt.title(metric) 29 | plt.xlabel('epoch') 30 | plt.ylabel(metric) 31 | plt.legend(['train', 'val']) 32 | plt.savefig(os.path.join(result_dir, prefix + '_' + metric + '.png')) 33 | plt.close() 34 | except Exception as e: 35 | print(e) 36 | -------------------------------------------------------------------------------- /src/phiqnet/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/junyongyou/lsct_phiqnet/0f7c627d2e2b5ec02c09cf74d380ad2ccc2da481/src/phiqnet/utils/__init__.py --------------------------------------------------------------------------------
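Note: the snippet below is a minimal usage sketch and is not part of the repository. It shows how phiq_net (models/image_quality_model.py) can be combined with the preprocessing used in model_evaluation/evaluation.py and train/group_generator.py to predict a distribution-based MOS for a single image. The weight file and image path are placeholders, and the normalization constants are the KonIQ-10k + LIVE-Wild statistics copied from the sources above.

import numpy as np
from PIL import Image
from phiqnet.models.image_quality_model import phiq_net

# Build PHIQNet for 5-level score-distribution prediction and load trained weights
# (placeholder path; see model_weights/README.md for where the weights are expected).
model = phiq_net(n_quality_levels=5, naive_backbone=False, backbone='resnet50', fpn_type='fpn')
model.load_weights(r'..\model_weights\PHIQNet.h5')

# Load an arbitrary test image (placeholder path) and apply the same normalization
# as in evaluation.py / group_generator.py (combined KonIQ-10k and LIVE-Wild statistics).
image = np.asarray(Image.open(r'example_image.jpg'), dtype=np.float32)
image[:, :, 0] -= 117.27205081970828
image[:, :, 1] -= 106.23294835284031
image[:, :, 2] -= 94.40750328714887
image[:, :, 0] /= 59.112836751661085
image[:, :, 1] /= 55.65498543815568
image[:, :, 2] /= 54.9486100975773

# Predict the score distribution and convert it to a single MOS on the [1, 5] scale.
distribution = model.predict(np.expand_dims(image, axis=0))[0]
mos = float(np.sum(np.multiply(np.array([1, 2, 3, 4, 5]), distribution)))
print('Predicted MOS: {:.3f}'.format(mos))

The same distribution-to-MOS conversion is applied to the ground-truth distributions inside ModelEvaluation before PLCC, SRCC, RMSE and MAD are computed.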