├── IQA_DeepQA_FR_release ├── __init__.py ├── data_load │ ├── __init__.py │ ├── TID2008.py │ ├── TID2013.py │ ├── CSIQ.py │ ├── LIVE.py │ └── dataset.py ├── layers │ ├── __init__.py │ ├── normalization.py │ └── layers.py ├── models │ ├── __init__.py │ ├── model_record.py │ ├── FR_sens_1s.py │ ├── FR_sens_1.py │ └── model_basis.py ├── configs │ └── FR_sens_1.yaml ├── default_config.yaml ├── laplacian_pyr.py ├── train_iqa.py ├── config_parser.py ├── test_iqa.py ├── optimizer.py ├── draw_graph.py ├── utils.py └── trainer.py ├── .gitignore ├── example.py ├── README.md ├── gen_list_TID.m ├── gen_list_CSIQ.m └── gen_list_LIVE_IQA.m /IQA_DeepQA_FR_release/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store* 2 | ehthumbs.db 3 | Icon? 
# Demo script: trains a DeepQA model on the LIVE section of the example
# configuration.
#
# Usage: python example.py [device]
#   device - passed straight to theano.sandbox.cuda.use(); defaults to
#            'cuda0' when no command-line argument is given.
import sys
import theano.sandbox.cuda
# Select the GPU device before the package below is imported.
# NOTE(review): presumably the device has to be chosen before anything else
# initialises Theano - confirm before reordering these statements.
theano.sandbox.cuda.use(sys.argv[1] if len(sys.argv) > 1 else 'cuda0')

from IQA_DeepQA_FR_release import train_iqa as tm

# config_file / section: YAML file and the section inside it to train with.
# tr_te_file: file holding the train/test split of reference image indices.
# snap_path: directory where snapshot files are stored.
tm.train_iqa(
    config_file='IQA_DeepQA_FR_release/configs/FR_sens_1.yaml',
    section='fr_sens_LIVE',
    tr_te_file='outputs/tr_va_live.txt',
    snap_path='outputs/FR/FR_sens_LIVE_1/',
)
8 | model: 9 | model: IQA_DeepQA_FR_release.models.FR_sens_1 10 | ign: 4 11 | ign_scale: 8 12 | opt_scheme: adam 13 | lr: 1e-4 14 | wr_l2: 5e-3 15 | wr_tv: 1e-2 16 | training: 17 | batch_size: 5 18 | epochs: 80 19 | test_freq: 2 20 | save_freq: 1 21 | regular_snap_freq: 50 22 | n_imgs_to_record: 30 23 | prefix: 'FR_sens_' 24 | 25 | fr_sens_LIVE: 26 | database: 27 | sel_data: LIVE 28 | dist_types: all 29 | patch_size: [112, 112] 30 | model: 31 | opt_scheme: nadam 32 | lr: 1e-4 33 | 34 | fr_sens_CSIQ: 35 | database: 36 | sel_data: CSIQ 37 | dist_types: all 38 | # patch_size: [112, 112] 39 | patch_size: 40 | model: 41 | lr: 5e-4 42 | 43 | fr_sens_TID2008: 44 | database: 45 | sel_data: TID2008 46 | dist_types: all 47 | patch_size: 48 | # patch_size: [112, 112] 49 | # patch_size: 50 | model: 51 | lr: 5e-4 52 | # lr: 1e-4 53 | 54 | fr_sens_TID2013: 55 | database: 56 | sel_data: TID2013 57 | dist_types: all 58 | patch_size: 59 | model: 60 | # lr: 1e-3 61 | lr: 5e-4 62 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # DeepQA 2 | We propose a convolutional neural networks (CNN) based FR-IQA model, named Deep Image Quality Assessment (DeepQA), where the behavior of the HVS is learned from the underlying data distribution of IQA databases. 3 | 4 | > Jongyoo Kim and Sanghoon Lee, “Deep learning of human visual sensitivity in image quality assessment framework,” in IEEE Conference on Computer Vision and Pattern Recognition (CVPR), 2017, pp. 1676–1684. 5 | 6 | 7 | ## Prerequisites 8 | This code was developed and tested with Theano 0.9, CUDA 8.0, and Windows. 
9 | 10 | ## Environment setting 11 | ### Setting database path: 12 | For each database, set `BASE_PATH` to the actual root path of each database in the following files: 13 | `IQA_DeepQA_FR_release/data_load/LIVE.py`, 14 | `IQA_DeepQA_FR_release/data_load/CSIQ.py`, 15 | `IQA_DeepQA_FR_release/data_load/TID2008.py`, and 16 | `IQA_DeepQA_FR_release/data_load/TID2013.py`. 17 | 18 | ## Training DeepQA 19 | We provide the demo code for training a DeepQA model. 20 | ```bash 21 | python example.py 22 | ``` 23 | 24 | - `tr_te_file`: Store the randomly divided (training and testing) reference image indices in this file. 25 | - `snap_path`: This indicates the path to store snapshot files 26 | 27 | 28 | ## Quantitative results 29 | DeepQA was tested on the full-sets of LIVE IQA, CSIQ, TID2008, TID2013 databases. During the experiment, we randomly divided the reference images into two subsets, 80% for training and 20% for testing. The correlation coefficients were averaged after the procedure was repeated 10 times while dividing the training and testing sets randomly. 
30 | 31 | |Database |SRCC |PLCC | 32 | |---------|:----:|:----:| 33 | |LIVE IQA |0.981 | 0.982| 34 | |CSIQ |0.961 | 0.965| 35 | |TID2008 |0.947 | 0.951| 36 | |TID2013 |0.939 | 0.947| 37 | 38 | -------------------------------------------------------------------------------- /gen_list_TID.m: -------------------------------------------------------------------------------- 1 | clear 2 | fclose all; 3 | 4 | %% Parameters 5 | base_path = 'D:/DB/IQA/TID2013/'; 6 | out_file = 'TID2013.txt'; 7 | % base_path = 'D:/DB/IQA/TID2008/'; 8 | % out_file = 'TID2008.txt'; 9 | ref_subpath = 'reference_images/'; 10 | dist_subpath = 'distorted_images/'; 11 | 12 | fid = fopen([base_path 'mos_with_names.txt'], 'r'); 13 | % image dst_idx dst_type dst_lev dmos_std dmos 14 | formatSpec = '%f %s'; 15 | data = textscan(fid, formatSpec, [Inf, 2]); 16 | % data = textscan(fid, formatSpec); 17 | fclose(fid); 18 | 19 | scores = data{1}; 20 | dist_name = data{2}; 21 | 22 | % Norm scores 23 | % fprintf('Orignal: %f ~ %f\n', min(scores), max(scores)) 24 | % scores = (scores - min(scores)) / (max(scores) - min(scores)); 25 | % fprintf('Norm. : %f ~ %f\n', min(scores), max(scores)) 26 | 27 | %% Dis/Ref images 28 | n_files = size(dist_name, 1); 29 | dist_imgs = cell(n_files, 1); 30 | ref_imgs = cell(n_files, 1); 31 | ref_idx = zeros(n_files, 1); 32 | dist_idx = zeros(n_files, 1); 33 | for im_idx = 1:n_files 34 | ref_name = [dist_name{im_idx}(1:3), '.bmp']; 35 | ref_idx(im_idx) = str2num(ref_name(2:3)); 36 | dist_idx(im_idx) = str2num(dist_name{im_idx}(5:6)); 37 | 38 | dist_imgs{im_idx} = [dist_subpath dist_name{im_idx}]; 39 | ref_imgs{im_idx} = [ref_subpath ref_name]; 40 | end 41 | 42 | % MOSs 43 | fprintf('Orignal: %f ~ %f\n', min(scores), max(scores)) 44 | scores = scores / 9; 45 | fprintf('Norm. 
: %f ~ %f\n', min(scores), max(scores)) 46 | 47 | %% Write 48 | fid = fopen([base_path out_file], 'w'); 49 | for im_idx = 1:n_files 50 | fprintf(fid, '%d %d %s %s %f\n', ref_idx(im_idx) - 1, dist_idx(im_idx) - 1, ... 51 | ref_imgs{im_idx}, dist_imgs{im_idx}, scores(im_idx)); 52 | end 53 | fclose(fid); 54 | -------------------------------------------------------------------------------- /IQA_DeepQA_FR_release/default_config.yaml: -------------------------------------------------------------------------------- 1 | database: 2 | # database 3 | sel_data: # database list: LIVE / TID2008 / TID2013 / ... 4 | scenes: all # list of ref. images, default='all' 5 | dist_types: all # distortion types, default='all' 6 | 7 | # patch 8 | patch_size: # ex) [32, 32] 9 | patch_step: # ex) [32, 32] 10 | patch_mode: # ex) 'both_side' or 'shift_center' 11 | random_crops: 0 # if > 0, randomly crop n samples 12 | 13 | # pre-processing 14 | horz_ref: False # reflect in horizontal direction 15 | std_filt_r: 1.0 # filter patches using STD 16 | color: gray # ex) 'gray' or 'rgb' or 'ycbcr' 17 | local_norm: False # mean subtracted and locally normalized images 18 | 19 | # etc. 
def make_image_list(scenes, dist_types=None, show_info=True):
    """
    Make image list from TID2008 database
    TID2008: 25 reference images x 17 distortions x 4 levels

    The list file `LIST_FILE_NAME` is opened relative to the current
    working directory. Every non-blank line must hold five
    whitespace-separated fields:
        scene_idx dist_idx ref_image dist_image score

    Args:
        scenes: collection of scene (reference image) indices to keep.
        dist_types: collection of distortion indices to keep.
            `None` keeps every index listed in `ALL_DIST_TYPES`.
        show_info: if True, print a summary of the selection.

    Returns:
        dict with the keys 'scenes', 'dist_types', 'base_path',
        'n_images', 'd_img_list', 'r_img_list', 'r_idx_list' and
        'score_list' (float32 array, in list-file order).

    Raises:
        ValueError: if a non-blank line does not have exactly five fields
            or an index / score field cannot be parsed.
    """
    if dist_types is None:
        # The declared default used to reach `dis_idx in None` below and
        # fail with TypeError; treat it as "all distortion types".
        dist_types = list(ALL_DIST_TYPES)

    # The membership tests run once per list-file line, so use hashing.
    scene_set = frozenset(scenes)
    dist_set = frozenset(dist_types)

    d_img_list, r_img_list, r_idx_list, score_list = [], [], [], []
    # NOTE: the list file is looked up in the working directory, not under
    # BASE_PATH (os.path.join(BASE_PATH, LIST_FILE_NAME) was disabled).
    list_file_name = LIST_FILE_NAME
    with open(list_file_name, 'r') as list_file:
        for line in list_file:
            fields = line.split()
            if not fields:
                # Tolerate blank lines such as a trailing newline.
                continue
            # scene_idx dist_idx ref_image dist_image score
            (scn_idx, dis_idx, ref, dis, score) = fields
            scn_idx = int(scn_idx)
            dis_idx = int(dis_idx)
            if scn_idx in scene_set and dis_idx in dist_set:
                d_img_list.append(dis)
                r_img_list.append(ref)
                r_idx_list.append(scn_idx)
                score_list.append(float(score))

    score_list = np.array(score_list, dtype='float32')
    n_images = len(d_img_list)

    if show_info:
        print(' - Scenes: %s' % ', '.join([str(i) for i in scenes]))
        print(' - Distortion types: %s' % ', '.join(
            [str(i) for i in dist_types]))
        print(' - Number of images: {:,}'.format(n_images))
        if n_images:
            # np.min / np.max raise on an empty array, so the range is
            # only reported when at least one image was selected.
            print(' - MOS range: [{:.2f}, {:.2f}]'.format(
                np.min(score_list), np.max(score_list)))

    return {
        'scenes': scenes,
        'dist_types': dist_types,
        'base_path': BASE_PATH,
        'n_images': n_images,
        'd_img_list': d_img_list,
        'r_img_list': r_img_list,
        'r_idx_list': r_idx_list,
        'score_list': score_list}
% gen_list_CSIQ.m
% Builds the CSIQ image list file: one text line per distorted image with
%   <ref_idx> <dist_idx> <reference image path> <distorted image path> <dmos>
% where both indices are written zero-based and the image paths are
% relative to base_path. The list is written to [base_path out_file].
clear
fclose all;

%% Parameters
base_path = 'D:/DB/IQA/CSIQ/';   % root folder of the CSIQ database
ref_subpath = 'src_imgs/';       % reference images, relative to base_path
dist_subpath = 'dst_imgs/';      % distorted images, relative to base_path
out_file = 'CSIQ.txt';           % name of the generated list file

% "csiq_dmos.txt" is made manually by copying the values from "csiq.DMOS.xlsx"
% The contained text is like the following format:
% 1600 1 noise 1 0.061 0.062
% 1600 1 noise 2 0.097 0.206
% 1600 1 noise 3 0.033 0.262
% 1600 1 noise 4 0.107 0.375
% 1600 1 noise 5 0.120 0.467

fid = fopen([base_path 'csiq_dmos.txt'], 'r');
% Columns: image dst_idx dst_type dst_lev dmos_std dmos
formatSpec = '%s %d %s %d %f %f';
% data = fscanf(fid, formatSpec, [6 Inf]);
data = textscan(fid, formatSpec);
fclose(fid);

% One cell per column of formatSpec; column 5 (dmos_std) is not used.
ref_names = data{1};
dist_idx = data{2};
dist_types = data{3};
dist_levs = data{4};
scores = data{6};

% Score normalisation (disabled)
% fprintf('Original: %f ~ %f\n', min(scores), max(scores))
% scores = (scores - min(scores)) / (max(scores) - min(scores));
% fprintf('Norm. : %f ~ %f\n', min(scores), max(scores))

%% Dis/Ref images
n_files = size(ref_names, 1);
dist_imgs = cell(n_files, 1);
ref_imgs = cell(n_files, 1);

for im_idx = 1:n_files
    % Distorted image: dst_imgs/<type>/<image>.<type>.<level>.png
    dist_imgs{im_idx} = [dist_subpath dist_types{im_idx} '/' ...
        ref_names{im_idx} '.' dist_types{im_idx} '.' num2str(dist_levs(im_idx)) '.png'];
    % Reference image: src_imgs/<image>.png
    ref_imgs{im_idx} = [ref_subpath ref_names{im_idx} '.png'];
end

%% Ref idx
% Running 1-based index that advances every time the reference name differs
% from the previous row, so rows of the same reference image have to be
% contiguous in csiq_dmos.txt to share one index.
ref_idx = zeros(n_files, 1);
ref_cnt = 1;
prev_ref_name = ref_names{1};
for im_idx = 1:n_files
    cur_ref_name = ref_names{im_idx};
    if strcmp(prev_ref_name, cur_ref_name)
        ref_idx(im_idx) = ref_cnt;
    else
        ref_cnt = ref_cnt + 1;
        prev_ref_name = cur_ref_name;
        ref_idx(im_idx) = ref_cnt;
    end
end

%% Write
fid = fopen([base_path out_file], 'w');
for im_idx = 1:n_files
    % Both indices are shifted to zero-based before being written.
    fprintf(fid, '%d %d %s %s %f\n', ref_idx(im_idx) - 1, dist_idx(im_idx) - 1, ...
        ref_imgs{im_idx}, dist_imgs{im_idx}, scores(im_idx));
end
fclose(fid);
def make_image_list(scenes, dist_types=None, show_info=True):
    """
    Make image list from LIVE database
    LIVE: 29 reference images x 5 distortions
    (jpeg2000: 227 / jpeg: 233 / white_noise: 174 /
     gaussian_blur: 174 / fast_fading: 174)

    The list file `LIST_FILE_NAME` is opened relative to the current
    working directory. Every non-blank line must hold seven
    whitespace-separated fields:
        scene_idx dist_idx ref_image dist_image DMOS width height

    Args:
        scenes: collection of scene (reference image) indices to keep.
            The caller's object is never modified.
        dist_types: collection of distortion indices to keep.
            `None` keeps every index listed in `ALL_DIST_TYPES`.
        show_info: if True, print a summary of the selection.

    Returns:
        dict with the keys 'scenes', 'dist_types', 'base_path',
        'n_images', 'd_img_list', 'r_img_list', 'r_idx_list' and
        'score_list'. 'score_list' is a float32 array holding
        `1.0 - DMOS` in list-file order. 'scenes' is a sorted copy of
        `scenes` when `show_info` is True and `scenes` itself otherwise.

    Raises:
        ValueError: if a non-blank line does not have exactly seven
            fields or an index / score field cannot be parsed.
    """
    if dist_types is None:
        # The declared default used to reach `dis_idx in None` below and
        # fail with TypeError; treat it as "all distortion types".
        dist_types = list(ALL_DIST_TYPES)

    # The membership tests run once per list-file line, so use hashing.
    scene_set = frozenset(scenes)
    dist_set = frozenset(dist_types)

    d_img_list, r_img_list, r_idx_list, score_list = [], [], [], []
    # NOTE: the list file is looked up in the working directory, not under
    # BASE_PATH (os.path.join(BASE_PATH, LIST_FILE_NAME) was disabled).
    list_file_name = LIST_FILE_NAME
    with open(list_file_name, 'r') as list_file:
        for line in list_file:
            fields = line.split()
            if not fields:
                # Tolerate blank lines such as a trailing newline.
                continue
            # scene_idx dist_idx ref_image dist_image DMOS width height
            # (the image resolution is present in the file but unused here)
            scn_idx, dis_idx, ref, dis, score, _width, _height = fields
            scn_idx = int(scn_idx)
            dis_idx = int(dis_idx)
            if scn_idx in scene_set and dis_idx in dist_set:
                d_img_list.append(dis)
                r_img_list.append(ref)
                r_idx_list.append(scn_idx)
                score_list.append(float(score))

    score_list = np.array(score_list, dtype='float32')
    # DMOS -> reverse subjective scores by default
    score_list = 1.0 - score_list
    n_images = len(d_img_list)

    dist_names = ['jp2k', 'jpeg', 'wn', 'gblur', 'fastfading']
    if show_info:
        # Sort a copy: `scenes.sort()` used to reorder the caller's list in
        # place (and failed for tuples). The sorted copy is what gets
        # printed and returned, matching the previous return contents.
        scenes = sorted(scenes)
        print(' - Scenes: %s' % ', '.join([str(i) for i in scenes]))
        print(' - Distortion types: %s' % ', '.join(
            [dist_names[idx] for idx in dist_types]))
        print(' - Number of images: {:,}'.format(n_images))
        if n_images:
            # np.min / np.max raise on an empty array, so the range is
            # only reported when at least one image was selected.
            print(' - DMOS range: [{:.2f}, {:.2f}]'.format(
                np.min(score_list), np.max(score_list)), end='')
            print(' (Scale reversed)')

    return {
        'scenes': scenes,
        'dist_types': dist_types,
        'base_path': BASE_PATH,
        'n_images': n_images,
        'd_img_list': d_img_list,
        'r_img_list': r_img_list,
        'r_idx_list': r_idx_list,
        'score_list': score_list}
width_d, ch_d] = size(dist_img); 39 | % if height_r ~= height_d 40 | % fprintf('Height not matched %s - %s', dist_imgs{idx}, ref_imgs{idx}) 41 | % end 42 | % if width_r ~= width_d 43 | % fprintf('Width not matched %s - %s', dist_imgs{idx}, ref_imgs{idx}) 44 | % end 45 | % if ch_r ~= ch_d 46 | % fprintf('Channel not matched %s - %s', dist_imgs{idx}, ref_imgs{idx}) 47 | % end 48 | res_list(idx, :) = [height_r, width_r]; 49 | end 50 | 51 | %% DMOSs 52 | mos_str = load([base_path, dmos_file]); 53 | dmos_live = mos_str.dmos_new'; 54 | % dmos_max = max(dmos_live); 55 | % dmos_min = 0; 56 | % dmos_live(dmos_live < 0) = 0; 57 | % mos_data = (dmos_live - dmos_min) / (dmos_max - dmos_min); 58 | mos_data = dmos_live; 59 | 60 | %% Sort 61 | [ref_imgs_, I] = sort(ref_imgs); 62 | dist_types_ = dist_types(I); 63 | dist_imgs_ = dist_imgs(I); 64 | mos_data_ = mos_data(I); 65 | res_list_ = res_list(I, :); 66 | 67 | %% Ref idx 68 | ref_idx = zeros(n_files, 1); 69 | ref_cnt = 1; 70 | prev_ref_name = ref_imgs_{1}; 71 | for im_idx = 1:n_files 72 | cur_ref_name = ref_imgs_{im_idx}; 73 | if strcmp(prev_ref_name, cur_ref_name) 74 | ref_idx(im_idx) = ref_cnt; 75 | else 76 | ref_cnt = ref_cnt + 1; 77 | prev_ref_name = cur_ref_name; 78 | ref_idx(im_idx) = ref_cnt; 79 | end 80 | end 81 | 82 | %% Write 83 | fid = fopen([base_path, out_file], 'w'); 84 | for im_idx = 1:n_files 85 | fprintf(fid, '%d %d %s %s %f %d %d\n', ref_idx(im_idx) - 1, dist_types_(im_idx) - 1, ... 
class Record(object):
    """Registry of the values a model exposes for monitoring.

    Three ordered groups are kept, each mapping a name to a dict that
    stores the recorded value under the key 'data' next to any
    supplementary keyword information:

    * `rec_data`    - scalar data of one minibatch,
    * `rec_im_data` - scalar data per image or per patch,
    * `rec_imgs`    - image data per image or per patch.

    `rec_kernels` is a plain list of convolution kernels (`layer.W`).
    """

    def __init__(self):
        self.rec_data = OrderedDict()
        self.rec_im_data = OrderedDict()
        self.rec_imgs = OrderedDict()
        self.rec_kernels = []

    ###########################################################################
    # Functions for recording data

    @property
    def data_keys(self):
        """Get dictionary keys of `rec_data`"""
        return list(self.rec_data)

    @property
    def im_data_keys(self):
        """Get dictionary keys of `rec_im_data`"""
        return list(self.rec_im_data)

    @property
    def imgs_keys(self):
        """Get dictionary keys of `rec_imgs`"""
        return list(self.rec_imgs)

    @property
    def num_data(self):
        """Get number of `rec_data`"""
        return len(self.rec_data)

    @property
    def num_im_data(self):
        """Get number of `rec_im_data`"""
        return len(self.rec_im_data)

    @property
    def num_imgs(self):
        """Get number of `rec_imgs`"""
        return len(self.rec_imgs)

    def empty_records(self):
        """Forget everything recorded so far."""
        self.rec_data.clear()
        self.rec_im_data.clear()
        self.rec_imgs.clear()
        self.rec_kernels = []

    def add_data(self, name, data, **kwargs):
        """Add scalar data of one minibatch to monitor.
        Supplementary information can be added via `**kwargs`.
        """
        kwargs['data'] = data
        self.rec_data[name] = kwargs

    def add_im_data(self, name, data, **kwargs):
        """Add scalar data for each image (imagewise) or patch (patchwise)
        to record.
        Supplementary information can be added via `**kwargs`.
        """
        kwargs['data'] = data
        self.rec_im_data[name] = kwargs

    def add_imgs(self, name, data, **kwargs):
        """Add image data for each image (imagewise) or patch (patchwise)
        to record.
        Supplementary information can be added via `**kwargs`.
        """
        kwargs['data'] = data
        self.rec_imgs[name] = kwargs

    def get_function_outputs(self, train=False):
        """Return the recorded values as one flat list.

        With `train=True` only the `rec_data` values are returned;
        otherwise the `rec_data`, `rec_im_data` and `rec_imgs` values are
        concatenated in that order.
        """
        if train:
            return self.get_data()
        return self.get_data() + self.get_im_data() + self.get_imgs()

    def get_data(self):
        """Return the 'data' entries of `rec_data` in insertion order."""
        return [elem['data'] for elem in self.rec_data.values()]

    def get_im_data(self):
        """Return the 'data' entries of `rec_im_data` in insertion order."""
        return [elem['data'] for elem in self.rec_im_data.values()]

    def get_imgs(self):
        """Return the 'data' entries of `rec_imgs` in insertion order."""
        return [elem['data'] for elem in self.rec_imgs.values()]

    def get_until_indices(self, start=1):
        """Returns the 'until-indices' for each recording data type.

        The three values are cumulative end offsets, counted from `start`,
        of the `rec_data`, `rec_im_data` and `rec_imgs` groups.
        """
        until_loss = len(self.rec_data) + start
        until_im_info = until_loss + len(self.rec_im_data)
        until_img = until_im_info + len(self.rec_imgs)
        return until_loss, until_im_info, until_img

    def add_kernel(self, layers, nth_layers):
        """Add a kernel image from the `nth_layers` of `layers` to record.

        `nth_layers` is either a single index/key into `layers` or a list
        or tuple of them. Every selected layer must be an instance of a
        class named 'ConvLayer'; its `W` attribute is appended to
        `rec_kernels`.

        Raises:
            AssertionError: if a selected layer is not a 'ConvLayer'.
        """
        if not isinstance(nth_layers, (list, tuple)):
            nth_layers = [nth_layers]
        for nth in nth_layers:
            layer = layers[nth]
            layer_type = layer.__class__.__name__
            if layer_type != 'ConvLayer':
                # Raised explicitly (instead of a bare `assert`) so the
                # check also runs under `python -O`; the exception type is
                # kept for existing callers.
                raise AssertionError(
                    'Kernels can only be recorded from a ConvLayer, '
                    'got %s' % layer_type)
            self.rec_kernels.append(layer.W)
def conv2d_tr_half(output, filters, filter_shape, input_shape,
                   subsample=(1, 1)):
    """Transposed 2-D convolution using the 'half' border mode.

    The result is obtained through `conv2d_grad_wrt_inputs`, i.e. as the
    input-side gradient of a forward convolution that uses `filters`,
    `subsample` and `border_mode='half'`.

    Args:
        output: tensor to be upsampled (plays the role of the forward
            convolution's output).
        filters: convolution kernels.
        filter_shape: shape of `filters`, laid out as
            (output channels, input channels, rows, columns).
        input_shape: shape of the tensor to reconstruct; only entries
            [2] and [3] (rows, columns) are read.
        subsample: stride of the corresponding forward convolution.

    Returns:
        The tensor produced by `conv2d_grad_wrt_inputs`, with spatial size
        (input_shape[2], input_shape[3]).
    """
    # The reconstructed tensor is the *input* of the forward convolution,
    # so its channel count is the filters' input-channel dimension
    # (filter_shape[1]), not the output-channel one. Both are equal for the
    # square [1, 1, 5, 5] / [3, 3, 5, 5] kernels used in this module.
    upsampled = conv2d_grad_wrt_inputs(
        output, filters,
        input_shape=(None, filter_shape[1], input_shape[2], input_shape[3]),
        filter_shape=filter_shape, border_mode='half', subsample=subsample)
    return upsampled
def get_hi_lo_lap(img, n_level, n_ch=1):
    '''Split an image into a high-pass residual and its coarsest level.

    The image is downsampled `n_level - 1` times and the result is
    upsampled back through the shapes met on the way down.

    Returns a pair `(hi, lo)`: `hi` is `img` minus the re-upsampled
    image, `lo` is the image after the last downsampling step.
    '''
    # Walk down the pyramid, remembering the shape seen at every level.
    shapes = []
    current = img
    for _ in range(n_level - 1):
        shapes.append(current.shape)
        current = downsample_img(current, n_ch)
    lo = current

    # Walk back up, restoring the recorded shapes from coarse to fine.
    for target_shape in reversed(shapes):
        current = upsample_img(current, target_shape, n_ch)

    hi = img - current
    return hi, lo
border_mode='half')) 160 | # output = T.concatenate(conv_outs, axis=1) 161 | # else: 162 | # raise NotImplementedError 163 | # return output[:, :, :out_shape[2], :out_shape[3]] 164 | -------------------------------------------------------------------------------- /IQA_DeepQA_FR_release/train_iqa.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, division, print_function 2 | 3 | import os 4 | import timeit 5 | from importlib import import_module 6 | 7 | import numpy as np 8 | import theano 9 | import theano.tensor as T 10 | 11 | from .config_parser import config_parser, dump_config 12 | from .data_load.data_loader_IQA import DataLoader 13 | from .trainer import Trainer 14 | 15 | 16 | def train_iqa(config_file, section, snap_path, 17 | output_path=None, snap_file=None, tr_te_file=None): 18 | """ 19 | Imagewise training of an IQA model using both reference and 20 | distorted images. 21 | """ 22 | db_config, model_config, train_config = config_parser( 23 | config_file, section) 24 | 25 | # Check snapshot file 26 | if snap_file is not None: 27 | assert os.path.isfile(snap_file), \ 28 | 'Not existing snap_file: %s' % snap_file 29 | 30 | # Initialize patch step 31 | init_patch_step(db_config, int(model_config.get('ign', 0)), 32 | int(model_config.get('ign_scale', 1))) 33 | 34 | # Load data 35 | data_loader = DataLoader(db_config) 36 | train_data, test_data = data_loader.load_data_tr_te(tr_te_file) 37 | # train_data, test_data = data_loader.load_toy_data_tr_te() 38 | 39 | # Create model 40 | model = create_model(model_config, 41 | train_data.patch_size, train_data.num_ch) 42 | if snap_file is not None: 43 | model.load(snap_file) 44 | 45 | # Create trainer 46 | trainer = Trainer(train_config, snap_path, output_path) 47 | 48 | # Store current configuration file 49 | dump_config(os.path.join(snap_path, 'config.yaml'), 50 | db_config, model_config, train_config) 51 | 52 | 
def run_iqa_iw(train_data, test_data, model, trainer, epochs, n_batch_imgs,
               x_c=None, x=None, mos_set=None, bat2img_idx_set=None,
               prefix2='iqa_'):
    """
    Compile the imagewise train/test Theano functions and run the trainer.

    Patches of the current batch are copied into two fixed-size shared
    buffers (reference / distorted) that are bound to `x` / `x_c` through
    `givens`; the compiled functions only take the scores and the
    batch-to-image index table as explicit inputs.

    Returns whatever `trainer.training_routine` returns.

    @type model: .models.model_basis.ModelBasis
    @type train_data: .data_load.dataset.Dataset
    @type test_data: .data_load.dataset.Dataset
    """
    te_n_batch_imgs = 1

    # Make dummy shared dataset.
    # The same buffers serve training and testing, so they must hold the
    # largest batch of either set (sizing from the training set alone
    # overflows when a test image yields more patches).
    max_num_patch = np.max(np.asarray(train_data.npat_img_list)[:, 0])
    n_pats_dummy = max_num_patch * n_batch_imgs
    te_counts = np.asarray(test_data.npat_img_list)
    if te_counts.size > 0:
        n_pats_dummy = max(
            n_pats_dummy, np.max(te_counts[:, 0]) * te_n_batch_imgs)

    sh = model.input_shape
    buf_shape = (n_pats_dummy, sh[2], sh[3], sh[1])
    np_set_r = np.zeros(buf_shape, dtype='float32')
    np_set_d = np.zeros(buf_shape, dtype='float32')
    shared_set_r = theano.shared(np_set_r, borrow=True)
    shared_set_d = theano.shared(np_set_d, borrow=True)

    train_data.set_imagewise()
    test_data.set_imagewise()

    print('\nCompile theano function: Regress on MOS', end='')
    print(' (imagewise / low GPU memory)')
    start_time = timeit.default_timer()
    if x is None:
        x = T.ftensor4('x')
    if x_c is None:
        x_c = T.ftensor4('x_c')
    if mos_set is None:
        mos_set = T.vector('mos_set')
    if bat2img_idx_set is None:
        bat2img_idx_set = T.imatrix('bat2img_idx_set')

    # x reads the reference buffer, x_c the distorted one
    givens = {
        x: shared_set_r,
        x_c: shared_set_d
    }

    # The training mode must be set before the corresponding graph is built
    print(' (Make training model)')
    model.set_training_mode(True)
    cost, updates, rec_train = model.cost_updates_iqa(
        x, x_c, mos_set, n_batch_imgs, bat2img_idx_set)
    train_model = theano.function(
        [mos_set, bat2img_idx_set],
        [cost] + rec_train.get_function_outputs(train=True),
        updates=updates,
        givens=givens,
        on_unused_input='warn'
    )

    print(' (Make testing model)')
    model.set_training_mode(False)
    cost, rec_test = model.cost_iqa(
        x, x_c, mos_set, te_n_batch_imgs, bat2img_idx_set=bat2img_idx_set)
    test_model = theano.function(
        [mos_set, bat2img_idx_set],
        [cost] + rec_test.get_function_outputs(train=False),
        givens=givens,
        on_unused_input='warn'
    )

    minutes, seconds = divmod(timeit.default_timer() - start_time, 60)
    print(' - Compilation took {:02.0f}:{:05.2f}'.format(minutes, seconds))

    def make_fetcher(dataset, n_imgs, compiled_fn):
        """Build a callable that loads the next batch and evaluates it."""
        def fetch():
            res = dataset.next_batch(n_imgs)
            # Only the first n_data rows are refreshed; the model selects
            # the valid rows through bat2img_idx_set.
            np_set_r[:res['n_data']] = res['ref_data']
            np_set_d[:res['n_data']] = res['dis_data']
            shared_set_r.set_value(np_set_r)
            shared_set_d.set_value(np_set_d)
            return compiled_fn(res['score_set'], res['bat2img_idx_set'])
        return fetch

    get_train_outputs = make_fetcher(train_data, n_batch_imgs, train_model)
    get_test_outputs = make_fetcher(test_data, te_n_batch_imgs, test_model)

    # Main training routine
    return trainer.training_routine(
        model, get_train_outputs, rec_train, get_test_outputs, rec_test,
        n_batch_imgs, te_n_batch_imgs, train_data, test_data,
        epochs, prefix2, check_mos_corr=True)
def _load_yaml_file(path, encoding=None):
    """Parse the YAML file at `path` and return its top-level object.

    An empty file yields an empty dict.  A YAML syntax error is printed
    and re-raised, so the caller never continues with a missing result.
    """
    open_kwargs = {}
    if encoding is not None and sys.version_info > (3, 0):
        # Python 2's built-in open() has no `encoding` argument
        open_kwargs['encoding'] = encoding

    # PyYAML >= 6 rejects yaml.load() without an explicit Loader.
    # FullLoader is what PyYAML 5.x used implicitly; releases older than
    # 5.1 only provide Loader.
    # NOTE(review): neither loader is meant for untrusted files.  Switch to
    # yaml.safe_load if configs can come from outside the project, after
    # checking that files written by dump_config carry no python tags.
    loader = getattr(yaml, 'FullLoader', yaml.Loader)
    with open(path, 'r', **open_kwargs) as stream:
        try:
            data = yaml.load(stream, Loader=loader)
        except yaml.YAMLError as exc:
            print(exc)
            raise
    return {} if data is None else data


def config_parser(config_file, section=None, default_config_file=None):
    """Build the database / model / training configs for one experiment.

    Settings are layered, later layers overwriting earlier ones:
      1. the default config file (if it exists),
      2. the 'common' entry of `config_file` (if present),
      3. the entry named `section`, or the whole file when `section` is
         None.

    Arguments
        config_file: path of the experiment YAML file.
        section: name of the top-level entry to use, or None.
        default_config_file: path of the defaults; DEFAULT_CONFIG if None.

    Returns
        (db_config, model_config, train_config) dictionaries.

    Raises
        ValueError: `section` is not a top-level key of `config_file`.
        yaml.YAMLError: one of the files is not valid YAML.
    """
    print('\nConfig: %s' % config_file, end='')
    if section is not None:
        print(' (Sec.: %s)' % section)
    else:
        print('')

    # load default config data
    if default_config_file is None:
        default_config_file = DEFAULT_CONFIG

    if os.path.isfile(default_config_file):
        d_config_data = _load_yaml_file(default_config_file, encoding='utf-8')
        db_config = d_config_data['database']
        model_config = d_config_data['model']
        train_config = d_config_data['training']
    else:
        print(' @ Default config. file does not exist: %s' % (
            default_config_file))
        db_config = {}
        model_config = {}
        train_config = {}

    # load the current config data
    config_data = _load_yaml_file(config_file)
    common_data = config_data.get('common') or {}

    if section is None:
        section_data = config_data
    elif section in config_data:
        section_data = config_data[section] or {}
    else:
        raise ValueError('No %s in %s' % (section, config_file))

    # merge 'common' first, then the selected section, into the defaults
    targets = (('database', db_config),
               ('model', model_config),
               ('training', train_config))
    for layer in (common_data, section_data):
        for name, target in targets:
            if name in layer:
                overwrite_config(target, layer[name])

    check_subsection(db_config)

    show_configs(db_config, model_config, train_config)

    return db_config, model_config, train_config
def overwrite_config(base_config, new_config):
    """Copy every entry of `new_config` into `base_config` in place.

    Keys already present in `base_config` are replaced.
    """
    base_config.update(new_config.items())


def copy_config(base_config, new_config):
    """Add to `base_config` the entries of `new_config` it does not have.

    Keys already present in `base_config` keep their current value.
    """
    for name, setting in new_config.items():
        base_config.setdefault(name, setting)
def check_dist_list(testing_dist_list, db_config):
    """Expand the requested testing distortion sets into a flat list.

    A single value is treated as a one-element sequence.  The entry
    'each' is replaced by one single-item list per distortion type of the
    database named in db_config['sel_data']; every other entry is kept
    unchanged.

    Raises NotImplementedError when 'each' is requested for a database
    other than LIVE, TID2008, TID2013 or CSIQ.
    """
    if isinstance(testing_dist_list, (list, tuple)):
        requested = testing_dist_list
    else:
        requested = (testing_dist_list, )

    expanded = []
    for entry in requested:
        if entry != 'each':
            expanded.append(entry)
            continue

        # The database module is imported lazily, only when needed
        database = db_config['sel_data']
        if database == 'LIVE':
            from .data_load import LIVE as db_module
        elif database == 'TID2008':
            from .data_load import TID2008 as db_module
        elif database == 'TID2013':
            from .data_load import TID2013 as db_module
        elif database == 'CSIQ':
            from .data_load import CSIQ as db_module
        else:
            raise NotImplementedError
        expanded.extend([dist] for dist in db_module.ALL_DIST_TYPES)
    return expanded
type, SRCC, PLCC\n' 71 | f_log.write(data) 72 | 73 | # Test for each testing distortion set in testing_dist_list 74 | made_model = False 75 | for idx, testing_dist in enumerate(testing_dist_list): 76 | print('\n##### %d/%d #####' % (idx + 1, len(testing_dist_list))) 77 | prefix2 = 'dist_%d' % idx 78 | 79 | # Load data 80 | db_config['dist_types'] = testing_dist 81 | data_loader = DataLoader(db_config) 82 | _, test_data = data_loader.load_data_tr_te(tr_te_file) 83 | 84 | if not made_model: 85 | # Create model 86 | model = create_model(model_config, 87 | test_data.patch_size, test_data.num_ch) 88 | 89 | if load_keys is None: 90 | model.load(snap_file) 91 | else: 92 | model.load_load_keys(load_keys, snap_file) 93 | # model.load_load_keys(['sens_map', 'reg_mos'], snap_file) 94 | made_model = True 95 | 96 | # Create trainer 97 | trainer = Trainer(train_config, output_path=output_path) 98 | 99 | score = run_iqa_iw( 100 | test_data, model, trainer, batch_size, 101 | x=x, x_c=x_c, mos_set=mos_set, bat2img_idx_set=bat2img_idx_set, 102 | prefix2=prefix2) 103 | 104 | # Write log 105 | with open(os.path.join(output_path, 'results.txt'), 'a') as f_log: 106 | data = '{:s}, {:.4f}, {:.4f}\n'.format( 107 | str(testing_dist), score[0], score[1]) 108 | f_log.write(data) 109 | 110 | 111 | def run_iqa_iw(test_data, model, trainer, 112 | n_batch_imgs, x=None, x_c=None, mos_set=None, 113 | bat2img_idx_set=None, prefix2=''): 114 | """ 115 | @type model: .models.model_basis.ModelBasis 116 | @type test_data: .data_load.dataset.Dataset 117 | """ 118 | # Make dummy shared dataset 119 | max_num_patch = np.max(np.asarray(test_data.npat_img_list)[:, 0]) 120 | n_pats_dummy = max_num_patch * n_batch_imgs 121 | sh = model.input_shape 122 | np_set_r = np.zeros((n_pats_dummy, sh[2], sh[3], sh[1]), dtype='float32') 123 | np_set_d = np.zeros((n_pats_dummy, sh[2], sh[3], sh[1]), dtype='float32') 124 | shared_set_r = theano.shared(np_set_r, borrow=True) 125 | shared_set_d = theano.shared(np_set_d, 
def init_patch_step(db_config, ign_border, ign_scale=8):
    """
    Fill in db_config['patch_step'] when it has not been given:
        patch_step = patch_size - ign_border * ign_scale.

    Nothing is changed when 'patch_size' is missing, when 'patch_step' is
    already set, or when 'random_crops' is non-zero.
    """
    size = db_config.get('patch_size', None)
    step = db_config.get('patch_step', None)
    n_random = int(db_config.get('random_crops', 0))

    if size is None or step is not None or n_random != 0:
        return

    margin = ign_border * ign_scale
    new_step = (size[0] - margin, size[1] - margin)
    db_config['patch_step'] = new_step
    print(' - Set patch_step according to patch_size and ign: (%d, %d)' %
          new_step)
def set_configs(self, model_config):
    """Read the optimizer settings and this model's weights from config.

    Missing entries fall back to: wl_subj=1e3, wr_l2=5e-3, wr_tv=1e-2,
    ign=4.  The weights are stored as floats and `ign` as an int.
    """
    self.set_opt_configs(model_config)

    float_defaults = (('wl_subj', 1e3), ('wr_l2', 5e-3), ('wr_tv', 1e-2))
    for name, default in float_defaults:
        setattr(self, name, float(model_config.get(name, default)))

    self.ign = int(model_config.get('ign', 4))
def sobel(self, x, n_ch=1):
    """Filter `x` with the two Sobel kernels.

    Returns the pair (y_grad, x_grad): the responses to self.sobel_y and
    self.sobel_x, in that order.  With n_ch > 1 every channel is filtered
    on its own and the responses are concatenated along axis 1.
    """
    kernel_sh = (1, 1, 3, 3)

    def grad_pair(plane):
        # (response to sobel_y, response to sobel_x) of one 1-channel map
        return (conv2d(plane, self.sobel_y, filter_shape=kernel_sh),
                conv2d(plane, self.sobel_x, filter_shape=kernel_sh))

    if n_ch <= 1:
        return grad_pair(x)

    pairs = [grad_pair(x[:, ch, :, :].dimshuffle(0, 'x', 1, 2))
             for ch in range(n_ch)]
    y_grad = T.concatenate([pair[0] for pair in pairs], axis=1)
    x_grad = T.concatenate([pair[1] for pair in pairs], axis=1)
    return y_grad, x_grad
def cost_iqa(self, x, x_c, mos, n_img=None, bat2img_idx_set=None):
    """Get cost: regression onto MOS using both ref. and dis. images.

    Arguments
        x, x_c: the two image-patch inputs.  The callers in train_iqa.py
            and test_iqa.py bind `x` to the reference patches and `x_c` to
            the distorted patches.
        mos: target scores, one per image.
        n_img: number of images in the batch; must be an int when
            `bat2img_idx_set` is given (it drives a Python loop).
        bat2img_idx_set: optional index table; row i holds the
            (from, to) patch range of image i.  When given, `x` / `x_c`
            are fixed-size buffers and only the patches up to the last
            row's `to` index are used.

    Returns
        (cost, records): the weighted total cost and the Record holding
        the values and images to monitor.
    """
    records = Record()

    # Test against None explicitly: the table is a symbolic variable and
    # its truth value carries no meaning.
    patch_mode = bat2img_idx_set is not None

    if patch_mode:
        # if dummy data with fixed size is given and current data is
        # overwritten on dummy data with size of n_patches,
        # pick current dataset with size of n_patches
        n_patches = bat2img_idx_set[T.shape(bat2img_idx_set)[0] - 1][1]
        x_set = x[:n_patches]
        x_c_set = x_c[:n_patches]
    else:
        # if input is current data
        x_set = x
        x_c_set = x_c

    # Input image vectors to 4D tensors
    x_im = self.image_vec_to_tensor(x_set)
    x_c_im = self.image_vec_to_tensor(x_c_set)
    x_c_norm = normalize_lowpass_subt(x_c_im, 3, self.num_ch)

    # Get error map, downsampled twice
    e = self.log_diff_fn(x_im, x_c_im, 1.0)
    e_ds4 = downsample_img(downsample_img(e, self.num_ch), self.num_ch)

    # predict sensitivity map
    sens_map = self.sens_map_fn(x_c_norm)

    # predict the score
    pred_map = sens_map * e_ds4
    pred_crop = self.shave_border(pred_map)

    # make feature vector
    if patch_mode:
        # if patch based: average over all patches of each image
        feat_vec_list = []
        for idx in range(n_img):
            idx_from = bat2img_idx_set[idx][0]
            idx_to = bat2img_idx_set[idx][1]

            # current predicted map
            c_pred_crop = pred_crop[idx_from: idx_to]
            pred_mean = T.mean(c_pred_crop, axis=(0, 2, 3), keepdims=True)
            feat_vec_list.append(pred_mean)

        feat_vec = T.concatenate(feat_vec_list, axis=0).flatten(2)
    else:
        # if image based
        feat_vec = T.mean(pred_crop, axis=(2, 3))

    # regress onto MOS
    mos_p = self.regress_mos_fn(feat_vec).flatten()

    ######################################################################
    # calculate MOS loss
    subj_loss = self.get_mse(mos_p, mos)

    # L2 regularization
    l2_reg = self.get_l2_regularization(
        ['sens_map', 'reg_mos'], mode='sum')

    # TV norm regularization
    tv = self.get_total_variation(sens_map, 3.0)

    # final cost
    cost = self.add_all_weighted_losses(
        [subj_loss, tv, l2_reg],
        [self.wl_subj, self.wr_tv, self.wr_l2])

    # Data to record
    records.add_data('subj', subj_loss * self.wl_subj)
    records.add_data('tv', tv)

    records.add_im_data('mos_p', mos_p)
    records.add_im_data('mos_gt', mos)

    records.add_imgs('x_c', x_c_im, caxis=[0, 1], scale=1.0)
    records.add_imgs('e_ds', e_ds4, caxis=[0, 1.0], scale=0.25)
    records.add_imgs('sens_map', sens_map, caxis=[0, 1.5], scale=0.25)
    records.add_imgs('pred_map', pred_map, caxis=[0, 1.5], scale=0.25)

    return cost, records
def init_model(self):
    """Create the layers of the model and register their parameters.

    Builds two layer lists in self.layers:
      'sens_map': two parallel 2-layer conv branches (indices 0-1 and
          2-3) followed by four conv layers (indices 4-7) that run on the
          channel-wise concatenation of the branch outputs.
      'reg_mos': two fully connected layers ending in a single output.
    It also creates the shared Sobel kernels self.sobel_y / self.sobel_x.
    The order in which layers are appended matters: sens_map_fn addresses
    them by index.
    """
    print('\n - Sensitivity map encoder layers')
    key = 'sens_map'
    self.layers[key] = []

    # Branch 1 (layers 0-1); sens_map_fn feeds x_c into it
    self.layers[key].append(layers.ConvLayer(
        self.input_shape, 32, (3, 3), layers.lrelu, name=key + '/conv1_1'))

    self.layers[key].append(layers.ConvLayer(
        self.last_sh(key), 32, (3, 3), layers.lrelu, name=key + '/conv2_1',
        subsample=(2, 2)))

    #######################################################################
    # Branch 2 (layers 2-3); sens_map_fn feeds the error map into it.
    # It starts again from self.input_shape, not from the previous layer.
    self.layers[key].append(layers.ConvLayer(
        self.input_shape, 32, (3, 3), layers.lrelu, name=key + '/conv1_2'))

    self.layers[key].append(layers.ConvLayer(
        self.last_sh(key), 32, (3, 3), layers.lrelu, name=key + '/conv2_2',
        subsample=(2, 2)))

    #######################################################################
    # Shared trunk (layers 4-7).  Its input is the concatenation of both
    # branch outputs along axis 1, hence twice the channel count.
    # presumably last_sh(key) is the output shape of the last layer
    # appended under `key` - TODO confirm in ModelBasis
    prev_sh = self.last_sh(key)
    concat_sh = (prev_sh[0], prev_sh[1] * 2) + prev_sh[2:]

    self.layers[key].append(layers.ConvLayer(
        concat_sh, 64, (3, 3), layers.lrelu, name=key + '/conv3'))

    self.layers[key].append(layers.ConvLayer(
        self.last_sh(key), 64, (3, 3), layers.lrelu, name=key + '/conv4',
        subsample=(2, 2)))

    self.layers[key].append(layers.ConvLayer(
        self.last_sh(key), 64, (3, 3), layers.lrelu, name=key + '/conv5'))

    # Last layer outputs num_ch maps through a ReLU; `b` is given as a
    # vector of ones (presumably the initial bias - confirm in ConvLayer)
    self.layers[key].append(layers.ConvLayer(
        self.last_sh(key), self.num_ch, (3, 3), T.nnet.relu,
        b=np.ones((self.num_ch,), dtype='float32'), name=key + '/conv6'))

    #######################################################################
    print('\n - Regression mos layers')
    key = 'reg_mos'
    self.layers[key] = []

    self.layers[key].append(layers.FCLayer(
        self.num_ch, 4, layers.lrelu, name=key + '/fc1'))

    self.layers[key].append(layers.FCLayer(
        self.last_sh(key), 1, T.nnet.relu, name=key + '/fc2'
    ))

    #######################################################################
    # Sobel filters, stored as (1, 1, 3, 3) shared kernels
    sobel_y_val = np.array([[1, 2, 1], [0, 0, 0], [-1, -2, -1]],
                           dtype='float32').reshape((1, 1, 3, 3))
    self.sobel_y = theano.shared(sobel_y_val, borrow=True)

    sobel_x_val = np.array([[-1, 0, 1], [-2, 0, 2], [-1, 0, 1]],
                           dtype='float32').reshape((1, 1, 3, 3))
    self.sobel_x = theano.shared(sobel_x_val, borrow=True)

    #######################################################################
    # Must run after every layer has been appended
    super(Model, self).make_param_list()
    super(Model, self).show_num_params()
123 | Calculate total variation of the input. 124 | Arguments 125 | x: 4D tensor image. It must have 1 channel feauture 126 | """ 127 | y_grad, x_grad = self.sobel(x, self.num_ch) 128 | tv = T.mean((y_grad ** 2 + x_grad ** 2) ** (beta / 2)) 129 | return tv 130 | 131 | def log_diff_fn(self, in_a, in_b, eps=0.1): 132 | diff = 255.0 * (in_a - in_b) 133 | log_255_sq = np.float32(2 * np.log(255.0)) 134 | 135 | val = log_255_sq - T.log(diff ** 2 + eps) 136 | max_val = np.float32(log_255_sq - np.log(eps)) 137 | return val / max_val 138 | 139 | def power_diff_fn(self, in_a, in_b, power=0.2): 140 | diff = 255.0 * (in_a - in_b) 141 | 142 | val = T.abs_(diff) ** power 143 | max_val = np.float32(255.0 ** power) 144 | return val / max_val 145 | 146 | def sens_map_fn(self, x_c, err): 147 | layers = self.layers['sens_map'] 148 | # x_c 149 | prev_out = layers[0].get_output(x_c) 150 | x_c_out = layers[1].get_output(prev_out) 151 | 152 | # err 153 | prev_out = layers[2].get_output(err) 154 | err_out = layers[3].get_output(prev_out) 155 | 156 | prev_out = T.concatenate([x_c_out, err_out], axis=1) 157 | for layer in layers[4:]: 158 | prev_out = layer.get_output(prev_out) 159 | return prev_out 160 | 161 | def regress_mos_fn(self, feat_vec): 162 | return self.get_key_layers_output(feat_vec, 'reg_mos') 163 | 164 | def shave_border(self, feat_map): 165 | if self.ign > 0: 166 | return feat_map[:, :, self.ign:-self.ign, self.ign:-self.ign] 167 | else: 168 | return feat_map 169 | 170 | def cost_iqa(self, x, x_c, mos, n_img=None, bat2img_idx_set=None): 171 | """ 172 | Get cost: regression onto MOS using both ref. adn dis. 
images 173 | """ 174 | records = Record() 175 | # concatenate the image patches 176 | if bat2img_idx_set: 177 | # if dummy data with fixed size is given and current data is 178 | # overwritten on dummy data with size of n_patches, 179 | # pick current dataset with size of n_patches 180 | n_patches = bat2img_idx_set[T.shape(bat2img_idx_set)[0] - 1][1] 181 | x_set = x[:n_patches] 182 | x_c_set = x_c[:n_patches] 183 | else: 184 | # if input is current data 185 | x_set = x 186 | x_c_set = x_c 187 | 188 | # Input image vectors to 4D tensors 189 | x_im = self.image_vec_to_tensor(x_set) 190 | x_c_im = self.image_vec_to_tensor(x_c_set) 191 | x_c_norm = normalize_lowpass_subt(x_c_im, 3, self.num_ch) 192 | 193 | # Get error map 194 | e = self.log_diff_fn(x_im, x_c_im, 1.0) 195 | e_ds4 = downsample_img(downsample_img(e, self.num_ch), self.num_ch) 196 | 197 | # predict sensitivity map 198 | sens_map = self.sens_map_fn(x_c_norm, e) 199 | 200 | # predict the score 201 | pred_map = sens_map * e_ds4 202 | pred_crop = self.shave_border(pred_map) 203 | 204 | # make feature vector 205 | if bat2img_idx_set: 206 | # if patch based 207 | feat_vec_list = [] 208 | for idx in range(n_img): 209 | idx_from = bat2img_idx_set[idx][0] 210 | idx_to = bat2img_idx_set[idx][1] 211 | 212 | c_pred_crop = pred_crop[idx_from: idx_to] 213 | pred_mean = T.mean(c_pred_crop, axis=(0, 2, 3), keepdims=True) 214 | feat_vec_list.append(pred_mean) 215 | 216 | feat_vec = T.concatenate(feat_vec_list, axis=0).flatten(2) 217 | # feat_vec = T.stack(feat_vec_list) 218 | else: 219 | # if image based 220 | feat_vec = T.mean(pred_crop, axis=(2, 3)) 221 | 222 | # regress onto MOS 223 | mos_p = self.regress_mos_fn(feat_vec).flatten() 224 | 225 | ###################################################################### 226 | # MOS loss 227 | subj_loss = self.get_mse(mos_p, mos) 228 | 229 | # L2 regularization 230 | l2_reg = self.get_l2_regularization( 231 | ['sens_map', 'reg_mos'], mode='sum') 232 | 233 | # TV norm 
regularization 234 | tv = self.get_total_variation(sens_map, 3.0) 235 | 236 | # final cost 237 | cost = self.add_all_weighted_losses( 238 | [subj_loss, tv, l2_reg], 239 | [self.wl_subj, self.wr_tv, self.wr_l2]) 240 | 241 | # Data to record 242 | records.add_data('subj', subj_loss * self.wl_subj) 243 | records.add_data('tv', tv) 244 | 245 | records.add_im_data('mos_p', mos_p) 246 | records.add_im_data('mos_gt', mos) 247 | 248 | records.add_imgs('x_c', x_c_im, caxis=[-0.5, 0.5], scale=1.0) 249 | records.add_imgs('e_ds', e_ds4, caxis=[0, 1.0], scale=0.25) 250 | records.add_imgs('sens_map', sens_map, caxis=[0, 1.5], scale=0.25) 251 | records.add_imgs('pred_map', pred_map, caxis=[0, 1.5], scale=0.25) 252 | 253 | # records.add_kernel(self.layers['sens_map'], [0]) 254 | 255 | return cost, records 256 | 257 | def cost_updates_iqa(self, x, x_c, mos, n_img=None, bat2img_idx_set=None): 258 | cost, records = self.cost_iqa( 259 | x, x_c, mos, n_img=n_img, bat2img_idx_set=bat2img_idx_set) 260 | updates = self.get_updates_keys( 261 | cost, ['sens_map', 'reg_mos']) 262 | return cost, updates, records 263 | -------------------------------------------------------------------------------- /IQA_DeepQA_FR_release/optimizer.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, division, print_function 2 | 3 | import numpy as np 4 | import theano 5 | import theano.tensor as T 6 | 7 | 8 | class Optimizer(object): 9 | def __init__(self, lr_init=1e-3): 10 | self.lr = theano.shared( 11 | np.asarray(lr_init, dtype=theano.config.floatX), borrow=True) 12 | 13 | def get_updates_cost(self, cost, params, scheme='nadam', lr_factors=None): 14 | if scheme == 'adagrad': 15 | updates = self.get_updates_adagrad(cost, params) 16 | elif scheme == 'adadelta': 17 | updates = self.get_updates_adadelta(cost, params) 18 | elif scheme == 'rmsprop': 19 | updates = self.get_updates_rmsprop(cost, params) 20 | elif scheme == 'adam': 21 | 
class Optimizer(object):
    """Builds Theano update lists for several gradient-descent schemes.

    The learning rate is kept in the shared variable `self.lr`, so it can be
    changed (see `set_learning_rate` / `mult_learning_rate`) after the update
    expressions have been built.
    """

    def __init__(self, lr_init=1e-3):
        self.lr = theano.shared(
            np.asarray(lr_init, dtype=theano.config.floatX), borrow=True)

    @staticmethod
    def _zeros_like_shared(param):
        """Return a zero-filled shared variable shaped like `param`."""
        value = param.get_value(borrow=True)
        return theano.shared(np.zeros(value.shape, dtype=value.dtype),
                             broadcastable=param.broadcastable)

    def get_updates_cost(self, cost, params, scheme='nadam', lr_factors=None):
        """Return the update list of `cost` w.r.t. `params` for `scheme`.

        Parameters
        ----------
        cost: scalar expression to minimize.
        params: list of shared variables to update.
        scheme: one of 'adagrad', 'adadelta', 'rmsprop', 'adam', 'nadam',
            'sgd'.
        lr_factors: optional per-parameter learning-rate multipliers; only
            forwarded to the 'adam' and 'nadam' schemes.

        Raises
        ------
        ValueError: if `scheme` is not one of the supported names.
        """
        if scheme == 'adagrad':
            updates = self.get_updates_adagrad(cost, params)
        elif scheme == 'adadelta':
            updates = self.get_updates_adadelta(cost, params)
        elif scheme == 'rmsprop':
            updates = self.get_updates_rmsprop(cost, params)
        elif scheme == 'adam':
            updates = self.get_updates_adam(cost, params,
                                            lr_factors=lr_factors)
        elif scheme == 'nadam':
            updates = self.get_updates_nadam(cost, params,
                                             lr_factors=lr_factors)
        elif scheme == 'sgd':
            # SGD is always run with constant gradient clipping at 0.01.
            updates = self.get_updates_sgd_momentum(
                cost, params, grad_clip=0.01)
        else:
            # One single message string: passing two arguments to ValueError
            # would render the message as a tuple.
            raise ValueError(
                'Select the proper scheme (%s): '
                'adagrad / adadelta / rmsprop / adam / nadam / sgd' % scheme)

        return updates

    def get_updates_adagrad(self, cost, params, eps=1e-8):
        """Adagrad: scale each step by the root of the summed squared grads."""
        lr = self.lr
        print(' - Adagrad: lr = %.2e' % (lr.get_value(borrow=True)))

        grads = T.grad(cost, params)
        updates = []

        for p, g in zip(params, grads):
            accu = self._zeros_like_shared(p)
            accu_new = accu + g ** 2
            new_p = p - (lr * g / T.sqrt(accu_new + eps))

            updates.append((accu, accu_new))
            updates.append((p, new_p))

        return updates

    def get_updates_adadelta(self, cost, params, rho=0.95, eps=1e-6):
        """Adadelta with decay `rho`; the step is still multiplied by lr."""
        lr = self.lr
        print(' - Adadelta: lr = %.2e' % (lr.get_value(borrow=True)))
        one = T.constant(1.)

        grads = T.grad(cost, params)
        updates = []

        for p, g in zip(params, grads):
            # accu: accumulate gradient magnitudes
            accu = self._zeros_like_shared(p)
            # delta_accu: accumulate update magnitudes (recursively!)
            delta_accu = self._zeros_like_shared(p)

            # update accu (as in rmsprop)
            accu_new = rho * accu + (one - rho) * g ** 2
            updates.append((accu, accu_new))

            # compute parameter update, using the 'old' delta_accu
            update = (g * T.sqrt(delta_accu + eps) /
                      T.sqrt(accu_new + eps))
            new_param = p - lr * update
            updates.append((p, new_param))

            # update delta_accu (as accu, but accumulating updates)
            delta_accu_new = rho * delta_accu + (one - rho) * update ** 2
            updates.append((delta_accu, delta_accu_new))

        return updates

    def get_updates_rmsprop(self, cost, params, rho=0.9, eps=1e-8):
        """RMSprop: divide the gradient by a running RMS of its magnitude."""
        lr = self.lr
        print(' - RMSprop: lr = %.2e' % (lr.get_value(borrow=True)))
        one = T.constant(1.)

        grads = T.grad(cost=cost, wrt=params)

        updates = []
        for p, g in zip(params, grads):
            accu = self._zeros_like_shared(p)
            accu_new = rho * accu + (one - rho) * g ** 2
            gradient_scaling = T.sqrt(accu_new + eps)
            g = g / gradient_scaling

            updates.append((accu, accu_new))
            updates.append((p, p - lr * g))

        return updates

    def get_updates_adam(self, cost, params,
                         beta1=0.9, beta2=0.999, epsilon=1e-8,
                         lr_factors=None):
        """
        Adam optimizer.

        Parameters
        ----------
        beta1/beta2: floats, 0 < beta < 1. Generally close to 1.
        epsilon: float >= 0.
        lr_factors: optional list with one learning-rate multiplier per
            parameter; defaults to 1.0 for every parameter.

        The learning rate itself is read from `self.lr`.

        References
        ----------
        [1] Adam - A Method for Stochastic Optimization
        [2] Lasagne:
            https://github.com/Lasagne/Lasagne/blob/master/lasagne/updates.py
        """
        lr = self.lr
        print(' - Adam: lr = %.2e' % (lr.get_value(borrow=True)))

        one = T.constant(1.)
        self.iterations = theano.shared(
            np.asarray(0., dtype=theano.config.floatX), borrow=True)

        grads = T.grad(cost, params)
        updates = [(self.iterations, self.iterations + 1)]

        t = self.iterations + 1.
        # Bias-corrected step size.
        lr_t = lr * (T.sqrt(one - beta2 ** t) / (one - beta1 ** t))

        if not lr_factors:
            lr_factors = [1.0 for i in range(len(params))]
        for p, g, factor in zip(params, grads, lr_factors):
            m = self._zeros_like_shared(p)
            v = self._zeros_like_shared(p)

            m_t = (beta1 * m) + (one - beta1) * g
            v_t = (beta2 * v) + (one - beta2) * g ** 2
            p_t = p - lr_t * factor * m_t / (T.sqrt(v_t) + epsilon)

            updates.append((m, m_t))
            updates.append((v, v_t))
            updates.append((p, p_t))

        return updates

    def get_updates_nadam(self, cost, params,
                          beta1=0.9, beta2=0.999,
                          epsilon=1e-8, schedule_decay=0.004,
                          lr_factors=None):
        """
        Nesterov Adam.
        Keras implementation.
        Much like Adam is essentially RMSprop with momentum,
        Nadam is Adam RMSprop with Nesterov momentum.

        Parameters
        ----------
        beta1: float
        beta2: float
            0 < beta < 1. Generally close to 1.
        epsilon: float >= 0.
        schedule_decay: float, decay of the momentum warm-up schedule.
        lr_factors: optional list with one learning-rate multiplier per
            parameter; defaults to 1.0 for every parameter.

        The learning rate itself is read from `self.lr`.

        References
        ----------
        [1] Nadam report - http://cs229.stanford.edu/proj2015/054_report.pdf
        [2] On the importance of initialization and momentum in deep learning -
            http://www.cs.toronto.edu/~fritz/absps/momentum.pdf
        """
        lr = self.lr
        print(' - Nesterov Adam: lr = %.2e' % (lr.get_value(borrow=True)))

        one = T.constant(1.)
        self.iterations = theano.shared(
            np.asarray(0., dtype=theano.config.floatX), borrow=True)
        self.m_schedule = theano.shared(
            np.asarray(1., dtype=theano.config.floatX), borrow=True)
        self.beta1 = theano.shared(
            np.asarray(beta1, dtype=theano.config.floatX), borrow=True)
        self.beta2 = theano.shared(
            np.asarray(beta2, dtype=theano.config.floatX), borrow=True)
        self.schedule_decay = schedule_decay

        grads = T.grad(cost, params)
        updates = [(self.iterations, self.iterations + 1)]

        t = self.iterations + 1.

        # Due to the recommendations in [2], i.e. warming momentum schedule
        momentum_cache_t = self.beta1 * (
            one - 0.5 * (T.pow(0.96, t * self.schedule_decay)))
        momentum_cache_t_1 = self.beta1 * (
            one - 0.5 * (T.pow(0.96, (t + 1.) * self.schedule_decay)))
        m_schedule_new = self.m_schedule * momentum_cache_t
        m_schedule_next = (self.m_schedule * momentum_cache_t *
                           momentum_cache_t_1)
        updates.append((self.m_schedule, m_schedule_new))

        if not lr_factors:
            lr_factors = [1.0 for i in range(len(params))]
        for p, g, factor in zip(params, grads, lr_factors):
            m = self._zeros_like_shared(p)
            v = self._zeros_like_shared(p)

            # the following equations given in [1]
            g_prime = g / (one - m_schedule_new)
            m_t = self.beta1 * m + (one - self.beta1) * g
            m_t_prime = m_t / (one - m_schedule_next)
            v_t = self.beta2 * v + (one - self.beta2) * g ** 2
            v_t_prime = v_t / (one - T.pow(self.beta2, t))
            m_t_bar = ((one - momentum_cache_t) * g_prime +
                       momentum_cache_t_1 * m_t_prime)

            updates.append((m, m_t))
            updates.append((v, v_t))

            p_t = p - self.lr * factor * m_t_bar / (
                T.sqrt(v_t_prime) + epsilon)

            updates.append((p, p_t))
        return updates

    def get_updates_sgd_momentum(self, cost, params,
                                 decay_mode=None, decay=0.,
                                 momentum=0.9, nesterov=False,
                                 grad_clip=None, constant_clip=True):
        """SGD with momentum, optional lr decay and gradient clipping.

        Parameters
        ----------
        decay_mode: accepted for interface compatibility; not used here.
        decay: the learning rate is multiplied by 1 / (1 + decay * iteration).
        momentum: momentum coefficient.
        nesterov: if True, apply the Nesterov form of the update.
        grad_clip: None to disable clipping, otherwise a positive bound.
        constant_clip: if True, clip through `theano.gradient.grad_clip`
            with the bound `grad_clip`; otherwise clip every gradient to
            +/- grad_clip / lr.
        """
        print(' - SGD: lr = %.2e' % (self.lr.get_value(borrow=True)), end='')
        print(', decay = %.2f' % (decay), end='')
        print(', momentum = %.2f' % (momentum), end='')
        print(', nesterov =', nesterov, end='')
        print(', grad_clip =', grad_clip)

        self.grad_clip = grad_clip
        self.constant_clip = constant_clip
        self.iterations = theano.shared(
            np.asarray(0., dtype=theano.config.floatX), borrow=True)

        lr = self.lr * (1.0 / (1.0 + decay * self.iterations))

        updates = [(self.iterations, self.iterations + 1.)]

        # Get gradients and apply clipping
        if self.grad_clip is None:
            grads = T.grad(cost, params)
        else:
            assert self.grad_clip > 0
            if self.constant_clip:
                # Constant clipping using theano.gradient.grad_clip
                clip = self.grad_clip
                grads = T.grad(
                    theano.gradient.grad_clip(cost, -clip, clip),
                    params)
            else:
                # Adaptive clipping: the bound scales with 1 / lr
                clip = self.grad_clip / lr
                grads_ = T.grad(cost, params)
                grads = [T.clip(g, -clip, clip) for g in grads_]

        for p, g in zip(params, grads):
            v_prev = self._zeros_like_shared(p)
            v = momentum * v_prev - lr * g
            updates.append((v_prev, v))

            if nesterov:
                new_p = p + momentum * v - lr * g
            else:
                new_p = p + v

            updates.append((p, new_p))
        return updates

    def set_learning_rate(self, lr):
        """Overwrite the shared learning rate with `lr`."""
        self.lr.set_value(np.asarray(lr, dtype=theano.config.floatX))

    def mult_learning_rate(self, factor=0.5):
        """Multiply the shared learning rate by `factor` and report it."""
        new_lr = self.lr.get_value() * factor
        self.lr.set_value(np.asarray(new_lr, dtype=theano.config.floatX))
        print(' * change learning rate to %.2e' % (new_lr))
def read_parse_log(log_file):
    """Parse the last run stored in a log file.

    A run starts with a line containing a date such as 00/00/00 (and
    optionally a time 00:00:00), followed by one line of column labels and
    then one line of numbers per epoch.  Commas followed by a space are
    treated like plain whitespace.  If the file contains several runs, only
    the last one is parsed.

    Returns
        (labels, data, time_str, date_str), where `labels` starts with
        "epoch" and `data` is a float array of shape
        (number of data lines, number of labels); or None (after printing
        the reason) when the file is malformed.
    """
    print('Load data: %s' % log_file)
    with open(log_file, 'r') as l_file:
        lines = l_file.readlines()

    # Find the last starting line
    date_pattern = re.compile(r'(\d+/\d+/\d+)')
    last_start = -1
    for idx, line in enumerate(lines):
        if date_pattern.search(line):
            last_start = idx
    if last_start < 0:
        print("Not proper file: %s." % log_file)
        print("Starting line must contain date 00/00/00.")
        return None
    if last_start + 1 >= len(lines):
        # Without this guard the label lookup below raises IndexError.
        print("Not proper file: %s." % log_file)
        print("Label line is missing after the starting line.")
        return None

    # Get time and date
    match_time = re.search(r'(\d+:\d+:\d+)', lines[last_start])
    time_str = match_time.group() if match_time else ""
    match_date = date_pattern.search(lines[last_start])
    date_str = match_date.group() if match_date else ""

    # Get labels of data; the first (unlabeled) column is the epoch
    labels = ["epoch"] + lines[last_start + 1].replace(', ', ' ').split()

    # Load data
    n_label = len(labels)
    first_data = last_start + 2
    rows = lines[first_data:]
    data = np.zeros((len(rows), n_label), dtype=float)
    for row, text in enumerate(rows):
        cur = text.replace(', ', ' ').split()
        if len(cur) != n_label:
            print("Not proper file: %s." % log_file)
            print("Data dimension (in line %d)" % (first_data + row + 1),
                  end=' ')
            print("doesn't match to the number of labels.")
            return None
        data[row] = [float(value) for value in cur]

    return labels, data, time_str, date_str


def _load_log_or_raise(log_file):
    """Parse `log_file`; raise ValueError instead of returning None."""
    parsed = read_parse_log(log_file)
    if parsed is None:
        raise ValueError("Cannot draw malformed log file: %s" % log_file)
    return parsed


def _figure_layout(labels):
    """Return (n_figure, n_tile, draw_cc) for the given column labels.

    When the last two labels are SRCC and PLCC they share one panel, so the
    panel count is reduced by one.  `n_tile` is the side of the square
    subplot grid, ceil(sqrt(n_figure)).
    """
    n_figure = len(labels) - 1  # except for the first column (epoch)
    draw_cc = labels[-1] == 'PLCC' and labels[-2] == 'SRCC'
    if draw_cc:
        n_figure = n_figure - 1
    n_tile = int(np.ceil(np.sqrt(n_figure)))
    return n_figure, n_tile, draw_cc


def _draw_cc_panel(n_tile, n_figure, data, style_1, style_2):
    """Draw PLCC (last column) and SRCC (second to last) in the last panel."""
    plt.subplot(n_tile, n_tile, n_figure)
    plt.plot(data[:, 0], data[:, -1], style_1, label='PLCC')
    plt.plot(data[:, 0], data[:, -2], style_2, label='SRCC')
    plt.legend(loc=0)
    plt.title('CC')
    plt.grid(True)
    plt.xlim(1, data[-1, 0])


def _finish_figure(out_img_file):
    """Tighten the layout and optionally save the current figure."""
    plt.tight_layout()
    plt.subplots_adjust(top=0.9)
    if out_img_file:
        print(' - Save to image: %s' % out_img_file)
        plt.savefig(out_img_file, dpi=100)


def draw_log(log_file, out_img_file=None):
    """
    Read log_file and draw one panel per logged quantity.

    Raises ValueError if the log file cannot be parsed.
    """
    labels, data, time_str, date_str = _load_log_or_raise(log_file)
    n_figure, n_tile, draw_cc = _figure_layout(labels)

    style_1 = 'b-'
    style_2 = 'r-.'

    matplotlib.rcParams.update({'font.size': 8})
    plt.figure()
    plt.suptitle(log_file + ' - ' + time_str + ' ' + date_str)

    # If draw_cc, the last panel is reserved for SRCC and PLCC.
    n_plain = n_figure - 1 if draw_cc else n_figure
    for fig_idx in range(n_plain):
        lab_idx = fig_idx + 1
        # Three-integer form: a digit string such as "4410" is ambiguous
        # as soon as the grid holds ten or more panels.
        plt.subplot(n_tile, n_tile, fig_idx + 1)
        plt.plot(data[:, 0], data[:, lab_idx])
        plt.title(labels[lab_idx])
        plt.grid(True)
        plt.xlim(1, data[-1, 0])

    if draw_cc:
        _draw_cc_panel(n_tile, n_figure, data, style_1, style_2)

    _finish_figure(out_img_file)


def draw_log_train_test(log_file_tr, log_file_te, out_img_file=None):
    """
    Read log_file_tr and log_file_te and draw them in shared panels.

    Both logs must carry the same time and date, and the training labels
    must be a prefix of the testing labels (equal length, or two shorter).
    Raises ValueError if either log file cannot be parsed.
    """
    labels_tr, data_tr, time_str, date_str = _load_log_or_raise(log_file_tr)
    labels_te, data_te, time_str_te, date_str_te = _load_log_or_raise(
        log_file_te)

    assert time_str == time_str_te
    assert date_str == date_str_te
    assert (len(labels_tr) == len(labels_te) or
            len(labels_tr) == len(labels_te) - 2)
    for idx in range(len(labels_tr)):
        assert labels_tr[idx] == labels_te[idx]

    n_figure, n_tile, draw_cc = _figure_layout(labels_te)

    style_1 = 'b-'
    style_2 = 'r-.'

    matplotlib.rcParams.update({'font.size': 8})
    plt.figure()
    plt.suptitle(log_file_tr + ' - ' + time_str + ' ' + date_str)

    # If draw_cc, the last panel is reserved for SRCC and PLCC.
    n_plain = n_figure - 1 if draw_cc else n_figure
    for fig_idx in range(n_plain):
        lab_idx = fig_idx + 1
        plt.subplot(n_tile, n_tile, fig_idx + 1)
        plt.plot(data_tr[:, 0], data_tr[:, lab_idx], style_1, label='train')
        plt.plot(data_te[:, 0], data_te[:, lab_idx], style_2, label='test')
        plt.legend(loc=0)
        plt.title(labels_te[lab_idx])
        plt.grid(True)
        plt.xlim(1, data_te[-1, 0])

    if draw_cc:
        _draw_cc_panel(n_tile, n_figure, data_te, style_1, style_2)

    _finish_figure(out_img_file)


def _find_txt_logs(root_path, keywords):
    """Return the *.txt files under `root_path` matching any keyword.

    On Python >= 3.5 a recursive glob is used and the keyword is searched
    in the whole path; otherwise the tree is walked and the keyword is
    searched in the file name only.
    """
    found = []
    if sys.version_info >= (3, 5):
        for filename in glob.iglob(root_path + "**/*.txt", recursive=True):
            if any(word in filename for word in keywords):
                found.append(filename)
    else:
        for root, _dirnames, filenames in os.walk(root_path):
            for filename in fnmatch.filter(filenames, '*.txt'):
                if any(word in filename for word in keywords):
                    found.append(os.path.join(root, filename))
    return found


def _png_path(root_path, log_file):
    """Return the image path in `root_path` for `log_file`.

    The sub-directory part of the log path is folded into the file name
    (path separators become underscores) and '.png' replaces the last four
    characters.
    """
    head, tail = os.path.split(log_file)
    prefix = head[len(root_path):]
    prefix = prefix.replace('\\', '_').replace('/', '_')
    return os.path.join(root_path, prefix + tail[:-4] + '.png')


def _report_ignored(pass_list):
    """Print the files that could not be drawn, if any."""
    if pass_list:
        print(" @ Ignored file list:")
        for name in pass_list:
            print("   - %s\n" % name)


def draw_all_logs(root_path, keywords=('test',), show_figs=False):
    """Draw every matching *.txt log found under `root_path`.

    Files that cannot be drawn are skipped and listed at the end.
    """
    pass_list = []
    for log_file in _find_txt_logs(root_path, keywords):
        try:
            draw_log(log_file, _png_path(root_path, log_file))
        except Exception:
            # Best effort: keep going, report the skipped file afterwards.
            pass_list.append(log_file)

    _report_ignored(pass_list)

    if show_figs:
        plt.show()


def draw_all_logs_train_test(root_path, show_figs=False):
    """Draw every train/test log pair found under `root_path`.

    Testing logs are the *.txt files matching 'log_test'; the paired
    training log is the same path with its last nine characters replaced by
    '.txt' (presumably stripping a '_test.txt' suffix - verify against the
    code that writes the logs).  Pairs that cannot be drawn are skipped and
    listed at the end.
    """
    te_log_file_list = _find_txt_logs(root_path, ['log_test'])
    tr_log_file_list = [name[:-9] + ".txt" for name in te_log_file_list]

    pass_list = []
    for tr_log_file, te_log_file in zip(tr_log_file_list, te_log_file_list):
        try:
            draw_log_train_test(
                tr_log_file, te_log_file,
                _png_path(root_path, tr_log_file))
        except Exception:
            # Best effort: keep going, report the skipped file afterwards.
            pass_list.append(tr_log_file)

    _report_ignored(pass_list)

    if show_figs:
        plt.show()
class ModelBasis(object):
    """
    Arguments
        model_config: model configuration dictionary

    Attributes of model_config
        input_size: input image size, (height, width).
        num_ch: number of input channels
        lr: initial learning rate
    """

    def __init__(self, model_config=None, rng=None):
        """Read the input geometry and set up the optimizer.

        Arguments
            model_config: configuration dictionary; 'input_size' and
                'num_ch' are required.
            rng: accepted for interface compatibility; not used here.
        """
        if model_config is None:
            model_config = {}

        # Make input_shape
        input_size = model_config.get('input_size', None)
        assert input_size is not None, \
            "model_config must define 'input_size'"
        self.input_size = tuple(input_size)
        assert len(self.input_size) == 2
        self.num_ch = model_config.get('num_ch', None)
        assert self.num_ch is not None
        self.input_shape = (None, self.num_ch) + self.input_size

        # Get optimizer
        self.opt = optimizer.Optimizer()
        self.set_opt_configs(model_config)

        # Initialize variables
        self.layers = OrderedDict()
        self.params = OrderedDict()

    def set_opt_configs(self, model_config=None, opt_scheme=None, lr=None):
        """Set the learning rate and the optimization scheme.

        When `model_config` is given, its 'lr' (default 1e-3) and
        'opt_scheme' (default 'adam') entries are used and the `lr` /
        `opt_scheme` arguments are ignored; otherwise both arguments are
        required.
        """
        if model_config is None:
            assert lr is not None and opt_scheme is not None
        else:
            lr = float(model_config.get('lr', 1e-3))
            opt_scheme = model_config.get('opt_scheme', 'adam')
        self.lr = lr
        self.opt_scheme = opt_scheme
        self.opt.set_learning_rate(self.lr)

    ###########################################################################
    # Functions for cost calculation

    def get_l2_regularization(self, layer_keys=None, mode='sum',
                              attr_list=('W', 'gamma')):
        """Return the L2 penalty over the selected layers.

        For every layer under `layer_keys` (all keys when None) and every
        attribute name in `attr_list` the layer has, the sum of squares of
        that attribute is one term.  The terms are combined with `T.sum`
        (mode 'sum') or `T.mean` (mode 'mean').

        Raises
            NotImplementedError: for any other `mode`.
        """
        if mode == 'sum':
            combine = T.sum
        elif mode == 'mean':
            combine = T.mean
        else:
            raise NotImplementedError

        if layer_keys is None:
            layer_keys = list(self.layers.keys())

        l2 = []
        for key in layer_keys:
            for layer in self.layers[key]:
                for attr in attr_list:
                    if hasattr(layer, attr):
                        l2.append(T.sum(getattr(layer, attr) ** 2))
        return combine(l2)

    def get_mse(self, x, y, return_map=False):
        """Return the squared error of `x` and `y`.

        With `return_map` the element-wise map is returned, otherwise its
        mean over all elements.
        """
        if return_map:
            return (x - y) ** 2
        return T.mean((x - y) ** 2)

    def add_all_weighted_losses(self, losses, weights):
        """Add the losses with the weights multiplied.
        If the weight is 0, the corresponding loss is ignored.
        If every weight is 0 the result is 0.0.
        """
        assert len(losses) == len(weights)
        loss_list = [weight * loss
                     for loss, weight in zip(losses, weights)
                     if weight != 0]
        if not loss_list:
            # reduce() without an initial value fails on an empty list.
            return 0.0
        return reduce(lambda x, y: x + y, loss_list)

    ###########################################################################
    # Functions to help build layers

    def last_sh(self, key, nth=-1):
        """Get the `nth` output shape in the `key` layers.

        Layers whose `get_out_shape()` is None are skipped by walking
        towards the first layer; if no layer reports a shape,
        `self.input_shape` is returned.
        """
        key_layers = self.layers[key]
        assert len(key_layers) > 0, "No layers in the key: %s" % key
        idx = len(key_layers) + nth if nth < 0 else nth
        while idx >= 0:
            out_sh = key_layers[idx].get_out_shape()
            if out_sh is not None:
                return out_sh
            idx = idx - 1
        return self.input_shape

    def get_concat_shape(self, key0, key1):
        """Get the concatenated shape of the outputs of
        `key0` and `key1` layers

        For list/tuple shapes the second entries are added and all other
        entries must agree; otherwise the two shapes are added directly.
        """
        prev_sh0 = self.last_sh(key0)
        prev_sh1 = self.last_sh(key1)
        if isinstance(prev_sh0, (list, tuple)):
            assert prev_sh0[0] == prev_sh1[0]
            assert prev_sh0[2:] == prev_sh1[2:]
            # tuple(): keeps the concatenation valid for list shapes too.
            return ((prev_sh0[0], prev_sh0[1] + prev_sh1[1]) +
                    tuple(prev_sh0[2:]))
        return prev_sh0 + prev_sh1

    def image_vec_to_tensor(self, input):
        """Move the last axis of a 4D `input` to position 1.
        """
        return input.dimshuffle(0, 3, 1, 2)

    ###########################################################################

    def get_key_layers_output(self, input, key, var_shape=False):
        """Put `input` to the `key` layers and return the final output.
        """
        prev_out = input
        for layer in self.layers[key]:
            prev_out = layer.get_output(prev_out, var_shape=var_shape)
        return prev_out

    def get_updates(self, cost, wrt_params):
        """Return the optimizer updates of `cost` w.r.t. `wrt_params`."""
        return self.opt.get_updates_cost(cost, wrt_params, self.opt_scheme)

    def get_updates_keys(self, cost, keys=(), params=(),
                         params_lr_factors=None):
        """Return the optimizer updates for the parameters under `keys`.

        Arguments
            cost: expression to minimize.
            keys: names in `self.params` whose parameters are updated.
            params: extra parameters appended after those of `keys`.
            params_lr_factors: optional dict mapping each key to a list of
                learning-rate factors (one per parameter).  Parameters whose
                factor is not positive are left out of the update.
        """
        wrt_params = []
        for key in keys:
            wrt_params += self.params[key]
        if params:
            wrt_params += list(params)

        lr_factors = None
        if params_lr_factors:
            lr_factors = []
            for key in keys:
                lr_factors += params_lr_factors[key]
            assert len(wrt_params) == len(lr_factors)

            # remove factors of 0
            kept = [(param, factor)
                    for param, factor in zip(wrt_params, lr_factors)
                    if factor > 0.0]
            wrt_params = [param for param, _ in kept]
            lr_factors = [factor for _, factor in kept]

        print(' - Update w.r.t.: %s' % ', '.join(keys))
        return self.opt.get_updates_cost(cost, wrt_params, self.opt_scheme,
                                         lr_factors)

    ###########################################################################
    # Functions to control batch normalization and dropout layers

    def get_batch_norm_layers(self, keys=()):
        """Return the batch-normalization layers found under `keys`.

        The per-key lists are built on the first call (layers whose class
        is named 'BatchNormLayer') and cached in `self.bn_layers`.
        """
        # For the first call, generate bn_layers
        if not hasattr(self, 'bn_layers'):
            self.bn_layers = {}
            for key in list(self.layers.keys()):
                self.bn_layers[key] = [
                    layer for layer in self.layers[key]
                    if layer.__class__.__name__ == 'BatchNormLayer']

        bn_list = []
        for key in keys:
            bn_list += self.bn_layers[key]
        return bn_list

    def set_batch_norm_update_averages(self, update_averages, keys=()):
        """Set `update_averages` on every batch-norm layer under `keys`."""
        for layer in self.get_batch_norm_layers(keys):
            layer.update_averages = update_averages

    def set_batch_norm_training(self, training, keys=()):
        """Set `deterministic = not training` on the batch-norm layers."""
        for layer in self.get_batch_norm_layers(keys):
            layer.deterministic = not training

    def set_dropout_on(self, training):
        """Forward `training` to `layers.DropoutLayer.set_dropout_training`."""
        layers.DropoutLayer.set_dropout_training(training)
219 | """ 220 | # Decide behaviors of the model during training 221 | # Batch normalization 222 | l_keys = [key for key in list(self.layers.keys())] 223 | self.set_batch_norm_update_averages(training, l_keys) 224 | self.set_batch_norm_training(training, l_keys) 225 | 226 | # Dropout 227 | self.set_dropout_on(training) 228 | 229 | ########################################################################### 230 | # Functions to help deal with parameters of the model 231 | 232 | def make_param_list(self): 233 | """collect all the parameters from `self.layers` 234 | """ 235 | self.params, self.bn_layers = {}, {} 236 | 237 | for key in list(self.layers.keys()): 238 | self.params[key] = [] 239 | self.bn_layers[key] = [] 240 | for layer in self.layers[key]: 241 | if layer.get_params(): 242 | self.params[key] += layer.get_params() 243 | if layer.__class__.__name__ == 'BatchNormLayer': 244 | self.bn_layers[key].append(layer) 245 | 246 | def get_lr_factors_of_params(self, lr_factors_dict): 247 | """collect all the parameters from `self.layers` 248 | """ 249 | params_lr_factors = {} 250 | for key in list(self.layers.keys()): 251 | params_lr_factors[key] = [] 252 | for layer in self.layers[key]: 253 | for p in layer.get_params(): 254 | params_lr_factors[key].append( 255 | lr_factors_dict.get(layer.name, 1.0)) 256 | return params_lr_factors 257 | 258 | def show_num_params(self): 259 | """Dislay the number of parameters for each layer_key. 
260 | """ 261 | paramscnt = {} 262 | for key in list(self.layers.keys()): 263 | paramscnt[key] = 0 264 | for p in self.params[key]: 265 | paramscnt[key] += np.prod(p.get_value(borrow=True).shape) 266 | if paramscnt[key] > 0: 267 | print(' - Num params %s:' % key, '{:,}'.format(paramscnt[key])) 268 | 269 | def get_params(self, layer_keys=None): 270 | """Get concatenated parameter list 271 | from layers belonging to layer_keys""" 272 | if layer_keys is None: 273 | layer_keys = list(self.layers.keys()) 274 | 275 | params = [] 276 | bn_mean_std = [] 277 | for key in layer_keys: 278 | params += self.params[key] 279 | 280 | for key in layer_keys: 281 | for layer in self.bn_layers[key]: 282 | bn_mean_std += layer.statistics 283 | params += bn_mean_std 284 | return params 285 | 286 | def save(self, filename): 287 | """Save parameters to file. 288 | """ 289 | params = self.get_params() 290 | with open(filename, 'wb') as f: 291 | pickle.dump(params, f, protocol=2) 292 | # pickle.dump(params, f, protocol=pickle.HIGHEST_PROTOCOL) 293 | print(' = Save params: %s' % (filename)) 294 | 295 | def load(self, filename): 296 | """Load parameters from file. 297 | """ 298 | params = self.get_params() 299 | with open(filename, 'rb') as f: 300 | newparams = pickle.load(f) 301 | 302 | assert len(newparams) == len(params) 303 | for p, new_p in zip(params, newparams): 304 | if p.name != new_p.name: 305 | print((' @ WARNING: Different name - (loaded) %s != %s' 306 | % (new_p.name, p.name))) 307 | new_p_sh = new_p.get_value(borrow=True).shape 308 | p_sh = p.get_value(borrow=True).shape 309 | if p_sh != new_p_sh: 310 | # print(new_p.name, p_sh, new_p_sh) 311 | print(' @ WARNING: Different shape %s - (loaded)' % new_p.name, 312 | new_p_sh, end='') 313 | print(' !=', p_sh) 314 | continue 315 | p.set_value(new_p.get_value()) 316 | print(' = Load all params: %s ' % (filename)) 317 | 318 | def load_params_keys(self, layer_keys, filename): 319 | """Load the selected parameters from file. 
class Dataset(object):
    """
    Dataset class containing images, scores, and supplementary information
    for image quality assessment.

    Attributes
    ----------
    dis_data: 4D numpy array
        distorted image patches
    ref_data: 4D numpy array (optional)
        reference image patches
    dis2ref_idx: 1D numpy array (optional)
        index to ref. patches of dis. patches
    loc_data: 4D numpy array (optional)
        local quality scores
    score_data: numpy array
        subjective scores (per image, or per patch in patchwise mode)
    npat_img_list: 2D numpy array
        number of patches of each image
    pat2img_idx_list: 2D numpy array
        start and end patch indices of each image
    """

    def __init__(self):
        # Data
        self.dis_data = None  # distorted image patches
        self.ref_data = None  # reference image patches
        self.dis2ref_idx = None  # index to ref. patches of dis. patches
        self.loc_data = None  # local scores
        self.score_data = None  # subjective score list
        self.score_data_org = None  # untouched copy of the given scores
        self.n_patches = 0

        # Data for image-wise training
        self.npat_img_list = None  # number of patches of each image
        self.pat2img_idx_list = None  # start and end indices of each image
        self.filt_idx_list = None  # filtered indices list of each image
        self.n_images = 0
        self.n_data = 0  # n_images (imagewise) or n_patches (patchwise)

        # Configurations
        self.shuffle = False
        self.imagewise = False

        self.exist_ref = False
        self.exist_loc = False
        self.exist_score = False
        self.exist_npat = False
        self.exist_filt_idx = False

        # Variables
        self.epochs_completed = 0
        self.index_in_epoch = 0
        self.rand_imidx_list = None
        self.im_idx_list = None  # data indices of the current batch
        self.bat2img_idx_set = None  # set by imagewise next_batch()

        # Data configuration
        self.num_ch = None
        self.patch_size = None
        self.patch_step = None
        self.random_crops = None
        self.loc_size = None

    @staticmethod
    def _has_data(data):
        """Return True if `data` is a non-empty list or array.

        A plain truth test is not usable here because it raises for
        numpy arrays holding more than one element.
        """
        return data is not None and len(data) > 0

    @staticmethod
    def _as_patch_array(data):
        """Convert a list to a float32 array and append a channel axis
        when the individual patches have fewer than 3 dimensions.
        """
        if isinstance(data, list):
            data = np.asarray(data, 'float32')
        if len(data[0].shape) < 3:
            data = np.expand_dims(data, 3)
        return data

    def set_patch_config(self, patch_step=None, random_crops=None):
        """Store the patch step (a list or tuple) and the random-crop
        setting; both must be given.
        """
        assert patch_step is not None and random_crops is not None
        assert isinstance(patch_step, (list, tuple))
        self.patch_step = patch_step
        self.random_crops = random_crops

    def put_data(self, dis_data, ref_data=None,
                 dis2ref_idx=None, loc_data=None,
                 score_data=None, npat_img_list=None, filt_idx_list=None,
                 imagewise=True, shuffle=False):
        """Construct a Dataset.

        Parameters
        ----------
        dis_data: 4D numpy array
            distorted image patches
        ref_data: 4D numpy array (optional)
            reference image patches
        dis2ref_idx: 1D numpy array (optional)
            index to ref. patches of dis. patches;
            required when ref_data is given
        loc_data: 4D numpy array (optional)
            local quality scores
        npat_img_list: list
            number of patches of each image
        score_data: 1D numpy array
            subjective score of each image or patch
        filt_idx_list: list (optional)
            filtered indices list of each image
        imagewise: boolean
            if True, next_batch returns the grouped image patches
            using pat2img_idx_list
        shuffle: boolean
            if True, shuffle the dataset
        """
        # dis_data
        self.dis_data = self._as_patch_array(dis_data)
        self.n_patches = self.dis_data.shape[0]
        self.patch_size = (
            self.dis_data.shape[1], self.dis_data.shape[2])
        self.num_ch = self.dis_data.shape[3]

        # ref_data
        if self._has_data(ref_data):
            self.exist_ref = True
            # Add the channel axis first so that the rank check compares
            # both arrays in the same (expanded) layout.
            self.ref_data = self._as_patch_array(ref_data)
            assert len(self.dis_data[0].shape) == len(self.ref_data[0].shape)

            assert dis2ref_idx is not None
            self.dis2ref_idx = np.asarray(dis2ref_idx, 'int32')
        else:
            self.exist_ref = False

        # loc_data
        if self._has_data(loc_data):
            self.exist_loc = True
            self.loc_data = self._as_patch_array(loc_data)
            self.loc_size = (
                self.loc_data.shape[1], self.loc_data.shape[2])
        else:
            self.exist_loc = False

        # score_data
        if score_data is not None:
            self.exist_score = True
            self.score_data_org = np.asarray(score_data, 'float32')
            self.score_data = self.score_data_org.copy()
            self.n_images = self.score_data.shape[0]
        else:
            self.exist_score = False

        self.imagewise = imagewise

        # npat_img_list
        if npat_img_list is not None:
            self.exist_npat = True
            self.npat_img_list = np.asarray(npat_img_list, 'int32')
            if self.n_images == 0:
                self.n_images = self.npat_img_list.shape[0]
            self.pat2img_idx_list = self.gen_pat2img_idx_list()

            if not self.imagewise and self.exist_score:
                self.score_data = self.gen_patchwise_scores()
        else:
            self.exist_npat = False

        if self.n_images == 0:
            self.n_images = self.n_patches

        # filt_idx_list
        if filt_idx_list is not None:
            self.exist_filt_idx = True
            self.filt_idx_list = filt_idx_list
        else:
            self.exist_filt_idx = False

        self.n_data = self.n_images if self.imagewise else self.n_patches

        self.shuffle = shuffle
        if self.shuffle:
            self.rand_imidx_list = np.random.permutation(self.n_data)
        else:
            self.rand_imidx_list = np.arange(self.n_data)

        self.validate_datasize()

    def set_imagewise(self):
        """Set this Dataset for imagewise training and testing
        """
        # Truth test instead of `is False` so numpy booleans work too.
        if not self.imagewise:
            self.imagewise = True
            self.n_data = self.n_images
            if self.exist_score and self.exist_npat:
                self.score_data = self.score_data_org.copy()

        # Reset batch to generate proper rand_imidx_list
        self.reset_batch()

    def set_patchwise(self):
        """Set this Dataset for patchwise training and testing
        """
        if self.imagewise:
            self.imagewise = False
            self.n_data = self.n_patches
            if self.exist_score and self.exist_npat:
                self.score_data = self.gen_patchwise_scores()

        # Reset batch to generate proper rand_imidx_list
        self.reset_batch()

    def validate_datasize(self):
        """Check that the optional data agree in size with the patches
        and images.

        ref_data is not checked against n_patches: reference patches
        are looked up through dis2ref_idx.
        """
        if self.exist_loc:
            assert self.n_patches == self.loc_data.shape[0], (
                'dis_data.shape: %s loc_data.shape: %s' % (
                    self.dis_data.shape, self.loc_data.shape))

        if self.exist_npat:
            assert self.n_images == self.npat_img_list.shape[0], (
                'n_score_data: %d != n_npat_img_list: %d' % (
                    self.n_images, self.npat_img_list.shape[0]))

    def gen_pat2img_idx_list(self):
        """
        Generate pat2img_idx_list from npat_img_list.

        Returns an (n_images, 2) int32 array whose rows are the
        [from, to) patch indices of each image. The first column of
        npat_img_list is read as the patch count of the image.
        """
        # One row per image (not per patch).
        pat2img_idx_list = np.zeros((self.n_images, 2), dtype='int32')
        n_patches = 0
        for im_idx in range(self.n_images):
            cur_npat = self.npat_img_list[im_idx][0]
            pat2img_idx_list[im_idx] = [n_patches, n_patches + cur_npat]
            n_patches += cur_npat
        assert n_patches == self.n_patches, (
            'obtained n_patches(%d) ~= n_patches(%d)' % (
                n_patches, self.n_patches))

        return pat2img_idx_list

    def gen_patchwise_scores(self):
        """
        Generate patch-wise training targets by expanding
        image-wise score_data using pat2img_idx_list
        """
        new_scores = np.zeros(self.n_patches, dtype='float32')
        for im_idx in range(self.n_images):
            cur_idx_from, cur_idx_to = self.pat2img_idx_list[im_idx]
            new_scores[cur_idx_from:cur_idx_to] = self.score_data[im_idx]

        return new_scores

    def next_batch(self, batch_size):
        """
        Return the next `batch_size` examples from this dataset.

        When the remaining data of the epoch are fewer than
        `batch_size`, they are skipped and a new epoch is started.

        Parameters
        ----------
        batch_size: integer
            number of images (imagewise) or patches (patchwise) of
            current batch

        Returns
        -------
        A dictionary containing:
            - 'dis_data': 4D numpy array
                distorted image patches
            - 'ref_data': 4D numpy array (optional)
                reference image patches
            - 'loc_data': 4D numpy array (optional)
                local quality scores
            - 'score_set': 1D numpy array (optional)
                subjective scores of the batch
            - 'bat2img_idx_set': 2D numpy array (imagewise only)
                from and to indices of each image in the current batch
            - 'n_data': integer (imagewise only)
                number of patches in the current batch
        """
        assert batch_size <= self.n_data

        start = self.index_in_epoch
        self.index_in_epoch += batch_size
        if self.index_in_epoch > self.n_data:
            # Finished epoch
            self.epochs_completed += 1

            # Shuffle the data
            if self.shuffle:
                self.rand_imidx_list = np.random.permutation(self.n_data)

            # Start next epoch
            start = 0
            self.index_in_epoch = batch_size
        end = self.index_in_epoch
        self.im_idx_list = self.rand_imidx_list[start:end]

        if self.imagewise:
            return self._imagewise_batch(batch_size)
        return self._patchwise_batch()

    def _imagewise_batch(self, batch_size):
        """Gather the patches of every image in `self.im_idx_list`."""
        bat2img_idx_set = np.zeros((batch_size, 2), dtype='int32')
        score_set = np.zeros(batch_size, dtype='float32')
        idx_set_list = []
        cur_inb_from = 0
        cur_inb_to = 0
        for in_bat_idx, im_idx in enumerate(self.im_idx_list):
            cur_idx_from, cur_idx_to = self.pat2img_idx_list[im_idx]
            idx_set_list.append(
                np.arange(cur_idx_from, cur_idx_to, dtype='int32'))
            cur_inb_to = cur_inb_from + (cur_idx_to - cur_idx_from)
            bat2img_idx_set[in_bat_idx] = [cur_inb_from, cur_inb_to]
            cur_inb_from = cur_inb_to
            if self.exist_score:
                score_set[in_bat_idx] = self.score_data[im_idx]
        idx_set = np.concatenate(idx_set_list)

        # Kept for get_current_recon_info()
        self.bat2img_idx_set = bat2img_idx_set

        res = {
            'dis_data': self.dis_data[idx_set],
            'bat2img_idx_set': bat2img_idx_set,
            'n_data': cur_inb_to
        }
        if self.exist_score:
            res['score_set'] = score_set
        if self.exist_ref:
            res['ref_data'] = self.ref_data[self.dis2ref_idx[idx_set]]
        if self.exist_loc:
            res['loc_data'] = self.loc_data[idx_set]
        return res

    def _patchwise_batch(self):
        """Gather the patches listed in `self.im_idx_list`."""
        res = {
            'dis_data': self.dis_data[self.im_idx_list]
        }
        if self.exist_score:
            res['score_set'] = self.score_data[self.im_idx_list]
        if self.exist_ref:
            res['ref_data'] = self.ref_data[
                self.dis2ref_idx[self.im_idx_list]]
        if self.exist_loc:
            res['loc_data'] = self.loc_data[self.im_idx_list]
        return res

    def reset_batch(self):
        """
        Make batch index in epoch 0, and shuffle data.
        """
        self.epochs_completed = 0
        self.index_in_epoch = 0
        if self.shuffle:
            self.rand_imidx_list = np.random.permutation(self.n_data)
        else:
            self.rand_imidx_list = np.arange(self.n_data)

    def get_current_recon_info(self):
        """
        Get information to reconstruct patches into an image.
        Only valid in imagewise mode after next_batch() was called.

        Returns
        -------
        A dictionary containing:
            - 'npat_img_list': (N, 3) matrix
                where N is the number of images, and each row
                indicate each image.
                [number of patches, num of patches along y-axis,
                 num of patches along x-axis].
            - 'bat2img_idx_set': (N, 2) matrix
                where each row indicate each image.
                [from-index in current batch, to-index in current batch]
            - 'filt_idx_list': (N, 1) list or None
                where each element has indices list of existing
                patches.
        """
        assert self.imagewise
        assert self.index_in_epoch != 0

        res = {
            'npat_img_list': self.npat_img_list[self.im_idx_list],
            'bat2img_idx_set': self.bat2img_idx_set
        }
        if self.exist_filt_idx:
            res['filt_idx_list'] = [
                self.filt_idx_list[idx] for idx in self.im_idx_list]
        else:
            res['filt_idx_list'] = None

        return res
class BatchNormLayer(Layer):
    """
    Batch normalization layer.
    (theano.tensor.nnet.bn.batch_normalization_{train, test})

    Parameters
    ----------
    input_shape: tuple of ints
        Shape of the input; `input_shape[axis]` gives the size of the
        gamma / beta / mean / var vectors.
    activation: function
        Activation function.
    axis: int
        Index of the feature axis in `input_shape`.
    axes: {``'spatial'``, ``'per-activation'``}
        Normalization mode, forwarded to the Theano batch-norm ops.
    epsilon: float
        Passed as `epsilon` to the Theano batch-norm ops.
    alpha: float
        Passed as the running-average factor to
        `batch_normalization_train`.
    name: str
        Layer name (default: 'BN'); prefix of the shared variable names.
    """
    # Registry of every created instance, used by the static helpers.
    layers = []

    def __init__(self, input_shape, activation=linear, axis=1, axes='spatial',
                 epsilon=1e-4, alpha=0.1, name=None):
        super(BatchNormLayer, self).__init__()

        self.input_shape = input_shape
        self.activation = activation
        self.axis = axis
        self.axes_org = axes
        self.epsilon = epsilon
        self.alpha = alpha
        self.name = 'BN' if name is None else name
        self.act_name = activation.__name__
        self.deterministic = False
        # Toggled from outside (e.g. set_batch_norms_training); initialised
        # here so that the attribute always exists.
        self.update_averages = True

        shape = [input_shape[self.axis]]
        ndim = len(input_shape)
        if axes == 'per-activation':
            self.axes = (0,)
        elif axes == 'spatial':
            self.axes = (0,) + tuple(range(2, ndim))
        else:
            raise ValueError(
                "axes must be 'spatial' or 'per-activation': %r" % (axes,))
        self.non_bc_axes = tuple(i for i in range(ndim) if i not in self.axes)

        # Use self.name (never None) for the shared variable names.
        self.gamma = theano.shared(np.ones(shape, dtype=theano.config.floatX),
                                   name=self.name + '_G', borrow=True)
        self.beta = theano.shared(np.zeros(shape, dtype=theano.config.floatX),
                                  name=self.name + '_B', borrow=True)

        self.mean = theano.shared(np.zeros(shape, dtype=theano.config.floatX),
                                  name=self.name + '_mean', borrow=True)
        self.var = theano.shared(np.ones(shape, dtype=theano.config.floatX),
                                 name=self.name + '_var', borrow=True)

        self.params = [self.gamma, self.beta]
        self.statistics = [self.mean, self.var]
        BatchNormLayer.layers.append(self)

        # Show information
        print('  # %s (BN) ' % (self.name), end='')
        print('act.: %s,' % self.act_name)

    def get_output(self, input, **kwargs):
        """Return the activated, batch-normalized `input`.

        Mini-batch statistics are used unless `self.deterministic` is
        set, in which case the stored mean / var are used.
        """
        # prepare dimshuffle pattern inserting broadcastable axes as needed
        param_axes = iter(range(input.ndim - len(self.axes)))
        pattern = ['x' if input_axis in self.axes
                   else next(param_axes)
                   for input_axis in range(input.ndim)]

        # apply dimshuffle pattern to all parameters
        beta = self.beta.dimshuffle(pattern)
        gamma = self.gamma.dimshuffle(pattern)
        mean = self.mean.dimshuffle(pattern)
        var = self.var.dimshuffle(pattern)

        if not self.deterministic:
            normalized, _, _, mean_, var_ = bn.batch_normalization_train(
                input, gamma, beta, self.axes_org,
                self.epsilon, self.alpha, mean, var)

            # Update running mean and variance
            # Tricks adopted from Lasagne implementation
            # http://lasagne.readthedocs.io/en/latest/modules/layers/normalization.html
            running_mean = theano.clone(self.mean, share_inputs=False)
            running_var = theano.clone(self.var, share_inputs=False)
            running_mean.default_update = mean_.dimshuffle(self.non_bc_axes)
            running_var.default_update = var_.dimshuffle(self.non_bc_axes)
            # NOTE(review): unlike the Lasagne original, the zero-weighted
            # clones are added to the attributes instead of to a tensor that
            # feeds `normalized`. This appears to rebind self.mean / self.var
            # to new expressions and to leave the clones out of the returned
            # graph -- confirm that the running statistics really get
            # updated. Logic deliberately left unchanged.
            self.mean += 0 * running_mean
            self.var += 0 * running_var
        else:
            normalized = bn.batch_normalization_test(
                input, gamma, beta, mean, var, self.axes_org, self.epsilon)

        return self.activation(normalized)

    def get_out_shape(self):
        """Batch normalization keeps the shape of its input."""
        return self.input_shape

    def reset_stats(self):
        """Reset the stored mean to zeros and the stored var to ones."""
        self.mean.set_value(np.zeros(self.mean.get_value().shape,
                                     dtype=theano.config.floatX))
        self.var.set_value(np.ones(self.var.get_value().shape,
                                   dtype=theano.config.floatX))

    def get_stats(self):
        """Return the stored (mean, var)."""
        return (self.mean, self.var)

    @staticmethod
    def set_batch_norms_training(training):
        """Switch every registered layer between training and testing."""
        deterministic = not training
        print(' - Batch norm layres: deterministic =', deterministic)
        for layer in BatchNormLayer.layers:
            layer.deterministic = deterministic
            layer.update_averages = not deterministic

    @staticmethod
    def reset_batch_norms_stats():
        """Reset the stored statistics of every registered layer."""
        print(' - Batch norm layres: reset mean and var')
        for layer in BatchNormLayer.layers:
            layer.reset_stats()
class BatchNormLayer_old(Layer):
    """
    Batch normalization layer
    (theano.tensor.nnet.bn.batch_normalization)

    Stores a running mean and a running standard deviation.

    Parameters
    ----------
    input_shape: tuple of 2 or 4 ints
        Shape of the input; axis 1 is the feature / channel axis.
    activation: function
        Activation function.
    epsilon: float
        Added to the variance before taking the square root.
    alpha: float
        Weight of the current mini-batch in the running averages.
    name: str
        Layer name (default: 'BN'); prefix of the shared variable names.
    """
    # Registry of every created instance, used by the static helpers.
    layers = []

    def __init__(self, input_shape, activation=linear,
                 epsilon=1e-4, alpha=0.1, name=None):
        # super() must name this class; naming the unrelated
        # BatchNormLayer raises a TypeError.
        super(BatchNormLayer_old, self).__init__()

        if len(input_shape) == 2:
            self.axes = (0,)
        elif len(input_shape) == 4:
            self.axes = (0, 2, 3)
        else:
            raise NotImplementedError
        # Statistics are reduced over self.axes, so one value per entry of
        # axis 1 remains in both supported layouts.
        shape = [input_shape[1]]

        self.name = 'BN' if name is None else name
        self.epsilon = epsilon
        self.alpha = alpha
        self.deterministic = False
        self.update_averages = True
        self.activation = activation
        self.act_name = activation.__name__
        self.input_shape = input_shape

        # Use self.name (never None) for the shared variable names.
        self.gamma = theano.shared(np.ones(shape, dtype=theano.config.floatX),
                                   name=self.name + '_G', borrow=True)
        self.beta = theano.shared(np.zeros(shape, dtype=theano.config.floatX),
                                  name=self.name + '_B', borrow=True)

        self.mean = theano.shared(np.zeros(shape, dtype=theano.config.floatX),
                                  name=self.name + '_mean', borrow=True)
        self.std = theano.shared(np.ones(shape, dtype=theano.config.floatX),
                                 name=self.name + '_std', borrow=True)

        self.params = [self.gamma, self.beta]
        self.statistics = [self.mean, self.std]
        BatchNormLayer_old.layers.append(self)

        # Show information
        print('  # %s (BN_T) ' % (self.name), end='')
        print('act.: %s,' % self.act_name)

    def get_output(self, input, **kwargs):
        """Return the activated, batch-normalized `input`.

        The stored averages are used when `self.deterministic` is set,
        the mini-batch statistics otherwise; in the latter case the
        stored averages are updated if `self.update_averages` is set.
        """
        input_mean = input.mean(self.axes)
        input_std = T.sqrt(input.var(self.axes) + self.epsilon)

        # Decide whether to use the stored averages or mini-batch statistics
        use_averages = self.deterministic
        if use_averages:
            mean = self.mean
            std = self.std
        else:
            mean = input_mean
            std = input_std

        # Decide whether to update the stored averages
        update_averages = self.update_averages and not use_averages
        if update_averages:
            # Trick: To update the stored statistics, we create memory-aliased
            # clones of the stored statistics:
            running_mean = theano.clone(self.mean, share_inputs=False)
            running_std = theano.clone(self.std, share_inputs=False)
            # set a default update for them:
            running_mean.default_update = ((1 - self.alpha) * running_mean +
                                           self.alpha * input_mean)
            running_std.default_update = ((1 - self.alpha) * running_std +
                                          self.alpha * input_std)
            # and make sure they end up in the graph without participating in
            # the computation (this way their default_update will be collected
            # and applied, but the computation will be optimized away):
            mean += 0 * running_mean
            std += 0 * running_std

        # prepare dimshuffle pattern inserting broadcastable axes as needed
        param_axes = iter(range(input.ndim - len(self.axes)))
        pattern = ['x' if input_axis in self.axes
                   else next(param_axes)
                   for input_axis in range(input.ndim)]

        # apply dimshuffle pattern to all parameters
        beta = 0 if self.beta is None else self.beta.dimshuffle(pattern)
        gamma = 1 if self.gamma is None else self.gamma.dimshuffle(pattern)
        mean = mean.dimshuffle(pattern)
        std = std.dimshuffle(pattern)

        # normalize
        normalized = bn.batch_normalization(input, gamma, beta, mean, std)
        return self.activation(normalized)

    def get_out_shape(self):
        """Batch normalization keeps the shape of its input."""
        return self.input_shape

    def reset_stats(self):
        """Reset the stored mean to zeros and the stored std to ones."""
        self.mean.set_value(np.zeros(self.mean.get_value().shape,
                                     dtype=theano.config.floatX))
        self.std.set_value(np.ones(self.std.get_value().shape,
                                   dtype=theano.config.floatX))

    def get_stats(self):
        """Return the stored (mean, std)."""
        return (self.mean, self.std)

    @staticmethod
    def set_batch_norms_training(training):
        """Switch every registered layer between training and testing."""
        deterministic = not training
        print(' - Batch norm layres: deterministic =', deterministic)
        for layer in BatchNormLayer_old.layers:
            layer.deterministic = deterministic
            layer.update_averages = not deterministic

    @staticmethod
    def reset_batch_norms_stats():
        """Reset the stored statistics of every registered layer."""
        print(' - Batch norm layres: reset mean and std')
        for layer in BatchNormLayer_old.layers:
            layer.reset_stats()
class BatchNormLayer_L(Layer):
    """
    Batch normalization layer.
    Core algorithm is brought from Lasagne.
    http://lasagne.readthedocs.io/en/latest/modules/layers/normalization.html

    Stores a running mean and a running inverse standard deviation.

    Parameters
    ----------
    input_shape: tuple of 2 or 4 ints
        Shape of the input; axis 1 is the feature / channel axis.
    activation: function
        Activation function.
    epsilon: float
        Added to the variance before taking the square root.
    alpha: float
        Weight of the current mini-batch in the running averages.
    name: str
        Layer name (default: 'BN'); prefix of the shared variable names.
    """
    # Registry of every created instance, used by the static helpers.
    layers = []

    def __init__(self, input_shape, activation=linear,
                 epsilon=1e-4, alpha=0.1, name=None):
        # super() must name this class; naming the unrelated
        # BatchNormLayer raises a TypeError.
        super(BatchNormLayer_L, self).__init__()

        if len(input_shape) == 2:
            self.axes = (0,)
        elif len(input_shape) == 4:
            self.axes = (0, 2, 3)
        else:
            raise NotImplementedError
        # Statistics are reduced over self.axes, so one value per entry of
        # axis 1 remains in both supported layouts.
        shape = [input_shape[1]]

        self.name = 'BN' if name is None else name
        self.epsilon = epsilon
        self.alpha = alpha
        self.deterministic = False
        self.update_averages = True
        self.activation = activation
        self.act_name = activation.__name__
        self.input_shape = input_shape

        # Use self.name (never None) for the shared variable names.
        self.gamma = theano.shared(np.ones(shape, dtype=theano.config.floatX),
                                   name=self.name + '_G', borrow=True)
        self.beta = theano.shared(np.zeros(shape, dtype=theano.config.floatX),
                                  name=self.name + '_B', borrow=True)

        self.mean = theano.shared(np.zeros(shape, dtype=theano.config.floatX),
                                  name=self.name + '_mean', borrow=True)
        self.invstd = theano.shared(np.ones(shape, dtype=theano.config.floatX),
                                    name=self.name + '_invstd', borrow=True)

        self.params = [self.gamma, self.beta]
        self.statistics = [self.mean, self.invstd]
        BatchNormLayer_L.layers.append(self)

        # Show information
        print('  # %s (BN_L) ' % (self.name), end='')
        print('act.: %s,' % self.act_name)

    def get_output(self, input, **kwargs):
        """Return the activated, batch-normalized `input`.

        The stored averages are used when `self.deterministic` is set,
        the mini-batch statistics otherwise; in the latter case the
        stored averages are updated if `self.update_averages` is set.
        """
        input_mean = input.mean(self.axes)
        input_invstd = T.inv(T.sqrt(input.var(self.axes) + self.epsilon))

        # Decide whether to use the stored averages or mini-batch statistics
        use_averages = self.deterministic
        if use_averages:
            mean = self.mean
            invstd = self.invstd
        else:
            mean = input_mean
            invstd = input_invstd

        # Decide whether to update the stored averages
        update_averages = self.update_averages and not use_averages
        if update_averages:
            # Trick: To update the stored statistics, we create memory-aliased
            # clones of the stored statistics:
            running_mean = theano.clone(self.mean, share_inputs=False)
            running_invstd = theano.clone(self.invstd, share_inputs=False)
            # set a default update for them:
            running_mean.default_update = (
                (1 - self.alpha) * running_mean + self.alpha * input_mean)
            running_invstd.default_update = (
                (1 - self.alpha) * running_invstd + self.alpha * input_invstd)
            # and make sure they end up in the graph without participating in
            # the computation (this way their default_update will be collected
            # and applied, but the computation will be optimized away):
            mean += 0 * running_mean
            invstd += 0 * running_invstd

        # prepare dimshuffle pattern inserting broadcastable axes as needed
        param_axes = iter(range(input.ndim - len(self.axes)))
        pattern = ['x' if input_axis in self.axes
                   else next(param_axes)
                   for input_axis in range(input.ndim)]

        # apply dimshuffle pattern to all parameters
        beta = 0 if self.beta is None else self.beta.dimshuffle(pattern)
        gamma = 1 if self.gamma is None else self.gamma.dimshuffle(pattern)
        mean = mean.dimshuffle(pattern)
        invstd = invstd.dimshuffle(pattern)

        # normalize
        normalized = (input - mean) * (gamma * invstd) + beta
        return self.activation(normalized)

    def get_out_shape(self):
        """Batch normalization keeps the shape of its input."""
        return self.input_shape

    def reset_stats(self):
        """Reset the stored mean to zeros and the stored invstd to ones."""
        self.mean.set_value(np.zeros(self.mean.get_value().shape,
                                     dtype=theano.config.floatX))
        self.invstd.set_value(np.ones(self.invstd.get_value().shape,
                                      dtype=theano.config.floatX))

    def get_stats(self):
        """Return the stored (mean, invstd)."""
        return (self.mean, self.invstd)

    @staticmethod
    def set_batch_norms_training(training):
        """Switch every registered layer between training and testing."""
        deterministic = not training
        print(' - Batch norm layres: deterministic =', deterministic)
        for layer in BatchNormLayer_L.layers:
            layer.deterministic = deterministic
            layer.update_averages = not deterministic

    @staticmethod
    def reset_batch_norms_stats():
        """Reset the stored statistics of every registered layer."""
        print(' - Batch norm layres: reset mean and invstd')
        for layer in BatchNormLayer_L.layers:
            layer.reset_stats()
def scale_to_unit_interval(ndar, eps=1e-8):
    """
    Return a copy of ``ndar`` shifted and scaled to lie between 0 and 1.

    ``eps`` is added to the maximum so a constant array does not divide by
    zero. Non-floating input is converted to float64 first, because the
    in-place scaling cannot be stored back into an integer array.
    """
    if np.issubdtype(ndar.dtype, np.floating):
        ndar = ndar.copy()
    else:
        ndar = ndar.astype(np.float64)
    ndar -= ndar.min()
    ndar *= 1.0 / (ndar.max() + eps)
    return ndar


def tile_raster_images(X, img_shape, tile_shape, tile_spacing=(0, 0),
                       scale_rows_to_unit_interval=True,
                       output_pixel_vals=True):
    """
    Transform an array with one flattened image per row, into an array in
    which images are reshaped and laid out like tiles on a floor.

    This function is useful for visualizing datasets whose rows are images,
    and also columns of matrices for transforming those rows
    (such as the first layer of a neural net).

    :type X: a 2-D ndarray or a tuple of 4 channels, elements of which can
        be 2-D ndarrays or None;
    :param X: a 2-D array in which every row is a flattened image.

    :type img_shape: tuple; (height, width)
    :param img_shape: the original shape of each image

    :type tile_shape: tuple; (rows, cols)
    :param tile_shape: the number of images to tile (rows, cols)

    :type tile_spacing: tuple; (rows, cols)
    :param tile_spacing: number of blank pixels between neighbouring tiles

    :param output_pixel_vals: if output should be pixel values (i.e. uint8
        values) or floats

    :param scale_rows_to_unit_interval: if the values need to be scaled before
        being plotted to [0,1] or not

    :returns: array suitable for viewing as an image
        (see ``Image.fromarray``): 2-D for a single channel, or
        (height, width, 4) when ``X`` is a 4-tuple of channels.
    """
    assert len(img_shape) == 2
    assert len(tile_shape) == 2
    assert len(tile_spacing) == 2

    # Every tile is followed by its spacing except the last one.
    out_shape = [
        (ishp + tsp) * tshp - tsp
        for ishp, tshp, tsp in zip(img_shape, tile_shape, tile_spacing)
    ]

    if isinstance(X, tuple):
        assert len(X) == 4
        if output_pixel_vals:
            out_dtype = 'uint8'
            # colors default to 0, alpha defaults to opaque
            channel_defaults = [0, 0, 0, 255]
        else:
            # A tuple has no dtype of its own (the original read X.dtype
            # here and crashed); take it from the first channel supplied.
            present = [ch for ch in X if ch is not None]
            out_dtype = present[0].dtype if present else np.dtype(float)
            channel_defaults = [0., 0., 0., 1.]
        out_array = np.zeros((out_shape[0], out_shape[1], 4),
                             dtype=out_dtype)

        for i, channel in enumerate(X):
            if channel is None:
                # missing channel: fill with its default value
                out_array[:, :, i] = channel_defaults[i]
            else:
                # recursive call computes the tiled single channel
                out_array[:, :, i] = tile_raster_images(
                    channel, img_shape, tile_shape, tile_spacing,
                    scale_rows_to_unit_interval, output_pixel_vals)
        return out_array

    # Single channel.
    H, W = img_shape
    Hs, Ws = tile_spacing
    out_array = np.zeros(out_shape,
                         dtype='uint8' if output_pixel_vals else X.dtype)
    pixel_scale = 255 if output_pixel_vals else 1
    n_cols = tile_shape[1]

    # Tiles beyond the number of available rows in X stay blank.
    for idx in range(min(tile_shape[0] * n_cols, X.shape[0])):
        tile_row, tile_col = divmod(idx, n_cols)
        this_img = X[idx].reshape(img_shape)
        if scale_rows_to_unit_interval:
            this_img = scale_to_unit_interval(this_img)
        top = tile_row * (H + Hs)
        left = tile_col * (W + Ws)
        out_array[top: top + H, left: left + W] = this_img * pixel_scale
    return out_array
def tile_tensor_array(X, tile_shape, img_shape=None, tile_spacing=(0, 0)):
    """
    Lay the images of a tensor out like tiles on a floor, without rescaling.

    :type X: an ndarray indexed by image along axis 0, or a tuple of 4
        channels, elements of which can be such ndarrays or None;
    :param X: every ``X[i]`` must reshape to ``img_shape``.

    :type tile_shape: tuple; (rows, cols)
    :param tile_shape: the number of images to tile (rows, cols)

    :type img_shape: tuple; (height, width) or None
    :param img_shape: shape of each image; taken from axes 2 and 3 of ``X``
        when omitted

    :type tile_spacing: tuple; (rows, cols)
    :param tile_spacing: gap between tiles. When both values are negative
        the tiles overlap and each tile is trimmed so neighbours meet in
        the middle of the overlap. Mixed signs are not supported.

    :returns: 2-D array with the dtype of ``X``, or a (height, width, 4)
        array when ``X`` is a 4-tuple of channels.
    """
    assert len(tile_shape) == 2
    assert len(tile_spacing) == 2
    tile_shape = (int(tile_shape[0]), int(tile_shape[1]))
    tile_spacing = (int(tile_spacing[0]), int(tile_spacing[1]))

    is_rgba = isinstance(X, tuple)
    if is_rgba:
        assert len(X) == 4
        present = [ch for ch in X if ch is not None]
        # A tuple has neither .shape nor .dtype (the original read both
        # from X and crashed); use the first supplied channel instead.
        reference = present[0] if present else None
    else:
        reference = X

    if img_shape is None:
        if reference is None:
            raise ValueError(
                'img_shape is required when every channel is None')
        img_shape = (int(reference.shape[2]), int(reference.shape[3]))
    else:
        img_shape = (int(img_shape[0]), int(img_shape[1]))

    # Every tile is followed by its spacing except the last one.
    out_shape = [
        (ishp + tsp) * tshp - tsp
        for ishp, tshp, tsp in zip(img_shape, tile_shape, tile_spacing)
    ]

    if is_rgba:
        out_dtype = reference.dtype if reference is not None \
            else np.dtype(float)
        out_array = np.zeros((out_shape[0], out_shape[1], 4),
                             dtype=out_dtype)
        # colors default to 0, alpha defaults to 1 (opaque)
        channel_defaults = [0., 0., 0., 1.]
        for i, channel in enumerate(X):
            if channel is None:
                out_array[:, :, i] = channel_defaults[i]
            else:
                # The original recursed into tile_raster_images with
                # img_shape and tile_shape swapped; recurse into this
                # function so values are tiled unscaled.
                out_array[:, :, i] = tile_tensor_array(
                    channel, tile_shape, img_shape, tile_spacing)
        return out_array

    # Single channel.
    H, W = img_shape
    Hs, Ws = tile_spacing
    n_rows, n_cols = tile_shape
    out_array = np.zeros(out_shape, dtype=X.dtype)

    for idx in range(min(n_rows * n_cols, X.shape[0])):
        tile_row, tile_col = divmod(idx, n_cols)
        this_img = X[idx].reshape(img_shape)

        if Hs >= 0 and Ws >= 0:
            u_tr = d_tr = l_tr = r_tr = 0
        elif Hs < 0 and Ws < 0:
            # Split each overlap between the two neighbours; tiles on the
            # outer border keep their full extent on that side.
            u_tr = 0 if tile_row == 0 else (-Hs + 1) // 2
            d_tr = 0 if tile_row == n_rows - 1 else (-Hs) // 2
            l_tr = 0 if tile_col == 0 else (-Ws + 1) // 2
            r_tr = 0 if tile_col == n_cols - 1 else (-Ws) // 2
        else:
            raise NotImplementedError()

        top = tile_row * (H + Hs)
        left = tile_col * (W + Ws)
        out_array[
            top + u_tr: top + H - d_tr,
            left + l_tr: left + W - r_tr
        ] = this_img[u_tr: H - d_tr, l_tr: W - r_tr]
    return out_array
def _tile_slices(tile_row, tile_col, tile_shape, img_shape, tile_spacing):
    """Return (dst_rows, dst_cols, src_rows, src_cols) slices for a tile."""
    H, W = img_shape
    Hs, Ws = tile_spacing
    if Hs >= 0 and Ws >= 0:
        u_tr = d_tr = l_tr = r_tr = 0
    elif Hs < 0 and Ws < 0:
        # Overlapping tiles: split each overlap between the neighbours;
        # tiles on the outer border keep their full extent on that side.
        u_tr = 0 if tile_row == 0 else (-Hs + 1) // 2
        d_tr = 0 if tile_row == tile_shape[0] - 1 else (-Hs) // 2
        l_tr = 0 if tile_col == 0 else (-Ws + 1) // 2
        r_tr = 0 if tile_col == tile_shape[1] - 1 else (-Ws) // 2
    else:
        raise NotImplementedError()
    top = tile_row * (H + Hs)
    left = tile_col * (W + Ws)
    return (slice(top + u_tr, top + H - d_tr),
            slice(left + l_tr, left + W - r_tr),
            slice(u_tr, H - d_tr),
            slice(l_tr, W - r_tr))


def tile_tensor4_from_list(X, tile_shape, idx_list=None, img_shape=None,
                           tile_spacing=(0, 0), caxis=None,
                           image_name=None):
    """
    Generate tiled image array from 4D or 3D numpy array

    Parameter
    ---------
    X : 4D or 3D numpy array
        [batch, channel, height, width] or [batch, height, width];
        the channel count must be 1 or 3.
    tile_shape : (rows, cols)
        Number of tiles in the output grid.
    idx_list : sequence of ints or None
        ``idx_list[i]`` is the row-major tile position of ``X[i]``.
        Defaults to filling the grid in order with the available images.
    img_shape : (height, width) or None
        Shape of each image; taken from ``X`` when omitted.
    tile_spacing : (rows, cols)
        Gap between tiles; both negative means overlapping tiles.
    caxis : (min, max), 'auto' or None
        When not None, ``X`` is first mapped to [0, 255] by ``image_caxis``
        and empty tiles are painted red instead of black.
    image_name : str or None
        When given, the result is saved to this path.

    Returns
    -------
    A (height, width, 3) array, or the saved PIL image when ``image_name``
    is given.
    """
    assert len(X.shape) in [3, 4]
    assert len(tile_shape) == 2
    assert len(tile_spacing) == 2
    tile_shape = (int(tile_shape[0]), int(tile_shape[1]))
    tile_spacing = (int(tile_spacing[0]), int(tile_spacing[1]))
    n_tiles = tile_shape[0] * tile_shape[1]

    if idx_list is None:
        # Leave trailing tiles as background when X holds fewer images
        # than the grid has tiles instead of raising IndexError.
        idx_list = range(min(n_tiles, int(X.shape[0])))
    elif len(idx_list) > 0:
        # Tile positions are zero-based, so n_tiles itself is out of range
        # (the original bound was ``<=`` and let it through).
        assert np.max(idx_list) < n_tiles, \
            'max idx_list (%d) >= number of tiles (%d)' % (
                np.max(idx_list), n_tiles)

    # The channel count depends only on the rank of X. The original read
    # X.shape[1] (the height of a 3-D input) whenever img_shape was given.
    nch = int(X.shape[1]) if len(X.shape) == 4 else 1
    if img_shape is None:
        img_shape = (int(X.shape[-2]), int(X.shape[-1]))
    else:
        img_shape = (int(img_shape[0]), int(img_shape[1]))

    out_shape = [
        (ishp + tsp) * tshp - tsp
        for ishp, tshp, tsp in zip(img_shape, tile_shape, tile_spacing)
    ]

    if caxis is not None:
        X = image_caxis(X, caxis)
        default_rgb = [255, 0, 0]
    else:
        default_rgb = [0, 0, 0]

    # Create an output np ndarray to store the image
    out_array = np.ones((out_shape[0], out_shape[1], 3), dtype=X.dtype)
    for ch in range(3):
        out_array[:, :, ch] = out_array[:, :, ch] * default_rgb[ch]

    if nch not in (1, 3):
        raise NotImplementedError()

    for idx, pat_idx in enumerate(idx_list):
        tile_row, tile_col = divmod(int(pat_idx), tile_shape[1])
        dst_r, dst_c, src_r, src_c = _tile_slices(
            tile_row, tile_col, tile_shape, img_shape, tile_spacing)
        if nch == 1:
            # gray: broadcast the single plane over the three channels
            this_img = X[idx].reshape(img_shape)
            out_array[dst_r, dst_c, :] = this_img[src_r, src_c, np.newaxis]
        else:
            for ch in range(nch):
                this_img = X[idx, ch].reshape(img_shape)
                out_array[dst_r, dst_c, ch] = this_img[src_r, src_c]

    if image_name is not None:
        img = Image.fromarray(out_array.astype(np.uint8))
        img.save(image_name)
        return img
    else:
        return out_array
def image_from_nparray(np_arr_img, img_size=None, caxis='auto'):
    """
    Convert numpy array to an RGB PIL image

    Parameter
    ---------
    np_arr_img : 3D or 2D or 1D (img_size must be given) numpy array
        [height, width, channel] or [height, width] or [height * width]
    img_size : (height, width) or (height, width, channels), optional
        Target shape used to unflatten a 1-D input.
    caxis : (min, max), 'auto' or None
        Value range mapped to [0, 255] by ``image_caxis``.

    Raises
    ------
    ValueError
        If a 1-D input cannot be reshaped to ``img_size``.
    """
    ndim = len(np_arr_img.shape)
    assert ndim in [1, 2, 3]

    if ndim == 1:
        assert img_size is not None
        if len(img_size) == 3:
            if img_size[2] == 1:
                # gray: drop the singleton channel axis
                img_ = np_arr_img.reshape((img_size[0], img_size[1]))
            else:
                # RGB
                img_ = np_arr_img.reshape(img_size)
        elif len(img_size) == 2:
            # np.prod replaces np.product, which NumPy 2.0 removed.
            n_pixels = np.prod(img_size[:])
            if np_arr_img.shape[0] == n_pixels:
                # gray
                img_ = np_arr_img.reshape(img_size)
            elif np_arr_img.shape[0] == n_pixels * 3:
                # RGB
                img_ = np_arr_img.reshape((img_size[0], img_size[1], 3))
            else:
                raise ValueError(
                    'Wrong shape: np_array = {0} / target = {1}'.format(
                        np_arr_img.shape, img_size))
        else:
            raise ValueError('Wrong shape: {0}'.format(img_size))
    elif ndim == 2:
        # gray
        img_ = np_arr_img
    else:
        if np_arr_img.shape[2] == 1:
            # gray
            img_ = np_arr_img[:, :, 0]
        else:
            # RGB
            assert np_arr_img.shape[2] == 3
            img_ = np_arr_img

    img_ = image_caxis(img_, caxis)
    img = Image.fromarray(img_.astype(np.uint8))

    if img.mode != 'RGB':
        img = img.convert('RGB')
    return img


def image_from_tensor(tensor_4d, caxis='auto'):
    """
    Convert one channel-first image to an RGB PIL image.

    Despite the parameter name, the three-axis transpose below requires a
    3-D array laid out as (channel, row, column), e.g. one sample of a 4-D
    batch. ``caxis`` is forwarded to ``image_caxis``.
    """
    # transpose into (row, column, channel)
    img_ = np.transpose(tensor_4d, axes=(1, 2, 0))

    # if the image is gray, remove channel axis
    if img_.shape[2] == 1:
        img_ = img_.reshape(img_.shape[0], img_.shape[1])

    img_ = image_caxis(img_, caxis)
    img = Image.fromarray(img_.astype(np.uint8))

    if img.mode != 'RGB':
        img = img.convert('RGB')
    return img
def image_caxis(img, caxis='auto'):
    """
    Linearly map the value range ``caxis`` of ``img`` onto [0, 255].

    Parameters
    ----------
    img : numpy array
        Image data; it is not modified.
    caxis : (min, max), 'auto' or None
        Range mapped to 0 and 255. ``'auto'`` / ``None`` use the minimum
        and maximum of ``img`` (plus a small epsilon so a constant image
        does not divide by zero).

    Returns
    -------
    New array with the dtype of ``img``; values outside ``caxis`` are
    clipped to 0 or 255.
    """
    # Only compare against 'auto' for non-array input: an ndarray compared
    # with a string has no single truth value.
    use_auto = caxis is None or (
        not isinstance(caxis, np.ndarray) and caxis == 'auto')
    if use_auto:
        min_val = img.min()
        max_val = img.max() + 1e-8
    else:
        assert len(caxis) == 2
        # builtin float: the np.float alias was removed in NumPy 1.24
        min_val = float(caxis[0])
        max_val = float(caxis[1])

    scaled = (img - min_val) / (max_val - min_val) * 255.0
    # Clip before casting back: casting first let out-of-range values wrap
    # around in integer dtypes such as uint8.
    return np.clip(scaled, 0.0, 255.0).astype(img.dtype)
# Activation function wrappers
def linear(x):
    """ Identity (no activation) """
    return x


def tanh(x):
    """ Hyperbolic tangent """
    return T.tanh(x)


def sigm(x):
    """ Sigmoid """
    return T.nnet.sigmoid(x)


def relu(x, alpha=0.0):
    """ Rectified linear unit """
    return T.nnet.relu(x, alpha)


def lrelu(x, alpha=0.1):
    """ Leaky ReLU """
    return T.nnet.relu(x, alpha)


def elu(x, alpha=1.0):
    """ Exponential LU """
    return T.nnet.elu(x, alpha)


##############################################################################
class Layer(object):
    """
    Base class for layers.

    Subclasses fill ``self.params`` with their shared variables and
    override ``get_output`` (and usually ``get_out_shape``).
    """
    # One unseeded generator shared by every layer for weight init.
    init_rng = np.random.RandomState()

    def __init__(self):
        self.params = []
        self.rng = Layer.init_rng

    def get_params(self):
        """Return the list of parameters of this layer."""
        return self.params

    def get_output(self, input, **kwargs):
        """Build the symbolic output; must be overridden."""
        raise NotImplementedError("get_output")

    def get_out_shape(self):
        """Return the output shape, or None when it is not known."""
        return None

    def init_he(self, shape, activation, sampling='uniform', lrelu_alpha=0.1):
        """
        Draw initial weights following He et al. 2015.

        ``shape`` is (n_in, n_out) for fully-connected layers; for any
        other rank the fan-in is the product of all axes but the first.
        ``sampling`` selects ``'uniform'`` or ``'normal'``; anything else
        raises NotImplementedError. Returns a floatX numpy array.
        """
        # Gain depends on the non-linearity that follows the weights.
        if activation in [T.nnet.relu, relu, elu]:
            gain = np.sqrt(2)
        elif activation == lrelu:
            gain = np.sqrt(2 / (1 + lrelu_alpha ** 2))
        else:
            gain = 1.0

        is_dense = len(shape) == 2
        fan_in = shape[0] if is_dense else np.prod(shape[1:])

        if sampling == 'normal':
            sigma = gain * np.sqrt(1. / fan_in)
            values = self.rng.normal(0., sigma, shape)
        elif sampling == 'uniform':
            limit = gain * np.sqrt(3. / fan_in)
            values = self.rng.uniform(-limit, limit, shape)
        else:
            raise NotImplementedError
        return np.asarray(values, dtype=theano.config.floatX)
class FCLayer(Layer):
    """
    Fully connected layer.

    Parameters
    ----------
    input_shape: int or a tuple of ints
        Input feature dimension or (batch_size, input feature dimension)
    n_out: int
        Output feature dimension.
    activation: function
        Activation function.
    W: tensor, numpy or None
        Weights. If this is not given, the weights are initialized with
        ``init_he``.
    b: tensor, numpy or None
        Biases. If this is not given, the biases are initialized to zeros.
    no_bias: bool
        If True, bias is not used in this layer.
    """
    def __init__(self, input_shape, n_out, activation=linear,
                 W=None, b=None, no_bias=False, name=None):
        super(FCLayer, self).__init__()

        given_full_shape = isinstance(input_shape, (list, tuple))
        self.input_shape = (input_shape if given_full_shape
                            else (None, input_shape))
        self.n_in = self.input_shape[1]
        self.n_out = n_out
        self.activation = activation
        self.act_name = activation.__name__
        self.no_bias = no_bias
        self.name = 'FC' if name is None else name

        # NOTE(review): parameters are registered only when this layer
        # creates the shared variable itself; the nesting of the
        # ``params +=`` statements is not recoverable from this view of
        # the file -- confirm against the original.
        self.params = []
        if isinstance(W, T.sharedvar.TensorSharedVariable):
            self.W = W
        else:
            W_values = W
            if W_values is None:
                W_values = self.init_he(
                    (self.n_in, self.n_out), self.activation)
            self.W = theano.shared(W_values, self.name + '_W', borrow=True)
            self.params += [self.W]

        if self.no_bias:
            self.b = None
        elif isinstance(b, T.sharedvar.TensorSharedVariable):
            self.b = b
        else:
            b_values = b
            if b_values is None:
                b_values = np.zeros((n_out,), dtype=theano.config.floatX)
            self.b = theano.shared(b_values, self.name + '_b', borrow=True)
            self.params += [self.b]

        # Show information (one line, fields separated by single spaces)
        print(' # %s (FC): in = %d -> out = %d,' % (
                  self.name, self.n_in, self.n_out),
              'act.: %s,' % self.act_name,
              'No bias' if self.no_bias else '')

    def get_output(self, input, **kwargs):
        """Return ``activation(input . W [+ b])``."""
        pre_act = T.dot(input, self.W)
        if not self.no_bias:
            pre_act += self.b
        return self.activation(pre_act)

    def get_out_shape(self):
        """Return (batch size, n_out)."""
        return (self.input_shape[0], self.n_out)
class ConvLayer(Layer):
    """
    Convolutional layer.

    Parameters
    ----------
    input_shape: a tuple of ints
        (batch size, num input feature maps, image height, image width)
    num_filts: int
        Number of output channels.
    filt_size: a tuple of ints
        (filter rows, filter columns)
    activation: function
        Activation function.
    mode: ``'half'``, ``'valid'``, ``'full'`` or a tuple of two ints
        Border mode of convolution; a tuple gives explicit (rows, columns)
        zero padding.
    subsample: a tuple of ints (len = 2)
        Stride of convolution.
    filter_dilation: a tuple of ints (len = 2)
        Dilation of convolution.
    W: tensor, numpy or None
        Filter weights. If this is not given, the weights are initialized
        with ``init_he``.
    b: tensor, numpy or None
        Biases. If this is not given, the biases are initialized to zeros.
    no_bias: bool
        If True, bias is not used in this layer.

    Raises
    ------
    ValueError
        For an unknown ``mode`` or a non-positive output feature size.
    """
    def __init__(self, input_shape, num_filts, filt_size, activation=linear,
                 mode='half', subsample=(1, 1), filter_dilation=(1, 1),
                 W=None, b=None, no_bias=False, name=None):
        super(ConvLayer, self).__init__()

        # Make filter shape (tuple() lets filt_size also be a list)
        assert len(filt_size) == 2
        filter_shape = (num_filts, input_shape[1]) + tuple(filt_size)

        # Calculate output shape and validate
        self.mode = mode if isinstance(mode, tuple) else mode.lower()
        self.out_size = self._conv_out_size(
            input_shape, filter_shape, self.mode, filter_dilation)
        for sz in self.out_size:
            if sz < 1:
                raise ValueError('Invalid feature size: (%s).' %
                                 ', '.join([str(i) for i in self.out_size]))

        self.filter_shape = filter_shape
        self.input_shape = input_shape
        self.activation = activation
        self.act_name = activation.__name__
        self.no_bias = no_bias
        self.name = 'Conv' if name is None else name
        self.subsample = subsample
        self.filter_dilation = filter_dilation

        # Initialize parameters
        # NOTE(review): parameters are registered only when this layer
        # creates the shared variable itself; the nesting of the
        # ``params +=`` statements is not recoverable from this view of
        # the file -- confirm against the original.
        self.params = []
        if isinstance(W, T.sharedvar.TensorSharedVariable):
            self.W = W
        else:
            if W is None:
                W_values = self.init_he(filter_shape, self.activation)
            else:
                W_values = W
            self.W = theano.shared(W_values, self.name + '_W',
                                   borrow=True)
            self.params += [self.W]

        if self.no_bias:
            self.b = None
        elif isinstance(b, T.sharedvar.TensorSharedVariable):
            self.b = b
        else:
            if b is None:
                b_values = np.zeros((filter_shape[0],),
                                    dtype=theano.config.floatX)
            else:
                b_values = b
            self.b = theano.shared(b_values, self.name + '_b',
                                   borrow=True)
            self.params += [self.b]

        # Show information (self.name, so the default name is printed
        # instead of 'None')
        print(' # %s (Conv-%s):' % (self.name, mode), end=' ')
        print('flt.(%s),' % ', '.join(
            [str(i) for i in self.filter_shape]), end=' ')
        print('in.(%s),' % ', '.join(
            [str(i) for i in self.input_shape[1:]]), end=' ')
        print('act.: %s,' % self.act_name, end=' ')
        if self.no_bias:
            print('No bias')
        else:
            print('')
        if self.subsample != (1, 1):
            print(' subsample (%s) -> (%s)' % (
                ', '.join([str(i) for i in self.input_shape[1:]]),
                ', '.join([str(i) for i in self.get_out_shape()[1:]])))

    @staticmethod
    def _conv_out_size(input_shape, filter_shape, mode, filter_dilation):
        """
        Return the stride-1 spatial output size as a tuple.

        Uses ``in + 2 * pad - k + 1`` where ``k`` is the dilated filter
        extent and ``pad`` is 0 ('valid'), ``k // 2`` ('half'), ``k - 1``
        ('full') or the explicit padding. The original computed
        ``in - k - 1`` for 'full' and ignored dilation.
        """
        sizes = []
        for axis in range(2, len(input_shape)):
            dilation = filter_dilation[axis - 2]
            extent = (filter_shape[axis] - 1) * dilation + 1
            if isinstance(mode, tuple):
                pad = mode[axis - 2]
            elif mode == 'valid':
                pad = 0
            elif mode == 'half':
                pad = extent // 2
            elif mode == 'full':
                pad = extent - 1
            else:
                raise ValueError('Invalid mode: %s' % mode)
            sizes.append(input_shape[axis] + 2 * pad - extent + 1)
        return tuple(sizes)

    def get_output(self, input, **kwargs):
        """
        Return ``activation(conv2d(input, W) [+ b])``.

        Pass ``var_shape=True`` to omit the static input shape from the
        convolution call.
        """
        var_shape = kwargs.get('var_shape', False)

        lin_output = conv2d(
            input=input,
            filters=self.W,
            input_shape=None if var_shape else self.input_shape,
            filter_shape=self.filter_shape,
            border_mode=self.mode,
            subsample=self.subsample,
            filter_dilation=self.filter_dilation
        )

        if not self.no_bias:
            # one bias per output channel, broadcast over the other axes
            lin_output += self.b.dimshuffle('x', 0, 'x', 'x')

        return self.activation(lin_output)

    def get_out_shape(self, after_ss=True):
        """
        Return (batch, num filters, height, width) of the output.

        With ``after_ss`` the spatial size accounts for the stride
        (ceiling division); otherwise it is the stride-1 size.
        """
        out_size = self.out_size
        if after_ss:
            out_size = [(out_size[i] + self.subsample[i] - 1) //
                        self.subsample[i] for i in range(len(out_size))]

        return (self.input_shape[0], self.filter_shape[0]) + tuple(out_size)
class ConvGradLayer(Layer):
    """
    Transposed convolutional layer.

    Parameters
    ----------
    out_shape: a tuple of ints
        (batch size, num output feature maps, image height, image width)
    num_in_feat: int
        Number of input feature maps.
    filt_size: a tuple of ints
        (filter rows, filter columns)
    activation: function
        Activation function.
    mode: ``'half'``, ``'valid'``, ``'full'`` or a tuple of two ints
        Border mode of convolution in the forward path.
    subsample: a tuple of ints (len = 2)
        Stride of convolution in the forward path.
    filter_dilation: a tuple of ints (len = 2)
        Dilation of convolution in the forward path.
    W: tensor, numpy or None
        Filter weights. If this is not given, the weights are initialized
        with ``init_he``.
    b: tensor, numpy or None
        Biases. If this is not given, the biases are initialized to zeros.
    no_bias: bool
        If True, bias is not used in this layer.
    """
    def __init__(self, out_shape, num_in_feat, filt_size, activation=linear,
                 mode='half', subsample=(1, 1), filter_dilation=(1, 1),
                 W=None, b=None, no_bias=False, name=None):
        super(ConvGradLayer, self).__init__()

        # Make filter shape (tuple() lets filt_size also be a list)
        # NOTE(review): conv2d_grad_wrt_inputs documents filters as
        # (forward output channels, forward input channels, rows, cols),
        # which here would be (num_in_feat, out_shape[1], ...). The layout
        # below is kept unchanged; verify when the two counts differ.
        assert len(filt_size) == 2
        filter_shape = (out_shape[1], num_in_feat) + tuple(filt_size)

        # Explicit padding tuples have no .lower(); pass them through the
        # same way ConvLayer does.
        self.mode = mode if isinstance(mode, tuple) else mode.lower()
        self.filter_shape = filter_shape
        self.out_shape = out_shape
        self.activation = activation
        self.act_name = activation.__name__
        self.no_bias = no_bias
        self.name = 'ConvGr' if name is None else name
        self.subsample = subsample
        self.filter_dilation = filter_dilation

        # Initialize parameters
        # NOTE(review): parameters are registered only when this layer
        # creates the shared variable itself; the nesting of the
        # ``params +=`` statements is not recoverable from this view of
        # the file -- confirm against the original.
        self.params = []
        if isinstance(W, T.sharedvar.TensorSharedVariable):
            self.W = W
        else:
            if W is None:
                W_values = self.init_he(filter_shape, self.activation)
            else:
                W_values = W
            self.W = theano.shared(W_values, self.name + '_W',
                                   borrow=True)
            self.params += [self.W]

        if self.no_bias:
            self.b = None
        elif isinstance(b, T.sharedvar.TensorSharedVariable):
            self.b = b
        else:
            if b is None:
                b_values = np.zeros((filter_shape[0],),
                                    dtype=theano.config.floatX)
            else:
                b_values = b
            self.b = theano.shared(b_values, self.name + '_b',
                                   borrow=True)
            self.params += [self.b]

        # Show information (self.name, so the default name is printed
        # instead of 'None')
        print(' # %s (ConvGr-%s):' % (self.name, mode), end=' ')
        print('flt.(%s),' % ', '.join(
            [str(i) for i in self.filter_shape]), end=' ')
        print('out.(%s),' % ', '.join(
            [str(i) for i in self.out_shape[1:]]), end=' ')
        print('act.: %s,' % self.act_name, end=' ')
        if self.no_bias:
            print('No bias')
        else:
            print('')
        if self.subsample != (1, 1):
            print(' upsample -> (%s)' % (
                ', '.join([str(i) for i in self.out_shape[1:]])))

    def get_output(self, input, **kwargs):
        """Return ``activation(conv2d_grad_wrt_inputs(input, W) [+ b])``."""
        lin_output = conv2d_grad_wrt_inputs(
            output_grad=input,
            filters=self.W,
            input_shape=self.out_shape,
            filter_shape=self.filter_shape,
            border_mode=self.mode,
            subsample=self.subsample,
            filter_dilation=self.filter_dilation
        )
        if not self.no_bias:
            # one bias per channel, broadcast over the other axes
            lin_output += self.b.dimshuffle('x', 0, 'x', 'x')

        return self.activation(lin_output)

    def get_out_shape(self, **kwargs):
        """Return the output shape given at construction."""
        return self.out_shape
class ActivationLayer(Layer):
    """
    Activation layer (no weights and bias).

    Parameters
    ----------
    activation: function
        Activation function applied element-wise to the input.
    name: str or None
        Layer name; ``'Act'`` is used when omitted.
    """
    def __init__(self, activation=linear, name=None):
        super(ActivationLayer, self).__init__()

        self.name = name if name is not None else 'Act'
        self.activation = activation
        self.act_name = activation.__name__

        # Show information (one line, fields separated by a single space)
        print(' # %s (Act.)' % (self.name), 'act.: %s,' % self.act_name)

    def get_output(self, input, **kwargs):
        """Return ``activation(input)``."""
        activated = self.activation(input)
        return activated
class BiasLayer(Layer):
    """
    Bias layer (no weights).

    Parameters
    ----------
    input_shape: int or a tuple of ints
        Input feature dimension or the full input shape, e.g.
        (batch_size, input feature dimension)
    axis: int
        Axis of input to add the bias.
    activation: function
        Activation function.
    b: tensor, numpy or None
        Biases. If this is not given, the biases are initialized to zeros.
    """
    def __init__(self, input_shape, axis=1, activation=linear,
                 b=None, name=None):
        super(BiasLayer, self).__init__()

        self.input_shape = input_shape
        self.axis = axis
        self.activation = activation
        self.name = 'Bias' if name is None else name
        self.act_name = activation.__name__

        if isinstance(input_shape, (list, tuple)):
            self.bias_sh = (input_shape[self.axis],)
            self.in_dim = len(input_shape)
        else:
            self.bias_sh = (input_shape,)
            self.in_dim = 2

        # NOTE(review): the bias is registered as a parameter only when
        # this layer creates the shared variable itself; the nesting of
        # the ``params +=`` statement is not recoverable from this view of
        # the file -- confirm against the original.
        if isinstance(b, T.sharedvar.TensorSharedVariable):
            self.b = b
        else:
            if b is None:
                b_values = np.zeros(self.bias_sh, dtype=theano.config.floatX)
            else:
                b_values = b
            self.b = theano.shared(b_values, self.name + '_b', borrow=True)
            self.params += [self.b]

        # Show information
        print(' # %s (Bias)' % (self.name), end=' ')
        if self.in_dim > 2:
            print('in.(%s),' % ', '.join(
                [str(i) for i in self.input_shape[1:]]), end=' ')
        else:
            # Print the bias length: formatting self.input_shape with %d
            # raised TypeError whenever it was a (batch, features) tuple.
            print('in.(%d),' % self.bias_sh[0], end=' ')
        print('bias dim:%d,' % self.axis, end=' ')
        print('act.: %s,' % self.act_name)

    def get_output(self, input, **kwargs):
        """Return ``activation(input + b)``, b broadcast along ``axis``."""
        if self.in_dim > 2:
            # keep the bias on ``axis`` and broadcast it over all others
            pattern = [0 if ii == self.axis else 'x'
                       for ii in range(self.in_dim)]
            lin_output = input + self.b.dimshuffle(pattern)
        else:
            lin_output = input + self.b
        return self.activation(lin_output)

    def get_out_shape(self):
        """Adding a bias does not change the shape."""
        return self.input_shape
class UpsampleLayer(Layer):
    """
    Upscale the input by a specified factor.

    Parameters
    ----------
    input_shape: a tuple of ints
        (batch size, num input feature maps, image height, image width)
    us: tuple of two ints
        Upscaling factors (vertical, horizontal).
    out_shape: a tuple of ints or None
        Stored on the layer; the code in this class does not read it
        (the output shape is always derived from ``input_shape``).
    mode: {``'zero'``, ``'NN'``}
        Put zeros or nearest neighbor pixels between original pixels.
    """
    def __init__(self, input_shape, us=(2, 2), out_shape=None, mode='zero'):
        super(UpsampleLayer, self).__init__()

        self.input_shape = input_shape
        self.us = us
        self.mode = mode
        self.out_shape = out_shape
        print(' # upsample-(%s)-%s (%s) -> (%s)' % (
            mode,
            ', '.join([str(i) for i in self.us]),
            ', '.join([str(i) for i in self.input_shape[1:]]),
            ', '.join([str(i) for i in self.get_out_shape()[1:]])))

    def get_output(self, input, **kwargs):
        """
        Return the upsampled symbolic tensor.

        Raises
        ------
        ValueError
            If ``mode`` is neither 'zero' nor 'NN'.
        """
        us = self.us
        if self.mode == 'zero':
            # Allocate the enlarged map and scatter the input onto every
            # us-th position; everything in between stays zero.
            sh = input.shape
            upsample = T.zeros((sh[0], sh[1], sh[2] * us[0], sh[3] * us[1]),
                               dtype=input.dtype)
            return T.set_subtensor(upsample[:, :, ::us[0], ::us[1]], input)

        if self.mode == 'NN':
            return input.repeat(us[0], axis=2).repeat(us[1], axis=3)

        raise ValueError('Select the proper mode: zero / NN')

    def get_out_shape(self):
        """
        Return (batch, channels, height * us[0], width * us[1]).

        A spatial size of None (unknown) stays None instead of raising a
        TypeError, in line with ``pool_output_length``.
        """
        in_sh = self.input_shape
        out_len0 = None if in_sh[2] is None else in_sh[2] * self.us[0]
        out_len1 = None if in_sh[3] is None else in_sh[3] * self.us[1]
        return (in_sh[0], in_sh[1], out_len0, out_len1)
def pool_output_length(input_length, pool_size, stride, pad, ignore_border):
    """
    Compute the output length of a pooling operation along one dimension.

    Parameters
    ----------
    input_length: int or None
        Size of the input along this dimension.
    pool_size: int or None
        Size of the pooling window.
    stride: int
        Step between consecutive windows.
    pad: int
        Zero padding added on both sides; must be 0 unless
        ``ignore_border`` is True.
    ignore_border: bool
        When True, partial windows at the border are dropped.

    Returns
    -------
    int or None
        The output length (never negative), or None when ``input_length``
        or ``pool_size`` is None.

    Raises
    ------
    AssertionError
        If ``pad`` is non-zero while ``ignore_border`` is False.
    """
    if input_length is None or pool_size is None:
        return None

    if ignore_border:
        output_length = input_length + 2 * pad - pool_size + 1
        output_length = (output_length + stride - 1) // stride
        # A window larger than the padded input yields no output at all;
        # without the clamp the formula above goes negative.
        return max(0, output_length)

    # output length calculation taken from:
    # https://github.com/Theano/Theano/blob/master/theano/tensor/signal/downsample.py
    if pad != 0:
        # Raised explicitly (same exception type as the former assert) so
        # the check is not stripped when running with -O.
        raise AssertionError('pad must be 0 when ignore_border is False')

    if stride >= pool_size:
        return (input_length + stride - 1) // stride

    return max(0, (input_length - pool_size + stride - 1) // stride) + 1
class Trainer(object):
    """
    Trainer class managing training and testing routines.

    Trains & tests models over epochs, shows monitoring variables,
    saves model snapshots, and stores data and images.

    Arguments
    ---------
    train_config: dictionary
        Keys read by this class:
        - 'test_freq': test the trained model every test_freq epochs
          (required)
        - 'save_freq': save data every save_freq epochs
          (defaults to 'test_freq')
        - 'regular_snap_freq': save model snapshot every regular_snap_freq
          epochs (default 40)
        - 'n_imgs_to_record': number of images to record (default 20);
          `testing_routine` also accepts the string 'all'
        - 'prefix': prefix of filenames of recording data (default '')
    snap_path: string
        path to save snapshot file.
    output_path: string
        path to save output data; falls back to snap_path when omitted.
    """

    def __init__(self, train_config, snap_path=None, output_path=None):

        self.test_freq = train_config.get('test_freq', None)
        assert self.test_freq is not None
        self.save_freq = train_config.get('save_freq', None)
        if self.save_freq is None:
            self.save_freq = self.test_freq
        self.regular_snap_freq = train_config.get('regular_snap_freq', 40)
        self.n_imgs_to_record = train_config.get('n_imgs_to_record', 20)

        self.prefix = train_config.get('prefix', '')
        self.set_path(snap_path, output_path)

    def set_path(self, snap_path, output_path=None):
        """Create the given directories if needed and remember them.

        When output_path is None it is set to snap_path.
        """
        if snap_path is not None:
            if not os.path.isdir(snap_path):
                os.makedirs(snap_path)

        if output_path is not None:
            if not os.path.isdir(output_path):
                os.makedirs(output_path)
        else:
            output_path = snap_path

        self.snap_path = snap_path
        self.output_path = output_path

    @staticmethod
    def _elapsed_str(seconds):
        """Format an elapsed time in seconds as 'time MM:SS.ss' + newline."""
        minutes, seconds = divmod(seconds, 60)
        return 'time {:02.0f}:{:05.2f}\n'.format(minutes, seconds)

    def training_routine(self, model, get_train_outputs, rec_train,
                         get_test_outputs, rec_test,
                         train_batch_size, test_batch_size,
                         train_data, test_data,
                         epochs, prefix2='', check_mos_corr=False):
        """
        Actual training routine.

        For every epoch the training outputs are averaged over all training
        batches and appended to the training log. Every `test_freq` /
        `save_freq` epochs the model is evaluated on the test batches,
        the latest/best snapshots are saved and, on save epochs, image
        data, images and kernel images are written to a per-epoch folder.

        @type model: .models.model_basis.ModelBasis
        @type rec_train: .models.model_record.Record
        @type rec_test: .models.model_record.Record
        @type train_data: .data_load.dataset.Dataset
        @type test_data: .data_load.dataset.Dataset

        Returns
        -------
        tuple
            (SRCC, PLCC, epoch) of the best test SRCC when check_mos_corr
            is True, otherwise (cost, first monitored value or 0, epoch)
            of the lowest test cost. `epoch` is 0-based and stays -1 when
            no test epoch improved the score. If a correlation is NaN the
            loop stops and (0, 0, epoch) is returned.
        """

        # check validity
        assert self.snap_path is not None

        # get numbers of training and Testing batches
        n_train_imgs = train_data.n_data
        n_test_imgs = test_data.n_data
        n_train_batches = int(n_train_imgs / train_batch_size)
        n_test_batches = int(n_test_imgs / test_batch_size)
        assert n_train_batches > 0, 'n_train_batches = %d' % (n_train_batches)
        assert n_test_batches > 0, 'n_test_batches = %d' % (n_test_batches)

        # check n_imgs_to_record
        n_valid_rec_batches = self.n_imgs_to_record // test_batch_size + 1
        if n_valid_rec_batches > n_test_batches:
            n_valid_rec_batches = n_test_batches

        if self.n_imgs_to_record < test_batch_size:
            n_imgs_to_record = self.n_imgs_to_record
        else:
            n_imgs_to_record = n_valid_rec_batches * test_batch_size

        # get numbers of data and images to monitor and write
        until_loss, until_im_info, until_img = rec_test.get_until_indices(1)

        # snapshot file names
        snap_prefix = self.prefix + prefix2
        snapshot_file_latest = os.path.join(
            self.snap_path, snap_prefix + 'snapshot_lastest.npy')
        snapshot_file_best = os.path.join(
            self.snap_path, snap_prefix + 'snapshot_best.npy')
        snapshot_file_best_srcc = os.path.join(
            self.snap_path, snap_prefix + 'snapshot_best_srcc.npy')
        snapshot_file_regular = os.path.join(
            self.snap_path, snap_prefix + 'snapshot_{:03d}.npy')
        snapshot_file_fin = os.path.join(
            self.snap_path, snap_prefix + 'snapshot.npy')

        # log file names
        log_file = os.path.join(
            self.snap_path, prefix2 + 'log.txt')
        log_test_file = os.path.join(
            self.output_path, prefix2 + 'log_test.txt')

        # Show information
        print('\nTrain', end='')
        if train_data.imagewise:
            print(' imagewise', end='')
        else:
            print(' patchwise', end='')
        print(' / Test', end='')
        if test_data.imagewise:
            print(' imagewise', end='')
        else:
            print(' patchwise', end='')
        print(' (%d epochs).' % (epochs))
        print('Save a snapshot every %d epochs,' % self.save_freq, end='')
        print(' and test the model every %d epochs.' % self.test_freq)
        print(' - Regular snapshot: every %d epochs' % self.regular_snap_freq)
        print(' - Snapshot path: %s' % self.snap_path)
        print(' - Batch size: %d (train) / %d (test)' % (
            train_batch_size, test_batch_size))
        print(' - Training batches: %d (%d images)' % (
            n_train_batches, n_train_imgs))
        print(' - Testing batches: %d (%d images)' % (
            n_test_batches, n_test_imgs), end='')
        print(' / Missed images: %d' % (
            n_test_imgs - n_test_batches * test_batch_size))
        print(' - Monitor data: %s' % (', '.join(rec_train.data_keys)))
        print(' - Monitor images: %s' % (', '.join(rec_test.data_keys)))
        print(' - Monitor im. data: %s' % (', '.join(rec_test.im_data_keys)))
        print(' - Num of rec. images: %d (%d x %d batches)' % (
            n_imgs_to_record, test_batch_size, n_valid_rec_batches))

        # get MOS list
        if check_mos_corr:
            # if check_mos_corr is true, the first value of
            # rec_im_data must be mos predicted.
            assert rec_test.im_data_keys[0] == 'mos_p'
            assert test_data.exist_score
            n_valid_test_imgs = n_test_batches * test_batch_size
            test_score_list = test_data.score_data[:n_valid_test_imgs]
            mos_p_list = np.zeros(n_valid_test_imgs, dtype='float32')
            print(' - Check SRCC/PLCC using %d images' % (n_valid_test_imgs))

        start_time = timeit.default_timer()
        prev_time = start_time
        best_test_loss = np.inf

        # write current time in log file
        cur_time = 'Started at %s\n' % (time.strftime('%X %x'))
        key_str = 'cost, ' + ", ".join(rec_train.data_keys) + '\n'
        with open(log_file, 'a') as f_hist:
            f_hist.write(cur_time)
            f_hist.write(key_str)

        # The test log rows hold rec_test.num_data values, so the header
        # lists the test record's keys.
        key_str = 'cost, ' + ", ".join(rec_test.data_keys)
        key_str += ', SRCC, PLCC\n' if check_mos_corr else '\n'
        with open(log_test_file, 'a') as f_hist:
            f_hist.write(cur_time)
            f_hist.write(key_str)

        # if check_mos_corr is True -> best_score_set = (SRCC, PLCC)
        # else -> best_score_set = (losses[0], losses[1])
        best_score_set = (0., 0., -1) if check_mos_corr else (np.inf, 0., -1)

        #######################################################################
        # go through training epochs
        for epoch in range(epochs):
            # train model
            losses = np.zeros(rec_train.num_data + 1, dtype='float32')
            for batch_idx in range(n_train_batches):
                # get training loss
                losses += get_train_outputs()
            losses /= n_train_batches

            # write log
            with open(log_file, 'a') as f_hist:
                fields = ['%d' % (epoch + 1)]
                fields += ['%.6f' % value for value in losses]
                f_hist.write('\t'.join(fields) + '\n')

            # show information
            end_time = timeit.default_timer()
            pr_str = ' {:3d}, cost {:.3f}, '.format(epoch + 1, losses[0])
            for idx, key in enumerate(rec_train.data_keys):
                pr_str += '{:s} {:.3f}, '.format(key, losses[idx + 1])
            pr_str += self._elapsed_str(end_time - prev_time)
            sys.stdout.write(pr_str)
            sys.stdout.flush()
            prev_time = end_time

            # save snapshot every regular_snap_freq
            if (epoch + 1) % self.regular_snap_freq == 0:
                model.save(snapshot_file_regular.format(epoch + 1))

            ##################################################################
            # test the trained model and save a snapshot
            # For every save_freq and test_freq
            test_model = (epoch + 1) % self.test_freq == 0
            save_data = (epoch + 1) % self.save_freq == 0
            if not (test_model or save_data):
                continue

            if save_data:
                # make output folder
                numstr = '{:03d}'.format(epoch + 1)
                out_path = os.path.join(
                    self.output_path, prefix2 + numstr + '/')
                if not os.path.isdir(out_path):
                    os.makedirs(out_path)

                # Sized in whole batches: every recorded batch writes
                # test_batch_size columns, even when fewer than one batch
                # of images is requested. Only the first n_imgs_to_record
                # columns are written to info.txt below.
                im_data = np.zeros(
                    (rec_test.num_im_data,
                     n_valid_rec_batches * test_batch_size),
                    dtype='float32')

            losses = np.zeros(rec_test.num_data + 1, dtype='float32')
            for test_bat_idx in range(0, n_test_batches):
                # get testing loss
                outputs = get_test_outputs()
                losses += outputs[:until_loss]
                cur_im_data = outputs[until_loss:until_im_info]
                cur_images = outputs[until_im_info:until_img]

                idx_from = test_bat_idx * test_batch_size
                idx_to = (test_bat_idx + 1) * test_batch_size
                record_batch = save_data and test_bat_idx < n_valid_rec_batches

                # get predicted mos
                if check_mos_corr:
                    mos_p_list[idx_from:idx_to] = cur_im_data[0]

                # write image data
                if record_batch and rec_test.num_im_data > 0:
                    im_data[:, idx_from:idx_to] = cur_im_data

                # write images
                if record_batch and rec_test.num_imgs > 0:
                    if test_data.imagewise:
                        # if imagewise is True, reconstructs a complete
                        # image using the patches in the minibatch
                        rec_info = test_data.get_current_recon_info()
                        draw_tiled_images(
                            cur_images, rec_test.rec_imgs, test_bat_idx,
                            out_path,
                            rec_info['bat2img_idx_set'],
                            rec_info['npat_img_list'],
                            rec_info['filt_idx_list'],
                            test_data.patch_size,
                            test_data.patch_step)
                    else:
                        draw_images(
                            cur_images, rec_test.rec_imgs, test_bat_idx,
                            test_batch_size, out_path)

            losses /= n_test_batches

            # get SRCC and PLCC
            if check_mos_corr:
                rho_s, _ = spearmanr(test_score_list, mos_p_list)
                rho_p, _ = pearsonr(test_score_list, mos_p_list)

                if math.isnan(rho_s) or math.isnan(rho_p):
                    print('@ Stop iteration! (NaN)')
                    best_score_set = (0, 0, epoch)
                    break

                if rho_s > best_score_set[0]:
                    best_score_set = (rho_s, rho_p, epoch)
                    model.save(snapshot_file_best_srcc)
            elif losses[0] < best_score_set[0]:
                if rec_test.num_data >= 1:
                    best_score_set = (losses[0], losses[1], epoch)
                else:
                    best_score_set = (losses[0], 0, epoch)

            # save the latest snapshot
            model.save(snapshot_file_latest)

            # save the best snapshot
            if losses[0] < best_test_loss:
                best_test_loss = losses[0]
                print(' # BEST', end=' ')
                model.save(snapshot_file_best)

            # For every save_freq
            if save_data:
                # write image data
                if rec_test.num_im_data > 0:
                    with open(out_path + 'info.txt', 'w') as f:
                        # header
                        f.write('epoch: %s (%s)\n' % (
                            numstr, ', '.join(rec_test.im_data_keys)))

                        for idx in range(n_imgs_to_record):
                            data = '%d' % idx
                            for ii in range(rec_test.num_im_data):
                                data += '\t%.6f' % (im_data[ii][idx])
                            data += '\n'
                            f.write(data)

                # write mos
                if check_mos_corr:
                    with open(out_path + 'mos_res.txt', 'w') as f:
                        # header
                        f.write('epoch: %s (mos_p, mos)\n' % (numstr))

                        for idx in range(n_valid_test_imgs):
                            f.write('{:.6f}\t{:.6f}\n'.format(
                                mos_p_list[idx], test_score_list[idx]))
                        f.write('SRCC: {:.4f}, PLCC: {:.4f}\n'.format(
                            rho_s, rho_p))

                # write kernel images
                draw_kernels(rec_test.rec_kernels, self.output_path,
                             prefix2, '_' + numstr)

            # write log
            with open(log_test_file, 'a') as f_hist:
                fields = ['{:d}'.format(epoch + 1)]
                fields += ['{:.6f}'.format(value) for value in losses]
                if check_mos_corr:
                    fields += ['{:.4f}'.format(rho_s),
                               '{:.4f}'.format(rho_p)]
                f_hist.write('\t'.join(fields) + '\n')

            # show information
            end_time = timeit.default_timer()
            pr_str = ' * vcost {:.3f}, '.format(losses[0])
            # losses here has rec_test.num_data + 1 entries, so it is
            # labelled with the test record's keys.
            for idx, key in enumerate(rec_test.data_keys):
                pr_str += '{:s} {:.3f}, '.format(key, losses[idx + 1])
            if check_mos_corr:
                pr_str += 'SRCC {:.3f}, PLCC {:.3f}, '.format(rho_s, rho_p)
            pr_str += self._elapsed_str(end_time - prev_time)
            sys.stdout.write(pr_str)
            sys.stdout.flush()
            prev_time = end_time

        end_time = timeit.default_timer()
        total_time = end_time - start_time
        print(' - Train ran for %.2fm' % ((total_time) / 60.))
        print(' - Finished at %s' % (time.strftime('%X %x')))

        # A first score of 0 marks "no valid result" (initial SRCC or NaN
        # stop), in which case no final snapshot is written.
        if best_score_set[0] != 0:
            model.save(snapshot_file_fin)

        return best_score_set

    def testing_routine(self, get_test_outputs, rec_test,
                        test_batch_size, test_data, prefix2='',
                        check_mos_corr=False):
        """Actual testing routine: group patches for each image

        Runs every full test batch once, writes per-image data, MOS
        results, images and kernel images under `output_path`, and prints
        a summary.

        @type rec_test: .models.model_record.Record

        Returns
        -------
        tuple
            (SRCC, PLCC) when check_mos_corr is True, otherwise
            (cost, first monitored value or 0).
        """
        # get numbers of training and Testing batches
        n_test_imgs = test_data.n_images
        n_test_batches = int(n_test_imgs / test_batch_size)
        assert n_test_batches > 0

        n_valid_test_imgs = n_test_batches * test_batch_size

        if self.n_imgs_to_record == 'all':
            n_imgs_to_record = n_valid_test_imgs
            # Record every batch; without this the name is undefined below.
            n_valid_rec_batches = n_test_batches
        else:
            n_valid_rec_batches = self.n_imgs_to_record // test_batch_size + 1
            if n_valid_rec_batches > n_test_batches:
                n_valid_rec_batches = n_test_batches
            n_imgs_to_record = n_valid_rec_batches * test_batch_size

        # get numbers of data and images to monitor and write
        until_loss = rec_test.num_data + 1
        until_im_info = until_loss + rec_test.num_im_data
        until_img = until_im_info + rec_test.num_imgs

        # Show information
        print('\nTest the model')
        if test_data.imagewise:
            print(' (imagewise)')
        else:
            print(' (patchwise)')
        print(' - Num of images in a batch: %d' % (test_batch_size))
        print(' - Testing batches: %d (%d images)' % (
            n_test_batches, n_test_imgs))
        print(' - Missed images in validation: %d' % (
            n_test_imgs - n_test_batches * test_batch_size))
        print(' - Image recording batches: %d (%d images)' % (
            n_valid_rec_batches, n_imgs_to_record))
        print(' - Monitor data: %s' % (', '.join(rec_test.data_keys)))
        print(' - Monitor images: %s' % (', '.join(rec_test.data_keys)))
        print(' - Monitor im. data: %s' % (', '.join(rec_test.im_data_keys)))

        # get MOS list
        if check_mos_corr:
            # if check_mos_corr is true, the first value of
            # rec_im_data must be mos predicted.
            assert rec_test.im_data_keys[0] == 'mos_p'
            assert test_data.exist_score

            test_score_list = test_data.score_data[:n_valid_test_imgs]
            mos_p_list = np.zeros(n_valid_test_imgs, dtype='float32')
            print(' - Check SRCC/PLCC using %d images' % (n_valid_test_imgs))

        start_time = timeit.default_timer()
        prev_time = start_time

        # write current time in log file
        cur_time = 'Started at %s\n' % (time.strftime('%X %x'))
        log_file = os.path.join(self.output_path, prefix2 + 'log_test.txt')
        with open(log_file, 'a') as f_hist:
            f_hist.write(cur_time)

        out_path = os.path.join(self.output_path, prefix2 + '/')
        if not os.path.isdir(out_path):
            os.makedirs(out_path)

        im_data = np.zeros(
            (rec_test.num_im_data, n_valid_test_imgs), dtype='float32')

        best_score_set = (0., 0.) if check_mos_corr else (np.inf, np.inf)

        losses = np.zeros(rec_test.num_data + 1, dtype='float32')
        for test_bat_idx in range(0, n_test_batches):
            # get testing loss
            outputs = get_test_outputs()

            losses += outputs[:until_loss]
            cur_im_data = outputs[until_loss:until_im_info]
            cur_images = outputs[until_im_info:until_img]

            idx_from = test_bat_idx * test_batch_size
            idx_to = (test_bat_idx + 1) * test_batch_size

            # get predicted mos
            if check_mos_corr:
                mos_p_list[idx_from:idx_to] = cur_im_data[0]

            # write image data
            if rec_test.num_im_data > 0:
                im_data[:, idx_from:idx_to] = cur_im_data

            # write images
            if rec_test.num_imgs > 0 and test_bat_idx < n_valid_rec_batches:
                if test_data.imagewise:
                    rec_info = test_data.get_current_recon_info()
                    draw_tiled_images(
                        cur_images, rec_test.rec_imgs, test_bat_idx,
                        out_path,
                        rec_info['bat2img_idx_set'],
                        rec_info['npat_img_list'],
                        rec_info['filt_idx_list'],
                        test_data.patch_size,
                        test_data.patch_step)
                else:
                    draw_images(
                        cur_images, rec_test.rec_imgs, test_bat_idx,
                        test_batch_size, out_path)
                # NOTE(review): this second tiled draw follows the branch
                # above regardless of imagewise; it looks like a leftover
                # duplicate. Its nesting level was reconstructed - confirm
                # before removing.
                rec_info = test_data.get_current_recon_info()
                draw_tiled_images(
                    cur_images, rec_test.rec_imgs, test_bat_idx, out_path,
                    rec_info['bat2img_idx_set'],
                    rec_info['npat_img_list'],
                    rec_info['filt_idx_list'],
                    test_data.patch_size,
                    test_data.patch_step)

        losses /= n_test_batches

        # get SRCC and PLCC
        if check_mos_corr:
            rho_s, _ = spearmanr(test_score_list, mos_p_list)
            rho_p, _ = pearsonr(test_score_list, mos_p_list)
            tau, _ = kendalltau(test_score_list, mos_p_list)
            rmse = np.sqrt(((test_score_list - mos_p_list) ** 2).mean())
            best_score_set = (rho_s, rho_p)
        elif rec_test.num_data >= 1:
            best_score_set = (losses[0], losses[1])
        else:
            best_score_set = (losses[0], 0)

        # write image data
        if rec_test.num_im_data > 0:
            with open(out_path + 'info.txt', 'w') as f:
                # header
                f.write('imidx, %s\n' % (
                    ', '.join(rec_test.im_data_keys)))

                for idx in range(n_valid_test_imgs):
                    data = '%d' % idx
                    for ii in range(rec_test.num_im_data):
                        data += '\t%.6f' % (im_data[ii][idx])
                    data += '\n'
                    f.write(data)

        # write mos
        if check_mos_corr:
            with open(out_path + 'mos_res.txt', 'w') as f:
                # header
                f.write('mos_p, mos\n')

                for idx in range(n_valid_test_imgs):
                    f.write('{:.6f}\t{:.6f}\n'.format(
                        mos_p_list[idx], test_score_list[idx]))
                data = 'SRCC: {:.4f}, PLCC: {:.4f}'.format(rho_s, rho_p)
                data += ', KRCC: {:.4f}, RMSE: {:.4f}\n'.format(tau, rmse)
                f.write(data)

        # write kernel images
        draw_kernels(rec_test.rec_kernels, self.output_path, prefix2)

        # show information
        end_time = timeit.default_timer()
        pr_str = ' * vcost {:.3f}, '.format(losses[0])
        for idx, key in enumerate(rec_test.data_keys):
            pr_str += '{:s} {:.3f}, '.format(key, losses[idx + 1])
        if check_mos_corr:
            pr_str += 'SRCC {:.3f}, PLCC {:.3f}, '.format(rho_s, rho_p)
            pr_str += 'KRCC {:.3f}, RMSE {:.3f}, '.format(tau, rmse)
        pr_str += self._elapsed_str(end_time - prev_time)
        sys.stdout.write(pr_str)
        sys.stdout.flush()
        prev_time = end_time

        end_time = timeit.default_timer()
        total_time = end_time - start_time
        print(' - Test ran for %.2fm' % ((total_time) / 60.))
        print(' - Finished at %s' % (time.strftime('%X %x')))

        return best_score_set
def draw_tiled_images(images, img_info_dict, bat_idx, out_path,
                      bat2img_idx_set, npat_img_list, filt_idx_list=None,
                      patch_size=None, patch_step=None):
    """
    Tile the patches of each image in a batch and save the result as PNG.

    For every key of `img_info_dict` (taken in iteration order and matched
    by position with `images`) and every image of the batch, the patches
    `images[ii][idx_from:idx_to]` are tiled with `tile_tensor4_from_list`
    and written to `out_path`. Inputs with 1 or 3 channels produce one
    file '<n>_<key>.png'; any other channel count produces one file per
    channel, '<n>_<key>_<ch>.png', where n = bat_idx * n_batch_imgs + idx.

    Parameters
    ----------
    images: sequence
        One patch array per key; axis 1 is read as the channel axis.
    img_info_dict: dict
        Per-key settings; the optional entries 'caxis' and 'scale' are
        read.
    bat_idx: int
        Index of the current batch, used for file numbering.
    out_path: str
        Directory the images are written to.
    bat2img_idx_set: sequence of (idx_from, idx_to)
        Patch index range of each image within the batch.
    npat_img_list: sequence
        Per-image entry whose elements from index 1 on give the tile
        shape.
    filt_idx_list: sequence or None
        Per-image index list forwarded as `idx_list`.
    patch_size, patch_step: pair of ints or None
        Needed only when a key defines a truthy 'scale'; the tile spacing
        is then -(patch_size - patch_step) * scale per axis.

    Raises
    ------
    ValueError
        If a key defines 'scale' but patch_size or patch_step is missing.
    """
    n_batch_imgs = len(npat_img_list)
    if not n_batch_imgs:
        return

    for ii, key in enumerate(img_info_dict):
        info = img_info_dict[key]
        caxis = info.get('caxis', None)
        scale = info.get('scale', None)
        if scale:
            if patch_size is None or patch_step is None:
                raise ValueError(
                    "patch_size and patch_step are required when "
                    "'scale' is set for %r" % (key,))
            # Negative spacing makes neighbouring patches overlap by the
            # scaled difference between patch size and step.
            tile_spacing = (
                int(-(patch_size[0] - patch_step[0]) * scale),
                int(-(patch_size[1] - patch_step[1]) * scale))
        else:
            tile_spacing = (0, 0)

        for idx in range(n_batch_imgs):
            idx_from, idx_to = bat2img_idx_set[idx]
            cur_img = images[ii][idx_from: idx_to]
            img_no = bat_idx * n_batch_imgs + idx
            # NOTE(review): assumes tile_tensor4_from_list accepts
            # idx_list=None when no filter list is given - confirm.
            idx_list = None if filt_idx_list is None else filt_idx_list[idx]

            nch = int(cur_img.shape[1])
            if nch == 1 or nch == 3:
                planes = [(cur_img, '%d_%s.png' % (img_no, key))]
            else:
                planes = [
                    (cur_img[:, ch_idx, :, :],
                     '%d_%s_%02d.png' % (img_no, key, ch_idx))
                    for ch_idx in range(nch)]

            for patches, img_name in planes:
                tiled_array = tile_tensor4_from_list(
                    X=patches,
                    tile_shape=npat_img_list[idx][1:],
                    idx_list=idx_list,
                    tile_spacing=tile_spacing,
                    caxis=caxis)
                img = Image.fromarray(tiled_array.astype(np.uint8))
                img.save(os.path.join(out_path, img_name))
654 | img = image_from_nparray( 655 | np.transpose(cur_img, (1, 2, 0)), caxis=caxis) 656 | img_name = '%d_%s.png' % (bat_idx * n_batch_imgs + idx, key) 657 | img.save(os.path.join(out_path, img_name)) 658 | else: 659 | for ch_idx in range(nch): 660 | img = image_from_nparray( 661 | cur_img[ch_idx, :, :], caxis=caxis) 662 | img_name = '%d_%s_%02d.png' % ( 663 | bat_idx * n_batch_imgs + idx, key, ch_idx) 664 | img.save(os.path.join(out_path, img_name)) 665 | --------------------------------------------------------------------------------