├── IQA_DeepQA_FR_release
    ├── __init__.py
    ├── data_load
    │   ├── __init__.py
    │   ├── TID2008.py
    │   ├── TID2013.py
    │   ├── CSIQ.py
    │   ├── LIVE.py
    │   └── dataset.py
    ├── layers
    │   ├── __init__.py
    │   ├── normalization.py
    │   └── layers.py
    ├── models
    │   ├── __init__.py
    │   ├── model_record.py
    │   ├── FR_sens_1s.py
    │   ├── FR_sens_1.py
    │   └── model_basis.py
    ├── configs
    │   └── FR_sens_1.yaml
    ├── default_config.yaml
    ├── laplacian_pyr.py
    ├── train_iqa.py
    ├── config_parser.py
    ├── test_iqa.py
    ├── optimizer.py
    ├── draw_graph.py
    ├── utils.py
    └── trainer.py
├── .gitignore
├── example.py
├── README.md
├── gen_list_TID.m
├── gen_list_CSIQ.m
└── gen_list_LIVE_IQA.m


/IQA_DeepQA_FR_release/__init__.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | .DS_Store*
 2 | ehthumbs.db
 3 | Icon?
 4 | Thumbs.db
 5 | 
 6 | sftp-config.json
 7 | 
 8 | .idea/
 9 | .vscode/
10 | outputs/
11 | __pycache__/
12 | 


--------------------------------------------------------------------------------
/IQA_DeepQA_FR_release/data_load/__init__.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 | 


--------------------------------------------------------------------------------
/IQA_DeepQA_FR_release/layers/__init__.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 | 


--------------------------------------------------------------------------------
/IQA_DeepQA_FR_release/models/__init__.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 | 


--------------------------------------------------------------------------------
/example.py:
--------------------------------------------------------------------------------
# Demo script: train a DeepQA model with the `fr_sens_LIVE` configuration.
import sys
import theano.sandbox.cuda
# Select the compute device before the project modules are imported.
# The first command-line argument names the device; the default is 'cuda0'.
theano.sandbox.cuda.use(sys.argv[1] if len(sys.argv) > 1 else 'cuda0')

from IQA_DeepQA_FR_release import train_iqa as tm

tm.train_iqa(
    # YAML file holding the `common` section and the per-database sections.
    config_file='IQA_DeepQA_FR_release/configs/FR_sens_1.yaml',
    # Section of the YAML file to use for this run.
    section='fr_sens_LIVE',
    # File storing the randomly divided train/test reference image indices.
    tr_te_file='outputs/tr_va_live.txt',
    # Path under which snapshot files are stored.
    snap_path='outputs/FR/FR_sens_LIVE_1/',
)
13 | 


--------------------------------------------------------------------------------
/IQA_DeepQA_FR_release/configs/FR_sens_1.yaml:
--------------------------------------------------------------------------------
 1 | common:
 2 |     database:
 3 |         scenes: all
 4 |         horz_ref: True
 5 |         patch_mode: shift_center
 6 |         color: gray
 7 |         train_size: 0.8
 8 |     model:
 9 |         model: IQA_DeepQA_FR_release.models.FR_sens_1
10 |         ign: 4
11 |         ign_scale: 8
12 |         opt_scheme: adam
13 |         lr: 1e-4
14 |         wr_l2: 5e-3
15 |         wr_tv: 1e-2
16 |     training:
17 |         batch_size: 5
18 |         epochs: 80
19 |         test_freq: 2
20 |         save_freq: 1
21 |         regular_snap_freq: 50
22 |         n_imgs_to_record: 30
23 |         prefix: 'FR_sens_'
24 | 
25 | fr_sens_LIVE:
26 |     database:
27 |         sel_data: LIVE
28 |         dist_types: all
29 |         patch_size: [112, 112]
30 |     model:
31 |         opt_scheme: nadam
32 |         lr: 1e-4
33 | 
34 | fr_sens_CSIQ:
35 |     database:
36 |         sel_data: CSIQ
37 |         dist_types: all
38 |         # patch_size: [112, 112]
39 |         patch_size:
40 |     model:
41 |         lr: 5e-4
42 | 
43 | fr_sens_TID2008:
44 |     database:
45 |         sel_data: TID2008
46 |         dist_types: all
47 |         patch_size:
48 |         # patch_size: [112, 112]
49 |         # patch_size:
50 |     model:
51 |         lr: 5e-4
52 |         # lr: 1e-4
53 | 
54 | fr_sens_TID2013:
55 |     database:
56 |         sel_data: TID2013
57 |         dist_types: all
58 |         patch_size:
59 |     model:
60 |         # lr: 1e-3
61 |         lr: 5e-4
62 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # DeepQA
 2 | We propose a convolutional neural network (CNN)-based FR-IQA model, named Deep Image Quality Assessment (DeepQA), where the behavior of the HVS is learned from the underlying data distribution of IQA databases.
 3 | 
 4 | > Jongyoo Kim and Sanghoon Lee, “Deep learning of human visual sensitivity in image quality assessment framework,” in IEEE Conference on Computer Vision and Pattern Recognition (CVPR), 2017, pp. 1676–1684.
 5 | 
 6 | 
 7 | ## Prerequisites
 8 | This code was developed and tested with Theano 0.9, CUDA 8.0, and Windows.
 9 | 
10 | ## Environment setting
11 | ### Setting database path:
12 | For each database, set `BASE_PATH` to the actual root path of each database in the following files:
13 | `IQA_DeepQA_FR_release/data_load/LIVE.py`,
14 | `IQA_DeepQA_FR_release/data_load/CSIQ.py`,
15 | `IQA_DeepQA_FR_release/data_load/TID2008.py`, and
16 | `IQA_DeepQA_FR_release/data_load/TID2013.py`.
17 | 
18 | ## Training DeepQA
19 | We provide the demo code for training a DeepQA model.
20 | ```bash
21 | python example.py
22 | ```
23 | 
24 | - `tr_te_file`: Path of the file in which the randomly divided (training and testing) reference image indices are stored.
25 | - `snap_path`: Path of the directory in which snapshot files are stored.
26 | 
27 | 
28 | ## Quantitative results
29 | DeepQA was tested on the full sets of the LIVE IQA, CSIQ, TID2008, and TID2013 databases. During the experiment, we randomly divided the reference images into two subsets, 80% for training and 20% for testing. The correlation coefficients were averaged after the procedure was repeated 10 times while dividing the training and testing sets randomly.
30 | 
31 | |Database |SRCC  |PLCC  |
32 | |---------|:----:|:----:|
33 | |LIVE IQA |0.981 | 0.982|
34 | |CSIQ     |0.961 | 0.965|
35 | |TID2008  |0.947 | 0.951|
36 | |TID2013  |0.939 | 0.947|
37 | 
38 | 


--------------------------------------------------------------------------------
/gen_list_TID.m:
--------------------------------------------------------------------------------
% Build the image list file for the TID2013 (or TID2008) database.
% Reads 'mos_with_names.txt' from base_path and writes out_file into
% base_path, one line per distorted image:
%   <ref_idx> <dist_idx> <ref_image_path> <dist_image_path> <score>
clear
fclose all;

%% Parameters
base_path = 'D:/DB/IQA/TID2013/';
out_file = 'TID2013.txt';
% base_path = 'D:/DB/IQA/TID2008/';
% out_file = 'TID2008.txt';
ref_subpath = 'reference_images/';
dist_subpath = 'distorted_images/';

fid = fopen([base_path 'mos_with_names.txt'], 'r');
% Each line is parsed as: <score> <distorted image file name>
formatSpec = '%f %s';
data = textscan(fid, formatSpec, [Inf, 2]);
% data = textscan(fid, formatSpec);
fclose(fid);

scores = data{1};
dist_name = data{2};

% Norm scores
% fprintf('Orignal: %f ~ %f\n', min(scores), max(scores))
% scores = (scores - min(scores)) / (max(scores) - min(scores));
% fprintf('Norm.  : %f ~ %f\n', min(scores), max(scores))

%% Dis/Ref images
n_files = size(dist_name, 1);
dist_imgs = cell(n_files, 1);
ref_imgs = cell(n_files, 1);
ref_idx = zeros(n_files, 1);
dist_idx = zeros(n_files, 1);
for im_idx = 1:n_files
    % The reference file name is the first three characters of the
    % distorted file name plus '.bmp'. Characters 2:3 are parsed as the
    % reference number and characters 5:6 as the distortion number.
    ref_name = [dist_name{im_idx}(1:3), '.bmp'];
    ref_idx(im_idx) = str2num(ref_name(2:3));
    dist_idx(im_idx) = str2num(dist_name{im_idx}(5:6));
    
    dist_imgs{im_idx} = [dist_subpath dist_name{im_idx}];
    ref_imgs{im_idx} = [ref_subpath ref_name];
end

% MOSs
% Scores are divided by 9 (presumably the upper end of the MOS scale -
% TODO confirm against the database documentation).
fprintf('Orignal: %f ~ %f\n', min(scores), max(scores))
scores = scores / 9;
fprintf('Norm.  : %f ~ %f\n', min(scores), max(scores))

%% Write
% Reference and distortion indices are written zero-based.
fid = fopen([base_path out_file], 'w');
for im_idx = 1:n_files
    fprintf(fid, '%d %d %s %s %f\n', ref_idx(im_idx) - 1, dist_idx(im_idx) - 1, ...
        ref_imgs{im_idx}, dist_imgs{im_idx}, scores(im_idx));
end
fclose(fid);
54 | 


--------------------------------------------------------------------------------
/IQA_DeepQA_FR_release/default_config.yaml:
--------------------------------------------------------------------------------
 1 | database:
 2 |     # database
 3 |     sel_data:               # database list: LIVE / TID2008 / TID2013 / ...
 4 |     scenes: all             # list of ref. images, default='all'
 5 |     dist_types: all         # distortion types, default='all'
 6 | 
 7 |     # patch
 8 |     patch_size:             # ex) [32, 32]
 9 |     patch_step:             # ex) [32, 32]
10 |     patch_mode:             # ex) 'both_side' or 'shift_center'
11 |     random_crops: 0         # if > 0, randomly crop n samples
12 | 
13 |     # pre-processing
14 |     horz_ref: False         # reflect in horizontal direction
15 |     std_filt_r: 1.0         # filter patches using STD
16 |     color: gray             # ex) 'gray' or 'rgb' or 'ycbcr'
17 |     local_norm: False       # mean-subtracted and locally normalized images
18 | 
19 |     # etc.
20 |     train_size: 0.8         # ratio of training data over total data
21 |     shuffle: False          # shuffle data
22 |     sel_fr_met:             # select a FR-IQA metric: SSIM/GMS/FSIM/FSIMc/VSI
23 |     reverse_mos: False      # if True, MOS -> 1.0 - MOS
24 | 
25 | model:
26 |     model:                  # model file path ex) IQA_DeepQA_FR_release.models.FR_deep_1
27 |     input_size:             # ex) [32, 32]
28 |     num_ch:                 # ex) 3
29 |     opt_scheme: adam        # optimization scheme
30 |     lr: 1e-4                # initial learning rate
31 | 
32 | training:
33 |     batch_size: 100         # number of data in a batch
34 |     epochs: 100             # number of epochs to train
35 | 
36 |     test_freq: 3           # validate the trained model every test_freq
37 |     save_freq: 6            # save data every save_freq
38 |     regular_snap_freq: 50   # save model snapshot every regular_snap_freq
39 | 
40 |     n_imgs_to_record: 40    # number of images to record
41 |     prefix: ''              # prefix of filenames of recording data
42 | 


--------------------------------------------------------------------------------
/IQA_DeepQA_FR_release/data_load/TID2008.py:
--------------------------------------------------------------------------------
 1 | from __future__ import absolute_import, division, print_function
 2 | import numpy as np
 3 | 
 4 | # Define DB information
 5 | BASE_PATH = 'D:/DB/IQA/TID2008'
 6 | LIST_FILE_NAME = 'TID2008.txt'
 7 | ALL_SCENES = list(range(24))
 8 | # ALL_SCENES = list(range(25))
 9 | ALL_DIST_TYPES = list(range(17))
10 | 
11 | 
12 | def make_image_list(scenes, dist_types=None, show_info=True):
13 |     """
14 |     Make image list from TID2008 database
15 |     TID2008: 25 reference images x 17 distortions x 4 levels
16 |     """
17 |     # Get reference / distorted image file lists:
18 |     # d_img_list and score_list
19 |     d_img_list, r_img_list, r_idx_list, score_list = [], [], [], []
20 |     # list_file_name = os.path.join(BASE_PATH, LIST_FILE_NAME)
21 |     list_file_name = LIST_FILE_NAME
22 |     with open(list_file_name, 'r') as listFile:
23 |         for line in listFile:
24 |             # ref_idx ref_name dist_name dist_types, DMOS
25 |             (scn_idx, dis_idx, ref, dis, score) = line.split()
26 |             scn_idx = int(scn_idx)
27 |             dis_idx = int(dis_idx)
28 |             if scn_idx in scenes and dis_idx in dist_types:
29 |                 d_img_list.append(dis)
30 |                 r_img_list.append(ref)
31 |                 r_idx_list.append(scn_idx)
32 |                 score_list.append(float(score))
33 | 
34 |     score_list = np.array(score_list, dtype='float32')
35 |     n_images = len(d_img_list)
36 | 
37 |     if show_info:
38 |         print(' - Scenes: %s' % ', '.join([str(i) for i in scenes]))
39 |         print(' - Distortion types: %s' % ', '.join(
40 |             [str(i) for i in dist_types]))
41 |         print(' - Number of images: {:,}'.format(n_images))
42 |         print(' - MOS range: [{:.2f}, {:.2f}]'.format(
43 |             np.min(score_list), np.max(score_list)))
44 | 
45 |     return {
46 |         'scenes': scenes,
47 |         'dist_types': dist_types,
48 |         'base_path': BASE_PATH,
49 |         'n_images': n_images,
50 |         'd_img_list': d_img_list,
51 |         'r_img_list': r_img_list,
52 |         'r_idx_list': r_idx_list,
53 |         'score_list': score_list}
54 | 


--------------------------------------------------------------------------------
/IQA_DeepQA_FR_release/data_load/TID2013.py:
--------------------------------------------------------------------------------
 1 | from __future__ import absolute_import, division, print_function
 2 | import numpy as np
 3 | 
 4 | # Define DB information
 5 | BASE_PATH = 'D:/DB/IQA/TID2013'
 6 | LIST_FILE_NAME = 'TID2013.txt'
 7 | ALL_SCENES = list(range(24))
 8 | # ALL_SCENES = list(range(25))
 9 | ALL_DIST_TYPES = list(range(24))
10 | 
11 | 
12 | def make_image_list(scenes, dist_types=None, show_info=True):
13 |     """
14 |     Make image list from TID2013 database
15 |     TID2013: 25 reference images x 24 distortions x 5 levels
16 |     """
17 |     # Get reference / distorted image file lists:
18 |     # d_img_list and score_list
19 |     d_img_list, r_img_list, r_idx_list, score_list = [], [], [], []
20 |     # list_file_name = os.path.join(BASE_PATH, LIST_FILE_NAME)
21 |     list_file_name = LIST_FILE_NAME
22 |     with open(list_file_name, 'r') as listFile:
23 |         for line in listFile:
24 |             # ref_idx ref_name dist_name dist_types, DMOS
25 |             (scn_idx, dis_idx, ref, dis, score) = line.split()
26 |             scn_idx = int(scn_idx)
27 |             dis_idx = int(dis_idx)
28 |             if scn_idx in scenes and dis_idx in dist_types:
29 |                 d_img_list.append(dis)
30 |                 r_img_list.append(ref)
31 |                 r_idx_list.append(scn_idx)
32 |                 score_list.append(float(score))
33 | 
34 |     score_list = np.array(score_list, dtype='float32')
35 |     n_images = len(d_img_list)
36 | 
37 |     if show_info:
38 |         print(' - Scenes: %s' % ', '.join([str(i) for i in scenes]))
39 |         print(' - Distortion types: %s' % ', '.join(
40 |             [str(i) for i in dist_types]))
41 |         print(' - Number of images: {:,}'.format(n_images))
42 |         print(' - MOS range: [{:.2f}, {:.2f}]'.format(
43 |             np.min(score_list), np.max(score_list)))
44 | 
45 |     return {
46 |         'scenes': scenes,
47 |         'dist_types': dist_types,
48 |         'base_path': BASE_PATH,
49 |         'n_images': n_images,
50 |         'd_img_list': d_img_list,
51 |         'r_img_list': r_img_list,
52 |         'r_idx_list': r_idx_list,
53 |         'score_list': score_list}
54 | 


--------------------------------------------------------------------------------
/gen_list_CSIQ.m:
--------------------------------------------------------------------------------
% Build the image list file for the CSIQ database.
% Reads 'csiq_dmos.txt' from base_path and writes out_file into base_path,
% one line per distorted image:
%   <ref_idx> <dist_idx> <ref_image_path> <dist_image_path> <dmos>
clear
fclose all;

%% Parameters
base_path = 'D:/DB/IQA/CSIQ/';
ref_subpath = 'src_imgs/';
dist_subpath = 'dst_imgs/';
out_file = 'CSIQ.txt';

% "csiq_dmos.txt" is made manually by copying the values from "csiq.DMOS.xlsx"
% The text it contains has the following format:
% 1600	1	noise	1	0.061	0.062
% 1600	1	noise	2	0.097	0.206
% 1600	1	noise	3	0.033	0.262
% 1600	1	noise	4	0.107	0.375
% 1600	1	noise	5	0.120	0.467

fid = fopen([base_path 'csiq_dmos.txt'], 'r');
% image dst_idx dst_type dst_lev dmos_std dmos
formatSpec = '%s %d %s %d %f %f';
% data = fscanf(fid, formatSpec, [6 Inf]);
data = textscan(fid, formatSpec);
fclose(fid);

% Column 5 (dmos_std) is read but not used.
ref_names = data{1};
dist_idx = data{2};
dist_types = data{3};
dist_levs = data{4};
scores = data{6};

% Norm scores
% fprintf('Orignal: %f ~ %f\n', min(scores), max(scores))
% scores = (scores - min(scores)) / (max(scores) - min(scores));
% fprintf('Norm.  : %f ~ %f\n', min(scores), max(scores))

%% Dis/Ref images
n_files = size(ref_names, 1);
dist_imgs = cell(n_files, 1);
ref_imgs = cell(n_files, 1);

for im_idx = 1:n_files
    % Distorted image path: <dist_subpath><type>/<ref>.<type>.<level>.png
    dist_imgs{im_idx} = [dist_subpath dist_types{im_idx} '/' ...
        ref_names{im_idx} '.' dist_types{im_idx} '.' num2str(dist_levs(im_idx)) '.png'];
    ref_imgs{im_idx} = [ref_subpath ref_names{im_idx} '.png'];
end

%% Ref idx
% Number the reference images in order of appearance. The counter only
% advances when the name differs from the previous row, so rows of the same
% reference image must be consecutive in the input file.
ref_idx = zeros(n_files, 1);
ref_cnt = 1;
prev_ref_name = ref_names{1};
for im_idx = 1:n_files
    cur_ref_name = ref_names{im_idx};
    if strcmp(prev_ref_name, cur_ref_name)
        ref_idx(im_idx) = ref_cnt;
    else
        ref_cnt = ref_cnt + 1;
        prev_ref_name = cur_ref_name;
        ref_idx(im_idx) = ref_cnt;
    end    
end

%% Write
% Reference and distortion indices are written zero-based.
fid = fopen([base_path out_file], 'w');
for im_idx = 1:n_files
    fprintf(fid, '%d %d %s %s %f\n', ref_idx(im_idx) - 1, dist_idx(im_idx) - 1, ...
        ref_imgs{im_idx}, dist_imgs{im_idx}, scores(im_idx));
end
fclose(fid);
69 | 


--------------------------------------------------------------------------------
/IQA_DeepQA_FR_release/data_load/CSIQ.py:
--------------------------------------------------------------------------------
 1 | from __future__ import absolute_import, division, print_function
 2 | import os
 3 | import numpy as np
 4 | 
 5 | # Define DB information
 6 | BASE_PATH = 'D:/DB/IQA/CSIQ'
 7 | LIST_FILE_NAME = 'CSIQ.txt'
 8 | ALL_SCENES = list(range(30))
 9 | ALL_DIST_TYPES = list(range(6))
10 | 
11 | 
12 | def make_image_list(scenes, dist_types=None, show_info=True):
13 |     """
14 |     Make image list from CSIQ database
15 |     """
16 | 
17 |     # Get reference / distorted image file lists:
18 |     # d_img_list and score_list
19 |     d_img_list, r_img_list, r_idx_list, score_list = [], [], [], []
20 |     # list_file_name = os.path.join(BASE_PATH, LIST_FILE_NAME)
21 |     list_file_name = LIST_FILE_NAME
22 |     with open(list_file_name, 'r') as listFile:
23 |         for line in listFile:
24 |             # ref_idx ref_name dist_name dist_types, DMOS
25 |             (scn_idx, dis_idx, ref, dis, score) = line.split()
26 |             scn_idx = int(scn_idx)
27 |             dis_idx = int(dis_idx)
28 |             if scn_idx in scenes and dis_idx in dist_types:
29 |                 d_img_list.append(dis)
30 |                 r_img_list.append(ref)
31 |                 r_idx_list.append(scn_idx)
32 |                 score_list.append(float(score))
33 | 
34 |     score_list = np.array(score_list, dtype='float32')
35 |     # DMOS -> reverse subjecive scores by default
36 |     score_list = 1.0 - score_list
37 |     n_images = len(d_img_list)
38 | 
39 |     dist_names = ['awgn', 'jpeg', 'jpeg2000', 'fnoise', 'blur', 'contrast']
40 |     if show_info:
41 |         print(' - Scenes: %s' % ', '.join([str(i) for i in scenes]))
42 |         print(' - Distortion types: %s' % ', '.join(
43 |             [dist_names[idx] for idx in dist_types]))
44 |         print(' - Number of images: {:,}'.format(n_images))
45 |         print(' - DMOS range: [{:.2f}, {:.2f}]'.format(
46 |             np.min(score_list), np.max(score_list)), end='')
47 |         print(' (Scale reversed)')
48 | 
49 |     return {
50 |         'scenes': scenes,
51 |         'dist_types': dist_types,
52 |         'base_path': BASE_PATH,
53 |         'n_images': n_images,
54 |         'd_img_list': d_img_list,
55 |         'r_img_list': r_img_list,
56 |         'r_idx_list': r_idx_list,
57 |         'score_list': score_list}
58 | 
59 | 


--------------------------------------------------------------------------------
/IQA_DeepQA_FR_release/data_load/LIVE.py:
--------------------------------------------------------------------------------
 1 | from __future__ import absolute_import, division, print_function
 2 | import numpy as np
 3 | 
 4 | # Define DB information
 5 | BASE_PATH = 'D:/DB/IQA/LIVE/LIVE IQA DB'
 6 | LIST_FILE_NAME = 'LIVE_IQA.txt'
 7 | ALL_SCENES = list(range(29))
 8 | ALL_DIST_TYPES = list(range(5))
 9 | 
10 | 
11 | def make_image_list(scenes, dist_types=None, show_info=True):
12 |     """
13 |     Make image list from LIVE database
14 |     LIVE: 29 reference images x 5 distortions
15 |     (jpeg2000: 227 / jpeg: 233 / white_noise: 174 /
16 |         gaussian_blur: 174 / fast_fading: 174)
17 |     """
18 | 
19 |     # Get reference / distorted image file lists:
20 |     # d_img_list and score_list
21 |     d_img_list, r_img_list, r_idx_list, score_list = [], [], [], []
22 |     # list_file_name = os.path.join(BASE_PATH, LIST_FILE_NAME)
23 |     list_file_name = LIST_FILE_NAME
24 |     with open(list_file_name, 'r') as listFile:
25 |         for line in listFile:
26 |             # ref_idx ref_name dist_name dist_types, DMOS, width, height
27 |             scn_idx, dis_idx, ref, dis, score, width, height = line.split()
28 |             scn_idx = int(scn_idx)
29 |             dis_idx = int(dis_idx)
30 |             if scn_idx in scenes and dis_idx in dist_types:
31 |                 d_img_list.append(dis)
32 |                 r_img_list.append(ref)
33 |                 r_idx_list.append(scn_idx)
34 |                 score_list.append(float(score))
35 | 
36 |     score_list = np.array(score_list, dtype='float32')
37 |     # DMOS -> reverse subjecive scores by default
38 |     score_list = 1.0 - score_list
39 |     n_images = len(d_img_list)
40 | 
41 |     dist_names = ['jp2k', 'jpeg', 'wn', 'gblur', 'fastfading']
42 |     if show_info:
43 |         scenes.sort()
44 |         print(' - Scenes: %s' % ', '.join([str(i) for i in scenes]))
45 |         print(' - Distortion types: %s' % ', '.join(
46 |             [dist_names[idx] for idx in dist_types]))
47 |         print(' - Number of images: {:,}'.format(n_images))
48 |         print(' - DMOS range: [{:.2f}, {:.2f}]'.format(
49 |             np.min(score_list), np.max(score_list)), end='')
50 |         print(' (Scale reversed)')
51 | 
52 |     return {
53 |         'scenes': scenes,
54 |         'dist_types': dist_types,
55 |         'base_path': BASE_PATH,
56 |         'n_images': n_images,
57 |         'd_img_list': d_img_list,
58 |         'r_img_list': r_img_list,
59 |         'r_idx_list': r_idx_list,
60 |         'score_list': score_list}
61 | 


--------------------------------------------------------------------------------
/gen_list_LIVE_IQA.m:
--------------------------------------------------------------------------------
% Build the image list file for the LIVE IQA database.
% Reads the reference-name and DMOS .mat files from base_path and writes
% out_file into base_path, one line per distorted image:
%   <ref_idx> <dist_type> <ref_image_path> <dist_image_path> <dmos> <width> <height>
clear
fclose all;

%% Parameters
base_path = 'D:/DB/IQA/LIVE/LIVE IQA DB/';
% Number of distorted images per distortion type, in dist_subpath order.
n_dist_set = [227, 233, 174, 174, 174];
dist_subpath = {'jp2k/', 'jpeg/', 'wn/', 'gblur/', 'fastfading/'};
ref_subpath = 'refimgs/';
ref_name_file = 'refnames_all.mat';
dmos_file = 'dmos_realigned.mat';
out_file = 'LIVE_IQA.txt';
% out_file = 'LIVE_IQA_nonorm.txt';

%% Dis/Ref images
% Loading ref_name_file provides the variable refnames_all used below.
load([base_path, ref_name_file]);
n_files = sum(n_dist_set);
ref_imgs = refnames_all';
for idx = 1:n_files
    ref_imgs{idx} = [ref_subpath, refnames_all{idx}];
end
dist_imgs = cell(n_files, 1);
dist_types = zeros(n_files, 1);
idx = 1;
% Distorted images are named img1.bmp, img2.bmp, ... inside each
% distortion sub-directory.
for dist_idx = 1:5
    for im_idx = 1:n_dist_set(dist_idx)
        dist_imgs{idx} = [dist_subpath{dist_idx}, sprintf('img%d.bmp', im_idx)];
        dist_types(idx) = dist_idx;
        idx = idx + 1;
    end
end

%% Resolutions
% The resolution is taken from the reference image and stored as
% [height, width].
res_list = zeros(n_files, 2);
for idx = 1:n_files
    ref_img = imread([base_path ref_imgs{idx}]);
    [height_r, width_r, ch_r] = size(ref_img);
%     dist_img = imread([base_path dist_imgs{idx}]);
%     [height_d, width_d, ch_d] = size(dist_img);
%     if height_r ~= height_d
%         fprintf('Height not matched %s - %s', dist_imgs{idx}, ref_imgs{idx})
%     end
%     if width_r ~= width_d
%         fprintf('Width not matched %s - %s', dist_imgs{idx}, ref_imgs{idx})
%     end
%     if ch_r ~= ch_d
%         fprintf('Channel not matched %s - %s', dist_imgs{idx}, ref_imgs{idx})
%     end
    res_list(idx, :) = [height_r, width_r];
end

%% DMOSs
% The DMOS values are used as stored in dmos_new (no rescaling).
mos_str = load([base_path, dmos_file]);
dmos_live = mos_str.dmos_new';
% dmos_max = max(dmos_live);
% dmos_min = 0;
% dmos_live(dmos_live < 0) = 0;
% mos_data = (dmos_live - dmos_min) / (dmos_max - dmos_min);
mos_data = dmos_live;

%% Sort
% Sort all per-image arrays by reference image path so that images sharing
% a reference become consecutive; the index loop below relies on this.
[ref_imgs_, I] = sort(ref_imgs);
dist_types_ = dist_types(I);
dist_imgs_ = dist_imgs(I);
mos_data_ = mos_data(I);
res_list_ = res_list(I, :);

%% Ref idx
% Number the reference images in sorted order; the counter advances each
% time the reference name changes.
ref_idx = zeros(n_files, 1);
ref_cnt = 1;
prev_ref_name = ref_imgs_{1};
for im_idx = 1:n_files
    cur_ref_name = ref_imgs_{im_idx};
    if strcmp(prev_ref_name, cur_ref_name)
        ref_idx(im_idx) = ref_cnt;
    else
        ref_cnt = ref_cnt + 1;
        prev_ref_name = cur_ref_name;
        ref_idx(im_idx) = ref_cnt;
    end    
end

%% Write
% Indices are written zero-based; the resolution is written as width
% (column 2 of res_list_) followed by height (column 1).
fid = fopen([base_path, out_file], 'w');
for im_idx = 1:n_files
    fprintf(fid, '%d %d %s %s %f %d %d\n', ref_idx(im_idx) - 1, dist_types_(im_idx) - 1, ...
        ref_imgs_{im_idx}, dist_imgs_{im_idx}, mos_data_(im_idx), res_list_(im_idx, 2), res_list_(im_idx, 1));
end
fclose(fid);
89 | 
90 | 


--------------------------------------------------------------------------------
/IQA_DeepQA_FR_release/models/model_record.py:
--------------------------------------------------------------------------------
  1 | from __future__ import absolute_import, division, print_function
  2 | 
  3 | from collections import OrderedDict
  4 | 
  5 | 
  6 | class Record(object):
  7 | 
  8 |     def __init__(self):
  9 |         self.rec_data = OrderedDict()
 10 |         self.rec_im_data = OrderedDict()
 11 |         self.rec_imgs = OrderedDict()
 12 |         self.rec_kernels = []
 13 | 
 14 |     ###########################################################################
 15 |     # Functions for recording data
 16 | 
 17 |     @property
 18 |     def data_keys(self):
 19 |         """Get dictionary keys of `rec_data`"""
 20 |         return list(self.rec_data)
 21 | 
 22 |     @property
 23 |     def im_data_keys(self):
 24 |         """Get dictionary keys of `rec_im_data`"""
 25 |         return list(self.rec_im_data)
 26 | 
 27 |     @property
 28 |     def imgs_keys(self):
 29 |         """Get dictionary keys of `rec_imgs`"""
 30 |         return list(self.rec_imgs)
 31 | 
 32 |     @property
 33 |     def num_data(self):
 34 |         """Get number of `rec_data`"""
 35 |         return len(self.rec_data)
 36 | 
 37 |     @property
 38 |     def num_im_data(self):
 39 |         """Get number of `rec_im_data`"""
 40 |         return len(self.rec_im_data)
 41 | 
 42 |     @property
 43 |     def num_imgs(self):
 44 |         """Get number of `rec_imgs`"""
 45 |         return len(self.rec_imgs)
 46 | 
 47 |     def empty_records(self):
 48 |         self.rec_data.clear()
 49 |         self.rec_im_data.clear()
 50 |         self.rec_imgs.clear()
 51 |         self.rec_kernels = []
 52 | 
 53 |     def add_data(self, name, data, **kwargs):
 54 |         """Add scalar data of one minibatcth to monitor.
 55 |         """
 56 |         kwargs['data'] = data
 57 |         self.rec_data[name] = kwargs
 58 | 
 59 |     def add_im_data(self, name, data, **kwargs):
 60 |         """Add scalar data for each image (imagewise) or patch (patchwise)
 61 |         to record.
 62 |         """
 63 |         kwargs['data'] = data
 64 |         self.rec_im_data[name] = kwargs
 65 | 
 66 |     def add_imgs(self, name, data, **kwargs):
 67 |         """Add image data for each image (imagewise) or patch (patchwise)
 68 |         to record.
 69 |         Supplementary information can be added via `**kwargs`.
 70 |         """
 71 |         kwargs['data'] = data
 72 |         self.rec_imgs[name] = kwargs
 73 | 
 74 |     def get_function_outputs(self, train=False):
 75 |         if train:
 76 |             return (self.get_data())
 77 |         else:
 78 |             return (self.get_data() + self.get_im_data() + self.get_imgs())
 79 | 
 80 |     def get_data(self):
 81 |         return [elem['data'] for elem in list(self.rec_data.values())]
 82 | 
 83 |     def get_im_data(self):
 84 |         return [elem['data'] for elem in list(self.rec_im_data.values())]
 85 | 
 86 |     def get_imgs(self):
 87 |         return [elem['data'] for elem in list(self.rec_imgs.values())]
 88 | 
 89 |     def get_until_indices(self, start=1):
 90 |         """Returns the 'until-indices' for each recording data type.
 91 |         """
 92 |         until_loss = len(self.rec_data) + start
 93 |         until_im_info = until_loss + len(self.rec_im_data)
 94 |         until_img = until_im_info + len(self.rec_imgs)
 95 |         return until_loss, until_im_info, until_img
 96 | 
 97 |     def add_kernel(self, layers, nth_layers):
 98 |         """Add a kernel image from the `nth_layers` of self.layers[`key`]
 99 |         to record.
100 |         """
101 |         if isinstance(nth_layers, (list, tuple)):
102 |             for nth in nth_layers:
103 |                 layer = layers[nth]
104 |                 assert layer.__class__.__name__ == 'ConvLayer'
105 |                 self.rec_kernels.append(layer.W)
106 |         else:
107 |             layer = layers[nth_layers]
108 |             assert layer.__class__.__name__ == 'ConvLayer'
109 |             self.rec_kernels.append(layer.W)
110 | 
111 |     # def get_rec_info(self):
112 |     #     rec_info = {}
113 |     #     rec_info['rec_data'] = self.exclude_info(self.rec_data, 'data')
114 |     #     rec_info['rec_im_data'] = self.exclude_info(self.rec_im_data, 'data')
115 |     #     rec_info['rec_imgs'] = self.exclude_info(self.rec_imgs, 'data')
116 |     #     return rec_info
117 | 
118 |     # def exclude_info(self, dic, exclude):
119 |     #     new_dic = OrderedDict()
120 |     #     for dic_key in dic:
121 |     #         new_elems = {}
122 |     #         for elem_key in dic[dic_key]:
123 |     #             if elem_key == exclude:
124 |     #                 continue
125 |     #             else:
126 |     #                 new_elems[elem_key] = dic[dic_key][elem_key]
127 |     #         new_dic[dic_key] = new_elems
128 |     #     return new_dic
129 | 


--------------------------------------------------------------------------------
/IQA_DeepQA_FR_release/laplacian_pyr.py:
--------------------------------------------------------------------------------
  1 | from __future__ import absolute_import, division, print_function
  2 | 
  3 | # import theano.tensor as T
  4 | import numpy as np
  5 | import theano
  6 | from theano.tensor.nnet import conv2d
  7 | from theano.tensor.nnet.abstract_conv import conv2d_grad_wrt_inputs
  8 | 
# 1-D taps [1, 4, 6, 4, 1]; their outer product gives the 5x5 smoothing kernel.
k = np.float32([1, 4, 6, 4, 1])
k = np.outer(k, k)
# Normalise to unit sum and reshape to a (1, 1, 5, 5) single-channel
# filter bank, then wrap it in a Theano shared variable.
k5x5 = (k / k.sum()).reshape((1, 1, 5, 5))
kern = theano.shared(k5x5, borrow=True)

# 3-channel bank: multiplying by the 3x3 identity zeroes every
# cross-channel filter, so each channel is filtered on its own.
# The (5, 5, 3, 3) product is transposed to (3, 3, 5, 5).
k5x5_3ch = k[:, :, None, None] / k.sum() * np.eye(3, dtype=np.float32)
k5x5_3ch = k5x5_3ch.transpose([2, 3, 0, 1])
kern_3ch = theano.shared(k5x5_3ch, borrow=True)
 17 | 
 18 | 
 19 | def downsample_img(img, n_ch=1):
 20 |     if n_ch == 1:
 21 |         kernel = kern
 22 |         filter_shape = [1, 1, 5, 5]
 23 |     elif n_ch == 3:
 24 |         kernel = kern_3ch
 25 |         filter_shape = [3, 3, 5, 5]
 26 |     else:
 27 |         raise NotImplementedError
 28 |     return conv2d(img, kernel, filter_shape=filter_shape,
 29 |                   border_mode='half', subsample=(2, 2))
 30 | 
 31 | 
 32 | def upsample_img(img, out_shape, n_ch=1):
 33 |     if n_ch == 1:
 34 |         kernel = kern * 4
 35 |         filter_shape = [1, 1, 5, 5]
 36 |     elif n_ch == 3:
 37 |         kernel = kern_3ch * 4
 38 |         filter_shape = [3, 3, 5, 5]
 39 |     else:
 40 |         raise NotImplementedError
 41 |     return conv2d_tr_half(img, kernel, filter_shape=filter_shape,
 42 |                           input_shape=out_shape, subsample=(2, 2))
 43 | 
 44 | 
 45 | def conv2d_tr_half(output, filters, filter_shape, input_shape,
 46 |                    subsample=(1, 1)):
 47 |     input = conv2d_grad_wrt_inputs(
 48 |         output, filters,
 49 |         input_shape=(None, filter_shape[0], input_shape[2], input_shape[3]),
 50 |         filter_shape=filter_shape, border_mode='half', subsample=subsample)
 51 |     return input
 52 | 
 53 | 
 54 | def lap_split(img, n_ch=1):
 55 |     '''Split the image into lo and hi frequency components'''
 56 |     lo = downsample_img(img, n_ch)
 57 |     lo2 = upsample_img(lo, img.shape, n_ch)
 58 |     hi = img - lo2
 59 |     return lo, hi
 60 | 
 61 | 
 62 | def gen_lpyr(img, n_level, n_ch=1):
 63 |     '''Build Laplacian pyramid with n_level splits'''
 64 |     l_pyr = []
 65 |     for i in range(n_level - 1):
 66 |         img, hi = lap_split(img, n_ch)
 67 |         l_pyr.append(hi)
 68 |     l_pyr.append(img)
 69 |     return l_pyr
 70 | 
 71 | 
 72 | def gen_gpyr(img, n_level, n_ch=1):
 73 |     """Generate a Gaussian pyramid."""
 74 |     g_pyr = []
 75 |     g_pyr.append(img)
 76 |     for idx in range(n_level - 1):
 77 |         g_pyr.append(downsample_img(g_pyr[idx], n_ch))
 78 |     return g_pyr
 79 | 
 80 | 
 81 | def merge_lpyr(l_pyr, n_ch=1):
 82 |     '''Merge Laplacian pyramid'''
 83 |     l_pyr = l_pyr[::-1]
 84 |     img = l_pyr[0]
 85 |     for hi in l_pyr[1:]:
 86 |         img = upsample_img(img, hi.shape, n_ch) + hi
 87 |     return img
 88 | 
 89 | 
 90 | def normalize_lowpass_subt(img, n_level, n_ch=1):
 91 |     '''Normalize image by subtracting the low-pass-filtered image'''
 92 |     # Downsample
 93 |     img_ = img
 94 |     pyr_sh = []
 95 |     for i in range(n_level - 1):
 96 |         pyr_sh.append(img_.shape)
 97 |         img_ = downsample_img(img_, n_ch)
 98 | 
 99 |     # Upsample
100 |     for i in range(n_level - 1):
101 |         img_ = upsample_img(img_, pyr_sh[n_level - 2 - i], n_ch)
102 |     return img - img_
103 | 
104 | 
def get_hi_lo_lap(img, n_level, n_ch=1):
    """Return the high-pass residual of `img` and its coarsest low-pass image.

    `img` is downsampled `n_level - 1` times; that coarsest result is the
    second return value. It is then upsampled back through the recorded
    shapes and subtracted from `img` to give the first return value.
    """
    shapes = []
    low = img
    # Downsample, remembering the shape before each step.
    for _ in range(n_level - 1):
        shapes.append(low.shape)
        low = downsample_img(low, n_ch)
    coarsest = low

    # Upsample through the recorded shapes in reverse order.
    for shape in reversed(shapes):
        low = upsample_img(low, shape, n_ch)
    return img - low, coarsest
119 | 
120 | 
def get_lowfreq_upscale(l_pyr, n_ch=1):
    """Upsample the last pyramid entry to the shape of the first entry.

    The last element of `l_pyr` is upsampled step by step through the
    shapes of the preceding entries, from second-to-last down to the first.
    """
    low = l_pyr[-1]
    for finer in l_pyr[-2::-1]:
        low = upsample_img(low, finer.shape, n_ch)
    return low
129 | 
130 | 
131 | # def downsample_img(img, n_ch=1):
132 | #     """Downsample an image by 2 by 2"""
133 | #     if n_ch == 1:
134 | #         output = conv2d(img, kern, filter_shape=(1, 1, 5, 5),
135 | #                         border_mode='half')
136 | #     elif n_ch > 1:
137 | #         conv_outs = []
138 | #         for ch in range(n_ch):
139 | #             cur_ch = img[:, ch, :, :].dimshuffle(0, 'x', 1, 2)
140 | #             conv_outs.append(conv2d(cur_ch, kern, filter_shape=(1, 1, 5, 5),
141 | #                                     border_mode='half'))
142 | #         output = T.concatenate(conv_outs, axis=1)
143 | #     else:
144 | #         raise NotImplementedError
145 | #     return output[:, :, ::2, ::2]
146 | 
147 | 
148 | # def upsample_img(img, out_shape, n_ch=1):
149 | #     """Upsample an image by 2 by 2"""
150 | #     img_up = img.repeat(2, axis=2).repeat(2, axis=3)
151 | #     if n_ch == 1:
152 | #         output = conv2d(img_up, kern, filter_shape=(1, 1, 5, 5),
153 | #                         border_mode='half')
154 | #     elif n_ch > 1:
155 | #         conv_outs = []
156 | #         for ch in range(n_ch):
157 | #             cur_ch = img_up[:, ch, :, :].dimshuffle(0, 'x', 1, 2)
158 | #             conv_outs.append(conv2d(cur_ch, kern, filter_shape=(1, 1, 5, 5),
159 | #                                     border_mode='half'))
160 | #         output = T.concatenate(conv_outs, axis=1)
161 | #     else:
162 | #         raise NotImplementedError
163 | #     return output[:, :, :out_shape[2], :out_shape[3]]
164 | 


--------------------------------------------------------------------------------
/IQA_DeepQA_FR_release/train_iqa.py:
--------------------------------------------------------------------------------
  1 | from __future__ import absolute_import, division, print_function
  2 | 
  3 | import os
  4 | import timeit
  5 | from importlib import import_module
  6 | 
  7 | import numpy as np
  8 | import theano
  9 | import theano.tensor as T
 10 | 
 11 | from .config_parser import config_parser, dump_config
 12 | from .data_load.data_loader_IQA import DataLoader
 13 | from .trainer import Trainer
 14 | 
 15 | 
 16 | def train_iqa(config_file, section, snap_path,
 17 |               output_path=None, snap_file=None, tr_te_file=None):
 18 |     """
 19 |     Imagewise training of an IQA model using both reference and
 20 |     distorted images.
 21 |     """
 22 |     db_config, model_config, train_config = config_parser(
 23 |         config_file, section)
 24 | 
 25 |     # Check snapshot file
 26 |     if snap_file is not None:
 27 |         assert os.path.isfile(snap_file), \
 28 |             'Not existing snap_file: %s' % snap_file
 29 | 
 30 |     # Initialize patch step
 31 |     init_patch_step(db_config, int(model_config.get('ign', 0)),
 32 |                     int(model_config.get('ign_scale', 1)))
 33 | 
 34 |     # Load data
 35 |     data_loader = DataLoader(db_config)
 36 |     train_data, test_data = data_loader.load_data_tr_te(tr_te_file)
 37 |     # train_data, test_data = data_loader.load_toy_data_tr_te()
 38 | 
 39 |     # Create model
 40 |     model = create_model(model_config,
 41 |                          train_data.patch_size, train_data.num_ch)
 42 |     if snap_file is not None:
 43 |         model.load(snap_file)
 44 | 
 45 |     # Create trainer
 46 |     trainer = Trainer(train_config, snap_path, output_path)
 47 | 
 48 |     # Store current configuration file
 49 |     dump_config(os.path.join(snap_path, 'config.yaml'),
 50 |                 db_config, model_config, train_config)
 51 | 
 52 |     ###########################################################################
 53 |     # Train the model
 54 |     epochs = train_config.get('epochs', 100)
 55 |     batch_size = train_config.get('batch_size', 4)
 56 | 
 57 |     score = run_iqa_iw(
 58 |         train_data, test_data, model, trainer, epochs, batch_size)
 59 |     print("Best SRCC: {:.3f}, PLCC: {:.3f} ({:d})".format(
 60 |         score[0], score[1], score[2]))
 61 | 
 62 | 
def run_iqa_iw(train_data, test_data, model, trainer, epochs, n_batch_imgs,
               x_c=None, x=None, mos_set=None, bat2img_idx_set=None,
               prefix2='iqa_'):
    """
    Compile the imagewise training/testing Theano functions and run the
    trainer's main routine.

    Training batches hold `n_batch_imgs` images; testing always uses one
    image per batch. The symbolic inputs `x`, `x_c`, `mos_set` and
    `bat2img_idx_set` are created here when not supplied.

    Returns whatever `trainer.training_routine` returns.

    @type model: .models.model_basis.ModelBasis
    @type train_data: .data_load.dataset.Dataset
    @type test_data: .data_load.dataset.Dataset
    """
    # Testing is always done one image at a time.
    te_n_batch_imgs = 1

    # Make dummy shared dataset
    # The host buffers are sized for the largest training batch:
    # (max value in column 0 of npat_img_list) * n_batch_imgs patches.
    # NOTE(review): the same buffers are reused for test batches below;
    # presumably a test image never yields more patches than this - confirm.
    max_num_patch = np.max(np.asarray(train_data.npat_img_list)[:, 0])
    n_pats_dummy = max_num_patch * n_batch_imgs
    sh = model.input_shape
    # Buffer axes are (patch, sh[2], sh[3], sh[1]).
    np_set_r = np.zeros((n_pats_dummy, sh[2], sh[3], sh[1]), dtype='float32')
    np_set_d = np.zeros((n_pats_dummy, sh[2], sh[3], sh[1]), dtype='float32')
    shared_set_r = theano.shared(np_set_r, borrow=True)
    shared_set_d = theano.shared(np_set_d, borrow=True)

    train_data.set_imagewise()
    test_data.set_imagewise()

    print('\nCompile theano function: Regress on MOS', end='')
    print(' (imagewise / low GPU memory)')
    start_time = timeit.default_timer()
    # Create any symbolic input the caller did not provide.
    if x is None:
        x = T.ftensor4('x')
    if x_c is None:
        x_c = T.ftensor4('x_c')
    if mos_set is None:
        mos_set = T.vector('mos_set')
    if bat2img_idx_set is None:
        bat2img_idx_set = T.imatrix('bat2img_idx_set')

    print(' (Make training model)')
    # The training graph must be built while the model is in training mode.
    model.set_training_mode(True)
    cost, updates, rec_train = model.cost_updates_iqa(
        x, x_c, mos_set, n_batch_imgs, bat2img_idx_set)
    outputs = [cost] + rec_train.get_function_outputs(train=True)

    # x / x_c are not explicit inputs: they are substituted by the shared
    # buffers, which are refilled before every call (see helpers below).
    train_model = theano.function(
        [mos_set, bat2img_idx_set],
        [output for output in outputs],
        updates=updates,
        givens={
            x: shared_set_r,
            x_c: shared_set_d
        },
        on_unused_input='warn'
    )

    print(' (Make testing model)')
    # Switch to inference mode before building the test graph.
    model.set_training_mode(False)
    cost, rec_test = model.cost_iqa(
        x, x_c, mos_set, te_n_batch_imgs, bat2img_idx_set=bat2img_idx_set)
    outputs = [cost] + rec_test.get_function_outputs(train=False)

    test_model = theano.function(
        [mos_set, bat2img_idx_set],
        [output for output in outputs],
        givens={
            x: shared_set_r,
            x_c: shared_set_d
        },
        on_unused_input='warn'
    )

    minutes, seconds = divmod(timeit.default_timer() - start_time, 60)
    print(' - Compilation took {:02.0f}:{:05.2f}'.format(minutes, seconds))

    def get_train_outputs():
        # Fetch the next training batch, copy its patches into the head of
        # the host buffers, push them to the shared variables and run one
        # training step. Rows past res['n_data'] keep their previous content.
        res = train_data.next_batch(n_batch_imgs)
        np_set_r[:res['n_data']] = res['ref_data']
        np_set_d[:res['n_data']] = res['dis_data']
        shared_set_r.set_value(np_set_r)
        shared_set_d.set_value(np_set_d)
        return train_model(res['score_set'], res['bat2img_idx_set'])

    def get_test_outputs():
        # Same as get_train_outputs, but for one test image and without
        # parameter updates.
        res = test_data.next_batch(te_n_batch_imgs)
        np_set_r[:res['n_data']] = res['ref_data']
        np_set_d[:res['n_data']] = res['dis_data']
        shared_set_r.set_value(np_set_r)
        shared_set_d.set_value(np_set_d)
        return test_model(res['score_set'], res['bat2img_idx_set'])

    # Main training routine
    return trainer.training_routine(
        model, get_train_outputs, rec_train, get_test_outputs, rec_test,
        n_batch_imgs, te_n_batch_imgs, train_data, test_data,
        epochs, prefix2, check_mos_corr=True)
154 | 
155 | 
def init_patch_step(db_config, ign_border, ign_scale=8):
    """
    Fill in a default `patch_step` in `db_config` (modified in place):
    patch_step = patch_size - ign_border * ign_scale.

    Nothing is changed when `patch_size` is missing, when `patch_step` is
    already set, or when `random_crops` is non-zero.
    """
    size = db_config.get('patch_size', None)
    step = db_config.get('patch_step', None)
    n_random_crops = int(db_config.get('random_crops', 0))

    if size is None or step is not None or n_random_crops != 0:
        return

    margin = ign_border * ign_scale
    derived = (size[0] - margin, size[1] - margin)
    db_config['patch_step'] = derived
    print(' - Set patch_step according to patch_size and ign: (%d, %d)' % (
        derived[0], derived[1]
    ))
173 | 
174 | 
def create_model(model_config, patch_size=None, num_ch=None):
    """
    Instantiate the `Model` class of the module named by
    `model_config['model']`.

    When given, `patch_size` and `num_ch` are written into `model_config`
    as 'input_size' and 'num_ch' before the model is constructed.
    """
    module_name = model_config.get('model', None)
    assert module_name is not None
    module = import_module(module_name)

    # Dataset-derived settings take precedence over the configured ones.
    dataset_overrides = (('input_size', patch_size), ('num_ch', num_ch))
    for key, value in dataset_overrides:
        if value is not None:
            model_config[key] = value

    return module.Model(model_config)
191 | 


--------------------------------------------------------------------------------
/IQA_DeepQA_FR_release/config_parser.py:
--------------------------------------------------------------------------------
  1 | from __future__ import absolute_import, division, print_function
  2 | 
  3 | import os
  4 | import yaml
  5 | import sys
  6 | 
  7 | DEFAULT_CONFIG = os.path.join(
  8 |     os.path.dirname(__file__), 'default_config.yaml')
  9 | 
 10 | 
 11 | def config_parser(config_file, section=None, default_config_file=None):
 12 |     print('\nConfig: %s' % config_file, end='')
 13 |     if section is not None:
 14 |         print(' (Sec.: %s)' % section)
 15 |     else:
 16 |         print('')
 17 | 
 18 |     # load default config data
 19 |     if default_config_file is None:
 20 |         default_config_file = DEFAULT_CONFIG
 21 |     exist_default_config = os.path.isfile(default_config_file)
 22 | 
 23 |     if exist_default_config:
 24 |         if (sys.version_info > (3, 0)):
 25 |             # if Python 3
 26 |             with open(default_config_file, 'r', encoding='utf-8') as stream:
 27 |                 try:
 28 |                     d_config_data = yaml.load(stream)
 29 |                 except yaml.YAMLError as exc:
 30 |                     print(exc)
 31 |         else:
 32 |             # if Python 2
 33 |             with open(default_config_file, 'r') as stream:
 34 |                 try:
 35 |                     d_config_data = yaml.load(stream)
 36 |                 except yaml.YAMLError as exc:
 37 |                     print(exc)
 38 |         db_config = d_config_data['database']
 39 |         model_config = d_config_data['model']
 40 |         train_config = d_config_data['training']
 41 |     else:
 42 |         print(' @ Default config. file does not exist: %s' % (
 43 |             default_config_file))
 44 |         db_config = {}
 45 |         model_config = {}
 46 |         train_config = {}
 47 | 
 48 |     # load the current config data
 49 |     with open(config_file, 'r') as stream:
 50 |         try:
 51 |             config_data = yaml.load(stream)
 52 |         except yaml.YAMLError as exc:
 53 |             print(exc)
 54 | 
 55 |     use_common_config = False
 56 |     if 'common' in list(config_data.keys()):
 57 |         base_config_data = config_data['common']
 58 |         use_common_config = True
 59 | 
 60 |     if section is not None:
 61 |         if section in list(config_data.keys()):
 62 |             config_data = config_data[section]
 63 |         else:
 64 |             raise ValueError('No %s in %s' % (section, config_file))
 65 |     else:
 66 |         config_data = config_data
 67 | 
 68 |     # merge the current config into base config
 69 |     if use_common_config:
 70 |         if 'database' in list(base_config_data.keys()):
 71 |             overwrite_config(db_config, base_config_data['database'])
 72 |         if 'model' in list(base_config_data.keys()):
 73 |             overwrite_config(model_config, base_config_data['model'])
 74 |         if 'training' in list(base_config_data.keys()):
 75 |             overwrite_config(train_config, base_config_data['training'])
 76 | 
 77 |     if 'database' in list(config_data.keys()):
 78 |         overwrite_config(db_config, config_data['database'])
 79 |     if 'model' in list(config_data.keys()):
 80 |         overwrite_config(model_config, config_data['model'])
 81 |     if 'training' in list(config_data.keys()):
 82 |         overwrite_config(train_config, config_data['training'])
 83 | 
 84 |     check_subsection(db_config)
 85 | 
 86 |     # if db_config['num_subsection']:
 87 |     #     if db_config['train']['num_subsection']:
 88 |     #         copy_config(db_config['train'], db_config['train'][0])
 89 |     #         copy_config(db_config, db_config['train'][0])
 90 |     #     else:
 91 |     #         copy_config(db_config, db_config['train'])
 92 | 
 93 |     show_configs(db_config, model_config, train_config)
 94 | 
 95 |     return db_config, model_config, train_config
 96 | 
 97 | 
 98 | def dump_config(filename, db_config, model_config, train_config):
 99 |     cfg = {}
100 |     cfg['database'] = db_config.copy()
101 |     cfg['model'] = model_config.copy()
102 |     cfg['training'] = train_config.copy()
103 |     # check_child_list(cfg)
104 |     with open(filename, 'w') as yaml_file:
105 |         yaml.dump(cfg, yaml_file, default_flow_style=False)
106 | 
107 | 
def check_child_list(parent_section):
    """Recursively replace every list value with its `str()` form, in place.

    Dict values are descended into; all other values are left untouched.
    """
    for key in list(parent_section):
        value = parent_section[key]
        if isinstance(value, dict):
            check_child_list(value)
        elif isinstance(value, list):
            parent_section[key] = str(value)
123 | 
124 | 
def check_subsection(parent_section):
    """Propagate a section's plain settings into its sub-sections, in place.

    Every dict-valued entry counts as a sub-section. Each one is replaced
    by a copy of the parent's non-dict entries overwritten with the
    sub-section's own entries, and is then processed recursively. The
    parent afterwards keeps only its sub-sections plus 'num_subsection'
    (their count). A section without sub-sections only gains
    'num_subsection' = 0.
    """
    subsection_names = [
        name for name, value in parent_section.items()
        if isinstance(value, dict)
    ]

    if not subsection_names:
        parent_section['num_subsection'] = 0
        return

    for name in subsection_names:
        # Start from the parent's plain (non-dict) settings ...
        inherited = {
            key: value for key, value in parent_section.items()
            if not isinstance(value, dict)
        }
        # ... then let the sub-section's own settings win.
        overwrite_config(inherited, parent_section[name])

        check_subsection(inherited)
        parent_section[name] = inherited

    # The plain settings now live in the children; drop them here.
    for key in list(parent_section):
        if key not in subsection_names:
            parent_section.pop(key)

    parent_section['num_subsection'] = len(subsection_names)
154 | 
155 | 
def overwrite_config(base_config, new_config):
    """Copy every entry of `new_config` into `base_config`, replacing duplicates."""
    base_config.update(new_config.items())
159 | 
160 | 
def copy_config(base_config, new_config):
    """Add entries of `new_config` to `base_config` without replacing existing keys."""
    for key, value in new_config.items():
        base_config.setdefault(key, value)
165 | 
166 | 
def show_configs(db_config, model_config, train_config):
    """Print a short summary of the model and training configuration.

    `db_config` is currently not printed; the disabled code below shows
    the former dataset summary.
    """
    # if 'train' in db_config:
    #     print('Train Dataset: %s' % db_config['train']['sel_data'])
    #     print(' - Scenes:', db_config['train']['scenes'], end='')
    #     print(' / dist_types:', db_config['train']['dist_types'])
    #     print(' - Patch size:', db_config['train']['patch_size'], end='')
    #     print(' / Patch step:', db_config['train']['patch_step'])

    #     print('Test Dataset: %s' % db_config['test']['sel_data'])
    #     print(' - Scenes:', db_config['test']['scenes'], end='')
    #     print(' / dist_types:', db_config['test']['dist_types'])
    #     print(' - Patch size:', db_config['test']['patch_size'], end='')
    #     print(' / Patch step:', db_config['test']['patch_step'])
    # else:
    #     print('Dataset: %s' % db_config['sel_data'])
    #     print(' - Scenes:', db_config['scenes'], end='')
    #     print(' / dist_types:', db_config['dist_types'])
    #     print(' - Patch size:', db_config['patch_size'], end='')
    #     print(' / Patch step:', db_config['patch_step'])

    print('Model: %s' % model_config['model'])
    print(' - opt_scheme:', model_config['opt_scheme'], end='')
    print(' / lr:', model_config['lr'])

    # One line for the 'wl_*' settings, then one for the 'wr_*' settings.
    for prefix in ('wl_', 'wr_'):
        entries = [
            '%s: %s' % (name, model_config[name])
            for name in list(model_config.keys())
            if name[:3] == prefix
        ]
        if len(entries) > 0:
            print(' - %s' % ', '.join(entries))

    print('Training')
    training_fields = (
        (' - batch_size:', 'batch_size', ''),
        (' / epochs:', 'epochs', ''),
        (' / test_freq:', 'test_freq', ''),
        (' / save_freq:', 'save_freq', '\n'),
    )
    for label, key, terminator in training_fields:
        print(label, train_config[key], end=terminator)
    print('')
209 | 


--------------------------------------------------------------------------------
/IQA_DeepQA_FR_release/test_iqa.py:
--------------------------------------------------------------------------------
  1 | from __future__ import absolute_import, division, print_function
  2 | 
  3 | import os
  4 | import timeit
  5 | from importlib import import_module
  6 | 
  7 | import numpy as np
  8 | import theano
  9 | import theano.tensor as T
 10 | 
 11 | from .config_parser import config_parser
 12 | from .data_load.data_loader_IQA import DataLoader
 13 | from .trainer import Trainer
 14 | 
 15 | 
 16 | def check_dist_list(testing_dist_list, db_config):
 17 |     if not isinstance(testing_dist_list, (list, tuple)):
 18 |         testing_dist_list = (testing_dist_list, )
 19 | 
 20 |     dist_list = []
 21 |     for testing_dist in testing_dist_list:
 22 |         if testing_dist == 'each':
 23 |             if db_config['sel_data'] == 'LIVE':
 24 |                 from .data_load import LIVE
 25 |                 dist_list += [[dist] for dist in LIVE.ALL_DIST_TYPES]
 26 |             elif db_config['sel_data'] == 'TID2008':
 27 |                 from .data_load import TID2008
 28 |                 dist_list += [[dist] for dist in TID2008.ALL_DIST_TYPES]
 29 |             elif db_config['sel_data'] == 'TID2013':
 30 |                 from .data_load import TID2013
 31 |                 dist_list += [[dist] for dist in TID2013.ALL_DIST_TYPES]
 32 |             elif db_config['sel_data'] == 'CSIQ':
 33 |                 from .data_load import CSIQ
 34 |                 dist_list += [[dist] for dist in CSIQ.ALL_DIST_TYPES]
 35 |             else:
 36 |                 raise NotImplementedError
 37 |         else:
 38 |             dist_list.append(testing_dist)
 39 |     return dist_list
 40 | 
 41 | 
def test_iqa(config_file, section, testing_dist_list=('each', 'all'),
             output_path=None, snap_file=None, load_keys=None,
             tr_te_file=None, use_ref_for_nr=True):
    """
    Evaluate a model on each requested distortion set and append the
    scores to `<output_path>/results.txt`.

    The configuration is read from `section` of `config_file`. For every
    entry of the expanded `testing_dist_list`, the test split is loaded
    with `dist_types` set to that entry, the model is evaluated and one
    'name, SRCC, PLCC' line is written to the log. The model is created
    and its weights loaded only once, on the first iteration.

    NOTE(review): `output_path` defaults to None but is passed straight to
    os.path.isdir / os.path.join, so callers apparently must supply it.
    NOTE(review): `use_ref_for_nr` is not used in this function.
    """
    db_config, model_config, train_config = config_parser(
        config_file, section)

    # Check snapshot file
    if snap_file is not None:
        assert os.path.isfile(snap_file), \
            'Not existing snap_file: %s' % snap_file

    # Expand 'each' into one entry per distortion type of the database.
    testing_dist_list = check_dist_list(testing_dist_list, db_config)

    # Initialize patch step
    init_patch_step(db_config, int(model_config.get('ign', 0)),
                    int(model_config.get('ign_scale', 1)))

    # Symbolic inputs are created once and shared by every run_iqa_iw call.
    x_c = T.ftensor4('x_c')
    x = T.ftensor4('x')
    mos_set = T.vector('mos_set')
    bat2img_idx_set = T.imatrix('bat2img_idx_set')

    batch_size = train_config.get('batch_size', 1)

    # Write log
    # The file is opened in append mode, so a header line is added on
    # every invocation.
    if not os.path.isdir(output_path):
        os.makedirs(output_path)
    with open(os.path.join(output_path, 'results.txt'), 'a') as f_log:
        data = 'Dist. type, SRCC, PLCC\n'
        f_log.write(data)

    # Test for each testing distortion set in testing_dist_list
    made_model = False
    for idx, testing_dist in enumerate(testing_dist_list):
        print('\n##### %d/%d #####' % (idx + 1, len(testing_dist_list)))
        prefix2 = 'dist_%d' % idx

        # Load data
        # Only the test split is used; the training split is discarded.
        db_config['dist_types'] = testing_dist
        data_loader = DataLoader(db_config)
        _, test_data = data_loader.load_data_tr_te(tr_te_file)

        if not made_model:
            # Create model
            model = create_model(model_config,
                                 test_data.patch_size, test_data.num_ch)

            # NOTE(review): snap_file may still be None here; whether the
            # load methods accept that is not visible from this function.
            if load_keys is None:
                model.load(snap_file)
            else:
                model.load_load_keys(load_keys, snap_file)
                # model.load_load_keys(['sens_map', 'reg_mos'], snap_file)
            made_model = True

        # Create trainer
        trainer = Trainer(train_config, output_path=output_path)

        score = run_iqa_iw(
            test_data, model, trainer, batch_size,
            x=x, x_c=x_c, mos_set=mos_set, bat2img_idx_set=bat2img_idx_set,
            prefix2=prefix2)

        # Write log
        with open(os.path.join(output_path, 'results.txt'), 'a') as f_log:
            data = '{:s}, {:.4f}, {:.4f}\n'.format(
                str(testing_dist), score[0], score[1])
            f_log.write(data)
109 | 
110 | 
def run_iqa_iw(test_data, model, trainer,
               n_batch_imgs, x=None, x_c=None, mos_set=None,
               bat2img_idx_set=None, prefix2=''):
    """
    Compile the imagewise testing Theano function and run the trainer's
    testing routine on `test_data`.

    The symbolic inputs `x`, `x_c`, `mos_set` and `bat2img_idx_set` are
    created here when not supplied. Returns whatever
    `trainer.testing_routine` returns.

    @type model: .models.model_basis.ModelBasis
    @type test_data: .data_load.dataset.Dataset
    """
    # Make dummy shared dataset
    # Host buffers sized for the largest batch:
    # (max value in column 0 of npat_img_list) * n_batch_imgs patches.
    max_num_patch = np.max(np.asarray(test_data.npat_img_list)[:, 0])
    n_pats_dummy = max_num_patch * n_batch_imgs
    sh = model.input_shape
    # Buffer axes are (patch, sh[2], sh[3], sh[1]).
    np_set_r = np.zeros((n_pats_dummy, sh[2], sh[3], sh[1]), dtype='float32')
    np_set_d = np.zeros((n_pats_dummy, sh[2], sh[3], sh[1]), dtype='float32')
    shared_set_r = theano.shared(np_set_r, borrow=True)
    shared_set_d = theano.shared(np_set_d, borrow=True)

    test_data.set_imagewise()

    print('\nCompile theano function: IQA using reference images', end=' ')
    print(' (imagewise / low GPU memory)')
    start_time = timeit.default_timer()
    # Create any symbolic input the caller did not provide.
    if x is None:
        x = T.ftensor4('x')
    if x_c is None:
        x_c = T.ftensor4('x_c')
    if mos_set is None:
        mos_set = T.vector('mos_set')
    if bat2img_idx_set is None:
        bat2img_idx_set = T.imatrix('bat2img_idx_set')

    print(' (Make testing model)')
    # The test graph must be built while the model is in inference mode.
    model.set_training_mode(False)
    cost, rec_test = model.cost_iqa(
        x, x_c, mos_set, n_img=n_batch_imgs, bat2img_idx_set=bat2img_idx_set)
    outputs = [cost] + rec_test.get_function_outputs(train=False)

    # x / x_c are not explicit inputs: they are substituted by the shared
    # buffers, which are refilled before every call (see helper below).
    test_model = theano.function(
        [mos_set, bat2img_idx_set],
        [output for output in outputs],
        givens={
            x: shared_set_r,
            x_c: shared_set_d
        },
        on_unused_input='warn'
    )

    minutes, seconds = divmod(timeit.default_timer() - start_time, 60)
    print(' - Compilation took {:02.0f}:{:05.2f}'.format(minutes, seconds))

    def get_test_outputs():
        # Fetch the next batch, copy its patches into the head of the host
        # buffers, push them to the shared variables and evaluate. Rows
        # past res['n_data'] keep their previous content.
        res = test_data.next_batch(n_batch_imgs)
        np_set_r[:res['n_data']] = res['ref_data']
        np_set_d[:res['n_data']] = res['dis_data']
        shared_set_r.set_value(np_set_r)
        shared_set_d.set_value(np_set_d)
        return test_model(res['score_set'], res['bat2img_idx_set'])

    # Main testing routine
    return trainer.testing_routine(
        get_test_outputs, rec_test, n_batch_imgs, test_data,
        prefix2, check_mos_corr=True)
172 | 
173 | 
def init_patch_step(db_config, ign_border, ign_scale=8):
    """
    Set a default `patch_step` in `db_config` (modified in place):
    patch_step = patch_size - ign_border * ign_scale.

    The default is only written when `patch_size` is present,
    `patch_step` is absent or None, and `random_crops` is zero.
    """
    patch_size = db_config.get('patch_size', None)
    explicit_step = db_config.get('patch_step', None)
    random_crops = int(db_config.get('random_crops', 0))

    needs_default = (
        patch_size is not None
        and explicit_step is None
        and random_crops == 0
    )
    if needs_default:
        shrink = ign_border * ign_scale
        step_0 = patch_size[0] - shrink
        step_1 = patch_size[1] - shrink
        db_config['patch_step'] = (step_0, step_1)
        print(' - Set patch_step according to patch_size and ign: (%d, %d)' % (
            step_0, step_1
        ))
191 | 
192 | 
def create_model(model_config, patch_size=None, num_ch=None):
    """
    Build the `Model` defined in the module named by `model_config['model']`.

    `patch_size` and `num_ch`, when not None, are stored in `model_config`
    under 'input_size' and 'num_ch' before the model is constructed.
    """
    module_name = model_config.get('model', None)
    assert module_name is not None
    module = import_module(module_name)

    # Dataset information, when available, replaces the configured values.
    if patch_size is not None:
        model_config.update(input_size=patch_size)
    if num_ch is not None:
        model_config.update(num_ch=num_ch)

    model_cls = module.Model
    return model_cls(model_config)
209 | 


--------------------------------------------------------------------------------
/IQA_DeepQA_FR_release/models/FR_sens_1s.py:
--------------------------------------------------------------------------------
  1 | from __future__ import absolute_import, division, print_function
  2 | 
  3 | import os
  4 | 
  5 | import numpy as np
  6 | import theano
  7 | import theano.tensor as T
  8 | from theano.tensor.nnet import conv2d
  9 | 
 10 | from ..laplacian_pyr import downsample_img, normalize_lowpass_subt
 11 | from ..layers import layers
 12 | from .model_basis import ModelBasis
 13 | from .model_record import Record
 14 | 
 15 | 
class Model(ModelBasis):
    """
    Simpler full-reference IQA model ('Deep FR-IQA simpler ver.1.0').

    A convolutional encoder (layers under the 'sens_map' key) predicts a
    sensitivity map from the normalized x_c image alone. The map is
    multiplied with a twice-downsampled log-error map between x and x_c,
    averaged spatially, and regressed onto MOS by two fully connected
    layers (layers under the 'reg_mos' key).
    """

    def __init__(self, model_config, rng=None):
        """
        Read the configuration, print a summary and build the layers.

        Arguments
            model_config: configuration object read through .get().
            rng: passed unchanged to ModelBasis.__init__.
        """
        super(Model, self).__init__(model_config, rng)
        self.set_configs(model_config)

        print('\nDeep FR-IQA simpler ver.1.0')
        print(' - Model file: %s' % (os.path.split(__file__)[1]))
        print(' - Ignore border: %d' % (self.ign))
        print(' - Loss weights: sens=%.2e' % (self.wl_subj))
        print(' - Regul. weights: L2=%.2e, TV=%.2e' % (
            self.wr_l2, self.wr_tv))

        self.init_model()

    def set_configs(self, model_config):
        """
        Load loss / regularization weights and the ignored border width.

        Keys read (with defaults): 'wl_subj' (1e3), 'wr_l2' (5e-3),
        'wr_tv' (1e-2), 'ign' (4). Optimizer-related options are handled
        by set_opt_configs (defined outside this class).
        """
        self.set_opt_configs(model_config)
        # weight of the MOS regression loss
        self.wl_subj = float(model_config.get('wl_subj', 1e3))
        # weight of the L2 regularization term
        self.wr_l2 = float(model_config.get('wr_l2', 5e-3))
        # weight of the total variation term on the sensitivity map
        self.wr_tv = float(model_config.get('wr_tv', 1e-2))
        # number of border pixels cropped from each side by shave_border
        self.ign = int(model_config.get('ign', 4))

    def init_model(self):
        """
        Create the 'sens_map' and 'reg_mos' layer lists and Sobel filters.

        The encoder is a plain chain of six 3x3 convolutions; conv2 and
        conv4 use subsample=(2, 2), and conv6 outputs self.num_ch channels.
        """
        print('\n - Sensitivity map encoder layers')
        key = 'sens_map'
        self.layers[key] = []

        self.layers[key].append(layers.ConvLayer(
            self.input_shape, 32, (3, 3), layers.lrelu, name=key + '/conv1'))

        self.layers[key].append(layers.ConvLayer(
            self.last_sh(key), 32, (3, 3), layers.lrelu, name=key + '/conv2',
            subsample=(2, 2)))

        #######################################################################
        self.layers[key].append(layers.ConvLayer(
            self.last_sh(key), 64, (3, 3), layers.lrelu, name=key + '/conv3'))

        self.layers[key].append(layers.ConvLayer(
            self.last_sh(key), 64, (3, 3), layers.lrelu, name=key + '/conv4',
            subsample=(2, 2)))

        self.layers[key].append(layers.ConvLayer(
            self.last_sh(key), 64, (3, 3), layers.lrelu, name=key + '/conv5'))

        # Final layer: ReLU output with the bias initialized to ones.
        self.layers[key].append(layers.ConvLayer(
            self.last_sh(key), self.num_ch, (3, 3), T.nnet.relu,
            b=np.ones((self.num_ch,), dtype='float32'), name=key + '/conv6'))

        #######################################################################
        print('\n - Regression mos layers')
        key = 'reg_mos'
        self.layers[key] = []

        # Input is the per-channel pooled feature vector (num_ch values).
        self.layers[key].append(layers.FCLayer(
            self.num_ch, 4, layers.lrelu, name=key + '/fc1'))

        self.layers[key].append(layers.FCLayer(
            self.last_sh(key), 1, T.nnet.relu, name=key + '/fc2'
        ))

        #######################################################################
        # Sobel filters
        # Stored as (1, 1, 3, 3) float32 shared variables so they can be
        # used directly as single-channel conv2d filters in sobel().
        sobel_y_val = np.array([[1, 2, 1], [0, 0, 0], [-1, -2, -1]],
                               dtype='float32').reshape((1, 1, 3, 3))
        self.sobel_y = theano.shared(sobel_y_val, borrow=True)

        sobel_x_val = np.array([[-1, 0, 1], [-2, 0, 2], [-1, 0, 1]],
                               dtype='float32').reshape((1, 1, 3, 3))
        self.sobel_x = theano.shared(sobel_x_val, borrow=True)

        #######################################################################

        super(Model, self).make_param_list()
        super(Model, self).show_num_params()

    def sobel(self, x, n_ch=1):
        """
        Apply the Sobel operators and return (y_grad, x_grad).

        Arguments
            x: 4D tensor.
            n_ch: number of channels to process. For n_ch > 1 the first
                n_ch channels are filtered one at a time and the results
                are concatenated along axis 1; otherwise x is filtered
                directly with the single-channel filters.
        """
        if n_ch > 1:
            y_grads = []
            x_grads = []
            for ch in range(n_ch):
                # keep a singleton channel axis for the 1-channel filter
                cur_in = x[:, ch, :, :].dimshuffle(0, 'x', 1, 2)
                y_grads.append(conv2d(cur_in, self.sobel_y,
                                      filter_shape=(1, 1, 3, 3)))
                x_grads.append(conv2d(cur_in, self.sobel_x,
                                      filter_shape=(1, 1, 3, 3)))
            y_grad = T.concatenate(y_grads, axis=1)
            x_grad = T.concatenate(x_grads, axis=1)
        else:
            y_grad = conv2d(x, self.sobel_y, filter_shape=(1, 1, 3, 3))
            x_grad = conv2d(x, self.sobel_x, filter_shape=(1, 1, 3, 3))
        return y_grad, x_grad

    def get_total_variation(self, x, beta=1.5):
        """
        Calculate total variation of the input.

        Returns the mean over all elements of
        (y_grad ** 2 + x_grad ** 2) ** (beta / 2), where the gradients
        come from sobel().

        Arguments
            x: 4D tensor image. Gradients are taken over self.num_ch
                channels, so x is expected to have self.num_ch channels.
            beta: exponent applied to the gradient magnitude.
        """
        y_grad, x_grad = self.sobel(x, self.num_ch)
        tv = T.mean((y_grad ** 2 + x_grad ** 2) ** (beta / 2))
        return tv

    def log_diff_fn(self, in_a, in_b, eps=0.1):
        """
        Normalized log-domain difference map between in_a and in_b.

        Computes (log(255^2) - log(diff^2 + eps)) / (log(255^2) - log(eps))
        with diff = 255 * (in_a - in_b); the value is 1 where the inputs
        are equal and decreases as the absolute difference grows.
        """
        diff = 255.0 * (in_a - in_b)
        log_255_sq = np.float32(2 * np.log(255.0))

        val = log_255_sq - T.log(diff ** 2 + eps)
        # value of `val` at diff == 0, used as the normalizer
        max_val = np.float32(log_255_sq - np.log(eps))
        return val / max_val

    def power_diff_fn(self, in_a, in_b, power=0.2):
        """
        Power-law difference map: |255 * (in_a - in_b)| ** power,
        divided by 255 ** power.
        """
        diff = 255.0 * (in_a - in_b)

        val = T.abs_(diff) ** power
        max_val = np.float32(255.0 ** power)
        return val / max_val

    def sens_map_fn(self, x_c):
        """Run x_c through the 'sens_map' layers and return the output."""
        output = self.get_key_layers_output(x_c, 'sens_map')
        return output

    def regress_mos_fn(self, feat_vec):
        """Run feat_vec through the 'reg_mos' layers and return the output."""
        return self.get_key_layers_output(feat_vec, 'reg_mos')

    def shave_border(self, feat_map):
        """
        Crop self.ign elements from both ends of the last two axes of a
        4D feature map; return the input unchanged when self.ign <= 0.
        """
        if self.ign > 0:
            return feat_map[:, :, self.ign:-self.ign, self.ign:-self.ign]
        else:
            return feat_map

    def cost_iqa(self, x, x_c, mos, n_img=None, bat2img_idx_set=None):
        """
        Get cost: regression onto MOS using both ref. and dis. images.

        Arguments
            x, x_c: image batches accepted by image_vec_to_tensor. The
                sensitivity map is predicted from x_c; the error map is
                computed between x and x_c.
                NOTE(review): presumably x is the reference and x_c the
                distorted image - confirm against the caller.
            mos: target scores compared with the prediction by get_mse.
            n_img: number of images in the batch; must be a Python int
                because it drives a Python-level loop. Only used when
                bat2img_idx_set is given.
            bat2img_idx_set: optional index tensor; row i holds the
                [from, to) patch range belonging to image i.

        Returns
            (cost, records): the total cost expression and a Record
            holding the monitored values and images.
        """
        records = Record()
        # concatenate the image patches
        if bat2img_idx_set:
            # if dummy data with fixed size is given and current data is
            # overwritten on dummy data with size of n_patches,
            # pick current dataset with size of n_patches
            # (n_patches is the end index stored in the last row)
            n_patches = bat2img_idx_set[T.shape(bat2img_idx_set)[0] - 1][1]
            x_set = x[:n_patches]
            x_c_set = x_c[:n_patches]
        else:
            # if input is current data
            x_set = x
            x_c_set = x_c

        # Input image vectors to 4D tensors
        x_im = self.image_vec_to_tensor(x_set)
        x_c_im = self.image_vec_to_tensor(x_c_set)
        x_c_norm = normalize_lowpass_subt(x_c_im, 3, self.num_ch)

        # Get error map
        e = self.log_diff_fn(x_im, x_c_im, 1.0)
        # downsample twice so the error map can be multiplied with the
        # encoder output (which passes through two subsample=(2, 2) layers)
        e_ds4 = downsample_img(downsample_img(e, self.num_ch), self.num_ch)

        # predict sensitivity map
        sens_map = self.sens_map_fn(x_c_norm)

        # predict the score
        pred_map = sens_map * e_ds4
        pred_crop = self.shave_border(pred_map)

        # make feature vector
        if bat2img_idx_set:
            # if patch based
            feat_vec_list = []
            for idx in range(n_img):
                idx_from = bat2img_idx_set[idx][0]
                idx_to = bat2img_idx_set[idx][1]

                # current predicted map
                # (average over the image's patches and both spatial axes,
                # leaving one value per channel)
                c_pred_crop = pred_crop[idx_from: idx_to]
                pred_mean = T.mean(c_pred_crop, axis=(0, 2, 3), keepdims=True)
                feat_vec_list.append(pred_mean)

            feat_vec = T.concatenate(feat_vec_list, axis=0).flatten(2)
            # feat_vec = T.stack(feat_vec_list)
        else:
            # if image based
            feat_vec = T.mean(pred_crop, axis=(2, 3))

        # regress onto MOS
        mos_p = self.regress_mos_fn(feat_vec).flatten()

        ######################################################################
        # calculate MOS loss
        subj_loss = self.get_mse(mos_p, mos)

        # L2 regularization
        l2_reg = self.get_l2_regularization(
            ['sens_map', 'reg_mos'], mode='sum')

        # TV norm regularization
        tv = self.get_total_variation(sens_map, 3.0)

        # final cost
        # (losses and weights are passed pairwise in the same order)
        cost = self.add_all_weighted_losses(
            [subj_loss, tv, l2_reg],
            [self.wl_subj, self.wr_tv, self.wr_l2])

        # Data to record
        # ('subj' is recorded already weighted, 'tv' unweighted)
        records.add_data('subj', subj_loss * self.wl_subj)
        records.add_data('tv', tv)

        records.add_im_data('mos_p', mos_p)
        records.add_im_data('mos_gt', mos)

        records.add_imgs('x_c', x_c_im, caxis=[0, 1], scale=1.0)
        # pyr_caxis = [-0.25, 0.25]
        # records.add_imgs('x_c_mf', x_c_mf, caxis=pyr_caxis, scale=0.5)
        # records.add_imgs('x_c_hf', x_c_hf, caxis=pyr_caxis, scale=1.0)
        records.add_imgs('e_ds', e_ds4, caxis=[0, 1.0], scale=0.25)
        records.add_imgs('sens_map', sens_map, caxis=[0, 1.5], scale=0.25)
        records.add_imgs('pred_map', pred_map, caxis=[0, 1.5], scale=0.25)

        # records.add_kernel(self.layers['sens_map'], [0])

        return cost, records

    def cost_updates_iqa(self, x, x_c, mos, n_img=None, bat2img_idx_set=None):
        """
        Build the cost with cost_iqa and the parameter updates for the
        'sens_map' and 'reg_mos' layers.

        Returns (cost, updates, records).
        """
        cost, records = self.cost_iqa(
            x, x_c, mos, n_img=n_img, bat2img_idx_set=bat2img_idx_set)
        updates = self.get_updates_keys(
            cost, ['sens_map', 'reg_mos'])
        return cost, updates, records
243 | 


--------------------------------------------------------------------------------
/IQA_DeepQA_FR_release/models/FR_sens_1.py:
--------------------------------------------------------------------------------
  1 | from __future__ import absolute_import, division, print_function
  2 | 
  3 | import os
  4 | 
  5 | import numpy as np
  6 | import theano
  7 | import theano.tensor as T
  8 | from theano.tensor.nnet import conv2d
  9 | 
 10 | from ..laplacian_pyr import downsample_img, normalize_lowpass_subt
 11 | from ..layers import layers
 12 | from .model_basis import ModelBasis
 13 | from .model_record import Record
 14 | 
 15 | 
class Model(ModelBasis):
    """
    Main full-reference IQA model ('Deep FR-IQA main ver.1.0').

    The sensitivity map encoder (layers under the 'sens_map' key) has two
    input branches, one fed with the normalized x_c image and one fed with
    the log-error map between x and x_c. Their outputs are concatenated
    and passed through further convolutions. The resulting map is
    multiplied with a twice-downsampled error map, averaged spatially, and
    regressed onto MOS by two fully connected layers ('reg_mos' key).
    """

    def __init__(self, model_config, rng=None):
        """
        Read the configuration, print a summary and build the layers.

        Arguments
            model_config: configuration object read through .get().
            rng: passed unchanged to ModelBasis.__init__.
        """
        super(Model, self).__init__(model_config, rng)
        self.set_configs(model_config)

        print('\nDeep FR-IQA main ver.1.0')
        print(' - Model file: %s' % (os.path.split(__file__)[1]))
        print(' - Ignore border: %d' % (self.ign))
        print(' - Loss weights: sens=%.2e' % (self.wl_subj))
        print(' - Regul. weights: L2=%.2e, TV=%.2e' % (
            self.wr_l2, self.wr_tv))

        self.init_model()

    def set_configs(self, model_config):
        """
        Load loss / regularization weights and the ignored border width.

        Keys read (with defaults): 'wl_subj' (1e3), 'wr_l2' (5e-3),
        'wr_tv' (1e-2), 'ign' (4). Optimizer-related options are handled
        by set_opt_configs (defined outside this class).
        """
        self.set_opt_configs(model_config)
        # weight of the MOS regression loss
        self.wl_subj = float(model_config.get('wl_subj', 1e3))
        # weight of the L2 regularization term
        self.wr_l2 = float(model_config.get('wr_l2', 5e-3))
        # weight of the total variation term on the sensitivity map
        self.wr_tv = float(model_config.get('wr_tv', 1e-2))
        # number of border pixels cropped from each side by shave_border
        self.ign = int(model_config.get('ign', 4))

    def init_model(self):
        """
        Create the 'sens_map' and 'reg_mos' layer lists and Sobel filters.

        The order of the 'sens_map' list matters: sens_map_fn addresses
        the layers by index (0-1: x_c branch, 2-3: error branch,
        4 onwards: shared trunk applied to the concatenated features).
        """
        print('\n - Sensitivity map encoder layers')
        key = 'sens_map'
        self.layers[key] = []

        # Branch 1 (indices 0, 1): applied to x_c in sens_map_fn.
        self.layers[key].append(layers.ConvLayer(
            self.input_shape, 32, (3, 3), layers.lrelu, name=key + '/conv1_1'))

        self.layers[key].append(layers.ConvLayer(
            self.last_sh(key), 32, (3, 3), layers.lrelu, name=key + '/conv2_1',
            subsample=(2, 2)))

        #######################################################################

        # Branch 2 (indices 2, 3): applied to the error map in sens_map_fn.
        # It starts again from the input shape, not from branch 1's output.
        self.layers[key].append(layers.ConvLayer(
            self.input_shape, 32, (3, 3), layers.lrelu, name=key + '/conv1_2'))

        self.layers[key].append(layers.ConvLayer(
            self.last_sh(key), 32, (3, 3), layers.lrelu, name=key + '/conv2_2',
            subsample=(2, 2)))

        #######################################################################
        # The trunk receives both branch outputs concatenated along axis 1,
        # so its input shape has twice the channels of one branch.
        prev_sh = self.last_sh(key)
        concat_sh = (prev_sh[0], prev_sh[1] * 2) + prev_sh[2:]

        self.layers[key].append(layers.ConvLayer(
            concat_sh, 64, (3, 3), layers.lrelu, name=key + '/conv3'))

        self.layers[key].append(layers.ConvLayer(
            self.last_sh(key), 64, (3, 3), layers.lrelu, name=key + '/conv4',
            subsample=(2, 2)))

        self.layers[key].append(layers.ConvLayer(
            self.last_sh(key), 64, (3, 3), layers.lrelu, name=key + '/conv5'))

        # Final layer: ReLU output with the bias initialized to ones.
        self.layers[key].append(layers.ConvLayer(
            self.last_sh(key), self.num_ch, (3, 3), T.nnet.relu,
            b=np.ones((self.num_ch,), dtype='float32'), name=key + '/conv6'))

        #######################################################################
        print('\n - Regression mos layers')
        key = 'reg_mos'
        self.layers[key] = []

        # Input is the per-channel pooled feature vector (num_ch values).
        self.layers[key].append(layers.FCLayer(
            self.num_ch, 4, layers.lrelu, name=key + '/fc1'))

        self.layers[key].append(layers.FCLayer(
            self.last_sh(key), 1, T.nnet.relu, name=key + '/fc2'
        ))

        #######################################################################
        # Sobel filters
        # Stored as (1, 1, 3, 3) float32 shared variables so they can be
        # used directly as single-channel conv2d filters in sobel().
        sobel_y_val = np.array([[1, 2, 1], [0, 0, 0], [-1, -2, -1]],
                               dtype='float32').reshape((1, 1, 3, 3))
        self.sobel_y = theano.shared(sobel_y_val, borrow=True)

        sobel_x_val = np.array([[-1, 0, 1], [-2, 0, 2], [-1, 0, 1]],
                               dtype='float32').reshape((1, 1, 3, 3))
        self.sobel_x = theano.shared(sobel_x_val, borrow=True)

        #######################################################################

        super(Model, self).make_param_list()
        super(Model, self).show_num_params()

    def sobel(self, x, n_ch=1):
        """
        Apply the Sobel operators and return (y_grad, x_grad).

        Arguments
            x: 4D tensor.
            n_ch: number of channels to process. For n_ch > 1 the first
                n_ch channels are filtered one at a time and the results
                are concatenated along axis 1; otherwise x is filtered
                directly with the single-channel filters.
        """
        if n_ch > 1:
            y_grads = []
            x_grads = []
            for ch in range(n_ch):
                # keep a singleton channel axis for the 1-channel filter
                cur_in = x[:, ch, :, :].dimshuffle(0, 'x', 1, 2)
                y_grads.append(conv2d(cur_in, self.sobel_y,
                                      filter_shape=(1, 1, 3, 3)))
                x_grads.append(conv2d(cur_in, self.sobel_x,
                                      filter_shape=(1, 1, 3, 3)))
            y_grad = T.concatenate(y_grads, axis=1)
            x_grad = T.concatenate(x_grads, axis=1)
        else:
            y_grad = conv2d(x, self.sobel_y, filter_shape=(1, 1, 3, 3))
            x_grad = conv2d(x, self.sobel_x, filter_shape=(1, 1, 3, 3))
        return y_grad, x_grad

    def get_total_variation(self, x, beta=1.5):
        """
        Calculate total variation of the input.

        Returns the mean over all elements of
        (y_grad ** 2 + x_grad ** 2) ** (beta / 2), where the gradients
        come from sobel().

        Arguments
            x: 4D tensor image. Gradients are taken over self.num_ch
                channels, so x is expected to have self.num_ch channels.
            beta: exponent applied to the gradient magnitude.
        """
        y_grad, x_grad = self.sobel(x, self.num_ch)
        tv = T.mean((y_grad ** 2 + x_grad ** 2) ** (beta / 2))
        return tv

    def log_diff_fn(self, in_a, in_b, eps=0.1):
        """
        Normalized log-domain difference map between in_a and in_b.

        Computes (log(255^2) - log(diff^2 + eps)) / (log(255^2) - log(eps))
        with diff = 255 * (in_a - in_b); the value is 1 where the inputs
        are equal and decreases as the absolute difference grows.
        """
        diff = 255.0 * (in_a - in_b)
        log_255_sq = np.float32(2 * np.log(255.0))

        val = log_255_sq - T.log(diff ** 2 + eps)
        # value of `val` at diff == 0, used as the normalizer
        max_val = np.float32(log_255_sq - np.log(eps))
        return val / max_val

    def power_diff_fn(self, in_a, in_b, power=0.2):
        """
        Power-law difference map: |255 * (in_a - in_b)| ** power,
        divided by 255 ** power.
        """
        diff = 255.0 * (in_a - in_b)

        val = T.abs_(diff) ** power
        max_val = np.float32(255.0 ** power)
        return val / max_val

    def sens_map_fn(self, x_c, err):
        """
        Predict the sensitivity map from the image x_c and error map err.

        Each input goes through its own pair of layers; the two outputs
        are concatenated along axis 1 and fed to the remaining layers.
        The layer indices used here must match the order in which
        init_model appends to self.layers['sens_map'].
        """
        # NOTE: this local name shadows the imported `layers` module
        # within this method only.
        layers = self.layers['sens_map']
        # x_c
        prev_out = layers[0].get_output(x_c)
        x_c_out = layers[1].get_output(prev_out)

        # err
        prev_out = layers[2].get_output(err)
        err_out = layers[3].get_output(prev_out)

        prev_out = T.concatenate([x_c_out, err_out], axis=1)
        for layer in layers[4:]:
            prev_out = layer.get_output(prev_out)
        return prev_out

    def regress_mos_fn(self, feat_vec):
        """Run feat_vec through the 'reg_mos' layers and return the output."""
        return self.get_key_layers_output(feat_vec, 'reg_mos')

    def shave_border(self, feat_map):
        """
        Crop self.ign elements from both ends of the last two axes of a
        4D feature map; return the input unchanged when self.ign <= 0.
        """
        if self.ign > 0:
            return feat_map[:, :, self.ign:-self.ign, self.ign:-self.ign]
        else:
            return feat_map

    def cost_iqa(self, x, x_c, mos, n_img=None, bat2img_idx_set=None):
        """
        Get cost: regression onto MOS using both ref. and dis. images.

        Arguments
            x, x_c: image batches accepted by image_vec_to_tensor. The
                sensitivity map is predicted from the normalized x_c and
                the error map computed between x and x_c.
                NOTE(review): presumably x is the reference and x_c the
                distorted image - confirm against the caller.
            mos: target scores compared with the prediction by get_mse.
            n_img: number of images in the batch; must be a Python int
                because it drives a Python-level loop. Only used when
                bat2img_idx_set is given.
            bat2img_idx_set: optional index tensor; row i holds the
                [from, to) patch range belonging to image i.

        Returns
            (cost, records): the total cost expression and a Record
            holding the monitored values and images.
        """
        records = Record()
        # concatenate the image patches
        if bat2img_idx_set:
            # if dummy data with fixed size is given and current data is
            # overwritten on dummy data with size of n_patches,
            # pick current dataset with size of n_patches
            # (n_patches is the end index stored in the last row)
            n_patches = bat2img_idx_set[T.shape(bat2img_idx_set)[0] - 1][1]
            x_set = x[:n_patches]
            x_c_set = x_c[:n_patches]
        else:
            # if input is current data
            x_set = x
            x_c_set = x_c

        # Input image vectors to 4D tensors
        x_im = self.image_vec_to_tensor(x_set)
        x_c_im = self.image_vec_to_tensor(x_c_set)
        x_c_norm = normalize_lowpass_subt(x_c_im, 3, self.num_ch)

        # Get error map
        e = self.log_diff_fn(x_im, x_c_im, 1.0)
        # downsample twice so the error map can be multiplied with the
        # encoder output (which passes through two subsample=(2, 2) layers)
        e_ds4 = downsample_img(downsample_img(e, self.num_ch), self.num_ch)

        # predict sensitivity map
        # (the encoder receives the full-resolution error map `e`)
        sens_map = self.sens_map_fn(x_c_norm, e)

        # predict the score
        pred_map = sens_map * e_ds4
        pred_crop = self.shave_border(pred_map)

        # make feature vector
        if bat2img_idx_set:
            # if patch based
            feat_vec_list = []
            for idx in range(n_img):
                idx_from = bat2img_idx_set[idx][0]
                idx_to = bat2img_idx_set[idx][1]

                # average over the image's patches and both spatial axes,
                # leaving one value per channel
                c_pred_crop = pred_crop[idx_from: idx_to]
                pred_mean = T.mean(c_pred_crop, axis=(0, 2, 3), keepdims=True)
                feat_vec_list.append(pred_mean)

            feat_vec = T.concatenate(feat_vec_list, axis=0).flatten(2)
            # feat_vec = T.stack(feat_vec_list)
        else:
            # if image based
            feat_vec = T.mean(pred_crop, axis=(2, 3))

        # regress onto MOS
        mos_p = self.regress_mos_fn(feat_vec).flatten()

        ######################################################################
        # MOS loss
        subj_loss = self.get_mse(mos_p, mos)

        # L2 regularization
        l2_reg = self.get_l2_regularization(
            ['sens_map', 'reg_mos'], mode='sum')

        # TV norm regularization
        tv = self.get_total_variation(sens_map, 3.0)

        # final cost
        # (losses and weights are passed pairwise in the same order)
        cost = self.add_all_weighted_losses(
            [subj_loss, tv, l2_reg],
            [self.wl_subj, self.wr_tv, self.wr_l2])

        # Data to record
        # ('subj' is recorded already weighted, 'tv' unweighted)
        records.add_data('subj', subj_loss * self.wl_subj)
        records.add_data('tv', tv)

        records.add_im_data('mos_p', mos_p)
        records.add_im_data('mos_gt', mos)

        records.add_imgs('x_c', x_c_im, caxis=[-0.5, 0.5], scale=1.0)
        records.add_imgs('e_ds', e_ds4, caxis=[0, 1.0], scale=0.25)
        records.add_imgs('sens_map', sens_map, caxis=[0, 1.5], scale=0.25)
        records.add_imgs('pred_map', pred_map, caxis=[0, 1.5], scale=0.25)

        # records.add_kernel(self.layers['sens_map'], [0])

        return cost, records

    def cost_updates_iqa(self, x, x_c, mos, n_img=None, bat2img_idx_set=None):
        """
        Build the cost with cost_iqa and the parameter updates for the
        'sens_map' and 'reg_mos' layers.

        Returns (cost, updates, records).
        """
        cost, records = self.cost_iqa(
            x, x_c, mos, n_img=n_img, bat2img_idx_set=bat2img_idx_set)
        updates = self.get_updates_keys(
            cost, ['sens_map', 'reg_mos'])
        return cost, updates, records
263 | 


--------------------------------------------------------------------------------
/IQA_DeepQA_FR_release/optimizer.py:
--------------------------------------------------------------------------------
  1 | from __future__ import absolute_import, division, print_function
  2 | 
  3 | import numpy as np
  4 | import theano
  5 | import theano.tensor as T
  6 | 
  7 | 
  8 | class Optimizer(object):
  9 |     def __init__(self, lr_init=1e-3):
 10 |         self.lr = theano.shared(
 11 |             np.asarray(lr_init, dtype=theano.config.floatX), borrow=True)
 12 | 
 13 |     def get_updates_cost(self, cost, params, scheme='nadam', lr_factors=None):
 14 |         if scheme == 'adagrad':
 15 |             updates = self.get_updates_adagrad(cost, params)
 16 |         elif scheme == 'adadelta':
 17 |             updates = self.get_updates_adadelta(cost, params)
 18 |         elif scheme == 'rmsprop':
 19 |             updates = self.get_updates_rmsprop(cost, params)
 20 |         elif scheme == 'adam':
 21 |             updates = self.get_updates_adam(cost, params,
 22 |                                             lr_factors=lr_factors)
 23 |         elif scheme == 'nadam':
 24 |             updates = self.get_updates_nadam(cost, params,
 25 |                                              lr_factors=lr_factors)
 26 |         elif scheme == 'sgd':
 27 |             # updates = self.get_updates_sgd_momentum(cost, params)
 28 |             updates = self.get_updates_sgd_momentum(
 29 |                 cost, params, grad_clip=0.01)
 30 |         else:
 31 |             raise ValueError(
 32 |                 'Select the proper scheme (%s): ' % scheme,
 33 |                 'adagrad / adadelta / rmsprop / adam / nadam / sgd')
 34 | 
 35 |         return updates
 36 | 
 37 |     def get_updates_adagrad(self, cost, params, eps=1e-8):
 38 |         lr = self.lr
 39 |         print(' - Adagrad: lr = %.2e' % (lr.get_value(borrow=True)))
 40 | 
 41 |         grads = T.grad(cost, params)
 42 |         updates = []
 43 | 
 44 |         for p, g in zip(params, grads):
 45 |             value = p.get_value(borrow=True)
 46 |             accu = theano.shared(np.zeros(value.shape, dtype=value.dtype),
 47 |                                  broadcastable=p.broadcastable)
 48 |             accu_new = accu + g ** 2
 49 |             new_p = p - (lr * g / T.sqrt(accu_new + eps))
 50 | 
 51 |             updates.append((accu, accu_new))
 52 |             updates.append((p, new_p))
 53 | 
 54 |         return updates
 55 | 
 56 |     def get_updates_adadelta(self, cost, params, rho=0.95, eps=1e-6):
 57 |         lr = self.lr
 58 |         print(' - Adadelta: lr = %.2e' % (lr.get_value(borrow=True)))
 59 |         one = T.constant(1.)
 60 | 
 61 |         grads = T.grad(cost, params)
 62 |         updates = []
 63 | 
 64 |         for p, g in zip(params, grads):
 65 |             value = p.get_value(borrow=True)
 66 |             # accu: accumulate gradient magnitudes
 67 |             accu = theano.shared(np.zeros(value.shape, dtype=value.dtype),
 68 |                                  broadcastable=p.broadcastable)
 69 |             # delta_accu: accumulate update magnitudes (recursively!)
 70 |             delta_accu = theano.shared(
 71 |                 np.zeros(value.shape, dtype=value.dtype),
 72 |                 broadcastable=p.broadcastable)
 73 | 
 74 |             # update accu (as in rmsprop)
 75 |             accu_new = rho * accu + (one - rho) * g ** 2
 76 |             updates.append((accu, accu_new))
 77 | 
 78 |             # compute parameter update, using the 'old' delta_accu
 79 |             update = (g * T.sqrt(delta_accu + eps) /
 80 |                       T.sqrt(accu_new + eps))
 81 |             new_param = p - lr * update
 82 |             updates.append((p, new_param))
 83 | 
 84 |             # update delta_accu (as accu, but accumulating updates)
 85 |             delta_accu_new = rho * delta_accu + (one - rho) * update ** 2
 86 |             updates.append((delta_accu, delta_accu_new))
 87 | 
 88 |         return updates
 89 | 
 90 |     def get_updates_rmsprop(self, cost, params, rho=0.9, eps=1e-8):
 91 |         lr = self.lr
 92 |         print(' - RMSprop: lr = %.2e' % (lr.get_value(borrow=True)))
 93 |         one = T.constant(1.)
 94 | 
 95 |         grads = T.grad(cost=cost, wrt=params)
 96 | 
 97 |         updates = []
 98 |         for p, g in zip(params, grads):
 99 |             value = p.get_value(borrow=True)
100 |             accu = theano.shared(np.zeros(value.shape, dtype=value.dtype),
101 |                                  broadcastable=p.broadcastable)
102 |             accu_new = rho * accu + (one - rho) * g ** 2
103 |             gradient_scaling = T.sqrt(accu_new + eps)
104 |             g = g / gradient_scaling
105 | 
106 |             updates.append((accu, accu_new))
107 |             updates.append((p, p - lr * g))
108 | 
109 |         return updates
110 | 
111 |     def get_updates_adam(self, cost, params,
112 |                          beta1=0.9, beta2=0.999, epsilon=1e-8,
113 |                          lr_factors=None):
114 |         """
115 |         Adam optimizer.
116 | 
117 |         Parameters
118 |         ----------
119 |             lr: float >= 0. Learning rate.
120 |             beta1/beta2: floats, 0 < beta < 1. Generally close to 1.
121 |             epsilon: float >= 0.
122 | 
123 |         References
124 |         ----------
125 |         [1] Adam - A Method for Stochastic Optimization
126 |         [2] Lasage:
127 |             https://github.com/Lasagne/Lasagne/blob/master/lasagne/updates.py
128 |         """
129 |         lr = self.lr
130 |         print(' - Adam: lr = %.2e' % (lr.get_value(borrow=True)))
131 | 
132 |         one = T.constant(1.)
133 |         self.iterations = theano.shared(
134 |             np.asarray(0., dtype=theano.config.floatX), borrow=True)
135 | 
136 |         grads = T.grad(cost, params)
137 |         updates = [(self.iterations, self.iterations + 1)]
138 | 
139 |         t = self.iterations + 1.
140 |         lr_t = lr * (T.sqrt(one - beta2 ** t) / (one - beta1 ** t))
141 | 
142 |         if not lr_factors:
143 |             lr_factors = [1.0 for i in range(len(params))]
144 |         for p, g, factor in zip(params, grads, lr_factors):
145 |             p_val = p.get_value(borrow=True)
146 |             m = theano.shared(np.zeros(p_val.shape, dtype=p_val.dtype),
147 |                               broadcastable=p.broadcastable)
148 |             v = theano.shared(np.zeros(p_val.shape, dtype=p_val.dtype),
149 |                               broadcastable=p.broadcastable)
150 | 
151 |             m_t = (beta1 * m) + (one - beta1) * g
152 |             v_t = (beta2 * v) + (one - beta2) * g ** 2
153 |             p_t = p - lr_t * factor * m_t / (T.sqrt(v_t) + epsilon)
154 | 
155 |             updates.append((m, m_t))
156 |             updates.append((v, v_t))
157 |             updates.append((p, p_t))
158 | 
159 |         return updates
160 | 
    def get_updates_nadam(self, cost, params,
                          beta1=0.9, beta2=0.999,
                          epsilon=1e-8, schedule_decay=0.004,
                          lr_factors=None):
        """
        Nesterov Adam.
        Keras implementation.
        Much like Adam is essentially RMSprop with momentum,
        Nadam is Adam RMSprop with Nesterov momentum.

        Parameters
        ----------
        cost: scalar expression to minimize.
        params: list of shared variables to update.
        beta1: float
        beta2: float
            0 < beta < 1. Generally close to 1.
        epsilon: float >= 0.
            Added to the denominator of the update.
        schedule_decay: float
            Rate of the momentum warm-up schedule (see the
            momentum_cache_* expressions below).
        lr_factors: optional per-parameter multipliers of the learning
            rate; all 1.0 when omitted or empty.

        The learning rate itself is read from self.lr. Calling this
        method (re)creates self.iterations, self.m_schedule, self.beta1
        and self.beta2 as shared variables and stores schedule_decay on
        the instance.

        Returns
        -------
        List of (shared variable, new value) update pairs.

        References
        ----------
        [1] Nadam report - http://cs229.stanford.edu/proj2015/054_report.pdf
        [2] On the importance of initialization and momentum in deep learning -
            http://www.cs.toronto.edu/~fritz/absps/momentum.pdf
        """
        lr = self.lr
        print(' - Nesterov Adam: lr = %.2e' % (lr.get_value(borrow=True)))

        one = T.constant(1.)
        # step counter, starts at 0 and is incremented by every update
        self.iterations = theano.shared(
            np.asarray(0., dtype=theano.config.floatX), borrow=True)
        # running product of the momentum schedule values, starts at 1
        self.m_schedule = theano.shared(
            np.asarray(1., dtype=theano.config.floatX), borrow=True)
        self.beta1 = theano.shared(
            np.asarray(beta1, dtype=theano.config.floatX), borrow=True)
        self.beta2 = theano.shared(
            np.asarray(beta2, dtype=theano.config.floatX), borrow=True)
        self.schedule_decay = schedule_decay

        grads = T.grad(cost, params)
        updates = [(self.iterations, self.iterations + 1)]

        # 1-based index of the step being computed
        t = self.iterations + 1.

        # Due to the recommendations in [2], i.e. warming momentum schedule
        # (momentum for the current step t and for the next step t + 1)
        momentum_cache_t = self.beta1 * (
            one - 0.5 * (T.pow(0.96, t * self.schedule_decay)))
        momentum_cache_t_1 = self.beta1 * (
            one - 0.5 * (T.pow(0.96, (t + 1.) * self.schedule_decay)))
        # products of the schedule up to step t and up to step t + 1
        m_schedule_new = self.m_schedule * momentum_cache_t
        m_schedule_next = (self.m_schedule * momentum_cache_t *
                           momentum_cache_t_1)
        updates.append((self.m_schedule, m_schedule_new))

        if not lr_factors:
            lr_factors = [1.0 for i in range(len(params))]
        # NOTE: zip stops at the shortest sequence, so a lr_factors list
        # shorter than params leaves the trailing params without updates.
        for p, g, factor in zip(params, grads, lr_factors):
            # print('@', p.name, '-', factor)
            p_val = p.get_value(borrow=True)
            # first and second moment accumulators, zero-initialized
            m = theano.shared(np.zeros(p_val.shape, dtype=p_val.dtype),
                              broadcastable=p.broadcastable)
            v = theano.shared(np.zeros(p_val.shape, dtype=p_val.dtype),
                              broadcastable=p.broadcastable)

            # the following equations given in [1]
            g_prime = g / (one - m_schedule_new)
            m_t = self.beta1 * m + (one - self.beta1) * g
            m_t_prime = m_t / (one - m_schedule_next)
            v_t = self.beta2 * v + (one - self.beta2) * g ** 2
            v_t_prime = v_t / (one - T.pow(self.beta2, t))
            # Nesterov-style mix of the corrected gradient and momentum
            m_t_bar = ((one - momentum_cache_t) * g_prime +
                       momentum_cache_t_1 * m_t_prime)

            updates.append((m, m_t))
            updates.append((v, v_t))

            p_t = p - self.lr * factor * m_t_bar / (T.sqrt(v_t_prime) + epsilon)

            updates.append((p, p_t))
        return updates
241 | 
242 |     def get_updates_sgd_momentum(self, cost, params,
243 |                                  decay_mode=None, decay=0.,
244 |                                  momentum=0.9, nesterov=False,
245 |                                  grad_clip=None, constant_clip=True):
246 |         print(' - SGD: lr = %.2e' % (self.lr.get_value(borrow=True)), end='')
247 |         print(', decay = %.2f' % (decay), end='')
248 |         print(', momentum = %.2f' % (momentum), end='')
249 |         print(', nesterov =', nesterov, end='')
250 |         print(', grad_clip =', grad_clip)
251 | 
252 |         self.grad_clip = grad_clip
253 |         self.constant_clip = constant_clip
254 |         self.iterations = theano.shared(
255 |             np.asarray(0., dtype=theano.config.floatX), borrow=True)
256 | 
257 |         # lr = self.lr_float
258 |         lr = self.lr * (1.0 / (1.0 + decay * self.iterations))
259 |         # lr = self.lr * (decay ** T.floor(self.iterations / decay_step))
260 | 
261 |         updates = [(self.iterations, self.iterations + 1.)]
262 | 
263 |         # Get gradients and apply clipping
264 |         if self.grad_clip is None:
265 |             grads = T.grad(cost, params)
266 |         else:
267 |             assert self.grad_clip > 0
268 |             if self.constant_clip:
269 |                 # Constant clipping using theano.gradient.grad_clip
270 |                 clip = self.grad_clip
271 |                 grads = T.grad(
272 |                     theano.gradient.grad_clip(cost, -clip, clip),
273 |                     params)
274 |             else:
275 |                 # Adaptive clipping
276 |                 clip = self.grad_clip / lr
277 |                 grads_ = T.grad(cost, params)
278 |                 grads = [T.clip(g, -clip, clip) for g in grads_]
279 | 
280 |         for p, g in zip(params, grads):
281 |             # v_prev = theano.shared(p.get_value(borrow=True) * 0.)
282 |             p_val = p.get_value(borrow=True)
283 |             v_prev = theano.shared(np.zeros(p_val.shape, dtype=p_val.dtype),
284 |                                    broadcastable=p.broadcastable)
285 |             v = momentum * v_prev - lr * g
286 |             updates.append((v_prev, v))
287 | 
288 |             if nesterov:
289 |                 new_p = p + momentum * v - lr * g
290 |             else:
291 |                 new_p = p + v
292 | 
293 |             updates.append((p, new_p))
294 |         return updates
295 | 
296 |     def set_learning_rate(self, lr):
297 |         self.lr.set_value(np.asarray(lr, dtype=theano.config.floatX))
298 | 
299 |     def mult_learning_rate(self, factor=0.5):
300 |         new_lr = self.lr.get_value() * factor
301 |         self.lr.set_value(np.asarray(new_lr, dtype=theano.config.floatX))
302 |         print(' * change learning rate to %.2e' % (new_lr))
303 | 


--------------------------------------------------------------------------------
/IQA_DeepQA_FR_release/draw_graph.py:
--------------------------------------------------------------------------------
  1 | from __future__ import absolute_import, division, print_function
  2 | 
  3 | import numpy as np
  4 | import matplotlib
  5 | import matplotlib.pyplot as plt
  6 | import re
  7 | import glob
  8 | import os
  9 | import fnmatch
 10 | import sys
 11 | 
 12 | 
 13 | def read_parse_log(log_file):
 14 |     # Read log file
 15 |     print('Load data: %s' % log_file)
 16 |     with open(log_file, 'r') as l_file:
 17 |         lines = l_file.readlines()
 18 | 
 19 |     # Find the last starting line
 20 |     last_start = -1
 21 |     for idx, line in enumerate(lines):
 22 |         match_date = re.search(r'(\d+/\d+/\d+)', line)
 23 |         if match_date:
 24 |             last_start = idx
 25 |     if last_start < 0:
 26 |         print("Not proper file: %s." % log_file)
 27 |         print("Starting line must contain date 00/00/00.")
 28 |         return
 29 | 
 30 |     # Get time and date
 31 |     match_time = re.search(r'(\d+:\d+:\d+)', lines[last_start])
 32 |     time_str = match_time.group() if match_time else ""
 33 |     match_date = re.search(r'(\d+/\d+/\d+)', lines[last_start])
 34 |     date_str = match_date.group() if match_date else ""
 35 | 
 36 |     # Get labels of data
 37 |     labels = lines[last_start + 1].replace(', ', ' ').split()
 38 |     labels = ["epoch"] + labels
 39 | 
 40 |     # Load data
 41 |     n_label = len(labels)
 42 |     n_data = len(lines) - last_start - 2
 43 |     data = np.zeros((n_data, n_label), dtype=float)
 44 |     for row in range(n_data):
 45 |         line_idx = row + last_start + 2
 46 |         cur = lines[line_idx].replace(', ', ' ').split()
 47 |         if len(cur) != n_label:
 48 |             print("Not proper file: %s." % log_file)
 49 |             print("Data dimension (in line %d)" % (line_idx + 1), end=' ')
 50 |             print("doesn't match to the number of labels.")
 51 |             return
 52 |         for col in range(n_label):
 53 |             data[row, col] = float(cur[col])
 54 | 
 55 |     return labels, data, time_str, date_str
 56 | 
 57 | 
 58 | def draw_log(log_file, out_img_file=None):
 59 |     """
 60 |     Read log_file and draw.
 61 |     """
 62 |     #---------------------------------------------------------
 63 |     # Read log file
 64 |     labels, data, time_str, date_str = read_parse_log(log_file)
 65 | 
 66 |     #---------------------------------------------------------
 67 |     # Get tile shape ~ sqrt(number of figures)
 68 |     n_figure = len(labels) - 1  # except for the first column (epoch)
 69 |     draw_cc = labels[-1] == 'PLCC' and labels[-2] == 'SRCC'
 70 |     if draw_cc:  # if draw_cc, the last figure contains both SRCC and PLCC
 71 |         n_figure = n_figure - 1
 72 |     tile_sh = int(np.ceil(np.sqrt(n_figure)))
 73 |     tile_sh = str(tile_sh) + str(tile_sh)
 74 | 
 75 |     #---------------------------------------------------------
 76 |     # Draw graph
 77 |     style_1 = 'b-'
 78 |     style_2 = 'r-.'
 79 | 
 80 |     matplotlib.rcParams.update({'font.size': 8})
 81 |     plt.figure()
 82 |     plt.suptitle(log_file + ' - ' + time_str + ' ' + date_str)
 83 |     for fig_idx in range(n_figure):
 84 |         if fig_idx == n_figure - 1 and draw_cc:
 85 |             # if draw_cc, the last figure contains both SRCC and PLCC
 86 |             break
 87 |         lab_idx = fig_idx + 1
 88 |         plt.subplot(tile_sh + str(fig_idx + 1))
 89 |         plt.plot(data[:, 0], data[:, lab_idx])
 90 |         plt.title(labels[lab_idx])
 91 |         plt.grid(True)
 92 |         plt.xlim(1, data[-1, 0])
 93 | 
 94 |     if draw_cc:
 95 |         plt.subplot(tile_sh + str(n_figure))
 96 |         plt.plot(data[:, 0], data[:, -1], style_1, label='PLCC')
 97 |         plt.plot(data[:, 0], data[:, -2], style_2, label='SRCC')
 98 |         plt.legend(loc=0)
 99 |         plt.title('CC')
100 |         plt.grid(True)
101 |         plt.xlim(1, data[-1, 0])
102 | 
103 |     plt.tight_layout()
104 |     plt.subplots_adjust(top=0.9)
105 |     if out_img_file:
106 |         print(' - Save to image: %s' % out_img_file)
107 |         plt.savefig(out_img_file, dpi=100)
108 | 
109 | 
def draw_log_train_test(log_file_tr, log_file_te, out_img_file=None):
    """
    Read log_file_tr and log_file_te and draw them together.

    Each quantity gets one sub-plot holding both the train and the test
    curve. If the last two test labels are 'SRCC' and 'PLCC', those two
    test columns share a single sub-plot titled 'CC'.

    Parameters
    ----------
    log_file_tr: str
        Training log file parsed with read_parse_log.
    log_file_te: str
        Testing log file parsed with read_parse_log. It must carry the
        same time and date as the training log, and its labels must
        start with the training labels (it may have two more).
    out_img_file: str or None
        If given, the figure is saved to this path.
    """
    #---------------------------------------------------------
    # Read log files
    labels_tr, data_tr, time_str, date_str = read_parse_log(log_file_tr)
    labels_te, data_te, time_str_te, date_str_te = read_parse_log(log_file_te)

    assert time_str == time_str_te
    assert date_str == date_str_te
    assert (len(labels_tr) == len(labels_te) or
            len(labels_tr) == len(labels_te) - 2)
    for idx in range(len(labels_tr)):
        assert labels_tr[idx] == labels_te[idx]

    #---------------------------------------------------------
    # Get tile shape ~ sqrt(number of figures)
    n_figure = len(labels_te) - 1  # except for the first column (epoch)
    draw_cc = labels_te[-1] == 'PLCC' and labels_te[-2] == 'SRCC'
    if draw_cc:  # if draw_cc, the last figure contains both SRCC and PLCC
        n_figure = n_figure - 1
    n_tile = int(np.ceil(np.sqrt(n_figure)))

    #---------------------------------------------------------
    # Draw graph
    style_1 = 'b-'
    style_2 = 'r-.'

    matplotlib.rcParams.update({'font.size': 8})
    plt.figure()
    plt.suptitle(log_file_tr + ' - ' + time_str + ' ' + date_str)

    # if draw_cc, the last tile is reserved for the combined CC figure
    n_single = n_figure - 1 if draw_cc else n_figure
    for fig_idx in range(n_single):
        lab_idx = fig_idx + 1
        # Pass (rows, cols, index) explicitly: a spec built by joining
        # digits stops being a valid 3-digit spec once the index is >= 10.
        plt.subplot(n_tile, n_tile, fig_idx + 1)
        plt.plot(data_tr[:, 0], data_tr[:, lab_idx], style_1, label='train')
        plt.plot(data_te[:, 0], data_te[:, lab_idx], style_2, label='test')
        plt.legend(loc=0)
        plt.title(labels_te[lab_idx])
        plt.grid(True)
        plt.xlim(1, data_te[-1, 0])

    if draw_cc:
        plt.subplot(n_tile, n_tile, n_figure)
        plt.plot(data_te[:, 0], data_te[:, -1], style_1, label='PLCC')
        plt.plot(data_te[:, 0], data_te[:, -2], style_2, label='SRCC')
        plt.legend(loc=0)
        plt.title('CC')
        plt.grid(True)
        plt.xlim(1, data_te[-1, 0])

    plt.tight_layout()
    plt.subplots_adjust(top=0.9)
    if out_img_file:
        print(' - Save to image: %s' % out_img_file)
        plt.savefig(out_img_file, dpi=100)
171 | 
172 | 
def draw_all_logs(root_path, keywords=['test'], show_figs=False):
    """
    Draw every '*.txt' log below root_path that matches a keyword.

    Each image is written into root_path; its name is built from the
    log's sub-directory (path separators replaced by '_') followed by
    the log file name with '.png' in place of '.txt'. Logs that fail to
    draw are collected and listed at the end.

    Parameters
    ----------
    root_path: str
        Directory that is searched recursively.
    keywords: list of str
        A log is drawn if any keyword occurs in its name.
        NOTE: on Python >= 3.5 the keywords are matched against the whole
        path returned by glob, while the os.walk fallback matches the
        base name only.
    show_figs: boolean
        If True, call plt.show() after drawing.
    """
    log_file_list = []
    if sys.version_info >= (3, 5):
        # Join the components so that '**' is a path component of its
        # own; appended directly to a root without a trailing separator
        # it would not recurse.
        pattern = os.path.join(root_path, "**", "*.txt")
        for filename in glob.iglob(pattern, recursive=True):
            if any(word in filename for word in keywords):
                log_file_list.append(filename)
    else:
        for root, dirnames, filenames in os.walk(root_path):
            for filename in fnmatch.filter(filenames, '*.txt'):
                if any(word in filename for word in keywords):
                    log_file_list.append(os.path.join(root, filename))

    pass_list = []
    for log_file in log_file_list:
        try:
            head, tail = os.path.split(log_file)
            prefix = head[len(root_path):]
            prefix = prefix.replace('\\', '_').replace('/', '_')
            draw_log(log_file,
                     os.path.join(root_path, prefix + tail[:-4] + '.png'))
        except Exception:
            # Best effort: remember the log that failed and keep going.
            # Exception (not a bare except) lets KeyboardInterrupt and
            # SystemExit propagate.
            pass_list.append(log_file)

    if pass_list:
        print(" @ Ignored file list:")
        for name in pass_list:
            print(" - %s\n" % name)

    if show_figs:
        plt.show()
204 | 
205 | 
def draw_all_logs_train_test(root_path, show_figs=False):
    """
    Draw every train/test log pair found below root_path.

    A '*.txt' file matching the keyword 'log_test' is taken as a test
    log; the matching train log is the same path with its last nine
    characters replaced by '.txt'. Each image is written into root_path;
    its name is built from the train log's sub-directory (path
    separators replaced by '_') followed by the train log file name with
    '.png' in place of '.txt'. Pairs that fail to draw are collected and
    listed at the end.

    Parameters
    ----------
    root_path: str
        Directory that is searched recursively.
    show_figs: boolean
        If True, call plt.show() after drawing.
    """
    keywords = ['log_test']
    tr_log_file_list = []
    te_log_file_list = []
    if sys.version_info >= (3, 5):
        # Join the components so that '**' is a path component of its
        # own; appended directly to a root without a trailing separator
        # it would not recurse.
        pattern = os.path.join(root_path, "**", "*.txt")
        for filename in glob.iglob(pattern, recursive=True):
            if any(word in filename for word in keywords):
                te_log_file_list.append(filename)
                tr_log_file_list.append(filename[:-9] + ".txt")
    else:
        for root, dirnames, filenames in os.walk(root_path):
            for filename in fnmatch.filter(filenames, '*.txt'):
                if any(word in filename for word in keywords):
                    name = os.path.join(root, filename)
                    te_log_file_list.append(name)
                    tr_log_file_list.append(name[:-9] + ".txt")

    pass_list = []
    for tr_log_file, te_log_file in zip(tr_log_file_list, te_log_file_list):
        try:
            head, tail = os.path.split(tr_log_file)
            prefix = head[len(root_path):]
            prefix = prefix.replace('\\', '_').replace('/', '_')
            draw_log_train_test(
                tr_log_file, te_log_file,
                os.path.join(root_path, prefix + tail[:-4] + '.png'))
        except Exception:
            # Best effort: remember the pair that failed and keep going.
            # Exception (not a bare except) lets KeyboardInterrupt and
            # SystemExit propagate.
            pass_list.append(tr_log_file)

    if pass_list:
        print(" @ Ignored file list:")
        for name in pass_list:
            print(" - %s\n" % name)

    if show_figs:
        plt.show()
248 | 
249 | # def draw_log(log_file, out_img_file=None):
250 | #     """
251 | #     Read log_file and draw.
252 | #     """
253 | #     #---------------------------------------------------------
254 | #     # Read log file
255 | #     labels, data, time_str, date_str = read_parse_log(log_file)
256 | 
257 | #     print('Load data: %s' % log_file)
258 | #     with open(log_file, 'r') as l_file:
259 | #         lines = l_file.readlines()
260 | 
261 | #     # Find the last starting line
262 | #     last_start = -1
263 | #     for idx, line in enumerate(lines):
264 | #         match_date = re.search(r'(\d+/\d+/\d+)', line)
265 | #         if match_date:
266 | #             last_start = idx
267 | #     if last_start < 0:
268 | #         print("Not proper file: %s." % log_file)
269 | #         print("Starting line must contain date 00/00/00.")
270 | #         return
271 | 
272 | #     # Get time and date
273 | #     match_time = re.search(r'(\d+:\d+:\d+)', lines[last_start])
274 | #     time_str = match_time.group() if match_time else ""
275 | #     match_date = re.search(r'(\d+/\d+/\d+)', lines[last_start])
276 | #     date_str = match_date.group() if match_date else ""
277 | #     gen_title = log_file + ' - ' + time_str + ' ' + date_str
278 | 
279 | #     # Get labels of data
280 | #     labels = lines[last_start + 1].replace(', ', ' ').split()
281 | #     labels = ["epoch"] + labels
282 | 
283 | #     # Load data
284 | #     n_label = len(labels)
285 | #     n_data = len(lines) - last_start - 2
286 | #     data = np.zeros((n_data, n_label), dtype=float)
287 | #     for row in range(n_data):
288 | #         line_idx = row + last_start + 2
289 | #         cur = lines[line_idx].replace(', ', ' ').split()
290 | #         if len(cur) != n_label:
291 | #             print("Not proper file: %s." % log_file)
292 | #             print("Data dimension (in line %d)" % (line_idx + 1), end=' ')
293 | #             print("doesn't match to the number of labels.")
294 | #             return
295 | #         for col in range(n_label):
296 | #             data[row, col] = float(cur[col])
297 | 
298 | #     #---------------------------------------------------------
299 | #     # Get tile shape ~ sqrt(number of figures)
300 | #     n_figure = n_label - 1
301 | #     draw_cc = labels[-1] == 'PLCC' and labels[-2] == 'SRCC'
302 | #     if draw_cc:  # if draw_cc, the last figure contains both SRCC and PLCC
303 | #         n_figure = n_figure - 1
304 | #     tile_sh = int(np.ceil(np.sqrt(n_figure)))
305 | #     tile_sh = str(tile_sh) + str(tile_sh)
306 | 
307 | #     #---------------------------------------------------------
308 | #     # Draw graph
309 | #     matplotlib.rcParams.update({'font.size': 8})
310 | #     plt.figure()
311 | #     plt.suptitle(gen_title)
312 | #     for fig_idx in range(n_figure):
313 | #         if fig_idx == n_figure - 1 and draw_cc:
314 | #             # if draw_cc, the last figure contains both SRCC and PLCC
315 | #             break
316 | #         plt.subplot(tile_sh + str(fig_idx + 1))
317 | #         lab_idx = fig_idx + 1
318 | #         plt.plot(data[:, 0], data[:, lab_idx])
319 | #         plt.title(labels[lab_idx])
320 | #         plt.grid(True)
321 | #         plt.xlim(1, data[-1, 0])
322 | 
323 | #     if draw_cc:
324 | #         plt.subplot(tile_sh + str(n_figure))
325 | #         plt.plot(data[:, 0], data[:, -1], 'b-x', label='PLCC')
326 | #         plt.plot(data[:, 0], data[:, -2], 'r-.', label='SRCC')
327 | #         plt.legend(loc=0)
328 | #         plt.title('CC')
329 | #         plt.grid(True)
330 | #         plt.xlim(1, data[-1, 0])
331 | 
332 | #     plt.tight_layout()
333 | #     plt.subplots_adjust(top=0.9)
334 | #     if out_img_file:
335 | #         print(' - Save to image: %s' % out_img_file)
336 | #         plt.savefig(out_img_file, dpi=100)
337 | 


--------------------------------------------------------------------------------
/IQA_DeepQA_FR_release/models/model_basis.py:
--------------------------------------------------------------------------------
  1 | from __future__ import absolute_import, division, print_function
  2 | 
  3 | import pickle
  4 | from collections import OrderedDict
  5 | 
  6 | import numpy as np
  7 | import theano.tensor as T
  8 | from functools import reduce
  9 | 
 10 | from .. import optimizer
 11 | from ..layers import layers
 12 | 
 13 | 
 14 | class ModelBasis(object):
 15 |     """
 16 |     Arguments
 17 |         model_config: model configuration dictionary
 18 | 
 19 |     Attributes of model_config
 20 |         input_size: input image size, (height, width).
 21 |         num_ch: number of input channels
 22 |         lr: initial learning rate
 23 |     """
 24 | 
 25 |     def __init__(self, model_config={}, rng=None):
 26 |         # Make input_shape
 27 |         self.input_size = tuple(model_config.get('input_size', None))
 28 |         assert len(self.input_size) == 2
 29 |         self.num_ch = model_config.get('num_ch', None)
 30 |         assert self.num_ch is not None
 31 |         self.input_shape = (None, self.num_ch) + self.input_size
 32 | 
 33 |         # Get optimizer
 34 |         self.opt = optimizer.Optimizer()
 35 |         self.set_opt_configs(model_config)
 36 | 
 37 |         # Initialize variables
 38 |         self.layers = OrderedDict()
 39 |         self.params = OrderedDict()
 40 | 
 41 |     def set_opt_configs(self, model_config=None, opt_scheme=None, lr=None):
 42 |         if model_config is None:
 43 |             assert lr is not None and opt_scheme is not None
 44 |         else:
 45 |             lr = float(model_config.get('lr', 1e-3))
 46 |             opt_scheme = model_config.get('opt_scheme', 'adam')
 47 |         self.lr = lr
 48 |         self.opt_scheme = opt_scheme
 49 |         self.opt.set_learning_rate(self.lr)
 50 | 
 51 |     ###########################################################################
 52 |     # Functions for cost calculation
 53 | 
 54 |     def get_l2_regularization(self, layer_keys=None, mode='sum',
 55 |                               attr_list=['W', 'gamma']):
 56 |         if layer_keys is None:
 57 |             layer_keys = list(self.layers.keys())
 58 |         l2 = []
 59 |         if mode == 'sum':
 60 |             for key in layer_keys:
 61 |                 for layer in self.layers[key]:
 62 |                     for attr in attr_list:
 63 |                         if hasattr(layer, attr):
 64 |                             l2.append(T.sum(getattr(layer, attr) ** 2))
 65 |             return T.sum(l2)
 66 |         elif mode == 'mean':
 67 |             for key in layer_keys:
 68 |                 for layer in self.layers[key]:
 69 |                     for attr in attr_list:
 70 |                         if hasattr(layer, attr):
 71 |                             l2.append(T.sum(getattr(layer, attr) ** 2))
 72 |             return T.mean(l2)
 73 |         else:
 74 |             raise NotImplementedError
 75 | 
 76 |     def get_mse(self, x, y, return_map=False):
 77 |         if return_map:
 78 |             return (x - y) ** 2
 79 |         else:
 80 |             # return T.mean(((x - y) ** 2).flatten(2), axis=1)
 81 |             return T.mean((x - y) ** 2)
 82 | 
 83 |     def add_all_weighted_losses(self, losses, weights):
 84 |         """Add the losses with the weights multiplied.
 85 |         If the weight is 0, the corresponding loss is ignored.
 86 |         """
 87 |         assert len(losses) == len(weights)
 88 |         loss_list = []
 89 |         for loss, weight in zip(losses, weights):
 90 |             if weight != 0:
 91 |                 loss_list.append(weight * loss)
 92 |         return reduce(lambda x, y: x + y, loss_list)
 93 | 
 94 |     ###########################################################################
 95 |     # Functions to help build layers
 96 | 
 97 |     def last_sh(self, key, nth=-1):
 98 |         """Get the `nth` output shape in the `key` layers
 99 |         """
100 |         assert len(self.layers[key]) > 0, "No layers in the key: %s" % key
101 |         idx = len(self.layers[key]) + nth if nth < 0 else nth
102 |         out_sh = None
103 |         while out_sh is None:
104 |             if idx < 0:
105 |                 out_sh = self.input_shape
106 |             out_sh = self.layers[key][idx].get_out_shape()
107 |             idx = idx - 1
108 |         return out_sh
109 | 
110 |     def get_concat_shape(self, key0, key1):
111 |         """Get the concatenated shape of the outputs of
112 |         `key0` and `key1` layers
113 |         """
114 |         prev_sh0 = self.last_sh(key0)
115 |         prev_sh1 = self.last_sh(key1)
116 |         if isinstance(prev_sh0, (list, tuple)):
117 |             assert prev_sh0[0] == prev_sh1[0]
118 |             assert prev_sh0[2:] == prev_sh1[2:]
119 |             return (prev_sh0[0], prev_sh0[1] + prev_sh1[1]) + prev_sh0[2:]
120 |         else:
121 |             return prev_sh0 + prev_sh1
122 | 
123 |     def image_vec_to_tensor(self, input):
124 |         """Reshape input into 4D tensor.
125 |         """
126 |         # im_sh = (-1, self.input_size[0],
127 |         #          self.input_size[1], self.num_ch)
128 |         # return input.reshape(im_sh).dimshuffle(0, 3, 1, 2)
129 |         return input.dimshuffle(0, 3, 1, 2)
130 | 
131 | 
132 |     ###########################################################################
133 | 
134 |     def get_key_layers_output(self, input, key, var_shape=False):
135 |         """Put `input` to the `key` layers and return the final output.
136 |         """
137 |         prev_out = input
138 |         for layer in self.layers[key]:
139 |             prev_out = layer.get_output(prev_out, var_shape=var_shape)
140 |         return prev_out
141 | 
142 |     def get_updates(self, cost, wrt_params):
143 |         return self.opt.get_updates_cost(cost, wrt_params, self.opt_scheme)
144 | 
145 |     def get_updates_keys(self, cost, keys=[], params=[],
146 |                          params_lr_factors=None):
147 |         wrt_params = []
148 |         for key in keys:
149 |             wrt_params += self.params[key]
150 |         if params:
151 |             wrt_params += params
152 | 
153 |         lr_factors = None
154 |         if params_lr_factors:
155 |             lr_factors = []
156 |             for key in keys:
157 |                 lr_factors += params_lr_factors[key]
158 |             assert len(wrt_params) == len(lr_factors)
159 | 
160 |             # remove factors of 0
161 |             new_wrt_params = []
162 |             new_lr_factors = []
163 |             for idx in range(len(wrt_params)):
164 |                 if lr_factors[idx] > 0.0:
165 |                     new_wrt_params.append(wrt_params[idx])
166 |                     new_lr_factors.append(lr_factors[idx])
167 |             wrt_params = new_wrt_params
168 |             lr_factors = new_lr_factors
169 | 
170 |         print(' - Update w.r.t.: %s' % ', '.join(keys))
171 |         return self.opt.get_updates_cost(cost, wrt_params, self.opt_scheme,
172 |                                          lr_factors)
173 | 
174 |     ###########################################################################
175 |     # Functions to control batch normalization and dropout layers
176 | 
177 |     def get_batch_norm_layers(self, keys=[]):
178 |         # For the first call, generate bn_layers
179 |         if not hasattr(self, 'bn_layers'):
180 |             self.bn_layers = {}
181 |             for key in list(self.layers.keys()):
182 |                 self.bn_layers[key] = []
183 |                 for layer in self.layers[key]:
184 |                     if layer.__class__.__name__ == 'BatchNormLayer':
185 |                         self.bn_layers[key].append(layer)
186 | 
187 |         layers = []
188 |         for key in keys:
189 |             layers += self.bn_layers[key]
190 |         return layers
191 | 
192 |     def set_batch_norm_update_averages(self, update_averages, keys=[]):
193 |         # if update_averages:
194 |         #     print(' - Batch norm: update the stored averages')
195 |         # else:
196 |         #     print(' - Batch norm: not update the stored averages')
197 |         layers = self.get_batch_norm_layers(keys)
198 |         for layer in layers:
199 |             layer.update_averages = update_averages
200 | 
201 |     def set_batch_norm_training(self, training, keys=[]):
202 |         # if training:
203 |         #     print(' - Batch norm: use mini-batch statistics')
204 |         # else:
205 |         #     print(' - Batch norm: use the stored statistics')
206 |         layers = self.get_batch_norm_layers(keys)
207 |         for layer in layers:
208 |             layer.deterministic = not training
209 | 
210 |     def set_dropout_on(self, training):
211 |         layers.DropoutLayer.set_dropout_training(training)
212 | 
213 |     def set_training_mode(self, training):
214 |         """Decide the behavior of batch normalization and dropout.
215 |         Parameters
216 |         ----------
217 |         training: boolean
218 |             if True, training mode / False: testing mode.
219 |         """
220 |         # Decide behaviors of the model during training
221 |         # Batch normalization
222 |         l_keys = [key for key in list(self.layers.keys())]
223 |         self.set_batch_norm_update_averages(training, l_keys)
224 |         self.set_batch_norm_training(training, l_keys)
225 | 
226 |         # Dropout
227 |         self.set_dropout_on(training)
228 | 
229 |     ###########################################################################
230 |     # Functions to help deal with parameters of the model
231 | 
232 |     def make_param_list(self):
233 |         """collect all the parameters from `self.layers`
234 |         """
235 |         self.params, self.bn_layers = {}, {}
236 | 
237 |         for key in list(self.layers.keys()):
238 |             self.params[key] = []
239 |             self.bn_layers[key] = []
240 |             for layer in self.layers[key]:
241 |                 if layer.get_params():
242 |                     self.params[key] += layer.get_params()
243 |                 if layer.__class__.__name__ == 'BatchNormLayer':
244 |                     self.bn_layers[key].append(layer)
245 | 
246 |     def get_lr_factors_of_params(self, lr_factors_dict):
247 |         """collect all the parameters from `self.layers`
248 |         """
249 |         params_lr_factors = {}
250 |         for key in list(self.layers.keys()):
251 |             params_lr_factors[key] = []
252 |             for layer in self.layers[key]:
253 |                 for p in layer.get_params():
254 |                     params_lr_factors[key].append(
255 |                         lr_factors_dict.get(layer.name, 1.0))
256 |         return params_lr_factors
257 | 
258 |     def show_num_params(self):
259 |         """Dislay the number of parameters for each layer_key.
260 |         """
261 |         paramscnt = {}
262 |         for key in list(self.layers.keys()):
263 |             paramscnt[key] = 0
264 |             for p in self.params[key]:
265 |                 paramscnt[key] += np.prod(p.get_value(borrow=True).shape)
266 |             if paramscnt[key] > 0:
267 |                 print(' - Num params %s:' % key, '{:,}'.format(paramscnt[key]))
268 | 
269 |     def get_params(self, layer_keys=None):
270 |         """Get concatenated parameter list
271 |         from layers belonging to layer_keys"""
272 |         if layer_keys is None:
273 |             layer_keys = list(self.layers.keys())
274 | 
275 |         params = []
276 |         bn_mean_std = []
277 |         for key in layer_keys:
278 |             params += self.params[key]
279 | 
280 |         for key in layer_keys:
281 |             for layer in self.bn_layers[key]:
282 |                 bn_mean_std += layer.statistics
283 |         params += bn_mean_std
284 |         return params
285 | 
286 |     def save(self, filename):
287 |         """Save parameters to file.
288 |         """
289 |         params = self.get_params()
290 |         with open(filename, 'wb') as f:
291 |             pickle.dump(params, f, protocol=2)
292 |             # pickle.dump(params, f, protocol=pickle.HIGHEST_PROTOCOL)
293 |         print(' = Save params: %s' % (filename))
294 | 
295 |     def load(self, filename):
296 |         """Load parameters from file.
297 |         """
298 |         params = self.get_params()
299 |         with open(filename, 'rb') as f:
300 |             newparams = pickle.load(f)
301 | 
302 |         assert len(newparams) == len(params)
303 |         for p, new_p in zip(params, newparams):
304 |             if p.name != new_p.name:
305 |                 print((' @ WARNING: Different name - (loaded) %s != %s'
306 |                       % (new_p.name, p.name)))
307 |             new_p_sh = new_p.get_value(borrow=True).shape
308 |             p_sh = p.get_value(borrow=True).shape
309 |             if p_sh != new_p_sh:
310 |                 # print(new_p.name, p_sh, new_p_sh)
311 |                 print(' @ WARNING: Different shape %s - (loaded)' % new_p.name,
312 |                       new_p_sh, end='')
313 |                 print(' !=', p_sh)
314 |                 continue
315 |             p.set_value(new_p.get_value())
316 |         print(' = Load all params: %s ' % (filename))
317 | 
318 |     def load_params_keys(self, layer_keys, filename):
319 |         """Load the selected parameters from file.
320 |         Parameters from layers belong to layer_keys.
321 |         """
322 |         print(' = Load params: %s (keys = %s)' % (
323 |             filename, ', '.join(layer_keys)))
324 |         to_params = self.get_params(layer_keys)
325 |         with open(filename, 'rb') as f:
326 |             from_params = pickle.load(f)
327 | 
328 |         # Copy the params having same shape and name
329 |         copied_idx = []
330 |         for fidx, f_param in enumerate(from_params):
331 |             f_val = f_param.get_value(borrow=True)
332 |             for tidx, t_param in enumerate(to_params):
333 |                 t_val = t_param.get_value(borrow=True)
334 |                 if f_val.shape == t_val.shape and f_param.name == t_param.name:
335 |                     t_param.set_value(f_val)
336 |                     del to_params[tidx]
337 |                     copied_idx.append(fidx)
338 |                     break
339 |         # print(' = Copied from_param: ', [
340 |         #     from_params[idx] for idx in copied_idx])
341 |         if to_params:
342 |             print(' = Not existing to_param: ', to_params)
343 | 


--------------------------------------------------------------------------------
/IQA_DeepQA_FR_release/data_load/dataset.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | 
  3 | 
  4 | class Dataset(object):
  5 |     """
  6 |     Dataset class containing images, scores, and supplementary information
  7 |     for image quality assessment.
  8 | 
  9 |     Attributes
 10 |     ----------
 11 |     dis_data: 4D numpy array
 12 |         distorted image patches
 13 |     ref_data: 4D numpy array (optional)
 14 |         reference image patches
 15 |     dis2ref_idx: 1D numpy array (optional)
 16 |         index to ref. patcehs of dis. patches
 17 |     loc_data: 4D numpy array (optional)
 18 |         local quality scores
 19 |     score_data: 2D numpy array
 20 |         subjective score list
 21 |     npat_img_list: 2D numpy array
 22 |         number of patches of each image
 23 |     pat2img_idx_list: 2D numpy array
 24 |         start and end indices list of images
 25 |     """
 26 |     def __init__(self):
 27 |         # Data
 28 |         self.dis_data = None  # distorted image patches
 29 |         self.ref_data = None  # reference image patches
 30 |         self.dis2ref_idx = None  # index to ref. patcehs of dis. patches
 31 |         self.loc_data = None  # local scoes
 32 |         self.score_data = None  # subjective score list
 33 |         self.n_patches = 0
 34 | 
 35 |         # Data for image-wise training
 36 |         self.npat_img_list = None  # number of patches of each image
 37 |         self.pat2img_idx_list = None  # start and end indices of each image
 38 |         self.filt_idx_list = None  # filtered indices list of each image
 39 |         self.n_images = 0
 40 | 
 41 |         # Configurations
 42 |         self.shuffle = False
 43 |         self.imagewise = False
 44 | 
 45 |         self.exist_ref = False
 46 |         self.exist_loc = False
 47 |         self.exist_score = False
 48 |         self.exist_npat = False
 49 |         self.exist_filt_idx = False
 50 | 
 51 |         # Variables
 52 |         self.epochs_completed = 0
 53 |         self.index_in_epoch = 0
 54 |         self.rand_imidx_list = None
 55 | 
 56 |         # Data configuration
 57 |         self.num_ch = None
 58 |         self.patch_size = None
 59 |         self.patch_step = None
 60 |         self.random_crops = None
 61 |         self.loc_size = None
 62 | 
 63 |     def set_patch_config(self, patch_step=None, random_crops=None):
 64 |         assert patch_step is not None and random_crops is not None
 65 |         if patch_step is not None:
 66 |             assert isinstance(patch_step, (list, tuple))
 67 |         self.patch_step = patch_step
 68 |         self.random_crops = random_crops
 69 | 
 70 |     def put_data(self, dis_data, ref_data=[],
 71 |                  dis2ref_idx=None, loc_data=[],
 72 |                  score_data=None, npat_img_list=None, filt_idx_list=None,
 73 |                  imagewise=True, shuffle=False):
 74 |         """Construct a Dataset.
 75 | 
 76 |         Parameters
 77 |         ----------
 78 |         dis_data: 4D numpy array
 79 |             distorted image patches
 80 |         ref_data: 4D numpy array (optional)
 81 |             reference image patches
 82 |         dis2ref_idx: 1D numpy array (optional)
 83 |             index to ref. patcehs of dis. patches
 84 |         loc_data: 4D numpy array (optional)
 85 |             local quality scores
 86 |         npat_img_list: list
 87 |             number of patches of each image
 88 |         score_data: 1D numpy array
 89 |             subjective score of each image or patch
 90 |         imagewise: boolean
 91 |             if True, next_batch returns the grouped image patches
 92 |             using pat2img_idx_list
 93 |         shuffle: boolean
 94 |             if True, shuffle the dataset
 95 |         """
 96 |         # dis_data
 97 |         self.dis_data = dis_data
 98 |         if isinstance(self.dis_data, list):
 99 |             self.dis_data = np.asarray(self.dis_data, 'float32')
100 |         if len(self.dis_data[0].shape) < 3:
101 |             self.dis_data = np.expand_dims(self.dis_data, 3)
102 | 
103 |         self.n_patches = self.dis_data.shape[0]
104 |         self.patch_size = (
105 |             self.dis_data.shape[1], self.dis_data.shape[2])
106 |         self.num_ch = self.dis_data.shape[3]
107 | 
108 |         # ref_data
109 |         if ref_data:
110 |             self.exist_ref = True
111 |             self.ref_data = ref_data
112 |             if isinstance(self.ref_data, list):
113 |                 self.ref_data = np.asarray(self.ref_data, 'float32')
114 |             assert len(self.dis_data[0].shape) == len(self.ref_data[0].shape)
115 |             if len(self.ref_data[0].shape) < 3:
116 |                 self.ref_data = np.expand_dims(self.ref_data, 3)
117 | 
118 |             assert dis2ref_idx is not None
119 |             self.dis2ref_idx = np.asarray(dis2ref_idx, 'int32')
120 |         else:
121 |             self.exist_ref = False
122 | 
123 |         # loc_data
124 |         if loc_data:
125 |             self.exist_loc = True
126 |             self.loc_data = loc_data
127 |             if isinstance(self.loc_data, list):
128 |                 self.loc_data = np.asarray(self.loc_data, 'float32')
129 |             if len(self.loc_data[0].shape) < 3:
130 |                 self.loc_data = np.expand_dims(self.loc_data, 3)
131 | 
132 |             self.loc_size = (
133 |                 self.loc_data.shape[1], self.loc_data.shape[2])
134 |         else:
135 |             self.exist_loc = False
136 | 
137 |         # score_data
138 |         if score_data is not None:
139 |             self.exist_score = True
140 |             self.score_data_org = np.asarray(score_data, 'float32')
141 |             self.score_data = self.score_data_org.copy()
142 |             self.n_images = self.score_data.shape[0]
143 |         else:
144 |             self.exist_score = False
145 | 
146 |         self.imagewise = imagewise
147 | 
148 |         # npat_img_list
149 |         if npat_img_list is not None:
150 |             self.exist_npat = True
151 |             self.npat_img_list = np.asarray(npat_img_list, 'int32')
152 |             if self.n_images == 0:
153 |                 self.n_images = self.npat_img_list.shape[0]
154 |             self.pat2img_idx_list = self.gen_pat2img_idx_list()
155 | 
156 |             if not self.imagewise and self.exist_score:
157 |                 self.score_data = self.gen_patchwise_scores()
158 |         else:
159 |             self.exist_npat = False
160 | 
161 |         if self.n_images == 0:
162 |             self.n_images = self.n_patches
163 | 
164 |         # filt_idx_list
165 |         if filt_idx_list is not None:
166 |             self.exist_filt_idx = True
167 |             self.filt_idx_list = filt_idx_list
168 |         else:
169 |             self.exist_filt_idx = False
170 | 
171 |         self.n_data = self.n_images if self.imagewise else self.n_patches
172 | 
173 |         self.shuffle = shuffle
174 |         if self.shuffle:
175 |             self.rand_imidx_list = np.random.permutation(self.n_data)
176 |         else:
177 |             self.rand_imidx_list = np.arange(self.n_data)
178 | 
179 |         self.validate_datasize()
180 | 
181 |     def set_imagewise(self):
182 |         """Set this Dataset for imagewise training and testing
183 |         """
184 |         if self.imagewise is False:
185 |             self.imagewise = True
186 |             self.n_data = self.n_images
187 |             if self.exist_score and self.exist_npat:
188 |                 self.score_data = self.score_data_org.copy()
189 | 
190 |         # Reset batch to generate prpoer rand_imidx_list
191 |         self.reset_batch()
192 | 
193 |     def set_patchwise(self):
194 |         """Set this Dataset for patchwise training and testing
195 |         """
196 |         if self.imagewise is True:
197 |             self.imagewise = False
198 |             self.n_data = self.n_patches
199 |             if self.exist_score and self.exist_npat:
200 |                 self.score_data = self.gen_patchwise_scores()
201 | 
202 |         # Reset batch to generate prpoer rand_imidx_list
203 |         self.reset_batch()
204 | 
205 |     def validate_datasize(self):
206 |         # if self.exist_ref:
207 |         #     assert self.n_patches == self.ref_data.shape[0], (
208 |         #         'dis_data.shape: %s ref_data.shape: %s' % (
209 |         #             self.dis_data.shape, self.ref_data.shape))
210 | 
211 |         if self.exist_loc:
212 |             assert self.n_patches == self.loc_data.shape[0], (
213 |                 'dis_data.shape: %s loc_data.shape: %s' % (
214 |                     self.dis_data.shape, self.loc_data.shape))
215 | 
216 |         if self.exist_npat:
217 |             # assert self.exist_score
218 |             assert self.n_images == self.npat_img_list.shape[0], (
219 |                 'n_score_data: %d != n_npat_img_list: %d' % (
220 |                     self.n_images, self.npat_img_list.shape[0]))
221 | 
222 |         # if self.imagewise:
223 |         #     assert self.exist_npat
224 |         # else:
225 |         #     assert self.n_patches == self.score_data.shape[0], (
226 |         #         'n_patches: %d != n_score_data: %d' %
227 |         #         (self.n_patches, self.score_data.shape[0]))
228 | 
229 |     def gen_pat2img_idx_list(self):
230 |         """
231 |         Generate pat2img_idx_list from npat_img_list
232 |         """
233 |         pat2img_idx_list = np.zeros((self.n_patches, 2), dtype='int32')
234 |         n_patches = 0
235 |         for im_idx in range(self.n_images):
236 |             (cur_npat, ny, nx) = self.npat_img_list[im_idx]
237 |             pat2img_idx_list[im_idx] = [n_patches, n_patches + cur_npat]
238 |             n_patches += cur_npat
239 |         assert n_patches == self.n_patches, (
240 |             'obtained n_patches(%d) ~= n_patches(%d)' % (
241 |                 n_patches, self.n_patches))
242 | 
243 |         return pat2img_idx_list
244 | 
245 |     def gen_patchwise_scores(self):
246 |         """
247 |         Generate patch-wise training targets by expanding
248 |         image-wise score_data using pat2img_idx_list
249 |         """
250 |         new_scores = np.zeros(self.n_patches, dtype='float32')
251 |         for im_idx in range(self.n_images):
252 |             cur_idx_from, cur_idx_to = self.pat2img_idx_list[im_idx]
253 |             new_scores[cur_idx_from:cur_idx_to] = self.score_data[im_idx]
254 | 
255 |         return new_scores
256 | 
257 |     def next_batch(self, batch_size):
258 |         """
259 |         Return the next `batch_size` examples from this dataset.
260 |         Parameters
261 |         ----------
262 |         batch_size: integer
263 |             number of images (imagewise) or patches (patchwise) of
264 |             current batch
265 | 
266 |         Returns
267 |         -------
268 |         A dictionary containing:
269 |         - 'dis_data': 4D numpy array
270 |             distorted image patches
271 |         - 'ref_data': 4D numpy array (optional)
272 |             reference image patches
273 |         - 'loc_data': 4D numpy array (optional)
274 |             local quality scores
275 |         - 'score_data': 2D numpy array
276 |             subjective score list
277 |         - 'bat2img_idx_set': 2D numpy array (optional - imagewise)
278 |             from and to indices of each image in the current batch
279 |         - 'n_data': integer (optional - imagewise)
280 |             number of patches in the current batch
281 |         """
282 |         assert batch_size <= self.n_data
283 | 
284 |         start = self.index_in_epoch
285 |         self.index_in_epoch += batch_size
286 |         if self.index_in_epoch > self.n_data:
287 |             # Finished epoch
288 |             self.epochs_completed += 1
289 | 
290 |             # Shuffle the data
291 |             if self.shuffle:
292 |                 self.rand_imidx_list = np.random.permutation(self.n_data)
293 | 
294 |             # Start next epoch
295 |             start = 0
296 |             self.index_in_epoch = batch_size
297 |         end = self.index_in_epoch
298 |         self.im_idx_list = self.rand_imidx_list[start:end]
299 | 
300 |         if self.imagewise:
301 |             # If image-wise training
302 |             # Get bat2img_idx_set and idx_set
303 |             bat2img_idx_set = np.zeros((batch_size, 2), dtype='int32')
304 |             score_set = np.zeros(batch_size, dtype='float32')
305 |             idx_set_list = []
306 |             cur_inb_from = 0
307 |             for in_bat_idx, im_idx in enumerate(self.im_idx_list):
308 |                 cur_idx_from, cur_idx_to = self.pat2img_idx_list[im_idx]
309 |                 idx_set_list.append(
310 |                     np.arange(cur_idx_from, cur_idx_to, dtype='int32'))
311 |                 cur_inb_to = cur_inb_from + (cur_idx_to - cur_idx_from)
312 |                 bat2img_idx_set[in_bat_idx] = [cur_inb_from, cur_inb_to]
313 |                 cur_inb_from = cur_inb_to
314 |                 if self.exist_score:
315 |                     score_set[in_bat_idx] = self.score_data[im_idx]
316 |             idx_set = np.concatenate(idx_set_list)
317 | 
318 |             self.bat2img_idx_set = bat2img_idx_set
319 | 
320 |             res = {
321 |                 'dis_data': self.dis_data[idx_set],
322 |                 'bat2img_idx_set': bat2img_idx_set,
323 |                 'n_data': cur_inb_to
324 |             }
325 |             if self.exist_score:
326 |                 res['score_set'] = score_set
327 |             if self.exist_ref:
328 |                 res['ref_data'] = self.ref_data[self.dis2ref_idx[idx_set]]
329 |             if self.exist_loc:
330 |                 res['loc_data'] = self.loc_data[idx_set]
331 |         else:
332 |             res = {
333 |                 'dis_data': self.dis_data[self.im_idx_list]
334 |             }
335 |             if self.exist_score:
336 |                 res['score_set'] = self.score_data[self.im_idx_list]
337 |             if self.exist_ref:
338 |                 res['ref_data'] = self.ref_data[
339 |                     self.dis2ref_idx[self.im_idx_list]]
340 |             if self.exist_loc:
341 |                 res['loc_data'] = self.loc_data[self.im_idx_list]
342 | 
343 |         return res
344 | 
345 |     def reset_batch(self):
346 |         """
347 |         Make batch index in epoch 0, and shuffle data.
348 |         """
349 |         self.epochs_completed = 0
350 |         self.index_in_epoch = 0
351 |         if self.shuffle:
352 |             self.rand_imidx_list = np.random.permutation(self.n_data)
353 |         else:
354 |             self.rand_imidx_list = np.arange(self.n_data)
355 | 
356 |     def get_current_recon_info(self):
357 |         """
358 |         Get information to reconstruct patches into an image.
359 | 
360 |         Returns
361 |         -------
362 |         A dictionary containing:
363 |         - 'npat_img_list': (N, 3) matrix
364 |             where N is the number of images, and each row
365 |             indicate each image.
366 |             [number of patches, num of patches along y-axis,
367 |             num of patches along x-axis].
368 |         - 'bat2img_idx_set': (N, 2) matrix
369 |             where each row indicate each image.
370 |             [from-index in current batch, to-index in current batch]
371 |         - 'filt_idx_list' (optional): (N, 1) list
372 |             where where each element has indices list of existing
373 |             patches.
374 |         """
375 |         assert self.imagewise
376 |         assert self.index_in_epoch != 0
377 | 
378 |         res = {
379 |             'npat_img_list': self.npat_img_list[self.im_idx_list],
380 |             'bat2img_idx_set': self.bat2img_idx_set
381 |         }
382 |         if self.exist_filt_idx:
383 |             res['filt_idx_list'] = [
384 |                 self.filt_idx_list[idx] for idx in self.im_idx_list]
385 |         else:
386 |             res['filt_idx_list'] = None
387 | 
388 |         return res
389 | 


--------------------------------------------------------------------------------
/IQA_DeepQA_FR_release/layers/normalization.py:
--------------------------------------------------------------------------------
  1 | from __future__ import absolute_import, division, print_function
  2 | 
  3 | import numpy as np
  4 | 
  5 | import theano
  6 | import theano.tensor as T
  7 | from theano.tensor.nnet import bn
  8 | from .layers import Layer, linear
  9 | 
 10 | 
 11 | class BatchNormLayer(Layer):
 12 |     """
 13 |     Batch normalization layer.
 14 |     (theano.tensor.nnet.bn.batch_normalization_{train, test})
 15 | 
 16 |     Parameters
 17 |     ----------
 18 |     input_shape: int or a tuple of ints
 19 |         Input feature dimension or (batch_size, input feature dimension)
 20 |     activation: function
 21 |         Activation function.
 22 |     axes: {``'spatial'``, ``'per-activation'``}
 23 |     epsilon: float
 24 |     alpha: float
 25 |     """
 26 |     layers = []
 27 | 
 28 |     def __init__(self, input_shape, activation=linear, axis=1, axes='spatial',
 29 |                  epsilon=1e-4, alpha=0.1, name=None):
 30 |         super(BatchNormLayer, self).__init__()
 31 | 
 32 |         self.input_shape = input_shape
 33 |         self.activation = activation
 34 |         self.axis = axis
 35 |         self.axes_org = axes
 36 |         self.epsilon = epsilon
 37 |         self.alpha = alpha
 38 |         self.name = 'BN' if name is None else name
 39 |         self.act_name = activation.__name__
 40 |         self.deterministic = False
 41 | 
 42 |         shape = [input_shape[self.axis]]
 43 |         ndim = len(input_shape)
 44 |         if axes == 'per-activation':
 45 |             self.axes = (0,)
 46 |         elif axes == 'spatial':
 47 |             self.axes = (0,) + tuple(range(2, ndim))
 48 |         self.non_bc_axes = tuple(i for i in range(ndim) if i not in self.axes)
 49 | 
 50 |         self.gamma = theano.shared(np.ones(shape, dtype=theano.config.floatX),
 51 |                                    name=name + '_G', borrow=True)
 52 |         self.beta = theano.shared(np.zeros(shape, dtype=theano.config.floatX),
 53 |                                   name=name + '_B', borrow=True)
 54 | 
 55 |         self.mean = theano.shared(np.zeros(shape, dtype=theano.config.floatX),
 56 |                                   name=name + '_mean', borrow=True)
 57 |         self.var = theano.shared(np.ones(shape, dtype=theano.config.floatX),
 58 |                                  name=name + '_var', borrow=True)
 59 | 
 60 |         self.params = [self.gamma, self.beta]
 61 |         self.statistics = [self.mean, self.var]
 62 |         BatchNormLayer.layers.append(self)
 63 | 
 64 |         # Show information
 65 |         print('  # %s (BN) ' % (self.name), end='')
 66 |         print('act.: %s,' % self.act_name)
 67 | 
 68 |     def get_output(self, input, **kwargs):
 69 |         # prepare dimshuffle pattern inserting broadcastable axes as needed
 70 |         param_axes = iter(list(range(input.ndim - len(self.axes))))
 71 |         pattern = ['x' if input_axis in self.axes
 72 |                    else next(param_axes)
 73 |                    for input_axis in range(input.ndim)]
 74 | 
 75 |         # apply dimshuffle pattern to all parameters
 76 |         beta = self.beta.dimshuffle(pattern)
 77 |         gamma = self.gamma.dimshuffle(pattern)
 78 |         mean = self.mean.dimshuffle(pattern)
 79 |         var = self.var.dimshuffle(pattern)
 80 | 
 81 |         if not self.deterministic:
 82 |             normalized, _, _, mean_, var_ = bn.batch_normalization_train(
 83 |                 input, gamma, beta, self.axes_org,
 84 |                 self.epsilon, self.alpha, mean, var)
 85 | 
 86 |             # Update running mean and variance
 87 |             # Tricks adopted from Lasagne implementation
 88 |             # http://lasagne.readthedocs.io/en/latest/modules/layers/normalization.html
 89 |             running_mean = theano.clone(self.mean, share_inputs=False)
 90 |             running_var = theano.clone(self.var, share_inputs=False)
 91 |             running_mean.default_update = mean_.dimshuffle(self.non_bc_axes)
 92 |             running_var.default_update = var_.dimshuffle(self.non_bc_axes)
 93 |             self.mean += 0 * running_mean
 94 |             self.var += 0 * running_var
 95 |         else:
 96 |             normalized = bn.batch_normalization_test(
 97 |                 input, gamma, beta, mean, var, self.axes_org, self.epsilon)
 98 |             # normalized, _, _, _, _ = bn.batch_normalization_train(
 99 |             #     input, gamma, beta, self.axes_org, self.epsilon, 0, mean, var)
100 |             # normalized = (input - mean) * (gamma / T.sqrt(var)) + beta
101 | 
102 |         return self.activation(normalized)
103 | 
104 |     def get_out_shape(self):
105 |         return self.input_shape
106 | 
107 |     def reset_stats(self):
108 |         # reset mean and var
109 |         self.mean.set_value(np.zeros(self.mean.get_value().shape,
110 |                                      dtype=theano.config.floatX))
111 |         self.var.set_value(np.ones(self.var.get_value().shape,
112 |                                    dtype=theano.config.floatX))
113 | 
114 |     def get_stats(self):
115 |         return (self.mean, self.var)
116 | 
117 |     @staticmethod
118 |     def set_batch_norms_training(training):
119 |         deterministic = False if training else True
120 |         print(' - Batch norm layres: deterministic =', deterministic)
121 |         for layer in BatchNormLayer.layers:
122 |             layer.deterministic = deterministic
123 |             layer.update_averages = not deterministic
124 | 
125 |     @staticmethod
126 |     def reset_batch_norms_stats():
127 |         print(' - Batch norm layres: reset mean and var')
128 |         for layer in BatchNormLayer.layers:
129 |             layer.reset_stats()
130 | 
131 | 
class BatchNormLayer_old(Layer):
    """
    Batch normalization layer
    (theano.tensor.nnet.bn.batch_normalization)

    Parameters
    ----------
    input_shape: tuple of ints
        Must have length 2 or 4; other lengths raise NotImplementedError.
    activation: function
        Activation function.
    epsilon: float
        Added to the variance before taking the square root.
    alpha: float
        Weight of the new batch statistics in the running averages.
    name: str (optional)
        Layer name, also used as prefix of the shared variable names.
        Defaults to ``'BN'``.
    """
    layers = []

    def __init__(self, input_shape, activation=linear,
                 epsilon=1e-4, alpha=0.1, name=None):
        # super() must name this class: it is not derived from
        # BatchNormLayer, so super(BatchNormLayer, self) raises TypeError.
        super(BatchNormLayer_old, self).__init__()

        if len(input_shape) == 2:
            self.axes = (0,)
            shape = [input_shape[0]]
        elif len(input_shape) == 4:
            self.axes = (0, 2, 3)
            shape = [input_shape[1]]
        else:
            raise NotImplementedError

        self.name = 'BN' if name is None else name
        self.epsilon = epsilon
        self.alpha = alpha
        self.deterministic = False
        self.update_averages = True
        self.activation = activation
        self.act_name = activation.__name__
        self.input_shape = input_shape

        # Use self.name (never None) for the variable names so that the
        # default ``name=None`` does not fail on string concatenation.
        floatX = theano.config.floatX
        self.gamma = theano.shared(np.ones(shape, dtype=floatX),
                                   name=self.name + '_G', borrow=True)
        self.beta = theano.shared(np.zeros(shape, dtype=floatX),
                                  name=self.name + '_B', borrow=True)

        self.mean = theano.shared(np.zeros(shape, dtype=floatX),
                                  name=self.name + '_mean', borrow=True)
        self.std = theano.shared(np.ones(shape, dtype=floatX),
                                 name=self.name + '_std', borrow=True)

        self.params = [self.gamma, self.beta]
        self.statistics = [self.mean, self.std]
        # Registered in BatchNormLayer.layers (not in this class' own
        # list): the static helpers below iterate over that registry.
        BatchNormLayer.layers.append(self)

        # Show information
        print('  # %s (BN_T) ' % (self.name), end='')
        print('act.: %s,' % self.act_name)

    def get_output(self, input, **kwargs):
        """Return the activated, batch-normalized symbolic output.

        The stored averages are used when ``self.deterministic`` is True,
        the statistics of ``input`` otherwise.
        """
        input_mean = input.mean(self.axes)
        input_std = T.sqrt(input.var(self.axes) + self.epsilon)

        # Decide whether to use the stored averages or mini-batch statistics
        use_averages = self.deterministic
        if use_averages:
            mean = self.mean
            std = self.std
        else:
            mean = input_mean
            std = input_std

        # Decide whether to update the stored averages
        update_averages = self.update_averages and not use_averages
        if update_averages:
            # Trick: To update the stored statistics, we create memory-aliased
            # clones of the stored statistics:
            running_mean = theano.clone(self.mean, share_inputs=False)
            running_std = theano.clone(self.std, share_inputs=False)
            # set a default update for them:
            running_mean.default_update = ((1 - self.alpha) * running_mean +
                                           self.alpha * input_mean)
            running_std.default_update = ((1 - self.alpha) * running_std +
                                          self.alpha * input_std)
            # and make sure they end up in the graph without participating in
            # the computation (this way their default_update will be collected
            # and applied, but the computation will be optimized away):
            mean += 0 * running_mean
            std += 0 * running_std

        # prepare dimshuffle pattern inserting broadcastable axes as needed
        param_axes = iter(list(range(input.ndim - len(self.axes))))
        pattern = ['x' if input_axis in self.axes
                   else next(param_axes)
                   for input_axis in range(input.ndim)]

        # apply dimshuffle pattern to all parameters
        beta = 0 if self.beta is None else self.beta.dimshuffle(pattern)
        gamma = 1 if self.gamma is None else self.gamma.dimshuffle(pattern)
        mean = mean.dimshuffle(pattern)
        std = std.dimshuffle(pattern)

        # normalize
        normalized = bn.batch_normalization(input, gamma, beta, mean, std)
        return self.activation(normalized)

    def get_out_shape(self):
        """Return the output shape, which equals the input shape."""
        return self.input_shape

    def reset_stats(self):
        """Reset the stored mean to zeros and the stored std to ones."""
        self.mean.set_value(np.zeros(self.mean.get_value().shape,
                                     dtype=theano.config.floatX))
        self.std.set_value(np.ones(self.std.get_value().shape,
                                   dtype=theano.config.floatX))

    def get_stats(self):
        """Return the shared variables (mean, std)."""
        return (self.mean, self.std)

    @staticmethod
    def set_batch_norms_training(training):
        """Switch all layers in BatchNormLayer.layers between modes."""
        deterministic = False if training else True
        print(' - Batch norm layres: deterministic =', deterministic)
        for layer in BatchNormLayer.layers:
            layer.deterministic = deterministic
            layer.update_averages = not deterministic

    @staticmethod
    def reset_batch_norms_stats():
        """Reset the stored statistics of all layers in the registry."""
        print(' - Batch norm layres: reset mean and std')
        for layer in BatchNormLayer.layers:
            layer.reset_stats()
252 | 
253 | 
class BatchNormLayer_L(Layer):
    """
    Batch normalization layer that stores a running mean and a running
    inverse standard deviation.

    Core algorithm is brought from Lasagne.
    http://lasagne.readthedocs.io/en/latest/modules/layers/normalization.html

    Parameters
    ----------
    input_shape: a tuple of ints
        Either a 2-element shape (statistics are reduced over axis 0) or a
        4-element shape (statistics are reduced over axes 0, 2 and 3).
        In both cases axis 1 is the axis that keeps its own gamma/beta.
    activation: function
        Activation function applied to the normalized output.
    epsilon: float
        Constant added to the variance before taking the square root.
    alpha: float
        Weight of the current mini-batch in the moving-average update of
        the stored statistics.
    name: str or None
        Layer name; 'BN' is used when None.

    Raises
    ------
    NotImplementedError
        If ``input_shape`` has neither 2 nor 4 elements.
    """
    # NOTE(review): instances are registered in ``BatchNormLayer.layers``
    # (see ``__init__``), so this list stays empty and the static helpers
    # below operate on the registry shared with ``BatchNormLayer``.
    layers = []

    def __init__(self, input_shape, activation=linear,
                 epsilon=1e-4, alpha=0.1, name=None):
        # This class derives from Layer, not from BatchNormLayer, so super()
        # must be given this class.
        super(BatchNormLayer_L, self).__init__()

        if len(input_shape) == 2:
            self.axes = (0,)
        elif len(input_shape) == 4:
            self.axes = (0, 2, 3)
        else:
            raise NotImplementedError
        # Axis 1 is the only axis that is not reduced in either case, so the
        # parameters and statistics have one entry per element of axis 1.
        shape = [input_shape[1]]

        self.name = 'BN' if name is None else name
        self.epsilon = epsilon
        self.alpha = alpha
        self.deterministic = False
        self.update_averages = True
        self.activation = activation
        self.act_name = activation.__name__
        self.input_shape = input_shape

        # Use self.name so that the default name=None does not break the
        # string concatenation.
        self.gamma = theano.shared(np.ones(shape, dtype=theano.config.floatX),
                                   name=self.name + '_G', borrow=True)
        self.beta = theano.shared(np.zeros(shape, dtype=theano.config.floatX),
                                  name=self.name + '_B', borrow=True)

        self.mean = theano.shared(np.zeros(shape, dtype=theano.config.floatX),
                                  name=self.name + '_mean', borrow=True)
        self.invstd = theano.shared(np.ones(shape, dtype=theano.config.floatX),
                                    name=self.name + '_invstd', borrow=True)

        self.params = [self.gamma, self.beta]
        self.statistics = [self.mean, self.invstd]
        BatchNormLayer.layers.append(self)

        # Show information
        print('  # %s (BN_L) ' % (self.name), end='')
        print('act.: %s,' % self.act_name)

    def get_output(self, input, **kwargs):
        """
        Build the symbolic normalized (and activated) output for ``input``.

        Stored statistics are used when ``self.deterministic`` is True;
        otherwise mini-batch statistics are used and, if
        ``self.update_averages`` is set, the stored statistics receive a
        moving-average default update.
        """
        input_mean = input.mean(self.axes)
        input_invstd = T.inv(T.sqrt(input.var(self.axes) + self.epsilon))

        # Decide whether to use the stored averages or mini-batch statistics
        use_averages = self.deterministic
        if use_averages:
            mean = self.mean
            invstd = self.invstd
        else:
            mean = input_mean
            invstd = input_invstd

        # Decide whether to update the stored averages
        update_averages = self.update_averages and not use_averages
        if update_averages:
            # Trick: To update the stored statistics, we create memory-aliased
            # clones of the stored statistics:
            running_mean = theano.clone(self.mean, share_inputs=False)
            running_invstd = theano.clone(self.invstd, share_inputs=False)
            # set a default update for them:
            running_mean.default_update = (
                (1 - self.alpha) * running_mean + self.alpha * input_mean)
            running_invstd.default_update = (
                (1 - self.alpha) * running_invstd + self.alpha * input_invstd)
            # and make sure they end up in the graph without participating in
            # the computation (this way their default_update will be collected
            # and applied, but the computation will be optimized away):
            mean += 0 * running_mean
            invstd += 0 * running_invstd

        # prepare dimshuffle pattern inserting broadcastable axes as needed
        param_axes = iter(list(range(input.ndim - len(self.axes))))
        pattern = ['x' if input_axis in self.axes
                   else next(param_axes)
                   for input_axis in range(input.ndim)]

        # apply dimshuffle pattern to all parameters
        beta = 0 if self.beta is None else self.beta.dimshuffle(pattern)
        gamma = 1 if self.gamma is None else self.gamma.dimshuffle(pattern)
        mean = mean.dimshuffle(pattern)
        invstd = invstd.dimshuffle(pattern)

        # normalize
        normalized = (input - mean) * (gamma * invstd) + beta
        return self.activation(normalized)

    def get_out_shape(self):
        """Return the output shape, which equals the input shape."""
        return self.input_shape

    def reset_stats(self):
        """Reset the stored mean to zeros and the stored invstd to ones."""
        self.mean.set_value(np.zeros(self.mean.get_value().shape,
                                     dtype=theano.config.floatX))
        self.invstd.set_value(np.ones(self.invstd.get_value().shape,
                                      dtype=theano.config.floatX))

    def get_stats(self):
        """Return the shared variables ``(mean, invstd)``."""
        return (self.mean, self.invstd)

    @staticmethod
    def set_batch_norms_training(training):
        """
        Switch every layer in ``BatchNormLayer.layers`` between training
        mode (mini-batch statistics, averages updated) and deterministic
        mode (stored statistics, averages frozen).
        """
        deterministic = False if training else True
        print(' - Batch norm layres: deterministic =', deterministic)
        for layer in BatchNormLayer.layers:
            layer.deterministic = deterministic
            layer.update_averages = not deterministic

    @staticmethod
    def reset_batch_norms_stats():
        """Call ``reset_stats()`` on every layer in ``BatchNormLayer.layers``."""
        print(' - Batch norm layres: reset mean and invstd')
        for layer in BatchNormLayer.layers:
            layer.reset_stats()
375 | 


--------------------------------------------------------------------------------
/IQA_DeepQA_FR_release/utils.py:
--------------------------------------------------------------------------------
  1 | """ This file contains different utility functions that are not connected
  2 | in anyway to the networks presented in the tutorials, but rather help in
  3 | processing the outputs into a more understandable way.
  4 | 
  5 | For example ``tile_raster_images`` helps in generating a easy to grasp
  6 | image from a set of samples or weights.
  7 | """
  8 | from __future__ import absolute_import, division, print_function
  9 | 
 10 | import numpy as np
 11 | import PIL.Image as Image
 12 | 
 13 | 
 14 | def scale_to_unit_interval(ndar, eps=1e-8):
 15 |     """ Scales all values in the ndarray ndar to be between 0 and 1 """
 16 |     ndar = ndar.copy()
 17 |     ndar -= ndar.min()
 18 |     ndar *= 1.0 / (ndar.max() + eps)
 19 |     return ndar
 20 | 
 21 | 
 22 | def tile_raster_images(X, img_shape, tile_shape, tile_spacing=(0, 0),
 23 |                        scale_rows_to_unit_interval=True,
 24 |                        output_pixel_vals=True):
 25 |     """
 26 |     Transform an array with one flattened image per row, into an array in
 27 |     which images are reshaped and layed out like tiles on a floor.
 28 | 
 29 |     This function is useful for visualizing datasets whose rows are images,
 30 |     and also columns of matrices for transforming those rows
 31 |     (such as the first layer of a neural net).
 32 | 
 33 |     :type X: a 2-D ndarray or a tuple of 4 channels, elements of which can
 34 |     be 2-D ndarrays or None;
 35 |     :param X: a 2-D array in which every row is a flattened image.
 36 | 
 37 |     :type img_shape: tuple; (height, width)
 38 |     :param img_shape: the original shape of each image
 39 | 
 40 |     :type tile_shape: tuple; (rows, cols)
 41 |     :param tile_shape: the number of images to tile (rows, cols)
 42 | 
 43 |     :param output_pixel_vals: if output should be pixel values (i.e. int8
 44 |     values) or floats
 45 | 
 46 |     :param scale_rows_to_unit_interval: if the values need to be scaled before
 47 |     being plotted to [0,1] or not
 48 | 
 49 | 
 50 |     :returns: array suitable for viewing as an image.
 51 |     (See:`Image.fromarray`.)
 52 |     :rtype: a 2-d array with same dtype as X.
 53 | 
 54 |     """
 55 | 
 56 |     assert len(img_shape) == 2
 57 |     assert len(tile_shape) == 2
 58 |     assert len(tile_spacing) == 2
 59 | 
 60 |     # The expression below can be re-written in a more C style as
 61 |     # follows :
 62 |     #
 63 |     # out_shape    = [0,0]
 64 |     # out_shape[0] = (img_shape[0]+tile_spacing[0])*tile_shape[0] -
 65 |     #                tile_spacing[0]
 66 |     # out_shape[1] = (img_shape[1]+tile_spacing[1])*tile_shape[1] -
 67 |     #                tile_spacing[1]
 68 |     out_shape = [
 69 |         (ishp + tsp) * tshp - tsp
 70 |         for ishp, tshp, tsp in zip(img_shape, tile_shape, tile_spacing)
 71 |     ]
 72 | 
 73 |     if isinstance(X, tuple):
 74 |         assert len(X) == 4
 75 |         # Create an output numpy ndarray to store the image
 76 |         if output_pixel_vals:
 77 |             out_array = np.zeros((out_shape[0], out_shape[1], 4),
 78 |                                  dtype='uint8')
 79 |         else:
 80 |             out_array = np.zeros((out_shape[0], out_shape[1], 4),
 81 |                                  dtype=X.dtype)
 82 | 
 83 |         # colors default to 0, alpha defaults to 1 (opaque)
 84 |         if output_pixel_vals:
 85 |             channel_defaults = [0, 0, 0, 255]
 86 |         else:
 87 |             channel_defaults = [0., 0., 0., 1.]
 88 | 
 89 |         for i in range(4):
 90 |             if X[i] is None:
 91 |                 # if channel is None, fill it with zeros of the correct
 92 |                 # dtype
 93 |                 dt = out_array.dtype
 94 |                 if output_pixel_vals:
 95 |                     dt = 'uint8'
 96 |                 out_array[:, :, i] = np.zeros(
 97 |                     out_shape,
 98 |                     dtype=dt
 99 |                 ) + channel_defaults[i]
100 |             else:
101 |                 # use a recurrent call to compute the channel and store it
102 |                 # in the output
103 |                 out_array[:, :, i] = tile_raster_images(
104 |                     X[i], img_shape, tile_shape, tile_spacing,
105 |                     scale_rows_to_unit_interval, output_pixel_vals)
106 |         return out_array
107 | 
108 |     else:
109 |         # if we are dealing with only one channel
110 |         H, W = img_shape
111 |         Hs, Ws = tile_spacing
112 | 
113 |         # generate a matrix to store the output
114 |         dt = X.dtype
115 |         if output_pixel_vals:
116 |             dt = 'uint8'
117 |         out_array = np.zeros(out_shape, dtype=dt)
118 | 
119 |         for tile_row in range(tile_shape[0]):
120 |             for tile_col in range(tile_shape[1]):
121 |                 if tile_row * tile_shape[1] + tile_col < X.shape[0]:
122 |                     this_x = X[tile_row * tile_shape[1] + tile_col]
123 |                     if scale_rows_to_unit_interval:
124 |                         # if we should scale values to be between 0 and 1
125 |                         # do this by calling the `scale_to_unit_interval`
126 |                         # function
127 |                         this_img = scale_to_unit_interval(
128 |                             this_x.reshape(img_shape))
129 |                     else:
130 |                         this_img = this_x.reshape(img_shape)
131 |                     # add the slice to the corresponding position in the
132 |                     # output array
133 |                     c = 1
134 |                     if output_pixel_vals:
135 |                         c = 255
136 |                     out_array[
137 |                         tile_row * (H + Hs): tile_row * (H + Hs) + H,
138 |                         tile_col * (W + Ws): tile_col * (W + Ws) + W
139 |                     ] = this_img * c
140 |         return out_array
141 | 
142 | 
def tile_tensor_array(X, tile_shape, img_shape=None, tile_spacing=(0, 0)):
    """
    Lay out a batch of images like tiles on a floor, without rescaling
    their values.

    :type X: an ndarray or a tuple of 4 channels, elements of which can
    be ndarrays or None;
    :param X: array indexed by image along its first axis; every ``X[i]``
    is reshaped to ``img_shape``. When a tuple is given, each element is
    tiled separately and stored as one channel of a 4-channel output; a
    None channel is filled with 0 (first three channels) or 1 (last).

    :type tile_shape: tuple; (rows, cols)
    :param tile_shape: the number of images to tile (rows, cols)

    :type img_shape: tuple; (height, width) or None
    :param img_shape: the shape of each image. When None it is read from
    axes 2 and 3 of ``X`` (of the first given channel for tuple input).

    :type tile_spacing: tuple; (vertical, horizontal)
    :param tile_spacing: pixels between neighbouring tiles. Both values
    may be negative, in which case neighbouring tiles overlap and each
    tile is trimmed so that the overlap is split between the neighbours.

    :returns: array suitable for viewing as an image.
    (See:`Image.fromarray`.)
    :rtype: a 2-d array (3-d with 4 channels for tuple input) with the
    dtype of the input.

    :raises NotImplementedError: if the two spacing values have
    different signs and at least one tile is drawn.
    :raises ValueError: if X is a tuple of four None and ``img_shape``
    is not given.
    """

    assert len(tile_shape) == 2
    assert len(tile_spacing) == 2
    tile_shape = (int(tile_shape[0]), int(tile_shape[1]))
    tile_spacing = (int(tile_spacing[0]), int(tile_spacing[1]))

    is_multi_channel = isinstance(X, tuple)
    if is_multi_channel:
        assert len(X) == 4
        given = [channel for channel in X if channel is not None]
    else:
        given = [X]

    if img_shape is None:
        # A tuple has no shape of its own: use the first given channel.
        if not given:
            raise ValueError('img_shape is required when all channels '
                             'are None')
        img_shape = (int(given[0].shape[2]), int(given[0].shape[3]))
    else:
        img_shape = (int(img_shape[0]), int(img_shape[1]))

    # For each axis: (image size + spacing) * number of tiles - spacing,
    # i.e. there is no spacing after the last tile.
    out_shape = [
        (ishp + tsp) * tshp - tsp
        for ishp, tshp, tsp in zip(img_shape, tile_shape, tile_spacing)
    ]

    if is_multi_channel:
        out_dtype = given[0].dtype if given else np.dtype('float64')
        out_array = np.zeros((out_shape[0], out_shape[1], 4),
                             dtype=out_dtype)

        # colors default to 0, alpha defaults to 1 (opaque)
        channel_defaults = [0., 0., 0., 1.]

        for i in range(4):
            if X[i] is None:
                # missing channel: fill it with its default value
                out_array[:, :, i] = channel_defaults[i]
            else:
                # Tile the channel with this same function so that values
                # are not rescaled and the argument order matches.
                out_array[:, :, i] = tile_tensor_array(
                    X[i], tile_shape, img_shape, tile_spacing)
        return out_array

    # if we are dealing with only one channel
    H, W = img_shape
    Hs, Ws = tile_spacing

    # generate a matrix to store the output
    out_array = np.zeros(out_shape, dtype=X.dtype)

    # Tiles beyond the number of images in X are left blank.
    n_used = min(tile_shape[0] * tile_shape[1], X.shape[0])
    for idx in range(n_used):
        tile_row, tile_col = divmod(idx, tile_shape[1])
        this_img = X[idx].reshape(img_shape)
        top = tile_row * (H + Hs)
        left = tile_col * (W + Ws)

        if Hs >= 0 and Ws >= 0:
            out_array[top: top + H, left: left + W] = this_img

        elif Hs < 0 and Ws < 0:
            # Overlapping tiles: trim half of the overlap from each inner
            # edge; edges on the border of the mosaic are kept whole.
            u_tr = 0 if tile_row == 0 else (-Hs + 1) // 2
            d_tr = 0 if tile_row == tile_shape[0] - 1 else (-Hs) // 2
            l_tr = 0 if tile_col == 0 else (-Ws + 1) // 2
            r_tr = 0 if tile_col == tile_shape[1] - 1 else (-Ws) // 2
            out_array[
                top + u_tr: top + H - d_tr,
                left + l_tr: left + W - r_tr
            ] = this_img[u_tr: H - d_tr, l_tr: W - r_tr]

        else:
            raise NotImplementedError()
    return out_array
254 | 
255 | 
def _tile4_slices(tile_row, tile_col, tile_shape, img_shape, tile_spacing):
    """Return (dst_rows, dst_cols, src_rows, src_cols) slices for one tile."""
    H, W = img_shape
    Hs, Ws = tile_spacing
    top = tile_row * (H + Hs)
    left = tile_col * (W + Ws)

    if Hs >= 0 and Ws >= 0:
        u_tr = d_tr = l_tr = r_tr = 0
    elif Hs < 0 and Ws < 0:
        # Overlapping tiles: trim half of the overlap from each inner edge;
        # edges on the border of the mosaic are kept whole.
        u_tr = 0 if tile_row == 0 else (-Hs + 1) // 2
        d_tr = 0 if tile_row == tile_shape[0] - 1 else (-Hs) // 2
        l_tr = 0 if tile_col == 0 else (-Ws + 1) // 2
        r_tr = 0 if tile_col == tile_shape[1] - 1 else (-Ws) // 2
    else:
        raise NotImplementedError()

    return (slice(top + u_tr, top + H - d_tr),
            slice(left + l_tr, left + W - r_tr),
            slice(u_tr, H - d_tr),
            slice(l_tr, W - r_tr))


def tile_tensor4_from_list(X, tile_shape, idx_list=None, img_shape=None,
                           tile_spacing=(0, 0), caxis=None,
                           image_name=None):
    """
    Generate tiled image array from 4D or 3D numpy array
    Parameter
    ---------
        X : 4D or 3D numpy array
            [batch, channel, height, width] or  [batch, height, width].
            A 4D array must have 1 or 3 channels.
        tile_shape : tuple; (rows, cols)
            Number of tiles in the mosaic.
        idx_list : sequence of ints or None
            Tile position (row-major) for each image: ``X[k]`` is drawn at
            position ``idx_list[k]``. When None, images fill the mosaic in
            order; positions without an image keep the background color.
        img_shape : tuple; (height, width) or None
            Shape of one image; read from the last two axes of X when None.
        tile_spacing : tuple; (vertical, horizontal)
            Pixels between neighbouring tiles. Both may be negative, in
            which case neighbouring tiles overlap and are trimmed.
        caxis : pair of numbers, 'auto' or None
            When not None, X is passed through ``image_caxis`` with this
            value and the background is red instead of black.
        image_name : str or None
            When given, the mosaic is converted to uint8, saved under this
            name and returned as a PIL image.
    Returns
    -------
        A [height, width, 3] array with the dtype of X, or a PIL image if
        ``image_name`` is given.
    Raises
    ------
        NotImplementedError
            If the channel count is neither 1 nor 3, or the two spacing
            values have different signs.
    """
    assert len(X.shape) in [3, 4]
    assert len(tile_shape) == 2
    assert len(tile_spacing) == 2
    tile_shape = (int(tile_shape[0]), int(tile_shape[1]))
    tile_spacing = (int(tile_spacing[0]), int(tile_spacing[1]))
    n_tiles = tile_shape[0] * tile_shape[1]

    if idx_list is None:
        # Never address more images than X holds.
        idx_list = range(min(n_tiles, int(X.shape[0])))
    else:
        idx_list = list(idx_list)
        # Valid positions are 0 .. n_tiles - 1.
        if idx_list:
            assert np.max(idx_list) < n_tiles, \
                'max idx_list (%d) >= number of tiles (%d)' % (
                    np.max(idx_list), n_tiles)

    # The channel count depends only on the rank of X; for a 3D array
    # axis 1 is the image height, not a channel axis.
    nch = int(X.shape[1]) if len(X.shape) == 4 else 1

    # check image shape
    if img_shape is None:
        img_shape = (int(X.shape[-2]), int(X.shape[-1]))
    else:
        img_shape = (int(img_shape[0]), int(img_shape[1]))

    out_shape = [
        (ishp + tsp) * tshp - tsp
        for ishp, tshp, tsp in zip(img_shape, tile_shape, tile_spacing)
    ]

    if caxis is not None:
        X = image_caxis(X, caxis)
        default_rgb = [255, 0, 0]
    else:
        default_rgb = [0, 0, 0]

    # Create an output np ndarray filled with the background color
    out_array = np.ones((out_shape[0], out_shape[1], 3), dtype=X.dtype)
    for ch in range(3):
        out_array[:, :, ch] = out_array[:, :, ch] * default_rgb[ch]

    if nch not in (1, 3):
        raise NotImplementedError()

    for idx, pat_idx in enumerate(idx_list):
        tile_row, tile_col = divmod(int(pat_idx), tile_shape[1])
        dst_rows, dst_cols, src_rows, src_cols = _tile4_slices(
            tile_row, tile_col, tile_shape, img_shape, tile_spacing)

        if nch == 1:
            # gray image: the same values go to all three color channels
            this_img = X[idx].reshape(img_shape)
            out_array[dst_rows, dst_cols, :] = \
                this_img[src_rows, src_cols, np.newaxis]
        else:
            for ch in range(nch):
                this_img = X[idx, ch].reshape(img_shape)
                out_array[dst_rows, dst_cols, ch] = \
                    this_img[src_rows, src_cols]

    if image_name is not None:
        img = Image.fromarray(out_array.astype(np.uint8))
        img.save(image_name)
        return img
    else:
        return out_array
401 | 
402 | 
def image_from_nparray(np_arr_img, img_size=None, caxis='auto'):
    """
    Convert numpy array to PIL image
    Parameter
    ---------
        np_arr_img : 3D or 2D or 1D (img_size must be given) numpy array
            [height, width, channel] or [height, width] or [height * width]
        img_size : tuple of 2 or 3 ints, or None
            Target (height, width) or (height, width, channel) used to
            reshape a 1D input; ignored for 2D and 3D input.
        caxis : pair of numbers, 'auto' or None
            Intensity range forwarded to ``image_caxis``.
    Returns
    -------
        PIL image in 'RGB' mode.
    Raises
    ------
        ValueError
            If a 1D input cannot be matched to ``img_size``.
    """
    # check img_size
    assert len(np_arr_img.shape) in [1, 2, 3]

    if len(np_arr_img.shape) == 1:
        assert img_size is not None
        if len(img_size) == 3:
            if img_size[2] == 1:
                # if gray
                img_ = np_arr_img.reshape((img_size[0], img_size[1]))
            else:
                # if RGB
                img_ = np_arr_img.reshape(img_size)
        elif len(img_size) == 2:
            # np.prod replaces np.product, which NumPy 2.0 removed
            n_pixels = np.prod(img_size)
            if np_arr_img.shape[0] == n_pixels:
                # if gray
                img_ = np_arr_img.reshape(img_size)
            elif np_arr_img.shape[0] == n_pixels * 3:
                # if RGB
                img_ = np_arr_img.reshape((img_size[0], img_size[1], 3))
            else:
                raise ValueError(
                    'Wrong shape: np_array = {0} / target = {1}'.format(
                        np_arr_img.shape, img_size))
        else:
            raise ValueError('Wrong shape: {0}'.format(img_size))
    elif len(np_arr_img.shape) == 2:
        # if gray
        img_ = np_arr_img
    else:
        if np_arr_img.shape[2] == 1:
            # if gray
            img_ = np_arr_img[:, :, 0]
        else:
            # if RGB
            assert np_arr_img.shape[2] == 3
            img_ = np_arr_img

    img_ = image_caxis(img_, caxis)
    img = Image.fromarray(img_.astype(np.uint8))

    if img.mode != 'RGB':
        img = img.convert('RGB')
    return img
454 | 
455 | 
def image_from_tensor(tensor_4d, caxis='auto'):
    """
    Convert a channel-first array to an RGB PIL image.

    The input is transposed with axes (1, 2, 0), so despite its name
    ``tensor_4d`` must be a 3D array [channel, height, width]. The
    intensity range ``caxis`` is forwarded to ``image_caxis``.
    """
    # channel-first -> (row, column, channel)
    hwc = np.transpose(tensor_4d, axes=(1, 2, 0))

    # a single-channel image is handled as a plain 2D array
    if hwc.shape[2] == 1:
        hwc = hwc.reshape(hwc.shape[0], hwc.shape[1])

    scaled = image_caxis(hwc, caxis)
    pil_img = Image.fromarray(scaled.astype(np.uint8))

    if pil_img.mode == 'RGB':
        return pil_img
    return pil_img.convert('RGB')
470 | 
471 | 
def image_caxis(img, caxis='auto'):
    """
    Linearly map intensities to the range [0, 255].

    Parameter
    ---------
        img : numpy array
            Input values; the array itself is not modified.
        caxis : pair of numbers, 'auto' or None
            (low, high) values that are mapped to 0 and 255. With 'auto'
            or None the minimum and maximum of ``img`` are used.
    Returns
    -------
        New array with the shape and dtype of ``img``; values outside
        ``caxis`` saturate at 0 or 255.
    """
    if caxis is None or caxis == 'auto':
        min_val = img.min()
        # small offset avoids a division by zero for constant images
        max_val = img.max() + 1e-8
    else:
        assert len(caxis) == 2
        # builtin float replaces the np.float alias removed in NumPy 1.24
        min_val = float(caxis[0])
        max_val = float(caxis[1])

    scaled = (img - min_val) / (max_val - min_val) * 255.0
    # Clamp before casting back: casting first would let out-of-range
    # values wrap around for integer dtypes instead of saturating.
    return np.clip(scaled, 0.0, 255.0).astype(img.dtype)
485 | 


--------------------------------------------------------------------------------
/IQA_DeepQA_FR_release/layers/layers.py:
--------------------------------------------------------------------------------
  1 | from __future__ import absolute_import, division, print_function
  2 | 
  3 | import numpy as np
  4 | 
  5 | import theano
  6 | import theano.tensor as T
  7 | from theano.tensor.nnet import conv2d
  8 | from theano.tensor.nnet.abstract_conv import conv2d_grad_wrt_inputs
  9 | from theano.tensor.signal.pool import pool_2d
 10 | from theano.sandbox.rng_mrg import MRG_RandomStreams as RandomStreams
 11 | 
 12 | 
 13 | # Activation function wrappers
def linear(x):
    """ Identity activation: returns the input unchanged """
    return x
 16 | 
 17 | 
def tanh(x):
    """ Hyperbolic tangent (wrapper around ``T.tanh``) """
    return T.tanh(x)
 21 | 
 22 | 
def sigm(x):
    """ Sigmoid (wrapper around ``T.nnet.sigmoid``) """
    return T.nnet.sigmoid(x)
 26 | 
 27 | 
def relu(x, alpha=0.0):
    """ Rectified linear unit; ``x`` and ``alpha`` go to ``T.nnet.relu`` """
    return T.nnet.relu(x, alpha)
 31 | 
 32 | 
def lrelu(x, alpha=0.1):
    """ Leaky ReLU: ``T.nnet.relu`` with a default ``alpha`` of 0.1 """
    return T.nnet.relu(x, alpha)
 36 | 
 37 | 
def elu(x, alpha=1.0):
    """ Exponential LU; ``x`` and ``alpha`` go to ``T.nnet.elu`` """
    return T.nnet.elu(x, alpha)
 41 | 
 42 | 
 43 | ##############################################################################
 44 | class Layer(object):
 45 |     """
 46 |     Base class for layers
 47 |     """
 48 |     # init_rng = np.random.RandomState(1235)
 49 |     init_rng = np.random.RandomState()
 50 | 
 51 |     def __init__(self):
 52 |         self.params = []
 53 |         self.rng = Layer.init_rng
 54 | 
 55 |     def get_params(self):
 56 |         return self.params
 57 | 
 58 |     def get_output(self, input, **kwargs):
 59 |         raise NotImplementedError("get_output")
 60 | 
 61 |     def get_out_shape(self):
 62 |         return None
 63 | 
 64 |     def init_he(self, shape, activation, sampling='uniform', lrelu_alpha=0.1):
 65 |         # He et al. 2015
 66 |         if activation in [T.nnet.relu, relu, elu]:  # relu or elu
 67 |             gain = np.sqrt(2)
 68 |         elif activation == lrelu:  # lrelu
 69 |             gain = np.sqrt(2 / (1 + lrelu_alpha ** 2))
 70 |         else:
 71 |             gain = 1.0
 72 | 
 73 |         # len(shape) == 2 -> fully-connected layers
 74 |         fan_in = shape[0] if len(shape) == 2 else np.prod(shape[1:])
 75 | 
 76 |         if sampling == 'normal':
 77 |             std = gain * np.sqrt(1. / fan_in)
 78 |             return np.asarray(self.rng.normal(0., std, shape),
 79 |                               dtype=theano.config.floatX)
 80 |         elif sampling == 'uniform':
 81 |             bound = gain * np.sqrt(3. / fan_in)
 82 |             return np.asarray(self.rng.uniform(-bound, bound, shape),
 83 |                               dtype=theano.config.floatX)
 84 |         else:
 85 |             raise NotImplementedError
 86 | 
 87 | 
 88 | ##############################################################################
 89 | # Neural Network Layers
 90 | class FCLayer(Layer):
 91 |     """
 92 |     Fully connected layer.
 93 | 
 94 |     Parameters
 95 |     ----------
 96 |     input_shape: int or a tuple of ints
 97 |         Input feature dimension or (batch_size, input feature dimension)
 98 |     n_out: int
 99 |         Output feature dimension.
100 |     activation: function
101 |         Activation function.
102 |     W: tensor, numpy or None
103 |         Filter weights. If this is not given, the weight is initialized by
104 |         random values.
105 |     b: tensor, numpy or None
106 |         Biases. If this is not given, the weight is initialized by
107 |         random values.
108 |     """
109 |     def __init__(self, input_shape, n_out, activation=linear,
110 |                  W=None, b=None, no_bias=False, name=None):
111 |         super(FCLayer, self).__init__()
112 | 
113 |         if isinstance(input_shape, (list, tuple)):
114 |             self.input_shape = input_shape
115 |         else:
116 |             self.input_shape = (None, input_shape)
117 |         self.n_in = self.input_shape[1]
118 |         self.n_out = n_out
119 |         self.activation = activation
120 |         self.act_name = activation.__name__
121 |         self.no_bias = no_bias
122 |         self.name = 'FC' if name is None else name
123 | 
124 |         self.params = []
125 |         if isinstance(W, T.sharedvar.TensorSharedVariable):
126 |             self.W = W
127 |         else:
128 |             if W is None:
129 |                 W_values = self.init_he(
130 |                     (self.n_in, self.n_out), self.activation)
131 |             else:
132 |                 W_values = W
133 |             self.W = theano.shared(W_values, self.name + '_W', borrow=True)
134 |             self.params += [self.W]
135 | 
136 |         if self.no_bias:
137 |             self.b = None
138 |         elif isinstance(b, T.sharedvar.TensorSharedVariable):
139 |             self.b = b
140 |         else:
141 |             if b is None:
142 |                 b_values = np.zeros((n_out,), dtype=theano.config.floatX)
143 |             else:
144 |                 b_values = b
145 |             self.b = theano.shared(b_values, self.name + '_b', borrow=True)
146 |             self.params += [self.b]
147 | 
148 |         # Show information
149 |         print('  # %s (FC): in = %d -> out = %d,' % (
150 |             self.name, self.n_in, self.n_out), end=' ')
151 |         print('act.: %s,' % self.act_name, end=' ')
152 |         if self.no_bias:
153 |             print('No bias')
154 |         else:
155 |             print('')
156 | 
157 |     def get_output(self, input, **kwargs):
158 |         lin_output = T.dot(input, self.W)
159 |         if not self.no_bias:
160 |             lin_output += self.b
161 |         return self.activation(lin_output)
162 | 
163 |     def get_out_shape(self):
164 |         return (self.input_shape[0], self.n_out)
165 | 
166 | 
class ConvLayer(Layer):
    """
    Convolutional layer.

    Parameters
    ----------
    input_shape: a tuple of ints
        (batch size, num input feature maps, image height, image width)
    num_filts: int
        Number of output channels.
    filt_size: a tuple of ints
        (filter rows, filter columns)
    activation: function
        Activation function.
    mode: ``'half'``, ``'valid'``, ``'full'`` or a tuple of two ints
        Border mode of convolution. A tuple is interpreted as explicit
        (rows, columns) padding when computing the output size.
    subsample: a tuple of ints (len = 2)
        Stride of convolution.
    filter_dilation: a tuple of ints (len = 2)
        Dilation of convolution.
    W: shared variable, numpy array or None
        Filter weights. A shared variable is used as is and is NOT added
        to ``params``. If None, the weights are created by ``init_he``.
    b: shared variable, numpy array or None
        Biases. A shared variable is used as is and is NOT added to
        ``params``. If None, the biases are initialized with zeros.
    no_bias: bool
        If True, bias is not used in this layer.
    name: str or None
        Layer name used for parameter names and logging
        (defaults to ``'Conv'``).

    Raises
    ------
    ValueError
        If ``mode`` is an unknown string or the resulting feature map
        would have a dimension smaller than 1.
    """
    def __init__(self, input_shape, num_filts, filt_size, activation=linear,
                 mode='half', subsample=(1, 1), filter_dilation=(1, 1),
                 W=None, b=None, no_bias=False, name=None):
        super(ConvLayer, self).__init__()

        # Make filter shape: (out channels, in channels, rows, cols).
        # tuple() lets callers pass filt_size as a list (e.g. from a config).
        assert len(filt_size) == 2
        filter_shape = (num_filts, input_shape[1]) + tuple(filt_size)

        # Calculate output shape (before subsampling) and validate.
        # NOTE(review): filter_dilation is not taken into account here;
        # the sizes below assume a dilation of (1, 1) - TODO confirm.
        if isinstance(mode, tuple):
            # Explicit padding: out = in - filter + 2 * pad + 1
            self.mode = mode
            self.out_size = [
                input_shape[i] - filter_shape[i] + 2 * self.mode[i - 2] + 1
                for i in range(2, len(input_shape))]
        else:
            self.mode = mode.lower()
            if self.mode == 'valid':
                self.out_size = [input_shape[i] - filter_shape[i] + 1
                                 for i in range(2, len(input_shape))]
            elif self.mode == 'half':
                self.out_size = input_shape[2:]
            elif self.mode == 'full':
                # Full convolution enlarges the map: out = in + filter - 1
                self.out_size = [input_shape[i] + filter_shape[i] - 1
                                 for i in range(2, len(input_shape))]
            else:
                raise ValueError('Invalid mode: %s' % self.mode)
        self.out_size = tuple(self.out_size)
        for sz in self.out_size:
            if sz < 1:
                raise ValueError('Invalid feature size: (%s).' %
                                 ', '.join([str(i) for i in self.out_size]))

        self.filter_shape = filter_shape
        self.input_shape = input_shape
        self.activation = activation
        self.act_name = activation.__name__
        self.no_bias = no_bias
        self.name = 'Conv' if name is None else name
        self.subsample = subsample
        self.filter_dilation = filter_dilation

        # Initialize parameters
        self.params = []
        if isinstance(W, T.sharedvar.TensorSharedVariable):
            # Externally owned (shared) weights are not registered in params
            self.W = W
        else:
            if W is None:
                W_values = self.init_he(filter_shape, self.activation)
            else:
                W_values = W
            self.W = theano.shared(W_values, self.name + '_W',
                                   borrow=True)
            self.params += [self.W]

        if self.no_bias:
            self.b = None
        elif isinstance(b, T.sharedvar.TensorSharedVariable):
            self.b = b
        else:
            if b is None:
                # One bias per output channel
                b_values = np.zeros((filter_shape[0],),
                                    dtype=theano.config.floatX)
            else:
                b_values = b
            self.b = theano.shared(b_values, self.name + '_b',
                                   borrow=True)
            self.params += [self.b]

        # Show information (self.name so unnamed layers print 'Conv',
        # not 'None')
        print('  # %s (Conv-%s):' % (self.name, mode), end=' ')
        print('flt.(%s),' % ', '.join(
            [str(i) for i in self.filter_shape]), end=' ')
        print('in.(%s),' % ', '.join(
            [str(i) for i in self.input_shape[1:]]), end=' ')
        print('act.: %s,' % self.act_name, end=' ')
        if self.no_bias:
            print('No bias')
        else:
            print('')
        if self.subsample != (1, 1):
            print('    subsample (%s) -> (%s)' % (
                ', '.join([str(i) for i in self.input_shape[1:]]),
                ', '.join([str(i) for i in self.get_out_shape()[1:]])))

    def get_output(self, input, **kwargs):
        """
        Build the symbolic output ``activation(conv2d(input, W) + b)``.

        Keyword Arguments
        -----------------
        var_shape: bool
            If True, the static ``input_shape`` is not passed to
            ``conv2d`` (``input_shape=None``). Defaults to False.
        """
        var_shape = kwargs.get('var_shape', False)

        lin_output = conv2d(
            input=input,
            filters=self.W,
            input_shape=None if var_shape else self.input_shape,
            filter_shape=self.filter_shape,
            border_mode=self.mode,
            subsample=self.subsample,
            filter_dilation=self.filter_dilation
        )

        if not self.no_bias:
            # Broadcast the per-channel bias over batch and spatial axes
            lin_output += self.b.dimshuffle('x', 0, 'x', 'x')

        return self.activation(lin_output)

    def get_out_shape(self, after_ss=True):
        """
        Return (batch size, num filters, out rows, out cols).

        If ``after_ss`` is True the spatial sizes are divided by the
        subsample factors, rounding up.
        """
        out_size = self.out_size
        if after_ss:
            out_size = [(out_size[i] + self.subsample[i] - 1) //
                        self.subsample[i] for i in range(len(out_size))]

        return (self.input_shape[0], self.filter_shape[0]) + tuple(out_size)
306 | 
307 | 
class ConvGradLayer(Layer):
    """
    Transposed convolutional layer.

    Parameters
    ----------
    out_shape: a tuple of ints
        (batch size, num output feature maps, image height, image width)
    num_in_feat: int
        Number input feature maps.
    filt_size: a tuple of ints
        (filter rows, filter columns)
    activation: function
        Activation function.
    mode: ``'half'``, ``'valid'``, ``'full'`` or a tuple of two ints
        Border mode of convolution in the forward path.
    subsample: a tuple of ints (len = 2)
        Stride of convolution in the forward path.
    filter_dilation: a tuple of ints (len = 2)
        Dilation of convolution in the forward path.
    W: shared variable, numpy array or None
        Filter weights. A shared variable is used as is and is NOT added
        to ``params``. If None, the weights are created by ``init_he``.
    b: shared variable, numpy array or None
        Biases. A shared variable is used as is and is NOT added to
        ``params``. If None, the biases are initialized with zeros.
    no_bias: bool
        If True, bias is not used in this layer.
    name: str or None
        Layer name used for parameter names and logging
        (defaults to ``'ConvGr'``).
    """
    def __init__(self, out_shape, num_in_feat, filt_size, activation=linear,
                 mode='half', subsample=(1, 1), filter_dilation=(1, 1),
                 W=None, b=None, no_bias=False, name=None):
        super(ConvGradLayer, self).__init__()

        # Make filter shape: (output maps, input maps, rows, cols).
        # tuple() lets callers pass filt_size as a list (e.g. from a config).
        assert len(filt_size) == 2
        filter_shape = (out_shape[1], num_in_feat) + tuple(filt_size)

        # Accept explicit padding tuples like ConvLayer does; only string
        # modes are case-normalized.
        self.mode = mode if isinstance(mode, tuple) else mode.lower()
        self.filter_shape = filter_shape
        self.out_shape = out_shape
        self.activation = activation
        self.act_name = activation.__name__
        self.no_bias = no_bias
        self.name = 'ConvGr' if name is None else name
        self.subsample = subsample
        self.filter_dilation = filter_dilation

        # Initialize parameters
        self.params = []
        if isinstance(W, T.sharedvar.TensorSharedVariable):
            # Externally owned (shared) weights are not registered in params
            self.W = W
        else:
            if W is None:
                W_values = self.init_he(filter_shape, self.activation)
            else:
                W_values = W
            self.W = theano.shared(W_values, self.name + '_W',
                                   borrow=True)
            self.params += [self.W]

        if self.no_bias:
            self.b = None
        elif isinstance(b, T.sharedvar.TensorSharedVariable):
            self.b = b
        else:
            if b is None:
                # One bias per output feature map (out_shape[1])
                b_values = np.zeros((filter_shape[0],),
                                    dtype=theano.config.floatX)
            else:
                b_values = b
            self.b = theano.shared(b_values, self.name + '_b',
                                   borrow=True)
            self.params += [self.b]

        # Show information (self.name so unnamed layers print 'ConvGr',
        # not 'None')
        print('  # %s (ConvGr-%s):' % (self.name, mode), end=' ')
        print('flt.(%s),' % ', '.join(
            [str(i) for i in self.filter_shape]), end=' ')
        print('out.(%s),' % ', '.join(
            [str(i) for i in self.out_shape[1:]]), end=' ')
        print('act.: %s,' % self.act_name, end=' ')
        if self.no_bias:
            print('No bias')
        else:
            print('')
        if self.subsample != (1, 1):
            print('    upsample -> (%s)' % (
                ', '.join([str(i) for i in self.out_shape[1:]])))

    def get_output(self, input, **kwargs):
        """
        Build the symbolic output of the transposed convolution.

        ``input`` is passed as ``output_grad`` to
        ``conv2d_grad_wrt_inputs`` and the result has shape ``out_shape``;
        the bias (if any) and the activation are applied afterwards.
        """
        lin_output = conv2d_grad_wrt_inputs(
            output_grad=input,
            filters=self.W,
            input_shape=self.out_shape,
            filter_shape=self.filter_shape,
            border_mode=self.mode,
            subsample=self.subsample,
            # filter_flip=True,
            filter_dilation=self.filter_dilation
        )
        if not self.no_bias:
            # Broadcast the per-channel bias over batch and spatial axes
            lin_output += self.b.dimshuffle('x', 0, 'x', 'x')

        return self.activation(lin_output)

    def get_out_shape(self, **kwargs):
        """Return the output shape given at construction."""
        return self.out_shape
416 | 
417 | 
class ActivationLayer(Layer):
    """
    Layer that only applies an activation function (no weights, no bias).

    Parameters
    ----------
    activation: function
        Activation function applied to the input.
    name: str or None
        Layer name used for logging (defaults to ``'Act'``).
    """
    def __init__(self, activation=linear, name=None):
        super(ActivationLayer, self).__init__()

        self.name = name if name is not None else 'Act'
        self.activation = activation
        self.act_name = activation.__name__

        # Show information
        header = '  # %s (Act.)' % (self.name)
        print(header, 'act.: %s,' % self.act_name)

    def get_output(self, input, **kwargs):
        """Return ``activation(input)``; keyword arguments are ignored."""
        activated = self.activation(input)
        return activated
440 | 
441 | 
class BiasLayer(Layer):
    """
    Bias layer (no weights).

    Parameters
    ----------
    input_shape: int or a tuple of ints
        Input feature dimension or the full input shape
        (batch_size, input feature dimension, ...).
    axis: int
        Axis of input to add the bias.
    activation: function
        Activation function.
    b: shared variable, numpy array or None
        Biases. A shared variable is used as is and is NOT added to
        ``params``. If None, the biases are initialized with zeros.
    name: str or None
        Layer name used for parameter names and logging
        (defaults to ``'Bias'``).
    """
    def __init__(self, input_shape, axis=1, activation=linear,
                 b=None, name=None):
        super(BiasLayer, self).__init__()

        self.input_shape = input_shape
        self.axis = axis
        self.activation = activation
        self.name = 'Bias' if name is None else name
        self.act_name = activation.__name__

        shape_given = isinstance(input_shape, (list, tuple))
        if shape_given:
            # Full shape given: one bias per entry along `axis`
            self.bias_sh = (input_shape[self.axis],)
            self.in_dim = len(input_shape)
        else:
            # Plain feature dimension: input is treated as 2-D
            self.bias_sh = (input_shape,)
            self.in_dim = 2

        if isinstance(b, T.sharedvar.TensorSharedVariable):
            self.b = b
        else:
            if b is None:
                b_values = np.zeros(self.bias_sh, dtype=theano.config.floatX)
            else:
                b_values = b
            self.b = theano.shared(b_values, self.name + '_b', borrow=True)
            self.params += [self.b]

        # Show information
        print('  # %s (Bias)' % (self.name), end=' ')
        if shape_given:
            # Formatting a 2-element shape with '%d' would raise TypeError,
            # so every sequence shape is printed element-wise.
            print('in.(%s),' % ', '.join(
                [str(i) for i in self.input_shape[1:]]), end=' ')
        else:
            print('in.(%d),' % self.input_shape, end=' ')
        print('bias dim:%d,' % self.axis, end=' ')
        print('act.: %s,' % self.act_name)

    def get_output(self, input, **kwargs):
        """
        Return ``activation(input + b)``.

        For inputs with more than two dimensions the bias is reshaped so
        that it lies on ``axis`` and broadcasts over all other axes;
        otherwise it is added directly.
        """
        if self.in_dim > 2:
            pattern = [0 if ii == self.axis else 'x'
                       for ii in range(self.in_dim)]
            lin_output = input + self.b.dimshuffle(pattern)
        else:
            lin_output = input + self.b
        return self.activation(lin_output)

    def get_out_shape(self):
        """Return the output shape (identical to ``input_shape``)."""
        return self.input_shape
506 | 
507 | 
class TensorToVectorLayer(Layer):
    """
    Flattens a 4D tensor into a 2D tensor, keeping the first axis.

    Parameters
    ----------
    input_shape: a tuple of ints
        (batch size, num input feature maps, image height, image width)
    """
    def __init__(self, input_shape):
        super(TensorToVectorLayer, self).__init__()

        self.input_shape = input_shape
        feature_dims = self.input_shape[1:]
        dims_text = ', '.join(str(dim) for dim in feature_dims)
        print('  # tensor to vector: (%s) -> %d' % (
            dims_text, np.prod(feature_dims)))

    def get_output(self, input, **kwargs):
        """Return ``input`` flattened to two dimensions."""
        flattened = input.flatten(2)
        return flattened

    def get_out_shape(self):
        """Return (batch size, product of the remaining dimensions)."""
        batch_size = self.input_shape[0]
        n_features = np.prod(self.input_shape[1:])
        return (batch_size, n_features)
530 | 
531 | 
class VectorToTensorLayer(Layer):
    """
    Converts 2D tensor to 4D tensor.

    Parameters
    ----------
    output_shape: a tuple (or list) of ints
        (batch size, num output feature maps, image height, image width)
    """
    def __init__(self, output_shape):
        super(VectorToTensorLayer, self).__init__()

        self.output_shape = output_shape
        print('  # vector to tensor: (%s)' % ', '.join(
            [str(i) for i in self.output_shape[1:]]))

    def get_output(self, input, **kwargs):
        """
        Reshape ``input`` to ``output_shape`` with a free batch dimension.

        The first dimension is passed as -1 to ``reshape`` so the batch
        size is not fixed to ``output_shape[0]``.
        """
        # output_shape = (T.shape(input)[0], ) + self.output_shape[1:]
        # tuple() so that a list-valued output_shape (e.g. loaded from a
        # config file) does not break the tuple concatenation.
        output_shape = (-1,) + tuple(self.output_shape[1:])
        return input.reshape(output_shape)

    def get_out_shape(self):
        """Return the output shape given at construction."""
        return self.output_shape
555 | 
556 | 
557 | ##############################################################################
558 | 
559 | 
class UpsampleLayer(Layer):
    """
    Upscale the input by a specified factor.

    Parameters
    ----------
    input_shape: a tuple of ints
        (batch size, num input feature maps, image height, image width)
    us: a tuple of ints (len = 2)
        Upscaling factors (vertical, horizontal).
    out_shape: a tuple of ints or None
        Stored on the layer as ``out_shape``; not used by the methods
        of this class.
    mode: {``'zero'``, ``'NN'``}
        Put zeros or nearest neighbor pixels between original pixels.
    """
    def __init__(self, input_shape, us=(2, 2), out_shape=None, mode='zero'):
        super(UpsampleLayer, self).__init__()

        self.input_shape = input_shape
        self.us = us
        self.mode = mode
        self.out_shape = out_shape

        # Show information
        factors_text = ', '.join(str(f) for f in self.us)
        in_text = ', '.join(str(d) for d in self.input_shape[1:])
        out_text = ', '.join(str(d) for d in self.get_out_shape()[1:])
        print('  # upsample-(%s)-%s (%s) -> (%s)' % (
            mode, factors_text, in_text, out_text))

    def get_output(self, input, **kwargs):
        """
        Build the symbolic upsampled output.

        Raises
        ------
        ValueError
            If ``mode`` is neither ``'zero'`` nor ``'NN'``.
        """
        row_factor, col_factor = self.us[0], self.us[1]

        if self.mode == 'zero':
            # Scatter the input onto a zero canvas at every us-th position
            in_sh = input.shape
            canvas = T.zeros(
                (in_sh[0], in_sh[1],
                 in_sh[2] * row_factor, in_sh[3] * col_factor),
                dtype=input.dtype)
            return T.set_subtensor(
                canvas[:, :, ::row_factor, ::col_factor], input)

        if self.mode == 'NN':
            # Repeat every pixel along both spatial axes
            repeated_rows = input.repeat(row_factor, axis=2)
            return repeated_rows.repeat(col_factor, axis=3)

        raise ValueError('Select the proper mode: zero / NN')

    def get_out_shape(self):
        """Return the input shape with both spatial sizes scaled by us."""
        shape = self.input_shape
        return (shape[0], shape[1],
                shape[2] * self.us[0], shape[3] * self.us[1])
603 | 
604 | 
class Pool2DLayer(Layer):
    """
    Downscale the input by a specified factor.

    Parameters
    ----------
    input_shape: a tuple of ints
        (batch size, num input feature maps, image height, image width)
    pool_size: tuple of length 2
        Pooling window (vertical, horizontal).
        (2, 2) will halve the image in each dimension.
    stride: tuple of length 2 or None
        Step between pooling windows; ``pool_size`` is used when None.
    pad: tuple of two ints - (pad_h, pad_w)
        Padding passed to ``pool_2d``; pad_h applies to the top and
        bottom margins and pad_w to the left and right margins.
    ignore_border: bool (default True)
        When True, a (5, 5) input with pool_size (2, 2) gives a (2, 2)
        output, (3, 3) otherwise.
    mode: {``'max'``, ``'sum'``, ``'average_inc_pad'``, ``'average_exc_pad'``}
        Pooling operation passed to ``pool_2d``.

    Raises
    ------
    ValueError
        If ``input_shape`` does not have exactly 4 entries.
    """
    def __init__(self, input_shape, pool_size, stride=None, pad=(0, 0),
                 ignore_border=True, mode='max'):
        super(Pool2DLayer, self).__init__()

        self.input_shape = input_shape
        self.pool_size = pool_size

        if len(self.input_shape) != 4:
            raise ValueError("Tried to create a 2D pooling layer with "
                             "input shape %r. Expected 4 input dimensions "
                             "(batchsize, channels, 2 spatial dimensions)."
                             % (self.input_shape,))

        # Non-overlapping windows unless a stride is given explicitly
        self.stride = self.pool_size if stride is None else stride
        self.pad = pad
        self.ignore_border = ignore_border
        self.mode = mode

        # Show information
        in_text = ', '.join(str(d) for d in self.input_shape[1:])
        out_text = ', '.join(str(d) for d in self.get_out_shape()[1:])
        print('  # Pool-%s (%s) -> (%s)' % (mode, in_text, out_text))

    def get_output(self, input, **kwargs):
        """Build the symbolic pooled output via ``pool_2d``."""
        return pool_2d(input,
                       ws=self.pool_size,
                       stride=self.stride,
                       ignore_border=self.ignore_border,
                       pad=self.pad,
                       mode=self.mode,
                       )

    def get_out_shape(self):
        """Return the input shape with pooled height and width."""
        shape = list(self.input_shape)  # mutable copy

        # Axes 2 and 3 are the spatial axes; k indexes the per-axis settings
        for k, axis in enumerate((2, 3)):
            shape[axis] = pool_output_length(
                self.input_shape[axis],
                pool_size=self.pool_size[k],
                stride=self.stride[k],
                pad=self.pad[k],
                ignore_border=self.ignore_border)

        return tuple(shape)
679 | 
680 | 
def pool_output_length(input_length, pool_size, stride, pad, ignore_border):
    """
    Compute the length of one pooled axis.

    Parameters
    ----------
    input_length: int or None
        Length of the axis before pooling.
    pool_size: int or None
        Pooling window length along the axis.
    stride: int
        Step between windows.
    pad: int
        Padding added on each side; must be 0 when ``ignore_border``
        is False.
    ignore_border: bool
        If True, only windows that fit entirely in the padded input are
        counted; otherwise a partial window at the end is counted too.

    Returns
    -------
    int or None
        Output length, or None if ``input_length`` or ``pool_size``
        is None.
    """
    if input_length is None or pool_size is None:
        return None

    if ignore_border:
        n_window_starts = input_length + 2 * pad - pool_size + 1
        return (n_window_starts + stride - 1) // stride

    # output length calculation taken from:
    # https://github.com/Theano/Theano/blob/master/theano/tensor/signal/downsample.py
    assert pad == 0

    if stride >= pool_size:
        return (input_length + stride - 1) // stride

    extra_steps = (input_length - pool_size + stride - 1) // stride
    return max(0, extra_steps) + 1
701 | 
702 | 
703 | ##############################################################################
704 | # Dropout
class DropoutLayer(Layer):
    """
    Conducts Dropout.

    Every instance registers itself in the class-level ``layers`` list so
    that ``set_dropout_training`` can switch all of them at once.

    Parameters
    ----------
    p: float
        Value subtracted from one to obtain the retain probability.
    rescale: bool
        If True, the input is divided by the retain probability before
        the mask is applied.
    """
    layers = []

    def __init__(self, p=0.5, rescale=True):
        super(DropoutLayer, self).__init__()

        seed = self.rng.randint(1, 2147462579)
        self._srng = RandomStreams(seed)
        self.p = p
        self.rescale = rescale
        self.deterministic = False
        DropoutLayer.layers.append(self)
        print('  # Dropout: p = %.2f' % (self.p))

    def get_output(self, input, **kwargs):
        """
        Return the input unchanged when deterministic (or p == 0),
        otherwise the input multiplied by a binomial mask.
        """
        if self.deterministic or self.p == 0:
            return input

        # Using theano constant to prevent upcasting
        retain_prob = T.constant(1) - self.p
        if self.rescale:
            input /= retain_prob

        mask = self._srng.binomial(input.shape, p=retain_prob,
                                   dtype=input.dtype)
        return input * mask

    @staticmethod
    def set_dropout_training(training):
        """Set ``deterministic = not training`` on every registered layer."""
        deterministic = not training
        # print(' - Dropout layres: deterministic =', deterministic)
        for dropout_layer in DropoutLayer.layers:
            dropout_layer.deterministic = deterministic
740 | 


--------------------------------------------------------------------------------
/IQA_DeepQA_FR_release/trainer.py:
--------------------------------------------------------------------------------
  1 | from __future__ import absolute_import, division, print_function
  2 | 
  3 | import os
  4 | import sys
  5 | import time
  6 | import timeit
  7 | import math
  8 | 
  9 | import numpy as np
 10 | import PIL.Image as Image
 11 | from scipy.stats import spearmanr, pearsonr, kendalltau
 12 | 
 13 | from .utils import tile_raster_images, image_from_nparray
 14 | from .utils import tile_tensor4_from_list
 15 | 
 16 | 
 17 | class Trainer(object):
 18 |     """
 19 |     # Trainer classs managing training and testing routines.
 20 |     Trains & tests models over epochs, shows monitoring variables,
 21 |     save model snapshots, store data and images.
 22 | 
 23 |     Arguments
 24 |     ---------
 25 |     train_config: dictionary
 26 |         - 'batch_size': number of data in a batch
 27 |         - 'epochs': maximum number of epochs for training
 28 |         - 'test_freq': test_model the trained model every test_freq
 29 |         - 'save_freq': save data every save_freq
 30 |         - 'regular_snap_freq': save model snapshot every regular_snap_freq
 31 |         - 'n_imgs_to_record': number of images to record
 32 |         - 'prefix': prefix of filenames of recording data
 33 |     snap_path: string
 34 |         path to save snapshot file.
 35 |     output_path: string
 36 |         path to save output data.
 37 |     """
 38 | 
 39 |     def __init__(self, train_config, snap_path=None, output_path=None):
 40 | 
 41 |         self.test_freq = train_config.get('test_freq', None)
 42 |         assert self.test_freq is not None
 43 |         self.save_freq = train_config.get('save_freq', None)
 44 |         if self.save_freq is None:
 45 |             self.save_freq = self.test_freq
 46 |         self.regular_snap_freq = train_config.get('regular_snap_freq', 40)
 47 |         self.n_imgs_to_record = train_config.get('n_imgs_to_record', 20)
 48 | 
 49 |         self.prefix = train_config.get('prefix', '')
 50 |         self.set_path(snap_path, output_path)
 51 | 
 52 |     def set_path(self, snap_path, output_path=None):
 53 |         if snap_path is not None:
 54 |             if not os.path.isdir(snap_path):
 55 |                 os.makedirs(snap_path)
 56 | 
 57 |         if output_path is not None:
 58 |             if not os.path.isdir(output_path):
 59 |                 os.makedirs(output_path)
 60 |         else:
 61 |             output_path = snap_path
 62 | 
 63 |         self.snap_path = snap_path
 64 |         self.output_path = output_path
 65 | 
 66 |     def training_routine(self, model, get_train_outputs, rec_train,
 67 |                          get_test_outputs, rec_test,
 68 |                          train_batch_size, test_batch_size,
 69 |                          train_data, test_data,
 70 |                          epochs, prefix2='', check_mos_corr=False):
 71 |         """
 72 |         # Actual training routine.
 73 | 
 74 |         @type model: .models.model_basis.ModelBasis
 75 |         @type rec_train: .models.model_record.Record
 76 |         @type rec_test: .models.model_record.Record
 77 |         @type train_data: .data_load.dataset.Dataset
 78 |         @type test_data: .data_load.dataset.Dataset
 79 |         """
 80 | 
 81 |         # check validity
 82 |         assert self.snap_path is not None
 83 | 
 84 |         # get numbers of training and Testing batches
 85 |         n_train_imgs = train_data.n_data
 86 |         n_test_imgs = test_data.n_data
 87 |         n_train_batches = int(n_train_imgs / train_batch_size)
 88 |         n_test_batches = int(n_test_imgs / test_batch_size)
 89 |         assert n_train_batches > 0, 'n_train_batches = %d' % (n_train_batches)
 90 |         assert n_test_batches > 0, 'n_test_batches = %d' % (n_test_batches)
 91 | 
 92 |         # check n_imgs_to_record
 93 |         n_valid_rec_batches = self.n_imgs_to_record // test_batch_size + 1
 94 |         if n_valid_rec_batches > n_test_batches:
 95 |             n_valid_rec_batches = n_test_batches
 96 | 
 97 |         if self.n_imgs_to_record < test_batch_size:
 98 |             n_imgs_to_record = self.n_imgs_to_record
 99 |         else:
100 |             n_imgs_to_record = n_valid_rec_batches * test_batch_size
101 | 
102 |         # get numbers of data and images to monitor and write
103 |         until_loss, until_im_info, until_img = rec_test.get_until_indices(1)
104 | 
105 |         # snapshot file names
106 |         snapshot_file_latest = os.path.join(
107 |             self.snap_path, self.prefix + prefix2 + 'snapshot_lastest.npy')
108 |         snapshot_file_best = os.path.join(
109 |             self.snap_path, self.prefix + prefix2 + 'snapshot_best.npy')
110 |         snapshot_file_best_srcc = os.path.join(
111 |             self.snap_path, self.prefix + prefix2 + 'snapshot_best_srcc.npy')
112 |         snapshot_file_regular = os.path.join(
113 |             self.snap_path, self.prefix + prefix2 + 'snapshot_{:03d}.npy')
114 |         snapshot_file_fin = os.path.join(
115 |             self.snap_path, self.prefix + prefix2 + 'snapshot.npy')
116 | 
117 |         # log file names
118 |         log_file = os.path.join(
119 |             self.snap_path, prefix2 + 'log.txt')
120 |         log_test_file = os.path.join(
121 |             self.output_path, prefix2 + 'log_test.txt')
122 | 
123 |         # Show information
124 |         print('\nTrain', end='')
125 |         if train_data.imagewise:
126 |             print(' imagewise', end='')
127 |         else:
128 |             print(' patchwise', end='')
129 |         print(' / Test', end='')
130 |         if test_data.imagewise:
131 |             print(' imagewise', end='')
132 |         else:
133 |             print(' patchwise', end='')
134 |         print(' (%d epochs).' % (epochs))
135 |         print('Save a snapshot every %d epochs,' % self.save_freq, end='')
136 |         print(' and test the model every %d epochs.' % self.test_freq)
137 |         print(' - Regular snapshot: every %d epochs' % self.regular_snap_freq)
138 |         print(' - Snapshot path: %s' % self.snap_path)
139 |         print(' - Batch size: %d (train) / %d (test)' % (
140 |             train_batch_size, test_batch_size))
141 |         print(' - Training batches: %d (%d images)' % (
142 |             n_train_batches, n_train_imgs))
143 |         print(' - Testing batches: %d (%d images)' % (
144 |             n_test_batches, n_test_imgs), end='')
145 |         print(' / Missed images: %d' % (
146 |             n_test_imgs - n_test_batches * test_batch_size))
147 |         print(' - Monitor data: %s' % (', '.join(rec_train.data_keys)))
148 |         print(' - Monitor images: %s' % (', '.join(rec_test.data_keys)))
149 |         print(' - Monitor im. data: %s' % (', '.join(rec_test.im_data_keys)))
150 |         print(' - Num of rec. images: %d (%d x %d batches)' % (
151 |             n_imgs_to_record, test_batch_size, n_valid_rec_batches))
152 | 
153 |         # get MOS list
154 |         if check_mos_corr:
155 |             # if check_mos_corr is true, the first value of
156 |             # rec_im_data must be mos predicted.
157 |             assert rec_test.im_data_keys[0] == 'mos_p'
158 |             assert test_data.exist_score
159 |             n_valid_test_imgs = n_test_batches * test_batch_size
160 |             test_score_list = test_data.score_data[:n_valid_test_imgs]
161 |             mos_p_list = np.zeros(n_valid_test_imgs, dtype='float32')
162 |             print(' - Check SRCC/PLCC using %d images' % (n_valid_test_imgs))
163 | 
164 |         start_time = timeit.default_timer()
165 |         prev_time = start_time
166 |         best_test_loss = np.inf
167 | 
168 |         # write current time in log file
169 |         cur_time = 'Started at %s\n' % (time.strftime('%X %x'))
170 |         key_str = 'cost, ' + ", ".join(rec_train.data_keys) + '\n'
171 |         with open(log_file, 'a') as f_hist:
172 |             f_hist.write(cur_time)
173 |             f_hist.write(key_str)
174 | 
175 |         key_str = 'cost, ' + ", ".join(rec_train.data_keys)
176 |         key_str += ', SRCC, PLCC\n' if check_mos_corr else '\n'
177 |         with open(log_test_file, 'a') as f_hist:
178 |             f_hist.write(cur_time)
179 |             f_hist.write(key_str)
180 | 
181 |         # if check_mos_corr is True -> best_score_set = (SRCC, PLCC)
182 |         # else -> best_score_set = (losses[0], losses[1])
183 |         best_score_set = (0., 0., -1) if check_mos_corr else (np.inf, 0., -1)
184 | 
185 |         #######################################################################
186 |         # go through training epochs
187 |         for epoch in range(epochs):
188 |             # train model
189 |             losses = np.zeros(rec_train.num_data + 1, dtype='float32')
190 |             for batch_idx in range(n_train_batches):
191 |                 # get training loss
192 |                 losses += get_train_outputs()
193 |             losses /= n_train_batches
194 | 
195 |             # write log
196 |             with open(log_file, 'a') as f_hist:
197 |                 data = '%d' % (epoch + 1)
198 |                 for idx in range(-1, rec_train.num_data):
199 |                     data += '\t%.6f' % (losses[idx + 1])
200 |                 data += '\n'
201 |                 f_hist.write(data)
202 | 
203 |             # show information
204 |             end_time = timeit.default_timer()
205 |             pr_str = ' {:3d}, cost {:.3f}, '.format(epoch + 1, losses[0])
206 |             for idx, key in enumerate(rec_train.data_keys):
207 |                 pr_str += '{:s} {:.3f}, '.format(key, losses[idx + 1])
208 |             minutes, seconds = divmod(end_time - prev_time, 60)
209 |             pr_str += 'time {:02.0f}:{:05.2f}\n'.format(minutes, seconds)
210 |             sys.stdout.write(pr_str)
211 |             sys.stdout.flush()
212 |             prev_time = end_time
213 | 
214 |             # save snapshot every regular_snap_freq
215 |             if (epoch + 1) % self.regular_snap_freq == 0:
216 |                 model.save(snapshot_file_regular.format(epoch + 1))
217 | 
218 |             ##################################################################
219 |             # test_model the trained model and save a snapshot
220 |             # For every safe_freq and test_freq
221 |             test_model = (epoch + 1) % self.test_freq == 0
222 |             save_data = (epoch + 1) % self.save_freq == 0
223 |             if test_model or save_data:
224 |                 if save_data:
225 |                     # make output folder
226 |                     numstr = '{:03d}'.format(epoch + 1)
227 |                     out_path = os.path.join(
228 |                         self.output_path, prefix2 + numstr + '/')
229 |                     if not os.path.isdir(out_path):
230 |                         os.makedirs(out_path)
231 | 
232 |                     im_data = np.zeros(
233 |                         (rec_test.num_im_data, n_imgs_to_record),
234 |                         dtype='float32')
235 | 
236 |                 losses = np.zeros(rec_test.num_data + 1, dtype='float32')
237 |                 for test_bat_idx in range(0, n_test_batches):
238 |                     # get testing loss
239 |                     outputs = get_test_outputs()
240 |                     losses += outputs[:until_loss]
241 |                     cur_im_data = outputs[until_loss:until_im_info]
242 |                     cur_images = outputs[until_im_info:until_img]
243 | 
244 |                     # get predicted mos
245 |                     if check_mos_corr:
246 |                         mos_p = cur_im_data[0]
247 |                         idx_from = test_bat_idx * test_batch_size
248 |                         idx_to = (test_bat_idx + 1) * test_batch_size
249 |                         mos_p_list[idx_from:idx_to] = mos_p
250 | 
251 |                     # write image data
252 |                     if (save_data and rec_test.num_im_data > 0 and
253 |                             test_bat_idx < n_valid_rec_batches):
254 |                         idx_from = test_bat_idx * test_batch_size
255 |                         idx_to = (test_bat_idx + 1) * test_batch_size
256 |                         im_data[:, idx_from:idx_to] = cur_im_data
257 | 
258 |                     # write images
259 |                     if (save_data and rec_test.num_imgs > 0 and
260 |                             test_bat_idx < n_valid_rec_batches):
261 |                         if test_data.imagewise:
262 |                             # if imagewise is True, reconstructs a complete
263 |                             # image using the patches in the minibatch
264 |                             rec_info = test_data.get_current_recon_info()
265 |                             draw_tiled_images(
266 |                                 cur_images, rec_test.rec_imgs, test_bat_idx,
267 |                                 out_path,
268 |                                 rec_info['bat2img_idx_set'],
269 |                                 rec_info['npat_img_list'],
270 |                                 rec_info['filt_idx_list'],
271 |                                 test_data.patch_size,
272 |                                 test_data.patch_step)
273 |                         else:
274 |                             draw_images(
275 |                                 cur_images, rec_test.rec_imgs, test_bat_idx,
276 |                                 test_batch_size, out_path)
277 | 
278 |                 losses /= n_test_batches
279 | 
280 |                 # get SRCC and PLCC
281 |                 if check_mos_corr:
282 |                     rho_s, _ = spearmanr(test_score_list, mos_p_list)
283 |                     rho_p, _ = pearsonr(test_score_list, mos_p_list)
284 | 
285 |                     if math.isnan(rho_s) or math.isnan(rho_p):
286 |                         print('@ Stop iteration! (NaN)')
287 |                         best_score_set = (0, 0, epoch)
288 |                         break
289 |                     else:
290 |                         if rho_s > best_score_set[0]:
291 |                             best_score_set = (rho_s, rho_p, epoch)
292 |                             model.save(snapshot_file_best_srcc)
293 |                 else:
294 |                     if losses[0] < best_score_set[0]:
295 |                         if rec_test.num_data >= 1:
296 |                             best_score_set = (losses[0], losses[1], epoch)
297 |                         else:
298 |                             best_score_set = (losses[0], 0, epoch)
299 | 
300 |                 # save the latest snapshot
301 |                 model.save(snapshot_file_latest)
302 | 
303 |                 # save the best snapshot
304 |                 if losses[0] < best_test_loss:
305 |                     best_test_loss = losses[0]
306 |                     print(' # BEST', end=' ')
307 |                     model.save(snapshot_file_best)
308 | 
309 |                 # For every save_freq
310 |                 if save_data:
311 |                     # write image data
312 |                     if rec_test.num_im_data > 0:
313 |                         with open(out_path + 'info.txt', 'w') as f:
314 |                             # header
315 |                             data = 'epoch: %s (%s)\n' % (
316 |                                 numstr, ', '.join(rec_test.im_data_keys))
317 |                             f.write(data)
318 | 
319 |                             for idx in range(n_imgs_to_record):
320 |                                 imidx = idx
321 |                                 data = '%d' % idx
322 |                                 for ii in range(rec_test.num_im_data):
323 |                                     data += '\t%.6f' % (im_data[ii][imidx])
324 |                                 data += '\n'
325 |                                 f.write(data)
326 | 
327 |                     # write mos
328 |                     if check_mos_corr:
329 |                         with open(out_path + 'mos_res.txt', 'w') as f:
330 |                             # header
331 |                             data = 'epoch: %s (mos_p, mos)\n' % (numstr)
332 |                             f.write(data)
333 | 
334 |                             for idx in range(n_valid_test_imgs):
335 |                                 data = '{:.6f}\t{:.6f}\n'.format(
336 |                                     mos_p_list[idx], test_score_list[idx])
337 |                                 f.write(data)
338 |                             data = 'SRCC: {:.4f}, PLCC: {:.4f}\n'.format(
339 |                                 rho_s, rho_p)
340 |                             f.write(data)
341 | 
342 |                     # write kernel images
343 |                     draw_kernels(rec_test.rec_kernels, self.output_path,
344 |                                  prefix2, '_' + numstr)
345 | 
346 |                 # write log
347 |                 with open(log_test_file, 'a') as f_hist:
348 |                     data = '{:d}'.format(epoch + 1)
349 |                     for idx in range(-1, rec_test.num_data):
350 |                         data += '\t{:.6f}'.format(losses[idx + 1])
351 |                     if check_mos_corr:
352 |                         data += '\t{:.4f}\t{:.4f}'.format(rho_s, rho_p)
353 |                     data += '\n'
354 |                     f_hist.write(data)
355 | 
356 |                 # show information
357 |                 end_time = timeit.default_timer()
358 |                 pr_str = ' * vcost {:.3f}, '.format(losses[0])
359 |                 for idx, key in enumerate(rec_train.data_keys):
360 |                     pr_str += '{:s} {:.3f}, '.format(key, losses[idx + 1])
361 |                 if check_mos_corr:
362 |                     pr_str += 'SRCC {:.3f}, PLCC {:.3f}, '.format(rho_s, rho_p)
363 |                 minutes, seconds = divmod(end_time - prev_time, 60)
364 |                 pr_str += 'time {:02.0f}:{:05.2f}\n'.format(minutes, seconds)
365 |                 sys.stdout.write(pr_str)
366 |                 sys.stdout.flush()
367 |                 prev_time = end_time
368 | 
369 |         end_time = timeit.default_timer()
370 |         total_time = end_time - start_time
371 |         print(' - Train ran for %.2fm' % ((total_time) / 60.))
372 |         print(' - Finished at %s' % (time.strftime('%X %x')))
373 | 
374 |         if best_score_set[0] != 0:
375 |             model.save(snapshot_file_fin)
376 | 
377 |         return best_score_set
378 | 
379 |     def testing_routine(self, get_test_outputs, rec_test,
380 |                         test_batch_size, test_data, prefix2='',
381 |                         check_mos_corr=False):
382 |         """Actual testing routine: group patches for each image
383 | 
384 |         @type rec_test: .models.model_record.Record
385 |         """
386 |         # get numbers of training and Testing batches
387 |         n_test_imgs = test_data.n_images
388 |         n_test_batches = int(n_test_imgs / test_batch_size)
389 |         assert n_test_batches > 0
390 | 
391 |         n_valid_test_imgs = n_test_batches * test_batch_size
392 | 
393 |         if self.n_imgs_to_record == 'all':
394 |             n_imgs_to_record = n_valid_test_imgs
395 |         else:
396 |             n_valid_rec_batches = self.n_imgs_to_record // test_batch_size + 1
397 |             if n_valid_rec_batches > n_test_batches:
398 |                 n_valid_rec_batches = n_test_batches
399 |             n_imgs_to_record = n_valid_rec_batches * test_batch_size
400 | 
401 |         # get numbers of data and images to monitor and write
402 |         until_loss = rec_test.num_data + 1
403 |         until_im_info = until_loss + rec_test.num_im_data
404 |         until_img = until_im_info + rec_test.num_imgs
405 | 
406 |         # Show information
407 |         print('\nTest the model')
408 |         if test_data.imagewise:
409 |             print(' (imagewise)')
410 |         else:
411 |             print(' (patchwise)')
412 |         print(' - Num of images in a batch: %d' % (test_batch_size))
413 |         print(' - Testing batches: %d (%d images)' % (
414 |             n_test_batches, n_test_imgs))
415 |         print(' - Missed images in validation: %d' % (
416 |             n_test_imgs - n_test_batches * test_batch_size))
417 |         print(' - Image recording batches: %d (%d images)' % (
418 |             n_valid_rec_batches, n_imgs_to_record))
419 |         print(' - Monitor data: %s' % (', '.join(rec_test.data_keys)))
420 |         print(' - Monitor images: %s' % (', '.join(rec_test.data_keys)))
421 |         print(' - Monitor im. data: %s' % (', '.join(rec_test.im_data_keys)))
422 | 
423 |         # get MOS list
424 |         if check_mos_corr:
425 |             # if check_mos_corr is true, the first value of
426 |             # rec_im_data must be mos predicted.
427 |             assert rec_test.im_data_keys[0] == 'mos_p'
428 |             assert test_data.exist_score
429 | 
430 |             test_score_list = test_data.score_data[:n_valid_test_imgs]
431 |             mos_p_list = np.zeros(n_valid_test_imgs, dtype='float32')
432 |             print(' - Check SRCC/PLCC using %d images' % (n_valid_test_imgs))
433 | 
434 |         start_time = timeit.default_timer()
435 |         prev_time = start_time
436 | 
437 |         # write current time in log file
438 |         cur_time = 'Started at %s\n' % (time.strftime('%X %x'))
439 |         log_file = os.path.join(self.output_path, prefix2 + 'log_test.txt')
440 |         with open(log_file, 'a') as f_hist:
441 |             f_hist.write(cur_time)
442 | 
443 |         out_path = os.path.join(self.output_path, prefix2 + '/')
444 |         if not os.path.isdir(out_path):
445 |             os.makedirs(out_path)
446 | 
447 |         im_data = np.zeros(
448 |             (rec_test.num_im_data, n_valid_test_imgs), dtype='float32')
449 | 
450 |         best_score_set = (0., 0.) if check_mos_corr else (np.inf, np.inf)
451 | 
452 |         losses = np.zeros(rec_test.num_data + 1, dtype='float32')
453 |         for test_bat_idx in range(0, n_test_batches):
454 |             # get testing loss
455 |             outputs = get_test_outputs()
456 | 
457 |             losses += outputs[:until_loss]
458 |             cur_im_data = outputs[until_loss:until_im_info]
459 |             cur_images = outputs[until_im_info:until_img]
460 | 
461 |             # get predicted mos
462 |             if check_mos_corr:
463 |                 mos_p = cur_im_data[0]
464 |                 idx_from = test_bat_idx * test_batch_size
465 |                 idx_to = (test_bat_idx + 1) * test_batch_size
466 |                 mos_p_list[idx_from:idx_to] = mos_p
467 | 
468 |             # write image data
469 |             if rec_test.num_im_data > 0:
470 |                 idx_from = test_bat_idx * test_batch_size
471 |                 idx_to = (test_bat_idx + 1) * test_batch_size
472 |                 im_data[:, idx_from:idx_to] = cur_im_data
473 | 
474 |             # write images
475 |             if rec_test.num_imgs > 0 and test_bat_idx < n_valid_rec_batches:
476 |                 if test_data.imagewise:
477 |                     rec_info = test_data.get_current_recon_info()
478 |                     draw_tiled_images(
479 |                         cur_images, rec_test.rec_imgs, test_bat_idx,
480 |                         out_path,
481 |                         rec_info['bat2img_idx_set'],
482 |                         rec_info['npat_img_list'],
483 |                         rec_info['filt_idx_list'],
484 |                         test_data.patch_size,
485 |                         test_data.patch_step)
486 |                 else:
487 |                     draw_images(
488 |                         cur_images, rec_test.rec_imgs, test_bat_idx,
489 |                         test_batch_size, out_path)
490 |                 rec_info = test_data.get_current_recon_info()
491 |                 draw_tiled_images(
492 |                     cur_images, rec_test.rec_imgs, test_bat_idx, out_path,
493 |                     rec_info['bat2img_idx_set'],
494 |                     rec_info['npat_img_list'],
495 |                     rec_info['filt_idx_list'],
496 |                     test_data.patch_size,
497 |                     test_data.patch_step)
498 | 
499 |         losses /= n_test_batches
500 | 
501 |         # get SRCC and PLCC
502 |         if check_mos_corr:
503 |             rho_s, _ = spearmanr(test_score_list, mos_p_list)
504 |             rho_p, _ = pearsonr(test_score_list, mos_p_list)
505 |             tau, _ = kendalltau(test_score_list, mos_p_list)
506 |             rmse = np.sqrt(((test_score_list - mos_p_list) ** 2).mean())
507 |             best_score_set = (rho_s, rho_p)
508 |         else:
509 |             if rec_test.num_data >= 1:
510 |                 best_score_set = (losses[0], losses[1])
511 |             else:
512 |                 best_score_set = (losses[0], 0)
513 | 
514 |         # write image data
515 |         if rec_test.num_im_data > 0:
516 |             with open(out_path + 'info.txt', 'w') as f:
517 |                 # header
518 |                 data = 'imidx, %s\n' % (
519 |                     ', '.join(rec_test.im_data_keys))
520 |                 f.write(data)
521 | 
522 |                 for idx in range(n_valid_test_imgs):
523 |                     imidx = idx
524 |                     data = '%d' % idx
525 |                     for ii in range(rec_test.num_im_data):
526 |                         data += '\t%.6f' % (im_data[ii][imidx])
527 |                     data += '\n'
528 |                     f.write(data)
529 | 
530 |         # write mos
531 |         if check_mos_corr:
532 |             with open(out_path + 'mos_res.txt', 'w') as f:
533 |                 # header
534 |                 data = 'mos_p, mos\n'
535 |                 f.write(data)
536 | 
537 |                 for idx in range(n_valid_test_imgs):
538 |                     data = '{:.6f}\t{:.6f}\n'.format(
539 |                         mos_p_list[idx], test_score_list[idx])
540 |                     f.write(data)
541 |                 data = 'SRCC: {:.4f}, PLCC: {:.4f}'.format(rho_s, rho_p)
542 |                 data += ', KRCC: {:.4f}, RMSE: {:.4f}\n'.format(tau, rmse)
543 |                 f.write(data)
544 | 
545 |         # write kernel images
546 |         draw_kernels(rec_test.rec_kernels, self.output_path, prefix2)
547 | 
548 |         # show information
549 |         end_time = timeit.default_timer()
550 |         pr_str = ' * vcost {:.3f}, '.format(losses[0])
551 |         for idx, key in enumerate(rec_test.data_keys):
552 |             pr_str += '{:s} {:.3f}, '.format(key, losses[idx + 1])
553 |         if check_mos_corr:
554 |             pr_str += 'SRCC {:.3f}, PLCC {:.3f}, '.format(rho_s, rho_p)
555 |             pr_str += 'KRCC {:.3f}, RMSE {:.3f}, '.format(tau, rmse)
556 |         minutes, seconds = divmod(end_time - prev_time, 60)
557 |         pr_str += 'time {:02.0f}:{:05.2f}\n'.format(minutes, seconds)
558 |         sys.stdout.write(pr_str)
559 |         sys.stdout.flush()
560 |         prev_time = end_time
561 | 
562 |         end_time = timeit.default_timer()
563 |         total_time = end_time - start_time
564 |         print(' - Test ran for %.2fm' % ((total_time) / 60.))
565 |         print(' - Finished at %s' % (time.strftime('%X %x')))
566 | 
567 |         return best_score_set
568 | 
569 | 
def draw_kernels(kernels, out_path, prefix='', suffix=''):
    """Render every 4-D kernel tensor as one tiled PNG image.

    For each kernel, the filters of every input feature are tiled into a
    near-square grid, and those per-feature grids are in turn tiled into
    a single image saved as `<prefix><name><suffix>.png` in `out_path`
    (created if missing).

    @param kernels: sequence of objects providing `get_value(borrow=True)`
        and a `name` attribute (presumably Theano shared variables -
        confirm against the caller). Each value must be 4-D and is
        unpacked as (nkern, nfeat, height, width).
    @param out_path: output directory.
    @param prefix: text placed in front of the kernel name.
    @param suffix: text placed after the kernel name.
    """
    if not os.path.isdir(out_path):
        os.makedirs(out_path)

    for shared_var in kernels:
        weights = shared_var.get_value(borrow=True)
        # '/' in a variable name would be read as a directory separator
        safe_name = shared_var.name.replace('/', '_')
        assert len(weights.shape) == 4
        n_kern, n_feat, kern_h, kern_w = weights.shape

        # side lengths of the (near-)square grids
        kern_grid = int(np.ceil(np.sqrt(n_kern)))
        feat_grid = int(np.ceil(np.sqrt(n_feat)))

        # size of one per-feature grid with 1px spacing between kernels
        grid_shape = ((kern_h + 1) * kern_grid - 1,
                      (kern_w + 1) * kern_grid - 1)

        # one flattened per-feature grid per row
        per_feat = np.zeros((n_feat, grid_shape[0] * grid_shape[1]))
        for feat_idx in range(n_feat):
            per_feat[feat_idx] = tile_raster_images(
                X=weights[:, feat_idx, :, :],
                img_shape=(kern_h, kern_w),
                tile_shape=(kern_grid, kern_grid),
                tile_spacing=(1, 1)).flatten()

        full_grid = tile_raster_images(
            X=per_feat,
            img_shape=grid_shape,
            tile_shape=(feat_grid, feat_grid),
            tile_spacing=(2, 2))

        file_name = '%s%s%s.png' % (prefix, safe_name, suffix)
        Image.fromarray(full_grid).save(os.path.join(out_path, file_name))
601 | 
602 | 
def draw_tiled_images(images, img_info_dict, bat_idx, out_path,
                      bat2img_idx_set, npat_img_list, filt_idx_list=None,
                      patch_size=None, patch_step=None):
    """Rebuild whole images from the patches of one batch and save them.

    For every recorded image key and every image of the batch, the
    patches belonging to that image are tiled with
    `tile_tensor4_from_list` and written to `out_path` as PNG. Patch
    sets with 1 or 3 channels give one file `<imgno>_<key>.png`; any
    other channel count gives one file per channel,
    `<imgno>_<key>_<channel>.png`.

    @param images: indexable by the position of the key in
        `img_info_dict`; each entry is sliced along its first axis and
        its second axis is read as the channel count.
    @param img_info_dict: maps image key -> dict with optional 'caxis'
        and 'scale' entries.
    @param bat_idx: index of the current batch (used for file numbering).
    @param out_path: output directory.
    @param bat2img_idx_set: per image, the (from, to) patch index range.
    @param npat_img_list: per image, a sequence whose elements from
        index 1 on are passed as the tile shape.
    @param filt_idx_list: per image, the index list forwarded to
        `tile_tensor4_from_list`.
    @param patch_size: patch size; only read when a 'scale' is given.
    @param patch_step: patch step; only read when a 'scale' is given.
    """
    n_batch_imgs = len(npat_img_list)

    for rec_pos, key in enumerate(img_info_dict):
        for img_pos in range(n_batch_imgs):
            first, last = bat2img_idx_set[img_pos]
            patches = images[rec_pos][first: last]
            info = img_info_dict[key]
            caxis = info.get('caxis', None)
            scale = info.get('scale', None)

            # Negative spacing makes overlapping patches overlap in the
            # tiled result as well.
            tile_spacing = (0, 0)
            if scale:
                tile_spacing = (
                    int(-(patch_size[0] - patch_step[0]) * scale),
                    int(-(patch_size[1] - patch_step[1]) * scale))

            img_no = bat_idx * n_batch_imgs + img_pos
            n_channels = int(patches.shape[1])
            if n_channels in (1, 3):
                # gray / colour patches are tiled as they are
                pending = [(patches, '%d_%s.png' % (img_no, key))]
            else:
                # anything else is written channel by channel
                pending = [
                    (patches[:, ch, :, :],
                     '%d_%s_%02d.png' % (img_no, key, ch))
                    for ch in range(n_channels)]

            for data, file_name in pending:
                tiled = tile_tensor4_from_list(
                    X=data,
                    tile_shape=npat_img_list[img_pos][1:],
                    idx_list=filt_idx_list[img_pos],
                    tile_spacing=tile_spacing,
                    caxis=caxis)
                picture = Image.fromarray(tiled.astype(np.uint8))
                picture.save(os.path.join(out_path, file_name))
644 | 
645 | 
def draw_images(images, img_info_dict, bat_idx, n_batch_imgs, out_path):
    """Save the recorded images of one batch as PNG files.

    Images with 1 or 3 channels give one file `<imgno>_<key>.png`; any
    other channel count gives one file per channel,
    `<imgno>_<key>_<channel>.png`. `<imgno>` is
    `bat_idx * n_batch_imgs + position in batch`.

    @param images: indexable by the position of the key in
        `img_info_dict`, then by the image position in the batch. The
        first axis of each image is read as the channel count.
    @param img_info_dict: maps image key -> dict with an optional
        'caxis' entry forwarded to `image_from_nparray`.
    @param bat_idx: index of the current batch (used for file numbering).
    @param n_batch_imgs: number of images to write per key.
    @param out_path: output directory.
    """
    for rec_pos, key in enumerate(img_info_dict):
        for img_pos in range(n_batch_imgs):
            sample = images[rec_pos][img_pos]
            caxis = img_info_dict[key].get('caxis', None)
            img_no = bat_idx * n_batch_imgs + img_pos

            n_channels = int(sample.shape[0])
            if n_channels in (1, 3):
                # move the channel axis last for gray / colour images
                pending = [(np.transpose(sample, (1, 2, 0)),
                            '%d_%s.png' % (img_no, key))]
            else:
                # anything else is written channel by channel
                pending = [
                    (sample[ch, :, :],
                     '%d_%s_%02d.png' % (img_no, key, ch))
                    for ch in range(n_channels)]

            for data, file_name in pending:
                picture = image_from_nparray(data, caxis=caxis)
                picture.save(os.path.join(out_path, file_name))
665 | 


--------------------------------------------------------------------------------