├── README.md ├── baseline-cca ├── cca.py ├── dcca.py ├── dccae.py └── util.py ├── baseline-cpmnet ├── change_format.py ├── test_lianzheng.py └── util │ ├── classfiy.py │ ├── get_sn.py │ ├── model.py │ └── util.py ├── baseline-mmin ├── auto │ ├── close_screen.sh │ ├── combine_results.py │ └── task_generate.py ├── change_format.py ├── data │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-37.pyc │ │ ├── __init__.cpython-38.pyc │ │ ├── base_dataset.cpython-37.pyc │ │ ├── base_dataset.cpython-38.pyc │ │ ├── cmumosei_miss_dataset.cpython-38.pyc │ │ ├── cmumosei_multimodal_dataset.cpython-38.pyc │ │ ├── iemocapfour_miss_dataset.cpython-37.pyc │ │ ├── iemocapfour_miss_dataset.cpython-38.pyc │ │ ├── iemocapfour_multimodal_dataset.cpython-38.pyc │ │ ├── iemocapsix_miss_dataset.cpython-38.pyc │ │ └── iemocapsix_multimodal_dataset.cpython-38.pyc │ ├── base_dataset.py │ ├── cmumosei_miss_dataset.py │ ├── cmumosei_multimodal_dataset.py │ ├── cmumosi_miss_dataset.py │ ├── cmumosi_multimodal_dataset.py │ ├── comparE_dataset.py │ ├── config │ │ ├── CMUMOSEI_config.json │ │ ├── CMUMOSI_config.json │ │ ├── IEMOCAPFOUR_config.json │ │ └── IEMOCAPSIX_config.json │ ├── iemocapfour_miss_dataset.py │ ├── iemocapfour_multimodal_dataset.py │ ├── iemocapsix_miss_dataset.py │ ├── iemocapsix_multimodal_dataset.py │ ├── melspec_dataset.py │ ├── msp_miss_dataset.py │ ├── msp_multimodal_dataset.py │ ├── multimodal_dataset.py │ ├── multimodal_miss_dataset.py │ └── word_aligned_dataset.py ├── models │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-37.pyc │ │ ├── __init__.cpython-38.pyc │ │ ├── base_model.cpython-37.pyc │ │ ├── base_model.cpython-38.pyc │ │ ├── mmin_AE_model.cpython-38.pyc │ │ ├── mmin_CRA_model.cpython-37.pyc │ │ ├── mmin_CRA_model.cpython-38.pyc │ │ ├── mmin_model.cpython-38.pyc │ │ ├── utt_fusion_model.cpython-37.pyc │ │ └── utt_fusion_model.cpython-38.pyc │ ├── base_model.py │ ├── lstm_audio_model.py │ ├── mmin_AE_model.py │ ├── mmin_CRA_model.py │ ├── mmin_ablation_model.py │ ├── mmin_model.py │ ├── mmin_no_cycle_model.py │ ├── mmin_old_model.py │ ├── networks │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-37.pyc │ │ │ ├── __init__.cpython-38.pyc │ │ │ ├── autoencoder.cpython-37.pyc │ │ │ ├── autoencoder.cpython-38.pyc │ │ │ ├── classifier.cpython-37.pyc │ │ │ ├── classifier.cpython-38.pyc │ │ │ ├── fc.cpython-37.pyc │ │ │ ├── fc.cpython-38.pyc │ │ │ ├── lstm.cpython-37.pyc │ │ │ ├── lstm.cpython-38.pyc │ │ │ ├── textcnn.cpython-37.pyc │ │ │ ├── textcnn.cpython-38.pyc │ │ │ ├── tools.cpython-37.pyc │ │ │ └── tools.cpython-38.pyc │ │ ├── autoencoder.py │ │ ├── classifier.py │ │ ├── fc.py │ │ ├── lstm.py │ │ ├── textcnn.py │ │ └── tools.py │ ├── utils │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-37.pyc │ │ │ ├── __init__.cpython-38.pyc │ │ │ ├── config.cpython-37.pyc │ │ │ └── config.cpython-38.pyc │ │ ├── config.py │ │ └── load_pretrained.py │ ├── utt_fusion_model.py │ └── uttf_dataaug_model.py ├── opts │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-37.pyc │ │ ├── __init__.cpython-38.pyc │ │ ├── base_opts.cpython-37.pyc │ │ ├── base_opts.cpython-38.pyc │ │ ├── train_opts.cpython-37.pyc │ │ └── train_opts.cpython-38.pyc │ ├── base_opts.py │ ├── test_opts.py │ └── train_opts.py ├── preprocess │ ├── IEMOCAP │ │ ├── make_aligned.py │ │ ├── make_comparE.py │ │ ├── make_melspec.py │ │ ├── make_torch_denseface.py │ │ ├── melspec_extractor.py │ │ ├── migrate_VL_feat.py │ │ ├── migrate_compaeE_tonpy.py │ │ └── statis_comparE.py │ 
├── MSP │ │ ├── make_aligned.py │ │ ├── make_aligned_info.py │ │ └── make_comparE.py │ ├── debug.py │ └── tools │ │ ├── bert_extractor.py │ │ ├── denseface │ │ ├── densenet.py │ │ ├── densenet_train.py │ │ └── vision_network │ │ │ ├── __init__.py │ │ │ ├── data_providers │ │ │ ├── __init__.py │ │ │ ├── base_provider.py │ │ │ ├── cifar.py │ │ │ ├── downloader.py │ │ │ ├── fer.py │ │ │ ├── svhn.py │ │ │ └── utils.py │ │ │ ├── denseface_feature.py │ │ │ ├── models │ │ │ ├── __init__.py │ │ │ └── dense_net.py │ │ │ └── run_dense_net.py │ │ └── denseface_extractor.py ├── train_baseline.py ├── train_miss.py └── utils │ ├── __init__.py │ ├── __pycache__ │ ├── __init__.cpython-37.pyc │ ├── __init__.cpython-38.pyc │ ├── logger.cpython-37.pyc │ └── logger.cpython-38.pyc │ ├── image_pool.py │ └── logger.py ├── config.py ├── dataset ├── CMUMOSEI │ └── CMUMOSEI_features_raw_2way.pkl ├── CMUMOSI │ └── CMUMOSI_features_raw_2way.pkl └── IEMOCAP │ ├── IEMOCAP_features_raw_4way.pkl │ └── IEMOCAP_features_raw_6way.pkl ├── detect.py ├── environment.yml ├── face_detection_yunet_2021sep.onnx ├── feature_extraction ├── audio │ ├── __pycache__ │ │ ├── config.cpython-38.pyc │ │ ├── feature_extractor.cpython-38.pyc │ │ └── util.cpython-38.pyc │ ├── extract_handcrafted_feature.py │ ├── extract_panns_embedding.py │ ├── extract_vggish_embedding.py │ ├── extract_wav2vec2_embedding.py │ ├── extract_wav2vec_embedding.py │ ├── feature_extractor.py │ ├── panns │ │ ├── __pycache__ │ │ │ ├── models.cpython-38.pyc │ │ │ └── pytorch_utils.cpython-38.pyc │ │ ├── evaluate.py │ │ ├── finetune_template.py │ │ ├── inference.py │ │ ├── losses.py │ │ ├── main.py │ │ ├── models.py │ │ └── pytorch_utils.py │ ├── run.sh │ ├── smile.log │ ├── util.py │ └── vggish │ │ ├── README.md │ │ ├── __pycache__ │ │ ├── mel_features.cpython-38.pyc │ │ ├── vggish_input.cpython-38.pyc │ │ ├── vggish_params.cpython-38.pyc │ │ └── vggish_slim.cpython-38.pyc │ │ ├── mel_features.py │ │ ├── vggish_inference_demo.py │ │ ├── vggish_input.py │ │ ├── vggish_params.py │ │ ├── vggish_pca_params.npz │ │ ├── vggish_postprocess.py │ │ ├── vggish_slim.py │ │ ├── vggish_smoke_test.py │ │ └── vggish_train_demo.py ├── text │ ├── __pycache__ │ │ └── util.cpython-38.pyc │ ├── extract_text_embedding.py │ ├── extract_text_embedding_LZ.py │ └── util.py └── visual │ ├── __pycache__ │ ├── config.cpython-38.pyc │ ├── dataset.cpython-38.pyc │ └── util.cpython-38.pyc │ ├── dataset.py │ ├── emonet │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-38.pyc │ │ └── data_augmentation.cpython-38.pyc │ ├── data │ │ ├── __init__.py │ │ └── affecnet.py │ ├── data_augmentation.py │ ├── evaluation.py │ ├── metrics.py │ └── models │ │ ├── __init__.py │ │ ├── __pycache__ │ │ ├── __init__.cpython-38.pyc │ │ └── emonet.cpython-38.pyc │ │ └── emonet.py │ ├── extract_emonet_embedding.py │ ├── extract_ferplus_embedding.py │ ├── extract_manet_embedding.py │ ├── extract_openface.py │ ├── manet │ ├── LICENSE │ ├── README.md │ ├── log │ │ ├── AffectNet7.png │ │ ├── AffectNet7.txt │ │ ├── AffectNet8.png │ │ ├── AffectNet8.txt │ │ ├── CAER-S.png │ │ ├── CAER-S.txt │ │ ├── FED-RO.png │ │ ├── FED-RO.txt │ │ ├── RAF-DB.png │ │ ├── RAF-DB.txt │ │ ├── SFEW.png │ │ ├── SFEW.txt │ │ ├── [02-08]-[16-22]-cnn.png │ │ ├── [02-08]-[16-22]-log.txt │ │ ├── [02-08]-[19-12]-cnn.png │ │ ├── [02-08]-[19-12]-log.txt │ │ ├── [02-08]-[21-19]-cnn.png │ │ ├── [02-08]-[21-19]-log.txt │ │ ├── [02-08]-[22-55]-cnn.png │ │ ├── [02-08]-[22-55]-log.txt │ │ ├── [02-12]-[19-11]-cnn.png │ │ ├── [02-12]-[19-11]-log.txt │ │ 
├── [02-12]-[19-11]-scratch-log.txt │ │ ├── [02-12]-[22-21]-cnn.png │ │ ├── [02-12]-[22-21]-log.txt │ │ ├── [02-12]-[22-21]-scratch-lr0.01-log.txt │ │ ├── [05-28]-[13-07]-cnn.png │ │ ├── [05-28]-[13-07]-log.txt │ │ └── [05-28]-[13-07]-scratch-lr0.1-log.txt │ ├── main.py │ ├── model │ │ ├── __pycache__ │ │ │ ├── attention.cpython-38.pyc │ │ │ └── manet.cpython-38.pyc │ │ ├── attention.py │ │ └── manet.py │ └── reorganize_rafdb.py │ ├── pytorch-benchmarks │ ├── .gitignore │ ├── LICENSE.md │ ├── README.md │ ├── fer2013 │ │ ├── __init__.py │ │ ├── fer.py │ │ └── fer_loader.py │ ├── imagenet │ │ ├── __init__.py │ │ ├── evaluation.py │ │ └── imagenet.py.bak │ ├── lfw_eval.py │ ├── matlab_cp2tform.py │ ├── model │ │ ├── alexnet_face_fer_bn_dag.py │ │ ├── resnet50_ferplus_dag.py │ │ ├── senet50_ferplus_dag.py │ │ ├── vgg_m_face_bn_fer_dag.py │ │ └── vgg_vd_face_fer_dag.py │ ├── run_fer_benchmarks.py │ ├── run_imagenet_benchmarks.py │ └── utils │ │ ├── __init__.py │ │ └── benchmark_helpers.py │ └── util.py ├── gcnet ├── dataloader_cmumosi.py ├── dataloader_iemocap.py ├── graph.py ├── loss.py ├── model.py ├── module.py └── train_gcnet.py ├── preprocess.py ├── requirements-cpmnet.txt ├── requirements.txt ├── run.sh ├── run_ae.sh ├── run_cca.sh ├── run_cpmnetsub1.sh ├── run_cpmnetsub2.sh ├── run_cpmnetsub3.sh ├── run_cra.sh ├── run_dcca.sh ├── run_dccae.sh ├── run_gcnet.sh └── run_mmin.sh /baseline-cpmnet/util/classfiy.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from sklearn.preprocessing import OneHotEncoder 3 | 4 | 5 | def convert_to_one_hot(y, C): 6 | return np.eye(C)[y.reshape(-1)] 7 | 8 | 9 | def vote(lsd1, lsd2, label, n=1): 10 | """Sometimes the prediction accuracy will be higher in this way. 11 | :param lsd1: train set's latent space data 12 | :param lsd2: test set's latent space data 13 | :param label: label of train set 14 | :param n: Similar to K in k-nearest neighbors algorithm 15 | :return: Predicted label 16 | """ 17 | F_h_h = np.dot(lsd2, np.transpose(lsd1)) 18 | gt_list = [] 19 | label = label.reshape(len(label), 1) 20 | for num in range(n): 21 | F_h_h_argmax = np.argmax(F_h_h, axis=1) 22 | F_h_h_onehot = convert_to_one_hot(F_h_h_argmax, len(label)) 23 | F_h_h = F_h_h - np.multiply(F_h_h, F_h_h_onehot) 24 | gt_list.append(np.dot(F_h_h_onehot, label)) 25 | gt_ = np.array(gt_list).transpose(2, 1, 0)[0].astype(np.int64) 26 | count_list = [] 27 | count_list.append([np.argmax(np.bincount(gt_[i])) for i in range(lsd2.shape[0])]) 28 | gt_pre = np.array(count_list) 29 | return gt_pre.transpose() 30 | 31 | def ave(lsd1, lsd2, label): 32 | """In most cases, this method is used to predict the highest accuracy. 
33 | :param lsd1: train set's latent space data 34 | :param lsd2: test set's latent space data 35 | :param label: label of train set 36 | :return: Predicted label 37 | """ 38 | F_h_h = np.dot(lsd2, np.transpose(lsd1)) # dot-product similarity between each test sample and all training samples 39 | label = label.reshape(len(label), 1) - 1 40 | enc = OneHotEncoder() 41 | a = enc.fit_transform(label) 42 | label_onehot = a.toarray() # one-hot labels of the training samples 43 | label_num = np.sum(label_onehot, axis=0) # number of training samples in each class 44 | F_h_h_sum = np.dot(F_h_h, label_onehot) 45 | F_h_h_mean = F_h_h_sum / label_num # average similarity to each class 46 | label_pre = np.argmax(F_h_h_mean, axis=1) + 1 47 | return label_pre # predicted labels 48 | -------------------------------------------------------------------------------- /baseline-cpmnet/util/get_sn.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from numpy.random import randint 3 | from sklearn.preprocessing import OneHotEncoder 4 | 5 | 6 | def get_sn(view_num, alldata_len, missing_rate): 7 | """Randomly generate incomplete data information, simulate partial view data with complete view data 8 | :param view_num:view number 9 | :param alldata_len:number of samples 10 | :param missing_rate:Defined in section 3.2 of the paper 11 | :return:Sn 12 | """ 13 | one_rate = 1-missing_rate # missing_rate: 0.8; one_rate: 0.2 14 | 15 | if one_rate <= (1 / view_num): # 16 | enc = OneHotEncoder(categories=[np.arange(view_num)]) 17 | view_preserve = enc.fit_transform(randint(0, view_num, size=(alldata_len, 1))).toarray() # only select one view [avoid all zero input] 18 | return view_preserve # [samplenum, viewnum=2] => one value set=1, others=0 19 | 20 | if one_rate == 1: 21 | matrix = randint(1, 2, size=(alldata_len, view_num)) # [samplenum, viewnum=2] => all ones 22 | return matrix 23 | 24 | ## for one_rate between [1 / view_num, 1] => can have multi-view input 25 | ## ensure at least one view is available 26 | ## since some samples overlap, which increases the difficulty 27 | error = 1 28 | while error >= 0.005: 29 | 30 | ## gain initial view_preserve 31 | enc = OneHotEncoder(categories=[np.arange(view_num)]) 32 | view_preserve = enc.fit_transform(randint(0, view_num, size=(alldata_len, 1))).toarray() # [samplenum, viewnum=2] => one value set=1, others=0 33 | 34 | ## further generate one_num samples 35 | one_num = view_num * alldata_len * one_rate - alldata_len # one_num left after previous step 36 | ratio = one_num / (view_num * alldata_len) # ratio processed so far 37 | print (f'first ratio: {ratio}') 38 | matrix_iter = (randint(0, 100, size=(alldata_len, view_num)) < int(ratio * 100)).astype(int) # based on ratio => matrix_iter 39 | a = np.sum(((matrix_iter + view_preserve) > 1).astype(int)) # a: overlap number 40 | one_num_iter = one_num / (1 - a / one_num) 41 | ratio = one_num_iter / (view_num * alldata_len) 42 | print (f'second ratio: {ratio}') 43 | matrix_iter = (randint(0, 100, size=(alldata_len, view_num)) < int(ratio * 100)).astype(int) 44 | matrix = ((matrix_iter + view_preserve) > 0).astype(int) 45 | ratio = np.sum(matrix) / (view_num * alldata_len) 46 | print (f'third ratio: {ratio}') 47 | error = abs(one_rate - ratio) 48 | 49 | return matrix 50 | 51 | 52 | def save_Sn(Sn, str_name): 53 | np.savetxt(str_name + '.csv', Sn, delimiter=',') 54 | 55 | 56 | def load_Sn(str_name): 57 | return np.loadtxt(str_name + '.csv', delimiter=',') 58 | -------------------------------------------------------------------------------- /baseline-mmin/auto/close_screen.sh:
-------------------------------------------------------------------------------- 1 | set -e 2 | grep_name=$1 3 | echo "screen contains name $grep_name:" 4 | screen -ls | grep $grep_name 5 | while true 6 | do 7 | read -r -p "Close these screens? [Y/n] " input 8 | 9 | case $input in 10 | [yY][eE][sS]|[yY]) 11 | screen -ls | awk '{print $1}'| grep $grep_name | awk '{print "screen -S "$1" -X quit"}'| sh 12 | echo "Finished" 13 | exit 0 14 | ;; 15 | 16 | [nN][oO]|[nN]) 17 | echo "Abort" 18 | exit 1 19 | ;; 20 | 21 | *) 22 | echo "Invalid input..." 23 | ;; 24 | esac 25 | done 26 | -------------------------------------------------------------------------------- /baseline-mmin/auto/combine_results.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | 4 | def read_results(file): 5 | ans = [] 6 | lines = open(file).readlines() 7 | for line in lines: 8 | if not line.startswith('0'): continue 9 | ans.append(list(map(lambda x: float(x), line.strip().split('\t')))) 10 | 11 | data = np.array(ans).astype(float) 12 | assert data.shape[0] == 24 13 | val_data = data[0: 10] 14 | tst_data = data[12: 22] 15 | return val_data, tst_data 16 | 17 | def combine(result1, result2): 18 | result = result1 * (result1>=result2) + result2 * (result1: initialize the class; first call BaseModel.__init__(self, opt). 6 | -- <set_input>: unpack data from dataset and apply preprocessing. 7 | -- <forward>: produce intermediate results. 8 | -- <optimize_parameters>: calculate loss, gradients, and update network weights. 9 | -- <modify_commandline_options>: (optionally) add model-specific options and set default options. 10 | 11 | In the function <__init__>, you need to define four lists: 12 | -- self.loss_names (str list): specify the training losses that you want to plot and save. 13 | -- self.model_names (str list): define networks used in our training. 14 | -- self.visual_names (str list): specify the images that you want to display and save. 15 | -- self.optimizers (optimizer list): define and initialize optimizers. You can define one optimizer for each network. If two networks are updated at the same time, you can use itertools.chain to group them. See cycle_gan_model.py for an example. 16 | 17 | Now you can use the model class by specifying flag '--model dummy'. 18 | See our template model class 'template_model.py' for more details. 19 | """ 20 | 21 | import importlib 22 | from models.base_model import BaseModel 23 | 24 | 25 | def find_model_using_name(model_name): 26 | """Import the module "models/[model_name]_model.py". 27 | 28 | In the file, the class called DatasetNameModel() will 29 | be instantiated. It has to be a subclass of BaseModel, 30 | and it is case-insensitive. 31 | """ 32 | model_filename = "models." + model_name + "_model" # 'models.mmin_model' 33 | modellib = importlib.import_module(model_filename) 34 | model = None 35 | target_model_name = model_name.replace('_', '') + 'model' 36 | for name, cls in modellib.__dict__.items(): 37 | if name.lower() == target_model_name.lower() and issubclass(cls, BaseModel): 38 | model = cls 39 | 40 | if model is None: 41 | print("In %s.py, there should be a subclass of BaseModel with class name that matches %s in lowercase."
% (model_filename, target_model_name)) 42 | exit(0) 43 | 44 | return model 45 | 46 | 47 | def get_option_setter(model_name): 48 | """Return the static method of the model class.""" 49 | model_class = find_model_using_name(model_name) 50 | return model_class.modify_commandline_options 51 | 52 | 53 | def create_model(opt): 54 | """Create a model given the option. 55 | 56 | This function warps the class CustomDatasetDataLoader. 57 | This is the main interface between this package and 'train.py'/'test.py' 58 | 59 | Example: 60 | >>> from models import create_model 61 | >>> model = create_model(opt) 62 | """ 63 | model = find_model_using_name(opt.model) 64 | instance = model(opt) 65 | print("model [%s] was created" % type(instance).__name__) 66 | return instance 67 | -------------------------------------------------------------------------------- /baseline-mmin/models/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zeroQiaoba/GCNet/8cce4a0c9a50172abfa79971e77c71c40c1d733d/baseline-mmin/models/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /baseline-mmin/models/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zeroQiaoba/GCNet/8cce4a0c9a50172abfa79971e77c71c40c1d733d/baseline-mmin/models/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /baseline-mmin/models/__pycache__/base_model.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zeroQiaoba/GCNet/8cce4a0c9a50172abfa79971e77c71c40c1d733d/baseline-mmin/models/__pycache__/base_model.cpython-37.pyc -------------------------------------------------------------------------------- /baseline-mmin/models/__pycache__/base_model.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zeroQiaoba/GCNet/8cce4a0c9a50172abfa79971e77c71c40c1d733d/baseline-mmin/models/__pycache__/base_model.cpython-38.pyc -------------------------------------------------------------------------------- /baseline-mmin/models/__pycache__/mmin_AE_model.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zeroQiaoba/GCNet/8cce4a0c9a50172abfa79971e77c71c40c1d733d/baseline-mmin/models/__pycache__/mmin_AE_model.cpython-38.pyc -------------------------------------------------------------------------------- /baseline-mmin/models/__pycache__/mmin_CRA_model.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zeroQiaoba/GCNet/8cce4a0c9a50172abfa79971e77c71c40c1d733d/baseline-mmin/models/__pycache__/mmin_CRA_model.cpython-37.pyc -------------------------------------------------------------------------------- /baseline-mmin/models/__pycache__/mmin_CRA_model.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zeroQiaoba/GCNet/8cce4a0c9a50172abfa79971e77c71c40c1d733d/baseline-mmin/models/__pycache__/mmin_CRA_model.cpython-38.pyc -------------------------------------------------------------------------------- /baseline-mmin/models/__pycache__/mmin_model.cpython-38.pyc: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/zeroQiaoba/GCNet/8cce4a0c9a50172abfa79971e77c71c40c1d733d/baseline-mmin/models/__pycache__/mmin_model.cpython-38.pyc -------------------------------------------------------------------------------- /baseline-mmin/models/__pycache__/utt_fusion_model.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zeroQiaoba/GCNet/8cce4a0c9a50172abfa79971e77c71c40c1d733d/baseline-mmin/models/__pycache__/utt_fusion_model.cpython-37.pyc -------------------------------------------------------------------------------- /baseline-mmin/models/__pycache__/utt_fusion_model.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zeroQiaoba/GCNet/8cce4a0c9a50172abfa79971e77c71c40c1d733d/baseline-mmin/models/__pycache__/utt_fusion_model.cpython-38.pyc -------------------------------------------------------------------------------- /baseline-mmin/models/lstm_audio_model.py: -------------------------------------------------------------------------------- 1 | 2 | import torch 3 | import os 4 | import json 5 | import torch.nn.functional as F 6 | from models.base_model import BaseModel 7 | from models.networks.lstm import LSTMEncoder 8 | from models.networks.fc_encoder import FcEncoder 9 | 10 | 11 | class LSTMAudioModel(BaseModel): 12 | ''' 13 | A: DNN 14 | V: denseface + LSTM + maxpool 15 | L: bert + textcnn 16 | ''' 17 | @staticmethod 18 | def modify_commandline_options(parser, is_train=True): 19 | parser.add_argument('--input_dim', type=int, default=130) 20 | parser.add_argument('--cls_layers', type=str, default='256,128') 21 | parser.add_argument('--hidden_size', type=int, default=256) 22 | parser.add_argument('--embd_method', type=str, default='maxpool') 23 | return parser 24 | 25 | def __init__(self, opt): 26 | """Initialize the LSTM autoencoder class 27 | Parameters: 28 | opt (Option class)-- stores all the experiment flags; needs to be a subclass of BaseOptions 29 | """ 30 | super().__init__(opt) 31 | # our expriment is on 10 fold setting, teacher is on 5 fold setting, the train set should match 32 | self.loss_names = ['CE'] 33 | self.model_names = ['A', 'C'] 34 | self.netA = LSTMEncoder(opt.input_dim, opt.hidden_size, embd_method=opt.embd_method) 35 | cls_layers = [int(x) for x in opt.cls_layers.split(',')] + [opt.output_dim] 36 | self.netC = FcEncoder(opt.hidden_size, cls_layers, dropout=0.3) 37 | 38 | if self.isTrain: 39 | self.criterion_ce = torch.nn.CrossEntropyLoss() 40 | # initialize optimizers; schedulers will be automatically created by function . 41 | paremeters = [{'params': getattr(self, 'net'+net).parameters()} for net in self.model_names] 42 | self.optimizer = torch.optim.Adam(paremeters, lr=opt.lr, betas=(opt.beta1, 0.998)) # 0.999 43 | self.optimizers.append(self.optimizer) 44 | self.output_dim = opt.output_dim 45 | 46 | # modify save_dir 47 | self.save_dir = os.path.join(self.save_dir, str(opt.cvNo)) 48 | if not os.path.exists(self.save_dir): 49 | os.mkdir(self.save_dir) 50 | 51 | def set_input(self, input): 52 | """ 53 | Unpack input data from the dataloader and perform necessary pre-processing steps. 54 | Parameters: 55 | input (dict): include the data itself and its metadata information. 
56 | """ 57 | self.A_feat = input['A_feat'].to(self.device) 58 | self.label = input['label'].to(self.device) 59 | self.input = input 60 | 61 | def forward(self): 62 | """Run forward pass; called by both functions and .""" 63 | self.feat = self.netA(self.A_feat) 64 | self.logits = self.netC(self.feat) 65 | self.pred = F.softmax(self.logits, dim=-1) 66 | 67 | def backward(self): 68 | """Calculate the loss for back propagation""" 69 | self.loss_CE = self.criterion_ce(self.logits, self.label) 70 | loss = self.loss_CE 71 | loss.backward() 72 | for model in self.model_names: 73 | torch.nn.utils.clip_grad_norm_(getattr(self, 'net'+model).parameters(), 5.0) # 0.1 74 | 75 | def optimize_parameters(self, epoch): 76 | """Calculate losses, gradients, and update network weights; called in every training iteration""" 77 | self.forward() 78 | self.optimizer.zero_grad() 79 | self.backward() 80 | self.optimizer.step() 81 | -------------------------------------------------------------------------------- /baseline-mmin/models/networks/__init__.py: -------------------------------------------------------------------------------- 1 | ''' Contains network files. ''' -------------------------------------------------------------------------------- /baseline-mmin/models/networks/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zeroQiaoba/GCNet/8cce4a0c9a50172abfa79971e77c71c40c1d733d/baseline-mmin/models/networks/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /baseline-mmin/models/networks/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zeroQiaoba/GCNet/8cce4a0c9a50172abfa79971e77c71c40c1d733d/baseline-mmin/models/networks/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /baseline-mmin/models/networks/__pycache__/autoencoder.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zeroQiaoba/GCNet/8cce4a0c9a50172abfa79971e77c71c40c1d733d/baseline-mmin/models/networks/__pycache__/autoencoder.cpython-37.pyc -------------------------------------------------------------------------------- /baseline-mmin/models/networks/__pycache__/autoencoder.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zeroQiaoba/GCNet/8cce4a0c9a50172abfa79971e77c71c40c1d733d/baseline-mmin/models/networks/__pycache__/autoencoder.cpython-38.pyc -------------------------------------------------------------------------------- /baseline-mmin/models/networks/__pycache__/classifier.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zeroQiaoba/GCNet/8cce4a0c9a50172abfa79971e77c71c40c1d733d/baseline-mmin/models/networks/__pycache__/classifier.cpython-37.pyc -------------------------------------------------------------------------------- /baseline-mmin/models/networks/__pycache__/classifier.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zeroQiaoba/GCNet/8cce4a0c9a50172abfa79971e77c71c40c1d733d/baseline-mmin/models/networks/__pycache__/classifier.cpython-38.pyc 
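For reference, the registration convention documented in models/__init__.py can be made concrete with a minimal sketch. This is illustrative only and not part of the repository; the model name 'dummy', its file, and the '--dummy_dim' option are hypothetical. A file models/dummy_model.py defining DummyModel would be resolved by the flag '--model dummy', since find_model_using_name imports models.dummy_model and looks for a BaseModel subclass whose lowercased name equals 'dummymodel'.

# models/dummy_model.py -- hypothetical sketch of the convention in models/__init__.py
from models.base_model import BaseModel

class DummyModel(BaseModel):
    @staticmethod
    def modify_commandline_options(parser, is_train=True):
        # (optionally) add model-specific options and set their defaults
        parser.add_argument('--dummy_dim', type=int, default=128)
        return parser

    def __init__(self, opt):
        super().__init__(opt)   # first call BaseModel.__init__(self, opt)
        self.loss_names = []    # losses to log and plot
        self.model_names = []   # networks named 'net<X>' to save/load

    def set_input(self, input):
        # unpack data from the dataloader and apply preprocessing
        self.input = input

    def forward(self):
        # produce intermediate results
        pass

    def optimize_parameters(self, epoch):
        # calculate losses, gradients, and update network weights
        self.forward()

With the baseline-mmin directory on PYTHONPATH, create_model(opt) with opt.model set to 'dummy' would then import models.dummy_model and instantiate this class.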
-------------------------------------------------------------------------------- /baseline-mmin/models/networks/__pycache__/fc.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zeroQiaoba/GCNet/8cce4a0c9a50172abfa79971e77c71c40c1d733d/baseline-mmin/models/networks/__pycache__/fc.cpython-37.pyc -------------------------------------------------------------------------------- /baseline-mmin/models/networks/__pycache__/fc.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zeroQiaoba/GCNet/8cce4a0c9a50172abfa79971e77c71c40c1d733d/baseline-mmin/models/networks/__pycache__/fc.cpython-38.pyc -------------------------------------------------------------------------------- /baseline-mmin/models/networks/__pycache__/lstm.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zeroQiaoba/GCNet/8cce4a0c9a50172abfa79971e77c71c40c1d733d/baseline-mmin/models/networks/__pycache__/lstm.cpython-37.pyc -------------------------------------------------------------------------------- /baseline-mmin/models/networks/__pycache__/lstm.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zeroQiaoba/GCNet/8cce4a0c9a50172abfa79971e77c71c40c1d733d/baseline-mmin/models/networks/__pycache__/lstm.cpython-38.pyc -------------------------------------------------------------------------------- /baseline-mmin/models/networks/__pycache__/textcnn.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zeroQiaoba/GCNet/8cce4a0c9a50172abfa79971e77c71c40c1d733d/baseline-mmin/models/networks/__pycache__/textcnn.cpython-37.pyc -------------------------------------------------------------------------------- /baseline-mmin/models/networks/__pycache__/textcnn.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zeroQiaoba/GCNet/8cce4a0c9a50172abfa79971e77c71c40c1d733d/baseline-mmin/models/networks/__pycache__/textcnn.cpython-38.pyc -------------------------------------------------------------------------------- /baseline-mmin/models/networks/__pycache__/tools.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zeroQiaoba/GCNet/8cce4a0c9a50172abfa79971e77c71c40c1d733d/baseline-mmin/models/networks/__pycache__/tools.cpython-37.pyc -------------------------------------------------------------------------------- /baseline-mmin/models/networks/__pycache__/tools.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zeroQiaoba/GCNet/8cce4a0c9a50172abfa79971e77c71c40c1d733d/baseline-mmin/models/networks/__pycache__/tools.cpython-38.pyc -------------------------------------------------------------------------------- /baseline-mmin/models/networks/fc.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class FcEncoder(nn.Module): 5 | def __init__(self, input_dim, layers, dropout=0.5, use_bn=False): 6 | ''' Fully Connect classifier 7 | fc+relu+bn+dropout, 最后分类128-4层是直接fc的 8 | Parameters: 9 | -------------------------- 10 | input_dim: input feature dim 11 
| layers: [x1, x2, x3] will create 3 layers with x1, x2, x3 hidden nodes respectively. 12 | dropout: dropout rate 13 | use_bn: use batchnorm or not 14 | ''' 15 | super().__init__() 16 | self.all_layers = [] 17 | for i in range(0, len(layers)): 18 | self.all_layers.append(nn.Linear(input_dim, layers[i])) 19 | self.all_layers.append(nn.ReLU()) 20 | if use_bn: 21 | self.all_layers.append(nn.BatchNorm1d(layers[i])) 22 | if dropout > 0: 23 | self.all_layers.append(nn.Dropout(dropout)) 24 | input_dim = layers[i] 25 | 26 | self.module = nn.Sequential(*self.all_layers) 27 | 28 | def forward(self, x): 29 | ## make layers to a whole module 30 | feat = self.module(x) 31 | return feat -------------------------------------------------------------------------------- /baseline-mmin/models/networks/lstm.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | 6 | class LSTMEncoder(nn.Module): 7 | ''' one directional LSTM encoder 8 | ''' 9 | def __init__(self, input_size, hidden_size, embd_method='last'): 10 | super(LSTMEncoder, self).__init__() 11 | self.input_size = input_size 12 | self.hidden_size = hidden_size 13 | self.rnn = nn.LSTM(self.input_size, self.hidden_size, batch_first=True) 14 | assert embd_method in ['maxpool', 'attention', 'last'] 15 | self.embd_method = embd_method 16 | 17 | if self.embd_method == 'attention': 18 | self.attention_vector_weight = nn.Parameter(torch.Tensor(hidden_size, 1)) 19 | self.attention_layer = nn.Sequential( 20 | nn.Linear(self.hidden_size, self.hidden_size), 21 | nn.Tanh(), 22 | ) 23 | self.softmax = nn.Softmax(dim=-1) 24 | 25 | def embd_attention(self, r_out, h_n): 26 | '''' 27 | 参考这篇博客的实现: 28 | https://blog.csdn.net/dendi_hust/article/details/94435919 29 | https://blog.csdn.net/fkyyly/article/details/82501126 30 | 论文:Hierarchical Attention Networks for Document Classification 31 | formulation: lstm_output*softmax(u * tanh(W*lstm_output + Bias) 32 | W and Bias 是映射函数,其中 Bias 可加可不加 33 | u 是 attention vector 大小等于 hidden size 34 | ''' 35 | hidden_reps = self.attention_layer(r_out) # [batch_size, seq_len, hidden_size] 36 | atten_weight = (hidden_reps @ self.attention_vector_weight) # [batch_size, seq_len, 1] 37 | atten_weight = self.softmax(atten_weight) # [batch_size, seq_len, 1] 38 | # [batch_size, seq_len, hidden_size] * [batch_size, seq_len, 1] = [batch_size, seq_len, hidden_size] 39 | sentence_vector = torch.sum(r_out * atten_weight, dim=1) # [batch_size, hidden_size] 40 | return sentence_vector 41 | 42 | def embd_maxpool(self, r_out, h_n): 43 | # embd = self.maxpool(r_out.transpose(1,2)) # r_out.size()=>[batch_size, seq_len, hidden_size] 44 | # r_out.transpose(1, 2) => [batch_size, hidden_size, seq_len] 45 | in_feat = r_out.transpose(1,2) 46 | embd = F.max_pool1d(in_feat, in_feat.size(2), in_feat.size(2)) 47 | return embd.squeeze() 48 | 49 | def embd_last(self, r_out, h_n): 50 | #Just for one layer and single direction 51 | return h_n.squeeze() 52 | 53 | def forward(self, x): 54 | ''' 55 | r_out shape: seq_len, batch, num_directions * hidden_size 56 | hn and hc shape: num_layers * num_directions, batch, hidden_size 57 | ''' 58 | r_out, (h_n, h_c) = self.rnn(x) 59 | embd = getattr(self, 'embd_'+self.embd_method)(r_out, h_n) 60 | return embd -------------------------------------------------------------------------------- /baseline-mmin/models/networks/textcnn.py: -------------------------------------------------------------------------------- 1 | 
import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | 6 | class TextCNN(nn.Module): 7 | def __init__(self, input_dim, emb_size=128, in_channels=1, out_channels=128, kernel_heights=[3,4,5], dropout=0.5): 8 | super().__init__() 9 | ''' 10 | cat((conv1-relu+conv2-relu+conv3-relu)+maxpool) + dropout, and to trans 11 | ''' 12 | self.conv1 = nn.Conv2d(in_channels, out_channels, (kernel_heights[0], input_dim), stride=1, padding=0) 13 | self.conv2 = nn.Conv2d(in_channels, out_channels, (kernel_heights[1], input_dim), stride=1, padding=0) 14 | self.conv3 = nn.Conv2d(in_channels, out_channels, (kernel_heights[2], input_dim), stride=1, padding=0) 15 | self.dropout = nn.Dropout(dropout) 16 | self.embd = nn.Sequential( 17 | nn.Linear(len(kernel_heights)*out_channels, emb_size), 18 | nn.ReLU(inplace=True), 19 | ) 20 | 21 | def conv_block(self, input, conv_layer): 22 | conv_out = conv_layer(input)# conv_out.size() = (batch_size, out_channels, dim, 1) 23 | activation = F.relu(conv_out.squeeze(3))# activation.size() = (batch_size, out_channels, dim1) 24 | max_out = F.max_pool1d(activation, activation.size()[2]).squeeze(2) # maxpool_out.size() = (batch_size, out_channels) 25 | return max_out 26 | 27 | def forward(self, frame_x): 28 | batch_size, seq_len, feat_dim = frame_x.size() 29 | frame_x = frame_x.view(batch_size, 1, seq_len, feat_dim) 30 | max_out1 = self.conv_block(frame_x, self.conv1) 31 | max_out2 = self.conv_block(frame_x, self.conv2) 32 | max_out3 = self.conv_block(frame_x, self.conv3) 33 | all_out = torch.cat((max_out1, max_out2, max_out3), 1) 34 | fc_in = self.dropout(all_out) 35 | embd = self.embd(fc_in) 36 | return embd -------------------------------------------------------------------------------- /baseline-mmin/models/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zeroQiaoba/GCNet/8cce4a0c9a50172abfa79971e77c71c40c1d733d/baseline-mmin/models/utils/__init__.py -------------------------------------------------------------------------------- /baseline-mmin/models/utils/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zeroQiaoba/GCNet/8cce4a0c9a50172abfa79971e77c71c40c1d733d/baseline-mmin/models/utils/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /baseline-mmin/models/utils/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zeroQiaoba/GCNet/8cce4a0c9a50172abfa79971e77c71c40c1d733d/baseline-mmin/models/utils/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /baseline-mmin/models/utils/__pycache__/config.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zeroQiaoba/GCNet/8cce4a0c9a50172abfa79971e77c71c40c1d733d/baseline-mmin/models/utils/__pycache__/config.cpython-37.pyc -------------------------------------------------------------------------------- /baseline-mmin/models/utils/__pycache__/config.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zeroQiaoba/GCNet/8cce4a0c9a50172abfa79971e77c71c40c1d733d/baseline-mmin/models/utils/__pycache__/config.cpython-38.pyc 
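The encoders above (FcEncoder, LSTMEncoder, TextCNN) are the building blocks used by the baseline models in this package (e.g., lstm_audio_model.py). As a quick shape check, the sketch below (not part of the repository) feeds random tensors through them; it assumes it is run from the baseline-mmin/ directory so that the models package is importable, and the feature sizes used here (130-d acoustic frames, 1024-d text token embeddings, 4 emotion classes) are assumptions for illustration only.

# Illustrative shape check -- not part of the repository.
import torch
from models.networks.fc import FcEncoder
from models.networks.lstm import LSTMEncoder
from models.networks.textcnn import TextCNN

audio = torch.randn(8, 60, 130)    # [batch, frames, acoustic feature dim]
text = torch.randn(8, 22, 1024)    # [batch, tokens, text feature dim]

a_enc = LSTMEncoder(input_size=130, hidden_size=256, embd_method='maxpool')
l_enc = TextCNN(input_dim=1024, emb_size=128)
clf = FcEncoder(256, [128, 4], dropout=0.3)  # last entry = number of classes, as in lstm_audio_model

print(a_enc(audio).shape)        # torch.Size([8, 256])  max-pooled LSTM states
print(l_enc(text).shape)         # torch.Size([8, 128])  TextCNN utterance embedding
print(clf(a_enc(audio)).shape)   # torch.Size([8, 4])    classifier output, fed to CrossEntropyLoss upstream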
-------------------------------------------------------------------------------- /baseline-mmin/models/utils/config.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | class OptConfig(object): 4 | def __init__(self): 5 | pass 6 | 7 | def load(self, config_dict): 8 | if sys.version > '3': 9 | for key, value in config_dict.items(): 10 | if not isinstance(value, dict): 11 | setattr(self, key, value) 12 | else: 13 | self.load(value) 14 | else: 15 | for key, value in config_dict.iteritems(): 16 | if not isinstance(value, dict): 17 | setattr(self, key, value) 18 | else: 19 | self.load(value) -------------------------------------------------------------------------------- /baseline-mmin/models/utils/load_pretrained.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | from .config import OptConfig 4 | 5 | def load_from_opt_record(file_path): 6 | opt_content = json.load(open(file_path, 'r')) 7 | opt = OptConfig() 8 | opt.load(opt_content) 9 | return opt 10 | 11 | def load_pretrained_model(model_class, checkpoints_dir, cv, gpu_ids): 12 | path = os.path.join(checkpoints_dir, str(cv)) 13 | config_path = os.path.join(checkpoints_dir, 'train_opt.conf') 14 | config = load_from_opt_record(config_path) 15 | config.isTrain = False # teacher model should be in test mode 16 | config.gpu_ids = gpu_ids # set gpu to the same 17 | model = model_class(config) 18 | model.cuda() 19 | model.load_networks_cv(path) 20 | model.eval() 21 | return model 22 | -------------------------------------------------------------------------------- /baseline-mmin/opts/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zeroQiaoba/GCNet/8cce4a0c9a50172abfa79971e77c71c40c1d733d/baseline-mmin/opts/__init__.py -------------------------------------------------------------------------------- /baseline-mmin/opts/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zeroQiaoba/GCNet/8cce4a0c9a50172abfa79971e77c71c40c1d733d/baseline-mmin/opts/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /baseline-mmin/opts/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zeroQiaoba/GCNet/8cce4a0c9a50172abfa79971e77c71c40c1d733d/baseline-mmin/opts/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /baseline-mmin/opts/__pycache__/base_opts.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zeroQiaoba/GCNet/8cce4a0c9a50172abfa79971e77c71c40c1d733d/baseline-mmin/opts/__pycache__/base_opts.cpython-37.pyc -------------------------------------------------------------------------------- /baseline-mmin/opts/__pycache__/base_opts.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zeroQiaoba/GCNet/8cce4a0c9a50172abfa79971e77c71c40c1d733d/baseline-mmin/opts/__pycache__/base_opts.cpython-38.pyc -------------------------------------------------------------------------------- /baseline-mmin/opts/__pycache__/train_opts.cpython-37.pyc: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/zeroQiaoba/GCNet/8cce4a0c9a50172abfa79971e77c71c40c1d733d/baseline-mmin/opts/__pycache__/train_opts.cpython-37.pyc -------------------------------------------------------------------------------- /baseline-mmin/opts/__pycache__/train_opts.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zeroQiaoba/GCNet/8cce4a0c9a50172abfa79971e77c71c40c1d733d/baseline-mmin/opts/__pycache__/train_opts.cpython-38.pyc -------------------------------------------------------------------------------- /baseline-mmin/opts/test_opts.py: -------------------------------------------------------------------------------- 1 | from .base_opts import BaseOptions 2 | 3 | 4 | class TestOptions(BaseOptions): 5 | """This class includes test options. 6 | 7 | It also includes shared options defined in BaseOptions. 8 | """ 9 | 10 | def initialize(self, parser): 11 | parser = BaseOptions.initialize(self, parser) 12 | parser.add_argument('--phase', type=str, default='test', help='train, val, test, etc') 13 | parser.add_argument('--method', type=str, default='mean', help='How to calculate final test result, [concat, mean]') 14 | parser.add_argument('--simple', action='store_true', help='print simplified information') 15 | self.isTrain = False 16 | return parser 17 | -------------------------------------------------------------------------------- /baseline-mmin/opts/train_opts.py: -------------------------------------------------------------------------------- 1 | from .base_opts import BaseOptions 2 | 3 | 4 | class TrainOptions(BaseOptions): 5 | """This class includes training options. 6 | 7 | It also includes shared options defined in BaseOptions. 8 | """ 9 | 10 | def initialize(self, parser): 11 | parser = BaseOptions.initialize(self, parser) 12 | # network saving and loading parameters 13 | # parser.add_argument('--cvNo', type=int, default=5, help='which cross validation set') 14 | parser.add_argument('--print_freq', type=int, default=100, help='frequency of showing training results on console') 15 | parser.add_argument('--save_latest_freq', type=int, default=1000, help='frequency of saving the latest results') 16 | parser.add_argument('--save_epoch_freq', type=int, default=1, help='frequency of saving checkpoints at the end of epochs') 17 | parser.add_argument('--save_by_iter', action='store_true', help='whether to save the model by iteration') 18 | parser.add_argument('--continue_train', action='store_true', help='continue training: load the latest model') 19 | parser.add_argument('--epoch_count', type=int, default=1, help='the starting epoch count, we save the model by <epoch_count>, <epoch_count>+<save_latest_freq>, ...') 20 | parser.add_argument('--phase', type=str, default='train', help='train, val, test, etc') 21 | 22 | # training parameters 23 | parser.add_argument('--mask_rate', type=float, default=0.0, help='input mask rate, ranging from [0.0, 0.1, ..., 1.0]') 24 | parser.add_argument('--niter', type=int, default=20, help='# of iter at starting learning rate') 25 | parser.add_argument('--niter_decay', type=int, default=80, help='# of iter to linearly decay learning rate to zero') 26 | parser.add_argument('--beta1', type=float, default=0.5, help='momentum term of adam') 27 | parser.add_argument('--lr', type=float, default=2e-4, help='initial learning rate for adam') 28 | parser.add_argument('--lr_policy', type=str, default='linear', help='learning rate policy.
[linear | step | plateau | cosine]') 29 | parser.add_argument('--lr_decay_iters', type=int, default=50, help='multiply by a gamma every lr_decay_iters iterations') 30 | 31 | # test with predefined mask path 32 | parser.add_argument('--test_mask', type=str, default=None, help='test under same mask for fair comparison') 33 | 34 | # expr setting 35 | parser.add_argument('--run_idx', type=int, default=1, help='experiment number; for repeated experiments') 36 | self.isTrain = True 37 | return parser 38 | -------------------------------------------------------------------------------- /baseline-mmin/preprocess/IEMOCAP/make_comparE.py: -------------------------------------------------------------------------------- 1 | import os 2 | import h5py 3 | import json 4 | import numpy as np 5 | import pandas as pd 6 | import scipy.signal as spsig 7 | from tqdm import tqdm 8 | 9 | 10 | class ComParEExtractor(object): 11 | ''' Extract ComParE features: input a wav path, output an npy array with 130 dims per frame 12 | ''' 13 | def __init__(self, opensmile_tool_dir=None, downsample=10, tmp_dir='.tmp', no_tmp=False): 14 | ''' Extract ComparE feature 15 | tmp_dir: where to save opensmile csv file 16 | no_tmp: if true, delete tmp file 17 | ''' 18 | if not os.path.exists(tmp_dir): 19 | os.makedirs(tmp_dir) 20 | if opensmile_tool_dir is None: 21 | opensmile_tool_dir = '/root/opensmile-2.3.0/' 22 | self.opensmile_tool_dir = opensmile_tool_dir 23 | self.tmp_dir = tmp_dir 24 | self.downsample = downsample 25 | self.no_tmp = no_tmp 26 | 27 | def __call__(self, wav): 28 | basename = os.path.basename(wav).split('.')[0] 29 | save_path = os.path.join(self.tmp_dir, basename+".csv") 30 | cmd = 'SMILExtract -C {}/config/ComParE_2016.conf \ 31 | -appendcsvlld 0 -timestampcsvlld 1 -headercsvlld 1 \ 32 | -I {} -lldcsvoutput {} -instname xx -O ?
-noconsoleoutput 1' 33 | os.system(cmd.format(self.opensmile_tool_dir, wav, save_path)) 34 | 35 | df = pd.read_csv(save_path, delimiter=';') 36 | wav_data = df.iloc[:, 2:] 37 | if len(wav_data) > self.downsample: 38 | wav_data = spsig.resample_poly(wav_data, up=1, down=self.downsample, axis=0) 39 | if self.no_tmp: 40 | os.remove(save_path) 41 | else: 42 | wav_data = None 43 | print(f'Error in {wav}, no feature extracted') 44 | 45 | return wav_data 46 | 47 | 48 | def get_trn_val_tst(target_root_dir, cv, setname): 49 | int2name = np.load(os.path.join(target_root_dir, str(cv), '{}_int2name.npy'.format(setname))) 50 | int2label = np.load(os.path.join(target_root_dir, str(cv), '{}_label.npy'.format(setname))) 51 | assert len(int2name) == len(int2label) 52 | return int2name, int2label 53 | 54 | def make_all_comparE(config): 55 | extractor = ComParEExtractor() 56 | trn_int2name, _ = get_trn_val_tst(config['target_root'], 1, 'trn') 57 | val_int2name, _ = get_trn_val_tst(config['target_root'], 1, 'val') 58 | tst_int2name, _ = get_trn_val_tst(config['target_root'], 1, 'tst') 59 | trn_int2name = list(map(lambda x: x[0].decode(), trn_int2name)) 60 | val_int2name = list(map(lambda x: x[0].decode(), val_int2name)) 61 | tst_int2name = list(map(lambda x: x[0].decode(), tst_int2name)) 62 | all_utt_ids = trn_int2name + val_int2name + tst_int2name 63 | all_h5f = h5py.File(os.path.join(config['feature_root'], 'A', 'comparE.h5'), 'w') 64 | for utt_id in tqdm(all_utt_ids): 65 | ses_id = utt_id[4] 66 | dialog_id = '_'.join(utt_id.split('_')[:-1]) 67 | wav_path = os.path.join(config['data_root'], f'Session{ses_id}', 'sentences', 'wav', f'{dialog_id}', f'{utt_id}.wav') 68 | feat = extractor(wav_path) 69 | all_h5f[utt_id] = feat 70 | 71 | def normlize_on_trn(config, input_file, output_file): 72 | h5f = h5py.File(output_file, 'w') 73 | in_data = h5py.File(input_file, 'r') 74 | for cv in range(1, 11): 75 | trn_int2name, _ = get_trn_val_tst(config['target_root'], cv, 'trn') 76 | trn_int2name = list(map(lambda x: x[0].decode(), trn_int2name)) 77 | all_feat = [in_data[utt_id][()] for utt_id in trn_int2name] 78 | all_feat = np.concatenate(all_feat, axis=0) 79 | mean_f = np.mean(all_feat, axis=0) 80 | std_f = np.std(all_feat, axis=0) 81 | std_f[std_f == 0.0] = 1.0 82 | cv_group = h5f.create_group(str(cv)) 83 | cv_group['mean'] = mean_f 84 | cv_group['std'] = std_f 85 | print(cv) 86 | print("mean:", np.sum(mean_f)) 87 | print("std:", np.sum(std_f)) 88 | 89 | 90 | if __name__ == '__main__': 91 | pwd = os.path.abspath(__file__) 92 | pwd = os.path.dirname(pwd) 93 | config_path = os.path.join(pwd, '../', 'data/config', 'IEMOCAP_config.json') 94 | config = json.load(open(config_path)) 95 | # make_all_comparE(config) 96 | normlize_on_trn(config, os.path.join(config['feature_root'], 'A', 'comparE.h5'), os.path.join(config['feature_root'], 'A', 'comparE_mean_std.h5')) -------------------------------------------------------------------------------- /baseline-mmin/preprocess/IEMOCAP/make_melspec.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | import numpy as np 4 | import h5py 5 | from tqdm import tqdm 6 | from preprocess.melspec_extractor import MelSpecExtractor 7 | 8 | def get_trn_val_tst(target_root_dir, cv, setname): 9 | int2name = np.load(os.path.join(target_root_dir, str(cv), '{}_int2name.npy'.format(setname))) 10 | int2label = np.load(os.path.join(target_root_dir, str(cv), '{}_label.npy'.format(setname))) 11 | assert len(int2name) == len(int2label) 12
| return int2name, int2label 13 | 14 | def extract_all_melspec(config): 15 | extractor = MelSpecExtractor() 16 | trn_int2name, _ = get_trn_val_tst(config['target_root'], 1, 'trn') 17 | val_int2name, _ = get_trn_val_tst(config['target_root'], 1, 'val') 18 | tst_int2name, _ = get_trn_val_tst(config['target_root'], 1, 'tst') 19 | trn_int2name = list(map(lambda x: x[0].decode(), trn_int2name)) 20 | val_int2name = list(map(lambda x: x[0].decode(), val_int2name)) 21 | tst_int2name = list(map(lambda x: x[0].decode(), tst_int2name)) 22 | all_utt_ids = trn_int2name + val_int2name + tst_int2name 23 | all_h5f = h5py.File(os.path.join(config['feature_root'], 'A', 'melspec.h5'), 'w') 24 | for utt_id in tqdm(all_utt_ids): 25 | ses_id = utt_id[4] 26 | dialog_id = '_'.join(utt_id.split('_')[:-1]) 27 | wav_path = os.path.join(config['data_root'], f'Session{ses_id}', 'sentences', 'wav', f'{dialog_id}', f'{utt_id}.wav') 28 | melspec = extractor.extract(wav_path) 29 | all_h5f[utt_id] = melspec 30 | 31 | if __name__ == '__main__': 32 | pwd = os.path.abspath(__file__) 33 | pwd = os.path.dirname(pwd) 34 | config_path = os.path.join(pwd, '../', 'data/config', 'IEMOCAP_config.json') 35 | config = json.load(open(config_path)) 36 | extract_all_melspec(config) 37 | -------------------------------------------------------------------------------- /baseline-mmin/preprocess/IEMOCAP/make_torch_denseface.py: -------------------------------------------------------------------------------- 1 | import os 2 | import h5py 3 | import json 4 | import numpy as np 5 | import pandas as pd 6 | import scipy.signal as spsig 7 | from tqdm import tqdm 8 | 9 | def get_trn_val_tst(target_root_dir, cv, setname): 10 | int2name = np.load(os.path.join(target_root_dir, str(cv), '{}_int2name.npy'.format(setname))) 11 | int2label = np.load(os.path.join(target_root_dir, str(cv), '{}_label.npy'.format(setname))) 12 | assert len(int2name) == len(int2label) 13 | return int2name, int2label 14 | 15 | def make_all_comparE(config): 16 | extractor = ComParEExtractor() 17 | trn_int2name, _ = get_trn_val_tst(config['target_root'], 1, 'trn') 18 | val_int2name, _ = get_trn_val_tst(config['target_root'], 1, 'val') 19 | tst_int2name, _ = get_trn_val_tst(config['target_root'], 1, 'tst') 20 | trn_int2name = list(map(lambda x: x[0].decode(), trn_int2name)) 21 | val_int2name = list(map(lambda x: x[0].decode(), val_int2name)) 22 | tst_int2name = list(map(lambda x: x[0].decode(), tst_int2name)) 23 | all_utt_ids = trn_int2name + val_int2name + tst_int2name 24 | all_h5f = h5py.File(os.path.join(config['feature_root'], 'A', 'comparE.h5'), 'w') 25 | for utt_id in tqdm(all_utt_ids): 26 | ses_id = utt_id[4] 27 | dialog_id = '_'.join(utt_id.split('_')[:-1]) 28 | wav_path = os.path.join(config['data_root'], f'Session{ses_id}', 'sentences', 'wav', f'{dialog_id}', f'{utt_id}.wav') 29 | feat = extractor(wav_path) 30 | all_h5f[utt_id] = feat 31 | 32 | def normlize_on_trn(config, input_file, output_file): 33 | h5f = h5py.File(output_file, 'w') 34 | in_data = h5py.File(input_file, 'r') 35 | for cv in range(1, 11): 36 | trn_int2name, _ = get_trn_val_tst(config['target_root'], cv, 'trn') 37 | trn_int2name = list(map(lambda x: x[0].decode(), trn_int2name)) 38 | all_feat = [in_data[utt_id][()] for utt_id in trn_int2name] 39 | all_feat = np.concatenate(all_feat, axis=0) 40 | mean_f = np.mean(all_feat, axis=0) 41 | std_f = np.std(all_feat, axis=0) 42 | std_f[std_f == 0.0] = 1.0 43 | cv_group = h5f.create_group(str(cv)) 44 | cv_group['mean'] = mean_f 45 | cv_group['std'] = std_f 46 | 
print(cv) 47 | print("mean:", np.sum(mean_f)) 48 | print("std:", np.sum(std_f)) 49 | 50 | 51 | if __name__ == '__main__': 52 | pwd = os.path.abspath(__file__) 53 | pwd = os.path.dirname(pwd) 54 | config_path = os.path.join(pwd, '../', 'data/config', 'IEMOCAP_config.json') 55 | config = json.load(open(config_path)) 56 | # make_all_comparE(config) 57 | normlize_on_trn(config, os.path.join(config['feature_root'], 'A', 'comparE.h5'), os.path.join(config['feature_root'], 'A', 'comparE_mean_std.h5')) -------------------------------------------------------------------------------- /baseline-mmin/preprocess/IEMOCAP/melspec_extractor.py: -------------------------------------------------------------------------------- 1 | from re import M 2 | import numpy as np # linear algebra 3 | from tqdm import tqdm 4 | import PIL 5 | import os 6 | import librosa 7 | import random 8 | 9 | 10 | class default_config: 11 | sampling_rate = 16000 12 | duration = 2 # sec 13 | hop_length = 125 * duration # to make time steps 128 14 | fmin = 20 15 | fmax = sampling_rate // 2 16 | n_mels = 128 17 | n_fft = n_mels * 20 18 | padmode = 'constant' 19 | samples = sampling_rate * duration 20 | 21 | 22 | class MelSpecExtractor(object): 23 | def __init__(self, sampling_rate=None, duration=None, hop_length=None, \ 24 | fmin=None, fmax=None, n_mels=None, n_fft=None, padmode=None, max_samples=None): 25 | self.sampling_rate = sampling_rate or default_config.sampling_rate 26 | self.duration = duration or default_config.duration 27 | self.hop_length = hop_length or default_config.hop_length 28 | self.fmin = fmin or default_config.fmin 29 | self.fmax = fmax or default_config.fmax 30 | self.n_mels = n_mels or default_config.n_mels 31 | self.n_fft = n_fft or default_config.n_fft 32 | self.padmode = padmode or default_config.padmode 33 | self.max_samples = max_samples or default_config.samples 34 | assert self.max_samples > 0, 'max_samples parameters must be larger than zero' 35 | 36 | 37 | def read_audio(self, pathname, trim_long_data): 38 | y, _ = librosa.load(pathname, sr=self.sampling_rate) 39 | # trim silence 40 | if 0 < len(y): # workaround: 0 length causes error 41 | y, _ = librosa.effects.trim(y) # trim, top_db=default(60) 42 | else: 43 | print(f"found zero length audio {pathname}") 44 | y = np.zeros((self.max_samples,), np.float32) 45 | # make it unified length to self.samples 46 | if len(y) > self.max_samples: # long enough 47 | if trim_long_data: 48 | y = y[0 : self.max_samples] 49 | else: # pad blank 50 | leny = len(y) 51 | padding = self.max_samples - len(y) # add padding at both ends 52 | offset = padding // 2 53 | y = np.pad(y, (offset, self.max_samples - len(y) - offset), self.padmode) 54 | return y 55 | 56 | 57 | def audio_to_melspectrogram(self, audio): 58 | spectrogram = librosa.feature.melspectrogram(audio, 59 | sr=self.sampling_rate, 60 | n_mels=self.n_mels, 61 | hop_length=self.hop_length, 62 | n_fft=self.n_fft, 63 | fmin=self.fmin, 64 | fmax=self.fmax) 65 | spectrogram = librosa.power_to_db(spectrogram) 66 | spectrogram = spectrogram.astype(np.float32) 67 | return spectrogram 68 | 69 | 70 | def read_as_melspectrogram(self, pathname, trim_long_data=False): 71 | x = self.read_audio(pathname, trim_long_data) 72 | mels = self.audio_to_melspectrogram(x) 73 | return mels 74 | 75 | 76 | def mono_to_color(self, X, mean=None, std=None, norm_max=None, norm_min=None, eps=1e-6): 77 | # Stack X as [X,X,X] 78 | X = np.stack([X, X, X], axis=-1) 79 | 80 | # Standardize 81 | mean = mean or X.mean() 82 | X = X - mean 83 | std = std 
or X.std() 84 | Xstd = X / (std + eps) 85 | _min, _max = Xstd.min(), Xstd.max() 86 | norm_max = norm_max or _max 87 | norm_min = norm_min or _min 88 | if (_max - _min) > eps: 89 | # Normalize to [0, 255] 90 | V = Xstd 91 | V[V < norm_min] = norm_min 92 | V[V > norm_max] = norm_max 93 | V = 255 * (V - norm_min) / (norm_max - norm_min) 94 | V = V.astype(np.uint8) 95 | else: 96 | # Just zero 97 | V = np.zeros_like(Xstd, dtype=np.uint8) 98 | return V 99 | 100 | 101 | def extract(self, wav_path): 102 | x = self.read_as_melspectrogram(wav_path, trim_long_data=False) 103 | x_color = self.mono_to_color(x) 104 | return x_color 105 | 106 | 107 | if __name__ == '__main__': 108 | extractor = MelSpecExtractor() 109 | # wav_path = '/data3/lrc/IEMOCAP_full_release/Session1/sentences/wav/Ses01F_script03_2/Ses01F_script03_2_M001.wav' 110 | wav_path = '/data3/lrc/IEMOCAP_full_release/Session1/sentences/wav/Ses01F_script03_2/Ses01F_script03_2_M026.wav' 111 | melspec = extractor.extract(wav_path) 112 | print(melspec.shape) -------------------------------------------------------------------------------- /baseline-mmin/preprocess/IEMOCAP/migrate_VL_feat.py: -------------------------------------------------------------------------------- 1 | import os 2 | import h5py 3 | import json 4 | import numpy as np 5 | from tqdm import tqdm 6 | 7 | def get_trn_val_tst(target_root_dir, cv, setname): 8 | int2name = np.load(os.path.join(target_root_dir, str(cv), '{}_int2name.npy'.format(setname))) 9 | int2label = np.load(os.path.join(target_root_dir, str(cv), '{}_label.npy'.format(setname))) 10 | assert len(int2name) == len(int2label) 11 | return int2name, int2label 12 | 13 | 14 | def migrate_V(config): 15 | migrate_root = os.path.join('/data3/lrc/Iemocap_feature/cv_level/feature/denseface/', str(1)) 16 | src_v_trn = np.load(os.path.join(migrate_root, 'trn.npy')) 17 | src_v_val = np.load(os.path.join(migrate_root, 'val.npy')) 18 | src_v_tst = np.load(os.path.join(migrate_root, 'tst.npy')) 19 | src_v_feat = np.concatenate([src_v_trn, src_v_val, src_v_tst], axis=0) 20 | trn_int2name, _ = get_trn_val_tst(config['target_root'], 1, 'trn') 21 | val_int2name, _ = get_trn_val_tst(config['target_root'], 1, 'val') 22 | tst_int2name, _ = get_trn_val_tst(config['target_root'], 1, 'tst') 23 | trn_int2name = list(map(lambda x: x[0].decode(), trn_int2name)) 24 | val_int2name = list(map(lambda x: x[0].decode(), val_int2name)) 25 | tst_int2name = list(map(lambda x: x[0].decode(), tst_int2name)) 26 | all_utt_ids = trn_int2name + val_int2name + tst_int2name 27 | all_h5f = h5py.File(os.path.join(config['feature_root'], 'V', 'denseface.h5'), 'w') 28 | for utt_id, v_feat in tqdm(zip(all_utt_ids, src_v_feat), total=len(all_utt_ids)): 29 | all_h5f[utt_id] = v_feat 30 | 31 | 32 | def migrate_L(config): 33 | migrate_root = os.path.join('/data3/lrc/Iemocap_feature/cv_level/feature/text/', str(1)) 34 | src_l_trn = np.load(os.path.join(migrate_root, 'trn.npy')) 35 | src_l_val = np.load(os.path.join(migrate_root, 'val.npy')) 36 | src_l_tst = np.load(os.path.join(migrate_root, 'tst.npy')) 37 | src_l_feat = np.concatenate([src_l_trn, src_l_val, src_l_tst], axis=0) 38 | trn_int2name, _ = get_trn_val_tst(config['target_root'], 1, 'trn') 39 | val_int2name, _ = get_trn_val_tst(config['target_root'], 1, 'val') 40 | tst_int2name, _ = get_trn_val_tst(config['target_root'], 1, 'tst') 41 | trn_int2name = list(map(lambda x: x[0].decode(), trn_int2name)) 42 | val_int2name = list(map(lambda x: x[0].decode(), val_int2name)) 43 | tst_int2name = list(map(lambda x: 
x[0].decode(), tst_int2name)) 44 | all_utt_ids = trn_int2name + val_int2name + tst_int2name 45 | all_h5f = h5py.File(os.path.join(config['feature_root'], 'L', 'bert_large.h5'), 'w') 46 | for utt_id, l_feat in tqdm(zip(all_utt_ids, src_l_feat), total=len(all_utt_ids)): 47 | all_h5f[utt_id] = l_feat 48 | 49 | 50 | if __name__ == '__main__': 51 | pwd = os.path.abspath(__file__) 52 | pwd = os.path.dirname(pwd) 53 | config_path = os.path.join(pwd, '../', 'data/config', 'IEMOCAP_config.json') 54 | config = json.load(open(config_path)) 55 | # migrate_V(config) 56 | migrate_L(config) 57 | -------------------------------------------------------------------------------- /baseline-mmin/preprocess/IEMOCAP/migrate_compaeE_tonpy.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | import h5py 4 | import numpy as np 5 | 6 | def get_trn_val_tst(target_root_dir, cv, setname): 7 | int2name = np.load(os.path.join(target_root_dir, str(cv), '{}_int2name.npy'.format(setname))) 8 | int2label = np.load(os.path.join(target_root_dir, str(cv), '{}_label.npy'.format(setname))) 9 | assert len(int2name) == len(int2label) 10 | return int2name, int2label 11 | 12 | def padding_to_fixlen(feat, max_len): 13 | assert feat.ndim == 2 14 | if feat.shape[0] >= max_len: 15 | feat = feat[:max_len] 16 | else: 17 | feat = np.concatenate([feat, \ 18 | np.zeros((max_len-feat.shape[0], feat.shape[1]))], axis=0) 19 | return feat 20 | 21 | def migrate_comparE_to_npy(config): 22 | max_len = 60 23 | feat_path = os.path.join(config['feature_root'], 'A', 'comparE.h5') 24 | mean_std_path = os.path.join(config['feature_root'], 'A', 'comparE_mean_std.h5') 25 | feat_h5f = h5py.File(feat_path, 'r') 26 | mean_std = h5py.File(mean_std_path, 'r') 27 | for cv in range(1, 11): 28 | save_dir = f'/data3/lrc/Iemocap_feature/cv_level/feature/comparE/{cv}' 29 | if not os.path.exists(save_dir): 30 | os.makedirs(save_dir) 31 | mean = mean_std[str(cv)]['mean'][()] 32 | std = mean_std[str(cv)]['std'][()] 33 | for part in ['trn', 'val', 'tst']: 34 | part_feat = [] 35 | int2name, _ = get_trn_val_tst(config['target_root'], cv, part) 36 | int2name = [x[0].decode() for x in int2name] 37 | for utt_id in int2name: 38 | feat = feat_h5f[utt_id][()] 39 | feat = (feat-mean)/std 40 | feat = padding_to_fixlen(feat, max_len) 41 | part_feat.append(feat) 42 | part_feat = np.array(part_feat) 43 | print(f"cv: {cv} {part} {part_feat.shape}") 44 | save_path = os.path.join(save_dir, f"{part}.npy") 45 | np.save(save_path, part_feat) 46 | 47 | if __name__ == '__main__': 48 | pwd = os.path.abspath(__file__) 49 | pwd = os.path.dirname(pwd) 50 | config_path = os.path.join(pwd, '../', 'data/config', 'IEMOCAP_config.json') 51 | config = json.load(open(config_path)) 52 | migrate_comparE_to_npy(config) 53 | 54 | 55 | -------------------------------------------------------------------------------- /baseline-mmin/preprocess/IEMOCAP/statis_comparE.py: -------------------------------------------------------------------------------- 1 | import os 2 | import h5py 3 | import json 4 | 5 | def statis_comparE(config): 6 | path = os.path.join(config['feature_root'], 'A', 'comparE.h5') 7 | h5f = h5py.File(path, 'r') 8 | lengths = [] 9 | for utt_id in h5f.keys(): 10 | lengths.append(h5f[utt_id][()].shape[0]) 11 | lengths = sorted(lengths) 12 | print('MIN:', min(lengths)) 13 | print('MAX:', max(lengths)) 14 | print('MEAN: {:.2f}'.format(sum(lengths) / len(lengths))) 15 | print('50%:', lengths[len(lengths)//2]) 16 | print('75%:', 
lengths[int(len(lengths)*0.75)]) 17 | print('90%:', lengths[int(len(lengths)*0.9)]) 18 | 19 | if __name__ == '__main__': 20 | pwd = os.path.abspath(__file__) 21 | pwd = os.path.dirname(pwd) 22 | config_path = os.path.join(pwd, '../', 'data/config', 'IEMOCAP_config.json') 23 | config = json.load(open(config_path)) 24 | statis_comparE(config) -------------------------------------------------------------------------------- /baseline-mmin/preprocess/MSP/make_aligned_info.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | import numpy as np 4 | from tqdm import tqdm 5 | import re 6 | import string 7 | 8 | def get_trn_val_tst(target_root_dir, cv, setname): 9 | int2name = np.load(os.path.join(target_root_dir, str(cv), '{}_int2name.npy'.format(setname))) 10 | int2label = np.load(os.path.join(target_root_dir, str(cv), '{}_label.npy'.format(setname))) 11 | assert len(int2name) == len(int2label) 12 | return int2name, int2label 13 | 14 | 15 | def get_all_utt_id(config): 16 | trn_int2name, _ = get_trn_val_tst(config['target_root'], 1, 'trn') 17 | val_int2name, _ = get_trn_val_tst(config['target_root'], 1, 'val') 18 | tst_int2name, _ = get_trn_val_tst(config['target_root'], 1, 'tst') 19 | trn_int2name = trn_int2name.tolist() 20 | val_int2name = val_int2name.tolist() 21 | tst_int2name = tst_int2name.tolist() 22 | all_utt_ids = trn_int2name + val_int2name + tst_int2name 23 | return all_utt_ids 24 | 25 | def align_script(wav, config, out): 26 | _cmd = 'python /data6/p2fa-vislab/align.py {} {} {} '.format(wav, config, out) # >/dev/null 2>&1 27 | os.system(_cmd) 28 | 29 | def clean(text): 30 | punc = '!"#$%&()*+,-./:;<=>?@[\]^_`{|}~' 31 | text = re.sub(r"[%s]+" %punc, " ",text) 32 | text.replace(' ', ' ') 33 | return text 34 | 35 | def make_aligned_info(config): 36 | save_dir = os.path.join(config['feature_root'], 'aligned', 'word_aligned_info') 37 | tmp_dir = os.path.join(config['feature_root'], 'aligned', 'tmp') 38 | if not os.path.exists(save_dir): 39 | os.mkdir(save_dir) 40 | if not os.path.exists(tmp_dir): 41 | os.mkdir(tmp_dir) 42 | transcript_dir = os.path.join(config['data_root'], 'All_human_transcriptions') 43 | all_utt_ids = get_all_utt_id(config) 44 | for utt_id in tqdm(all_utt_ids): 45 | align_save_path = os.path.join(save_dir, utt_id + '.json') 46 | if os.path.exists(align_save_path): 47 | continue 48 | wav_dir = os.path.join(config['feature_root'], 'audio_11025') 49 | wav_path = os.path.join(wav_dir, '{}.wav'.format(utt_id)) 50 | transcript_path = os.path.join(transcript_dir, utt_id + '.txt') 51 | transcript = open(transcript_path).read().strip() 52 | transcript = clean(transcript) 53 | print('"' + transcript + '"') 54 | tmp_path = os.path.join(tmp_dir, utt_id + '.json') 55 | tmp_data = [{ 56 | "speaker": "Steve", 57 | "line": transcript, 58 | }] 59 | json.dump(tmp_data, open(tmp_path, 'w')) 60 | align_script(wav_path, tmp_path, align_save_path) 61 | 62 | def convert_sr(config): 63 | sampled_audio_dir = os.path.join(config['feature_root'], 'audio_11025') 64 | if not os.path.exists(sampled_audio_dir): 65 | os.mkdir(sampled_audio_dir) 66 | all_utt_ids = get_all_utt_id(config) 67 | for utt_id in tqdm(all_utt_ids): 68 | ses_id = int(utt_id.split('-')[3][-1]) 69 | dialog_id = utt_id.split('-')[2] 70 | wav_path = os.path.join(config['data_root'], 'Audio', \ 71 | 'session{}'.format(ses_id), dialog_id, 'S', '{}.wav'.format(utt_id)) 72 | cmd = 'sox {} -r 11025 {}' 73 | new_audio_path = os.path.join(sampled_audio_dir, utt_id + '.wav') 74 
| os.system(cmd.format(wav_path, new_audio_path)) 75 | 76 | if __name__ == '__main__': 77 | pwd = os.path.abspath(__file__) 78 | pwd = os.path.dirname(pwd) 79 | config_path = os.path.join(pwd, '../../', 'data/config', 'MSP_config.json') 80 | config = json.load(open(config_path)) 81 | make_aligned_info(config) 82 | # convert_sr(config) -------------------------------------------------------------------------------- /baseline-mmin/preprocess/MSP/make_comparE.py: -------------------------------------------------------------------------------- 1 | import os 2 | import h5py 3 | import json 4 | import numpy as np 5 | import pandas as pd 6 | import scipy.signal as spsig 7 | from torch.nn.functional import normalize 8 | from tqdm import tqdm 9 | 10 | 11 | class ComParEExtractor(object): 12 | ''' 抽取comparE特征, 输入音频路径, 输出npy数组, 每帧130d 13 | ''' 14 | def __init__(self, opensmile_tool_dir=None, downsample=10, tmp_dir='.tmp', no_tmp=False): 15 | ''' Extract ComparE feature 16 | tmp_dir: where to save opensmile csv file 17 | no_tmp: if true, delete tmp file 18 | ''' 19 | if not os.path.exists(tmp_dir): 20 | os.makedirs(tmp_dir) 21 | if opensmile_tool_dir is None: 22 | opensmile_tool_dir = '/root/opensmile-2.3.0/' 23 | self.opensmile_tool_dir = opensmile_tool_dir 24 | self.tmp_dir = tmp_dir 25 | self.downsample = downsample 26 | self.no_tmp = no_tmp 27 | 28 | def __call__(self, wav): 29 | basename = os.path.basename(wav).split('.')[0] 30 | save_path = os.path.join(self.tmp_dir, basename+".csv") 31 | cmd = 'SMILExtract -C {}/config/ComParE_2016.conf \ 32 | -appendcsvlld 0 -timestampcsvlld 1 -headercsvlld 1 \ 33 | -I {} -lldcsvoutput {} -instname xx -O ? -noconsoleoutput 1' 34 | os.system(cmd.format(self.opensmile_tool_dir, wav, save_path)) 35 | 36 | df = pd.read_csv(save_path, delimiter=';') 37 | wav_data = df.iloc[:, 2:] 38 | if len(wav_data) > self.downsample: 39 | wav_data = spsig.resample_poly(wav_data, up=1, down=self.downsample, axis=0) 40 | if self.no_tmp: 41 | os.remove(save_path) 42 | else: 43 | wav_data = None 44 | self.print(f'Error in {wav}, no feature extracted') 45 | 46 | return wav_data 47 | 48 | 49 | def get_trn_val_tst(target_root_dir, cv, setname): 50 | int2name = np.load(os.path.join(target_root_dir, str(cv), '{}_int2name.npy'.format(setname))) 51 | int2label = np.load(os.path.join(target_root_dir, str(cv), '{}_label.npy'.format(setname))) 52 | assert len(int2name) == len(int2label) 53 | return int2name, int2label 54 | 55 | def padding_to_fixlen(feat, max_len): 56 | assert feat.ndim == 2 57 | if feat.shape[0] >= max_len: 58 | feat = feat[:max_len] 59 | else: 60 | feat = np.concatenate([feat, \ 61 | np.zeros((max_len-feat.shape[0], feat.shape[1]))], axis=0) 62 | return feat 63 | 64 | def make_all_comparE(config): 65 | max_len = 50 66 | extractor = ComParEExtractor() 67 | trn_int2name, _ = get_trn_val_tst(config['target_root'], 1, 'trn') 68 | val_int2name, _ = get_trn_val_tst(config['target_root'], 1, 'val') 69 | tst_int2name, _ = get_trn_val_tst(config['target_root'], 1, 'tst') 70 | trn_int2name = trn_int2name.tolist() 71 | val_int2name = val_int2name.tolist() 72 | tst_int2name = tst_int2name.tolist() 73 | all_utt_ids = trn_int2name + val_int2name + tst_int2name 74 | all_feat = {} 75 | for utt_id in tqdm(all_utt_ids): # MSP-IMPROV-S01A-F01-S-FM01 76 | ses_id = int(utt_id.split('-')[3][-1]) 77 | dialog_id = utt_id.split('-')[2] 78 | wav_path = os.path.join(config['data_root'], 'Audio', f'session{ses_id}', dialog_id, 'S', f'{utt_id}.wav') 79 | feat = extractor(wav_path) 80 | all_feat[utt_id] 
= padding_to_fixlen(feat, max_len) 81 | 82 | for cv in range(1, config['total_cv']+1): 83 | save_dir = os.path.join(config['feature_root'], 'A', str(cv)) 84 | if not os.path.exists(save_dir): 85 | os.makedirs(save_dir) 86 | for set_name in ['trn', 'val', 'tst']: 87 | int2name, _ = get_trn_val_tst(config['target_root'], cv, set_name) 88 | cv_feats = [] 89 | for utt_id in int2name: 90 | cv_feats.append(all_feat[utt_id]) 91 | cv_feats = np.array(cv_feats) 92 | cv_feats = normalize(cv_feats) 93 | save_path = os.path.join(save_dir, set_name + '.npy') 94 | print(f'fold:{cv} {set_name} {cv_feats.shape}') 95 | np.save(save_path, cv_feats) 96 | 97 | def normalize(feats): 98 | _feats = feats.reshape(-1, feats.shape[2]) 99 | mean = np.mean(_feats, axis=0) 100 | std = np.std(_feats, axis=0) 101 | std[std == 0.0] = 1.0 102 | ret = (feats-mean) / (std) 103 | return ret 104 | 105 | if __name__ == '__main__': 106 | pwd = os.path.abspath(__file__) 107 | pwd = os.path.dirname(pwd) 108 | config_path = os.path.join(pwd, '../../', 'data/config', 'MSP_config.json') 109 | config = json.load(open(config_path)) 110 | make_all_comparE(config) -------------------------------------------------------------------------------- /baseline-mmin/preprocess/debug.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | 4 | def show_wdseg(utt_id): 5 | root = '/data3/lrc/IEMOCAP_full_release' 6 | word_info_dir = os.path.join(root, 'Session{}/sentences/ForcedAlignment/{}') 7 | session_id = int(utt_id[4]) 8 | dialog_id = '_'.join(utt_id.split('_')[:-1]) 9 | word_info_path = os.path.join(word_info_dir.format(session_id, dialog_id), utt_id + '.wdseg') 10 | print(f'{utt_id} wdset info:') 11 | print(open(word_info_path, 'r').read()) 12 | 13 | def show_sentence(utt_id): 14 | root = '/data3/lrc/IEMOCAP_full_release' 15 | sentence_dir = os.path.join(root, 'Session{}/dialog/transcriptions/{}.txt') 16 | session_id = int(utt_id[4]) 17 | dialog_id = '_'.join(utt_id.split('_')[:-1]) 18 | transcript_path = sentence_dir.format(session_id, dialog_id) 19 | print(f'{utt_id} transcripts:') 20 | for line in open(transcript_path).readlines(): 21 | if line.startswith(utt_id): 22 | print(line) 23 | break 24 | 25 | if __name__ == '__main__': 26 | utt_id = sys.argv[1] 27 | show_wdseg(utt_id) 28 | show_sentence(utt_id) 29 | -------------------------------------------------------------------------------- /baseline-mmin/preprocess/tools/bert_extractor.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from transformers import BertTokenizer, BertModel 4 | 5 | class BertExtractor(object): 6 | def __init__(self, cuda=False, cuda_num=None): 7 | self.tokenizer = BertTokenizer.from_pretrained('/data2/lrc/bert_cache/pytorch') 8 | self.model = BertModel.from_pretrained('/data2/lrc/bert_cache/pytorch') 9 | self.model.eval() 10 | 11 | if cuda: 12 | self.cuda = True 13 | self.cuda_num = cuda_num 14 | self.model = self.model.cuda(self.cuda_num) 15 | else: 16 | self.cuda = False 17 | 18 | def tokenize(self, word_lst): 19 | word_lst = ['[CLS]'] + word_lst + ['[SEP]'] 20 | word_idx = [] 21 | ids = [] 22 | for idx, word in enumerate(word_lst): 23 | ws = self.tokenizer.tokenize(word) 24 | if not ws: 25 | # some special char 26 | continue 27 | token_ids = self.tokenizer.convert_tokens_to_ids(ws) 28 | ids.extend(token_ids) 29 | if word not in ['[CLS]', '[SEP]']: 30 | word_idx += [idx-1] * len(token_ids) 31 | return ids, word_idx 32 | 
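    # Illustrative usage sketch (added for readability; not part of the original file,
    # and the sample words are hypothetical): `tokenize` returns BERT input ids plus one
    # `word_idx` entry per WordPiece pointing back to its source word, so word-level
    # features can be pooled from the subword outputs of `get_embd` below:
    #
    #   extractor = BertExtractor(cuda=False)
    #   ids, word_idx = extractor.tokenize(['i', 'really', 'enjoyed', 'it'])
    #   seq_out, _ = extractor.get_embd(ids)        # (1, num_tokens, hidden_size)
    #   token_feats = seq_out[0, 1:-1]              # drop the [CLS] / [SEP] positions
    #   word_feats = [token_feats[[i for i, w in enumerate(word_idx) if w == k]].mean(dim=0)
    #                 for k in range(max(word_idx) + 1)]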
33 | def get_embd(self, token_ids): 34 | # token_ids = torch.tensor(token_ids) 35 | # print('TOKENIZER:', [self.tokenizer._convert_id_to_token(_id) for _id in token_ids]) 36 | token_ids = torch.tensor(token_ids).unsqueeze(0) 37 | if self.cuda: 38 | token_ids = token_ids.to(self.cuda_num) 39 | 40 | with torch.no_grad(): 41 | outputs = self.model(token_ids) 42 | 43 | # last_hidden_states = outputs[0] # The last hidden-state is the first element of the output tuple 44 | 45 | sequence_output = outputs[0] 46 | pooled_output = outputs[1] 47 | return sequence_output, pooled_output 48 | 49 | def extract(self, text): 50 | input_ids = torch.tensor(self.tokenizer.encode(text)).unsqueeze(0) # Batch size 1 51 | if self.cuda: 52 | input_ids = input_ids.cuda(self.cuda_num) 53 | 54 | with torch.no_grad(): 55 | outputs = self.model(input_ids) 56 | 57 | # last_hidden_states = outputs[0] # The last hidden-state is the first element of the output tuple 58 | 59 | sequence_output = outputs[0] 60 | pooled_output = outputs[1] 61 | return sequence_output, pooled_output -------------------------------------------------------------------------------- /baseline-mmin/preprocess/tools/denseface/densenet_train.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | from __future__ import division 3 | 4 | import os 5 | import tensorflow as tf 6 | import framework.model.trntst 7 | 8 | class TrnTst(framework.model.trntst.TrnTst): 9 | 10 | def _construct_feed_dict_in_trn(self, data): 11 | raise NotImplementedError("""please customize construct_feed_dict_in_trn""") 12 | 13 | # return loss value 14 | def feed_data_and_run_loss_op_in_val(self, data, sess): 15 | raise NotImplementedError("""please customize feed_data_and_run_loss_op_in_val""") 16 | 17 | # add eval result to metrics dictionary, key is metric name, val is metric value 18 | def predict_and_eval_in_val(self, sess, tst_reader, metrics): 19 | raise NotImplementedError("""please customize predict_and_eval_in_val""") 20 | 21 | # write predict result to predict_file 22 | def predict_in_tst(self, sess, tst_reader, predict_file): 23 | raise NotImplementedError("""please customize predict_in_tst""") 24 | 25 | def _iterate_epoch(self, sess, trn_reader, tst_reader, 26 | summarywriter, step, total_step, epoch): 27 | 28 | trn_batch_size = self.model_cfg.trn_batch_size 29 | avg_trn_loss = 0.
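        # The loop below runs one training epoch: it pulls batches from trn_reader,
        # fires the monitor/summary hooks every `monitor_iter` / `summary_iter` steps,
        # runs validation every `val_iter` steps, checkpoints via `self.model.saver.save`,
        # and finally returns the updated step counter with the epoch's average training loss.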
30 | batches_per_epoch = 0 31 | 32 | for data in trn_reader.yield_trn_batch(trn_batch_size): 33 | if self.model_cfg.monitor_iter > 0 and step % self.model_cfg.monitor_iter == 0: 34 | self.feed_data_and_monitor(data, sess, step) 35 | 36 | loss_value = self.feed_data_and_trn(data, sess, summarywriter=summarywriter, step=step) 37 | # print('step', step, 'loss', loss_value) 38 | avg_trn_loss += loss_value 39 | batches_per_epoch += 1 40 | 41 | step += 1 42 | 43 | if self.model_cfg.summary_iter > 0 and step % self.model_cfg.summary_iter == 0: 44 | summarystr = self.feed_data_and_summary(data, sess) 45 | summarywriter.add_summary(summarystr, step) 46 | 47 | if self.model_cfg.val_iter > 0 and step % self.model_cfg.val_iter == 0: 48 | metrics = self._validation(sess, tst_reader) 49 | metrics_str = 'step (%d/%d) '%(step, total_step) 50 | for key in metrics: 51 | metrics_str += '%s:%.4f '%(key, metrics[key]) 52 | self._logger.info(metrics_str) 53 | 54 | self.model.saver.save( 55 | sess, os.path.join(self.path_cfg.model_dir, 'epoch'), global_step=epoch) 56 | 57 | avg_trn_loss /= batches_per_epoch 58 | return step, avg_trn_loss 59 | 60 | 61 | -------------------------------------------------------------------------------- /baseline-mmin/preprocess/tools/denseface/vision_network/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zeroQiaoba/GCNet/8cce4a0c9a50172abfa79971e77c71c40c1d733d/baseline-mmin/preprocess/tools/denseface/vision_network/__init__.py -------------------------------------------------------------------------------- /baseline-mmin/preprocess/tools/denseface/vision_network/data_providers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zeroQiaoba/GCNet/8cce4a0c9a50172abfa79971e77c71c40c1d733d/baseline-mmin/preprocess/tools/denseface/vision_network/data_providers/__init__.py -------------------------------------------------------------------------------- /baseline-mmin/preprocess/tools/denseface/vision_network/data_providers/base_provider.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | from __future__ import division 3 | 4 | import numpy as np 5 | 6 | 7 | class DataSet: 8 | """Class to represent some dataset: train, validation, test""" 9 | @property 10 | def num_examples(self): 11 | """Return qtty of examples in dataset""" 12 | raise NotImplementedError 13 | 14 | def next_batch(self, batch_size): 15 | """Return batch of required size of data, labels""" 16 | raise NotImplementedError 17 | 18 | 19 | class ImagesDataSet(DataSet): 20 | """Dataset for images that provide some often used methods""" 21 | 22 | def _measure_mean_and_std(self): 23 | # for every channel in image 24 | means = [] 25 | stds = [] 26 | # for every channel in image(assume this is last dimension) 27 | for ch in range(self.images.shape[-1]): 28 | means.append(np.mean(self.images[:, :, :, ch])) 29 | stds.append(np.std(self.images[:, :, :, ch])) 30 | self._means = np.array(means, np.float32) 31 | self._stds = np.array(stds, np.float32) 32 | 33 | @property 34 | def images_means(self): 35 | if not hasattr(self, '_means'): 36 | self._measure_mean_and_std() 37 | return self._means 38 | 39 | @property 40 | def images_stds(self): 41 | if not hasattr(self, '_stds'): 42 | self._measure_mean_and_std() 43 | return self._stds 44 | 45 | def shuffle_images_and_labels(self, images, labels): 46 | 
rand_indexes = np.random.permutation(images.shape[0]) 47 | shuffled_images = images[rand_indexes] 48 | shuffled_labels = labels[rand_indexes] 49 | return shuffled_images, shuffled_labels 50 | 51 | def normalize_images(self, images, normalization_type): 52 | """ 53 | Args: 54 | images: numpy 4D array 55 | normalization_type: `str`, available choices: 56 | - divide_255 57 | - divide_256 58 | - by_chanels 59 | """ 60 | if normalization_type == 'divide_255': 61 | images = images / 255 62 | elif normalization_type == 'divide_256': 63 | images = images / 256 64 | elif normalization_type == 'by_chanels': 65 | images = images.astype('float64') 66 | # for every channel in image(assume this is last dimension) 67 | for i in range(images.shape[-1]): 68 | images[:, :, :, i] = ((images[:, :, :, i] - self.images_means[i]) / 69 | self.images_stds[i]) 70 | else: 71 | raise Exception("Unknown type of normalization") 72 | return images 73 | 74 | def normalize_all_images_by_chanels(self, initial_images): 75 | new_images = np.zeros(initial_images.shape) 76 | for i in range(initial_images.shape[0]): 77 | new_images[i] = self.normalize_image_by_chanel(initial_images[i]) 78 | return new_images 79 | 80 | def normalize_image_by_chanel(self, image): 81 | new_image = np.zeros(image.shape) 82 | for chanel in range(3): 83 | mean = np.mean(image[:, :, chanel]) 84 | std = np.std(image[:, :, chanel]) 85 | new_image[:, :, chanel] = (image[:, :, chanel] - mean) / std 86 | return new_image 87 | 88 | 89 | class DataProvider: 90 | @property 91 | def data_shape(self): 92 | """Return shape as python list of one data entry""" 93 | raise NotImplementedError 94 | 95 | @property 96 | def n_classes(self): 97 | """Return `int` of num classes""" 98 | raise NotImplementedError 99 | 100 | def labels_to_one_hot(self, labels): 101 | """Convert 1D array of labels to one hot representation 102 | 103 | Args: 104 | labels: 1D numpy array 105 | """ 106 | new_labels = np.zeros((labels.shape[0], self.n_classes)) 107 | new_labels[range(labels.shape[0]), labels] = np.ones(labels.shape) 108 | return new_labels 109 | 110 | def labels_from_one_hot(self, labels): 111 | """Convert 2D array of labels to 1D class based representation 112 | 113 | Args: 114 | labels: 2D numpy array 115 | """ 116 | return np.argmax(labels, axis=1) 117 | -------------------------------------------------------------------------------- /baseline-mmin/preprocess/tools/denseface/vision_network/data_providers/downloader.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | import urllib 4 | import tarfile 5 | import zipfile 6 | 7 | 8 | def report_download_progress(count, block_size, total_size): 9 | pct_complete = float(count * block_size) / total_size 10 | msg = "\r {0:.1%} already downloaded".format(pct_complete) 11 | sys.stdout.write(msg) 12 | sys.stdout.flush() 13 | 14 | 15 | def download_data_url(url, download_dir): 16 | filename = url.split('/')[-1] 17 | file_path = os.path.join(download_dir, filename) 18 | 19 | if not os.path.exists(file_path): 20 | os.makedirs(download_dir) 21 | 22 | print("Download %s to %s" % (url, file_path)) 23 | file_path, _ = urllib.urlretrieve( 24 | url=url, 25 | filename=file_path, 26 | reporthook=report_download_progress) 27 | 28 | print("\nExtracting files") 29 | if file_path.endswith(".zip"): 30 | zipfile.ZipFile(file=file_path, mode="r").extractall(download_dir) 31 | elif file_path.endswith((".tar.gz", ".tgz")): 32 | tarfile.open(name=file_path, 
mode="r:gz").extractall(download_dir) 33 | -------------------------------------------------------------------------------- /baseline-mmin/preprocess/tools/denseface/vision_network/data_providers/utils.py: -------------------------------------------------------------------------------- 1 | from .cifar import Cifar10DataProvider, Cifar100DataProvider, \ 2 | Cifar10AugmentedDataProvider, Cifar100AugmentedDataProvider 3 | from .svhn import SVHNDataProvider 4 | from .fer import FERPlusDataProvider, AVECDataProvider, MUSEDataProvider, VGGFACE2DataProvieder 5 | 6 | 7 | def get_data_provider_by_name(name, data_dir, train_params): 8 | """Return required data provider class""" 9 | if name == 'C10': 10 | return Cifar10DataProvider(save_path=data_dir, **train_params) 11 | if name == 'C10+': 12 | return Cifar10AugmentedDataProvider(save_path=data_dir, **train_params) 13 | if name == 'C100': 14 | return Cifar100DataProvider(save_path=data_dir, **train_params) 15 | if name == 'C100+': 16 | return Cifar100AugmentedDataProvider(save_path=data_dir, **train_params) 17 | if name == 'SVHN': 18 | return SVHNDataProvider(**train_params) 19 | if name == 'FER+': 20 | return FERPlusDataProvider(data_dir, **train_params) 21 | if name == 'AVEC': 22 | return AVECDataProvider(data_dir, **train_params) 23 | if name == 'MUSE': 24 | return MUSEDataProvider(data_dir, **train_params) 25 | if name == 'VGGFACE2': 26 | return VGGFACE2DataProvieder(data_dir, **train_params) 27 | else: 28 | print("Sorry, data provider for `%s` dataset " 29 | "was not implemented yet" % name) 30 | exit() 31 | -------------------------------------------------------------------------------- /baseline-mmin/preprocess/tools/denseface/vision_network/denseface_feature.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | from __future__ import division 3 | 4 | import os 5 | import argparse 6 | import cv2 7 | import collections 8 | import numpy as np 9 | 10 | from models.dense_net import DenseNet 11 | 12 | img_size = 64 13 | 14 | # # FER+ MODEL 15 | # images_mean = 129 16 | # images_std = 63.58 17 | 18 | # FER+-MEC finetune MODEL 19 | images_mean = 106 20 | images_std = 58 21 | 22 | def parse_opts(): 23 | parser = argparse.ArgumentParser() 24 | parser.add_argument( 25 | '--model_type', '-m', type=str, choices=['DenseNet', 'DenseNet-BC'], 26 | default='DenseNet', 27 | help='What type of model to use') 28 | parser.add_argument( 29 | '--growth_rate', '-k', type=int, choices=[12, 24, 40], 30 | default=12, 31 | help='Grows rate for every layer, ' 32 | 'choices were restricted to used in paper') 33 | parser.add_argument( 34 | '--depth', '-d', type=int, choices=[40, 100, 190, 250], 35 | default=40, 36 | help='Depth of whole network, restricted to paper choices') 37 | parser.add_argument( 38 | '--total_blocks', '-tb', type=int, default=3, metavar='', 39 | help='Total blocks of layers stack (default: %(default)s)') 40 | parser.add_argument( 41 | '--reduction', '-red', type=float, default=0.5, metavar='', 42 | help='reduction Theta at transition layer for DenseNets-BC models') 43 | parser.add_argument('--batch_size', dest='batch_size', type=int, 44 | default=32) 45 | 46 | parser.add_argument('--face_dir', dest='face_dir', help='face dir') 47 | parser.add_argument('--outft_dir', dest='outft_dir') 48 | parser.add_argument('--model_path', dest='model_path') 49 | 50 | args = parser.parse_args() 51 | 52 | args.keep_prob = 1.0 53 | if args.model_type == 'DenseNet': 54 | args.bc_mode = False 
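        # plain DenseNet applies no compression at the transition layers, so the
        # reduction factor (theta) is forced back to 1.0 below; the DenseNet-BC branch
        # keeps the user-supplied --reduction value instead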
55 | args.reduction = 1.0 56 | elif args.model_type == 'DenseNet-BC': 57 | args.bc_mode = True 58 | 59 | return args 60 | 61 | 62 | def extract_feature_batch(): 63 | 64 | args = parse_opts() 65 | model_params = vars(args) 66 | batch_size = args.batch_size 67 | 68 | print("Initialize the model..") 69 | # fake data_provider 70 | DataProvider = collections.namedtuple('DataProvider', ['data_shape', 'n_classes']) 71 | data_provider = DataProvider(data_shape=(img_size, img_size, 1), n_classes=10) 72 | model = DenseNet(data_provider=data_provider, **model_params) 73 | end_points = model.end_points 74 | # for key, value in end_points.iteritems(): 75 | # print(key, value.get_shape().as_list()) 76 | # restore model 77 | model.saver.restore(model.sess, args.model_path) 78 | print("Successfully load model from model path: %s" % args.model_path) 79 | 80 | video_names = [x for x in os.listdir(args.face_dir)] 81 | video_names.sort() 82 | avg_num_imgs = 0 83 | 84 | for vid, video_name in enumerate(video_names): 85 | video_dir = os.path.join(args.face_dir, video_name) 86 | img_paths = os.listdir(video_dir) 87 | if len(img_paths) == 0: 88 | continue 89 | 90 | output_subdir = os.path.join(args.outft_dir, video_name) 91 | if os.path.exists(output_subdir): 92 | continue 93 | else: 94 | os.makedirs(output_subdir) 95 | 96 | img_paths.sort(key=lambda x:int(x.split('.')[0])) 97 | avg_num_imgs += len(img_paths) 98 | 99 | imgs = [] 100 | for img_path in img_paths: 101 | img_path = os.path.join(video_dir, img_path) 102 | img = cv2.imread(img_path) 103 | img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) 104 | img = cv2.resize(img, (img_size, img_size)) 105 | imgs.append(img) 106 | 107 | imgs = (np.array(imgs, np.float32) - images_mean) / images_std 108 | imgs = np.expand_dims(imgs, 3) 109 | # pool4.shape=(batch_size, 4, 4, 256) 110 | # fc5.shape=fc6.shape=(batch_size, 1, 1, 512) 111 | # prob.shape=(batch_size, num_classes) 112 | fcs, probs = [], [] 113 | for i in xrange(0, imgs.shape[0], batch_size): 114 | feed_dict = { 115 | model.images: imgs[i: i + batch_size], 116 | model.is_training: False 117 | } 118 | fc, prob = model.sess.run( 119 | [end_points['fc'], end_points['preds']], 120 | feed_dict=feed_dict) 121 | # prev_last_pools.extend(prev_last_pool) 122 | fcs.extend(fc) 123 | probs.extend(prob) 124 | 125 | # prev_last_pools = np.array(prev_last_pools, np.float32) 126 | fcs = np.array(fcs, np.float32) 127 | probs = np.array(probs, np.float32) 128 | 129 | 130 | # with open(os.path.join(output_subdir, 'pool.npy'), 'wb') as f: 131 | # np.save(f, prev_last_pools) 132 | with open(os.path.join(output_subdir, 'fc.npy'), 'wb') as f: 133 | np.save(f, fcs) 134 | with open(os.path.join(output_subdir, 'prob.npy'), 'wb') as f: 135 | np.save(f, probs) 136 | 137 | print(vid, video_name, len(img_paths), 138 | fcs.shape, probs.shape) 139 | 140 | avg_num_imgs /= float(len(video_names)) 141 | print('average faces per video', avg_num_imgs) 142 | 143 | 144 | if __name__ == '__main__': 145 | extract_feature_batch() 146 | 147 | 148 | 149 | -------------------------------------------------------------------------------- /baseline-mmin/preprocess/tools/denseface/vision_network/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zeroQiaoba/GCNet/8cce4a0c9a50172abfa79971e77c71c40c1d733d/baseline-mmin/preprocess/tools/denseface/vision_network/models/__init__.py -------------------------------------------------------------------------------- 
/baseline-mmin/preprocess/tools/denseface_extractor.py: -------------------------------------------------------------------------------- 1 | 2 | import os, glob 3 | import cv2 4 | import numpy as np 5 | import tensorflow as tf 6 | import collections 7 | 8 | from preprocess.tools.denseface.vision_network.models.dense_net import DenseNet 9 | 10 | class DensefaceExtractor(object): 11 | def __init__(self, restore_path=None, mean=131.0754, std=47.858177, device=0, smooth=False): 12 | """ extract densenet feature 13 | Parameters: 14 | ------------------------ 15 | model: model class returned by function 'load_model' 16 | """ 17 | if restore_path is None: 18 | restore_path = '/data2/zjm/tools/FER_models/denseface/DenseNet-BC_growth-rate12_depth100_FERPlus/model/epoch-200' 19 | self.model = self.load_model(restore_path) 20 | self.mean = mean 21 | self.std = std 22 | self.previous_img = None # smooth 的情况下, 如果没有人脸则用上一张人脸填充 23 | self.previous_img_path = None 24 | self.smooth = smooth 25 | self.dim = 342 # returned feature dim 26 | self.device = device 27 | 28 | def load_model(self, restore_path): 29 | print("Initialize the model..") 30 | # fake data_provider 31 | growth_rate = 12 32 | img_size = 64 33 | depth = 100 34 | total_blocks = 3 35 | reduction = 0.5 36 | keep_prob = 1.0 37 | bc_mode = True 38 | model_path = restore_path 39 | dataset = 'FER+' 40 | num_class = 8 41 | 42 | DataProvider = collections.namedtuple('DataProvider', ['data_shape', 'n_classes']) 43 | data_provider = DataProvider(data_shape=(img_size, img_size, 1), n_classes=num_class) 44 | model = DenseNet(data_provider=data_provider, growth_rate=growth_rate, depth=depth, 45 | total_blocks=total_blocks, keep_prob=keep_prob, reduction=reduction, 46 | bc_mode=bc_mode, dataset=dataset) 47 | 48 | model.saver.restore(model.sess, model_path) 49 | print("Successfully load model from model path: {}".format(model_path)) 50 | return model 51 | 52 | def __call__(self, img_path): 53 | if os.path.exists(img_path): 54 | img = cv2.imread(img_path) 55 | if not isinstance(img, np.ndarray): 56 | print(f'Warning: Error in {img_path}') 57 | return None 58 | 59 | img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) 60 | img = cv2.resize(img, (64, 64)) 61 | if self.smooth: 62 | self.previous_img = img 63 | self.previous_img_path = img_path 64 | 65 | elif self.smooth and self.previous_img is not None: 66 | # print('Path {} does not exists. 
Use previous img: {}'.format(img_path, self.previous_img_path)) 67 | img = self.previous_img 68 | 69 | else: 70 | feat = np.zeros([1, self.dim]) # smooth的话第一张就是黑图的话就直接返回0特征, 不smooth缺图就返回0 71 | return feat 72 | 73 | img = (img - self.mean) / self.std 74 | img = np.expand_dims(img, -1) # channel = 1 75 | img = np.expand_dims(img, 0) # batch_size=1 76 | with tf.device('/gpu:{}'.format(self.device)): 77 | feed_dict = { 78 | self.model.images: img, 79 | self.model.is_training: False 80 | } 81 | 82 | # emo index 83 | # fer_idx_to_class = ['neu', 'hap', 'sur', 'sad', 'ang', 'dis', 'fea', 'con'] 84 | 85 | ft, soft_label = \ 86 | self.model.sess.run([self.model.end_points['fc'], 87 | self.model.end_points['preds']], feed_dict=feed_dict) 88 | return ft, soft_label -------------------------------------------------------------------------------- /baseline-mmin/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zeroQiaoba/GCNet/8cce4a0c9a50172abfa79971e77c71c40c1d733d/baseline-mmin/utils/__init__.py -------------------------------------------------------------------------------- /baseline-mmin/utils/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zeroQiaoba/GCNet/8cce4a0c9a50172abfa79971e77c71c40c1d733d/baseline-mmin/utils/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /baseline-mmin/utils/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zeroQiaoba/GCNet/8cce4a0c9a50172abfa79971e77c71c40c1d733d/baseline-mmin/utils/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /baseline-mmin/utils/__pycache__/logger.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zeroQiaoba/GCNet/8cce4a0c9a50172abfa79971e77c71c40c1d733d/baseline-mmin/utils/__pycache__/logger.cpython-37.pyc -------------------------------------------------------------------------------- /baseline-mmin/utils/__pycache__/logger.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zeroQiaoba/GCNet/8cce4a0c9a50172abfa79971e77c71c40c1d733d/baseline-mmin/utils/__pycache__/logger.cpython-38.pyc -------------------------------------------------------------------------------- /baseline-mmin/utils/image_pool.py: -------------------------------------------------------------------------------- 1 | import random 2 | import torch 3 | 4 | 5 | class ImagePool(): 6 | """This class implements an image buffer that stores previously generated images. 7 | 8 | This buffer enables us to update discriminators using a history of generated images 9 | rather than the ones produced by the latest generators. 10 | """ 11 | 12 | def __init__(self, pool_size): 13 | """Initialize the ImagePool class 14 | 15 | Parameters: 16 | pool_size (int) -- the size of image buffer, if pool_size=0, no buffer will be created 17 | """ 18 | self.pool_size = pool_size 19 | if self.pool_size > 0: # create an empty pool 20 | self.num_imgs = 0 21 | self.images = [] 22 | 23 | def query(self, images): 24 | """Return an image from the pool. 
25 | 26 | Parameters: 27 | images: the latest generated images from the generator 28 | 29 | Returns images from the buffer. 30 | 31 | With probability 50%, the buffer will return the input images. 32 | With probability 50%, the buffer will return images previously stored in the buffer, 33 | and insert the current images into the buffer. 34 | """ 35 | if self.pool_size == 0: # if the buffer size is 0, do nothing 36 | return images 37 | return_images = [] 38 | for image in images: 39 | image = torch.unsqueeze(image.data, 0) 40 | if self.num_imgs < self.pool_size: # if the buffer is not full; keep inserting current images to the buffer 41 | self.num_imgs = self.num_imgs + 1 42 | self.images.append(image) 43 | return_images.append(image) 44 | else: 45 | p = random.uniform(0, 1) 46 | if p > 0.5: # by 50% chance, the buffer will return a previously stored image, and insert the current image into the buffer 47 | random_id = random.randint(0, self.pool_size - 1) # randint is inclusive 48 | tmp = self.images[random_id].clone() 49 | self.images[random_id] = image 50 | return_images.append(tmp) 51 | else: # by another 50% chance, the buffer will return the current image 52 | return_images.append(image) 53 | return_images = torch.cat(return_images, 0) # collect all the images and return 54 | return return_images 55 | -------------------------------------------------------------------------------- /baseline-mmin/utils/logger.py: -------------------------------------------------------------------------------- 1 | import time 2 | import os 3 | import logging 4 | import fcntl 5 | 6 | def get_logger(path, suffix): 7 | cur_time = time.strftime('%Y-%m-%d-%H.%M.%S',time.localtime(time.time())) 8 | logger = logging.getLogger(__name__+cur_time) 9 | logger.setLevel(level = logging.INFO) 10 | handler = logging.FileHandler(os.path.join(path, f"{suffix}_{cur_time}.log")) 11 | handler.setLevel(logging.INFO) 12 | formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s') 13 | handler.setFormatter(formatter) 14 | 15 | console = logging.StreamHandler() 16 | console.setLevel(logging.INFO) 17 | 18 | logger.addHandler(handler) 19 | logger.addHandler(console) 20 | return logger 21 | 22 | class ResultRecorder(object): 23 | def __init__(self, path, total_cv=10): 24 | self.path = path # ./logs/utt_fusion_AVL_run2/result.tsv 25 | self.total_cv = total_cv 26 | if not os.path.exists(self.path): 27 | f = open(self.path, 'w') 28 | f.write('acc\tuar\tf1\n') 29 | f.close() 30 | 31 | def is_full(self, content): 32 | if len(content) < self.total_cv+1: 33 | return False 34 | 35 | for line in content: 36 | if not len(line.split('\t')) == 3: 37 | return False 38 | return True 39 | 40 | def calc_mean(self, content): 41 | acc = [float(line.split('\t')[0]) for line in content[1:]] 42 | uar = [float(line.split('\t')[1]) for line in content[1:]] 43 | f1 = [float(line.split('\t')[2]) for line in content[1:]] 44 | mean_acc = sum(acc) / len(acc) 45 | mean_uar = sum(uar) / len(uar) 46 | mean_f1 = sum(f1) / len(f1) 47 | return mean_acc, mean_uar, mean_f1 48 | 49 | def write_result_to_tsv(self, results, cvNo): 50 | # lock the file with fcntl so that multiple processes do not write to the same result file at the same time 51 | f_in = open(self.path) 52 | fcntl.flock(f_in.fileno(), fcntl.LOCK_EX) # acquire an exclusive lock 53 | content = f_in.readlines() 54 | if len(content) < self.total_cv+1: 55 | content += ['\n'] * (self.total_cv-len(content)+1) 56 | content[cvNo] = '{:.4f}\t{:.4f}\t{:.4f}\n'.format(results['acc'], results['uar'], results['f1']) 57 | if self.is_full(content): 58 | mean_acc, mean_uar, mean_f1 = self.calc_mean(content) 59 |
content.append('{:.4f}\t{:.4f}\t{:.4f}\n'.format(mean_acc, mean_uar, mean_f1)) 60 | 61 | f_out = open(self.path, 'w') 62 | f_out.writelines(content) 63 | f_out.close() 64 | f_in.close() # 释放锁 65 | 66 | 67 | -------------------------------------------------------------------------------- /config.py: -------------------------------------------------------------------------------- 1 | # *_*coding:utf-8 *_* 2 | import os 3 | import sys 4 | import socket 5 | 6 | ## gain linux ip 7 | def get_host_ip(): 8 | try: 9 | s = socket.socket(socket.AF_INET,socket.SOCK_DGRAM) 10 | s.connect(('10.0.0.1',8080)) 11 | ip= s.getsockname()[0] 12 | finally: 13 | s.close() 14 | return ip 15 | 16 | ############ For LINUX ############## 17 | # path 18 | DATA_DIR = { 19 | 'CMUMOSI': '/share/home/lianzheng/gcnet-master/dataset/CMUMOSI', # for nlpr 20 | 'CMUMOSEI': '/share/home/lianzheng/gcnet-master/dataset/CMUMOSEI',# for nlpr 21 | 'IEMOCAPSix': '/share/home/lianzheng/gcnet-master/dataset/IEMOCAP', # for nlpr 22 | 'IEMOCAPFour': '/share/home/lianzheng/gcnet-master/dataset/IEMOCAP', # for nlpr 23 | } 24 | PATH_TO_RAW_AUDIO = { 25 | 'CMUMOSI': os.path.join(DATA_DIR['CMUMOSI'], 'subaudio'), 26 | 'CMUMOSEI': os.path.join(DATA_DIR['CMUMOSEI'], 'subaudio'), 27 | 'IEMOCAPSix': os.path.join(DATA_DIR['IEMOCAPSix'], 'subaudio'), 28 | 'IEMOCAPFour': os.path.join(DATA_DIR['IEMOCAPFour'], 'subaudio'), 29 | } 30 | PATH_TO_RAW_FACE = { 31 | 'CMUMOSI': os.path.join(DATA_DIR['CMUMOSI'], 'openface_face'), 32 | 'CMUMOSEI': os.path.join(DATA_DIR['CMUMOSEI'], 'openface_face'), 33 | 'IEMOCAPSix': os.path.join(DATA_DIR['IEMOCAPSix'], 'subvideofaces'), # without openfac 34 | 'IEMOCAPFour': os.path.join(DATA_DIR['IEMOCAPFour'], 'subvideofaces'), 35 | } 36 | PATH_TO_TRANSCRIPTIONS = { 37 | 'CMUMOSI': os.path.join(DATA_DIR['CMUMOSI'], 'transcription.csv'), 38 | 'CMUMOSEI': os.path.join(DATA_DIR['CMUMOSEI'], 'transcription.csv'), 39 | 'IEMOCAPSix': os.path.join(DATA_DIR['IEMOCAPSix'], 'transcription.csv'), 40 | 'IEMOCAPFour': os.path.join(DATA_DIR['IEMOCAPFour'], 'transcription.csv'), 41 | } 42 | PATH_TO_FEATURES = { 43 | 'CMUMOSI': os.path.join(DATA_DIR['CMUMOSI'], 'features'), 44 | 'CMUMOSEI': os.path.join(DATA_DIR['CMUMOSEI'], 'features'), 45 | 'IEMOCAPSix': os.path.join(DATA_DIR['IEMOCAPSix'], 'features'), 46 | 'IEMOCAPFour': os.path.join(DATA_DIR['IEMOCAPFour'], 'features'), 47 | } 48 | PATH_TO_LABEL = { 49 | 'CMUMOSI': os.path.join(DATA_DIR['CMUMOSI'], 'CMUMOSI_features_raw_2way.pkl'), 50 | 'CMUMOSEI': os.path.join(DATA_DIR['CMUMOSEI'], 'CMUMOSEI_features_raw_2way.pkl'), 51 | 'IEMOCAPSix': os.path.join(DATA_DIR['IEMOCAPSix'], 'IEMOCAP_features_raw_6way.pkl'), 52 | 'IEMOCAPFour': os.path.join(DATA_DIR['IEMOCAPFour'], 'IEMOCAP_features_raw_4way.pkl'), 53 | } 54 | 55 | # pre-trained models, including supervised and unsupervised 56 | PATH_TO_PRETRAINED_MODELS = '/share/home/lianzheng/tools' 57 | PATH_TO_OPENSMILE = '/share/home/lianzheng/tools/opensmile-2.3.0/' 58 | PATH_TO_FFMPEG = '/share/home/lianzheng/tools/ffmpeg-4.4.1-i686-static/ffmpeg' 59 | 60 | # dir 61 | SAVED_ROOT = os.path.join('../saved') 62 | DATA_DIR = os.path.join(SAVED_ROOT, 'data') 63 | MODEL_DIR = os.path.join(SAVED_ROOT, 'model') 64 | LOG_DIR = os.path.join(SAVED_ROOT, 'log') 65 | 66 | 67 | 68 | ############ For Windows ############## 69 | DATA_DIR_Win = { 70 | 'CMUMOSI': 'E:\\Dataset\\CMU-MOSI\\Raw', 71 | 'CMUMOSEI1': 'E:\\Dataset\\CMU-MOSEI', # extract openface in five subprocess 72 | 'CMUMOSEI2': 'E:\\Dataset\\CMU-MOSEI', # extract openface in five subprocess 73 | 
'CMUMOSEI3': 'E:\\Dataset\\CMU-MOSEI', # extract openface in five subprocess 74 | 'CMUMOSEI4': 'E:\\Dataset\\CMU-MOSEI', # extract openface in five subprocess 75 | 'CMUMOSEI5': 'E:\\Dataset\\CMU-MOSEI', # extract openface in five subprocess 76 | } 77 | 78 | PATH_TO_RAW_FACE_Win = { 79 | 'CMUMOSI': os.path.join(DATA_DIR_Win['CMUMOSI'], 'Video\\Segmented'), 80 | 'CMUMOSEI1': os.path.join(DATA_DIR_Win['CMUMOSEI1'], 'subvideo1'), 81 | 'CMUMOSEI2': os.path.join(DATA_DIR_Win['CMUMOSEI2'], 'subvideo2'), 82 | 'CMUMOSEI3': os.path.join(DATA_DIR_Win['CMUMOSEI3'], 'subvideo3'), 83 | 'CMUMOSEI4': os.path.join(DATA_DIR_Win['CMUMOSEI4'], 'subvideo4'), 84 | 'CMUMOSEI5': os.path.join(DATA_DIR_Win['CMUMOSEI5'], 'subvideo5'), 85 | } 86 | 87 | PATH_TO_FEATURES_Win = { 88 | 'CMUMOSI': os.path.join(DATA_DIR_Win['CMUMOSI'], 'features'), 89 | 'CMUMOSEI1': os.path.join(DATA_DIR_Win['CMUMOSEI1'], 'features'), 90 | 'CMUMOSEI2': os.path.join(DATA_DIR_Win['CMUMOSEI2'], 'features'), 91 | 'CMUMOSEI3': os.path.join(DATA_DIR_Win['CMUMOSEI3'], 'features'), 92 | 'CMUMOSEI4': os.path.join(DATA_DIR_Win['CMUMOSEI4'], 'features'), 93 | 'CMUMOSEI5': os.path.join(DATA_DIR_Win['CMUMOSEI5'], 'features'), 94 | } 95 | 96 | PATH_TO_OPENFACE_Win = "H:\\desktop\\Multimedia-Transformer\\gcnet-master\\OpenFace_2.2.0_win_x64\\OpenFace_2.2.0_win_x64" 97 | PATH_TO_FFMPEG_Win = "H:\\desktop\\Multimedia-Transformer\\tools\\ffmpeg-3.4.1-win32-static\\bin\\ffmpeg" 98 | 99 | -------------------------------------------------------------------------------- /dataset/CMUMOSEI/CMUMOSEI_features_raw_2way.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zeroQiaoba/GCNet/8cce4a0c9a50172abfa79971e77c71c40c1d733d/dataset/CMUMOSEI/CMUMOSEI_features_raw_2way.pkl -------------------------------------------------------------------------------- /dataset/CMUMOSI/CMUMOSI_features_raw_2way.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zeroQiaoba/GCNet/8cce4a0c9a50172abfa79971e77c71c40c1d733d/dataset/CMUMOSI/CMUMOSI_features_raw_2way.pkl -------------------------------------------------------------------------------- /dataset/IEMOCAP/IEMOCAP_features_raw_4way.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zeroQiaoba/GCNet/8cce4a0c9a50172abfa79971e77c71c40c1d733d/dataset/IEMOCAP/IEMOCAP_features_raw_4way.pkl -------------------------------------------------------------------------------- /dataset/IEMOCAP/IEMOCAP_features_raw_6way.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zeroQiaoba/GCNet/8cce4a0c9a50172abfa79971e77c71c40c1d733d/dataset/IEMOCAP/IEMOCAP_features_raw_6way.pkl -------------------------------------------------------------------------------- /face_detection_yunet_2021sep.onnx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zeroQiaoba/GCNet/8cce4a0c9a50172abfa79971e77c71c40c1d733d/face_detection_yunet_2021sep.onnx -------------------------------------------------------------------------------- /feature_extraction/audio/__pycache__/config.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zeroQiaoba/GCNet/8cce4a0c9a50172abfa79971e77c71c40c1d733d/feature_extraction/audio/__pycache__/config.cpython-38.pyc 
-------------------------------------------------------------------------------- /feature_extraction/audio/__pycache__/feature_extractor.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zeroQiaoba/GCNet/8cce4a0c9a50172abfa79971e77c71c40c1d733d/feature_extraction/audio/__pycache__/feature_extractor.cpython-38.pyc -------------------------------------------------------------------------------- /feature_extraction/audio/__pycache__/util.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zeroQiaoba/GCNet/8cce4a0c9a50172abfa79971e77c71c40c1d733d/feature_extraction/audio/__pycache__/util.cpython-38.pyc -------------------------------------------------------------------------------- /feature_extraction/audio/panns/__pycache__/models.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zeroQiaoba/GCNet/8cce4a0c9a50172abfa79971e77c71c40c1d733d/feature_extraction/audio/panns/__pycache__/models.cpython-38.pyc -------------------------------------------------------------------------------- /feature_extraction/audio/panns/__pycache__/pytorch_utils.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zeroQiaoba/GCNet/8cce4a0c9a50172abfa79971e77c71c40c1d733d/feature_extraction/audio/panns/__pycache__/pytorch_utils.cpython-38.pyc -------------------------------------------------------------------------------- /feature_extraction/audio/panns/evaluate.py: -------------------------------------------------------------------------------- 1 | from sklearn import metrics 2 | 3 | from pytorch_utils import forward 4 | 5 | 6 | class Evaluator(object): 7 | def __init__(self, model): 8 | """Evaluator. 9 | 10 | Args: 11 | model: object 12 | """ 13 | self.model = model 14 | 15 | def evaluate(self, data_loader): 16 | """Forward evaluation data and calculate statistics. 
17 | 18 | Args: 19 | data_loader: object 20 | 21 | Returns: 22 | statistics: dict, 23 | {'average_precision': (classes_num,), 'auc': (classes_num,)} 24 | """ 25 | 26 | # Forward 27 | output_dict = forward( 28 | model=self.model, 29 | generator=data_loader, 30 | return_target=True) 31 | 32 | clipwise_output = output_dict['clipwise_output'] # (audios_num, classes_num) 33 | target = output_dict['target'] # (audios_num, classes_num) 34 | 35 | average_precision = metrics.average_precision_score( 36 | target, clipwise_output, average=None) 37 | 38 | auc = metrics.roc_auc_score(target, clipwise_output, average=None) 39 | 40 | statistics = {'average_precision': average_precision, 'auc': auc} 41 | 42 | return statistics -------------------------------------------------------------------------------- /feature_extraction/audio/panns/finetune_template.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | sys.path.insert(1, os.path.join(sys.path[0], '../utils')) 4 | import numpy as np 5 | import argparse 6 | import h5py 7 | import math 8 | import time 9 | import logging 10 | import matplotlib.pyplot as plt 11 | 12 | import torch 13 | torch.backends.cudnn.benchmark=True 14 | torch.manual_seed(0) 15 | import torch.nn as nn 16 | import torch.nn.functional as F 17 | import torch.optim as optim 18 | import torch.utils.data 19 | 20 | from utilities import get_filename 21 | from models import * 22 | import config 23 | 24 | 25 | class Transfer_Cnn14(nn.Module): 26 | def __init__(self, sample_rate, window_size, hop_size, mel_bins, fmin, 27 | fmax, classes_num, freeze_base): 28 | """Classifier for a new task using pretrained Cnn14 as a sub module. 29 | """ 30 | super(Transfer_Cnn14, self).__init__() 31 | audioset_classes_num = 527 32 | 33 | self.base = Cnn14(sample_rate, window_size, hop_size, mel_bins, fmin, 34 | fmax, audioset_classes_num) 35 | 36 | # Transfer to another task layer 37 | self.fc_transfer = nn.Linear(2048, classes_num, bias=True) 38 | 39 | if freeze_base: 40 | # Freeze AudioSet pretrained layers 41 | for param in self.base.parameters(): 42 | param.requires_grad = False 43 | 44 | self.init_weights() 45 | 46 | def init_weights(self): 47 | init_layer(self.fc_transfer) 48 | 49 | def load_from_pretrain(self, pretrained_checkpoint_path): 50 | checkpoint = torch.load(pretrained_checkpoint_path) 51 | self.base.load_state_dict(checkpoint['model']) 52 | 53 | def forward(self, input, mixup_lambda=None): 54 | """Input: (batch_size, data_length) 55 | """ 56 | output_dict = self.base(input, mixup_lambda) 57 | embedding = output_dict['embedding'] 58 | 59 | clipwise_output = torch.log_softmax(self.fc_transfer(embedding), dim=-1) 60 | output_dict['clipwise_output'] = clipwise_output 61 | 62 | return output_dict 63 | 64 | 65 | def train(args): 66 | 67 | # Arugments & parameters 68 | sample_rate = args.sample_rate 69 | window_size = args.window_size 70 | hop_size = args.hop_size 71 | mel_bins = args.mel_bins 72 | fmin = args.fmin 73 | fmax = args.fmax 74 | model_type = args.model_type 75 | pretrained_checkpoint_path = args.pretrained_checkpoint_path 76 | freeze_base = args.freeze_base 77 | device = 'cuda' if (args.cuda and torch.cuda.is_available()) else 'cpu' 78 | 79 | classes_num = config.classes_num 80 | pretrain = True if pretrained_checkpoint_path else False 81 | 82 | # Model 83 | Model = eval(model_type) 84 | model = Model(sample_rate, window_size, hop_size, mel_bins, fmin, fmax, 85 | classes_num, freeze_base) 86 | 87 | # Load pretrained model 
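    # (only the Cnn14 backbone, i.e. model.base, is restored by load_from_pretrain;
    # the new fc_transfer classification head keeps its random initialization and is
    # trained from scratch on the target task)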
88 | if pretrain: 89 | logging.info('Load pretrained model from {}'.format(pretrained_checkpoint_path)) 90 | model.load_from_pretrain(pretrained_checkpoint_path) 91 | 92 | # Parallel 93 | print('GPU number: {}'.format(torch.cuda.device_count())) 94 | model = torch.nn.DataParallel(model) 95 | 96 | if 'cuda' in device: 97 | model.to(device) 98 | 99 | print('Load pretrained model successfully!') 100 | 101 | 102 | if __name__ == '__main__': 103 | parser = argparse.ArgumentParser(description='Example of parser. ') 104 | subparsers = parser.add_subparsers(dest='mode') 105 | 106 | # Train 107 | parser_train = subparsers.add_parser('train') 108 | parser_train.add_argument('--sample_rate', type=int, required=True) 109 | parser_train.add_argument('--window_size', type=int, required=True) 110 | parser_train.add_argument('--hop_size', type=int, required=True) 111 | parser_train.add_argument('--mel_bins', type=int, required=True) 112 | parser_train.add_argument('--fmin', type=int, required=True) 113 | parser_train.add_argument('--fmax', type=int, required=True) 114 | parser_train.add_argument('--model_type', type=str, required=True) 115 | parser_train.add_argument('--pretrained_checkpoint_path', type=str) 116 | parser_train.add_argument('--freeze_base', action='store_true', default=False) 117 | parser_train.add_argument('--cuda', action='store_true', default=False) 118 | 119 | # Parse arguments 120 | args = parser.parse_args() 121 | args.filename = get_filename(__file__) 122 | 123 | if args.mode == 'train': 124 | train(args) 125 | 126 | else: 127 | raise Exception('Error argument!') -------------------------------------------------------------------------------- /feature_extraction/audio/panns/losses.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn.functional as F 3 | 4 | 5 | def clip_bce(output_dict, target_dict): 6 | """Binary crossentropy loss. 
7 | """ 8 | return F.binary_cross_entropy( 9 | output_dict['clipwise_output'], target_dict['target']) 10 | 11 | 12 | def get_loss_func(loss_type): 13 | if loss_type == 'clip_bce': 14 | return clip_bce -------------------------------------------------------------------------------- /feature_extraction/audio/run.sh: -------------------------------------------------------------------------------- 1 | python extract_handcrafted_feature.py --dataset='CHEAVD' --feature_extractor='pyAudio' --feature_set='pyAudio' --feature_level='UTTERANCE' 2 | python extract_handcrafted_feature.py --dataset='CHEAVD' --feature_extractor='opensmile' --feature_set='IS09' --feature_level='UTTERANCE' 3 | python extract_handcrafted_feature.py --dataset='CHEAVD' --feature_extractor='opensmile' --feature_set='IS10' --feature_level='UTTERANCE' 4 | python extract_handcrafted_feature.py --dataset='CHEAVD' --feature_extractor='opensmile' --feature_set='IS13' --feature_level='UTTERANCE' 5 | python extract_handcrafted_feature.py --dataset='CHEAVD' --feature_extractor='opensmile' --feature_set='eGeMAPS' --feature_level='UTTERANCE' 6 | python extract_handcrafted_feature.py --dataset='CHEAVD' --feature_extractor='Librosa' --feature_set='mel_spec' --feature_level='UTTERANCE' 7 | python extract_handcrafted_feature.py --dataset='CHEAVD' --feature_extractor='Librosa' --feature_set='mfcc' --feature_level='UTTERANCE' 8 | python extract_wav2vec_embedding.py --dataset='CHEAVD' --feature_level='UTTERANCE' --gpu=0 9 | python extract_wav2vec2_embedding.py --dataset='CHEAVD' --model_name='wav2vec2-base' --feature_level='UTTERANCE' --gpu=0 10 | python extract_wav2vec2_embedding.py --dataset='CHEAVD' --model_name='wav2vec2-base-960h' --feature_level='UTTERANCE' --gpu=0 11 | python extract_wav2vec2_embedding.py --dataset='CHEAVD' --model_name='wav2vec2-large-960h' --feature_level='UTTERANCE' --gpu=0 12 | python extract_panns_embedding.py --dataset='CHEAVD' --feature_level='UTTERANCE' --gpu=0 13 | python extract_vggish_embedding.py --dataset='CHEAVD' --feature_level='UTTERANCE' --gpu=0 -------------------------------------------------------------------------------- /feature_extraction/audio/smile.log: -------------------------------------------------------------------------------- 1 | [ 13.01.2022 - 16:42:06 ] 2 | (MSG) [2] in SMILExtract : openSMILE starting! 3 | [ 13.01.2022 - 16:42:06 ] 4 | (MSG) [2] in SMILExtract : config file is: /share/home/lianzheng/tools/opensmile-2.3.0/config/gemaps/eGeMAPSv01a.conf 5 | [ 13.01.2022 - 16:42:06 ] 6 | (MSG) [2] in cComponentManager : successfully registered 96 component types. 7 | [ 13.01.2022 - 16:42:06 ] 8 | (MSG) [2] in instance 'gemapsv01a_logSpectral' : logSpecFloor = -140.00 (specFloor = 1.000000e-14) 9 | [ 13.01.2022 - 16:42:06 ] 10 | (MSG) [2] in instance 'egemapsv01a_logSpectral_flux' : logSpecFloor = -140.00 (specFloor = 1.000000e-14) 11 | [ 13.01.2022 - 16:42:06 ] 12 | (MSG) [2] in instance 'lldsink' : No filename given, disabling this sink component. 13 | [ 13.01.2022 - 16:42:06 ] 14 | (MSG) [2] in instance 'lldhtksink' : No filename given, disabling this sink component. 15 | [ 13.01.2022 - 16:42:06 ] 16 | (MSG) [2] in instance 'lldarffsink' : No filename given, disabling this sink component. 17 | [ 13.01.2022 - 16:42:06 ] 18 | (MSG) [2] in instance 'arffsink' : No filename given, disabling this sink component. 19 | [ 13.01.2022 - 16:42:06 ] 20 | (MSG) [2] in instance 'htksink' : No filename given, disabling this sink component. 
21 | [ 13.01.2022 - 16:42:06 ] 22 | (WARN) [1] in instance 'gemapsv01a_formantVoiced.reader' : Mismatch in input level buffer sizes (levelconf.nT). Level #0 has size 5 which is smaller than the max. input size of all input levels (150). This might cause the processing to hang unpredictably or cause incomplete processing. 23 | [ 13.01.2022 - 16:42:06 ] 24 | (WARN) [1] in instance 'gemapsv01a_logSpectralVoiced.reader' : Mismatch in input level buffer sizes (levelconf.nT). Level #0 has size 5 which is smaller than the max. input size of all input levels (150). This might cause the processing to hang unpredictably or cause incomplete processing. 25 | [ 13.01.2022 - 16:42:06 ] 26 | (WARN) [1] in instance 'gemapsv01a_logSpectralUnvoiced.reader' : Mismatch in input level buffer sizes (levelconf.nT). Level #0 has size 5 which is smaller than the max. input size of all input levels (150). This might cause the processing to hang unpredictably or cause incomplete processing. 27 | [ 13.01.2022 - 16:42:06 ] 28 | (WARN) [1] in instance 'egemapsv01a_logSpectralVoiced.reader' : Mismatch in input level buffer sizes (levelconf.nT). Level #0 has size 5 which is smaller than the max. input size of all input levels (150). This might cause the processing to hang unpredictably or cause incomplete processing. 29 | [ 13.01.2022 - 16:42:06 ] 30 | (WARN) [1] in instance 'egemapsv01a_logSpectralUnvoiced.reader' : Mismatch in input level buffer sizes (levelconf.nT). Level #0 has size 5 which is smaller than the max. input size of all input levels (150). This might cause the processing to hang unpredictably or cause incomplete processing. 31 | [ 13.01.2022 - 16:42:06 ] 32 | (MSG) [2] in cComponentManager : successfully finished createInstances 33 | (77 component instances were finalised, 1 data memories were finalised) 34 | [ 13.01.2022 - 16:42:06 ] 35 | (MSG) [2] in cComponentManager : starting single thread processing loop 36 | [ 13.01.2022 - 16:42:06 ] 37 | (MSG) [2] in cComponentManager : Processing finished! System ran for 603 ticks. 
38 | -------------------------------------------------------------------------------- /feature_extraction/audio/vggish/__pycache__/mel_features.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zeroQiaoba/GCNet/8cce4a0c9a50172abfa79971e77c71c40c1d733d/feature_extraction/audio/vggish/__pycache__/mel_features.cpython-38.pyc -------------------------------------------------------------------------------- /feature_extraction/audio/vggish/__pycache__/vggish_input.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zeroQiaoba/GCNet/8cce4a0c9a50172abfa79971e77c71c40c1d733d/feature_extraction/audio/vggish/__pycache__/vggish_input.cpython-38.pyc -------------------------------------------------------------------------------- /feature_extraction/audio/vggish/__pycache__/vggish_params.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zeroQiaoba/GCNet/8cce4a0c9a50172abfa79971e77c71c40c1d733d/feature_extraction/audio/vggish/__pycache__/vggish_params.cpython-38.pyc -------------------------------------------------------------------------------- /feature_extraction/audio/vggish/__pycache__/vggish_slim.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zeroQiaoba/GCNet/8cce4a0c9a50172abfa79971e77c71c40c1d733d/feature_extraction/audio/vggish/__pycache__/vggish_slim.cpython-38.pyc -------------------------------------------------------------------------------- /feature_extraction/audio/vggish/vggish_input.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 The TensorFlow Authors All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | """Compute input examples for VGGish from audio waveform.""" 17 | 18 | import numpy as np 19 | import resampy # verison: 0.2.2, pip install resampy 20 | import math 21 | from vggish import mel_features 22 | from vggish import vggish_params 23 | 24 | try: 25 | import soundfile as sf 26 | 27 | def wav_read(wav_file): 28 | wav_data, sr = sf.read(wav_file, dtype='int16') 29 | return wav_data, sr 30 | 31 | except ImportError: 32 | 33 | def wav_read(wav_file): 34 | raise NotImplementedError('WAV file reading requires soundfile package.') 35 | 36 | 37 | def waveform_to_examples(data, sample_rate, hop_sec): 38 | """Converts audio waveform into an array of examples for VGGish. 39 | 40 | Args: 41 | data: np.array of either one dimension (mono) or two dimensions 42 | (multi-channel, with the outer dimension representing channels). 43 | Each sample is generally expected to lie in the range [-1.0, +1.0], 44 | although this is not required. 45 | sample_rate: Sample rate of data. 
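    hop_sec: Hop (in seconds) between successive examples; this argument replaces
      the fixed vggish_params.EXAMPLE_HOP_SECONDS used in the original VGGish code.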
46 | 47 | Returns: 48 | 3-D np.array of shape [num_examples, num_frames, num_bands] which represents 49 | a sequence of examples, each of which contains a patch of log mel 50 | spectrogram, covering num_frames frames of audio and num_bands mel frequency 51 | bands, where the frame length is vggish_params.STFT_HOP_LENGTH_SECONDS. 52 | """ 53 | # Convert to mono. 54 | if len(data.shape) > 1: 55 | data = np.mean(data, axis=1) 56 | # Resample to the rate assumed by VGGish. 57 | if sample_rate != vggish_params.SAMPLE_RATE: 58 | data = resampy.resample(data, sample_rate, vggish_params.SAMPLE_RATE) 59 | 60 | # Compute log mel spectrogram features. 61 | log_mel = mel_features.log_mel_spectrogram( 62 | data, 63 | audio_sample_rate=vggish_params.SAMPLE_RATE, 64 | log_offset=vggish_params.LOG_OFFSET, 65 | window_length_secs=vggish_params.STFT_WINDOW_LENGTH_SECONDS, 66 | hop_length_secs=vggish_params.STFT_HOP_LENGTH_SECONDS, 67 | num_mel_bins=vggish_params.NUM_MEL_BINS, 68 | lower_edge_hertz=vggish_params.MEL_MIN_HZ, 69 | upper_edge_hertz=vggish_params.MEL_MAX_HZ) 70 | 71 | # Frame features into examples. 72 | features_sample_rate = 1.0 / vggish_params.STFT_HOP_LENGTH_SECONDS 73 | example_window_length = int(round( 74 | vggish_params.EXAMPLE_WINDOW_SECONDS * features_sample_rate)) 75 | example_hop_length = int(round( 76 | hop_sec * features_sample_rate)) 77 | # vggish_params.EXAMPLE_HOP_SECONDS * features_sample_rate)) # orginal 78 | log_mel_examples = mel_features.frame( 79 | log_mel, 80 | window_length=example_window_length, 81 | hop_length=example_hop_length) 82 | return log_mel_examples 83 | 84 | 85 | def wavfile_to_examples(wav_file, hop_sec): 86 | """Convenience wrapper around waveform_to_examples() for a common WAV format. 87 | 88 | Args: 89 | wav_file: String path to a file, or a file-like object. The file 90 | is assumed to contain WAV audio data with signed 16-bit PCM samples. 91 | 92 | Returns: 93 | See waveform_to_examples. 94 | """ 95 | wav_data, sr = wav_read(wav_file) 96 | assert wav_data.dtype == np.int16, 'Bad sample type: %r' % wav_data.dtype 97 | samples = wav_data / 32768.0 # Convert to [-1.0, +1.0] 98 | 99 | ### process for samples < 1000ms, pad to longer than 1000ms 100 | if len(samples) < sr: 101 | samples = samples.tolist() 102 | samples = samples * math.ceil(sr/len(samples)) 103 | samples = np.array(samples) 104 | 105 | return waveform_to_examples(samples, sr, hop_sec) 106 | -------------------------------------------------------------------------------- /feature_extraction/audio/vggish/vggish_params.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 The TensorFlow Authors All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | """Global parameters for the VGGish model. 17 | 18 | See vggish_slim.py for more information. 19 | """ 20 | 21 | # Architectural constants. 
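# Each input patch covers EXAMPLE_WINDOW_SECONDS of audio at STFT_HOP_LENGTH_SECONDS
# per frame, i.e. 0.96 / 0.010 = 96 frames, which is where NUM_FRAMES below comes from.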
22 | NUM_FRAMES = 96 # Frames in input mel-spectrogram patch. 23 | NUM_BANDS = 64 # Frequency bands in input mel-spectrogram patch. 24 | EMBEDDING_SIZE = 128 # Size of embedding layer. 25 | 26 | # Hyperparameters used in feature and example generation. 27 | SAMPLE_RATE = 16000 28 | STFT_WINDOW_LENGTH_SECONDS = 0.025 29 | STFT_HOP_LENGTH_SECONDS = 0.010 30 | NUM_MEL_BINS = NUM_BANDS 31 | MEL_MIN_HZ = 125 32 | MEL_MAX_HZ = 7500 33 | LOG_OFFSET = 0.01 # Offset used for stabilized log of input mel-spectrogram. 34 | EXAMPLE_WINDOW_SECONDS = 0.96 # Each example contains 96 10ms frames 35 | # Note: original value for EXAMPLE_HOP_SECONDS is 0.96, i.e. no overlapping between adjacent examples 36 | # EXAMPLE_HOP_SECONDS = 0.25 # with zero overlap. 37 | 38 | # Parameters used for embedding postprocessing. 39 | PCA_EIGEN_VECTORS_NAME = 'pca_eigen_vectors' 40 | PCA_MEANS_NAME = 'pca_means' 41 | QUANTIZE_MIN_VAL = -2.0 42 | QUANTIZE_MAX_VAL = +2.0 43 | 44 | # Hyperparameters used in training. 45 | INIT_STDDEV = 0.01 # Standard deviation used to initialize weights. 46 | LEARNING_RATE = 1e-4 # Learning rate for the Adam optimizer. 47 | ADAM_EPSILON = 1e-8 # Epsilon for the Adam optimizer. 48 | 49 | # Names of ops, tensors, and features. 50 | INPUT_OP_NAME = 'vggish/input_features' 51 | INPUT_TENSOR_NAME = INPUT_OP_NAME + ':0' 52 | OUTPUT_OP_NAME = 'vggish/embedding' 53 | OUTPUT_TENSOR_NAME = OUTPUT_OP_NAME + ':0' 54 | AUDIO_EMBEDDING_FEATURE_NAME = 'audio_embedding' 55 | -------------------------------------------------------------------------------- /feature_extraction/audio/vggish/vggish_pca_params.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zeroQiaoba/GCNet/8cce4a0c9a50172abfa79971e77c71c40c1d733d/feature_extraction/audio/vggish/vggish_pca_params.npz -------------------------------------------------------------------------------- /feature_extraction/audio/vggish/vggish_postprocess.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 The TensorFlow Authors All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | """Post-process embeddings from VGGish.""" 17 | 18 | import numpy as np 19 | 20 | import vggish_params 21 | 22 | 23 | class Postprocessor(object): 24 | """Post-processes VGGish embeddings. 25 | 26 | The initial release of AudioSet included 128-D VGGish embeddings for each 27 | segment of AudioSet. These released embeddings were produced by applying 28 | a PCA transformation (technically, a whitening transform is included as well) 29 | and 8-bit quantization to the raw embedding output from VGGish, in order to 30 | stay compatible with the YouTube-8M project which provides visual embeddings 31 | in the same format for a large set of YouTube videos. 
This class implements 32 | the same PCA (with whitening) and quantization transformations. 33 | """ 34 | 35 | def __init__(self, pca_params_npz_path): 36 | """Constructs a postprocessor. 37 | 38 | Args: 39 | pca_params_npz_path: Path to a NumPy-format .npz file that 40 | contains the PCA parameters used in postprocessing. 41 | """ 42 | params = np.load(pca_params_npz_path) 43 | self._pca_matrix = params[vggish_params.PCA_EIGEN_VECTORS_NAME] 44 | # Load means into a column vector for easier broadcasting later. 45 | self._pca_means = params[vggish_params.PCA_MEANS_NAME].reshape(-1, 1) 46 | assert self._pca_matrix.shape == ( 47 | vggish_params.EMBEDDING_SIZE, vggish_params.EMBEDDING_SIZE), ( 48 | 'Bad PCA matrix shape: %r' % (self._pca_matrix.shape,)) 49 | assert self._pca_means.shape == (vggish_params.EMBEDDING_SIZE, 1), ( 50 | 'Bad PCA means shape: %r' % (self._pca_means.shape,)) 51 | 52 | def postprocess(self, embeddings_batch): 53 | """Applies postprocessing to a batch of embeddings. 54 | 55 | Args: 56 | embeddings_batch: An nparray of shape [batch_size, embedding_size] 57 | containing output from the embedding layer of VGGish. 58 | 59 | Returns: 60 | An nparray of the same shape as the input but of type uint8, 61 | containing the PCA-transformed and quantized version of the input. 62 | """ 63 | assert len(embeddings_batch.shape) == 2, ( 64 | 'Expected 2-d batch, got %r' % (embeddings_batch.shape,)) 65 | assert embeddings_batch.shape[1] == vggish_params.EMBEDDING_SIZE, ( 66 | 'Bad batch shape: %r' % (embeddings_batch.shape,)) 67 | 68 | # Apply PCA. 69 | # - Embeddings come in as [batch_size, embedding_size]. 70 | # - Transpose to [embedding_size, batch_size]. 71 | # - Subtract pca_means column vector from each column. 72 | # - Premultiply by PCA matrix of shape [output_dims, input_dims] 73 | # where both are are equal to embedding_size in our case. 74 | # - Transpose result back to [batch_size, embedding_size]. 75 | pca_applied = np.dot(self._pca_matrix, 76 | (embeddings_batch.T - self._pca_means)).T 77 | 78 | # Quantize by: 79 | # - clipping to [min, max] range 80 | clipped_embeddings = np.clip( 81 | pca_applied, vggish_params.QUANTIZE_MIN_VAL, 82 | vggish_params.QUANTIZE_MAX_VAL) 83 | # - convert to 8-bit in range [0.0, 255.0] 84 | quantized_embeddings = ( 85 | (clipped_embeddings - vggish_params.QUANTIZE_MIN_VAL) * 86 | (255.0 / 87 | (vggish_params.QUANTIZE_MAX_VAL - vggish_params.QUANTIZE_MIN_VAL))) 88 | # - cast 8-bit float to uint8 89 | quantized_embeddings = quantized_embeddings.astype(np.uint8) 90 | 91 | return quantized_embeddings 92 | -------------------------------------------------------------------------------- /feature_extraction/audio/vggish/vggish_smoke_test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 The TensorFlow Authors All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | # ============================================================================== 15 | 16 | """A smoke test for VGGish. 17 | 18 | This is a simple smoke test of a local install of VGGish and its associated 19 | downloaded files. We create a synthetic sound, extract log mel spectrogram 20 | features, run them through VGGish, post-process the embedding ouputs, and 21 | check some simple statistics of the results, allowing for variations that 22 | might occur due to platform/version differences in the libraries we use. 23 | 24 | Usage: 25 | - Download the VGGish checkpoint and PCA parameters into the same directory as 26 | the VGGish source code. If you keep them elsewhere, update the checkpoint_path 27 | and pca_params_path variables below. 28 | - Run: 29 | $ python vggish_smoke_test.py 30 | """ 31 | 32 | from __future__ import print_function 33 | 34 | import numpy as np 35 | import tensorflow.compat.v1 as tf 36 | import os 37 | os.environ['CUDA_VISIBLE_DEVICES'] = '6' 38 | tf.disable_v2_behavior() 39 | 40 | import vggish_input 41 | import vggish_params 42 | import vggish_postprocess 43 | import vggish_slim 44 | 45 | print('\nTesting your install of VGGish\n') 46 | 47 | # Paths to downloaded VGGish files. 48 | checkpoint_path = 'vggish_model.ckpt' 49 | pca_params_path = 'vggish_pca_params.npz' 50 | 51 | # Relative tolerance of errors in mean and standard deviation of embeddings. 52 | rel_error = 0.1 # Up to 10% 53 | 54 | # Generate a 1 kHz sine wave at 44.1 kHz (we use a high sampling rate 55 | # to test resampling to 16 kHz during feature extraction). 56 | num_secs = 3 57 | freq = 1000 58 | sr = 44100 59 | t = np.linspace(0, num_secs, int(num_secs * sr)) 60 | x = np.sin(2 * np.pi * freq * t) 61 | 62 | # Produce a batch of log mel spectrogram examples. 63 | input_batch = vggish_input.waveform_to_examples(x, sr) 64 | print('Log Mel Spectrogram example: ', input_batch[0]) 65 | np.testing.assert_equal( 66 | input_batch.shape, 67 | [num_secs, vggish_params.NUM_FRAMES, vggish_params.NUM_BANDS]) 68 | 69 | # Define VGGish, load the checkpoint, and run the batch through the model to 70 | # produce embeddings. 71 | with tf.Graph().as_default(), tf.Session() as sess: 72 | vggish_slim.define_vggish_slim() 73 | vggish_slim.load_vggish_slim_checkpoint(sess, checkpoint_path) 74 | 75 | features_tensor = sess.graph.get_tensor_by_name( 76 | vggish_params.INPUT_TENSOR_NAME) 77 | embedding_tensor = sess.graph.get_tensor_by_name( 78 | vggish_params.OUTPUT_TENSOR_NAME) 79 | [embedding_batch] = sess.run([embedding_tensor], 80 | feed_dict={features_tensor: input_batch}) 81 | print('VGGish embedding: ', embedding_batch[0]) 82 | expected_embedding_mean = 0.131 83 | expected_embedding_std = 0.238 84 | np.testing.assert_allclose( 85 | [np.mean(embedding_batch), np.std(embedding_batch)], 86 | [expected_embedding_mean, expected_embedding_std], 87 | rtol=rel_error) 88 | 89 | # Postprocess the results to produce whitened quantized embeddings. 
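# The Postprocessor applies the released AudioSet PCA (with whitening) and then
# quantizes each value: clip to [QUANTIZE_MIN_VAL, QUANTIZE_MAX_VAL] = [-2.0, +2.0],
# rescale linearly to [0, 255] and cast to uint8 (see vggish_postprocess.py above).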
90 | pproc = vggish_postprocess.Postprocessor(pca_params_path) 91 | postprocessed_batch = pproc.postprocess(embedding_batch) 92 | print('Postprocessed VGGish embedding: ', postprocessed_batch[0]) 93 | expected_postprocessed_mean = 123.0 94 | expected_postprocessed_std = 75.0 95 | np.testing.assert_allclose( 96 | [np.mean(postprocessed_batch), np.std(postprocessed_batch)], 97 | [expected_postprocessed_mean, expected_postprocessed_std], 98 | rtol=rel_error) 99 | 100 | print('\nLooks Good To Me!\n') 101 | -------------------------------------------------------------------------------- /feature_extraction/text/__pycache__/util.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zeroQiaoba/GCNet/8cce4a0c9a50172abfa79971e77c71c40c1d733d/feature_extraction/text/__pycache__/util.cpython-38.pyc -------------------------------------------------------------------------------- /feature_extraction/text/util.py: -------------------------------------------------------------------------------- 1 | # *_*coding:utf-8 *_* 2 | import os 3 | import re 4 | import pandas as pd 5 | import numpy as np 6 | import unicodedata 7 | 8 | 9 | def write_feature_to_csv(embeddings, timestamps, words, csv_file, log_file=None, embedding_dim=None): 10 | # get label file 11 | vid = os.path.basename(os.path.splitext(csv_file)[0]) 12 | # label_dir = os.path.abspath(os.path.join(os.path.dirname(csv_file), '../../label_segments/arousal')) 13 | # assert os.path.exists(label_dir), f'Error: label dir "{label_dir}" does not exist!' 14 | save_dir = os.path.dirname(csv_file) 15 | task_id = int(re.search('c(\d)_muse_', save_dir).group(1)) # infer the task id from save_dir (naive/unelegant approach) 16 | if task_id == 2: # for task "c2" 17 | rel_path = '../au' # use csv file in "au" feature as reference beacause of there is no timestamp in the label file 18 | elif task_id == 4: # for task "c4" 19 | rel_path = '../../label_segments/anno12_EDA' # no arousal label for this task 20 | else: 21 | rel_path = '../../label_segments/arousal' 22 | label_dir = os.path.abspath(os.path.join(save_dir, rel_path)) 23 | assert os.path.exists(label_dir), f'Error: label dir "{label_dir}" does not exist!' 
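    # the label csv of this video supplies the reference frame timestamps onto which
    # the word-level embeddings are aligned below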
24 | label_file = os.path.join(label_dir, f'{vid}.csv') 25 | df_label = pd.read_csv(label_file) 26 | meta_columns = ['timestamp', 'segment_id'] 27 | metas = df_label[meta_columns].values 28 | label_timestamps = metas[:,0] 29 | # align word, timestamp & embedding 30 | # embedding_dim = len(embeddings[0]) # use the argument "embedding_dim" instead, in case of embeddings is [] 31 | n_frames = len(label_timestamps) 32 | aligned_embeddings = np.zeros((n_frames, embedding_dim)) 33 | aligned_timestamps = np.empty((n_frames, 2), dtype=np.object) 34 | aligned_words = np.empty((n_frames,), dtype=np.object) 35 | label_timestamp_idxs = np.arange(n_frames) 36 | hit_count = 0 37 | for i, (s_t, e_t) in enumerate(timestamps): 38 | idxs = label_timestamp_idxs[np.where((label_timestamps >= s_t) & (label_timestamps < e_t))] 39 | if len(idxs) > 0: 40 | aligned_embeddings[idxs] = embeddings[i] 41 | aligned_timestamps[idxs] = [int(s_t), int(e_t)] 42 | aligned_words[idxs] = words[i] 43 | hit_count += len(idxs) 44 | print(f'Video "{vid}" hit rate: {hit_count/n_frames:.1%}.') 45 | # write csv file 46 | columns = meta_columns + [str(i) for i in range(embedding_dim)] 47 | data = np.column_stack([metas, aligned_embeddings]) 48 | df = pd.DataFrame(data=data, columns=columns) 49 | df[meta_columns] = df[meta_columns].astype(np.int64) 50 | df.to_csv(csv_file, index=False) 51 | # write log file 52 | if log_file is not None: 53 | log_columns = meta_columns + ['start', 'end', 'word'] 54 | log_data = np.column_stack([metas, aligned_timestamps, aligned_words]) 55 | log_df = pd.DataFrame(data=log_data, columns=log_columns) 56 | log_df[meta_columns] = log_df[meta_columns].astype(np.int64) 57 | if not os.path.exists(os.path.dirname(log_file)): 58 | os.makedirs(os.path.dirname(log_file)) 59 | log_df.to_csv(log_file, index=False) 60 | return data 61 | 62 | 63 | 64 | 65 | def load_glove(embedding_file): 66 | embeddings = {} 67 | with open(embedding_file, 'r') as f: 68 | for line in f.readlines(): 69 | splited_line = line.split(' ') 70 | word = splited_line[0] 71 | embedding = np.array([float(val) for val in splited_line[1:]]) # to numpy 72 | embeddings[word] = embedding 73 | embedding_dim = len(embedding) 74 | return embeddings, embedding_dim 75 | 76 | 77 | def load_word2vec(embedding_file): 78 | import gensim 79 | model = gensim.models.KeyedVectors.load_word2vec_format(embedding_file, binary=True) 80 | # embeddings = dict(zip(model.vocab, model.vectors)) # for Gensim 3.x 81 | embedding_dim = model.vector_size 82 | return model, embedding_dim 83 | 84 | 85 | # strip accent in unicode string 86 | def strip_accent(string): 87 | return ''.join( 88 | character for character in unicodedata.normalize('NFD', string) 89 | if unicodedata.category(character) != 'Mn' 90 | ) 91 | 92 | 93 | 94 | 95 | if __name__ == '__main__': 96 | main() -------------------------------------------------------------------------------- /feature_extraction/visual/__pycache__/config.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zeroQiaoba/GCNet/8cce4a0c9a50172abfa79971e77c71c40c1d733d/feature_extraction/visual/__pycache__/config.cpython-38.pyc -------------------------------------------------------------------------------- /feature_extraction/visual/__pycache__/dataset.cpython-38.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/zeroQiaoba/GCNet/8cce4a0c9a50172abfa79971e77c71c40c1d733d/feature_extraction/visual/__pycache__/dataset.cpython-38.pyc -------------------------------------------------------------------------------- /feature_extraction/visual/__pycache__/util.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zeroQiaoba/GCNet/8cce4a0c9a50172abfa79971e77c71c40c1d733d/feature_extraction/visual/__pycache__/util.cpython-38.pyc -------------------------------------------------------------------------------- /feature_extraction/visual/dataset.py: -------------------------------------------------------------------------------- 1 | # *_*coding:utf-8 *_* 2 | import os 3 | import glob 4 | from PIL import Image 5 | from skimage import io 6 | import torch.utils.data as data 7 | 8 | 9 | class FaceDataset(data.Dataset): 10 | def __init__(self, vid, face_dir, transform=None): 11 | super(FaceDataset, self).__init__() 12 | self.vid = vid 13 | self.path = os.path.join(face_dir, vid) 14 | self.transform = transform 15 | self.frames = self.get_frames() 16 | 17 | def get_frames(self): 18 | frames = glob.glob(os.path.join(self.path, '*')) 19 | # if len(frames) == 0: 20 | # raise ValueError("number of frames of video {} should not be zero.".format(self.vid)) 21 | # frames = sorted(frames, key=lambda x: int(os.path.basename(os.path.splitext(x)[0]))) 22 | # frame_ids = [int(os.path.basename(os.path.splitext(file)[0])) for file in frames] 23 | 24 | return frames 25 | 26 | def __len__(self): 27 | return len(self.frames) 28 | 29 | def __getitem__(self, index): 30 | path = self.frames[index] 31 | img = Image.open(path) 32 | if self.transform is not None: 33 | img = self.transform(img) 34 | # fid = int(os.path.basename(os.path.splitext(path)[0])) 35 | name = os.path.basename(path)[:-4] 36 | return img, name 37 | 38 | 39 | 40 | class FaceDatasetForEmoNet(data.Dataset): 41 | def __init__(self, vid, face_dir, transform=None, augmentor=None): 42 | super(FaceDatasetForEmoNet, self).__init__() 43 | self.vid = vid 44 | self.path = os.path.join(face_dir, vid) 45 | self.augmentor = augmentor 46 | self.transform = transform 47 | self.frames = self.get_frames() 48 | 49 | def get_frames(self): 50 | frames = glob.glob(os.path.join(self.path, '*')) 51 | # frames = sorted(frames, key=lambda x: int(os.path.basename(os.path.splitext(x)[0]))) 52 | return frames 53 | 54 | def __len__(self): 55 | return len(self.frames) 56 | 57 | def __getitem__(self, index): 58 | path = self.frames[index] 59 | img = io.imread(path) 60 | if self.augmentor is not None: 61 | img = self.augmentor(img)[0] 62 | if self.transform is not None: 63 | img = self.transform(img) 64 | # fid = int(os.path.basename(os.path.splitext(path)[0])) 65 | # return img, fid 66 | name = os.path.basename(path)[:-4] 67 | return img, name -------------------------------------------------------------------------------- /feature_extraction/visual/emonet/__init__.py: -------------------------------------------------------------------------------- 1 | __version__ = '0.1.0' 2 | -------------------------------------------------------------------------------- /feature_extraction/visual/emonet/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zeroQiaoba/GCNet/8cce4a0c9a50172abfa79971e77c71c40c1d733d/feature_extraction/visual/emonet/__pycache__/__init__.cpython-38.pyc 
-------------------------------------------------------------------------------- /feature_extraction/visual/emonet/__pycache__/data_augmentation.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zeroQiaoba/GCNet/8cce4a0c9a50172abfa79971e77c71c40c1d733d/feature_extraction/visual/emonet/__pycache__/data_augmentation.cpython-38.pyc -------------------------------------------------------------------------------- /feature_extraction/visual/emonet/data/__init__.py: -------------------------------------------------------------------------------- 1 | from .affecnet import AffectNet 2 | -------------------------------------------------------------------------------- /feature_extraction/visual/emonet/metrics.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def ACC(ground_truth, predictions): 5 | """Evaluates the mean accuracy 6 | """ 7 | return np.mean(ground_truth.astype(int) == predictions.astype(int)) 8 | 9 | def RMSE(ground_truth, predictions): 10 | """ 11 | Evaluates the RMSE between estimate and ground truth. 12 | """ 13 | return np.sqrt(np.mean((ground_truth-predictions)**2)) 14 | 15 | 16 | def SAGR(ground_truth, predictions): 17 | """ 18 | Evaluates the SAGR between estimate and ground truth. 19 | """ 20 | return np.mean(np.sign(ground_truth) == np.sign(predictions)) 21 | 22 | 23 | def PCC(ground_truth, predictions): 24 | """ 25 | Evaluates the Pearson Correlation Coefficient. 26 | Inputs are numpy arrays. 27 | Corr = Cov(GT, Est)/(std(GT)std(Est)) 28 | """ 29 | return np.corrcoef(ground_truth, predictions)[0,1] 30 | 31 | 32 | def CCC(ground_truth, predictions): 33 | """ 34 | Evaluates the Concordance Correlation Coefficient. 35 | Inputs are numpy arrays. 
36 | """ 37 | mean_pred = np.mean(predictions) 38 | mean_gt = np.mean(ground_truth) 39 | 40 | std_pred= np.std(predictions) 41 | std_gt = np.std(ground_truth) 42 | 43 | pearson = PCC(ground_truth, predictions) 44 | return 2.0*pearson*std_pred*std_gt/(std_pred**2+std_gt**2+(mean_pred-mean_gt)**2) 45 | 46 | def ICC(labels, predictions): 47 | """Evaluates the ICC(3, 1) 48 | """ 49 | naus = predictions.shape[1] 50 | icc = np.zeros(naus) 51 | 52 | n = predictions.shape[0] 53 | 54 | for i in range(0,naus): 55 | a = np.asmatrix(labels[:,i]).transpose() 56 | b = np.asmatrix(predictions[:,i]).transpose() 57 | dat = np.hstack((a, b)) 58 | mpt = np.mean(dat, axis=1) 59 | mpr = np.mean(dat, axis=0) 60 | tm = np.mean(mpt, axis=0) 61 | BSS = np.sum(np.square(mpt-tm))*2 62 | BMS = BSS/(n-1) 63 | RSS = np.sum(np.square(mpr-tm))*n 64 | tmp = np.square(dat - np.hstack((mpt,mpt))) 65 | WSS = np.sum(np.sum(tmp, axis=1)) 66 | ESS = WSS - RSS 67 | EMS = ESS/(n-1) 68 | icc[i] = (BMS - EMS)/(BMS + EMS) 69 | 70 | return icc 71 | -------------------------------------------------------------------------------- /feature_extraction/visual/emonet/models/__init__.py: -------------------------------------------------------------------------------- 1 | from .emonet import EmoNet 2 | 3 | -------------------------------------------------------------------------------- /feature_extraction/visual/emonet/models/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zeroQiaoba/GCNet/8cce4a0c9a50172abfa79971e77c71c40c1d733d/feature_extraction/visual/emonet/models/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /feature_extraction/visual/emonet/models/__pycache__/emonet.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zeroQiaoba/GCNet/8cce4a0c9a50172abfa79971e77c71c40c1d733d/feature_extraction/visual/emonet/models/__pycache__/emonet.cpython-38.pyc -------------------------------------------------------------------------------- /feature_extraction/visual/extract_emonet_embedding.py: -------------------------------------------------------------------------------- 1 | # *_*coding:utf-8 *_* 2 | import os 3 | import argparse 4 | from tqdm import tqdm 5 | import torch 6 | import torch.nn.parallel 7 | import torch.optim 8 | import torch.utils.data 9 | import torch.utils.data.distributed 10 | import torchvision.transforms as transforms 11 | import numpy as np 12 | 13 | from emonet.models.emonet import EmoNet 14 | from dataset import FaceDatasetForEmoNet 15 | from util import write_feature_to_csv, get_vids, write_feature_to_npy 16 | from emonet.data_augmentation import DataAugmentor 17 | 18 | # import config 19 | import sys 20 | sys.path.append('../../') 21 | import config 22 | 23 | def extract(data_loader, model): 24 | model.eval() 25 | with torch.no_grad(): 26 | features, timestamps = [], [] 27 | for images, names in tqdm(data_loader): 28 | images = images.cuda() 29 | embedding = model(images, return_embedding=True) 30 | features.append(embedding.cpu().detach().numpy()) 31 | timestamps.extend(names) 32 | features, timestamps = np.row_stack(features), np.array(timestamps) 33 | return features, timestamps 34 | 35 | 36 | 37 | def main(params): 38 | os.environ["CUDA_VISIBLE_DEVICES"] = params.gpu 39 | 40 | print(f'==> Extracting emonet embedding...') 41 | # in: face dir 42 | face_dir = 
config.PATH_TO_RAW_FACE[params.dataset] 43 | # out: feature csv dir 44 | save_dir = os.path.join(config.PATH_TO_FEATURES[params.dataset], 'emonet') 45 | if not os.path.exists(save_dir): 46 | os.mkdir(save_dir) 47 | elif params.overwrite: 48 | print(f'==> Warning: overwrite save_dir "{save_dir}"!') 49 | else: 50 | raise Exception(f'==> Error: save_dir "{save_dir}" already exists, set overwrite=TRUE if needed!') 51 | 52 | # load model 53 | model = EmoNet().cuda() 54 | # model = torch.nn.DataParallel(model).cuda() 55 | checkpoint_file = os.path.join(config.PATH_TO_PRETRAINED_MODELS, 'emonet/emonet_8.pth') 56 | checkpoint = torch.load(checkpoint_file) 57 | pre_trained_dict = {k.replace('module.', ''): v for k,v in checkpoint.items()} 58 | model.load_state_dict(pre_trained_dict) 59 | 60 | # transform 61 | augmentor = DataAugmentor(256, 256) 62 | transform = transforms.Compose([transforms.ToTensor()]) 63 | 64 | # extract embedding video by video 65 | vids = get_vids(face_dir) 66 | print(f'Find total "{len(vids)}" videos.') 67 | for i, vid in enumerate(vids, 1): 68 | print(f"Processing video '{vid}' ({i}/{len(vids)})...") 69 | # forward 70 | dataset = FaceDatasetForEmoNet(vid, face_dir, transform=transform, augmentor=augmentor) 71 | if len(dataset) == 0: 72 | print("Warning: number of frames of video {} should not be zero.".format(vid)) 73 | features, timestamps = [], [] 74 | else: 75 | data_loader = torch.utils.data.DataLoader(dataset, batch_size=32, num_workers=4, pin_memory=True) 76 | features, timestamps = extract(data_loader, model) 77 | 78 | # write 79 | # write_feature_to_csv(features, timestamps, save_dir, vid, feature_dim=feature_dim) 80 | write_feature_to_npy(features, timestamps, save_dir, vid) 81 | 82 | 83 | 84 | if __name__ == '__main__': 85 | parser = argparse.ArgumentParser(description='Run.') 86 | parser.add_argument('--gpu', type=str, default='5', help='gpu id') 87 | parser.add_argument('--overwrite', action='store_true', default=True, help='whether overwrite existed feature folder.') 88 | parser.add_argument('--dataset', type=str, default='BoxOfLies', help='input dataset') 89 | params = parser.parse_args() 90 | 91 | main(params) -------------------------------------------------------------------------------- /feature_extraction/visual/manet/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 Zengqun Zhao 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /feature_extraction/visual/manet/README.md: -------------------------------------------------------------------------------- 1 | # MA-Net 2 | 3 | PyTorch implementation of the paper *“Learning Deep Global Multi-scale and Local Attention Features 4 | for Facial Expression Recognition in the Wild”*, This work is under submission. 5 | 6 | ## Requirements 7 | - Python $\geq$3.6 8 | - PyTorch $\geq$1.2 9 | - torchvision $\geq$0.4.0 10 | - numpy 11 | - matplotlib 12 | - datetime 13 | - shutil 14 | - time 15 | - argparse 16 | - os 17 | 18 | ## Training 19 | 20 | - Step 1: download basic emotions dataset of [RAF-DB](http://www.whdeng.cn/raf/model1.html), and make sure it have the structure like following: 21 | 22 | ``` 23 | ./RAF-DB/ 24 | train/ 25 | 0/ 26 | train_09748.jpg 27 | ... 28 | train_12271.jpg 29 | 1/ 30 | ... 31 | 6/ 32 | test/ 33 | 0/ 34 | ... 35 | 6/ 36 | 37 | [Note] 0: Neutral; 1: Happiness; 2: Sadness; 3: Surprise; 4: Fear; 5: Disgust; 6: Anger 38 | ``` 39 | 40 | - Step 2: download pre-trained model from 41 | [Google Drive](https://drive.google.com/file/d/1tro_RCovLKNACt4MKYp3dmIvvxiOC2pi/view?usp=sharing), 42 | and put it into ***./checkpoint***. 43 | 44 | - Step 3: change the ***project_path*** and ***data_path*** in *main.py* to your path 45 | 46 | - Step 4: run ```python main.py ``` 47 | -------------------------------------------------------------------------------- /feature_extraction/visual/manet/log/AffectNet7.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zeroQiaoba/GCNet/8cce4a0c9a50172abfa79971e77c71c40c1d733d/feature_extraction/visual/manet/log/AffectNet7.png -------------------------------------------------------------------------------- /feature_extraction/visual/manet/log/AffectNet8.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zeroQiaoba/GCNet/8cce4a0c9a50172abfa79971e77c71c40c1d733d/feature_extraction/visual/manet/log/AffectNet8.png -------------------------------------------------------------------------------- /feature_extraction/visual/manet/log/CAER-S.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zeroQiaoba/GCNet/8cce4a0c9a50172abfa79971e77c71c40c1d733d/feature_extraction/visual/manet/log/CAER-S.png -------------------------------------------------------------------------------- /feature_extraction/visual/manet/log/FED-RO.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zeroQiaoba/GCNet/8cce4a0c9a50172abfa79971e77c71c40c1d733d/feature_extraction/visual/manet/log/FED-RO.png -------------------------------------------------------------------------------- /feature_extraction/visual/manet/log/RAF-DB.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zeroQiaoba/GCNet/8cce4a0c9a50172abfa79971e77c71c40c1d733d/feature_extraction/visual/manet/log/RAF-DB.png -------------------------------------------------------------------------------- /feature_extraction/visual/manet/log/SFEW.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zeroQiaoba/GCNet/8cce4a0c9a50172abfa79971e77c71c40c1d733d/feature_extraction/visual/manet/log/SFEW.png 
-------------------------------------------------------------------------------- /feature_extraction/visual/manet/log/[02-08]-[16-22]-cnn.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zeroQiaoba/GCNet/8cce4a0c9a50172abfa79971e77c71c40c1d733d/feature_extraction/visual/manet/log/[02-08]-[16-22]-cnn.png -------------------------------------------------------------------------------- /feature_extraction/visual/manet/log/[02-08]-[19-12]-cnn.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zeroQiaoba/GCNet/8cce4a0c9a50172abfa79971e77c71c40c1d733d/feature_extraction/visual/manet/log/[02-08]-[19-12]-cnn.png -------------------------------------------------------------------------------- /feature_extraction/visual/manet/log/[02-08]-[21-19]-cnn.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zeroQiaoba/GCNet/8cce4a0c9a50172abfa79971e77c71c40c1d733d/feature_extraction/visual/manet/log/[02-08]-[21-19]-cnn.png -------------------------------------------------------------------------------- /feature_extraction/visual/manet/log/[02-08]-[22-55]-cnn.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zeroQiaoba/GCNet/8cce4a0c9a50172abfa79971e77c71c40c1d733d/feature_extraction/visual/manet/log/[02-08]-[22-55]-cnn.png -------------------------------------------------------------------------------- /feature_extraction/visual/manet/log/[02-12]-[19-11]-cnn.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zeroQiaoba/GCNet/8cce4a0c9a50172abfa79971e77c71c40c1d733d/feature_extraction/visual/manet/log/[02-12]-[19-11]-cnn.png -------------------------------------------------------------------------------- /feature_extraction/visual/manet/log/[02-12]-[22-21]-cnn.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zeroQiaoba/GCNet/8cce4a0c9a50172abfa79971e77c71c40c1d733d/feature_extraction/visual/manet/log/[02-12]-[22-21]-cnn.png -------------------------------------------------------------------------------- /feature_extraction/visual/manet/log/[05-28]-[13-07]-cnn.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zeroQiaoba/GCNet/8cce4a0c9a50172abfa79971e77c71c40c1d733d/feature_extraction/visual/manet/log/[05-28]-[13-07]-cnn.png -------------------------------------------------------------------------------- /feature_extraction/visual/manet/model/__pycache__/attention.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zeroQiaoba/GCNet/8cce4a0c9a50172abfa79971e77c71c40c1d733d/feature_extraction/visual/manet/model/__pycache__/attention.cpython-38.pyc -------------------------------------------------------------------------------- /feature_extraction/visual/manet/model/__pycache__/manet.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zeroQiaoba/GCNet/8cce4a0c9a50172abfa79971e77c71c40c1d733d/feature_extraction/visual/manet/model/__pycache__/manet.cpython-38.pyc -------------------------------------------------------------------------------- 
/feature_extraction/visual/manet/model/attention.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | 6 | class BasicConv(nn.Module): 7 | def __init__(self, in_planes, out_planes, kernel_size, stride=1, padding=0, dilation=1, groups=1, relu=True, bn=True, bias=False): 8 | super(BasicConv, self).__init__() 9 | self.out_channels = out_planes 10 | self.conv = nn.Conv2d(in_planes, out_planes, kernel_size=kernel_size, stride=stride, padding=padding, dilation=dilation, groups=groups, bias=bias) 11 | self.bn = nn.BatchNorm2d(out_planes, eps=1e-5, momentum=0.01, affine=True) if bn else None 12 | self.relu = nn.ReLU() if relu else None 13 | 14 | def forward(self, x): 15 | x = self.conv(x) 16 | if self.bn is not None: 17 | x = self.bn(x) 18 | if self.relu is not None: 19 | x = self.relu(x) 20 | return x 21 | 22 | 23 | class Flatten(nn.Module): 24 | def forward(self, x): 25 | return x.view(x.size(0), -1) 26 | 27 | 28 | class ChannelGate(nn.Module): 29 | def __init__(self, gate_channels, reduction_ratio=16, pool_types=['avg', 'max']): 30 | super(ChannelGate, self).__init__() 31 | self.gate_channels = gate_channels 32 | self.mlp = nn.Sequential(Flatten(), 33 | nn.Linear(gate_channels, gate_channels // reduction_ratio), 34 | nn.ReLU(), 35 | nn.Linear(gate_channels // reduction_ratio, gate_channels)) 36 | self.pool_types = pool_types 37 | 38 | def forward(self, x): 39 | channel_att_sum = None 40 | for pool_type in self.pool_types: 41 | if pool_type == 'avg': 42 | avg_pool = F.avg_pool2d(x, (x.size(2), x.size(3)), stride=(x.size(2), x.size(3))) 43 | channel_att_raw = self.mlp(avg_pool ) 44 | elif pool_type == 'max': 45 | max_pool = F.max_pool2d(x, (x.size(2), x.size(3)), stride=(x.size(2), x.size(3))) 46 | channel_att_raw = self.mlp(max_pool) 47 | if channel_att_sum is None: 48 | channel_att_sum = channel_att_raw 49 | else: 50 | channel_att_sum = channel_att_sum + channel_att_raw 51 | 52 | scale = torch.sigmoid(channel_att_sum).unsqueeze(2).unsqueeze(3).expand_as(x) 53 | return x * scale 54 | 55 | 56 | class ChannelPool(nn.Module): 57 | def forward(self, x): 58 | return torch.cat((torch.max(x, 1)[0].unsqueeze(1), torch.mean(x, 1).unsqueeze(1)), dim=1) 59 | 60 | 61 | class SpatialGate(nn.Module): 62 | def __init__(self): 63 | super(SpatialGate, self).__init__() 64 | kernel_size = 7 65 | self.compress = ChannelPool() 66 | self.spatial = BasicConv(2, 1, kernel_size, stride=1, padding=(kernel_size-1) // 2, relu=False) 67 | 68 | def forward(self, x): 69 | x_compress = self.compress(x) 70 | x_out = self.spatial(x_compress) 71 | scale = torch.sigmoid(x_out) 72 | return x * scale 73 | 74 | 75 | class CBAM(nn.Module): 76 | def __init__(self, gate_channels, reduction_ratio=16, pool_types=['avg', 'max']): 77 | super(CBAM, self).__init__() 78 | self.ChannelGate = ChannelGate(gate_channels, reduction_ratio, pool_types) 79 | self.SpatialGate = SpatialGate() 80 | 81 | def forward(self, x): 82 | x_out = self.ChannelGate(x) 83 | x_out = self.SpatialGate(x_out) 84 | 85 | return x_out 86 | -------------------------------------------------------------------------------- /feature_extraction/visual/manet/reorganize_rafdb.py: -------------------------------------------------------------------------------- 1 | import os 2 | import glob 3 | import pandas as pd 4 | import shutil 5 | 6 | 7 | rafdb_path = '/data1/sunlicai/Affective Computing/Dataset/RAF-DB/basic' 8 | src_path = os.path.join(rafdb_path, 
'Image/aligned') 9 | tgt_path = os.path.join(rafdb_path, 'Image/aligned_c') # split/class_id/img_file 10 | label_file = os.path.join(rafdb_path, 'EmoLabel/list_patition_label.txt') 11 | df = pd.read_csv(label_file, header=None, delimiter=' ') 12 | file_names, label_ids = df[0].values, df[1].values 13 | print(f'Number of images: {len(df)}.') 14 | name_to_label = dict(zip(file_names, label_ids)) 15 | img_files = glob.glob(os.path.join(src_path, '*.jpg')) 16 | 17 | for src_file in img_files: 18 | img_name = os.path.basename(src_file).replace('_aligned', '') 19 | label = name_to_label[img_name] 20 | split = img_name.split('_')[0] 21 | saved_path = os.path.join(tgt_path, split, str(label)) 22 | if not os.path.exists(saved_path): 23 | os.makedirs(saved_path) 24 | tgt_file = os.path.join(saved_path, img_name) 25 | shutil.copyfile(src_file, tgt_file) 26 | print(f'Copy "{src_file}" to "{tgt_file}".') -------------------------------------------------------------------------------- /feature_extraction/visual/pytorch-benchmarks/.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | __pycache__ 3 | .nfs* 4 | scratch 5 | res_cache 6 | -------------------------------------------------------------------------------- /feature_extraction/visual/pytorch-benchmarks/LICENSE.md: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 Samuel Albanie 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 6 | 7 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 8 | 9 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 10 | -------------------------------------------------------------------------------- /feature_extraction/visual/pytorch-benchmarks/README.md: -------------------------------------------------------------------------------- 1 | ### pytorch-benchmark 2 | 3 | Some scripts for validating models on common benchmarks. Assumes at least Python3 and PyTorch 4.0. 4 | 5 | 6 | ### Supported datasets: 7 | 8 | * **ImageNet** (this is essentially just a cut-down version of the [official example](https://github.com/pytorch/examples/tree/master/imagenet)) 9 | * **Fer2013** - A dataset of greyscale faces labelled with emotions. 10 | 11 | 12 | 13 | ### References 14 | 15 | **ImageNet**: [paper](https://arxiv.org/abs/1409.0575) 16 | 17 | ``` 18 | @article{ILSVRC15, 19 | Author = {Olga Russakovsky and Jia Deng and Hao Su and Jonathan Krause and Sanjeev Satheesh and Sean Ma and Zhiheng Huang and Andrej Karpathy and Aditya Khosla and Michael Bernstein and Alexander C. 
Berg and Li Fei-Fei}, 20 | Title = {{ImageNet Large Scale Visual Recognition Challenge}}, 21 | Year = {2015}, 22 | journal = {International Journal of Computer Vision (IJCV)}, 23 | doi = {10.1007/s11263-015-0816-y}, 24 | volume={115}, 25 | number={3}, 26 | pages={211-252} 27 | } 28 | ``` 29 | 30 | **FER2013**: [paper](https://arxiv.org/abs/1307.0414) 31 | 32 | ``` 33 | @inproceedings{goodfellow2013challenges, 34 | title={Challenges in representation learning: A report on three machine learning contests}, 35 | author={Goodfellow, Ian J and Erhan, Dumitru and Carrier, Pierre Luc and Courville, Aaron and Mirza, Mehdi and Hamner, Ben and Cukierski, Will and Tang, Yichuan and Thaler, David and Lee, Dong-Hyun and others}, 36 | booktitle={International Conference on Neural Information Processing}, 37 | pages={117--124}, 38 | year={2013}, 39 | organization={Springer} 40 | } 41 | ``` 42 | 43 | -------------------------------------------------------------------------------- /feature_extraction/visual/pytorch-benchmarks/fer2013/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zeroQiaoba/GCNet/8cce4a0c9a50172abfa79971e77c71c40c1d733d/feature_extraction/visual/pytorch-benchmarks/fer2013/__init__.py -------------------------------------------------------------------------------- /feature_extraction/visual/pytorch-benchmarks/fer2013/fer.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """Fer2013 benchmark 3 | 4 | The module evaluates the performance of a pytorch model on the FER2013 5 | benchmark. 6 | """ 7 | 8 | from __future__ import division 9 | 10 | import os 11 | import time 12 | 13 | import torch 14 | import numpy as np 15 | import torch.utils.data 16 | import torch.backends.cudnn as cudnn 17 | from fer2013.fer_loader import Fer2013Dataset, Fer2013PlusDataset 18 | from utils.benchmark_helpers import compose_transforms 19 | 20 | def fer2013_benchmark(model, data_dir, res_cache, refresh_cache, 21 | batch_size=256, num_workers=2, fer_plus=False): 22 | if not refresh_cache: # load result from cache, if available 23 | if os.path.isfile(res_cache): 24 | res = torch.load(res_cache) 25 | prec1_val, prec1_test = res['prec1_val'], res['prec1_test'] 26 | print("=> loaded results from '{}'".format(res_cache)) 27 | info = (prec1_val, prec1_test, res['speed']) 28 | msg = 'val acc: {:.2f}, test acc: {:.2f}, Speed: {:.1f}Hz' 29 | print(msg.format(*info)) 30 | return 31 | 32 | meta = model.meta 33 | cudnn.benchmark = True 34 | model = torch.nn.DataParallel(model).cuda() 35 | preproc_transforms = compose_transforms(meta, center_crop=False) 36 | if fer_plus: 37 | dataset = Fer2013PlusDataset 38 | else: 39 | dataset = Fer2013Dataset 40 | speeds = [] 41 | res = {} 42 | for mode in 'val', 'test': 43 | loader = torch.utils.data.DataLoader( 44 | dataset(data_dir, mode=mode, transform=preproc_transforms), 45 | batch_size=batch_size, shuffle=False, 46 | num_workers=num_workers, pin_memory=True) 47 | prec1, speed = validate(loader, model, mode) 48 | res['prec1_{}'.format(mode)] = prec1 49 | speeds.append(speed) 50 | res['speed'] = np.mean(speed) 51 | torch.save(res, res_cache) 52 | 53 | def validate(val_loader, model, mode): 54 | model.eval() 55 | top1 = AverageMeter() 56 | speed = WarmupAverageMeter() 57 | end = time.time() 58 | with torch.no_grad(): 59 | for ii, (ims, target) in enumerate(val_loader): 60 | # target = target.cuda(async=True) 61 | target = target.cuda() 62 | 
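            # forward pass; the model was wrapped in nn.DataParallel above, so the batch
            # is split across all visible GPUs and the outputs are gathered back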
output = model(ims) # compute output 63 | prec1, = accuracy(output.data, target, topk=(1,)) 64 | top1.update(prec1[0], ims.size(0)) 65 | speed.update(time.time() - end, ims.size(0)) 66 | end = time.time() 67 | if ii % 10 == 0: 68 | msg = ('{0}: [{1}/{2}]\tSpeed {speed.current:.1f}Hz\t' 69 | '({speed.avg:.1f})Hz\tPrec@1 {top1.avg:.3f}') 70 | print(msg.format(mode, ii, len(val_loader), 71 | speed=speed, top1=top1)) 72 | print(' * Accuracy {0:.3f}'.format(top1.avg)) 73 | return top1.avg, speed.avg 74 | 75 | class WarmupAverageMeter(object): 76 | """Computes and stores the average and current value, after a fixed 77 | warmup period (useful for approximate benchmarking) 78 | 79 | Args: 80 | warmup (int) [3]: The number of updates to be ignored before the 81 | average starts to be computed. 82 | """ 83 | def __init__(self, warmup=3): 84 | self.reset() 85 | self.warmup = warmup 86 | 87 | def reset(self): 88 | self.avg = 0 89 | self.current = 0 90 | self.delta_sum = 0 91 | self.count = 0 92 | self.warmup_count = 0 93 | 94 | def update(self, delta, n): 95 | self.warmup_count = self.warmup_count + 1 96 | if self.warmup_count >= self.warmup: 97 | self.current = n / delta 98 | self.delta_sum += delta 99 | self.count += n 100 | self.avg = self.count / self.delta_sum 101 | 102 | class AverageMeter(object): 103 | """Computes and stores the average and current value""" 104 | def __init__(self): 105 | self.reset() 106 | 107 | def reset(self): 108 | self.val = 0 109 | self.avg = 0 110 | self.sum = 0 111 | self.count = 0 112 | 113 | def update(self, val, n=1): 114 | self.val = val 115 | self.sum += val * n 116 | self.count += n 117 | self.avg = self.sum / self.count 118 | 119 | def accuracy(output, target, topk=(1,)): 120 | """Computes the precision@k for the specified values of k""" 121 | maxk = max(topk) 122 | batch_size = target.size(0) 123 | output = output.squeeze(-1).squeeze(-1) 124 | _, pred = output.topk(maxk, 1, True, True) 125 | pred = pred.t() 126 | correct = pred.eq(target.view(1, -1).expand_as(pred)) 127 | 128 | res = [] 129 | for k in topk: 130 | correct_k = correct[:k].view(-1).float().sum(0, keepdim=True) 131 | res.append(correct_k.mul_(100.0 / batch_size)) 132 | return res 133 | -------------------------------------------------------------------------------- /feature_extraction/visual/pytorch-benchmarks/imagenet/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zeroQiaoba/GCNet/8cce4a0c9a50172abfa79971e77c71c40c1d733d/feature_extraction/visual/pytorch-benchmarks/imagenet/__init__.py -------------------------------------------------------------------------------- /feature_extraction/visual/pytorch-benchmarks/model/alexnet_face_fer_bn_dag.py: -------------------------------------------------------------------------------- 1 | # *_*coding:utf-8 *_* 2 | 3 | import torch 4 | import torch.nn as nn 5 | 6 | 7 | class Alexnet_face_fer_bn_dag(nn.Module): 8 | 9 | def __init__(self): 10 | super(Alexnet_face_fer_bn_dag, self).__init__() 11 | self.meta = {'mean': [131.09375, 103.88607788085938, 91.47599792480469], 12 | 'std': [1, 1, 1], 13 | 'imageSize': [227, 227, 3]} 14 | self.conv1 = nn.Conv2d(3, 96, kernel_size=[11, 11], stride=(4, 4)) 15 | self.bn1 = nn.BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) 16 | self.relu1 = nn.ReLU() 17 | self.pool1 = nn.MaxPool2d(kernel_size=[3, 3], stride=[2, 2], padding=0, dilation=1, ceil_mode=False) 18 | self.conv2 = nn.Conv2d(96, 256, kernel_size=[5, 5], 
stride=(1, 1), padding=(2, 2), groups=2) 19 | self.bn2 = nn.BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) 20 | self.relu2 = nn.ReLU() 21 | self.pool2 = nn.MaxPool2d(kernel_size=[3, 3], stride=[2, 2], padding=0, dilation=1, ceil_mode=False) 22 | self.conv3 = nn.Conv2d(256, 384, kernel_size=[3, 3], stride=(1, 1), padding=(1, 1)) 23 | self.bn3 = nn.BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) 24 | self.relu3 = nn.ReLU() 25 | self.conv4 = nn.Conv2d(384, 384, kernel_size=[3, 3], stride=(1, 1), padding=(1, 1), groups=2) 26 | self.bn4 = nn.BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) 27 | self.relu4 = nn.ReLU() 28 | self.conv5 = nn.Conv2d(384, 256, kernel_size=[3, 3], stride=(1, 1), padding=(1, 1), groups=2) 29 | self.bn5 = nn.BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) 30 | self.relu5 = nn.ReLU() 31 | self.pool5 = nn.MaxPool2d(kernel_size=[3, 3], stride=[2, 2], padding=0, dilation=1, ceil_mode=False) 32 | self.fc6 = nn.Conv2d(256, 4096, kernel_size=[6, 6], stride=(1, 1)) 33 | self.bn6 = nn.BatchNorm2d(4096, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) 34 | self.relu6 = nn.ReLU() 35 | self.fc7 = nn.Conv2d(4096, 4096, kernel_size=[1, 1], stride=(1, 1)) 36 | self.bn7 = nn.BatchNorm2d(4096, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) 37 | self.relu7 = nn.ReLU() 38 | self.fc8 = nn.Linear(in_features=4096, out_features=7, bias=True) 39 | 40 | def forward(self, data): 41 | x1 = self.conv1(data) 42 | x2 = self.bn1(x1) 43 | x3 = self.relu1(x2) 44 | x4 = self.pool1(x3) 45 | x5 = self.conv2(x4) 46 | x6 = self.bn2(x5) 47 | x7 = self.relu2(x6) 48 | x8 = self.pool2(x7) 49 | x9 = self.conv3(x8) 50 | x10 = self.bn3(x9) 51 | x11 = self.relu3(x10) 52 | x12 = self.conv4(x11) 53 | x13 = self.bn4(x12) 54 | x14 = self.relu4(x13) 55 | x15 = self.conv5(x14) 56 | x16 = self.bn5(x15) 57 | x17 = self.relu5(x16) 58 | x18 = self.pool5(x17) 59 | x19 = self.fc6(x18) 60 | x20 = self.bn6(x19) 61 | x21 = self.relu6(x20) 62 | x22 = self.fc7(x21) 63 | x23 = self.bn7(x22) 64 | x24_preflatten = self.relu7(x23) 65 | x24 = x24_preflatten.view(x24_preflatten.size(0), -1) 66 | prediction = self.fc8(x24) 67 | return prediction 68 | 69 | def alexnet_face_fer_bn_dag(weights_path=None, **kwargs): 70 | """ 71 | load imported model instance 72 | 73 | Args: 74 | weights_path (str): If set, loads model weights from the given path 75 | """ 76 | model = Alexnet_face_fer_bn_dag() 77 | if weights_path: 78 | state_dict = torch.load(weights_path) 79 | model.load_state_dict(state_dict) 80 | return model -------------------------------------------------------------------------------- /feature_extraction/visual/pytorch-benchmarks/model/vgg_m_face_bn_fer_dag.py: -------------------------------------------------------------------------------- 1 | # *_*coding:utf-8 *_* 2 | 3 | import torch 4 | import torch.nn as nn 5 | 6 | 7 | class Vgg_m_face_bn_fer_dag(nn.Module): 8 | 9 | def __init__(self): 10 | super(Vgg_m_face_bn_fer_dag, self).__init__() 11 | self.meta = {'mean': [131.45376586914062, 103.98748016357422, 91.46234893798828], 12 | 'std': [1, 1, 1], 13 | 'imageSize': [224, 224, 3]} 14 | self.conv1 = nn.Conv2d(3, 96, kernel_size=[7, 7], stride=(2, 2)) 15 | self.bn49 = nn.BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) 16 | self.relu1 = nn.ReLU() 17 | self.pool1 = nn.MaxPool2d(kernel_size=[3, 3], stride=[2, 2], padding=0, dilation=1, 
ceil_mode=False) 18 | self.conv2 = nn.Conv2d(96, 256, kernel_size=[5, 5], stride=(2, 2), padding=(1, 1)) 19 | self.bn50 = nn.BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) 20 | self.relu2 = nn.ReLU() 21 | self.pool2 = nn.MaxPool2d(kernel_size=[3, 3], stride=[2, 2], padding=(0, 0), dilation=1, ceil_mode=True) 22 | self.conv3 = nn.Conv2d(256, 512, kernel_size=[3, 3], stride=(1, 1), padding=(1, 1)) 23 | self.bn51 = nn.BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) 24 | self.relu3 = nn.ReLU() 25 | self.conv4 = nn.Conv2d(512, 512, kernel_size=[3, 3], stride=(1, 1), padding=(1, 1)) 26 | self.bn52 = nn.BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) 27 | self.relu4 = nn.ReLU() 28 | self.conv5 = nn.Conv2d(512, 512, kernel_size=[3, 3], stride=(1, 1), padding=(1, 1)) 29 | self.bn53 = nn.BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) 30 | self.relu5 = nn.ReLU() 31 | self.pool5 = nn.MaxPool2d(kernel_size=[3, 3], stride=[2, 2], padding=0, dilation=1, ceil_mode=False) 32 | self.fc6 = nn.Conv2d(512, 4096, kernel_size=[6, 6], stride=(1, 1)) 33 | self.bn54 = nn.BatchNorm2d(4096, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) 34 | self.relu6 = nn.ReLU() 35 | self.fc7 = nn.Conv2d(4096, 4096, kernel_size=[1, 1], stride=(1, 1)) 36 | self.bn55 = nn.BatchNorm2d(4096, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) 37 | self.relu7 = nn.ReLU() 38 | self.fc8 = nn.Linear(in_features=4096, out_features=7, bias=True) 39 | 40 | def forward(self, data): 41 | x1 = self.conv1(data) 42 | x2 = self.bn49(x1) 43 | x3 = self.relu1(x2) 44 | x4 = self.pool1(x3) 45 | x5 = self.conv2(x4) 46 | x6 = self.bn50(x5) 47 | x7 = self.relu2(x6) 48 | x8 = self.pool2(x7) 49 | x9 = self.conv3(x8) 50 | x10 = self.bn51(x9) 51 | x11 = self.relu3(x10) 52 | x12 = self.conv4(x11) 53 | x13 = self.bn52(x12) 54 | x14 = self.relu4(x13) 55 | x15 = self.conv5(x14) 56 | x16 = self.bn53(x15) 57 | x17 = self.relu5(x16) 58 | x18 = self.pool5(x17) 59 | x19 = self.fc6(x18) 60 | x20 = self.bn54(x19) 61 | x21 = self.relu6(x20) 62 | x22 = self.fc7(x21) 63 | x23 = self.bn55(x22) 64 | x24_preflatten = self.relu7(x23) 65 | x24 = x24_preflatten.view(x24_preflatten.size(0), -1) 66 | prediction = self.fc8(x24) 67 | return prediction 68 | 69 | def vgg_m_face_bn_fer_dag(weights_path=None, **kwargs): 70 | """ 71 | load imported model instance 72 | 73 | Args: 74 | weights_path (str): If set, loads model weights from the given path 75 | """ 76 | model = Vgg_m_face_bn_fer_dag() 77 | if weights_path: 78 | state_dict = torch.load(weights_path) 79 | model.load_state_dict(state_dict) 80 | return model -------------------------------------------------------------------------------- /feature_extraction/visual/pytorch-benchmarks/model/vgg_vd_face_fer_dag.py: -------------------------------------------------------------------------------- 1 | # *_*coding:utf-8 *_* 2 | 3 | import torch 4 | import torch.nn as nn 5 | 6 | 7 | class Vgg_vd_face_fer_dag(nn.Module): 8 | 9 | def __init__(self): 10 | super(Vgg_vd_face_fer_dag, self).__init__() 11 | self.meta = {'mean': [129.186279296875, 104.76238250732422, 93.59396362304688], 12 | 'std': [1, 1, 1], 13 | 'imageSize': [224, 224, 3]} 14 | self.conv1_1 = nn.Conv2d(3, 64, kernel_size=[3, 3], stride=(1, 1), padding=(1, 1)) 15 | self.relu1_1 = nn.ReLU() 16 | self.conv1_2 = nn.Conv2d(64, 64, kernel_size=[3, 3], stride=(1, 1), padding=(1, 1)) 17 | self.relu1_2 = nn.ReLU() 18 | 
self.pool1 = nn.MaxPool2d(kernel_size=[2, 2], stride=[2, 2], padding=0, dilation=1, ceil_mode=False) 19 | self.conv2_1 = nn.Conv2d(64, 128, kernel_size=[3, 3], stride=(1, 1), padding=(1, 1)) 20 | self.relu2_1 = nn.ReLU() 21 | self.conv2_2 = nn.Conv2d(128, 128, kernel_size=[3, 3], stride=(1, 1), padding=(1, 1)) 22 | self.relu2_2 = nn.ReLU() 23 | self.pool2 = nn.MaxPool2d(kernel_size=[2, 2], stride=[2, 2], padding=0, dilation=1, ceil_mode=False) 24 | self.conv3_1 = nn.Conv2d(128, 256, kernel_size=[3, 3], stride=(1, 1), padding=(1, 1)) 25 | self.relu3_1 = nn.ReLU() 26 | self.conv3_2 = nn.Conv2d(256, 256, kernel_size=[3, 3], stride=(1, 1), padding=(1, 1)) 27 | self.relu3_2 = nn.ReLU() 28 | self.conv3_3 = nn.Conv2d(256, 256, kernel_size=[3, 3], stride=(1, 1), padding=(1, 1)) 29 | self.relu3_3 = nn.ReLU() 30 | self.pool3 = nn.MaxPool2d(kernel_size=[2, 2], stride=[2, 2], padding=0, dilation=1, ceil_mode=False) 31 | self.conv4_1 = nn.Conv2d(256, 512, kernel_size=[3, 3], stride=(1, 1), padding=(1, 1)) 32 | self.relu4_1 = nn.ReLU() 33 | self.conv4_2 = nn.Conv2d(512, 512, kernel_size=[3, 3], stride=(1, 1), padding=(1, 1)) 34 | self.relu4_2 = nn.ReLU() 35 | self.conv4_3 = nn.Conv2d(512, 512, kernel_size=[3, 3], stride=(1, 1), padding=(1, 1)) 36 | self.relu4_3 = nn.ReLU() 37 | self.pool4 = nn.MaxPool2d(kernel_size=[2, 2], stride=[2, 2], padding=0, dilation=1, ceil_mode=False) 38 | self.conv5_1 = nn.Conv2d(512, 512, kernel_size=[3, 3], stride=(1, 1), padding=(1, 1)) 39 | self.relu5_1 = nn.ReLU() 40 | self.conv5_2 = nn.Conv2d(512, 512, kernel_size=[3, 3], stride=(1, 1), padding=(1, 1)) 41 | self.relu5_2 = nn.ReLU() 42 | self.conv5_3 = nn.Conv2d(512, 512, kernel_size=[3, 3], stride=(1, 1), padding=(1, 1)) 43 | self.relu5_3 = nn.ReLU() 44 | self.pool5 = nn.MaxPool2d(kernel_size=[2, 2], stride=[2, 2], padding=0, dilation=1, ceil_mode=False) 45 | self.fc6 = nn.Conv2d(512, 4096, kernel_size=[7, 7], stride=(1, 1)) 46 | self.relu6 = nn.ReLU() 47 | self.fc7 = nn.Linear(in_features=4096, out_features=4096, bias=True) 48 | self.relu7 = nn.ReLU() 49 | self.fc8 = nn.Linear(in_features=4096, out_features=7, bias=True) 50 | 51 | def forward(self, data): 52 | x1 = self.conv1_1(data) 53 | x2 = self.relu1_1(x1) 54 | x3 = self.conv1_2(x2) 55 | x4 = self.relu1_2(x3) 56 | x5 = self.pool1(x4) 57 | x6 = self.conv2_1(x5) 58 | x7 = self.relu2_1(x6) 59 | x8 = self.conv2_2(x7) 60 | x9 = self.relu2_2(x8) 61 | x10 = self.pool2(x9) 62 | x11 = self.conv3_1(x10) 63 | x12 = self.relu3_1(x11) 64 | x13 = self.conv3_2(x12) 65 | x14 = self.relu3_2(x13) 66 | x15 = self.conv3_3(x14) 67 | x16 = self.relu3_3(x15) 68 | x17 = self.pool3(x16) 69 | x18 = self.conv4_1(x17) 70 | x19 = self.relu4_1(x18) 71 | x20 = self.conv4_2(x19) 72 | x21 = self.relu4_2(x20) 73 | x22 = self.conv4_3(x21) 74 | x23 = self.relu4_3(x22) 75 | x24 = self.pool4(x23) 76 | x25 = self.conv5_1(x24) 77 | x26 = self.relu5_1(x25) 78 | x27 = self.conv5_2(x26) 79 | x28 = self.relu5_2(x27) 80 | x29 = self.conv5_3(x28) 81 | x30 = self.relu5_3(x29) 82 | x31 = self.pool5(x30) 83 | x32 = self.fc6(x31) 84 | x33_preflatten = self.relu6(x32) 85 | x33 = x33_preflatten.view(x33_preflatten.size(0), -1) 86 | x34 = self.fc7(x33) 87 | x35 = self.relu7(x34) 88 | prediction = self.fc8(x35) 89 | return prediction 90 | 91 | def vgg_vd_face_fer_dag(weights_path=None, **kwargs): 92 | """ 93 | load imported model instance 94 | 95 | Args: 96 | weights_path (str): If set, loads model weights from the given path 97 | """ 98 | model = Vgg_vd_face_fer_dag() 99 | if weights_path: 100 | state_dict = 
torch.load(weights_path) 101 | model.load_state_dict(state_dict) 102 | return model -------------------------------------------------------------------------------- /feature_extraction/visual/pytorch-benchmarks/run_fer_benchmarks.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """This module evaluates imported PyTorch models on fer2013 3 | """ 4 | 5 | import os 6 | import argparse 7 | from os.path import join as pjoin 8 | from fer2013.fer import fer2013_benchmark 9 | from utils.benchmark_helpers import load_module_2or3 10 | 11 | # MODEL_DIR = os.path.expanduser('~/data/models/pytorch/mcn_imports') 12 | # FER_DIR = os.path.expanduser('~/data/datasets/fer2013+') 13 | MODEL_DIR = './pretrained/' 14 | FER_DIR = os.path.expanduser('~/Affective Computing/Dataset/FERPlus') 15 | 16 | CACHE_DIR = 'res_cache/fer2013+' 17 | 18 | def load_model(model_name): 19 | """Load imported PyTorch model by name 20 | 21 | Args: 22 | model_name (str): the name of the model to be loaded 23 | 24 | Return: 25 | nn.Module: the loaded network 26 | """ 27 | model_def_path = pjoin('model', model_name + '.py') 28 | weights_path = pjoin(MODEL_DIR, model_name + '.pth') 29 | mod = load_module_2or3(model_name, model_def_path) 30 | func = getattr(mod, model_name) 31 | net = func(weights_path=weights_path) 32 | return net 33 | 34 | def run_benchmarks(gpus, refresh, fer_plus): 35 | """Run benchmarks for imported models 36 | 37 | Args: 38 | gpus (str): comma separated gpu device identifiers 39 | refresh (bool): whether to overwrite the results of existing runs 40 | fer_plus (bool): whether to evaluate on the ferplus benchmark, 41 | rather than the standard fer benchmark. 42 | """ 43 | 44 | # Select models (and their batch sizes) to include in the benchmark.
45 | if fer_plus: 46 | model_list = [ 47 | ('resnet50_ferplus_dag', 32), 48 | ('senet50_ferplus_dag', 32), 49 | ] 50 | else: 51 | model_list = [ 52 | ('alexnet_face_fer_bn_dag', 32), 53 | ('vgg_m_face_bn_fer_dag', 32), 54 | ('vgg_vd_face_fer_dag', 32), 55 | ] 56 | 57 | if not os.path.exists(CACHE_DIR): 58 | os.makedirs(CACHE_DIR) 59 | os.environ['CUDA_DEVICE_ORDER'] = 'PCI_BUS_ID' 60 | os.environ['CUDA_VISIBLE_DEVICES'] = str(gpus) 61 | 62 | opts = {'data_dir': FER_DIR, 'refresh_cache': refresh} 63 | 64 | for model_name, batch_size in model_list: 65 | cache_name = model_name 66 | if fer_plus: 67 | cache_name = cache_name + 'fer_plus' 68 | opts['res_cache'] = '{}/{}.pth'.format(CACHE_DIR, cache_name) 69 | opts['fer_plus'] = fer_plus 70 | model = load_model(model_name) 71 | print('benchmarking {}'.format(model_name)) 72 | fer2013_benchmark(model, batch_size=batch_size, **opts) 73 | 74 | parser = argparse.ArgumentParser(description='Run PyTorch benchmarks.') 75 | parser.add_argument('--gpus', nargs='?', dest='gpus', 76 | help='select gpu device id') 77 | parser.add_argument('--refresh', dest='refresh', action='store_true', 78 | help='refresh results cache') 79 | parser.add_argument('--ferplus', dest='ferplus', action='store_true', 80 | help='run ferplus (rather than fer) benchmarks') 81 | parser.set_defaults(gpus=None) 82 | parser.set_defaults(refresh=False) 83 | parsed = parser.parse_args() 84 | 85 | if __name__ == '__main__': 86 | run_benchmarks(parsed.gpus, parsed.refresh, parsed.ferplus) 87 | -------------------------------------------------------------------------------- /feature_extraction/visual/pytorch-benchmarks/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zeroQiaoba/GCNet/8cce4a0c9a50172abfa79971e77c71c40c1d733d/feature_extraction/visual/pytorch-benchmarks/utils/__init__.py -------------------------------------------------------------------------------- /feature_extraction/visual/pytorch-benchmarks/utils/benchmark_helpers.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """Utilities shared among the benchmarking protocols 3 | """ 4 | import os 5 | import sys 6 | import six 7 | 8 | import torchvision.transforms as transforms 9 | 10 | 11 | def compose_transforms(meta, resize=256, center_crop=True, 12 | override_meta_imsize=False): 13 | """Compose preprocessing transforms for model 14 | 15 | The imported models use a range of different preprocessing options, 16 | depending on how they were originally trained. Models trained in MatConvNet 17 | typically require input images that have been scaled to [0,255], rather 18 | than the [0,1] range favoured by PyTorch. 19 | 20 | Args: 21 | meta (dict): model preprocessing requirements 22 | resize (int) [256]: resize the input image to this size 23 | center_crop (bool) [True]: whether to center crop the image 24 | override_meta_imsize (bool) [False]: if true, use the value of `resize` 25 | to select the image input size, rather than the properties contained 26 | in meta (this option only applies when center cropping is not used).
27 | 28 | Return: 29 | (transforms.Compose): Composition of preprocessing transforms 30 | """ 31 | normalize = transforms.Normalize(mean=meta['mean'], std=meta['std']) 32 | im_size = meta['imageSize'] 33 | assert im_size[0] == im_size[1], 'expected square image size' 34 | if center_crop: 35 | transform_list = [transforms.Resize(resize), 36 | transforms.CenterCrop(size=(im_size[0], im_size[1]))] 37 | else: 38 | if override_meta_imsize: 39 | im_size = (resize, resize) 40 | transform_list = [transforms.Resize(size=(im_size[0], im_size[1]))] 41 | transform_list += [transforms.ToTensor()] 42 | if meta['std'] == [1, 1, 1]: # common amongst mcn models 43 | transform_list += [lambda x: x * 255.0] 44 | transform_list.append(normalize) 45 | return transforms.Compose(transform_list) 46 | 47 | 48 | def load_module_2or3(model_name, model_def_path): 49 | """Load model definition module in a manner that is compatible with 50 | both Python2 and Python3 51 | 52 | Args: 53 | model_name: The name of the model to be loaded 54 | model_def_path: The filepath of the module containing the definition 55 | 56 | Return: 57 | The loaded python module.""" 58 | if six.PY3: 59 | import importlib.util 60 | spec = importlib.util.spec_from_file_location(model_name, model_def_path) 61 | mod = importlib.util.module_from_spec(spec) 62 | spec.loader.exec_module(mod) 63 | else: 64 | import importlib 65 | dirname = os.path.dirname(model_def_path) 66 | sys.path.insert(0, dirname) 67 | module_name = os.path.splitext(os.path.basename(model_def_path))[0] 68 | mod = importlib.import_module(module_name) 69 | return mod 70 | -------------------------------------------------------------------------------- /gcnet/loss.py: -------------------------------------------------------------------------------- 1 | import os 2 | import time 3 | import glob 4 | import pickle 5 | import random 6 | import argparse 7 | import numpy as np 8 | import torch 9 | import torch.nn as nn 10 | import torch.nn.functional as F 11 | from torch.autograd import Variable 12 | from torch.nn.utils.rnn import pad_sequence 13 | from torch_geometric.nn import RGCNConv, GraphConv 14 | 15 | 16 | ## for reconstruction [only recon loss on miss part] 17 | class MaskedReconLoss(nn.Module): 18 | 19 | def __init__(self): 20 | super(MaskedReconLoss, self).__init__() 21 | self.loss = nn.MSELoss(reduction='none') 22 | 23 | def forward(self, recon_input, target_input, input_mask, umask, adim, tdim, vdim): 24 | """ ? => refer to spk and modality 25 | recon_input -> ? * [seqlen, batch, dim] 26 | target_input -> ? * [seqlen, batch, dim] 27 | input_mask -> ? 
* [seqlen, batch, dim] 28 | umask -> [batch, seqlen] 29 | """ 30 | assert len(recon_input) == 1 31 | recon = recon_input[0] # [seqlen, batch, dim] 32 | target = target_input[0] # [seqlen, batch, dim] 33 | mask = input_mask[0] # [seqlen, batch, 3] 34 | 35 | recon = torch.reshape(recon, (-1, recon.size(2))) # [seqlen*batch, dim] 36 | target = torch.reshape(target, (-1, target.size(2))) # [seqlen*batch, dim] 37 | mask = torch.reshape(mask, (-1, mask.size(2))) # [seqlen*batch, 3] 1(exist); 0(mask) 38 | umask = torch.reshape(umask, (-1, 1)) # [seqlen*batch, 1] 39 | 40 | A_rec = recon[:, :adim] 41 | L_rec = recon[:, adim:adim+tdim] 42 | V_rec = recon[:, adim+tdim:] 43 | A_full = target[:, :adim] 44 | L_full = target[:, adim:adim+tdim] 45 | V_full = target[:, adim+tdim:] 46 | A_miss_index = torch.reshape(mask[:, 0], (-1, 1)) 47 | L_miss_index = torch.reshape(mask[:, 1], (-1, 1)) 48 | V_miss_index = torch.reshape(mask[:, 2], (-1, 1)) 49 | 50 | loss_recon1 = self.loss(A_rec*umask, A_full*umask) * -1 * (A_miss_index - 1) 51 | loss_recon2 = self.loss(L_rec*umask, L_full*umask) * -1 * (L_miss_index - 1) 52 | loss_recon3 = self.loss(V_rec*umask, V_full*umask) * -1 * (V_miss_index - 1) 53 | loss_recon1 = torch.sum(loss_recon1) / adim 54 | loss_recon2 = torch.sum(loss_recon2) / tdim 55 | loss_recon3 = torch.sum(loss_recon3) / vdim 56 | loss_recon = (loss_recon1 + loss_recon2 + loss_recon3) / torch.sum(umask) 57 | 58 | return loss_recon 59 | 60 | 61 | ## iemocap loss function: same with CE loss 62 | class MaskedCELoss(nn.Module): 63 | 64 | def __init__(self): 65 | super(MaskedCELoss, self).__init__() 66 | self.loss = nn.NLLLoss(reduction='sum') 67 | 68 | def forward(self, pred, target, umask): 69 | """ 70 | pred -> [batch*seq_len, n_classes] 71 | target -> [batch*seq_len] 72 | umask -> [batch, seq_len] 73 | """ 74 | umask = umask.view(-1,1) # [batch*seq_len, 1] 75 | target = target.view(-1,1) # [batch*seq_len, 1] 76 | pred = F.log_softmax(pred, 1) # [batch*seqlen, n_classes] 77 | loss = self.loss(pred*umask, (target*umask).squeeze().long()) / torch.sum(umask) 78 | return loss 79 | 80 | 81 | ## for cmumosi and cmumosei loss calculation 82 | class MaskedMSELoss(nn.Module): 83 | 84 | def __init__(self): 85 | super(MaskedMSELoss, self).__init__() 86 | self.loss = nn.MSELoss(reduction='sum') 87 | 88 | def forward(self, pred, target, umask): 89 | """ 90 | pred -> [batch*seq_len] 91 | target -> [batch*seq_len] 92 | umask -> [batch*seq_len] 93 | """ 94 | pred = pred.view(-1, 1) # [batch*seq_len, 1] 95 | target = target.view(-1, 1) # [batch*seq_len, 1] 96 | umask = umask.view(-1, 1) # [batch*seq_len, 1] 97 | loss = self.loss(pred*umask, target*umask) / torch.sum(umask) 98 | return loss -------------------------------------------------------------------------------- /gcnet/module.py: -------------------------------------------------------------------------------- 1 | import os 2 | import time 3 | import glob 4 | import pickle 5 | import random 6 | import argparse 7 | import numpy as np 8 | import pandas as pd 9 | 10 | import torch 11 | import torch.nn as nn 12 | import torch.nn.functional as F 13 | from torch.autograd import Variable 14 | from torch.nn.utils.rnn import pad_sequence 15 | from torch_geometric.nn import RGCNConv, GraphConv 16 | 17 | 18 | class MatchingAttention(nn.Module): 19 | 20 | def __init__(self, mem_dim, cand_dim, alpha_dim=None, att_type='general'): 21 | super(MatchingAttention, self).__init__() 22 | assert att_type!='concat' or alpha_dim!=None 23 | assert att_type!='dot' or mem_dim==cand_dim 
24 | self.mem_dim = mem_dim 25 | self.cand_dim = cand_dim 26 | self.att_type = att_type 27 | if att_type=='general': 28 | self.transform = nn.Linear(cand_dim, mem_dim, bias=False) 29 | if att_type=='general2': 30 | self.transform = nn.Linear(cand_dim, mem_dim, bias=True) 31 | elif att_type=='concat': 32 | self.transform = nn.Linear(cand_dim+mem_dim, alpha_dim, bias=False) 33 | self.vector_prod = nn.Linear(alpha_dim, 1, bias=False) 34 | 35 | def forward(self, M, x, mask=None): 36 | """ 37 | M -> (seq_len, batch, mem_dim) 38 | x -> (batch, cand_dim) 39 | mask -> (batch, seq_len) 40 | """ 41 | if type(mask)==type(None): 42 | mask = torch.ones(M.size(1), M.size(0)).type(M.type()) # [batch, seq_len] 43 | 44 | if self.att_type=='dot': 45 | M_ = M.permute(1,2,0) # batch, vector, seqlen 46 | x_ = x.unsqueeze(1) # batch, 1, vector 47 | alpha = F.softmax(torch.bmm(x_, M_), dim=2) # batch, 1, seqlen 48 | elif self.att_type=='general': 49 | M_ = M.permute(1,2,0) # batch, mem_dim, seqlen 50 | x_ = self.transform(x).unsqueeze(1) # batch, 1, mem_dim 51 | alpha = F.softmax(torch.bmm(x_, M_), dim=2) # batch, 1, seqlen 52 | elif self.att_type=='general2': 53 | M_ = M.permute(1,2,0) # [batch, mem_dim, seqlen] 54 | x_ = self.transform(x).unsqueeze(1) # [batch, 1, mem_dim] 55 | mask_ = mask.unsqueeze(2).repeat(1, 1, self.mem_dim).transpose(1, 2) # [batch, mem_dim, seq_len] 56 | M_ = M_ * mask_ # [batch, mem_dim, seqlen] 57 | alpha_ = torch.bmm(x_, M_)*mask.unsqueeze(1) # attention value: [batch, 1, seqlen] 58 | alpha_ = torch.tanh(alpha_) 59 | alpha_ = F.softmax(alpha_, dim=2) # [batch, 1, seqlen] 60 | alpha_masked = alpha_*mask.unsqueeze(1) # [batch, 1, seqlen] 61 | alpha_sum = torch.sum(alpha_masked, dim=2, keepdim=True) # [batch, 1, 1] 62 | alpha = alpha_masked/alpha_sum # normalized attention: [batch, 1, seqlen] 63 | # alpha = torch.where(alpha.isnan(), alpha_masked, alpha) 64 | else: 65 | M_ = M.transpose(0,1) # batch, seqlen, mem_dim 66 | x_ = x.unsqueeze(1).expand(-1,M.size()[0],-1) # batch, seqlen, cand_dim 67 | M_x_ = torch.cat([M_,x_],2) # batch, seqlen, mem_dim+cand_dim 68 | mx_a = F.tanh(self.transform(M_x_)) # batch, seqlen, alpha_dim 69 | alpha = F.softmax(self.vector_prod(mx_a),1).transpose(1,2) # [batch, 1, seqlen] 70 | 71 | attn_pool = torch.bmm(alpha, M.transpose(0,1))[:,0,:] # [batch, mem_dim] 72 | return attn_pool, alpha 73 | 74 | 75 | # change [num_utterance, dim] => [seqlen, batch, dim] 76 | def utterance_to_conversation(outputs, seq_lengths, umask, no_cuda): 77 | input_conversation_length = torch.tensor(seq_lengths) # [6, 24, 13, 9] 78 | start_zero = input_conversation_length.data.new(1).zero_() # [0] 79 | 80 | if not no_cuda: 81 | input_conversation_length = input_conversation_length.cuda() 82 | start_zero = start_zero.cuda() 83 | 84 | max_len = max(seq_lengths) # [int] 85 | start = torch.cumsum(torch.cat((start_zero, input_conversation_length[:-1])), 0) # [0, 6, 30, 43] 86 | 87 | outputs = torch.stack([pad(outputs.narrow(0, s, l), max_len, no_cuda) # [seqlen, batch, dim] 88 | for s, l in zip(start.data.tolist(), 89 | input_conversation_length.data.tolist())], 0).transpose(0, 1) 90 | return outputs 91 | 92 | 93 | def pad(tensor, length, no_cuda): 94 | if isinstance(tensor, Variable): 95 | var = tensor 96 | if length > var.size(0): 97 | if not no_cuda: 98 | return torch.cat([var, torch.zeros(length - var.size(0), *var.size()[1:]).cuda()]) 99 | else: 100 | return torch.cat([var, torch.zeros(length - var.size(0), *var.size()[1:])]) 101 | else: 102 | return var 103 | else: 104 | if length > 
tensor.size(0): 105 | if not no_cuda: 106 | return torch.cat([tensor, torch.zeros(length - tensor.size(0), *tensor.size()[1:]).cuda()]) 107 | else: 108 | return torch.cat([tensor, torch.zeros(length - tensor.size(0), *tensor.size()[1:])]) 109 | else: 110 | return tensor 111 | -------------------------------------------------------------------------------- /requirements-cpmnet.txt: -------------------------------------------------------------------------------- 1 | absl-py==1.0.0 2 | astor==0.8.1 3 | backcall==0.2.0 4 | cached-property==1.5.2 5 | certifi==2021.10.8 6 | decorator==5.1.1 7 | gast==0.2.2 8 | google-pasta==0.2.0 9 | grpcio==1.43.0 10 | h5py==3.6.0 11 | importlib-metadata==4.10.1 12 | ipython==7.20.0 13 | joblib==1.1.0 14 | Keras-Applications==1.0.8 15 | Keras-Preprocessing==1.1.2 16 | Markdown==3.3.6 17 | mkl-fft==1.3.1 18 | mkl-random @ file:///tmp/build/80754af9/mkl_random_1626179032232/work 19 | mkl-service==2.4.0 20 | numpy @ file:///tmp/build/80754af9/numpy_and_numpy_base_1634106693478/work 21 | olefile==0.46 22 | opt-einsum==3.3.0 23 | pexpect==4.8.0 24 | pickleshare==0.7.5 25 | Pillow==8.4.0 26 | prompt-toolkit==3.0.29 27 | protobuf==3.19.3 28 | ptyprocess==0.7.0 29 | Pygments==2.12.0 30 | scikit-learn==1.0.2 31 | scipy==1.1.0 32 | six @ file:///tmp/build/80754af9/six_1623709665295/work 33 | tensorboard==1.15.0 34 | tensorflow-estimator==1.15.1 35 | tensorflow-gpu==1.15.0 36 | termcolor==1.1.0 37 | threadpoolctl==3.0.0 38 | torch==1.4.0 39 | torchvision==0.5.0 40 | tqdm==4.62.3 41 | traitlets==5.1.0 42 | typing_extensions==4.0.1 43 | wcwidth==0.2.5 44 | Werkzeug==2.0.2 45 | wrapt==1.13.3 46 | zipp==3.7.0 47 | --------------------------------------------------------------------------------
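The masked reconstruction loss in `gcnet/loss.py` above only penalizes the modalities that were dropped for each utterance: the `-1 * (miss_index - 1)` factor keeps the MSE terms where the modality mask is 0 (missing) and zeroes out the terms where it is 1 (observed), before normalizing by the number of valid utterances. Below is a minimal usage sketch with dummy tensors; the import path, feature dimensions, and random inputs are illustrative assumptions, not values taken from the repository.

```python
# Minimal sketch of driving MaskedReconLoss with dummy data (illustrative only).
# Assumes the repository root is on PYTHONPATH and its dependencies
# (torch, torch_geometric) are installed; the feature sizes below are hypothetical.
import torch

from gcnet.loss import MaskedReconLoss

seqlen, batch = 4, 2
adim, tdim, vdim = 100, 768, 342                 # hypothetical audio/text/visual dims
feat_dim = adim + tdim + vdim

recon  = torch.randn(seqlen, batch, feat_dim)    # reconstructed features
target = torch.randn(seqlen, batch, feat_dim)    # complete (ground-truth) features

# Modality mask: 1 = modality observed, 0 = modality missing, per utterance.
modality_mask = torch.randint(0, 2, (seqlen, batch, 3)).float()
# Utterance mask: 1 = real utterance, 0 = padding.
umask = torch.ones(batch, seqlen)

criterion = MaskedReconLoss()
loss = criterion([recon], [target], [modality_mask], umask, adim, tdim, vdim)
print(loss.item())  # scalar; only missing-modality positions contribute
```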