├── LICENSE
├── NT_Noise
│   ├── __init__.py
│   ├── checkpoint
│   │   └── DomainNet
│   │       ├── clipart_list.txt
│   │       ├── infograph_list.txt
│   │       ├── painting_list.txt
│   │       ├── quickdraw_list.txt
│   │       ├── real_list.txt
│   │       └── sketch_list.txt
│   ├── demo_ssda_seed_APE.py
│   ├── demo_ssda_seed_ENT.py
│   ├── demo_ssda_seed_S+T.py
│   ├── demo_ssda_seed_T+DNN.py
│   ├── demo_ssda_seed_finetune.py
│   ├── demo_uda_seed_dan.py
│   ├── demo_uda_seed_dann.py
│   ├── demo_uda_seed_dnn.py
│   ├── demo_uda_seed_mcc.py
│   ├── demo_uda_seed_shot.py
│   ├── save_init_model.py
│   ├── source_train_seed.py
│   └── utils
│       ├── LogRecord.py
│       ├── __init__.py
│       ├── data_list.py
│       ├── dataloader.py
│       ├── generate_data_list.py
│       ├── loss.py
│       ├── network.py
│       ├── utils.py
│       └── utils_bl.py
├── NT_SSDA
│   ├── checkpoint
│   │   └── DomainNet
│   │       ├── clipart_list.txt
│   │       ├── infograph_list.txt
│   │       ├── painting_list.txt
│   │       ├── quickdraw_list.txt
│   │       ├── real_list.txt
│   │       └── sketch_list.txt
│   ├── demo_img_APE.py
│   ├── demo_img_ENT.py
│   ├── demo_img_MME.py
│   ├── demo_img_S+T.py
│   ├── demo_img_T+DNN.py
│   ├── demo_img_bl_svm.py
│   ├── demo_img_cdan.py
│   ├── demo_img_dan.py
│   ├── demo_img_dann.py
│   ├── demo_img_finetune.py
│   ├── demo_img_mcc.py
│   ├── demo_img_shot.py
│   ├── demo_seed_APE.py
│   ├── demo_seed_ENT.py
│   ├── demo_seed_MME.py
│   ├── demo_seed_S+T.py
│   ├── demo_seed_T+DNN.py
│   ├── demo_seed_bl_svm.py
│   ├── demo_seed_cdan.py
│   ├── demo_seed_dan.py
│   ├── demo_seed_dann.py
│   ├── demo_seed_finetune.py
│   ├── demo_seed_mcc.py
│   ├── demo_seed_shot.py
│   ├── save_init_model.py
│   ├── source_train_img_fintune.py
│   ├── source_train_seed_finetune.py
│   └── utils
│       ├── LogRecord.py
│       ├── __init__.py
│       ├── data_list.py
│       ├── dataloader.py
│       ├── generate_data_list.py
│       ├── loss.py
│       ├── network.py
│       ├── utils.py
│       └── utils_bl.py
├── NT_UDA
│   ├── __init__.py
│   ├── checkpoint
│   │   └── DomainNet
│   │       ├── clipart_list.txt
│   │       ├── infograph_list.txt
│   │       ├── painting_list.txt
│   │       ├── quickdraw_list.txt
│   │       ├── real_list.txt
│   │       └── sketch_list.txt
│   ├── data_synth
│   │   ├── generate_synth
│   │   │   ├── .DS_Store
│   │   │   ├── __init__.py
│   │   │   ├── data_synth_moons.py
│   │   │   └── func_transform.py
│   │   ├── moon0.csv
│   │   ├── moon1.csv
│   │   ├── moon2.csv
│   │   ├── moon3_45.csv
│   │   ├── moon4_15.csv
│   │   ├── moon5.csv
│   │   ├── moon6.csv
│   │   ├── moon7.csv
│   │   ├── moon8.csv
│   │   └── moon9.csv
│   ├── demo_img_atdoc.py
│   ├── demo_img_bl_random.py
│   ├── demo_img_bl_svm.py
│   ├── demo_img_cdan.py
│   ├── demo_img_dan.py
│   ├── demo_img_dann.py
│   ├── demo_img_dnn.py
│   ├── demo_img_fixbi.py
│   ├── demo_img_jda.py
│   ├── demo_img_kmm.py
│   ├── demo_img_mcc.py
│   ├── demo_img_shot.py
│   ├── demo_seed_atdoc.py
│   ├── demo_seed_bl_random.py
│   ├── demo_seed_bl_svm.py
│   ├── demo_seed_cdan.py
│   ├── demo_seed_dan.py
│   ├── demo_seed_dann.py
│   ├── demo_seed_dnn.py
│   ├── demo_seed_fixbi.py
│   ├── demo_seed_jda.py
│   ├── demo_seed_kmm.py
│   ├── demo_seed_mcc.py
│   ├── demo_seed_shot.py
│   ├── demo_syn_atdoc.py
│   ├── demo_syn_bl_svm.py
│   ├── demo_syn_cdan.py
│   ├── demo_syn_dan.py
│   ├── demo_syn_dann.py
│   ├── demo_syn_dnn.py
│   ├── demo_syn_kmm.py
│   ├── demo_syn_mcc.py
│   ├── demo_syn_shot.py
│   ├── save_init_model.py
│   ├── source_train_img.py
│   ├── source_train_seed.py
│   ├── source_train_syn.py
│   └── utils
│       ├── LogRecord.py
│       ├── __init__.py
│       ├── data_list.py
│       ├── dataloader.py
│       ├── generate_data_list.py
│       ├── loss.py
│       ├── network.py
│       ├── utils.py
│       └── utils_bl.py
├── README.md
└── presentation
    ├── dataset.png
    ├── nt-ssda.png
    └── nt-uda.png

--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
MIT License

Copyright (c) 2022 Wen Zhang

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

--------------------------------------------------------------------------------
/NT_Noise/__init__.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
# @Time    : 2022/1/23 9:10 AM
# @Author  : wenzhang
# @File    : __init__.py

--------------------------------------------------------------------------------
/NT_Noise/demo_ssda_seed_ENT.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
# A Survey on Negative Transfer
# https://github.com/chamwen/NT-Benchmark
import numpy as np
import argparse
import os
import torch as tr
import torch.nn as nn
import torch.optim as optim
from utils import network, loss, utils
from utils.dataloader import read_seed_src_tar
from utils.utils import lr_scheduler_full, fix_random_seed, data_load_noimg_ssda
from utils.loss import entropy


def train_target(args):
    X_src, y_src, X_tar, y_tar = read_seed_src_tar(args)
    dset_loaders = data_load_noimg_ssda(X_src, y_src, X_tar, y_tar, args)

    netF, netC = network.backbone_net(args, args.bottleneck)
    netF.load_state_dict(tr.load(args.mdl_init_dir + 'netF.pt'))
    netC.load_state_dict(tr.load(args.mdl_init_dir + 'netC.pt'))
    base_network = nn.Sequential(netF, netC)
    optimizer = optim.SGD(base_network.parameters(), lr=args.lr)

    max_iter = args.max_epoch * len(dset_loaders["source"])
    interval_iter = max_iter // 10
    args.max_iter = max_iter
    iter_num = 0

    netF.train()
    netC.train()

    while iter_num < max_iter:
        try:
            inputs_source, labels_source = next(iter_source)
        except:
            iter_source = iter(dset_loaders["source"])
            inputs_source, labels_source = next(iter_source)

        try:
            inputs_target_tr, labels_target_tr = next(iter_target_tr)
        except:
            iter_target_tr = iter(dset_loaders["target_tr"])
            inputs_target_tr, labels_target_tr = next(iter_target_tr)

        try:
            inputs_target, _ = next(iter_target)
        except:
            iter_target = iter(dset_loaders["target_te"])
            inputs_target, _ = next(iter_target)

        if inputs_source.size(0) == 1:
            continue

        iter_num += 1
        lr_scheduler_full(optimizer, init_lr=args.lr, iter_num=iter_num, max_iter=args.max_iter)

        inputs_source, labels_source = inputs_source.cuda(), labels_source.cuda()
        inputs_target_tr, labels_target_tr = inputs_target_tr.cuda(), labels_target_tr.cuda()
        _, outputs_source = netC(netF(inputs_source))
        _, outputs_target_tr = netC(netF(inputs_target_tr))
        outputs = tr.cat((outputs_source, outputs_target_tr), dim=0)
        labels = tr.cat((labels_source, labels_target_tr), dim=0)

        args.lamda = 0.1
        loss_classifier = nn.CrossEntropyLoss()(outputs, labels)
        inputs_target = inputs_target.cuda()
        feas_target = netF(inputs_target)
        _, outputs_target = netC(feas_target)
        loss_entropy = entropy(outputs_target, args.lamda)
        total_loss = loss_classifier + loss_entropy

        optimizer.zero_grad()
        total_loss.backward()
        optimizer.step()

        if iter_num % interval_iter == 0 or iter_num == max_iter:
            netF.eval()
            netC.eval()

            acc_t_te, _ = utils.cal_acc_noimg(dset_loaders["Target"], netF, netC)
            log_str = 'Task: {}, Iter:{}/{}; Acc = {:.2f}%'.format(args.task_str, iter_num, max_iter, acc_t_te)
            print(log_str)

            netF.train()
            netC.train()

    return acc_t_te


if __name__ == '__main__':

    data_name = 'SEED'
    if data_name == 'SEED': chn, class_num, trial_num = 62, 3, 3394
    focus_domain_idx = [0, 1, 2]
    domain_list = ['S' + str(i) for i in focus_domain_idx]
    num_domain = len(domain_list)

    args = argparse.Namespace(bottleneck=64, lr=0.01, lr_decay1=0.1, lr_decay2=1.0,
                              epsilon=1e-05, layer='wn', smooth=0,
                              N=num_domain, chn=chn, class_num=class_num)

    args.dset = data_name
    args.method = 'ENT'
    args.backbone = 'ShallowNet'
    args.batch_size = 32
    args.max_epoch = 10
    args.input_dim = 310
    args.norm = 'zscore'
    args.bz_tar_tr = args.batch_size
    args.bz_tar_te = args.batch_size * 2
    args.mdl_init_dir = 'outputs/mdl_init/' + args.dset + '/'
    args.tar_lbl_rate = 5  # [5, 10, ..., 50]/100

    os.environ["CUDA_VISIBLE_DEVICES"] = '3'
    args.data_env = 'gpu'  # 'local'
    args.seed = 2022  # 2021~2023, repeat three times
    fix_random_seed(args.seed)
    tr.backends.cudnn.deterministic = True

    noise_list = np.linspace(0, 100, 11).tolist()
    num_test = len(noise_list)
    acc_all = np.zeros(num_test)
    s, t = 0, 1
    for ns in range(num_test):
        args.noise_rate = np.round(noise_list[ns] / 100, 2)
        dset_n = args.dset + '_' + str(args.noise_rate)
        print(dset_n, args.method)
        info_str = '\nnoise %s: %s --> %s' % (str(noise_list[ns]), domain_list[s], domain_list[t])
        print(info_str)
        args.src, args.tar = focus_domain_idx[s], focus_domain_idx[t]
        args.task_str = domain_list[s] + '_' + domain_list[t]
        print(args)

        acc_all[ns] = train_target(args)
        print('\nSub acc: ', np.round(acc_all, 3))
        print('Avg acc: ', np.round(np.mean(acc_all), 3))

    acc_sub_str = str(np.round(acc_all, 3).tolist())
    acc_mean_str = str(np.round(np.mean(acc_all), 3).tolist())
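Note: utils/loss.py is not part of this excerpt, so the body of entropy used by the ENT demo above is not visible here. A minimal sketch consistent with the call entropy(outputs_target, args.lamda), i.e. the Shannon entropy of the softmax predictions scaled by lamda, would be (the body is an assumption, not the vendored code):

import torch as tr
import torch.nn as nn

def entropy(logits, lamda=1.0, eps=1e-5):
    # Mean Shannon entropy of the softmax predictions, scaled by lamda.
    probs = nn.Softmax(dim=1)(logits)
    ent = -(probs * tr.log(probs + eps)).sum(dim=1)  # per-sample entropy
    return lamda * ent.mean()

Minimizing this term pushes unlabeled target predictions toward confident, low-entropy outputs, the classic entropy-minimization regularizer.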
--------------------------------------------------------------------------------
/NT_Noise/demo_ssda_seed_S+T.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
# A Survey on Negative Transfer
# https://github.com/chamwen/NT-Benchmark
import argparse
import os
import numpy as np
import torch as tr
import torch.nn as nn
import torch.optim as optim
from utils import network, utils
from utils.dataloader import read_seed_src_tar
from utils.utils import fix_random_seed, lr_scheduler_full, data_load_noimg_ssda


def train_source_test_target(args):
    X_src, y_src, X_tar, y_tar = read_seed_src_tar(args)
    dset_loaders = data_load_noimg_ssda(X_src, y_src, X_tar, y_tar, args)

    netF, netC = network.backbone_net(args, args.bottleneck)
    netF.load_state_dict(tr.load(args.mdl_init_dir + 'netF.pt'))
    netC.load_state_dict(tr.load(args.mdl_init_dir + 'netC.pt'))
    base_network = nn.Sequential(netF, netC)
    optimizer = optim.SGD(base_network.parameters(), lr=args.lr)

    acc_init = 0
    max_iter = args.max_epoch * len(dset_loaders["source_tr"])
    interval_iter = max_iter // 10
    args.max_iter = max_iter
    iter_num = 0

    netF.train()
    netC.train()

    while iter_num < max_iter:
        try:
            inputs_source, labels_source = next(iter_source)
        except:
            iter_source = iter(dset_loaders["source_tr"])
            inputs_source, labels_source = next(iter_source)

        try:
            inputs_target_tr, labels_target_tr = next(iter_target_tr)
        except:
            iter_target_tr = iter(dset_loaders["target_tr"])
            inputs_target_tr, labels_target_tr = next(iter_target_tr)

        if inputs_source.size(0) == 1:
            continue

        iter_num += 1
        lr_scheduler_full(optimizer, init_lr=args.lr, iter_num=iter_num, max_iter=args.max_iter)

        inputs_source, labels_source = inputs_source.cuda(), labels_source.cuda()
        inputs_target_tr, labels_target_tr = inputs_target_tr.cuda(), labels_target_tr.cuda()

        inputs_data = tr.cat((inputs_source, inputs_target_tr), 0)
        inputs_label = tr.cat((labels_source, labels_target_tr), 0)

        feas, output = netC(netF(inputs_data))
        classifier_loss = nn.CrossEntropyLoss()(output, inputs_label)

        optimizer.zero_grad()
        classifier_loss.backward()
        optimizer.step()

        if iter_num % (interval_iter * 2) == 0 or iter_num == max_iter:
            netF.eval()
            netC.eval()

            acc_s_te, _ = utils.cal_acc_noimg(dset_loaders["source_te"], netF, netC)
            acc_t_te, _ = utils.cal_acc_noimg(dset_loaders["Target"], netF, netC)
            log_str = 'Task: {}, Iter:{}/{}; Val_Acc = {:.2f}%; Test_Acc = {:.2f}%'.format(
                args.task_str, iter_num, max_iter, acc_s_te, acc_t_te)
            print(log_str)
            netF.train()
            netC.train()

            if acc_s_te >= acc_init:
                acc_init = acc_s_te
                acc_tar_src_best = acc_t_te
                netF.cuda()
                netC.cuda()

                netF.train()
                netC.train()

    return acc_tar_src_best


if __name__ == "__main__":
    data_name = 'SEED'
    if data_name == 'SEED': chn, class_num, trial_num = 62, 3, 3394
    focus_domain_idx = [0, 1, 2]
    domain_list = ['S' + str(i) for i in focus_domain_idx]
    num_domain = len(domain_list)

    args = argparse.Namespace(bottleneck=64, lr=0.01, lr_decay1=0.1, lr_decay2=1.0,
                              epsilon=1e-05, layer='wn', smooth=0, is_save=False,
                              N=num_domain, chn=chn, trial=trial_num, class_num=class_num)

    args.dset = data_name
    args.method = 'ST'
    args.backbone = 'ShallowNet'
    args.batch_size = 32
    args.max_epoch = 50
    args.input_dim = 310
    args.norm = 'zscore'
    args.bz_tar_tr = int(args.batch_size / 2)
    args.bz_tar_te = args.batch_size
    args.mdl_init_dir = 'outputs/mdl_init/' + args.dset + '/'
    args.tar_lbl_rate = 5  # [5, 10, ..., 50]/100

    os.environ["CUDA_VISIBLE_DEVICES"] = '0'
    args.data_env = 'gpu'  # 'local'
    args.seed = 2022  # 2021~2023, repeat three times
    fix_random_seed(args.seed)
    tr.backends.cudnn.deterministic = True

    noise_list = np.linspace(0, 100, 11).tolist()
    num_test = len(noise_list)
    acc_all = np.zeros(num_test)
    s, t = 0, 1
    for ns in range(num_test):
        args.noise_rate = np.round(noise_list[ns] / 100, 2)
        dset_n = args.dset + '_' + str(args.noise_rate)
        print(dset_n, args.method)
        info_str = '\nnoise %s: %s --> %s' % (str(noise_list[ns]), domain_list[s], domain_list[t])
        print(info_str)
        args.src, args.tar = focus_domain_idx[s], focus_domain_idx[t]
        args.task_str = domain_list[s] + '_' + domain_list[t]
        print(args)

        acc_all[ns] = train_source_test_target(args)
        print('\nSub acc: ', np.round(acc_all, 2))
        print('Avg acc: ', np.round(np.mean(acc_all), 2))

    acc_sub_str = str(np.round(acc_all, 2).tolist())
    acc_mean_str = str(np.round(np.mean(acc_all), 2).tolist())
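Note: every demo anneals the SGD learning rate through utils.utils.lr_scheduler_full, which is not included in this excerpt. A plausible sketch, assuming the SHOT-style polynomial decay that this code family commonly uses (the decay exponent and the momentum/weight-decay settings are assumptions):

def lr_scheduler_full(optimizer, init_lr, iter_num, max_iter, gamma=10, power=0.75):
    # Polynomial decay from init_lr toward 0 as training progresses.
    decay = (1 + gamma * iter_num / max_iter) ** (-power)
    for param_group in optimizer.param_groups:
        param_group['lr'] = init_lr * decay
        param_group['weight_decay'] = 1e-3
        param_group['momentum'] = 0.9
        param_group['nesterov'] = True
    return optimizer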
--------------------------------------------------------------------------------
/NT_Noise/demo_ssda_seed_T+DNN.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
# A Survey on Negative Transfer
# https://github.com/chamwen/NT-Benchmark
import argparse
import os
import numpy as np
import torch as tr
import torch.nn as nn
import torch.optim as optim
from utils import network, utils
from utils.dataloader import read_seed_src_tar
from utils.utils import fix_random_seed, lr_scheduler_full, data_load_noimg_ssda


def train_source_test_target(args):
    X_src, y_src, X_tar, y_tar = read_seed_src_tar(args)
    dset_loaders = data_load_noimg_ssda(X_src, y_src, X_tar, y_tar, args)

    netF, netC = network.backbone_net(args, args.bottleneck)
    netF.load_state_dict(tr.load(args.mdl_init_dir + 'netF.pt'))
    netC.load_state_dict(tr.load(args.mdl_init_dir + 'netC.pt'))
    base_network = nn.Sequential(netF, netC)
    optimizer = optim.SGD(base_network.parameters(), lr=args.lr)

    max_iter = args.max_epoch * len(dset_loaders["target_tr"])
    interval_iter = max_iter // 10
    args.max_iter = max_iter
    iter_num = 0

    netF.train()
    netC.train()

    while iter_num < max_iter:
        try:
            inputs_target_tr, labels_target_tr = next(iter_target_tr)
        except:
            iter_target_tr = iter(dset_loaders["target_tr"])
            inputs_target_tr, labels_target_tr = next(iter_target_tr)

        if inputs_target_tr.size(0) == 1:
            continue

        iter_num += 1
        lr_scheduler_full(optimizer, init_lr=args.lr, iter_num=iter_num, max_iter=args.max_iter)

        inputs_data, inputs_label = inputs_target_tr.cuda(), labels_target_tr.cuda()

        feas, output = netC(netF(inputs_data))
        classifier_loss = nn.CrossEntropyLoss()(output, inputs_label)

        optimizer.zero_grad()
        classifier_loss.backward()
        optimizer.step()

        if iter_num % (interval_iter * 2) == 0 or iter_num == max_iter:
            netF.eval()
            netC.eval()

            acc_t_te, _ = utils.cal_acc_noimg(dset_loaders["Target"], netF, netC)
            log_str = 'Task: {}, Iter:{}/{}; Acc = {:.2f}%'.format(
                args.task_str, iter_num, max_iter, acc_t_te)
            print(log_str)
            netF.train()
            netC.train()

    return acc_t_te


if __name__ == "__main__":
    data_name = 'SEED'
    if data_name == 'SEED': chn, class_num, trial_num = 62, 3, 3394
    focus_domain_idx = [0, 1, 2]
    domain_list = ['S' + str(i) for i in focus_domain_idx]
    num_domain = len(domain_list)

    args = argparse.Namespace(bottleneck=64, lr=0.01, lr_decay1=0.1, lr_decay2=1.0,
                              epsilon=1e-05, layer='wn', smooth=0, is_save=False,
                              N=num_domain, chn=chn, trial=trial_num, class_num=class_num)

    args.dset = data_name
    args.method = 'T+DNN'
    args.backbone = 'ShallowNet'
    args.batch_size = 32
    args.max_epoch = 50
    args.input_dim = 310
    args.norm = 'zscore'
    args.bz_tar_tr = int(args.batch_size / 2)
    args.bz_tar_te = args.batch_size
    args.mdl_init_dir = 'outputs/mdl_init/' + args.dset + '/'
    args.tar_lbl_rate = 5  # [5, 10, ..., 50]/100

    os.environ["CUDA_VISIBLE_DEVICES"] = '0'
    args.data_env = 'gpu'  # 'local'
    args.seed = 2022  # 2021~2023, repeat three times
    fix_random_seed(args.seed)
    tr.backends.cudnn.deterministic = True

    noise_list = np.linspace(0, 100, 11).tolist()
    num_test = len(noise_list)
    acc_all = np.zeros(num_test)
    s, t = 0, 1
    for ns in range(num_test):
        args.noise_rate = np.round(noise_list[ns] / 100, 2)
        dset_n = args.dset + '_' + str(args.noise_rate)
        print(dset_n, args.method)
        info_str = '\nnoise %s: %s --> %s' % (str(noise_list[ns]), domain_list[s], domain_list[t])
        print(info_str)
        args.src, args.tar = focus_domain_idx[s], focus_domain_idx[t]
        args.task_str = domain_list[s] + '_' + domain_list[t]
        print(args)

        acc_all[ns] = train_source_test_target(args)
        print('\nSub acc: ', np.round(acc_all, 2))
        print('Avg acc: ', np.round(np.mean(acc_all), 2))

    acc_sub_str = str(np.round(acc_all, 2).tolist())
    acc_mean_str = str(np.round(np.mean(acc_all), 2).tolist())

--------------------------------------------------------------------------------
/NT_Noise/demo_ssda_seed_finetune.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
# A Survey on Negative Transfer
# https://github.com/chamwen/NT-Benchmark
import argparse
import os
import os.path as osp
import numpy as np
import torch as tr
import torch.nn as nn
import torch.optim as optim
import torch.utils.data as Data
from utils import network, utils
from utils.utils import fix_random_seed, op_copy, lr_scheduler, get_idx_ssda_seed
from utils.dataloader import read_seed_single


def data_load(X, y, args):
    dset_loaders = {}
    train_bs = args.batch_size

    idx_train, idx_test = get_idx_ssda_seed(y, args.tar_lbl_rate)

    data_tar_tr = Data.TensorDataset(X[idx_train, :], y[idx_train])
    data_tar_te = Data.TensorDataset(X[idx_test, :], y[idx_test])

    dset_loaders["target_tr"] = Data.DataLoader(data_tar_tr, batch_size=train_bs, shuffle=True)
    dset_loaders["Target"] = Data.DataLoader(data_tar_te, batch_size=train_bs * 3, shuffle=False)
    return dset_loaders


def train_source_test_target(args):
    X_tar, y_tar = read_seed_single(args, args.tar)
    dset_loaders = data_load(X_tar, y_tar, args)

    netF, netC = network.backbone_net(args, args.bottleneck)

    modelpath = args.output_dir_src + '/source_F.pt'
    netF.load_state_dict(tr.load(modelpath))
    modelpath = args.output_dir_src + '/source_C.pt'
    netC.load_state_dict(tr.load(modelpath))
    netF.eval()

    for k, v in netF.named_parameters():
        v.requires_grad = False

    param_group = []
    for k, v in netC.named_parameters():
        if args.lr_decay1 > 0:
            param_group += [{'params': v, 'lr': args.lr * 0.1}]
        else:
            v.requires_grad = False

    optimizer = optim.SGD(param_group)
    optimizer = op_copy(optimizer)

    max_iter = args.max_epoch * len(dset_loaders["target_tr"])
    interval_iter = max_iter // 10
    args.max_iter = max_iter
    iter_num = 0
    netC.train()

    while iter_num < max_iter:
        try:
            inputs_target_tr, labels_target_tr = next(iter_target_tr)
        except:
            iter_target_tr = iter(dset_loaders["target_tr"])
            inputs_target_tr, labels_target_tr = next(iter_target_tr)

        if inputs_target_tr.size(0) == 1:
            continue

        iter_num += 1
        lr_scheduler(optimizer, iter_num=iter_num, max_iter=max_iter)

        inputs_data, inputs_label = inputs_target_tr.cuda(), labels_target_tr.cuda()
        _, output = netC(netF(inputs_data))
        classifier_loss = nn.CrossEntropyLoss()(output, inputs_label)

        optimizer.zero_grad()
        classifier_loss.backward()
        optimizer.step()

        if iter_num % interval_iter == 0 or iter_num == max_iter:
            netC.eval()

            acc_t_te, _ = utils.cal_acc_noimg(dset_loaders["Target"], netF, netC)
            log_str = 'Task: {}, Iter:{}/{}; Acc = {:.2f}%'.format(args.task_str, iter_num, max_iter, acc_t_te)
            print(log_str)

            netC.train()

    return acc_t_te


if __name__ == "__main__":
    data_name = 'SEED'
    if data_name == 'SEED': chn, class_num, trial_num = 62, 3, 3394
    focus_domain_idx = [0, 1, 2]
    domain_list = ['S' + str(i) for i in focus_domain_idx]
    num_domain = len(domain_list)

    args = argparse.Namespace(bottleneck=64, lr=0.01, lr_decay1=0.1,
                              epsilon=1e-05, layer='wn', smooth=0, is_save=False,
                              N=num_domain, chn=chn, trial=trial_num, class_num=class_num)

    args.dset = data_name
    args.method = 'Finetune'
    args.backbone = 'ShallowNet'
    args.batch_size = 32
    args.max_epoch = 50
    args.input_dim = 310
    args.norm = 'zscore'
    args.bz_tar_tr = int(args.batch_size / 2)
    args.bz_tar_te = args.batch_size
    args.mdl_init_dir = 'outputs/mdl_init/' + args.dset + '/'
    args.tar_lbl_rate = 5  # [5, 10, ..., 50]/100

    os.environ["CUDA_VISIBLE_DEVICES"] = '2'
    args.data_env = 'gpu'  # 'local'
    args.seed = 2022  # 2021~2023, repeat three times
    fix_random_seed(args.seed)
    tr.backends.cudnn.deterministic = True

    noise_list = np.linspace(0, 100, 11).tolist()
    num_test = len(noise_list)
    acc_all = np.zeros(num_test)
    s, t = 0, 1
    for ns in range(num_test):
        args.noise_rate = np.round(noise_list[ns] / 100, 2)
        dset_n = args.dset + '_' + str(args.noise_rate)
        print(dset_n, args.method)
        info_str = '\nnoise %s: %s --> %s' % (str(noise_list[ns]), domain_list[s], domain_list[t])
        print(info_str)
        args.src, args.tar = focus_domain_idx[s], focus_domain_idx[t]
        args.task_str = domain_list[s] + '_' + domain_list[t]

        mdl_path = 'outputs/models/'
        args.output = mdl_path + dset_n + '/source/'
        args.name_src = domain_list[s]
        args.output_dir_src = osp.join(args.output, args.name_src)
        print(args)

        acc_all[ns] = train_source_test_target(args)
        print('\nSub acc: ', np.round(acc_all, 2))
        print('Avg acc: ', np.round(np.mean(acc_all), 2))

    acc_sub_str = str(np.round(acc_all, 2).tolist())
    acc_mean_str = str(np.round(np.mean(acc_all), 2).tolist())
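Note: the SSDA scripts split the target subject into a small labeled pool and an unlabeled test pool via utils.utils.get_idx_ssda_seed, driven by args.tar_lbl_rate. The helper itself is outside this excerpt; a minimal sketch of a compatible split (the per-class stratified strategy and the internal seed are assumptions):

import numpy as np

def get_idx_ssda_seed(y, tar_lbl_rate, seed=2022):
    # Take tar_lbl_rate percent of each class as the labeled target pool;
    # all remaining samples stay unlabeled and are used for evaluation.
    rng = np.random.RandomState(seed)
    y = np.asarray(y)
    idx_train = []
    for c in np.unique(y):
        idx_c = np.where(y == c)[0]
        rng.shuffle(idx_c)
        num_lbl = max(1, int(len(idx_c) * tar_lbl_rate / 100))
        idx_train.extend(idx_c[:num_lbl].tolist())
    idx_train = sorted(idx_train)
    idx_test = sorted(set(range(len(y))) - set(idx_train))
    return idx_train, idx_test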
--------------------------------------------------------------------------------
/NT_Noise/demo_uda_seed_dan.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
# A Survey on Negative Transfer
# https://github.com/chamwen/NT-Benchmark
import numpy as np
import argparse
import os
import torch as tr
import torch.nn as nn
import torch.optim as optim
from utils import network, loss, utils
from utils.dataloader import read_seed_src_tar
from utils.utils import lr_scheduler_full, fix_random_seed, data_load_noimg
from utils.loss import MultipleKernelMaximumMeanDiscrepancy, GaussianKernel


def train_target(args):
    X_src, y_src, X_tar, y_tar = read_seed_src_tar(args)
    dset_loaders = data_load_noimg(X_src, y_src, X_tar, y_tar, args)

    netF, netC = network.backbone_net(args, args.bottleneck)
    netF.load_state_dict(tr.load(args.mdl_init_dir + 'netF.pt'))
    netC.load_state_dict(tr.load(args.mdl_init_dir + 'netC.pt'))
    base_network = nn.Sequential(netF, netC)
    optimizer = optim.SGD(base_network.parameters(), lr=args.lr)

    max_iter = args.max_epoch * len(dset_loaders["source"])
    interval_iter = max_iter // 10
    args.max_iter = max_iter
    iter_num = 0
    base_network.train()

    while iter_num < max_iter:
        try:
            inputs_source, labels_source = next(iter_source)
        except:
            iter_source = iter(dset_loaders["source"])
            inputs_source, labels_source = next(iter_source)

        try:
            inputs_target, _ = next(iter_target)
        except:
            iter_target = iter(dset_loaders["target"])
            inputs_target, _ = next(iter_target)

        if inputs_source.size(0) == 1:
            continue

        iter_num += 1
        lr_scheduler_full(optimizer, init_lr=args.lr, iter_num=iter_num, max_iter=args.max_iter)

        inputs_source, inputs_target, labels_source = inputs_source.cuda(), inputs_target.cuda(), labels_source.cuda()
        features_source, outputs_source = base_network(inputs_source)
        features_target, outputs_target = base_network(inputs_target)

        # new version img loss
        args.non_linear = False
        args.trade_off = 1.0
        classifier_loss = nn.CrossEntropyLoss()(outputs_source, labels_source)
        mkmmd_loss = MultipleKernelMaximumMeanDiscrepancy(
            kernels=[GaussianKernel(alpha=2 ** k) for k in range(-3, 2)],
            linear=not args.non_linear
        )
        discrepancy_loss = mkmmd_loss(features_source, features_target)
        total_loss = classifier_loss + discrepancy_loss * args.trade_off

        optimizer.zero_grad()
        total_loss.backward()
        optimizer.step()

        if iter_num % interval_iter == 0 or iter_num == max_iter:
            base_network.eval()

            acc_t_te = utils.cal_acc_base(dset_loaders["Target"], base_network)
            log_str = 'Task: {}, Iter:{}/{}; Acc = {:.2f}%'.format(args.task_str, iter_num, max_iter, acc_t_te)
            print(log_str)

            base_network.train()

    return acc_t_te


if __name__ == '__main__':

    data_name = 'SEED'
    if data_name == 'SEED': chn, class_num, trial_num = 62, 3, 3394
    focus_domain_idx = [0, 1, 2]
    domain_list = ['S' + str(i) for i in focus_domain_idx]
    num_domain = len(domain_list)

    args = argparse.Namespace(bottleneck=64, lr=0.01, lr_decay1=0.1, lr_decay2=1.0,
                              epsilon=1e-05, layer='wn', smooth=0,
                              N=num_domain, chn=chn, class_num=class_num)

    args.dset = data_name
    args.method = 'DAN'
    args.backbone = 'ShallowNet'
    args.batch_size = 32
    args.max_epoch = 50
    args.input_dim = 310
    args.norm = 'zscore'
    args.mdl_init_dir = 'outputs/mdl_init/' + args.dset + '/'

    os.environ["CUDA_VISIBLE_DEVICES"] = '5'
    args.data_env = 'gpu'  # 'local'
    args.seed = 2022  # 2021~2023, repeat three times
    fix_random_seed(args.seed)
    tr.backends.cudnn.deterministic = True

    noise_list = np.linspace(0, 100, 11).tolist()
    num_test = len(noise_list)
    acc_all = np.zeros(num_test)
    s, t = 0, 1
    for ns in range(num_test):
        args.noise_rate = np.round(noise_list[ns] / 100, 2)
        dset_n = args.dset + '_' + str(args.noise_rate)
        print(dset_n, args.method)
        info_str = '\nnoise %s: %s --> %s' % (str(noise_list[ns]), domain_list[s], domain_list[t])
        print(info_str)
        args.src, args.tar = focus_domain_idx[s], focus_domain_idx[t]
        args.task_str = domain_list[s] + '_' + domain_list[t]
        print(args)

        acc_all[ns] = train_target(args)
        print('\nSub acc: ', np.round(acc_all, 3))
        print('Avg acc: ', np.round(np.mean(acc_all), 3))

    acc_sub_str = str(np.round(acc_all, 3).tolist())
    acc_mean_str = str(np.round(np.mean(acc_all), 3).tolist())
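Note: MultipleKernelMaximumMeanDiscrepancy and GaussianKernel are imported from utils.loss and follow the Transfer-Learning-Library style API; their implementation is not reproduced in this excerpt. A condensed sketch of the quantity DAN minimizes, a multi-kernel MMD between source and target features (a simplification with a mean-distance bandwidth heuristic, not the exact vendored code):

import torch as tr

def gaussian_kernel(x, y, alpha):
    # exp(-||a - b||^2 / (2 * alpha * sigma^2)), with sigma^2 tied to the
    # mean pairwise squared distance (the usual bandwidth heuristic).
    dist2 = tr.cdist(x, y) ** 2
    sigma2 = dist2.detach().mean()
    return tr.exp(-dist2 / (2 * alpha * sigma2 + 1e-8))

def mk_mmd(feat_s, feat_t, alphas=(0.125, 0.25, 0.5, 1.0, 2.0)):
    # Biased multi-kernel MMD^2 averaged over the kernel family:
    # mean_k [ E k(s,s) + E k(t,t) - 2 E k(s,t) ].
    loss = feat_s.new_zeros(())
    for a in alphas:
        loss = loss + gaussian_kernel(feat_s, feat_s, a).mean() \
                    + gaussian_kernel(feat_t, feat_t, a).mean() \
                    - 2 * gaussian_kernel(feat_s, feat_t, a).mean()
    return loss / len(alphas)

The alphas mirror the demo's kernel list, GaussianKernel(alpha=2 ** k) for k in range(-3, 2); the vendored class additionally offers a linear-time estimator (linear=True), which the sketch omits.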
--------------------------------------------------------------------------------
/NT_Noise/demo_uda_seed_dann.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
# A Survey on Negative Transfer
# https://github.com/chamwen/NT-Benchmark
import numpy as np
import argparse
import os
import torch as tr
import torch.nn as nn
import torch.optim as optim
from utils import network, loss, utils
from utils.dataloader import read_seed_src_tar
from utils.utils import lr_scheduler_full, fix_random_seed, data_load_noimg
from utils.loss import CELabelSmooth, ReverseLayerF


def train_target(args):
    X_src, y_src, X_tar, y_tar = read_seed_src_tar(args)
    dset_loaders = data_load_noimg(X_src, y_src, X_tar, y_tar, args)

    netF, netC = network.backbone_net(args, args.bottleneck)
    netF.load_state_dict(tr.load(args.mdl_init_dir + 'netF.pt'))
    netC.load_state_dict(tr.load(args.mdl_init_dir + 'netC.pt'))
    base_network = nn.Sequential(netF, netC)

    ad_net = network.feat_classifier(type=args.layer, class_num=2, bottleneck_dim=args.bottleneck).cuda()
    ad_net.load_state_dict(tr.load(args.mdl_init_dir + 'netD_clf.pt'))

    optimizer_f = optim.SGD(netF.parameters(), lr=args.lr)
    optimizer_c = optim.SGD(netC.parameters(), lr=args.lr)
    optimizer_d = optim.SGD(ad_net.parameters(), lr=args.lr)

    max_iter = args.max_epoch * len(dset_loaders["source"])
    interval_iter = max_iter // 10
    args.max_iter = max_iter
    iter_num = 0
    base_network.train()

    while iter_num < max_iter:
        try:
            inputs_source, labels_source = next(iter_source)
        except:
            iter_source = iter(dset_loaders["source"])
            inputs_source, labels_source = next(iter_source)

        try:
            inputs_target, _ = next(iter_target)
        except:
            iter_target = iter(dset_loaders["target"])
            inputs_target, _ = next(iter_target)

        if inputs_source.size(0) == 1:
            continue

        iter_num += 1
        lr_scheduler_full(optimizer_f, init_lr=args.lr, iter_num=iter_num, max_iter=args.max_iter)
        lr_scheduler_full(optimizer_c, init_lr=args.lr, iter_num=iter_num, max_iter=args.max_iter)
        lr_scheduler_full(optimizer_d, init_lr=args.lr, iter_num=iter_num, max_iter=args.max_iter)

        inputs_source, inputs_target, labels_source = inputs_source.cuda(), inputs_target.cuda(), labels_source.cuda()
        features_source, outputs_source = base_network(inputs_source)
        features_target, outputs_target = base_network(inputs_target)

        # new version img loss
        p = float(iter_num) / max_iter
        alpha = 2. / (1. + np.exp(-10 * p)) - 1
        reverse_source = ReverseLayerF.apply(features_source, alpha)
        reverse_target = ReverseLayerF.apply(features_target, alpha)
        _, domain_output_s = ad_net(reverse_source)
        _, domain_output_t = ad_net(reverse_target)
        domain_label_s = tr.ones(inputs_source.size()[0]).long().cuda()
        domain_label_t = tr.zeros(inputs_target.size()[0]).long().cuda()

        classifier_loss = CELabelSmooth(num_classes=args.class_num, epsilon=args.smooth)(outputs_source, labels_source)
        adv_loss = nn.CrossEntropyLoss()(domain_output_s, domain_label_s) + \
                   nn.CrossEntropyLoss()(domain_output_t, domain_label_t)
        total_loss = classifier_loss + adv_loss

        optimizer_f.zero_grad()
        optimizer_c.zero_grad()
        optimizer_d.zero_grad()
        total_loss.backward()
        optimizer_f.step()
        optimizer_c.step()
        optimizer_d.step()

        if iter_num % interval_iter == 0 or iter_num == max_iter:
            base_network.eval()

            acc_t_te = utils.cal_acc_base(dset_loaders["Target"], base_network)
            log_str = 'Task: {}, Iter:{}/{}; Acc = {:.2f}%'.format(args.task_str, iter_num, max_iter, acc_t_te)
            print(log_str)

            base_network.train()

    return acc_t_te


if __name__ == '__main__':

    data_name = 'SEED'
    if data_name == 'SEED': chn, class_num, trial_num = 62, 3, 3394
    focus_domain_idx = [0, 1, 2]
    domain_list = ['S' + str(i) for i in focus_domain_idx]
    num_domain = len(domain_list)

    args = argparse.Namespace(bottleneck=64, lr=0.01, lr_decay1=0.1, lr_decay2=1.0,
                              epsilon=1e-05, layer='wn', smooth=0,
                              N=num_domain, chn=chn, class_num=class_num)

    args.dset = data_name
    args.method = 'DANN'
    args.backbone = 'ShallowNet'
    args.batch_size = 32
    args.max_epoch = 50  # 50 is enough to converge
    args.input_dim = 310
    args.norm = 'zscore'
    args.mdl_init_dir = 'outputs/mdl_init/' + args.dset + '/'

    os.environ["CUDA_VISIBLE_DEVICES"] = '3'
    args.data_env = 'gpu'  # 'local'
    args.seed = 2022  # 2021~2023, repeat three times
    fix_random_seed(args.seed)
    tr.backends.cudnn.deterministic = True

    noise_list = np.linspace(0, 100, 11).tolist()
    num_test = len(noise_list)
    acc_all = np.zeros(num_test)
    s, t = 0, 1
    for ns in range(num_test):
        args.noise_rate = np.round(noise_list[ns] / 100, 2)
        dset_n = args.dset + '_' + str(args.noise_rate)
        print(dset_n, args.method)
        info_str = '\nnoise %s: %s --> %s' % (str(noise_list[ns]), domain_list[s], domain_list[t])
        print(info_str)
        args.src, args.tar = focus_domain_idx[s], focus_domain_idx[t]
        args.task_str = domain_list[s] + '_' + domain_list[t]
        print(args)

        acc_all[ns] = train_target(args)
        print('\nSub acc: ', np.round(acc_all, 3))
        print('Avg acc: ', np.round(np.mean(acc_all), 3))

    acc_sub_str = str(np.round(acc_all, 3).tolist())
    acc_mean_str = str(np.round(np.mean(acc_all), 3).tolist())
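Note: DANN's adversarial signal comes from utils.loss.ReverseLayerF, a gradient reversal layer whose body is not part of this excerpt. It almost certainly follows the standard DANN implementation:

import torch
from torch.autograd import Function

class ReverseLayerF(Function):
    # Identity in the forward pass; multiplies the gradient by -alpha in
    # the backward pass, so the feature extractor learns to fool the
    # domain classifier while the domain classifier trains normally.
    @staticmethod
    def forward(ctx, x, alpha):
        ctx.alpha = alpha
        return x.view_as(x)

    @staticmethod
    def backward(ctx, grad_output):
        return grad_output.neg() * ctx.alpha, None

The schedule alpha = 2 / (1 + exp(-10 p)) - 1 in the demo ramps the reversed gradient from 0 to 1 over training, which stabilizes the early adversarial game.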
--------------------------------------------------------------------------------
/NT_Noise/demo_uda_seed_dnn.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
# A Survey on Negative Transfer
# https://github.com/chamwen/NT-Benchmark
import numpy as np
import argparse
import os
import torch as tr
import torch.nn as nn
import torch.optim as optim
from utils import network, utils
from utils.dataloader import read_seed_src_tar
from utils.utils import fix_random_seed, lr_scheduler_full, data_load_noimg


def train_source_test_target(args):
    X_src, y_src, X_tar, y_tar = read_seed_src_tar(args)
    dset_loaders = data_load_noimg(X_src, y_src, X_tar, y_tar, args)

    netF, netC = network.backbone_net(args, args.bottleneck)
    netF.load_state_dict(tr.load(args.mdl_init_dir + 'netF.pt'))
    netC.load_state_dict(tr.load(args.mdl_init_dir + 'netC.pt'))
    base_network = nn.Sequential(netF, netC)
    optimizer = optim.SGD(base_network.parameters(), lr=args.lr)

    acc_init = 0
    max_iter = args.max_epoch * len(dset_loaders["source_tr"])
    interval_iter = max_iter // 10
    args.max_iter = max_iter
    iter_num = 0
    base_network.train()

    while iter_num < max_iter:
        try:
            inputs_source, labels_source = next(source_loader_iter)
        except:
            source_loader_iter = iter(dset_loaders["source_tr"])
            inputs_source, labels_source = next(source_loader_iter)

        if inputs_source.size(0) == 1:
            continue

        iter_num += 1
        lr_scheduler_full(optimizer, init_lr=args.lr, iter_num=iter_num, max_iter=args.max_iter)

        inputs_source, labels_source = inputs_source.cuda(), labels_source.cuda()
        features_source, outputs_source = base_network(inputs_source)

        # # CE smooth loss
        # classifier_loss = loss.CELabelSmooth(reduction='none', num_classes=class_num, epsilon=args.smooth)(
        #     outputs_source, labels_source)
        classifier_loss = nn.CrossEntropyLoss()(outputs_source, labels_source)

        optimizer.zero_grad()
        classifier_loss.backward()
        optimizer.step()

        if iter_num % interval_iter == 0 or iter_num == max_iter:
            base_network.eval()

            acc_s_te = utils.cal_acc_base(dset_loaders["source_te"], base_network)
            acc_t_te = utils.cal_acc_base(dset_loaders["Target"], base_network)
            log_str = 'Task: {}, Iter:{}/{}; Val_acc = {:.2f}%; Test_Acc = {:.2f}%'.format(args.task_str, iter_num,
                                                                                           max_iter, acc_s_te, acc_t_te)
            print(log_str)
            base_network.train()

            if acc_s_te >= acc_init:
                acc_init = acc_s_te
                acc_tar_src_best = acc_t_te

    return acc_tar_src_best


if __name__ == '__main__':

    data_name = 'SEED'
    if data_name == 'SEED': chn, class_num, trial_num = 62, 3, 3394
    focus_domain_idx = [0, 1, 2]
    domain_list = ['S' + str(i) for i in focus_domain_idx]
    num_domain = len(domain_list)

    args = argparse.Namespace(bottleneck=64, lr=0.01, lr_decay1=0.1, lr_decay2=1.0,
                              epsilon=1e-05, layer='wn', smooth=0, is_save=False,
                              N=num_domain, chn=chn, trial=trial_num, class_num=class_num)

    args.dset = data_name
    args.method = 'DNN'
    args.backbone = 'ShallowNet'
    args.batch_size = 32
    args.max_epoch = 50
    args.input_dim = 310
    args.norm = 'zscore'
    args.mdl_init_dir = 'outputs/mdl_init/' + args.dset + '/'

    os.environ["CUDA_VISIBLE_DEVICES"] = '5'
    args.data_env = 'gpu'  # 'local'
    args.seed = 2022  # 2021~2023, repeat three times
    fix_random_seed(args.seed)
    tr.backends.cudnn.deterministic = True
    print(args)

    noise_list = np.linspace(0, 100, 11).tolist()
    num_test = len(noise_list)
    acc_all = np.zeros(num_test)
    s, t = 0, 1
    for ns in range(num_test):
        args.noise_rate = np.round(noise_list[ns] / 100, 2)
        dset_n = args.dset + '_' + str(args.noise_rate)
        print(dset_n, args.method)
        info_str = '\nnoise %s: %s --> %s' % (str(noise_list[ns]), domain_list[s], domain_list[t])
        print(info_str)
        args.src, args.tar = focus_domain_idx[s], focus_domain_idx[t]
        args.task_str = domain_list[s] + '_' + domain_list[t]
        print(args)

        acc_all[ns] = train_source_test_target(args)
        print('\nSub acc: ', np.round(acc_all, 3))
        print('Avg acc: ', np.round(np.mean(acc_all), 3))

    acc_sub_str = str(np.round(acc_all, 3).tolist())
    acc_mean_str = str(np.round(np.mean(acc_all), 3).tolist())

--------------------------------------------------------------------------------
/NT_Noise/demo_uda_seed_mcc.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
# A Survey on Negative Transfer
# https://github.com/chamwen/NT-Benchmark
import numpy as np
import argparse
import os
import torch as tr
import torch.nn as nn
import torch.optim as optim
from utils import network, loss, utils
from utils.dataloader import read_seed_src_tar
from utils.utils import lr_scheduler_full, fix_random_seed, data_load_noimg
from utils.loss import ClassConfusionLoss, CELabelSmooth


def train_target(args):
    X_src, y_src, X_tar, y_tar = read_seed_src_tar(args)
    dset_loaders = data_load_noimg(X_src, y_src, X_tar, y_tar, args)

    netF, netC = network.backbone_net(args, args.bottleneck)
    netF.load_state_dict(tr.load(args.mdl_init_dir + 'netF.pt'))
    netC.load_state_dict(tr.load(args.mdl_init_dir + 'netC.pt'))
    base_network = nn.Sequential(netF, netC)
    optimizer = optim.SGD(base_network.parameters(), lr=args.lr)

    max_iter = args.max_epoch * len(dset_loaders["source"])
    interval_iter = max_iter // 10
    args.max_iter = max_iter
    iter_num = 0
    base_network.train()

    while iter_num < max_iter:
        try:
            inputs_source, labels_source = next(iter_source)
        except:
            iter_source = iter(dset_loaders["source"])
            inputs_source, labels_source = next(iter_source)

        try:
            inputs_target, _ = next(iter_target)
        except:
            iter_target = iter(dset_loaders["target"])
            inputs_target, _ = next(iter_target)

        if inputs_source.size(0) == 1:
            continue

        iter_num += 1
        lr_scheduler_full(optimizer, init_lr=args.lr, iter_num=iter_num, max_iter=args.max_iter)

        inputs_source, inputs_target, labels_source = inputs_source.cuda(), inputs_target.cuda(), labels_source.cuda()
        features_source, outputs_source = base_network(inputs_source)
        features_target, outputs_target = base_network(inputs_target)

        # new version img loss
        args.loss_trade_off = 1.0
        args.t_mcc = 2
        transfer_loss = ClassConfusionLoss(t=args.t_mcc)(outputs_target)
        classifier_loss = CELabelSmooth(num_classes=args.class_num, epsilon=args.smooth)(outputs_source, labels_source)
        total_loss = args.loss_trade_off * transfer_loss + classifier_loss

        optimizer.zero_grad()
        total_loss.backward()
        optimizer.step()

        if iter_num % interval_iter == 0 or iter_num == max_iter:
            base_network.eval()

            acc_t_te = utils.cal_acc_base(dset_loaders["Target"], base_network)
            log_str = 'Task: {}, Iter:{}/{}; Acc = {:.2f}%'.format(args.task_str, iter_num, max_iter, acc_t_te)
            print(log_str)

            base_network.train()

    return acc_t_te


if __name__ == '__main__':

    data_name = 'SEED'
    if data_name == 'SEED': chn, class_num, trial_num = 62, 3, 3394
    focus_domain_idx = [0, 1, 2]
    domain_list = ['S' + str(i) for i in focus_domain_idx]
    num_domain = len(domain_list)

    args = argparse.Namespace(bottleneck=64, lr=0.01, lr_decay1=0.1, lr_decay2=1.0,
                              epsilon=1e-05, layer='wn', smooth=0,
                              N=num_domain, chn=chn, class_num=class_num)

    args.dset = data_name
    args.method = 'MCC'
    args.backbone = 'ShallowNet'
    args.batch_size = 32
    args.max_epoch = 50
    args.input_dim = 310
    args.norm = 'zscore'
    args.mdl_init_dir = 'outputs/mdl_init/' + args.dset + '/'

    os.environ["CUDA_VISIBLE_DEVICES"] = '6'
    args.data_env = 'gpu'  # 'local'
    args.seed = 2022  # 2021~2023, repeat three times
    fix_random_seed(args.seed)
    tr.backends.cudnn.deterministic = True

    noise_list = np.linspace(0, 100, 11).tolist()
    num_test = len(noise_list)
    acc_all = np.zeros(num_test)
    s, t = 0, 1
    for ns in range(num_test):
        args.noise_rate = np.round(noise_list[ns] / 100, 2)
        dset_n = args.dset + '_' + str(args.noise_rate)
        print(dset_n, args.method)
        info_str = '\nnoise %s: %s --> %s' % (str(noise_list[ns]), domain_list[s], domain_list[t])
        print(info_str)
        args.src, args.tar = focus_domain_idx[s], focus_domain_idx[t]
        args.task_str = domain_list[s] + '_' + domain_list[t]
        print(args)

        acc_all[ns] = train_target(args)
        print('\nSub acc: ', np.round(acc_all, 3))
        print('Avg acc: ', np.round(np.mean(acc_all), 3))

    acc_sub_str = str(np.round(acc_all, 3).tolist())
    acc_mean_str = str(np.round(np.mean(acc_all), 3).tolist())
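Note: ClassConfusionLoss implements MCC (Jin et al., "Minimum Class Confusion for Versatile Domain Adaptation", ECCV 2020) and is imported from utils.loss, which is not shown here. A faithful-in-spirit sketch of the published formulation (the entropy-based reweighting follows the paper; treat this as a reference, not the vendored code):

import torch as tr
import torch.nn as nn

class ClassConfusionLoss(nn.Module):
    def __init__(self, t=2.0):
        super().__init__()
        self.t = t  # temperature for probability rescaling

    def forward(self, logits):
        n, c = logits.shape
        probs = nn.Softmax(dim=1)(logits / self.t)
        # Entropy-based weights: confident samples contribute more.
        ent = -(probs * tr.log(probs + 1e-5)).sum(dim=1)
        weight = (1 + tr.exp(-ent)).detach()
        weight = n * weight / weight.sum()
        # Weighted class-confusion matrix, normalized per row.
        confusion = probs.t() @ (weight.unsqueeze(1) * probs)
        confusion = confusion / confusion.sum(dim=1, keepdim=True)
        # Minimize between-class (off-diagonal) confusion.
        return (confusion.sum() - confusion.trace()) / c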
--------------------------------------------------------------------------------
/NT_Noise/save_init_model.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
# A Survey on Negative Transfer
# https://github.com/chamwen/NT-Benchmark
import argparse
import os
import random
import os.path as osp
import torch as tr
import numpy as np
import utils.network as network


def create_folder(output_dir):
    if not osp.exists(output_dir):
        os.system('mkdir -p ' + output_dir)
    if not osp.exists(output_dir):
        os.mkdir(output_dir)


if __name__ == '__main__':
    seed = 2022
    tr.manual_seed(seed)
    tr.cuda.manual_seed(seed)
    np.random.seed(seed)
    random.seed(seed)
    tr.cuda.manual_seed_all(seed)
    tr.backends.cudnn.deterministic = True
    mdl_init_dir = 'outputs/mdl_init/'
    dset_list = ['DomainNet', 'SEED']

    ###################################################################################
    # Img data
    args = argparse.Namespace(bottleneck=1024, net='resnet50', layer='wn', classifier='bn')
    args.class_num = 40
    output_dir = osp.join(mdl_init_dir, dset_list[0])
    create_folder(output_dir)

    if args.net[0:3] == 'res':
        netF = network.ResBase(res_name=args.net).cuda()
    elif args.net[0:3] == 'vgg':
        netF = network.VGGBase(vgg_name=args.net).cuda()
    netB = network.feat_bottleneck(type=args.classifier, feature_dim=netF.in_features,
                                   bottleneck_dim=args.bottleneck).cuda()
    netC = network.feat_classifier(type=args.layer, class_num=args.class_num, bottleneck_dim=args.bottleneck).cuda()
    netD_clf = network.feat_classifier(type=args.layer, class_num=2, bottleneck_dim=args.bottleneck).cuda()
    netD_full = network.AdversarialNetwork(args.bottleneck, 2048).cuda()

    tr.save(netF.state_dict(), osp.join(output_dir, "netF.pt"))
    tr.save(netB.state_dict(), osp.join(output_dir, "netB.pt"))
    tr.save(netC.state_dict(), osp.join(output_dir, "netC.pt"))
    tr.save(netD_clf.state_dict(), osp.join(output_dir, "netD_clf.pt"))
    tr.save(netD_full.state_dict(), osp.join(output_dir, "netD_full.pt"))
    # netF.load_state_dict(tr.load(osp.join(output_dir, "netF.pt")))
    print('\nfinished init of DomainNet data...')

    ###################################################################################
    # SEED data
    args = argparse.Namespace(bottleneck=64, backbone='ShallowNet', layer='wn')
    args.input_dim = 310
    args.class_num = 3
    output_dir = osp.join(mdl_init_dir, dset_list[1])
    create_folder(output_dir)

    netF, netC = network.backbone_net(args, args.bottleneck)
    netD_full = network.AdversarialNetwork(args.bottleneck, 20).cuda()
    netD_clf = network.feat_classifier(type=args.layer, class_num=2, bottleneck_dim=args.bottleneck).cuda()

    tr.save(netF.state_dict(), osp.join(output_dir, "netF.pt"))
    tr.save(netC.state_dict(), osp.join(output_dir, "netC.pt"))
    tr.save(netD_full.state_dict(), osp.join(output_dir, "netD_full.pt"))
    tr.save(netD_clf.state_dict(), osp.join(output_dir, "netD_clf.pt"))
    print('\nfinished init of SEED data...')

--------------------------------------------------------------------------------
/NT_Noise/source_train_seed.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
# A Survey on Negative Transfer
# https://github.com/chamwen/NT-Benchmark
import numpy as np
import argparse
import torch as tr
import torch.nn as nn
import torch.optim as optim
import torch.utils.data as Data
import os.path as osp
import os
from utils import network, loss, utils
from utils.loss import CELabelSmooth
from utils.dataloader import read_seed_single, obtain_train_val_source
from utils.utils import create_folder, lr_scheduler_full, fix_random_seed, add_label_noise_noimg


def data_load(X, y, args):
    dset_loaders = {}
    train_bs = args.batch_size
    tr.manual_seed(args.seed)
    trial_ins_num = args.trial

    if args.noise_rate > 0:
        y = add_label_noise_noimg(y, args.seed, args.class_num, args.noise_rate)

    id_train, id_val = obtain_train_val_source(y, trial_ins_num, args.validation)
    source_tr = Data.TensorDataset(X[id_train, :], y[id_train])
    dset_loaders['source_tr'] = Data.DataLoader(source_tr, batch_size=train_bs, shuffle=True, drop_last=True)

    source_te = Data.TensorDataset(X[id_val, :], y[id_val])
    dset_loaders['source_te'] = Data.DataLoader(source_te, batch_size=train_bs * 3, shuffle=False, drop_last=False)

    return dset_loaders


def train_source(args):  # within-subject validation
    X_src, y_src = read_seed_single(args, args.src)
    dset_loaders = data_load(X_src, y_src, args)

    netF, netC = network.backbone_net(args, args.bottleneck)
    netF.load_state_dict(tr.load(args.mdl_init_dir + 'netF.pt'))
    netC.load_state_dict(tr.load(args.mdl_init_dir + 'netC.pt'))
    base_network = nn.Sequential(netF, netC)
    optimizer = optim.SGD(base_network.parameters(), lr=args.lr)

    acc_init = 0
    max_iter = args.max_epoch * len(dset_loaders["source_tr"])  # source_tr: 80 batches
    interval_iter = max_iter // 10
    args.max_iter = max_iter
    iter_num = 0

    netF.train()
    netC.train()

    while iter_num < max_iter:
        try:
            inputs_source, labels_source = next(iter_source)
        except:
            iter_source = iter(dset_loaders['source_tr'])
            inputs_source, labels_source = next(iter_source)

        if inputs_source.size(0) == 1:
            continue

        iter_num += 1
        lr_scheduler_full(optimizer, init_lr=args.lr, iter_num=iter_num, max_iter=args.max_iter)

        inputs_source, labels_source = inputs_source.cuda(), labels_source.cuda()

        _, outputs_source = netC(netF(inputs_source))
        classifier_loss = CELabelSmooth(num_classes=args.class_num, epsilon=args.smooth)(outputs_source,
                                                                                         labels_source)

        optimizer.zero_grad()
        classifier_loss.backward()
        optimizer.step()

        if iter_num % interval_iter == 0 or iter_num == max_iter:
            netF.eval()
            netC.eval()

            acc_s_te, _ = utils.cal_acc_noimg(dset_loaders['source_te'], netF, netC)
            log_str = 'Task: {}, Iter:{}/{}; Val_acc = {:.2f}%'.format(args.name_src, iter_num, max_iter, acc_s_te)
            print(log_str)

            if acc_s_te >= acc_init:
                acc_init = acc_s_te
                best_netF = netF.state_dict()
                best_netC = netC.state_dict()

            netF.train()
            netC.train()

    tr.save(best_netF, osp.join(args.output_dir_src, "source_F.pt"))
    tr.save(best_netC, osp.join(args.output_dir_src, "source_C.pt"))

    return acc_s_te


if __name__ == '__main__':

    data_name = 'SEED'
    if data_name == 'SEED': chn, class_num, trial_num = 62, 3, 3394
    focus_domain_idx = [0, 1, 2]
    domain_list = ['S' + str(i) for i in focus_domain_idx]
    num_domain = len(domain_list)

    args = argparse.Namespace(bottleneck=64, lr=0.01, epsilon=1e-05, layer='wn',
                              smooth=0, chn=chn, trial=trial_num,
                              N=num_domain, class_num=class_num)
    args.dset = data_name
    args.method = 'single'
    args.backbone = 'ShallowNet'
    args.batch_size = 32
    args.max_epoch = 50
    args.input_dim = 310
    args.norm = 'zscore'
    args.validation = 'random'
    args.mdl_init_dir = 'outputs/mdl_init/' + args.dset + '/'

    os.environ["CUDA_VISIBLE_DEVICES"] = '4'
    args.data_env = 'gpu'  # 'local'
    args.seed = 2021
    fix_random_seed(args.seed)
    tr.backends.cudnn.deterministic = True

    args.local_dir = r'/mnt/ssd2/wenz/code/NT-Benchmark/NT_UDA/'
    args.result_dir = 'results/source/'

    noise_list = np.linspace(0, 100, 11).tolist()
    num_test = len(noise_list)
    acc_all = np.zeros(num_test)
    s = 0
    for ns in range(num_test):
        args.noise_rate = np.round(noise_list[ns] / 100, 2)
        dset_n = args.dset + '_' + str(args.noise_rate)
        args.src = focus_domain_idx[s]
        info_str = '========================== Within domain ' + domain_list[s] + ' =========================='
        print('\n', dset_n, args.method)
        print(info_str)

        mdl_path = 'outputs/models/'
        args.output = mdl_path + dset_n + '/source/'

        args.name_src = domain_list[s]
        args.output_dir_src = osp.join(args.output, args.name_src)
        create_folder(args.output_dir_src, args.data_env, args.local_dir)
        print(args)

        acc_all[ns] = train_source(args)
        print(np.round(acc_all, 2))
        print(np.round(np.mean(acc_all), 2))
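Note: the noise benchmark hinges on utils.utils.add_label_noise_noimg, which corrupts a fraction args.noise_rate of the source labels before training; the helper is outside this excerpt. A minimal sketch of symmetric label noise consistent with the call site add_label_noise_noimg(y, args.seed, args.class_num, args.noise_rate) (the uniform-flip policy is an assumption):

import numpy as np

def add_label_noise_noimg(y, seed, class_num, noise_rate):
    # Flip a noise_rate fraction of labels to a different class, uniformly
    # at random, reproducibly under the given seed. y is a long tensor.
    rng = np.random.RandomState(seed)
    y_noisy = y.clone()
    num_flip = int(len(y) * noise_rate)
    idx_flip = rng.choice(len(y), num_flip, replace=False)
    for i in idx_flip:
        candidates = [c for c in range(class_num) if c != int(y_noisy[i])]
        y_noisy[i] = int(rng.choice(candidates))
    return y_noisy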
--------------------------------------------------------------------------------
/NT_Noise/utils/LogRecord.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
# A Survey on Negative Transfer
# https://github.com/chamwen/NT-Benchmark
import os.path as osp
from datetime import datetime
from datetime import timedelta, timezone
from utils.utils import create_folder


class LogRecord:
    def __init__(self, args):
        self.args = args
        self.result_dir = args.result_dir
        self.data_env = 'gpu'
        self.data_name = args.dset
        self.method = args.method

    def log_init(self):
        create_folder(self.result_dir, self.args.data_env, self.args.local_dir)

        if self.data_env == 'local':
            time_str = datetime.utcnow().replace(tzinfo=timezone.utc).astimezone(
                timezone(timedelta(hours=8), name='Asia/Shanghai')).strftime("%Y-%m-%d_%H_%M_%S")
        if self.data_env == 'gpu':
            time_str = datetime.utcnow().replace(tzinfo=timezone.utc).strftime("%Y-%m-%d_%H_%M_%S")
        file_name_head = 'log_' + self.method + '_' + self.data_name + '_'
        self.args.out_file = open(osp.join(self.args.result_dir, file_name_head + time_str + '.txt'), 'w')
        self.args.out_file.write(self._print_args() + '\n')
        self.args.out_file.flush()
        return self.args

    def record(self, log_str):
        self.args.out_file.write(log_str + '\n')
        self.args.out_file.flush()
        return self.args

    def _print_args(self):
        s = "==========================================\n"
        for arg, content in self.args.__dict__.items():
            s += "{}:{}\n".format(arg, content)
        return s

--------------------------------------------------------------------------------
/NT_Noise/utils/__init__.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
# @Time    : 2022/1/11 11:43 PM
# @Author  : wenzhang
# @File    : __init__.py

--------------------------------------------------------------------------------
/NT_Noise/utils/data_list.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
# A Survey on Negative Transfer
# https://github.com/chamwen/NT-Benchmark
import numpy as np
from PIL import Image
from torch.utils.data import Dataset


def make_dataset(image_list, labels):
    if labels is not None:
        len_ = len(image_list)
        images = [(image_list[i].strip(), labels[i, :]) for i in range(len_)]
    else:
        if len(image_list[0].split('==')) > 2:
            images = [(val.split('==')[0], np.array([int(la) for la in val.split('==')[1:]])) for val in image_list]
        else:
            images = [(val.split('==')[0], int(val.split('==')[1])) for val in image_list]
    return images


def rgb_loader(path):
    with open(path, 'rb') as f:
        with Image.open(f) as img:
            return img.convert('RGB')


def l_loader(path):
    with open(path, 'rb') as f:
        with Image.open(f) as img:
            return img.convert('L')


class ImageList(Dataset):
    def __init__(self, image_list, labels=None, transform=None, target_transform=None, mode='RGB', weight=None):
        imgs = make_dataset(image_list, labels)
        if len(imgs) == 0:
            raise (RuntimeError("Found 0 images in subfolders"))

        self.imgs = imgs
        self.transform = transform
        self.target_transform = target_transform
        if mode == 'RGB':
            self.loader = rgb_loader
        elif mode == 'L':
            self.loader = l_loader
        self.weight = weight

    def __getitem__(self, index):
        path, target = self.imgs[index]
        img = self.loader(path)
        if self.transform is not None:
            img = self.transform(img)
        if self.target_transform is not None:
            target = self.target_transform(target)
        if self.weight is None:
            return img, target
        else:
            return img, target, self.weight[index]

    def __len__(self):
        return len(self.imgs)


class ImageList_idx(Dataset):
    def __init__(self, image_list, labels=None, transform=None, target_transform=None, mode='RGB'):
        imgs = make_dataset(image_list, labels)
        if len(imgs) == 0:
            raise (RuntimeError("Found 0 images in subfolders of: " + "\n"))

        self.imgs = imgs
        self.transform = transform
        self.target_transform = target_transform
        if mode == 'RGB':
            self.loader = rgb_loader
        elif mode == 'L':
            self.loader = l_loader

    def __getitem__(self, index):
        path, target = self.imgs[index]
        img = self.loader(path)
        if self.transform is not None:
            img = self.transform(img)
        if self.target_transform is not None:
            target = self.target_transform(target)

        return img, target, index

    def __len__(self):
        return len(self.imgs)


class ImageList_twice(Dataset):
    def __init__(self, image_list, labels=None, transform=None, target_transform=None, mode='RGB'):
        imgs = make_dataset(image_list, labels)
        if len(imgs) == 0:
            raise (RuntimeError("Found 0 images in subfolders"))

        self.imgs = imgs
        self.transform = transform
        self.target_transform = target_transform
        if mode == 'RGB':
            self.loader = rgb_loader
        elif mode == 'L':
            self.loader = l_loader

    def __getitem__(self, index):
        path, target = self.imgs[index]
        img = self.loader(path)
        if self.target_transform is not None:
            target = self.target_transform(target)
        if self.transform is not None:
            if type(self.transform).__name__ == 'list':
                img = [t(img) for t in self.transform]
            else:
                img = self.transform(img)

        return img, target

    def __len__(self):
        return len(self.imgs)
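Note: the *_list.txt files under checkpoint/ pair an image path with an integer label using '==' as the separator, which make_dataset parses above. A small usage sketch (the paths and labels are made up for illustration):

from torchvision import transforms
from utils.data_list import ImageList

# Each line: <path>==<label>, matching the format of the *_list.txt files.
image_list = [
    "/data/DomainNet/clipart/bird/bird_001.jpg==0",
    "/data/DomainNet/clipart/cat/cat_004.jpg==1",
]
dataset = ImageList(image_list, transform=transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
]))
img, label = dataset[0]  # decoded RGB tensor and its integer label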
| import torch as tr 6 | from torch.autograd import Variable 7 | import numpy as np 8 | from sklearn import preprocessing 9 | 10 | 11 | def read_syn_single(args, sub_idx): 12 | root_path = args.root_path 13 | pd_tar = pd.read_csv(root_path + sub_idx + ".csv", header=None) 14 | X, Y = pd_tar.iloc[:, :2].values, pd_tar.iloc[:, 2].values.astype(int) 15 | X = Variable(tr.from_numpy(X).float()) 16 | Y = tr.from_numpy(Y).long() 17 | 18 | return X, Y 19 | 20 | 21 | def read_syn_src_tar(args): 22 | root_path = args.root_path 23 | pd_src = pd.read_csv(root_path + args.src + ".csv", header=None) 24 | Xs, Ys = pd_src.iloc[:, :2].values, pd_src.iloc[:, 2].values.astype(int) 25 | pd_tar = pd.read_csv(root_path + args.tar + ".csv", header=None) 26 | Xt, Yt = pd_tar.iloc[:, :2].values, pd_tar.iloc[:, 2].values.astype(int) 27 | Xs = Variable(tr.from_numpy(Xs).float()) 28 | Ys = tr.from_numpy(Ys).long() 29 | Xt = Variable(tr.from_numpy(Xt).float()) 30 | Yt = tr.from_numpy(Yt).long() 31 | 32 | return Xs, Ys, Xt, Yt 33 | 34 | 35 | def data_normalize(fea_de, norm_type): 36 | if norm_type == 'zscore': 37 | zscore = preprocessing.StandardScaler() 38 | fea_de = zscore.fit_transform(fea_de) 39 | return fea_de 40 | 41 | 42 | def read_seed_single(args, sub_idx): 43 | # (15, 3394, 310) (15, 3394) 44 | if args.data_env == 'local': 45 | file = 'D:/Dataset/MOABB/' + args.dset + '.npz' 46 | if args.data_env == 'gpu': 47 | file = '/mnt/ssd2/wenz/data/bci/' + args.dset + '.npz' 48 | 49 | MI = np.load(file) 50 | Data_raw, Label = MI['data'], MI['label'] 51 | 52 | # source sub 53 | fea_de = np.squeeze(Data_raw[sub_idx, :, :]) 54 | fea_de = data_normalize(fea_de, args.norm) 55 | fea_de = Variable(tr.from_numpy(fea_de).float()) 56 | 57 | sub_label = np.squeeze(Label[sub_idx, :]) 58 | sub_label = tr.from_numpy(sub_label).long() 59 | print(fea_de.shape, sub_label.shape) 60 | 61 | return fea_de, sub_label 62 | 63 | 64 | def read_seed_src_tar(args): 65 | # (15, 3394, 310) (15, 3394) 66 | if args.data_env == 'local': 67 | file = 'D:/Dataset/MOABB/' + args.dset + '.npz' 68 | if args.data_env == 'gpu': 69 | file = '/mnt/ssd2/wenz/data/bci/' + args.dset + '.npz' 70 | 71 | MI = np.load(file) 72 | Data_raw, Label = MI['data'], MI['label'] 73 | 74 | src_data = np.squeeze(Data_raw[args.src, :, :]) 75 | src_data = data_normalize(src_data, args.norm) 76 | src_data = Variable(tr.from_numpy(src_data).float()) 77 | src_label = np.squeeze(Label[args.src, :]) 78 | src_label = tr.from_numpy(src_label).long() 79 | 80 | # target sub 81 | tar_data = np.squeeze(Data_raw[args.tar, :, :]) 82 | tar_data = data_normalize(tar_data, args.norm) 83 | tar_data = Variable(tr.from_numpy(tar_data).float()) 84 | tar_label = np.squeeze(Label[args.tar, :]) 85 | tar_label = tr.from_numpy(tar_label).long() 86 | print(tar_data.shape, tar_label.shape) 87 | 88 | return src_data, src_label, tar_data, tar_label 89 | 90 | 91 | def obtain_train_val_source(y_array, trial_ins_num, val_type): 92 | y_array = y_array.numpy() 93 | ins_num_all = len(y_array) 94 | src_idx = range(ins_num_all) 95 | 96 | if val_type == 'random': 97 | num_train = int(0.9 * len(src_idx)) 98 | id_train, id_val = tr.utils.data.random_split(src_idx, [num_train, len(src_idx) - num_train]) 99 | 100 | return id_train, id_val 101 | -------------------------------------------------------------------------------- /NT_Noise/utils/generate_data_list.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # A Survey on Negative Transfer 3 | # 
https://github.com/chamwen/NT-Benchmark 4 | import os 5 | import sys 6 | import random 7 | import numpy as np 8 | import os.path as osp 9 | 10 | sys.path.append("..") 11 | fix_seed = 2022 12 | 13 | 14 | def generate(dir, use_path, txt_path, label, sample_rate=1): 15 | files = os.listdir(dir) 16 | files.sort() 17 | 18 | if sample_rate < 1: 19 | select_num = int(len(files) * sample_rate) 20 | raw_idx = np.arange(len(files)) 21 | random.seed(fix_seed) 22 | random.shuffle(raw_idx) 23 | select_idx = raw_idx[:select_num].tolist() 24 | files = np.array(files.copy())[select_idx].tolist() 25 | files.sort() 26 | 27 | total_num = len(files) 28 | # print(total_num) 29 | 30 | listText = open(txt_path, 'a') 31 | num = 0 32 | for file in files: 33 | num += 1 34 | fileType = os.path.splitext(file)  # (root, ext); skip stray .txt files by extension 35 | if fileType[1] == '.txt': 36 | continue 37 | name = use_path + file + '==' + str(int(label)) + '\n' 38 | if num < total_num + 1: 39 | listText.write(name) 40 | listText.close() 41 | 42 | return total_num 43 | 44 | 45 | def check_class_ins_num(domain_list, folderlist): 46 | min_class_num_list = [] 47 | for name in domain_list: 48 | print('\nreading...', name) 49 | txt_path = out_path_root + dset + '/' + name + '_list.txt' 50 | 51 | class_list = [] 52 | for line in open(txt_path): 53 | class_list.append(line.split('/' + name + '/')[1].split('/')[0]) 54 | 55 | class_list = np.array(class_list) 56 | class_num_list = [np.sum(class_list == cn) for cn in folderlist] 57 | min_class_num_list.append(min(class_num_list)) 58 | print('min class ins_num', min(class_num_list)) 59 | print(min_class_num_list) 60 | 61 | 62 | if __name__ == "__main__": 63 | root = "/mnt/ssd2/wenz/data/" 64 | out_path_root = '../checkpoint/' 65 | 66 | dset = 'VisDA17' 67 | if dset == 'office': 68 | domain_list = ['amazon', 'dslr', 'webcam'] 69 | if dset == 'office-home': 70 | domain_list = ['Art', 'Clipart', 'Product', 'RealWorld'] 71 | if dset == 'office-caltech': 72 | domain_list = ['amazon', 'caltech', 'dslr', 'webcam'] 73 | if dset == 'VisDA17': 74 | domain_list = ['train', 'validation'] 75 | if dset == 'DomainNet': 76 | domain_list = ['clipart', 'infograph', 'painting', 'quickdraw', 'real', 'sketch'] 77 | 78 | save_path = out_path_root + dset 79 | if not osp.exists(save_path): 80 | os.system('mkdir -p ' + save_path) 81 | if not osp.exists(save_path): 82 | os.mkdir(save_path) 83 | 84 | # 40 classes refer: 85 | # SENTRY: Selective entropy optimization via committee consistency 86 | # for unsupervised domain adaptation. ICCV 2021. 
87 | if dset == 'DomainNet': 88 | folderlist = ['airplane', 'ambulance', 'apple', 'backpack', 'banana', 'bathtub', 'bear', 'bed', 'bee', 89 | 'bicycle', 'bird', 'book', 'bridge', 'bus', 'butterfly', 'cake', 'calculator', 'camera', 'car', 90 | 'cat', 'chair', 'clock', 'cow', 'dog', 'dolphin', 'donut', 'drums', 'duck', 'elephant', 'fence', 91 | 'fork', 'horse', 'house', 'rabbit', 'scissors', 'sheep', 'strawberry', 'table', 'telephone', 92 | 'truck'] 93 | sample_rate = 0.2 # 0.2, 0.4 20%*all_num 94 | 95 | for name in domain_list: 96 | print('\nprocessing...', name) 97 | data_path = root + dset + '/' + name 98 | txt_path = out_path_root + dset + '/' + name + '_list.txt' 99 | 100 | if '.DS_Store' in folderlist: 101 | folderlist.remove('.DS_Store') 102 | 103 | i = 0 104 | total_num = 0 105 | for folder in folderlist: 106 | use_path_a = data_path + '/' + folder + '/' 107 | num = generate(os.path.join(data_path, folder), use_path_a, txt_path, i, sample_rate) 108 | total_num = total_num + num 109 | i += 1 110 | print(name, total_num) 111 | 112 | print('=' * 50) 113 | check_class_ins_num(domain_list, folderlist) 114 | -------------------------------------------------------------------------------- /NT_SSDA/demo_img_bl_svm.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # A Survey on Negative Transfer 3 | # https://github.com/chamwen/NT-Benchmark 4 | import numpy as np 5 | from sklearn.metrics import accuracy_score 6 | from utils.utils_bl import baseline_KNN, baseline_SVM 7 | 8 | dset = 'DomainNet' 9 | noise_rate = 0 10 | dset_n = dset + '_' + str(noise_rate) 11 | tar_lbl_rate = 5 # [5, 10, ..., 50]/100 12 | 13 | if dset == 'DomainNet': 14 | domain_list = ['clipart', 'infograph', 'painting'] 15 | num_domain = len(domain_list) 16 | 17 | acc_all = np.zeros(num_domain * (num_domain - 1)) 18 | for s in range(num_domain): 19 | for t in range(num_domain): 20 | if t == s: 21 | continue 22 | itr_idx = (num_domain - 1) * s + t 23 | if t > s: itr_idx -= 1 24 | info_str = '\n%s: %s --> %s' % (itr_idx, domain_list[s], domain_list[t]) 25 | print(info_str) 26 | name_src = domain_list[s][0].upper() 27 | name_tar = domain_list[t][0].upper() 28 | task_str = name_src + name_tar 29 | 30 | # load pre-trained features: 31 | root_path = 'outputs/feas/' 32 | data_path = dset_n + '/' + 'tr' + str(tar_lbl_rate) + '/' + task_str + '_0.npz' 33 | data_dir = root_path + data_path 34 | data = np.load(data_dir) 35 | 36 | X_source, Y_source = data['X_source'], data['y_source'] 37 | X_target_tr, Y_target_tr = data['X_target_tr'], data['y_target_tr'] 38 | X_target_te, Y_target_te = data['X_target_te'], data['y_target_te'] 39 | Xs = np.concatenate((X_source, X_target_tr), 0) 40 | Ys = np.concatenate((Y_source, Y_target_tr), 0) 41 | print(Xs.shape, Ys.shape, X_target_te.shape, Y_target_te.shape) 42 | 43 | # test SVM: 44 | result_SVM = baseline_SVM(Xs, Ys, X_target_te, Y_target_te) 45 | acc_all[itr_idx] = accuracy_score(Y_target_te, result_SVM) * 100 46 | print('SVM: {:.2f}'.format(acc_all[itr_idx])) 47 | 48 | print('\ndone') 49 | print('All acc: ', np.round(acc_all, 2)) 50 | print('Avg acc: ', np.round(np.mean(acc_all), 2)) 51 | 52 | -------------------------------------------------------------------------------- /NT_SSDA/demo_seed_ENT.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # A Survey on Negative Transfer 3 | # https://github.com/chamwen/NT-Benchmark 4 | import numpy as np 5 | import 
argparse 6 | import os 7 | import torch as tr 8 | import torch.nn as nn 9 | import torch.optim as optim 10 | from utils import network, utils 11 | from utils.LogRecord import LogRecord 12 | from utils.dataloader import read_seed_src_tar 13 | from utils.utils import lr_scheduler_full, fix_random_seed, data_load_noimg_ssda 14 | from utils.loss import entropy 15 | 16 | 17 | def train_target(args): 18 | X_src, y_src, X_tar, y_tar = read_seed_src_tar(args) 19 | dset_loaders = data_load_noimg_ssda(X_src, y_src, X_tar, y_tar, args) 20 | 21 | netF, netC = network.backbone_net(args, args.bottleneck) 22 | netF.load_state_dict(tr.load(args.mdl_init_dir + 'netF.pt')) 23 | netC.load_state_dict(tr.load(args.mdl_init_dir + 'netC.pt')) 24 | base_network = nn.Sequential(netF, netC) 25 | optimizer = optim.SGD(base_network.parameters(), lr=args.lr) 26 | 27 | max_iter = args.max_epoch * len(dset_loaders["source"]) 28 | interval_iter = max_iter // 10 29 | args.max_iter = max_iter 30 | iter_num = 0 31 | 32 | netF.train() 33 | netC.train() 34 | 35 | while iter_num < max_iter: 36 | try: 37 | inputs_source, labels_source = next(iter_source) 38 | except (StopIteration, NameError):  # loader exhausted, or iterator not created yet 39 | iter_source = iter(dset_loaders["source"]) 40 | inputs_source, labels_source = next(iter_source) 41 | 42 | try: 43 | inputs_target_tr, labels_target_tr = next(iter_target_tr) 44 | except (StopIteration, NameError): 45 | iter_target_tr = iter(dset_loaders["target_tr"]) 46 | inputs_target_tr, labels_target_tr = next(iter_target_tr) 47 | 48 | try: 49 | inputs_target, _ = next(iter_target) 50 | except (StopIteration, NameError): 51 | iter_target = iter(dset_loaders["target_te"]) 52 | inputs_target, _ = next(iter_target) 53 | 54 | if inputs_source.size(0) == 1: 55 | continue 56 | 57 | iter_num += 1 58 | lr_scheduler_full(optimizer, init_lr=args.lr, iter_num=iter_num, max_iter=args.max_iter) 59 | 60 | inputs_source, labels_source = inputs_source.cuda(), labels_source.cuda() 61 | inputs_target_tr, labels_target_tr = inputs_target_tr.cuda(), labels_target_tr.cuda() 62 | _, outputs_source = netC(netF(inputs_source)) 63 | _, outputs_target_tr = netC(netF(inputs_target_tr)) 64 | outputs = tr.cat((outputs_source, outputs_target_tr), dim=0) 65 | labels = tr.cat((labels_source, labels_target_tr), dim=0) 66 | 67 | args.lamda = 0.1 68 | loss_classifier = nn.CrossEntropyLoss()(outputs, labels) 69 | inputs_target = inputs_target.cuda() 70 | feas_target = netF(inputs_target) 71 | _, outputs_target = netC(feas_target) 72 | loss_entropy = entropy(outputs_target, args.lamda) 73 | total_loss = loss_classifier + loss_entropy 74 | 75 | optimizer.zero_grad() 76 | total_loss.backward() 77 | optimizer.step() 78 | 79 | if iter_num % interval_iter == 0 or iter_num == max_iter: 80 | netF.eval() 81 | netC.eval() 82 | 83 | acc_t_te, _ = utils.cal_acc_noimg(dset_loaders["Target"], netF, netC) 84 | log_str = 'Task: {}, Iter:{}/{}; Acc = {:.2f}%'.format(args.task_str, iter_num, max_iter, acc_t_te) 85 | args.log.record(log_str) 86 | print(log_str) 87 | 88 | netF.train() 89 | netC.train() 90 | 91 | return acc_t_te 92 | 93 | 94 | if __name__ == '__main__': 95 | 96 | data_name = 'SEED' 97 | if data_name == 'SEED': chn, class_num, trial_num = 62, 3, 3394 98 | focus_domain_idx = [0, 1, 2] 99 | domain_list = ['S' + str(i) for i in focus_domain_idx] 100 | num_domain = len(domain_list) 101 | 102 | args = argparse.Namespace(bottleneck=64, lr=0.01, lr_decay1=0.1, lr_decay2=1.0, 103 | epsilon=1e-05, layer='wn', smooth=0, 104 | N=num_domain, chn=chn, class_num=class_num) 105 | 106 | args.dset = data_name 107 | args.method = 'ENT' 108 | args.backbone = 
'ShallowNet' 109 | args.batch_size = 32  # 32 110 | args.max_epoch = 10  # 50 epochs performed worse 111 | args.input_dim = 310 112 | args.norm = 'zscore' 113 | args.bz_tar_tr = args.batch_size 114 | args.bz_tar_te = args.batch_size * 2 115 | args.mdl_init_dir = 'outputs/mdl_init/' + args.dset + '/' 116 | args.noise_rate = 0 117 | dset_n = args.dset + '_' + str(args.noise_rate) 118 | args.tar_lbl_rate = 5  # [5, 10, ..., 50]/100 119 | 120 | os.environ["CUDA_VISIBLE_DEVICES"] = '3' 121 | args.data_env = 'gpu'  # 'local' 122 | args.seed = 2022 123 | fix_random_seed(args.seed) 124 | tr.backends.cudnn.deterministic = True 125 | 126 | print(dset_n, args.method) 127 | print(args) 128 | 129 | args.local_dir = r'/mnt/ssd2/wenz/NT-Benchmark/NT_SSDA/' 130 | args.result_dir = 'results/target/' 131 | my_log = LogRecord(args) 132 | my_log.log_init() 133 | my_log.record('=' * 50 + '\n' + os.path.basename(__file__) + '\n' + '=' * 50) 134 | 135 | acc_all = np.zeros(num_domain * (num_domain - 1)) 136 | for s in range(num_domain): 137 | for t in range(num_domain): 138 | if s != t: 139 | itr_idx = (num_domain - 1) * s + t 140 | if t > s: itr_idx -= 1 141 | info_str = '\n%s: %s --> %s' % (itr_idx, domain_list[s], domain_list[t]) 142 | print(info_str) 143 | args.src, args.tar = focus_domain_idx[s], focus_domain_idx[t] 144 | args.task_str = domain_list[s] + '_' + domain_list[t] 145 | print(args) 146 | 147 | my_log.record(info_str) 148 | args.log = my_log 149 | acc_all[itr_idx] = train_target(args) 150 | print('\nSub acc: ', np.round(acc_all, 3)) 151 | print('Avg acc: ', np.round(np.mean(acc_all), 3)) 152 | 153 | acc_sub_str = str(np.round(acc_all, 3).tolist()) 154 | acc_mean_str = str(np.round(np.mean(acc_all), 3).tolist()) 155 | args.log.record("\n==========================================") 156 | args.log.record(acc_sub_str) 157 | args.log.record(acc_mean_str) 158 | 159 | -------------------------------------------------------------------------------- /NT_SSDA/demo_seed_T+DNN.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # A Survey on Negative Transfer 3 | # https://github.com/chamwen/NT-Benchmark 4 | import argparse 5 | import os 6 | import numpy as np 7 | import torch as tr 8 | import torch.nn as nn 9 | import torch.optim as optim 10 | from utils import network, utils 11 | from utils.LogRecord import LogRecord 12 | from utils.dataloader import read_seed_src_tar 13 | from utils.utils import fix_random_seed, lr_scheduler_full, data_load_noimg_ssda 14 | 15 | 16 | def train_source_test_target(args): 17 | X_src, y_src, X_tar, y_tar = read_seed_src_tar(args) 18 | dset_loaders = data_load_noimg_ssda(X_src, y_src, X_tar, y_tar, args) 19 | 20 | netF, netC = network.backbone_net(args, args.bottleneck) 21 | netF.load_state_dict(tr.load(args.mdl_init_dir + 'netF.pt')) 22 | netC.load_state_dict(tr.load(args.mdl_init_dir + 'netC.pt')) 23 | base_network = nn.Sequential(netF, netC) 24 | optimizer = optim.SGD(base_network.parameters(), lr=args.lr) 25 | 26 | max_iter = args.max_epoch * len(dset_loaders["target_tr"]) 27 | interval_iter = max_iter // 10 28 | args.max_iter = max_iter 29 | iter_num = 0 30 | 31 | netF.train() 32 | netC.train() 33 | 34 | while iter_num < max_iter: 35 | try: 36 | inputs_target_tr, labels_target_tr = next(iter_target_tr) 37 | except (StopIteration, NameError):  # loader exhausted, or iterator not created yet 38 | iter_target_tr = iter(dset_loaders["target_tr"]) 39 | inputs_target_tr, labels_target_tr = next(iter_target_tr) 40 | 41 | if inputs_target_tr.size(0) == 1: 42 | continue 43 | 44 | iter_num += 1 45 | 
lr_scheduler_full(optimizer, init_lr=args.lr, iter_num=iter_num, max_iter=args.max_iter) 46 | 47 | inputs_data, inputs_label = inputs_target_tr.cuda(), labels_target_tr.cuda() 48 | 49 | feas, output = netC(netF(inputs_data)) 50 | classifier_loss = nn.CrossEntropyLoss()(output, inputs_label) 51 | 52 | optimizer.zero_grad() 53 | classifier_loss.backward() 54 | optimizer.step() 55 | 56 | if iter_num % (interval_iter * 2) == 0 or iter_num == max_iter: 57 | netF.eval() 58 | netC.eval() 59 | 60 | acc_t_te, _ = utils.cal_acc_noimg(dset_loaders["Target"], netF, netC) 61 | log_str = 'Task: {}, Iter:{}/{}; Acc = {:.2f}%'.format( 62 | args.task_str, iter_num, max_iter, acc_t_te) 63 | print(log_str) 64 | netF.train() 65 | netC.train() 66 | 67 | return acc_t_te 68 | 69 | 70 | if __name__ == "__main__": 71 | data_name = 'SEED' 72 | if data_name == 'SEED': chn, class_num, trial_num = 62, 3, 3394 73 | focus_domain_idx = [0, 1, 2] 74 | domain_list = ['S' + str(i) for i in focus_domain_idx] 75 | num_domain = len(domain_list) 76 | 77 | args = argparse.Namespace(bottleneck=64, lr=0.01, lr_decay1=0.1, lr_decay2=1.0, 78 | epsilon=1e-05, layer='wn', smooth=0, is_save=False, 79 | N=num_domain, chn=chn, trial=trial_num, class_num=class_num) 80 | 81 | args.dset = data_name 82 | args.method = 'T+DNN' 83 | args.backbone = 'ShallowNet' 84 | args.batch_size = 32 # 32 85 | args.max_epoch = 50 86 | args.input_dim = 310 87 | args.norm = 'zscore' 88 | args.bz_tar_tr = int(args.batch_size / 2) 89 | args.bz_tar_te = args.batch_size 90 | args.mdl_init_dir = 'outputs/mdl_init/' + args.dset + '/' 91 | args.noise_rate = 0 92 | dset_n = args.dset + '_' + str(args.noise_rate) 93 | args.tar_lbl_rate = 5 # [5, 10, ..., 50]/100 94 | 95 | os.environ["CUDA_VISIBLE_DEVICES"] = '4' 96 | args.data_env = 'gpu' # 'local' 97 | args.seed = 2022 98 | fix_random_seed(args.seed) 99 | tr.backends.cudnn.deterministic = True 100 | print(dset_n, args.method) 101 | print(args) 102 | 103 | args.local_dir = r'/mnt/ssd2/wenz/NT-Benchmark/NT_SSDA/' 104 | args.result_dir = 'results/target/' 105 | my_log = LogRecord(args) 106 | my_log.log_init() 107 | my_log.record('=' * 50 + '\n' + os.path.basename(__file__) + '\n' + '=' * 50) 108 | 109 | acc_all = np.zeros(num_domain * (num_domain - 1)) 110 | for s in range(num_domain): 111 | for t in range(num_domain): 112 | if s != t: 113 | itr_idx = (num_domain - 1) * s + t 114 | if t > s: itr_idx -= 1 115 | info_str = '\n%s: %s --> %s' % (itr_idx, domain_list[s], domain_list[t]) 116 | print(info_str) 117 | args.src, args.tar = focus_domain_idx[s], focus_domain_idx[t] 118 | args.task_str = domain_list[s] + domain_list[t] 119 | print(args) 120 | 121 | my_log.record(info_str) 122 | args.log = my_log 123 | 124 | acc_all[itr_idx] = train_source_test_target(args) 125 | print('\nSub acc: ', np.round(acc_all, 2)) 126 | print('Avg acc: ', np.round(np.mean(acc_all), 2)) 127 | 128 | acc_sub_str = str(np.round(acc_all, 2).tolist()) 129 | acc_mean_str = str(np.round(np.mean(acc_all), 2).tolist()) 130 | args.log.record("\n==========================================") 131 | args.log.record(acc_sub_str) 132 | args.log.record(acc_mean_str) 133 | 134 | -------------------------------------------------------------------------------- /NT_SSDA/demo_seed_bl_svm.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # A Survey on Negative Transfer 3 | # https://github.com/chamwen/NT-Benchmark 4 | import os 5 | import numpy as np 6 | import torch as tr 7 | import argparse 8 | from 
sklearn.metrics import accuracy_score 9 | from utils.utils import add_label_noise_noimg, get_idx_ssda_seed 10 | from utils.dataloader import data_normalize 11 | from utils.utils_bl import baseline_SVM 12 | 13 | 14 | def dataload(args): 15 | # (15, 3394, 310) (15, 3394) 16 | if args.data_env == 'local': 17 | file = 'D:/Dataset/MOABB/' + args.dset + '.npz' 18 | if args.data_env == 'gpu': 19 | file = '/mnt/ssd2/wenz/data/bci/' + args.dset + '.npz' 20 | 21 | MI = np.load(file) 22 | Data_raw, Label = MI['data'], MI['label'] 23 | 24 | src_data = np.squeeze(Data_raw[args.src, :, :]) 25 | src_data = data_normalize(src_data, args.norm) 26 | src_label = np.squeeze(Label[args.src, :]) 27 | 28 | # target sub 29 | tar_data = np.squeeze(Data_raw[args.tar, :, :]) 30 | tar_data = data_normalize(tar_data, args.norm) 31 | tar_label = np.squeeze(Label[args.tar, :]) 32 | print(tar_data.shape, tar_label.shape) 33 | 34 | return src_data, src_label, tar_data, tar_label 35 | 36 | 37 | data_name = 'SEED' 38 | if data_name == 'SEED': chn, class_num, trial_num = 62, 3, 3394 39 | focus_domain_idx = [0, 1, 2] 40 | domain_list = ['S' + str(i) for i in focus_domain_idx] 41 | num_domain = len(domain_list) 42 | 43 | args = argparse.Namespace(dset=data_name, norm='zscore', seed=2022, class_num=3) 44 | args.data_env = 'gpu' # 'local' 45 | args.noise_rate = 0 46 | args.tar_lbl_rate = 5 # [5, 10, ..., 50]/100 47 | 48 | num_domain = len(domain_list) 49 | acc_all = np.zeros(len(domain_list) * (len(domain_list) - 1)) 50 | for s in range(num_domain): # source 51 | for t in range(num_domain): # target 52 | if s != t: 53 | itr_idx = (num_domain - 1) * s + t 54 | if t > s: itr_idx -= 1 55 | print('\n%s: %s --> %s' % (itr_idx, domain_list[s], domain_list[t])) 56 | args.src, args.tar = s, t 57 | Xs, Ys, Xt, Yt = dataload(args) 58 | 59 | idx_tar_tr, idx_tar_te = get_idx_ssda_seed(Yt, args.tar_lbl_rate) 60 | Xt_tr, Yt_tr = Xt[idx_tar_tr, :], Yt[idx_tar_tr] 61 | Xt_te, Yt_te = Xt[idx_tar_te, :], Yt[idx_tar_te] 62 | Xs = np.concatenate((Xs, Xt_tr), 0) 63 | Ys = np.concatenate((Ys, Yt_tr), 0) 64 | Xt, Yt = Xt_te.copy(), Yt_te.copy() 65 | # print(Xs.shape, Ys.shape, Xt_te.shape, Yt_te.shape) 66 | 67 | # add noise on source label 68 | Ys = add_label_noise_noimg(Ys, args.seed, args.class_num, args.noise_rate) 69 | 70 | # test SVM: 71 | pred_tar = baseline_SVM(Xs, Ys, Xt, Yt) 72 | acc_all[itr_idx] = accuracy_score(Yt, pred_tar) * 100 73 | print('acc: %.2f' % np.round(acc_all[itr_idx], 2)) 74 | 75 | print('\nmean acc', np.round(np.mean(acc_all), 2)) 76 | print(domain_list) 77 | print(np.round(acc_all, 2).tolist()) 78 | 79 | -------------------------------------------------------------------------------- /NT_SSDA/demo_seed_finetune.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # A Survey on Negative Transfer 3 | # https://github.com/chamwen/NT-Benchmark 4 | import argparse 5 | import os 6 | import os.path as osp 7 | import numpy as np 8 | import torch as tr 9 | import torch.nn as nn 10 | import torch.optim as optim 11 | import torch.utils.data as Data 12 | from utils.LogRecord import LogRecord 13 | from utils import network, utils 14 | from utils.utils import fix_random_seed, op_copy, lr_scheduler, get_idx_ssda_seed 15 | from utils.dataloader import read_seed_single 16 | 17 | 18 | def data_load(X, y, args): 19 | dset_loaders = {} 20 | train_bs = args.batch_size 21 | 22 | idx_train, idx_test = get_idx_ssda_seed(y, args.tar_lbl_rate) 23 | 24 | data_tar_tr = 
Data.TensorDataset(X[idx_train, :], y[idx_train]) 25 | data_tar_te = Data.TensorDataset(X[idx_test, :], y[idx_test]) 26 | 27 | dset_loaders["target_tr"] = Data.DataLoader(data_tar_tr, batch_size=train_bs, shuffle=True) 28 | dset_loaders["Target"] = Data.DataLoader(data_tar_te, batch_size=train_bs * 3, shuffle=False) 29 | return dset_loaders 30 | 31 | 32 | def train_source_test_target(args): 33 | X_tar, y_tar = read_seed_single(args, args.tar) 34 | dset_loaders = data_load(X_tar, y_tar, args) 35 | 36 | netF, netC = network.backbone_net(args, args.bottleneck) 37 | 38 | modelpath = args.output_dir_src + '/source_F.pt' 39 | netF.load_state_dict(tr.load(modelpath)) 40 | modelpath = args.output_dir_src + '/source_C.pt' 41 | netC.load_state_dict(tr.load(modelpath)) 42 | netF.eval() 43 | 44 | for k, v in netF.named_parameters(): 45 | v.requires_grad = False 46 | 47 | param_group = [] 48 | for k, v in netC.named_parameters(): 49 | if args.lr_decay1 > 0: 50 | param_group += [{'params': v, 'lr': args.lr * 0.1}] 51 | else: 52 | v.requires_grad = False 53 | 54 | optimizer = optim.SGD(param_group) 55 | optimizer = op_copy(optimizer) 56 | 57 | max_iter = args.max_epoch * len(dset_loaders["target_tr"]) 58 | interval_iter = max_iter // 10 59 | args.max_iter = max_iter 60 | iter_num = 0 61 | netC.train() 62 | 63 | while iter_num < max_iter: 64 | try: 65 | inputs_target_tr, labels_target_tr = next(iter_target_tr) 66 | except (StopIteration, NameError):  # loader exhausted, or iterator not created yet 67 | iter_target_tr = iter(dset_loaders["target_tr"]) 68 | inputs_target_tr, labels_target_tr = next(iter_target_tr) 69 | 70 | if inputs_target_tr.size(0) == 1: 71 | continue 72 | 73 | iter_num += 1 74 | lr_scheduler(optimizer, iter_num=iter_num, max_iter=max_iter) 75 | 76 | inputs_data, inputs_label = inputs_target_tr.cuda(), labels_target_tr.cuda() 77 | _, output = netC(netF(inputs_data)) 78 | classifier_loss = nn.CrossEntropyLoss()(output, inputs_label) 79 | 80 | optimizer.zero_grad() 81 | classifier_loss.backward() 82 | optimizer.step() 83 | 84 | if iter_num % interval_iter == 0 or iter_num == max_iter: 85 | netC.eval() 86 | 87 | acc_t_te, _ = utils.cal_acc_noimg(dset_loaders["Target"], netF, netC) 88 | log_str = 'Task: {}, Iter:{}/{}; Acc = {:.2f}%'.format(args.task_str, iter_num, max_iter, acc_t_te) 89 | print(log_str) 90 | 91 | netC.train() 92 | 93 | return acc_t_te 94 | 95 | 96 | if __name__ == "__main__": 97 | data_name = 'SEED' 98 | if data_name == 'SEED': chn, class_num, trial_num = 62, 3, 3394 99 | focus_domain_idx = [0, 1, 2] 100 | domain_list = ['S' + str(i) for i in focus_domain_idx] 101 | num_domain = len(domain_list) 102 | 103 | args = argparse.Namespace(bottleneck=64, lr=0.01, lr_decay1=0.1, 104 | epsilon=1e-05, layer='wn', smooth=0, is_save=False, 105 | N=num_domain, chn=chn, trial=trial_num, class_num=class_num) 106 | 107 | args.dset = data_name 108 | args.method = 'Finetune' 109 | args.backbone = 'ShallowNet' 110 | args.batch_size = 32  # 32 111 | args.max_epoch = 50  # 50 112 | args.input_dim = 310 113 | args.norm = 'zscore' 114 | args.bz_tar_tr = int(args.batch_size / 2) 115 | args.bz_tar_te = args.batch_size 116 | args.mdl_init_dir = 'outputs/mdl_init/' + args.dset + '/' 117 | args.noise_rate = 0 118 | dset_n = args.dset + '_' + str(args.noise_rate) 119 | args.tar_lbl_rate = 5  # [5, 10, ..., 50]/100 120 | 121 | os.environ["CUDA_VISIBLE_DEVICES"] = '4' 122 | args.data_env = 'gpu'  # 'local' 123 | args.seed = 2022 124 | fix_random_seed(args.seed) 125 | tr.backends.cudnn.deterministic = True 126 | print(dset_n, args.method) 127 | print(args) 128 | 129 | mdl_path = 
'outputs/models/' 130 | args.output = mdl_path + dset_n + '/source/' 131 | 132 | args.local_dir = r'/mnt/ssd2/wenz/NT-Benchmark/NT_SSDA/' 133 | args.result_dir = 'results/target/' 134 | my_log = LogRecord(args) 135 | my_log.log_init() 136 | my_log.record('=' * 50 + '\n' + os.path.basename(__file__) + '\n' + '=' * 50) 137 | 138 | acc_all = np.zeros(num_domain * (num_domain - 1)) 139 | for s in range(num_domain): 140 | for t in range(num_domain): 141 | if s != t: 142 | itr_idx = (num_domain - 1) * s + t 143 | if t > s: itr_idx -= 1 144 | info_str = '\n%s: %s --> %s' % (itr_idx, domain_list[s], domain_list[t]) 145 | print(info_str) 146 | args.src, args.tar = focus_domain_idx[s], focus_domain_idx[t] 147 | args.task_str = domain_list[s] + '_' + domain_list[t] 148 | 149 | args.name_src = domain_list[s] 150 | args.output_dir_src = osp.join(args.output, args.name_src) 151 | print(args) 152 | 153 | my_log.record(info_str) 154 | args.log = my_log 155 | 156 | acc_all[itr_idx] = train_source_test_target(args) 157 | print('\nSub acc: ', np.round(acc_all, 2)) 158 | print('Avg acc: ', np.round(np.mean(acc_all), 2)) 159 | 160 | acc_sub_str = str(np.round(acc_all, 2).tolist()) 161 | acc_mean_str = str(np.round(np.mean(acc_all), 2).tolist()) 162 | args.log.record("\n==========================================") 163 | args.log.record(acc_sub_str) 164 | args.log.record(acc_mean_str) 165 | 166 | -------------------------------------------------------------------------------- /NT_SSDA/demo_seed_mcc.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # A Survey on Negative Transfer 3 | # https://github.com/chamwen/NT-Benchmark 4 | import numpy as np 5 | import argparse 6 | import os 7 | import torch as tr 8 | import torch.nn as nn 9 | import torch.optim as optim 10 | from utils import network, utils 11 | from utils.LogRecord import LogRecord 12 | from utils.dataloader import read_seed_src_tar 13 | from utils.utils import lr_scheduler_full, fix_random_seed, data_load_noimg_ssda 14 | from utils.loss import ClassConfusionLoss, CELabelSmooth 15 | 16 | 17 | def train_target(args): 18 | X_src, y_src, X_tar, y_tar = read_seed_src_tar(args) 19 | dset_loaders = data_load_noimg_ssda(X_src, y_src, X_tar, y_tar, args) 20 | 21 | netF, netC = network.backbone_net(args, args.bottleneck) 22 | netF.load_state_dict(tr.load(args.mdl_init_dir + 'netF.pt')) 23 | netC.load_state_dict(tr.load(args.mdl_init_dir + 'netC.pt')) 24 | base_network = nn.Sequential(netF, netC) 25 | optimizer = optim.SGD(base_network.parameters(), lr=args.lr) 26 | 27 | # the iteration budget is derived from the source loader below 28 | 29 | 30 | max_iter = args.max_epoch * len(dset_loaders["source"]) 31 | interval_iter = max_iter // 10 32 | args.max_iter = max_iter 33 | iter_num = 0 34 | base_network.train() 35 | 36 | while iter_num < max_iter: 37 | try: 38 | inputs_source, labels_source = next(iter_source) 39 | except (StopIteration, NameError):  # loader exhausted, or iterator not created yet 40 | iter_source = iter(dset_loaders["source"]) 41 | inputs_source, labels_source = next(iter_source) 42 | 43 | try: 44 | inputs_target_tr, labels_target_tr = next(iter_target_tr) 45 | except (StopIteration, NameError): 46 | iter_target_tr = iter(dset_loaders["target_tr"]) 47 | inputs_target_tr, labels_target_tr = next(iter_target_tr) 48 | 49 | try: 50 | inputs_target, _ = next(iter_target) 51 | except (StopIteration, NameError): 52 | iter_target = iter(dset_loaders["target_te"]) 53 | inputs_target, _ = next(iter_target) 54 | 55 | if inputs_source.size(0) == 1: 56 | 
continue 57 | 58 | iter_num += 1 59 | lr_scheduler_full(optimizer, init_lr=args.lr, iter_num=iter_num, max_iter=args.max_iter) 60 | 61 | inputs_source, labels_source = inputs_source.cuda(), labels_source.cuda() 62 | inputs_target = inputs_target.cuda() 63 | 64 | inputs_target_tr, labels_target_tr = inputs_target_tr.cuda(), labels_target_tr.cuda() 65 | _, outputs_source = netC(netF(inputs_source)) 66 | _, outputs_target_tr = netC(netF(inputs_target_tr)) 67 | _, outputs_target = netC(netF(inputs_target)) 68 | outputs_comb = tr.cat((outputs_source, outputs_target_tr), dim=0) 69 | labels_comb = tr.cat((labels_source, labels_target_tr), dim=0) 70 | 71 | # # loss definition 72 | # p = float(iter_num) / max_iter 73 | # alpha = 2. / (1. + np.exp(-10 * p)) - 1 74 | args.loss_trade_off = 1.0 75 | args.t_mcc = 2 76 | transfer_loss = ClassConfusionLoss(t=args.t_mcc)(outputs_target) 77 | classifier_loss = CELabelSmooth(num_classes=args.class_num, epsilon=args.smooth)(outputs_comb, labels_comb) 78 | total_loss = args.loss_trade_off * transfer_loss + classifier_loss 79 | 80 | optimizer.zero_grad() 81 | total_loss.backward() 82 | optimizer.step() 83 | 84 | if iter_num % interval_iter == 0 or iter_num == max_iter: 85 | base_network.eval() 86 | 87 | acc_t_te = utils.cal_acc_base(dset_loaders["Target"], base_network) 88 | log_str = 'Task: {}, Iter:{}/{}; Acc = {:.2f}%'.format(args.task_str, iter_num, max_iter, acc_t_te) 89 | args.log.record(log_str) 90 | print(log_str) 91 | 92 | base_network.train() 93 | 94 | return acc_t_te 95 | 96 | 97 | if __name__ == '__main__': 98 | 99 | data_name = 'SEED' 100 | if data_name == 'SEED': chn, class_num, trial_num = 62, 3, 3394 101 | focus_domain_idx = [0, 1, 2] 102 | domain_list = ['S' + str(i) for i in focus_domain_idx] 103 | num_domain = len(domain_list) 104 | 105 | args = argparse.Namespace(bottleneck=64, lr=0.01, lr_decay1=0.1, lr_decay2=1.0, 106 | epsilon=1e-05, layer='wn', smooth=0, 107 | N=num_domain, chn=chn, class_num=class_num) 108 | 109 | args.dset = data_name 110 | args.method = 'MCC' 111 | args.backbone = 'ShallowNet' 112 | args.batch_size = 32 # 32 113 | args.max_epoch = 50 # 50 114 | args.input_dim = 310 115 | args.norm = 'zscore' 116 | args.bz_tar_tr = args.batch_size 117 | args.bz_tar_te = args.batch_size * 2 118 | args.mdl_init_dir = 'outputs/mdl_init/' + args.dset + '/' 119 | args.noise_rate = 0 120 | dset_n = args.dset + '_' + str(args.noise_rate) 121 | args.tar_lbl_rate = 5 # [5, 10, ..., 50]/100 122 | 123 | os.environ["CUDA_VISIBLE_DEVICES"] = '4' 124 | args.data_env = 'gpu' # 'local' 125 | args.seed = 2022 126 | fix_random_seed(args.seed) 127 | tr.backends.cudnn.deterministic = True 128 | 129 | print(dset_n, args.method) 130 | print(args) 131 | 132 | args.local_dir = r'/mnt/ssd2/wenz/NT-Benchmark/NT_SSDA/' 133 | args.result_dir = 'results/target/' 134 | my_log = LogRecord(args) 135 | my_log.log_init() 136 | my_log.record('=' * 50 + '\n' + os.path.basename(__file__) + '\n' + '=' * 50) 137 | 138 | acc_all = np.zeros(num_domain * (num_domain - 1)) 139 | for s in range(num_domain): 140 | for t in range(num_domain): 141 | if s != t: 142 | itr_idx = (num_domain - 1) * s + t 143 | if t > s: itr_idx -= 1 144 | info_str = '\n%s: %s --> %s' % (itr_idx, domain_list[s], domain_list[t]) 145 | print(info_str) 146 | args.src, args.tar = focus_domain_idx[s], focus_domain_idx[t] 147 | args.task_str = domain_list[s] + '_' + domain_list[t] 148 | print(args) 149 | 150 | my_log.record(info_str) 151 | args.log = my_log 152 | acc_all[itr_idx] = train_target(args) 153 | 
print('\nSub acc: ', np.round(acc_all, 3)) 154 | print('Avg acc: ', np.round(np.mean(acc_all), 3)) 155 | 156 | acc_sub_str = str(np.round(acc_all, 3).tolist()) 157 | acc_mean_str = str(np.round(np.mean(acc_all), 3).tolist()) 158 | args.log.record("\n==========================================") 159 | args.log.record(acc_sub_str) 160 | args.log.record(acc_mean_str) 161 | -------------------------------------------------------------------------------- /NT_SSDA/save_init_model.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # A Survey on Negative Transfer 3 | # https://github.com/chamwen/NT-Benchmark 4 | import argparse 5 | import os 6 | import random 7 | import os.path as osp 8 | import torch as tr 9 | import numpy as np 10 | import utils.network as network 11 | 12 | 13 | def create_folder(output_dir): 14 | 15 | if not osp.exists(output_dir): 16 | os.system('mkdir -p ' + output_dir) 17 | if not osp.exists(output_dir): 18 | os.mkdir(output_dir) 19 | 20 | 21 | if __name__ == '__main__': 22 | seed = 2022 23 | tr.manual_seed(seed) 24 | tr.cuda.manual_seed(seed) 25 | np.random.seed(seed) 26 | random.seed(seed) 27 | tr.cuda.manual_seed_all(seed) 28 | tr.backends.cudnn.deterministic = True 29 | mdl_init_dir = 'outputs/mdl_init/' 30 | dset_list = ['DomainNet', 'SEED', 'moon'] 31 | 32 | ################################################################################### 33 | # Img data 34 | args = argparse.Namespace(bottleneck=1024, net='resnet50', layer='wn', classifier='bn') 35 | args.class_num = 40 36 | output_dir = osp.join(mdl_init_dir, dset_list[0]) 37 | create_folder(output_dir) 38 | 39 | if args.net[0:3] == 'res': 40 | netF = network.ResBase(res_name=args.net).cuda() 41 | elif args.net[0:3] == 'vgg': 42 | netF = network.VGGBase(vgg_name=args.net).cuda() 43 | netB = network.feat_bottleneck(type=args.classifier, feature_dim=netF.in_features, 44 | bottleneck_dim=args.bottleneck).cuda() 45 | netC = network.feat_classifier(type=args.layer, class_num=args.class_num, bottleneck_dim=args.bottleneck).cuda() 46 | netD_clf = network.feat_classifier(type=args.layer, class_num=2, bottleneck_dim=args.bottleneck).cuda() 47 | netD_full = network.AdversarialNetwork(args.bottleneck, 2048).cuda() 48 | 49 | tr.save(netF.state_dict(), osp.join(output_dir, "netF.pt")) 50 | tr.save(netB.state_dict(), osp.join(output_dir, "netB.pt")) 51 | tr.save(netC.state_dict(), osp.join(output_dir, "netC.pt")) 52 | tr.save(netD_clf.state_dict(), osp.join(output_dir, "netD_clf.pt")) 53 | tr.save(netD_full.state_dict(), osp.join(output_dir, "netD_full.pt")) 54 | # netF.load_state_dict(tr.load(osp.join(output_dir, "netF.pt"))) 55 | print('\nfinished init of DomainNet data...') 56 | 57 | ################################################################################### 58 | # SEED data 59 | args = argparse.Namespace(bottleneck=64, backbone='ShallowNet', layer='wn') 60 | args.input_dim = 310 61 | args.class_num = 3 62 | output_dir = osp.join(mdl_init_dir, dset_list[1]) 63 | create_folder(output_dir) 64 | 65 | netF, netC = network.backbone_net(args, args.bottleneck) 66 | netD_full = network.AdversarialNetwork(args.bottleneck, 20).cuda() 67 | netD_clf = network.feat_classifier(type=args.layer, class_num=2, bottleneck_dim=args.bottleneck).cuda() 68 | 69 | tr.save(netF.state_dict(), osp.join(output_dir, "netF.pt")) 70 | tr.save(netC.state_dict(), osp.join(output_dir, "netC.pt")) 71 | tr.save(netD_full.state_dict(), osp.join(output_dir, "netD_full.pt")) 72 | 
tr.save(netD_clf.state_dict(), osp.join(output_dir, "netD_clf.pt")) 73 | print('\nfinished init of seed data...') 74 | 75 | ################################################################################### 76 | # Synth data 77 | args = argparse.Namespace(bottleneck=64, backbone='ShallowNet', layer='wn') 78 | args.input_dim = 2 79 | args.class_num = 2 80 | output_dir = osp.join(mdl_init_dir, dset_list[2]) 81 | create_folder(output_dir) 82 | 83 | netF, netC = network.backbone_net(args, args.bottleneck) 84 | netD_full = network.AdversarialNetwork(args.bottleneck, 20).cuda() 85 | netD_clf = network.feat_classifier(type=args.layer, class_num=2, bottleneck_dim=args.bottleneck).cuda() 86 | 87 | tr.save(netF.state_dict(), osp.join(output_dir, "netF.pt")) 88 | tr.save(netC.state_dict(), osp.join(output_dir, "netC.pt")) 89 | tr.save(netD_full.state_dict(), osp.join(output_dir, "netD_full.pt")) 90 | tr.save(netD_clf.state_dict(), osp.join(output_dir, "netD_clf.pt")) 91 | print('\nfinished init of moon data...') 92 | 93 | 94 | 95 | -------------------------------------------------------------------------------- /NT_SSDA/source_train_seed_finetune.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # A Survey on Negative Transfer 3 | # https://github.com/chamwen/NT-Benchmark 4 | import numpy as np 5 | import argparse 6 | import torch as tr 7 | import torch.nn as nn 8 | import torch.optim as optim 9 | import torch.utils.data as Data 10 | import os.path as osp 11 | import os 12 | from utils import network, utils 13 | from utils.loss import CELabelSmooth 14 | from utils.LogRecord import LogRecord 15 | from utils.dataloader import read_seed_single, obtain_train_val_source 16 | from utils.utils import create_folder, lr_scheduler_full, fix_random_seed, add_label_noise_noimg 17 | 18 | 19 | def data_load(X, y, args): 20 | dset_loaders = {} 21 | train_bs = args.batch_size 22 | tr.manual_seed(args.seed) 23 | trial_ins_num = args.trial 24 | 25 | if args.noise_rate > 0: 26 | y = add_label_noise_noimg(y, args.seed, args.class_num, args.noise_rate) 27 | 28 | id_train, id_val = obtain_train_val_source(y, trial_ins_num, args.validation) 29 | source_tr = Data.TensorDataset(X[id_train, :], y[id_train]) 30 | dset_loaders['source_tr'] = Data.DataLoader(source_tr, batch_size=train_bs, shuffle=True, drop_last=True) 31 | 32 | source_te = Data.TensorDataset(X[id_val, :], y[id_val]) 33 | dset_loaders['source_te'] = Data.DataLoader(source_te, batch_size=train_bs * 3, shuffle=False, drop_last=False) 34 | 35 | return dset_loaders 36 | 37 | 38 | def train_source(args):  # train with within-domain validation 39 | X_src, y_src = read_seed_single(args, args.src) 40 | dset_loaders = data_load(X_src, y_src, args) 41 | 42 | netF, netC = network.backbone_net(args, args.bottleneck) 43 | netF.load_state_dict(tr.load(args.mdl_init_dir + 'netF.pt')) 44 | netC.load_state_dict(tr.load(args.mdl_init_dir + 'netC.pt')) 45 | base_network = nn.Sequential(netF, netC) 46 | optimizer = optim.SGD(base_network.parameters(), lr=args.lr) 47 | 48 | acc_init = 0 49 | max_iter = args.max_epoch * len(dset_loaders["source_tr"])  # source_tr: ~80 batches per epoch 50 | interval_iter = max_iter // 10 51 | args.max_iter = max_iter 52 | iter_num = 0 53 | 54 | netF.train() 55 | netC.train() 56 | 57 | while iter_num < max_iter: 58 | try: 59 | inputs_source, labels_source = next(iter_source) 60 | except (StopIteration, NameError):  # loader exhausted, or iterator not created yet 61 | iter_source = iter(dset_loaders['source_tr']) 62 | inputs_source, labels_source = next(iter_source) 63 | 64 | if
inputs_source.size(0) == 1: 65 | continue 66 | 67 | iter_num += 1 68 | lr_scheduler_full(optimizer, init_lr=args.lr, iter_num=iter_num, max_iter=args.max_iter) 69 | inputs_source, labels_source = inputs_source.cuda(), labels_source.cuda() 70 | 71 | _, outputs_source = netC(netF(inputs_source)) 72 | classifier_loss = CELabelSmooth(num_classes=args.class_num, epsilon=args.smooth)(outputs_source, 73 | labels_source) 74 | 75 | optimizer.zero_grad() 76 | classifier_loss.backward() 77 | optimizer.step() 78 | 79 | if iter_num % interval_iter == 0 or iter_num == max_iter: 80 | netF.eval() 81 | netC.eval() 82 | 83 | acc_s_te, _ = utils.cal_acc_noimg(dset_loaders['source_te'], netF, netC) 84 | log_str = 'Task: {}, Iter:{}/{}; Val_acc = {:.2f}%'.format(args.name_src, iter_num, max_iter, acc_s_te) 85 | args.log.record(log_str) 86 | print(log_str) 87 | 88 | if acc_s_te >= acc_init: 89 | acc_init = acc_s_te 90 | best_netF = netF.state_dict() 91 | best_netC = netC.state_dict() 92 | 93 | netF.train() 94 | netC.train() 95 | 96 | tr.save(best_netF, osp.join(args.output_dir_src, "source_F.pt")) 97 | tr.save(best_netC, osp.join(args.output_dir_src, "source_C.pt")) 98 | 99 | return acc_s_te 100 | 101 | 102 | if __name__ == '__main__': 103 | 104 | data_name = 'SEED' 105 | if data_name == 'SEED': chn, class_num, trial_num = 62, 3, 3394 106 | focus_domain_idx = [0, 1, 2] 107 | domain_list = ['S' + str(i) for i in focus_domain_idx] 108 | num_domain = len(domain_list) 109 | 110 | args = argparse.Namespace(bottleneck=64, lr=0.01, epsilon=1e-05, layer='wn', 111 | smooth=0, chn=chn, trial=trial_num, 112 | N=num_domain, class_num=class_num) 113 | args.dset = data_name 114 | args.method = 'single' 115 | args.backbone = 'ShallowNet' 116 | args.batch_size = 32 # 32 117 | args.max_epoch = 50 118 | args.input_dim = 310 119 | args.norm = 'zscore' 120 | args.validation = 'random' 121 | args.mdl_init_dir = 'outputs/mdl_init/' + args.dset + '/' 122 | args.noise_rate = 0 123 | dset_n = args.dset + '_' + str(args.noise_rate) 124 | 125 | os.environ["CUDA_VISIBLE_DEVICES"] = '4' 126 | args.data_env = 'gpu' # 'local' 127 | args.seed = 2022 128 | fix_random_seed(args.seed) 129 | tr.backends.cudnn.deterministic = True 130 | 131 | mdl_path = 'outputs/models/' 132 | args.output = mdl_path + dset_n + '/source/' 133 | print(dset_n, args.method) 134 | print(args) 135 | 136 | args.local_dir = r'/mnt/ssd2/wenz/code/NT-Benchmark/NT_SSDA/' 137 | args.result_dir = 'results/source/' 138 | my_log = LogRecord(args) 139 | my_log.log_init() 140 | my_log.record('=' * 50 + '\n' + os.path.basename(__file__) + '\n' + '=' * 50) 141 | 142 | acc_all = [] 143 | for s in range(num_domain): 144 | args.src = focus_domain_idx[s] 145 | info_str = '\n========================== Within domain ' + domain_list[s] + ' ==========================' 146 | print(info_str) 147 | my_log.record(info_str) 148 | args.log = my_log 149 | 150 | args.name_src = domain_list[s] 151 | args.output_dir_src = osp.join(args.output, args.name_src) 152 | create_folder(args.output_dir_src, args.data_env, args.local_dir) 153 | print(args) 154 | 155 | acc_sub = train_source(args) 156 | acc_all.append(acc_sub) 157 | print(np.round(acc_all, 2)) 158 | print(np.round(np.mean(acc_all), 2)) 159 | 160 | acc_sub_str = str(np.round(acc_all, 2).tolist()) 161 | acc_mean_str = str(np.round(np.mean(acc_all), 2).tolist()) 162 | args.log.record("\n==========================================") 163 | args.log.record(acc_sub_str) 164 | args.log.record(acc_mean_str) 165 | 
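
Note on the learning-rate schedule: every demo above anneals the optimizer through lr_scheduler_full(optimizer, init_lr, iter_num, max_iter) from utils/utils.py, which this excerpt does not reproduce. As a rough sketch only — the gamma and power coefficients here are assumptions based on the SHOT-style power decay this family of benchmarks typically uses, and the actual helper in utils/utils.py may differ — the schedule behaves like:

def lr_scheduler_full(optimizer, init_lr, iter_num, max_iter, gamma=10, power=0.75):
    # power decay: lr(p) = init_lr * (1 + gamma * p) ** (-power), p = iter_num / max_iter
    decay = (1 + gamma * iter_num / max_iter) ** (-power)
    for param_group in optimizer.param_groups:
        param_group['lr'] = init_lr * decay
    return optimizer

Under these assumed coefficients, init_lr=0.01 decays monotonically to about 0.01 * 11 ** (-0.75) ≈ 0.0017 at the final iteration, so early iterations move fast while the last stretch of training only fine-tunes the weights.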
-------------------------------------------------------------------------------- /NT_SSDA/utils/LogRecord.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # A Survey on Negative Transfer 3 | # https://github.com/chamwen/NT-Benchmark 4 | import os.path as osp 5 | from datetime import datetime 6 | from datetime import timedelta, timezone 7 | from utils.utils import create_folder 8 | 9 | 10 | class LogRecord: 11 | def __init__(self, args): 12 | self.args = args 13 | self.result_dir = args.result_dir 14 | self.data_env = 'gpu' 15 | self.data_name = args.dset 16 | self.method = args.method 17 | 18 | def log_init(self): 19 | create_folder(self.result_dir, self.args.data_env, self.args.local_dir) 20 | 21 | if self.data_env == 'local': 22 | time_str = datetime.utcnow().replace(tzinfo=timezone.utc).astimezone( 23 | timezone(timedelta(hours=8), name='Asia/Shanghai')).strftime("%Y-%m-%d_%H_%M_%S") 24 | if self.data_env == 'gpu': 25 | time_str = datetime.utcnow().replace(tzinfo=timezone.utc).strftime("%Y-%m-%d_%H_%M_%S") 26 | file_name_head = 'log_' + self.method + '_' + self.data_name + '_' 27 | self.args.out_file = open(osp.join(self.args.result_dir, file_name_head + time_str + '.txt'), 'w') 28 | self.args.out_file.write(self._print_args() + '\n') 29 | self.args.out_file.flush() 30 | return self.args 31 | 32 | def record(self, log_str): 33 | self.args.out_file.write(log_str + '\n') 34 | self.args.out_file.flush() 35 | return self.args 36 | 37 | def _print_args(self): 38 | s = "==========================================\n" 39 | for arg, content in self.args.__dict__.items(): 40 | s += "{}:{}\n".format(arg, content) 41 | return s 42 | -------------------------------------------------------------------------------- /NT_SSDA/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # @Time : 2022/1/11 11:43 PM 3 | # @Author : wenzhang 4 | # @File : __init__.py 5 | -------------------------------------------------------------------------------- /NT_SSDA/utils/data_list.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # A Survey on Negative Transfer 3 | # https://github.com/chamwen/NT-Benchmark 4 | import numpy as np 5 | from PIL import Image 6 | from torch.utils.data import Dataset 7 | 8 | 9 | def make_dataset(image_list, labels): 10 | if labels: 11 | len_ = len(image_list) 12 | images = [(image_list[i].strip(), labels[i, :]) for i in range(len_)] 13 | else: 14 | if len(image_list[0].split('==')) > 2: 15 | images = [(val.split('==')[0], np.array([int(la) for la in val.split('==')[1:]])) for val in image_list] 16 | else: 17 | images = [(val.split('==')[0], int(val.split('==')[1])) for val in image_list] 18 | return images 19 | 20 | 21 | def rgb_loader(path): 22 | with open(path, 'rb') as f: 23 | with Image.open(f) as img: 24 | return img.convert('RGB') 25 | 26 | 27 | def l_loader(path): 28 | with open(path, 'rb') as f: 29 | with Image.open(f) as img: 30 | return img.convert('L') 31 | 32 | 33 | class ImageList(Dataset): 34 | def __init__(self, image_list, labels=None, transform=None, target_transform=None, mode='RGB', weight=None): 35 | imgs = make_dataset(image_list, labels) 36 | if len(imgs) == 0: 37 | raise (RuntimeError("Found 0 images in subfolders")) 38 | 39 | self.imgs = imgs 40 | self.transform = transform 41 | self.target_transform = target_transform 42 | if mode == 'RGB': 43 | 
self.loader = rgb_loader 44 | elif mode == 'L': 45 | self.loader = l_loader 46 | self.weight = weight 47 | 48 | def __getitem__(self, index): 49 | 50 | path, target = self.imgs[index] 51 | img = self.loader(path) 52 | if self.transform is not None: 53 | img = self.transform(img) 54 | if self.target_transform is not None: 55 | target = self.target_transform(target) 56 | if self.weight is None: 57 | return img, target 58 | else: 59 | return img, target, self.weight[index] 60 | 61 | def __len__(self): 62 | return len(self.imgs) 63 | 64 | 65 | class ImageList_idx(Dataset): 66 | def __init__(self, image_list, labels=None, transform=None, target_transform=None, mode='RGB'): 67 | imgs = make_dataset(image_list, labels) 68 | if len(imgs) == 0: 69 | raise (RuntimeError("Found 0 images in subfolders of: " + "\n")) 70 | 71 | self.imgs = imgs 72 | self.transform = transform 73 | self.target_transform = target_transform 74 | if mode == 'RGB': 75 | self.loader = rgb_loader 76 | elif mode == 'L': 77 | self.loader = l_loader 78 | 79 | def __getitem__(self, index): 80 | path, target = self.imgs[index] 81 | img = self.loader(path) 82 | if self.transform is not None: 83 | img = self.transform(img) 84 | if self.target_transform is not None: 85 | target = self.target_transform(target) 86 | 87 | return img, target, index 88 | 89 | def __len__(self): 90 | return len(self.imgs) 91 | 92 | 93 | class ImageList_twice(Dataset): 94 | def __init__(self, image_list, labels=None, transform=None, target_transform=None, mode='RGB'): 95 | imgs = make_dataset(image_list, labels) 96 | if len(imgs) == 0: 97 | raise (RuntimeError("Found 0 images in subfolders")) 98 | 99 | self.imgs = imgs 100 | self.transform = transform 101 | self.target_transform = target_transform 102 | if mode == 'RGB': 103 | self.loader = rgb_loader 104 | elif mode == 'L': 105 | self.loader = l_loader 106 | 107 | def __getitem__(self, index): 108 | path, target = self.imgs[index] 109 | img = self.loader(path) 110 | if self.target_transform is not None: 111 | target = self.target_transform(target) 112 | if self.transform is not None: 113 | if type(self.transform).__name__ == 'list': 114 | img = [t(img) for t in self.transform] 115 | else: 116 | img = self.transform(img) 117 | 118 | return img, target 119 | 120 | def __len__(self): 121 | return len(self.imgs) 122 | -------------------------------------------------------------------------------- /NT_SSDA/utils/dataloader.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # A Survey on Negative Transfer 3 | # https://github.com/chamwen/NT-Benchmark 4 | import pandas as pd 5 | import torch as tr 6 | from torch.autograd import Variable 7 | import numpy as np 8 | from sklearn import preprocessing 9 | 10 | 11 | def read_syn_single(args, sub_idx): 12 | root_path = args.root_path 13 | pd_tar = pd.read_csv(root_path + sub_idx + ".csv", header=None) 14 | X, Y = pd_tar.iloc[:, :2].values, pd_tar.iloc[:, 2].values.astype(int) 15 | X = Variable(tr.from_numpy(X).float()) 16 | Y = tr.from_numpy(Y).long() 17 | 18 | return X, Y 19 | 20 | 21 | def read_syn_src_tar(args): 22 | root_path = args.root_path 23 | pd_src = pd.read_csv(root_path + args.src + ".csv", header=None) 24 | Xs, Ys = pd_src.iloc[:, :2].values, pd_src.iloc[:, 2].values.astype(int) 25 | pd_tar = pd.read_csv(root_path + args.tar + ".csv", header=None) 26 | Xt, Yt = pd_tar.iloc[:, :2].values, pd_tar.iloc[:, 2].values.astype(int) 27 | Xs = Variable(tr.from_numpy(Xs).float()) 28 | Ys = 
tr.from_numpy(Ys).long() 29 | Xt = Variable(tr.from_numpy(Xt).float()) 30 | Yt = tr.from_numpy(Yt).long() 31 | 32 | return Xs, Ys, Xt, Yt 33 | 34 | 35 | def data_normalize(fea_de, norm_type): 36 | if norm_type == 'zscore': 37 | zscore = preprocessing.StandardScaler() 38 | fea_de = zscore.fit_transform(fea_de) 39 | return fea_de 40 | 41 | 42 | def read_seed_single(args, sub_idx): 43 | # (15, 3394, 310) (15, 3394) 44 | if args.data_env == 'local': 45 | file = 'D:/Dataset/MOABB/' + args.dset + '.npz' 46 | if args.data_env == 'gpu': 47 | file = '/mnt/ssd2/wenz/data/bci/' + args.dset + '.npz' 48 | 49 | MI = np.load(file) 50 | Data_raw, Label = MI['data'], MI['label'] 51 | 52 | # source sub 53 | fea_de = np.squeeze(Data_raw[sub_idx, :, :]) 54 | fea_de = data_normalize(fea_de, args.norm) 55 | fea_de = Variable(tr.from_numpy(fea_de).float()) 56 | 57 | sub_label = np.squeeze(Label[sub_idx, :]) 58 | sub_label = tr.from_numpy(sub_label).long() 59 | print(fea_de.shape, sub_label.shape) 60 | 61 | return fea_de, sub_label 62 | 63 | 64 | def read_seed_src_tar(args): 65 | # (15, 3394, 310) (15, 3394) 66 | if args.data_env == 'local': 67 | file = 'D:/Dataset/MOABB/' + args.dset + '.npz' 68 | if args.data_env == 'gpu': 69 | file = '/mnt/ssd2/wenz/data/bci/' + args.dset + '.npz' 70 | 71 | MI = np.load(file) 72 | Data_raw, Label = MI['data'], MI['label'] 73 | 74 | src_data = np.squeeze(Data_raw[args.src, :, :]) 75 | src_data = data_normalize(src_data, args.norm) 76 | src_data = Variable(tr.from_numpy(src_data).float()) 77 | src_label = np.squeeze(Label[args.src, :]) 78 | src_label = tr.from_numpy(src_label).long() 79 | 80 | # target sub 81 | tar_data = np.squeeze(Data_raw[args.tar, :, :]) 82 | tar_data = data_normalize(tar_data, args.norm) 83 | tar_data = Variable(tr.from_numpy(tar_data).float()) 84 | tar_label = np.squeeze(Label[args.tar, :]) 85 | tar_label = tr.from_numpy(tar_label).long() 86 | print(tar_data.shape, tar_label.shape) 87 | 88 | return src_data, src_label, tar_data, tar_label 89 | 90 | 91 | def obtain_train_val_source(y_array, trial_ins_num, val_type): 92 | y_array = y_array.numpy() 93 | ins_num_all = len(y_array) 94 | src_idx = range(ins_num_all) 95 | 96 | if val_type == 'random': 97 | num_train = int(0.9 * len(src_idx)) 98 | id_train, id_val = tr.utils.data.random_split(src_idx, [num_train, len(src_idx) - num_train]) 99 | 100 | return id_train, id_val 101 | -------------------------------------------------------------------------------- /NT_SSDA/utils/generate_data_list.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # A Survey on Negative Transfer 3 | # https://github.com/chamwen/NT-Benchmark 4 | import os 5 | import sys 6 | import random 7 | import numpy as np 8 | import os.path as osp 9 | 10 | sys.path.append("..") 11 | fix_seed = 2022 12 | 13 | 14 | def generate(dir, use_path, txt_path, label, sample_rate=1): 15 | files = os.listdir(dir) 16 | files.sort() 17 | 18 | if sample_rate < 1: 19 | select_num = int(len(files) * sample_rate) 20 | raw_idx = np.arange(len(files)) 21 | random.seed(fix_seed) 22 | random.shuffle(raw_idx) 23 | select_idx = raw_idx[:select_num].tolist() 24 | files = np.array(files.copy())[select_idx].tolist() 25 | files.sort() 26 | 27 | total_num = len(files) 28 | # print(total_num) 29 | 30 | listText = open(txt_path, 'a') 31 | num = 0 32 | for file in files: 33 | num += 1 34 | fileType = os.path.splitext(file)  # (root, ext); skip stray .txt files by extension 35 | if fileType[1] == '.txt': 36 | continue 37 | name = use_path + file + '==' +
str(int(label)) + '\n' 38 | if num < total_num + 1: 39 | listText.write(name) 40 | listText.close() 41 | 42 | return total_num 43 | 44 | 45 | def check_class_ins_num(domain_list, folderlist): 46 | min_class_num_list = [] 47 | for name in domain_list: 48 | print('\nreading...', name) 49 | txt_path = out_path_root + dset + '/' + name + '_list.txt' 50 | 51 | class_list = [] 52 | for line in open(txt_path): 53 | class_list.append(line.split('/' + name + '/')[1].split('/')[0]) 54 | 55 | class_list = np.array(class_list) 56 | class_num_list = [np.sum(class_list == cn) for cn in folderlist] 57 | min_class_num_list.append(min(class_num_list)) 58 | print('min class ins_num', min(class_num_list)) 59 | print(min_class_num_list) 60 | 61 | 62 | if __name__ == "__main__": 63 | root = "/mnt/ssd2/wenz/data/" 64 | out_path_root = '../checkpoint/' 65 | 66 | dset = 'VisDA17' 67 | if dset == 'office': 68 | domain_list = ['amazon', 'dslr', 'webcam'] 69 | if dset == 'office-home': 70 | domain_list = ['Art', 'Clipart', 'Product', 'RealWorld'] 71 | if dset == 'office-caltech': 72 | domain_list = ['amazon', 'caltech', 'dslr', 'webcam'] 73 | if dset == 'VisDA17': 74 | domain_list = ['train', 'validation'] 75 | if dset == 'DomainNet': 76 | domain_list = ['clipart', 'infograph', 'painting', 'quickdraw', 'real', 'sketch'] 77 | 78 | save_path = out_path_root + dset 79 | if not osp.exists(save_path): 80 | os.system('mkdir -p ' + save_path) 81 | if not osp.exists(save_path): 82 | os.mkdir(save_path) 83 | 84 | # 40 classes refer: 85 | # SENTRY: Selective entropy optimization via committee consistency 86 | # for unsupervised domain adaptation. ICCV 2021. 87 | if dset == 'DomainNet': 88 | folderlist = ['airplane', 'ambulance', 'apple', 'backpack', 'banana', 'bathtub', 'bear', 'bed', 'bee', 89 | 'bicycle', 'bird', 'book', 'bridge', 'bus', 'butterfly', 'cake', 'calculator', 'camera', 'car', 90 | 'cat', 'chair', 'clock', 'cow', 'dog', 'dolphin', 'donut', 'drums', 'duck', 'elephant', 'fence', 91 | 'fork', 'horse', 'house', 'rabbit', 'scissors', 'sheep', 'strawberry', 'table', 'telephone', 92 | 'truck'] 93 | sample_rate = 0.2  # 0.2, 0.4 20%*all_num 94 | 95 | for name in domain_list: 96 | print('\nprocessing...', name) 97 | data_path = root + dset + '/' + name 98 | txt_path = out_path_root + dset + '/' + name + '_list.txt' 99 | 100 | if '.DS_Store' in folderlist: 101 | folderlist.remove('.DS_Store') 102 | 103 | i = 0 104 | total_num = 0 105 | for folder in folderlist: 106 | use_path_a = data_path + '/' + folder + '/' 107 | num = generate(os.path.join(data_path, folder), use_path_a, txt_path, i, sample_rate) 108 | total_num = total_num + num 109 | i += 1 110 | print(name, total_num) 111 | 112 | print('=' * 50) 113 | check_class_ins_num(domain_list, folderlist) 114 | -------------------------------------------------------------------------------- /NT_UDA/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # @Time : 2022/1/23 9:10 AM 3 | # @Author : wenzhang 4 | # @File : __init__.py 5 | -------------------------------------------------------------------------------- /NT_UDA/data_synth/generate_synth/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chamwen/NT-Benchmark/91f7502119c7d4d46bd1878ec21298a3873bf9c3/NT_UDA/data_synth/generate_synth/.DS_Store -------------------------------------------------------------------------------- /NT_UDA/data_synth/generate_synth/__init__.py:
-------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # @Time : 2022/2/20 21:46 3 | # @Author : wenzhang 4 | # @File : __init__.py 5 | 6 | import numpy as np 7 | -------------------------------------------------------------------------------- /NT_UDA/data_synth/generate_synth/data_synth_moons.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # A Survey on Negative Transfer 3 | # https://github.com/chamwen/NT-Benchmark 4 | import numpy as np 5 | import matplotlib.pyplot as plt 6 | from sklearn.datasets import make_moons, make_blobs 7 | from func_transform import * 8 | 9 | root = '/Users/NT-Benchmark/NT_UDA/data_synth/' 10 | random_state = 2020 11 | 12 | # Generate MOONS SYNTHETIC DATA 13 | moons = make_moons(n_samples=600, noise=.1) 14 | Xt, Yt = moons[0], moons[1] 15 | data = np.concatenate([Xt, Yt.reshape(-1, 1)], axis=1) 16 | np.savetxt(root + "moon0.csv", data, delimiter=',') 17 | # show_data(Xt, Yt) 18 | 19 | # Generate DSM01 - TRANSLATION 20 | Xs, Ys = affine_transformation(Xt, Yt, "moon1", translation, 2) 21 | 22 | # Generate DSM02 - SCALE 23 | Xs, Ys = affine_transformation(Xt, Yt, "moon2", scale, 1.5) 24 | 25 | # Generate DSM03 - ROTATION 26 | Xs, Ys = affine_transformation(Xt, Yt, "moon3_15", rotation, np.pi / 12) 27 | Xs, Ys = affine_transformation(Xt, Yt, "moon3_30", rotation, np.pi / 6) 28 | Xs, Ys = affine_transformation(Xt, Yt, "moon3_45", rotation, np.pi / 4) 29 | 30 | # Generate DSM04 - SHEAR 31 | Xs, Ys = affine_transformation(Xt, Yt, "moon4_5", shear, 0.5) 32 | Xs, Ys = affine_transformation(Xt, Yt, "moon4_10", shear, 1) 33 | Xs, Ys = affine_transformation(Xt, Yt, "moon4_15", shear, 1.5) 34 | 35 | # Generate DSM05 - COMBINATION 36 | Xs, Ys = affine_transformation(Xt, Yt, "moon5", translation, 2, save=False) 37 | Xs, Ys = affine_transformation(Xs, Yt, "moon5", rotation, np.pi / 4, save=False) 38 | Xs, Ys = affine_transformation(Xs, Yt, "moon5", scale, 2, save=False) 39 | Xs, Ys = affine_transformation(Xs, Yt, "moon5", shear, 1.0, save=True) 40 | 41 | # Generate DSM06 - SKEWED DISTRIBUTION 42 | cls = Yt == 0 # init 43 | ind1 = np.squeeze(np.nonzero(cls)) 44 | ind2 = np.squeeze(np.nonzero(np.bitwise_not(cls))) 45 | Xa, Ya = Xt[ind1], Yt[ind1] 46 | Xb, Yb = Xt[ind2], Yt[ind2] 47 | Xa = Xa + 1.5 * Xa.std() * np.random.random(Xa.shape) # add noise 48 | Xs = np.concatenate((Xa, Xb), axis=0) 49 | Ys = np.concatenate((Ya, Yb), axis=0) 50 | 51 | idx_rand = np.arange(len(Ys)) 52 | np.random.seed(random_state) 53 | random.seed(random_state) 54 | random.shuffle(idx_rand) 55 | Xs, Ys = Xs[idx_rand.tolist(), :], Ys[idx_rand.tolist()] 56 | Xs, Ys = affine_transformation(Xs, Ys, "moon6", translation, 0) 57 | 58 | # Generate DSM07 - NOISE 59 | Xs = noisy(Xt, "s&p") 60 | Xs, Ys = affine_transformation(Xs, Yt, "moon7", translation, 0) 61 | 62 | # Generate DSM08 - OVERLAPPING 63 | moons = make_moons(n_samples=600, noise=.33) 64 | Xs, Ys = moons[0], moons[1] 65 | Xs, Ys = affine_transformation(Xs, Ys, "moon8", translation, 0) 66 | 67 | # Generate DSM09 - SUB-CLUSTERS 68 | centers = [(-0.5, -0.5)] 69 | samples = make_blobs(n_samples=100, centers=centers, cluster_std=[0.15], random_state=random_state, shuffle=True) 70 | moons = make_moons(n_samples=600, noise=.1) 71 | Xs, Ys = moons[0], moons[1] 72 | Xs = np.concatenate((Xs, samples[0]), axis=0) 73 | Ys = np.concatenate((Ys, samples[1]), axis=0) 74 | Xs, Ys = affine_transformation(Xs, Ys, "moon9", translation, 0) 75 | 
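[Illustrative addition, not part of the repository] Each block above writes one domain to a three-column CSV (x1, x2, integer label) under root. A minimal sketch for reading a domain back, mirroring how the demo_syn_* scripts load these files:

import pandas as pd

df = pd.read_csv(root + "moon1.csv", header=None)  # columns: x1, x2, label
X, y = df.iloc[:, :2].values, df.iloc[:, 2].values.astype(int)
print(X.shape, y.shape)  # (600, 2) (600,) for the 600-sample domains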
-------------------------------------------------------------------------------- /NT_UDA/data_synth/generate_synth/func_transform.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # A Survey on Negative Transfer 3 | # https://github.com/chamwen/NT-Benchmark 4 | import numpy as np 5 | import matplotlib.pyplot as plt 6 | import copy 7 | import random 8 | from sklearn import manifold 9 | import warnings 10 | warnings.filterwarnings('ignore') 11 | 12 | 13 | def noisy(samples, noise_type): 14 | """ 15 | Parameters 16 | ---------- 17 | samples : ndarray 18 | Input sample array of shape (n_samples, 2). 19 | noise_type : str 20 | One of the following strings, selecting the type of noise to add: 21 | 22 | 'gauss' Gaussian-distributed additive noise. 23 | 'poisson' Poisson-distributed noise generated from the data. 24 | 's&p' Replaces random samples with uniform values over the data range. 25 | 'speckle' Multiplicative noise using out = samples + n*samples, where 26 | n is standard Gaussian noise. 27 | """ 28 | 29 | if noise_type == "gauss": 30 | row, col = samples.shape 31 | mean = 0 32 | var = 0.05 33 | sigma = var ** 0.5 34 | gauss = np.random.normal(mean, sigma, (row, col)) 35 | gauss = gauss.reshape(row, col) 36 | noisy = samples + gauss 37 | return noisy 38 | 39 | elif noise_type == "s&p": 40 | row, col = samples.shape 41 | amount = 0.05 42 | out = np.copy(samples) 43 | num = np.ceil(amount * samples.size) 44 | 45 | x1_min = np.amin(samples[:, 0]) 46 | x1_max = np.amax(samples[:, 0]) 47 | x2_min = np.amin(samples[:, 1]) 48 | x2_max = np.amax(samples[:, 1]) 49 | 50 | # replace randomly chosen samples with uniform noise 51 | coords = np.random.randint(0, samples.shape[0], int(num)) 52 | 53 | for i in range(len(coords)): 54 | out[coords[i]][0] = random.uniform(x1_min, x1_max) 55 | out[coords[i]][1] = random.uniform(x2_min, x2_max) 56 | 57 | return out 58 | 59 | elif noise_type == "poisson": 60 | vals = len(np.unique(samples)) 61 | vals = 2 ** np.ceil(np.log2(vals)) 62 | noisy = np.random.poisson(samples * vals) / float(vals) 63 | return noisy 64 | 65 | elif noise_type == "speckle": 66 | row, col = samples.shape 67 | gauss = np.random.randn(row, col) 68 | gauss = gauss.reshape(row, col) 69 | noisy = samples + samples * gauss 70 | return noisy 71 | 72 | 73 | def translation(Xs, magnitude): 74 | for i in range(len(Xs)): 75 | Xs[i][0] += magnitude 76 | Xs[i][1] += magnitude 77 | return Xs 78 | 79 | 80 | def rotation(Xs, angle): 81 | # Rotate data 82 | # axis 1 83 | a = np.multiply(Xs[:, 0], np.cos(angle)) 84 | b = np.multiply(Xs[:, 1], -1 * np.sin(angle)) 85 | T1 = np.add(a, b) 86 | # axis 2 87 | a = np.multiply(Xs[:, 1], np.cos(angle)) 88 | b = np.multiply(Xs[:, 0], np.sin(angle)) 89 | T2 = np.add(a, b) 90 | # copy rotated data 91 | Xs[:, 0] = T1 92 | Xs[:, 1] = T2 93 | return Xs 94 | 95 | 96 | def scale(Xs, scalar): 97 | # Apply scale 98 | Xs = scalar * Xs 99 | return Xs 100 | 101 | 102 | def reflection(Xs, scalar): 103 | # applying Reflection 104 | Xs[:, 1] = Xs[:, 1] * (-scalar) 105 | return Xs 106 | 107 | 108 | def shear(Xs, scalar): 109 | # Applying SHEAR 110 | # dim-1 111 | a = np.multiply(Xs[:, 0], 1) 112 | b = np.multiply(Xs[:, 1], 0) 113 | T1 = np.add(a, b) 114 | # dim-2 115 | a = np.multiply(Xs[:, 1], 1) 116 | b = np.multiply(Xs[:, 0], scalar) 117 | T2 = np.add(a, b) 118 | # copy data 119 | Xs[:, 0] = T1 120 | Xs[:, 1] = T2 121 | return Xs 122 | 123 | 
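# [illustrative sanity check, not in the original file] rotation() applies the
# standard 2-D rotation matrix [[cos a, -sin a], [sin a, cos a]] row-wise, so a
# pi/2 turn maps (1, 0) to (0, 1):
#
#   >>> rotation(np.array([[1.0, 0.0]]), np.pi / 2).round(6)
#   array([[0., 1.]])
#
# Note that translation(), rotation() and shear() modify Xs in place (scale()
# returns a new array); affine_transformation() below guards against this by
# copying the inputs first.
124 | def show_data_st(Xt, Yt, Xs, Ys): 125 | # Draw synthetic data on image 126 | X = np.concatenate((Xt, Xs), axis=0) 127 | Y = 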
np.concatenate((Yt, Ys), axis=0) 128 | 129 | plt.clf() 130 | plt.scatter(X[:, 0], X[:, 1], c=Y[:], alpha=0.4) 131 | plt.show() 132 | 133 | 134 | def show_data(Xs, Ys): 135 | # Draw synthetic data on image 136 | plt.clf() 137 | plt.scatter(Xs[:, 0], Xs[:, 1], c=Ys[:], alpha=0.4) 138 | plt.show() 139 | 140 | 141 | def affine_transformation(Xt, Yt, name, affine_method, value, save=True, plot=False): 142 | """ 143 | Method to generate an affine-transformed copy of the input domain 144 | """ 145 | # Copy to target domain 146 | Xs = copy.copy(Xt) 147 | Ys = copy.copy(Yt) 148 | 149 | # generate affine transformation 150 | Xs = affine_method(Xs, value) 151 | 152 | # Save target domain 153 | if save: 154 | root = '/Users/NT-Benchmark/NT_UDA/data_synth/' 155 | data = np.concatenate([Xs, Ys.reshape(-1, 1)], axis=1) 156 | np.savetxt(root + name + ".csv", data, delimiter=',') 157 | 158 | if plot: 159 | show_data(Xs, Ys) 160 | 161 | return Xs, Ys 162 | -------------------------------------------------------------------------------- /NT_UDA/demo_img_bl_random.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # A Survey on Negative Transfer 3 | # https://github.com/chamwen/NT-Benchmark 4 | import numpy as np 5 | from sklearn.metrics import accuracy_score 6 | 7 | dset = 'DomainNet' 8 | noise_rate = 0 9 | dset_n = dset + '_' + str(noise_rate) 10 | 11 | if dset == 'DomainNet': 12 | domain_list = ['clipart', 'infograph', 'painting'] 13 | num_domain = len(domain_list) 14 | 15 | acc_all = np.zeros(num_domain * (num_domain - 1)) 16 | for s in range(num_domain): 17 | for t in range(num_domain): 18 | if t == s: 19 | continue 20 | itr_idx = (num_domain - 1) * s + t 21 | if t > s: itr_idx -= 1 22 | info_str = '\n%s: %s --> %s' % (itr_idx, domain_list[s], domain_list[t]) 23 | print(info_str) 24 | name_src = domain_list[s][0].upper() 25 | name_tar = domain_list[t][0].upper() 26 | task_str = name_src + name_tar 27 | 28 | # load labels 29 | folder = "checkpoint/" 30 | s_dset_path = folder + dset + '/' + domain_list[s] + '_list.txt' 31 | t_dset_path = folder + dset + '/' + domain_list[t] + '_list.txt' 32 | txt_src = open(s_dset_path).readlines() 33 | txt_tar = open(t_dset_path).readlines() 34 | Y_source = np.array([int(img_str.split('==')[1]) for img_str in txt_src])  # arrays, so the per-class counts below work 35 | Y_target = np.array([int(img_str.split('==')[1]) for img_str in txt_tar]) 36 | 37 | # majority-class baseline: always predict the most frequent source class 38 | class_list = np.unique(Y_source) 39 | class_num_list = [np.sum(Y_source == c) for c in class_list] 40 | num_max_class = class_list[np.argmax(class_num_list)] 41 | pred_tar = np.ones(len(Y_target)) * num_max_class 42 | pred_tar = pred_tar.astype(int) 43 | acc_all[itr_idx] = accuracy_score(Y_target, pred_tar) * 100 44 | 45 | print('acc: {:.2f}'.format(acc_all[itr_idx])) 46 | 47 | print('\ndone') 48 | print('All acc: ', np.round(acc_all, 2)) 49 | print('Avg acc: ', np.round(np.mean(acc_all), 2)) 50 | 51 | -------------------------------------------------------------------------------- /NT_UDA/demo_img_bl_svm.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # A Survey on Negative Transfer 3 | # https://github.com/chamwen/NT-Benchmark 4 | import numpy as np 5 | from sklearn.metrics import accuracy_score 6 | from utils.utils_bl import baseline_SVM 7 | 8 | dset = 'DomainNet' 9 | noise_rate = 0 10 | dset_n = dset + '_' + str(noise_rate) 11 | 12 | if dset == 'DomainNet': 13 | domain_list = ['clipart', 'infograph', 'painting'] 14 | num_domain = len(domain_list) 15 | 
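# [worked example, illustrative, not in the original script] itr_idx below
# enumerates the ordered source->target pairs while skipping s == t. With
# num_domain = 3 and domains (C, I, P), (num_domain - 1) * s + t, minus the
# correction for t > s, yields:
#   C->I: 0, C->P: 1, I->C: 2, I->P: 3, P->C: 4, P->I: 5
# i.e. exactly num_domain * (num_domain - 1) = 6 transfer tasks.
16 | 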
acc_all = np.zeros(num_domain * (num_domain - 1)) 17 | for s in range(num_domain): 18 | for t in range(num_domain): 19 | if t == s: 20 | continue 21 | itr_idx = (num_domain - 1) * s + t 22 | if t > s: itr_idx -= 1 23 | info_str = '\n%s: %s --> %s' % (itr_idx, domain_list[s], domain_list[t]) 24 | print(info_str) 25 | name_src = domain_list[s][0].upper() 26 | name_tar = domain_list[t][0].upper() 27 | task_str = name_src + name_tar 28 | 29 | # load pre-trained features: 30 | root_path = 'outputs/feas/' 31 | data_path = dset_n + '/' + task_str + '_0.npz' 32 | data_dir = root_path + data_path 33 | data = np.load(data_dir) 34 | X_source, Y_source = data['X_source'], data['y_source'] 35 | X_target, Y_target = data['X_target'], data['y_target'] 36 | print(X_source.shape, Y_source.shape, X_target.shape, Y_target.shape) 37 | 38 | # test SVM: 39 | result_SVM = baseline_SVM(X_source, Y_source, X_target, Y_target) 40 | acc_all[itr_idx] = accuracy_score(Y_target, result_SVM) * 100 41 | 42 | print('SVM: {:.2f}'.format(acc_all[itr_idx])) 43 | 44 | print('\ndone') 45 | print('All acc: ', np.round(acc_all, 2)) 46 | print('Avg acc: ', np.round(np.mean(acc_all), 2)) 47 | 48 | -------------------------------------------------------------------------------- /NT_UDA/demo_img_jda.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # A Survey on Negative Transfer 3 | # https://github.com/chamwen/NT-Benchmark 4 | import numpy as np 5 | from utils.utils_bl import JDA 6 | 7 | dset = 'DomainNet' 8 | noise_rate = 0 9 | dset_n = dset + '_' + str(noise_rate) 10 | 11 | if dset == 'DomainNet': 12 | domain_list = ['clipart', 'infograph', 'painting'] 13 | num_domain = len(domain_list) 14 | 15 | acc_all = np.zeros(num_domain * (num_domain - 1)) 16 | for s in range(num_domain): 17 | for t in range(num_domain): 18 | if t == s: 19 | continue 20 | itr_idx = (num_domain - 1) * s + t 21 | if t > s: itr_idx -= 1 22 | info_str = '\n%s: %s --> %s' % (itr_idx, domain_list[s], domain_list[t]) 23 | print(info_str) 24 | name_src = domain_list[s][0].upper() 25 | name_tar = domain_list[t][0].upper() 26 | task_str = name_src + name_tar 27 | 28 | # load pre-trained features: 29 | root_path = 'outputs/feas/' 30 | data_path = dset_n + '/' + task_str + '_0.npz' 31 | data_dir = root_path + data_path 32 | data = np.load(data_dir) 33 | Xs, Ys = data['X_source'], data['y_source'] 34 | Xt, Yt = data['X_target'], data['y_target'] 35 | print(Xs.shape, Ys.shape, Xt.shape, Yt.shape) 36 | 37 | # JDA 38 | ker_type = 'primal' 39 | traditional_tl = JDA(kernel_type=ker_type, dim=100, lamb=1, gamma=1) 40 | acc_all[itr_idx] = traditional_tl.fit_predict(Xs, Ys, Xt, Yt) 41 | print('JDA: {:.2f}'.format(acc_all[itr_idx])) 42 | 43 | print('\ndone') 44 | print('All acc: ', np.round(acc_all, 2)) 45 | print('Avg acc: ', np.round(np.mean(acc_all), 2)) 46 | 47 | -------------------------------------------------------------------------------- /NT_UDA/demo_img_kmm.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # A Survey on Negative Transfer 3 | # https://github.com/chamwen/NT-Benchmark 4 | import numpy as np 5 | from sklearn.metrics import accuracy_score 6 | from utils.utils_bl import KMM, baseline_SVM 7 | 8 | dset = 'DomainNet' 9 | noise_rate = 0 10 | dset_n = dset + '_' + str(noise_rate) 11 | 12 | if dset == 'DomainNet': 13 | domain_list = ['clipart', 'infograph', 'painting'] 14 | num_domain = len(domain_list) 15 | 16 | acc_all = 
np.zeros(num_domain * (num_domain - 1)) 17 | for s in range(num_domain): 18 | for t in range(num_domain): 19 | if t == s: 20 | continue 21 | itr_idx = (num_domain - 1) * s + t 22 | if t > s: itr_idx -= 1 23 | info_str = '\n%s: %s --> %s' % (itr_idx, domain_list[s], domain_list[t]) 24 | print(info_str) 25 | name_src = domain_list[s][0].upper() 26 | name_tar = domain_list[t][0].upper() 27 | task_str = name_src + name_tar 28 | 29 | # load pre-trained features: 30 | root_path = 'outputs/feas/' 31 | data_path = dset_n + '/' + task_str + '_0.npz' 32 | data_dir = root_path + data_path 33 | data = np.load(data_dir) 34 | Xs, Ys = data['X_source'], data['y_source'] 35 | Xt, Yt = data['X_target'], data['y_target'] 36 | print(Xs.shape, Ys.shape, Xt.shape, Yt.shape) 37 | 38 | kmm = KMM(kernel_type='rbf', B=1) 39 | beta = kmm.fit(Xs, Xt) 40 | Xs_new = beta * Xs 41 | 42 | pred_tar = baseline_SVM(Xs_new, Ys, Xt, Yt) 43 | acc_all[itr_idx] = accuracy_score(Yt, pred_tar) * 100 44 | print('KMM: {:.2f}'.format(acc_all[itr_idx])) 45 | 46 | print('\ndone') 47 | print('All acc: ', np.round(acc_all, 2)) 48 | print('Avg acc: ', np.round(np.mean(acc_all), 2)) 49 | 50 | -------------------------------------------------------------------------------- /NT_UDA/demo_seed_bl_random.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # A Survey on Negative Transfer 3 | # https://github.com/chamwen/NT-Benchmark 4 | import numpy as np 5 | import argparse 6 | from sklearn.metrics import accuracy_score 7 | from utils.dataloader import data_normalize 8 | 9 | 10 | def read_seed_src_tar_bl(args): 11 | # (15, 3394, 310) (15, 3394) 12 | if args.data_env == 'local': 13 | file = '/Users/wenz/dataset/MOABB/' + args.dset + '.npz' 14 | if args.data_env == 'gpu': 15 | file = '/mnt/ssd2/wenz/data/bci/' + args.dset + '.npz' 16 | 17 | MI = np.load(file) 18 | Data_raw, Label = MI['data'], MI['label'] 19 | 20 | src_data = np.squeeze(Data_raw[args.src, :, :]) 21 | src_data = data_normalize(src_data, args.norm) 22 | src_label = np.squeeze(Label[args.src, :]) 23 | 24 | # target sub 25 | tar_data = np.squeeze(Data_raw[args.tar, :, :]) 26 | tar_data = data_normalize(tar_data, args.norm) 27 | tar_label = np.squeeze(Label[args.tar, :]) 28 | print(tar_data.shape, tar_label.shape) 29 | 30 | return src_data, src_label, tar_data, tar_label 31 | 32 | 33 | data_name = 'SEED' 34 | if data_name == 'SEED': chn, class_num, trial_num = 62, 3, 3394 35 | focus_domain_idx = [0, 1, 2] 36 | domain_list = ['S' + str(i) for i in focus_domain_idx] 37 | num_domain = len(domain_list) 38 | 39 | args = argparse.Namespace(dset=data_name, norm='zscore', seed=2022, class_num=3) 40 | args.data_env = 'local' 41 | args.noise_rate = 0 42 | print(args) 43 | 44 | num_domain = len(domain_list) 45 | acc_all = np.zeros(len(domain_list) * (len(domain_list) - 1)) 46 | for s in range(num_domain): # source 47 | for t in range(num_domain): # target 48 | if s != t: 49 | itr_idx = (num_domain - 1) * s + t 50 | if t > s: itr_idx -= 1 51 | print('\n%s: %s --> %s' % (itr_idx, domain_list[s], domain_list[t])) 52 | args.src, args.tar = s, t 53 | Xs, Ys, Xt, Yt = read_seed_src_tar_bl(args) 54 | 55 | # random guess 56 | class_list = np.unique(Ys) 57 | class_num_list = [np.sum(Ys == c) for c in class_list] 58 | num_max_class = class_list[np.argmax(class_num_list)] 59 | pred_tar = np.ones(len(Xt)) * num_max_class 60 | pred_tar = pred_tar.astype(int) 61 | 62 | acc_all[itr_idx] = accuracy_score(Yt, pred_tar) * 100 63 | print('acc: %.2f' 
% np.round(acc_all[itr_idx], 2)) 64 | 65 | print('\nmean acc', np.round(np.mean(acc_all), 2)) 66 | print(domain_list) 67 | print(np.round(acc_all, 2).tolist()) 68 | 69 | 70 | -------------------------------------------------------------------------------- /NT_UDA/demo_seed_bl_svm.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # A Survey on Negative Transfer 3 | # https://github.com/chamwen/NT-Benchmark 4 | import numpy as np 5 | import torch as tr 6 | import argparse 7 | from sklearn.metrics import accuracy_score 8 | from utils.utils import add_label_noise_noimg 9 | from utils.dataloader import data_normalize 10 | from utils.utils_bl import baseline_KNN, baseline_SVM 11 | 12 | 13 | def read_seed_src_tar_bl(args): 14 | # (15, 3394, 310) (15, 3394) 15 | if args.data_env == 'local': 16 | file = 'D:/Dataset/MOABB/' + args.dset + '.npz' 17 | if args.data_env == 'gpu': 18 | file = '/mnt/ssd2/wenz/data/bci/' + args.dset + '.npz' 19 | 20 | MI = np.load(file) 21 | Data_raw, Label = MI['data'], MI['label'] 22 | 23 | src_data = np.squeeze(Data_raw[args.src, :, :]) 24 | src_data = data_normalize(src_data, args.norm) 25 | src_label = np.squeeze(Label[args.src, :]) 26 | 27 | # target sub 28 | tar_data = np.squeeze(Data_raw[args.tar, :, :]) 29 | tar_data = data_normalize(tar_data, args.norm) 30 | tar_label = np.squeeze(Label[args.tar, :]) 31 | print(tar_data.shape, tar_label.shape) 32 | 33 | return src_data, src_label, tar_data, tar_label 34 | 35 | 36 | data_name = 'SEED' 37 | if data_name == 'SEED': chn, class_num, trial_num = 62, 3, 3394 38 | focus_domain_idx = [0, 1, 2] 39 | domain_list = ['S' + str(i) for i in focus_domain_idx] 40 | num_domain = len(domain_list) 41 | 42 | args = argparse.Namespace(dset=data_name, norm='zscore', seed=2022, class_num=3) 43 | args.data_env = 'gpu' # 'local' 44 | args.noise_rate = 0 45 | print(args) 46 | 47 | num_domain = len(domain_list) 48 | acc_all = np.zeros(len(domain_list) * (len(domain_list) - 1)) 49 | for s in range(num_domain): # source 50 | for t in range(num_domain): # target 51 | if s != t: 52 | itr_idx = (num_domain - 1) * s + t 53 | if t > s: itr_idx -= 1 54 | print('\n%s: %s --> %s' % (itr_idx, domain_list[s], domain_list[t])) 55 | args.src, args.tar = s, t 56 | Xs, Ys, Xt, Yt = read_seed_src_tar_bl(args) 57 | 58 | # add noise on source label 59 | Ys = add_label_noise_noimg(Ys, args.seed, args.class_num, args.noise_rate) 60 | 61 | # test SVM: 62 | pred_tar = baseline_SVM(Xs, Ys, Xt, Yt) 63 | acc_all[itr_idx] = accuracy_score(Yt, pred_tar) * 100 64 | print('acc: %.2f' % np.round(acc_all[itr_idx], 2)) 65 | 66 | print('\nmean acc', np.round(np.mean(acc_all), 2)) 67 | print(domain_list) 68 | print(np.round(acc_all, 2).tolist()) 69 | 70 | -------------------------------------------------------------------------------- /NT_UDA/demo_seed_dan.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # A Survey on Negative Transfer 3 | # https://github.com/chamwen/NT-Benchmark 4 | import numpy as np 5 | import argparse 6 | import os 7 | import torch as tr 8 | import torch.nn as nn 9 | import torch.optim as optim 10 | from utils import network, loss, utils 11 | from utils.LogRecord import LogRecord 12 | from utils.dataloader import read_seed_src_tar 13 | from utils.utils import lr_scheduler_full, fix_random_seed, data_load_noimg 14 | from utils.loss import MultipleKernelMaximumMeanDiscrepancy, GaussianKernel 15 | 16 | 17 | def 
train_target(args): 18 | X_src, y_src, X_tar, y_tar = read_seed_src_tar(args) 19 | dset_loaders = data_load_noimg(X_src, y_src, X_tar, y_tar, args) 20 | 21 | netF, netC = network.backbone_net(args, args.bottleneck) 22 | netF.load_state_dict(tr.load(args.mdl_init_dir + 'netF.pt')) 23 | netC.load_state_dict(tr.load(args.mdl_init_dir + 'netC.pt')) 24 | base_network = nn.Sequential(netF, netC) 25 | optimizer = optim.SGD(base_network.parameters(), lr=args.lr) 26 | 27 | max_iter = args.max_epoch * len(dset_loaders["source"]) 28 | interval_iter = max_iter // 10 29 | args.max_iter = max_iter 30 | iter_num = 0 31 | base_network.train() 32 | 33 | while iter_num < max_iter: 34 | try: 35 | inputs_source, labels_source = iter_source.next() 36 | except: 37 | iter_source = iter(dset_loaders["source"]) 38 | inputs_source, labels_source = iter_source.next() 39 | 40 | try: 41 | inputs_target, _ = iter_target.next() 42 | except: 43 | iter_target = iter(dset_loaders["target"]) 44 | inputs_target, _ = iter_target.next() 45 | 46 | if inputs_source.size(0) == 1: 47 | continue 48 | 49 | iter_num += 1 50 | lr_scheduler_full(optimizer, init_lr=args.lr, iter_num=iter_num, max_iter=args.max_iter) 51 | 52 | inputs_source, inputs_target, labels_source = inputs_source.cuda(), inputs_target.cuda(), labels_source.cuda() 53 | features_source, outputs_source = base_network(inputs_source) 54 | features_target, outputs_target = base_network(inputs_target) 55 | 56 | # new version img loss 57 | args.non_linear = False 58 | args.trade_off = 1.0 59 | classifier_loss = nn.CrossEntropyLoss()(outputs_source, labels_source) 60 | mkmmd_loss = MultipleKernelMaximumMeanDiscrepancy( 61 | kernels=[GaussianKernel(alpha=2 ** k) for k in range(-3, 2)], 62 | linear=not args.non_linear 63 | ) 64 | discrepancy_loss = mkmmd_loss(features_source, features_target) 65 | total_loss = classifier_loss + discrepancy_loss * args.trade_off 66 | 67 | optimizer.zero_grad() 68 | total_loss.backward() 69 | optimizer.step() 70 | 71 | if iter_num % interval_iter == 0 or iter_num == max_iter: 72 | base_network.eval() 73 | 74 | acc_t_te = utils.cal_acc_base(dset_loaders["Target"], base_network) 75 | log_str = 'Task: {}, Iter:{}/{}; Acc = {:.2f}%'.format(args.task_str, iter_num, max_iter, acc_t_te) 76 | args.log.record(log_str) 77 | print(log_str) 78 | 79 | base_network.train() 80 | 81 | return acc_t_te 82 | 83 | 84 | if __name__ == '__main__': 85 | 86 | data_name = 'SEED' 87 | if data_name == 'SEED': chn, class_num, trial_num = 62, 3, 3394 88 | focus_domain_idx = [0, 1, 2] 89 | domain_list = ['S' + str(i) for i in focus_domain_idx] 90 | num_domain = len(domain_list) 91 | 92 | args = argparse.Namespace(bottleneck=64, lr=0.01, lr_decay1=0.1, lr_decay2=1.0, 93 | epsilon=1e-05, layer='wn', smooth=0, 94 | N=num_domain, chn=chn, class_num=class_num) 95 | 96 | args.dset = data_name 97 | args.method = 'DAN' 98 | args.backbone = 'ShallowNet' 99 | args.batch_size = 32 # 32 100 | args.max_epoch = 50 # 50 101 | args.input_dim = 310 102 | args.norm = 'zscore' 103 | args.mdl_init_dir = 'outputs/mdl_init/' + args.dset + '/' 104 | args.noise_rate = 0 105 | dset_n = args.dset + '_' + str(args.noise_rate) 106 | 107 | os.environ["CUDA_VISIBLE_DEVICES"] = '3' 108 | args.data_env = 'gpu' # 'local' 109 | args.seed = 2022 110 | fix_random_seed(args.seed) 111 | tr.backends.cudnn.deterministic = True 112 | 113 | print(dset_n, args.method) 114 | print(args) 115 | 116 | args.local_dir = r'/mnt/ssd2/wenz/NT-Benchmark/NT_UDA/' 117 | args.result_dir = 'results/target/' 118 | my_log = 
LogRecord(args) 119 | my_log.log_init() 120 | my_log.record('=' * 50 + '\n' + os.path.basename(__file__) + '\n' + '=' * 50) 121 | 122 | acc_all = np.zeros(num_domain * (num_domain - 1)) 123 | for s in range(num_domain): 124 | for t in range(num_domain): 125 | if s != t: 126 | itr_idx = (num_domain - 1) * s + t 127 | if t > s: itr_idx -= 1 128 | info_str = '\n%s: %s --> %s' % (itr_idx, domain_list[s], domain_list[t]) 129 | print(info_str) 130 | args.src, args.tar = focus_domain_idx[s], focus_domain_idx[t] 131 | args.task_str = domain_list[s] + '_' + domain_list[t] 132 | print(args) 133 | 134 | my_log.record(info_str) 135 | args.log = my_log 136 | acc_all[itr_idx] = train_target(args) 137 | print('\nSub acc: ', np.round(acc_all, 3)) 138 | print('Avg acc: ', np.round(np.mean(acc_all), 3)) 139 | 140 | acc_sub_str = str(np.round(acc_all, 3).tolist()) 141 | acc_mean_str = str(np.round(np.mean(acc_all), 3).tolist()) 142 | args.log.record("\n==========================================") 143 | args.log.record(acc_sub_str) 144 | args.log.record(acc_mean_str) 145 | -------------------------------------------------------------------------------- /NT_UDA/demo_seed_dnn.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # A Survey on Negative Transfer 3 | # https://github.com/chamwen/NT-Benchmark 4 | import numpy as np 5 | import argparse 6 | import os 7 | import torch as tr 8 | import torch.nn as nn 9 | import torch.optim as optim 10 | import os.path as osp 11 | from utils import network, utils 12 | from utils.LogRecord import LogRecord 13 | from utils.dataloader import read_seed_src_tar 14 | from utils.utils import fix_random_seed, lr_scheduler_full, data_load_noimg 15 | 16 | 17 | def train_source_test_target(args): 18 | X_src, y_src, X_tar, y_tar = read_seed_src_tar(args) 19 | dset_loaders = data_load_noimg(X_src, y_src, X_tar, y_tar, args) 20 | 21 | netF, netC = network.backbone_net(args, args.bottleneck) 22 | netF.load_state_dict(tr.load(args.mdl_init_dir + 'netF.pt')) 23 | netC.load_state_dict(tr.load(args.mdl_init_dir + 'netC.pt')) 24 | base_network = nn.Sequential(netF, netC) 25 | optimizer = optim.SGD(base_network.parameters(), lr=args.lr) 26 | 27 | acc_init = 0 28 | max_iter = args.max_epoch * len(dset_loaders["source_tr"]) 29 | interval_iter = max_iter // 10 30 | args.max_iter = max_iter 31 | iter_num = 0 32 | base_network.train() 33 | 34 | while iter_num < max_iter: 35 | try: 36 | inputs_source, labels_source = source_loader_iter.next() 37 | except: 38 | source_loader_iter = iter(dset_loaders["source_tr"]) 39 | inputs_source, labels_source = source_loader_iter.next() 40 | 41 | if inputs_source.size(0) == 1: 42 | continue 43 | 44 | iter_num += 1 45 | lr_scheduler_full(optimizer, init_lr=args.lr, iter_num=iter_num, max_iter=args.max_iter) 46 | 47 | inputs_source, labels_source = inputs_source.cuda(), labels_source.cuda() 48 | features_source, outputs_source = base_network(inputs_source) 49 | 50 | # # CE smooth loss 51 | # classifier_loss = loss.CELabelSmooth(reduction='none', num_classes=class_num, epsilon=args.smooth)( 52 | # outputs_source, labels_source) 53 | classifier_loss = nn.CrossEntropyLoss()(outputs_source, labels_source) 54 | 55 | optimizer.zero_grad() 56 | classifier_loss.backward() 57 | optimizer.step() 58 | 59 | if iter_num % interval_iter == 0 or iter_num == max_iter: 60 | base_network.eval() 61 | 62 | acc_s_te = utils.cal_acc_base(dset_loaders["source_te"], base_network) 63 | acc_t_te = 
utils.cal_acc_base(dset_loaders["Target"], base_network) 64 | log_str = 'Task: {}, Iter:{}/{}; Val_acc = {:.2f}%; Test_Acc = {:.2f}%'.format(args.task_str, iter_num, 65 | max_iter, acc_s_te, acc_t_te) 66 | args.log.record(log_str) 67 | print(log_str) 68 | base_network.train() 69 | 70 | if acc_s_te >= acc_init: 71 | acc_init = acc_s_te 72 | acc_tar_src_best = acc_t_te 73 | 74 | return acc_tar_src_best 75 | 76 | 77 | if __name__ == '__main__': 78 | 79 | data_name = 'SEED' 80 | if data_name == 'SEED': chn, class_num, trial_num = 62, 3, 3394 81 | focus_domain_idx = [0, 1, 2] 82 | domain_list = ['S' + str(i) for i in focus_domain_idx] 83 | num_domain = len(domain_list) 84 | 85 | args = argparse.Namespace(bottleneck=64, lr=0.01, lr_decay1=0.1, lr_decay2=1.0, 86 | epsilon=1e-05, layer='wn', smooth=0, is_save=False, 87 | N=num_domain, chn=chn, trial=trial_num, class_num=class_num) 88 | 89 | args.dset = data_name 90 | args.method = 'DNN' 91 | args.backbone = 'ShallowNet' 92 | args.batch_size = 32 # 32 93 | args.max_epoch = 50 # 50 94 | args.input_dim = 310 95 | args.norm = 'zscore' 96 | args.mdl_init_dir = 'outputs/mdl_init/' + args.dset + '/' 97 | args.noise_rate = 0 98 | dset_n = args.dset + '_' + str(args.noise_rate) 99 | 100 | os.environ["CUDA_VISIBLE_DEVICES"] = '4' 101 | args.data_env = 'gpu' # 'local' 102 | args.seed = 2022 103 | fix_random_seed(args.seed) 104 | tr.backends.cudnn.deterministic = True 105 | print(dset_n, args.method) 106 | print(args) 107 | 108 | args.local_dir = r'/mnt/ssd2/wenz/NT-Benchmark/NT_UDA/' 109 | args.result_dir = 'results/target/' 110 | my_log = LogRecord(args) 111 | my_log.log_init() 112 | my_log.record('=' * 50 + '\n' + os.path.basename(__file__) + '\n' + '=' * 50) 113 | 114 | acc_all = np.zeros(num_domain * (num_domain - 1)) 115 | for s in range(num_domain): 116 | for t in range(num_domain): 117 | if s != t: 118 | itr_idx = (num_domain - 1) * s + t 119 | if t > s: itr_idx -= 1 120 | info_str = '\n%s: %s --> %s' % (itr_idx, domain_list[s], domain_list[t]) 121 | print(info_str) 122 | args.src, args.tar = focus_domain_idx[s], focus_domain_idx[t] 123 | args.task_str = domain_list[s] + '_' + domain_list[t] 124 | print(args) 125 | 126 | my_log.record(info_str) 127 | args.log = my_log 128 | 129 | acc_all[itr_idx] = train_source_test_target(args) 130 | print('\nSub acc: ', np.round(acc_all, 3)) 131 | print('Avg acc: ', np.round(np.mean(acc_all), 3)) 132 | 133 | acc_sub_str = str(np.round(acc_all, 3).tolist()) 134 | acc_mean_str = str(np.round(np.mean(acc_all), 3).tolist()) 135 | args.log.record("\n==========================================") 136 | args.log.record(acc_sub_str) 137 | args.log.record(acc_mean_str) 138 | 139 | -------------------------------------------------------------------------------- /NT_UDA/demo_seed_jda.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # A Survey on Negative Transfer 3 | # https://github.com/chamwen/NT-Benchmark 4 | import numpy as np 5 | import torch as tr 6 | import argparse 7 | from utils.utils import add_label_noise_noimg 8 | from utils.dataloader import data_normalize 9 | from utils.utils_bl import JDA 10 | 11 | 12 | def read_seed_src_tar_bl(args): 13 | # (15, 3394, 310) (15, 3394) 14 | if args.data_env == 'local': 15 | file = 'D:/Dataset/MOABB/' + args.dset + '.npz' 16 | if args.data_env == 'gpu': 17 | file = '/mnt/ssd2/wenz/data/bci/' + args.dset + '.npz' 18 | 19 | MI = np.load(file) 20 | Data_raw, Label = MI['data'], MI['label'] 21 | 22 | src_data = 
np.squeeze(Data_raw[args.src, :, :]) 23 | src_data = data_normalize(src_data, args.norm) 24 | src_label = np.squeeze(Label[args.src, :]) 25 | 26 | # target sub 27 | tar_data = np.squeeze(Data_raw[args.tar, :, :]) 28 | tar_data = data_normalize(tar_data, args.norm) 29 | tar_label = np.squeeze(Label[args.tar, :]) 30 | print(tar_data.shape, tar_label.shape) 31 | 32 | return src_data, src_label, tar_data, tar_label 33 | 34 | 35 | data_name = 'SEED' 36 | if data_name == 'SEED': chn, class_num, trial_num = 62, 3, 3394 37 | focus_domain_idx = [0, 1, 2] 38 | domain_list = ['S' + str(i) for i in focus_domain_idx] 39 | num_domain = len(domain_list) 40 | 41 | args = argparse.Namespace(dset=data_name, norm='zscore', seed=2022, class_num=3) 42 | args.data_env = 'gpu' # 'local' 43 | args.noise_rate = 0 44 | 45 | num_domain = len(domain_list) 46 | acc_all = np.zeros(len(domain_list) * (len(domain_list) - 1)) 47 | for s in range(num_domain): # source 48 | for t in range(num_domain): # target 49 | if s != t: 50 | itr_idx = (num_domain - 1) * s + t 51 | if t > s: itr_idx -= 1 52 | print('\n%s: %s --> %s' % (itr_idx, domain_list[s], domain_list[t])) 53 | args.src, args.tar = s, t 54 | Xs, Ys, Xt, Yt = read_seed_src_tar_bl(args) 55 | 56 | # add noise on source label 57 | Ys = add_label_noise_noimg(Ys, args.seed, args.class_num, args.noise_rate) 58 | 59 | # JDA 60 | ker_type = 'primal' 61 | traditional_tl = JDA(kernel_type=ker_type, dim=100, lamb=1, gamma=1) 62 | acc_all[itr_idx] = traditional_tl.fit_predict(Xs, Ys, Xt, Yt) 63 | print('JDA: {:.2f}'.format(acc_all[itr_idx])) 64 | 65 | print('\nmean acc', np.round(np.mean(acc_all), 2)) 66 | print(domain_list) 67 | print(np.round(acc_all, 2).tolist()) 68 | 69 | -------------------------------------------------------------------------------- /NT_UDA/demo_seed_kmm.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # A Survey on Negative Transfer 3 | # https://github.com/chamwen/NT-Benchmark 4 | import numpy as np 5 | import torch as tr 6 | import argparse 7 | from sklearn.metrics import accuracy_score 8 | from utils.utils import add_label_noise_noimg 9 | from utils.dataloader import data_normalize 10 | from utils.utils_bl import KMM, baseline_SVM 11 | 12 | 13 | def read_seed_src_tar_bl(args): 14 | # (15, 3394, 310) (15, 3394) 15 | if args.data_env == 'local': 16 | file = 'D:/Dataset/MOABB/' + args.dset + '.npz' 17 | if args.data_env == 'gpu': 18 | file = '/mnt/ssd2/wenz/data/bci/' + args.dset + '.npz' 19 | 20 | MI = np.load(file) 21 | Data_raw, Label = MI['data'], MI['label'] 22 | 23 | src_data = np.squeeze(Data_raw[args.src, :, :]) 24 | src_data = data_normalize(src_data, args.norm) 25 | src_label = np.squeeze(Label[args.src, :]) 26 | 27 | # target sub 28 | tar_data = np.squeeze(Data_raw[args.tar, :, :]) 29 | tar_data = data_normalize(tar_data, args.norm) 30 | tar_label = np.squeeze(Label[args.tar, :]) 31 | print(tar_data.shape, tar_label.shape) 32 | 33 | return src_data, src_label, tar_data, tar_label 34 | 35 | 36 | data_name = 'SEED' 37 | if data_name == 'SEED': chn, class_num, trial_num = 62, 3, 3394 38 | focus_domain_idx = [0, 1, 2] 39 | domain_list = ['S' + str(i) for i in focus_domain_idx] 40 | num_domain = len(domain_list) 41 | 42 | args = argparse.Namespace(dset=data_name, norm='zscore', seed=2022, class_num=3) 43 | args.data_env = 'gpu' # 'local' 44 | args.noise_rate = 0 45 | 46 | num_domain = len(domain_list) 47 | acc_all = np.zeros(len(domain_list) * (len(domain_list) - 1)) 48 | for s in 
range(num_domain): # source 49 | for t in range(num_domain): # target 50 | if s != t: 51 | itr_idx = (num_domain - 1) * s + t 52 | if t > s: itr_idx -= 1 53 | print('\n%s: %s --> %s' % (itr_idx, domain_list[s], domain_list[t])) 54 | args.src, args.tar = s, t 55 | Xs, Ys, Xt, Yt = read_seed_src_tar_bl(args) 56 | 57 | # add noise on source label 58 | Ys = add_label_noise_noimg(Ys, args.seed, args.class_num, args.noise_rate) 59 | 60 | kmm = KMM(kernel_type='rbf', B=1) 61 | beta = kmm.fit(Xs, Xt) 62 | Xs_new = beta * Xs 63 | 64 | pred_tar = baseline_SVM(Xs_new, Ys, Xt, Yt) 65 | acc_all[itr_idx] = accuracy_score(Yt, pred_tar) * 100 66 | print('KMM: %.2f' % np.round(acc_all[itr_idx], 2)) 67 | 68 | print('\nmean acc', np.round(np.mean(acc_all), 2)) 69 | print(domain_list) 70 | print(np.round(acc_all, 2).tolist()) 71 | 72 | -------------------------------------------------------------------------------- /NT_UDA/demo_seed_mcc.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # A Survey on Negative Transfer 3 | # https://github.com/chamwen/NT-Benchmark 4 | import numpy as np 5 | import argparse 6 | import os 7 | import torch as tr 8 | import torch.nn as nn 9 | import torch.optim as optim 10 | from utils import network, loss, utils 11 | from utils.LogRecord import LogRecord 12 | from utils.dataloader import read_seed_src_tar 13 | from utils.utils import lr_scheduler_full, fix_random_seed, data_load_noimg 14 | from utils.loss import ClassConfusionLoss, CELabelSmooth 15 | 16 | 17 | def train_target(args): 18 | X_src, y_src, X_tar, y_tar = read_seed_src_tar(args) 19 | dset_loaders = data_load_noimg(X_src, y_src, X_tar, y_tar, args) 20 | 21 | netF, netC = network.backbone_net(args, args.bottleneck) 22 | netF.load_state_dict(tr.load(args.mdl_init_dir + 'netF.pt')) 23 | netC.load_state_dict(tr.load(args.mdl_init_dir + 'netC.pt')) 24 | base_network = nn.Sequential(netF, netC) 25 | optimizer = optim.SGD(base_network.parameters(), lr=args.lr) 26 | 27 | 28 | 29 | 30 | max_iter = args.max_epoch * len(dset_loaders["source"]) 31 | interval_iter = max_iter // 10 32 | args.max_iter = max_iter 33 | iter_num = 0 34 | base_network.train() 35 | 36 | while iter_num < max_iter: 37 | try: 38 | inputs_source, labels_source = iter_source.next() 39 | except: 40 | iter_source = iter(dset_loaders["source"]) 41 | inputs_source, labels_source = iter_source.next() 42 | 43 | try: 44 | inputs_target, _ = iter_target.next() 45 | except: 46 | iter_target = iter(dset_loaders["target"]) 47 | inputs_target, _ = iter_target.next() 48 | 49 | if inputs_source.size(0) == 1: 50 | continue 51 | 52 | iter_num += 1 53 | lr_scheduler_full(optimizer, init_lr=args.lr, iter_num=iter_num, max_iter=args.max_iter) 54 | 55 | inputs_source, inputs_target, labels_source = inputs_source.cuda(), inputs_target.cuda(), labels_source.cuda() 56 | features_source, outputs_source = base_network(inputs_source) 57 | features_target, outputs_target = base_network(inputs_target) 58 | 59 | # new version img loss 
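# [illustrative note, not in the original script] ClassConfusionLoss is the
# Minimum Class Confusion (MCC) objective (Jin et al., ECCV 2020): it rescales
# the target softmax with a temperature (t_mcc = 2 below), builds an
# entropy-weighted class-confusion matrix from the predictions, and penalizes
# its off-diagonal mass. The commented-out schedule kept below is the
# DANN-style annealing coefficient, unused here:
60 | # p = float(iter_num) / max_iter 61 | # alpha = 2. / (1. 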
+ np.exp(-10 * p)) - 1 62 | args.loss_trade_off = 1.0 63 | args.t_mcc = 2 64 | transfer_loss = ClassConfusionLoss(t=args.t_mcc)(outputs_target) 65 | classifier_loss = CELabelSmooth(num_classes=args.class_num, epsilon=args.smooth)(outputs_source, labels_source) 66 | total_loss = args.loss_trade_off * transfer_loss + classifier_loss 67 | 68 | optimizer.zero_grad() 69 | total_loss.backward() 70 | optimizer.step() 71 | 72 | if iter_num % interval_iter == 0 or iter_num == max_iter: 73 | base_network.eval() 74 | 75 | acc_t_te = utils.cal_acc_base(dset_loaders["Target"], base_network) 76 | log_str = 'Task: {}, Iter:{}/{}; Acc = {:.2f}%'.format(args.task_str, iter_num, max_iter, acc_t_te) 77 | args.log.record(log_str) 78 | print(log_str) 79 | 80 | base_network.train() 81 | 82 | return acc_t_te 83 | 84 | 85 | if __name__ == '__main__': 86 | 87 | data_name = 'SEED' 88 | if data_name == 'SEED': chn, class_num, trial_num = 62, 3, 3394 89 | focus_domain_idx = [0, 1, 2] 90 | domain_list = ['S' + str(i) for i in focus_domain_idx] 91 | num_domain = len(domain_list) 92 | 93 | args = argparse.Namespace(bottleneck=64, lr=0.01, lr_decay1=0.1, lr_decay2=1.0, 94 | epsilon=1e-05, layer='wn', smooth=0, 95 | N=num_domain, chn=chn, class_num=class_num) 96 | 97 | args.dset = data_name 98 | args.method = 'MCC' 99 | args.backbone = 'ShallowNet' 100 | args.batch_size = 32 # 32 101 | args.max_epoch = 50 # 50 102 | args.input_dim = 310 103 | args.norm = 'zscore' 104 | args.mdl_init_dir = 'outputs/mdl_init/' + args.dset + '/' 105 | args.noise_rate = 0 106 | dset_n = args.dset + '_' + str(args.noise_rate) 107 | 108 | os.environ["CUDA_VISIBLE_DEVICES"] = '4' 109 | args.data_env = 'gpu' # 'local' 110 | args.seed = 2022 111 | fix_random_seed(args.seed) 112 | tr.backends.cudnn.deterministic = True 113 | 114 | print(dset_n, args.method) 115 | print(args) 116 | 117 | args.local_dir = r'/mnt/ssd2/wenz/NT-Benchmark/NT_UDA/' 118 | args.result_dir = 'results/target/' 119 | my_log = LogRecord(args) 120 | my_log.log_init() 121 | my_log.record('=' * 50 + '\n' + os.path.basename(__file__) + '\n' + '=' * 50) 122 | 123 | acc_all = np.zeros(num_domain * (num_domain - 1)) 124 | for s in range(num_domain): 125 | for t in range(num_domain): 126 | if s != t: 127 | itr_idx = (num_domain - 1) * s + t 128 | if t > s: itr_idx -= 1 129 | info_str = '\n%s: %s --> %s' % (itr_idx, domain_list[s], domain_list[t]) 130 | print(info_str) 131 | args.src, args.tar = focus_domain_idx[s], focus_domain_idx[t] 132 | args.task_str = domain_list[s] + '_' + domain_list[t] 133 | print(args) 134 | 135 | my_log.record(info_str) 136 | args.log = my_log 137 | acc_all[itr_idx] = train_target(args) 138 | print('\nSub acc: ', np.round(acc_all, 3)) 139 | print('Avg acc: ', np.round(np.mean(acc_all), 3)) 140 | 141 | acc_sub_str = str(np.round(acc_all, 3).tolist()) 142 | acc_mean_str = str(np.round(np.mean(acc_all), 3).tolist()) 143 | args.log.record("\n==========================================") 144 | args.log.record(acc_sub_str) 145 | args.log.record(acc_mean_str) 146 | -------------------------------------------------------------------------------- /NT_UDA/demo_syn_bl_svm.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # A Survey on Negative Transfer 3 | # https://github.com/chamwen/NT-Benchmark 4 | import numpy as np 5 | import pandas as pd 6 | from sklearn.metrics import accuracy_score 7 | from utils.utils import add_label_noise_noimg 8 | from utils.utils_bl import baseline_SVM 9 | 10 | data_name = 
'moon' 11 | seed = 2022 12 | if data_name == 'moon': class_num = 2 13 | noise_rate = 0 14 | base_name_list = ['0', '1', '2', '3_45', '4_15', '6', '7', '8', '9'] 15 | domain_list = ['Raw', 'Tl', 'Sl', 'Rt', 'Sh', 'Sk', 'Ns', 'Ol', 'Sc'] 16 | file_list = [data_name + i for i in base_name_list] 17 | num_domain = len(domain_list) 18 | 19 | root_path = './data_synth/' 20 | acc_all = np.zeros((len(domain_list) - 1)) 21 | for s in range(1, num_domain): # source 22 | for t in [0]: # target 23 | itr_idx = s - 1 24 | print('\n%s: %s --> %s' % (itr_idx, domain_list[s], domain_list[t])) 25 | src, tar = file_list[s], file_list[t] 26 | pd_src = pd.read_csv(root_path + src + ".csv", header=None) 27 | Xs, Ys = pd_src.iloc[:, :2].values, pd_src.iloc[:, 2].values.astype(int) 28 | pd_tar = pd.read_csv(root_path + tar + ".csv", header=None) 29 | Xt, Yt = pd_tar.iloc[:, :2].values, pd_tar.iloc[:, 2].values.astype(int) 30 | 31 | # add noise on source label 32 | Ys = add_label_noise_noimg(Ys, seed, class_num, noise_rate) 33 | 34 | # test SVM: 35 | pred_tar = baseline_SVM(Xs, Ys, Xt, Yt) 36 | acc_all[itr_idx] = accuracy_score(Yt, pred_tar) * 100 37 | 38 | print('acc: %.2f' % np.round(acc_all[itr_idx], 2)) 39 | 40 | print('\nmean acc', np.round(np.mean(acc_all), 2)) 41 | print(domain_list) 42 | print(np.round(acc_all, 2).tolist()) 43 | -------------------------------------------------------------------------------- /NT_UDA/demo_syn_cdan.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # A Survey on Negative Transfer 3 | # https://github.com/chamwen/NT-Benchmark 4 | import numpy as np 5 | import argparse 6 | import os 7 | import torch as tr 8 | import torch.nn as nn 9 | import torch.optim as optim 10 | from utils import network, loss, utils 11 | from utils.network import calc_coeff 12 | from utils.dataloader import read_syn_src_tar 13 | from utils.utils import lr_scheduler_full, fix_random_seed, data_load_noimg 14 | from utils.loss import CELabelSmooth, CDANE, Entropy, RandomLayer 15 | 16 | 17 | def train_target(args): 18 | X_src, y_src, X_tar, y_tar = read_syn_src_tar(args) 19 | dset_loaders = data_load_noimg(X_src, y_src, X_tar, y_tar, args) 20 | 21 | netF, netC = network.backbone_net(args, args.bottleneck) 22 | netF.load_state_dict(tr.load(args.mdl_init_dir + 'netF.pt')) 23 | netC.load_state_dict(tr.load(args.mdl_init_dir + 'netC.pt')) 24 | base_network = nn.Sequential(netF, netC) 25 | 26 | args.max_iter = len(dset_loaders["source"]) 27 | 28 | ad_net = network.AdversarialNetwork(args.bottleneck, 20).cuda() 29 | ad_net.load_state_dict(tr.load(args.mdl_init_dir + 'netD_full.pt')) 30 | random_layer = RandomLayer([args.bottleneck, args.class_num], args.bottleneck) 31 | random_layer.cuda() 32 | 33 | optimizer_f = optim.SGD(netF.parameters(), lr=args.lr) 34 | optimizer_c = optim.SGD(netC.parameters(), lr=args.lr) 35 | optimizer_d = optim.SGD(ad_net.parameters(), lr=args.lr) 36 | 37 | max_iter = args.max_epoch * len(dset_loaders["source"]) 38 | interval_iter = max_iter // 10 39 | args.max_iter = args.max_epoch * len(dset_loaders["source"]) 40 | iter_num = 0 41 | base_network.train() 42 | 43 | while iter_num < max_iter: 44 | try: 45 | inputs_source, labels_source = iter_source.next() 46 | except: 47 | iter_source = iter(dset_loaders["source"]) 48 | inputs_source, labels_source = iter_source.next() 49 | 
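# [illustrative note, not in the original script] The try/except blocks here
# implement a common pattern for cycling two loaders of different lengths:
# once an iterator is exhausted, it is simply re-created. iter_*.next() is the
# old PyTorch iterator API; on recent versions the portable spelling is
# next(iter_target). Catching StopIteration explicitly would also be safer
# than a bare except.
50 | try: 51 | inputs_target, _ = iter_target.next() 52 | except: 53 | iter_target = 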
iter(dset_loaders["target"]) 54 | inputs_target, _ = iter_target.next() 55 | 56 | if inputs_source.size(0) == 1: 57 | continue 58 | 59 | iter_num += 1 60 | lr_scheduler_full(optimizer_f, init_lr=args.lr, iter_num=iter_num, max_iter=args.max_iter) 61 | lr_scheduler_full(optimizer_c, init_lr=args.lr, iter_num=iter_num, max_iter=args.max_iter) 62 | lr_scheduler_full(optimizer_d, init_lr=args.lr, iter_num=iter_num, max_iter=args.max_iter) 63 | 64 | inputs_source, inputs_target, labels_source = inputs_source.cuda(), inputs_target.cuda(), labels_source.cuda() 65 | features_source, outputs_source = base_network(inputs_source) 66 | features_target, outputs_target = base_network(inputs_target) 67 | features = tr.cat((features_source, features_target), dim=0) 68 | 69 | # new version img loss 70 | args.loss_trade_off = 1.0 71 | outputs = tr.cat((outputs_source, outputs_target), dim=0) 72 | softmax_out = nn.Softmax(dim=1)(outputs) 73 | entropy = Entropy(softmax_out) 74 | transfer_loss = CDANE([features, softmax_out], ad_net, entropy, calc_coeff(iter_num), random_layer=random_layer) 75 | classifier_loss = CELabelSmooth(num_classes=args.class_num, epsilon=args.smooth)(outputs_source, labels_source) 76 | total_loss = args.loss_trade_off * transfer_loss + classifier_loss 77 | 78 | optimizer_f.zero_grad() 79 | optimizer_c.zero_grad() 80 | optimizer_d.zero_grad() 81 | total_loss.backward() 82 | optimizer_f.step() 83 | optimizer_c.step() 84 | optimizer_d.step() 85 | 86 | if iter_num % interval_iter == 0 or iter_num == max_iter: 87 | base_network.eval() 88 | 89 | acc_t_te = utils.cal_acc_base(dset_loaders["Target"], base_network) 90 | log_str = 'Task: {}, Iter:{}/{}; Acc = {:.2f}%'.format(args.task_str, iter_num, max_iter, acc_t_te) 91 | print(log_str) 92 | 93 | base_network.train() 94 | 95 | return acc_t_te 96 | 97 | 98 | if __name__ == '__main__': 99 | 100 | data_name = 'moon' 101 | if data_name == 'moon': num_class = 2 102 | base_name_list = ['0', '1', '2', '3_45', '4_15', '6', '7', '8', '9'] 103 | domain_list = ['Raw', 'Tl', 'Sl', 'Rt', 'Sh', 'Sk', 'Ns', 'Ol', 'Sc'] 104 | file_list = [data_name + i for i in base_name_list] 105 | num_domain = len(domain_list) 106 | 107 | args = argparse.Namespace(bottleneck=64, lr=0.01, lr_decay1=0.1, lr_decay2=1.0, 108 | epsilon=1e-05, layer='wn', class_num=num_class, smooth=0) 109 | 110 | args.method = 'CDAN' 111 | args.dset = data_name 112 | args.backbone = 'ShallowNet' 113 | args.batch_size = 32 114 | args.max_epoch = 50 115 | args.input_dim = 2 116 | args.mdl_init_dir = 'outputs/mdl_init/' + args.dset + '/' 117 | args.noise_rate = 0 118 | dset_n = args.dset + '_' + str(args.noise_rate) 119 | 120 | os.environ["CUDA_VISIBLE_DEVICES"] = '5' 121 | args.data_env = 'gpu' # 'local' 122 | args.seed = 2022 123 | fix_random_seed(args.seed) 124 | tr.backends.cudnn.deterministic = True 125 | print(dset_n, args.method) 126 | 127 | args.root_path = './data_synth/' 128 | args.local_dir = r'/mnt/ssd2/wenz/NT-Benchmark/NT_UDA/' 129 | args.result_dir = 'results/target/' 130 | 131 | acc_all = np.zeros((len(domain_list) - 1)) 132 | for s in range(1, num_domain): # source 133 | for t in [0]: # target 134 | itr_idx = s - 1 135 | info_str = '\n%s: %s --> %s' % (itr_idx, domain_list[s], domain_list[t]) 136 | print(info_str) 137 | args.src, args.tar = file_list[s], file_list[t] 138 | args.task_str = domain_list[s] + '_' + domain_list[t] 139 | print(args) 140 | 141 | acc_all[itr_idx] = train_target(args) 142 | print('All acc: ', np.round(acc_all, 2)) 143 | print('Avg acc: ', 
np.round(np.mean(acc_all), 2)) 144 | 145 | -------------------------------------------------------------------------------- /NT_UDA/demo_syn_dan.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # A Survey on Negative Transfer 3 | # https://github.com/chamwen/NT-Benchmark 4 | import numpy as np 5 | import argparse 6 | import os 7 | import torch as tr 8 | import torch.nn as nn 9 | import torch.optim as optim 10 | from utils import network, loss, utils 11 | from utils.dataloader import read_syn_src_tar 12 | from utils.utils import lr_scheduler_full, fix_random_seed, data_load_noimg 13 | from utils.loss import MultipleKernelMaximumMeanDiscrepancy, GaussianKernel 14 | 15 | 16 | def train_target(args): 17 | X_src, y_src, X_tar, y_tar = read_syn_src_tar(args) 18 | dset_loaders = data_load_noimg(X_src, y_src, X_tar, y_tar, args) 19 | 20 | netF, netC = network.backbone_net(args, args.bottleneck) 21 | netF.load_state_dict(tr.load(args.mdl_init_dir + 'netF.pt')) 22 | netC.load_state_dict(tr.load(args.mdl_init_dir + 'netC.pt')) 23 | base_network = nn.Sequential(netF, netC) 24 | optimizer = optim.SGD(base_network.parameters(), lr=args.lr) 25 | 26 | max_iter = args.max_epoch * len(dset_loaders["source"]) 27 | interval_iter = max_iter // 10 28 | args.max_iter = args.max_epoch * len(dset_loaders["source"]) 29 | iter_num = 0 30 | base_network.train() 31 | 32 | while iter_num < max_iter: 33 | try: 34 | inputs_source, labels_source = iter_source.next() 35 | except: 36 | iter_source = iter(dset_loaders["source"]) 37 | inputs_source, labels_source = iter_source.next() 38 | 39 | try: 40 | inputs_target, _ = iter_target.next() 41 | except: 42 | iter_target = iter(dset_loaders["target"]) 43 | inputs_target, _ = iter_target.next() 44 | 45 | if inputs_source.size(0) == 1: 46 | continue 47 | 48 | iter_num += 1 49 | lr_scheduler_full(optimizer, init_lr=args.lr, iter_num=iter_num, max_iter=args.max_iter) 50 | 51 | inputs_source, inputs_target, labels_source = inputs_source.cuda(), inputs_target.cuda(), labels_source.cuda() 52 | features_source, outputs_source = base_network(inputs_source) 53 | features_target, outputs_target = base_network(inputs_target) 54 | 55 | # new version img loss 56 | args.non_linear = False 57 | args.trade_off = 1.0 58 | classifier_loss = nn.CrossEntropyLoss()(outputs_source, labels_source) 59 | mkmmd_loss = MultipleKernelMaximumMeanDiscrepancy( 60 | kernels=[GaussianKernel(alpha=2 ** k) for k in range(-3, 2)], 61 | linear=not args.non_linear 62 | ) 63 | discrepancy_loss = mkmmd_loss(features_source, features_target) 64 | total_loss = classifier_loss + discrepancy_loss * args.trade_off 65 | 66 | optimizer.zero_grad() 67 | total_loss.backward() 68 | optimizer.step() 69 | 70 | if iter_num % interval_iter == 0 or iter_num == max_iter: 71 | base_network.eval() 72 | 73 | acc_t_te = utils.cal_acc_base(dset_loaders["Target"], base_network) 74 | log_str = 'Task: {}, Iter:{}/{}; Acc = {:.2f}%'.format(args.task_str, iter_num, max_iter, acc_t_te) 75 | print(log_str) 76 | 77 | base_network.train() 78 | 79 | return acc_t_te 80 | 81 | 82 | if __name__ == '__main__': 83 | 84 | data_name = 'moon' 85 | if data_name == 'moon': num_class = 2 86 | base_name_list = ['0', '1', '2', '3_45', '4_15', '6', '7', '8', '9'] 87 | domain_list = ['Raw', 'Tl', 'Sl', 'Rt', 'Sh', 'Sk', 'Ns', 'Ol', 'Sc'] 88 | file_list = [data_name + i for i in base_name_list] 89 | num_domain = len(domain_list) 90 | 91 | args = argparse.Namespace(bottleneck=64, lr=0.01, 
lr_decay1=0.1, lr_decay2=1.0, 92 | epsilon=1e-05, layer='wn', class_num=num_class, smooth=0) 93 | 94 | args.method = 'DAN' 95 | args.dset = data_name 96 | args.backbone = 'ShallowNet' 97 | args.batch_size = 32 98 | args.max_epoch = 50 99 | args.input_dim = 2 100 | args.mdl_init_dir = 'outputs/mdl_init/' + args.dset + '/' 101 | args.noise_rate = 0 102 | dset_n = args.dset + '_' + str(args.noise_rate) 103 | 104 | os.environ["CUDA_VISIBLE_DEVICES"] = '5' 105 | args.data_env = 'gpu' # 'local' 106 | args.seed = 2022 107 | fix_random_seed(args.seed) 108 | tr.backends.cudnn.deterministic = True 109 | print(dset_n, args.method) 110 | 111 | args.root_path = './data_synth/' 112 | args.local_dir = r'/mnt/ssd2/wenz/NT-Benchmark/NT_UDA/' 113 | args.result_dir = 'results/target/' 114 | 115 | acc_all = np.zeros((len(domain_list) - 1)) 116 | for s in range(1, num_domain): # source 117 | for t in [0]: # target 118 | itr_idx = s - 1 119 | info_str = '\n%s: %s --> %s' % (itr_idx, domain_list[s], domain_list[t]) 120 | print(info_str) 121 | args.src, args.tar = file_list[s], file_list[t] 122 | args.task_str = domain_list[s] + '_' + domain_list[t] 123 | print(args) 124 | 125 | acc_all[itr_idx] = train_target(args) 126 | print('All acc: ', np.round(acc_all, 2)) 127 | print('Avg acc: ', np.round(np.mean(acc_all), 2)) 128 | -------------------------------------------------------------------------------- /NT_UDA/demo_syn_dann.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # A Survey on Negative Transfer 3 | # https://github.com/chamwen/NT-Benchmark 4 | import numpy as np 5 | import argparse 6 | import os 7 | import torch as tr 8 | import torch.nn as nn 9 | import torch.optim as optim 10 | from utils import network, loss, utils 11 | from utils.dataloader import read_syn_src_tar 12 | from utils.utils import lr_scheduler_full, fix_random_seed, data_load_noimg 13 | from utils.loss import CELabelSmooth, ReverseLayerF 14 | 15 | 16 | def train_target(args): 17 | X_src, y_src, X_tar, y_tar = read_syn_src_tar(args) 18 | dset_loaders = data_load_noimg(X_src, y_src, X_tar, y_tar, args) 19 | 20 | netF, netC = network.backbone_net(args, args.bottleneck) 21 | netF.load_state_dict(tr.load(args.mdl_init_dir + 'netF.pt')) 22 | netC.load_state_dict(tr.load(args.mdl_init_dir + 'netC.pt')) 23 | base_network = nn.Sequential(netF, netC) 24 | 25 | args.max_iter = len(dset_loaders["source"]) 26 | ad_net = network.feat_classifier(type=args.layer, class_num=2, bottleneck_dim=args.bottleneck).cuda() 27 | ad_net.load_state_dict(tr.load(args.mdl_init_dir + 'netD_clf.pt')) 28 | 29 | optimizer_f = optim.SGD(netF.parameters(), lr=args.lr) 30 | optimizer_c = optim.SGD(netC.parameters(), lr=args.lr) 31 | optimizer_d = optim.SGD(ad_net.parameters(), lr=args.lr) 32 | 33 | max_iter = args.max_epoch * len(dset_loaders["source"]) 34 | interval_iter = max_iter // 10 35 | args.max_iter = args.max_epoch * len(dset_loaders["source"]) 36 | iter_num = 0 37 | base_network.train() 38 | 39 | while iter_num < max_iter: 40 | try: 41 | inputs_source, labels_source = iter_source.next() 42 | except: 43 | iter_source = iter(dset_loaders["source"]) 44 | inputs_source, labels_source = iter_source.next() 45 | 46 | try: 47 | inputs_target, _ = iter_target.next() 48 | except: 49 | iter_target = iter(dset_loaders["target"]) 50 | inputs_target, _ = iter_target.next() 51 | 52 | if inputs_source.size(0) == 1: 53 | continue 54 | 55 | iter_num += 1 56 | lr_scheduler_full(optimizer_f, init_lr=args.lr, 
iter_num=iter_num, max_iter=args.max_iter) 57 | lr_scheduler_full(optimizer_c, init_lr=args.lr, iter_num=iter_num, max_iter=args.max_iter) 58 | lr_scheduler_full(optimizer_d, init_lr=args.lr, iter_num=iter_num, max_iter=args.max_iter) 59 | 60 | inputs_source, inputs_target, labels_source = inputs_source.cuda(), inputs_target.cuda(), labels_source.cuda() 61 | features_source, outputs_source = base_network(inputs_source) 62 | features_target, outputs_target = base_network(inputs_target) 63 | 64 | # new version img loss 65 | p = float(iter_num) / max_iter 66 | alpha = 2. / (1. + np.exp(-10 * p)) - 1 67 | reverse_source, reverse_target = ReverseLayerF.apply(features_source, alpha), ReverseLayerF.apply(features_target, 68 | alpha) 69 | _, domain_output_s = ad_net(reverse_source) 70 | _, domain_output_t = ad_net(reverse_target) 71 | domain_label_s = tr.ones(inputs_source.size()[0]).long().cuda() 72 | domain_label_t = tr.zeros(inputs_target.size()[0]).long().cuda() 73 | 74 | classifier_loss = CELabelSmooth(num_classes=args.class_num, epsilon=args.smooth)(outputs_source, labels_source) 75 | adv_loss = nn.CrossEntropyLoss()(domain_output_s, domain_label_s) + nn.CrossEntropyLoss()(domain_output_t, 76 | domain_label_t) 77 | total_loss = classifier_loss + adv_loss 78 | 79 | optimizer_f.zero_grad() 80 | optimizer_c.zero_grad() 81 | optimizer_d.zero_grad() 82 | total_loss.backward() 83 | optimizer_f.step() 84 | optimizer_c.step() 85 | optimizer_d.step() 86 | 87 | if iter_num % interval_iter == 0 or iter_num == max_iter: 88 | base_network.eval() 89 | 90 | acc_t_te = utils.cal_acc_base(dset_loaders["Target"], base_network) 91 | log_str = 'Task: {}, Iter:{}/{}; Acc = {:.2f}%'.format(args.task_str, iter_num, max_iter, acc_t_te) 92 | print(log_str) 93 | 94 | base_network.train() 95 | 96 | return acc_t_te 97 | 98 | 99 | if __name__ == '__main__': 100 | 101 | data_name = 'moon' 102 | if data_name == 'moon': num_class = 2 103 | base_name_list = ['0', '1', '2', '3_45', '4_15', '6', '7', '8', '9'] 104 | domain_list = ['Raw', 'Tl', 'Sl', 'Rt', 'Sh', 'Sk', 'Ns', 'Ol', 'Sc'] 105 | file_list = [data_name + i for i in base_name_list] 106 | num_domain = len(domain_list) 107 | 108 | args = argparse.Namespace(bottleneck=64, lr=0.01, lr_decay1=0.1, lr_decay2=1.0, 109 | epsilon=1e-05, layer='wn', class_num=num_class, smooth=0) 110 | 111 | args.method = 'DANN' 112 | args.dset = data_name 113 | args.backbone = 'ShallowNet' 114 | args.batch_size = 32 115 | args.max_epoch = 50 116 | args.input_dim = 2 117 | args.mdl_init_dir = 'outputs/mdl_init/' + args.dset + '/' 118 | args.noise_rate = 0 119 | dset_n = args.dset + '_' + str(args.noise_rate) 120 | 121 | os.environ["CUDA_VISIBLE_DEVICES"] = '6' 122 | args.data_env = 'gpu' # 'local' 123 | args.seed = 2022 124 | fix_random_seed(args.seed) 125 | tr.backends.cudnn.deterministic = True 126 | print(dset_n, args.method) 127 | 128 | args.root_path = './data_synth/' 129 | args.local_dir = r'/mnt/ssd2/wenz/NT-Benchmark/NT_UDA/' 130 | args.result_dir = 'results/target/' 131 | 132 | acc_all = np.zeros((len(domain_list) - 1)) 133 | for s in range(1, num_domain): # source 134 | for t in [0]: # target 135 | itr_idx = s - 1 136 | info_str = '\n%s: %s --> %s' % (itr_idx, domain_list[s], domain_list[t]) 137 | print(info_str) 138 | args.src, args.tar = file_list[s], file_list[t] 139 | args.task_str = domain_list[s] + '_' + domain_list[t] 140 | print(args) 141 | 142 | acc_all[itr_idx] = train_target(args) 143 | print('All acc: ', np.round(acc_all, 2)) 144 | print('Avg acc: ', 
np.round(np.mean(acc_all), 2)) 145 | -------------------------------------------------------------------------------- /NT_UDA/demo_syn_dnn.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # A Survey on Negative Transfer 3 | # https://github.com/chamwen/NT-Benchmark 4 | import numpy as np 5 | import argparse 6 | import random 7 | import os 8 | import torch as tr 9 | import torch.nn as nn 10 | import torch.optim as optim 11 | from utils import network, loss, utils 12 | from utils.dataloader import read_syn_src_tar 13 | from utils.utils import fix_random_seed, lr_scheduler_full, data_load_noimg 14 | 15 | 16 | def train_source_test_target(args): 17 | X_src, y_src, X_tar, y_tar = read_syn_src_tar(args) 18 | dset_loaders = data_load_noimg(X_src, y_src, X_tar, y_tar, args) 19 | 20 | netF, netC = network.backbone_net(args, args.bottleneck) 21 | netF.load_state_dict(tr.load(args.mdl_init_dir + 'netF.pt')) 22 | netC.load_state_dict(tr.load(args.mdl_init_dir + 'netC.pt')) 23 | base_network = nn.Sequential(netF, netC) 24 | optimizer = optim.SGD(base_network.parameters(), lr=args.lr) 25 | 26 | acc_init = 0 27 | max_iter = args.max_epoch * len(dset_loaders["source_tr"]) 28 | interval_iter = max_iter // 10 29 | args.max_iter = args.max_epoch * len(dset_loaders["source_tr"]) 30 | iter_num = 0 31 | base_network.train() 32 | 33 | while iter_num < max_iter: 34 | try: 35 | inputs_source, labels_source = source_loader_iter.next() 36 | except: 37 | source_loader_iter = iter(dset_loaders["source_tr"]) 38 | inputs_source, labels_source = source_loader_iter.next() 39 | 40 | if inputs_source.size(0) == 1: 41 | continue 42 | 43 | iter_num += 1 44 | lr_scheduler_full(optimizer, init_lr=args.lr, iter_num=iter_num, max_iter=args.max_iter) 45 | 46 | inputs_source, labels_source = inputs_source.cuda(), labels_source.cuda() 47 | features_source, outputs_source = base_network(inputs_source) 48 | 49 | # classifier_loss = loss.CELabelSmooth(reduction='none', num_classes=class_num, epsilon=args.smooth)( 50 | # outputs_source, labels_source) 51 | classifier_loss = nn.CrossEntropyLoss()(outputs_source, labels_source) 52 | 53 | optimizer.zero_grad() 54 | classifier_loss.backward() 55 | optimizer.step() 56 | 57 | if iter_num % interval_iter == 0 or iter_num == max_iter: 58 | base_network.eval() 59 | 60 | acc_s_te = utils.cal_acc_base(dset_loaders["source_te"], base_network) 61 | acc_t_te = utils.cal_acc_base(dset_loaders["Target"], base_network) 62 | log_str = 'Task: {}, Iter:{}/{}; Val_acc = {:.2f}%; Test_Acc = {:.2f}%'.format(args.task_str, iter_num, 63 | max_iter, acc_s_te, acc_t_te) 64 | print(log_str) 65 | base_network.train() 66 | 67 | if acc_s_te >= acc_init: 68 | acc_init = acc_s_te 69 | acc_tar_src_best = acc_t_te 70 | 71 | return acc_tar_src_best 72 | 73 | 74 | if __name__ == '__main__': 75 | 76 | data_name = 'moon' 77 | seed = 2022 78 | if data_name == 'moon': num_class = 2 79 | noise_rate = 0 80 | base_name_list = ['0', '1', '2', '3_45', '4_15', '6', '7', '8', '9'] 81 | domain_list = ['Raw', 'Tl', 'Sl', 'Rt', 'Sh', 'Sk', 'Ns', 'Ol', 'Sc'] 82 | file_list = [data_name + i for i in base_name_list] 83 | num_domain = len(domain_list) 84 | 85 | args = argparse.Namespace(bottleneck=64, lr=0.01, lr_decay1=0.1, lr_decay2=1.0, 86 | epsilon=1e-05, layer='wn', class_num=num_class, smooth=0, 87 | is_save=False, ins_num=600) 88 | 89 | args.method = 'DNN' 90 | args.dset = data_name 91 | args.backbone = 'ShallowNet' 92 | args.batch_size = 32 93 | args.max_epoch = 50 
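    # All of the demos in this folder anneal the SGD learning rate at every
    # step through lr_scheduler_full() from utils/utils.py. A sketch of the
    # assumed schedule (SHOT-style polynomial decay; the exact constants live
    # in utils/utils.py and may differ):
    #     def lr_scheduler_sketch(optimizer, init_lr, iter_num, max_iter):
    #         decay = (1 + 10 * iter_num / max_iter) ** (-0.75)  # ~0.17 * init_lr at the end
    #         for param_group in optimizer.param_groups:
    #             param_group['lr'] = init_lr * decay
    #         return optimizer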
94 |     args.input_dim = 2
95 |     args.mdl_init_dir = 'outputs/mdl_init/' + args.dset + '/'
96 |     args.noise_rate = 0
97 |     dset_n = args.dset + '_' + str(args.noise_rate)
98 | 
99 |     os.environ["CUDA_VISIBLE_DEVICES"] = '6'
100 |     args.data_env = 'gpu'  # 'local'
101 |     args.seed = 2022
102 |     fix_random_seed(args.seed)
103 |     tr.backends.cudnn.deterministic = True
104 |     print(dset_n, args.method)
105 | 
106 |     args.root_path = './data_synth/'
107 |     args.local_dir = r'/mnt/ssd2/wenz/NT-Benchmark/NT_UDA/'
108 |     args.result_dir = 'results/target/'
109 | 
110 |     tr.manual_seed(args.seed)
111 |     tr.cuda.manual_seed(args.seed)
112 |     np.random.seed(args.seed)
113 |     random.seed(args.seed)
114 | 
115 |     acc_all = np.zeros((len(domain_list) - 1))
116 |     for s in range(1, num_domain):  # source
117 |         for t in [0]:  # target
118 |             itr_idx = s - 1
119 |             info_str = '\n%s: %s --> %s' % (itr_idx, domain_list[s], domain_list[t])
120 |             print(info_str)
121 |             args.src, args.tar = file_list[s], file_list[t]
122 |             args.task_str = domain_list[s] + '_' + domain_list[t]
123 |             print(args)
124 | 
125 |             acc_all[itr_idx] = train_source_test_target(args)
126 |             print('done\n')
127 |     print('\n\nfinish one repeat')
128 |     print('\nAll acc: ', np.round(acc_all, 2))
129 |     print('Avg acc: ', np.round(np.mean(acc_all), 2))
130 | 
--------------------------------------------------------------------------------
/NT_UDA/demo_syn_kmm.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # A Survey on Negative Transfer
3 | # https://github.com/chamwen/NT-Benchmark
4 | import argparse
5 | import pandas as pd
6 | import numpy as np
7 | from sklearn.metrics import accuracy_score
8 | from utils.utils import add_label_noise_noimg
9 | from utils.utils_bl import KMM, baseline_SVM
10 | 
11 | data_name = 'moon'
12 | seed = 2022
13 | if data_name == 'moon': class_num = 2
14 | noise_rate = 0
15 | base_name_list = ['0', '1', '2', '3_45', '4_15', '6', '7', '8', '9']
16 | domain_list = ['Raw', 'Tl', 'Sl', 'Rt', 'Sh', 'Sk', 'Ns', 'Ol', 'Sc']
17 | file_list = [data_name + i for i in base_name_list]
18 | num_domain = len(domain_list)
19 | 
20 | root_path = './data_synth/'
21 | acc_all = np.zeros((len(domain_list) - 1))
22 | for s in range(1, num_domain):  # source
23 |     for t in [0]:  # target
24 |         itr_idx = s - 1
25 |         print('\n%s: %s --> %s' % (itr_idx, domain_list[s], domain_list[t]))
26 |         src, tar = file_list[s], file_list[t]
27 |         pd_src = pd.read_csv(root_path + src + ".csv", header=None)
28 |         Xs, Ys = pd_src.iloc[:, :2].values, pd_src.iloc[:, 2].values.astype(int)
29 |         pd_tar = pd.read_csv(root_path + tar + ".csv", header=None)
30 |         Xt, Yt = pd_tar.iloc[:, :2].values, pd_tar.iloc[:, 2].values.astype(int)
31 | 
32 |         # add noise on source label
33 |         Ys = add_label_noise_noimg(Ys, seed, class_num, noise_rate)
34 | 
35 |         kmm = KMM(kernel_type='rbf', B=1)
36 |         beta = kmm.fit(Xs, Xt)
37 |         Xs_new = beta * Xs
38 | 
39 |         pred_tar = baseline_SVM(Xs_new, Ys, Xt, Yt)
40 |         acc_all[itr_idx] = accuracy_score(Yt, pred_tar) * 100
41 |         print('KMM: %.2f' % np.round(acc_all[itr_idx], 2))
42 | 
43 | print('\nmean acc', np.round(np.mean(acc_all), 2))
44 | print(domain_list)
45 | print(np.round(acc_all, 2).tolist())
46 | 
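# A minimal sketch of the idea behind the KMM step above. The KMM class from
# utils/utils_bl.py solves the constrained problem
#     min_beta || (1/ns) * sum_i beta_i * phi(xs_i) - (1/nt) * sum_j phi(xt_j) ||^2
#     s.t. 0 <= beta_i <= B
# so that `beta * Xs` reweights source samples toward the target distribution
# (see utils/utils_bl.py for the exact implementation). With a linear kernel and
# the box constraint dropped, this reduces to least squares; the helper below is
# a hypothetical illustration, not part of the repo:

import numpy as np

def kmm_linear_sketch(Xs, Xt):
    # Solve (Xs.T / ns) @ beta ~= mean(Xt); lstsq returns the minimum-norm beta.
    ns = Xs.shape[0]
    beta, *_ = np.linalg.lstsq(Xs.T / ns, Xt.mean(axis=0), rcond=None)
    return beta.reshape(-1, 1)  # shape (ns, 1), broadcasts like `beta * Xs` above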
--------------------------------------------------------------------------------
/NT_UDA/demo_syn_mcc.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # A Survey on Negative Transfer
3 | # https://github.com/chamwen/NT-Benchmark
4 | import numpy as np
5 | import argparse
6 | import os
7 | import torch as tr
8 | import torch.nn as nn
9 | import torch.optim as optim
10 | from utils import network, loss, utils
11 | from utils.dataloader import read_syn_src_tar
12 | from utils.utils import lr_scheduler_full, fix_random_seed, data_load_noimg
13 | from utils.loss import ClassConfusionLoss, CELabelSmooth
14 | 
15 | 
16 | def train_target(args):
17 |     X_src, y_src, X_tar, y_tar = read_syn_src_tar(args)
18 |     dset_loaders = data_load_noimg(X_src, y_src, X_tar, y_tar, args)
19 | 
20 |     netF, netC = network.backbone_net(args, args.bottleneck)
21 |     netF.load_state_dict(tr.load(args.mdl_init_dir + 'netF.pt'))
22 |     netC.load_state_dict(tr.load(args.mdl_init_dir + 'netC.pt'))
23 |     base_network = nn.Sequential(netF, netC)
24 |     optimizer = optim.SGD(base_network.parameters(), lr=args.lr)
25 | 
26 |     max_iter = args.max_epoch * len(dset_loaders["source"])
27 |     interval_iter = max_iter // 10
28 |     args.max_iter = args.max_epoch * len(dset_loaders["source"])
29 |     iter_num = 0
30 |     base_network.train()
31 | 
32 |     while iter_num < max_iter:
33 |         try:
34 |             inputs_source, labels_source = iter_source.next()
35 |         except:
36 |             iter_source = iter(dset_loaders["source"])
37 |             inputs_source, labels_source = iter_source.next()
38 | 
39 |         try:
40 |             inputs_target, _ = iter_target.next()
41 |         except:
42 |             iter_target = iter(dset_loaders["target"])
43 |             inputs_target, _ = iter_target.next()
44 | 
45 |         if inputs_source.size(0) == 1:
46 |             continue
47 | 
48 |         iter_num += 1
49 |         lr_scheduler_full(optimizer, init_lr=args.lr, iter_num=iter_num, max_iter=args.max_iter)
50 | 
51 |         inputs_source, inputs_target, labels_source = inputs_source.cuda(), inputs_target.cuda(), labels_source.cuda()
52 |         features_source, outputs_source = base_network(inputs_source)
53 |         features_target, outputs_target = base_network(inputs_target)
54 | 
55 |         # new version img loss
56 |         # p = float(iter_num) / max_iter
57 |         # alpha = 2. / (1.
+ np.exp(-10 * p)) - 1 58 | args.loss_trade_off = 1.0 59 | args.t_mcc = 2 60 | transfer_loss = ClassConfusionLoss(t=args.t_mcc)(outputs_target) 61 | classifier_loss = CELabelSmooth(num_classes=args.class_num, epsilon=args.smooth)(outputs_source, labels_source) 62 | total_loss = args.loss_trade_off * transfer_loss + classifier_loss 63 | 64 | optimizer.zero_grad() 65 | total_loss.backward() 66 | optimizer.step() 67 | 68 | if iter_num % interval_iter == 0 or iter_num == max_iter: 69 | base_network.eval() 70 | 71 | acc_t_te = utils.cal_acc_base(dset_loaders["Target"], base_network) 72 | log_str = 'Task: {}, Iter:{}/{}; Acc = {:.2f}%'.format(args.task_str, iter_num, max_iter, acc_t_te) 73 | print(log_str) 74 | 75 | base_network.train() 76 | 77 | return acc_t_te 78 | 79 | 80 | if __name__ == '__main__': 81 | 82 | data_name = 'moon' 83 | if data_name == 'moon': num_class = 2 84 | base_name_list = ['0', '1', '2', '3_45', '4_15', '6', '7', '8', '9'] 85 | domain_list = ['Raw', 'Tl', 'Sl', 'Rt', 'Sh', 'Sk', 'Ns', 'Ol', 'Sc'] 86 | file_list = [data_name + i for i in base_name_list] 87 | num_domain = len(domain_list) 88 | 89 | args = argparse.Namespace(bottleneck=64, lr=0.01, lr_decay1=0.1, lr_decay2=1.0, 90 | epsilon=1e-05, layer='wn', class_num=num_class, smooth=0) 91 | 92 | args.method = 'MCC' 93 | args.dset = data_name 94 | args.backbone = 'ShallowNet' 95 | args.batch_size = 32 96 | args.max_epoch = 50 97 | args.input_dim = 2 98 | args.mdl_init_dir = 'outputs/mdl_init/' + args.dset + '/' 99 | args.noise_rate = 0 100 | dset_n = args.dset + '_' + str(args.noise_rate) 101 | 102 | os.environ["CUDA_VISIBLE_DEVICES"] = '6' 103 | args.data_env = 'gpu' # 'local' 104 | args.seed = 2022 105 | fix_random_seed(args.seed) 106 | tr.backends.cudnn.deterministic = True 107 | print(dset_n, args.method) 108 | 109 | args.root_path = './data_synth/' 110 | args.local_dir = r'/mnt/ssd2/wenz/NT-Benchmark/NT_UDA/' 111 | args.result_dir = 'results/target/' 112 | 113 | acc_all = np.zeros((len(domain_list) - 1)) 114 | for s in range(1, num_domain): # source 115 | for t in [0]: # target 116 | itr_idx = s - 1 117 | info_str = '\n%s: %s --> %s' % (itr_idx, domain_list[s], domain_list[t]) 118 | print(info_str) 119 | args.src, args.tar = file_list[s], file_list[t] 120 | args.task_str = domain_list[s] + '_' + domain_list[t] 121 | print(args) 122 | 123 | acc_all[itr_idx] = train_target(args) 124 | print('All acc: ', np.round(acc_all, 2)) 125 | print('Avg acc: ', np.round(np.mean(acc_all), 2)) 126 | -------------------------------------------------------------------------------- /NT_UDA/save_init_model.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # A Survey on Negative Transfer 3 | # https://github.com/chamwen/NT-Benchmark 4 | import argparse 5 | import os 6 | import random 7 | import os.path as osp 8 | import torch as tr 9 | import numpy as np 10 | import utils.network as network 11 | 12 | 13 | def create_folder(output_dir): 14 | 15 | if not osp.exists(output_dir): 16 | os.system('mkdir -p ' + output_dir) 17 | if not osp.exists(output_dir): 18 | os.mkdir(output_dir) 19 | 20 | 21 | if __name__ == '__main__': 22 | seed = 2022 23 | tr.manual_seed(seed) 24 | tr.cuda.manual_seed(seed) 25 | np.random.seed(seed) 26 | random.seed(seed) 27 | tr.cuda.manual_seed_all(seed) 28 | tr.backends.cudnn.deterministic = True 29 | mdl_init_dir = 'outputs/mdl_init/' 30 | dset_list = ['DomainNet', 'SEED', 'moon'] 31 | 32 | 
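    # The three blocks below save one fixed random initialization per dataset;
    # every demo then restores it before training, e.g.
    #     netF, netC = network.backbone_net(args, args.bottleneck)
    #     netF.load_state_dict(tr.load(args.mdl_init_dir + 'netF.pt'))
    #     netC.load_state_dict(tr.load(args.mdl_init_dir + 'netC.pt'))
    # so that every method starts from identical weights under the shared seed
    # and the reported accuracies stay comparable.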
################################################################################### 33 | # Img data 34 | args = argparse.Namespace(bottleneck=1024, net='resnet50', layer='wn', classifier='bn') 35 | args.class_num = 40 36 | output_dir = osp.join(mdl_init_dir, dset_list[0]) 37 | create_folder(output_dir) 38 | 39 | if args.net[0:3] == 'res': 40 | netF = network.ResBase(res_name=args.net).cuda() 41 | elif args.net[0:3] == 'vgg': 42 | netF = network.VGGBase(vgg_name=args.net).cuda() 43 | netB = network.feat_bottleneck(type=args.classifier, feature_dim=netF.in_features, 44 | bottleneck_dim=args.bottleneck).cuda() 45 | netC = network.feat_classifier(type=args.layer, class_num=args.class_num, bottleneck_dim=args.bottleneck).cuda() 46 | netD_clf = network.feat_classifier(type=args.layer, class_num=2, bottleneck_dim=args.bottleneck).cuda() 47 | netD_full = network.AdversarialNetwork(args.bottleneck, 2048).cuda() 48 | 49 | tr.save(netF.state_dict(), osp.join(output_dir, "netF.pt")) 50 | tr.save(netB.state_dict(), osp.join(output_dir, "netB.pt")) 51 | tr.save(netC.state_dict(), osp.join(output_dir, "netC.pt")) 52 | tr.save(netD_clf.state_dict(), osp.join(output_dir, "netD_clf.pt")) 53 | tr.save(netD_full.state_dict(), osp.join(output_dir, "netD_full.pt")) 54 | # netF.load_state_dict(tr.load(osp.join(output_dir, "netF.pt"))) 55 | print('\nfinished init of DomainNet data...') 56 | 57 | ################################################################################### 58 | # SEED data 59 | args = argparse.Namespace(bottleneck=64, backbone='ShallowNet', layer='wn') 60 | args.input_dim = 310 61 | args.class_num = 3 62 | output_dir = osp.join(mdl_init_dir, dset_list[1]) 63 | create_folder(output_dir) 64 | 65 | netF, netC = network.backbone_net(args, args.bottleneck) 66 | netD_full = network.AdversarialNetwork(args.bottleneck, 20).cuda() 67 | netD_clf = network.feat_classifier(type=args.layer, class_num=2, bottleneck_dim=args.bottleneck).cuda() 68 | 69 | tr.save(netF.state_dict(), osp.join(output_dir, "netF.pt")) 70 | tr.save(netC.state_dict(), osp.join(output_dir, "netC.pt")) 71 | tr.save(netD_full.state_dict(), osp.join(output_dir, "netD_full.pt")) 72 | tr.save(netD_clf.state_dict(), osp.join(output_dir, "netD_clf.pt")) 73 | print('\nfinished init of seed data...') 74 | 75 | ################################################################################### 76 | # Synth data 77 | args = argparse.Namespace(bottleneck=64, backbone='ShallowNet', layer='wn') 78 | args.input_dim = 2 79 | args.class_num = 2 80 | output_dir = osp.join(mdl_init_dir, dset_list[2]) 81 | create_folder(output_dir) 82 | 83 | netF, netC = network.backbone_net(args, args.bottleneck) 84 | netD_full = network.AdversarialNetwork(args.bottleneck, 20).cuda() 85 | netD_clf = network.feat_classifier(type=args.layer, class_num=2, bottleneck_dim=args.bottleneck).cuda() 86 | 87 | tr.save(netF.state_dict(), osp.join(output_dir, "netF.pt")) 88 | tr.save(netC.state_dict(), osp.join(output_dir, "netC.pt")) 89 | tr.save(netD_full.state_dict(), osp.join(output_dir, "netD_full.pt")) 90 | tr.save(netD_clf.state_dict(), osp.join(output_dir, "netD_clf.pt")) 91 | print('\nfinished init of moon data...') 92 | 93 | 94 | 95 | -------------------------------------------------------------------------------- /NT_UDA/source_train_seed.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # A Survey on Negative Transfer 3 | # https://github.com/chamwen/NT-Benchmark 4 | import numpy as np 5 | import 
argparse
6 | import torch as tr
7 | import torch.nn as nn
8 | import torch.optim as optim
9 | import torch.utils.data as Data
10 | import os.path as osp
11 | import os
12 | from utils import network, loss, utils
13 | from utils.loss import CELabelSmooth
14 | from utils.LogRecord import LogRecord
15 | from utils.dataloader import read_seed_single, obtain_train_val_source
16 | from utils.utils import create_folder, lr_scheduler_full, fix_random_seed, add_label_noise_noimg
17 | 
18 | 
19 | def data_load(X, y, args):
20 |     dset_loaders = {}
21 |     train_bs = args.batch_size
22 |     tr.manual_seed(args.seed)
23 |     trial_ins_num = args.trial
24 | 
25 |     if args.noise_rate > 0:
26 |         y = add_label_noise_noimg(y, args.seed, args.class_num, args.noise_rate)
27 | 
28 |     id_train, id_val = obtain_train_val_source(y, trial_ins_num, args.validation)
29 |     source_tr = Data.TensorDataset(X[id_train, :], y[id_train])
30 |     dset_loaders['source_tr'] = Data.DataLoader(source_tr, batch_size=train_bs, shuffle=True, drop_last=True)
31 | 
32 |     source_te = Data.TensorDataset(X[id_val, :], y[id_val])
33 |     dset_loaders['source_te'] = Data.DataLoader(source_te, batch_size=train_bs * 3, shuffle=False, drop_last=False)
34 | 
35 |     return dset_loaders
36 | 
37 | 
38 | def train_source(args):  # train on source with a held-out validation split
39 |     X_src, y_src = read_seed_single(args, args.src)
40 |     dset_loaders = data_load(X_src, y_src, args)
41 | 
42 |     netF, netC = network.backbone_net(args, args.bottleneck)
43 |     netF.load_state_dict(tr.load(args.mdl_init_dir + 'netF.pt'))
44 |     netC.load_state_dict(tr.load(args.mdl_init_dir + 'netC.pt'))
45 |     base_network = nn.Sequential(netF, netC)
46 |     optimizer = optim.SGD(base_network.parameters(), lr=args.lr)
47 | 
48 |     acc_init = 0
49 |     max_iter = args.max_epoch * len(dset_loaders["source_tr"])  # source_tr: ~80 batches
50 |     interval_iter = max_iter // 10
51 |     args.max_iter = max_iter
52 |     iter_num = 0
53 | 
54 |     netF.train()
55 |     netC.train()
56 | 
57 |     while iter_num < max_iter:
58 |         try:
59 |             inputs_source, labels_source = iter_source.next()
60 |         except:
61 |             iter_source = iter(dset_loaders['source_tr'])
62 |             inputs_source, labels_source = iter_source.next()
63 | 
64 |         if inputs_source.size(0) == 1:
65 |             continue
66 | 
67 |         iter_num += 1
68 |         lr_scheduler_full(optimizer, init_lr=args.lr, iter_num=iter_num, max_iter=args.max_iter)
69 |         inputs_source, labels_source = inputs_source.cuda(), labels_source.cuda()
70 | 
71 |         _, outputs_source = netC(netF(inputs_source))
72 |         classifier_loss = CELabelSmooth(num_classes=args.class_num, epsilon=args.smooth)(outputs_source, labels_source)
73 | 
74 |         optimizer.zero_grad()
75 |         classifier_loss.backward()
76 |         optimizer.step()
77 | 
78 |         if iter_num % interval_iter == 0 or iter_num == max_iter:
79 |             netF.eval()
80 |             netC.eval()
81 | 
82 |             acc_s_te, _ = utils.cal_acc_noimg(dset_loaders['source_te'], netF, netC)
83 |             log_str = 'Task: {}, Iter:{}/{}; Val_acc = {:.2f}%'.format(args.name_src, iter_num, max_iter, acc_s_te)
84 |             args.log.record(log_str)
85 |             print(log_str)
86 | 
87 |             if acc_s_te >= acc_init:  # keep the best validation accuracy and save the corresponding model
88 |                 acc_init = acc_s_te
89 |                 best_netF = netF.state_dict()
90 |                 best_netC = netC.state_dict()
91 | 
92 |             netF.train()
93 |             netC.train()
94 | 
95 |     tr.save(best_netF, osp.join(args.output_dir_src, "source_F.pt"))
96 |     tr.save(best_netC, osp.join(args.output_dir_src, "source_C.pt"))
97 | 
98 |     return acc_s_te
99 | 
100 | 
101 | if __name__ == '__main__':
102 | 
103 |     data_name = 'SEED'
104 |     if data_name == 'SEED': chn, class_num, trial_num = 62, 3, 3394
105 |     focus_domain_idx = [0, 1, 2]
106 |     #
focus_domain_idx = np.arange(15) 107 | domain_list = ['S' + str(i) for i in focus_domain_idx] 108 | num_domain = len(domain_list) 109 | 110 | args = argparse.Namespace(bottleneck=64, lr=0.01, epsilon=1e-05, layer='wn', 111 | smooth=0, chn=chn, trial=trial_num, 112 | N=num_domain, class_num=class_num) 113 | args.dset = data_name 114 | args.method = 'single' 115 | args.backbone = 'ShallowNet' 116 | args.batch_size = 32 # 32 117 | args.max_epoch = 50 118 | args.input_dim = 310 119 | args.norm = 'zscore' 120 | args.validation = 'random' 121 | args.mdl_init_dir = 'outputs/mdl_init/' + args.dset + '/' 122 | args.noise_rate = 0 123 | dset_n = args.dset + '_' + str(args.noise_rate) 124 | 125 | os.environ["CUDA_VISIBLE_DEVICES"] = '4' 126 | args.data_env = 'gpu' # 'local' 127 | args.seed = 2022 128 | fix_random_seed(args.seed) 129 | tr.backends.cudnn.deterministic = True 130 | 131 | mdl_path = 'outputs/models/' 132 | args.output = mdl_path + dset_n + '/source/' 133 | print(dset_n, args.method) 134 | print(args) 135 | 136 | args.local_dir = r'/mnt/ssd2/wenz/code/NT-Benchmark/NT_UDA/' 137 | args.result_dir = 'results/source/' 138 | my_log = LogRecord(args) 139 | my_log.log_init() 140 | my_log.record('=' * 50 + '\n' + os.path.basename(__file__) + '\n' + '=' * 50) 141 | 142 | acc_all = [] 143 | for s in range(num_domain): 144 | args.src = focus_domain_idx[s] 145 | info_str = '\n========================== Within domain ' + domain_list[s] + ' ==========================' 146 | print(info_str) 147 | my_log.record(info_str) 148 | args.log = my_log 149 | 150 | args.name_src = domain_list[s] 151 | args.output_dir_src = osp.join(args.output, args.name_src) 152 | create_folder(args.output_dir_src, args.data_env, args.local_dir) 153 | print(args) 154 | 155 | acc_sub = train_source(args) 156 | acc_all.append(acc_sub) 157 | print(np.round(acc_all, 2)) 158 | print(np.round(np.mean(acc_all), 2)) 159 | 160 | acc_sub_str = str(np.round(acc_all, 2).tolist()) 161 | acc_mean_str = str(np.round(np.mean(acc_all), 2).tolist()) 162 | args.log.record("\n==========================================") 163 | args.log.record(acc_sub_str) 164 | args.log.record(acc_mean_str) 165 | -------------------------------------------------------------------------------- /NT_UDA/utils/LogRecord.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # A Survey on Negative Transfer 3 | # https://github.com/chamwen/NT-Benchmark 4 | import os.path as osp 5 | from datetime import datetime 6 | from datetime import timedelta, timezone 7 | from utils.utils import create_folder 8 | 9 | 10 | class LogRecord: 11 | def __init__(self, args): 12 | self.args = args 13 | self.result_dir = args.result_dir 14 | self.data_env = 'gpu' 15 | self.data_name = args.dset 16 | self.method = args.method 17 | 18 | def log_init(self): 19 | create_folder(self.result_dir, self.args.data_env, self.args.local_dir) 20 | 21 | if self.data_env == 'local': 22 | time_str = datetime.utcnow().replace(tzinfo=timezone.utc).astimezone( 23 | timezone(timedelta(hours=8), name='Asia/Shanghai')).strftime("%Y-%m-%d_%H_%M_%S") 24 | if self.data_env == 'gpu': 25 | time_str = datetime.utcnow().replace(tzinfo=timezone.utc).strftime("%Y-%m-%d_%H_%M_%S") 26 | file_name_head = 'log_' + self.method + '_' + self.data_name + '_' 27 | self.args.out_file = open(osp.join(self.args.result_dir, file_name_head + time_str + '.txt'), 'w') 28 | self.args.out_file.write(self._print_args() + '\n') 29 | self.args.out_file.flush() 30 | return self.args 
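    # Typical usage (mirroring source_train_seed.py above): `args` must already
    # carry result_dir, dset, method, data_env and local_dir, e.g.
    #     my_log = LogRecord(args)
    #     my_log.log_init()  # creates the results folder and opens the log file
    #     my_log.record('Iter:10/100; Val_acc = 85.30%')  # written and flushed at once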
31 | 
32 |     def record(self, log_str):
33 |         self.args.out_file.write(log_str + '\n')
34 |         self.args.out_file.flush()
35 |         return self.args
36 | 
37 |     def _print_args(self):
38 |         s = "==========================================\n"
39 |         for arg, content in self.args.__dict__.items():
40 |             s += "{}:{}\n".format(arg, content)
41 |         return s
42 | 
--------------------------------------------------------------------------------
/NT_UDA/utils/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # @Time : 2022/1/11 11:43 PM
3 | # @Author : wenzhang
4 | # @File : __init__.py
5 | 
--------------------------------------------------------------------------------
/NT_UDA/utils/data_list.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # A Survey on Negative Transfer
3 | # https://github.com/chamwen/NT-Benchmark
4 | import numpy as np
5 | from PIL import Image
6 | from torch.utils.data import Dataset
7 | 
8 | 
9 | def make_dataset(image_list, labels):
10 |     if labels:
11 |         len_ = len(image_list)
12 |         images = [(image_list[i].strip(), labels[i, :]) for i in range(len_)]
13 |     else:
14 |         if len(image_list[0].split('==')) > 2:
15 |             images = [(val.split('==')[0], np.array([int(la) for la in val.split('==')[1:]])) for val in image_list]
16 |         else:
17 |             images = [(val.split('==')[0], int(val.split('==')[1])) for val in image_list]
18 |     return images
19 | 
20 | 
21 | def rgb_loader(path):
22 |     with open(path, 'rb') as f:
23 |         with Image.open(f) as img:
24 |             return img.convert('RGB')
25 | 
26 | 
27 | def l_loader(path):
28 |     with open(path, 'rb') as f:
29 |         with Image.open(f) as img:
30 |             return img.convert('L')
31 | 
32 | 
33 | class ImageList(Dataset):
34 |     def __init__(self, image_list, labels=None, transform=None, target_transform=None, mode='RGB', weight=None):
35 |         imgs = make_dataset(image_list, labels)
36 |         if len(imgs) == 0:
37 |             raise (RuntimeError("Found 0 images in subfolders"))
38 | 
39 |         self.imgs = imgs
40 |         self.transform = transform
41 |         self.target_transform = target_transform
42 |         if mode == 'RGB':
43 |             self.loader = rgb_loader
44 |         elif mode == 'L':
45 |             self.loader = l_loader
46 |         self.weight = weight
47 | 
48 |     def __getitem__(self, index):
49 | 
50 |         path, target = self.imgs[index]
51 |         img = self.loader(path)
52 |         if self.transform is not None:
53 |             img = self.transform(img)
54 |         if self.target_transform is not None:
55 |             target = self.target_transform(target)
56 |         if self.weight is None:
57 |             return img, target
58 |         else:
59 |             return img, target, self.weight[index]
60 | 
61 |     def __len__(self):
62 |         return len(self.imgs)
63 | 
64 | 
65 | class ImageList_idx(Dataset):
66 |     def __init__(self, image_list, labels=None, transform=None, target_transform=None, mode='RGB'):
67 |         imgs = make_dataset(image_list, labels)
68 |         if len(imgs) == 0:
69 |             raise (RuntimeError("Found 0 images in subfolders of: " + "\n"))
70 | 
71 |         self.imgs = imgs
72 |         self.transform = transform
73 |         self.target_transform = target_transform
74 |         if mode == 'RGB':
75 |             self.loader = rgb_loader
76 |         elif mode == 'L':
77 |             self.loader = l_loader
78 | 
79 |     def __getitem__(self, index):
80 |         path, target = self.imgs[index]
81 |         img = self.loader(path)
82 |         if self.transform is not None:
83 |             img = self.transform(img)
84 |         if self.target_transform is not None:
85 |             target = self.target_transform(target)
86 | 
87 |         return img, target, index
88 | 
89 |     def __len__(self):
90 |         return len(self.imgs)
91 | 
92 | 
93 | 
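# ImageList_twice below returns several views of one image when `transform` is
# a list: __getitem__ applies every transform in turn and yields
# [t(img) for t in transform], which two-view (consistency-style) UDA pipelines
# can consume. Hypothetical usage, with weak_aug/strong_aug as placeholder
# torchvision transforms that are not defined in this repo:
#     dset = ImageList_twice(open('real_list.txt').readlines(),
#                            transform=[weak_aug, strong_aug])
#     (img_w, img_s), label = dset[0]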
class ImageList_twice(Dataset): 94 | def __init__(self, image_list, labels=None, transform=None, target_transform=None, mode='RGB'): 95 | imgs = make_dataset(image_list, labels) 96 | if len(imgs) == 0: 97 | raise (RuntimeError("Found 0 images in subfolders")) 98 | 99 | self.imgs = imgs 100 | self.transform = transform 101 | self.target_transform = target_transform 102 | if mode == 'RGB': 103 | self.loader = rgb_loader 104 | elif mode == 'L': 105 | self.loader = l_loader 106 | 107 | def __getitem__(self, index): 108 | path, target = self.imgs[index] 109 | img = self.loader(path) 110 | if self.target_transform is not None: 111 | target = self.target_transform(target) 112 | if self.transform is not None: 113 | if type(self.transform).__name__ == 'list': 114 | img = [t(img) for t in self.transform] 115 | else: 116 | img = self.transform(img) 117 | 118 | return img, target 119 | 120 | def __len__(self): 121 | return len(self.imgs) 122 | -------------------------------------------------------------------------------- /NT_UDA/utils/dataloader.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # A Survey on Negative Transfer 3 | # https://github.com/chamwen/NT-Benchmark 4 | import pandas as pd 5 | import torch as tr 6 | from torch.autograd import Variable 7 | import numpy as np 8 | from sklearn import preprocessing 9 | 10 | 11 | def read_syn_single(args, sub_idx): 12 | root_path = args.root_path 13 | pd_tar = pd.read_csv(root_path + sub_idx + ".csv", header=None) 14 | X, Y = pd_tar.iloc[:, :2].values, pd_tar.iloc[:, 2].values.astype(int) 15 | X = Variable(tr.from_numpy(X).float()) 16 | Y = tr.from_numpy(Y).long() 17 | 18 | return X, Y 19 | 20 | 21 | def read_syn_src_tar(args): 22 | root_path = args.root_path 23 | pd_src = pd.read_csv(root_path + args.src + ".csv", header=None) 24 | Xs, Ys = pd_src.iloc[:, :2].values, pd_src.iloc[:, 2].values.astype(int) 25 | pd_tar = pd.read_csv(root_path + args.tar + ".csv", header=None) 26 | Xt, Yt = pd_tar.iloc[:, :2].values, pd_tar.iloc[:, 2].values.astype(int) 27 | Xs = Variable(tr.from_numpy(Xs).float()) 28 | Ys = tr.from_numpy(Ys).long() 29 | Xt = Variable(tr.from_numpy(Xt).float()) 30 | Yt = tr.from_numpy(Yt).long() 31 | 32 | return Xs, Ys, Xt, Yt 33 | 34 | 35 | def data_normalize(fea_de, norm_type): 36 | if norm_type == 'zscore': 37 | zscore = preprocessing.StandardScaler() 38 | fea_de = zscore.fit_transform(fea_de) 39 | return fea_de 40 | 41 | 42 | def read_seed_single(args, sub_idx): 43 | # (15, 3394, 310) (15, 3394) 44 | if args.data_env == 'local': 45 | file = 'D:/Dataset/MOABB/' + args.dset + '.npz' 46 | if args.data_env == 'gpu': 47 | file = '/mnt/ssd2/wenz/data/bci/' + args.dset + '.npz' 48 | 49 | MI = np.load(file) 50 | Data_raw, Label = MI['data'], MI['label'] 51 | 52 | # source sub 53 | fea_de = np.squeeze(Data_raw[sub_idx, :, :]) 54 | fea_de = data_normalize(fea_de, args.norm) 55 | fea_de = Variable(tr.from_numpy(fea_de).float()) 56 | 57 | sub_label = np.squeeze(Label[sub_idx, :]) 58 | sub_label = tr.from_numpy(sub_label).long() 59 | print(fea_de.shape, sub_label.shape) 60 | 61 | return fea_de, sub_label 62 | 63 | 64 | def read_seed_src_tar(args): 65 | # (15, 3394, 310) (15, 3394) 66 | if args.data_env == 'local': 67 | file = 'D:/Dataset/MOABB/' + args.dset + '.npz' 68 | if args.data_env == 'gpu': 69 | file = '/mnt/ssd2/wenz/data/bci/' + args.dset + '.npz' 70 | 71 | MI = np.load(file) 72 | Data_raw, Label = MI['data'], MI['label'] 73 | 74 | src_data = 
np.squeeze(Data_raw[args.src, :, :])
75 |     src_data = data_normalize(src_data, args.norm)
76 |     src_data = Variable(tr.from_numpy(src_data).float())
77 |     src_label = np.squeeze(Label[args.src, :])
78 |     src_label = tr.from_numpy(src_label).long()
79 | 
80 |     # target sub
81 |     tar_data = np.squeeze(Data_raw[args.tar, :, :])
82 |     tar_data = data_normalize(tar_data, args.norm)
83 |     tar_data = Variable(tr.from_numpy(tar_data).float())
84 |     tar_label = np.squeeze(Label[args.tar, :])
85 |     tar_label = tr.from_numpy(tar_label).long()
86 |     print(tar_data.shape, tar_label.shape)
87 | 
88 |     return src_data, src_label, tar_data, tar_label
89 | 
90 | 
91 | def obtain_train_val_source(y_array, trial_ins_num, val_type):
92 |     y_array = y_array.numpy()
93 |     ins_num_all = len(y_array)
94 |     src_idx = range(ins_num_all)
95 | 
96 |     if val_type == 'random':
97 |         num_train = int(0.9 * len(src_idx))
98 |         id_train, id_val = tr.utils.data.random_split(src_idx, [num_train, len(src_idx) - num_train])
99 | 
100 |     return id_train, id_val
101 | 
--------------------------------------------------------------------------------
/NT_UDA/utils/generate_data_list.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # A Survey on Negative Transfer
3 | # https://github.com/chamwen/NT-Benchmark
4 | import os
5 | import sys
6 | import random
7 | import numpy as np
8 | import os.path as osp
9 | 
10 | sys.path.append("..")
11 | fix_seed = 2022
12 | 
13 | 
14 | def generate(dir, use_path, txt_path, label, sample_rate=1):
15 |     files = os.listdir(dir)
16 |     files.sort()
17 | 
18 |     if sample_rate < 1:
19 |         select_num = int(len(files) * sample_rate)
20 |         raw_idx = np.arange(len(files))
21 |         random.seed(fix_seed)
22 |         random.shuffle(raw_idx)
23 |         select_idx = raw_idx[:select_num].tolist()
24 |         files = np.array(files.copy())[select_idx].tolist()
25 |         files.sort()
26 | 
27 |     total_num = len(files)
28 |     # print(total_num)
29 | 
30 |     listText = open(txt_path, 'a')
31 |     num = 0
32 |     for file in files:
33 |         num += 1
34 |         fileType = os.path.splitext(file)  # splitext, not split: fileType[1] is the extension
35 |         if fileType[1] == '.txt':
36 |             continue
37 |         name = use_path + file + '==' + str(int(label)) + '\n'
38 |         if num < total_num + 1:
39 |             listText.write(name)
40 |     listText.close()
41 | 
42 |     return total_num
43 | 
44 | 
45 | def check_class_ins_num(domain_list, folderlist):
46 |     min_class_num_list = []
47 |     for name in domain_list:
48 |         print('\nreading...', name)
49 |         txt_path = out_path_root + dset + '/' + name + '_list.txt'
50 | 
51 |         class_list = []
52 |         for line in open(txt_path):
53 |             class_list.append(line.split('/' + name + '/')[1].split('/')[0])
54 | 
55 |         class_list = np.array(class_list)
56 |         class_num_list = [np.sum(class_list == cn) for cn in folderlist]
57 |         min_class_num_list.append(min(class_num_list))
58 |         print('min class ins_num', min(class_num_list))
59 |     print(min_class_num_list)
60 | 
61 | 
62 | if __name__ == "__main__":
63 |     root = "/mnt/ssd2/wenz/data/"
64 |     out_path_root = '../checkpoint/'
65 | 
66 |     dset = 'VisDA17'
67 |     if dset == 'office':
68 |         domain_list = ['amazon', 'dslr', 'webcam']
69 |     if dset == 'office-home':
70 |         domain_list = ['Art', 'Clipart', 'Product', 'RealWorld']
71 |     if dset == 'office-caltech':
72 |         domain_list = ['amazon', 'caltech', 'dslr', 'webcam']
73 |     if dset == 'VisDA17':
74 |         domain_list = ['train', 'validation']
75 |     if dset == 'DomainNet':
76 |         domain_list = ['clipart', 'infograph', 'painting', 'quickdraw', 'real', 'sketch']
77 | 
78 |     save_path = out_path_root + dset
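    # generate() writes one <domain>_list.txt per domain under save_path (e.g.
    # ../checkpoint/DomainNet/clipart_list.txt), one '<image_path>==<label>'
    # pair per line -- the same '==' format that make_dataset() in
    # utils/data_list.py parses back. A hypothetical DomainNet entry (label 0
    # is 'airplane', the first folder in folderlist; the file name is made up):
    #     /mnt/ssd2/wenz/data/DomainNet/clipart/airplane/img_001.jpg==0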
79 |     if not osp.exists(save_path):
80 |         os.system('mkdir -p ' + save_path)
81 |     if not osp.exists(save_path):
82 |         os.mkdir(save_path)
83 | 
84 |     # 40 classes, following:
85 |     # "SENTRY: Selective entropy optimization via committee consistency
86 |     # for unsupervised domain adaptation." ICCV 2021.
87 |     if dset == 'DomainNet':
88 |         folderlist = ['airplane', 'ambulance', 'apple', 'backpack', 'banana', 'bathtub', 'bear', 'bed', 'bee',
89 |                       'bicycle', 'bird', 'book', 'bridge', 'bus', 'butterfly', 'cake', 'calculator', 'camera', 'car',
90 |                       'cat', 'chair', 'clock', 'cow', 'dog', 'dolphin', 'donut', 'drums', 'duck', 'elephant', 'fence',
91 |                       'fork', 'horse', 'house', 'rabbit', 'scissors', 'sheep', 'strawberry', 'table', 'telephone',
92 |                       'truck']
93 |     sample_rate = 0.2  # 0.2 or 0.4, i.e., sample 20%/40% of all images per class
94 | 
95 |     for name in domain_list:
96 |         print('\nprocessing...', name)
97 |         data_path = root + dset + '/' + name
98 |         txt_path = out_path_root + dset + '/' + name + '_list.txt'
99 | 
100 |         if '.DS_Store' in folderlist:
101 |             folderlist.remove('.DS_Store')
102 | 
103 |         i = 0
104 |         total_num = 0
105 |         for folder in folderlist:
106 |             use_path_a = data_path + '/' + folder + '/'
107 |             num = generate(os.path.join(data_path, folder), use_path_a, txt_path, i, sample_rate)
108 |             total_num = total_num + num
109 |             i += 1
110 |         print(name, total_num)
111 | 
112 |     print('=' * 50)
113 |     check_class_ins_num(domain_list, folderlist)
114 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Benchmark of negative transfer
2 | 
3 | [](LICENSE)
4 | [](https://github.com/chamwen/NT-Benchmark/commits/main)
5 | 
6 | Negative transfer (NT) occurs when introducing source-domain data or knowledge decreases the learning performance in the target domain; it is a long-standing and challenging issue in transfer learning.
7 | 
8 | This repository contains the code for NT detection experiments with over 20 representative approaches on three NT-specific datasets.
9 | 
10 | 
11 | 
12 | ## Datasets
13 | 
14 | There are three NT-specific datasets with large domain shift: one synthetic dataset and two real-world datasets for object recognition and emotion recognition, respectively. Their statistics are summarized below:
15 | 
16 | 