├── Leg-UP ├── models │ ├── detector │ │ └── SDLib │ │ │ ├── data │ │ │ ├── __init__.py │ │ │ ├── social.py │ │ │ └── rating.py │ │ │ ├── main │ │ │ ├── __init__.py │ │ │ ├── id_plus_1.py │ │ │ ├── SDLib.py │ │ │ ├── plot.py │ │ │ └── main.py │ │ │ ├── method │ │ │ ├── __init__.py │ │ │ └── FAP.py │ │ │ ├── tool │ │ │ ├── __init__.py │ │ │ ├── dataSplit.py │ │ │ ├── config.py │ │ │ ├── plot.py │ │ │ ├── qmath.py │ │ │ └── file.py │ │ │ ├── baseclass │ │ │ ├── __init__.py │ │ │ ├── SSDetection.py │ │ │ └── SDetection.py │ │ │ └── __init__.py │ └── attacker │ │ └── __init__.py ├── main.py ├── utils │ ├── loss.py │ ├── data_loader.py │ └── utils.py ├── run.sh ├── execute_model.py ├── README.md ├── run.py └── preprocess_data.py ├── AUSH ├── model │ ├── __init__.py │ ├── attack_model │ │ ├── AttackModel.py │ │ ├── gan_attack │ │ │ ├── __init__.py │ │ │ └── models.py │ │ ├── baseline.py │ │ └── gan_attack_copy │ │ │ └── models.py │ ├── trainer_rec.py │ ├── trainer_rec_surprise.py │ └── nnmf.py ├── utils │ ├── __init__.py │ ├── attack │ │ ├── __init__.py │ │ └── data_to_file.py │ └── load_data │ │ ├── __init__.py │ │ ├── load_attack_info.py │ │ └── load_data.py ├── test_main │ ├── __init__.py │ ├── example.sh │ ├── result_reporter.py │ ├── main_train_rec.py │ ├── main_gan_attack.py │ ├── main_gan_attack_baseline.py │ ├── main_eval_attack.py │ ├── data_preprocess.py │ ├── main_baseline_attack.py │ ├── dcgan.py │ ├── main_eval_similarity_foryangqian.py │ ├── main_eval_similarity.py │ └── WGAN_yangqian.py └── README.md ├── data ├── automotive │ ├── automotive_selected_items │ └── automotive_target_users ├── ml100k │ ├── ml100k_selected_items │ └── ml100k_target_users └── filmTrust │ └── filmTrust_selected_items ├── README.md └── .gitignore /Leg-UP/models/detector/SDLib/data/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /Leg-UP/models/detector/SDLib/main/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /Leg-UP/models/detector/SDLib/method/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /Leg-UP/models/detector/SDLib/tool/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /Leg-UP/models/detector/SDLib/baseclass/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /AUSH/model/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # @Time : 2019/8/23 22:23 3 | # @Author : chensi 4 | # @File : __init__.py.py 5 | # @Software : PyCharm 6 | # @Desciption : None -------------------------------------------------------------------------------- /AUSH/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # @Time : 2019/8/23 22:24 3 | # @Author : chensi 4 | # @File : __init__.py.py 5 | # @Software : PyCharm 6 | # @Desciption : None -------------------------------------------------------------------------------- 
/AUSH/test_main/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # @Time : 2019/8/23 22:24 3 | # @Author : chensi 4 | # @File : __init__.py.py 5 | # @Software : PyCharm 6 | # @Desciption : None -------------------------------------------------------------------------------- /AUSH/utils/attack/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # @Time : 2019/5/31 10:37 3 | # @Author : chensi 4 | # @File : __init__.py.py 5 | # @Software : PyCharm 6 | # @Desciption : None -------------------------------------------------------------------------------- /AUSH/utils/load_data/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # @Time : 2019/4/30 17:37 3 | # @Author : chensi 4 | # @File : __init__.py.py 5 | # @Software : PyCharm 6 | # @Desciption : None -------------------------------------------------------------------------------- /Leg-UP/models/attacker/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # @Time : 2021/03/20 09:21 3 | # @Author : chensi 4 | # @File : __init__.py 5 | # @Software : PyCharm 6 | # @Desciption : None -------------------------------------------------------------------------------- /AUSH/model/attack_model/AttackModel.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # @Time : 2020/9/20 14:23 3 | # @Author : chensi 4 | # @File : attack_model.py 5 | # @Software : PyCharm 6 | # @Desciption : None -------------------------------------------------------------------------------- /Leg-UP/models/detector/SDLib/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # @Time : 2020/12/3 15:52 3 | # @Author : chensi 4 | # @File : __init__.py.py 5 | # @Software : PyCharm 6 | # @Desciption : None -------------------------------------------------------------------------------- /AUSH/model/attack_model/gan_attack/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # @Time : 2019/8/24 10:41 3 | # @Author : chensi 4 | # @File : __init__.py.py 5 | # @Software : PyCharm 6 | # @Desciption : None -------------------------------------------------------------------------------- /data/automotive/automotive_selected_items: -------------------------------------------------------------------------------- 1 | 22 866 2 | 88 1141 3 | 119 681 4 | 122 1656 5 | 339 177 6 | 422 477 7 | 477 1012 8 | 594 1141 9 | 866 1198 10 | 884 1656 11 | 1089 866 12 | 1141 866 13 | 1431 705 14 | 1593 1089 15 | 1656 1089 -------------------------------------------------------------------------------- /data/ml100k/ml100k_selected_items: -------------------------------------------------------------------------------- 1 | 1257 171,49,180 2 | 1419 203,167,172 3 | 785 171,49,180 4 | 1077 0,131,422 5 | 62 167,172,237 6 | 1319 97,99,55 7 | 1612 171,49,180 8 | 1509 11,99,55 9 | 1545 97,99,55 10 | 1373 203,167,172 11 | 690 27,78,227 -------------------------------------------------------------------------------- /Leg-UP/main.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # @Time : 2020/11/29 19:21 3 | # @Author : chensi 4 | # @File : main.py 5 | # 
@Software : PyCharm 6 | # @Desciption : None 7 | 8 | # from utils.evaluator import * 9 | from models.attacker.aushplus import * 10 | model = AUSHplus() 11 | model.execute() 12 | -------------------------------------------------------------------------------- /AUSH/test_main/example.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | for target_id in 5 395 181 565 254 601 623 619 64 558 4 | do 5 | for rec_model_name in IAUtoRec UAUtoRec NNMF NMF_25 6 | do 7 | python main_eval_attack.py --dataset filmTrust --rec_model_name $rec_model_name --attack_method G0 --target_id $target_id --attack_num 50 --filler_num 36 >> filmTrust_result_G0 8 | #nohup python main_gan_attack_baseline.py --dataset filmTrust --target_id 5 --attack_num 50 --filler_num 36 --loss 0 >> G0_log 2>&1 & 9 | done 10 | done -------------------------------------------------------------------------------- /Leg-UP/models/detector/SDLib/baseclass/SSDetection.py: -------------------------------------------------------------------------------- 1 | from SDetection import SDetection 2 | from data.social import SocialDAO 3 | from tool.config import Config,LineConfig 4 | from os.path import abspath 5 | from time import strftime,localtime,time 6 | from tool.file import FileIO 7 | from sklearn.metrics import classification_report 8 | class SSDetection(SDetection): 9 | 10 | def __init__(self,conf,trainingSet=None,testSet=None,labels=None,relation=list(),fold='[1]'): 11 | super(SSDetection, self).__init__(conf,trainingSet,testSet,labels,fold) 12 | self.sao = SocialDAO(self.config, relation) # social relations access control 13 | -------------------------------------------------------------------------------- /data/filmTrust/filmTrust_selected_items: -------------------------------------------------------------------------------- 1 | 29 83,98,110 2 | 5 98,118,112 3 | 395 118,110,119 4 | 380 98,83,118 5 | 198 118,98,83 6 | 576 98,118,112 7 | 228 83,98,119 8 | 181 118,112,98 9 | 442 99,2,84 10 | 310 119,118,110 11 | 703 98,99,114 12 | 307 83,118,98 13 | 370 113,114,99 14 | 449 113,115,82 15 | 2 112,103,98 16 | 565 110,119,118 17 | 664 98,99,114 18 | 539 98,118,112 19 | 515 99,114,98 20 | 254 98,83,119 21 | 215 118,83,98 22 | 40 118,119,110 23 | 601 119,83,118 24 | 623 98,118,83 25 | 266 110,99,83 26 | 619 118,83,98 27 | 648 113,114,99 28 | 640 118,83,98 29 | 451 114,99,98 30 | 64 98,83,118 31 | 655 98,119,83 32 | 558 98,83,118 33 | 553 119,110,118 34 | 183 114,98,99 35 | 200 110,119,118 36 | 264 98,114,99 37 | 674 98,83,118 38 | 295 83,119,110 39 | 629 98,114,99 40 | 711 83,98,118 -------------------------------------------------------------------------------- /AUSH/utils/load_data/load_attack_info.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # @Time : 2019/8/23 11:53 3 | # @Author : chensi 4 | # @File : load_attack_info.py 5 | # @Software : PyCharm 6 | # @Desciption : None 7 | 8 | def load_attack_info(seletced_item_path, target_user_path): 9 | attack_info = {} 10 | with open(seletced_item_path, "r") as fin: 11 | for line in fin: 12 | line = line.strip("\n").split("\t") 13 | target_item, selected_items = int(line[0]), list(map(int, line[1].split(","))) 14 | attack_info[target_item] = [selected_items] 15 | with open(target_user_path, "r") as fin: 16 | for line in fin: 17 | line = line.strip("\n").split("\t") 18 | target_item, target_users = int(line[0]), list(map(int, line[1].split(","))) 19 | 
attack_info[target_item].append(target_users) 20 | return attack_info 21 | 22 | -------------------------------------------------------------------------------- /Leg-UP/models/detector/SDLib/main/id_plus_1.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # @Time : 2019/8/29 21:51 3 | # @Author : chensi 4 | # @File : id_plus_1.py 5 | # @Software : PyCharm 6 | # @Desciption : None 7 | 8 | 9 | import numpy as np 10 | import pandas as pd 11 | import os 12 | 13 | conf_path = '../config/FAP.conf' 14 | 15 | # random_target = [62, 1077, 785, 1419, 1257] 16 | # tail_target = [1319, 1612, 1509, 1545, 1373] 17 | # targets = random_target + tail_target 18 | random = [155, 383, 920, 941, 892] 19 | tail = [1480, 844, 1202, 1301, 2035] 20 | targets = random + tail 21 | attack_methods = ["segment", "average", "random", "bandwagon", "gan"] 22 | for iid in targets: 23 | for attack_method in attack_methods: 24 | path = "../dataset/GAN/ciao/ciao_" + str(iid) + "_" + attack_method + "_50_15.dat" 25 | names = ['userID', 'movieID', 'movieRating'] 26 | data_df = pd.read_csv(path, sep='\t', names=names, engine='python') 27 | data_df.userID += 1 28 | data_df.movieID += 1 29 | dst_path = "../dataset/GAN/ciao_1/ciao_" + str(iid) + "_" + attack_method + "_50_15.dat" 30 | data_df.to_csv(dst_path, index=False, sep='\t', header=False) 31 | -------------------------------------------------------------------------------- /AUSH/utils/attack/data_to_file.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # @Time : 2019/8/23 21:17 3 | # @Author : chensi 4 | # @File : data_to_file.py 5 | # @Software : PyCharm 6 | # @Desciption : None 7 | 8 | import os 9 | import shutil 10 | 11 | 12 | def attacked_file_writer(clean_path, attacked_path, fake_profiles, n_users_ori): 13 | data_to_write = "" 14 | i = 0 15 | for fake_profile in fake_profiles: 16 | injected_iid = fake_profile.nonzero()[0] 17 | injected_rating = fake_profile[injected_iid] 18 | data_to_write += ('\n'.join( 19 | map(lambda x: '\t'.join(map(str, [n_users_ori + i] + list(x))), zip(injected_iid, injected_rating))) + '\n') 20 | i += 1 21 | if os.path.exists(attacked_path): os.remove(attacked_path) 22 | shutil.copyfile(clean_path, attacked_path) 23 | with open(attacked_path, 'a+')as fout: 24 | fout.write(data_to_write) 25 | 26 | 27 | def target_prediction_writer(predictions, hit_ratios, dst_path): 28 | # uid - rating - HR 29 | data_to_write = [] 30 | for uid in range(len(predictions)): 31 | data_to_write.append('\t'.join(map(str, [uid, predictions[uid]] + hit_ratios[uid]))) 32 | with open(dst_path, 'w')as fout: 33 | fout.write('\n'.join(data_to_write)) 34 | -------------------------------------------------------------------------------- /Leg-UP/models/detector/SDLib/tool/dataSplit.py: -------------------------------------------------------------------------------- 1 | from random import random 2 | from models.detector.SDLib.tool.file import FileIO 3 | class DataSplit(object): 4 | 5 | def __init__(self): 6 | pass 7 | 8 | @staticmethod 9 | def dataSplit(data,test_ratio = 0.3,output=False,path='./',order=1): 10 | if test_ratio>=1 or test_ratio <=0: 11 | test_ratio = 0.3 12 | testSet = {} 13 | trainingSet = {} 14 | for user in data: 15 | if random() < test_ratio: 16 | testSet[user] = data[user].copy() 17 | else: 18 | trainingSet[user] = data[user].copy() 19 | 20 | if output: 21 | FileIO.writeFile(path,'testSet['+str(order)+']',testSet) 22 | 
FileIO.writeFile(path, 'trainingSet[' + str(order) + ']', trainingSet) 23 | return trainingSet,testSet 24 | 25 | @staticmethod 26 | def crossValidation(data,k,output=False,path='./',order=1): 27 | if k<=1 or k>10: 28 | k=3 29 | for i in range(k): 30 | trainingSet = {} 31 | testSet = {} 32 | for ind,user in enumerate(data): 33 | if ind%k == i: 34 | testSet[user] = data[user].copy() 35 | else: 36 | trainingSet[user] = data[user].copy() 37 | yield trainingSet,testSet 38 | 39 | 40 | -------------------------------------------------------------------------------- /AUSH/test_main/result_reporter.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding:utf-8 -*- 3 | # author:ariaschen 4 | # datetime:2020/1/14 09:11 5 | # software: PyCharm 6 | 7 | # import itertools, gzip 8 | import pandas as pd 9 | 10 | 11 | columns = ['Rec_model', 'attack_method', 'target_id'] 12 | 13 | hr = ['HR_1', 'HR_3', 'HR_5', 'HR_10', 'HR_20', 'HR_50'] 14 | hr_ori = [i + '_ori' for i in hr] 15 | 16 | columns += [i + '_inseg' for i in ['shift'] + hr_ori + hr] 17 | 18 | columns += [i + '_all' for i in ['shift'] + hr_ori + hr] 19 | 20 | columns_r = [i + '_inseg' for i in ['shift'] + hr] + [i + '_all' for i in ['shift'] + hr] 21 | """""" 22 | # data = pd.read_excel('filmTrust_distance.xls') 23 | # data.groupby('attack_method').mean()[['dis_TVD','dis_JS']].to_excel('filmTrust_distance_avg.xls') 24 | 25 | # data = pd.read_excel('ml100k_performance_all.xls') 26 | # data = pd.read_excel('../result_ijcai/filmTrust_performance_all.xls') 27 | # data = pd.read_excel('../result_ijcai/ml100k_performance_all.xls') 28 | # data = pd.read_excel('office_performance_all.xls') 29 | data = pd.read_excel('automotive_performance_all.xls') 30 | data.columns = columns 31 | data = data[['Rec_model', 'attack_method', 'target_id', 'shift_inseg', 'HR_10_inseg', 'shift_all', 'HR_10_all']] 32 | # target_type_dict = dict( 33 | # zip([62, 1077, 785, 1419, 1257] + [1319, 1612, 1509, 1545, 1373], ['random'] * 5 + ['tail'] * 5)) 34 | # target_type_dict = dict(zip([5, 395, 181, 565, 254] + [601, 623, 619, 64, 558], ['random'] * 5 + ['tail'] * 5)) 35 | target_type_dict = dict(zip([1141, 1656, 477, 1089, 866] + [88, 22, 122, 339, 1431], ['random'] * 5 + ['tail'] * 5)) 36 | data['target_type'] = data.target_id.apply(lambda x: target_type_dict[x]) 37 | data['attack_method'] = data.attack_method.apply(lambda x: x.split('_')[0]) 38 | result = data.groupby(['Rec_model','attack_method', 'target_type']).mean()[['shift_all', 'HR_10_all']] 39 | result.to_excel('ml100k_performance_0119_sample_strategy.xlsx') 40 | exit() 41 | -------------------------------------------------------------------------------- /Leg-UP/utils/loss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn.functional as F 3 | 4 | from utils.utils import EPSILON 5 | 6 | __all__ = ["mse_loss", "mult_ce_loss", "binary_ce_loss", "kld_loss", 7 | "sampled_bce_loss", "sampled_cml_loss"] 8 | 9 | """Model training losses.""" 10 | bce_loss = torch.nn.BCELoss(reduction='none') 11 | 12 | 13 | def mse_loss(data, logits, weight): 14 | """Mean square error loss.""" 15 | weights = torch.ones_like(data) 16 | weights[data > 0] = weight 17 | res = weights * (data - logits) ** 2 18 | return res.sum(1) 19 | 20 | 21 | def mult_ce_loss(data, logits): 22 | """Multi-class cross-entropy loss.""" 23 | log_probs = F.log_softmax(logits, dim=-1) 24 | loss = -log_probs * data 25 | 26 | 
instance_data = data.sum(1) 27 | instance_loss = loss.sum(1) 28 | # Avoid divide by zeros. 29 | res = instance_loss / (instance_data + EPSILON) 30 | return res 31 | 32 | 33 | def binary_ce_loss(data, logits): 34 | """Binary-class cross-entropy loss.""" 35 | return bce_loss(torch.sigmoid(logits), data).mean(1) 36 | 37 | 38 | def kld_loss(mu, log_var): 39 | """KL-divergence.""" 40 | return -0.5 * torch.sum( 41 | 1 + log_var - mu.pow(2) - log_var.exp(), dim=1) 42 | 43 | 44 | def sampled_bce_loss(logits, n_negatives): 45 | """Binary-class cross-entropy loss with sampled negatives.""" 46 | pos_logits, neg_logits = torch.split(logits, [1, n_negatives], 1) 47 | data = torch.cat([ 48 | torch.ones_like(pos_logits), torch.zeros_like(neg_logits) 49 | ], 1) 50 | return bce_loss(torch.sigmoid(logits), data).mean(1) 51 | 52 | 53 | def sampled_cml_loss(distances, n_negatives, margin): 54 | """Hinge loss with sampled negatives.""" 55 | # Distances here are the negative euclidean distances. 56 | pos_distances, neg_distances = torch.split(-distances, [1, n_negatives], 1) 57 | neg_distances = neg_distances.min(1).values.unsqueeze(-1) 58 | res = pos_distances - neg_distances + margin 59 | res[res < 0] = 0 60 | return res.sum(1) 61 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Shilling Attacks against Recommender Systems 2 | 3 | This repository contains our implementations for Shilling Attacks against Recommender Systems. 4 | 5 | Folder structure: 6 | - `AUSH`: The implementation of AUSH used in our CIKM'20 paper [[ACM Library](https://dl.acm.org/doi/10.1145/3340531.3411884)] [[arXiv Preprint](https://arxiv.org/abs/2005.08164)]. 7 | - `Leg-UP`: The implementation of Leg-UP in our TNNLS'22 paper [[IEEE Xplore](https://ieeexplore.ieee.org/document/9806457)] [[arXiv Preprint](https://arxiv.org/abs/2206.11433)] and a unified framework for comparing Leg-UP with various attackers including AIA, DCGAN, WGAN, Random Attack, Average Attack, Segment Attack and Bandwagon Attack. 8 | - `data`: Recommendation datasets used in our experiments. 9 | 10 | See `README.md` in each folder for more details. 11 | 12 | Please kindly cite our papers if you find our implementations useful: 13 | 14 | > Chen Lin, Si Chen, Hui Li, Yanghua Xiao, Lianyun Li, and Qian Yang. 2020. Attacking Recommender Systems with Augmented User Profiles. In CIKM. 855–864. 15 | 16 | > Chen Lin, Si Chen, Meifang Zeng, Sheng Zhang, Min Gao, and Hui Li. 2022. Shilling Black-Box Recommender Systems by Learning to Generate Fake User Profiles. In TNNLS. 17 | 18 | @inproceedings{Lin2020Attacking, 19 | author = {Chen Lin and 20 | Si Chen and 21 | Hui Li and 22 | Yanghua Xiao and 23 | Lianyun Li and 24 | Qian Yang}, 25 | title = {Attacking Recommender Systems with Augmented User Profiles}, 26 | booktitle = {{CIKM}}, 27 | pages = {855--864}, 28 | year = {2020} 29 | } 30 | 31 | 32 | @article{LinCZZGL22, 33 | author = {Chen Lin and 34 | Si Chen and 35 | Meifang Zeng and 36 | Sheng Zhang and 37 | Min Gao and 38 | Hui Li}, 39 | title = {Shilling Black-Box Recommender Systems by Learning to Generate Fake User Profiles}, 40 | journal = {{IEEE} Trans. Neural Networks Learn. 
Syst.}, 41 | year = {2022} 42 | } -------------------------------------------------------------------------------- /Leg-UP/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | #================================================= 4 | 5 | for target_id in 62 785 1077 1257 1419; do 6 | python run.py --data_set ml100k --target_ids $target_id --attacker_list AUSHplus >log_ml100k_$target_id 7 | done 8 | 9 | for target_id in 5 395 181 565 254; do 10 | python run.py --data_set filmTrust --target_ids $target_id --attacker_list AUSHplus >log_filmTrust_$target_id 11 | done 12 | 13 | for target_id in 119 422 594 884 1593; do 14 | python run.py --data_set automotive --target_ids $target_id --attacker_list AUSHplus >log_automotive_$target_id 15 | done 16 | #================================================= 17 | 18 | for attacker in AUSHplus AIA WGANAttacker DCGANAttacker RandomAttacker AverageAttacker BandwagonAttacker SegmentAttacker; do 19 | for target_id in 62 785 1077 1257 1419; do 20 | python run.py --data_set ml100k --target_ids $target_id --attacker_list $attacker >log_ml100k_$target_id"_"$attacker 21 | done 22 | 23 | for target_id in 5 395 181 565 254; do 24 | python run.py --data_set filmTrust --target_ids $target_id --attacker_list $attacker >log_filmTrust_$target_id"_"$attacker 25 | done 26 | 27 | for target_id in 119 422 594 884 1593; do 28 | python run.py --data_set automotive --target_ids $target_id --attacker_list $attacker >log_automotive_$target_id"_"$attacker 29 | done 30 | done 31 | 32 | #================================================= 33 | 34 | for attacker in AUSHplus_SR AUSHplus_woD AUSHplus_SF AUSHplus_inseg; do 35 | for target_id in 62 785 1077 1257 1419; do 36 | python run.py --data_set ml100k --target_ids $target_id --attacker_list $attacker >log_ml100k_$target_id"_"$attacker 37 | done 38 | 39 | for target_id in 5 395 181 565 254; do 40 | python run.py --data_set filmTrust --target_ids $target_id --attacker_list $attacker >log_filmTrust_$target_id"_"$attacker 41 | done 42 | 43 | for target_id in 119 422 594 884 1593; do 44 | python run.py --data_set automotive --target_ids $target_id --attacker_list $attacker >log_automotive_$target_id"_"$attacker 45 | done 46 | done 47 | 48 | #================================================= 49 | -------------------------------------------------------------------------------- /AUSH/model/trainer_rec.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # @Time : 2019/8/23 19:58 3 | # @Author : chensi 4 | # @File : train_rec.py 5 | # @Software : PyCharm 6 | # @Desciption : None 7 | 8 | try: 9 | import tensorflow.compat.v1 as tf 10 | 11 | tf.disable_v2_behavior() 12 | except: 13 | import tensorflow as tf 14 | from model.autorec import IAutoRec, UAutoRec 15 | from model.nnmf import NNMF 16 | 17 | 18 | def get_model_network(sess, model_name, dataset_class): 19 | model = None 20 | if model_name == "IAutoRec": 21 | model = IAutoRec(sess, dataset_class) 22 | elif model_name == "UAutoRec": 23 | model = UAutoRec(sess, dataset_class) 24 | elif model_name == "NNMF": 25 | model = NNMF(sess, dataset_class) 26 | return model 27 | 28 | 29 | def get_top_n(model, n): 30 | top_n = {} 31 | user_nonrated_items = model.dataset_class.get_user_nonrated_items() 32 | for uid in range(model.num_user): 33 | items = user_nonrated_items[uid] 34 | ratings = model.predict([uid] * len(items), items) 35 | item_rating = list(zip(items, ratings)) 36 | 
item_rating.sort(key=lambda x: x[1], reverse=True) 37 | top_n[uid] = [x[0] for x in item_rating[:n]] 38 | return top_n 39 | 40 | 41 | def pred_for_target(model, target_id): 42 | target_predictions = model.predict(list(range(model.num_user)), [target_id] * model.num_user) 43 | 44 | top_n = get_top_n(model, n=50) 45 | hit_ratios = {} 46 | for uid in top_n: 47 | hit_ratios[uid] = [1 if target_id in top_n[uid][:i] else 0 for i in [1, 3, 5, 10, 20, 50]] 48 | return target_predictions, hit_ratios 49 | 50 | 51 | def rec_trainer(model_name, dataset_class, target_id, is_train, model_path): 52 | tf.reset_default_graph() 53 | tf_config = tf.ConfigProto() 54 | tf_config.gpu_options.allow_growth = True 55 | with tf.Session(config=tf_config) as sess: 56 | 57 | rec_model = get_model_network(sess, model_name, dataset_class) 58 | if is_train: 59 | print('--> start train recommendation model...') 60 | rec_model.execute() 61 | rec_model.save(model_path) 62 | else: 63 | rec_model.restore(model_path) 64 | print('--> start pred for each user...') 65 | predictions, hit_ratios = pred_for_target(rec_model, target_id) 66 | return predictions, hit_ratios 67 | -------------------------------------------------------------------------------- /Leg-UP/execute_model.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # @Time : 2020/11/29 11:59 3 | # @Author : chensi 4 | # @File : execute_model.py 5 | # @Software : PyCharm 6 | # @Desciption : None 7 | import random 8 | import numpy as np 9 | import torch 10 | 11 | tf = None 12 | try: 13 | import tensorflow.compat.v1 as tf 14 | 15 | tf.disable_v2_behavior() 16 | except: 17 | import tensorflow as tf 18 | 19 | seed = 1234 20 | random.seed(seed) 21 | np.random.seed(seed) 22 | tf.set_random_seed(seed) 23 | torch.manual_seed(seed) 24 | torch.cuda.manual_seed_all(seed) 25 | 26 | from importlib import import_module 27 | import sys 28 | 29 | 30 | model2lib_dict = { 31 | # attacker 32 | 'RandomAttacker': 'models.attacker.attacker', 33 | 'AverageAttacker': 'models.attacker.attacker', 34 | 'BandwagonAttacker': 'models.attacker.attacker', 35 | 'SegmentAttacker': 'models.attacker.attacker', 36 | # 37 | 'WGANAttacker': 'models.attacker.attacker', 38 | 'DCGANAttacker': 'models.attacker.attacker', 39 | # 40 | 'AUSH': 'models.attacker.aush', 41 | # 42 | 'AUSHplus': 'models.attacker.aushplus', 43 | 'AIA': 'models.attacker.aushplus', 44 | 'AUSHplus_SR': 'models.attacker.aushplus', 45 | 'AUSHplus_woD': 'models.attacker.aushplus', 46 | 'AUSHplus_SF': 'models.attacker.aushplus', 47 | 'AUSHplus_inseg': 'models.attacker.aushplus', 48 | } 49 | 50 | 51 | def execute_model(model_type, model_name): 52 | 53 | try: 54 | try: 55 | model_lib_str = 'models.%s.%s' % (model_type.lower(), 56 | model_type[0].upper() + model_type[1:].lower()) 57 | model_lib = import_module(model_lib_str) 58 | model = getattr(model_lib, model_name)() 59 | except: 60 | model_lib_str = 'utils.%s' % (model_type.lower()) 61 | model_lib = import_module(model_lib_str) 62 | model = getattr(model_lib, model_name)() 63 | except: 64 | # try: 65 | model_lib_str = model2lib_dict[model_name] 66 | model_lib = import_module(model_lib_str) 67 | model = getattr(model_lib, model_name)() 68 | # except: 69 | # print('Not found:', model_type, model_name) 70 | # exit() 71 | 72 | model.execute() 73 | print('success.') 74 | 75 | 76 | model_lib = sys.argv[sys.argv.index('--exe_model_lib') + 1] 77 | model_name = sys.argv[sys.argv.index('--exe_model_class') + 1] 78 | execute_model(model_lib, 
model_name) 79 | -------------------------------------------------------------------------------- /Leg-UP/README.md: -------------------------------------------------------------------------------- 1 | 2 | # Shilling Black-box Recommender Systems by Learning to Generate Fake User Profiles 3 | 4 | This repository contains our implementation for Leg-UP (Learning to Generate Fake User Profiles) and various shilling attack methods including AIA, DCGAN, WGAN, Random Attack, Average Attack, Segment Attack and Bandwagon Attack. 5 | 6 | Please kindly cite our paper [[IEEE Xplore](https://ieeexplore.ieee.org/document/9806457)] [[arXiv Preprint](https://arxiv.org/abs/2206.11433)] if you use it: 7 | 8 | > Chen Lin, Si Chen, Meifang Zeng, Sheng Zhang, Min Gao, and Hui Li. 2022. Shilling Black-Box Recommender Systems by Learning to Generate Fake User Profiles. In TNNLS. 9 | 10 | @article{LinCZZGL22, 11 | author = {Chen Lin and 12 | Si Chen and 13 | Meifang Zeng and 14 | Sheng Zhang and 15 | Min Gao and 16 | Hui Li}, 17 | title = {Shilling Black-Box Recommender Systems by Learning to Generate Fake User Profiles}, 18 | journal = {{IEEE} Trans. Neural Networks Learn. Syst.}, 19 | year = {2022} 20 | } 21 | 22 | ## Environment 23 | - Python 3.8 24 | - higher 0.2.1 25 | - scikit-learn 0.24.1 26 | - scikit-surprise 1.1.1 27 | - tensorflow 2.7 28 | - pytorch 1.10 29 | - numpy 1.20.1 30 | 31 | ## Data 32 | 33 | The datasets used in our experiments can be found in the [data](../data) folder. 34 | 35 | 36 | ## Command Line Parameters 37 | `run.py` is the main entry of the program, it requires several parameters: 38 | 39 | - `data_set`: the recommendation dataset used in the experiment (Possible values: "ml100k", ''filmTrust'', ''automotive'', "yelp", ''GroceryFood'', ''ToolHome'' and ''AppAndroid''. Default is "ml100k"). 40 | - `attack_num`: number of injected profiles, i.e., A value (Default is 50). 41 | - `filler_num`: number of fillers, i.e., P value (Default is 36). 42 | - `surrogate`: surrogate RS model (Possible values: "WMF", ''ItemAE'', ''SVDpp'', and ''PMF''. Default is "WMF"). 43 | - `target_ids`: id of the target item (Default is 62). 44 | - `recommender`: victim recommender (Possible values: ''AUSHplus'', ''AIA'', ''WGANAttacker'', ''DCGANAttacker'', ''RandomAttacker'', ''AverageAttacker'', ''BandwagonAttacker'', and ''SegmentAttacker''. Default is "WMF"). Note that ''AUSHplus'' is the name of Leg-UP in our implementation. 45 | - `cuda_id`: GPU id (Default is 0). 46 | - `use_cuda`: use CPU or GPU (Default is 1). 47 | 48 | ## Examples 49 | 50 | Please refer to `run.sh` for some running examples. 
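For instance, one command from the first loop of `run.sh` launches Leg-UP (named `AUSHplus` in this implementation) against target item 62 on ML-100K:

```shell script
python run.py --data_set ml100k --target_ids 62 --attacker_list AUSHplus > log_ml100k_62
```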
51 | 52 | 53 | 54 | -------------------------------------------------------------------------------- /Leg-UP/models/detector/SDLib/main/SDLib.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | # sys.path.append("../") 4 | from re import split 5 | from models.detector.SDLib.tool.config import Config, LineConfig 6 | from models.detector.SDLib.tool.dataSplit import * 7 | from models.detector.SDLib.tool.file import FileIO 8 | 9 | 10 | class SDLib(object): 11 | def __init__(self, config): 12 | self.trainingData = [] # training data 13 | self.testData = [] # testData 14 | self.relation = [] 15 | self.measure = [] 16 | self.config = config 17 | self.ratingConfig = LineConfig(config['ratings.setup']) 18 | self.labels = FileIO.loadLabels(config['label']) 19 | 20 | if self.config.contains('evaluation.setup'): 21 | self.evaluation = LineConfig(config['evaluation.setup']) 22 | 23 | if self.evaluation.contains('-testSet'): 24 | # specify testSet 25 | self.trainingData = FileIO.loadDataSet(config, config['ratings']) 26 | self.testData = FileIO.loadDataSet(config, self.evaluation['-testSet'], bTest=True) 27 | 28 | elif self.evaluation.contains('-ap'): 29 | # auto partition 30 | self.trainingData = FileIO.loadDataSet(config, config['ratings']) 31 | self.trainingData, self.testData = DataSplit. \ 32 | dataSplit(self.trainingData, test_ratio=float(self.evaluation['-ap'])) 33 | 34 | elif self.evaluation.contains('-cv'): 35 | # cross validation 36 | self.trainingData = FileIO.loadDataSet(config, config['ratings']) 37 | # self.trainingData,self.testData = DataSplit.crossValidation(self.trainingData,int(self.evaluation['-cv'])) 38 | 39 | else: 40 | print('Evaluation is not well configured!') 41 | exit(-1) 42 | 43 | if config.contains('social'): 44 | self.socialConfig = LineConfig(self.config['social.setup']) 45 | self.relation = FileIO.loadRelationship(config, self.config['social']) 46 | # print('preprocessing...') 47 | 48 | def execute(self): 49 | # import the algorithm module 50 | importStr = 'from models.detector.SDLib.method.' 
+ self.config['methodName'] + ' import ' + self.config['methodName'] 51 | exec(importStr) 52 | if self.config.contains('social'): 53 | method = self.config[ 54 | 'methodName'] + '(self.config,self.trainingData,self.testData,self.labels,self.relation)' 55 | else: 56 | method = self.config['methodName'] + '(self.config,self.trainingData,self.testData,self.labels)' 57 | ans = eval(method).execute() 58 | return [float(i) for i in ans] 59 | 60 | 61 | def run(measure, algor, order): 62 | measure[order] = algor.execute() 63 | -------------------------------------------------------------------------------- /Leg-UP/models/detector/SDLib/main/plot.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # @Time : 2019/8/30 9:24 3 | # @Author : chensi 4 | # @File : plot.py 5 | # @Software : PyCharm 6 | # @Desciption : None 7 | 8 | import numpy as np 9 | import pandas as pd 10 | import os 11 | import matplotlib.pyplot as plt 12 | 13 | attack_methods = ["segment", "average", "random", "bandwagon", "gan"] 14 | attack_name = ["Segment", "Random", "Average", "Bandwagon", "Ours"] 15 | attack_method = "segment" 16 | # random = [155, 383, 920, 941, 892] 17 | # tail = [1480, 844, 1202, 1301, 2035] 18 | # targets = random + tail 19 | random = [5, 395, 181, 565, 254] 20 | tail = [601, 623, 619, 64, 558] 21 | targets = random + tail 22 | # targets = [62, 1077, 785, 1419, 1257] + [1319, 1612, 1509, 1545, 1373] 23 | # for attack_method in attack_methods: 24 | # # dir = '../results/ciao_DegreeSAD/' + attack_method 25 | # dir = '../results/filmTrust_0903_FAP/' + attack_method 26 | # pathDir = os.listdir(dir) 27 | # data_to_write = [] 28 | # iid_idx = 0 29 | # for i in range(len(pathDir)): 30 | # # if "5-fold-cv" not in pathDir[i]: continue 31 | # iid = targets[iid_idx] 32 | # iid_idx += 1 33 | # # load result 34 | # lines = [] 35 | # if 'FAP' not in pathDir[i]: continue 36 | # with open(dir + '/' + pathDir[i], 'r') as fin: 37 | # for line in fin: 38 | # lines.append(line) 39 | # res = lines[3].strip('\n').split(' ') 40 | # while '' in res: res.remove('') 41 | # res = [str(iid)] + res 42 | # data_to_write.append('\t'.join(res)) 43 | # with open(dir + '/' + "result_" + attack_method, 'w') as fout: 44 | # fout.write('\n'.join(data_to_write)) 45 | 46 | names = ['iid', 'label', 'precision', 'recall', 'f1', 'support'] 47 | # pre_results = {} 48 | # recall_results = {} 49 | P, R, N = [], [], [] 50 | for i in range(len(attack_methods)): 51 | attack_method = attack_methods[i] 52 | path = '../results/filmTrust_0903_FAP/' + attack_method + "/result_" + attack_method 53 | # path = '../results/ml100k_DegreeSAD/' + attack_method + "/result_" + attack_method 54 | # path = '../results/ciao_DegreeSAD/' + attack_method + "/result_" + attack_method 55 | result = pd.read_csv(path, sep='\t', names=names, engine='python') 56 | p = result.precision.values.tolist() 57 | r = result.recall.values.tolist() 58 | n = [attack_name[i]] * len(r) 59 | P.extend(p) 60 | R.extend(r) 61 | N.extend(n) 62 | # pre_results[attack_name[i]] =p 63 | # recall_results[attack_name[i]] =r 64 | data_pre = pd.DataFrame({"method": N, "precision": P, "recall": R}) 65 | # data_pre = pd.DataFrame(pre_results) 66 | data_pre.boxplot(column='precision', by=['method']) 67 | plt.title("Attack Detection") 68 | plt.ylabel("precision", ) 69 | plt.xlabel("Attack Method") 70 | plt.show() 71 | a = 1 72 | # -------------------------------------------------------------------------------- 
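Note on `SDLib.execute()` above: the detector class is resolved at runtime from the `methodName` entry of the `.conf` file by composing an import statement as a string and running it through `exec`/`eval`. A minimal sketch of the same dispatch, written with `importlib` (as `execute_model.py` does elsewhere in Leg-UP) and assuming the hypothetical case `methodName=FAP`:

```python
from importlib import import_module

method_name = 'FAP'  # in SDLib this value comes from the config file's methodName entry
# Resolve models.detector.SDLib.method.FAP and fetch the FAP class from it.
module = import_module('models.detector.SDLib.method.' + method_name)
DetectorClass = getattr(module, method_name)
# The detector is then constructed with the config, data and labels, e.g.:
# detector = DetectorClass(conf, trainingData, testData, labels)
# precision, recall = detector.execute()
```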
/Leg-UP/models/detector/SDLib/data/social.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | #from structure import sparseMatrix,new_sparseMatrix 3 | from tool.config import Config,LineConfig 4 | from tool.qmath import normalize 5 | import os.path 6 | from re import split 7 | 8 | class SocialDAO(object): 9 | def __init__(self,conf,relation=list()): 10 | self.config = conf 11 | self.user = {} #used to store the order of users 12 | self.relation = relation 13 | self.followees = {} 14 | self.followers = {} 15 | self.trustMatrix = self.__generateSet() 16 | 17 | def __generateSet(self): 18 | #triple = [] 19 | for line in self.relation: 20 | userId1,userId2,weight = line 21 | #add relations to dict 22 | if not self.followees.has_key(userId1): 23 | self.followees[userId1] = {} 24 | self.followees[userId1][userId2] = weight 25 | if not self.followers.has_key(userId2): 26 | self.followers[userId2] = {} 27 | self.followers[userId2][userId1] = weight 28 | # order the user 29 | if not self.user.has_key(userId1): 30 | self.user[userId1] = len(self.user) 31 | if not self.user.has_key(userId2): 32 | self.user[userId2] = len(self.user) 33 | #triple.append([self.user[userId1], self.user[userId2], weight]) 34 | #return new_sparseMatrix.SparseMatrix(triple) 35 | 36 | # def row(self,u): 37 | # #return user u's followees 38 | # return self.trustMatrix.row(self.user[u]) 39 | # 40 | # def col(self,u): 41 | # #return user u's followers 42 | # return self.trustMatrix.col(self.user[u]) 43 | # 44 | # def elem(self,u1,u2): 45 | # return self.trustMatrix.elem(u1,u2) 46 | 47 | def weight(self,u1,u2): 48 | if self.followees.has_key(u1) and self.followees[u1].has_key(u2): 49 | return self.followees[u1][u2] 50 | else: 51 | return 0 52 | 53 | # def trustSize(self): 54 | # return self.trustMatrix.size 55 | 56 | def getFollowers(self,u): 57 | if self.followers.has_key(u): 58 | return self.followers[u] 59 | else: 60 | return {} 61 | 62 | def getFollowees(self,u): 63 | if self.followees.has_key(u): 64 | return self.followees[u] 65 | else: 66 | return {} 67 | 68 | def hasFollowee(self,u1,u2): 69 | if self.followees.has_key(u1): 70 | if self.followees[u1].has_key(u2): 71 | return True 72 | else: 73 | return False 74 | return False 75 | 76 | def hasFollower(self,u1,u2): 77 | if self.followers.has_key(u1): 78 | if self.followers[u1].has_key(u2): 79 | return True 80 | else: 81 | return False 82 | return False 83 | -------------------------------------------------------------------------------- /AUSH/README.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | # Attacking Recommender Systems with Augmented User Profiles 4 | 5 | This repository contains one shilling attack algorithm, AUSH, published in the following paper [[ACM Library](https://dl.acm.org/doi/10.1145/3340531.3411884)] [[arXiv Preprint](https://arxiv.org/abs/2005.08164)]: 6 | 7 | > Chen Lin, Si Chen, Hui Li, Yanghua Xiao, Lianyun Li, and Qian Yang. 2020. Attacking Recommender Systems with Augmented User Profiles. In CIKM. 855–864. 8 | 9 | Please kindly cite our paper if you use it: 10 | 11 | @inproceedings{Lin2020Attacking, 12 | author = {Chen Lin and 13 | Si Chen and 14 | Hui Li and 15 | Yanghua Xiao and 16 | Lianyun Li and 17 | Qian Yang}, 18 | title = {Attacking Recommender Systems with Augmented User Profiles}, 19 | booktitle = {{CIKM}}, 20 | pages = {855--864}, 21 | year = {2020} 22 | } 23 | 24 | ## How to run AUSH. 
25 | ### Step1: Pre-processing 26 | Use `test_main\data_preprocess.py` to transform amazon 5-cores ratings to tuples `[userid, itemid, normalized float rating]`. 27 | 28 | Update on Dec 9, 2021: We have released several recommendation datasets for testing shilling attacks including the three datasets used in our CIKM'20 paper. You can directly use files in the [data](/data) folder for experiments. Please copy the data folder to the folder of AUSH before execution. 29 | 30 | ### Step2: Initialize 31 | Use `test_main\data_preprocess.py` 32 | - select attack target 33 | - select attack number (default fix 50) 34 | - select filler size 35 | - selected items and target users 36 | - settings for bandwagon attack 37 | 38 | ### Step3: Training and Evaluation 39 | 40 | - Train baseline attack models 41 | ```shell script 42 | python main_baseline_attack.py --dataset filmTrust --attack_methods average,segment,random,bandwagon --targets 601,623,619,64,558 --filler_num 36 --bandwagon_selected 103,98,115 --sample_filler 1 43 | ``` 44 | - Evaluate baseline attack models 45 | ```shell script 46 | python main_train_rec.py --dataset filmTrust --attack_method segment --model_name NMF_25 --target_ids 601,623,619,64,558 --filler_num 36 47 | ```` 48 | 49 | - RS performance before attack 50 | ```shell script 51 | python main_train_rec.py --dataset filmTrust --attack_method no --model_name NMF_25 --target_ids 601,623,619,64,558 --filler_num 36 52 | ```` 53 | 54 | - Train AUSH 55 | ```shell script 56 | python main_gan_attack.py --dataset filmTrust --target_ids 601,623,619,64,558 --filler_num 36 57 | ```` 58 | 59 | - Evaluate AUSH 60 | ```shell script 61 | python main_train_rec.py --dataset filmTrust --attack_method gan --model_name NMF_25 --target_ids 601,623,619,64,558 --filler_num 36 62 | ```` 63 | 64 | - Comparative Study 65 | ```shell script 66 | python main_eval_attack.py --dataset filmTrust --filler_num 36 --attack_methods gan,segment,average --rec_model_names NMF_25 --target_ids 601,623,619,64,558 67 | 68 | python main_eval_similarity.py --dataset filmTrust --filler_num 36 --targets 601,623 --bandwagon_selected 103,98,115 69 | ``` 70 | -------------------------------------------------------------------------------- /Leg-UP/models/detector/SDLib/tool/config.py: -------------------------------------------------------------------------------- 1 | import os.path 2 | from os.path import abspath 3 | 4 | 5 | class Config(object): 6 | def __init__(self, fileName): 7 | self.config = {} 8 | self.readConfiguration(fileName) 9 | 10 | def __getitem__(self, item): 11 | if not self.contains(item): 12 | print('parameter ' + item + ' is invalid!') 13 | exit(-1) 14 | return self.config[item] 15 | 16 | def getOptions(self, item): 17 | if not self.contains(item): 18 | print('parameter ' + item + ' is invalid!') 19 | exit(-1) 20 | return self.config[item] 21 | 22 | def contains(self, key): 23 | return key in self.config 24 | # return self.config.has_key(key) 25 | 26 | def get_keys(self): 27 | return self.config.keys() 28 | 29 | def readConfiguration(self, fileName): 30 | if not os.path.exists(abspath(fileName)): 31 | print('config file is not found!') 32 | raise IOError 33 | with open(fileName) as f: 34 | for ind, line in enumerate(f): 35 | if line.strip() != '': 36 | try: 37 | key, value = line.strip().split('=') 38 | self.config[key] = value 39 | except ValueError: 40 | print('config file is not in the correct format! 
Error Line:%d' % (ind)) 41 | 42 | 43 | class LineConfig(object): 44 | def __init__(self, content): 45 | self.line = content.strip().split(' ') 46 | self.options = {} 47 | self.mainOption = False 48 | if self.line[0] == 'on': 49 | self.mainOption = True 50 | elif self.line[0] == 'off': 51 | self.mainOption = False 52 | for i, item in enumerate(self.line): 53 | if (item.startswith('-') or item.startswith('--')) and not item[1:].isdigit(): 54 | ind = i + 1 55 | for j, sub in enumerate(self.line[ind:]): 56 | if (sub.startswith('-') or sub.startswith('--')) and not sub[1:].isdigit(): 57 | ind = j 58 | break 59 | if j == len(self.line[ind:]) - 1: 60 | ind = j + 1 61 | break 62 | try: 63 | self.options[item] = ' '.join(self.line[i + 1:i + 1 + ind]) 64 | except IndexError: 65 | self.options[item] = 1 66 | 67 | def __getitem__(self, item): 68 | if not self.contains(item): 69 | print('parameter ' + item + ' is invalid!') 70 | exit(-1) 71 | return self.options[item] 72 | 73 | def getOption(self, key): 74 | if not self.contains(key): 75 | print('parameter ' + key + ' is invalid!') 76 | exit(-1) 77 | return self.options[key] 78 | 79 | def isMainOn(self): 80 | return self.mainOption 81 | 82 | def contains(self, key): 83 | return key in self.options 84 | # return self.options.has_key(key) 85 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .idea/ 2 | build/ 3 | .DS_Store 4 | 5 | # Byte-compiled / optimized / DLL files 6 | __pycache__/ 7 | *.py[cod] 8 | *$py.class 9 | 10 | # C extensions 11 | *.so 12 | 13 | # Distribution / packaging 14 | .Python 15 | build/ 16 | develop-eggs/ 17 | dist/ 18 | downloads/ 19 | eggs/ 20 | .eggs/ 21 | lib/ 22 | lib64/ 23 | parts/ 24 | sdist/ 25 | var/ 26 | wheels/ 27 | share/python-wheels/ 28 | *.egg-info/ 29 | .installed.cfg 30 | *.egg 31 | MANIFEST 32 | 33 | # PyInstaller 34 | # Usually these files are written by a python script from a template 35 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 36 | *.manifest 37 | *.spec 38 | 39 | # Installer logs 40 | pip-log.txt 41 | pip-delete-this-directory.txt 42 | 43 | # Unit test / coverage reports 44 | htmlcov/ 45 | .tox/ 46 | .nox/ 47 | .coverage 48 | .coverage.* 49 | .cache 50 | nosetests.xml 51 | coverage.xml 52 | *.cover 53 | *.py,cover 54 | .hypothesis/ 55 | .pytest_cache/ 56 | cover/ 57 | 58 | # Translations 59 | *.mo 60 | *.pot 61 | 62 | # Django stuff: 63 | *.log 64 | local_settings.py 65 | db.sqlite3 66 | db.sqlite3-journal 67 | 68 | # Flask stuff: 69 | instance/ 70 | .webassets-cache 71 | 72 | # Scrapy stuff: 73 | .scrapy 74 | 75 | # Sphinx documentation 76 | docs/_build/ 77 | 78 | # PyBuilder 79 | .pybuilder/ 80 | target/ 81 | 82 | # Jupyter Notebook 83 | .ipynb_checkpoints 84 | 85 | # IPython 86 | profile_default/ 87 | ipython_config.py 88 | 89 | # pyenv 90 | # For a library or package, you might want to ignore these files since the code is 91 | # intended to run in multiple environments; otherwise, check them in: 92 | # .python-version 93 | 94 | # pipenv 95 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 96 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 97 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 98 | # install all needed dependencies. 99 | #Pipfile.lock 100 | 101 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 102 | __pypackages__/ 103 | 104 | # Celery stuff 105 | celerybeat-schedule 106 | celerybeat.pid 107 | 108 | # SageMath parsed files 109 | *.sage.py 110 | 111 | # Environments 112 | .env 113 | .venv 114 | env/ 115 | venv/ 116 | ENV/ 117 | env.bak/ 118 | venv.bak/ 119 | 120 | # Spyder project settings 121 | .spyderproject 122 | .spyproject 123 | 124 | # Rope project settings 125 | .ropeproject 126 | 127 | # mkdocs documentation 128 | /site 129 | 130 | # mypy 131 | .mypy_cache/ 132 | .dmypy.json 133 | dmypy.json 134 | 135 | # Pyre type checker 136 | .pyre/ 137 | 138 | # pytype static type analyzer 139 | .pytype/ 140 | 141 | # Cython debug symbols 142 | cython_debug/ 143 | 144 | # PyCharm 145 | # JetBrains specific template is maintainted in a separate JetBrains.gitignore that can 146 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 147 | # and can be added to the global gitignore or merged into this file. For a more nuclear 148 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 149 | #.idea/ -------------------------------------------------------------------------------- /Leg-UP/models/detector/SDLib/main/main.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | sys.path.append("../") 4 | from SDLib import SDLib 5 | from tool.config import Config 6 | 7 | if __name__ == '__main__': 8 | 9 | print('=' * 80) 10 | print(' SDLib: A Python library used to collect shilling detection methods.') 11 | print('=' * 80) 12 | print('Supervised Methods:') 13 | print('1. DegreeSAD 2.CoDetector 3.BayesDetector\n') 14 | print('Semi-Supervised Methods:') 15 | print('4. SemiSAD\n') 16 | print('Unsupervised Methods:') 17 | print('5. PCASelectUsers 6. FAP 7.timeIndex\n') 18 | print('-' * 80) 19 | algor = -1 20 | conf = -1 21 | order = 6 # input('please enter the num of the method to run it:') 22 | import time 23 | 24 | s = time.clock() 25 | # if order == 0: 26 | # try: 27 | # import seaborn as sns 28 | # except ImportError: 29 | # print '!!!To obtain nice data charts, ' \ 30 | # 'we strongly recommend you to install the third-party package !!!' 
31 | # conf = Config('../config/visual/visual.conf') 32 | # Display(conf).render() 33 | # exit(0) 34 | 35 | if order == 1: 36 | conf = Config('../config/DegreeSAD_tmp.conf') 37 | 38 | elif order == 2: 39 | conf = Config('../config/CoDetector.conf') 40 | 41 | elif order == 3: 42 | conf = Config('../config/BayesDetector.conf') 43 | 44 | elif order == 4: 45 | conf = Config('../config/SemiSAD.conf') 46 | 47 | elif order == 5: 48 | conf = Config('../config/PCASelectUsers.conf') 49 | 50 | elif order == 6: 51 | conf = Config('../config/FAP.conf') 52 | elif order == 7: 53 | conf = Config('../config/timeIndex.conf') 54 | 55 | else: 56 | print('Error num!') 57 | exit(-1) 58 | 59 | # ori conf info 60 | lines = [] 61 | with open('../config/FAP.conf', 'r') as fin: 62 | for line in fin: 63 | lines.append(line) 64 | random = [5, 395, 181, 565, 254] 65 | tail = [601, 623, 619, 64, 558] 66 | targets = random + tail 67 | # targets = [62, 1077, 785, 1419, 1257] + [1319, 1612, 1509, 1545, 1373] 68 | attack_methods = ["segment", "average", "random", "bandwagon", "gan"] 69 | for attack_method in attack_methods[0:]: 70 | for iid in targets: 71 | path = "../dataset/GAN/filmTrust/filmTrust_" + str(iid) + "_" + attack_method + "_50_36.dat" 72 | # path = "../dataset/GAN/ciao_1/ciao_" + str(iid) + "_" + attack_method + "_50_15.dat" 73 | lines[0] = 'ratings=' + path + '\n' 74 | # lines[-1] = "output.setup=on -dir ../results/ciao_DegreeSAD/" + attack_method + '/' 75 | lines[-1] = "output.setup=on -dir ../results/filmTrust_0903_FAP/" + attack_method + '/' 76 | with open('../config/FAP_t.conf', 'w') as fout: 77 | fout.write(''.join(lines)) 78 | sd = SDLib(Config('../config/FAP_t.conf')) 79 | result = sd.execute() 80 | # conf = Config('../config/DegreeSAD_t.conf') 81 | # conf = Config('../config/FAP_t.conf') 82 | # sd = SDLib(conf) 83 | # sd.execute() 84 | e = time.clock() 85 | print("Run time: %f s" % (e - s)) 86 | -------------------------------------------------------------------------------- /Leg-UP/models/detector/SDLib/baseclass/SDetection.py: -------------------------------------------------------------------------------- 1 | from models.detector.SDLib.data.rating import RatingDAO 2 | from models.detector.SDLib.tool.config import Config,LineConfig 3 | from os.path import abspath 4 | from time import strftime,localtime,time 5 | from models.detector.SDLib.tool.file import FileIO 6 | from sklearn.metrics import classification_report 7 | class SDetection(object): 8 | 9 | def __init__(self,conf,trainingSet=None,testSet=None,labels=None,fold='[1]'): 10 | self.config = conf 11 | self.isSave = False 12 | self.isLoad = False 13 | self.foldInfo = fold 14 | self.labels = labels 15 | self.dao = RatingDAO(self.config, trainingSet, testSet) 16 | self.training = [] 17 | self.trainingLabels = [] 18 | self.test = [] 19 | self.testLabels = [] 20 | 21 | def readConfiguration(self): 22 | self.algorName = self.config['methodName'] 23 | self.output = LineConfig(self.config['output.setup']) 24 | 25 | 26 | def printAlgorConfig(self): 27 | "show algorithm's configuration" 28 | # print ('Algorithm:',self.config['methodName']) 29 | # print ('Ratings dataSet:',abspath(self.config['ratings'])) 30 | # if LineConfig(self.config['evaluation.setup']).contains('-testSet'): 31 | # print ('Test set:',abspath(LineConfig(self.config['evaluation.setup']).getOption('-testSet'))) 32 | #print 'Count of the users in training set: ',len() 33 | # print ('Training set size: (user count: %d, item count %d, record count: %d)' %(self.dao.trainingSize())) 34 | # 
print ('Test set size: (user count: %d, item count %d, record count: %d)' %(self.dao.testSize())) 35 | # print ('='*80) 36 | pass 37 | 38 | def initModel(self): 39 | pass 40 | 41 | def buildModel(self): 42 | pass 43 | 44 | def saveModel(self): 45 | pass 46 | 47 | def loadModel(self): 48 | pass 49 | 50 | def predict(self): 51 | pass 52 | 53 | def execute(self): 54 | self.readConfiguration() 55 | if self.foldInfo == '[1]': 56 | self.printAlgorConfig() 57 | # load model from disk or build model 58 | if self.isLoad: 59 | # print ('Loading model %s...' % (self.foldInfo)) 60 | self.loadModel() 61 | else: 62 | # print ('Initializing model %s...' % (self.foldInfo)) 63 | self.initModel() 64 | # print ('Building Model %s...' % (self.foldInfo)) 65 | self.buildModel() 66 | 67 | # preict the ratings or item ranking 68 | # print ('Predicting %s...' % (self.foldInfo)) 69 | prediction = self.predict() 70 | report = classification_report(self.testLabels, prediction, digits=4) 71 | # currentTime = currentTime = strftime("%Y-%m-%d %H-%M-%S", localtime(time())) 72 | # FileIO.writeFile(self.output['-dir'],self.algorName+'@'+currentTime+self.foldInfo,report) 73 | # save model 74 | # if self.isSave: 75 | # print ('Saving model %s...' % (self.foldInfo)) 76 | # self.saveModel() 77 | # print (report) 78 | res = [[j for j in i.split(' ') if len(j)] for i in report.split('\n') if len(i.strip())>0][:3] 79 | precision, recall = res[-1][1:3] 80 | return precision, recall#report -------------------------------------------------------------------------------- /Leg-UP/utils/data_loader.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # @Time : 2020/11/27 15:34 3 | # @Author : chensi 4 | # @File : data_loader.py 5 | # @Software : PyCharm 6 | # @Desciption : None 7 | 8 | import random 9 | import numpy as np 10 | import torch 11 | 12 | # tf = None 13 | # try: 14 | # import tensorflow.compat.v1 as tf 15 | # 16 | # tf.disable_v2_behavior() 17 | # except: 18 | # import tensorflow as tf 19 | 20 | seed = 1234 21 | random.seed(seed) 22 | np.random.seed(seed) 23 | # tf.set_random_seed(seed) 24 | torch.manual_seed(seed) 25 | torch.cuda.manual_seed_all(seed) 26 | import pandas as pd 27 | import numpy as np 28 | from scipy.sparse import csr_matrix 29 | 30 | 31 | class DataLoader(object): 32 | 33 | def __init__(self, path_train, path_test, header=None, sep='\t', threshold=4, verbose=False): 34 | self.path_train = path_train 35 | self.path_test = path_test 36 | self.header = header if header is not None else ['user_id', 'item_id', 'rating'] 37 | self.sep = sep 38 | self.threshold = threshold 39 | self.verbose = verbose 40 | 41 | # load file as dataFrame 42 | # self.train_data, self.test_data, self.n_users, self.n_items = self.load_file_as_dataFrame() 43 | # dataframe to matrix 44 | # self.train_matrix, self.train_matrix_implicit = self.dataFrame_to_matrix(self.train_data) 45 | # self.test_matrix, self.test_matrix_implicit = self.dataFrame_to_matrix(self.test_data) 46 | 47 | def load_file_as_dataFrame(self): 48 | # load data to pandas dataframe 49 | if self.verbose: 50 | print("\nload data from %s ..." % self.path_train, flush=True) 51 | 52 | train_data = pd.read_csv(self.path_train, sep=self.sep, names=self.header, engine='python') 53 | train_data = train_data.loc[:, ['user_id', 'item_id', 'rating']] 54 | 55 | if self.verbose: 56 | print("load data from %s ..." 
% self.path_test, flush=True) 57 | test_data = pd.read_csv(self.path_test, sep=self.sep, names=self.header, engine='python').loc[:, 58 | ['user_id', 'item_id', 'rating']] 59 | test_data = test_data.loc[:, ['user_id', 'item_id', 'rating']] 60 | 61 | # data statics 62 | 63 | n_users = max(max(test_data.user_id.unique()), max(train_data.user_id.unique())) + 1 64 | n_items = max(max(test_data.item_id.unique()), max(train_data.item_id.unique())) + 1 65 | 66 | if self.verbose: 67 | print("Number of users : %d , Number of items : %d. " % (n_users, n_items), flush=True) 68 | print("Train size : %d , Test size : %d. " % (train_data.shape[0], test_data.shape[0]), flush=True) 69 | 70 | return train_data, test_data, n_users, n_items 71 | 72 | def dataFrame_to_matrix(self, data_frame, n_users, n_items): 73 | row, col, rating, implicit_rating = [], [], [], [] 74 | for line in data_frame.itertuples(): 75 | uid, iid, r = list(line)[1:] 76 | implicit_r = 1 if r >= self.threshold else 0 77 | 78 | row.append(uid) 79 | col.append(iid) 80 | rating.append(r) 81 | implicit_rating.append(implicit_r) 82 | 83 | matrix = csr_matrix((rating, (row, col)), shape=(n_users, n_items)) 84 | matrix_implicit = csr_matrix((implicit_rating, (row, col)), shape=(n_users, n_items)) 85 | return matrix, matrix_implicit 86 | -------------------------------------------------------------------------------- /AUSH/model/attack_model/gan_attack/models.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # @Time : 2019/8/24 10:43 3 | # @Author : chensi 4 | # @File : models.py 5 | # @Software : PyCharm 6 | # @Desciption : None 7 | 8 | try: 9 | import tensorflow.compat.v1 as tf 10 | 11 | tf.disable_v2_behavior() 12 | except: 13 | import tensorflow as tf 14 | import math 15 | 16 | 17 | class GAN_Attacker: 18 | def __init__(self): 19 | print("GAN Attack model") 20 | 21 | def DIS(self, input, inputDim, h, activation, hiddenLayers, _reuse=False): 22 | # input->hidden 23 | y, _, W, b = self.FullyConnectedLayer(input, inputDim, h, activation, "dis", 0, reuse=_reuse) 24 | 25 | # stacked hidden layers 26 | for layer in range(hiddenLayers - 1): 27 | y, _, W, b = self.FullyConnectedLayer(y, h, h, activation, "dis", layer + 1, reuse=_reuse) 28 | 29 | # hidden -> output 30 | y, _, W, b = self.FullyConnectedLayer(y, h, 1, "none", "dis", hiddenLayers + 1, reuse=_reuse) 31 | 32 | return y 33 | 34 | def GEN(self, input, num_item, h, outputDim, activation, decay, name="gen", _reuse=False): 35 | """ 36 | input : sparse filler vectors 37 | output : reconstructed selected vector 38 | """ 39 | # input+thnh 40 | # input_tanh = tf.nn.tanh(input) 41 | 42 | # input->hidden 43 | 44 | y, L2norm, W, b = self.FullyConnectedLayer(input, num_item, h // decay, activation, name, 0, reuse=_reuse) 45 | 46 | # stacked hidden layers 47 | h = h // decay 48 | layer = 0 49 | # for layer in range(hiddenLayers - 1): 50 | while True: 51 | y, this_L2, W, b = self.FullyConnectedLayer(y, h, h // decay, activation, name, layer + 1, reuse=_reuse) 52 | L2norm = L2norm + this_L2 53 | layer += 1 54 | if h // decay > outputDim: 55 | h = h // decay 56 | else: 57 | break 58 | # hidden -> output 59 | y, this_L2, W, b = self.FullyConnectedLayer(y, h // decay, outputDim, "none", name, layer + 1, reuse=_reuse) 60 | L2norm = L2norm + this_L2 61 | y = tf.nn.sigmoid(y) * 5 62 | return y, L2norm 63 | 64 | def FullyConnectedLayer(self, input, inputDim, outputDim, activation, model, layer, reuse=False): 65 | scale1 = math.sqrt(6 / (inputDim 
+ outputDim)) 66 | 67 | wName = model + "_W" + str(layer) 68 | bName = model + "_B" + str(layer) 69 | 70 | with tf.variable_scope(model) as scope: 71 | 72 | if reuse == True: 73 | scope.reuse_variables() 74 | 75 | W = tf.get_variable(wName, [inputDim, outputDim], 76 | initializer=tf.random_uniform_initializer(-scale1, scale1)) 77 | b = tf.get_variable(bName, [outputDim], initializer=tf.random_uniform_initializer(-0.01, 0.01)) 78 | 79 | y = tf.matmul(input, W) + b 80 | 81 | L2norm = tf.nn.l2_loss(W) + tf.nn.l2_loss(b) 82 | 83 | if activation == "none": 84 | y = tf.identity(y, name="output") 85 | return y, L2norm, W, b 86 | 87 | elif activation == "sigmoid": 88 | return tf.nn.sigmoid(y), L2norm, W, b 89 | 90 | elif activation == "tanh": 91 | return tf.nn.tanh(y), L2norm, W, b 92 | elif activation == "relu": 93 | return tf.nn.relu(y), L2norm, W, b 94 | -------------------------------------------------------------------------------- /Leg-UP/models/detector/SDLib/tool/plot.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import numpy as np 3 | #import seaborn as sns 4 | 5 | def drawLine(x,y,labels,xLabel,yLabel,title): 6 | f, ax = plt.subplots(1, 1, figsize=(10, 6), sharex=True) 7 | 8 | #f.tight_layout() 9 | #sns.set(style="darkgrid") 10 | 11 | palette = ['blue','orange','red','green','purple','pink'] 12 | # for i in range(len(ax)): 13 | # x1 = range(0, len(x)) 14 | #ax.set_xlim(min(x1)-0.2,max(x1)+0.2) 15 | # mini = 10000;max = -10000 16 | # for label in labels: 17 | # if mini>min(y[i][label]): 18 | # mini = min(y[i][label]) 19 | # if max 0 88 | ind2 = new_x2 > 0 89 | try: 90 | mean_x1 = float(new_x1.sum())/ind1.sum() 91 | mean_x2 = float(new_x2.sum())/ind2.sum() 92 | new_x1 = new_x1 - mean_x1 93 | new_x2 = new_x2 - mean_x2 94 | sum = new_x1.dot(new_x2) 95 | denom = sqrt((new_x1.dot(new_x1))*(new_x2.dot(new_x2))) 96 | return float(sum) / denom 97 | except ZeroDivisionError: 98 | return 0 99 | 100 | 101 | def similarity(x1,x2,sim): 102 | if sim == 'pcc': 103 | return pearson_sp(x1,x2) 104 | if sim == 'euclidean': 105 | return euclidean(x1,x2) 106 | else: 107 | return cosine_sp(x1, x2) 108 | 109 | 110 | def normalize(vec,maxVal,minVal): 111 | 'get the normalized value using min-max normalization' 112 | if maxVal > minVal: 113 | return float(vec-minVal)/(maxVal-minVal)+0.01 114 | elif maxVal==minVal: 115 | return vec/maxVal 116 | else: 117 | print ('error... 
maximum value is less than minimum value.') 118 | raise ArithmeticError 119 | 120 | def sigmoid(val): 121 | return 1/(1+exp(-val)) 122 | 123 | 124 | def denormalize(vec,maxVal,minVal): 125 | return minVal+(vec-0.01)*(maxVal-minVal) 126 | -------------------------------------------------------------------------------- /AUSH/test_main/main_train_rec.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # @Time : 2019/8/23 19:29 3 | # @Author : chensi 4 | # @File : main_train_rec.py 5 | # @Software : PyCharm 6 | # @Desciption : None 7 | import sys, os, argparse 8 | 9 | sys.path.append("../") 10 | from utils.load_data.load_data import * 11 | from model.trainer_rec import rec_trainer 12 | from model.trainer_rec_surprise import basic_rec 13 | from utils.attack.data_to_file import target_prediction_writer 14 | 15 | 16 | # os.environ["CUDA_VISIBLE_DEVICES"] = '0' 17 | 18 | 19 | def train_rec(data_set_name, model_name, attack_method, target_id, is_train): 20 | if attack_method == "no": 21 | attack_method = "" 22 | model_path = "../result/model_ckpt/" + '_'.join([model_name, data_set_name]) + ".ckpt" 23 | else: 24 | model_path = "../result/model_ckpt/" + '_'.join([model_name, data_set_name, attack_method]) + ".ckpt" 25 | path_train = "../data/data_attacked/" + '_'.join([data_set_name, str(target_id), attack_method]) + ".dat" 26 | path_test = "../data/data/" + data_set_name + "_test.dat" 27 | if attack_method == "": path_train = "../data/data/" + data_set_name + "_train.dat" 28 | 29 | # load_data 30 | dataset_class = load_data(path_train=path_train, path_test=path_test, 31 | header=['user_id', 'item_id', 'rating'], 32 | sep='\t', print_log=True) 33 | # train rec 34 | if model_name in ["IAutoRec", "UAutoRec", "NNMF"]: 35 | predictions, hit_ratios = rec_trainer(model_name, dataset_class, target_id, is_train, model_path) 36 | else: 37 | predictions, hit_ratios = basic_rec(model_name, path_train, path_test, target_id) 38 | 39 | # write to file 40 | dst_path = "../result/pred_result/" + '_'.join([model_name, data_set_name, str(target_id), attack_method]) 41 | dst_path = dst_path.strip('_') 42 | target_prediction_writer(predictions, hit_ratios, dst_path) 43 | 44 | 45 | def parse_arg(): 46 | parser = argparse.ArgumentParser() 47 | 48 | parser.add_argument('--dataset', type=str, default='automotive', help='input data_set_name,filmTrust or ml100k') 49 | 50 | parser.add_argument('--model_name', type=str, default='NMF_25', help='NNMF,IAutoRec,UAutoRec,NMF_25') 51 | 52 | parser.add_argument('--attack_method', type=str, default='G1', 53 | help='no,gan,segment,average,random,bandwagon') 54 | 55 | # filmTrust:random = [5, 395, 181, 565, 254] tail = [601, 623, 619, 64, 558] 56 | # ml100k:random = [62, 1077, 785, 1419, 1257] tail = [1319, 1612, 1509, 1545, 1373] 57 | # 5,395,181,565,254,601,623,619,64,558 58 | # 62,1077,785,1419,1257,1319,1612,1509,1545,1373 59 | # 1166,1574,759,494,549,1272,1728,1662,450,1456,595,566,764,1187,1816,1478,1721,2294,2413,1148 60 | parser.add_argument('--target_ids', type=str, default='866', 61 | help='attack target') 62 | 63 | parser.add_argument('--attack_num', type=int, default=50, 64 | help='num of attack fake user,50 for ml100k and filmTrust') 65 | 66 | parser.add_argument('--filler_num', type=int, default=4, 67 | help='num of filler items each fake user,90 for ml100k,36 for filmTrust') 68 | 69 | args = parser.parse_args() 70 | args.target_ids = list(map(int, args.target_ids.split(','))) 71 | return args 72 | 73 | 74 | 
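# Illustrative usage of this script (example values only; pick --filler_num per dataset as noted in the help strings above):
#   python main_train_rec.py --dataset ml100k --model_name IAutoRec --attack_method gan --target_ids 62,1077 --attack_num 50 --filler_num 90
# For any attack_method other than 'no', the main block below composes '<method>_<attack_num>_<filler_num>' (here 'gan_50_90'),
# so train_rec() reads the injected ratings from ../data/data_attacked/ml100k_<target_id>_gan_50_90.dat;
# with --attack_method no it trains on the clean file ../data/data/ml100k_train.dat instead.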
if __name__ == '__main__': 75 | """parse args""" 76 | args = parse_arg() 77 | 78 | """train""" 79 | if args.attack_method == 'no': 80 | attack_method_ = args.attack_method 81 | else: 82 | attack_method_ = '_'.join([args.attack_method, str(args.attack_num), str(args.filler_num)]) 83 | is_train = 1 84 | train_rec(args.dataset, args.model_name, attack_method_, args.target_ids[0], is_train=is_train) 85 | for target in args.target_ids[1:]: 86 | if args.attack_method == 'no': 87 | is_train = 0 88 | train_rec(args.dataset, args.model_name, attack_method_, target, is_train=is_train) 89 | -------------------------------------------------------------------------------- /data/ml100k/ml100k_target_users: -------------------------------------------------------------------------------- 1 | 1257 0,513,4,12,13,526,21,535,540,541,544,41,42,43,553,47,50,55,58,59,61,63,576,68,585,587,84,86,599,93,605,617,619,108,620,621,114,118,631,120,124,129,641,644,143,144,659,660,150,664,670,681,683,176,693,183,196,197,708,710,711,715,720,209,214,220,221,737,740,231,745,746,747,748,750,243,245,757,247,250,253,770,772,266,267,780,270,785,275,789,795,797,290,803,806,295,300,814,304,306,307,310,311,312,822,829,322,324,327,329,331,843,845,846,847,849,344,346,349,866,867,359,362,874,369,881,372,885,886,888,377,378,891,895,386,388,900,393,906,396,915,404,405,920,921,415,424,428,942,434,436,449,451,452,454,456,458,471,473,478,483,486,492,494,495,496,505 2 | 1419 0,513,641,6,647,520,649,263,392,393,13,653,527,912,275,660,21,534,150,282,795,540,157,797,415,544,290,294,298,43,300,428,302,177,310,822,58,59,314,193,449,326,839,456,714,715,333,846,335,605,93,94,221,349,98,483,360,748,492,750,494,505,882,243,756,757,372,631,889,378,891,124,895 3 | 785 0,513,4,13,526,21,535,540,541,544,41,42,43,553,47,50,55,58,59,61,63,576,68,585,587,84,86,599,605,617,619,108,620,621,114,118,631,120,124,129,641,644,647,143,144,660,150,664,670,681,683,176,177,693,183,196,197,708,710,711,715,720,209,214,220,221,737,740,231,745,746,747,748,750,243,245,757,247,250,253,770,772,266,267,780,270,785,795,797,290,803,806,295,300,302,814,304,306,307,310,311,312,822,829,322,324,327,329,331,843,846,847,849,344,345,346,349,866,867,359,362,874,879,369,881,372,885,886,888,377,378,891,895,386,388,900,391,392,393,906,396,915,405,920,921,415,424,942,436,449,451,452,454,456,458,471,473,478,483,486,492,494,495,496,505 4 | 1077 513,129,642,388,262,393,17,785,532,150,535,406,285,415,291,294,297,43,302,306,310,183,312,313,188,63,576,449,69,199,456,329,715,591,209,467,342,471,346,605,93,863,96,229,617,746,377,494,881,116,502,889,507 5 | 62 386,5,6,263,390,13,653,147,660,21,534,150,536,915,282,406,157,415,290,292,804,294,40,296,298,43,560,177,822,58,314,61,63,193,197,326,839,456,715,333,591,338,468,853,98,362,882,243,372,757,248,765,127 6 | 1319 0,6,15,17,20,22,23,537,547,550,552,42,48,560,565,58,61,63,71,591,592,91,93,605,95,607,98,108,621,628,117,137,143,662,663,173,177,187,193,200,212,213,215,221,231,232,746,750,757,248,249,762,763,263,266,268,275,290,291,292,804,294,295,296,805,302,822,322,327,839,333,338,853,342,344,351,863,869,360,879,373,885,888,377,378,891,384,388,405,406,415,928,931,420,424,428,429,942,436,456,467,473,479,492,496,498,499 7 | 1612 
0,513,4,12,13,526,21,535,540,541,544,41,42,43,553,47,50,55,58,59,61,63,576,68,585,587,84,86,599,93,605,617,619,108,620,621,114,118,631,120,124,129,641,644,647,143,144,659,660,150,664,670,681,683,176,177,693,183,196,197,708,710,711,715,720,209,214,220,221,737,740,231,745,746,747,748,750,243,245,757,247,250,253,770,772,266,267,780,270,785,275,789,795,797,290,803,806,295,300,302,814,304,306,307,310,311,312,822,829,322,324,327,329,331,843,845,846,847,849,344,345,346,349,866,867,359,362,874,879,369,881,372,885,886,888,377,378,891,895,386,388,900,391,392,393,906,396,915,404,405,920,921,415,424,428,942,434,436,449,451,452,454,456,458,471,473,478,483,486,492,494,495,496,505 8 | 1509 0,5,6,13,526,15,17,23,537,27,541,547,550,42,48,560,565,58,63,71,591,89,91,93,98,108,621,114,628,137,143,662,663,173,177,193,709,200,212,213,215,220,221,746,757,248,249,762,263,266,275,290,292,804,805,296,307,822,314,830,832,837,327,843,333,338,853,341,342,344,351,863,869,360,362,879,882,373,885,888,377,378,891,396,405,415,928,420,932,424,428,429,942,436,452,456,457,467,471,473,479,496,498 9 | 1545 0,6,15,17,20,22,23,537,547,550,552,42,48,560,565,58,61,63,71,591,592,91,93,605,95,607,98,108,621,628,117,137,143,662,663,173,177,187,193,200,212,213,215,221,231,232,746,750,757,248,249,762,763,263,266,268,275,290,291,292,804,294,295,296,805,302,822,322,327,839,333,338,853,342,344,351,863,869,360,879,373,885,888,377,378,891,384,388,405,406,415,928,931,420,424,428,429,942,436,456,467,473,479,492,496,498,499 10 | 1373 0,513,641,6,647,520,649,263,392,393,13,653,527,912,275,660,21,534,150,282,795,540,157,797,415,544,290,294,298,43,300,428,302,177,310,822,58,59,314,193,449,326,839,456,714,715,333,846,335,605,93,94,221,349,98,483,360,748,492,750,494,505,882,243,756,757,372,631,889,378,891,124,895 11 | 690 0,6,15,41,58,59,63,69,93,94,129,150,177,199,221,248,291,310,338,342,373,386,393,397,449,454,456,471,483,487,505,513,535,550,642,647,652,660,715,726,803,806,814,845,849,863,881,885,896 -------------------------------------------------------------------------------- /AUSH/model/attack_model/baseline.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # @Time : 2019/8/23 10:46 3 | # @Author : chensi 4 | # @File : baseline_new.py 5 | # @Software : PyCharm 6 | # @Desciption : None 7 | import numpy as np 8 | import math 9 | 10 | 11 | class BaselineAttack: 12 | 13 | def __init__(self, attack_num, filler_num, n_items, target_id, 14 | global_mean, global_std, item_means, item_stds, r_max, r_min, fixed_filler_indicator=None): 15 | # 16 | self.attack_num = attack_num 17 | self.filler_num = filler_num 18 | self.n_items = n_items 19 | self.target_id = target_id 20 | self.global_mean = global_mean 21 | self.global_std = global_std 22 | self.item_means = item_means 23 | self.item_stds = item_stds 24 | self.r_max = r_max 25 | self.r_min = r_min 26 | 27 | self.fixed_filler_indicator = fixed_filler_indicator 28 | 29 | def RandomAttack(self): 30 | filler_candis = list(set(range(self.n_items)) - {self.target_id}) 31 | fake_profiles = np.zeros(shape=[self.attack_num, self.n_items], dtype=float) 32 | # target 33 | fake_profiles[:, self.target_id] = self.r_max 34 | # fillers 35 | for i in range(self.attack_num): 36 | if self.fixed_filler_indicator is None: 37 | fillers = np.random.choice(filler_candis, size=self.filler_num, replace=False) 38 | else: 39 | 40 | fillers = np.where(np.array(self.fixed_filler_indicator[i])== 1)[0] 41 | ratings = np.random.normal(loc=self.global_mean, 
scale=self.global_std, size=self.filler_num) 42 | for f_id, r in zip(fillers, ratings): 43 | fake_profiles[i][f_id] = max(math.exp(-5), min(self.r_max, r)) 44 | return fake_profiles 45 | 46 | def BandwagonAttack(self, selected_ids): 47 | filler_candis = list(set(range(self.n_items)) - set([self.target_id] + selected_ids)) 48 | fake_profiles = np.zeros(shape=[self.attack_num, self.n_items], dtype=float) 49 | # target & selected patch 50 | fake_profiles[:, [self.target_id] + selected_ids] = self.r_max 51 | # fillers 52 | for i in range(self.attack_num): 53 | if self.fixed_filler_indicator is None: 54 | fillers = np.random.choice(filler_candis, size=self.filler_num, replace=False) 55 | else: 56 | 57 | fillers = np.where(np.array(self.fixed_filler_indicator[i])== 1)[0] 58 | ratings = np.random.normal(loc=self.global_mean, scale=self.global_std, size=self.filler_num) 59 | for f_id, r in zip(fillers, ratings): 60 | fake_profiles[i][f_id] = max(math.exp(-5), min(self.r_max, r)) 61 | return fake_profiles 62 | 63 | def AverageAttack(self): 64 | filler_candis = list(set(range(self.n_items)) - {self.target_id}) 65 | fake_profiles = np.zeros(shape=[self.attack_num, self.n_items], dtype=float) 66 | # target 67 | fake_profiles[:, self.target_id] = self.r_max 68 | # fillers 69 | fn_normal = lambda iid: np.random.normal(loc=self.item_means[iid], scale=self.item_stds[iid], size=1)[0] 70 | for i in range(self.attack_num): 71 | if self.fixed_filler_indicator is None: 72 | fillers = np.random.choice(filler_candis, size=self.filler_num, replace=False) 73 | else: 74 | 75 | fillers = np.where(np.array(self.fixed_filler_indicator[i])== 1)[0] 76 | ratings = map(fn_normal, fillers) 77 | for f_id, r in zip(fillers, ratings): 78 | fake_profiles[i][f_id] = max(math.exp(-5), min(self.r_max, r)) 79 | return fake_profiles 80 | 81 | def SegmentAttack(self, selected_ids): 82 | filler_candis = list(set(range(self.n_items)) - set([self.target_id] + selected_ids)) 83 | fake_profiles = np.zeros(shape=[self.attack_num, self.n_items], dtype=float) 84 | # target & selected patch 85 | fake_profiles[:, [self.target_id] + selected_ids] = self.r_max 86 | # fillers 87 | for i in range(self.attack_num): 88 | if self.fixed_filler_indicator is None: 89 | fillers = np.random.choice(filler_candis, size=self.filler_num, replace=False) 90 | else: 91 | 92 | fillers = np.where(np.array(self.fixed_filler_indicator[i])== 1)[0] 93 | fake_profiles[i][fillers] = self.r_min 94 | return fake_profiles 95 | -------------------------------------------------------------------------------- /Leg-UP/run.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # @Time : 2020/12/27 19:57 3 | # @Author : chensi 4 | # @File : run.py 5 | # @Software : PyCharm 6 | # @Desciption : None 7 | 8 | 9 | import argparse, os 10 | 11 | os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID" 12 | os.environ["CUDA_VISIBLE_DEVICES"] = "1, 2, 3" 13 | 14 | PythonCommand = 'python' # 'D:\Anaconda3\envs\py38_tf2\python' if os.path.exists('D:\Anaconda3') else 'python' 15 | 16 | 17 | class Run: 18 | def __init__(self): 19 | self.args = self.parse_args() 20 | self.args.attacker_list = self.args.attacker_list.split(',') 21 | self.args.recommender_list = self.args.recommender_list.split(',') 22 | 23 | def execute(self): 24 | 25 | self.step_1_Rec() 26 | 27 | self.step_2_Attack() 28 | 29 | return 30 | 31 | def parse_args(self): 32 | 33 | parser = argparse.ArgumentParser() 34 | parser.add_argument('--data_set', type=str, 
default='ml100k') # ml100k,filmTrust,automotive 35 | parser.add_argument('--attack_num', type=int, default=50) 36 | parser.add_argument('--filler_num', type=int, default=36) 37 | parser.add_argument('--cuda_id', type=int, default=3) 38 | parser.add_argument('--use_cuda', type=int, default=0) 39 | parser.add_argument('--batch_size_S', type=int, default=64) 40 | parser.add_argument('--batch_size_D', type=int, default=64) 41 | parser.add_argument("--surrogate", type=str, default="WMF") 42 | 43 | 44 | # ml100k:62,1077,785,1419,1257 45 | # filmTrust:5,395,181,565,254 46 | # automotive:119,422,594,884,1593 47 | parser.add_argument('--target_ids', type=str, default='62') 48 | # AUSH,AUSHplus,RecsysAttacker,DCGAN,WGAN,SegmentAttacker,BandwagonAttacker,AverageAttacker,RandomAttacker 49 | parser.add_argument('--attacker_list', type=str, default='AUSHplus') 50 | # SVD,NMF,SlopeOne,IAutoRec,UAutoRec,NeuMF 51 | parser.add_argument('--recommender_list', type=str, default='SVD,NMF,SlopeOne,IAutoRec,UAutoRec,NeuMF') 52 | return parser.parse_args() 53 | 54 | def step_1_Rec(self): 55 | print('step_1') 56 | args = self.args 57 | """ 58 | 59 | data_set/target_ids/train_path/test_path/model_path/target_prediction_path_prefix 60 | 61 | """ 62 | args_dict = { 63 | 'exe_model_lib': 'recommender', 64 | 'train_path': './data/%s/%s_train.dat' % (args.data_set, args.data_set), 65 | 'test_path': './data/%s/%s_test.dat' % (args.data_set, args.data_set), 66 | } 67 | args_dict.update(vars(args)) 68 | 69 | # 70 | for recommender in args.recommender_list: 71 | # 72 | cur_args_dict = { 73 | 'exe_model_class': recommender, 74 | 'model_path': './results/model_saved/%s/%s_%s' % (args.data_set, args.data_set, recommender), 75 | 'target_prediction_path_prefix': './results/performance/mid_results/%s/%s_%s' % ( 76 | args.data_set, args.data_set, recommender), 77 | } 78 | cur_args_dict.update(args_dict) 79 | 80 | args_str = ' '.join( 81 | ["--%s %s" % (k, v) for (k, v) in cur_args_dict.items()]) 82 | # 83 | print('%s ./execute_model.py %s' % (PythonCommand, args_str)) 84 | print(os.system('%s ./execute_model.py %s' % (PythonCommand, args_str))) 85 | 86 | def step_2_Attack(self): 87 | print('step_2') 88 | args = self.args 89 | 90 | args_dict = { 91 | 'exe_model_lib': 'attacker', 92 | # 'filler_num': 4, 93 | # 'epoch': 50 94 | } 95 | args_dict.update(vars(args)) 96 | 97 | for target_id in map(int, args.target_ids.split(',')): 98 | for attacker in args.attacker_list: 99 | cur_args_dict = { 100 | 'exe_model_class': attacker, 101 | 'target_id': target_id, 102 | 'injected_path': './results/data_attacked/%s/%s_%s_%d.data' % ( 103 | args.data_set, args.data_set, attacker, target_id) 104 | 105 | } 106 | cur_args_dict.update(args_dict) 107 | 108 | args_str = ' '.join(["--%s %s" % (k, v) for (k, v) in cur_args_dict.items()]) 109 | print(os.system('%s ./execute_model.py %s' % (PythonCommand, args_str))) 110 | # break 111 | 112 | model = Run() 113 | model.execute() 114 | 115 | 116 | -------------------------------------------------------------------------------- /Leg-UP/preprocess_data.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | import random 4 | import pandas as pd 5 | from pathlib import Path 6 | import json 7 | from sklearn.model_selection import train_test_split 8 | 9 | data_file = Path('data') 10 | data_set_name = 'GroceryFood' 11 | data_file = data_file / data_set_name / (data_set_name + 'Raw.json') 12 | 13 | df_gro = pd.DataFrame(columns=['user', 'item', 
'score']) 14 | data = [] 15 | with open(data_file, encoding='utf-8') as f: 16 | for line in f: 17 | data.append(json.loads(line)) 18 | print(f'data_set_len:{len(data)}') 19 | print(f'data head:\n{data[:5]}') 20 | 21 | user_set = set() 22 | item_set = set() 23 | data_list = [] 24 | for idx, d in enumerate(data): 25 | item_set.add(d['asin']) 26 | user_set.add(d["reviewerID"]) 27 | data_list.append([d["reviewerID"], d['asin'], d['overall']]) 28 | 29 | raw_df = pd.DataFrame(data_list, columns=['user', 'item', 'score']) 30 | 31 | user2idx = {x: idx for idx, x in enumerate(user_set)} 32 | item2idx = {x: idx for idx, x in enumerate(item_set)} 33 | 34 | 35 | def fun(item): 36 | return user2idx[item] 37 | 38 | def fun2(item): 39 | return item2idx[item] 40 | 41 | raw_df['user'] = raw_df['user'].apply(fun) 42 | raw_df['item'] = raw_df['item'].apply(fun2) 43 | 44 | print(f'raw data frame:') 45 | print(raw_df) 46 | 47 | user_cont = raw_df.groupby('user').count() 48 | filter_ratings = {i for i in list(user_cont[user_cont['item'] >= 17].index)} 49 | 50 | after_filter_df = pd.DataFrame(columns=['user', 'item', 'score']) 51 | 52 | 53 | all_data = [] 54 | for i in filter_ratings: 55 | each_i = raw_df[raw_df['user'] == i] 56 | all_data.append(each_i.values) 57 | after_filter_df = after_filter_df.append(each_i) 58 | 59 | train_list = [] 60 | test_list = [] 61 | train_df = pd.DataFrame(columns=['user', 'item', 'score']) 62 | test_df = pd.DataFrame(columns=['user', 'item', 'score']) 63 | for d in all_data: 64 | train, test = train_test_split(d, test_size=0.1, random_state=42) 65 | df = pd.DataFrame(train, columns=['user', 'item', 'score']) 66 | df2 = pd.DataFrame(test, columns=['user', 'item', 'score']) 67 | train_df = train_df.append(df) 68 | test_df = test_df.append(df2) 69 | print(f'train_df:{train_df}') 70 | print(f'test_df:{test_df}') 71 | 72 | item_count = raw_df.groupby('item').count().sort_values(by='user', ascending=False) 73 | print(item_count) 74 | target_item_first = [i for i in item_count[:int(0.1 * len(item_count))].index.values] 75 | target_item_last = [i for i in item_count[int(0.9 * len(item_count)):].index.values] 76 | target_item = target_item_first + target_item_last 77 | with open(f'data/{data_set_name}_target_item', 'w') as f: 78 | for i in target_item: 79 | f.write(str(int(i))) 80 | f.write('\n') 81 | 82 | with open(f'data/{data_set_name}_selected_items', 'a+') as f: 83 | for i in target_item: 84 | select_item = [i] 85 | while True: 86 | a = random.choice(target_item_first) 87 | if a not in select_item: 88 | select_item.append(a) 89 | if len(select_item) == 4: 90 | break 91 | f.write(str(select_item[0]) + '\t') 92 | f.write(str(select_item[1]) + ',' + str(select_item[2]) + ',' + str(select_item[3])) 93 | f.write('\n') 94 | 95 | 96 | 97 | user_cont = raw_df.groupby('user').count() 98 | filter_ratings = {i for i in list(user_cont[user_cont['item'] >= 17].index)} 99 | 100 | after_filter_df = pd.DataFrame(columns=['user', 'item', 'score']) 101 | 102 | all_data = [] 103 | for i in filter_ratings: 104 | each_i = raw_df[raw_df['user'] == i] 105 | all_data.append(each_i.values) 106 | after_filter_df = after_filter_df.append(each_i) 107 | 108 | # all_data = [] 109 | # for i in filter_ratings: 110 | # each_i = raw_df[raw_df['user'] == i] 111 | # all_data.append(each_i.values) 112 | # after_filter_df = after_filter_df.append(each_i) 113 | 114 | 115 | 116 | # dfv = train_df.values 117 | # print(dfv) 118 | # with open(f'data/{data_set_name}_train.dat', 'a', encoding='utf-8') as f: 119 | # for d in 
dfv: 120 | # for idx, i in enumerate(d): 121 | # if idx != 2:f.write(str(int(i))) 122 | # else : f.write(str(i)) 123 | # if idx != 2: f.write('\t') 124 | # f.write('\n') 125 | # dfv = train_df.values 126 | # 127 | # dfv = test_df.values 128 | # with open(f'data/{data_set_name}_test.dat', 'a', encoding='utf-8') as f: 129 | # for d in dfv: 130 | # for idx, i in enumerate(d): 131 | # if idx != 2: f.write(str(int(i))) 132 | # else: f.write(str(i)) 133 | # if idx != 2: f.write('\t') 134 | # f.write('\n') 135 | -------------------------------------------------------------------------------- /AUSH/test_main/main_gan_attack.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # @Time : 2019/8/24 11:08 3 | # @Author : chensi 4 | # @File : main_gan_attack.py 5 | # @Software : PyCharm 6 | # @Desciption : None 7 | 8 | import sys 9 | 10 | sys.path.append("../") 11 | import os, argparse 12 | from utils.load_data.load_data import load_data 13 | from model.attack_model.gan_attack.trainer import Train_GAN_Attacker 14 | from utils.load_data.load_attack_info import load_attack_info 15 | from utils.attack.data_to_file import * 16 | import numpy as np 17 | 18 | 19 | # os.environ["CUDA_VISIBLE_DEVICES"] = '2' 20 | 21 | 22 | def gan_attack(data_set_name, attack_method, target_id, is_train, write_to_file=1, final_attack_setting=None): 23 | 24 | path_train = '../data/data/' + data_set_name + '_train.dat' 25 | path_test = '../data/data/' + data_set_name + '_test.dat' 26 | attack_info_path = ["../data/data/" + data_set_name + "_selected_items", 27 | "../data/data/" + data_set_name + "_target_users"] 28 | model_path = "../result/model_ckpt/" + '_'.join([data_set_name, attack_method, str(target_id)]) + ".ckpt" 29 | 30 | 31 | attack_info = load_attack_info(*attack_info_path) 32 | dataset_class = load_data(path_train=path_train, path_test=path_test, header=['user_id', 'item_id', 'rating'], 33 | sep='\t', print_log=True) 34 | 35 | if len(attack_method.split('_')[1:]) == 2: 36 | attack_num, filler_num = map(int, attack_method.split('_')[1:]) 37 | filler_method = 0 38 | else: 39 | attack_num, filler_num, filler_method = map(int, attack_method.split('_')[1:]) 40 | selected_items = attack_info[target_id][0] 41 | 42 | # 43 | gan_attacker = Train_GAN_Attacker(dataset_class, params_D=None, params_G=None, target_id=target_id, 44 | selected_id_list=selected_items, 45 | filler_num=filler_num, attack_num=attack_num, filler_method=filler_method) 46 | 47 | fake_profiles, real_profiles, filler_indicator = gan_attacker.execute(is_train=is_train, model_path=model_path, 48 | final_attack_setting=final_attack_setting) 49 | gan_attacker.sess.close() 50 | 51 | # """inject and write to file""" 52 | if write_to_file == 1: 53 | dst_path = "../data/data_attacked/" + '_'.join([data_set_name, str(target_id), attack_method]) + ".dat" 54 | attacked_file_writer(path_train, dst_path, fake_profiles, dataset_class.n_users) 55 | return fake_profiles, real_profiles, filler_indicator 56 | 57 | 58 | def parse_arg(): 59 | parser = argparse.ArgumentParser() 60 | 61 | parser.add_argument('--dataset', type=str, default='ml100k', help='filmTrust/ml100k/grocery') 62 | 63 | # filmTrust:random = [5, 395, 181, 565, 254] tail = [601, 623, 619, 64, 558] 64 | # ml100k:random = [62, 1077, 785, 1419, 1257] tail = [1319, 1612, 1509, 1545, 1373] 65 | # 5,395,181,565,254,601,623,619,64,558 66 | # 62,1077,785,1419,1257,1319,1612,1509,1545,1373 67 | parser.add_argument('--target_ids', type=str, 
default='62,1077,785,1419,1257,1319,1612,1509,1545,1373', 68 | help='attack target list') 69 | 70 | parser.add_argument('--attack_num', type=int, default=50, 71 | help='num of attack fake user,50 for ml100k and filmTrust') 72 | 73 | parser.add_argument('--filler_num', type=int, default=90, 74 | help='num of filler items each fake user,90 for ml100k,36 for filmTrust') 75 | 76 | parser.add_argument('--filler_method', type=str, default='', help='0/1/2/3') 77 | 78 | parser.add_argument('--write_to_file', type=int, default=1, help='write to fake profile to file or return array') 79 | # 80 | args = parser.parse_args() 81 | # 82 | args.target_ids = list(map(int, args.target_ids.split(','))) 83 | return args 84 | 85 | 86 | if __name__ == '__main__': 87 | """parse args""" 88 | args = parse_arg() 89 | """train""" 90 | is_train = 1 91 | attack_method = '_'.join(['gan', str(args.attack_num), str(args.filler_num), str(args.filler_method)]).strip('_') 92 | 93 | # 94 | for target_id in args.target_ids: 95 | 96 | attackSetting_path = '_'.join(map(str, [args.dataset, args.attack_num, args.filler_num, target_id])) 97 | attackSetting_path = "../data/data_attacked/" + attackSetting_path + '_attackSetting' 98 | real_profiles, filler_indicator = np.load(attackSetting_path + '.npy') 99 | final_attack_setting = [args.attack_num, real_profiles, filler_indicator] 100 | 101 | 102 | _ = gan_attack(args.dataset, attack_method, target_id, is_train, 103 | write_to_file=args.write_to_file, 104 | final_attack_setting=final_attack_setting) 105 | -------------------------------------------------------------------------------- /AUSH/utils/load_data/load_data.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # @Time : 2019/8/22 10:07 3 | # @Author : chensi 4 | # @File : load_data_new.py 5 | # @Software : PyCharm 6 | # @Desciption : None 7 | 8 | 9 | import pandas as pd 10 | import numpy as np 11 | 12 | from scipy.sparse import csr_matrix 13 | 14 | 15 | class load_data(): 16 | 17 | def __init__(self, path_train, path_test, 18 | header=None, sep='\t', threshold=4, print_log=True): 19 | self.path_train = path_train 20 | self.path_test = path_test 21 | self.header = header if header is not None else ['user_id', 'item_id', 'rating'] 22 | self.sep = sep 23 | self.threshold = threshold 24 | self.print_log = print_log 25 | 26 | self._main_load() 27 | 28 | def _main_load(self): 29 | # load data 30 | self._load_file() 31 | # 32 | # dataframe to matrix 33 | self.train_matrix, self.train_matrix_implicit = self._data_to_matrix(self.train_data) 34 | self.test_matrix, self.test_matrix_implicit = self._data_to_matrix(self.test_data) 35 | 36 | def _load_file(self): 37 | if self.print_log: 38 | print("load train/test data\t:\n", self.path_train) 39 | self.train_data = pd.read_csv(self.path_train, sep=self.sep, names=self.header, engine='python').loc[:, 40 | ['user_id', 'item_id', 'rating']] 41 | self.test_data = pd.read_csv(self.path_test, sep=self.sep, names=self.header, engine='python').loc[:, 42 | ['user_id', 'item_id', 'rating']] 43 | 44 | self.n_users = len(set(self.test_data.user_id.unique()) | set(self.train_data.user_id.unique())) 45 | self.n_items = len(set(self.test_data.item_id.unique()) | set(self.train_data.item_id.unique())) 46 | 47 | if self.print_log: 48 | print("Number of users:", self.n_users, ",Number of items:", self.n_items, flush=True) 49 | print("Train size:", self.train_data.shape[0], ",Test size:", self.test_data.shape[0], flush=True) 50 | 51 | def 
_data_to_matrix(self, data_frame): 52 | row, col, rating, implicit_rating = [], [], [], [] 53 | for line in data_frame.itertuples(): 54 | uid, iid, r = list(line)[1:] 55 | implicit_r = 1 if r >= self.threshold else 0 56 | 57 | row.append(uid) 58 | col.append(iid) 59 | rating.append(r) 60 | implicit_rating.append(implicit_r) 61 | 62 | matrix = csr_matrix((rating, (row, col)), shape=(self.n_users, self.n_items)) 63 | matrix_implicit = csr_matrix((implicit_rating, (row, col)), shape=(self.n_users, self.n_items)) 64 | return matrix, matrix_implicit 65 | 66 | def get_global_mean_std(self): 67 | return self.train_matrix.data.mean(), self.train_matrix.data.std() 68 | 69 | def get_all_mean_std(self): 70 | flag = 1 71 | for v in ['global_mean', 'global_std', 'item_means', 'item_stds']: 72 | if not hasattr(self, v): 73 | flag = 0 74 | break 75 | if flag == 0: 76 | global_mean, global_std = self.get_global_mean_std() 77 | item_means, item_stds = [global_mean] * self.n_items, [global_std] * self.n_items 78 | train_matrix_t = self.train_matrix.transpose() 79 | for iid in range(self.n_items): 80 | item_vec = train_matrix_t.getrow(iid).toarray()[0] 81 | ratings = item_vec[np.nonzero(item_vec)] 82 | if len(ratings) > 0: 83 | item_means[iid], item_stds[iid] = ratings.mean(), ratings.std() 84 | self.global_mean, self.global_std, self.item_means, self.item_stds \ 85 | = global_mean, global_std, item_means, item_stds 86 | return self.global_mean, self.global_std, self.item_means, self.item_stds 87 | 88 | def get_item_pop(self): 89 | # item_pops = [0] * self.n_items 90 | # train_matrix_t = self.train_matrix.transpose() 91 | # for iid in range(self.n_items): 92 | # item_vec = train_matrix_t.getrow(iid).toarray()[0] 93 | # item_pops[iid] = len(np.nonzero(item_vec)[0]) 94 | item_pops_dict = dict(self.train_data.groupby('item_id').size()) 95 | item_pops = [0] * self.n_items 96 | for iid in item_pops_dict.keys(): 97 | item_pops[iid] = item_pops_dict[iid] 98 | return item_pops 99 | 100 | def get_user_nonrated_items(self): 101 | non_rated_indicator = self.train_matrix.toarray() 102 | non_rated_indicator[non_rated_indicator > 0] = 1 103 | non_rated_indicator = 1 - non_rated_indicator 104 | user_norated_items = {} 105 | for uid in range(self.n_users): 106 | user_norated_items[uid] = list(non_rated_indicator[uid].nonzero()[0]) 107 | return user_norated_items 108 | 109 | def get_item_nonrated_users(self, item_id): 110 | item_vec = np.squeeze(self.train_matrix[:, item_id].toarray()) 111 | # item_vec = self.train_matrix.toarray().transpose()[item_id] 112 | item_vec[item_vec > 0] = 1 113 | non_rated_indicator = 1 - item_vec 114 | return list(non_rated_indicator.nonzero()[0]) 115 | -------------------------------------------------------------------------------- /Leg-UP/utils/utils.py: -------------------------------------------------------------------------------- 1 | import random 2 | 3 | import numpy as np 4 | import torch 5 | from scipy import sparse 6 | 7 | EPSILON = 1e-12 8 | _fixed_target_items = { 9 | "head": np.asarray([259, 2272, 3010, 6737, 7690]), 10 | "tail": np.asarray([5611, 9213, 10359, 10395, 12308]), 11 | "upper_torso": np.asarray([1181, 1200, 2725, 4228, 6688]), 12 | "lower_torso": np.asarray([3227, 5810, 7402, 9272, 10551]) 13 | } 14 | 15 | 16 | def sample_target_items(train_data, n_samples, popularity, use_fix=False): 17 | """Sample target items with certain popularity.""" 18 | if popularity not in ["head", "upper_torso", "lower_torso", "tail"]: 19 | raise ValueError("Unknown popularity type 
{}.".format(popularity)) 20 | 21 | n_items = train_data.shape[1] # 14007 22 | all_items = np.arange(n_items) # [0, 1, 2, ... , 14006] 23 | item_clicks = train_data.toarray().sum(0) 24 | 25 | valid_items = [] 26 | if use_fix: 27 | valid_items = _fixed_target_items[popularity] 28 | else: 29 | bound_head = np.percentile(item_clicks, 95) 30 | bound_torso = np.percentile(item_clicks, 75) 31 | bound_tail = np.percentile(item_clicks, 50) 32 | if popularity == "head": 33 | valid_items = all_items[item_clicks > bound_head] 34 | elif popularity == "tail": 35 | valid_items = all_items[item_clicks < bound_tail] 36 | elif popularity == "upper_torso": 37 | valid_items = all_items[(item_clicks > bound_torso) & (item_clicks < bound_head)] 38 | elif popularity == "lower_torso": 39 | valid_items = all_items[(item_clicks > bound_tail) & (item_clicks < bound_torso)] 40 | 41 | if len(valid_items) < n_samples: 42 | raise ValueError("Cannot sample enough items that meet criteria.") 43 | 44 | np.random.shuffle(valid_items) 45 | sampled_items = valid_items[:n_samples] 46 | sampled_items.sort() 47 | print("Sampled target items: {}".format(sampled_items.tolist())) 48 | 49 | return sampled_items 50 | 51 | 52 | def set_seed(seed, cuda=False): 53 | """Set seed globally.""" 54 | np.random.seed(seed) 55 | random.seed(seed) 56 | if cuda: 57 | torch.cuda.manual_seed(seed) 58 | torch.backends.cudnn.deterministic = True 59 | else: 60 | torch.manual_seed(seed) 61 | 62 | 63 | def minibatch(*tensors, **kwargs): 64 | """Mini-batch generator for pytorch tensor.""" 65 | batch_size = kwargs.get('batch_size', 128) # 2048 66 | 67 | if len(tensors) == 1: # √ 68 | tensor = tensors[0] 69 | for i in range(0, len(tensor), batch_size): # len(tensor) = 14007 70 | yield tensor[i:i + batch_size] 71 | else: 72 | for i in range(0, len(tensors[0]), batch_size): 73 | yield tuple(x[i:i + batch_size] for x in tensors) 74 | 75 | 76 | def shuffle(*arrays, **kwargs): 77 | """Shuffle arrays.""" 78 | require_indices = kwargs.get('indices', False) 79 | 80 | if len(set(len(x) for x in arrays)) != 1: 81 | raise ValueError('All inputs to shuffle must have ' 82 | 'the same length.') 83 | 84 | shuffle_indices = np.arange(len(arrays[0])) 85 | np.random.shuffle(shuffle_indices) 86 | 87 | if len(arrays) == 1: 88 | result = arrays[0][shuffle_indices] 89 | else: 90 | result = tuple(x[shuffle_indices] for x in arrays) 91 | 92 | if require_indices: 93 | return result, shuffle_indices 94 | else: 95 | return result 96 | 97 | 98 | def sparse2tensor(sparse_data): 99 | """Convert sparse csr matrix to pytorch tensor.""" 100 | return torch.FloatTensor(sparse_data.toarray()) 101 | 102 | 103 | def tensor2sparse(tensor): 104 | """Convert pytorch tensor to sparse csr matrix.""" 105 | return sparse.csr_matrix(tensor.detach().cpu().numpy()) 106 | 107 | 108 | def stack_csrdata(data1, data2): 109 | """Stack two sparse csr matrix.""" 110 | return sparse.vstack((data1, data2), format="csr") 111 | 112 | 113 | def save_fake_data(fake_data, path): 114 | """Save fake data to file.""" 115 | file_path = "%s.npz" % path 116 | print("Saving fake data to {}".format(file_path)) 117 | sparse.save_npz(file_path, fake_data) 118 | return file_path 119 | 120 | 121 | def load_fake_data(file_path): 122 | """Load fake data from file.""" 123 | fake_data = sparse.load_npz(file_path) 124 | print("Loaded fake data from {}".format(file_path)) 125 | return fake_data 126 | 127 | 128 | def save_checkpoint(model, optimizer, path, epoch=-1): 129 | """Save model checkpoint and optimizer state to file.""" 130 | 
state = { 131 | "epoch": epoch, 132 | "state_dict": model.state_dict(), 133 | "optimizer": optimizer.state_dict(), 134 | } 135 | file_path = "%s.pt" % path 136 | print("Saving checkpoint to {}".format(file_path)) 137 | torch.save(state, file_path) 138 | 139 | 140 | def load_checkpoint(path): 141 | """Load model checkpoint and optimizer state from file.""" 142 | file_path = "%s.pt" % path 143 | state = torch.load(file_path, map_location=torch.device('cpu')) 144 | print("Loaded checkpoint from {} (epoch {})".format( 145 | file_path, state["epoch"])) 146 | return state["epoch"], state["state_dict"], state["optimizer"] 147 | -------------------------------------------------------------------------------- /AUSH/model/trainer_rec_surprise.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # @Time : 2019/8/23 15:24 3 | # @Author : chensi 4 | # @File : cf.py 5 | # @Software : PyCharm 6 | # @Desciption : None 7 | 8 | import os 9 | from surprise import Dataset, Reader, accuracy 10 | from surprise import SVD, SVDpp, NMF, KNNBasic, KNNWithMeans, KNNWithZScore 11 | from surprise.model_selection import PredefinedKFold 12 | from collections import defaultdict 13 | 14 | 15 | def get_top_n(predictions, n=50): 16 | # First map the predictions to each user. 17 | top_n = defaultdict(list) 18 | for uid, iid, true_r, est, _ in predictions: 19 | top_n[uid].append((iid, est)) 20 | # Then sort the predictions for each user and retrieve the k highest ones. 21 | for uid, user_ratings in top_n.items(): 22 | user_ratings.sort(key=lambda x: x[1], reverse=True) 23 | top_n[uid] = user_ratings[:n] 24 | return top_n 25 | 26 | 27 | def get_model(model_name): 28 | algo = None 29 | if 'KNN' in model_name: 30 | model_name = model_name.split('_') 31 | knn_model_name = model_name[0] 32 | user_based = False if len(model_name) > 1 and model_name[1] == 'I' else True 33 | dis_method = 'msd' if len(model_name) < 3 else model_name[2] 34 | k = 20 if len(model_name) < 4 else int(model_name[3]) 35 | sim_options = {'user_based': user_based, 'name': dis_method} 36 | if knn_model_name == 'KNNBasic': 37 | algo = KNNBasic(sim_options=sim_options, k=k) 38 | elif knn_model_name == 'KNNWithMeans': 39 | algo = KNNWithMeans(sim_options=sim_options, k=k) 40 | elif knn_model_name == 'KNNWithZScore': 41 | algo = KNNWithZScore(sim_options=sim_options, k=k) 42 | elif 'SVDpp' in model_name or 'SVD' in model_name or 'NMF' in model_name: 43 | model_name = model_name.split('_') 44 | n_factors = 25 if len(model_name) == 1 else int(model_name[1]) 45 | if model_name[0] == 'SVDpp': 46 | algo = SVDpp(n_factors=n_factors) 47 | elif model_name[0] == 'SVD': 48 | algo = SVD(n_factors=n_factors) 49 | elif model_name[0] == 'NMF': 50 | algo = NMF(n_factors=n_factors) 51 | return algo 52 | 53 | 54 | def get_model_old(model_name): 55 | algo = None 56 | if model_name == 'KNNBasic_U': 57 | sim_options = {'user_based': True} 58 | algo = KNNBasic(sim_options=sim_options, k=20) 59 | elif model_name == 'KNNBasic_I': 60 | sim_options = {'user_based': False} 61 | algo = KNNBasic(sim_options=sim_options, k=20) 62 | # algo = KNNBasic() 63 | elif model_name == 'KNNWithMeans_I': 64 | algo = KNNWithMeans(sim_options={'user_based': False}, k=20) 65 | elif model_name == 'KNNWithMeans_U': 66 | algo = KNNWithMeans(sim_options={'user_based': True}, k=20) 67 | elif model_name == 'KNNWithZScore_I': 68 | algo = KNNWithZScore(sim_options={'user_based': False}, k=20) 69 | elif model_name == 'KNNWithZScore_U': 70 | algo = 
KNNWithZScore(sim_options={'user_based': True}, k=20) 71 | elif model_name == 'SVDpp': 72 | algo = SVDpp() 73 | elif model_name == 'SVD': 74 | algo = SVD() 75 | elif model_name == 'NMF': 76 | algo = NMF() 77 | elif 'NMF_' in model_name: 78 | n_factors = int(model_name.split("_")[1]) 79 | algo = NMF(n_factors=n_factors) 80 | elif 'SVDpp_' in model_name: 81 | n_factors = int(model_name.split("_")[1]) 82 | algo = SVDpp(n_factors=n_factors) 83 | elif 'SVD_' in model_name: 84 | n_factors = int(model_name.split("_")[1]) 85 | algo = SVD(n_factors=n_factors) 86 | elif 'KNNBasic_U_' in model_name: 87 | k = int(model_name.split("_")[-1]) 88 | sim_options = {'user_based': True} 89 | algo = KNNBasic(sim_options=sim_options, k=k) 90 | elif 'KNNBasic_I_' in model_name: 91 | k = int(model_name.split("_")[-1]) 92 | sim_options = {'user_based': False} 93 | algo = KNNBasic(sim_options=sim_options, k=k) 94 | return algo 95 | 96 | 97 | def basic_rec(model_name, train_path, test_path, target_id): 98 | # build data 99 | # TODO check float and min_r 100 | reader = Reader(line_format='user item rating', sep='\t', rating_scale=(1, 5)) 101 | data = Dataset.load_from_folds([(train_path, test_path)], reader=reader) 102 | trainset, testset = None, None 103 | pkf = PredefinedKFold() 104 | for trainset_, testset_ in pkf.split(data): 105 | trainset, testset = trainset_, testset_ 106 | 107 | # train model 108 | rec_algo = get_model(model_name) 109 | rec_algo.fit(trainset) 110 | # eval 111 | preds = rec_algo.test(testset) 112 | rmse = accuracy.rmse(preds, verbose=True) 113 | 114 | # predor target 115 | fn_pred = lambda uid: rec_algo.predict(str(uid), str(target_id), r_ui=0).est 116 | target_predictions = list(map(fn_pred, range(trainset.n_users))) 117 | 118 | # topn 119 | testset = trainset.build_anti_testset() 120 | predictions = rec_algo.test(testset) 121 | top_n = get_top_n(predictions, n=50) 122 | 123 | hit_ratios = {} 124 | for uid, user_ratings in top_n.items(): 125 | topN = [int(iid) for (iid, _) in user_ratings] 126 | hits = [1 if target_id in topN[:i] else 0 for i in [1, 3, 5, 10, 20, 50]] 127 | hit_ratios[int(uid)] = hits 128 | return target_predictions, hit_ratios 129 | -------------------------------------------------------------------------------- /AUSH/test_main/main_gan_attack_baseline.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # @Time : 2019/8/24 11:08 3 | # @Author : chensi 4 | # @File : main_gan_attack_baseline.py 5 | # @Software : PyCharm 6 | # @Desciption : None 7 | 8 | import sys 9 | 10 | sys.path.append("../") 11 | import os, argparse 12 | from utils.load_data.load_data import load_data 13 | from model.attack_model.gan_attack.trainer_baseline import Train_G_Attacker 14 | from utils.load_data.load_attack_info import load_attack_info 15 | from utils.attack.data_to_file import * 16 | import numpy as np 17 | 18 | 19 | # os.environ["CUDA_VISIBLE_DEVICES"] = '2' 20 | 21 | 22 | def gan_attack(data_set_name, attack_method, target_id, is_train, write_to_file=1, final_attack_setting=None): 23 | path_train = '../data/data/' + data_set_name + '_train.dat' 24 | path_test = '../data/data/' + data_set_name + '_test.dat' 25 | attack_info_path = ["../data/data/" + data_set_name + "_selected_items", 26 | "../data/data/" + data_set_name + "_target_users"] 27 | 28 | attack_info = load_attack_info(*attack_info_path) 29 | dataset_class = load_data(path_train=path_train, path_test=path_test, header=['user_id', 'item_id', 'rating'], 30 | sep='\t', 
print_log=True) 31 | 32 | if len(attack_method.split('_')[1:]) == 2: 33 | attack_num, filler_num = map(int, attack_method.split('_')[1:]) 34 | filler_method = 0 35 | else: 36 | attack_num, filler_num, filler_method = map(int, attack_method.split('_')[1:]) 37 | 38 | loss_setting = int(attack_method.split('_')[0][-1]) 39 | selected_items = attack_info[target_id][0] 40 | model_path = "../result/model_ckpt/" + '_'.join([data_set_name, attack_method, str(target_id)]) + ".ckpt" 41 | 42 | # 43 | gan_attacker = Train_G_Attacker(dataset_class, params_D=None, params_G=None, target_id=target_id, 44 | selected_id_list=selected_items, 45 | filler_num=filler_num, attack_num=attack_num, filler_method=filler_method, 46 | loss_setting=loss_setting) 47 | # if is_train: 48 | # fake_profiles = gan_attacker.execute(is_train=True, model_path=model_path) 49 | # else: 50 | # fake_profiles, real_profiles = gan_attacker.execute(is_train=False, model_path=model_path) 51 | # if write_to_file == 0: 52 | # return fake_profiles, real_profiles 53 | fake_profiles, real_profiles, filler_indicator = gan_attacker.execute(is_train=is_train, model_path=model_path, 54 | final_attack_setting=final_attack_setting) 55 | gan_attacker.sess.close() 56 | # """inject and write to file""" 57 | if write_to_file == 1: 58 | dst_path = "../data/data_attacked/" + '_'.join([data_set_name, str(target_id), attack_method]) + ".dat" 59 | attacked_file_writer(path_train, dst_path, fake_profiles, dataset_class.n_users) 60 | return fake_profiles, real_profiles, filler_indicator 61 | 62 | 63 | def parse_arg(): 64 | parser = argparse.ArgumentParser() 65 | 66 | parser.add_argument('--dataset', type=str, default='automotive', help='filmTrust/ml100k/grocery') 67 | 68 | # filmTrust:random = [5, 395, 181, 565, 254] tail = [601, 623, 619, 64, 558] 69 | # ml100k:random = [62, 1077, 785, 1419, 1257] tail = [1319, 1612, 1509, 1545, 1373] 70 | # 5,395,181,565,254,601,623,619,64,558 71 | # 62,1077,785,1419,1257,1319,1612,1509,1545,1373 72 | # 1166,1574,759,494,549,1272,1728,1662,450,1456,595,566,764,1187,1816,1478,1721,2294,2413,1148 73 | # 88,22,122,339,1431,1141,1656,477,1089,866 74 | parser.add_argument('--target_ids', type=str, default='88,22,122,339,1431,1141,1656,477,1089,866', 75 | help='attack target list') 76 | 77 | parser.add_argument('--attack_num', type=int, default=50, 78 | help='num of attack fake user,50 for ml100k and filmTrust') 79 | 80 | parser.add_argument('--filler_num', type=int, default=4, 81 | help='num of filler items each fake user,90 for ml100k,36 for filmTrust') 82 | 83 | parser.add_argument('--filler_method', type=str, default='', help='0/1/2/3') 84 | 85 | parser.add_argument('--write_to_file', type=int, default=1, help='write to fake profile to file or return array') 86 | 87 | parser.add_argument('--loss', type=int, default=1, help='0:reconstruction,1:reconstruction+seed') 88 | # 89 | args = parser.parse_args() 90 | # 91 | args.target_ids = list(map(int, args.target_ids.split(','))) 92 | return args 93 | 94 | 95 | if __name__ == '__main__': 96 | """parse args""" 97 | args = parse_arg() 98 | """train""" 99 | is_train = 1 100 | attack_method = '_'.join( 101 | ['G' + str(args.loss), str(args.attack_num), str(args.filler_num), str(args.filler_method)]).strip('_') 102 | # 103 | for target_id in args.target_ids: 104 | 105 | attackSetting_path = '_'.join(map(str, [args.dataset, args.attack_num, args.filler_num, target_id])) 106 | attackSetting_path = "../data/data_attacked/" + attackSetting_path + '_attackSetting' 107 | real_profiles, 
filler_indicator = np.load(attackSetting_path + '.npy') 108 | final_attack_setting = [args.attack_num, real_profiles, filler_indicator] 109 | 110 | 111 | _ = gan_attack(args.dataset, attack_method, target_id, is_train, 112 | write_to_file=args.write_to_file, 113 | final_attack_setting=final_attack_setting) 114 | 115 | # gan_attack(args.dataset, attack_method, args.target_id, is_train, write_to_file=args.write_to_file) 116 | -------------------------------------------------------------------------------- /data/automotive/automotive_target_users: -------------------------------------------------------------------------------- 1 | 22 2181,2694,2696,1170,2582,1303,1175,2585,25,2717,2718,1950,2720,1697,2721,414,2719,2722,2723,2724,1704,2473,2725,2727,2728,1709,174,2606,2729,2609,1842,2730,52,2731,2732,2733,2736,57,2737,2738,2748,2749,2741,2742,64,1985,2745,2627,1348,2628,198,2750,1742,2644,2739,1750,855,473,2740,2521,1885,2269,2743,2546,2547,2674,1270,2746,2427,2172,2747,2174 2 | 88 1043,2586,2591,547,38,1578,52,2103,59,1600,579,2635,85,101,2661,1131,2668,1140,1157,1670,2695,649,2700,1176,677,683,1708,2735,2743,186,191,2755,712,1758,741,1253,1255,239,244,2814,1791,2824,2825,266,272,1808,1298,2834,280,1820,2845,2849,2850,2851,2852,2853,2854,2855,2359,825,1849,318,1863,334,846,2384,849,2388,2394,872,892,2469,1965,950,1980,2502,2529,1510,1514,502 3 | 119 28,101,272,288,301,316,341,378,449,532,659,663,698,705,731,744,958,1076,1104,1172,1292,1313,1323,1341,1465,1469,1473,1488,1573,1644,1758,1893,1958,1975,1978,2082,2164,2166,2191,2235,2338,2389,2535,2544,2545,2546,2548,2549,2550,2551 4 | 122 521,1547,2584,25,2586,2587,1052,28,1572,2603,1076,57,1089,580,2629,1094,1097,589,79,2644,96,2155,1644,2162,2675,2167,2172,1664,645,2181,2182,1672,655,1168,1689,1178,2717,1697,2721,2723,2724,2727,1704,2729,2731,1709,175,2736,2737,2738,179,180,2739,2740,2741,2749,2752,2241,198,2246,1225,725,221,2269,2282,238,1777,266,1303,296,1839,310,2362,2882,2889,842,331,2901,855,1879,2903,1887,2917,1895,2922,877,2427,900,1930,1931,2473,1972,1977,2493,461,2005,473,475,2533,1515,2546,2547,1528,1023 5 | 339 769,255,1032,10,1291,145,533,1302,1048,161,1313,1314,1315,1316,1317,295,296,1318,1319,43,684,1320,1321,1322,1323,305,1325,1326,1327,1328,1329,311,1330,1331,1332,571,1333,1334,1214,1335,1336,1337,1338,195,835,1340,1341,1342,1343,1344,1345,1346,1347,1229,1350,1351,1352,1339,1353,1354,1355,347,608,613,102,1254,361,754,1142,889,1147,508,1348 6 | 422 67,77,96,97,99,105,121,128,141,157,171,220,229,232,236,250,271,272,273,297,300,349,358,369,384,390,395,402,403,449,467,484,529,635,663,675,684,690,706,727,730,732,762,763,764,767,776,797,855,857,861,862,896,903,909,913,933,1012,1030,1036,1069,1077,1080,1106,1114,1124,1164,1193,1276,1291,1294,1307,1332,1333,1339,1347,1352,1383,1385,1386,1391,1410,1454,1473,1480,1494,1562,1579,1580,1593,1640,1643,1665,1670,1704,1710,1841,1845,1857,1876,1916,1920,1926,1956,1982,1997,2001,2010,2014,2037,2039,2050,2057,2086,2094,2124,2137,2153,2208,2236,2269,2270,2271,2272,2273,2274,2275,2276,2277,2279,2280,2282,2283,2284,2285,2286,2287,2288,2289,2290,2291,2292,2293,2294,2295,2296,2297,2298,2299,2301,2302 7 | 477 2437,1032,2572,1804,2190,1685,406,1046,1691,2715,1693,417,2338,419,548,1573,1955,937,1321,1325,1070,2733,2734,1970,563,308,1075,1971,185,314,2105,1340,1983,1474,2755,1733,967,1875,1109,2005,2006,2390,345,2521,2778,2141,1889,1507,612,2019,2278,1511,2661,2664,508,366,2159,1649,114,242,886,2550,2300,1662,2559 8 | 594 
52,59,85,101,191,239,244,266,272,280,318,334,502,547,649,677,683,712,741,846,849,872,892,950,1043,1131,1140,1176,1253,1255,1298,1510,1514,1578,1600,1670,1708,1758,1791,1808,1820,1849,1965,2103,2359,2384,2388,2394,2469,2502,2529,2586,2591,2635,2661,2668,2700,2735,2743,2755,2814,2824,2834,2845,2849,2850,2851,2852,2853,2854,2855 9 | 866 1537,1411,1415,136,1416,1417,398,1424,1937,915,1555,1173,1429,1939,2838,2842,1435,1440,1696,1443,1444,1445,1448,1066,300,1455,1712,433,1715,185,1465,1595,2874,1725,2875,1471,1727,1090,67,1475,1731,1479,2119,2504,2634,75,203,205,206,2123,2760,2765,1363,212,1367,1368,1499,221,1123,2406,2409,2157,1902,2030,1904,2158,1906,1395,2034,2037,1398,2673,1656,1402,1404,1405 10 | 884 25,28,57,79,175,179,180,198,221,238,266,296,310,331,461,473,475,580,589,645,655,725,842,855,877,900,1052,1076,1089,1094,1097,1168,1178,1225,1303,1515,1528,1547,1572,1644,1672,1689,1697,1704,1709,1777,1839,1887,1895,1930,1931,1972,1977,2005,2155,2162,2167,2172,2181,2182,2241,2246,2269,2282,2362,2427,2473,2493,2533,2546,2547,2584,2587,2603,2629,2644,2675,2717,2721,2723,2724,2727,2729,2731,2736,2737,2738,2739,2740,2741,2749,2752,2882,2889,2901,2903,2917,2922 11 | 1089 2181,2694,2696,1170,2582,1303,1175,2585,25,2717,2718,1950,2720,1697,2721,414,2719,2722,2723,2724,1704,2473,2725,2727,2728,1709,174,2606,2729,2609,1842,2730,52,2731,2732,2733,2736,57,2737,2738,2748,2749,2741,2742,64,1985,2745,2627,1348,2628,198,2750,1742,2644,2739,1750,855,473,2740,2521,1885,2269,2743,2546,2547,2674,1270,2746,2427,2172,2747,2174 12 | 1141 2181,2694,2696,1170,2582,1303,1175,2585,25,2717,2718,1950,2720,1697,2721,414,2719,2722,2723,2724,1704,2473,2725,2727,2728,1709,174,2606,2729,2609,1842,2730,2731,2732,2733,2736,2737,57,2738,2739,2748,2749,2741,2742,64,1985,2745,2627,1348,2628,198,2750,1742,2644,1750,855,473,2740,2521,1885,2269,2546,2547,2674,1270,2746,2427,2172,2747,2174 13 | 1431 770,3,1926,2569,2570,2571,2572,909,2573,2574,2575,1553,914,1943,1048,2457,153,27,1531,797,2465,1315,2467,2086,297,555,1580,1326,1711,1328,435,564,1331,1207,952,2492,195,835,1220,1347,2501,2120,1353,1098,1994,2250,2377,1230,211,1235,1237,726,1878,2009,220,2271,2274,874,2283,2285,367,754,371,1268,1653,2291,2292,2296,505,2299,764,893,2302 14 | 1593 2,3,69,95,132,185,193,201,203,205,210,212,216,217,220,221,235,253,297,353,395,398,399,433,436,438,447,454,500,545,552,640,775,838,1161,1219,1279,1365,1374,1376,1378,1388,1408,1413,1414,1427,1431,1438,1441,1456,1467,1479,1486,1551,1558,1591,1592,1593,1635,1636,1638,1696,1710,1719,1901,1904,1905,1912,1918,1924,1926,1992,2032,2037,2043,2087,2122,2127,2147,2148,2150,2151,2156,2160,2344,2410,2412,2413,2499,2503,2581,2633,2673,2818,2838 15 | 1656 2,3,1551,2581,1558,545,2087,552,1591,1592,1593,69,2633,2122,2127,95,1635,1636,2147,1638,2148,2150,2151,2156,2160,2673,640,132,1161,1696,1710,1719,185,193,1219,201,203,205,210,212,216,217,220,235,253,1279,2818,775,2838,2344,297,838,1365,1374,1376,353,1378,2410,1388,1901,2412,2413,1904,1905,1912,1918,1408,1924,1413,1414,1926,1417,395,398,399,1427,1431,1438,1441,1456,433,436,438,1467,447,2499,454,1479,1992,2503,1486,2032,500,2037,2043 -------------------------------------------------------------------------------- /AUSH/model/attack_model/gan_attack_copy/models.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # @Time : 2020/9/18 13:52 3 | # @Author : chensi 4 | # @File : models.py 5 | # @Software : PyCharm 6 | # @Desciption : None 7 | 8 | try: 9 | import tensorflow.compat.v1 as tf 10 | 11 | 
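    # TF1-style graph API shim: under TensorFlow 2 this imports the compat.v1 module and disables v2 (eager)
    # behavior on the next line; the except branch falls back to a plain import on a native TensorFlow 1.x install.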
tf.disable_v2_behavior() 12 | except: 13 | import tensorflow as tf 14 | import math 15 | 16 | 17 | # import math 18 | class CopyGanAttacker: 19 | def __init__(self, dataset_class, target_id, filler_num, attack_num, filler_method): 20 | # data set info 21 | self.dataset_class = dataset_class 22 | self.num_user = dataset_class.n_users 23 | self.num_item = dataset_class.n_items 24 | self.rating_matrix = dataset_class.train_matrix.toarray() # tf.constant() 25 | 26 | # attack info 27 | self.target_id = target_id 28 | self.filler_num = filler_num 29 | self.attack_num = attack_num 30 | self.filler_method = filler_method 31 | 32 | def build_model(self): 33 | # define place_holder 34 | # self.user_vector = tf.placeholder(tf.int32, [None, self.num_item]) 35 | # self.item_vector = tf.placeholder(tf.int32, [None, self.num_item]) 36 | self.sampled_template = tf.placeholder(tf.int32, [self.args.batch_size, self.num_item]) 37 | self.batch_filler_index = tf.placeholder(tf.int32, [None, self.args.batch_size]) 38 | # user/item embedding 39 | # c = tf.constant(c) 40 | user_embedding = self.towerMlp(self.rating_matrix, self.num_item, self.args.embedding_dim) 41 | item_embedding = self.towerMlp(self.rating_matrix.transpose(), self.num_user, self.args.embedding_dim) 42 | 43 | """ 44 | copy net 45 | p_copy(j)=sigmoid (w x j’s item embedding + w x u’s user embedding + b)""" 46 | with tf.name_scope("copyNet"): 47 | w1 = tf.get_variable('w1', [self.args.embedding_dim, self.num_item]) 48 | p1 = tf.matmul(tf.nn.embedding_lookup(user_embedding, self.batch_filler_index), w1) # batch*item_num 49 | w2 = tf.get_variable('w2', [self.args.embedding_dim, 1]) 50 | p2 = tf.matmul(item_embedding, w2) # item_num*1 51 | b = tf.get_variable('b', [self.item_num]) 52 | copy_prob = tf.nn.sigmoid(p1 + p2 + b) # batch*item_num 53 | """ 54 | generate net 55 | p_gen(j=r) 56 | """ 57 | with tf.name_scope("genNet"): 58 | gen_probabilitiy_list = [] 59 | for i in range(5): 60 | with tf.name_scope("s_%d" % i): 61 | w1 = tf.get_variable('w1', [self.args.embedding_dim, self.num_item]) 62 | p1 = tf.matmul(tf.nn.embedding_lookup(user_embedding, self.batch_filler_index), 63 | w1) # batch*item_num 64 | w2 = tf.get_variable('w2', [self.args.embedding_dim, 1]) 65 | p2 = tf.matmul(item_embedding, w2) # item_num*1 66 | b = tf.get_variable('b', [self.item_num]) 67 | gen_probability = p1 + p2 + b 68 | gen_probabilitiy_list.append(tf.expand_dims(gen_probability, 2)) # batch*item_num*1 69 | gen_rating_distri = tf.nn.softmax(tf.concat(gen_probabilitiy_list, axis=2)) # batch*item_num*5 70 | """ 71 | Rating 72 | rating p(r) = p_copy(j) x p_copy(j=r) + (1-p_copy(j)) x p_gen(j=r) 73 | """ 74 | copy_rating_distri = tf.reshape(tf.expand_dims(tf.one_hot(self.sampled_template, 5), 3), 75 | [self.args.batch_size, -1, 5]) 76 | rating_distri = copy_prob * copy_rating_distri + (1 - copy_prob) * gen_rating_distri # batch*item_num*5 77 | rating_value = tf.tile(tf.constant([[[1., 2., 3., 4., 5.]]]), [self.args.batch_size, self.num_item, 1]) 78 | fake_profiles = tf.reduce_sum(rating_distri * rating_value, 2) 79 | 80 | """ 81 | loss function 82 | """ 83 | with tf.name_scope("Discriminator"): 84 | D_real = self.towerMlp(self.sampled_template, self.num_item, 1) 85 | D_fake = self.towerMlp(fake_profiles, self.num_item, 1) 86 | 87 | """ 88 | loss function 89 | """ 90 | with tf.name_scope("loss_D"): 91 | d_loss_real = tf.reduce_mean( 92 | tf.nn.sigmoid_cross_entropy_with_logits(logits=D_real, labels=tf.ones_like(D_real)), 93 | name="loss_real") 94 | d_loss_fake = tf.reduce_mean( 
95 | tf.nn.sigmoid_cross_entropy_with_logits(logits=D_fake, labels=tf.zeros_like(D_fake)), 96 | name="loss_fake") 97 | loss_D = d_loss_real + d_loss_fake 98 | with tf.name_scope("loss_G"): 99 | # reconstruction loss 100 | loss_rec = tf.reduce_mean(tf.square(fake_profiles - self.sampled_template)) 101 | # adversial loss 102 | loss_adv = tf.reduce_mean( 103 | tf.nn.sigmoid_cross_entropy_with_logits(logits=D_fake, labels=tf.ones_like(D_fake))) 104 | loss_G = loss_rec + loss_adv 105 | 106 | def towerMlp(self, input, inputDim, outputDim): 107 | dim, x = inputDim // 2, input 108 | while dim > outputDim: 109 | layer = tf.layers.dense( 110 | inputs=x, 111 | units=dim, 112 | kernel_initializer=tf.random_normal_initializer, 113 | activation=tf.nn.relu, 114 | kernel_regularizer=tf.contrib.layers.l2_regularizer(scale=self.reg_rate)) 115 | dim, x = dim // 2, layer 116 | output = tf.layers.dense( 117 | inputs=x, 118 | units=outputDim, 119 | kernel_initializer=tf.random_normal_initializer, 120 | activation=tf.nn.sigmoid, 121 | kernel_regularizer=tf.contrib.layers.l2_regularizer(scale=self.reg_rate)) 122 | return output 123 | -------------------------------------------------------------------------------- /AUSH/test_main/main_eval_attack.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # @Time : 2019/8/24 10:05 3 | # @Author : chensi 4 | # @File : main_eval_attack.py 5 | # @Software : PyCharm 6 | # @Desciption : None 7 | import sys, argparse 8 | import numpy as np 9 | import pandas as pd 10 | 11 | sys.path.append("../") 12 | from utils.load_data.load_data import load_data 13 | from utils.load_data.load_attack_info import * 14 | 15 | 16 | def attack_evaluate(real_preds_path, attacked_preds_file, non_rated_users, target_users): 17 | # 18 | names = ['uid', 'rating', 'HR_1', 'HR_3', 'HR_5', 'HR_10', 'HR_20', 'HR_50'] 19 | real_preds = pd.read_csv(real_preds_path, sep='\t', names=names, engine='python') 20 | attacked_preds = pd.read_csv(attacked_preds_file, sep='\t', names=names, engine='python') 21 | # pred 22 | shift_target = np.mean(attacked_preds.iloc[target_users, 1].values - real_preds.iloc[target_users, 1].values) 23 | shift_all = np.mean(attacked_preds.iloc[non_rated_users, 1].values - real_preds.iloc[non_rated_users, 1].values) 24 | # 25 | HR_real_target = real_preds.iloc[target_users, range(2, 8)].mean().values 26 | HR_real_all = real_preds.iloc[non_rated_users, range(2, 8)].mean().values 27 | 28 | HR_attacked_target = attacked_preds.iloc[target_users, range(2, 8)].mean().values 29 | HR_attacked_all = attacked_preds.iloc[non_rated_users, range(2, 8)].mean().values 30 | return shift_target, HR_real_target, HR_attacked_target, shift_all, HR_real_all, HR_attacked_all 31 | 32 | 33 | def eval_attack(data_set_name, rec_model_name, attack_method, target_id): 34 | dir = "../result/pred_result/" 35 | real_preds_path = dir + '_'.join([rec_model_name, data_set_name, str(target_id)]) 36 | attacked_preds_file = real_preds_path + "_" + attack_method 37 | """ 38 | ml100k 39 | """ 40 | if data_set_name == 'ml100k': 41 | path_train = "../data/data/ml100k_train.dat" 42 | path_test = "../data/data/ml100k_test.dat" 43 | attack_info_path = ["../data/data/ml100k_selected_items", "../data/data/ml100k_target_users"] 44 | elif data_set_name == 'filmTrust': 45 | path_train = "../data/data/filmTrust_train.dat" 46 | path_test = "../data/data/filmTrust_test.dat" 47 | attack_info_path = ["../data/data/filmTrust_selected_items", 
"../data/data/filmTrust_target_users"] 48 | 49 | else: 50 | path_train = "../data/data/" + data_set_name + "_train.dat" 51 | path_test = "../data/data/" + data_set_name + "_test.dat" 52 | attack_info_path = ["../data/data/" + data_set_name + "_selected_items", 53 | "../data/data/" + data_set_name + "_target_users"] 54 | 55 | attack_info = load_attack_info(*attack_info_path) 56 | dataset_class = load_data(path_train=path_train, path_test=path_test, header=['user_id', 'item_id', 'rating'], 57 | sep='\t', print_log=False) 58 | 59 | # 60 | target_users = attack_info[target_id][1] 61 | non_rated_users = dataset_class.get_item_nonrated_users(target_id) 62 | # 63 | res = attack_evaluate(real_preds_path, attacked_preds_file, non_rated_users, target_users) 64 | # 65 | target, all = res[:3], res[3:] 66 | target_str = '\t'.join([str(target[0]), '\t'.join(map(str, target[1])), '\t'.join(map(str, target[2]))]) 67 | all_str = '\t'.join([str(all[0]), '\t'.join(map(str, all[1])), '\t'.join(map(str, all[2]))]) 68 | 69 | # info 70 | info = '\t'.join([rec_model_name, attack_method, str(target_id)]) 71 | # print(info + '\t' + target_str + '\t' + all_str) 72 | return info + '\t' + target_str + '\t' + all_str 73 | 74 | 75 | def parse_arg(): 76 | parser = argparse.ArgumentParser() 77 | 78 | parser.add_argument('--dataset', type=str, default='automotive', help='filmTrust/ml100k/office') 79 | 80 | parser.add_argument('--attack_num', type=int, default=50, help='50 for ml100k and filmTrust') 81 | 82 | parser.add_argument('--filler_num', type=int, default=4, help='90 for ml100k,36 for filmTrust') 83 | 84 | parser.add_argument('--attack_methods', type=str, default='G0,G1', 85 | help='gan,G0,G1,segment,average,random,bandwagon') 86 | 87 | parser.add_argument('--rec_model_names', type=str, default='NNMF,IAutoRec,UAutoRec,NMF_25', 88 | help='NNMF,IAutoRec,UAutoRec,NMF_25') 89 | 90 | # filmTrust:5,395,181,565,254,601,623,619,64,558 - random*5+tail*5 91 | # ml100k:62,1077,785,1419,1257,1319,1612,1509,1545,1373 - random*5+tail*5 92 | # 1166,1574,759,494,549,1272,1728,1662,450,1456,595,566,764,1187,1816,1478,1721,2294,2413,1148 93 | # 88,22,122,339,1431,1141,1656,477,1089,866 94 | parser.add_argument('--target_ids', type=str, default='88,22,122,339,1431,1141,1656,477,1089,866', 95 | help='target_id') 96 | 97 | # 98 | args = parser.parse_args() 99 | # 100 | args.attack_methods = args.attack_methods.split(',') 101 | args.rec_model_names = args.rec_model_names.split(',') 102 | args.target_ids = list(map(int, args.target_ids.split(','))) 103 | return args 104 | 105 | 106 | if __name__ == '__main__': 107 | """parse args""" 108 | args = parse_arg() 109 | """eval""" 110 | result = [] 111 | 112 | for attack_method in args.attack_methods: 113 | for rec_model_name in args.rec_model_names: 114 | for target_id in args.target_ids: 115 | attack_method_ = '_'.join([attack_method, str(args.attack_num), str(args.filler_num)]) 116 | try: 117 | result_ = eval_attack(args.dataset, rec_model_name, attack_method_, target_id) 118 | result.append(result_.split('\t')) 119 | except: 120 | print(attack_method, rec_model_name, target_id) 121 | 122 | result = np.array(result).transpose() 123 | result = pd.DataFrame(dict(zip(range(result.shape[0]), result))) 124 | result.to_excel(args.dataset + '_performance_all.xls', index=False) 125 | -------------------------------------------------------------------------------- /AUSH/test_main/data_preprocess.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env 
python 2 | # -*- coding:utf-8 -*- 3 | # author:ariaschen 4 | # datetime:2020/1/12 16:11 5 | # software: PyCharm 6 | 7 | import itertools, gzip 8 | import pandas as pd 9 | from utils.load_data.load_data import * 10 | from sklearn.model_selection import train_test_split 11 | 12 | 13 | def parse(path): 14 | g = gzip.open(path, 'rb') 15 | for l in g: 16 | yield eval(l) 17 | 18 | 19 | def getDF(path): 20 | i = 0 21 | df = {} 22 | for d in parse(path): 23 | df[i] = d 24 | i += 1 25 | return pd.DataFrame.from_dict(df, orient='index') 26 | 27 | 28 | def data_preprocess(data_set, gz_path): 29 | data = getDF(gz_path)[['reviewerID', 'asin', 'overall']] 30 | data.columns = ['uid', 'iid', 'rating'] 31 | 32 | uids, iids = data.uid.unique(), data.iid.unique() 33 | n_uids, n_iids, n_ratings = len(uids), len(iids), data.shape[0] 34 | print('User num:', n_uids, '\tItem num:', n_iids, '\tRating num:', n_ratings, '\t Sparsity :', n_ratings / (n_iids * n_uids)) 35 | print('Number of ratings per user:', n_ratings / n_uids) 36 | 37 | uid_update = dict(zip(uids, range(n_uids))) 38 | iid_update = dict(zip(iids, range(n_iids))) 39 | 40 | data.uid = data.uid.apply(lambda x: uid_update[x]) 41 | data.iid = data.iid.apply(lambda x: iid_update[x]) 42 | 43 | train_idxs, test_idxs = train_test_split(list(range(n_ratings)), test_size=0.1) 44 | 45 | train_data = data.iloc[train_idxs] 46 | test_data = data.iloc[test_idxs] 47 | path_train = "../data/data/" + data_set + "_train.dat" 48 | path_test = "../data/data/" + data_set + "_test.dat" 49 | train_data.to_csv(path_train, index=False, header=None, sep='\t') 50 | test_data.to_csv(path_test, index=False, header=None, sep='\t') 51 | np.save("../data/data/" + data_set + "_id_update", [uid_update, iid_update]) 52 | 53 | 54 | def exp_select(data_set, target_items, selected_num, target_user_num): 55 | path_test = "../data/data/" + data_set + "_test.dat" 56 | path_train = "../data/data/" + data_set + "_train.dat" 57 | dataset_class = load_data(path_train=path_train, path_test=path_test, 58 | header=['user_id', 'item_id', 'rating'], 59 | sep='\t', print_log=True) 60 | 61 | item_pops = dataset_class.get_item_pop() 62 | 63 | items_sorted = np.array(item_pops).argsort()[::-1] 64 | 65 | bandwagon_selected = items_sorted[:selected_num] 66 | print('bandwagon_selected:', bandwagon_selected) 67 | 68 | 69 | threshold = dataset_class.test_data.rating.mean() 70 | threshold = threshold if threshold < 3 else 3.0 71 | print('threshold:', threshold) 72 | selected_candidates = items_sorted[:20] 73 | 74 | selected_candidates = list(itertools.combinations(selected_candidates, selected_num)) 75 | 76 | result = {} 77 | target_items = [j for i in range(2, 10) for j in 78 | items_sorted[i * len(items_sorted) // 10:(i * len(items_sorted) // 10) + 2]][::-1] 79 | target_items = list( 80 | np.random.choice([i for i in range(len(item_pops)) if item_pops[i] == 3], 4, replace=False)) + target_items 81 | print('target_items:', target_items) 82 | print('number of ratings:', [item_pops[i] for i in target_items]) 83 | for target in target_items: 84 | target_rated = set(dataset_class.train_data[dataset_class.train_data.item_id == target].user_id.values) 85 | data_tmp = dataset_class.train_data[~dataset_class.train_data.user_id.isin(target_rated)].copy() 86 | data_tmp = data_tmp[data_tmp.rating >= threshold] 87 | np.random.shuffle(selected_candidates) 88 | 89 | for selected_items in selected_candidates: 90 | target_users = data_tmp[data_tmp.item_id.isin(selected_items)].groupby( 91 | 'user_id').size() 92 | 93 | if 
target_users[(target_users == selected_num)].shape[0] >= target_user_num: 94 | target_users = sorted(target_users[(target_users == selected_num)].index) 95 | result[target] = [sorted(selected_items), target_users] 96 | print('target:', target) 97 | break 98 | 99 | if target not in result: 100 | for selected_items in selected_candidates: 101 | 102 | target_users = data_tmp[data_tmp.item_id.isin(selected_items)].groupby( 103 | 'user_id').size() 104 | target_users = sorted(dict(target_users).items(), key=lambda x: x[1], reverse=True) 105 | min = target_users[target_user_num][1] 106 | target_users = [i[0] for i in target_users[:target_user_num] if i[1] > selected_num // 2] 107 | if len(target_users) >= target_user_num: 108 | result[target] = [sorted(selected_items), sorted(target_users)] 109 | print('target:', target, 'min rated selected item num:', min) 110 | break 111 | 112 | if target not in result: 113 | print('target:', target, 'non-targeted user') 114 | a = 1 115 | 116 | key = list(result.keys()) 117 | selected_items = [','.join(map(str, result[k][0])) for k in key] 118 | target_users = [','.join(map(str, result[k][1])) for k in key] 119 | selected_items = pd.DataFrame(dict(zip(['id', 'selected_items'], [key, selected_items]))) 120 | target_users = pd.DataFrame(dict(zip(['id', 'target_users'], [key, target_users]))) 121 | selected_items.to_csv("../data/data/" + data_set + '_selected_items', index=False, header=None, sep='\t') 122 | target_users.to_csv("../data/data/" + data_set + '_target_users', index=False, header=None, sep='\t') 123 | 124 | 125 | if __name__ == '__main__': 126 | data_set = 'office' 127 | gz_path = 'C:\\Users\\ariaschen\\Downloads\\reviews_Office_Products_5.json.gz' 128 | # data_set = 'automotive' 129 | # gz_path = 'C:\\Users\\ariaschen\\Downloads\\reviews_Automotive_5.json.gz' 130 | # data_set = 'grocery' 131 | # gz_path = "../data/new_raw_data/reviews_Grocery_and_Gourmet_Food_5.json.gz" 132 | 133 | 134 | data_preprocess(data_set, gz_path) 135 | 136 | target_items = None 137 | 138 | exp_select(data_set, target_items, selected_num=2, target_user_num=30) 139 | -------------------------------------------------------------------------------- /AUSH/test_main/main_baseline_attack.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # @Time : 2019/8/23 11:49 3 | # @Author : chensi 4 | # @File : main_attack_baseline.py 5 | # @Software : PyCharm 6 | # @Desciption : None 7 | 8 | import sys, argparse 9 | 10 | sys.path.append("../") 11 | from utils.load_data.load_data import * 12 | from utils.load_data.load_attack_info import * 13 | from model.attack_model.baseline import * 14 | from utils.attack.data_to_file import * 15 | from model.attack_model.gan_attack.trainer import Train_GAN_Attacker 16 | 17 | 18 | def get_data(data_set_name): 19 | path_train = '../data/data/' + data_set_name + '_train.dat' 20 | path_test = '../data/data/' + data_set_name + '_test.dat' 21 | dataset_class = load_data(path_train=path_train, path_test=path_test, 22 | header=['user_id', 'item_id', 'rating'], 23 | sep='\t', print_log=False) 24 | attack_info_path = ["../data/data/" + data_set_name + "_selected_items", 25 | "../data/data/" + data_set_name + "_target_users"] 26 | attack_info = load_attack_info(*attack_info_path) 27 | return dataset_class, attack_info 28 | 29 | 30 | def baseline_attack(dataset_class, attack_info, attack_method, target_id, bandwagon_selected, 31 | fixed_filler_indicator=None): 32 | """load data""" 33 | selected_ids, 
target_users = attack_info[target_id] 34 | attack_model, attack_num, filler_num = attack_method.split('_') 35 | attack_num, filler_num = int(attack_num), int(filler_num) 36 | 37 | """attack class""" 38 | global_mean, global_std, item_means, item_stds = dataset_class.get_all_mean_std() 39 | baseline_attacker = BaselineAttack(attack_num, filler_num, dataset_class.n_items, target_id, 40 | global_mean, global_std, item_means, item_stds, 5.0, 1.0, 41 | fixed_filler_indicator=fixed_filler_indicator) 42 | # fake profile array 43 | fake_profiles = None 44 | if attack_model == "random": 45 | fake_profiles = baseline_attacker.RandomAttack() 46 | elif attack_model == "bandwagon": 47 | fake_profiles = baseline_attacker.BandwagonAttack(bandwagon_selected) 48 | elif attack_model == "average": 49 | fake_profiles = baseline_attacker.AverageAttack() 50 | elif attack_model == "segment": 51 | fake_profiles = baseline_attacker.SegmentAttack(selected_ids) 52 | else: 53 | print('attack_method error') 54 | exit() 55 | return fake_profiles 56 | 57 | 58 | def parse_arg(): 59 | parser = argparse.ArgumentParser() 60 | 61 | parser.add_argument('--dataset', type=str, default='automotive', help='filmTrust/ml100k/grocery') 62 | 63 | parser.add_argument('--attack_methods', type=str, default='average', 64 | help='average,segment,random,bandwagon') 65 | 66 | # filmTrust:random = [5, 395, 181, 565, 254] tail = [601, 623, 619, 64, 558] 67 | # ml100k:random = [62, 1077, 785, 1419, 1257] tail = [1319, 1612, 1509, 1545, 1373] 68 | # 1166,1574,759,494,549,1272,1728,1662,450,1456,595,566,764,1187,1816,1478,1721,2294,2413,1148 69 | # 62,1077,785,1419,1257,1319,1612,1509,1545,1373 70 | # 88,22,122,339,1431,1141,1656,477,1089,866 71 | parser.add_argument('--targets', type=str, default='88,22,122,339,1431,1141,1656,477,1089,866', 72 | help='attack_targets') 73 | 74 | parser.add_argument('--attack_num', type=int, default=50, help='fixed 50') 75 | 76 | parser.add_argument('--filler_num', type=int, default=4, help='90 for ml100k,36 for filmTrust') 77 | parser.add_argument('--bandwagon_selected', type=str, default='180,99,49', 78 | help='180,99,49 for ml100k,103,98,115 for filmTrust') 79 | # 80 | parser.add_argument('--sample_filler', type=int, default=1, help='sample filler') 81 | # 82 | 83 | args = parser.parse_args() 84 | # 85 | args.attack_methods = args.attack_methods.split(',') 86 | args.targets = list(map(int, args.targets.split(','))) 87 | args.bandwagon_selected = list(map(int, args.bandwagon_selected.split(','))) 88 | return args 89 | 90 | 91 | if __name__ == '__main__': 92 | """parse args""" 93 | args = parse_arg() 94 | 95 | """attack""" 96 | dataset_class, attack_info = get_data(args.dataset) 97 | 98 | for target_id in args.targets: 99 | 100 | attackSetting_path = '_'.join(map(str, [args.dataset, args.attack_num, args.filler_num, target_id])) 101 | attackSetting_path = "../data/data_attacked/" + attackSetting_path + '_attackSetting' 102 | if args.sample_filler: 103 | gan_attacker = Train_GAN_Attacker(dataset_class, params_D=None, params_G=None, target_id=target_id, 104 | selected_id_list=attack_info[target_id][0], 105 | filler_num=args.filler_num, attack_num=args.attack_num, filler_method=0) 106 | _, real_profiles, filler_indicator = gan_attacker.execute(is_train=0, model_path='no', 107 | final_attack_setting=[args.attack_num, 108 | None, None]) 109 | 110 | np.save(attackSetting_path, [real_profiles, filler_indicator]) 111 | else: 112 | real_profiles, filler_indicator = np.load(attackSetting_path + '.npy') 113 | 114 | # for 
attack_method in args.attack_methods: 115 | # 116 | # attack_model = '_'.join([attack_method, str(args.attack_num), str(args.filler_num)]) 117 | # # fake_profiles = baseline_attack(dataset_class, attack_info, attack_model, target_id, 118 | # # args.bandwagon_selected, filler_indicator) 119 | # fake_profiles = baseline_attack(dataset_class, attack_info, attack_model, target_id, 120 | # args.bandwagon_selected, None) 121 | # 122 | # ori_path = '../data/data/' + args.dataset + '_train.dat' 123 | # dst_path = "../data/data_attacked/" + '_'.join([args.dataset, str(target_id), attack_model]) + "_sample.dat" 124 | # attacked_file_writer(ori_path, dst_path, fake_profiles, dataset_class.n_users) 125 | -------------------------------------------------------------------------------- /Leg-UP/models/detector/SDLib/tool/file.py: -------------------------------------------------------------------------------- 1 | import os.path 2 | from os.path import abspath 3 | from os import makedirs, remove 4 | from re import compile, findall, split 5 | # from config import LineConfig 6 | from collections import defaultdict 7 | class Config(object): 8 | def __init__(self, fileName): 9 | self.config = {} 10 | self.readConfiguration(fileName) 11 | 12 | def __getitem__(self, item): 13 | if not self.contains(item): 14 | print('parameter ' + item + ' is invalid!') 15 | exit(-1) 16 | return self.config[item] 17 | 18 | def getOptions(self, item): 19 | if not self.contains(item): 20 | print('parameter ' + item + ' is invalid!') 21 | exit(-1) 22 | return self.config[item] 23 | 24 | def contains(self, key): 25 | return key in self.config # dict.has_key() no longer exists in Python 3 26 | 27 | def readConfiguration(self, fileName): 28 | if not os.path.exists(abspath(fileName)): 29 | print('config file is not found!') 30 | raise IOError 31 | with open(fileName) as f: 32 | for ind, line in enumerate(f): 33 | if line.strip() != '': 34 | try: 35 | key, value = line.strip().split('=') 36 | self.config[key] = value 37 | except ValueError: 38 | print('config file is not in the correct format!
Error Line:%d' % (ind)) 39 | 40 | 41 | class LineConfig(object): 42 | def __init__(self, content): 43 | self.line = content.strip().split(' ') 44 | self.options = {} 45 | self.mainOption = False 46 | if self.line[0] == 'on': 47 | self.mainOption = True 48 | elif self.line[0] == 'off': 49 | self.mainOption = False 50 | for i, item in enumerate(self.line): 51 | if (item.startswith('-') or item.startswith('--')) and not item[1:].isdigit(): 52 | ind = i + 1 53 | for j, sub in enumerate(self.line[ind:]): 54 | if (sub.startswith('-') or sub.startswith('--')) and not sub[1:].isdigit(): 55 | ind = j 56 | break 57 | if j == len(self.line[ind:]) - 1: 58 | ind = j + 1 59 | break 60 | try: 61 | self.options[item] = ' '.join(self.line[i + 1:i + 1 + ind]) 62 | except IndexError: 63 | self.options[item] = 1 64 | 65 | def __getitem__(self, item): 66 | if not self.contains(item): 67 | print('parameter ' + item + ' is invalid!') 68 | exit(-1) 69 | return self.options[item] 70 | 71 | def getOption(self, key): 72 | if not self.contains(key): 73 | print('parameter ' + key + ' is invalid!') 74 | exit(-1) 75 | return self.options[key] 76 | 77 | def isMainOn(self): 78 | return self.mainOption 79 | 80 | def contains(self, key): 81 | return key in self.options 82 | # return self.options.has_key(key) 83 | class FileIO(object): 84 | def __init__(self): 85 | pass 86 | 87 | # @staticmethod 88 | # def writeFile(filePath,content,op = 'w'): 89 | # reg = compile('(.+[/|\\\]).+') 90 | # dirs = findall(reg,filePath) 91 | # if not os.path.exists(filePath): 92 | # os.makedirs(dirs[0]) 93 | # with open(filePath,op) as f: 94 | # f.write(str(content)) 95 | 96 | @staticmethod 97 | def writeFile(dir, file, content, op='w'): 98 | if not os.path.exists(dir): 99 | os.makedirs(dir) 100 | if type(content) == 'str': 101 | with open(dir + file, op) as f: 102 | f.write(content) 103 | else: 104 | with open(dir + file, op) as f: 105 | f.writelines(content) 106 | 107 | @staticmethod 108 | def deleteFile(filePath): 109 | if os.path.exists(filePath): 110 | remove(filePath) 111 | 112 | @staticmethod 113 | def loadDataSet(conf, file, bTest=False): 114 | trainingData = defaultdict(dict) 115 | testData = defaultdict(dict) 116 | ratingConfig = LineConfig(conf['ratings.setup']) 117 | # if not bTest: 118 | # print('loading training data...') 119 | # else: 120 | # print('loading test data...') 121 | with open(file) as f: 122 | ratings = f.readlines() 123 | # ignore the headline 124 | if ratingConfig.contains('-header'): 125 | ratings = ratings[1:] 126 | # order of the columns 127 | order = ratingConfig['-columns'].strip().split() 128 | 129 | for lineNo, line in enumerate(ratings): 130 | items = split(' |,|\t', line.strip()) 131 | if not bTest and len(order) < 3: 132 | print('The rating file is not in a correct format. Error: Line num %d' % lineNo) 133 | exit(-1) 134 | try: 135 | userId = items[int(order[0])] 136 | itemId = items[int(order[1])] 137 | if bTest and len(order) < 3: 138 | rating = 1 # default value 139 | else: 140 | rating = items[int(order[2])] 141 | 142 | except ValueError: 143 | print('Error! 
Have you added the option -header to the rating.setup?') 144 | exit(-1) 145 | if not bTest: 146 | trainingData[userId][itemId] = float(rating) 147 | else: 148 | testData[userId][itemId] = float(rating) 149 | if not bTest: 150 | return trainingData 151 | else: 152 | return testData 153 | 154 | @staticmethod 155 | def loadRelationship(conf, filePath): 156 | socialConfig = LineConfig(conf['social.setup']) 157 | relation = [] 158 | print('loading social data...') 159 | with open(filePath) as f: 160 | relations = f.readlines() 161 | # ignore the headline 162 | if socialConfig.contains('-header'): 163 | relations = relations[1:] 164 | # order of the columns 165 | order = socialConfig['-columns'].strip().split() 166 | if len(order) <= 2: 167 | print('The social file is not in a correct format.') 168 | for lineNo, line in enumerate(relations): 169 | items = split(' |,|\t', line.strip()) 170 | if len(order) < 2: 171 | print('The social file is not in a correct format. Error: Line num %d' % lineNo) 172 | exit(-1) 173 | userId1 = items[int(order[0])] 174 | userId2 = items[int(order[1])] 175 | if len(order) < 3: 176 | weight = 1 177 | else: 178 | weight = float(items[int(order[2])]) 179 | relation.append([userId1, userId2, weight]) 180 | return relation 181 | 182 | @staticmethod 183 | def loadLabels(filePath): 184 | labels = {} 185 | with open(filePath) as f: 186 | for line in f: 187 | items = split(' |,|\t', line.strip()) 188 | labels[items[0]] = items[1] 189 | return labels 190 | -------------------------------------------------------------------------------- /AUSH/test_main/dcgan.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | from __future__ import print_function 3 | import sys 4 | 5 | sys.path.append("../") 6 | import os, argparse, time, math 7 | import numpy as np 8 | import tensorflow as tf 9 | from glob import glob 10 | from utils.attack.data_to_file import * 11 | from test_main.utils_dcgan import * 12 | from numpy import linalg as la 13 | from model.trainer_rec import * 14 | from test_main.main_eval_attack import eval_attack 15 | import utils as ut 16 | 17 | flags = tf.app.flags 18 | flags.DEFINE_integer("epoch", 64, "Epoch to train [25]") 19 | flags.DEFINE_float("learning_rate", 0.0002, "Learning rate of for adam [0.0002]") 20 | flags.DEFINE_float("beta1", 0.5, "Momentum term of adam [0.5]") 21 | flags.DEFINE_integer("batch_size", 64, "The size of batch images [64]") 22 | flags.DEFINE_integer("max_to_keep", 1, "maximum number of checkpoints to keep") 23 | flags.DEFINE_integer("z_dim", 100, "dimensions of z") 24 | # 25 | flags.DEFINE_integer("T", 10, "adv opt epoch") 26 | flags.DEFINE_integer("K", 5, "top k svd") # 5 27 | flags.DEFINE_float("alpha", 50.0, "opt param") 28 | flags.DEFINE_float("eta", 100.0, "opt param") 29 | flags.DEFINE_integer("attack_num", 50, "attack_num") 30 | flags.DEFINE_integer("filler_num", 90, "filler_num") 31 | FLAGS = flags.FLAGS 32 | 33 | # gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.333) 34 | data_set_name = 'ml100k' 35 | target_ids = [62, 1077, 785, 1419, 1257, 1319, 1612, 1509, 1545, 1373] 36 | 37 | run_config = tf.ConfigProto() 38 | run_config.gpu_options.allow_growth = True 39 | path_train = '../data/data/' + data_set_name + '_train.dat' 40 | path_test = '../data/data/' + data_set_name + '_test.dat' 41 | attack_info_path = ["../data/data/" + data_set_name + "_selected_items", 42 | "../data/data/" + data_set_name + "_target_users"] 43 | # 读取seletced items和target users 
44 | attack_info = load_attack_info(*attack_info_path) 45 | dataset_class = ut.load_data.load_data.load_data(path_train=path_train, path_test=path_test, 46 | header=['user_id', 'item_id', 'rating'], 47 | sep='\t', print_log=False) 48 | 49 | 50 | def train_Rec_model(injected_path, injected_profiles, target_id, model_path, train_epoch, 51 | model_name='IAutoRec', warm_start=False, restore_path=None): 52 | tf.reset_default_graph() 53 | 54 | attacked_file_writer(path_train, injected_path, injected_profiles, dataset_class.n_users) 55 | 56 | dataset_class_injected = ut.load_data.load_data.load_data(path_train=injected_path, 57 | path_test=path_test, 58 | header=['user_id', 'item_id', 'rating'], 59 | sep='\t', print_log=False) 60 | 61 | # tf.reset_default_graph() 62 | tf_config = tf.ConfigProto() 63 | tf_config.gpu_options.allow_growth = True 64 | with tf.Session() as sess: 65 | rec_model = get_model_network(sess, model_name, dataset_class_injected, train_epoch) 66 | if warm_start: 67 | # print('warm start') 68 | rec_model.restore(restore_path) 69 | rec_model.execute() 70 | rec_model.save(model_path) 71 | predictions, hit_ratios = pred_for_target(rec_model, target_id) 72 | return predictions, hit_ratios 73 | 74 | 75 | def opt_adv_intent(fake_users, filler_indicators, target_id): 76 | target_users = attack_info[target_id][1] 77 | model_path = "./IAutoRec_dcgan_%d.ckpt" % target_id 78 | injected_path = "./IAutoRec_dcgan_%d.dat" % target_id 79 | 80 | # ---------------------- 81 | for t in range(FLAGS.T): 82 | 83 | injected_profiles = fake_users * filler_indicators 84 | predictions, _ = train_Rec_model(injected_path, injected_profiles, target_id, model_path, 10) 85 | f_adv_0 = np.sum(predictions[target_users]) 86 | f_adv_k = f_adv_0 87 | print("opt_adv_intent\tepoch-%d adv goal\t%f" % (t, f_adv_k)) 88 | 89 | delta_f_Adv = [] 90 | B, Sigma, V = la.svd(fake_users) 91 | for k in range(FLAGS.K): 92 | 93 | Z_k = np.matmul(np.reshape(B[k], [FLAGS.attack_num, 1]), np.reshape(V[k], [1, dataset_class.n_items])) 94 | 95 | fake_users_k = fake_users + FLAGS.alpha * Z_k 96 | 97 | injected_profiles = fake_users_k * filler_indicators 98 | predictions, _ = train_Rec_model(injected_path, injected_profiles, target_id, model_path, 99 | 5, warm_start=True, restore_path=model_path) 100 | f_adv_k_new = np.sum(predictions[target_users]) 101 | 102 | delta_f_Adv.append((f_adv_k_new - f_adv_k) * Z_k) 103 | 104 | delta_f_A = FLAGS.alpha * sum(delta_f_Adv) 105 | fake_users += FLAGS.eta * delta_f_A 106 | fake_users[fake_users <= 0] = 0.5 107 | fake_users[fake_users > 5] = 5 108 | return fake_users * filler_indicators 109 | 110 | 111 | 112 | tf.reset_default_graph() 113 | with tf.Session(config=run_config) as sess: 114 | dcgan = DCGAN(sess, dataset_class) 115 | # print("build_model_ok") 116 | dcgan.train(FLAGS) 117 | # save model 118 | saver = tf.train.Saver() 119 | saver.save(sess, './dcgan.ckpt') 120 | 121 | fake_users = None 122 | while True: 123 | batch_z = gen_random(size=[FLAGS.batch_size, dcgan.z_dim]).astype(np.float32) 124 | fake_users_ = sess.run(dcgan.G, feed_dict={dcgan.z: batch_z}) 125 | # reshape&[-1,1]->[0,5] 126 | fake_users_ = fake_users_.reshape([fake_users_.shape[0], -1]) 127 | fake_users_ = (fake_users_ * 2.5) + 2.5 128 | fake_users = fake_users_ if fake_users is None else np.concatenate([fake_users_, fake_users_], 0) 129 | if fake_users.shape[0] >= FLAGS.attack_num: break 130 | # attack_num 131 | fake_users = fake_users[:FLAGS.attack_num] 132 | # filler_num 133 | filler_indicators = [] 134 | for i in 
range(FLAGS.attack_num): 135 | fillers_ = np.random.choice(list(range(dataset_class.n_items)), FLAGS.filler_num, replace=False) 136 | filler_indicator_ = [1 if iid in fillers_ else 0 for iid in range(dataset_class.n_items)] 137 | filler_indicators.append(filler_indicator_) 138 | filler_indicators = np.array(filler_indicators) 139 | np.save('./fake_user_dcgan', [fake_users, filler_indicators]) 140 | # fake_users, filler_indicators = np.load('./fake_user_dcgan.npy') 141 | 142 | results = {} 143 | for target_id in target_ids: 144 | 145 | injected_profiles = opt_adv_intent(fake_users, filler_indicators, target_id) 146 | 147 | 148 | model_path = "./IAutoRec_dcgan_%d.ckpt" % target_id 149 | injected_path = "../data/data/ml100k_%d_dcgan_50_90.dat" % target_id 150 | target_users = attack_info[target_id][1] 151 | predictions, hit_ratios = train_Rec_model(injected_path, injected_profiles, target_id, model_path, 500) 152 | dst_path = "../result/pred_result/" + '_'.join(['IAutoRec', 'ml100k', str(target_id), 'dcgan']) 153 | target_prediction_writer(predictions, hit_ratios, dst_path) 154 | 155 | result = eval_attack('ml100k', 'IAutoRec', 'dcgan', target_id) 156 | results[target_id] = result 157 | print(target_id, result, '\n\n') 158 | break 159 | 160 | for target_id in target_ids: 161 | print(target_id, results[target_id]) 162 | -------------------------------------------------------------------------------- /Leg-UP/models/detector/SDLib/method/FAP.py: -------------------------------------------------------------------------------- 1 | from models.detector.SDLib.baseclass.SDetection import SDetection 2 | from models.detector.SDLib.tool import config 3 | from sklearn.metrics import classification_report 4 | import numpy as np 5 | import random 6 | 7 | class FAP(SDetection): 8 | 9 | def __init__(self, conf, trainingSet=None, testSet=None, labels=None, fold='[1]'): 10 | super(FAP, self).__init__(conf, trainingSet, testSet, labels, fold) 11 | 12 | def readConfiguration(self): 13 | super(FAP, self).readConfiguration() 14 | # # s means the number of seedUser who be regarded as spammer in training 15 | self.s =int( self.config['seedUser']) 16 | # preserve the real spammer ID 17 | self.spammer = [] 18 | for i in self.dao.user: 19 | if self.labels[i] == '1': 20 | self.spammer.append(self.dao.user[i]) 21 | sThreshold = int(0.5 * len(self.spammer)) 22 | if self.s > sThreshold : 23 | self.s = sThreshold 24 | print ('*** seedUser is more than a half of spammer, so it is set to', sThreshold, '***') 25 | 26 | # # predict top-k user as spammer 27 | self.k = int(self.config['topKSpam']) 28 | # 0.5 is the ratio of spammer to dataset, it can be changed according to different datasets 29 | kThreshold = int(0.5 * (len(self.dao.user) - self.s)) 30 | if self.k > kThreshold: 31 | self.k = kThreshold 32 | print ('*** the number of top-K users is more than threshold value, so it is set to', kThreshold, '***') 33 | # product transition probability matrix self.TPUI and self.TPIU 34 | 35 | def __computeTProbability(self): 36 | # m--user count; n--item count 37 | m, n, tmp = self.dao.trainingSize() 38 | self.TPUI = np.zeros((m, n)) 39 | self.TPIU = np.zeros((n, m)) 40 | 41 | self.userUserIdDic = {} 42 | self.itemItemIdDic = {} 43 | tmpUser = list(self.dao.user.values()) 44 | tmpUserId = list(self.dao.user.keys()) 45 | tmpItem = list(self.dao.item.values()) 46 | tmpItemId = list(self.dao.item.keys()) 47 | # tmpUser = self.dao.user.values() 48 | # tmpUserId = self.dao.user.keys() 49 | # tmpItem = self.dao.item.values() 50 | # 
tmpItemId = self.dao.item.keys() 51 | for users in range(0, m): 52 | self.userUserIdDic[tmpUser[users]] = tmpUserId[users] 53 | for items in range(0, n): 54 | self.itemItemIdDic[tmpItem[items]] = tmpItemId[items] 55 | for i in range(0, m): 56 | for j in range(0, n): 57 | user = self.userUserIdDic[i] 58 | item = self.itemItemIdDic[j] 59 | # if has edge in graph,set a value ;otherwise set 0 60 | if (user not in self.bipartiteGraphUI) or (item not in self.bipartiteGraphUI[user]): 61 | continue 62 | else: 63 | w = float(self.bipartiteGraphUI[user][item]) 64 | # to avoid positive feedback and reliability problem,we should Polish the w 65 | otherItemW = 0 66 | otherUserW = 0 67 | for otherItem in self.bipartiteGraphUI[user]: 68 | otherItemW += float(self.bipartiteGraphUI[user][otherItem]) 69 | for otherUser in self.dao.trainingSet_i[item]: 70 | otherUserW += float(self.bipartiteGraphUI[otherUser][item]) 71 | # wPrime = w*1.0/(otherUserW * otherItemW) 72 | wPrime = w 73 | self.TPUI[i][j] = wPrime / otherItemW 74 | self.TPIU[j][i] = wPrime / otherUserW 75 | # if i % 100 == 0: 76 | # print ('progress: %d/%d' %(i,m)) 77 | 78 | def initModel(self): 79 | # construction of the bipartite graph 80 | # print ("constructing bipartite graph...") 81 | self.bipartiteGraphUI = {} 82 | for user in self.dao.trainingSet_u: 83 | tmpUserItemDic = {} # user-item-point 84 | for item in self.dao.trainingSet_u[user]: 85 | # tmpItemUserDic = {}#item-user-point 86 | recordValue = float(self.dao.trainingSet_u[user][item]) 87 | w = 1 + abs((recordValue - self.dao.userMeans[user]) / self.dao.userMeans[user]) + abs( 88 | (recordValue - self.dao.itemMeans[item]) / self.dao.itemMeans[item]) + abs( 89 | (recordValue - self.dao.globalMean) / self.dao.globalMean) 90 | # tmpItemUserDic[user] = w 91 | tmpUserItemDic[item] = w 92 | # self.bipartiteGraphIU[item] = tmpItemUserDic 93 | self.bipartiteGraphUI[user] = tmpUserItemDic 94 | # we do the polish in computing the transition probability 95 | # print ("computing transition probability...") 96 | self.__computeTProbability() 97 | 98 | def isConvergence(self, PUser, PUserOld): 99 | if len(PUserOld) == 0: 100 | return True 101 | for i in range(0, len(PUser)): 102 | if (PUser[i] - PUserOld[i]) > 0.01: 103 | return True 104 | return False 105 | 106 | def buildModel(self): 107 | # -------init-------- 108 | m, n, tmp = self.dao.trainingSize() 109 | PUser = np.zeros(m) 110 | PItem = np.zeros(n) 111 | self.testLabels = [0 for i in range(m)] 112 | self.predLabels = [0 for i in range(m)] 113 | 114 | # preserve seedUser Index 115 | self.seedUser = [] 116 | randDict = {} 117 | for i in range(0, self.s): 118 | randNum = random.randint(0, len(self.spammer) - 1) 119 | while randNum in randDict: 120 | randNum = random.randint(0, len(self.spammer) - 1) 121 | randDict[randNum] = 0 122 | self.seedUser.append(int(self.spammer[randNum])) 123 | # print len(randDict), randDict 124 | 125 | #initial user and item spam probability 126 | for j in range(0, m): 127 | if j in self.seedUser: 128 | #print type(j),j 129 | PUser[j] = 1 130 | else: 131 | PUser[j] = random.random() 132 | for tmp in range(0, n): 133 | PItem[tmp] = random.random() 134 | 135 | # -------iterator------- 136 | PUserOld = [] 137 | iterator = 0 138 | while self.isConvergence(PUser, PUserOld): 139 | #while iterator < 100: 140 | for j in self.seedUser: 141 | PUser[j] = 1 142 | PUserOld = PUser 143 | PItem = np.dot(self.TPIU, PUser) 144 | PUser = np.dot(self.TPUI, PItem) 145 | iterator += 1 146 | # print (self.foldInfo,'iteration', iterator) 147 
| 148 | PUserDict = {} 149 | userId = 0 150 | for i in PUser: 151 | PUserDict[userId] = i 152 | userId += 1 153 | for j in self.seedUser: 154 | del PUserDict[j] 155 | 156 | self.PSort = sorted(PUserDict.items(), key=lambda d: d[1], reverse=True) 157 | 158 | 159 | def predict(self): 160 | # predLabels 161 | # top-k user as spammer 162 | spamList = [] 163 | sIndex = 0 164 | while sIndex < self.k: 165 | spam = self.PSort[sIndex][0] 166 | spamList.append(spam) 167 | self.predLabels[spam] = 1 168 | sIndex += 1 169 | 170 | # trueLabels 171 | for user in self.dao.trainingSet_u: 172 | userInd = self.dao.user[user] 173 | # print type(user), user, userInd 174 | self.testLabels[userInd] = int(self.labels[user]) 175 | 176 | # delete seedUser labels 177 | differ = 0 178 | for user in self.seedUser: 179 | user = int(user - differ) 180 | # print type(user) 181 | del self.predLabels[user] 182 | del self.testLabels[user] 183 | differ += 1 184 | 185 | return self.predLabels 186 | -------------------------------------------------------------------------------- /AUSH/model/nnmf.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """Implementation of Neural Network Matrix Factorization. 3 | Reference: Dziugaite, Gintare Karolina, and Daniel M. Roy. "Neural network matrix factorization." arXiv preprint arXiv:1511.06443 (2015). 4 | """ 5 | 6 | try: 7 | import tensorflow.compat.v1 as tf 8 | 9 | tf.disable_v2_behavior() 10 | except: 11 | import tensorflow as tf 12 | import time 13 | import numpy as np 14 | import math 15 | 16 | __author__ = "Shuai Zhang" 17 | __copyright__ = "Copyright 2018, The DeepRec Project" 18 | 19 | __license__ = "GPL" 20 | __version__ = "1.0.0" 21 | __maintainer__ = "Shuai Zhang" 22 | __email__ = "cheungdaven@gmail.com" 23 | __status__ = "Development" 24 | 25 | 26 | class NNMF(): 27 | def __init__(self, sess, dataset_class, num_factor_1=100, num_factor_2=10, hidden_dimension=50, 28 | learning_rate=0.001, reg_rate=0.01, epoch=500, batch_size=256, 29 | show_time=False, T=5, display_step=1000): 30 | self.learning_rate = learning_rate 31 | self.epochs = epoch 32 | self.batch_size = batch_size 33 | self.reg_rate = reg_rate 34 | self.sess = sess 35 | self.dataset_class = dataset_class 36 | self.num_user = dataset_class.n_users 37 | self.num_item = dataset_class.n_items 38 | self.dataset_class.test_matrix_dok = self.dataset_class.test_matrix.todok() 39 | 40 | self.num_factor_1 = num_factor_1 41 | self.num_factor_2 = num_factor_2 42 | self.hidden_dimension = hidden_dimension 43 | self.show_time = show_time 44 | self.T = T 45 | self.display_step = display_step 46 | print("NNMF.") 47 | 48 | self.dataset_class_train_matrix_coo = self.dataset_class.train_matrix.tocoo() 49 | self.user = self.dataset_class_train_matrix_coo.row.reshape(-1) 50 | self.item = self.dataset_class_train_matrix_coo.col.reshape(-1) 51 | self.rating = self.dataset_class_train_matrix_coo.data 52 | 53 | self._build_network() 54 | init = tf.global_variables_initializer() 55 | self.sess.run(init) 56 | 57 | def _build_network(self): 58 | print("num_factor_1=%d, num_factor_2=%d, hidden_dimension=%d" % ( 59 | self.num_factor_1, self.num_factor_2, self.hidden_dimension)) 60 | 61 | # model dependent arguments 62 | self.user_id = tf.placeholder(dtype=tf.int32, shape=[None], name='user_id') 63 | self.item_id = tf.placeholder(dtype=tf.int32, shape=[None], name='item_id') 64 | self.y = tf.placeholder("float", [None], 'rating') 65 | # latent feature vectors 66 | P = 
tf.Variable(tf.random_normal([self.num_user, self.num_factor_1], stddev=0.01)) 67 | Q = tf.Variable(tf.random_normal([self.num_item, self.num_factor_1], stddev=0.01)) 68 | # latent feature matrix(K=1?) 69 | U = tf.Variable(tf.random_normal([self.num_user, self.num_factor_2], stddev=0.01)) 70 | V = tf.Variable(tf.random_normal([self.num_item, self.num_factor_2], stddev=0.01)) 71 | 72 | input = tf.concat(values=[tf.nn.embedding_lookup(P, self.user_id), 73 | tf.nn.embedding_lookup(Q, self.item_id), 74 | tf.multiply(tf.nn.embedding_lookup(U, self.user_id), 75 | tf.nn.embedding_lookup(V, self.item_id)) 76 | ], axis=1) 77 | # 78 | # tf1->tf2 79 | # regularizer = tf.contrib.layers.l2_regularizer(scale=self.reg_rate) 80 | regularizer = tf.keras.regularizers.l2(self.reg_rate) 81 | layer_1 = tf.layers.dense(inputs=input, units=2 * self.num_factor_1 + self.num_factor_2, 82 | bias_initializer=tf.random_normal_initializer, 83 | kernel_initializer=tf.random_normal_initializer, activation=tf.sigmoid, 84 | kernel_regularizer=regularizer) 85 | layer_2 = tf.layers.dense(inputs=layer_1, units=self.hidden_dimension, activation=tf.sigmoid, 86 | bias_initializer=tf.random_normal_initializer, 87 | kernel_initializer=tf.random_normal_initializer, 88 | kernel_regularizer=regularizer) 89 | layer_3 = tf.layers.dense(inputs=layer_2, units=self.hidden_dimension, activation=tf.sigmoid, 90 | bias_initializer=tf.random_normal_initializer, 91 | kernel_initializer=tf.random_normal_initializer, 92 | kernel_regularizer=regularizer) 93 | layer_4 = tf.layers.dense(inputs=layer_3, units=self.hidden_dimension, activation=tf.sigmoid, 94 | bias_initializer=tf.random_normal_initializer, 95 | kernel_initializer=tf.random_normal_initializer, 96 | kernel_regularizer=regularizer) 97 | output = tf.layers.dense(inputs=layer_4, units=1, activation=None, 98 | bias_initializer=tf.random_normal_initializer, 99 | kernel_initializer=tf.random_normal_initializer, 100 | kernel_regularizer=regularizer) 101 | self.pred_rating = tf.reshape(output, [-1]) 102 | self.loss = tf.reduce_sum(tf.square(self.y - self.pred_rating)) \ 103 | + tf.losses.get_regularization_loss() + self.reg_rate * ( 104 | tf.norm(U) + tf.norm(V) + tf.norm(P) + tf.norm(Q)) 105 | self.optimizer = tf.train.RMSPropOptimizer(learning_rate=self.learning_rate).minimize(self.loss) 106 | 107 | def train(self): 108 | self.num_training = len(self.rating) 109 | total_batch = int(self.num_training / self.batch_size) 110 | idxs = np.random.permutation(self.num_training) # shuffled ordering 111 | user_random = list(self.user[idxs]) 112 | item_random = list(self.item[idxs]) 113 | rating_random = list(self.rating[idxs]) 114 | # train 115 | for i in range(total_batch): 116 | batch_user = user_random[i * self.batch_size:(i + 1) * self.batch_size] 117 | batch_item = item_random[i * self.batch_size:(i + 1) * self.batch_size] 118 | batch_rating = rating_random[i * self.batch_size:(i + 1) * self.batch_size] 119 | 120 | _, loss = self.sess.run([self.optimizer, self.loss], feed_dict={self.user_id: batch_user, 121 | self.item_id: batch_item, 122 | self.y: batch_rating 123 | }) 124 | return loss 125 | 126 | def test(self, test_data): 127 | error = 0 128 | error_mae = 0 129 | test_set = list(test_data.keys()) 130 | for (u, i) in test_set: 131 | pred_rating_test = self.predict([u], [i])[0] 132 | error += (float(test_data.get((u, i))) - pred_rating_test) ** 2 133 | error_mae += (np.abs(float(test_data.get((u, i))) - pred_rating_test)) 134 | rmse = np.sqrt(error / len(test_set)) 135 | mae = error_mae / 
len(test_set) 136 | return rmse, mae 137 | 138 | def execute(self): 139 | loss_prev = float("inf") 140 | for epoch in range(self.epochs): 141 | loss_cur = self.train() 142 | if epoch % self.T == 0: 143 | print("epoch:\t", epoch, "\tloss:\t", loss_cur) 144 | if abs(loss_cur - loss_prev) < math.exp(-5): 145 | break 146 | loss_prev = loss_cur 147 | rmse, mae = self.test(self.dataset_class.test_matrix_dok) 148 | print("training done\tRMSE : ", rmse, "\tMAE : ", mae) 149 | 150 | def save(self, path): 151 | saver = tf.train.Saver() 152 | saver.save(self.sess, path) 153 | 154 | def restore(self, path): 155 | init = tf.global_variables_initializer() 156 | self.sess.run(init) 157 | saver = tf.train.Saver() 158 | saver.restore(self.sess, path) 159 | 160 | def predict(self, user_id, item_id): 161 | if type(item_id) != list: 162 | item_id = [item_id] 163 | if type(user_id) != list: 164 | user_id = [user_id] * len(item_id) 165 | return self.sess.run([self.pred_rating], feed_dict={self.user_id: user_id, self.item_id: item_id})[0] 166 | -------------------------------------------------------------------------------- /Leg-UP/models/detector/SDLib/data/rating.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | # from structure import sparseMatrix,new_sparseMatrix 3 | from models.detector.SDLib.tool.config import Config, LineConfig 4 | from models.detector.SDLib.tool.qmath import normalize 5 | from models.detector.SDLib.tool.dataSplit import DataSplit 6 | import os.path 7 | from re import split 8 | from collections import defaultdict 9 | 10 | 11 | class RatingDAO(object): 12 | 'data access control' 13 | 14 | def __init__(self, config, trainingData, testData): 15 | self.config = config 16 | self.ratingConfig = LineConfig(config['ratings.setup']) 17 | self.user = {} # used to store the order of users in the training set 18 | self.item = {} # used to store the order of items in the training set 19 | self.id2user = {} 20 | self.id2item = {} 21 | self.all_Item = {} 22 | self.all_User = {} 23 | self.userMeans = {} # used to store the mean values of users's ratings 24 | self.itemMeans = {} # used to store the mean values of items's ratings 25 | 26 | self.globalMean = 0 27 | self.timestamp = {} 28 | # self.trainingMatrix = None 29 | # self.validationMatrix = None 30 | self.testSet_u = testData.copy() # used to store the test set by hierarchy user:[item,rating] 31 | self.testSet_i = defaultdict(dict) # used to store the test set by hierarchy item:[user,rating] 32 | self.trainingSet_u = trainingData.copy() 33 | self.trainingSet_i = defaultdict(dict) 34 | # self.rScale = [] 35 | 36 | self.trainingData = trainingData 37 | self.testData = testData 38 | self.__generateSet() 39 | self.__computeItemMean() 40 | self.__computeUserMean() 41 | self.__globalAverage() 42 | 43 | def __generateSet(self): 44 | scale = set() 45 | # find the maximum rating and minimum value 46 | # for i, entry in enumerate(self.trainingData): 47 | # userName, itemName, rating = entry 48 | # scale.add(float(rating)) 49 | # self.rScale = list(scale) 50 | # self.rScale.sort() 51 | 52 | for i, user in enumerate(self.trainingData): 53 | for item in self.trainingData[user]: 54 | 55 | # makes the rating within the range [0, 1]. 
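                # A minimal sketch of what the disabled call below presumably computes,
                # assuming qmath.normalize is the usual min-max scaling (qmath is not shown here):
                #   normalize(r, max_r, min_r) = (r - min_r) / (max_r - min_r)
                # e.g. a rating of 4 on a 1-5 scale would map to (4 - 1) / (5 - 1) = 0.75.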
56 | # rating = normalize(float(rating), self.rScale[-1], self.rScale[0]) 57 | # self.trainingSet_u[userName][itemName] = float(rating) 58 | self.trainingSet_i[item][user] = self.trainingData[user][item] 59 | # order the user 60 | # if not self.user.has_key(user): 61 | if user not in self.user: 62 | self.user[user] = len(self.user) 63 | self.id2user[self.user[user]] = user 64 | # order the item 65 | # if not self.item.has_key(item): 66 | if item not in self.item: 67 | self.item[item] = len(self.item) 68 | self.id2item[self.item[item]] = item 69 | self.trainingSet_i[item][user] = self.trainingData[user][item] 70 | # userList.append 71 | # triple.append([self.user[userName], self.item[itemName], rating]) 72 | # self.trainingMatrix = new_sparseMatrix.SparseMatrix(triple) 73 | 74 | self.all_User.update(self.user) 75 | self.all_Item.update(self.item) 76 | 77 | for i, user in enumerate(self.testData): 78 | # order the user 79 | # if not self.user.has_key(user): 80 | if user not in self.user: 81 | self.all_User[user] = len(self.all_User) 82 | for item in self.testData[user]: 83 | # order the item 84 | # if not self.item.has_key(item): 85 | if item not in self.item: 86 | self.all_Item[item] = len(self.all_Item) 87 | # self.testSet_u[userName][itemName] = float(rating) 88 | self.testSet_i[item][user] = self.testData[user][item] 89 | 90 | def __globalAverage(self): 91 | total = sum(self.userMeans.values()) 92 | if total == 0: 93 | self.globalMean = 0 94 | else: 95 | self.globalMean = total / len(self.userMeans) 96 | 97 | def __computeUserMean(self): 98 | # for u in self.user: 99 | # n = self.row(u) > 0 100 | # mean = 0 101 | # 102 | # if not self.containsUser(u): # no data about current user in training set 103 | # pass 104 | # else: 105 | # sum = float(self.row(u)[0].sum()) 106 | # try: 107 | # mean = sum/ n[0].sum() 108 | # except ZeroDivisionError: 109 | # mean = 0 110 | # self.userMeans[u] = mean 111 | for u in self.trainingSet_u: 112 | self.userMeans[u] = sum(self.trainingSet_u[u].values()) / (len(self.trainingSet_u[u].values()) + 0.0) 113 | for u in self.testSet_u: 114 | self.userMeans[u] = sum(self.testSet_u[u].values()) / (len(self.testSet_u[u].values()) + 0.0) 115 | 116 | def __computeItemMean(self): 117 | # for c in self.item: 118 | # n = self.col(c) > 0 119 | # mean = 0 120 | # if not self.containsItem(c): # no data about current user in training set 121 | # pass 122 | # else: 123 | # sum = float(self.col(c)[0].sum()) 124 | # try: 125 | # mean = sum / n[0].sum() 126 | # except ZeroDivisionError: 127 | # mean = 0 128 | # self.itemMeans[c] = mean 129 | for item in self.trainingSet_i: 130 | self.itemMeans[item] = sum(self.trainingSet_i[item].values()) / ( 131 | len(self.trainingSet_i[item].values()) + 0.0) 132 | for item in self.testSet_i: 133 | self.itemMeans[item] = sum(self.testSet_i[item].values()) / (len(self.testSet_i[item].values()) + 0.0) 134 | 135 | def getUserId(self, u): 136 | if self.user.has_key(u): 137 | return self.user[u] 138 | else: 139 | return -1 140 | 141 | def getItemId(self, i): 142 | if self.item.has_key(i): 143 | return self.item[i] 144 | else: 145 | return -1 146 | 147 | def trainingSize(self): 148 | recordCount = 0 149 | for user in self.trainingData: 150 | recordCount += len(self.trainingData[user]) 151 | return (len(self.trainingSet_u), len(self.trainingSet_i), recordCount) 152 | 153 | def testSize(self): 154 | recordCount = 0 155 | for user in self.testData: 156 | recordCount += len(self.testData[user]) 157 | return (len(self.testSet_u), len(self.testSet_i), 
recordCount) 158 | 159 | def contains(self, u, i): 160 | 'whether user u rated item i' 161 | if self.trainingSet_u.has_key(u) and self.trainingSet_u[u].has_key(i): 162 | return True 163 | return False 164 | 165 | def containsUser(self, u): 166 | 'whether user is in training set' 167 | return self.trainingSet_u.has_key(u) 168 | 169 | def containsItem(self, i): 170 | 'whether item is in training set' 171 | return self.trainingSet_i.has_key(i) 172 | 173 | def allUserRated(self, u): 174 | if u in self.user: 175 | return self.trainingSet_u[u].keys(), self.trainingSet_u[u].values() 176 | else: 177 | return self.testSet_u[u].keys(), self.testSet_u[u].values() 178 | # def userRated(self,u): 179 | # if self.trainingMatrix.matrix_User.has_key(self.getUserId(u)): 180 | # itemIndex = self.trainingMatrix.matrix_User[self.user[u]].keys() 181 | # rating = self.trainingMatrix.matrix_User[self.user[u]].values() 182 | # return (itemIndex,rating) 183 | # return ([],[]) 184 | # 185 | # def itemRated(self,i): 186 | # if self.trainingMatrix.matrix_Item.has_key(self.getItemId(i)): 187 | # userIndex = self.trainingMatrix.matrix_Item[self.item[i]].keys() 188 | # rating = self.trainingMatrix.matrix_Item[self.item[i]].values() 189 | # return (userIndex,rating) 190 | # return ([],[]) 191 | 192 | # def row(self,u): 193 | # return self.trainingMatrix.row(self.getUserId(u)) 194 | # 195 | # def col(self,c): 196 | # return self.trainingMatrix.col(self.getItemId(c)) 197 | # 198 | # def sRow(self,u): 199 | # return self.trainingMatrix.sRow(self.getUserId(u)) 200 | # 201 | # def sCol(self,c): 202 | # return self.trainingMatrix.sCol(self.getItemId(c)) 203 | # 204 | # def rating(self,u,c): 205 | # return self.trainingMatrix.elem(self.getUserId(u),self.getItemId(c)) 206 | # 207 | # def ratingScale(self): 208 | # return (self.rScale[0],self.rScale[1]) 209 | 210 | # def elemCount(self): 211 | # return self.trainingMatrix.elemCount() 212 | -------------------------------------------------------------------------------- /AUSH/test_main/main_eval_similarity_foryangqian.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # @Time : 2019/8/25 19:38 3 | # @Author : chensi 4 | # @File : main_eval_similarity.py 5 | # @Software : PyCharm 6 | # @Desciption : None 7 | 8 | import numpy as np 9 | from numpy.linalg import * 10 | import scipy.stats 11 | import sys, os, argparse 12 | import pandas as pd 13 | 14 | sys.path.append("../") 15 | from test_main.main_baseline_attack import baseline_attack 16 | from test_main.main_gan_attack import gan_attack 17 | from test_main.main_gan_attack_baseline import gan_attack as gan_attack_baseline 18 | from utils.load_data.load_data import * 19 | from utils.load_data.load_attack_info import load_attack_info 20 | from model.attack_model.gan_attack.trainer import Train_GAN_Attacker 21 | 22 | 23 | def eval_eigen_value(profiles): 24 | U_T_U = np.dot(profiles.transpose(), profiles) 25 | eig_val, _ = eig(U_T_U) 26 | top_10 = [i.real for i in eig_val[:10]] 27 | return top_10 28 | 29 | 30 | def get_item_distribution(profiles): 31 | # [min(max(0, round(i)), 5) for i in a] 32 | profiles_T = profiles.transpose() 33 | fn_count = lambda item_vec: np.array( 34 | [sum([1 if (min(max(0, round(j)), 5) == i) else 0 for j in item_vec]) for i in range(6)]) 35 | fn_norm = lambda item_vec: item_vec / sum(item_vec) 36 | item_distribution = np.array(list(map(fn_count, profiles_T))) 37 | item_distribution = np.array(list(map(fn_norm, item_distribution))) 38 | return 
item_distribution 39 | 40 | 41 | def eval_TVD_JS(P, Q): 42 | # TVD 43 | dis_TVD = np.mean(np.sum(np.abs(P - Q) / 2, 1)) 44 | # JS 45 | fn_KL = lambda p, q: scipy.stats.entropy(p, q) 46 | M = (P + Q) / 2 47 | js_vec = [] 48 | for iid in range(P.shape[0]): 49 | p, q, m = P[iid], Q[iid], M[iid] 50 | js_vec.append((fn_KL(p, m) + fn_KL(q, m)) / 2) 51 | dis_JS = np.mean(np.array(js_vec)) 52 | return dis_TVD, dis_JS 53 | 54 | 55 | def print_eigen_result(real_profiles, fake_profiles_gan, baseline_fake_profiles, baseline_methods): 56 | top_10_res = [] 57 | top_10_real = eval_eigen_value(real_profiles) 58 | top_10_res.append("real\t" + '\t'.join(map(str, top_10_real))) 59 | top_10_baseline = [] 60 | for idx in range(len(baseline_fake_profiles)): 61 | top_10_baseline.append(eval_eigen_value(baseline_fake_profiles[idx])) 62 | top_10_res.append(baseline_methods[idx] + "\t" + '\t'.join(map(str, top_10_baseline[-1]))) 63 | top_10_gan = eval_eigen_value(fake_profiles_gan) 64 | # top_10_sample_5 = eval_eigen_value(fake_profiles_sample_5) 65 | # top_10_real_sample = eval_eigen_value(real_profiles_gan) 66 | top_10_res.append("gan\t" + '\t'.join(map(str, top_10_gan))) 67 | # top_10_res.append("sample_5\t" + '\t'.join(map(str, top_10_sample_5))) 68 | # top_10_res.append("real_sample\t" + '\t'.join(map(str, top_10_real_sample))) 69 | print("\n".join(top_10_res)) 70 | 71 | 72 | def get_distance_result(target_id, real_profiles, fake_profiles_list, method_name): 73 | k = ['target_id', 'attack_method', 'dis_TVD', 'dis_JS'] 74 | v = [[], [], [], []] 75 | res_dis = [] 76 | real_item_distribution = get_item_distribution(real_profiles) 77 | for idx in range(len(fake_profiles_list)): 78 | dis_TVD, dis_JS = eval_TVD_JS(real_item_distribution, get_item_distribution(fake_profiles_list[idx])) 79 | v[1] += [method_name[idx]] 80 | v[2] += [dis_TVD] 81 | v[3] += [dis_JS] 82 | v[0] = [target_id] * len(v[1]) 83 | result = pd.DataFrame(dict(zip(k, v))) 84 | return result 85 | 86 | 87 | def profiles_generator(target_id, dataset_class, attack_info, bandwagon_selected, sample_num, args, real_profiles, 88 | filler_indicator, pre_fix, has_G=False): 89 | # baseline fake profiles 90 | baseline_methods = ["segment", "average", "random", "bandwagon"] 91 | baseline_fake_profiles = [] 92 | for attack_method in baseline_methods: 93 | attack_model = '_'.join([attack_method, str(sample_num), str(args.filler_num)]) 94 | fake_profiles = baseline_attack(dataset_class, attack_info, attack_model, target_id, 95 | bandwagon_selected, filler_indicator) 96 | baseline_fake_profiles.append(fake_profiles) 97 | 98 | for attack_method in baseline_methods: 99 | attack_model = '_'.join([attack_method, str(sample_num), str(args.filler_num)]) 100 | fake_profiles = baseline_attack(dataset_class, attack_info, attack_model, target_id, 101 | bandwagon_selected, None) 102 | baseline_fake_profiles.append(fake_profiles) 103 | baseline_methods = baseline_methods + [i + '_rand' for i in baseline_methods] 104 | 105 | final_attack_setting = [sample_num, real_profiles, filler_indicator] 106 | # new_baseline 107 | if has_G: 108 | for attack_method in ['G0' + pre_fix, 'G1' + pre_fix]: 109 | baseline_methods.append(attack_method) 110 | fake_profiles_G, _, _ = gan_attack_baseline(args.dataset, attack_method, target_id, False, 0, 111 | final_attack_setting=final_attack_setting) 112 | baseline_fake_profiles.append(fake_profiles_G) 113 | 114 | # gan profiles 115 | attack_method = "gan" + pre_fix 116 | fake_profiles_gan, _, _ = gan_attack(args.dataset, attack_method, target_id, 
False, write_to_file=0, 117 | final_attack_setting=final_attack_setting) 118 | return fake_profiles_gan, baseline_fake_profiles, baseline_methods 119 | 120 | 121 | def parse_arg(): 122 | parser = argparse.ArgumentParser() 123 | 124 | parser.add_argument('--dataset', type=str, default='ml100k', 125 | help='input data_set_name,filmTrust or ml100k grocery') 126 | 127 | parser.add_argument('--attack_num', type=int, default=50, 128 | help='num of attack fake user,50 for ml100k and filmTrust') 129 | 130 | parser.add_argument('--filler_num', type=int, default=90, 131 | help='num of filler items each fake user,90 for ml100k,36 for filmTrust') 132 | # filmTrust:5,395,181,565,254,601,623,619,64,558 - random*5+tail*5 133 | # ml100k:62,1077,785,1419,1257,1319,1612,1509,1545,1373 - random*5+tail*5 134 | parser.add_argument('--targets', type=str, default='62,1077,785,1419,1257,1319,1612,1509,1545,1373', 135 | help='attack_targets') 136 | parser.add_argument('--bandwagon_selected', type=str, default='180,99,49', 137 | help='180,99,49 for ml100k,103,98,115 for filmTrust') 138 | # 139 | args = parser.parse_args() 140 | # 141 | args.targets = list(map(int, args.targets.split(','))) 142 | args.bandwagon_selected = list(map(int, args.bandwagon_selected.split(','))) 143 | return args 144 | 145 | 146 | if __name__ == '__main__': 147 | """ 148 | step1 - load data 149 | step2 - build real profiles and load the DCGAN/WGAN attacked profiles 150 | step3 - compute distribution distances (TVD/JS) and export the results 151 | """ 152 | 153 | # 154 | """parse args""" 155 | args = parse_arg() 156 | pre_fix = '_' + str(args.attack_num) + '_' + str(args.filler_num) 157 | 158 | """step1 - load data""" 159 | path_train = "../data/data/" + args.dataset + "_train.dat" 160 | path_test = "../data/data/" + args.dataset + "_test.dat" 161 | attack_info_path = ["../data/data/" + args.dataset + "_selected_items", 162 | "../data/data/" + args.dataset + "_target_users"] 163 | dataset_class = load_data(path_train=path_train, path_test=path_test, header=['user_id', 'item_id', 'rating'], 164 | sep='\t', print_log=False) 165 | attack_info = load_attack_info(*attack_info_path) 166 | 167 | sample_num = dataset_class.n_users 168 | result = None 169 | for target_id in args.targets: 170 | selected = attack_info[target_id][0] 171 | """step2.1 - real_profiles""" 172 | gan_attacker = Train_GAN_Attacker(dataset_class, params_D=None, params_G=None, target_id=target_id, 173 | selected_id_list=selected, filler_num=args.filler_num, 174 | attack_num=args.attack_num, filler_method=0) 175 | _, real_profiles, filler_indicator = gan_attacker.execute(is_train=0, model_path='no', 176 | final_attack_setting=[sample_num, None, None]) 177 | """step2.2 - load the attacked datasets and collect fake profiles""" 178 | 179 | dir = None  # path to the directory holding the attacked .dat files; must be set before running 180 | fake_profiles_list = [] 181 | method_list = [] 182 | for attack_method in ['IAutoRec', 'UAutoRec', 'NNMF', 'NMF_25']: 183 | path_dcgan = dir + 'D-%s-ml100k\\ml100k_%d_dcgan_50_90.dat' % (attack_method, target_id) 184 | dataset_class_dcgan = load_data(path_train=path_dcgan, path_test=path_test, 185 | header=['user_id', 'item_id', 'rating'], 186 | sep='\t', print_log=False) 187 | fake_profiles_ = dataset_class_dcgan.train_matrix.toarray()[dataset_class.n_users:] 188 | while fake_profiles_.shape[0] < dataset_class.n_users: 189 | fake_profiles_ = np.concatenate([fake_profiles_, fake_profiles_]) 190 | fake_profiles_ = fake_profiles_[:dataset_class.n_users] 191 | 192 | path_wgan = dir + 'W-%s-ml100k\\ml100k_%d_wgan_50_90.dat' % (attack_method, target_id) 193 | dataset_class_dcgan = load_data(path_train=path_wgan, path_test=path_test, 194 | header=['user_id', 'item_id', 'rating'], 195 | sep='\t',
print_log=False) 196 | fake_profiles_w = dataset_class_dcgan.train_matrix.toarray()[dataset_class.n_users:] 197 | while fake_profiles_w.shape[0] < dataset_class.n_users: 198 | fake_profiles_w = np.concatenate([fake_profiles_w, fake_profiles_w]) 199 | fake_profiles_w = fake_profiles_w[:dataset_class.n_users] 200 | # 201 | fake_profiles_list += [fake_profiles_, fake_profiles_w] 202 | method_list += ['dcgan', 'wgan'] 203 | """step3 """ 204 | result_ = get_distance_result(target_id, real_profiles, fake_profiles_list, method_list) 205 | result = result_ if result is None else pd.concat([result, result_]) 206 | print(result) 207 | result.groupby('attack_method').mean().to_excel(args.dataset + '_distance_new.xls', index=False) 208 | -------------------------------------------------------------------------------- /AUSH/test_main/main_eval_similarity.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # @Time : 2019/8/25 19:38 3 | # @Author : chensi 4 | # @File : main_eval_similarity.py 5 | # @Software : PyCharm 6 | # @Desciption : None 7 | 8 | import numpy as np 9 | from numpy.linalg import * 10 | import scipy.stats 11 | import sys, os, argparse 12 | import pandas as pd 13 | 14 | sys.path.append("../") 15 | from test_main.main_baseline_attack import baseline_attack 16 | from test_main.main_gan_attack import gan_attack 17 | from test_main.main_gan_attack_baseline import gan_attack as gan_attack_baseline 18 | from utils.load_data.load_data import * 19 | from utils.load_data.load_attack_info import load_attack_info 20 | from model.attack_model.gan_attack.trainer import Train_GAN_Attacker 21 | 22 | 23 | def eval_eigen_value(profiles): 24 | U_T_U = np.dot(profiles.transpose(), profiles) 25 | eig_val, _ = eig(U_T_U) 26 | top_10 = [i.real for i in eig_val[:10]] 27 | return top_10 28 | 29 | 30 | def get_item_distribution(profiles): 31 | # [min(max(0, round(i)), 5) for i in a] 32 | profiles_T = profiles.transpose() 33 | fn_count = lambda item_vec: np.array( 34 | [sum([1 if (min(max(0, round(j)), 5) == i) else 0 for j in item_vec]) for i in range(6)]) 35 | fn_norm = lambda item_vec: item_vec / sum(item_vec) 36 | item_distribution = np.array(list(map(fn_count, profiles_T))) 37 | item_distribution = np.array(list(map(fn_norm, item_distribution))) 38 | return item_distribution 39 | 40 | 41 | def eval_TVD_JS(P, Q): 42 | # TVD 43 | dis_TVD = np.mean(np.sum(np.abs(P - Q) / 2, 1)) 44 | # JS 45 | fn_KL = lambda p, q: scipy.stats.entropy(p, q) 46 | M = (P + Q) / 2 47 | js_vec = [] 48 | for iid in range(P.shape[0]): 49 | p, q, m = P[iid], Q[iid], M[iid] 50 | js_vec.append((fn_KL(p, m) + fn_KL(q, m)) / 2) 51 | dis_JS = np.mean(np.array(js_vec)) 52 | return dis_TVD, dis_JS 53 | 54 | 55 | def print_eigen_result(real_profiles, fake_profiles_gan, baseline_fake_profiles, baseline_methods): 56 | top_10_res = [] 57 | top_10_real = eval_eigen_value(real_profiles) 58 | top_10_res.append("real\t" + '\t'.join(map(str, top_10_real))) 59 | top_10_baseline = [] 60 | for idx in range(len(baseline_fake_profiles)): 61 | top_10_baseline.append(eval_eigen_value(baseline_fake_profiles[idx])) 62 | top_10_res.append(baseline_methods[idx] + "\t" + '\t'.join(map(str, top_10_baseline[-1]))) 63 | top_10_gan = eval_eigen_value(fake_profiles_gan) 64 | # top_10_sample_5 = eval_eigen_value(fake_profiles_sample_5) 65 | # top_10_real_sample = eval_eigen_value(real_profiles_gan) 66 | top_10_res.append("gan\t" + '\t'.join(map(str, top_10_gan))) 67 | # top_10_res.append("sample_5\t" + 
'\t'.join(map(str, top_10_sample_5))) 68 | # top_10_res.append("real_sample\t" + '\t'.join(map(str, top_10_real_sample))) 69 | print("\n".join(top_10_res)) 70 | 71 | 72 | def get_distance_result(target_id, real_profiles, fake_profiles_gan, baseline_fake_profiles, baseline_methods): 73 | k = ['target_id', 'attack_method', 'dis_TVD', 'dis_JS'] 74 | v = [[], [], [], []] 75 | res_dis = [] 76 | real_item_distribution = get_item_distribution(real_profiles) 77 | # real_gan_item_distribution = get_item_distribution(real_profiles_gan) 78 | fake_gan_distribution = get_item_distribution(fake_profiles_gan) 79 | # fake_sample_5_distribution = get_item_distribution(fake_profiles_sample_5) 80 | # dis_TVD, dis_JS = eval_TVD_JS(real_item_distribution, real_gan_item_distribution) 81 | # res_dis.append('\t'.join(map(str, ["real", "real_gan", dis_TVD, dis_JS]))) 82 | # dis_TVD, dis_JS = eval_TVD_JS(real_gan_item_distribution, fake_gan_distribution) 83 | # res_dis.append('\t'.join(map(str, ["real_gan", "gan", dis_TVD, dis_JS]))) 84 | # dis_TVD, dis_JS = eval_TVD_JS(real_item_distribution, fake_sample_5_distribution) 85 | # res_dis.append('\t'.join(map(str, ["real", "sample_5", dis_TVD, dis_JS]))) 86 | # dis_TVD, dis_JS = eval_TVD_JS(real_gan_item_distribution, fake_sample_5_distribution) 87 | # res_dis.append('\t'.join(map(str, ["real_gan", "sample_5", dis_TVD, dis_JS]))) 88 | dis_TVD, dis_JS = eval_TVD_JS(real_item_distribution, fake_gan_distribution) 89 | v[1] += ['gan'] 90 | v[2] += [dis_TVD] 91 | v[3] += [dis_JS] 92 | # res_dis.append('\t'.join(map(str, [target_id, "gan", dis_TVD, dis_JS]))) 93 | for idx in range(len(baseline_fake_profiles)): 94 | dis_TVD, dis_JS = eval_TVD_JS(real_item_distribution, get_item_distribution(baseline_fake_profiles[idx])) 95 | v[1] += [baseline_methods[idx]] 96 | v[2] += [dis_TVD] 97 | v[3] += [dis_JS] 98 | # res_dis.append('\t'.join(map(str, [target_id, baseline_methods[idx], dis_TVD, dis_JS]))) 99 | v[0] = [target_id] * len(v[1]) 100 | result = pd.DataFrame(dict(zip(k, v))) 101 | # print('\n'.join(res_dis)) 102 | return result 103 | 104 | 105 | def profiles_generator(target_id, dataset_class, attack_info, bandwagon_selected, sample_num, args, real_profiles, 106 | filler_indicator, pre_fix, has_G=False): 107 | # baseline fake profiles 108 | baseline_methods = ["segment", "average", "random", "bandwagon"] 109 | baseline_fake_profiles = [] 110 | for attack_method in baseline_methods: 111 | attack_model = '_'.join([attack_method, str(sample_num), str(args.filler_num)]) 112 | fake_profiles = baseline_attack(dataset_class, attack_info, attack_model, target_id, 113 | bandwagon_selected, filler_indicator) 114 | baseline_fake_profiles.append(fake_profiles) 115 | 116 | for attack_method in baseline_methods: 117 | attack_model = '_'.join([attack_method, str(sample_num), str(args.filler_num)]) 118 | fake_profiles = baseline_attack(dataset_class, attack_info, attack_model, target_id, 119 | bandwagon_selected, None) 120 | baseline_fake_profiles.append(fake_profiles) 121 | baseline_methods = baseline_methods + [i + '_rand' for i in baseline_methods] 122 | 123 | final_attack_setting = [sample_num, real_profiles, filler_indicator] 124 | # new_baseline 125 | if has_G: 126 | for attack_method in ['G0' + pre_fix, 'G1' + pre_fix]: 127 | baseline_methods.append(attack_method) 128 | fake_profiles_G, _, _ = gan_attack_baseline(args.dataset, attack_method, target_id, False, 0, 129 | final_attack_setting=final_attack_setting) 130 | baseline_fake_profiles.append(fake_profiles_G) 131 | 132 | # gan 
profiles 133 | attack_method = "gan" + pre_fix 134 | fake_profiles_gan, _, _ = gan_attack(args.dataset, attack_method, target_id, False, write_to_file=0, 135 | final_attack_setting=final_attack_setting) 136 | return fake_profiles_gan, baseline_fake_profiles, baseline_methods 137 | 138 | 139 | def parse_arg(): 140 | parser = argparse.ArgumentParser() 141 | 142 | parser.add_argument('--dataset', type=str, default='ml100k', 143 | help='input data_set_name,filmTrust or ml100k grocery') 144 | 145 | parser.add_argument('--attack_num', type=int, default=50, 146 | help='num of attack fake user,50 for ml100k and filmTrust') 147 | 148 | parser.add_argument('--filler_num', type=int, default=90, 149 | help='num of filler items each fake user,90 for ml100k,36 for filmTrust') 150 | # filmTrust:5,395,181,565,254,601,623,619,64,558 - random*5+tail*5 151 | # ml100k:62,1077,785,1419,1257,1319,1612,1509,1545,1373 - random*5+tail*5 152 | parser.add_argument('--targets', type=str, default='62,1077,785,1419,1257,1319,1612,1509,1545,1373', help='attack_targets') 153 | parser.add_argument('--bandwagon_selected', type=str, default='180,99,49', 154 | help='180,99,49 for ml100k,103,98,115 for filmTrust') 155 | # 156 | args = parser.parse_args() 157 | # 158 | args.targets = list(map(int, args.targets.split(','))) 159 | args.bandwagon_selected = list(map(int, args.bandwagon_selected.split(','))) 160 | return args 161 | 162 | 163 | if __name__ == '__main__': 164 | """ 165 | step1 - load data 166 | step2 - 167 | step3 - 168 | """ 169 | 170 | # 171 | """parse args""" 172 | args = parse_arg() 173 | pre_fix = '_' + str(args.attack_num) + '_' + str(args.filler_num) 174 | 175 | """step1 - load data""" 176 | path_train = "../data/data/" + args.dataset + "_train.dat" 177 | path_test = "../data/data/" + args.dataset + "_test.dat" 178 | attack_info_path = ["../data/data/" + args.dataset + "_selected_items", 179 | "../data/data/" + args.dataset + "_target_users"] 180 | dataset_class = load_data(path_train=path_train, path_test=path_test, header=['user_id', 'item_id', 'rating'], 181 | sep='\t', print_log=False) 182 | attack_info = load_attack_info(*attack_info_path) 183 | 184 | sample_num = dataset_class.n_users 185 | result = None 186 | for target_id in args.targets: 187 | selected = attack_info[target_id][0] 188 | 189 | attackSetting_path = '_'.join(map(str, [args.dataset, sample_num, args.filler_num, target_id])) 190 | attackSetting_path = "../data/data_attacked/" + attackSetting_path + '_attackSetting' 191 | gan_attacker = Train_GAN_Attacker(dataset_class, params_D=None, params_G=None, target_id=target_id, 192 | selected_id_list=selected, filler_num=args.filler_num, 193 | attack_num=args.attack_num, filler_method=0) 194 | _, real_profiles, filler_indicator = gan_attacker.execute(is_train=0, model_path='no', 195 | final_attack_setting=[sample_num, None, None]) 196 | np.save(attackSetting_path, [real_profiles, filler_indicator]) 197 | 198 | fake_profiles_gan, baseline_fake_profiles, baseline_methods \ 199 | = profiles_generator(target_id, dataset_class, attack_info, args.bandwagon_selected, sample_num, args, 200 | real_profiles, filler_indicator, pre_fix, has_G=True) 201 | 202 | 203 | # result_ = get_distance_result(target_id, real_profiles, fake_profiles_gan, baseline_fake_profiles, 204 | # baseline_methods) 205 | result_ = get_distance_result(target_id, dataset_class.train_matrix.toarray(), fake_profiles_gan, 206 | baseline_fake_profiles, 207 | baseline_methods) 208 | 209 | result = result_ if result is None else 
pd.concat([result, result_]) 210 | print(result) 211 | result.to_excel(args.dataset + '_distance_lianyun.xls', index=False) 212 | -------------------------------------------------------------------------------- /AUSH/test_main/WGAN_yangqian.py: -------------------------------------------------------------------------------- 1 | import math 2 | import numpy as np 3 | import tensorflow as tf 4 | import sys 5 | import math 6 | sys.path.append("../") 7 | from tensorflow.python.framework import ops 8 | from six.moves import xrange 9 | from utils.load_data.load_data import load_data 10 | from utils.load_data.load_attack_info import load_attack_info 11 | import utils as ut 12 | 13 | 14 | if "concat_v2" in dir(tf): 15 | def concat(tensors, axis, *args, **kwargs): 16 | return tf.concat_v2(tensors, axis, *args, **kwargs) 17 | else: 18 | def concat(tensors, axis, *args, **kwargs): 19 | return tf.concat(tensors, axis, *args, **kwargs) 20 | 21 | 22 | class batch_norm(object): 23 | def __init__(self, epsilon=1e-5, momentum=0.9, name="batch_norm"): 24 | with tf.variable_scope(name): 25 | self.epsilon = epsilon 26 | self.momentum = momentum 27 | self.name = name 28 | 29 | def __call__(self, x, train=True): 30 | return tf.contrib.layers.batch_norm(x, 31 | decay=self.momentum, 32 | updates_collections=None, 33 | epsilon=self.epsilon, 34 | scale=True, 35 | is_training=train, 36 | scope=self.name) 37 | 38 | 39 | def conv_cond_concat(x, y): 40 | """Concatenate conditioning vector on feature map axis.""" 41 | x_shapes = x.get_shape() 42 | y_shapes = y.get_shape() 43 | return concat([ 44 | x, y * tf.ones([x_shapes[0], x_shapes[1], x_shapes[2], y_shapes[3]])], 3) 45 | 46 | 47 | def conv2d(input_, output_dim, 48 | k_h=5, k_w=5, d_h=2, d_w=2, stddev=0.02, 49 | name="conv2d"): 50 | with tf.variable_scope(name): 51 | w = tf.get_variable('w', [k_h, k_w, input_.get_shape()[-1], output_dim], 52 | initializer=tf.truncated_normal_initializer(stddev=stddev)) 53 | conv = tf.nn.conv2d(input_, w, strides=[1, d_h, d_w, 1], padding='SAME') 54 | 55 | biases = tf.get_variable('biases', [output_dim], initializer=tf.constant_initializer(0.0)) 56 | conv = tf.reshape(tf.nn.bias_add(conv, biases), conv.get_shape()) 57 | 58 | return conv 59 | 60 | 61 | # kernel_size = 5 * 5 62 | def deconv2d(input_, output_shape, 63 | k_h=5, k_w=5, d_h=2, d_w=2, stddev=0.02, 64 | name="deconv2d", with_w=False): 65 | with tf.variable_scope(name): 66 | # filter : [height, width, output_channels, in_channels] 67 | w = tf.get_variable('w', [k_h, k_w, output_shape[-1], input_.get_shape()[-1]], 68 | initializer=tf.random_normal_initializer(stddev=stddev)) 69 | 70 | try: 71 | deconv = tf.nn.conv2d_transpose(input_, w, output_shape=output_shape, 72 | strides=[1, d_h, d_w, 1]) 73 | 74 | # Support for verisons of TensorFlow before 0.7.0 75 | except AttributeError: 76 | deconv = tf.nn.deconv2d(input_, w, output_shape=output_shape, 77 | strides=[1, d_h, d_w, 1]) 78 | 79 | biases = tf.get_variable('biases', [output_shape[-1]], initializer=tf.constant_initializer(0.0)) 80 | deconv = tf.reshape(tf.nn.bias_add(deconv, biases), deconv.get_shape()) 81 | 82 | if with_w: 83 | return deconv, w, biases 84 | else: 85 | return deconv 86 | 87 | 88 | def lrelu(x, leak=0.2, name="lrelu"): 89 | return tf.maximum(x, leak * x) 90 | 91 | 92 | def linear(input_, output_size, scope=None, stddev=0.02, bias_start=0.0, with_w=False): 93 | shape = input_.get_shape().as_list() 94 | 95 | with tf.variable_scope(scope or "Linear"): 96 | try: 97 | matrix = tf.get_variable("Matrix", [shape[1], 
output_size], tf.float32, 98 | tf.random_normal_initializer(stddev=stddev)) 99 | except ValueError as err: 100 | msg = "NOTE: Usually, this is due to an issue with the image dimensions. Did you correctly set '--crop' or '--input_height' or '--output_height'?" 101 | err.args = err.args + (msg,) 102 | raise 103 | bias = tf.get_variable("bias", [output_size], 104 | initializer=tf.constant_initializer(bias_start)) 105 | if with_w: 106 | return tf.matmul(input_, matrix) + bias, matrix, bias 107 | else: 108 | return tf.matmul(input_, matrix) + bias 109 | 110 | 111 | def conv_out_size_same(size, stride): 112 | return int(math.ceil(float(size) / float(stride))) 113 | 114 | 115 | def gen_random(size): 116 | # z - N(0,100) 117 | return np.random.normal(0, 100, size=size) 118 | 119 | 120 | class WGAN(object): 121 | def __init__(self, sess, dataset_class,batch_size=64, height=29, width=58, z_dim=100, gf_dim=64, df_dim=64, 122 | gfc_dim=1024, dfc_dim=1024, max_to_keep=1): 123 | self.sess = sess 124 | self.dataset_class = dataset_class 125 | self.batch_size = batch_size 126 | 127 | self.height = height 128 | self.width = width 129 | self.z_dim = z_dim 130 | self.gf_dim = gf_dim 131 | self.df_dim = df_dim 132 | self.gfc_dim = gfc_dim 133 | self.dfc_dim = dfc_dim 134 | # batch normalization : deals with poor initialization helps gradient flow 135 | self.d_bn1 = batch_norm(name='d_bn1') 136 | self.d_bn2 = batch_norm(name='d_bn2') 137 | self.d_bn3 = batch_norm(name='d_bn3') 138 | self.g_bn0 = batch_norm(name='g_bn0') 139 | self.g_bn1 = batch_norm(name='g_bn1') 140 | self.g_bn2 = batch_norm(name='g_bn2') 141 | self.g_bn3 = batch_norm(name='g_bn3') 142 | 143 | self.max_to_keep = max_to_keep 144 | 145 | self.build_model() 146 | 147 | def build_model(self): 148 | self.inputs = tf.placeholder(tf.float32, 149 | [self.batch_size, self.height, self.width, 1], 150 | name='real_images') 151 | inputs = self.inputs 152 | 153 | self.z = tf.placeholder(tf.float32, [None, self.z_dim], name='z') 154 | self.G = self.generator(self.z) 155 | 156 | self.D, self.D_logits = self.discriminator(inputs, reuse=False) 157 | self.D_, self.D_logits_ = self.discriminator(self.G, reuse=True) 158 | 159 | # def _cross_entropy_loss(self, logits, labels): 160 | # xentropy = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits, labels)) 161 | # return xentropy 162 | self.d_loss = tf.reduce_mean(tf.square(self.D_logits - self.D_logits_)) 163 | self.g_loss = tf.reduce_mean(tf.square(self.D_logits_)) 164 | # self.d_loss_real = tf.reduce_mean( 165 | # _cross_entropy_loss(self.D_logits, tf.ones_like(self.D))) 166 | # self.d_loss_fake = tf.reduce_mean( 167 | # _cross_entropy_loss(self.D_logits_, tf.zeros_like(self.D_))) 168 | # 169 | # self.g_loss = tf.reduce_mean( 170 | # _cross_entropy_loss(self.D_logits_, tf.ones_like(self.D_))) 171 | # self.d_loss = self.d_loss_real + self.d_loss_fake 172 | # 173 | t_vars = tf.trainable_variables() 174 | self.d_vars = [var for var in t_vars if 'd_' in var.name] 175 | self.g_vars = [var for var in t_vars if 'g_' in var.name] 176 | 177 | self.saver = tf.train.Saver(max_to_keep=self.max_to_keep) 178 | 179 | def train(self, config): 180 | d_optim = tf.train.RMSPropOptimizer(config.learning_rate, decay=config.beta1) \ 181 | .minimize(self.d_loss, var_list=self.d_vars) 182 | g_optim =tf.train.RMSPropOptimizer(config.learning_rate, decay=config.beta1) \ 183 | .minimize(self.g_loss, var_list=self.g_vars) 184 | try: 185 | tf.global_variables_initializer().run() 186 | except: 187 | 
tf.initialize_all_variables().run() 188 | train_idxs = list(range(self.dataset_class.train_matrix.shape[0])) 189 | for epoch in xrange(config.epoch): 190 | np.random.shuffle(train_idxs) 191 | for i in range(len(train_idxs) // self.batch_size): 192 | cur_idxs = train_idxs[i * self.batch_size:(i + 1) * self.batch_size] 193 | batch_inputs = self.dataset_class.train_matrix[cur_idxs].toarray() 194 | # transform range&shape 195 | batch_inputs = (batch_inputs - 2.5) / 2.5 196 | batch_inputs = np.reshape(batch_inputs, [self.batch_size, self.height, self.width, 1]) 197 | # batch_inputs = np.random.random_sample([self.batch_size, self.height, self.width, 1]) 198 | batch_z = gen_random(size=[config.batch_size, self.z_dim]).astype(np.float32) 199 | 200 | # Update D network 201 | _ = self.sess.run(d_optim, feed_dict={self.inputs: batch_inputs, self.z: batch_z}) 202 | 203 | # Update G network 204 | _ = self.sess.run(g_optim, feed_dict={self.z: batch_z}) 205 | 206 | # Run g_optim twice to make sure that d_loss does not go to zero (different from paper) 207 | 208 | errD= self.d_loss.eval({self.inputs: batch_inputs,self.z: batch_z}) 209 | # errD_real = self.d_loss_real.eval({self.inputs: batch_inputs}) 210 | errG = self.g_loss.eval({self.z: batch_z}) 211 | 212 | print("Epoch:[%2d/%2d]d_loss: %.8f, g_loss: %.8f" \ 213 | % (epoch, config.epoch, errD, errG)) 214 | 215 | def discriminator(self, image, reuse=False): 216 | with tf.variable_scope("discriminator") as scope: 217 | if reuse: 218 | scope.reuse_variables() 219 | # [conv+BN+LeakyRelu[64,128,256,512]]+[FC]+[sigmoid] 220 | h0 = lrelu(conv2d(image, self.df_dim, name='d_h0_conv')) 221 | h1 = lrelu(self.d_bn1(conv2d(h0, self.df_dim * 2, name='d_h1_conv'))) 222 | h2 = lrelu(self.d_bn2(conv2d(h1, self.df_dim * 4, name='d_h2_conv'))) 223 | h3 = lrelu(self.d_bn3(conv2d(h2, self.df_dim * 8, name='d_h3_conv'))) 224 | h4 = linear(tf.reshape(h3, [self.batch_size, -1]), 1, 'd_h4_lin') 225 | 226 | return tf.nn.sigmoid(h4), h4 227 | 228 | def generator(self, z): 229 | with tf.variable_scope("generator") as scope: 230 | s_h, s_w = self.height, self.width 231 | # CONV stride=2 232 | s_h2, s_w2 = conv_out_size_same(s_h, 2), conv_out_size_same(s_w, 2) 233 | s_h4, s_w4 = conv_out_size_same(s_h2, 2), conv_out_size_same(s_w2, 2) 234 | s_h8, s_w8 = conv_out_size_same(s_h4, 2), conv_out_size_same(s_w4, 2) 235 | s_h16, s_w16 = conv_out_size_same(s_h8, 2), conv_out_size_same(s_w8, 2) 236 | 237 | # FC of 2*4*512&ReLU&BN 238 | self.z_, self.h0_w, self.h0_b = linear( 239 | z, self.gf_dim * 8 * s_h16 * s_w16, 'g_h0_lin', with_w=True) 240 | self.h0 = tf.reshape( 241 | self.z_, [-1, s_h16, s_w16, self.gf_dim * 8]) 242 | h0 = tf.nn.relu(self.g_bn0(self.h0)) 243 | 244 | # four transposed CONV of [256,128,64] &ReLU&BN&kernel_size = 5 * 5 245 | self.h1, self.h1_w, self.h1_b = deconv2d( 246 | h0, [self.batch_size, s_h8, s_w8, self.gf_dim * 4], name='g_h1', with_w=True) 247 | h1 = tf.nn.relu(self.g_bn1(self.h1)) 248 | h2, self.h2_w, self.h2_b = deconv2d( 249 | h1, [self.batch_size, s_h4, s_w4, self.gf_dim * 2], name='g_h2', with_w=True) 250 | h2 = tf.nn.relu(self.g_bn2(h2)) 251 | h3, self.h3_w, self.h3_b = deconv2d( 252 | h2, [self.batch_size, s_h2, s_w2, self.gf_dim * 1], name='g_h3', with_w=True) 253 | h3 = tf.nn.relu(self.g_bn3(h3)) 254 | 255 | # transposed CONV of [1] &tanh 256 | h4, self.h4_w, self.h4_b = deconv2d( 257 | h3, [self.batch_size, s_h, s_w, 1], name='g_h4', with_w=True) 258 | 259 | return tf.nn.tanh(h4) 
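# Illustrative usage sketch (not part of the original file): the WGAN class above is never
# instantiated in the code shown, so this is one plausible way to train it, assuming the ml100k
# layout used elsewhere in test_main (1682 items = 29 * 58, so each user profile reshapes to a
# 29x58 single-channel "image"). The flag names mirror the attributes read inside WGAN.train()
# (epoch, learning_rate, beta1, batch_size); the defaults and data paths are placeholders,
# not values taken from the repository.
if __name__ == '__main__':
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument('--epoch', type=int, default=25)
    parser.add_argument('--learning_rate', type=float, default=0.0002)
    parser.add_argument('--beta1', type=float, default=0.9)  # used as the RMSProp decay in train()
    parser.add_argument('--batch_size', type=int, default=64)
    config = parser.parse_args()

    # load_data is already imported at the top of this file
    dataset_class = load_data(path_train="../data/data/ml100k_train.dat",
                              path_test="../data/data/ml100k_test.dat",
                              header=['user_id', 'item_id', 'rating'],
                              sep='\t', print_log=False)

    with tf.Session() as sess:
        wgan = WGAN(sess, dataset_class, batch_size=config.batch_size, height=29, width=58)
        wgan.train(config)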
--------------------------------------------------------------------------------
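A minimal worked sketch of the per-item distance that eval_TVD_JS computes in test_main/main_eval_similarity.py and main_eval_similarity_foryangqian.py: get_item_distribution first bins each item's ratings (rounded and clipped to 0..5) into a 6-bucket distribution, then the total variation distance and the Jensen-Shannon divergence are averaged over items. The two toy rows below are invented for illustration and are not taken from the repository.

import numpy as np
import scipy.stats

# one toy item, distributions over the 6 rating buckets 0..5 (each row sums to 1)
P = np.array([[0.50, 0.50, 0.0, 0.0, 0.0, 0.0]])   # e.g. genuine profiles
Q = np.array([[0.25, 0.75, 0.0, 0.0, 0.0, 0.0]])   # e.g. fake profiles

# total variation distance averaged over items: mean_i( sum_r |P[i, r] - Q[i, r]| / 2 )
dis_TVD = np.mean(np.sum(np.abs(P - Q) / 2, 1))     # -> 0.25

# Jensen-Shannon divergence averaged over items, with M = (P + Q) / 2 and natural-log KL
M = (P + Q) / 2
dis_JS = np.mean([(scipy.stats.entropy(P[i], M[i]) + scipy.stats.entropy(Q[i], M[i])) / 2
                  for i in range(P.shape[0])])      # -> approx. 0.034

print(dis_TVD, dis_JS)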