├── Leg-UP ├── models │ ├── detector │ │ └── SDLib │ │ │ ├── data │ │ │ ├── __init__.py │ │ │ ├── social.py │ │ │ └── rating.py │ │ │ ├── main │ │ │ ├── __init__.py │ │ │ ├── id_plus_1.py │ │ │ ├── SDLib.py │ │ │ ├── plot.py │ │ │ └── main.py │ │ │ ├── method │ │ │ ├── __init__.py │ │ │ └── FAP.py │ │ │ ├── tool │ │ │ ├── __init__.py │ │ │ ├── dataSplit.py │ │ │ ├── config.py │ │ │ ├── plot.py │ │ │ ├── qmath.py │ │ │ └── file.py │ │ │ ├── baseclass │ │ │ ├── __init__.py │ │ │ ├── SSDetection.py │ │ │ └── SDetection.py │ │ │ └── __init__.py │ └── attacker │ │ └── __init__.py ├── main.py ├── utils │ ├── loss.py │ ├── data_loader.py │ └── utils.py ├── run.sh ├── execute_model.py ├── README.md ├── run.py └── preprocess_data.py ├── AUSH ├── model │ ├── __init__.py │ ├── attack_model │ │ ├── AttackModel.py │ │ ├── gan_attack │ │ │ ├── __init__.py │ │ │ └── models.py │ │ ├── baseline.py │ │ └── gan_attack_copy │ │ │ └── models.py │ ├── trainer_rec.py │ ├── trainer_rec_surprise.py │ └── nnmf.py ├── utils │ ├── __init__.py │ ├── attack │ │ ├── __init__.py │ │ └── data_to_file.py │ └── load_data │ │ ├── __init__.py │ │ ├── load_attack_info.py │ │ └── load_data.py ├── test_main │ ├── __init__.py │ ├── example.sh │ ├── result_reporter.py │ ├── main_train_rec.py │ ├── main_gan_attack.py │ ├── main_gan_attack_baseline.py │ ├── main_eval_attack.py │ ├── data_preprocess.py │ ├── main_baseline_attack.py │ ├── dcgan.py │ ├── main_eval_similarity_foryangqian.py │ ├── main_eval_similarity.py │ └── WGAN_yangqian.py └── README.md ├── data ├── automotive │ ├── automotive_selected_items │ └── automotive_target_users ├── ml100k │ ├── ml100k_selected_items │ └── ml100k_target_users └── filmTrust │ └── filmTrust_selected_items ├── README.md └── .gitignore /Leg-UP/models/detector/SDLib/data/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /Leg-UP/models/detector/SDLib/main/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /Leg-UP/models/detector/SDLib/method/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /Leg-UP/models/detector/SDLib/tool/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /Leg-UP/models/detector/SDLib/baseclass/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /AUSH/model/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # @Time : 2019/8/23 22:23 3 | # @Author : chensi 4 | # @File : __init__.py.py 5 | # @Software : PyCharm 6 | # @Desciption : None -------------------------------------------------------------------------------- /AUSH/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # @Time : 2019/8/23 22:24 3 | # @Author : chensi 4 | # @File : __init__.py.py 5 | # @Software : PyCharm 6 | # @Desciption : None -------------------------------------------------------------------------------- 
/AUSH/test_main/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # @Time : 2019/8/23 22:24 3 | # @Author : chensi 4 | # @File : __init__.py.py 5 | # @Software : PyCharm 6 | # @Desciption : None -------------------------------------------------------------------------------- /AUSH/utils/attack/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # @Time : 2019/5/31 10:37 3 | # @Author : chensi 4 | # @File : __init__.py.py 5 | # @Software : PyCharm 6 | # @Desciption : None -------------------------------------------------------------------------------- /AUSH/utils/load_data/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # @Time : 2019/4/30 17:37 3 | # @Author : chensi 4 | # @File : __init__.py.py 5 | # @Software : PyCharm 6 | # @Desciption : None -------------------------------------------------------------------------------- /Leg-UP/models/attacker/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # @Time : 2021/03/20 09:21 3 | # @Author : chensi 4 | # @File : __init__.py 5 | # @Software : PyCharm 6 | # @Desciption : None -------------------------------------------------------------------------------- /AUSH/model/attack_model/AttackModel.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # @Time : 2020/9/20 14:23 3 | # @Author : chensi 4 | # @File : attack_model.py 5 | # @Software : PyCharm 6 | # @Desciption : None -------------------------------------------------------------------------------- /Leg-UP/models/detector/SDLib/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # @Time : 2020/12/3 15:52 3 | # @Author : chensi 4 | # @File : __init__.py.py 5 | # @Software : PyCharm 6 | # @Desciption : None -------------------------------------------------------------------------------- /AUSH/model/attack_model/gan_attack/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # @Time : 2019/8/24 10:41 3 | # @Author : chensi 4 | # @File : __init__.py.py 5 | # @Software : PyCharm 6 | # @Desciption : None -------------------------------------------------------------------------------- /data/automotive/automotive_selected_items: -------------------------------------------------------------------------------- 1 | 22 866 2 | 88 1141 3 | 119 681 4 | 122 1656 5 | 339 177 6 | 422 477 7 | 477 1012 8 | 594 1141 9 | 866 1198 10 | 884 1656 11 | 1089 866 12 | 1141 866 13 | 1431 705 14 | 1593 1089 15 | 1656 1089 -------------------------------------------------------------------------------- /data/ml100k/ml100k_selected_items: -------------------------------------------------------------------------------- 1 | 1257 171,49,180 2 | 1419 203,167,172 3 | 785 171,49,180 4 | 1077 0,131,422 5 | 62 167,172,237 6 | 1319 97,99,55 7 | 1612 171,49,180 8 | 1509 11,99,55 9 | 1545 97,99,55 10 | 1373 203,167,172 11 | 690 27,78,227 -------------------------------------------------------------------------------- /Leg-UP/main.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # @Time : 2020/11/29 19:21 3 | # @Author : chensi 4 | # @File : main.py 5 | # 
@Software : PyCharm 6 | # @Desciption : None 7 | 8 | # from utils.evaluator import * 9 | from models.attacker.aushplus import * 10 | model = AUSHplus() 11 | model.execute() 12 | -------------------------------------------------------------------------------- /AUSH/test_main/example.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | for target_id in 5 395 181 565 254 601 623 619 64 558 4 | do 5 | for rec_model_name in IAUtoRec UAUtoRec NNMF NMF_25 6 | do 7 | python main_eval_attack.py --dataset filmTrust --rec_model_name $rec_model_name --attack_method G0 --target_id $target_id --attack_num 50 --filler_num 36 >> filmTrust_result_G0 8 | #nohup python main_gan_attack_baseline.py --dataset filmTrust --target_id 5 --attack_num 50 --filler_num 36 --loss 0 >> G0_log 2>&1 & 9 | done 10 | done -------------------------------------------------------------------------------- /Leg-UP/models/detector/SDLib/baseclass/SSDetection.py: -------------------------------------------------------------------------------- 1 | from SDetection import SDetection 2 | from data.social import SocialDAO 3 | from tool.config import Config,LineConfig 4 | from os.path import abspath 5 | from time import strftime,localtime,time 6 | from tool.file import FileIO 7 | from sklearn.metrics import classification_report 8 | class SSDetection(SDetection): 9 | 10 | def __init__(self,conf,trainingSet=None,testSet=None,labels=None,relation=list(),fold='[1]'): 11 | super(SSDetection, self).__init__(conf,trainingSet,testSet,labels,fold) 12 | self.sao = SocialDAO(self.config, relation) # social relations access control 13 | -------------------------------------------------------------------------------- /data/filmTrust/filmTrust_selected_items: -------------------------------------------------------------------------------- 1 | 29 83,98,110 2 | 5 98,118,112 3 | 395 118,110,119 4 | 380 98,83,118 5 | 198 118,98,83 6 | 576 98,118,112 7 | 228 83,98,119 8 | 181 118,112,98 9 | 442 99,2,84 10 | 310 119,118,110 11 | 703 98,99,114 12 | 307 83,118,98 13 | 370 113,114,99 14 | 449 113,115,82 15 | 2 112,103,98 16 | 565 110,119,118 17 | 664 98,99,114 18 | 539 98,118,112 19 | 515 99,114,98 20 | 254 98,83,119 21 | 215 118,83,98 22 | 40 118,119,110 23 | 601 119,83,118 24 | 623 98,118,83 25 | 266 110,99,83 26 | 619 118,83,98 27 | 648 113,114,99 28 | 640 118,83,98 29 | 451 114,99,98 30 | 64 98,83,118 31 | 655 98,119,83 32 | 558 98,83,118 33 | 553 119,110,118 34 | 183 114,98,99 35 | 200 110,119,118 36 | 264 98,114,99 37 | 674 98,83,118 38 | 295 83,119,110 39 | 629 98,114,99 40 | 711 83,98,118 -------------------------------------------------------------------------------- /AUSH/utils/load_data/load_attack_info.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # @Time : 2019/8/23 11:53 3 | # @Author : chensi 4 | # @File : load_attack_info.py 5 | # @Software : PyCharm 6 | # @Desciption : None 7 | 8 | def load_attack_info(seletced_item_path, target_user_path): 9 | attack_info = {} 10 | with open(seletced_item_path, "r") as fin: 11 | for line in fin: 12 | line = line.strip("\n").split("\t") 13 | target_item, selected_items = int(line[0]), list(map(int, line[1].split(","))) 14 | attack_info[target_item] = [selected_items] 15 | with open(target_user_path, "r") as fin: 16 | for line in fin: 17 | line = line.strip("\n").split("\t") 18 | target_item, target_users = int(line[0]), list(map(int, line[1].split(","))) 19 | 
attack_info[target_item].append(target_users) 20 | return attack_info 21 | 22 | -------------------------------------------------------------------------------- /Leg-UP/models/detector/SDLib/main/id_plus_1.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # @Time : 2019/8/29 21:51 3 | # @Author : chensi 4 | # @File : id_plus_1.py 5 | # @Software : PyCharm 6 | # @Desciption : None 7 | 8 | 9 | import numpy as np 10 | import pandas as pd 11 | import os 12 | 13 | conf_path = '../config/FAP.conf' 14 | 15 | # random_target = [62, 1077, 785, 1419, 1257] 16 | # tail_target = [1319, 1612, 1509, 1545, 1373] 17 | # targets = random_target + tail_target 18 | random = [155, 383, 920, 941, 892] 19 | tail = [1480, 844, 1202, 1301, 2035] 20 | targets = random + tail 21 | attack_methods = ["segment", "average", "random", "bandwagon", "gan"] 22 | for iid in targets: 23 | for attack_method in attack_methods: 24 | path = "../dataset/GAN/ciao/ciao_" + str(iid) + "_" + attack_method + "_50_15.dat" 25 | names = ['userID', 'movieID', 'movieRating'] 26 | data_df = pd.read_csv(path, sep='\t', names=names, engine='python') 27 | data_df.userID += 1 28 | data_df.movieID += 1 29 | dst_path = "../dataset/GAN/ciao_1/ciao_" + str(iid) + "_" + attack_method + "_50_15.dat" 30 | data_df.to_csv(dst_path, index=False, sep='\t', header=False) 31 | -------------------------------------------------------------------------------- /AUSH/utils/attack/data_to_file.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # @Time : 2019/8/23 21:17 3 | # @Author : chensi 4 | # @File : data_to_file.py 5 | # @Software : PyCharm 6 | # @Desciption : None 7 | 8 | import os 9 | import shutil 10 | 11 | 12 | def attacked_file_writer(clean_path, attacked_path, fake_profiles, n_users_ori): 13 | data_to_write = "" 14 | i = 0 15 | for fake_profile in fake_profiles: 16 | injected_iid = fake_profile.nonzero()[0] 17 | injected_rating = fake_profile[injected_iid] 18 | data_to_write += ('\n'.join( 19 | map(lambda x: '\t'.join(map(str, [n_users_ori + i] + list(x))), zip(injected_iid, injected_rating))) + '\n') 20 | i += 1 21 | if os.path.exists(attacked_path): os.remove(attacked_path) 22 | shutil.copyfile(clean_path, attacked_path) 23 | with open(attacked_path, 'a+')as fout: 24 | fout.write(data_to_write) 25 | 26 | 27 | def target_prediction_writer(predictions, hit_ratios, dst_path): 28 | # uid - rating - HR 29 | data_to_write = [] 30 | for uid in range(len(predictions)): 31 | data_to_write.append('\t'.join(map(str, [uid, predictions[uid]] + hit_ratios[uid]))) 32 | with open(dst_path, 'w')as fout: 33 | fout.write('\n'.join(data_to_write)) 34 | -------------------------------------------------------------------------------- /Leg-UP/models/detector/SDLib/tool/dataSplit.py: -------------------------------------------------------------------------------- 1 | from random import random 2 | from models.detector.SDLib.tool.file import FileIO 3 | class DataSplit(object): 4 | 5 | def __init__(self): 6 | pass 7 | 8 | @staticmethod 9 | def dataSplit(data,test_ratio = 0.3,output=False,path='./',order=1): 10 | if test_ratio>=1 or test_ratio <=0: 11 | test_ratio = 0.3 12 | testSet = {} 13 | trainingSet = {} 14 | for user in data: 15 | if random() < test_ratio: 16 | testSet[user] = data[user].copy() 17 | else: 18 | trainingSet[user] = data[user].copy() 19 | 20 | if output: 21 | FileIO.writeFile(path,'testSet['+str(order)+']',testSet) 22 | 
FileIO.writeFile(path, 'trainingSet[' + str(order) + ']', trainingSet) 23 | return trainingSet,testSet 24 | 25 | @staticmethod 26 | def crossValidation(data,k,output=False,path='./',order=1): 27 | if k<=1 or k>10: 28 | k=3 29 | for i in range(k): 30 | trainingSet = {} 31 | testSet = {} 32 | for ind,user in enumerate(data): 33 | if ind%k == i: 34 | testSet[user] = data[user].copy() 35 | else: 36 | trainingSet[user] = data[user].copy() 37 | yield trainingSet,testSet 38 | 39 | 40 | -------------------------------------------------------------------------------- /AUSH/test_main/result_reporter.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding:utf-8 -*- 3 | # author:ariaschen 4 | # datetime:2020/1/14 09:11 5 | # software: PyCharm 6 | 7 | # import itertools, gzip 8 | import pandas as pd 9 | 10 | 11 | columns = ['Rec_model', 'attack_method', 'target_id'] 12 | 13 | hr = ['HR_1', 'HR_3', 'HR_5', 'HR_10', 'HR_20', 'HR_50'] 14 | hr_ori = [i + '_ori' for i in hr] 15 | 16 | columns += [i + '_inseg' for i in ['shift'] + hr_ori + hr] 17 | 18 | columns += [i + '_all' for i in ['shift'] + hr_ori + hr] 19 | 20 | columns_r = [i + '_inseg' for i in ['shift'] + hr] + [i + '_all' for i in ['shift'] + hr] 21 | """""" 22 | # data = pd.read_excel('filmTrust_distance.xls') 23 | # data.groupby('attack_method').mean()[['dis_TVD','dis_JS']].to_excel('filmTrust_distance_avg.xls') 24 | 25 | # data = pd.read_excel('ml100k_performance_all.xls') 26 | # data = pd.read_excel('../result_ijcai/filmTrust_performance_all.xls') 27 | # data = pd.read_excel('../result_ijcai/ml100k_performance_all.xls') 28 | # data = pd.read_excel('office_performance_all.xls') 29 | data = pd.read_excel('automotive_performance_all.xls') 30 | data.columns = columns 31 | data = data[['Rec_model', 'attack_method', 'target_id', 'shift_inseg', 'HR_10_inseg', 'shift_all', 'HR_10_all']] 32 | # target_type_dict = dict( 33 | # zip([62, 1077, 785, 1419, 1257] + [1319, 1612, 1509, 1545, 1373], ['random'] * 5 + ['tail'] * 5)) 34 | # target_type_dict = dict(zip([5, 395, 181, 565, 254] + [601, 623, 619, 64, 558], ['random'] * 5 + ['tail'] * 5)) 35 | target_type_dict = dict(zip([1141, 1656, 477, 1089, 866] + [88, 22, 122, 339, 1431], ['random'] * 5 + ['tail'] * 5)) 36 | data['target_type'] = data.target_id.apply(lambda x: target_type_dict[x]) 37 | data['attack_method'] = data.attack_method.apply(lambda x: x.split('_')[0]) 38 | result = data.groupby(['Rec_model','attack_method', 'target_type']).mean()[['shift_all', 'HR_10_all']] 39 | result.to_excel('ml100k_performance_0119_sample_strategy.xlsx') 40 | exit() 41 | -------------------------------------------------------------------------------- /Leg-UP/utils/loss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn.functional as F 3 | 4 | from utils.utils import EPSILON 5 | 6 | __all__ = ["mse_loss", "mult_ce_loss", "binary_ce_loss", "kld_loss", 7 | "sampled_bce_loss", "sampled_cml_loss"] 8 | 9 | """Model training losses.""" 10 | bce_loss = torch.nn.BCELoss(reduction='none') 11 | 12 | 13 | def mse_loss(data, logits, weight): 14 | """Mean square error loss.""" 15 | weights = torch.ones_like(data) 16 | weights[data > 0] = weight 17 | res = weights * (data - logits) ** 2 18 | return res.sum(1) 19 | 20 | 21 | def mult_ce_loss(data, logits): 22 | """Multi-class cross-entropy loss.""" 23 | log_probs = F.log_softmax(logits, dim=-1) 24 | loss = -log_probs * data 25 | 26 | 
instance_data = data.sum(1) 27 | instance_loss = loss.sum(1) 28 | # Avoid divide by zeros. 29 | res = instance_loss / (instance_data + EPSILON) 30 | return res 31 | 32 | 33 | def binary_ce_loss(data, logits): 34 | """Binary-class cross-entropy loss.""" 35 | return bce_loss(torch.sigmoid(logits), data).mean(1) 36 | 37 | 38 | def kld_loss(mu, log_var): 39 | """KL-divergence.""" 40 | return -0.5 * torch.sum( 41 | 1 + log_var - mu.pow(2) - log_var.exp(), dim=1) 42 | 43 | 44 | def sampled_bce_loss(logits, n_negatives): 45 | """Binary-class cross-entropy loss with sampled negatives.""" 46 | pos_logits, neg_logits = torch.split(logits, [1, n_negatives], 1) 47 | data = torch.cat([ 48 | torch.ones_like(pos_logits), torch.zeros_like(neg_logits) 49 | ], 1) 50 | return bce_loss(torch.sigmoid(logits), data).mean(1) 51 | 52 | 53 | def sampled_cml_loss(distances, n_negatives, margin): 54 | """Hinge loss with sampled negatives.""" 55 | # Distances here are the negative euclidean distances. 56 | pos_distances, neg_distances = torch.split(-distances, [1, n_negatives], 1) 57 | neg_distances = neg_distances.min(1).values.unsqueeze(-1) 58 | res = pos_distances - neg_distances + margin 59 | res[res < 0] = 0 60 | return res.sum(1) 61 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Shilling Attacks against Recommender Systems 2 | 3 | This repository contains our implementations for Shilling Attacks against Recommender Systems. 4 | 5 | Folder structure: 6 | - `AUSH`: The implementation of AUSH used in our CIKM'20 paper [[ACM Library](https://dl.acm.org/doi/10.1145/3340531.3411884)] [[arXiv Preprint](https://arxiv.org/abs/2005.08164)]. 7 | - `Leg-UP`: The implementation of Leg-UP in our TNNLS'22 paper [[IEEE Xplore](https://ieeexplore.ieee.org/document/9806457)] [[arXiv Preprint](https://arxiv.org/abs/2206.11433)] and a unified framework for comparing Leg-UP with various attackers including AIA, DCGAN, WGAN, Random Attack, Average Attack, Segment Attack and Bandwagon Attack. 8 | - `data`: Recommendation datasets used in our experiments. 9 | 10 | See `README.md` in each folder for more details. 11 | 12 | Please kindly cite our papers if you find our implementations useful: 13 | 14 | > Chen Lin, Si Chen, Hui Li, Yanghua Xiao, Lianyun Li, and Qian Yang. 2020. Attacking Recommender Systems with Augmented User Profiles. In CIKM. 855–864. 15 | 16 | > Chen Lin, Si Chen, Meifang Zeng, Sheng Zhang, Min Gao, and Hui Li. 2022. Shilling Black-Box Recommender Systems by Learning to Generate Fake User Profiles. In TNNLS. 17 | 18 | @inproceedings{Lin2020Attacking, 19 | author = {Chen Lin and 20 | Si Chen and 21 | Hui Li and 22 | Yanghua Xiao and 23 | Lianyun Li and 24 | Qian Yang}, 25 | title = {Attacking Recommender Systems with Augmented User Profiles}, 26 | booktitle = {{CIKM}}, 27 | pages = {855--864}, 28 | year = {2020} 29 | } 30 | 31 | 32 | @article{LinCZZGL22, 33 | author = {Chen Lin and 34 | Si Chen and 35 | Meifang Zeng and 36 | Sheng Zhang and 37 | Min Gao and 38 | Hui Li}, 39 | title = {Shilling Black-Box Recommender Systems by Learning to Generate Fake User Profiles}, 40 | journal = {{IEEE} Trans. Neural Networks Learn. 
Syst.}, 41 | year = {2022} 42 | } -------------------------------------------------------------------------------- /Leg-UP/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | #================================================= 4 | 5 | for target_id in 62 785 1077 1257 1419; do 6 | python run.py --data_set ml100k --target_ids $target_id --attacker_list AUSHplus >log_ml100k_$target_id 7 | done 8 | 9 | for target_id in 5 395 181 565 254; do 10 | python run.py --data_set filmTrust --target_ids $target_id --attacker_list AUSHplus >log_filmTrust_$target_id 11 | done 12 | 13 | for target_id in 119 422 594 884 1593; do 14 | python run.py --data_set automotive --target_ids $target_id --attacker_list AUSHplus >log_automotive_$target_id 15 | done 16 | #================================================= 17 | 18 | for attacker in AUSHplus AIA WGANAttacker DCGANAttacker RandomAttacker AverageAttacker BandwagonAttacker SegmentAttacker; do 19 | for target_id in 62 785 1077 1257 1419; do 20 | python run.py --data_set ml100k --target_ids $target_id --attacker_list $attacker >log_ml100k_$target_id"_"$attacker 21 | done 22 | 23 | for target_id in 5 395 181 565 254; do 24 | python run.py --data_set filmTrust --target_ids $target_id --attacker_list $attacker >log_filmTrust_$target_id"_"$attacker 25 | done 26 | 27 | for target_id in 119 422 594 884 1593; do 28 | python run.py --data_set automotive --target_ids $target_id --attacker_list $attacker >log_automotive_$target_id"_"$attacker 29 | done 30 | done 31 | 32 | #================================================= 33 | 34 | for attacker in AUSHplus_SR AUSHplus_woD AUSHplus_SF AUSHplus_inseg; do 35 | for target_id in 62 785 1077 1257 1419; do 36 | python run.py --data_set ml100k --target_ids $target_id --attacker_list $attacker >log_ml100k_$target_id"_"$attacker 37 | done 38 | 39 | for target_id in 5 395 181 565 254; do 40 | python run.py --data_set filmTrust --target_ids $target_id --attacker_list $attacker >log_filmTrust_$target_id"_"$attacker 41 | done 42 | 43 | for target_id in 119 422 594 884 1593; do 44 | python run.py --data_set automotive --target_ids $target_id --attacker_list $attacker >log_automotive_$target_id"_"$attacker 45 | done 46 | done 47 | 48 | #================================================= 49 | -------------------------------------------------------------------------------- /AUSH/model/trainer_rec.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # @Time : 2019/8/23 19:58 3 | # @Author : chensi 4 | # @File : train_rec.py 5 | # @Software : PyCharm 6 | # @Desciption : None 7 | 8 | try: 9 | import tensorflow.compat.v1 as tf 10 | 11 | tf.disable_v2_behavior() 12 | except: 13 | import tensorflow as tf 14 | from model.autorec import IAutoRec, UAutoRec 15 | from model.nnmf import NNMF 16 | 17 | 18 | def get_model_network(sess, model_name, dataset_class): 19 | model = None 20 | if model_name == "IAutoRec": 21 | model = IAutoRec(sess, dataset_class) 22 | elif model_name == "UAutoRec": 23 | model = UAutoRec(sess, dataset_class) 24 | elif model_name == "NNMF": 25 | model = NNMF(sess, dataset_class) 26 | return model 27 | 28 | 29 | def get_top_n(model, n): 30 | top_n = {} 31 | user_nonrated_items = model.dataset_class.get_user_nonrated_items() 32 | for uid in range(model.num_user): 33 | items = user_nonrated_items[uid] 34 | ratings = model.predict([uid] * len(items), items) 35 | item_rating = list(zip(items, ratings)) 36 | 
item_rating.sort(key=lambda x: x[1], reverse=True) 37 | top_n[uid] = [x[0] for x in item_rating[:n]] 38 | return top_n 39 | 40 | 41 | def pred_for_target(model, target_id): 42 | target_predictions = model.predict(list(range(model.num_user)), [target_id] * model.num_user) 43 | 44 | top_n = get_top_n(model, n=50) 45 | hit_ratios = {} 46 | for uid in top_n: 47 | hit_ratios[uid] = [1 if target_id in top_n[uid][:i] else 0 for i in [1, 3, 5, 10, 20, 50]] 48 | return target_predictions, hit_ratios 49 | 50 | 51 | def rec_trainer(model_name, dataset_class, target_id, is_train, model_path): 52 | tf.reset_default_graph() 53 | tf_config = tf.ConfigProto() 54 | tf_config.gpu_options.allow_growth = True 55 | with tf.Session(config=tf_config) as sess: 56 | 57 | rec_model = get_model_network(sess, model_name, dataset_class) 58 | if is_train: 59 | print('--> start train recommendation model...') 60 | rec_model.execute() 61 | rec_model.save(model_path) 62 | else: 63 | rec_model.restore(model_path) 64 | print('--> start pred for each user...') 65 | predictions, hit_ratios = pred_for_target(rec_model, target_id) 66 | return predictions, hit_ratios 67 | -------------------------------------------------------------------------------- /Leg-UP/execute_model.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # @Time : 2020/11/29 11:59 3 | # @Author : chensi 4 | # @File : execute_model.py 5 | # @Software : PyCharm 6 | # @Desciption : None 7 | import random 8 | import numpy as np 9 | import torch 10 | 11 | tf = None 12 | try: 13 | import tensorflow.compat.v1 as tf 14 | 15 | tf.disable_v2_behavior() 16 | except: 17 | import tensorflow as tf 18 | 19 | seed = 1234 20 | random.seed(seed) 21 | np.random.seed(seed) 22 | tf.set_random_seed(seed) 23 | torch.manual_seed(seed) 24 | torch.cuda.manual_seed_all(seed) 25 | 26 | from importlib import import_module 27 | import sys 28 | 29 | 30 | model2lib_dict = { 31 | # attacker 32 | 'RandomAttacker': 'models.attacker.attacker', 33 | 'AverageAttacker': 'models.attacker.attacker', 34 | 'BandwagonAttacker': 'models.attacker.attacker', 35 | 'SegmentAttacker': 'models.attacker.attacker', 36 | # 37 | 'WGANAttacker': 'models.attacker.attacker', 38 | 'DCGANAttacker': 'models.attacker.attacker', 39 | # 40 | 'AUSH': 'models.attacker.aush', 41 | # 42 | 'AUSHplus': 'models.attacker.aushplus', 43 | 'AIA': 'models.attacker.aushplus', 44 | 'AUSHplus_SR': 'models.attacker.aushplus', 45 | 'AUSHplus_woD': 'models.attacker.aushplus', 46 | 'AUSHplus_SF': 'models.attacker.aushplus', 47 | 'AUSHplus_inseg': 'models.attacker.aushplus', 48 | } 49 | 50 | 51 | def execute_model(model_type, model_name): 52 | 53 | try: 54 | try: 55 | model_lib_str = 'models.%s.%s' % (model_type.lower(), 56 | model_type[0].upper() + model_type[1:].lower()) 57 | model_lib = import_module(model_lib_str) 58 | model = getattr(model_lib, model_name)() 59 | except: 60 | model_lib_str = 'utils.%s' % (model_type.lower()) 61 | model_lib = import_module(model_lib_str) 62 | model = getattr(model_lib, model_name)() 63 | except: 64 | # try: 65 | model_lib_str = model2lib_dict[model_name] 66 | model_lib = import_module(model_lib_str) 67 | model = getattr(model_lib, model_name)() 68 | # except: 69 | # print('Not found:', model_type, model_name) 70 | # exit() 71 | 72 | model.execute() 73 | print('success.') 74 | 75 | 76 | model_lib = sys.argv[sys.argv.index('--exe_model_lib') + 1] 77 | model_name = sys.argv[sys.argv.index('--exe_model_class') + 1] 78 | execute_model(model_lib, 
model_name) 79 | -------------------------------------------------------------------------------- /Leg-UP/README.md: -------------------------------------------------------------------------------- 1 | 2 | # Shilling Black-box Recommender Systems by Learning to Generate Fake User Profiles 3 | 4 | This repository contains our implementation for Leg-UP (Learning to Generate Fake User Profiles) and various shilling attack methods including AIA, DCGAN, WGAN, Random Attack, Average Attack, Segment Attack and Bandwagon Attack. 5 | 6 | Please kindly cite our paper [[IEEE Xplore](https://ieeexplore.ieee.org/document/9806457)] [[arXiv Preprint](https://arxiv.org/abs/2206.11433)] if you use it: 7 | 8 | > Chen Lin, Si Chen, Meifang Zeng, Sheng Zhang, Min Gao, and Hui Li. 2022. Shilling Black-Box Recommender Systems by Learning to Generate Fake User Profiles. In TNNLS. 9 | 10 | @article{LinCZZGL22, 11 | author = {Chen Lin and 12 | Si Chen and 13 | Meifang Zeng and 14 | Sheng Zhang and 15 | Min Gao and 16 | Hui Li}, 17 | title = {Shilling Black-Box Recommender Systems by Learning to Generate Fake User Profiles}, 18 | journal = {{IEEE} Trans. Neural Networks Learn. Syst.}, 19 | year = {2022} 20 | } 21 | 22 | ## Environment 23 | - Python 3.8 24 | - higher 0.2.1 25 | - scikit-learn 0.24.1 26 | - scikit-surprise 1.1.1 27 | - tensorflow 2.7 28 | - pytorch 1.10 29 | - numpy 1.20.1 30 | 31 | ## Data 32 | 33 | The datasets used in our experiments can be found in the [data](../data) folder. 34 | 35 | 36 | ## Command Line Parameters 37 | `run.py` is the main entry of the program, it requires several parameters: 38 | 39 | - `data_set`: the recommendation dataset used in the experiment (Possible values: "ml100k", ''filmTrust'', ''automotive'', "yelp", ''GroceryFood'', ''ToolHome'' and ''AppAndroid''. Default is "ml100k"). 40 | - `attack_num`: number of injected profiles, i.e., A value (Default is 50). 41 | - `filler_num`: number of fillers, i.e., P value (Default is 36). 42 | - `surrogate`: surrogate RS model (Possible values: "WMF", ''ItemAE'', ''SVDpp'', and ''PMF''. Default is "WMF"). 43 | - `target_ids`: id of the target item (Default is 62). 44 | - `recommender`: victim recommender (Possible values: ''AUSHplus'', ''AIA'', ''WGANAttacker'', ''DCGANAttacker'', ''RandomAttacker'', ''AverageAttacker'', ''BandwagonAttacker'', and ''SegmentAttacker''. Default is "WMF"). Note that ''AUSHplus'' is the name of Leg-UP in our implementation. 45 | - `cuda_id`: GPU id (Default is 0). 46 | - `use_cuda`: use CPU or GPU (Default is 1). 47 | 48 | ## Examples 49 | 50 | Please refer to `run.sh` for some running examples. 
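For instance, one command from the first loop of `run.sh` launches Leg-UP (named `AUSHplus` in this implementation) against target item 62 on ML-100K:

```shell script
python run.py --data_set ml100k --target_ids 62 --attacker_list AUSHplus > log_ml100k_62
```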
51 | 52 | 53 | 54 | -------------------------------------------------------------------------------- /Leg-UP/models/detector/SDLib/main/SDLib.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | # sys.path.append("../") 4 | from re import split 5 | from models.detector.SDLib.tool.config import Config, LineConfig 6 | from models.detector.SDLib.tool.dataSplit import * 7 | from models.detector.SDLib.tool.file import FileIO 8 | 9 | 10 | class SDLib(object): 11 | def __init__(self, config): 12 | self.trainingData = [] # training data 13 | self.testData = [] # testData 14 | self.relation = [] 15 | self.measure = [] 16 | self.config = config 17 | self.ratingConfig = LineConfig(config['ratings.setup']) 18 | self.labels = FileIO.loadLabels(config['label']) 19 | 20 | if self.config.contains('evaluation.setup'): 21 | self.evaluation = LineConfig(config['evaluation.setup']) 22 | 23 | if self.evaluation.contains('-testSet'): 24 | # specify testSet 25 | self.trainingData = FileIO.loadDataSet(config, config['ratings']) 26 | self.testData = FileIO.loadDataSet(config, self.evaluation['-testSet'], bTest=True) 27 | 28 | elif self.evaluation.contains('-ap'): 29 | # auto partition 30 | self.trainingData = FileIO.loadDataSet(config, config['ratings']) 31 | self.trainingData, self.testData = DataSplit. \ 32 | dataSplit(self.trainingData, test_ratio=float(self.evaluation['-ap'])) 33 | 34 | elif self.evaluation.contains('-cv'): 35 | # cross validation 36 | self.trainingData = FileIO.loadDataSet(config, config['ratings']) 37 | # self.trainingData,self.testData = DataSplit.crossValidation(self.trainingData,int(self.evaluation['-cv'])) 38 | 39 | else: 40 | print('Evaluation is not well configured!') 41 | exit(-1) 42 | 43 | if config.contains('social'): 44 | self.socialConfig = LineConfig(self.config['social.setup']) 45 | self.relation = FileIO.loadRelationship(config, self.config['social']) 46 | # print('preprocessing...') 47 | 48 | def execute(self): 49 | # import the algorithm module 50 | importStr = 'from models.detector.SDLib.method.' 
+ self.config['methodName'] + ' import ' + self.config['methodName'] 51 | exec(importStr) 52 | if self.config.contains('social'): 53 | method = self.config[ 54 | 'methodName'] + '(self.config,self.trainingData,self.testData,self.labels,self.relation)' 55 | else: 56 | method = self.config['methodName'] + '(self.config,self.trainingData,self.testData,self.labels)' 57 | ans = eval(method).execute() 58 | return [float(i) for i in ans] 59 | 60 | 61 | def run(measure, algor, order): 62 | measure[order] = algor.execute() 63 | -------------------------------------------------------------------------------- /Leg-UP/models/detector/SDLib/main/plot.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # @Time : 2019/8/30 9:24 3 | # @Author : chensi 4 | # @File : plot.py 5 | # @Software : PyCharm 6 | # @Desciption : None 7 | 8 | import numpy as np 9 | import pandas as pd 10 | import os 11 | import matplotlib.pyplot as plt 12 | 13 | attack_methods = ["segment", "average", "random", "bandwagon", "gan"] 14 | attack_name = ["Segment", "Random", "Average", "Bandwagon", "Ours"] 15 | attack_method = "segment" 16 | # random = [155, 383, 920, 941, 892] 17 | # tail = [1480, 844, 1202, 1301, 2035] 18 | # targets = random + tail 19 | random = [5, 395, 181, 565, 254] 20 | tail = [601, 623, 619, 64, 558] 21 | targets = random + tail 22 | # targets = [62, 1077, 785, 1419, 1257] + [1319, 1612, 1509, 1545, 1373] 23 | # for attack_method in attack_methods: 24 | # # dir = '../results/ciao_DegreeSAD/' + attack_method 25 | # dir = '../results/filmTrust_0903_FAP/' + attack_method 26 | # pathDir = os.listdir(dir) 27 | # data_to_write = [] 28 | # iid_idx = 0 29 | # for i in range(len(pathDir)): 30 | # # if "5-fold-cv" not in pathDir[i]: continue 31 | # iid = targets[iid_idx] 32 | # iid_idx += 1 33 | # # load result 34 | # lines = [] 35 | # if 'FAP' not in pathDir[i]: continue 36 | # with open(dir + '/' + pathDir[i], 'r') as fin: 37 | # for line in fin: 38 | # lines.append(line) 39 | # res = lines[3].strip('\n').split(' ') 40 | # while '' in res: res.remove('') 41 | # res = [str(iid)] + res 42 | # data_to_write.append('\t'.join(res)) 43 | # with open(dir + '/' + "result_" + attack_method, 'w') as fout: 44 | # fout.write('\n'.join(data_to_write)) 45 | 46 | names = ['iid', 'label', 'precision', 'recall', 'f1', 'support'] 47 | # pre_results = {} 48 | # recall_results = {} 49 | P, R, N = [], [], [] 50 | for i in range(len(attack_methods)): 51 | attack_method = attack_methods[i] 52 | path = '../results/filmTrust_0903_FAP/' + attack_method + "/result_" + attack_method 53 | # path = '../results/ml100k_DegreeSAD/' + attack_method + "/result_" + attack_method 54 | # path = '../results/ciao_DegreeSAD/' + attack_method + "/result_" + attack_method 55 | result = pd.read_csv(path, sep='\t', names=names, engine='python') 56 | p = result.precision.values.tolist() 57 | r = result.recall.values.tolist() 58 | n = [attack_name[i]] * len(r) 59 | P.extend(p) 60 | R.extend(r) 61 | N.extend(n) 62 | # pre_results[attack_name[i]] =p 63 | # recall_results[attack_name[i]] =r 64 | data_pre = pd.DataFrame({"method": N, "precision": P, "recall": R}) 65 | # data_pre = pd.DataFrame(pre_results) 66 | data_pre.boxplot(column='precision', by=['method']) 67 | plt.title("Attack Detection") 68 | plt.ylabel("precision", ) 69 | plt.xlabel("Attack Method") 70 | plt.show() 71 | a = 1 72 | # -------------------------------------------------------------------------------- 
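Note on `SDLib.execute()` above: the detector class is resolved at runtime from the `methodName` entry of the `.conf` file by composing an import statement as a string and running it through `exec`/`eval`. A minimal sketch of the same dispatch, written with `importlib` (as `execute_model.py` does elsewhere in Leg-UP) and assuming the hypothetical case `methodName=FAP`:

```python
from importlib import import_module

method_name = 'FAP'  # in SDLib this value comes from the config file's methodName entry
# Resolve models.detector.SDLib.method.FAP and fetch the FAP class from it.
module = import_module('models.detector.SDLib.method.' + method_name)
DetectorClass = getattr(module, method_name)
# The detector is then constructed with the config, data and labels, e.g.:
# detector = DetectorClass(conf, trainingData, testData, labels)
# precision, recall = detector.execute()
```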
/Leg-UP/models/detector/SDLib/data/social.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | #from structure import sparseMatrix,new_sparseMatrix 3 | from tool.config import Config,LineConfig 4 | from tool.qmath import normalize 5 | import os.path 6 | from re import split 7 | 8 | class SocialDAO(object): 9 | def __init__(self,conf,relation=list()): 10 | self.config = conf 11 | self.user = {} #used to store the order of users 12 | self.relation = relation 13 | self.followees = {} 14 | self.followers = {} 15 | self.trustMatrix = self.__generateSet() 16 | 17 | def __generateSet(self): 18 | #triple = [] 19 | for line in self.relation: 20 | userId1,userId2,weight = line 21 | #add relations to dict 22 | if not self.followees.has_key(userId1): 23 | self.followees[userId1] = {} 24 | self.followees[userId1][userId2] = weight 25 | if not self.followers.has_key(userId2): 26 | self.followers[userId2] = {} 27 | self.followers[userId2][userId1] = weight 28 | # order the user 29 | if not self.user.has_key(userId1): 30 | self.user[userId1] = len(self.user) 31 | if not self.user.has_key(userId2): 32 | self.user[userId2] = len(self.user) 33 | #triple.append([self.user[userId1], self.user[userId2], weight]) 34 | #return new_sparseMatrix.SparseMatrix(triple) 35 | 36 | # def row(self,u): 37 | # #return user u's followees 38 | # return self.trustMatrix.row(self.user[u]) 39 | # 40 | # def col(self,u): 41 | # #return user u's followers 42 | # return self.trustMatrix.col(self.user[u]) 43 | # 44 | # def elem(self,u1,u2): 45 | # return self.trustMatrix.elem(u1,u2) 46 | 47 | def weight(self,u1,u2): 48 | if self.followees.has_key(u1) and self.followees[u1].has_key(u2): 49 | return self.followees[u1][u2] 50 | else: 51 | return 0 52 | 53 | # def trustSize(self): 54 | # return self.trustMatrix.size 55 | 56 | def getFollowers(self,u): 57 | if self.followers.has_key(u): 58 | return self.followers[u] 59 | else: 60 | return {} 61 | 62 | def getFollowees(self,u): 63 | if self.followees.has_key(u): 64 | return self.followees[u] 65 | else: 66 | return {} 67 | 68 | def hasFollowee(self,u1,u2): 69 | if self.followees.has_key(u1): 70 | if self.followees[u1].has_key(u2): 71 | return True 72 | else: 73 | return False 74 | return False 75 | 76 | def hasFollower(self,u1,u2): 77 | if self.followers.has_key(u1): 78 | if self.followers[u1].has_key(u2): 79 | return True 80 | else: 81 | return False 82 | return False 83 | -------------------------------------------------------------------------------- /AUSH/README.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | # Attacking Recommender Systems with Augmented User Profiles 4 | 5 | This repository contains one shilling attack algorithm, AUSH, published in the following paper [[ACM Library](https://dl.acm.org/doi/10.1145/3340531.3411884)] [[arXiv Preprint](https://arxiv.org/abs/2005.08164)]: 6 | 7 | > Chen Lin, Si Chen, Hui Li, Yanghua Xiao, Lianyun Li, and Qian Yang. 2020. Attacking Recommender Systems with Augmented User Profiles. In CIKM. 855–864. 8 | 9 | Please kindly cite our paper if you use it: 10 | 11 | @inproceedings{Lin2020Attacking, 12 | author = {Chen Lin and 13 | Si Chen and 14 | Hui Li and 15 | Yanghua Xiao and 16 | Lianyun Li and 17 | Qian Yang}, 18 | title = {Attacking Recommender Systems with Augmented User Profiles}, 19 | booktitle = {{CIKM}}, 20 | pages = {855--864}, 21 | year = {2020} 22 | } 23 | 24 | ## How to run AUSH. 
25 | ### Step1: Pre-processing 26 | Use `test_main\data_preprocess.py` to transform amazon 5-cores ratings to tuples `[userid, itemid, normalized float rating]`. 27 | 28 | Update on Dec 9, 2021: We have released several recommendation datasets for testing shilling attacks including the three datasets used in our CIKM'20 paper. You can directly use files in the [data](/data) folder for experiments. Please copy the data folder to the folder of AUSH before execution. 29 | 30 | ### Step2: Initialize 31 | Use `test_main\data_preprocess.py` 32 | - select attack target 33 | - select attack number (default fix 50) 34 | - select filler size 35 | - selected items and target users 36 | - settings for bandwagon attack 37 | 38 | ### Step3: Training and Evaluation 39 | 40 | - Train baseline attack models 41 | ```shell script 42 | python main_baseline_attack.py --dataset filmTrust --attack_methods average,segment,random,bandwagon --targets 601,623,619,64,558 --filler_num 36 --bandwagon_selected 103,98,115 --sample_filler 1 43 | ``` 44 | - Evaluate baseline attack models 45 | ```shell script 46 | python main_train_rec.py --dataset filmTrust --attack_method segment --model_name NMF_25 --target_ids 601,623,619,64,558 --filler_num 36 47 | ```` 48 | 49 | - RS performance before attack 50 | ```shell script 51 | python main_train_rec.py --dataset filmTrust --attack_method no --model_name NMF_25 --target_ids 601,623,619,64,558 --filler_num 36 52 | ```` 53 | 54 | - Train AUSH 55 | ```shell script 56 | python main_gan_attack.py --dataset filmTrust --target_ids 601,623,619,64,558 --filler_num 36 57 | ```` 58 | 59 | - Evaluate AUSH 60 | ```shell script 61 | python main_train_rec.py --dataset filmTrust --attack_method gan --model_name NMF_25 --target_ids 601,623,619,64,558 --filler_num 36 62 | ```` 63 | 64 | - Comparative Study 65 | ```shell script 66 | python main_eval_attack.py --dataset filmTrust --filler_num 36 --attack_methods gan,segment,average --rec_model_names NMF_25 --target_ids 601,623,619,64,558 67 | 68 | python main_eval_similarity.py --dataset filmTrust --filler_num 36 --targets 601,623 --bandwagon_selected 103,98,115 69 | ``` 70 | -------------------------------------------------------------------------------- /Leg-UP/models/detector/SDLib/tool/config.py: -------------------------------------------------------------------------------- 1 | import os.path 2 | from os.path import abspath 3 | 4 | 5 | class Config(object): 6 | def __init__(self, fileName): 7 | self.config = {} 8 | self.readConfiguration(fileName) 9 | 10 | def __getitem__(self, item): 11 | if not self.contains(item): 12 | print('parameter ' + item + ' is invalid!') 13 | exit(-1) 14 | return self.config[item] 15 | 16 | def getOptions(self, item): 17 | if not self.contains(item): 18 | print('parameter ' + item + ' is invalid!') 19 | exit(-1) 20 | return self.config[item] 21 | 22 | def contains(self, key): 23 | return key in self.config 24 | # return self.config.has_key(key) 25 | 26 | def get_keys(self): 27 | return self.config.keys() 28 | 29 | def readConfiguration(self, fileName): 30 | if not os.path.exists(abspath(fileName)): 31 | print('config file is not found!') 32 | raise IOError 33 | with open(fileName) as f: 34 | for ind, line in enumerate(f): 35 | if line.strip() != '': 36 | try: 37 | key, value = line.strip().split('=') 38 | self.config[key] = value 39 | except ValueError: 40 | print('config file is not in the correct format! 
Error Line:%d' % (ind)) 41 | 42 | 43 | class LineConfig(object): 44 | def __init__(self, content): 45 | self.line = content.strip().split(' ') 46 | self.options = {} 47 | self.mainOption = False 48 | if self.line[0] == 'on': 49 | self.mainOption = True 50 | elif self.line[0] == 'off': 51 | self.mainOption = False 52 | for i, item in enumerate(self.line): 53 | if (item.startswith('-') or item.startswith('--')) and not item[1:].isdigit(): 54 | ind = i + 1 55 | for j, sub in enumerate(self.line[ind:]): 56 | if (sub.startswith('-') or sub.startswith('--')) and not sub[1:].isdigit(): 57 | ind = j 58 | break 59 | if j == len(self.line[ind:]) - 1: 60 | ind = j + 1 61 | break 62 | try: 63 | self.options[item] = ' '.join(self.line[i + 1:i + 1 + ind]) 64 | except IndexError: 65 | self.options[item] = 1 66 | 67 | def __getitem__(self, item): 68 | if not self.contains(item): 69 | print('parameter ' + item + ' is invalid!') 70 | exit(-1) 71 | return self.options[item] 72 | 73 | def getOption(self, key): 74 | if not self.contains(key): 75 | print('parameter ' + key + ' is invalid!') 76 | exit(-1) 77 | return self.options[key] 78 | 79 | def isMainOn(self): 80 | return self.mainOption 81 | 82 | def contains(self, key): 83 | return key in self.options 84 | # return self.options.has_key(key) 85 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .idea/ 2 | build/ 3 | .DS_Store 4 | 5 | # Byte-compiled / optimized / DLL files 6 | __pycache__/ 7 | *.py[cod] 8 | *$py.class 9 | 10 | # C extensions 11 | *.so 12 | 13 | # Distribution / packaging 14 | .Python 15 | build/ 16 | develop-eggs/ 17 | dist/ 18 | downloads/ 19 | eggs/ 20 | .eggs/ 21 | lib/ 22 | lib64/ 23 | parts/ 24 | sdist/ 25 | var/ 26 | wheels/ 27 | share/python-wheels/ 28 | *.egg-info/ 29 | .installed.cfg 30 | *.egg 31 | MANIFEST 32 | 33 | # PyInstaller 34 | # Usually these files are written by a python script from a template 35 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 36 | *.manifest 37 | *.spec 38 | 39 | # Installer logs 40 | pip-log.txt 41 | pip-delete-this-directory.txt 42 | 43 | # Unit test / coverage reports 44 | htmlcov/ 45 | .tox/ 46 | .nox/ 47 | .coverage 48 | .coverage.* 49 | .cache 50 | nosetests.xml 51 | coverage.xml 52 | *.cover 53 | *.py,cover 54 | .hypothesis/ 55 | .pytest_cache/ 56 | cover/ 57 | 58 | # Translations 59 | *.mo 60 | *.pot 61 | 62 | # Django stuff: 63 | *.log 64 | local_settings.py 65 | db.sqlite3 66 | db.sqlite3-journal 67 | 68 | # Flask stuff: 69 | instance/ 70 | .webassets-cache 71 | 72 | # Scrapy stuff: 73 | .scrapy 74 | 75 | # Sphinx documentation 76 | docs/_build/ 77 | 78 | # PyBuilder 79 | .pybuilder/ 80 | target/ 81 | 82 | # Jupyter Notebook 83 | .ipynb_checkpoints 84 | 85 | # IPython 86 | profile_default/ 87 | ipython_config.py 88 | 89 | # pyenv 90 | # For a library or package, you might want to ignore these files since the code is 91 | # intended to run in multiple environments; otherwise, check them in: 92 | # .python-version 93 | 94 | # pipenv 95 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 96 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 97 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 98 | # install all needed dependencies. 99 | #Pipfile.lock 100 | 101 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 102 | __pypackages__/ 103 | 104 | # Celery stuff 105 | celerybeat-schedule 106 | celerybeat.pid 107 | 108 | # SageMath parsed files 109 | *.sage.py 110 | 111 | # Environments 112 | .env 113 | .venv 114 | env/ 115 | venv/ 116 | ENV/ 117 | env.bak/ 118 | venv.bak/ 119 | 120 | # Spyder project settings 121 | .spyderproject 122 | .spyproject 123 | 124 | # Rope project settings 125 | .ropeproject 126 | 127 | # mkdocs documentation 128 | /site 129 | 130 | # mypy 131 | .mypy_cache/ 132 | .dmypy.json 133 | dmypy.json 134 | 135 | # Pyre type checker 136 | .pyre/ 137 | 138 | # pytype static type analyzer 139 | .pytype/ 140 | 141 | # Cython debug symbols 142 | cython_debug/ 143 | 144 | # PyCharm 145 | # JetBrains specific template is maintainted in a separate JetBrains.gitignore that can 146 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 147 | # and can be added to the global gitignore or merged into this file. For a more nuclear 148 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 149 | #.idea/ -------------------------------------------------------------------------------- /Leg-UP/models/detector/SDLib/main/main.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | sys.path.append("../") 4 | from SDLib import SDLib 5 | from tool.config import Config 6 | 7 | if __name__ == '__main__': 8 | 9 | print('=' * 80) 10 | print(' SDLib: A Python library used to collect shilling detection methods.') 11 | print('=' * 80) 12 | print('Supervised Methods:') 13 | print('1. DegreeSAD 2.CoDetector 3.BayesDetector\n') 14 | print('Semi-Supervised Methods:') 15 | print('4. SemiSAD\n') 16 | print('Unsupervised Methods:') 17 | print('5. PCASelectUsers 6. FAP 7.timeIndex\n') 18 | print('-' * 80) 19 | algor = -1 20 | conf = -1 21 | order = 6 # input('please enter the num of the method to run it:') 22 | import time 23 | 24 | s = time.clock() 25 | # if order == 0: 26 | # try: 27 | # import seaborn as sns 28 | # except ImportError: 29 | # print '!!!To obtain nice data charts, ' \ 30 | # 'we strongly recommend you to install the third-party package !!!' 
31 | # conf = Config('../config/visual/visual.conf') 32 | # Display(conf).render() 33 | # exit(0) 34 | 35 | if order == 1: 36 | conf = Config('../config/DegreeSAD_tmp.conf') 37 | 38 | elif order == 2: 39 | conf = Config('../config/CoDetector.conf') 40 | 41 | elif order == 3: 42 | conf = Config('../config/BayesDetector.conf') 43 | 44 | elif order == 4: 45 | conf = Config('../config/SemiSAD.conf') 46 | 47 | elif order == 5: 48 | conf = Config('../config/PCASelectUsers.conf') 49 | 50 | elif order == 6: 51 | conf = Config('../config/FAP.conf') 52 | elif order == 7: 53 | conf = Config('../config/timeIndex.conf') 54 | 55 | else: 56 | print('Error num!') 57 | exit(-1) 58 | 59 | # ori conf info 60 | lines = [] 61 | with open('../config/FAP.conf', 'r') as fin: 62 | for line in fin: 63 | lines.append(line) 64 | random = [5, 395, 181, 565, 254] 65 | tail = [601, 623, 619, 64, 558] 66 | targets = random + tail 67 | # targets = [62, 1077, 785, 1419, 1257] + [1319, 1612, 1509, 1545, 1373] 68 | attack_methods = ["segment", "average", "random", "bandwagon", "gan"] 69 | for attack_method in attack_methods[0:]: 70 | for iid in targets: 71 | path = "../dataset/GAN/filmTrust/filmTrust_" + str(iid) + "_" + attack_method + "_50_36.dat" 72 | # path = "../dataset/GAN/ciao_1/ciao_" + str(iid) + "_" + attack_method + "_50_15.dat" 73 | lines[0] = 'ratings=' + path + '\n' 74 | # lines[-1] = "output.setup=on -dir ../results/ciao_DegreeSAD/" + attack_method + '/' 75 | lines[-1] = "output.setup=on -dir ../results/filmTrust_0903_FAP/" + attack_method + '/' 76 | with open('../config/FAP_t.conf', 'w') as fout: 77 | fout.write(''.join(lines)) 78 | sd = SDLib(Config('../config/FAP_t.conf')) 79 | result = sd.execute() 80 | # conf = Config('../config/DegreeSAD_t.conf') 81 | # conf = Config('../config/FAP_t.conf') 82 | # sd = SDLib(conf) 83 | # sd.execute() 84 | e = time.clock() 85 | print("Run time: %f s" % (e - s)) 86 | -------------------------------------------------------------------------------- /Leg-UP/models/detector/SDLib/baseclass/SDetection.py: -------------------------------------------------------------------------------- 1 | from models.detector.SDLib.data.rating import RatingDAO 2 | from models.detector.SDLib.tool.config import Config,LineConfig 3 | from os.path import abspath 4 | from time import strftime,localtime,time 5 | from models.detector.SDLib.tool.file import FileIO 6 | from sklearn.metrics import classification_report 7 | class SDetection(object): 8 | 9 | def __init__(self,conf,trainingSet=None,testSet=None,labels=None,fold='[1]'): 10 | self.config = conf 11 | self.isSave = False 12 | self.isLoad = False 13 | self.foldInfo = fold 14 | self.labels = labels 15 | self.dao = RatingDAO(self.config, trainingSet, testSet) 16 | self.training = [] 17 | self.trainingLabels = [] 18 | self.test = [] 19 | self.testLabels = [] 20 | 21 | def readConfiguration(self): 22 | self.algorName = self.config['methodName'] 23 | self.output = LineConfig(self.config['output.setup']) 24 | 25 | 26 | def printAlgorConfig(self): 27 | "show algorithm's configuration" 28 | # print ('Algorithm:',self.config['methodName']) 29 | # print ('Ratings dataSet:',abspath(self.config['ratings'])) 30 | # if LineConfig(self.config['evaluation.setup']).contains('-testSet'): 31 | # print ('Test set:',abspath(LineConfig(self.config['evaluation.setup']).getOption('-testSet'))) 32 | #print 'Count of the users in training set: ',len() 33 | # print ('Training set size: (user count: %d, item count %d, record count: %d)' %(self.dao.trainingSize())) 34 | # 
print ('Test set size: (user count: %d, item count %d, record count: %d)' %(self.dao.testSize())) 35 | # print ('='*80) 36 | pass 37 | 38 | def initModel(self): 39 | pass 40 | 41 | def buildModel(self): 42 | pass 43 | 44 | def saveModel(self): 45 | pass 46 | 47 | def loadModel(self): 48 | pass 49 | 50 | def predict(self): 51 | pass 52 | 53 | def execute(self): 54 | self.readConfiguration() 55 | if self.foldInfo == '[1]': 56 | self.printAlgorConfig() 57 | # load model from disk or build model 58 | if self.isLoad: 59 | # print ('Loading model %s...' % (self.foldInfo)) 60 | self.loadModel() 61 | else: 62 | # print ('Initializing model %s...' % (self.foldInfo)) 63 | self.initModel() 64 | # print ('Building Model %s...' % (self.foldInfo)) 65 | self.buildModel() 66 | 67 | # preict the ratings or item ranking 68 | # print ('Predicting %s...' % (self.foldInfo)) 69 | prediction = self.predict() 70 | report = classification_report(self.testLabels, prediction, digits=4) 71 | # currentTime = currentTime = strftime("%Y-%m-%d %H-%M-%S", localtime(time())) 72 | # FileIO.writeFile(self.output['-dir'],self.algorName+'@'+currentTime+self.foldInfo,report) 73 | # save model 74 | # if self.isSave: 75 | # print ('Saving model %s...' % (self.foldInfo)) 76 | # self.saveModel() 77 | # print (report) 78 | res = [[j for j in i.split(' ') if len(j)] for i in report.split('\n') if len(i.strip())>0][:3] 79 | precision, recall = res[-1][1:3] 80 | return precision, recall#report -------------------------------------------------------------------------------- /Leg-UP/utils/data_loader.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # @Time : 2020/11/27 15:34 3 | # @Author : chensi 4 | # @File : data_loader.py 5 | # @Software : PyCharm 6 | # @Desciption : None 7 | 8 | import random 9 | import numpy as np 10 | import torch 11 | 12 | # tf = None 13 | # try: 14 | # import tensorflow.compat.v1 as tf 15 | # 16 | # tf.disable_v2_behavior() 17 | # except: 18 | # import tensorflow as tf 19 | 20 | seed = 1234 21 | random.seed(seed) 22 | np.random.seed(seed) 23 | # tf.set_random_seed(seed) 24 | torch.manual_seed(seed) 25 | torch.cuda.manual_seed_all(seed) 26 | import pandas as pd 27 | import numpy as np 28 | from scipy.sparse import csr_matrix 29 | 30 | 31 | class DataLoader(object): 32 | 33 | def __init__(self, path_train, path_test, header=None, sep='\t', threshold=4, verbose=False): 34 | self.path_train = path_train 35 | self.path_test = path_test 36 | self.header = header if header is not None else ['user_id', 'item_id', 'rating'] 37 | self.sep = sep 38 | self.threshold = threshold 39 | self.verbose = verbose 40 | 41 | # load file as dataFrame 42 | # self.train_data, self.test_data, self.n_users, self.n_items = self.load_file_as_dataFrame() 43 | # dataframe to matrix 44 | # self.train_matrix, self.train_matrix_implicit = self.dataFrame_to_matrix(self.train_data) 45 | # self.test_matrix, self.test_matrix_implicit = self.dataFrame_to_matrix(self.test_data) 46 | 47 | def load_file_as_dataFrame(self): 48 | # load data to pandas dataframe 49 | if self.verbose: 50 | print("\nload data from %s ..." % self.path_train, flush=True) 51 | 52 | train_data = pd.read_csv(self.path_train, sep=self.sep, names=self.header, engine='python') 53 | train_data = train_data.loc[:, ['user_id', 'item_id', 'rating']] 54 | 55 | if self.verbose: 56 | print("load data from %s ..." 
% self.path_test, flush=True) 57 | test_data = pd.read_csv(self.path_test, sep=self.sep, names=self.header, engine='python').loc[:, 58 | ['user_id', 'item_id', 'rating']] 59 | test_data = test_data.loc[:, ['user_id', 'item_id', 'rating']] 60 | 61 | # data statics 62 | 63 | n_users = max(max(test_data.user_id.unique()), max(train_data.user_id.unique())) + 1 64 | n_items = max(max(test_data.item_id.unique()), max(train_data.item_id.unique())) + 1 65 | 66 | if self.verbose: 67 | print("Number of users : %d , Number of items : %d. " % (n_users, n_items), flush=True) 68 | print("Train size : %d , Test size : %d. " % (train_data.shape[0], test_data.shape[0]), flush=True) 69 | 70 | return train_data, test_data, n_users, n_items 71 | 72 | def dataFrame_to_matrix(self, data_frame, n_users, n_items): 73 | row, col, rating, implicit_rating = [], [], [], [] 74 | for line in data_frame.itertuples(): 75 | uid, iid, r = list(line)[1:] 76 | implicit_r = 1 if r >= self.threshold else 0 77 | 78 | row.append(uid) 79 | col.append(iid) 80 | rating.append(r) 81 | implicit_rating.append(implicit_r) 82 | 83 | matrix = csr_matrix((rating, (row, col)), shape=(n_users, n_items)) 84 | matrix_implicit = csr_matrix((implicit_rating, (row, col)), shape=(n_users, n_items)) 85 | return matrix, matrix_implicit 86 | -------------------------------------------------------------------------------- /AUSH/model/attack_model/gan_attack/models.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # @Time : 2019/8/24 10:43 3 | # @Author : chensi 4 | # @File : models.py 5 | # @Software : PyCharm 6 | # @Desciption : None 7 | 8 | try: 9 | import tensorflow.compat.v1 as tf 10 | 11 | tf.disable_v2_behavior() 12 | except: 13 | import tensorflow as tf 14 | import math 15 | 16 | 17 | class GAN_Attacker: 18 | def __init__(self): 19 | print("GAN Attack model") 20 | 21 | def DIS(self, input, inputDim, h, activation, hiddenLayers, _reuse=False): 22 | # input->hidden 23 | y, _, W, b = self.FullyConnectedLayer(input, inputDim, h, activation, "dis", 0, reuse=_reuse) 24 | 25 | # stacked hidden layers 26 | for layer in range(hiddenLayers - 1): 27 | y, _, W, b = self.FullyConnectedLayer(y, h, h, activation, "dis", layer + 1, reuse=_reuse) 28 | 29 | # hidden -> output 30 | y, _, W, b = self.FullyConnectedLayer(y, h, 1, "none", "dis", hiddenLayers + 1, reuse=_reuse) 31 | 32 | return y 33 | 34 | def GEN(self, input, num_item, h, outputDim, activation, decay, name="gen", _reuse=False): 35 | """ 36 | input : sparse filler vectors 37 | output : reconstructed selected vector 38 | """ 39 | # input+thnh 40 | # input_tanh = tf.nn.tanh(input) 41 | 42 | # input->hidden 43 | 44 | y, L2norm, W, b = self.FullyConnectedLayer(input, num_item, h // decay, activation, name, 0, reuse=_reuse) 45 | 46 | # stacked hidden layers 47 | h = h // decay 48 | layer = 0 49 | # for layer in range(hiddenLayers - 1): 50 | while True: 51 | y, this_L2, W, b = self.FullyConnectedLayer(y, h, h // decay, activation, name, layer + 1, reuse=_reuse) 52 | L2norm = L2norm + this_L2 53 | layer += 1 54 | if h // decay > outputDim: 55 | h = h // decay 56 | else: 57 | break 58 | # hidden -> output 59 | y, this_L2, W, b = self.FullyConnectedLayer(y, h // decay, outputDim, "none", name, layer + 1, reuse=_reuse) 60 | L2norm = L2norm + this_L2 61 | y = tf.nn.sigmoid(y) * 5 62 | return y, L2norm 63 | 64 | def FullyConnectedLayer(self, input, inputDim, outputDim, activation, model, layer, reuse=False): 65 | scale1 = math.sqrt(6 / (inputDim 
+ outputDim)) 66 | 67 | wName = model + "_W" + str(layer) 68 | bName = model + "_B" + str(layer) 69 | 70 | with tf.variable_scope(model) as scope: 71 | 72 | if reuse == True: 73 | scope.reuse_variables() 74 | 75 | W = tf.get_variable(wName, [inputDim, outputDim], 76 | initializer=tf.random_uniform_initializer(-scale1, scale1)) 77 | b = tf.get_variable(bName, [outputDim], initializer=tf.random_uniform_initializer(-0.01, 0.01)) 78 | 79 | y = tf.matmul(input, W) + b 80 | 81 | L2norm = tf.nn.l2_loss(W) + tf.nn.l2_loss(b) 82 | 83 | if activation == "none": 84 | y = tf.identity(y, name="output") 85 | return y, L2norm, W, b 86 | 87 | elif activation == "sigmoid": 88 | return tf.nn.sigmoid(y), L2norm, W, b 89 | 90 | elif activation == "tanh": 91 | return tf.nn.tanh(y), L2norm, W, b 92 | elif activation == "relu": 93 | return tf.nn.relu(y), L2norm, W, b 94 | -------------------------------------------------------------------------------- /Leg-UP/models/detector/SDLib/tool/plot.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import numpy as np 3 | #import seaborn as sns 4 | 5 | def drawLine(x,y,labels,xLabel,yLabel,title): 6 | f, ax = plt.subplots(1, 1, figsize=(10, 6), sharex=True) 7 | 8 | #f.tight_layout() 9 | #sns.set(style="darkgrid") 10 | 11 | palette = ['blue','orange','red','green','purple','pink'] 12 | # for i in range(len(ax)): 13 | # x1 = range(0, len(x)) 14 | #ax.set_xlim(min(x1)-0.2,max(x1)+0.2) 15 | # mini = 10000;max = -10000 16 | # for label in labels: 17 | # if mini>min(y[i][label]): 18 | # mini = min(y[i][label]) 19 | # if max 0 88 | ind2 = new_x2 > 0 89 | try: 90 | mean_x1 = float(new_x1.sum())/ind1.sum() 91 | mean_x2 = float(new_x2.sum())/ind2.sum() 92 | new_x1 = new_x1 - mean_x1 93 | new_x2 = new_x2 - mean_x2 94 | sum = new_x1.dot(new_x2) 95 | denom = sqrt((new_x1.dot(new_x1))*(new_x2.dot(new_x2))) 96 | return float(sum) / denom 97 | except ZeroDivisionError: 98 | return 0 99 | 100 | 101 | def similarity(x1,x2,sim): 102 | if sim == 'pcc': 103 | return pearson_sp(x1,x2) 104 | if sim == 'euclidean': 105 | return euclidean(x1,x2) 106 | else: 107 | return cosine_sp(x1, x2) 108 | 109 | 110 | def normalize(vec,maxVal,minVal): 111 | 'get the normalized value using min-max normalization' 112 | if maxVal > minVal: 113 | return float(vec-minVal)/(maxVal-minVal)+0.01 114 | elif maxVal==minVal: 115 | return vec/maxVal 116 | else: 117 | print ('error... 
maximum value is less than minimum value.') 118 | raise ArithmeticError 119 | 120 | def sigmoid(val): 121 | return 1/(1+exp(-val)) 122 | 123 | 124 | def denormalize(vec,maxVal,minVal): 125 | return minVal+(vec-0.01)*(maxVal-minVal) 126 | -------------------------------------------------------------------------------- /AUSH/test_main/main_train_rec.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # @Time : 2019/8/23 19:29 3 | # @Author : chensi 4 | # @File : main_train_rec.py 5 | # @Software : PyCharm 6 | # @Desciption : None 7 | import sys, os, argparse 8 | 9 | sys.path.append("../") 10 | from utils.load_data.load_data import * 11 | from model.trainer_rec import rec_trainer 12 | from model.trainer_rec_surprise import basic_rec 13 | from utils.attack.data_to_file import target_prediction_writer 14 | 15 | 16 | # os.environ["CUDA_VISIBLE_DEVICES"] = '0' 17 | 18 | 19 | def train_rec(data_set_name, model_name, attack_method, target_id, is_train): 20 | if attack_method == "no": 21 | attack_method = "" 22 | model_path = "../result/model_ckpt/" + '_'.join([model_name, data_set_name]) + ".ckpt" 23 | else: 24 | model_path = "../result/model_ckpt/" + '_'.join([model_name, data_set_name, attack_method]) + ".ckpt" 25 | path_train = "../data/data_attacked/" + '_'.join([data_set_name, str(target_id), attack_method]) + ".dat" 26 | path_test = "../data/data/" + data_set_name + "_test.dat" 27 | if attack_method == "": path_train = "../data/data/" + data_set_name + "_train.dat" 28 | 29 | # load_data 30 | dataset_class = load_data(path_train=path_train, path_test=path_test, 31 | header=['user_id', 'item_id', 'rating'], 32 | sep='\t', print_log=True) 33 | # train rec 34 | if model_name in ["IAutoRec", "UAutoRec", "NNMF"]: 35 | predictions, hit_ratios = rec_trainer(model_name, dataset_class, target_id, is_train, model_path) 36 | else: 37 | predictions, hit_ratios = basic_rec(model_name, path_train, path_test, target_id) 38 | 39 | # write to file 40 | dst_path = "../result/pred_result/" + '_'.join([model_name, data_set_name, str(target_id), attack_method]) 41 | dst_path = dst_path.strip('_') 42 | target_prediction_writer(predictions, hit_ratios, dst_path) 43 | 44 | 45 | def parse_arg(): 46 | parser = argparse.ArgumentParser() 47 | 48 | parser.add_argument('--dataset', type=str, default='automotive', help='input data_set_name,filmTrust or ml100k') 49 | 50 | parser.add_argument('--model_name', type=str, default='NMF_25', help='NNMF,IAutoRec,UAutoRec,NMF_25') 51 | 52 | parser.add_argument('--attack_method', type=str, default='G1', 53 | help='no,gan,segment,average,random,bandwagon') 54 | 55 | # filmTrust:random = [5, 395, 181, 565, 254] tail = [601, 623, 619, 64, 558] 56 | # ml100k:random = [62, 1077, 785, 1419, 1257] tail = [1319, 1612, 1509, 1545, 1373] 57 | # 5,395,181,565,254,601,623,619,64,558 58 | # 62,1077,785,1419,1257,1319,1612,1509,1545,1373 59 | # 1166,1574,759,494,549,1272,1728,1662,450,1456,595,566,764,1187,1816,1478,1721,2294,2413,1148 60 | parser.add_argument('--target_ids', type=str, default='866', 61 | help='attack target') 62 | 63 | parser.add_argument('--attack_num', type=int, default=50, 64 | help='num of attack fake user,50 for ml100k and filmTrust') 65 | 66 | parser.add_argument('--filler_num', type=int, default=4, 67 | help='num of filler items each fake user,90 for ml100k,36 for filmTrust') 68 | 69 | args = parser.parse_args() 70 | args.target_ids = list(map(int, args.target_ids.split(','))) 71 | return args 72 | 73 | 74 | 
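# Illustrative usage of this script (example values only; pick --filler_num per dataset as noted in the help strings above):
#   python main_train_rec.py --dataset ml100k --model_name IAutoRec --attack_method gan --target_ids 62,1077 --attack_num 50 --filler_num 90
# For any attack_method other than 'no', the main block below composes '<method>_<attack_num>_<filler_num>' (here 'gan_50_90'),
# so train_rec() reads the injected ratings from ../data/data_attacked/ml100k_<target_id>_gan_50_90.dat;
# with --attack_method no it trains on the clean file ../data/data/ml100k_train.dat instead.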
if __name__ == '__main__': 75 | """parse args""" 76 | args = parse_arg() 77 | 78 | """train""" 79 | if args.attack_method == 'no': 80 | attack_method_ = args.attack_method 81 | else: 82 | attack_method_ = '_'.join([args.attack_method, str(args.attack_num), str(args.filler_num)]) 83 | is_train = 1 84 | train_rec(args.dataset, args.model_name, attack_method_, args.target_ids[0], is_train=is_train) 85 | for target in args.target_ids[1:]: 86 | if args.attack_method == 'no': 87 | is_train = 0 88 | train_rec(args.dataset, args.model_name, attack_method_, target, is_train=is_train) 89 | -------------------------------------------------------------------------------- /data/ml100k/ml100k_target_users: -------------------------------------------------------------------------------- 1 | 1257 0,513,4,12,13,526,21,535,540,541,544,41,42,43,553,47,50,55,58,59,61,63,576,68,585,587,84,86,599,93,605,617,619,108,620,621,114,118,631,120,124,129,641,644,143,144,659,660,150,664,670,681,683,176,693,183,196,197,708,710,711,715,720,209,214,220,221,737,740,231,745,746,747,748,750,243,245,757,247,250,253,770,772,266,267,780,270,785,275,789,795,797,290,803,806,295,300,814,304,306,307,310,311,312,822,829,322,324,327,329,331,843,845,846,847,849,344,346,349,866,867,359,362,874,369,881,372,885,886,888,377,378,891,895,386,388,900,393,906,396,915,404,405,920,921,415,424,428,942,434,436,449,451,452,454,456,458,471,473,478,483,486,492,494,495,496,505 2 | 1419 0,513,641,6,647,520,649,263,392,393,13,653,527,912,275,660,21,534,150,282,795,540,157,797,415,544,290,294,298,43,300,428,302,177,310,822,58,59,314,193,449,326,839,456,714,715,333,846,335,605,93,94,221,349,98,483,360,748,492,750,494,505,882,243,756,757,372,631,889,378,891,124,895 3 | 785 0,513,4,13,526,21,535,540,541,544,41,42,43,553,47,50,55,58,59,61,63,576,68,585,587,84,86,599,605,617,619,108,620,621,114,118,631,120,124,129,641,644,647,143,144,660,150,664,670,681,683,176,177,693,183,196,197,708,710,711,715,720,209,214,220,221,737,740,231,745,746,747,748,750,243,245,757,247,250,253,770,772,266,267,780,270,785,795,797,290,803,806,295,300,302,814,304,306,307,310,311,312,822,829,322,324,327,329,331,843,846,847,849,344,345,346,349,866,867,359,362,874,879,369,881,372,885,886,888,377,378,891,895,386,388,900,391,392,393,906,396,915,405,920,921,415,424,942,436,449,451,452,454,456,458,471,473,478,483,486,492,494,495,496,505 4 | 1077 513,129,642,388,262,393,17,785,532,150,535,406,285,415,291,294,297,43,302,306,310,183,312,313,188,63,576,449,69,199,456,329,715,591,209,467,342,471,346,605,93,863,96,229,617,746,377,494,881,116,502,889,507 5 | 62 386,5,6,263,390,13,653,147,660,21,534,150,536,915,282,406,157,415,290,292,804,294,40,296,298,43,560,177,822,58,314,61,63,193,197,326,839,456,715,333,591,338,468,853,98,362,882,243,372,757,248,765,127 6 | 1319 0,6,15,17,20,22,23,537,547,550,552,42,48,560,565,58,61,63,71,591,592,91,93,605,95,607,98,108,621,628,117,137,143,662,663,173,177,187,193,200,212,213,215,221,231,232,746,750,757,248,249,762,763,263,266,268,275,290,291,292,804,294,295,296,805,302,822,322,327,839,333,338,853,342,344,351,863,869,360,879,373,885,888,377,378,891,384,388,405,406,415,928,931,420,424,428,429,942,436,456,467,473,479,492,496,498,499 7 | 1612 
0,513,4,12,13,526,21,535,540,541,544,41,42,43,553,47,50,55,58,59,61,63,576,68,585,587,84,86,599,93,605,617,619,108,620,621,114,118,631,120,124,129,641,644,647,143,144,659,660,150,664,670,681,683,176,177,693,183,196,197,708,710,711,715,720,209,214,220,221,737,740,231,745,746,747,748,750,243,245,757,247,250,253,770,772,266,267,780,270,785,275,789,795,797,290,803,806,295,300,302,814,304,306,307,310,311,312,822,829,322,324,327,329,331,843,845,846,847,849,344,345,346,349,866,867,359,362,874,879,369,881,372,885,886,888,377,378,891,895,386,388,900,391,392,393,906,396,915,404,405,920,921,415,424,428,942,434,436,449,451,452,454,456,458,471,473,478,483,486,492,494,495,496,505 8 | 1509 0,5,6,13,526,15,17,23,537,27,541,547,550,42,48,560,565,58,63,71,591,89,91,93,98,108,621,114,628,137,143,662,663,173,177,193,709,200,212,213,215,220,221,746,757,248,249,762,263,266,275,290,292,804,805,296,307,822,314,830,832,837,327,843,333,338,853,341,342,344,351,863,869,360,362,879,882,373,885,888,377,378,891,396,405,415,928,420,932,424,428,429,942,436,452,456,457,467,471,473,479,496,498 9 | 1545 0,6,15,17,20,22,23,537,547,550,552,42,48,560,565,58,61,63,71,591,592,91,93,605,95,607,98,108,621,628,117,137,143,662,663,173,177,187,193,200,212,213,215,221,231,232,746,750,757,248,249,762,763,263,266,268,275,290,291,292,804,294,295,296,805,302,822,322,327,839,333,338,853,342,344,351,863,869,360,879,373,885,888,377,378,891,384,388,405,406,415,928,931,420,424,428,429,942,436,456,467,473,479,492,496,498,499 10 | 1373 0,513,641,6,647,520,649,263,392,393,13,653,527,912,275,660,21,534,150,282,795,540,157,797,415,544,290,294,298,43,300,428,302,177,310,822,58,59,314,193,449,326,839,456,714,715,333,846,335,605,93,94,221,349,98,483,360,748,492,750,494,505,882,243,756,757,372,631,889,378,891,124,895 11 | 690 0,6,15,41,58,59,63,69,93,94,129,150,177,199,221,248,291,310,338,342,373,386,393,397,449,454,456,471,483,487,505,513,535,550,642,647,652,660,715,726,803,806,814,845,849,863,881,885,896 -------------------------------------------------------------------------------- /AUSH/model/attack_model/baseline.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # @Time : 2019/8/23 10:46 3 | # @Author : chensi 4 | # @File : baseline_new.py 5 | # @Software : PyCharm 6 | # @Desciption : None 7 | import numpy as np 8 | import math 9 | 10 | 11 | class BaselineAttack: 12 | 13 | def __init__(self, attack_num, filler_num, n_items, target_id, 14 | global_mean, global_std, item_means, item_stds, r_max, r_min, fixed_filler_indicator=None): 15 | # 16 | self.attack_num = attack_num 17 | self.filler_num = filler_num 18 | self.n_items = n_items 19 | self.target_id = target_id 20 | self.global_mean = global_mean 21 | self.global_std = global_std 22 | self.item_means = item_means 23 | self.item_stds = item_stds 24 | self.r_max = r_max 25 | self.r_min = r_min 26 | 27 | self.fixed_filler_indicator = fixed_filler_indicator 28 | 29 | def RandomAttack(self): 30 | filler_candis = list(set(range(self.n_items)) - {self.target_id}) 31 | fake_profiles = np.zeros(shape=[self.attack_num, self.n_items], dtype=float) 32 | # target 33 | fake_profiles[:, self.target_id] = self.r_max 34 | # fillers 35 | for i in range(self.attack_num): 36 | if self.fixed_filler_indicator is None: 37 | fillers = np.random.choice(filler_candis, size=self.filler_num, replace=False) 38 | else: 39 | 40 | fillers = np.where(np.array(self.fixed_filler_indicator[i])== 1)[0] 41 | ratings = np.random.normal(loc=self.global_mean, 
scale=self.global_std, size=self.filler_num) 42 | for f_id, r in zip(fillers, ratings): 43 | fake_profiles[i][f_id] = max(math.exp(-5), min(self.r_max, r)) 44 | return fake_profiles 45 | 46 | def BandwagonAttack(self, selected_ids): 47 | filler_candis = list(set(range(self.n_items)) - set([self.target_id] + selected_ids)) 48 | fake_profiles = np.zeros(shape=[self.attack_num, self.n_items], dtype=float) 49 | # target & selected patch 50 | fake_profiles[:, [self.target_id] + selected_ids] = self.r_max 51 | # fillers 52 | for i in range(self.attack_num): 53 | if self.fixed_filler_indicator is None: 54 | fillers = np.random.choice(filler_candis, size=self.filler_num, replace=False) 55 | else: 56 | 57 | fillers = np.where(np.array(self.fixed_filler_indicator[i])== 1)[0] 58 | ratings = np.random.normal(loc=self.global_mean, scale=self.global_std, size=self.filler_num) 59 | for f_id, r in zip(fillers, ratings): 60 | fake_profiles[i][f_id] = max(math.exp(-5), min(self.r_max, r)) 61 | return fake_profiles 62 | 63 | def AverageAttack(self): 64 | filler_candis = list(set(range(self.n_items)) - {self.target_id}) 65 | fake_profiles = np.zeros(shape=[self.attack_num, self.n_items], dtype=float) 66 | # target 67 | fake_profiles[:, self.target_id] = self.r_max 68 | # fillers 69 | fn_normal = lambda iid: np.random.normal(loc=self.item_means[iid], scale=self.item_stds[iid], size=1)[0] 70 | for i in range(self.attack_num): 71 | if self.fixed_filler_indicator is None: 72 | fillers = np.random.choice(filler_candis, size=self.filler_num, replace=False) 73 | else: 74 | 75 | fillers = np.where(np.array(self.fixed_filler_indicator[i])== 1)[0] 76 | ratings = map(fn_normal, fillers) 77 | for f_id, r in zip(fillers, ratings): 78 | fake_profiles[i][f_id] = max(math.exp(-5), min(self.r_max, r)) 79 | return fake_profiles 80 | 81 | def SegmentAttack(self, selected_ids): 82 | filler_candis = list(set(range(self.n_items)) - set([self.target_id] + selected_ids)) 83 | fake_profiles = np.zeros(shape=[self.attack_num, self.n_items], dtype=float) 84 | # target & selected patch 85 | fake_profiles[:, [self.target_id] + selected_ids] = self.r_max 86 | # fillers 87 | for i in range(self.attack_num): 88 | if self.fixed_filler_indicator is None: 89 | fillers = np.random.choice(filler_candis, size=self.filler_num, replace=False) 90 | else: 91 | 92 | fillers = np.where(np.array(self.fixed_filler_indicator[i])== 1)[0] 93 | fake_profiles[i][fillers] = self.r_min 94 | return fake_profiles 95 | -------------------------------------------------------------------------------- /Leg-UP/run.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # @Time : 2020/12/27 19:57 3 | # @Author : chensi 4 | # @File : run.py 5 | # @Software : PyCharm 6 | # @Desciption : None 7 | 8 | 9 | import argparse, os 10 | 11 | os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID" 12 | os.environ["CUDA_VISIBLE_DEVICES"] = "1, 2, 3" 13 | 14 | PythonCommand = 'python' # 'D:\Anaconda3\envs\py38_tf2\python' if os.path.exists('D:\Anaconda3') else 'python' 15 | 16 | 17 | class Run: 18 | def __init__(self): 19 | self.args = self.parse_args() 20 | self.args.attacker_list = self.args.attacker_list.split(',') 21 | self.args.recommender_list = self.args.recommender_list.split(',') 22 | 23 | def execute(self): 24 | 25 | self.step_1_Rec() 26 | 27 | self.step_2_Attack() 28 | 29 | return 30 | 31 | def parse_args(self): 32 | 33 | parser = argparse.ArgumentParser() 34 | parser.add_argument('--data_set', type=str, 
default='ml100k') # ml100k,filmTrust,automotive 35 | parser.add_argument('--attack_num', type=int, default=50) 36 | parser.add_argument('--filler_num', type=int, default=36) 37 | parser.add_argument('--cuda_id', type=int, default=3) 38 | parser.add_argument('--use_cuda', type=int, default=0) 39 | parser.add_argument('--batch_size_S', type=int, default=64) 40 | parser.add_argument('--batch_size_D', type=int, default=64) 41 | parser.add_argument("--surrogate", type=str, default="WMF") 42 | 43 | 44 | # ml100k:62,1077,785,1419,1257 45 | # filmTrust:5,395,181,565,254 46 | # automotive:119,422,594,884,1593 47 | parser.add_argument('--target_ids', type=str, default='62') 48 | # AUSH,AUSHplus,RecsysAttacker,DCGAN,WGAN,SegmentAttacker,BandwagonAttacker,AverageAttacker,RandomAttacker 49 | parser.add_argument('--attacker_list', type=str, default='AUSHplus') 50 | # SVD,NMF,SlopeOne,IAutoRec,UAutoRec,NeuMF 51 | parser.add_argument('--recommender_list', type=str, default='SVD,NMF,SlopeOne,IAutoRec,UAutoRec,NeuMF') 52 | return parser.parse_args() 53 | 54 | def step_1_Rec(self): 55 | print('step_1') 56 | args = self.args 57 | """ 58 | 59 | data_set/target_ids/train_path/test_path/model_path/target_prediction_path_prefix 60 | 61 | """ 62 | args_dict = { 63 | 'exe_model_lib': 'recommender', 64 | 'train_path': './data/%s/%s_train.dat' % (args.data_set, args.data_set), 65 | 'test_path': './data/%s/%s_test.dat' % (args.data_set, args.data_set), 66 | } 67 | args_dict.update(vars(args)) 68 | 69 | # 70 | for recommender in args.recommender_list: 71 | # 72 | cur_args_dict = { 73 | 'exe_model_class': recommender, 74 | 'model_path': './results/model_saved/%s/%s_%s' % (args.data_set, args.data_set, recommender), 75 | 'target_prediction_path_prefix': './results/performance/mid_results/%s/%s_%s' % ( 76 | args.data_set, args.data_set, recommender), 77 | } 78 | cur_args_dict.update(args_dict) 79 | 80 | args_str = ' '.join( 81 | ["--%s %s" % (k, v) for (k, v) in cur_args_dict.items()]) 82 | # 83 | print('%s ./execute_model.py %s' % (PythonCommand, args_str)) 84 | print(os.system('%s ./execute_model.py %s' % (PythonCommand, args_str))) 85 | 86 | def step_2_Attack(self): 87 | print('step_2') 88 | args = self.args 89 | 90 | args_dict = { 91 | 'exe_model_lib': 'attacker', 92 | # 'filler_num': 4, 93 | # 'epoch': 50 94 | } 95 | args_dict.update(vars(args)) 96 | 97 | for target_id in map(int, args.target_ids.split(',')): 98 | for attacker in args.attacker_list: 99 | cur_args_dict = { 100 | 'exe_model_class': attacker, 101 | 'target_id': target_id, 102 | 'injected_path': './results/data_attacked/%s/%s_%s_%d.data' % ( 103 | args.data_set, args.data_set, attacker, target_id) 104 | 105 | } 106 | cur_args_dict.update(args_dict) 107 | 108 | args_str = ' '.join(["--%s %s" % (k, v) for (k, v) in cur_args_dict.items()]) 109 | print(os.system('%s ./execute_model.py %s' % (PythonCommand, args_str))) 110 | # break 111 | 112 | model = Run() 113 | model.execute() 114 | 115 | 116 | -------------------------------------------------------------------------------- /Leg-UP/preprocess_data.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | import random 4 | import pandas as pd 5 | from pathlib import Path 6 | import json 7 | from sklearn.model_selection import train_test_split 8 | 9 | data_file = Path('data') 10 | data_set_name = 'GroceryFood' 11 | data_file = data_file / data_set_name / (data_set_name + 'Raw.json') 12 | 13 | df_gro = pd.DataFrame(columns=['user', 'item', 
'score']) 14 | data = [] 15 | with open(data_file, encoding='utf-8') as f: 16 | for line in f: 17 | data.append(json.loads(line)) 18 | print(f'data_set_len:{len(data)}') 19 | print(f'data head:\n{data[:5]}') 20 | 21 | user_set = set() 22 | item_set = set() 23 | data_list = [] 24 | for idx, d in enumerate(data): 25 | item_set.add(d['asin']) 26 | user_set.add(d["reviewerID"]) 27 | data_list.append([d["reviewerID"], d['asin'], d['overall']]) 28 | 29 | raw_df = pd.DataFrame(data_list, columns=['user', 'item', 'score']) 30 | 31 | user2idx = {x: idx for idx, x in enumerate(user_set)} 32 | item2idx = {x: idx for idx, x in enumerate(item_set)} 33 | 34 | 35 | def fun(item): 36 | return user2idx[item] 37 | 38 | def fun2(item): 39 | return item2idx[item] 40 | 41 | raw_df['user'] = raw_df['user'].apply(fun) 42 | raw_df['item'] = raw_df['item'].apply(fun2) 43 | 44 | print(f'raw data frame:') 45 | print(raw_df) 46 | 47 | user_cont = raw_df.groupby('user').count() 48 | filter_ratings = {i for i in list(user_cont[user_cont['item'] >= 17].index)} 49 | 50 | after_filter_df = pd.DataFrame(columns=['user', 'item', 'score']) 51 | 52 | 53 | all_data = [] 54 | for i in filter_ratings: 55 | each_i = raw_df[raw_df['user'] == i] 56 | all_data.append(each_i.values) 57 | after_filter_df = after_filter_df.append(each_i) 58 | 59 | train_list = [] 60 | test_list = [] 61 | train_df = pd.DataFrame(columns=['user', 'item', 'score']) 62 | test_df = pd.DataFrame(columns=['user', 'item', 'score']) 63 | for d in all_data: 64 | train, test = train_test_split(d, test_size=0.1, random_state=42) 65 | df = pd.DataFrame(train, columns=['user', 'item', 'score']) 66 | df2 = pd.DataFrame(test, columns=['user', 'item', 'score']) 67 | train_df = train_df.append(df) 68 | test_df = test_df.append(df2) 69 | print(f'train_df:{train_df}') 70 | print(f'test_df:{test_df}') 71 | 72 | item_count = raw_df.groupby('item').count().sort_values(by='user', ascending=False) 73 | print(item_count) 74 | target_item_first = [i for i in item_count[:int(0.1 * len(item_count))].index.values] 75 | target_item_last = [i for i in item_count[int(0.9 * len(item_count)):].index.values] 76 | target_item = target_item_first + target_item_last 77 | with open(f'data/{data_set_name}_target_item', 'w') as f: 78 | for i in target_item: 79 | f.write(str(int(i))) 80 | f.write('\n') 81 | 82 | with open(f'data/{data_set_name}_selected_items', 'a+') as f: 83 | for i in target_item: 84 | select_item = [i] 85 | while True: 86 | a = random.choice(target_item_first) 87 | if a not in select_item: 88 | select_item.append(a) 89 | if len(select_item) == 4: 90 | break 91 | f.write(str(select_item[0]) + '\t') 92 | f.write(str(select_item[1]) + ',' + str(select_item[2]) + ',' + str(select_item[3])) 93 | f.write('\n') 94 | 95 | 96 | 97 | user_cont = raw_df.groupby('user').count() 98 | filter_ratings = {i for i in list(user_cont[user_cont['item'] >= 17].index)} 99 | 100 | after_filter_df = pd.DataFrame(columns=['user', 'item', 'score']) 101 | 102 | all_data = [] 103 | for i in filter_ratings: 104 | each_i = raw_df[raw_df['user'] == i] 105 | all_data.append(each_i.values) 106 | after_filter_df = after_filter_df.append(each_i) 107 | 108 | # all_data = [] 109 | # for i in filter_ratings: 110 | # each_i = raw_df[raw_df['user'] == i] 111 | # all_data.append(each_i.values) 112 | # after_filter_df = after_filter_df.append(each_i) 113 | 114 | 115 | 116 | # dfv = train_df.values 117 | # print(dfv) 118 | # with open(f'data/{data_set_name}_train.dat', 'a', encoding='utf-8') as f: 119 | # for d in 
dfv: 120 | # for idx, i in enumerate(d): 121 | # if idx != 2:f.write(str(int(i))) 122 | # else : f.write(str(i)) 123 | # if idx != 2: f.write('\t') 124 | # f.write('\n') 125 | # dfv = train_df.values 126 | # 127 | # dfv = test_df.values 128 | # with open(f'data/{data_set_name}_test.dat', 'a', encoding='utf-8') as f: 129 | # for d in dfv: 130 | # for idx, i in enumerate(d): 131 | # if idx != 2: f.write(str(int(i))) 132 | # else: f.write(str(i)) 133 | # if idx != 2: f.write('\t') 134 | # f.write('\n') 135 | -------------------------------------------------------------------------------- /AUSH/test_main/main_gan_attack.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # @Time : 2019/8/24 11:08 3 | # @Author : chensi 4 | # @File : main_gan_attack.py 5 | # @Software : PyCharm 6 | # @Desciption : None 7 | 8 | import sys 9 | 10 | sys.path.append("../") 11 | import os, argparse 12 | from utils.load_data.load_data import load_data 13 | from model.attack_model.gan_attack.trainer import Train_GAN_Attacker 14 | from utils.load_data.load_attack_info import load_attack_info 15 | from utils.attack.data_to_file import * 16 | import numpy as np 17 | 18 | 19 | # os.environ["CUDA_VISIBLE_DEVICES"] = '2' 20 | 21 | 22 | def gan_attack(data_set_name, attack_method, target_id, is_train, write_to_file=1, final_attack_setting=None): 23 | 24 | path_train = '../data/data/' + data_set_name + '_train.dat' 25 | path_test = '../data/data/' + data_set_name + '_test.dat' 26 | attack_info_path = ["../data/data/" + data_set_name + "_selected_items", 27 | "../data/data/" + data_set_name + "_target_users"] 28 | model_path = "../result/model_ckpt/" + '_'.join([data_set_name, attack_method, str(target_id)]) + ".ckpt" 29 | 30 | 31 | attack_info = load_attack_info(*attack_info_path) 32 | dataset_class = load_data(path_train=path_train, path_test=path_test, header=['user_id', 'item_id', 'rating'], 33 | sep='\t', print_log=True) 34 | 35 | if len(attack_method.split('_')[1:]) == 2: 36 | attack_num, filler_num = map(int, attack_method.split('_')[1:]) 37 | filler_method = 0 38 | else: 39 | attack_num, filler_num, filler_method = map(int, attack_method.split('_')[1:]) 40 | selected_items = attack_info[target_id][0] 41 | 42 | # 43 | gan_attacker = Train_GAN_Attacker(dataset_class, params_D=None, params_G=None, target_id=target_id, 44 | selected_id_list=selected_items, 45 | filler_num=filler_num, attack_num=attack_num, filler_method=filler_method) 46 | 47 | fake_profiles, real_profiles, filler_indicator = gan_attacker.execute(is_train=is_train, model_path=model_path, 48 | final_attack_setting=final_attack_setting) 49 | gan_attacker.sess.close() 50 | 51 | # """inject and write to file""" 52 | if write_to_file == 1: 53 | dst_path = "../data/data_attacked/" + '_'.join([data_set_name, str(target_id), attack_method]) + ".dat" 54 | attacked_file_writer(path_train, dst_path, fake_profiles, dataset_class.n_users) 55 | return fake_profiles, real_profiles, filler_indicator 56 | 57 | 58 | def parse_arg(): 59 | parser = argparse.ArgumentParser() 60 | 61 | parser.add_argument('--dataset', type=str, default='ml100k', help='filmTrust/ml100k/grocery') 62 | 63 | # filmTrust:random = [5, 395, 181, 565, 254] tail = [601, 623, 619, 64, 558] 64 | # ml100k:random = [62, 1077, 785, 1419, 1257] tail = [1319, 1612, 1509, 1545, 1373] 65 | # 5,395,181,565,254,601,623,619,64,558 66 | # 62,1077,785,1419,1257,1319,1612,1509,1545,1373 67 | parser.add_argument('--target_ids', type=str, 
default='62,1077,785,1419,1257,1319,1612,1509,1545,1373', 68 | help='attack target list') 69 | 70 | parser.add_argument('--attack_num', type=int, default=50, 71 | help='num of attack fake user,50 for ml100k and filmTrust') 72 | 73 | parser.add_argument('--filler_num', type=int, default=90, 74 | help='num of filler items each fake user,90 for ml100k,36 for filmTrust') 75 | 76 | parser.add_argument('--filler_method', type=str, default='', help='0/1/2/3') 77 | 78 | parser.add_argument('--write_to_file', type=int, default=1, help='write to fake profile to file or return array') 79 | # 80 | args = parser.parse_args() 81 | # 82 | args.target_ids = list(map(int, args.target_ids.split(','))) 83 | return args 84 | 85 | 86 | if __name__ == '__main__': 87 | """parse args""" 88 | args = parse_arg() 89 | """train""" 90 | is_train = 1 91 | attack_method = '_'.join(['gan', str(args.attack_num), str(args.filler_num), str(args.filler_method)]).strip('_') 92 | 93 | # 94 | for target_id in args.target_ids: 95 | 96 | attackSetting_path = '_'.join(map(str, [args.dataset, args.attack_num, args.filler_num, target_id])) 97 | attackSetting_path = "../data/data_attacked/" + attackSetting_path + '_attackSetting' 98 | real_profiles, filler_indicator = np.load(attackSetting_path + '.npy') 99 | final_attack_setting = [args.attack_num, real_profiles, filler_indicator] 100 | 101 | 102 | _ = gan_attack(args.dataset, attack_method, target_id, is_train, 103 | write_to_file=args.write_to_file, 104 | final_attack_setting=final_attack_setting) 105 | -------------------------------------------------------------------------------- /AUSH/utils/load_data/load_data.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # @Time : 2019/8/22 10:07 3 | # @Author : chensi 4 | # @File : load_data_new.py 5 | # @Software : PyCharm 6 | # @Desciption : None 7 | 8 | 9 | import pandas as pd 10 | import numpy as np 11 | 12 | from scipy.sparse import csr_matrix 13 | 14 | 15 | class load_data(): 16 | 17 | def __init__(self, path_train, path_test, 18 | header=None, sep='\t', threshold=4, print_log=True): 19 | self.path_train = path_train 20 | self.path_test = path_test 21 | self.header = header if header is not None else ['user_id', 'item_id', 'rating'] 22 | self.sep = sep 23 | self.threshold = threshold 24 | self.print_log = print_log 25 | 26 | self._main_load() 27 | 28 | def _main_load(self): 29 | # load data 30 | self._load_file() 31 | # 32 | # dataframe to matrix 33 | self.train_matrix, self.train_matrix_implicit = self._data_to_matrix(self.train_data) 34 | self.test_matrix, self.test_matrix_implicit = self._data_to_matrix(self.test_data) 35 | 36 | def _load_file(self): 37 | if self.print_log: 38 | print("load train/test data\t:\n", self.path_train) 39 | self.train_data = pd.read_csv(self.path_train, sep=self.sep, names=self.header, engine='python').loc[:, 40 | ['user_id', 'item_id', 'rating']] 41 | self.test_data = pd.read_csv(self.path_test, sep=self.sep, names=self.header, engine='python').loc[:, 42 | ['user_id', 'item_id', 'rating']] 43 | 44 | self.n_users = len(set(self.test_data.user_id.unique()) | set(self.train_data.user_id.unique())) 45 | self.n_items = len(set(self.test_data.item_id.unique()) | set(self.train_data.item_id.unique())) 46 | 47 | if self.print_log: 48 | print("Number of users:", self.n_users, ",Number of items:", self.n_items, flush=True) 49 | print("Train size:", self.train_data.shape[0], ",Test size:", self.test_data.shape[0], flush=True) 50 | 51 | def 
_data_to_matrix(self, data_frame): 52 | row, col, rating, implicit_rating = [], [], [], [] 53 | for line in data_frame.itertuples(): 54 | uid, iid, r = list(line)[1:] 55 | implicit_r = 1 if r >= self.threshold else 0 56 | 57 | row.append(uid) 58 | col.append(iid) 59 | rating.append(r) 60 | implicit_rating.append(implicit_r) 61 | 62 | matrix = csr_matrix((rating, (row, col)), shape=(self.n_users, self.n_items)) 63 | matrix_implicit = csr_matrix((implicit_rating, (row, col)), shape=(self.n_users, self.n_items)) 64 | return matrix, matrix_implicit 65 | 66 | def get_global_mean_std(self): 67 | return self.train_matrix.data.mean(), self.train_matrix.data.std() 68 | 69 | def get_all_mean_std(self): 70 | flag = 1 71 | for v in ['global_mean', 'global_std', 'item_means', 'item_stds']: 72 | if not hasattr(self, v): 73 | flag = 0 74 | break 75 | if flag == 0: 76 | global_mean, global_std = self.get_global_mean_std() 77 | item_means, item_stds = [global_mean] * self.n_items, [global_std] * self.n_items 78 | train_matrix_t = self.train_matrix.transpose() 79 | for iid in range(self.n_items): 80 | item_vec = train_matrix_t.getrow(iid).toarray()[0] 81 | ratings = item_vec[np.nonzero(item_vec)] 82 | if len(ratings) > 0: 83 | item_means[iid], item_stds[iid] = ratings.mean(), ratings.std() 84 | self.global_mean, self.global_std, self.item_means, self.item_stds \ 85 | = global_mean, global_std, item_means, item_stds 86 | return self.global_mean, self.global_std, self.item_means, self.item_stds 87 | 88 | def get_item_pop(self): 89 | # item_pops = [0] * self.n_items 90 | # train_matrix_t = self.train_matrix.transpose() 91 | # for iid in range(self.n_items): 92 | # item_vec = train_matrix_t.getrow(iid).toarray()[0] 93 | # item_pops[iid] = len(np.nonzero(item_vec)[0]) 94 | item_pops_dict = dict(self.train_data.groupby('item_id').size()) 95 | item_pops = [0] * self.n_items 96 | for iid in item_pops_dict.keys(): 97 | item_pops[iid] = item_pops_dict[iid] 98 | return item_pops 99 | 100 | def get_user_nonrated_items(self): 101 | non_rated_indicator = self.train_matrix.toarray() 102 | non_rated_indicator[non_rated_indicator > 0] = 1 103 | non_rated_indicator = 1 - non_rated_indicator 104 | user_norated_items = {} 105 | for uid in range(self.n_users): 106 | user_norated_items[uid] = list(non_rated_indicator[uid].nonzero()[0]) 107 | return user_norated_items 108 | 109 | def get_item_nonrated_users(self, item_id): 110 | item_vec = np.squeeze(self.train_matrix[:, item_id].toarray()) 111 | # item_vec = self.train_matrix.toarray().transpose()[item_id] 112 | item_vec[item_vec > 0] = 1 113 | non_rated_indicator = 1 - item_vec 114 | return list(non_rated_indicator.nonzero()[0]) 115 | -------------------------------------------------------------------------------- /Leg-UP/utils/utils.py: -------------------------------------------------------------------------------- 1 | import random 2 | 3 | import numpy as np 4 | import torch 5 | from scipy import sparse 6 | 7 | EPSILON = 1e-12 8 | _fixed_target_items = { 9 | "head": np.asarray([259, 2272, 3010, 6737, 7690]), 10 | "tail": np.asarray([5611, 9213, 10359, 10395, 12308]), 11 | "upper_torso": np.asarray([1181, 1200, 2725, 4228, 6688]), 12 | "lower_torso": np.asarray([3227, 5810, 7402, 9272, 10551]) 13 | } 14 | 15 | 16 | def sample_target_items(train_data, n_samples, popularity, use_fix=False): 17 | """Sample target items with certain popularity.""" 18 | if popularity not in ["head", "upper_torso", "lower_torso", "tail"]: 19 | raise ValueError("Unknown popularity type 
{}.".format(popularity)) 20 | 21 | n_items = train_data.shape[1] # 14007 22 | all_items = np.arange(n_items) # [0, 1, 2, ... , 14006] 23 | item_clicks = train_data.toarray().sum(0) 24 | 25 | valid_items = [] 26 | if use_fix: 27 | valid_items = _fixed_target_items[popularity] 28 | else: 29 | bound_head = np.percentile(item_clicks, 95) 30 | bound_torso = np.percentile(item_clicks, 75) 31 | bound_tail = np.percentile(item_clicks, 50) 32 | if popularity == "head": 33 | valid_items = all_items[item_clicks > bound_head] 34 | elif popularity == "tail": 35 | valid_items = all_items[item_clicks < bound_tail] 36 | elif popularity == "upper_torso": 37 | valid_items = all_items[(item_clicks > bound_torso) & (item_clicks < bound_head)] 38 | elif popularity == "lower_torso": 39 | valid_items = all_items[(item_clicks > bound_tail) & (item_clicks < bound_torso)] 40 | 41 | if len(valid_items) < n_samples: 42 | raise ValueError("Cannot sample enough items that meet criteria.") 43 | 44 | np.random.shuffle(valid_items) 45 | sampled_items = valid_items[:n_samples] 46 | sampled_items.sort() 47 | print("Sampled target items: {}".format(sampled_items.tolist())) 48 | 49 | return sampled_items 50 | 51 | 52 | def set_seed(seed, cuda=False): 53 | """Set seed globally.""" 54 | np.random.seed(seed) 55 | random.seed(seed) 56 | if cuda: 57 | torch.cuda.manual_seed(seed) 58 | torch.backends.cudnn.deterministic = True 59 | else: 60 | torch.manual_seed(seed) 61 | 62 | 63 | def minibatch(*tensors, **kwargs): 64 | """Mini-batch generator for pytorch tensor.""" 65 | batch_size = kwargs.get('batch_size', 128) # 2048 66 | 67 | if len(tensors) == 1: # √ 68 | tensor = tensors[0] 69 | for i in range(0, len(tensor), batch_size): # len(tensor) = 14007 70 | yield tensor[i:i + batch_size] 71 | else: 72 | for i in range(0, len(tensors[0]), batch_size): 73 | yield tuple(x[i:i + batch_size] for x in tensors) 74 | 75 | 76 | def shuffle(*arrays, **kwargs): 77 | """Shuffle arrays.""" 78 | require_indices = kwargs.get('indices', False) 79 | 80 | if len(set(len(x) for x in arrays)) != 1: 81 | raise ValueError('All inputs to shuffle must have ' 82 | 'the same length.') 83 | 84 | shuffle_indices = np.arange(len(arrays[0])) 85 | np.random.shuffle(shuffle_indices) 86 | 87 | if len(arrays) == 1: 88 | result = arrays[0][shuffle_indices] 89 | else: 90 | result = tuple(x[shuffle_indices] for x in arrays) 91 | 92 | if require_indices: 93 | return result, shuffle_indices 94 | else: 95 | return result 96 | 97 | 98 | def sparse2tensor(sparse_data): 99 | """Convert sparse csr matrix to pytorch tensor.""" 100 | return torch.FloatTensor(sparse_data.toarray()) 101 | 102 | 103 | def tensor2sparse(tensor): 104 | """Convert pytorch tensor to sparse csr matrix.""" 105 | return sparse.csr_matrix(tensor.detach().cpu().numpy()) 106 | 107 | 108 | def stack_csrdata(data1, data2): 109 | """Stack two sparse csr matrix.""" 110 | return sparse.vstack((data1, data2), format="csr") 111 | 112 | 113 | def save_fake_data(fake_data, path): 114 | """Save fake data to file.""" 115 | file_path = "%s.npz" % path 116 | print("Saving fake data to {}".format(file_path)) 117 | sparse.save_npz(file_path, fake_data) 118 | return file_path 119 | 120 | 121 | def load_fake_data(file_path): 122 | """Load fake data from file.""" 123 | fake_data = sparse.load_npz(file_path) 124 | print("Loaded fake data from {}".format(file_path)) 125 | return fake_data 126 | 127 | 128 | def save_checkpoint(model, optimizer, path, epoch=-1): 129 | """Save model checkpoint and optimizer state to file.""" 130 | 
state = { 131 | "epoch": epoch, 132 | "state_dict": model.state_dict(), 133 | "optimizer": optimizer.state_dict(), 134 | } 135 | file_path = "%s.pt" % path 136 | print("Saving checkpoint to {}".format(file_path)) 137 | torch.save(state, file_path) 138 | 139 | 140 | def load_checkpoint(path): 141 | """Load model checkpoint and optimizer state from file.""" 142 | file_path = "%s.pt" % path 143 | state = torch.load(file_path, map_location=torch.device('cpu')) 144 | print("Loaded checkpoint from {} (epoch {})".format( 145 | file_path, state["epoch"])) 146 | return state["epoch"], state["state_dict"], state["optimizer"] 147 | -------------------------------------------------------------------------------- /AUSH/model/trainer_rec_surprise.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # @Time : 2019/8/23 15:24 3 | # @Author : chensi 4 | # @File : cf.py 5 | # @Software : PyCharm 6 | # @Desciption : None 7 | 8 | import os 9 | from surprise import Dataset, Reader, accuracy 10 | from surprise import SVD, SVDpp, NMF, KNNBasic, KNNWithMeans, KNNWithZScore 11 | from surprise.model_selection import PredefinedKFold 12 | from collections import defaultdict 13 | 14 | 15 | def get_top_n(predictions, n=50): 16 | # First map the predictions to each user. 17 | top_n = defaultdict(list) 18 | for uid, iid, true_r, est, _ in predictions: 19 | top_n[uid].append((iid, est)) 20 | # Then sort the predictions for each user and retrieve the k highest ones. 21 | for uid, user_ratings in top_n.items(): 22 | user_ratings.sort(key=lambda x: x[1], reverse=True) 23 | top_n[uid] = user_ratings[:n] 24 | return top_n 25 | 26 | 27 | def get_model(model_name): 28 | algo = None 29 | if 'KNN' in model_name: 30 | model_name = model_name.split('_') 31 | knn_model_name = model_name[0] 32 | user_based = False if len(model_name) > 1 and model_name[1] == 'I' else True 33 | dis_method = 'msd' if len(model_name) < 3 else model_name[2] 34 | k = 20 if len(model_name) < 4 else int(model_name[3]) 35 | sim_options = {'user_based': user_based, 'name': dis_method} 36 | if knn_model_name == 'KNNBasic': 37 | algo = KNNBasic(sim_options=sim_options, k=k) 38 | elif knn_model_name == 'KNNWithMeans': 39 | algo = KNNWithMeans(sim_options=sim_options, k=k) 40 | elif knn_model_name == 'KNNWithZScore': 41 | algo = KNNWithZScore(sim_options=sim_options, k=k) 42 | elif 'SVDpp' in model_name or 'SVD' in model_name or 'NMF' in model_name: 43 | model_name = model_name.split('_') 44 | n_factors = 25 if len(model_name) == 1 else int(model_name[1]) 45 | if model_name[0] == 'SVDpp': 46 | algo = SVDpp(n_factors=n_factors) 47 | elif model_name[0] == 'SVD': 48 | algo = SVD(n_factors=n_factors) 49 | elif model_name[0] == 'NMF': 50 | algo = NMF(n_factors=n_factors) 51 | return algo 52 | 53 | 54 | def get_model_old(model_name): 55 | algo = None 56 | if model_name == 'KNNBasic_U': 57 | sim_options = {'user_based': True} 58 | algo = KNNBasic(sim_options=sim_options, k=20) 59 | elif model_name == 'KNNBasic_I': 60 | sim_options = {'user_based': False} 61 | algo = KNNBasic(sim_options=sim_options, k=20) 62 | # algo = KNNBasic() 63 | elif model_name == 'KNNWithMeans_I': 64 | algo = KNNWithMeans(sim_options={'user_based': False}, k=20) 65 | elif model_name == 'KNNWithMeans_U': 66 | algo = KNNWithMeans(sim_options={'user_based': True}, k=20) 67 | elif model_name == 'KNNWithZScore_I': 68 | algo = KNNWithZScore(sim_options={'user_based': False}, k=20) 69 | elif model_name == 'KNNWithZScore_U': 70 | algo = 
KNNWithZScore(sim_options={'user_based': True}, k=20) 71 | elif model_name == 'SVDpp': 72 | algo = SVDpp() 73 | elif model_name == 'SVD': 74 | algo = SVD() 75 | elif model_name == 'NMF': 76 | algo = NMF() 77 | elif 'NMF_' in model_name: 78 | n_factors = int(model_name.split("_")[1]) 79 | algo = NMF(n_factors=n_factors) 80 | elif 'SVDpp_' in model_name: 81 | n_factors = int(model_name.split("_")[1]) 82 | algo = SVDpp(n_factors=n_factors) 83 | elif 'SVD_' in model_name: 84 | n_factors = int(model_name.split("_")[1]) 85 | algo = SVD(n_factors=n_factors) 86 | elif 'KNNBasic_U_' in model_name: 87 | k = int(model_name.split("_")[-1]) 88 | sim_options = {'user_based': True} 89 | algo = KNNBasic(sim_options=sim_options, k=k) 90 | elif 'KNNBasic_I_' in model_name: 91 | k = int(model_name.split("_")[-1]) 92 | sim_options = {'user_based': False} 93 | algo = KNNBasic(sim_options=sim_options, k=k) 94 | return algo 95 | 96 | 97 | def basic_rec(model_name, train_path, test_path, target_id): 98 | # build data 99 | # TODO check float and min_r 100 | reader = Reader(line_format='user item rating', sep='\t', rating_scale=(1, 5)) 101 | data = Dataset.load_from_folds([(train_path, test_path)], reader=reader) 102 | trainset, testset = None, None 103 | pkf = PredefinedKFold() 104 | for trainset_, testset_ in pkf.split(data): 105 | trainset, testset = trainset_, testset_ 106 | 107 | # train model 108 | rec_algo = get_model(model_name) 109 | rec_algo.fit(trainset) 110 | # eval 111 | preds = rec_algo.test(testset) 112 | rmse = accuracy.rmse(preds, verbose=True) 113 | 114 | # predor target 115 | fn_pred = lambda uid: rec_algo.predict(str(uid), str(target_id), r_ui=0).est 116 | target_predictions = list(map(fn_pred, range(trainset.n_users))) 117 | 118 | # topn 119 | testset = trainset.build_anti_testset() 120 | predictions = rec_algo.test(testset) 121 | top_n = get_top_n(predictions, n=50) 122 | 123 | hit_ratios = {} 124 | for uid, user_ratings in top_n.items(): 125 | topN = [int(iid) for (iid, _) in user_ratings] 126 | hits = [1 if target_id in topN[:i] else 0 for i in [1, 3, 5, 10, 20, 50]] 127 | hit_ratios[int(uid)] = hits 128 | return target_predictions, hit_ratios 129 | -------------------------------------------------------------------------------- /AUSH/test_main/main_gan_attack_baseline.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # @Time : 2019/8/24 11:08 3 | # @Author : chensi 4 | # @File : main_gan_attack_baseline.py 5 | # @Software : PyCharm 6 | # @Desciption : None 7 | 8 | import sys 9 | 10 | sys.path.append("../") 11 | import os, argparse 12 | from utils.load_data.load_data import load_data 13 | from model.attack_model.gan_attack.trainer_baseline import Train_G_Attacker 14 | from utils.load_data.load_attack_info import load_attack_info 15 | from utils.attack.data_to_file import * 16 | import numpy as np 17 | 18 | 19 | # os.environ["CUDA_VISIBLE_DEVICES"] = '2' 20 | 21 | 22 | def gan_attack(data_set_name, attack_method, target_id, is_train, write_to_file=1, final_attack_setting=None): 23 | path_train = '../data/data/' + data_set_name + '_train.dat' 24 | path_test = '../data/data/' + data_set_name + '_test.dat' 25 | attack_info_path = ["../data/data/" + data_set_name + "_selected_items", 26 | "../data/data/" + data_set_name + "_target_users"] 27 | 28 | attack_info = load_attack_info(*attack_info_path) 29 | dataset_class = load_data(path_train=path_train, path_test=path_test, header=['user_id', 'item_id', 'rating'], 30 | sep='\t', 
print_log=True) 31 | 32 | if len(attack_method.split('_')[1:]) == 2: 33 | attack_num, filler_num = map(int, attack_method.split('_')[1:]) 34 | filler_method = 0 35 | else: 36 | attack_num, filler_num, filler_method = map(int, attack_method.split('_')[1:]) 37 | 38 | loss_setting = int(attack_method.split('_')[0][-1]) 39 | selected_items = attack_info[target_id][0] 40 | model_path = "../result/model_ckpt/" + '_'.join([data_set_name, attack_method, str(target_id)]) + ".ckpt" 41 | 42 | # 43 | gan_attacker = Train_G_Attacker(dataset_class, params_D=None, params_G=None, target_id=target_id, 44 | selected_id_list=selected_items, 45 | filler_num=filler_num, attack_num=attack_num, filler_method=filler_method, 46 | loss_setting=loss_setting) 47 | # if is_train: 48 | # fake_profiles = gan_attacker.execute(is_train=True, model_path=model_path) 49 | # else: 50 | # fake_profiles, real_profiles = gan_attacker.execute(is_train=False, model_path=model_path) 51 | # if write_to_file == 0: 52 | # return fake_profiles, real_profiles 53 | fake_profiles, real_profiles, filler_indicator = gan_attacker.execute(is_train=is_train, model_path=model_path, 54 | final_attack_setting=final_attack_setting) 55 | gan_attacker.sess.close() 56 | # """inject and write to file""" 57 | if write_to_file == 1: 58 | dst_path = "../data/data_attacked/" + '_'.join([data_set_name, str(target_id), attack_method]) + ".dat" 59 | attacked_file_writer(path_train, dst_path, fake_profiles, dataset_class.n_users) 60 | return fake_profiles, real_profiles, filler_indicator 61 | 62 | 63 | def parse_arg(): 64 | parser = argparse.ArgumentParser() 65 | 66 | parser.add_argument('--dataset', type=str, default='automotive', help='filmTrust/ml100k/grocery') 67 | 68 | # filmTrust:random = [5, 395, 181, 565, 254] tail = [601, 623, 619, 64, 558] 69 | # ml100k:random = [62, 1077, 785, 1419, 1257] tail = [1319, 1612, 1509, 1545, 1373] 70 | # 5,395,181,565,254,601,623,619,64,558 71 | # 62,1077,785,1419,1257,1319,1612,1509,1545,1373 72 | # 1166,1574,759,494,549,1272,1728,1662,450,1456,595,566,764,1187,1816,1478,1721,2294,2413,1148 73 | # 88,22,122,339,1431,1141,1656,477,1089,866 74 | parser.add_argument('--target_ids', type=str, default='88,22,122,339,1431,1141,1656,477,1089,866', 75 | help='attack target list') 76 | 77 | parser.add_argument('--attack_num', type=int, default=50, 78 | help='num of attack fake user,50 for ml100k and filmTrust') 79 | 80 | parser.add_argument('--filler_num', type=int, default=4, 81 | help='num of filler items each fake user,90 for ml100k,36 for filmTrust') 82 | 83 | parser.add_argument('--filler_method', type=str, default='', help='0/1/2/3') 84 | 85 | parser.add_argument('--write_to_file', type=int, default=1, help='write to fake profile to file or return array') 86 | 87 | parser.add_argument('--loss', type=int, default=1, help='0:reconstruction,1:reconstruction+seed') 88 | # 89 | args = parser.parse_args() 90 | # 91 | args.target_ids = list(map(int, args.target_ids.split(','))) 92 | return args 93 | 94 | 95 | if __name__ == '__main__': 96 | """parse args""" 97 | args = parse_arg() 98 | """train""" 99 | is_train = 1 100 | attack_method = '_'.join( 101 | ['G' + str(args.loss), str(args.attack_num), str(args.filler_num), str(args.filler_method)]).strip('_') 102 | # 103 | for target_id in args.target_ids: 104 | 105 | attackSetting_path = '_'.join(map(str, [args.dataset, args.attack_num, args.filler_num, target_id])) 106 | attackSetting_path = "../data/data_attacked/" + attackSetting_path + '_attackSetting' 107 | real_profiles, 
filler_indicator = np.load(attackSetting_path + '.npy') 108 | final_attack_setting = [args.attack_num, real_profiles, filler_indicator] 109 | 110 | 111 | _ = gan_attack(args.dataset, attack_method, target_id, is_train, 112 | write_to_file=args.write_to_file, 113 | final_attack_setting=final_attack_setting) 114 | 115 | # gan_attack(args.dataset, attack_method, args.target_id, is_train, write_to_file=args.write_to_file) 116 | -------------------------------------------------------------------------------- /data/automotive/automotive_target_users: -------------------------------------------------------------------------------- 1 | 22 2181,2694,2696,1170,2582,1303,1175,2585,25,2717,2718,1950,2720,1697,2721,414,2719,2722,2723,2724,1704,2473,2725,2727,2728,1709,174,2606,2729,2609,1842,2730,52,2731,2732,2733,2736,57,2737,2738,2748,2749,2741,2742,64,1985,2745,2627,1348,2628,198,2750,1742,2644,2739,1750,855,473,2740,2521,1885,2269,2743,2546,2547,2674,1270,2746,2427,2172,2747,2174 2 | 88 1043,2586,2591,547,38,1578,52,2103,59,1600,579,2635,85,101,2661,1131,2668,1140,1157,1670,2695,649,2700,1176,677,683,1708,2735,2743,186,191,2755,712,1758,741,1253,1255,239,244,2814,1791,2824,2825,266,272,1808,1298,2834,280,1820,2845,2849,2850,2851,2852,2853,2854,2855,2359,825,1849,318,1863,334,846,2384,849,2388,2394,872,892,2469,1965,950,1980,2502,2529,1510,1514,502 3 | 119 28,101,272,288,301,316,341,378,449,532,659,663,698,705,731,744,958,1076,1104,1172,1292,1313,1323,1341,1465,1469,1473,1488,1573,1644,1758,1893,1958,1975,1978,2082,2164,2166,2191,2235,2338,2389,2535,2544,2545,2546,2548,2549,2550,2551 4 | 122 521,1547,2584,25,2586,2587,1052,28,1572,2603,1076,57,1089,580,2629,1094,1097,589,79,2644,96,2155,1644,2162,2675,2167,2172,1664,645,2181,2182,1672,655,1168,1689,1178,2717,1697,2721,2723,2724,2727,1704,2729,2731,1709,175,2736,2737,2738,179,180,2739,2740,2741,2749,2752,2241,198,2246,1225,725,221,2269,2282,238,1777,266,1303,296,1839,310,2362,2882,2889,842,331,2901,855,1879,2903,1887,2917,1895,2922,877,2427,900,1930,1931,2473,1972,1977,2493,461,2005,473,475,2533,1515,2546,2547,1528,1023 5 | 339 769,255,1032,10,1291,145,533,1302,1048,161,1313,1314,1315,1316,1317,295,296,1318,1319,43,684,1320,1321,1322,1323,305,1325,1326,1327,1328,1329,311,1330,1331,1332,571,1333,1334,1214,1335,1336,1337,1338,195,835,1340,1341,1342,1343,1344,1345,1346,1347,1229,1350,1351,1352,1339,1353,1354,1355,347,608,613,102,1254,361,754,1142,889,1147,508,1348 6 | 422 67,77,96,97,99,105,121,128,141,157,171,220,229,232,236,250,271,272,273,297,300,349,358,369,384,390,395,402,403,449,467,484,529,635,663,675,684,690,706,727,730,732,762,763,764,767,776,797,855,857,861,862,896,903,909,913,933,1012,1030,1036,1069,1077,1080,1106,1114,1124,1164,1193,1276,1291,1294,1307,1332,1333,1339,1347,1352,1383,1385,1386,1391,1410,1454,1473,1480,1494,1562,1579,1580,1593,1640,1643,1665,1670,1704,1710,1841,1845,1857,1876,1916,1920,1926,1956,1982,1997,2001,2010,2014,2037,2039,2050,2057,2086,2094,2124,2137,2153,2208,2236,2269,2270,2271,2272,2273,2274,2275,2276,2277,2279,2280,2282,2283,2284,2285,2286,2287,2288,2289,2290,2291,2292,2293,2294,2295,2296,2297,2298,2299,2301,2302 7 | 477 2437,1032,2572,1804,2190,1685,406,1046,1691,2715,1693,417,2338,419,548,1573,1955,937,1321,1325,1070,2733,2734,1970,563,308,1075,1971,185,314,2105,1340,1983,1474,2755,1733,967,1875,1109,2005,2006,2390,345,2521,2778,2141,1889,1507,612,2019,2278,1511,2661,2664,508,366,2159,1649,114,242,886,2550,2300,1662,2559 8 | 594 
52,59,85,101,191,239,244,266,272,280,318,334,502,547,649,677,683,712,741,846,849,872,892,950,1043,1131,1140,1176,1253,1255,1298,1510,1514,1578,1600,1670,1708,1758,1791,1808,1820,1849,1965,2103,2359,2384,2388,2394,2469,2502,2529,2586,2591,2635,2661,2668,2700,2735,2743,2755,2814,2824,2834,2845,2849,2850,2851,2852,2853,2854,2855 9 | 866 1537,1411,1415,136,1416,1417,398,1424,1937,915,1555,1173,1429,1939,2838,2842,1435,1440,1696,1443,1444,1445,1448,1066,300,1455,1712,433,1715,185,1465,1595,2874,1725,2875,1471,1727,1090,67,1475,1731,1479,2119,2504,2634,75,203,205,206,2123,2760,2765,1363,212,1367,1368,1499,221,1123,2406,2409,2157,1902,2030,1904,2158,1906,1395,2034,2037,1398,2673,1656,1402,1404,1405 10 | 884 25,28,57,79,175,179,180,198,221,238,266,296,310,331,461,473,475,580,589,645,655,725,842,855,877,900,1052,1076,1089,1094,1097,1168,1178,1225,1303,1515,1528,1547,1572,1644,1672,1689,1697,1704,1709,1777,1839,1887,1895,1930,1931,1972,1977,2005,2155,2162,2167,2172,2181,2182,2241,2246,2269,2282,2362,2427,2473,2493,2533,2546,2547,2584,2587,2603,2629,2644,2675,2717,2721,2723,2724,2727,2729,2731,2736,2737,2738,2739,2740,2741,2749,2752,2882,2889,2901,2903,2917,2922 11 | 1089 2181,2694,2696,1170,2582,1303,1175,2585,25,2717,2718,1950,2720,1697,2721,414,2719,2722,2723,2724,1704,2473,2725,2727,2728,1709,174,2606,2729,2609,1842,2730,52,2731,2732,2733,2736,57,2737,2738,2748,2749,2741,2742,64,1985,2745,2627,1348,2628,198,2750,1742,2644,2739,1750,855,473,2740,2521,1885,2269,2743,2546,2547,2674,1270,2746,2427,2172,2747,2174 12 | 1141 2181,2694,2696,1170,2582,1303,1175,2585,25,2717,2718,1950,2720,1697,2721,414,2719,2722,2723,2724,1704,2473,2725,2727,2728,1709,174,2606,2729,2609,1842,2730,2731,2732,2733,2736,2737,57,2738,2739,2748,2749,2741,2742,64,1985,2745,2627,1348,2628,198,2750,1742,2644,1750,855,473,2740,2521,1885,2269,2546,2547,2674,1270,2746,2427,2172,2747,2174 13 | 1431 770,3,1926,2569,2570,2571,2572,909,2573,2574,2575,1553,914,1943,1048,2457,153,27,1531,797,2465,1315,2467,2086,297,555,1580,1326,1711,1328,435,564,1331,1207,952,2492,195,835,1220,1347,2501,2120,1353,1098,1994,2250,2377,1230,211,1235,1237,726,1878,2009,220,2271,2274,874,2283,2285,367,754,371,1268,1653,2291,2292,2296,505,2299,764,893,2302 14 | 1593 2,3,69,95,132,185,193,201,203,205,210,212,216,217,220,221,235,253,297,353,395,398,399,433,436,438,447,454,500,545,552,640,775,838,1161,1219,1279,1365,1374,1376,1378,1388,1408,1413,1414,1427,1431,1438,1441,1456,1467,1479,1486,1551,1558,1591,1592,1593,1635,1636,1638,1696,1710,1719,1901,1904,1905,1912,1918,1924,1926,1992,2032,2037,2043,2087,2122,2127,2147,2148,2150,2151,2156,2160,2344,2410,2412,2413,2499,2503,2581,2633,2673,2818,2838 15 | 1656 2,3,1551,2581,1558,545,2087,552,1591,1592,1593,69,2633,2122,2127,95,1635,1636,2147,1638,2148,2150,2151,2156,2160,2673,640,132,1161,1696,1710,1719,185,193,1219,201,203,205,210,212,216,217,220,235,253,1279,2818,775,2838,2344,297,838,1365,1374,1376,353,1378,2410,1388,1901,2412,2413,1904,1905,1912,1918,1408,1924,1413,1414,1926,1417,395,398,399,1427,1431,1438,1441,1456,433,436,438,1467,447,2499,454,1479,1992,2503,1486,2032,500,2037,2043 -------------------------------------------------------------------------------- /AUSH/model/attack_model/gan_attack_copy/models.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # @Time : 2020/9/18 13:52 3 | # @Author : chensi 4 | # @File : models.py 5 | # @Software : PyCharm 6 | # @Desciption : None 7 | 8 | try: 9 | import tensorflow.compat.v1 as tf 10 | 11 | 
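    # TF1-style graph API shim: under TensorFlow 2 this imports the compat.v1 module and disables v2 (eager)
    # behavior on the next line; the except branch falls back to a plain import on a native TensorFlow 1.x install.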
tf.disable_v2_behavior() 12 | except: 13 | import tensorflow as tf 14 | import math 15 | 16 | 17 | # import math 18 | class CopyGanAttacker: 19 | def __init__(self, dataset_class, target_id, filler_num, attack_num, filler_method): 20 | # data set info 21 | self.dataset_class = dataset_class 22 | self.num_user = dataset_class.n_users 23 | self.num_item = dataset_class.n_items 24 | self.rating_matrix = dataset_class.train_matrix.toarray() # tf.constant() 25 | 26 | # attack info 27 | self.target_id = target_id 28 | self.filler_num = filler_num 29 | self.attack_num = attack_num 30 | self.filler_method = filler_method 31 | 32 | def build_model(self): 33 | # define place_holder 34 | # self.user_vector = tf.placeholder(tf.int32, [None, self.num_item]) 35 | # self.item_vector = tf.placeholder(tf.int32, [None, self.num_item]) 36 | self.sampled_template = tf.placeholder(tf.int32, [self.args.batch_size, self.num_item]) 37 | self.batch_filler_index = tf.placeholder(tf.int32, [None, self.args.batch_size]) 38 | # user/item embedding 39 | # c = tf.constant(c) 40 | user_embedding = self.towerMlp(self.rating_matrix, self.num_item, self.args.embedding_dim) 41 | item_embedding = self.towerMlp(self.rating_matrix.transpose(), self.num_user, self.args.embedding_dim) 42 | 43 | """ 44 | copy net 45 | p_copy(j)=sigmoid (w x j’s item embedding + w x u’s user embedding + b)""" 46 | with tf.name_scope("copyNet"): 47 | w1 = tf.get_variable('w1', [self.args.embedding_dim, self.num_item]) 48 | p1 = tf.matmul(tf.nn.embedding_lookup(user_embedding, self.batch_filler_index), w1) # batch*item_num 49 | w2 = tf.get_variable('w2', [self.args.embedding_dim, 1]) 50 | p2 = tf.matmul(item_embedding, w2) # item_num*1 51 | b = tf.get_variable('b', [self.item_num]) 52 | copy_prob = tf.nn.sigmoid(p1 + p2 + b) # batch*item_num 53 | """ 54 | generate net 55 | p_gen(j=r) 56 | """ 57 | with tf.name_scope("genNet"): 58 | gen_probabilitiy_list = [] 59 | for i in range(5): 60 | with tf.name_scope("s_%d" % i): 61 | w1 = tf.get_variable('w1', [self.args.embedding_dim, self.num_item]) 62 | p1 = tf.matmul(tf.nn.embedding_lookup(user_embedding, self.batch_filler_index), 63 | w1) # batch*item_num 64 | w2 = tf.get_variable('w2', [self.args.embedding_dim, 1]) 65 | p2 = tf.matmul(item_embedding, w2) # item_num*1 66 | b = tf.get_variable('b', [self.item_num]) 67 | gen_probability = p1 + p2 + b 68 | gen_probabilitiy_list.append(tf.expand_dims(gen_probability, 2)) # batch*item_num*1 69 | gen_rating_distri = tf.nn.softmax(tf.concat(gen_probabilitiy_list, axis=2)) # batch*item_num*5 70 | """ 71 | Rating 72 | rating p(r) = p_copy(j) x p_copy(j=r) + (1-p_copy(j)) x p_gen(j=r) 73 | """ 74 | copy_rating_distri = tf.reshape(tf.expand_dims(tf.one_hot(self.sampled_template, 5), 3), 75 | [self.args.batch_size, -1, 5]) 76 | rating_distri = copy_prob * copy_rating_distri + (1 - copy_prob) * gen_rating_distri # batch*item_num*5 77 | rating_value = tf.tile(tf.constant([[[1., 2., 3., 4., 5.]]]), [self.args.batch_size, self.num_item, 1]) 78 | fake_profiles = tf.reduce_sum(rating_distri * rating_value, 2) 79 | 80 | """ 81 | loss function 82 | """ 83 | with tf.name_scope("Discriminator"): 84 | D_real = self.towerMlp(self.sampled_template, self.num_item, 1) 85 | D_fake = self.towerMlp(fake_profiles, self.num_item, 1) 86 | 87 | """ 88 | loss function 89 | """ 90 | with tf.name_scope("loss_D"): 91 | d_loss_real = tf.reduce_mean( 92 | tf.nn.sigmoid_cross_entropy_with_logits(logits=D_real, labels=tf.ones_like(D_real)), 93 | name="loss_real") 94 | d_loss_fake = tf.reduce_mean( 
95 | tf.nn.sigmoid_cross_entropy_with_logits(logits=D_fake, labels=tf.zeros_like(D_fake)), 96 | name="loss_fake") 97 | loss_D = d_loss_real + d_loss_fake 98 | with tf.name_scope("loss_G"): 99 | # reconstruction loss 100 | loss_rec = tf.reduce_mean(tf.square(fake_profiles - self.sampled_template)) 101 | # adversial loss 102 | loss_adv = tf.reduce_mean( 103 | tf.nn.sigmoid_cross_entropy_with_logits(logits=D_fake, labels=tf.ones_like(D_fake))) 104 | loss_G = loss_rec + loss_adv 105 | 106 | def towerMlp(self, input, inputDim, outputDim): 107 | dim, x = inputDim // 2, input 108 | while dim > outputDim: 109 | layer = tf.layers.dense( 110 | inputs=x, 111 | units=dim, 112 | kernel_initializer=tf.random_normal_initializer, 113 | activation=tf.nn.relu, 114 | kernel_regularizer=tf.contrib.layers.l2_regularizer(scale=self.reg_rate)) 115 | dim, x = dim // 2, layer 116 | output = tf.layers.dense( 117 | inputs=x, 118 | units=outputDim, 119 | kernel_initializer=tf.random_normal_initializer, 120 | activation=tf.nn.sigmoid, 121 | kernel_regularizer=tf.contrib.layers.l2_regularizer(scale=self.reg_rate)) 122 | return output 123 | -------------------------------------------------------------------------------- /AUSH/test_main/main_eval_attack.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # @Time : 2019/8/24 10:05 3 | # @Author : chensi 4 | # @File : main_eval_attack.py 5 | # @Software : PyCharm 6 | # @Desciption : None 7 | import sys, argparse 8 | import numpy as np 9 | import pandas as pd 10 | 11 | sys.path.append("../") 12 | from utils.load_data.load_data import load_data 13 | from utils.load_data.load_attack_info import * 14 | 15 | 16 | def attack_evaluate(real_preds_path, attacked_preds_file, non_rated_users, target_users): 17 | # 18 | names = ['uid', 'rating', 'HR_1', 'HR_3', 'HR_5', 'HR_10', 'HR_20', 'HR_50'] 19 | real_preds = pd.read_csv(real_preds_path, sep='\t', names=names, engine='python') 20 | attacked_preds = pd.read_csv(attacked_preds_file, sep='\t', names=names, engine='python') 21 | # pred 22 | shift_target = np.mean(attacked_preds.iloc[target_users, 1].values - real_preds.iloc[target_users, 1].values) 23 | shift_all = np.mean(attacked_preds.iloc[non_rated_users, 1].values - real_preds.iloc[non_rated_users, 1].values) 24 | # 25 | HR_real_target = real_preds.iloc[target_users, range(2, 8)].mean().values 26 | HR_real_all = real_preds.iloc[non_rated_users, range(2, 8)].mean().values 27 | 28 | HR_attacked_target = attacked_preds.iloc[target_users, range(2, 8)].mean().values 29 | HR_attacked_all = attacked_preds.iloc[non_rated_users, range(2, 8)].mean().values 30 | return shift_target, HR_real_target, HR_attacked_target, shift_all, HR_real_all, HR_attacked_all 31 | 32 | 33 | def eval_attack(data_set_name, rec_model_name, attack_method, target_id): 34 | dir = "../result/pred_result/" 35 | real_preds_path = dir + '_'.join([rec_model_name, data_set_name, str(target_id)]) 36 | attacked_preds_file = real_preds_path + "_" + attack_method 37 | """ 38 | ml100k 39 | """ 40 | if data_set_name == 'ml100k': 41 | path_train = "../data/data/ml100k_train.dat" 42 | path_test = "../data/data/ml100k_test.dat" 43 | attack_info_path = ["../data/data/ml100k_selected_items", "../data/data/ml100k_target_users"] 44 | elif data_set_name == 'filmTrust': 45 | path_train = "../data/data/filmTrust_train.dat" 46 | path_test = "../data/data/filmTrust_test.dat" 47 | attack_info_path = ["../data/data/filmTrust_selected_items", 
"../data/data/filmTrust_target_users"] 48 | 49 | else: 50 | path_train = "../data/data/" + data_set_name + "_train.dat" 51 | path_test = "../data/data/" + data_set_name + "_test.dat" 52 | attack_info_path = ["../data/data/" + data_set_name + "_selected_items", 53 | "../data/data/" + data_set_name + "_target_users"] 54 | 55 | attack_info = load_attack_info(*attack_info_path) 56 | dataset_class = load_data(path_train=path_train, path_test=path_test, header=['user_id', 'item_id', 'rating'], 57 | sep='\t', print_log=False) 58 | 59 | # 60 | target_users = attack_info[target_id][1] 61 | non_rated_users = dataset_class.get_item_nonrated_users(target_id) 62 | # 63 | res = attack_evaluate(real_preds_path, attacked_preds_file, non_rated_users, target_users) 64 | # 65 | target, all = res[:3], res[3:] 66 | target_str = '\t'.join([str(target[0]), '\t'.join(map(str, target[1])), '\t'.join(map(str, target[2]))]) 67 | all_str = '\t'.join([str(all[0]), '\t'.join(map(str, all[1])), '\t'.join(map(str, all[2]))]) 68 | 69 | # info 70 | info = '\t'.join([rec_model_name, attack_method, str(target_id)]) 71 | # print(info + '\t' + target_str + '\t' + all_str) 72 | return info + '\t' + target_str + '\t' + all_str 73 | 74 | 75 | def parse_arg(): 76 | parser = argparse.ArgumentParser() 77 | 78 | parser.add_argument('--dataset', type=str, default='automotive', help='filmTrust/ml100k/office') 79 | 80 | parser.add_argument('--attack_num', type=int, default=50, help='50 for ml100k and filmTrust') 81 | 82 | parser.add_argument('--filler_num', type=int, default=4, help='90 for ml100k,36 for filmTrust') 83 | 84 | parser.add_argument('--attack_methods', type=str, default='G0,G1', 85 | help='gan,G0,G1,segment,average,random,bandwagon') 86 | 87 | parser.add_argument('--rec_model_names', type=str, default='NNMF,IAutoRec,UAutoRec,NMF_25', 88 | help='NNMF,IAutoRec,UAutoRec,NMF_25') 89 | 90 | # filmTrust:5,395,181,565,254,601,623,619,64,558 - random*5+tail*5 91 | # ml100k:62,1077,785,1419,1257,1319,1612,1509,1545,1373 - random*5+tail*5 92 | # 1166,1574,759,494,549,1272,1728,1662,450,1456,595,566,764,1187,1816,1478,1721,2294,2413,1148 93 | # 88,22,122,339,1431,1141,1656,477,1089,866 94 | parser.add_argument('--target_ids', type=str, default='88,22,122,339,1431,1141,1656,477,1089,866', 95 | help='target_id') 96 | 97 | # 98 | args = parser.parse_args() 99 | # 100 | args.attack_methods = args.attack_methods.split(',') 101 | args.rec_model_names = args.rec_model_names.split(',') 102 | args.target_ids = list(map(int, args.target_ids.split(','))) 103 | return args 104 | 105 | 106 | if __name__ == '__main__': 107 | """parse args""" 108 | args = parse_arg() 109 | """eval""" 110 | result = [] 111 | 112 | for attack_method in args.attack_methods: 113 | for rec_model_name in args.rec_model_names: 114 | for target_id in args.target_ids: 115 | attack_method_ = '_'.join([attack_method, str(args.attack_num), str(args.filler_num)]) 116 | try: 117 | result_ = eval_attack(args.dataset, rec_model_name, attack_method_, target_id) 118 | result.append(result_.split('\t')) 119 | except: 120 | print(attack_method, rec_model_name, target_id) 121 | 122 | result = np.array(result).transpose() 123 | result = pd.DataFrame(dict(zip(range(result.shape[0]), result))) 124 | result.to_excel(args.dataset + '_performance_all.xls', index=False) 125 | -------------------------------------------------------------------------------- /AUSH/test_main/data_preprocess.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env 
python 2 | # -*- coding:utf-8 -*- 3 | # author:ariaschen 4 | # datetime:2020/1/12 16:11 5 | # software: PyCharm 6 | 7 | import itertools, gzip 8 | import pandas as pd 9 | from utils.load_data.load_data import * 10 | from sklearn.model_selection import train_test_split 11 | 12 | 13 | def parse(path): 14 | g = gzip.open(path, 'rb') 15 | for l in g: 16 | yield eval(l) 17 | 18 | 19 | def getDF(path): 20 | i = 0 21 | df = {} 22 | for d in parse(path): 23 | df[i] = d 24 | i += 1 25 | return pd.DataFrame.from_dict(df, orient='index') 26 | 27 | 28 | def data_preprocess(data_set, gz_path): 29 | data = getDF(gz_path)[['reviewerID', 'asin', 'overall']] 30 | data.columns = ['uid', 'iid', 'rating'] 31 | 32 | uids, iids = data.uid.unique(), data.iid.unique() 33 | n_uids, n_iids, n_ratings = len(uids), len(iids), data.shape[0] 34 | print('User num:', n_uids, '\tItem num:', n_iids, '\tRating num:', n_ratings, '\t Sparsity :', n_ratings / (n_iids * n_uids)) 35 | print('Number of ratings per user:', n_ratings / n_uids) 36 | 37 | uid_update = dict(zip(uids, range(n_uids))) 38 | iid_update = dict(zip(iids, range(n_iids))) 39 | 40 | data.uid = data.uid.apply(lambda x: uid_update[x]) 41 | data.iid = data.iid.apply(lambda x: iid_update[x]) 42 | 43 | train_idxs, test_idxs = train_test_split(list(range(n_ratings)), test_size=0.1) 44 | 45 | train_data = data.iloc[train_idxs] 46 | test_data = data.iloc[test_idxs] 47 | path_train = "../data/data/" + data_set + "_train.dat" 48 | path_test = "../data/data/" + data_set + "_test.dat" 49 | train_data.to_csv(path_train, index=False, header=None, sep='\t') 50 | test_data.to_csv(path_test, index=False, header=None, sep='\t') 51 | np.save("../data/data/" + data_set + "_id_update", [uid_update, iid_update]) 52 | 53 | 54 | def exp_select(data_set, target_items, selected_num, target_user_num): 55 | path_test = "../data/data/" + data_set + "_test.dat" 56 | path_train = "../data/data/" + data_set + "_train.dat" 57 | dataset_class = load_data(path_train=path_train, path_test=path_test, 58 | header=['user_id', 'item_id', 'rating'], 59 | sep='\t', print_log=True) 60 | 61 | item_pops = dataset_class.get_item_pop() 62 | 63 | items_sorted = np.array(item_pops).argsort()[::-1] 64 | 65 | bandwagon_selected = items_sorted[:selected_num] 66 | print('bandwagon_selected:', bandwagon_selected) 67 | 68 | 69 | threshold = dataset_class.test_data.rating.mean() 70 | threshold = threshold if threshold < 3 else 3.0 71 | print('threshold:', threshold) 72 | selected_candidates = items_sorted[:20] 73 | 74 | selected_candidates = list(itertools.combinations(selected_candidates, selected_num)) 75 | 76 | result = {} 77 | target_items = [j for i in range(2, 10) for j in 78 | items_sorted[i * len(items_sorted) // 10:(i * len(items_sorted) // 10) + 2]][::-1] 79 | target_items = list( 80 | np.random.choice([i for i in range(len(item_pops)) if item_pops[i] == 3], 4, replace=False)) + target_items 81 | print('target_items:', target_items) 82 | print('number of ratings:', [item_pops[i] for i in target_items]) 83 | for target in target_items: 84 | target_rated = set(dataset_class.train_data[dataset_class.train_data.item_id == target].user_id.values) 85 | data_tmp = dataset_class.train_data[~dataset_class.train_data.user_id.isin(target_rated)].copy() 86 | data_tmp = data_tmp[data_tmp.rating >= threshold] 87 | np.random.shuffle(selected_candidates) 88 | 89 | for selected_items in selected_candidates: 90 | target_users = data_tmp[data_tmp.item_id.isin(selected_items)].groupby( 91 | 'user_id').size() 92 | 93 | if 
target_users[(target_users == selected_num)].shape[0] >= target_user_num: 94 | target_users = sorted(target_users[(target_users == selected_num)].index) 95 | result[target] = [sorted(selected_items), target_users] 96 | print('target:', target) 97 | break 98 | 99 | if target not in result: 100 | for selected_items in selected_candidates: 101 | 102 | target_users = data_tmp[data_tmp.item_id.isin(selected_items)].groupby( 103 | 'user_id').size() 104 | target_users = sorted(dict(target_users).items(), key=lambda x: x[1], reverse=True) 105 | min = target_users[target_user_num][1] 106 | target_users = [i[0] for i in target_users[:target_user_num] if i[1] > selected_num // 2] 107 | if len(target_users) >= target_user_num: 108 | result[target] = [sorted(selected_items), sorted(target_users)] 109 | print('target:', target, 'min rated selected item num:', min) 110 | break 111 | 112 | if target not in result: 113 | print('target:', target, 'non-targeted user') 114 | a = 1 115 | 116 | key = list(result.keys()) 117 | selected_items = [','.join(map(str, result[k][0])) for k in key] 118 | target_users = [','.join(map(str, result[k][1])) for k in key] 119 | selected_items = pd.DataFrame(dict(zip(['id', 'selected_items'], [key, selected_items]))) 120 | target_users = pd.DataFrame(dict(zip(['id', 'target_users'], [key, target_users]))) 121 | selected_items.to_csv("../data/data/" + data_set + '_selected_items', index=False, header=None, sep='\t') 122 | target_users.to_csv("../data/data/" + data_set + '_target_users', index=False, header=None, sep='\t') 123 | 124 | 125 | if __name__ == '__main__': 126 | data_set = 'office' 127 | gz_path = 'C:\\Users\\ariaschen\\Downloads\\reviews_Office_Products_5.json.gz' 128 | # data_set = 'automotive' 129 | # gz_path = 'C:\\Users\\ariaschen\\Downloads\\reviews_Automotive_5.json.gz' 130 | # data_set = 'grocery' 131 | # gz_path = "../data/new_raw_data/reviews_Grocery_and_Gourmet_Food_5.json.gz" 132 | 133 | 134 | data_preprocess(data_set, gz_path) 135 | 136 | target_items = None 137 | 138 | exp_select(data_set, target_items, selected_num=2, target_user_num=30) 139 | -------------------------------------------------------------------------------- /AUSH/test_main/main_baseline_attack.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # @Time : 2019/8/23 11:49 3 | # @Author : chensi 4 | # @File : main_attack_baseline.py 5 | # @Software : PyCharm 6 | # @Desciption : None 7 | 8 | import sys, argparse 9 | 10 | sys.path.append("../") 11 | from utils.load_data.load_data import * 12 | from utils.load_data.load_attack_info import * 13 | from model.attack_model.baseline import * 14 | from utils.attack.data_to_file import * 15 | from model.attack_model.gan_attack.trainer import Train_GAN_Attacker 16 | 17 | 18 | def get_data(data_set_name): 19 | path_train = '../data/data/' + data_set_name + '_train.dat' 20 | path_test = '../data/data/' + data_set_name + '_test.dat' 21 | dataset_class = load_data(path_train=path_train, path_test=path_test, 22 | header=['user_id', 'item_id', 'rating'], 23 | sep='\t', print_log=False) 24 | attack_info_path = ["../data/data/" + data_set_name + "_selected_items", 25 | "../data/data/" + data_set_name + "_target_users"] 26 | attack_info = load_attack_info(*attack_info_path) 27 | return dataset_class, attack_info 28 | 29 | 30 | def baseline_attack(dataset_class, attack_info, attack_method, target_id, bandwagon_selected, 31 | fixed_filler_indicator=None): 32 | """load data""" 33 | selected_ids, 
target_users = attack_info[target_id] 34 | attack_model, attack_num, filler_num = attack_method.split('_') 35 | attack_num, filler_num = int(attack_num), int(filler_num) 36 | 37 | """attack class""" 38 | global_mean, global_std, item_means, item_stds = dataset_class.get_all_mean_std() 39 | baseline_attacker = BaselineAttack(attack_num, filler_num, dataset_class.n_items, target_id, 40 | global_mean, global_std, item_means, item_stds, 5.0, 1.0, 41 | fixed_filler_indicator=fixed_filler_indicator) 42 | # fake profile array 43 | fake_profiles = None 44 | if attack_model == "random": 45 | fake_profiles = baseline_attacker.RandomAttack() 46 | elif attack_model == "bandwagon": 47 | fake_profiles = baseline_attacker.BandwagonAttack(bandwagon_selected) 48 | elif attack_model == "average": 49 | fake_profiles = baseline_attacker.AverageAttack() 50 | elif attack_model == "segment": 51 | fake_profiles = baseline_attacker.SegmentAttack(selected_ids) 52 | else: 53 | print('attack_method error') 54 | exit() 55 | return fake_profiles 56 | 57 | 58 | def parse_arg(): 59 | parser = argparse.ArgumentParser() 60 | 61 | parser.add_argument('--dataset', type=str, default='automotive', help='filmTrust/ml100k/grocery') 62 | 63 | parser.add_argument('--attack_methods', type=str, default='average', 64 | help='average,segment,random,bandwagon') 65 | 66 | # filmTrust:random = [5, 395, 181, 565, 254] tail = [601, 623, 619, 64, 558] 67 | # ml100k:random = [62, 1077, 785, 1419, 1257] tail = [1319, 1612, 1509, 1545, 1373] 68 | # 1166,1574,759,494,549,1272,1728,1662,450,1456,595,566,764,1187,1816,1478,1721,2294,2413,1148 69 | # 62,1077,785,1419,1257,1319,1612,1509,1545,1373 70 | # 88,22,122,339,1431,1141,1656,477,1089,866 71 | parser.add_argument('--targets', type=str, default='88,22,122,339,1431,1141,1656,477,1089,866', 72 | help='attack_targets') 73 | 74 | parser.add_argument('--attack_num', type=int, default=50, help='fixed 50') 75 | 76 | parser.add_argument('--filler_num', type=int, default=4, help='90 for ml100k,36 for filmTrust') 77 | parser.add_argument('--bandwagon_selected', type=str, default='180,99,49', 78 | help='180,99,49 for ml100k,103,98,115 for filmTrust') 79 | # 80 | parser.add_argument('--sample_filler', type=int, default=1, help='sample filler') 81 | # 82 | 83 | args = parser.parse_args() 84 | # 85 | args.attack_methods = args.attack_methods.split(',') 86 | args.targets = list(map(int, args.targets.split(','))) 87 | args.bandwagon_selected = list(map(int, args.bandwagon_selected.split(','))) 88 | return args 89 | 90 | 91 | if __name__ == '__main__': 92 | """parse args""" 93 | args = parse_arg() 94 | 95 | """attack""" 96 | dataset_class, attack_info = get_data(args.dataset) 97 | 98 | for target_id in args.targets: 99 | 100 | attackSetting_path = '_'.join(map(str, [args.dataset, args.attack_num, args.filler_num, target_id])) 101 | attackSetting_path = "../data/data_attacked/" + attackSetting_path + '_attackSetting' 102 | if args.sample_filler: 103 | gan_attacker = Train_GAN_Attacker(dataset_class, params_D=None, params_G=None, target_id=target_id, 104 | selected_id_list=attack_info[target_id][0], 105 | filler_num=args.filler_num, attack_num=args.attack_num, filler_method=0) 106 | _, real_profiles, filler_indicator = gan_attacker.execute(is_train=0, model_path='no', 107 | final_attack_setting=[args.attack_num, 108 | None, None]) 109 | 110 | np.save(attackSetting_path, [real_profiles, filler_indicator]) 111 | else: 112 | real_profiles, filler_indicator = np.load(attackSetting_path + '.npy') 113 | 114 | # for 
attack_method in args.attack_methods: 115 | # 116 | # attack_model = '_'.join([attack_method, str(args.attack_num), str(args.filler_num)]) 117 | # # fake_profiles = baseline_attack(dataset_class, attack_info, attack_model, target_id, 118 | # # args.bandwagon_selected, filler_indicator) 119 | # fake_profiles = baseline_attack(dataset_class, attack_info, attack_model, target_id, 120 | # args.bandwagon_selected, None) 121 | # 122 | # ori_path = '../data/data/' + args.dataset + '_train.dat' 123 | # dst_path = "../data/data_attacked/" + '_'.join([args.dataset, str(target_id), attack_model]) + "_sample.dat" 124 | # attacked_file_writer(ori_path, dst_path, fake_profiles, dataset_class.n_users) 125 | -------------------------------------------------------------------------------- /Leg-UP/models/detector/SDLib/tool/file.py: -------------------------------------------------------------------------------- 1 | import os.path 2 | from os.path import abspath 3 | from os import makedirs, remove 4 | from re import compile, findall, split 5 | # from config import LineConfig 6 | from collections import defaultdict 7 | class Config(object): 8 | def __init__(self, fileName): 9 | self.config = {} 10 | self.readConfiguration(fileName) 11 | 12 | def __getitem__(self, item): 13 | if not self.contains(item): 14 | print('parameter ' + item + ' is invalid!') 15 | exit(-1) 16 | return self.config[item] 17 | 18 | def getOptions(self, item): 19 | if not self.contains(item): 20 | print('parameter ' + item + ' is invalid!') 21 | exit(-1) 22 | return self.config[item] 23 | 24 | def contains(self, key): 25 | return key in self.config # dict.has_key() no longer exists in Python 3 26 | 27 | def readConfiguration(self, fileName): 28 | if not os.path.exists(abspath(fileName)): 29 | print('config file is not found!') 30 | raise IOError 31 | with open(fileName) as f: 32 | for ind, line in enumerate(f): 33 | if line.strip() != '': 34 | try: 35 | key, value = line.strip().split('=') 36 | self.config[key] = value 37 | except ValueError: 38 | print('config file is not in the correct format!
Error Line:%d' % (ind)) 39 | 40 | 41 | class LineConfig(object): 42 | def __init__(self, content): 43 | self.line = content.strip().split(' ') 44 | self.options = {} 45 | self.mainOption = False 46 | if self.line[0] == 'on': 47 | self.mainOption = True 48 | elif self.line[0] == 'off': 49 | self.mainOption = False 50 | for i, item in enumerate(self.line): 51 | if (item.startswith('-') or item.startswith('--')) and not item[1:].isdigit(): 52 | ind = i + 1 53 | for j, sub in enumerate(self.line[ind:]): 54 | if (sub.startswith('-') or sub.startswith('--')) and not sub[1:].isdigit(): 55 | ind = j 56 | break 57 | if j == len(self.line[ind:]) - 1: 58 | ind = j + 1 59 | break 60 | try: 61 | self.options[item] = ' '.join(self.line[i + 1:i + 1 + ind]) 62 | except IndexError: 63 | self.options[item] = 1 64 | 65 | def __getitem__(self, item): 66 | if not self.contains(item): 67 | print('parameter ' + item + ' is invalid!') 68 | exit(-1) 69 | return self.options[item] 70 | 71 | def getOption(self, key): 72 | if not self.contains(key): 73 | print('parameter ' + key + ' is invalid!') 74 | exit(-1) 75 | return self.options[key] 76 | 77 | def isMainOn(self): 78 | return self.mainOption 79 | 80 | def contains(self, key): 81 | return key in self.options 82 | # return self.options.has_key(key) 83 | class FileIO(object): 84 | def __init__(self): 85 | pass 86 | 87 | # @staticmethod 88 | # def writeFile(filePath,content,op = 'w'): 89 | # reg = compile('(.+[/|\\\]).+') 90 | # dirs = findall(reg,filePath) 91 | # if not os.path.exists(filePath): 92 | # os.makedirs(dirs[0]) 93 | # with open(filePath,op) as f: 94 | # f.write(str(content)) 95 | 96 | @staticmethod 97 | def writeFile(dir, file, content, op='w'): 98 | if not os.path.exists(dir): 99 | os.makedirs(dir) 100 | if type(content) == 'str': 101 | with open(dir + file, op) as f: 102 | f.write(content) 103 | else: 104 | with open(dir + file, op) as f: 105 | f.writelines(content) 106 | 107 | @staticmethod 108 | def deleteFile(filePath): 109 | if os.path.exists(filePath): 110 | remove(filePath) 111 | 112 | @staticmethod 113 | def loadDataSet(conf, file, bTest=False): 114 | trainingData = defaultdict(dict) 115 | testData = defaultdict(dict) 116 | ratingConfig = LineConfig(conf['ratings.setup']) 117 | # if not bTest: 118 | # print('loading training data...') 119 | # else: 120 | # print('loading test data...') 121 | with open(file) as f: 122 | ratings = f.readlines() 123 | # ignore the headline 124 | if ratingConfig.contains('-header'): 125 | ratings = ratings[1:] 126 | # order of the columns 127 | order = ratingConfig['-columns'].strip().split() 128 | 129 | for lineNo, line in enumerate(ratings): 130 | items = split(' |,|\t', line.strip()) 131 | if not bTest and len(order) < 3: 132 | print('The rating file is not in a correct format. Error: Line num %d' % lineNo) 133 | exit(-1) 134 | try: 135 | userId = items[int(order[0])] 136 | itemId = items[int(order[1])] 137 | if bTest and len(order) < 3: 138 | rating = 1 # default value 139 | else: 140 | rating = items[int(order[2])] 141 | 142 | except ValueError: 143 | print('Error! 
Have you added the option -header to the rating.setup?') 144 | exit(-1) 145 | if not bTest: 146 | trainingData[userId][itemId] = float(rating) 147 | else: 148 | testData[userId][itemId] = float(rating) 149 | if not bTest: 150 | return trainingData 151 | else: 152 | return testData 153 | 154 | @staticmethod 155 | def loadRelationship(conf, filePath): 156 | socialConfig = LineConfig(conf['social.setup']) 157 | relation = [] 158 | print('loading social data...') 159 | with open(filePath) as f: 160 | relations = f.readlines() 161 | # ignore the headline 162 | if socialConfig.contains('-header'): 163 | relations = relations[1:] 164 | # order of the columns 165 | order = socialConfig['-columns'].strip().split() 166 | if len(order) <= 2: 167 | print('The social file is not in a correct format.') 168 | for lineNo, line in enumerate(relations): 169 | items = split(' |,|\t', line.strip()) 170 | if len(order) < 2: 171 | print('The social file is not in a correct format. Error: Line num %d' % lineNo) 172 | exit(-1) 173 | userId1 = items[int(order[0])] 174 | userId2 = items[int(order[1])] 175 | if len(order) < 3: 176 | weight = 1 177 | else: 178 | weight = float(items[int(order[2])]) 179 | relation.append([userId1, userId2, weight]) 180 | return relation 181 | 182 | @staticmethod 183 | def loadLabels(filePath): 184 | labels = {} 185 | with open(filePath) as f: 186 | for line in f: 187 | items = split(' |,|\t', line.strip()) 188 | labels[items[0]] = items[1] 189 | return labels 190 | -------------------------------------------------------------------------------- /AUSH/test_main/dcgan.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | from __future__ import print_function 3 | import sys 4 | 5 | sys.path.append("../") 6 | import os, argparse, time, math 7 | import numpy as np 8 | import tensorflow as tf 9 | from glob import glob 10 | from utils.attack.data_to_file import * 11 | from test_main.utils_dcgan import * 12 | from numpy import linalg as la 13 | from model.trainer_rec import * 14 | from test_main.main_eval_attack import eval_attack 15 | import utils as ut 16 | 17 | flags = tf.app.flags 18 | flags.DEFINE_integer("epoch", 64, "Epoch to train [25]") 19 | flags.DEFINE_float("learning_rate", 0.0002, "Learning rate of for adam [0.0002]") 20 | flags.DEFINE_float("beta1", 0.5, "Momentum term of adam [0.5]") 21 | flags.DEFINE_integer("batch_size", 64, "The size of batch images [64]") 22 | flags.DEFINE_integer("max_to_keep", 1, "maximum number of checkpoints to keep") 23 | flags.DEFINE_integer("z_dim", 100, "dimensions of z") 24 | # 25 | flags.DEFINE_integer("T", 10, "adv opt epoch") 26 | flags.DEFINE_integer("K", 5, "top k svd") # 5 27 | flags.DEFINE_float("alpha", 50.0, "opt param") 28 | flags.DEFINE_float("eta", 100.0, "opt param") 29 | flags.DEFINE_integer("attack_num", 50, "attack_num") 30 | flags.DEFINE_integer("filler_num", 90, "filler_num") 31 | FLAGS = flags.FLAGS 32 | 33 | # gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.333) 34 | data_set_name = 'ml100k' 35 | target_ids = [62, 1077, 785, 1419, 1257, 1319, 1612, 1509, 1545, 1373] 36 | 37 | run_config = tf.ConfigProto() 38 | run_config.gpu_options.allow_growth = True 39 | path_train = '../data/data/' + data_set_name + '_train.dat' 40 | path_test = '../data/data/' + data_set_name + '_test.dat' 41 | attack_info_path = ["../data/data/" + data_set_name + "_selected_items", 42 | "../data/data/" + data_set_name + "_target_users"] 43 | # 读取seletced items和target users 
44 | attack_info = load_attack_info(*attack_info_path) 45 | dataset_class = ut.load_data.load_data.load_data(path_train=path_train, path_test=path_test, 46 | header=['user_id', 'item_id', 'rating'], 47 | sep='\t', print_log=False) 48 | 49 | 50 | def train_Rec_model(injected_path, injected_profiles, target_id, model_path, train_epoch, 51 | model_name='IAutoRec', warm_start=False, restore_path=None): 52 | tf.reset_default_graph() 53 | 54 | attacked_file_writer(path_train, injected_path, injected_profiles, dataset_class.n_users) 55 | 56 | dataset_class_injected = ut.load_data.load_data.load_data(path_train=injected_path, 57 | path_test=path_test, 58 | header=['user_id', 'item_id', 'rating'], 59 | sep='\t', print_log=False) 60 | 61 | # tf.reset_default_graph() 62 | tf_config = tf.ConfigProto() 63 | tf_config.gpu_options.allow_growth = True 64 | with tf.Session() as sess: 65 | rec_model = get_model_network(sess, model_name, dataset_class_injected, train_epoch) 66 | if warm_start: 67 | # print('warm start') 68 | rec_model.restore(restore_path) 69 | rec_model.execute() 70 | rec_model.save(model_path) 71 | predictions, hit_ratios = pred_for_target(rec_model, target_id) 72 | return predictions, hit_ratios 73 | 74 | 75 | def opt_adv_intent(fake_users, filler_indicators, target_id): 76 | target_users = attack_info[target_id][1] 77 | model_path = "./IAutoRec_dcgan_%d.ckpt" % target_id 78 | injected_path = "./IAutoRec_dcgan_%d.dat" % target_id 79 | 80 | # ---------------------- 81 | for t in range(FLAGS.T): 82 | 83 | injected_profiles = fake_users * filler_indicators 84 | predictions, _ = train_Rec_model(injected_path, injected_profiles, target_id, model_path, 10) 85 | f_adv_0 = np.sum(predictions[target_users]) 86 | f_adv_k = f_adv_0 87 | print("opt_adv_intent\tepoch-%d adv goal\t%f" % (t, f_adv_k)) 88 | 89 | delta_f_Adv = [] 90 | B, Sigma, V = la.svd(fake_users) 91 | for k in range(FLAGS.K): 92 | 93 | Z_k = np.matmul(np.reshape(B[k], [FLAGS.attack_num, 1]), np.reshape(V[k], [1, dataset_class.n_items])) 94 | 95 | fake_users_k = fake_users + FLAGS.alpha * Z_k 96 | 97 | injected_profiles = fake_users_k * filler_indicators 98 | predictions, _ = train_Rec_model(injected_path, injected_profiles, target_id, model_path, 99 | 5, warm_start=True, restore_path=model_path) 100 | f_adv_k_new = np.sum(predictions[target_users]) 101 | 102 | delta_f_Adv.append((f_adv_k_new - f_adv_k) * Z_k) 103 | 104 | delta_f_A = FLAGS.alpha * sum(delta_f_Adv) 105 | fake_users += FLAGS.eta * delta_f_A 106 | fake_users[fake_users <= 0] = 0.5 107 | fake_users[fake_users > 5] = 5 108 | return fake_users * filler_indicators 109 | 110 | 111 | 112 | tf.reset_default_graph() 113 | with tf.Session(config=run_config) as sess: 114 | dcgan = DCGAN(sess, dataset_class) 115 | # print("build_model_ok") 116 | dcgan.train(FLAGS) 117 | # save model 118 | saver = tf.train.Saver() 119 | saver.save(sess, './dcgan.ckpt') 120 | 121 | fake_users = None 122 | while True: 123 | batch_z = gen_random(size=[FLAGS.batch_size, dcgan.z_dim]).astype(np.float32) 124 | fake_users_ = sess.run(dcgan.G, feed_dict={dcgan.z: batch_z}) 125 | # reshape&[-1,1]->[0,5] 126 | fake_users_ = fake_users_.reshape([fake_users_.shape[0], -1]) 127 | fake_users_ = (fake_users_ * 2.5) + 2.5 128 | fake_users = fake_users_ if fake_users is None else np.concatenate([fake_users_, fake_users_], 0) 129 | if fake_users.shape[0] >= FLAGS.attack_num: break 130 | # attack_num 131 | fake_users = fake_users[:FLAGS.attack_num] 132 | # filler_num 133 | filler_indicators = [] 134 | for i in 
range(FLAGS.attack_num): 135 | fillers_ = np.random.choice(list(range(dataset_class.n_items)), FLAGS.filler_num, replace=False) 136 | filler_indicator_ = [1 if iid in fillers_ else 0 for iid in range(dataset_class.n_items)] 137 | filler_indicators.append(filler_indicator_) 138 | filler_indicators = np.array(filler_indicators) 139 | np.save('./fake_user_dcgan', [fake_users, filler_indicators]) 140 | # fake_users, filler_indicators = np.load('./fake_user_dcgan.npy') 141 | 142 | results = {} 143 | for target_id in target_ids: 144 | 145 | injected_profiles = opt_adv_intent(fake_users, filler_indicators, target_id) 146 | 147 | 148 | model_path = "./IAutoRec_dcgan_%d.ckpt" % target_id 149 | injected_path = "../data/data/ml100k_%d_dcgan_50_90.dat" % target_id 150 | target_users = attack_info[target_id][1] 151 | predictions, hit_ratios = train_Rec_model(injected_path, injected_profiles, target_id, model_path, 500) 152 | dst_path = "../result/pred_result/" + '_'.join(['IAutoRec', 'ml100k', str(target_id), 'dcgan']) 153 | target_prediction_writer(predictions, hit_ratios, dst_path) 154 | 155 | result = eval_attack('ml100k', 'IAutoRec', 'dcgan', target_id) 156 | results[target_id] = result 157 | print(target_id, result, '\n\n') 158 | break 159 | 160 | for target_id in target_ids: 161 | print(target_id, results[target_id]) 162 | -------------------------------------------------------------------------------- /Leg-UP/models/detector/SDLib/method/FAP.py: -------------------------------------------------------------------------------- 1 | from models.detector.SDLib.baseclass.SDetection import SDetection 2 | from models.detector.SDLib.tool import config 3 | from sklearn.metrics import classification_report 4 | import numpy as np 5 | import random 6 | 7 | class FAP(SDetection): 8 | 9 | def __init__(self, conf, trainingSet=None, testSet=None, labels=None, fold='[1]'): 10 | super(FAP, self).__init__(conf, trainingSet, testSet, labels, fold) 11 | 12 | def readConfiguration(self): 13 | super(FAP, self).readConfiguration() 14 | # # s means the number of seedUser who be regarded as spammer in training 15 | self.s =int( self.config['seedUser']) 16 | # preserve the real spammer ID 17 | self.spammer = [] 18 | for i in self.dao.user: 19 | if self.labels[i] == '1': 20 | self.spammer.append(self.dao.user[i]) 21 | sThreshold = int(0.5 * len(self.spammer)) 22 | if self.s > sThreshold : 23 | self.s = sThreshold 24 | print ('*** seedUser is more than a half of spammer, so it is set to', sThreshold, '***') 25 | 26 | # # predict top-k user as spammer 27 | self.k = int(self.config['topKSpam']) 28 | # 0.5 is the ratio of spammer to dataset, it can be changed according to different datasets 29 | kThreshold = int(0.5 * (len(self.dao.user) - self.s)) 30 | if self.k > kThreshold: 31 | self.k = kThreshold 32 | print ('*** the number of top-K users is more than threshold value, so it is set to', kThreshold, '***') 33 | # product transition probability matrix self.TPUI and self.TPIU 34 | 35 | def __computeTProbability(self): 36 | # m--user count; n--item count 37 | m, n, tmp = self.dao.trainingSize() 38 | self.TPUI = np.zeros((m, n)) 39 | self.TPIU = np.zeros((n, m)) 40 | 41 | self.userUserIdDic = {} 42 | self.itemItemIdDic = {} 43 | tmpUser = list(self.dao.user.values()) 44 | tmpUserId = list(self.dao.user.keys()) 45 | tmpItem = list(self.dao.item.values()) 46 | tmpItemId = list(self.dao.item.keys()) 47 | # tmpUser = self.dao.user.values() 48 | # tmpUserId = self.dao.user.keys() 49 | # tmpItem = self.dao.item.values() 50 | # 
tmpItemId = self.dao.item.keys() 51 | for users in range(0, m): 52 | self.userUserIdDic[tmpUser[users]] = tmpUserId[users] 53 | for items in range(0, n): 54 | self.itemItemIdDic[tmpItem[items]] = tmpItemId[items] 55 | for i in range(0, m): 56 | for j in range(0, n): 57 | user = self.userUserIdDic[i] 58 | item = self.itemItemIdDic[j] 59 | # if has edge in graph,set a value ;otherwise set 0 60 | if (user not in self.bipartiteGraphUI) or (item not in self.bipartiteGraphUI[user]): 61 | continue 62 | else: 63 | w = float(self.bipartiteGraphUI[user][item]) 64 | # to avoid positive feedback and reliability problem,we should Polish the w 65 | otherItemW = 0 66 | otherUserW = 0 67 | for otherItem in self.bipartiteGraphUI[user]: 68 | otherItemW += float(self.bipartiteGraphUI[user][otherItem]) 69 | for otherUser in self.dao.trainingSet_i[item]: 70 | otherUserW += float(self.bipartiteGraphUI[otherUser][item]) 71 | # wPrime = w*1.0/(otherUserW * otherItemW) 72 | wPrime = w 73 | self.TPUI[i][j] = wPrime / otherItemW 74 | self.TPIU[j][i] = wPrime / otherUserW 75 | # if i % 100 == 0: 76 | # print ('progress: %d/%d' %(i,m)) 77 | 78 | def initModel(self): 79 | # construction of the bipartite graph 80 | # print ("constructing bipartite graph...") 81 | self.bipartiteGraphUI = {} 82 | for user in self.dao.trainingSet_u: 83 | tmpUserItemDic = {} # user-item-point 84 | for item in self.dao.trainingSet_u[user]: 85 | # tmpItemUserDic = {}#item-user-point 86 | recordValue = float(self.dao.trainingSet_u[user][item]) 87 | w = 1 + abs((recordValue - self.dao.userMeans[user]) / self.dao.userMeans[user]) + abs( 88 | (recordValue - self.dao.itemMeans[item]) / self.dao.itemMeans[item]) + abs( 89 | (recordValue - self.dao.globalMean) / self.dao.globalMean) 90 | # tmpItemUserDic[user] = w 91 | tmpUserItemDic[item] = w 92 | # self.bipartiteGraphIU[item] = tmpItemUserDic 93 | self.bipartiteGraphUI[user] = tmpUserItemDic 94 | # we do the polish in computing the transition probability 95 | # print ("computing transition probability...") 96 | self.__computeTProbability() 97 | 98 | def isConvergence(self, PUser, PUserOld): 99 | if len(PUserOld) == 0: 100 | return True 101 | for i in range(0, len(PUser)): 102 | if (PUser[i] - PUserOld[i]) > 0.01: 103 | return True 104 | return False 105 | 106 | def buildModel(self): 107 | # -------init-------- 108 | m, n, tmp = self.dao.trainingSize() 109 | PUser = np.zeros(m) 110 | PItem = np.zeros(n) 111 | self.testLabels = [0 for i in range(m)] 112 | self.predLabels = [0 for i in range(m)] 113 | 114 | # preserve seedUser Index 115 | self.seedUser = [] 116 | randDict = {} 117 | for i in range(0, self.s): 118 | randNum = random.randint(0, len(self.spammer) - 1) 119 | while randNum in randDict: 120 | randNum = random.randint(0, len(self.spammer) - 1) 121 | randDict[randNum] = 0 122 | self.seedUser.append(int(self.spammer[randNum])) 123 | # print len(randDict), randDict 124 | 125 | #initial user and item spam probability 126 | for j in range(0, m): 127 | if j in self.seedUser: 128 | #print type(j),j 129 | PUser[j] = 1 130 | else: 131 | PUser[j] = random.random() 132 | for tmp in range(0, n): 133 | PItem[tmp] = random.random() 134 | 135 | # -------iterator------- 136 | PUserOld = [] 137 | iterator = 0 138 | while self.isConvergence(PUser, PUserOld): 139 | #while iterator < 100: 140 | for j in self.seedUser: 141 | PUser[j] = 1 142 | PUserOld = PUser 143 | PItem = np.dot(self.TPIU, PUser) 144 | PUser = np.dot(self.TPUI, PItem) 145 | iterator += 1 146 | # print (self.foldInfo,'iteration', iterator) 147 
| 148 | PUserDict = {} 149 | userId = 0 150 | for i in PUser: 151 | PUserDict[userId] = i 152 | userId += 1 153 | for j in self.seedUser: 154 | del PUserDict[j] 155 | 156 | self.PSort = sorted(PUserDict.items(), key=lambda d: d[1], reverse=True) 157 | 158 | 159 | def predict(self): 160 | # predLabels 161 | # top-k user as spammer 162 | spamList = [] 163 | sIndex = 0 164 | while sIndex < self.k: 165 | spam = self.PSort[sIndex][0] 166 | spamList.append(spam) 167 | self.predLabels[spam] = 1 168 | sIndex += 1 169 | 170 | # trueLabels 171 | for user in self.dao.trainingSet_u: 172 | userInd = self.dao.user[user] 173 | # print type(user), user, userInd 174 | self.testLabels[userInd] = int(self.labels[user]) 175 | 176 | # delete seedUser labels 177 | differ = 0 178 | for user in self.seedUser: 179 | user = int(user - differ) 180 | # print type(user) 181 | del self.predLabels[user] 182 | del self.testLabels[user] 183 | differ += 1 184 | 185 | return self.predLabels 186 | -------------------------------------------------------------------------------- /AUSH/model/nnmf.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """Implementation of Neural Network Matrix Factorization. 3 | Reference: Dziugaite, Gintare Karolina, and Daniel M. Roy. "Neural network matrix factorization." arXiv preprint arXiv:1511.06443 (2015). 4 | """ 5 | 6 | try: 7 | import tensorflow.compat.v1 as tf 8 | 9 | tf.disable_v2_behavior() 10 | except: 11 | import tensorflow as tf 12 | import time 13 | import numpy as np 14 | import math 15 | 16 | __author__ = "Shuai Zhang" 17 | __copyright__ = "Copyright 2018, The DeepRec Project" 18 | 19 | __license__ = "GPL" 20 | __version__ = "1.0.0" 21 | __maintainer__ = "Shuai Zhang" 22 | __email__ = "cheungdaven@gmail.com" 23 | __status__ = "Development" 24 | 25 | 26 | class NNMF(): 27 | def __init__(self, sess, dataset_class, num_factor_1=100, num_factor_2=10, hidden_dimension=50, 28 | learning_rate=0.001, reg_rate=0.01, epoch=500, batch_size=256, 29 | show_time=False, T=5, display_step=1000): 30 | self.learning_rate = learning_rate 31 | self.epochs = epoch 32 | self.batch_size = batch_size 33 | self.reg_rate = reg_rate 34 | self.sess = sess 35 | self.dataset_class = dataset_class 36 | self.num_user = dataset_class.n_users 37 | self.num_item = dataset_class.n_items 38 | self.dataset_class.test_matrix_dok = self.dataset_class.test_matrix.todok() 39 | 40 | self.num_factor_1 = num_factor_1 41 | self.num_factor_2 = num_factor_2 42 | self.hidden_dimension = hidden_dimension 43 | self.show_time = show_time 44 | self.T = T 45 | self.display_step = display_step 46 | print("NNMF.") 47 | 48 | self.dataset_class_train_matrix_coo = self.dataset_class.train_matrix.tocoo() 49 | self.user = self.dataset_class_train_matrix_coo.row.reshape(-1) 50 | self.item = self.dataset_class_train_matrix_coo.col.reshape(-1) 51 | self.rating = self.dataset_class_train_matrix_coo.data 52 | 53 | self._build_network() 54 | init = tf.global_variables_initializer() 55 | self.sess.run(init) 56 | 57 | def _build_network(self): 58 | print("num_factor_1=%d, num_factor_2=%d, hidden_dimension=%d" % ( 59 | self.num_factor_1, self.num_factor_2, self.hidden_dimension)) 60 | 61 | # model dependent arguments 62 | self.user_id = tf.placeholder(dtype=tf.int32, shape=[None], name='user_id') 63 | self.item_id = tf.placeholder(dtype=tf.int32, shape=[None], name='item_id') 64 | self.y = tf.placeholder("float", [None], 'rating') 65 | # latent feature vectors 66 | P = 
tf.Variable(tf.random_normal([self.num_user, self.num_factor_1], stddev=0.01)) 67 | Q = tf.Variable(tf.random_normal([self.num_item, self.num_factor_1], stddev=0.01)) 68 | # latent feature matrix(K=1?) 69 | U = tf.Variable(tf.random_normal([self.num_user, self.num_factor_2], stddev=0.01)) 70 | V = tf.Variable(tf.random_normal([self.num_item, self.num_factor_2], stddev=0.01)) 71 | 72 | input = tf.concat(values=[tf.nn.embedding_lookup(P, self.user_id), 73 | tf.nn.embedding_lookup(Q, self.item_id), 74 | tf.multiply(tf.nn.embedding_lookup(U, self.user_id), 75 | tf.nn.embedding_lookup(V, self.item_id)) 76 | ], axis=1) 77 | # 78 | # tf1->tf2 79 | # regularizer = tf.contrib.layers.l2_regularizer(scale=self.reg_rate) 80 | regularizer = tf.keras.regularizers.l2(self.reg_rate) 81 | layer_1 = tf.layers.dense(inputs=input, units=2 * self.num_factor_1 + self.num_factor_2, 82 | bias_initializer=tf.random_normal_initializer, 83 | kernel_initializer=tf.random_normal_initializer, activation=tf.sigmoid, 84 | kernel_regularizer=regularizer) 85 | layer_2 = tf.layers.dense(inputs=layer_1, units=self.hidden_dimension, activation=tf.sigmoid, 86 | bias_initializer=tf.random_normal_initializer, 87 | kernel_initializer=tf.random_normal_initializer, 88 | kernel_regularizer=regularizer) 89 | layer_3 = tf.layers.dense(inputs=layer_2, units=self.hidden_dimension, activation=tf.sigmoid, 90 | bias_initializer=tf.random_normal_initializer, 91 | kernel_initializer=tf.random_normal_initializer, 92 | kernel_regularizer=regularizer) 93 | layer_4 = tf.layers.dense(inputs=layer_3, units=self.hidden_dimension, activation=tf.sigmoid, 94 | bias_initializer=tf.random_normal_initializer, 95 | kernel_initializer=tf.random_normal_initializer, 96 | kernel_regularizer=regularizer) 97 | output = tf.layers.dense(inputs=layer_4, units=1, activation=None, 98 | bias_initializer=tf.random_normal_initializer, 99 | kernel_initializer=tf.random_normal_initializer, 100 | kernel_regularizer=regularizer) 101 | self.pred_rating = tf.reshape(output, [-1]) 102 | self.loss = tf.reduce_sum(tf.square(self.y - self.pred_rating)) \ 103 | + tf.losses.get_regularization_loss() + self.reg_rate * ( 104 | tf.norm(U) + tf.norm(V) + tf.norm(P) + tf.norm(Q)) 105 | self.optimizer = tf.train.RMSPropOptimizer(learning_rate=self.learning_rate).minimize(self.loss) 106 | 107 | def train(self): 108 | self.num_training = len(self.rating) 109 | total_batch = int(self.num_training / self.batch_size) 110 | idxs = np.random.permutation(self.num_training) # shuffled ordering 111 | user_random = list(self.user[idxs]) 112 | item_random = list(self.item[idxs]) 113 | rating_random = list(self.rating[idxs]) 114 | # train 115 | for i in range(total_batch): 116 | batch_user = user_random[i * self.batch_size:(i + 1) * self.batch_size] 117 | batch_item = item_random[i * self.batch_size:(i + 1) * self.batch_size] 118 | batch_rating = rating_random[i * self.batch_size:(i + 1) * self.batch_size] 119 | 120 | _, loss = self.sess.run([self.optimizer, self.loss], feed_dict={self.user_id: batch_user, 121 | self.item_id: batch_item, 122 | self.y: batch_rating 123 | }) 124 | return loss 125 | 126 | def test(self, test_data): 127 | error = 0 128 | error_mae = 0 129 | test_set = list(test_data.keys()) 130 | for (u, i) in test_set: 131 | pred_rating_test = self.predict([u], [i])[0] 132 | error += (float(test_data.get((u, i))) - pred_rating_test) ** 2 133 | error_mae += (np.abs(float(test_data.get((u, i))) - pred_rating_test)) 134 | rmse = np.sqrt(error / len(test_set)) 135 | mae = error_mae / 
len(test_set) 136 | return rmse, mae 137 | 138 | def execute(self): 139 | loss_prev = float("inf") 140 | for epoch in range(self.epochs): 141 | loss_cur = self.train() 142 | if epoch % self.T == 0: 143 | print("epoch:\t", epoch, "\tloss:\t", loss_cur) 144 | if abs(loss_cur - loss_prev) < math.exp(-5): 145 | break 146 | loss_prev = loss_cur 147 | rmse, mae = self.test(self.dataset_class.test_matrix_dok) 148 | print("training done\tRMSE : ", rmse, "\tMAE : ", mae) 149 | 150 | def save(self, path): 151 | saver = tf.train.Saver() 152 | saver.save(self.sess, path) 153 | 154 | def restore(self, path): 155 | init = tf.global_variables_initializer() 156 | self.sess.run(init) 157 | saver = tf.train.Saver() 158 | saver.restore(self.sess, path) 159 | 160 | def predict(self, user_id, item_id): 161 | if type(item_id) != list: 162 | item_id = [item_id] 163 | if type(user_id) != list: 164 | user_id = [user_id] * len(item_id) 165 | return self.sess.run([self.pred_rating], feed_dict={self.user_id: user_id, self.item_id: item_id})[0] 166 | -------------------------------------------------------------------------------- /Leg-UP/models/detector/SDLib/data/rating.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | # from structure import sparseMatrix,new_sparseMatrix 3 | from models.detector.SDLib.tool.config import Config, LineConfig 4 | from models.detector.SDLib.tool.qmath import normalize 5 | from models.detector.SDLib.tool.dataSplit import DataSplit 6 | import os.path 7 | from re import split 8 | from collections import defaultdict 9 | 10 | 11 | class RatingDAO(object): 12 | 'data access control' 13 | 14 | def __init__(self, config, trainingData, testData): 15 | self.config = config 16 | self.ratingConfig = LineConfig(config['ratings.setup']) 17 | self.user = {} # used to store the order of users in the training set 18 | self.item = {} # used to store the order of items in the training set 19 | self.id2user = {} 20 | self.id2item = {} 21 | self.all_Item = {} 22 | self.all_User = {} 23 | self.userMeans = {} # used to store the mean values of users's ratings 24 | self.itemMeans = {} # used to store the mean values of items's ratings 25 | 26 | self.globalMean = 0 27 | self.timestamp = {} 28 | # self.trainingMatrix = None 29 | # self.validationMatrix = None 30 | self.testSet_u = testData.copy() # used to store the test set by hierarchy user:[item,rating] 31 | self.testSet_i = defaultdict(dict) # used to store the test set by hierarchy item:[user,rating] 32 | self.trainingSet_u = trainingData.copy() 33 | self.trainingSet_i = defaultdict(dict) 34 | # self.rScale = [] 35 | 36 | self.trainingData = trainingData 37 | self.testData = testData 38 | self.__generateSet() 39 | self.__computeItemMean() 40 | self.__computeUserMean() 41 | self.__globalAverage() 42 | 43 | def __generateSet(self): 44 | scale = set() 45 | # find the maximum rating and minimum value 46 | # for i, entry in enumerate(self.trainingData): 47 | # userName, itemName, rating = entry 48 | # scale.add(float(rating)) 49 | # self.rScale = list(scale) 50 | # self.rScale.sort() 51 | 52 | for i, user in enumerate(self.trainingData): 53 | for item in self.trainingData[user]: 54 | 55 | # makes the rating within the range [0, 1]. 
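                # A minimal sketch of what the disabled call below presumably computes,
                # assuming qmath.normalize is the usual min-max scaling (qmath is not shown here):
                #   normalize(r, max_r, min_r) = (r - min_r) / (max_r - min_r)
                # e.g. a rating of 4 on a 1-5 scale would map to (4 - 1) / (5 - 1) = 0.75.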
56 | # rating = normalize(float(rating), self.rScale[-1], self.rScale[0]) 57 | # self.trainingSet_u[userName][itemName] = float(rating) 58 | self.trainingSet_i[item][user] = self.trainingData[user][item] 59 | # order the user 60 | # if not self.user.has_key(user): 61 | if user not in self.user: 62 | self.user[user] = len(self.user) 63 | self.id2user[self.user[user]] = user 64 | # order the item 65 | # if not self.item.has_key(item): 66 | if item not in self.item: 67 | self.item[item] = len(self.item) 68 | self.id2item[self.item[item]] = item 69 | self.trainingSet_i[item][user] = self.trainingData[user][item] 70 | # userList.append 71 | # triple.append([self.user[userName], self.item[itemName], rating]) 72 | # self.trainingMatrix = new_sparseMatrix.SparseMatrix(triple) 73 | 74 | self.all_User.update(self.user) 75 | self.all_Item.update(self.item) 76 | 77 | for i, user in enumerate(self.testData): 78 | # order the user 79 | # if not self.user.has_key(user): 80 | if user not in self.user: 81 | self.all_User[user] = len(self.all_User) 82 | for item in self.testData[user]: 83 | # order the item 84 | # if not self.item.has_key(item): 85 | if item not in self.item: 86 | self.all_Item[item] = len(self.all_Item) 87 | # self.testSet_u[userName][itemName] = float(rating) 88 | self.testSet_i[item][user] = self.testData[user][item] 89 | 90 | def __globalAverage(self): 91 | total = sum(self.userMeans.values()) 92 | if total == 0: 93 | self.globalMean = 0 94 | else: 95 | self.globalMean = total / len(self.userMeans) 96 | 97 | def __computeUserMean(self): 98 | # for u in self.user: 99 | # n = self.row(u) > 0 100 | # mean = 0 101 | # 102 | # if not self.containsUser(u): # no data about current user in training set 103 | # pass 104 | # else: 105 | # sum = float(self.row(u)[0].sum()) 106 | # try: 107 | # mean = sum/ n[0].sum() 108 | # except ZeroDivisionError: 109 | # mean = 0 110 | # self.userMeans[u] = mean 111 | for u in self.trainingSet_u: 112 | self.userMeans[u] = sum(self.trainingSet_u[u].values()) / (len(self.trainingSet_u[u].values()) + 0.0) 113 | for u in self.testSet_u: 114 | self.userMeans[u] = sum(self.testSet_u[u].values()) / (len(self.testSet_u[u].values()) + 0.0) 115 | 116 | def __computeItemMean(self): 117 | # for c in self.item: 118 | # n = self.col(c) > 0 119 | # mean = 0 120 | # if not self.containsItem(c): # no data about current user in training set 121 | # pass 122 | # else: 123 | # sum = float(self.col(c)[0].sum()) 124 | # try: 125 | # mean = sum / n[0].sum() 126 | # except ZeroDivisionError: 127 | # mean = 0 128 | # self.itemMeans[c] = mean 129 | for item in self.trainingSet_i: 130 | self.itemMeans[item] = sum(self.trainingSet_i[item].values()) / ( 131 | len(self.trainingSet_i[item].values()) + 0.0) 132 | for item in self.testSet_i: 133 | self.itemMeans[item] = sum(self.testSet_i[item].values()) / (len(self.testSet_i[item].values()) + 0.0) 134 | 135 | def getUserId(self, u): 136 | if self.user.has_key(u): 137 | return self.user[u] 138 | else: 139 | return -1 140 | 141 | def getItemId(self, i): 142 | if self.item.has_key(i): 143 | return self.item[i] 144 | else: 145 | return -1 146 | 147 | def trainingSize(self): 148 | recordCount = 0 149 | for user in self.trainingData: 150 | recordCount += len(self.trainingData[user]) 151 | return (len(self.trainingSet_u), len(self.trainingSet_i), recordCount) 152 | 153 | def testSize(self): 154 | recordCount = 0 155 | for user in self.testData: 156 | recordCount += len(self.testData[user]) 157 | return (len(self.testSet_u), len(self.testSet_i), 
recordCount) 158 | 159 | def contains(self, u, i): 160 | 'whether user u rated item i' 161 | if self.trainingSet_u.has_key(u) and self.trainingSet_u[u].has_key(i): 162 | return True 163 | return False 164 | 165 | def containsUser(self, u): 166 | 'whether user is in training set' 167 | return self.trainingSet_u.has_key(u) 168 | 169 | def containsItem(self, i): 170 | 'whether item is in training set' 171 | return self.trainingSet_i.has_key(i) 172 | 173 | def allUserRated(self, u): 174 | if u in self.user: 175 | return self.trainingSet_u[u].keys(), self.trainingSet_u[u].values() 176 | else: 177 | return self.testSet_u[u].keys(), self.testSet_u[u].values() 178 | # def userRated(self,u): 179 | # if self.trainingMatrix.matrix_User.has_key(self.getUserId(u)): 180 | # itemIndex = self.trainingMatrix.matrix_User[self.user[u]].keys() 181 | # rating = self.trainingMatrix.matrix_User[self.user[u]].values() 182 | # return (itemIndex,rating) 183 | # return ([],[]) 184 | # 185 | # def itemRated(self,i): 186 | # if self.trainingMatrix.matrix_Item.has_key(self.getItemId(i)): 187 | # userIndex = self.trainingMatrix.matrix_Item[self.item[i]].keys() 188 | # rating = self.trainingMatrix.matrix_Item[self.item[i]].values() 189 | # return (userIndex,rating) 190 | # return ([],[]) 191 | 192 | # def row(self,u): 193 | # return self.trainingMatrix.row(self.getUserId(u)) 194 | # 195 | # def col(self,c): 196 | # return self.trainingMatrix.col(self.getItemId(c)) 197 | # 198 | # def sRow(self,u): 199 | # return self.trainingMatrix.sRow(self.getUserId(u)) 200 | # 201 | # def sCol(self,c): 202 | # return self.trainingMatrix.sCol(self.getItemId(c)) 203 | # 204 | # def rating(self,u,c): 205 | # return self.trainingMatrix.elem(self.getUserId(u),self.getItemId(c)) 206 | # 207 | # def ratingScale(self): 208 | # return (self.rScale[0],self.rScale[1]) 209 | 210 | # def elemCount(self): 211 | # return self.trainingMatrix.elemCount() 212 | -------------------------------------------------------------------------------- /AUSH/test_main/main_eval_similarity_foryangqian.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # @Time : 2019/8/25 19:38 3 | # @Author : chensi 4 | # @File : main_eval_similarity.py 5 | # @Software : PyCharm 6 | # @Desciption : None 7 | 8 | import numpy as np 9 | from numpy.linalg import * 10 | import scipy.stats 11 | import sys, os, argparse 12 | import pandas as pd 13 | 14 | sys.path.append("../") 15 | from test_main.main_baseline_attack import baseline_attack 16 | from test_main.main_gan_attack import gan_attack 17 | from test_main.main_gan_attack_baseline import gan_attack as gan_attack_baseline 18 | from utils.load_data.load_data import * 19 | from utils.load_data.load_attack_info import load_attack_info 20 | from model.attack_model.gan_attack.trainer import Train_GAN_Attacker 21 | 22 | 23 | def eval_eigen_value(profiles): 24 | U_T_U = np.dot(profiles.transpose(), profiles) 25 | eig_val, _ = eig(U_T_U) 26 | top_10 = [i.real for i in eig_val[:10]] 27 | return top_10 28 | 29 | 30 | def get_item_distribution(profiles): 31 | # [min(max(0, round(i)), 5) for i in a] 32 | profiles_T = profiles.transpose() 33 | fn_count = lambda item_vec: np.array( 34 | [sum([1 if (min(max(0, round(j)), 5) == i) else 0 for j in item_vec]) for i in range(6)]) 35 | fn_norm = lambda item_vec: item_vec / sum(item_vec) 36 | item_distribution = np.array(list(map(fn_count, profiles_T))) 37 | item_distribution = np.array(list(map(fn_norm, item_distribution))) 38 | return 
item_distribution 39 | 40 | 41 | def eval_TVD_JS(P, Q): 42 | # TVD 43 | dis_TVD = np.mean(np.sum(np.abs(P - Q) / 2, 1)) 44 | # JS 45 | fn_KL = lambda p, q: scipy.stats.entropy(p, q) 46 | M = (P + Q) / 2 47 | js_vec = [] 48 | for iid in range(P.shape[0]): 49 | p, q, m = P[iid], Q[iid], M[iid] 50 | js_vec.append((fn_KL(p, m) + fn_KL(q, m)) / 2) 51 | dis_JS = np.mean(np.array(js_vec)) 52 | return dis_TVD, dis_JS 53 | 54 | 55 | def print_eigen_result(real_profiles, fake_profiles_gan, baseline_fake_profiles, baseline_methods): 56 | top_10_res = [] 57 | top_10_real = eval_eigen_value(real_profiles) 58 | top_10_res.append("real\t" + '\t'.join(map(str, top_10_real))) 59 | top_10_baseline = [] 60 | for idx in range(len(baseline_fake_profiles)): 61 | top_10_baseline.append(eval_eigen_value(baseline_fake_profiles[idx])) 62 | top_10_res.append(baseline_methods[idx] + "\t" + '\t'.join(map(str, top_10_baseline[-1]))) 63 | top_10_gan = eval_eigen_value(fake_profiles_gan) 64 | # top_10_sample_5 = eval_eigen_value(fake_profiles_sample_5) 65 | # top_10_real_sample = eval_eigen_value(real_profiles_gan) 66 | top_10_res.append("gan\t" + '\t'.join(map(str, top_10_gan))) 67 | # top_10_res.append("sample_5\t" + '\t'.join(map(str, top_10_sample_5))) 68 | # top_10_res.append("real_sample\t" + '\t'.join(map(str, top_10_real_sample))) 69 | print("\n".join(top_10_res)) 70 | 71 | 72 | def get_distance_result(target_id, real_profiles, fake_profiles_list, method_name): 73 | k = ['target_id', 'attack_method', 'dis_TVD', 'dis_JS'] 74 | v = [[], [], [], []] 75 | res_dis = [] 76 | real_item_distribution = get_item_distribution(real_profiles) 77 | for idx in range(len(fake_profiles_list)): 78 | dis_TVD, dis_JS = eval_TVD_JS(real_item_distribution, get_item_distribution(fake_profiles_list[idx])) 79 | v[1] += [method_name[idx]] 80 | v[2] += [dis_TVD] 81 | v[3] += [dis_JS] 82 | v[0] = [target_id] * len(v[1]) 83 | result = pd.DataFrame(dict(zip(k, v))) 84 | return result 85 | 86 | 87 | def profiles_generator(target_id, dataset_class, attack_info, bandwagon_selected, sample_num, args, real_profiles, 88 | filler_indicator, pre_fix, has_G=False): 89 | # baseline fake profiles 90 | baseline_methods = ["segment", "average", "random", "bandwagon"] 91 | baseline_fake_profiles = [] 92 | for attack_method in baseline_methods: 93 | attack_model = '_'.join([attack_method, str(sample_num), str(args.filler_num)]) 94 | fake_profiles = baseline_attack(dataset_class, attack_info, attack_model, target_id, 95 | bandwagon_selected, filler_indicator) 96 | baseline_fake_profiles.append(fake_profiles) 97 | 98 | for attack_method in baseline_methods: 99 | attack_model = '_'.join([attack_method, str(sample_num), str(args.filler_num)]) 100 | fake_profiles = baseline_attack(dataset_class, attack_info, attack_model, target_id, 101 | bandwagon_selected, None) 102 | baseline_fake_profiles.append(fake_profiles) 103 | baseline_methods = baseline_methods + [i + '_rand' for i in baseline_methods] 104 | 105 | final_attack_setting = [sample_num, real_profiles, filler_indicator] 106 | # new_baseline 107 | if has_G: 108 | for attack_method in ['G0' + pre_fix, 'G1' + pre_fix]: 109 | baseline_methods.append(attack_method) 110 | fake_profiles_G, _, _ = gan_attack_baseline(args.dataset, attack_method, target_id, False, 0, 111 | final_attack_setting=final_attack_setting) 112 | baseline_fake_profiles.append(fake_profiles_G) 113 | 114 | # gan profiles 115 | attack_method = "gan" + pre_fix 116 | fake_profiles_gan, _, _ = gan_attack(args.dataset, attack_method, target_id, 
False, write_to_file=0, 117 | final_attack_setting=final_attack_setting) 118 | return fake_profiles_gan, baseline_fake_profiles, baseline_methods 119 | 120 | 121 | def parse_arg(): 122 | parser = argparse.ArgumentParser() 123 | 124 | parser.add_argument('--dataset', type=str, default='ml100k', 125 | help='input data_set_name,filmTrust or ml100k grocery') 126 | 127 | parser.add_argument('--attack_num', type=int, default=50, 128 | help='num of attack fake user,50 for ml100k and filmTrust') 129 | 130 | parser.add_argument('--filler_num', type=int, default=90, 131 | help='num of filler items each fake user,90 for ml100k,36 for filmTrust') 132 | # filmTrust:5,395,181,565,254,601,623,619,64,558 - random*5+tail*5 133 | # ml100k:62,1077,785,1419,1257,1319,1612,1509,1545,1373 - random*5+tail*5 134 | parser.add_argument('--targets', type=str, default='62,1077,785,1419,1257,1319,1612,1509,1545,1373', 135 | help='attack_targets') 136 | parser.add_argument('--bandwagon_selected', type=str, default='180,99,49', 137 | help='180,99,49 for ml100k,103,98,115 for filmTrust') 138 | # 139 | args = parser.parse_args() 140 | # 141 | args.targets = list(map(int, args.targets.split(','))) 142 | args.bandwagon_selected = list(map(int, args.bandwagon_selected.split(','))) 143 | return args 144 | 145 | 146 | if __name__ == '__main__': 147 | """ 148 | step1 - load data 149 | step2 - build real profiles and load the DCGAN/WGAN attacked profiles 150 | step3 - compute distribution distances (TVD/JS) and export the results 151 | """ 152 | 153 | # 154 | """parse args""" 155 | args = parse_arg() 156 | pre_fix = '_' + str(args.attack_num) + '_' + str(args.filler_num) 157 | 158 | """step1 - load data""" 159 | path_train = "../data/data/" + args.dataset + "_train.dat" 160 | path_test = "../data/data/" + args.dataset + "_test.dat" 161 | attack_info_path = ["../data/data/" + args.dataset + "_selected_items", 162 | "../data/data/" + args.dataset + "_target_users"] 163 | dataset_class = load_data(path_train=path_train, path_test=path_test, header=['user_id', 'item_id', 'rating'], 164 | sep='\t', print_log=False) 165 | attack_info = load_attack_info(*attack_info_path) 166 | 167 | sample_num = dataset_class.n_users 168 | result = None 169 | for target_id in args.targets: 170 | selected = attack_info[target_id][0] 171 | """step2.1 - real_profiles""" 172 | gan_attacker = Train_GAN_Attacker(dataset_class, params_D=None, params_G=None, target_id=target_id, 173 | selected_id_list=selected, filler_num=args.filler_num, 174 | attack_num=args.attack_num, filler_method=0) 175 | _, real_profiles, filler_indicator = gan_attacker.execute(is_train=0, model_path='no', 176 | final_attack_setting=[sample_num, None, None]) 177 | """step2.2 - load the attacked datasets and collect fake profiles""" 178 | 179 | dir = None  # path to the directory holding the attacked .dat files; must be set before running 180 | fake_profiles_list = [] 181 | method_list = [] 182 | for attack_method in ['IAutoRec', 'UAutoRec', 'NNMF', 'NMF_25']: 183 | path_dcgan = dir + 'D-%s-ml100k\\ml100k_%d_dcgan_50_90.dat' % (attack_method, target_id) 184 | dataset_class_dcgan = load_data(path_train=path_dcgan, path_test=path_test, 185 | header=['user_id', 'item_id', 'rating'], 186 | sep='\t', print_log=False) 187 | fake_profiles_ = dataset_class_dcgan.train_matrix.toarray()[dataset_class.n_users:] 188 | while fake_profiles_.shape[0] < dataset_class.n_users: 189 | fake_profiles_ = np.concatenate([fake_profiles_, fake_profiles_]) 190 | fake_profiles_ = fake_profiles_[:dataset_class.n_users] 191 | 192 | path_wgan = dir + 'W-%s-ml100k\\ml100k_%d_wgan_50_90.dat' % (attack_method, target_id) 193 | dataset_class_dcgan = load_data(path_train=path_wgan, path_test=path_test, 194 | header=['user_id', 'item_id', 'rating'], 195 | sep='\t',
print_log=False) 196 | fake_profiles_w = dataset_class_dcgan.train_matrix.toarray()[dataset_class.n_users:] 197 | while fake_profiles_w.shape[0] < dataset_class.n_users: 198 | fake_profiles_w = np.concatenate([fake_profiles_w, fake_profiles_w]) 199 | fake_profiles_w = fake_profiles_w[:dataset_class.n_users] 200 | # 201 | fake_profiles_list += [fake_profiles_, fake_profiles_w] 202 | method_list += ['dcgan', 'wgan'] 203 | """step3 """ 204 | result_ = get_distance_result(target_id, real_profiles, fake_profiles_list, method_list) 205 | result = result_ if result is None else pd.concat([result, result_]) 206 | print(result) 207 | result.groupby('attack_method').mean().to_excel(args.dataset + '_distance_new.xls', index=False) 208 | -------------------------------------------------------------------------------- /AUSH/test_main/main_eval_similarity.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # @Time : 2019/8/25 19:38 3 | # @Author : chensi 4 | # @File : main_eval_similarity.py 5 | # @Software : PyCharm 6 | # @Desciption : None 7 | 8 | import numpy as np 9 | from numpy.linalg import * 10 | import scipy.stats 11 | import sys, os, argparse 12 | import pandas as pd 13 | 14 | sys.path.append("../") 15 | from test_main.main_baseline_attack import baseline_attack 16 | from test_main.main_gan_attack import gan_attack 17 | from test_main.main_gan_attack_baseline import gan_attack as gan_attack_baseline 18 | from utils.load_data.load_data import * 19 | from utils.load_data.load_attack_info import load_attack_info 20 | from model.attack_model.gan_attack.trainer import Train_GAN_Attacker 21 | 22 | 23 | def eval_eigen_value(profiles): 24 | U_T_U = np.dot(profiles.transpose(), profiles) 25 | eig_val, _ = eig(U_T_U) 26 | top_10 = [i.real for i in eig_val[:10]] 27 | return top_10 28 | 29 | 30 | def get_item_distribution(profiles): 31 | # [min(max(0, round(i)), 5) for i in a] 32 | profiles_T = profiles.transpose() 33 | fn_count = lambda item_vec: np.array( 34 | [sum([1 if (min(max(0, round(j)), 5) == i) else 0 for j in item_vec]) for i in range(6)]) 35 | fn_norm = lambda item_vec: item_vec / sum(item_vec) 36 | item_distribution = np.array(list(map(fn_count, profiles_T))) 37 | item_distribution = np.array(list(map(fn_norm, item_distribution))) 38 | return item_distribution 39 | 40 | 41 | def eval_TVD_JS(P, Q): 42 | # TVD 43 | dis_TVD = np.mean(np.sum(np.abs(P - Q) / 2, 1)) 44 | # JS 45 | fn_KL = lambda p, q: scipy.stats.entropy(p, q) 46 | M = (P + Q) / 2 47 | js_vec = [] 48 | for iid in range(P.shape[0]): 49 | p, q, m = P[iid], Q[iid], M[iid] 50 | js_vec.append((fn_KL(p, m) + fn_KL(q, m)) / 2) 51 | dis_JS = np.mean(np.array(js_vec)) 52 | return dis_TVD, dis_JS 53 | 54 | 55 | def print_eigen_result(real_profiles, fake_profiles_gan, baseline_fake_profiles, baseline_methods): 56 | top_10_res = [] 57 | top_10_real = eval_eigen_value(real_profiles) 58 | top_10_res.append("real\t" + '\t'.join(map(str, top_10_real))) 59 | top_10_baseline = [] 60 | for idx in range(len(baseline_fake_profiles)): 61 | top_10_baseline.append(eval_eigen_value(baseline_fake_profiles[idx])) 62 | top_10_res.append(baseline_methods[idx] + "\t" + '\t'.join(map(str, top_10_baseline[-1]))) 63 | top_10_gan = eval_eigen_value(fake_profiles_gan) 64 | # top_10_sample_5 = eval_eigen_value(fake_profiles_sample_5) 65 | # top_10_real_sample = eval_eigen_value(real_profiles_gan) 66 | top_10_res.append("gan\t" + '\t'.join(map(str, top_10_gan))) 67 | # top_10_res.append("sample_5\t" + 
'\t'.join(map(str, top_10_sample_5))) 68 | # top_10_res.append("real_sample\t" + '\t'.join(map(str, top_10_real_sample))) 69 | print("\n".join(top_10_res)) 70 | 71 | 72 | def get_distance_result(target_id, real_profiles, fake_profiles_gan, baseline_fake_profiles, baseline_methods): 73 | k = ['target_id', 'attack_method', 'dis_TVD', 'dis_JS'] 74 | v = [[], [], [], []] 75 | res_dis = [] 76 | real_item_distribution = get_item_distribution(real_profiles) 77 | # real_gan_item_distribution = get_item_distribution(real_profiles_gan) 78 | fake_gan_distribution = get_item_distribution(fake_profiles_gan) 79 | # fake_sample_5_distribution = get_item_distribution(fake_profiles_sample_5) 80 | # dis_TVD, dis_JS = eval_TVD_JS(real_item_distribution, real_gan_item_distribution) 81 | # res_dis.append('\t'.join(map(str, ["real", "real_gan", dis_TVD, dis_JS]))) 82 | # dis_TVD, dis_JS = eval_TVD_JS(real_gan_item_distribution, fake_gan_distribution) 83 | # res_dis.append('\t'.join(map(str, ["real_gan", "gan", dis_TVD, dis_JS]))) 84 | # dis_TVD, dis_JS = eval_TVD_JS(real_item_distribution, fake_sample_5_distribution) 85 | # res_dis.append('\t'.join(map(str, ["real", "sample_5", dis_TVD, dis_JS]))) 86 | # dis_TVD, dis_JS = eval_TVD_JS(real_gan_item_distribution, fake_sample_5_distribution) 87 | # res_dis.append('\t'.join(map(str, ["real_gan", "sample_5", dis_TVD, dis_JS]))) 88 | dis_TVD, dis_JS = eval_TVD_JS(real_item_distribution, fake_gan_distribution) 89 | v[1] += ['gan'] 90 | v[2] += [dis_TVD] 91 | v[3] += [dis_JS] 92 | # res_dis.append('\t'.join(map(str, [target_id, "gan", dis_TVD, dis_JS]))) 93 | for idx in range(len(baseline_fake_profiles)): 94 | dis_TVD, dis_JS = eval_TVD_JS(real_item_distribution, get_item_distribution(baseline_fake_profiles[idx])) 95 | v[1] += [baseline_methods[idx]] 96 | v[2] += [dis_TVD] 97 | v[3] += [dis_JS] 98 | # res_dis.append('\t'.join(map(str, [target_id, baseline_methods[idx], dis_TVD, dis_JS]))) 99 | v[0] = [target_id] * len(v[1]) 100 | result = pd.DataFrame(dict(zip(k, v))) 101 | # print('\n'.join(res_dis)) 102 | return result 103 | 104 | 105 | def profiles_generator(target_id, dataset_class, attack_info, bandwagon_selected, sample_num, args, real_profiles, 106 | filler_indicator, pre_fix, has_G=False): 107 | # baseline fake profiles 108 | baseline_methods = ["segment", "average", "random", "bandwagon"] 109 | baseline_fake_profiles = [] 110 | for attack_method in baseline_methods: 111 | attack_model = '_'.join([attack_method, str(sample_num), str(args.filler_num)]) 112 | fake_profiles = baseline_attack(dataset_class, attack_info, attack_model, target_id, 113 | bandwagon_selected, filler_indicator) 114 | baseline_fake_profiles.append(fake_profiles) 115 | 116 | for attack_method in baseline_methods: 117 | attack_model = '_'.join([attack_method, str(sample_num), str(args.filler_num)]) 118 | fake_profiles = baseline_attack(dataset_class, attack_info, attack_model, target_id, 119 | bandwagon_selected, None) 120 | baseline_fake_profiles.append(fake_profiles) 121 | baseline_methods = baseline_methods + [i + '_rand' for i in baseline_methods] 122 | 123 | final_attack_setting = [sample_num, real_profiles, filler_indicator] 124 | # new_baseline 125 | if has_G: 126 | for attack_method in ['G0' + pre_fix, 'G1' + pre_fix]: 127 | baseline_methods.append(attack_method) 128 | fake_profiles_G, _, _ = gan_attack_baseline(args.dataset, attack_method, target_id, False, 0, 129 | final_attack_setting=final_attack_setting) 130 | baseline_fake_profiles.append(fake_profiles_G) 131 | 132 | # gan 
profiles 133 | attack_method = "gan" + pre_fix 134 | fake_profiles_gan, _, _ = gan_attack(args.dataset, attack_method, target_id, False, write_to_file=0, 135 | final_attack_setting=final_attack_setting) 136 | return fake_profiles_gan, baseline_fake_profiles, baseline_methods 137 | 138 | 139 | def parse_arg(): 140 | parser = argparse.ArgumentParser() 141 | 142 | parser.add_argument('--dataset', type=str, default='ml100k', 143 | help='input data_set_name,filmTrust or ml100k grocery') 144 | 145 | parser.add_argument('--attack_num', type=int, default=50, 146 | help='num of attack fake user,50 for ml100k and filmTrust') 147 | 148 | parser.add_argument('--filler_num', type=int, default=90, 149 | help='num of filler items each fake user,90 for ml100k,36 for filmTrust') 150 | # filmTrust:5,395,181,565,254,601,623,619,64,558 - random*5+tail*5 151 | # ml100k:62,1077,785,1419,1257,1319,1612,1509,1545,1373 - random*5+tail*5 152 | parser.add_argument('--targets', type=str, default='62,1077,785,1419,1257,1319,1612,1509,1545,1373', help='attack_targets') 153 | parser.add_argument('--bandwagon_selected', type=str, default='180,99,49', 154 | help='180,99,49 for ml100k,103,98,115 for filmTrust') 155 | # 156 | args = parser.parse_args() 157 | # 158 | args.targets = list(map(int, args.targets.split(','))) 159 | args.bandwagon_selected = list(map(int, args.bandwagon_selected.split(','))) 160 | return args 161 | 162 | 163 | if __name__ == '__main__': 164 | """ 165 | step1 - load data 166 | step2 - 167 | step3 - 168 | """ 169 | 170 | # 171 | """parse args""" 172 | args = parse_arg() 173 | pre_fix = '_' + str(args.attack_num) + '_' + str(args.filler_num) 174 | 175 | """step1 - load data""" 176 | path_train = "../data/data/" + args.dataset + "_train.dat" 177 | path_test = "../data/data/" + args.dataset + "_test.dat" 178 | attack_info_path = ["../data/data/" + args.dataset + "_selected_items", 179 | "../data/data/" + args.dataset + "_target_users"] 180 | dataset_class = load_data(path_train=path_train, path_test=path_test, header=['user_id', 'item_id', 'rating'], 181 | sep='\t', print_log=False) 182 | attack_info = load_attack_info(*attack_info_path) 183 | 184 | sample_num = dataset_class.n_users 185 | result = None 186 | for target_id in args.targets: 187 | selected = attack_info[target_id][0] 188 | 189 | attackSetting_path = '_'.join(map(str, [args.dataset, sample_num, args.filler_num, target_id])) 190 | attackSetting_path = "../data/data_attacked/" + attackSetting_path + '_attackSetting' 191 | gan_attacker = Train_GAN_Attacker(dataset_class, params_D=None, params_G=None, target_id=target_id, 192 | selected_id_list=selected, filler_num=args.filler_num, 193 | attack_num=args.attack_num, filler_method=0) 194 | _, real_profiles, filler_indicator = gan_attacker.execute(is_train=0, model_path='no', 195 | final_attack_setting=[sample_num, None, None]) 196 | np.save(attackSetting_path, [real_profiles, filler_indicator]) 197 | 198 | fake_profiles_gan, baseline_fake_profiles, baseline_methods \ 199 | = profiles_generator(target_id, dataset_class, attack_info, args.bandwagon_selected, sample_num, args, 200 | real_profiles, filler_indicator, pre_fix, has_G=True) 201 | 202 | 203 | # result_ = get_distance_result(target_id, real_profiles, fake_profiles_gan, baseline_fake_profiles, 204 | # baseline_methods) 205 | result_ = get_distance_result(target_id, dataset_class.train_matrix.toarray(), fake_profiles_gan, 206 | baseline_fake_profiles, 207 | baseline_methods) 208 | 209 | result = result_ if result is None else 
pd.concat([result, result_]) 210 | print(result) 211 | result.to_excel(args.dataset + '_distance_lianyun.xls', index=False) 212 | -------------------------------------------------------------------------------- /AUSH/test_main/WGAN_yangqian.py: -------------------------------------------------------------------------------- 1 | import math 2 | import numpy as np 3 | import tensorflow as tf 4 | import sys 5 | import math 6 | sys.path.append("../") 7 | from tensorflow.python.framework import ops 8 | from six.moves import xrange 9 | from utils.load_data.load_data import load_data 10 | from utils.load_data.load_attack_info import load_attack_info 11 | import utils as ut 12 | 13 | 14 | if "concat_v2" in dir(tf): 15 | def concat(tensors, axis, *args, **kwargs): 16 | return tf.concat_v2(tensors, axis, *args, **kwargs) 17 | else: 18 | def concat(tensors, axis, *args, **kwargs): 19 | return tf.concat(tensors, axis, *args, **kwargs) 20 | 21 | 22 | class batch_norm(object): 23 | def __init__(self, epsilon=1e-5, momentum=0.9, name="batch_norm"): 24 | with tf.variable_scope(name): 25 | self.epsilon = epsilon 26 | self.momentum = momentum 27 | self.name = name 28 | 29 | def __call__(self, x, train=True): 30 | return tf.contrib.layers.batch_norm(x, 31 | decay=self.momentum, 32 | updates_collections=None, 33 | epsilon=self.epsilon, 34 | scale=True, 35 | is_training=train, 36 | scope=self.name) 37 | 38 | 39 | def conv_cond_concat(x, y): 40 | """Concatenate conditioning vector on feature map axis.""" 41 | x_shapes = x.get_shape() 42 | y_shapes = y.get_shape() 43 | return concat([ 44 | x, y * tf.ones([x_shapes[0], x_shapes[1], x_shapes[2], y_shapes[3]])], 3) 45 | 46 | 47 | def conv2d(input_, output_dim, 48 | k_h=5, k_w=5, d_h=2, d_w=2, stddev=0.02, 49 | name="conv2d"): 50 | with tf.variable_scope(name): 51 | w = tf.get_variable('w', [k_h, k_w, input_.get_shape()[-1], output_dim], 52 | initializer=tf.truncated_normal_initializer(stddev=stddev)) 53 | conv = tf.nn.conv2d(input_, w, strides=[1, d_h, d_w, 1], padding='SAME') 54 | 55 | biases = tf.get_variable('biases', [output_dim], initializer=tf.constant_initializer(0.0)) 56 | conv = tf.reshape(tf.nn.bias_add(conv, biases), conv.get_shape()) 57 | 58 | return conv 59 | 60 | 61 | # kernel_size = 5 * 5 62 | def deconv2d(input_, output_shape, 63 | k_h=5, k_w=5, d_h=2, d_w=2, stddev=0.02, 64 | name="deconv2d", with_w=False): 65 | with tf.variable_scope(name): 66 | # filter : [height, width, output_channels, in_channels] 67 | w = tf.get_variable('w', [k_h, k_w, output_shape[-1], input_.get_shape()[-1]], 68 | initializer=tf.random_normal_initializer(stddev=stddev)) 69 | 70 | try: 71 | deconv = tf.nn.conv2d_transpose(input_, w, output_shape=output_shape, 72 | strides=[1, d_h, d_w, 1]) 73 | 74 | # Support for verisons of TensorFlow before 0.7.0 75 | except AttributeError: 76 | deconv = tf.nn.deconv2d(input_, w, output_shape=output_shape, 77 | strides=[1, d_h, d_w, 1]) 78 | 79 | biases = tf.get_variable('biases', [output_shape[-1]], initializer=tf.constant_initializer(0.0)) 80 | deconv = tf.reshape(tf.nn.bias_add(deconv, biases), deconv.get_shape()) 81 | 82 | if with_w: 83 | return deconv, w, biases 84 | else: 85 | return deconv 86 | 87 | 88 | def lrelu(x, leak=0.2, name="lrelu"): 89 | return tf.maximum(x, leak * x) 90 | 91 | 92 | def linear(input_, output_size, scope=None, stddev=0.02, bias_start=0.0, with_w=False): 93 | shape = input_.get_shape().as_list() 94 | 95 | with tf.variable_scope(scope or "Linear"): 96 | try: 97 | matrix = tf.get_variable("Matrix", [shape[1], 
output_size], tf.float32, 98 | tf.random_normal_initializer(stddev=stddev)) 99 | except ValueError as err: 100 | msg = "NOTE: Usually, this is due to an issue with the image dimensions. Did you correctly set '--crop' or '--input_height' or '--output_height'?" 101 | err.args = err.args + (msg,) 102 | raise 103 | bias = tf.get_variable("bias", [output_size], 104 | initializer=tf.constant_initializer(bias_start)) 105 | if with_w: 106 | return tf.matmul(input_, matrix) + bias, matrix, bias 107 | else: 108 | return tf.matmul(input_, matrix) + bias 109 | 110 | 111 | def conv_out_size_same(size, stride): 112 | return int(math.ceil(float(size) / float(stride))) 113 | 114 | 115 | def gen_random(size): 116 | # z - N(0,100) 117 | return np.random.normal(0, 100, size=size) 118 | 119 | 120 | class WGAN(object): 121 | def __init__(self, sess, dataset_class,batch_size=64, height=29, width=58, z_dim=100, gf_dim=64, df_dim=64, 122 | gfc_dim=1024, dfc_dim=1024, max_to_keep=1): 123 | self.sess = sess 124 | self.dataset_class = dataset_class 125 | self.batch_size = batch_size 126 | 127 | self.height = height 128 | self.width = width 129 | self.z_dim = z_dim 130 | self.gf_dim = gf_dim 131 | self.df_dim = df_dim 132 | self.gfc_dim = gfc_dim 133 | self.dfc_dim = dfc_dim 134 | # batch normalization : deals with poor initialization helps gradient flow 135 | self.d_bn1 = batch_norm(name='d_bn1') 136 | self.d_bn2 = batch_norm(name='d_bn2') 137 | self.d_bn3 = batch_norm(name='d_bn3') 138 | self.g_bn0 = batch_norm(name='g_bn0') 139 | self.g_bn1 = batch_norm(name='g_bn1') 140 | self.g_bn2 = batch_norm(name='g_bn2') 141 | self.g_bn3 = batch_norm(name='g_bn3') 142 | 143 | self.max_to_keep = max_to_keep 144 | 145 | self.build_model() 146 | 147 | def build_model(self): 148 | self.inputs = tf.placeholder(tf.float32, 149 | [self.batch_size, self.height, self.width, 1], 150 | name='real_images') 151 | inputs = self.inputs 152 | 153 | self.z = tf.placeholder(tf.float32, [None, self.z_dim], name='z') 154 | self.G = self.generator(self.z) 155 | 156 | self.D, self.D_logits = self.discriminator(inputs, reuse=False) 157 | self.D_, self.D_logits_ = self.discriminator(self.G, reuse=True) 158 | 159 | # def _cross_entropy_loss(self, logits, labels): 160 | # xentropy = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits, labels)) 161 | # return xentropy 162 | self.d_loss = tf.reduce_mean(tf.square(self.D_logits - self.D_logits_)) 163 | self.g_loss = tf.reduce_mean(tf.square(self.D_logits_)) 164 | # self.d_loss_real = tf.reduce_mean( 165 | # _cross_entropy_loss(self.D_logits, tf.ones_like(self.D))) 166 | # self.d_loss_fake = tf.reduce_mean( 167 | # _cross_entropy_loss(self.D_logits_, tf.zeros_like(self.D_))) 168 | # 169 | # self.g_loss = tf.reduce_mean( 170 | # _cross_entropy_loss(self.D_logits_, tf.ones_like(self.D_))) 171 | # self.d_loss = self.d_loss_real + self.d_loss_fake 172 | # 173 | t_vars = tf.trainable_variables() 174 | self.d_vars = [var for var in t_vars if 'd_' in var.name] 175 | self.g_vars = [var for var in t_vars if 'g_' in var.name] 176 | 177 | self.saver = tf.train.Saver(max_to_keep=self.max_to_keep) 178 | 179 | def train(self, config): 180 | d_optim = tf.train.RMSPropOptimizer(config.learning_rate, decay=config.beta1) \ 181 | .minimize(self.d_loss, var_list=self.d_vars) 182 | g_optim =tf.train.RMSPropOptimizer(config.learning_rate, decay=config.beta1) \ 183 | .minimize(self.g_loss, var_list=self.g_vars) 184 | try: 185 | tf.global_variables_initializer().run() 186 | except: 187 | 
tf.initialize_all_variables().run() 188 | train_idxs = list(range(self.dataset_class.train_matrix.shape[0])) 189 | for epoch in xrange(config.epoch): 190 | np.random.shuffle(train_idxs) 191 | for i in range(len(train_idxs) // self.batch_size): 192 | cur_idxs = train_idxs[i * self.batch_size:(i + 1) * self.batch_size] 193 | batch_inputs = self.dataset_class.train_matrix[cur_idxs].toarray() 194 | # transform range&shape 195 | batch_inputs = (batch_inputs - 2.5) / 2.5 196 | batch_inputs = np.reshape(batch_inputs, [self.batch_size, self.height, self.width, 1]) 197 | # batch_inputs = np.random.random_sample([self.batch_size, self.height, self.width, 1]) 198 | batch_z = gen_random(size=[config.batch_size, self.z_dim]).astype(np.float32) 199 | 200 | # Update D network 201 | _ = self.sess.run(d_optim, feed_dict={self.inputs: batch_inputs, self.z: batch_z}) 202 | 203 | # Update G network 204 | _ = self.sess.run(g_optim, feed_dict={self.z: batch_z}) 205 | 206 | # Run g_optim twice to make sure that d_loss does not go to zero (different from paper) 207 | 208 | errD= self.d_loss.eval({self.inputs: batch_inputs,self.z: batch_z}) 209 | # errD_real = self.d_loss_real.eval({self.inputs: batch_inputs}) 210 | errG = self.g_loss.eval({self.z: batch_z}) 211 | 212 | print("Epoch:[%2d/%2d]d_loss: %.8f, g_loss: %.8f" \ 213 | % (epoch, config.epoch, errD, errG)) 214 | 215 | def discriminator(self, image, reuse=False): 216 | with tf.variable_scope("discriminator") as scope: 217 | if reuse: 218 | scope.reuse_variables() 219 | # [conv+BN+LeakyRelu[64,128,256,512]]+[FC]+[sigmoid] 220 | h0 = lrelu(conv2d(image, self.df_dim, name='d_h0_conv')) 221 | h1 = lrelu(self.d_bn1(conv2d(h0, self.df_dim * 2, name='d_h1_conv'))) 222 | h2 = lrelu(self.d_bn2(conv2d(h1, self.df_dim * 4, name='d_h2_conv'))) 223 | h3 = lrelu(self.d_bn3(conv2d(h2, self.df_dim * 8, name='d_h3_conv'))) 224 | h4 = linear(tf.reshape(h3, [self.batch_size, -1]), 1, 'd_h4_lin') 225 | 226 | return tf.nn.sigmoid(h4), h4 227 | 228 | def generator(self, z): 229 | with tf.variable_scope("generator") as scope: 230 | s_h, s_w = self.height, self.width 231 | # CONV stride=2 232 | s_h2, s_w2 = conv_out_size_same(s_h, 2), conv_out_size_same(s_w, 2) 233 | s_h4, s_w4 = conv_out_size_same(s_h2, 2), conv_out_size_same(s_w2, 2) 234 | s_h8, s_w8 = conv_out_size_same(s_h4, 2), conv_out_size_same(s_w4, 2) 235 | s_h16, s_w16 = conv_out_size_same(s_h8, 2), conv_out_size_same(s_w8, 2) 236 | 237 | # FC of 2*4*512&ReLU&BN 238 | self.z_, self.h0_w, self.h0_b = linear( 239 | z, self.gf_dim * 8 * s_h16 * s_w16, 'g_h0_lin', with_w=True) 240 | self.h0 = tf.reshape( 241 | self.z_, [-1, s_h16, s_w16, self.gf_dim * 8]) 242 | h0 = tf.nn.relu(self.g_bn0(self.h0)) 243 | 244 | # four transposed CONV of [256,128,64] &ReLU&BN&kernel_size = 5 * 5 245 | self.h1, self.h1_w, self.h1_b = deconv2d( 246 | h0, [self.batch_size, s_h8, s_w8, self.gf_dim * 4], name='g_h1', with_w=True) 247 | h1 = tf.nn.relu(self.g_bn1(self.h1)) 248 | h2, self.h2_w, self.h2_b = deconv2d( 249 | h1, [self.batch_size, s_h4, s_w4, self.gf_dim * 2], name='g_h2', with_w=True) 250 | h2 = tf.nn.relu(self.g_bn2(h2)) 251 | h3, self.h3_w, self.h3_b = deconv2d( 252 | h2, [self.batch_size, s_h2, s_w2, self.gf_dim * 1], name='g_h3', with_w=True) 253 | h3 = tf.nn.relu(self.g_bn3(h3)) 254 | 255 | # transposed CONV of [1] &tanh 256 | h4, self.h4_w, self.h4_b = deconv2d( 257 | h3, [self.batch_size, s_h, s_w, 1], name='g_h4', with_w=True) 258 | 259 | return tf.nn.tanh(h4) 
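# Illustrative usage sketch (not part of the original file): the WGAN class above is never
# instantiated in the code shown, so this is one plausible way to train it, assuming the ml100k
# layout used elsewhere in test_main (1682 items = 29 * 58, so each user profile reshapes to a
# 29x58 single-channel "image"). The flag names mirror the attributes read inside WGAN.train()
# (epoch, learning_rate, beta1, batch_size); the defaults and data paths are placeholders,
# not values taken from the repository.
if __name__ == '__main__':
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument('--epoch', type=int, default=25)
    parser.add_argument('--learning_rate', type=float, default=0.0002)
    parser.add_argument('--beta1', type=float, default=0.9)  # used as the RMSProp decay in train()
    parser.add_argument('--batch_size', type=int, default=64)
    config = parser.parse_args()

    # load_data is already imported at the top of this file
    dataset_class = load_data(path_train="../data/data/ml100k_train.dat",
                              path_test="../data/data/ml100k_test.dat",
                              header=['user_id', 'item_id', 'rating'],
                              sep='\t', print_log=False)

    with tf.Session() as sess:
        wgan = WGAN(sess, dataset_class, batch_size=config.batch_size, height=29, width=58)
        wgan.train(config)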
--------------------------------------------------------------------------------
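A minimal worked sketch of the per-item distance that eval_TVD_JS computes in test_main/main_eval_similarity.py and main_eval_similarity_foryangqian.py: get_item_distribution first bins each item's ratings (rounded and clipped to 0..5) into a 6-bucket distribution, then the total variation distance and the Jensen-Shannon divergence are averaged over items. The two toy rows below are invented for illustration and are not taken from the repository.

import numpy as np
import scipy.stats

# one toy item, distributions over the 6 rating buckets 0..5 (each row sums to 1)
P = np.array([[0.50, 0.50, 0.0, 0.0, 0.0, 0.0]])   # e.g. genuine profiles
Q = np.array([[0.25, 0.75, 0.0, 0.0, 0.0, 0.0]])   # e.g. fake profiles

# total variation distance averaged over items: mean_i( sum_r |P[i, r] - Q[i, r]| / 2 )
dis_TVD = np.mean(np.sum(np.abs(P - Q) / 2, 1))     # -> 0.25

# Jensen-Shannon divergence averaged over items, with M = (P + Q) / 2 and natural-log KL
M = (P + Q) / 2
dis_JS = np.mean([(scipy.stats.entropy(P[i], M[i]) + scipy.stats.entropy(Q[i], M[i])) / 2
                  for i in range(P.shape[0])])      # -> approx. 0.034

print(dis_TVD, dis_JS)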