├── Leg-UP
│   ├── models
│   │   ├── detector
│   │   │   └── SDLib
│   │   │       ├── data
│   │   │       │   ├── __init__.py
│   │   │       │   ├── social.py
│   │   │       │   └── rating.py
│   │   │       ├── main
│   │   │       │   ├── __init__.py
│   │   │       │   ├── id_plus_1.py
│   │   │       │   ├── SDLib.py
│   │   │       │   ├── plot.py
│   │   │       │   └── main.py
│   │   │       ├── method
│   │   │       │   ├── __init__.py
│   │   │       │   └── FAP.py
│   │   │       ├── tool
│   │   │       │   ├── __init__.py
│   │   │       │   ├── dataSplit.py
│   │   │       │   ├── config.py
│   │   │       │   ├── plot.py
│   │   │       │   ├── qmath.py
│   │   │       │   └── file.py
│   │   │       ├── baseclass
│   │   │       │   ├── __init__.py
│   │   │       │   ├── SSDetection.py
│   │   │       │   └── SDetection.py
│   │   │       └── __init__.py
│   │   └── attacker
│   │       └── __init__.py
│   ├── main.py
│   ├── utils
│   │   ├── loss.py
│   │   ├── data_loader.py
│   │   └── utils.py
│   ├── run.sh
│   ├── execute_model.py
│   ├── README.md
│   ├── run.py
│   └── preprocess_data.py
├── AUSH
│   ├── model
│   │   ├── __init__.py
│   │   ├── attack_model
│   │   │   ├── AttackModel.py
│   │   │   ├── gan_attack
│   │   │   │   ├── __init__.py
│   │   │   │   └── models.py
│   │   │   ├── baseline.py
│   │   │   └── gan_attack_copy
│   │   │       └── models.py
│   │   ├── trainer_rec.py
│   │   ├── trainer_rec_surprise.py
│   │   └── nnmf.py
│   ├── utils
│   │   ├── __init__.py
│   │   ├── attack
│   │   │   ├── __init__.py
│   │   │   └── data_to_file.py
│   │   └── load_data
│   │       ├── __init__.py
│   │       ├── load_attack_info.py
│   │       └── load_data.py
│   ├── test_main
│   │   ├── __init__.py
│   │   ├── example.sh
│   │   ├── result_reporter.py
│   │   ├── main_train_rec.py
│   │   ├── main_gan_attack.py
│   │   ├── main_gan_attack_baseline.py
│   │   ├── main_eval_attack.py
│   │   ├── data_preprocess.py
│   │   ├── main_baseline_attack.py
│   │   ├── dcgan.py
│   │   ├── main_eval_similarity_foryangqian.py
│   │   ├── main_eval_similarity.py
│   │   └── WGAN_yangqian.py
│   └── README.md
├── data
│   ├── automotive
│   │   ├── automotive_selected_items
│   │   └── automotive_target_users
│   ├── ml100k
│   │   ├── ml100k_selected_items
│   │   └── ml100k_target_users
│   └── filmTrust
│       └── filmTrust_selected_items
├── README.md
└── .gitignore
/Leg-UP/models/detector/SDLib/data/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/Leg-UP/models/detector/SDLib/main/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/Leg-UP/models/detector/SDLib/method/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/Leg-UP/models/detector/SDLib/tool/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/Leg-UP/models/detector/SDLib/baseclass/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/AUSH/model/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # @Time : 2019/8/23 22:23
3 | # @Author : chensi
4 | # @File : __init__.py
5 | # @Software : PyCharm
6 | # @Description : None
--------------------------------------------------------------------------------
/AUSH/utils/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # @Time : 2019/8/23 22:24
3 | # @Author : chensi
4 | # @File : __init__.py
5 | # @Software : PyCharm
6 | # @Description : None
--------------------------------------------------------------------------------
/AUSH/test_main/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # @Time : 2019/8/23 22:24
3 | # @Author : chensi
4 | # @File : __init__.py
5 | # @Software : PyCharm
6 | # @Description : None
--------------------------------------------------------------------------------
/AUSH/utils/attack/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # @Time : 2019/5/31 10:37
3 | # @Author : chensi
4 | # @File : __init__.py
5 | # @Software : PyCharm
6 | # @Description : None
--------------------------------------------------------------------------------
/AUSH/utils/load_data/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # @Time : 2019/4/30 17:37
3 | # @Author : chensi
4 | # @File : __init__.py
5 | # @Software : PyCharm
6 | # @Description : None
--------------------------------------------------------------------------------
/Leg-UP/models/attacker/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # @Time : 2021/03/20 09:21
3 | # @Author : chensi
4 | # @File : __init__.py
5 | # @Software : PyCharm
6 | # @Description : None
--------------------------------------------------------------------------------
/AUSH/model/attack_model/AttackModel.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # @Time : 2020/9/20 14:23
3 | # @Author : chensi
4 | # @File : attack_model.py
5 | # @Software : PyCharm
6 | # @Description : None
--------------------------------------------------------------------------------
/Leg-UP/models/detector/SDLib/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # @Time : 2020/12/3 15:52
3 | # @Author : chensi
4 | # @File : __init__.py
5 | # @Software : PyCharm
6 | # @Description : None
--------------------------------------------------------------------------------
/AUSH/model/attack_model/gan_attack/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # @Time : 2019/8/24 10:41
3 | # @Author : chensi
4 | # @File : __init__.py
5 | # @Software : PyCharm
6 | # @Description : None
--------------------------------------------------------------------------------
/data/automotive/automotive_selected_items:
--------------------------------------------------------------------------------
1 | 22 866
2 | 88 1141
3 | 119 681
4 | 122 1656
5 | 339 177
6 | 422 477
7 | 477 1012
8 | 594 1141
9 | 866 1198
10 | 884 1656
11 | 1089 866
12 | 1141 866
13 | 1431 705
14 | 1593 1089
15 | 1656 1089
--------------------------------------------------------------------------------
/data/ml100k/ml100k_selected_items:
--------------------------------------------------------------------------------
1 | 1257 171,49,180
2 | 1419 203,167,172
3 | 785 171,49,180
4 | 1077 0,131,422
5 | 62 167,172,237
6 | 1319 97,99,55
7 | 1612 171,49,180
8 | 1509 11,99,55
9 | 1545 97,99,55
10 | 1373 203,167,172
11 | 690 27,78,227
--------------------------------------------------------------------------------
/Leg-UP/main.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # @Time : 2020/11/29 19:21
3 | # @Author : chensi
4 | # @File : main.py
5 | # @Software : PyCharm
6 | # @Description : None
7 |
8 | # from utils.evaluator import *
9 | from models.attacker.aushplus import *
10 | model = AUSHplus()
11 | model.execute()
12 |
--------------------------------------------------------------------------------
/AUSH/test_main/example.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | for target_id in 5 395 181 565 254 601 623 619 64 558
4 | do
5 | for rec_model_name in IAUtoRec UAUtoRec NNMF NMF_25
6 | do
7 | python main_eval_attack.py --dataset filmTrust --rec_model_name $rec_model_name --attack_method G0 --target_id $target_id --attack_num 50 --filler_num 36 >> filmTrust_result_G0
8 | #nohup python main_gan_attack_baseline.py --dataset filmTrust --target_id 5 --attack_num 50 --filler_num 36 --loss 0 >> G0_log 2>&1 &
9 | done
10 | done
--------------------------------------------------------------------------------
/Leg-UP/models/detector/SDLib/baseclass/SSDetection.py:
--------------------------------------------------------------------------------
1 | from models.detector.SDLib.baseclass.SDetection import SDetection
2 | from models.detector.SDLib.data.social import SocialDAO
3 | from models.detector.SDLib.tool.config import Config,LineConfig
4 | from os.path import abspath
5 | from time import strftime,localtime,time
6 | from models.detector.SDLib.tool.file import FileIO
7 | from sklearn.metrics import classification_report
8 | class SSDetection(SDetection):
9 |
10 | def __init__(self,conf,trainingSet=None,testSet=None,labels=None,relation=list(),fold='[1]'):
11 | super(SSDetection, self).__init__(conf,trainingSet,testSet,labels,fold)
12 | self.sao = SocialDAO(self.config, relation) # social relations access control
13 |
--------------------------------------------------------------------------------
/data/filmTrust/filmTrust_selected_items:
--------------------------------------------------------------------------------
1 | 29 83,98,110
2 | 5 98,118,112
3 | 395 118,110,119
4 | 380 98,83,118
5 | 198 118,98,83
6 | 576 98,118,112
7 | 228 83,98,119
8 | 181 118,112,98
9 | 442 99,2,84
10 | 310 119,118,110
11 | 703 98,99,114
12 | 307 83,118,98
13 | 370 113,114,99
14 | 449 113,115,82
15 | 2 112,103,98
16 | 565 110,119,118
17 | 664 98,99,114
18 | 539 98,118,112
19 | 515 99,114,98
20 | 254 98,83,119
21 | 215 118,83,98
22 | 40 118,119,110
23 | 601 119,83,118
24 | 623 98,118,83
25 | 266 110,99,83
26 | 619 118,83,98
27 | 648 113,114,99
28 | 640 118,83,98
29 | 451 114,99,98
30 | 64 98,83,118
31 | 655 98,119,83
32 | 558 98,83,118
33 | 553 119,110,118
34 | 183 114,98,99
35 | 200 110,119,118
36 | 264 98,114,99
37 | 674 98,83,118
38 | 295 83,119,110
39 | 629 98,114,99
40 | 711 83,98,118
--------------------------------------------------------------------------------
/AUSH/utils/load_data/load_attack_info.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # @Time : 2019/8/23 11:53
3 | # @Author : chensi
4 | # @File : load_attack_info.py
5 | # @Software : PyCharm
6 | # @Description : None
7 |
8 | def load_attack_info(selected_item_path, target_user_path):
9 | attack_info = {}
10 | with open(selected_item_path, "r") as fin:
11 | for line in fin:
12 | line = line.strip("\n").split("\t")
13 | target_item, selected_items = int(line[0]), list(map(int, line[1].split(",")))
14 | attack_info[target_item] = [selected_items]
15 | with open(target_user_path, "r") as fin:
16 | for line in fin:
17 | line = line.strip("\n").split("\t")
18 | target_item, target_users = int(line[0]), list(map(int, line[1].split(",")))
19 | attack_info[target_item].append(target_users)
20 | return attack_info
21 |
22 |
--------------------------------------------------------------------------------
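A quick usage sketch (editorial, not part of the repository): `load_attack_info` above reads the tab-separated `*_selected_items` and `*_target_users` files shipped in the top-level `data` folder, where each line is a target item id followed by a comma-separated id list. The paths below assume the `data` folder has been copied under `AUSH/`, as the AUSH README suggests.

```python
# Illustrative only; paths assume data/ was copied into AUSH/ as the README advises.
from utils.load_data.load_attack_info import load_attack_info

attack_info = load_attack_info(
    'data/ml100k/ml100k_selected_items',  # target_item <TAB> selected item ids
    'data/ml100k/ml100k_target_users',    # target_item <TAB> target user ids
)
# attack_info maps each target item to [selected_items, target_users]
selected_items, target_users = attack_info[1257]
```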
/Leg-UP/models/detector/SDLib/main/id_plus_1.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # @Time : 2019/8/29 21:51
3 | # @Author : chensi
4 | # @File : id_plus_1.py
5 | # @Software : PyCharm
6 | # @Description : None
7 |
8 |
9 | import numpy as np
10 | import pandas as pd
11 | import os
12 |
13 | conf_path = '../config/FAP.conf'
14 |
15 | # random_target = [62, 1077, 785, 1419, 1257]
16 | # tail_target = [1319, 1612, 1509, 1545, 1373]
17 | # targets = random_target + tail_target
18 | random = [155, 383, 920, 941, 892]
19 | tail = [1480, 844, 1202, 1301, 2035]
20 | targets = random + tail
21 | attack_methods = ["segment", "average", "random", "bandwagon", "gan"]
22 | for iid in targets:
23 | for attack_method in attack_methods:
24 | path = "../dataset/GAN/ciao/ciao_" + str(iid) + "_" + attack_method + "_50_15.dat"
25 | names = ['userID', 'movieID', 'movieRating']
26 | data_df = pd.read_csv(path, sep='\t', names=names, engine='python')
27 | data_df.userID += 1
28 | data_df.movieID += 1
29 | dst_path = "../dataset/GAN/ciao_1/ciao_" + str(iid) + "_" + attack_method + "_50_15.dat"
30 | data_df.to_csv(dst_path, index=False, sep='\t', header=False)
31 |
--------------------------------------------------------------------------------
/AUSH/utils/attack/data_to_file.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # @Time : 2019/8/23 21:17
3 | # @Author : chensi
4 | # @File : data_to_file.py
5 | # @Software : PyCharm
6 | # @Description : None
7 |
8 | import os
9 | import shutil
10 |
11 |
12 | def attacked_file_writer(clean_path, attacked_path, fake_profiles, n_users_ori):
13 | data_to_write = ""
14 | i = 0
15 | for fake_profile in fake_profiles:
16 | injected_iid = fake_profile.nonzero()[0]
17 | injected_rating = fake_profile[injected_iid]
18 | data_to_write += ('\n'.join(
19 | map(lambda x: '\t'.join(map(str, [n_users_ori + i] + list(x))), zip(injected_iid, injected_rating))) + '\n')
20 | i += 1
21 | if os.path.exists(attacked_path): os.remove(attacked_path)
22 | shutil.copyfile(clean_path, attacked_path)
23 | with open(attacked_path, 'a+')as fout:
24 | fout.write(data_to_write)
25 |
26 |
27 | def target_prediction_writer(predictions, hit_ratios, dst_path):
28 | # uid - rating - HR
29 | data_to_write = []
30 | for uid in range(len(predictions)):
31 | data_to_write.append('\t'.join(map(str, [uid, predictions[uid]] + hit_ratios[uid])))
32 | with open(dst_path, 'w')as fout:
33 | fout.write('\n'.join(data_to_write))
34 |
--------------------------------------------------------------------------------
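A minimal usage sketch for `attacked_file_writer` (editorial; the file names and dataset sizes below are hypothetical). Each fake profile is a dense vector over all items; its non-zero entries are appended to a copy of the clean rating file as `(uid, iid, rating)` lines, with fake user ids starting at `n_users_ori`.

```python
import numpy as np
from utils.attack.data_to_file import attacked_file_writer

n_users_ori, n_items = 943, 1682             # hypothetical dataset sizes
fake_profiles = np.zeros((2, n_items))       # two fake user profiles
fake_profiles[0, [62, 171, 49]] = 5.0        # target + selected items rated 5
fake_profiles[1, [62, 203, 167]] = 5.0
attacked_file_writer('data/ml100k_train.dat',           # clean ratings (hypothetical path)
                     'data/ml100k_train_attacked.dat',  # copy with injected profiles
                     fake_profiles, n_users_ori)
```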
/Leg-UP/models/detector/SDLib/tool/dataSplit.py:
--------------------------------------------------------------------------------
1 | from random import random
2 | from models.detector.SDLib.tool.file import FileIO
3 | class DataSplit(object):
4 |
5 | def __init__(self):
6 | pass
7 |
8 | @staticmethod
9 | def dataSplit(data,test_ratio = 0.3,output=False,path='./',order=1):
10 | if test_ratio>=1 or test_ratio <=0:
11 | test_ratio = 0.3
12 | testSet = {}
13 | trainingSet = {}
14 | for user in data:
15 | if random() < test_ratio:
16 | testSet[user] = data[user].copy()
17 | else:
18 | trainingSet[user] = data[user].copy()
19 |
20 | if output:
21 | FileIO.writeFile(path,'testSet['+str(order)+']',testSet)
22 | FileIO.writeFile(path, 'trainingSet[' + str(order) + ']', trainingSet)
23 | return trainingSet,testSet
24 |
25 | @staticmethod
26 | def crossValidation(data,k,output=False,path='./',order=1):
27 | if k<=1 or k>10:
28 | k=3
29 | for i in range(k):
30 | trainingSet = {}
31 | testSet = {}
32 | for ind,user in enumerate(data):
33 | if ind%k == i:
34 | testSet[user] = data[user].copy()
35 | else:
36 | trainingSet[user] = data[user].copy()
37 | yield trainingSet,testSet
38 |
39 |
40 |
--------------------------------------------------------------------------------
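A small sketch of how `DataSplit` is called (editorial; toy data). Real callers such as `SDLib.py` pass the structure produced by `FileIO.loadDataSet`, assumed here to be a dict keyed by user whose values support `.copy()`.

```python
from models.detector.SDLib.tool.dataSplit import DataSplit

data = {
    'u1': {'i1': 5.0, 'i2': 3.0},
    'u2': {'i1': 4.0},
    'u3': {'i3': 2.0, 'i4': 5.0},
}

# random holdout: roughly 30% of users (in expectation) go to the test set
trainingSet, testSet = DataSplit.dataSplit(data, test_ratio=0.3)

# k-fold split by user index (a generator yielding k (train, test) pairs)
for trainingSet, testSet in DataSplit.crossValidation(data, k=3):
    print(len(trainingSet), len(testSet))
```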
/AUSH/test_main/result_reporter.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding:utf-8 -*-
3 | # author:ariaschen
4 | # datetime:2020/1/14 09:11
5 | # software: PyCharm
6 |
7 | # import itertools, gzip
8 | import pandas as pd
9 |
10 |
11 | columns = ['Rec_model', 'attack_method', 'target_id']
12 |
13 | hr = ['HR_1', 'HR_3', 'HR_5', 'HR_10', 'HR_20', 'HR_50']
14 | hr_ori = [i + '_ori' for i in hr]
15 |
16 | columns += [i + '_inseg' for i in ['shift'] + hr_ori + hr]
17 |
18 | columns += [i + '_all' for i in ['shift'] + hr_ori + hr]
19 |
20 | columns_r = [i + '_inseg' for i in ['shift'] + hr] + [i + '_all' for i in ['shift'] + hr]
21 | """"""
22 | # data = pd.read_excel('filmTrust_distance.xls')
23 | # data.groupby('attack_method').mean()[['dis_TVD','dis_JS']].to_excel('filmTrust_distance_avg.xls')
24 |
25 | # data = pd.read_excel('ml100k_performance_all.xls')
26 | # data = pd.read_excel('../result_ijcai/filmTrust_performance_all.xls')
27 | # data = pd.read_excel('../result_ijcai/ml100k_performance_all.xls')
28 | # data = pd.read_excel('office_performance_all.xls')
29 | data = pd.read_excel('automotive_performance_all.xls')
30 | data.columns = columns
31 | data = data[['Rec_model', 'attack_method', 'target_id', 'shift_inseg', 'HR_10_inseg', 'shift_all', 'HR_10_all']]
32 | # target_type_dict = dict(
33 | # zip([62, 1077, 785, 1419, 1257] + [1319, 1612, 1509, 1545, 1373], ['random'] * 5 + ['tail'] * 5))
34 | # target_type_dict = dict(zip([5, 395, 181, 565, 254] + [601, 623, 619, 64, 558], ['random'] * 5 + ['tail'] * 5))
35 | target_type_dict = dict(zip([1141, 1656, 477, 1089, 866] + [88, 22, 122, 339, 1431], ['random'] * 5 + ['tail'] * 5))
36 | data['target_type'] = data.target_id.apply(lambda x: target_type_dict[x])
37 | data['attack_method'] = data.attack_method.apply(lambda x: x.split('_')[0])
38 | result = data.groupby(['Rec_model','attack_method', 'target_type']).mean()[['shift_all', 'HR_10_all']]
39 | result.to_excel('ml100k_performance_0119_sample_strategy.xlsx')
40 | exit()
41 |
--------------------------------------------------------------------------------
/Leg-UP/utils/loss.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn.functional as F
3 |
4 | from utils.utils import EPSILON
5 |
6 | __all__ = ["mse_loss", "mult_ce_loss", "binary_ce_loss", "kld_loss",
7 | "sampled_bce_loss", "sampled_cml_loss"]
8 |
9 | """Model training losses."""
10 | bce_loss = torch.nn.BCELoss(reduction='none')
11 |
12 |
13 | def mse_loss(data, logits, weight):
14 | """Mean square error loss."""
15 | weights = torch.ones_like(data)
16 | weights[data > 0] = weight
17 | res = weights * (data - logits) ** 2
18 | return res.sum(1)
19 |
20 |
21 | def mult_ce_loss(data, logits):
22 | """Multi-class cross-entropy loss."""
23 | log_probs = F.log_softmax(logits, dim=-1)
24 | loss = -log_probs * data
25 |
26 | instance_data = data.sum(1)
27 | instance_loss = loss.sum(1)
28 | # Avoid divide by zeros.
29 | res = instance_loss / (instance_data + EPSILON)
30 | return res
31 |
32 |
33 | def binary_ce_loss(data, logits):
34 | """Binary-class cross-entropy loss."""
35 | return bce_loss(torch.sigmoid(logits), data).mean(1)
36 |
37 |
38 | def kld_loss(mu, log_var):
39 | """KL-divergence."""
40 | return -0.5 * torch.sum(
41 | 1 + log_var - mu.pow(2) - log_var.exp(), dim=1)
42 |
43 |
44 | def sampled_bce_loss(logits, n_negatives):
45 | """Binary-class cross-entropy loss with sampled negatives."""
46 | pos_logits, neg_logits = torch.split(logits, [1, n_negatives], 1)
47 | data = torch.cat([
48 | torch.ones_like(pos_logits), torch.zeros_like(neg_logits)
49 | ], 1)
50 | return bce_loss(torch.sigmoid(logits), data).mean(1)
51 |
52 |
53 | def sampled_cml_loss(distances, n_negatives, margin):
54 | """Hinge loss with sampled negatives."""
55 | # Distances here are the negative euclidean distances.
56 | pos_distances, neg_distances = torch.split(-distances, [1, n_negatives], 1)
57 | neg_distances = neg_distances.min(1).values.unsqueeze(-1)
58 | res = pos_distances - neg_distances + margin
59 | res[res < 0] = 0
60 | return res.sum(1)
61 |
--------------------------------------------------------------------------------
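A toy example of the loss helpers above (editorial; shapes only, and it assumes running from the Leg-UP root so that `utils.utils`, which provides `EPSILON`, resolves). `mse_loss` returns one value per row, with observed entries (`data > 0`) up-weighted by `weight`, so callers typically average the result over the batch.

```python
import torch
from utils.loss import mse_loss

data = torch.tensor([[5.0, 0.0, 3.0],
                     [0.0, 4.0, 0.0]])          # 2 users x 3 items, 0 = unobserved
logits = torch.zeros_like(data)                 # model reconstructions
per_user = mse_loss(data, logits, weight=20.0)  # shape (2,), one loss per user
loss = per_user.mean()
```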
/README.md:
--------------------------------------------------------------------------------
1 | # Shilling Attacks against Recommender Systems
2 |
3 | This repository contains our implementations for Shilling Attacks against Recommender Systems.
4 |
5 | Folder structure:
6 | - `AUSH`: The implementation of AUSH used in our CIKM'20 paper [[ACM Library](https://dl.acm.org/doi/10.1145/3340531.3411884)] [[arXiv Preprint](https://arxiv.org/abs/2005.08164)].
7 | - `Leg-UP`: The implementation of Leg-UP in our TNNLS'22 paper [[IEEE Xplore](https://ieeexplore.ieee.org/document/9806457)] [[arXiv Preprint](https://arxiv.org/abs/2206.11433)] and a unified framework for comparing Leg-UP with various attackers including AIA, DCGAN, WGAN, Random Attack, Average Attack, Segment Attack and Bandwagon Attack.
8 | - `data`: Recommendation datasets used in our experiments.
9 |
10 | See `README.md` in each folder for more details.
11 |
12 | Please kindly cite our papers if you find our implementations useful:
13 |
14 | > Chen Lin, Si Chen, Hui Li, Yanghua Xiao, Lianyun Li, and Qian Yang. 2020. Attacking Recommender Systems with Augmented User Profiles. In CIKM. 855–864.
15 |
16 | > Chen Lin, Si Chen, Meifang Zeng, Sheng Zhang, Min Gao, and Hui Li. 2022. Shilling Black-Box Recommender Systems by Learning to Generate Fake User Profiles. In TNNLS.
17 |
18 | @inproceedings{Lin2020Attacking,
19 | author = {Chen Lin and
20 | Si Chen and
21 | Hui Li and
22 | Yanghua Xiao and
23 | Lianyun Li and
24 | Qian Yang},
25 | title = {Attacking Recommender Systems with Augmented User Profiles},
26 | booktitle = {{CIKM}},
27 | pages = {855--864},
28 | year = {2020}
29 | }
30 |
31 |
32 | @article{LinCZZGL22,
33 | author = {Chen Lin and
34 | Si Chen and
35 | Meifang Zeng and
36 | Sheng Zhang and
37 | Min Gao and
38 | Hui Li},
39 | title = {Shilling Black-Box Recommender Systems by Learning to Generate Fake User Profiles},
40 | journal = {{IEEE} Trans. Neural Networks Learn. Syst.},
41 | year = {2022}
42 | }
--------------------------------------------------------------------------------
/Leg-UP/run.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | #=================================================
4 |
5 | for target_id in 62 785 1077 1257 1419; do
6 | python run.py --data_set ml100k --target_ids $target_id --attacker_list AUSHplus >log_ml100k_$target_id
7 | done
8 |
9 | for target_id in 5 395 181 565 254; do
10 | python run.py --data_set filmTrust --target_ids $target_id --attacker_list AUSHplus >log_filmTrust_$target_id
11 | done
12 |
13 | for target_id in 119 422 594 884 1593; do
14 | python run.py --data_set automotive --target_ids $target_id --attacker_list AUSHplus >log_automotive_$target_id
15 | done
16 | #=================================================
17 |
18 | for attacker in AUSHplus AIA WGANAttacker DCGANAttacker RandomAttacker AverageAttacker BandwagonAttacker SegmentAttacker; do
19 | for target_id in 62 785 1077 1257 1419; do
20 | python run.py --data_set ml100k --target_ids $target_id --attacker_list $attacker >log_ml100k_$target_id"_"$attacker
21 | done
22 |
23 | for target_id in 5 395 181 565 254; do
24 | python run.py --data_set filmTrust --target_ids $target_id --attacker_list $attacker >log_filmTrust_$target_id"_"$attacker
25 | done
26 |
27 | for target_id in 119 422 594 884 1593; do
28 | python run.py --data_set automotive --target_ids $target_id --attacker_list $attacker >log_automotive_$target_id"_"$attacker
29 | done
30 | done
31 |
32 | #=================================================
33 |
34 | for attacker in AUSHplus_SR AUSHplus_woD AUSHplus_SF AUSHplus_inseg; do
35 | for target_id in 62 785 1077 1257 1419; do
36 | python run.py --data_set ml100k --target_ids $target_id --attacker_list $attacker >log_ml100k_$target_id"_"$attacker
37 | done
38 |
39 | for target_id in 5 395 181 565 254; do
40 | python run.py --data_set filmTrust --target_ids $target_id --attacker_list $attacker >log_filmTrust_$target_id"_"$attacker
41 | done
42 |
43 | for target_id in 119 422 594 884 1593; do
44 | python run.py --data_set automotive --target_ids $target_id --attacker_list $attacker >log_automotive_$target_id"_"$attacker
45 | done
46 | done
47 |
48 | #=================================================
49 |
--------------------------------------------------------------------------------
/AUSH/model/trainer_rec.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # @Time : 2019/8/23 19:58
3 | # @Author : chensi
4 | # @File : train_rec.py
5 | # @Software : PyCharm
6 | # @Description : None
7 |
8 | try:
9 | import tensorflow.compat.v1 as tf
10 |
11 | tf.disable_v2_behavior()
12 | except:
13 | import tensorflow as tf
14 | from model.autorec import IAutoRec, UAutoRec
15 | from model.nnmf import NNMF
16 |
17 |
18 | def get_model_network(sess, model_name, dataset_class):
19 | model = None
20 | if model_name == "IAutoRec":
21 | model = IAutoRec(sess, dataset_class)
22 | elif model_name == "UAutoRec":
23 | model = UAutoRec(sess, dataset_class)
24 | elif model_name == "NNMF":
25 | model = NNMF(sess, dataset_class)
26 | return model
27 |
28 |
29 | def get_top_n(model, n):
30 | top_n = {}
31 | user_nonrated_items = model.dataset_class.get_user_nonrated_items()
32 | for uid in range(model.num_user):
33 | items = user_nonrated_items[uid]
34 | ratings = model.predict([uid] * len(items), items)
35 | item_rating = list(zip(items, ratings))
36 | item_rating.sort(key=lambda x: x[1], reverse=True)
37 | top_n[uid] = [x[0] for x in item_rating[:n]]
38 | return top_n
39 |
40 |
41 | def pred_for_target(model, target_id):
42 | target_predictions = model.predict(list(range(model.num_user)), [target_id] * model.num_user)
43 |
44 | top_n = get_top_n(model, n=50)
45 | hit_ratios = {}
46 | for uid in top_n:
47 | hit_ratios[uid] = [1 if target_id in top_n[uid][:i] else 0 for i in [1, 3, 5, 10, 20, 50]]
48 | return target_predictions, hit_ratios
49 |
50 |
51 | def rec_trainer(model_name, dataset_class, target_id, is_train, model_path):
52 | tf.reset_default_graph()
53 | tf_config = tf.ConfigProto()
54 | tf_config.gpu_options.allow_growth = True
55 | with tf.Session(config=tf_config) as sess:
56 |
57 | rec_model = get_model_network(sess, model_name, dataset_class)
58 | if is_train:
59 | print('--> start training recommendation model...')
60 | rec_model.execute()
61 | rec_model.save(model_path)
62 | else:
63 | rec_model.restore(model_path)
64 | print('--> start prediction for each user...')
65 | predictions, hit_ratios = pred_for_target(rec_model, target_id)
66 | return predictions, hit_ratios
67 |
--------------------------------------------------------------------------------
/Leg-UP/execute_model.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # @Time : 2020/11/29 11:59
3 | # @Author : chensi
4 | # @File : execute_model.py
5 | # @Software : PyCharm
6 | # @Description : None
7 | import random
8 | import numpy as np
9 | import torch
10 |
11 | tf = None
12 | try:
13 | import tensorflow.compat.v1 as tf
14 |
15 | tf.disable_v2_behavior()
16 | except:
17 | import tensorflow as tf
18 |
19 | seed = 1234
20 | random.seed(seed)
21 | np.random.seed(seed)
22 | tf.set_random_seed(seed)
23 | torch.manual_seed(seed)
24 | torch.cuda.manual_seed_all(seed)
25 |
26 | from importlib import import_module
27 | import sys
28 |
29 |
30 | model2lib_dict = {
31 | # attacker
32 | 'RandomAttacker': 'models.attacker.attacker',
33 | 'AverageAttacker': 'models.attacker.attacker',
34 | 'BandwagonAttacker': 'models.attacker.attacker',
35 | 'SegmentAttacker': 'models.attacker.attacker',
36 | #
37 | 'WGANAttacker': 'models.attacker.attacker',
38 | 'DCGANAttacker': 'models.attacker.attacker',
39 | #
40 | 'AUSH': 'models.attacker.aush',
41 | #
42 | 'AUSHplus': 'models.attacker.aushplus',
43 | 'AIA': 'models.attacker.aushplus',
44 | 'AUSHplus_SR': 'models.attacker.aushplus',
45 | 'AUSHplus_woD': 'models.attacker.aushplus',
46 | 'AUSHplus_SF': 'models.attacker.aushplus',
47 | 'AUSHplus_inseg': 'models.attacker.aushplus',
48 | }
49 |
50 |
51 | def execute_model(model_type, model_name):
52 |
53 | try:
54 | try:
55 | model_lib_str = 'models.%s.%s' % (model_type.lower(),
56 | model_type[0].upper() + model_type[1:].lower())
57 | model_lib = import_module(model_lib_str)
58 | model = getattr(model_lib, model_name)()
59 | except:
60 | model_lib_str = 'utils.%s' % (model_type.lower())
61 | model_lib = import_module(model_lib_str)
62 | model = getattr(model_lib, model_name)()
63 | except:
64 | # try:
65 | model_lib_str = model2lib_dict[model_name]
66 | model_lib = import_module(model_lib_str)
67 | model = getattr(model_lib, model_name)()
68 | # except:
69 | # print('Not found:', model_type, model_name)
70 | # exit()
71 |
72 | model.execute()
73 | print('success.')
74 |
75 |
76 | model_lib = sys.argv[sys.argv.index('--exe_model_lib') + 1]
77 | model_name = sys.argv[sys.argv.index('--exe_model_class') + 1]
78 | execute_model(model_lib, model_name)
79 |
--------------------------------------------------------------------------------
/Leg-UP/README.md:
--------------------------------------------------------------------------------
1 |
2 | # Shilling Black-box Recommender Systems by Learning to Generate Fake User Profiles
3 |
4 | This repository contains our implementation for Leg-UP (Learning to Generate Fake User Profiles) and various shilling attack methods including AIA, DCGAN, WGAN, Random Attack, Average Attack, Segment Attack and Bandwagon Attack.
5 |
6 | Please kindly cite our paper [[IEEE Xplore](https://ieeexplore.ieee.org/document/9806457)] [[arXiv Preprint](https://arxiv.org/abs/2206.11433)] if you use it:
7 |
8 | > Chen Lin, Si Chen, Meifang Zeng, Sheng Zhang, Min Gao, and Hui Li. 2022. Shilling Black-Box Recommender Systems by Learning to Generate Fake User Profiles. In TNNLS.
9 |
10 | @article{LinCZZGL22,
11 | author = {Chen Lin and
12 | Si Chen and
13 | Meifang Zeng and
14 | Sheng Zhang and
15 | Min Gao and
16 | Hui Li},
17 | title = {Shilling Black-Box Recommender Systems by Learning to Generate Fake User Profiles},
18 | journal = {{IEEE} Trans. Neural Networks Learn. Syst.},
19 | year = {2022}
20 | }
21 |
22 | ## Environment
23 | - Python 3.8
24 | - higher 0.2.1
25 | - scikit-learn 0.24.1
26 | - scikit-surprise 1.1.1
27 | - tensorflow 2.7
28 | - pytorch 1.10
29 | - numpy 1.20.1
30 |
31 | ## Data
32 |
33 | The datasets used in our experiments can be found in the [data](../data) folder.
34 |
35 |
36 | ## Command Line Parameters
37 | `run.py` is the main entry point of the program. It accepts several parameters:
38 |
39 | - `data_set`: the recommendation dataset used in the experiment (Possible values: "ml100k", "filmTrust", "automotive", "yelp", "GroceryFood", "ToolHome" and "AppAndroid". Default is "ml100k").
40 | - `attack_num`: number of injected profiles, i.e., the A value (Default is 50).
41 | - `filler_num`: number of fillers, i.e., the P value (Default is 36).
42 | - `surrogate`: surrogate RS model (Possible values: "WMF", "ItemAE", "SVDpp", and "PMF". Default is "WMF").
43 | - `target_ids`: id of the target item (Default is 62).
44 | - `attacker_list`: the attack method, as used in `run.sh` (Possible values: "AUSHplus", "AIA", "WGANAttacker", "DCGANAttacker", "RandomAttacker", "AverageAttacker", "BandwagonAttacker", and "SegmentAttacker"). Note that "AUSHplus" is the name of Leg-UP in our implementation; the victim recommender defaults to "WMF".
45 | - `cuda_id`: GPU id (Default is 0).
46 | - `use_cuda`: whether to run on GPU (1) or CPU (0) (Default is 1).
47 |
48 | ## Examples
49 |
50 | Please refer to `run.sh` for some running examples.
51 |
52 |
53 |
54 |
--------------------------------------------------------------------------------
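For concreteness, the commands in `run.sh` (listed earlier) invoke `run.py` in the following form; one representative example, with the remaining parameters left at the defaults given in the README:

```bash
python run.py --data_set ml100k --target_ids 62 --attacker_list AUSHplus >log_ml100k_62
```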
/Leg-UP/models/detector/SDLib/main/SDLib.py:
--------------------------------------------------------------------------------
1 | import sys
2 |
3 | # sys.path.append("../")
4 | from re import split
5 | from models.detector.SDLib.tool.config import Config, LineConfig
6 | from models.detector.SDLib.tool.dataSplit import *
7 | from models.detector.SDLib.tool.file import FileIO
8 |
9 |
10 | class SDLib(object):
11 | def __init__(self, config):
12 | self.trainingData = [] # training data
13 | self.testData = [] # testData
14 | self.relation = []
15 | self.measure = []
16 | self.config = config
17 | self.ratingConfig = LineConfig(config['ratings.setup'])
18 | self.labels = FileIO.loadLabels(config['label'])
19 |
20 | if self.config.contains('evaluation.setup'):
21 | self.evaluation = LineConfig(config['evaluation.setup'])
22 |
23 | if self.evaluation.contains('-testSet'):
24 | # specify testSet
25 | self.trainingData = FileIO.loadDataSet(config, config['ratings'])
26 | self.testData = FileIO.loadDataSet(config, self.evaluation['-testSet'], bTest=True)
27 |
28 | elif self.evaluation.contains('-ap'):
29 | # auto partition
30 | self.trainingData = FileIO.loadDataSet(config, config['ratings'])
31 | self.trainingData, self.testData = DataSplit. \
32 | dataSplit(self.trainingData, test_ratio=float(self.evaluation['-ap']))
33 |
34 | elif self.evaluation.contains('-cv'):
35 | # cross validation
36 | self.trainingData = FileIO.loadDataSet(config, config['ratings'])
37 | # self.trainingData,self.testData = DataSplit.crossValidation(self.trainingData,int(self.evaluation['-cv']))
38 |
39 | else:
40 | print('Evaluation is not well configured!')
41 | exit(-1)
42 |
43 | if config.contains('social'):
44 | self.socialConfig = LineConfig(self.config['social.setup'])
45 | self.relation = FileIO.loadRelationship(config, self.config['social'])
46 | # print('preprocessing...')
47 |
48 | def execute(self):
49 | # import the algorithm module
50 | importStr = 'from models.detector.SDLib.method.' + self.config['methodName'] + ' import ' + self.config['methodName']
51 | exec(importStr)
52 | if self.config.contains('social'):
53 | method = self.config[
54 | 'methodName'] + '(self.config,self.trainingData,self.testData,self.labels,self.relation)'
55 | else:
56 | method = self.config['methodName'] + '(self.config,self.trainingData,self.testData,self.labels)'
57 | ans = eval(method).execute()
58 | return [float(i) for i in ans]
59 |
60 |
61 | def run(measure, algor, order):
62 | measure[order] = algor.execute()
63 |
--------------------------------------------------------------------------------
/Leg-UP/models/detector/SDLib/main/plot.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # @Time : 2019/8/30 9:24
3 | # @Author : chensi
4 | # @File : plot.py
5 | # @Software : PyCharm
6 | # @Description : None
7 |
8 | import numpy as np
9 | import pandas as pd
10 | import os
11 | import matplotlib.pyplot as plt
12 |
13 | attack_methods = ["segment", "average", "random", "bandwagon", "gan"]
14 | attack_name = ["Segment", "Random", "Average", "Bandwagon", "Ours"]
15 | attack_method = "segment"
16 | # random = [155, 383, 920, 941, 892]
17 | # tail = [1480, 844, 1202, 1301, 2035]
18 | # targets = random + tail
19 | random = [5, 395, 181, 565, 254]
20 | tail = [601, 623, 619, 64, 558]
21 | targets = random + tail
22 | # targets = [62, 1077, 785, 1419, 1257] + [1319, 1612, 1509, 1545, 1373]
23 | # for attack_method in attack_methods:
24 | # # dir = '../results/ciao_DegreeSAD/' + attack_method
25 | # dir = '../results/filmTrust_0903_FAP/' + attack_method
26 | # pathDir = os.listdir(dir)
27 | # data_to_write = []
28 | # iid_idx = 0
29 | # for i in range(len(pathDir)):
30 | # # if "5-fold-cv" not in pathDir[i]: continue
31 | # iid = targets[iid_idx]
32 | # iid_idx += 1
33 | # # load result
34 | # lines = []
35 | # if 'FAP' not in pathDir[i]: continue
36 | # with open(dir + '/' + pathDir[i], 'r') as fin:
37 | # for line in fin:
38 | # lines.append(line)
39 | # res = lines[3].strip('\n').split(' ')
40 | # while '' in res: res.remove('')
41 | # res = [str(iid)] + res
42 | # data_to_write.append('\t'.join(res))
43 | # with open(dir + '/' + "result_" + attack_method, 'w') as fout:
44 | # fout.write('\n'.join(data_to_write))
45 |
46 | names = ['iid', 'label', 'precision', 'recall', 'f1', 'support']
47 | # pre_results = {}
48 | # recall_results = {}
49 | P, R, N = [], [], []
50 | for i in range(len(attack_methods)):
51 | attack_method = attack_methods[i]
52 | path = '../results/filmTrust_0903_FAP/' + attack_method + "/result_" + attack_method
53 | # path = '../results/ml100k_DegreeSAD/' + attack_method + "/result_" + attack_method
54 | # path = '../results/ciao_DegreeSAD/' + attack_method + "/result_" + attack_method
55 | result = pd.read_csv(path, sep='\t', names=names, engine='python')
56 | p = result.precision.values.tolist()
57 | r = result.recall.values.tolist()
58 | n = [attack_name[i]] * len(r)
59 | P.extend(p)
60 | R.extend(r)
61 | N.extend(n)
62 | # pre_results[attack_name[i]] =p
63 | # recall_results[attack_name[i]] =r
64 | data_pre = pd.DataFrame({"method": N, "precision": P, "recall": R})
65 | # data_pre = pd.DataFrame(pre_results)
66 | data_pre.boxplot(column='precision', by=['method'])
67 | plt.title("Attack Detection")
68 | plt.ylabel("precision", )
69 | plt.xlabel("Attack Method")
70 | plt.show()
71 | a = 1
72 | #
--------------------------------------------------------------------------------
/Leg-UP/models/detector/SDLib/data/social.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | #from structure import sparseMatrix,new_sparseMatrix
3 | from models.detector.SDLib.tool.config import Config,LineConfig
4 | from models.detector.SDLib.tool.qmath import normalize
5 | import os.path
6 | from re import split
7 |
8 | class SocialDAO(object):
9 | def __init__(self,conf,relation=list()):
10 | self.config = conf
11 | self.user = {} #used to store the order of users
12 | self.relation = relation
13 | self.followees = {}
14 | self.followers = {}
15 | self.trustMatrix = self.__generateSet()
16 |
17 | def __generateSet(self):
18 | #triple = []
19 | for line in self.relation:
20 | userId1,userId2,weight = line
21 | #add relations to dict
22 | if userId1 not in self.followees:
23 | self.followees[userId1] = {}
24 | self.followees[userId1][userId2] = weight
25 | if userId2 not in self.followers:
26 | self.followers[userId2] = {}
27 | self.followers[userId2][userId1] = weight
28 | # order the user
29 | if userId1 not in self.user:
30 | self.user[userId1] = len(self.user)
31 | if userId2 not in self.user:
32 | self.user[userId2] = len(self.user)
33 | #triple.append([self.user[userId1], self.user[userId2], weight])
34 | #return new_sparseMatrix.SparseMatrix(triple)
35 |
36 | # def row(self,u):
37 | # #return user u's followees
38 | # return self.trustMatrix.row(self.user[u])
39 | #
40 | # def col(self,u):
41 | # #return user u's followers
42 | # return self.trustMatrix.col(self.user[u])
43 | #
44 | # def elem(self,u1,u2):
45 | # return self.trustMatrix.elem(u1,u2)
46 |
47 | def weight(self,u1,u2):
48 | if u1 in self.followees and u2 in self.followees[u1]:
49 | return self.followees[u1][u2]
50 | else:
51 | return 0
52 |
53 | # def trustSize(self):
54 | # return self.trustMatrix.size
55 |
56 | def getFollowers(self,u):
57 | if u in self.followers:
58 | return self.followers[u]
59 | else:
60 | return {}
61 |
62 | def getFollowees(self,u):
63 | if u in self.followees:
64 | return self.followees[u]
65 | else:
66 | return {}
67 |
68 | def hasFollowee(self,u1,u2):
69 | if u1 in self.followees:
70 | if u2 in self.followees[u1]:
71 | return True
72 | else:
73 | return False
74 | return False
75 |
76 | def hasFollower(self,u1,u2):
77 | if u1 in self.followers:
78 | if u2 in self.followers[u1]:
79 | return True
80 | else:
81 | return False
82 | return False
83 |
--------------------------------------------------------------------------------
/AUSH/README.md:
--------------------------------------------------------------------------------
1 |
2 |
3 | # Attacking Recommender Systems with Augmented User Profiles
4 |
5 | This repository contains one shilling attack algorithm, AUSH, published in the following paper [[ACM Library](https://dl.acm.org/doi/10.1145/3340531.3411884)] [[arXiv Preprint](https://arxiv.org/abs/2005.08164)]:
6 |
7 | > Chen Lin, Si Chen, Hui Li, Yanghua Xiao, Lianyun Li, and Qian Yang. 2020. Attacking Recommender Systems with Augmented User Profiles. In CIKM. 855–864.
8 |
9 | Please kindly cite our paper if you use it:
10 |
11 | @inproceedings{Lin2020Attacking,
12 | author = {Chen Lin and
13 | Si Chen and
14 | Hui Li and
15 | Yanghua Xiao and
16 | Lianyun Li and
17 | Qian Yang},
18 | title = {Attacking Recommender Systems with Augmented User Profiles},
19 | booktitle = {{CIKM}},
20 | pages = {855--864},
21 | year = {2020}
22 | }
23 |
24 | ## How to run AUSH
25 | ### Step1: Pre-processing
26 | Use `test_main/data_preprocess.py` to transform Amazon 5-core ratings into tuples `[userid, itemid, normalized float rating]`.
27 |
28 | Update on Dec 9, 2021: We have released several recommendation datasets for testing shilling attacks including the three datasets used in our CIKM'20 paper. You can directly use files in the [data](/data) folder for experiments. Please copy the data folder to the folder of AUSH before execution.
29 |
30 | ### Step2: Initialize
31 | Use `test_main/data_preprocess.py` to:
32 | - select the attack target
33 | - select the attack number (fixed to 50 by default)
34 | - select the filler size
35 | - choose the selected items and target users
36 | - configure the settings for the bandwagon attack
37 |
38 | ### Step3: Training and Evaluation
39 |
40 | - Train baseline attack models
41 | ```shell script
42 | python main_baseline_attack.py --dataset filmTrust --attack_methods average,segment,random,bandwagon --targets 601,623,619,64,558 --filler_num 36 --bandwagon_selected 103,98,115 --sample_filler 1
43 | ```
44 | - Evaluate baseline attack models
45 | ```shell script
46 | python main_train_rec.py --dataset filmTrust --attack_method segment --model_name NMF_25 --target_ids 601,623,619,64,558 --filler_num 36
47 | ```
48 |
49 | - RS performance before attack
50 | ```shell script
51 | python main_train_rec.py --dataset filmTrust --attack_method no --model_name NMF_25 --target_ids 601,623,619,64,558 --filler_num 36
52 | ```
53 |
54 | - Train AUSH
55 | ```shell script
56 | python main_gan_attack.py --dataset filmTrust --target_ids 601,623,619,64,558 --filler_num 36
57 | ```
58 |
59 | - Evaluate AUSH
60 | ```shell script
61 | python main_train_rec.py --dataset filmTrust --attack_method gan --model_name NMF_25 --target_ids 601,623,619,64,558 --filler_num 36
62 | ```
63 |
64 | - Comparative Study
65 | ```shell script
66 | python main_eval_attack.py --dataset filmTrust --filler_num 36 --attack_methods gan,segment,average --rec_model_names NMF_25 --target_ids 601,623,619,64,558
67 |
68 | python main_eval_similarity.py --dataset filmTrust --filler_num 36 --targets 601,623 --bandwagon_selected 103,98,115
69 | ```
70 |
--------------------------------------------------------------------------------
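Step 1 of the AUSH README above leaves the normalization scheme unspecified; a rough editorial sketch of the described transformation (the actual logic lives in `test_main/data_preprocess.py`; the input filename and the scaling of 1-5 stars to [0, 1] are assumptions, not necessarily what the repository does):

```python
import pandas as pd

# Hypothetical Amazon 5-core ratings file: user, item, rating, timestamp
df = pd.read_csv('ratings_Automotive.csv',
                 names=['user', 'item', 'rating', 'timestamp'])
df['uid'] = df['user'].astype('category').cat.codes   # re-index users to 0..N-1
df['iid'] = df['item'].astype('category').cat.codes   # re-index items to 0..M-1
df['rating'] = (df['rating'] - 1.0) / 4.0             # assumed scaling: 1-5 stars -> [0, 1]
df[['uid', 'iid', 'rating']].to_csv('automotive_ratings.dat',
                                    sep='\t', index=False, header=False)
```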
/Leg-UP/models/detector/SDLib/tool/config.py:
--------------------------------------------------------------------------------
1 | import os.path
2 | from os.path import abspath
3 |
4 |
5 | class Config(object):
6 | def __init__(self, fileName):
7 | self.config = {}
8 | self.readConfiguration(fileName)
9 |
10 | def __getitem__(self, item):
11 | if not self.contains(item):
12 | print('parameter ' + item + ' is invalid!')
13 | exit(-1)
14 | return self.config[item]
15 |
16 | def getOptions(self, item):
17 | if not self.contains(item):
18 | print('parameter ' + item + ' is invalid!')
19 | exit(-1)
20 | return self.config[item]
21 |
22 | def contains(self, key):
23 | return key in self.config
24 | # return self.config.has_key(key)
25 |
26 | def get_keys(self):
27 | return self.config.keys()
28 |
29 | def readConfiguration(self, fileName):
30 | if not os.path.exists(abspath(fileName)):
31 | print('config file is not found!')
32 | raise IOError
33 | with open(fileName) as f:
34 | for ind, line in enumerate(f):
35 | if line.strip() != '':
36 | try:
37 | key, value = line.strip().split('=')
38 | self.config[key] = value
39 | except ValueError:
40 | print('config file is not in the correct format! Error Line:%d' % (ind))
41 |
42 |
43 | class LineConfig(object):
44 | def __init__(self, content):
45 | self.line = content.strip().split(' ')
46 | self.options = {}
47 | self.mainOption = False
48 | if self.line[0] == 'on':
49 | self.mainOption = True
50 | elif self.line[0] == 'off':
51 | self.mainOption = False
52 | for i, item in enumerate(self.line):
53 | if (item.startswith('-') or item.startswith('--')) and not item[1:].isdigit():
54 | ind = i + 1
55 | for j, sub in enumerate(self.line[ind:]):
56 | if (sub.startswith('-') or sub.startswith('--')) and not sub[1:].isdigit():
57 | ind = j
58 | break
59 | if j == len(self.line[ind:]) - 1:
60 | ind = j + 1
61 | break
62 | try:
63 | self.options[item] = ' '.join(self.line[i + 1:i + 1 + ind])
64 | except IndexError:
65 | self.options[item] = 1
66 |
67 | def __getitem__(self, item):
68 | if not self.contains(item):
69 | print('parameter ' + item + ' is invalid!')
70 | exit(-1)
71 | return self.options[item]
72 |
73 | def getOption(self, key):
74 | if not self.contains(key):
75 | print('parameter ' + key + ' is invalid!')
76 | exit(-1)
77 | return self.options[key]
78 |
79 | def isMainOn(self):
80 | return self.mainOption
81 |
82 | def contains(self, key):
83 | return key in self.options
84 | # return self.options.has_key(key)
85 |
--------------------------------------------------------------------------------
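A short sketch of the two helpers above (editorial). `Config` reads `key=value` lines from a `.conf` file; `LineConfig` parses option strings such as the `evaluation.setup` value consumed by `SDLib.py`, where `-ap 0.3` means auto-partition with a 0.3 test ratio.

```python
from models.detector.SDLib.tool.config import Config, LineConfig

line = LineConfig('-ap 0.3')
if line.contains('-ap'):
    test_ratio = float(line['-ap'])    # -> 0.3

# conf = Config('../config/FAP.conf')  # path as used by main/main.py
# ratings_path = conf['ratings']
```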
/.gitignore:
--------------------------------------------------------------------------------
1 | .idea/
2 | build/
3 | .DS_Store
4 |
5 | # Byte-compiled / optimized / DLL files
6 | __pycache__/
7 | *.py[cod]
8 | *$py.class
9 |
10 | # C extensions
11 | *.so
12 |
13 | # Distribution / packaging
14 | .Python
15 | build/
16 | develop-eggs/
17 | dist/
18 | downloads/
19 | eggs/
20 | .eggs/
21 | lib/
22 | lib64/
23 | parts/
24 | sdist/
25 | var/
26 | wheels/
27 | share/python-wheels/
28 | *.egg-info/
29 | .installed.cfg
30 | *.egg
31 | MANIFEST
32 |
33 | # PyInstaller
34 | # Usually these files are written by a python script from a template
35 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
36 | *.manifest
37 | *.spec
38 |
39 | # Installer logs
40 | pip-log.txt
41 | pip-delete-this-directory.txt
42 |
43 | # Unit test / coverage reports
44 | htmlcov/
45 | .tox/
46 | .nox/
47 | .coverage
48 | .coverage.*
49 | .cache
50 | nosetests.xml
51 | coverage.xml
52 | *.cover
53 | *.py,cover
54 | .hypothesis/
55 | .pytest_cache/
56 | cover/
57 |
58 | # Translations
59 | *.mo
60 | *.pot
61 |
62 | # Django stuff:
63 | *.log
64 | local_settings.py
65 | db.sqlite3
66 | db.sqlite3-journal
67 |
68 | # Flask stuff:
69 | instance/
70 | .webassets-cache
71 |
72 | # Scrapy stuff:
73 | .scrapy
74 |
75 | # Sphinx documentation
76 | docs/_build/
77 |
78 | # PyBuilder
79 | .pybuilder/
80 | target/
81 |
82 | # Jupyter Notebook
83 | .ipynb_checkpoints
84 |
85 | # IPython
86 | profile_default/
87 | ipython_config.py
88 |
89 | # pyenv
90 | # For a library or package, you might want to ignore these files since the code is
91 | # intended to run in multiple environments; otherwise, check them in:
92 | # .python-version
93 |
94 | # pipenv
95 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
96 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
97 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
98 | # install all needed dependencies.
99 | #Pipfile.lock
100 |
101 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
102 | __pypackages__/
103 |
104 | # Celery stuff
105 | celerybeat-schedule
106 | celerybeat.pid
107 |
108 | # SageMath parsed files
109 | *.sage.py
110 |
111 | # Environments
112 | .env
113 | .venv
114 | env/
115 | venv/
116 | ENV/
117 | env.bak/
118 | venv.bak/
119 |
120 | # Spyder project settings
121 | .spyderproject
122 | .spyproject
123 |
124 | # Rope project settings
125 | .ropeproject
126 |
127 | # mkdocs documentation
128 | /site
129 |
130 | # mypy
131 | .mypy_cache/
132 | .dmypy.json
133 | dmypy.json
134 |
135 | # Pyre type checker
136 | .pyre/
137 |
138 | # pytype static type analyzer
139 | .pytype/
140 |
141 | # Cython debug symbols
142 | cython_debug/
143 |
144 | # PyCharm
146 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
146 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
147 | # and can be added to the global gitignore or merged into this file. For a more nuclear
148 | # option (not recommended) you can uncomment the following to ignore the entire idea folder.
149 | #.idea/
--------------------------------------------------------------------------------
/Leg-UP/models/detector/SDLib/main/main.py:
--------------------------------------------------------------------------------
1 | import sys
2 |
3 | sys.path.append("../")
4 | from SDLib import SDLib
5 | from tool.config import Config
6 |
7 | if __name__ == '__main__':
8 |
9 | print('=' * 80)
10 | print(' SDLib: A Python library used to collect shilling detection methods.')
11 | print('=' * 80)
12 | print('Supervised Methods:')
13 | print('1. DegreeSAD 2.CoDetector 3.BayesDetector\n')
14 | print('Semi-Supervised Methods:')
15 | print('4. SemiSAD\n')
16 | print('Unsupervised Methods:')
17 | print('5. PCASelectUsers 6. FAP 7.timeIndex\n')
18 | print('-' * 80)
19 | algor = -1
20 | conf = -1
21 | order = 6 # input('please enter the num of the method to run it:')
22 | import time
23 |
24 | s = time.perf_counter()
25 | # if order == 0:
26 | # try:
27 | # import seaborn as sns
28 | # except ImportError:
29 | # print '!!!To obtain nice data charts, ' \
30 | # 'we strongly recommend you to install the third-party package !!!'
31 | # conf = Config('../config/visual/visual.conf')
32 | # Display(conf).render()
33 | # exit(0)
34 |
35 | if order == 1:
36 | conf = Config('../config/DegreeSAD_tmp.conf')
37 |
38 | elif order == 2:
39 | conf = Config('../config/CoDetector.conf')
40 |
41 | elif order == 3:
42 | conf = Config('../config/BayesDetector.conf')
43 |
44 | elif order == 4:
45 | conf = Config('../config/SemiSAD.conf')
46 |
47 | elif order == 5:
48 | conf = Config('../config/PCASelectUsers.conf')
49 |
50 | elif order == 6:
51 | conf = Config('../config/FAP.conf')
52 | elif order == 7:
53 | conf = Config('../config/timeIndex.conf')
54 |
55 | else:
56 | print('Error num!')
57 | exit(-1)
58 |
59 | # ori conf info
60 | lines = []
61 | with open('../config/FAP.conf', 'r') as fin:
62 | for line in fin:
63 | lines.append(line)
64 | random = [5, 395, 181, 565, 254]
65 | tail = [601, 623, 619, 64, 558]
66 | targets = random + tail
67 | # targets = [62, 1077, 785, 1419, 1257] + [1319, 1612, 1509, 1545, 1373]
68 | attack_methods = ["segment", "average", "random", "bandwagon", "gan"]
69 | for attack_method in attack_methods[0:]:
70 | for iid in targets:
71 | path = "../dataset/GAN/filmTrust/filmTrust_" + str(iid) + "_" + attack_method + "_50_36.dat"
72 | # path = "../dataset/GAN/ciao_1/ciao_" + str(iid) + "_" + attack_method + "_50_15.dat"
73 | lines[0] = 'ratings=' + path + '\n'
74 | # lines[-1] = "output.setup=on -dir ../results/ciao_DegreeSAD/" + attack_method + '/'
75 | lines[-1] = "output.setup=on -dir ../results/filmTrust_0903_FAP/" + attack_method + '/'
76 | with open('../config/FAP_t.conf', 'w') as fout:
77 | fout.write(''.join(lines))
78 | sd = SDLib(Config('../config/FAP_t.conf'))
79 | result = sd.execute()
80 | # conf = Config('../config/DegreeSAD_t.conf')
81 | # conf = Config('../config/FAP_t.conf')
82 | # sd = SDLib(conf)
83 | # sd.execute()
84 | e = time.perf_counter()
85 | print("Run time: %f s" % (e - s))
86 |
--------------------------------------------------------------------------------
/Leg-UP/models/detector/SDLib/baseclass/SDetection.py:
--------------------------------------------------------------------------------
1 | from models.detector.SDLib.data.rating import RatingDAO
2 | from models.detector.SDLib.tool.config import Config,LineConfig
3 | from os.path import abspath
4 | from time import strftime,localtime,time
5 | from models.detector.SDLib.tool.file import FileIO
6 | from sklearn.metrics import classification_report
7 | class SDetection(object):
8 |
9 | def __init__(self,conf,trainingSet=None,testSet=None,labels=None,fold='[1]'):
10 | self.config = conf
11 | self.isSave = False
12 | self.isLoad = False
13 | self.foldInfo = fold
14 | self.labels = labels
15 | self.dao = RatingDAO(self.config, trainingSet, testSet)
16 | self.training = []
17 | self.trainingLabels = []
18 | self.test = []
19 | self.testLabels = []
20 |
21 | def readConfiguration(self):
22 | self.algorName = self.config['methodName']
23 | self.output = LineConfig(self.config['output.setup'])
24 |
25 |
26 | def printAlgorConfig(self):
27 | "show algorithm's configuration"
28 | # print ('Algorithm:',self.config['methodName'])
29 | # print ('Ratings dataSet:',abspath(self.config['ratings']))
30 | # if LineConfig(self.config['evaluation.setup']).contains('-testSet'):
31 | # print ('Test set:',abspath(LineConfig(self.config['evaluation.setup']).getOption('-testSet')))
32 | #print 'Count of the users in training set: ',len()
33 | # print ('Training set size: (user count: %d, item count %d, record count: %d)' %(self.dao.trainingSize()))
34 | # print ('Test set size: (user count: %d, item count %d, record count: %d)' %(self.dao.testSize()))
35 | # print ('='*80)
36 | pass
37 |
38 | def initModel(self):
39 | pass
40 |
41 | def buildModel(self):
42 | pass
43 |
44 | def saveModel(self):
45 | pass
46 |
47 | def loadModel(self):
48 | pass
49 |
50 | def predict(self):
51 | pass
52 |
53 | def execute(self):
54 | self.readConfiguration()
55 | if self.foldInfo == '[1]':
56 | self.printAlgorConfig()
57 | # load model from disk or build model
58 | if self.isLoad:
59 | # print ('Loading model %s...' % (self.foldInfo))
60 | self.loadModel()
61 | else:
62 | # print ('Initializing model %s...' % (self.foldInfo))
63 | self.initModel()
64 | # print ('Building Model %s...' % (self.foldInfo))
65 | self.buildModel()
66 |
67 | # predict the ratings or item ranking
68 | # print ('Predicting %s...' % (self.foldInfo))
69 | prediction = self.predict()
70 | report = classification_report(self.testLabels, prediction, digits=4)
71 | # currentTime = currentTime = strftime("%Y-%m-%d %H-%M-%S", localtime(time()))
72 | # FileIO.writeFile(self.output['-dir'],self.algorName+'@'+currentTime+self.foldInfo,report)
73 | # save model
74 | # if self.isSave:
75 | # print ('Saving model %s...' % (self.foldInfo))
76 | # self.saveModel()
77 | # print (report)
78 | res = [[j for j in i.split(' ') if len(j)] for i in report.split('\n') if len(i.strip())>0][:3]
79 | precision, recall = res[-1][1:3]
80 | return precision, recall#report
--------------------------------------------------------------------------------
/Leg-UP/utils/data_loader.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # @Time : 2020/11/27 15:34
3 | # @Author : chensi
4 | # @File : data_loader.py
5 | # @Software : PyCharm
6 | # @Description : None
7 |
8 | import random
9 | import numpy as np
10 | import torch
11 |
12 | # tf = None
13 | # try:
14 | # import tensorflow.compat.v1 as tf
15 | #
16 | # tf.disable_v2_behavior()
17 | # except:
18 | # import tensorflow as tf
19 |
20 | seed = 1234
21 | random.seed(seed)
22 | np.random.seed(seed)
23 | # tf.set_random_seed(seed)
24 | torch.manual_seed(seed)
25 | torch.cuda.manual_seed_all(seed)
26 | import pandas as pd
27 | import numpy as np
28 | from scipy.sparse import csr_matrix
29 |
30 |
31 | class DataLoader(object):
32 |
33 | def __init__(self, path_train, path_test, header=None, sep='\t', threshold=4, verbose=False):
34 | self.path_train = path_train
35 | self.path_test = path_test
36 | self.header = header if header is not None else ['user_id', 'item_id', 'rating']
37 | self.sep = sep
38 | self.threshold = threshold
39 | self.verbose = verbose
40 |
41 | # load file as dataFrame
42 | # self.train_data, self.test_data, self.n_users, self.n_items = self.load_file_as_dataFrame()
43 | # dataframe to matrix
44 | # self.train_matrix, self.train_matrix_implicit = self.dataFrame_to_matrix(self.train_data)
45 | # self.test_matrix, self.test_matrix_implicit = self.dataFrame_to_matrix(self.test_data)
46 |
47 | def load_file_as_dataFrame(self):
48 | # load data to pandas dataframe
49 | if self.verbose:
50 | print("\nload data from %s ..." % self.path_train, flush=True)
51 |
52 | train_data = pd.read_csv(self.path_train, sep=self.sep, names=self.header, engine='python')
53 | train_data = train_data.loc[:, ['user_id', 'item_id', 'rating']]
54 |
55 | if self.verbose:
56 | print("load data from %s ..." % self.path_test, flush=True)
57 | test_data = pd.read_csv(self.path_test, sep=self.sep, names=self.header, engine='python').loc[:,
58 | ['user_id', 'item_id', 'rating']]
59 | test_data = test_data.loc[:, ['user_id', 'item_id', 'rating']]
60 |
61 | # data statics
62 |
63 | n_users = max(max(test_data.user_id.unique()), max(train_data.user_id.unique())) + 1
64 | n_items = max(max(test_data.item_id.unique()), max(train_data.item_id.unique())) + 1
65 |
66 | if self.verbose:
67 | print("Number of users : %d , Number of items : %d. " % (n_users, n_items), flush=True)
68 | print("Train size : %d , Test size : %d. " % (train_data.shape[0], test_data.shape[0]), flush=True)
69 |
70 | return train_data, test_data, n_users, n_items
71 |
72 | def dataFrame_to_matrix(self, data_frame, n_users, n_items):
73 | row, col, rating, implicit_rating = [], [], [], []
74 | for line in data_frame.itertuples():
75 | uid, iid, r = list(line)[1:]
76 | implicit_r = 1 if r >= self.threshold else 0
77 |
78 | row.append(uid)
79 | col.append(iid)
80 | rating.append(r)
81 | implicit_rating.append(implicit_r)
82 |
83 | matrix = csr_matrix((rating, (row, col)), shape=(n_users, n_items))
84 | matrix_implicit = csr_matrix((implicit_rating, (row, col)), shape=(n_users, n_items))
85 | return matrix, matrix_implicit
86 |
--------------------------------------------------------------------------------
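The constructor above only records paths and options; the commented-out lines sketch the intended flow. A minimal usage sketch, assuming the ml100k split files exist under ./data/ml100k/ following the naming convention used in run.py:

loader = DataLoader(path_train='./data/ml100k/ml100k_train.dat',
                    path_test='./data/ml100k/ml100k_test.dat',
                    header=['user_id', 'item_id', 'rating'],
                    sep='\t', threshold=4, verbose=True)
train_df, test_df, n_users, n_items = loader.load_file_as_dataFrame()
train_matrix, train_matrix_implicit = loader.dataFrame_to_matrix(train_df, n_users, n_items)
print(train_matrix.shape)  # (n_users, n_items) sparse explicit-rating matrix
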
/AUSH/model/attack_model/gan_attack/models.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # @Time : 2019/8/24 10:43
3 | # @Author : chensi
4 | # @File : models.py
5 | # @Software : PyCharm
6 | # @Description : None
7 |
8 | try:
9 | import tensorflow.compat.v1 as tf
10 |
11 | tf.disable_v2_behavior()
12 | except:
13 | import tensorflow as tf
14 | import math
15 |
16 |
17 | class GAN_Attacker:
18 | def __init__(self):
19 | print("GAN Attack model")
20 |
21 | def DIS(self, input, inputDim, h, activation, hiddenLayers, _reuse=False):
22 | # input->hidden
23 | y, _, W, b = self.FullyConnectedLayer(input, inputDim, h, activation, "dis", 0, reuse=_reuse)
24 |
25 | # stacked hidden layers
26 | for layer in range(hiddenLayers - 1):
27 | y, _, W, b = self.FullyConnectedLayer(y, h, h, activation, "dis", layer + 1, reuse=_reuse)
28 |
29 | # hidden -> output
30 | y, _, W, b = self.FullyConnectedLayer(y, h, 1, "none", "dis", hiddenLayers + 1, reuse=_reuse)
31 |
32 | return y
33 |
34 | def GEN(self, input, num_item, h, outputDim, activation, decay, name="gen", _reuse=False):
35 | """
36 | input : sparse filler vectors
37 | output : reconstructed selected vector
38 | """
39 | # input + tanh
40 | # input_tanh = tf.nn.tanh(input)
41 |
42 | # input->hidden
43 |
44 | y, L2norm, W, b = self.FullyConnectedLayer(input, num_item, h // decay, activation, name, 0, reuse=_reuse)
45 |
46 | # stacked hidden layers
47 | h = h // decay
48 | layer = 0
49 | # for layer in range(hiddenLayers - 1):
50 | while True:
51 | y, this_L2, W, b = self.FullyConnectedLayer(y, h, h // decay, activation, name, layer + 1, reuse=_reuse)
52 | L2norm = L2norm + this_L2
53 | layer += 1
54 | if h // decay > outputDim:
55 | h = h // decay
56 | else:
57 | break
58 | # hidden -> output
59 | y, this_L2, W, b = self.FullyConnectedLayer(y, h // decay, outputDim, "none", name, layer + 1, reuse=_reuse)
60 | L2norm = L2norm + this_L2
61 | y = tf.nn.sigmoid(y) * 5
62 | return y, L2norm
63 |
64 | def FullyConnectedLayer(self, input, inputDim, outputDim, activation, model, layer, reuse=False):
65 | scale1 = math.sqrt(6 / (inputDim + outputDim))
66 |
67 | wName = model + "_W" + str(layer)
68 | bName = model + "_B" + str(layer)
69 |
70 | with tf.variable_scope(model) as scope:
71 |
72 | if reuse == True:
73 | scope.reuse_variables()
74 |
75 | W = tf.get_variable(wName, [inputDim, outputDim],
76 | initializer=tf.random_uniform_initializer(-scale1, scale1))
77 | b = tf.get_variable(bName, [outputDim], initializer=tf.random_uniform_initializer(-0.01, 0.01))
78 |
79 | y = tf.matmul(input, W) + b
80 |
81 | L2norm = tf.nn.l2_loss(W) + tf.nn.l2_loss(b)
82 |
83 | if activation == "none":
84 | y = tf.identity(y, name="output")
85 | return y, L2norm, W, b
86 |
87 | elif activation == "sigmoid":
88 | return tf.nn.sigmoid(y), L2norm, W, b
89 |
90 | elif activation == "tanh":
91 | return tf.nn.tanh(y), L2norm, W, b
92 | elif activation == "relu":
93 | return tf.nn.relu(y), L2norm, W, b
94 |
--------------------------------------------------------------------------------
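This file only defines the generator and discriminator building blocks; their composition lives in the trainer. A rough wiring sketch (shapes and hyper-parameters are assumed for illustration, and tf refers to the compat.v1 import at the top of the file):

attacker = GAN_Attacker()
num_item, selected_num = 1682, 4   # assumed: ml100k item count, 4 selected items

filler_input = tf.placeholder(tf.float32, [None, num_item])
real_selected = tf.placeholder(tf.float32, [None, selected_num])

# generator: sparse filler vector -> ratings on the selected items, scaled into [0, 5]
fake_selected, g_l2 = attacker.GEN(filler_input, num_item, h=400,
                                   outputDim=selected_num, activation="sigmoid",
                                   decay=2, name="gen")
# discriminator scores real and generated selected-item vectors; the second call reuses its variables
d_real = attacker.DIS(real_selected, selected_num, h=150,
                      activation="sigmoid", hiddenLayers=2, _reuse=False)
d_fake = attacker.DIS(fake_selected, selected_num, h=150,
                      activation="sigmoid", hiddenLayers=2, _reuse=True)
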
/Leg-UP/models/detector/SDLib/tool/plot.py:
--------------------------------------------------------------------------------
1 | import matplotlib.pyplot as plt
2 | import numpy as np
3 | #import seaborn as sns
4 |
5 | def drawLine(x,y,labels,xLabel,yLabel,title):
6 | f, ax = plt.subplots(1, 1, figsize=(10, 6), sharex=True)
7 |
8 | #f.tight_layout()
9 | #sns.set(style="darkgrid")
10 |
11 | palette = ['blue','orange','red','green','purple','pink']
12 | # for i in range(len(ax)):
13 | # x1 = range(0, len(x))
14 | #ax.set_xlim(min(x1)-0.2,max(x1)+0.2)
15 | # mini = 10000;max = -10000
16 | # for label in labels:
17 | # if mini>min(y[i][label]):
18 | # mini = min(y[i][label])
19 | # if max<max(y[i][label]):
--------------------------------------------------------------------------------
/Leg-UP/models/detector/SDLib/tool/qmath.py:
--------------------------------------------------------------------------------
87 | ind1 = new_x1 > 0
88 | ind2 = new_x2 > 0
89 | try:
90 | mean_x1 = float(new_x1.sum())/ind1.sum()
91 | mean_x2 = float(new_x2.sum())/ind2.sum()
92 | new_x1 = new_x1 - mean_x1
93 | new_x2 = new_x2 - mean_x2
94 | sum = new_x1.dot(new_x2)
95 | denom = sqrt((new_x1.dot(new_x1))*(new_x2.dot(new_x2)))
96 | return float(sum) / denom
97 | except ZeroDivisionError:
98 | return 0
99 |
100 |
101 | def similarity(x1,x2,sim):
102 | if sim == 'pcc':
103 | return pearson_sp(x1,x2)
104 | if sim == 'euclidean':
105 | return euclidean(x1,x2)
106 | else:
107 | return cosine_sp(x1, x2)
108 |
109 |
110 | def normalize(vec,maxVal,minVal):
111 | 'get the normalized value using min-max normalization'
112 | if maxVal > minVal:
113 | return float(vec-minVal)/(maxVal-minVal)+0.01
114 | elif maxVal==minVal:
115 | return vec/maxVal
116 | else:
117 | print ('error... maximum value is less than minimum value.')
118 | raise ArithmeticError
119 |
120 | def sigmoid(val):
121 | return 1/(1+exp(-val))
122 |
123 |
124 | def denormalize(vec,maxVal,minVal):
125 | return minVal+(vec-0.01)*(maxVal-minVal)
126 |
--------------------------------------------------------------------------------
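A quick sanity check of the min-max helpers above, worked out by hand from the visible definitions (sigmoid additionally needs exp from the file's imports, which are not shown in this excerpt):

print(normalize(4.0, maxVal=5.0, minVal=1.0))     # (4-1)/(5-1) + 0.01 = 0.76
print(denormalize(0.76, maxVal=5.0, minVal=1.0))  # inverse mapping, recovers the original 4.0
print(sigmoid(0.0))                               # 0.5
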
/AUSH/test_main/main_train_rec.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # @Time : 2019/8/23 19:29
3 | # @Author : chensi
4 | # @File : main_train_rec.py
5 | # @Software : PyCharm
6 | # @Description : None
7 | import sys, os, argparse
8 |
9 | sys.path.append("../")
10 | from utils.load_data.load_data import *
11 | from model.trainer_rec import rec_trainer
12 | from model.trainer_rec_surprise import basic_rec
13 | from utils.attack.data_to_file import target_prediction_writer
14 |
15 |
16 | # os.environ["CUDA_VISIBLE_DEVICES"] = '0'
17 |
18 |
19 | def train_rec(data_set_name, model_name, attack_method, target_id, is_train):
20 | if attack_method == "no":
21 | attack_method = ""
22 | model_path = "../result/model_ckpt/" + '_'.join([model_name, data_set_name]) + ".ckpt"
23 | else:
24 | model_path = "../result/model_ckpt/" + '_'.join([model_name, data_set_name, attack_method]) + ".ckpt"
25 | path_train = "../data/data_attacked/" + '_'.join([data_set_name, str(target_id), attack_method]) + ".dat"
26 | path_test = "../data/data/" + data_set_name + "_test.dat"
27 | if attack_method == "": path_train = "../data/data/" + data_set_name + "_train.dat"
28 |
29 | # load_data
30 | dataset_class = load_data(path_train=path_train, path_test=path_test,
31 | header=['user_id', 'item_id', 'rating'],
32 | sep='\t', print_log=True)
33 | # train rec
34 | if model_name in ["IAutoRec", "UAutoRec", "NNMF"]:
35 | predictions, hit_ratios = rec_trainer(model_name, dataset_class, target_id, is_train, model_path)
36 | else:
37 | predictions, hit_ratios = basic_rec(model_name, path_train, path_test, target_id)
38 |
39 | # write to file
40 | dst_path = "../result/pred_result/" + '_'.join([model_name, data_set_name, str(target_id), attack_method])
41 | dst_path = dst_path.strip('_')
42 | target_prediction_writer(predictions, hit_ratios, dst_path)
43 |
44 |
45 | def parse_arg():
46 | parser = argparse.ArgumentParser()
47 |
48 | parser.add_argument('--dataset', type=str, default='automotive', help='data set name: automotive, filmTrust or ml100k')
49 |
50 | parser.add_argument('--model_name', type=str, default='NMF_25', help='NNMF,IAutoRec,UAutoRec,NMF_25')
51 |
52 | parser.add_argument('--attack_method', type=str, default='G1',
53 | help='no,gan,segment,average,random,bandwagon')
54 |
55 | # filmTrust:random = [5, 395, 181, 565, 254] tail = [601, 623, 619, 64, 558]
56 | # ml100k:random = [62, 1077, 785, 1419, 1257] tail = [1319, 1612, 1509, 1545, 1373]
57 | # 5,395,181,565,254,601,623,619,64,558
58 | # 62,1077,785,1419,1257,1319,1612,1509,1545,1373
59 | # 1166,1574,759,494,549,1272,1728,1662,450,1456,595,566,764,1187,1816,1478,1721,2294,2413,1148
60 | parser.add_argument('--target_ids', type=str, default='866',
61 | help='attack target')
62 |
63 | parser.add_argument('--attack_num', type=int, default=50,
64 | help='num of attack fake user,50 for ml100k and filmTrust')
65 |
66 | parser.add_argument('--filler_num', type=int, default=4,
67 | help='num of filler items each fake user,90 for ml100k,36 for filmTrust')
68 |
69 | args = parser.parse_args()
70 | args.target_ids = list(map(int, args.target_ids.split(',')))
71 | return args
72 |
73 |
74 | if __name__ == '__main__':
75 | """parse args"""
76 | args = parse_arg()
77 |
78 | """train"""
79 | if args.attack_method == 'no':
80 | attack_method_ = args.attack_method
81 | else:
82 | attack_method_ = '_'.join([args.attack_method, str(args.attack_num), str(args.filler_num)])
83 | is_train = 1
84 | train_rec(args.dataset, args.model_name, attack_method_, args.target_ids[0], is_train=is_train)
85 | for target in args.target_ids[1:]:
86 | if args.attack_method == 'no':
87 | is_train = 0
88 | train_rec(args.dataset, args.model_name, attack_method_, target, is_train=is_train)
89 |
--------------------------------------------------------------------------------
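A worked example of the file-naming scheme train_rec derives from its arguments, using illustrative values (ml100k, model NMF_25, target 62, a 'gan' attack with 50 fake users and 90 fillers):

attack_method_ = '_'.join(['gan', str(50), str(90)])  # -> 'gan_50_90'
path_train = "../data/data_attacked/" + '_'.join(['ml100k', str(62), attack_method_]) + ".dat"
model_path = "../result/model_ckpt/" + '_'.join(['NMF_25', 'ml100k', attack_method_]) + ".ckpt"
dst_path = ("../result/pred_result/" + '_'.join(['NMF_25', 'ml100k', str(62), attack_method_])).strip('_')
# path_train == '../data/data_attacked/ml100k_62_gan_50_90.dat'
# model_path == '../result/model_ckpt/NMF_25_ml100k_gan_50_90.ckpt'
# dst_path   == '../result/pred_result/NMF_25_ml100k_62_gan_50_90'
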
/data/ml100k/ml100k_target_users:
--------------------------------------------------------------------------------
1 | 1257 0,513,4,12,13,526,21,535,540,541,544,41,42,43,553,47,50,55,58,59,61,63,576,68,585,587,84,86,599,93,605,617,619,108,620,621,114,118,631,120,124,129,641,644,143,144,659,660,150,664,670,681,683,176,693,183,196,197,708,710,711,715,720,209,214,220,221,737,740,231,745,746,747,748,750,243,245,757,247,250,253,770,772,266,267,780,270,785,275,789,795,797,290,803,806,295,300,814,304,306,307,310,311,312,822,829,322,324,327,329,331,843,845,846,847,849,344,346,349,866,867,359,362,874,369,881,372,885,886,888,377,378,891,895,386,388,900,393,906,396,915,404,405,920,921,415,424,428,942,434,436,449,451,452,454,456,458,471,473,478,483,486,492,494,495,496,505
2 | 1419 0,513,641,6,647,520,649,263,392,393,13,653,527,912,275,660,21,534,150,282,795,540,157,797,415,544,290,294,298,43,300,428,302,177,310,822,58,59,314,193,449,326,839,456,714,715,333,846,335,605,93,94,221,349,98,483,360,748,492,750,494,505,882,243,756,757,372,631,889,378,891,124,895
3 | 785 0,513,4,13,526,21,535,540,541,544,41,42,43,553,47,50,55,58,59,61,63,576,68,585,587,84,86,599,605,617,619,108,620,621,114,118,631,120,124,129,641,644,647,143,144,660,150,664,670,681,683,176,177,693,183,196,197,708,710,711,715,720,209,214,220,221,737,740,231,745,746,747,748,750,243,245,757,247,250,253,770,772,266,267,780,270,785,795,797,290,803,806,295,300,302,814,304,306,307,310,311,312,822,829,322,324,327,329,331,843,846,847,849,344,345,346,349,866,867,359,362,874,879,369,881,372,885,886,888,377,378,891,895,386,388,900,391,392,393,906,396,915,405,920,921,415,424,942,436,449,451,452,454,456,458,471,473,478,483,486,492,494,495,496,505
4 | 1077 513,129,642,388,262,393,17,785,532,150,535,406,285,415,291,294,297,43,302,306,310,183,312,313,188,63,576,449,69,199,456,329,715,591,209,467,342,471,346,605,93,863,96,229,617,746,377,494,881,116,502,889,507
5 | 62 386,5,6,263,390,13,653,147,660,21,534,150,536,915,282,406,157,415,290,292,804,294,40,296,298,43,560,177,822,58,314,61,63,193,197,326,839,456,715,333,591,338,468,853,98,362,882,243,372,757,248,765,127
6 | 1319 0,6,15,17,20,22,23,537,547,550,552,42,48,560,565,58,61,63,71,591,592,91,93,605,95,607,98,108,621,628,117,137,143,662,663,173,177,187,193,200,212,213,215,221,231,232,746,750,757,248,249,762,763,263,266,268,275,290,291,292,804,294,295,296,805,302,822,322,327,839,333,338,853,342,344,351,863,869,360,879,373,885,888,377,378,891,384,388,405,406,415,928,931,420,424,428,429,942,436,456,467,473,479,492,496,498,499
7 | 1612 0,513,4,12,13,526,21,535,540,541,544,41,42,43,553,47,50,55,58,59,61,63,576,68,585,587,84,86,599,93,605,617,619,108,620,621,114,118,631,120,124,129,641,644,647,143,144,659,660,150,664,670,681,683,176,177,693,183,196,197,708,710,711,715,720,209,214,220,221,737,740,231,745,746,747,748,750,243,245,757,247,250,253,770,772,266,267,780,270,785,275,789,795,797,290,803,806,295,300,302,814,304,306,307,310,311,312,822,829,322,324,327,329,331,843,845,846,847,849,344,345,346,349,866,867,359,362,874,879,369,881,372,885,886,888,377,378,891,895,386,388,900,391,392,393,906,396,915,404,405,920,921,415,424,428,942,434,436,449,451,452,454,456,458,471,473,478,483,486,492,494,495,496,505
8 | 1509 0,5,6,13,526,15,17,23,537,27,541,547,550,42,48,560,565,58,63,71,591,89,91,93,98,108,621,114,628,137,143,662,663,173,177,193,709,200,212,213,215,220,221,746,757,248,249,762,263,266,275,290,292,804,805,296,307,822,314,830,832,837,327,843,333,338,853,341,342,344,351,863,869,360,362,879,882,373,885,888,377,378,891,396,405,415,928,420,932,424,428,429,942,436,452,456,457,467,471,473,479,496,498
9 | 1545 0,6,15,17,20,22,23,537,547,550,552,42,48,560,565,58,61,63,71,591,592,91,93,605,95,607,98,108,621,628,117,137,143,662,663,173,177,187,193,200,212,213,215,221,231,232,746,750,757,248,249,762,763,263,266,268,275,290,291,292,804,294,295,296,805,302,822,322,327,839,333,338,853,342,344,351,863,869,360,879,373,885,888,377,378,891,384,388,405,406,415,928,931,420,424,428,429,942,436,456,467,473,479,492,496,498,499
10 | 1373 0,513,641,6,647,520,649,263,392,393,13,653,527,912,275,660,21,534,150,282,795,540,157,797,415,544,290,294,298,43,300,428,302,177,310,822,58,59,314,193,449,326,839,456,714,715,333,846,335,605,93,94,221,349,98,483,360,748,492,750,494,505,882,243,756,757,372,631,889,378,891,124,895
11 | 690 0,6,15,41,58,59,63,69,93,94,129,150,177,199,221,248,291,310,338,342,373,386,393,397,449,454,456,471,483,487,505,513,535,550,642,647,652,660,715,726,803,806,814,845,849,863,881,885,896
--------------------------------------------------------------------------------
/AUSH/model/attack_model/baseline.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # @Time : 2019/8/23 10:46
3 | # @Author : chensi
4 | # @File : baseline_new.py
5 | # @Software : PyCharm
6 | # @Description : None
7 | import numpy as np
8 | import math
9 |
10 |
11 | class BaselineAttack:
12 |
13 | def __init__(self, attack_num, filler_num, n_items, target_id,
14 | global_mean, global_std, item_means, item_stds, r_max, r_min, fixed_filler_indicator=None):
15 | #
16 | self.attack_num = attack_num
17 | self.filler_num = filler_num
18 | self.n_items = n_items
19 | self.target_id = target_id
20 | self.global_mean = global_mean
21 | self.global_std = global_std
22 | self.item_means = item_means
23 | self.item_stds = item_stds
24 | self.r_max = r_max
25 | self.r_min = r_min
26 |
27 | self.fixed_filler_indicator = fixed_filler_indicator
28 |
29 | def RandomAttack(self):
30 | filler_candis = list(set(range(self.n_items)) - {self.target_id})
31 | fake_profiles = np.zeros(shape=[self.attack_num, self.n_items], dtype=float)
32 | # target
33 | fake_profiles[:, self.target_id] = self.r_max
34 | # fillers
35 | for i in range(self.attack_num):
36 | if self.fixed_filler_indicator is None:
37 | fillers = np.random.choice(filler_candis, size=self.filler_num, replace=False)
38 | else:
39 |
40 | fillers = np.where(np.array(self.fixed_filler_indicator[i])== 1)[0]
41 | ratings = np.random.normal(loc=self.global_mean, scale=self.global_std, size=self.filler_num)
42 | for f_id, r in zip(fillers, ratings):
43 | fake_profiles[i][f_id] = max(math.exp(-5), min(self.r_max, r))
44 | return fake_profiles
45 |
46 | def BandwagonAttack(self, selected_ids):
47 | filler_candis = list(set(range(self.n_items)) - set([self.target_id] + selected_ids))
48 | fake_profiles = np.zeros(shape=[self.attack_num, self.n_items], dtype=float)
49 | # target & selected patch
50 | fake_profiles[:, [self.target_id] + selected_ids] = self.r_max
51 | # fillers
52 | for i in range(self.attack_num):
53 | if self.fixed_filler_indicator is None:
54 | fillers = np.random.choice(filler_candis, size=self.filler_num, replace=False)
55 | else:
56 |
57 | fillers = np.where(np.array(self.fixed_filler_indicator[i])== 1)[0]
58 | ratings = np.random.normal(loc=self.global_mean, scale=self.global_std, size=self.filler_num)
59 | for f_id, r in zip(fillers, ratings):
60 | fake_profiles[i][f_id] = max(math.exp(-5), min(self.r_max, r))
61 | return fake_profiles
62 |
63 | def AverageAttack(self):
64 | filler_candis = list(set(range(self.n_items)) - {self.target_id})
65 | fake_profiles = np.zeros(shape=[self.attack_num, self.n_items], dtype=float)
66 | # target
67 | fake_profiles[:, self.target_id] = self.r_max
68 | # fillers
69 | fn_normal = lambda iid: np.random.normal(loc=self.item_means[iid], scale=self.item_stds[iid], size=1)[0]
70 | for i in range(self.attack_num):
71 | if self.fixed_filler_indicator is None:
72 | fillers = np.random.choice(filler_candis, size=self.filler_num, replace=False)
73 | else:
74 |
75 | fillers = np.where(np.array(self.fixed_filler_indicator[i])== 1)[0]
76 | ratings = map(fn_normal, fillers)
77 | for f_id, r in zip(fillers, ratings):
78 | fake_profiles[i][f_id] = max(math.exp(-5), min(self.r_max, r))
79 | return fake_profiles
80 |
81 | def SegmentAttack(self, selected_ids):
82 | filler_candis = list(set(range(self.n_items)) - set([self.target_id] + selected_ids))
83 | fake_profiles = np.zeros(shape=[self.attack_num, self.n_items], dtype=float)
84 | # target & selected patch
85 | fake_profiles[:, [self.target_id] + selected_ids] = self.r_max
86 | # fillers
87 | for i in range(self.attack_num):
88 | if self.fixed_filler_indicator is None:
89 | fillers = np.random.choice(filler_candis, size=self.filler_num, replace=False)
90 | else:
91 |
92 | fillers = np.where(np.array(self.fixed_filler_indicator[i])== 1)[0]
93 | fake_profiles[i][fillers] = self.r_min
94 | return fake_profiles
95 |
--------------------------------------------------------------------------------
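A minimal sketch of generating fake profiles with the baselines above. The rating statistics here are placeholders; in the repository they would come from the training data (e.g. load_data.get_all_mean_std()):

import numpy as np

n_items = 1682
attacker = BaselineAttack(attack_num=50, filler_num=90, n_items=n_items, target_id=62,
                          global_mean=3.53, global_std=1.12,
                          item_means=[3.53] * n_items, item_stds=[1.12] * n_items,
                          r_max=5.0, r_min=1.0)
random_profiles = attacker.RandomAttack()                        # shape (50, 1682)
bandwagon_profiles = attacker.BandwagonAttack(selected_ids=[49, 99, 180])
print(random_profiles.shape, bandwagon_profiles[:, 62].min())    # (50, 1682) 5.0 -- target pushed to r_max
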
/Leg-UP/run.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # @Time : 2020/12/27 19:57
3 | # @Author : chensi
4 | # @File : run.py
5 | # @Software : PyCharm
6 | # @Description : None
7 |
8 |
9 | import argparse, os
10 |
11 | os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
12 | os.environ["CUDA_VISIBLE_DEVICES"] = "1, 2, 3"
13 |
14 | PythonCommand = 'python' # 'D:\Anaconda3\envs\py38_tf2\python' if os.path.exists('D:\Anaconda3') else 'python'
15 |
16 |
17 | class Run:
18 | def __init__(self):
19 | self.args = self.parse_args()
20 | self.args.attacker_list = self.args.attacker_list.split(',')
21 | self.args.recommender_list = self.args.recommender_list.split(',')
22 |
23 | def execute(self):
24 |
25 | self.step_1_Rec()
26 |
27 | self.step_2_Attack()
28 |
29 | return
30 |
31 | def parse_args(self):
32 |
33 | parser = argparse.ArgumentParser()
34 | parser.add_argument('--data_set', type=str, default='ml100k') # ml100k,filmTrust,automotive
35 | parser.add_argument('--attack_num', type=int, default=50)
36 | parser.add_argument('--filler_num', type=int, default=36)
37 | parser.add_argument('--cuda_id', type=int, default=3)
38 | parser.add_argument('--use_cuda', type=int, default=0)
39 | parser.add_argument('--batch_size_S', type=int, default=64)
40 | parser.add_argument('--batch_size_D', type=int, default=64)
41 | parser.add_argument("--surrogate", type=str, default="WMF")
42 |
43 |
44 | # ml100k:62,1077,785,1419,1257
45 | # filmTrust:5,395,181,565,254
46 | # automotive:119,422,594,884,1593
47 | parser.add_argument('--target_ids', type=str, default='62')
48 | # AUSH,AUSHplus,RecsysAttacker,DCGAN,WGAN,SegmentAttacker,BandwagonAttacker,AverageAttacker,RandomAttacker
49 | parser.add_argument('--attacker_list', type=str, default='AUSHplus')
50 | # SVD,NMF,SlopeOne,IAutoRec,UAutoRec,NeuMF
51 | parser.add_argument('--recommender_list', type=str, default='SVD,NMF,SlopeOne,IAutoRec,UAutoRec,NeuMF')
52 | return parser.parse_args()
53 |
54 | def step_1_Rec(self):
55 | print('step_1')
56 | args = self.args
57 | """
58 |
59 | data_set/target_ids/train_path/test_path/model_path/target_prediction_path_prefix
60 |
61 | """
62 | args_dict = {
63 | 'exe_model_lib': 'recommender',
64 | 'train_path': './data/%s/%s_train.dat' % (args.data_set, args.data_set),
65 | 'test_path': './data/%s/%s_test.dat' % (args.data_set, args.data_set),
66 | }
67 | args_dict.update(vars(args))
68 |
69 | #
70 | for recommender in args.recommender_list:
71 | #
72 | cur_args_dict = {
73 | 'exe_model_class': recommender,
74 | 'model_path': './results/model_saved/%s/%s_%s' % (args.data_set, args.data_set, recommender),
75 | 'target_prediction_path_prefix': './results/performance/mid_results/%s/%s_%s' % (
76 | args.data_set, args.data_set, recommender),
77 | }
78 | cur_args_dict.update(args_dict)
79 |
80 | args_str = ' '.join(
81 | ["--%s %s" % (k, v) for (k, v) in cur_args_dict.items()])
82 | #
83 | print('%s ./execute_model.py %s' % (PythonCommand, args_str))
84 | print(os.system('%s ./execute_model.py %s' % (PythonCommand, args_str)))
85 |
86 | def step_2_Attack(self):
87 | print('step_2')
88 | args = self.args
89 |
90 | args_dict = {
91 | 'exe_model_lib': 'attacker',
92 | # 'filler_num': 4,
93 | # 'epoch': 50
94 | }
95 | args_dict.update(vars(args))
96 |
97 | for target_id in map(int, args.target_ids.split(',')):
98 | for attacker in args.attacker_list:
99 | cur_args_dict = {
100 | 'exe_model_class': attacker,
101 | 'target_id': target_id,
102 | 'injected_path': './results/data_attacked/%s/%s_%s_%d.data' % (
103 | args.data_set, args.data_set, attacker, target_id)
104 |
105 | }
106 | cur_args_dict.update(args_dict)
107 |
108 | args_str = ' '.join(["--%s %s" % (k, v) for (k, v) in cur_args_dict.items()])
109 | print(os.system('%s ./execute_model.py %s' % (PythonCommand, args_str)))
110 | # break
111 |
112 | model = Run()
113 | model.execute()
114 |
115 |
116 |
--------------------------------------------------------------------------------
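step_1_Rec and step_2_Attack shell out to execute_model.py once per recommender/attacker. A reduced illustration of how the command string is assembled (only a subset of the flags is shown):

cur_args_dict = {'exe_model_lib': 'recommender', 'exe_model_class': 'SVD',
                 'data_set': 'ml100k',
                 'train_path': './data/ml100k/ml100k_train.dat',
                 'test_path': './data/ml100k/ml100k_test.dat'}
args_str = ' '.join(["--%s %s" % (k, v) for (k, v) in cur_args_dict.items()])
print('python ./execute_model.py ' + args_str)
# python ./execute_model.py --exe_model_lib recommender --exe_model_class SVD --data_set ml100k ...
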
/Leg-UP/preprocess_data.py:
--------------------------------------------------------------------------------
1 | import os
2 | import numpy as np
3 | import random
4 | import pandas as pd
5 | from pathlib import Path
6 | import json
7 | from sklearn.model_selection import train_test_split
8 |
9 | data_file = Path('data')
10 | data_set_name = 'GroceryFood'
11 | data_file = data_file / data_set_name / (data_set_name + 'Raw.json')
12 |
13 | df_gro = pd.DataFrame(columns=['user', 'item', 'score'])
14 | data = []
15 | with open(data_file, encoding='utf-8') as f:
16 | for line in f:
17 | data.append(json.loads(line))
18 | print(f'data_set_len:{len(data)}')
19 | print(f'data head:\n{data[:5]}')
20 |
21 | user_set = set()
22 | item_set = set()
23 | data_list = []
24 | for idx, d in enumerate(data):
25 | item_set.add(d['asin'])
26 | user_set.add(d["reviewerID"])
27 | data_list.append([d["reviewerID"], d['asin'], d['overall']])
28 |
29 | raw_df = pd.DataFrame(data_list, columns=['user', 'item', 'score'])
30 |
31 | user2idx = {x: idx for idx, x in enumerate(user_set)}
32 | item2idx = {x: idx for idx, x in enumerate(item_set)}
33 |
34 |
35 | def fun(item):
36 | return user2idx[item]
37 |
38 | def fun2(item):
39 | return item2idx[item]
40 |
41 | raw_df['user'] = raw_df['user'].apply(fun)
42 | raw_df['item'] = raw_df['item'].apply(fun2)
43 |
44 | print(f'raw data frame:')
45 | print(raw_df)
46 |
47 | user_cont = raw_df.groupby('user').count()
48 | filter_ratings = {i for i in list(user_cont[user_cont['item'] >= 17].index)}
49 |
50 | after_filter_df = pd.DataFrame(columns=['user', 'item', 'score'])
51 |
52 |
53 | all_data = []
54 | for i in filter_ratings:
55 | each_i = raw_df[raw_df['user'] == i]
56 | all_data.append(each_i.values)
57 | after_filter_df = after_filter_df.append(each_i)
58 |
59 | train_list = []
60 | test_list = []
61 | train_df = pd.DataFrame(columns=['user', 'item', 'score'])
62 | test_df = pd.DataFrame(columns=['user', 'item', 'score'])
63 | for d in all_data:
64 | train, test = train_test_split(d, test_size=0.1, random_state=42)
65 | df = pd.DataFrame(train, columns=['user', 'item', 'score'])
66 | df2 = pd.DataFrame(test, columns=['user', 'item', 'score'])
67 | train_df = train_df.append(df)
68 | test_df = test_df.append(df2)
69 | print(f'train_df:{train_df}')
70 | print(f'test_df:{test_df}')
71 |
72 | item_count = raw_df.groupby('item').count().sort_values(by='user', ascending=False)
73 | print(item_count)
74 | target_item_first = [i for i in item_count[:int(0.1 * len(item_count))].index.values]
75 | target_item_last = [i for i in item_count[int(0.9 * len(item_count)):].index.values]
76 | target_item = target_item_first + target_item_last
77 | with open(f'data/{data_set_name}_target_item', 'w') as f:
78 | for i in target_item:
79 | f.write(str(int(i)))
80 | f.write('\n')
81 |
82 | with open(f'data/{data_set_name}_selected_items', 'a+') as f:
83 | for i in target_item:
84 | select_item = [i]
85 | while True:
86 | a = random.choice(target_item_first)
87 | if a not in select_item:
88 | select_item.append(a)
89 | if len(select_item) == 4:
90 | break
91 | f.write(str(select_item[0]) + '\t')
92 | f.write(str(select_item[1]) + ',' + str(select_item[2]) + ',' + str(select_item[3]))
93 | f.write('\n')
94 |
95 |
96 |
97 | user_cont = raw_df.groupby('user').count()
98 | filter_ratings = {i for i in list(user_cont[user_cont['item'] >= 17].index)}
99 |
100 | after_filter_df = pd.DataFrame(columns=['user', 'item', 'score'])
101 |
102 | all_data = []
103 | for i in filter_ratings:
104 | each_i = raw_df[raw_df['user'] == i]
105 | all_data.append(each_i.values)
106 | after_filter_df = after_filter_df.append(each_i)
107 |
108 | # all_data = []
109 | # for i in filter_ratings:
110 | # each_i = raw_df[raw_df['user'] == i]
111 | # all_data.append(each_i.values)
112 | # after_filter_df = after_filter_df.append(each_i)
113 |
114 |
115 |
116 | # dfv = train_df.values
117 | # print(dfv)
118 | # with open(f'data/{data_set_name}_train.dat', 'a', encoding='utf-8') as f:
119 | # for d in dfv:
120 | # for idx, i in enumerate(d):
121 | # if idx != 2:f.write(str(int(i)))
122 | # else : f.write(str(i))
123 | # if idx != 2: f.write('\t')
124 | # f.write('\n')
125 | # dfv = train_df.values
126 | #
127 | # dfv = test_df.values
128 | # with open(f'data/{data_set_name}_test.dat', 'a', encoding='utf-8') as f:
129 | # for d in dfv:
130 | # for idx, i in enumerate(d):
131 | # if idx != 2: f.write(str(int(i)))
132 | # else: f.write(str(i))
133 | # if idx != 2: f.write('\t')
134 | # f.write('\n')
135 |
--------------------------------------------------------------------------------
/AUSH/test_main/main_gan_attack.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # @Time : 2019/8/24 11:08
3 | # @Author : chensi
4 | # @File : main_gan_attack.py
5 | # @Software : PyCharm
6 | # @Description : None
7 |
8 | import sys
9 |
10 | sys.path.append("../")
11 | import os, argparse
12 | from utils.load_data.load_data import load_data
13 | from model.attack_model.gan_attack.trainer import Train_GAN_Attacker
14 | from utils.load_data.load_attack_info import load_attack_info
15 | from utils.attack.data_to_file import *
16 | import numpy as np
17 |
18 |
19 | # os.environ["CUDA_VISIBLE_DEVICES"] = '2'
20 |
21 |
22 | def gan_attack(data_set_name, attack_method, target_id, is_train, write_to_file=1, final_attack_setting=None):
23 |
24 | path_train = '../data/data/' + data_set_name + '_train.dat'
25 | path_test = '../data/data/' + data_set_name + '_test.dat'
26 | attack_info_path = ["../data/data/" + data_set_name + "_selected_items",
27 | "../data/data/" + data_set_name + "_target_users"]
28 | model_path = "../result/model_ckpt/" + '_'.join([data_set_name, attack_method, str(target_id)]) + ".ckpt"
29 |
30 |
31 | attack_info = load_attack_info(*attack_info_path)
32 | dataset_class = load_data(path_train=path_train, path_test=path_test, header=['user_id', 'item_id', 'rating'],
33 | sep='\t', print_log=True)
34 |
35 | if len(attack_method.split('_')[1:]) == 2:
36 | attack_num, filler_num = map(int, attack_method.split('_')[1:])
37 | filler_method = 0
38 | else:
39 | attack_num, filler_num, filler_method = map(int, attack_method.split('_')[1:])
40 | selected_items = attack_info[target_id][0]
41 |
42 | #
43 | gan_attacker = Train_GAN_Attacker(dataset_class, params_D=None, params_G=None, target_id=target_id,
44 | selected_id_list=selected_items,
45 | filler_num=filler_num, attack_num=attack_num, filler_method=filler_method)
46 |
47 | fake_profiles, real_profiles, filler_indicator = gan_attacker.execute(is_train=is_train, model_path=model_path,
48 | final_attack_setting=final_attack_setting)
49 | gan_attacker.sess.close()
50 |
51 | # """inject and write to file"""
52 | if write_to_file == 1:
53 | dst_path = "../data/data_attacked/" + '_'.join([data_set_name, str(target_id), attack_method]) + ".dat"
54 | attacked_file_writer(path_train, dst_path, fake_profiles, dataset_class.n_users)
55 | return fake_profiles, real_profiles, filler_indicator
56 |
57 |
58 | def parse_arg():
59 | parser = argparse.ArgumentParser()
60 |
61 | parser.add_argument('--dataset', type=str, default='ml100k', help='filmTrust/ml100k/grocery')
62 |
63 | # filmTrust:random = [5, 395, 181, 565, 254] tail = [601, 623, 619, 64, 558]
64 | # ml100k:random = [62, 1077, 785, 1419, 1257] tail = [1319, 1612, 1509, 1545, 1373]
65 | # 5,395,181,565,254,601,623,619,64,558
66 | # 62,1077,785,1419,1257,1319,1612,1509,1545,1373
67 | parser.add_argument('--target_ids', type=str, default='62,1077,785,1419,1257,1319,1612,1509,1545,1373',
68 | help='attack target list')
69 |
70 | parser.add_argument('--attack_num', type=int, default=50,
71 | help='num of attack fake user,50 for ml100k and filmTrust')
72 |
73 | parser.add_argument('--filler_num', type=int, default=90,
74 | help='num of filler items each fake user,90 for ml100k,36 for filmTrust')
75 |
76 | parser.add_argument('--filler_method', type=str, default='', help='0/1/2/3')
77 |
78 | parser.add_argument('--write_to_file', type=int, default=1, help='1: also write the fake profiles to file, 0: only return them as arrays')
79 | #
80 | args = parser.parse_args()
81 | #
82 | args.target_ids = list(map(int, args.target_ids.split(',')))
83 | return args
84 |
85 |
86 | if __name__ == '__main__':
87 | """parse args"""
88 | args = parse_arg()
89 | """train"""
90 | is_train = 1
91 | attack_method = '_'.join(['gan', str(args.attack_num), str(args.filler_num), str(args.filler_method)]).strip('_')
92 |
93 | #
94 | for target_id in args.target_ids:
95 |
96 | attackSetting_path = '_'.join(map(str, [args.dataset, args.attack_num, args.filler_num, target_id]))
97 | attackSetting_path = "../data/data_attacked/" + attackSetting_path + '_attackSetting'
98 | real_profiles, filler_indicator = np.load(attackSetting_path + '.npy', allow_pickle=True)
99 | final_attack_setting = [args.attack_num, real_profiles, filler_indicator]
100 |
101 |
102 | _ = gan_attack(args.dataset, attack_method, target_id, is_train,
103 | write_to_file=args.write_to_file,
104 | final_attack_setting=final_attack_setting)
105 |
--------------------------------------------------------------------------------
/AUSH/utils/load_data/load_data.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # @Time : 2019/8/22 10:07
3 | # @Author : chensi
4 | # @File : load_data_new.py
5 | # @Software : PyCharm
6 | # @Description : None
7 |
8 |
9 | import pandas as pd
10 | import numpy as np
11 |
12 | from scipy.sparse import csr_matrix
13 |
14 |
15 | class load_data():
16 |
17 | def __init__(self, path_train, path_test,
18 | header=None, sep='\t', threshold=4, print_log=True):
19 | self.path_train = path_train
20 | self.path_test = path_test
21 | self.header = header if header is not None else ['user_id', 'item_id', 'rating']
22 | self.sep = sep
23 | self.threshold = threshold
24 | self.print_log = print_log
25 |
26 | self._main_load()
27 |
28 | def _main_load(self):
29 | # load data
30 | self._load_file()
31 | #
32 | # dataframe to matrix
33 | self.train_matrix, self.train_matrix_implicit = self._data_to_matrix(self.train_data)
34 | self.test_matrix, self.test_matrix_implicit = self._data_to_matrix(self.test_data)
35 |
36 | def _load_file(self):
37 | if self.print_log:
38 | print("load train/test data\t:\n", self.path_train)
39 | self.train_data = pd.read_csv(self.path_train, sep=self.sep, names=self.header, engine='python').loc[:,
40 | ['user_id', 'item_id', 'rating']]
41 | self.test_data = pd.read_csv(self.path_test, sep=self.sep, names=self.header, engine='python').loc[:,
42 | ['user_id', 'item_id', 'rating']]
43 |
44 | self.n_users = len(set(self.test_data.user_id.unique()) | set(self.train_data.user_id.unique()))
45 | self.n_items = len(set(self.test_data.item_id.unique()) | set(self.train_data.item_id.unique()))
46 |
47 | if self.print_log:
48 | print("Number of users:", self.n_users, ",Number of items:", self.n_items, flush=True)
49 | print("Train size:", self.train_data.shape[0], ",Test size:", self.test_data.shape[0], flush=True)
50 |
51 | def _data_to_matrix(self, data_frame):
52 | row, col, rating, implicit_rating = [], [], [], []
53 | for line in data_frame.itertuples():
54 | uid, iid, r = list(line)[1:]
55 | implicit_r = 1 if r >= self.threshold else 0
56 |
57 | row.append(uid)
58 | col.append(iid)
59 | rating.append(r)
60 | implicit_rating.append(implicit_r)
61 |
62 | matrix = csr_matrix((rating, (row, col)), shape=(self.n_users, self.n_items))
63 | matrix_implicit = csr_matrix((implicit_rating, (row, col)), shape=(self.n_users, self.n_items))
64 | return matrix, matrix_implicit
65 |
66 | def get_global_mean_std(self):
67 | return self.train_matrix.data.mean(), self.train_matrix.data.std()
68 |
69 | def get_all_mean_std(self):
70 | flag = 1
71 | for v in ['global_mean', 'global_std', 'item_means', 'item_stds']:
72 | if not hasattr(self, v):
73 | flag = 0
74 | break
75 | if flag == 0:
76 | global_mean, global_std = self.get_global_mean_std()
77 | item_means, item_stds = [global_mean] * self.n_items, [global_std] * self.n_items
78 | train_matrix_t = self.train_matrix.transpose()
79 | for iid in range(self.n_items):
80 | item_vec = train_matrix_t.getrow(iid).toarray()[0]
81 | ratings = item_vec[np.nonzero(item_vec)]
82 | if len(ratings) > 0:
83 | item_means[iid], item_stds[iid] = ratings.mean(), ratings.std()
84 | self.global_mean, self.global_std, self.item_means, self.item_stds \
85 | = global_mean, global_std, item_means, item_stds
86 | return self.global_mean, self.global_std, self.item_means, self.item_stds
87 |
88 | def get_item_pop(self):
89 | # item_pops = [0] * self.n_items
90 | # train_matrix_t = self.train_matrix.transpose()
91 | # for iid in range(self.n_items):
92 | # item_vec = train_matrix_t.getrow(iid).toarray()[0]
93 | # item_pops[iid] = len(np.nonzero(item_vec)[0])
94 | item_pops_dict = dict(self.train_data.groupby('item_id').size())
95 | item_pops = [0] * self.n_items
96 | for iid in item_pops_dict.keys():
97 | item_pops[iid] = item_pops_dict[iid]
98 | return item_pops
99 |
100 | def get_user_nonrated_items(self):
101 | non_rated_indicator = self.train_matrix.toarray()
102 | non_rated_indicator[non_rated_indicator > 0] = 1
103 | non_rated_indicator = 1 - non_rated_indicator
104 | user_norated_items = {}
105 | for uid in range(self.n_users):
106 | user_norated_items[uid] = list(non_rated_indicator[uid].nonzero()[0])
107 | return user_norated_items
108 |
109 | def get_item_nonrated_users(self, item_id):
110 | item_vec = np.squeeze(self.train_matrix[:, item_id].toarray())
111 | # item_vec = self.train_matrix.toarray().transpose()[item_id]
112 | item_vec[item_vec > 0] = 1
113 | non_rated_indicator = 1 - item_vec
114 | return list(non_rated_indicator.nonzero()[0])
115 |
--------------------------------------------------------------------------------
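The statistics helpers above feed the baseline attacks. A minimal sketch of pulling them out of a loaded dataset, assuming the ml100k split files exist under ../data/data/ as in the test_main scripts:

dataset_class = load_data(path_train='../data/data/ml100k_train.dat',
                          path_test='../data/data/ml100k_test.dat',
                          header=['user_id', 'item_id', 'rating'],
                          sep='\t', print_log=True)
global_mean, global_std, item_means, item_stds = dataset_class.get_all_mean_std()
item_pops = dataset_class.get_item_pop()   # number of training ratings per item id
print(global_mean, global_std, len(item_means), max(item_pops))
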
/Leg-UP/utils/utils.py:
--------------------------------------------------------------------------------
1 | import random
2 |
3 | import numpy as np
4 | import torch
5 | from scipy import sparse
6 |
7 | EPSILON = 1e-12
8 | _fixed_target_items = {
9 | "head": np.asarray([259, 2272, 3010, 6737, 7690]),
10 | "tail": np.asarray([5611, 9213, 10359, 10395, 12308]),
11 | "upper_torso": np.asarray([1181, 1200, 2725, 4228, 6688]),
12 | "lower_torso": np.asarray([3227, 5810, 7402, 9272, 10551])
13 | }
14 |
15 |
16 | def sample_target_items(train_data, n_samples, popularity, use_fix=False):
17 | """Sample target items with certain popularity."""
18 | if popularity not in ["head", "upper_torso", "lower_torso", "tail"]:
19 | raise ValueError("Unknown popularity type {}.".format(popularity))
20 |
21 | n_items = train_data.shape[1] # 14007
22 | all_items = np.arange(n_items) # [0, 1, 2, ... , 14006]
23 | item_clicks = train_data.toarray().sum(0)
24 |
25 | valid_items = []
26 | if use_fix:
27 | valid_items = _fixed_target_items[popularity]
28 | else:
29 | bound_head = np.percentile(item_clicks, 95)
30 | bound_torso = np.percentile(item_clicks, 75)
31 | bound_tail = np.percentile(item_clicks, 50)
32 | if popularity == "head":
33 | valid_items = all_items[item_clicks > bound_head]
34 | elif popularity == "tail":
35 | valid_items = all_items[item_clicks < bound_tail]
36 | elif popularity == "upper_torso":
37 | valid_items = all_items[(item_clicks > bound_torso) & (item_clicks < bound_head)]
38 | elif popularity == "lower_torso":
39 | valid_items = all_items[(item_clicks > bound_tail) & (item_clicks < bound_torso)]
40 |
41 | if len(valid_items) < n_samples:
42 | raise ValueError("Cannot sample enough items that meet criteria.")
43 |
44 | np.random.shuffle(valid_items)
45 | sampled_items = valid_items[:n_samples]
46 | sampled_items.sort()
47 | print("Sampled target items: {}".format(sampled_items.tolist()))
48 |
49 | return sampled_items
50 |
51 |
52 | def set_seed(seed, cuda=False):
53 | """Set seed globally."""
54 | np.random.seed(seed)
55 | random.seed(seed)
56 | if cuda:
57 | torch.cuda.manual_seed(seed)
58 | torch.backends.cudnn.deterministic = True
59 | else:
60 | torch.manual_seed(seed)
61 |
62 |
63 | def minibatch(*tensors, **kwargs):
64 | """Mini-batch generator for pytorch tensor."""
65 | batch_size = kwargs.get('batch_size', 128) # 2048
66 |
67 | if len(tensors) == 1: # √
68 | tensor = tensors[0]
69 | for i in range(0, len(tensor), batch_size): # len(tensor) = 14007
70 | yield tensor[i:i + batch_size]
71 | else:
72 | for i in range(0, len(tensors[0]), batch_size):
73 | yield tuple(x[i:i + batch_size] for x in tensors)
74 |
75 |
76 | def shuffle(*arrays, **kwargs):
77 | """Shuffle arrays."""
78 | require_indices = kwargs.get('indices', False)
79 |
80 | if len(set(len(x) for x in arrays)) != 1:
81 | raise ValueError('All inputs to shuffle must have '
82 | 'the same length.')
83 |
84 | shuffle_indices = np.arange(len(arrays[0]))
85 | np.random.shuffle(shuffle_indices)
86 |
87 | if len(arrays) == 1:
88 | result = arrays[0][shuffle_indices]
89 | else:
90 | result = tuple(x[shuffle_indices] for x in arrays)
91 |
92 | if require_indices:
93 | return result, shuffle_indices
94 | else:
95 | return result
96 |
97 |
98 | def sparse2tensor(sparse_data):
99 | """Convert sparse csr matrix to pytorch tensor."""
100 | return torch.FloatTensor(sparse_data.toarray())
101 |
102 |
103 | def tensor2sparse(tensor):
104 | """Convert pytorch tensor to sparse csr matrix."""
105 | return sparse.csr_matrix(tensor.detach().cpu().numpy())
106 |
107 |
108 | def stack_csrdata(data1, data2):
109 | """Stack two sparse csr matrix."""
110 | return sparse.vstack((data1, data2), format="csr")
111 |
112 |
113 | def save_fake_data(fake_data, path):
114 | """Save fake data to file."""
115 | file_path = "%s.npz" % path
116 | print("Saving fake data to {}".format(file_path))
117 | sparse.save_npz(file_path, fake_data)
118 | return file_path
119 |
120 |
121 | def load_fake_data(file_path):
122 | """Load fake data from file."""
123 | fake_data = sparse.load_npz(file_path)
124 | print("Loaded fake data from {}".format(file_path))
125 | return fake_data
126 |
127 |
128 | def save_checkpoint(model, optimizer, path, epoch=-1):
129 | """Save model checkpoint and optimizer state to file."""
130 | state = {
131 | "epoch": epoch,
132 | "state_dict": model.state_dict(),
133 | "optimizer": optimizer.state_dict(),
134 | }
135 | file_path = "%s.pt" % path
136 | print("Saving checkpoint to {}".format(file_path))
137 | torch.save(state, file_path)
138 |
139 |
140 | def load_checkpoint(path):
141 | """Load model checkpoint and optimizer state from file."""
142 | file_path = "%s.pt" % path
143 | state = torch.load(file_path, map_location=torch.device('cpu'))
144 | print("Loaded checkpoint from {} (epoch {})".format(
145 | file_path, state["epoch"]))
146 | return state["epoch"], state["state_dict"], state["optimizer"]
147 |
--------------------------------------------------------------------------------
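A small sketch exercising the batching and shuffling helpers above on toy inputs (sizes are arbitrary):

import numpy as np
import torch

data = torch.arange(10)
for batch in minibatch(data, batch_size=4):
    print(batch.tolist())            # [0, 1, 2, 3], then [4, 5, 6, 7], then [8, 9]

a = np.arange(5)
b = np.arange(5) * 10
a_shuf, b_shuf = shuffle(a, b)       # rows stay aligned after shuffling
print(a_shuf, b_shuf)
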
/AUSH/model/trainer_rec_surprise.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # @Time : 2019/8/23 15:24
3 | # @Author : chensi
4 | # @File : cf.py
5 | # @Software : PyCharm
6 | # @Description : None
7 |
8 | import os
9 | from surprise import Dataset, Reader, accuracy
10 | from surprise import SVD, SVDpp, NMF, KNNBasic, KNNWithMeans, KNNWithZScore
11 | from surprise.model_selection import PredefinedKFold
12 | from collections import defaultdict
13 |
14 |
15 | def get_top_n(predictions, n=50):
16 | # First map the predictions to each user.
17 | top_n = defaultdict(list)
18 | for uid, iid, true_r, est, _ in predictions:
19 | top_n[uid].append((iid, est))
20 | # Then sort the predictions for each user and retrieve the k highest ones.
21 | for uid, user_ratings in top_n.items():
22 | user_ratings.sort(key=lambda x: x[1], reverse=True)
23 | top_n[uid] = user_ratings[:n]
24 | return top_n
25 |
26 |
27 | def get_model(model_name):
28 | algo = None
29 | if 'KNN' in model_name:
30 | model_name = model_name.split('_')
31 | knn_model_name = model_name[0]
32 | user_based = False if len(model_name) > 1 and model_name[1] == 'I' else True
33 | dis_method = 'msd' if len(model_name) < 3 else model_name[2]
34 | k = 20 if len(model_name) < 4 else int(model_name[3])
35 | sim_options = {'user_based': user_based, 'name': dis_method}
36 | if knn_model_name == 'KNNBasic':
37 | algo = KNNBasic(sim_options=sim_options, k=k)
38 | elif knn_model_name == 'KNNWithMeans':
39 | algo = KNNWithMeans(sim_options=sim_options, k=k)
40 | elif knn_model_name == 'KNNWithZScore':
41 | algo = KNNWithZScore(sim_options=sim_options, k=k)
42 | elif 'SVDpp' in model_name or 'SVD' in model_name or 'NMF' in model_name:
43 | model_name = model_name.split('_')
44 | n_factors = 25 if len(model_name) == 1 else int(model_name[1])
45 | if model_name[0] == 'SVDpp':
46 | algo = SVDpp(n_factors=n_factors)
47 | elif model_name[0] == 'SVD':
48 | algo = SVD(n_factors=n_factors)
49 | elif model_name[0] == 'NMF':
50 | algo = NMF(n_factors=n_factors)
51 | return algo
52 |
53 |
54 | def get_model_old(model_name):
55 | algo = None
56 | if model_name == 'KNNBasic_U':
57 | sim_options = {'user_based': True}
58 | algo = KNNBasic(sim_options=sim_options, k=20)
59 | elif model_name == 'KNNBasic_I':
60 | sim_options = {'user_based': False}
61 | algo = KNNBasic(sim_options=sim_options, k=20)
62 | # algo = KNNBasic()
63 | elif model_name == 'KNNWithMeans_I':
64 | algo = KNNWithMeans(sim_options={'user_based': False}, k=20)
65 | elif model_name == 'KNNWithMeans_U':
66 | algo = KNNWithMeans(sim_options={'user_based': True}, k=20)
67 | elif model_name == 'KNNWithZScore_I':
68 | algo = KNNWithZScore(sim_options={'user_based': False}, k=20)
69 | elif model_name == 'KNNWithZScore_U':
70 | algo = KNNWithZScore(sim_options={'user_based': True}, k=20)
71 | elif model_name == 'SVDpp':
72 | algo = SVDpp()
73 | elif model_name == 'SVD':
74 | algo = SVD()
75 | elif model_name == 'NMF':
76 | algo = NMF()
77 | elif 'NMF_' in model_name:
78 | n_factors = int(model_name.split("_")[1])
79 | algo = NMF(n_factors=n_factors)
80 | elif 'SVDpp_' in model_name:
81 | n_factors = int(model_name.split("_")[1])
82 | algo = SVDpp(n_factors=n_factors)
83 | elif 'SVD_' in model_name:
84 | n_factors = int(model_name.split("_")[1])
85 | algo = SVD(n_factors=n_factors)
86 | elif 'KNNBasic_U_' in model_name:
87 | k = int(model_name.split("_")[-1])
88 | sim_options = {'user_based': True}
89 | algo = KNNBasic(sim_options=sim_options, k=k)
90 | elif 'KNNBasic_I_' in model_name:
91 | k = int(model_name.split("_")[-1])
92 | sim_options = {'user_based': False}
93 | algo = KNNBasic(sim_options=sim_options, k=k)
94 | return algo
95 |
96 |
97 | def basic_rec(model_name, train_path, test_path, target_id):
98 | # build data
99 | # TODO check float and min_r
100 | reader = Reader(line_format='user item rating', sep='\t', rating_scale=(1, 5))
101 | data = Dataset.load_from_folds([(train_path, test_path)], reader=reader)
102 | trainset, testset = None, None
103 | pkf = PredefinedKFold()
104 | for trainset_, testset_ in pkf.split(data):
105 | trainset, testset = trainset_, testset_
106 |
107 | # train model
108 | rec_algo = get_model(model_name)
109 | rec_algo.fit(trainset)
110 | # eval
111 | preds = rec_algo.test(testset)
112 | rmse = accuracy.rmse(preds, verbose=True)
113 |
114 | # predict the rating of the target item for every user
115 | fn_pred = lambda uid: rec_algo.predict(str(uid), str(target_id), r_ui=0).est
116 | target_predictions = list(map(fn_pred, range(trainset.n_users)))
117 |
118 | # topn
119 | testset = trainset.build_anti_testset()
120 | predictions = rec_algo.test(testset)
121 | top_n = get_top_n(predictions, n=50)
122 |
123 | hit_ratios = {}
124 | for uid, user_ratings in top_n.items():
125 | topN = [int(iid) for (iid, _) in user_ratings]
126 | hits = [1 if target_id in topN[:i] else 0 for i in [1, 3, 5, 10, 20, 50]]
127 | hit_ratios[int(uid)] = hits
128 | return target_predictions, hit_ratios
129 |
--------------------------------------------------------------------------------
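The hit-ratio block at the end of basic_rec checks whether the target item appears in each user's top-1/3/5/10/20/50. A hand-worked example of get_top_n plus that check, using fabricated prediction tuples in surprise's (uid, iid, true_r, est, details) format:

fake_predictions = [('u1', '10', 0, 4.9, None), ('u1', '62', 0, 4.5, None),
                    ('u1', '7', 0, 3.2, None)]
top_n = get_top_n(fake_predictions, n=2)
print(top_n['u1'])                   # [('10', 4.9), ('62', 4.5)] -- sorted by estimated rating

target_id = 62
topN = [int(iid) for (iid, _) in top_n['u1']]
hits = [1 if target_id in topN[:i] else 0 for i in [1, 3, 5, 10, 20, 50]]
print(hits)                          # [0, 1, 1, 1, 1, 1] -- the target is ranked 2nd
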
/AUSH/test_main/main_gan_attack_baseline.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # @Time : 2019/8/24 11:08
3 | # @Author : chensi
4 | # @File : main_gan_attack_baseline.py
5 | # @Software : PyCharm
6 | # @Description : None
7 |
8 | import sys
9 |
10 | sys.path.append("../")
11 | import os, argparse
12 | from utils.load_data.load_data import load_data
13 | from model.attack_model.gan_attack.trainer_baseline import Train_G_Attacker
14 | from utils.load_data.load_attack_info import load_attack_info
15 | from utils.attack.data_to_file import *
16 | import numpy as np
17 |
18 |
19 | # os.environ["CUDA_VISIBLE_DEVICES"] = '2'
20 |
21 |
22 | def gan_attack(data_set_name, attack_method, target_id, is_train, write_to_file=1, final_attack_setting=None):
23 | path_train = '../data/data/' + data_set_name + '_train.dat'
24 | path_test = '../data/data/' + data_set_name + '_test.dat'
25 | attack_info_path = ["../data/data/" + data_set_name + "_selected_items",
26 | "../data/data/" + data_set_name + "_target_users"]
27 |
28 | attack_info = load_attack_info(*attack_info_path)
29 | dataset_class = load_data(path_train=path_train, path_test=path_test, header=['user_id', 'item_id', 'rating'],
30 | sep='\t', print_log=True)
31 |
32 | if len(attack_method.split('_')[1:]) == 2:
33 | attack_num, filler_num = map(int, attack_method.split('_')[1:])
34 | filler_method = 0
35 | else:
36 | attack_num, filler_num, filler_method = map(int, attack_method.split('_')[1:])
37 |
38 | loss_setting = int(attack_method.split('_')[0][-1])
39 | selected_items = attack_info[target_id][0]
40 | model_path = "../result/model_ckpt/" + '_'.join([data_set_name, attack_method, str(target_id)]) + ".ckpt"
41 |
42 | #
43 | gan_attacker = Train_G_Attacker(dataset_class, params_D=None, params_G=None, target_id=target_id,
44 | selected_id_list=selected_items,
45 | filler_num=filler_num, attack_num=attack_num, filler_method=filler_method,
46 | loss_setting=loss_setting)
47 | # if is_train:
48 | # fake_profiles = gan_attacker.execute(is_train=True, model_path=model_path)
49 | # else:
50 | # fake_profiles, real_profiles = gan_attacker.execute(is_train=False, model_path=model_path)
51 | # if write_to_file == 0:
52 | # return fake_profiles, real_profiles
53 | fake_profiles, real_profiles, filler_indicator = gan_attacker.execute(is_train=is_train, model_path=model_path,
54 | final_attack_setting=final_attack_setting)
55 | gan_attacker.sess.close()
56 | # """inject and write to file"""
57 | if write_to_file == 1:
58 | dst_path = "../data/data_attacked/" + '_'.join([data_set_name, str(target_id), attack_method]) + ".dat"
59 | attacked_file_writer(path_train, dst_path, fake_profiles, dataset_class.n_users)
60 | return fake_profiles, real_profiles, filler_indicator
61 |
62 |
63 | def parse_arg():
64 | parser = argparse.ArgumentParser()
65 |
66 | parser.add_argument('--dataset', type=str, default='automotive', help='filmTrust/ml100k/grocery')
67 |
68 | # filmTrust:random = [5, 395, 181, 565, 254] tail = [601, 623, 619, 64, 558]
69 | # ml100k:random = [62, 1077, 785, 1419, 1257] tail = [1319, 1612, 1509, 1545, 1373]
70 | # 5,395,181,565,254,601,623,619,64,558
71 | # 62,1077,785,1419,1257,1319,1612,1509,1545,1373
72 | # 1166,1574,759,494,549,1272,1728,1662,450,1456,595,566,764,1187,1816,1478,1721,2294,2413,1148
73 | # 88,22,122,339,1431,1141,1656,477,1089,866
74 | parser.add_argument('--target_ids', type=str, default='88,22,122,339,1431,1141,1656,477,1089,866',
75 | help='attack target list')
76 |
77 | parser.add_argument('--attack_num', type=int, default=50,
78 | help='num of attack fake user,50 for ml100k and filmTrust')
79 |
80 | parser.add_argument('--filler_num', type=int, default=4,
81 | help='num of filler items each fake user,90 for ml100k,36 for filmTrust')
82 |
83 | parser.add_argument('--filler_method', type=str, default='', help='0/1/2/3')
84 |
85 | parser.add_argument('--write_to_file', type=int, default=1, help='1: also write the fake profiles to file, 0: only return them as arrays')
86 |
87 | parser.add_argument('--loss', type=int, default=1, help='0:reconstruction,1:reconstruction+seed')
88 | #
89 | args = parser.parse_args()
90 | #
91 | args.target_ids = list(map(int, args.target_ids.split(',')))
92 | return args
93 |
94 |
95 | if __name__ == '__main__':
96 | """parse args"""
97 | args = parse_arg()
98 | """train"""
99 | is_train = 1
100 | attack_method = '_'.join(
101 | ['G' + str(args.loss), str(args.attack_num), str(args.filler_num), str(args.filler_method)]).strip('_')
102 | #
103 | for target_id in args.target_ids:
104 |
105 | attackSetting_path = '_'.join(map(str, [args.dataset, args.attack_num, args.filler_num, target_id]))
106 | attackSetting_path = "../data/data_attacked/" + attackSetting_path + '_attackSetting'
107 | real_profiles, filler_indicator = np.load(attackSetting_path + '.npy', allow_pickle=True)
108 | final_attack_setting = [args.attack_num, real_profiles, filler_indicator]
109 |
110 |
111 | _ = gan_attack(args.dataset, attack_method, target_id, is_train,
112 | write_to_file=args.write_to_file,
113 | final_attack_setting=final_attack_setting)
114 |
115 | # gan_attack(args.dataset, attack_method, args.target_id, is_train, write_to_file=args.write_to_file)
116 |
--------------------------------------------------------------------------------
/data/automotive/automotive_target_users:
--------------------------------------------------------------------------------
1 | 22 2181,2694,2696,1170,2582,1303,1175,2585,25,2717,2718,1950,2720,1697,2721,414,2719,2722,2723,2724,1704,2473,2725,2727,2728,1709,174,2606,2729,2609,1842,2730,52,2731,2732,2733,2736,57,2737,2738,2748,2749,2741,2742,64,1985,2745,2627,1348,2628,198,2750,1742,2644,2739,1750,855,473,2740,2521,1885,2269,2743,2546,2547,2674,1270,2746,2427,2172,2747,2174
2 | 88 1043,2586,2591,547,38,1578,52,2103,59,1600,579,2635,85,101,2661,1131,2668,1140,1157,1670,2695,649,2700,1176,677,683,1708,2735,2743,186,191,2755,712,1758,741,1253,1255,239,244,2814,1791,2824,2825,266,272,1808,1298,2834,280,1820,2845,2849,2850,2851,2852,2853,2854,2855,2359,825,1849,318,1863,334,846,2384,849,2388,2394,872,892,2469,1965,950,1980,2502,2529,1510,1514,502
3 | 119 28,101,272,288,301,316,341,378,449,532,659,663,698,705,731,744,958,1076,1104,1172,1292,1313,1323,1341,1465,1469,1473,1488,1573,1644,1758,1893,1958,1975,1978,2082,2164,2166,2191,2235,2338,2389,2535,2544,2545,2546,2548,2549,2550,2551
4 | 122 521,1547,2584,25,2586,2587,1052,28,1572,2603,1076,57,1089,580,2629,1094,1097,589,79,2644,96,2155,1644,2162,2675,2167,2172,1664,645,2181,2182,1672,655,1168,1689,1178,2717,1697,2721,2723,2724,2727,1704,2729,2731,1709,175,2736,2737,2738,179,180,2739,2740,2741,2749,2752,2241,198,2246,1225,725,221,2269,2282,238,1777,266,1303,296,1839,310,2362,2882,2889,842,331,2901,855,1879,2903,1887,2917,1895,2922,877,2427,900,1930,1931,2473,1972,1977,2493,461,2005,473,475,2533,1515,2546,2547,1528,1023
5 | 339 769,255,1032,10,1291,145,533,1302,1048,161,1313,1314,1315,1316,1317,295,296,1318,1319,43,684,1320,1321,1322,1323,305,1325,1326,1327,1328,1329,311,1330,1331,1332,571,1333,1334,1214,1335,1336,1337,1338,195,835,1340,1341,1342,1343,1344,1345,1346,1347,1229,1350,1351,1352,1339,1353,1354,1355,347,608,613,102,1254,361,754,1142,889,1147,508,1348
6 | 422 67,77,96,97,99,105,121,128,141,157,171,220,229,232,236,250,271,272,273,297,300,349,358,369,384,390,395,402,403,449,467,484,529,635,663,675,684,690,706,727,730,732,762,763,764,767,776,797,855,857,861,862,896,903,909,913,933,1012,1030,1036,1069,1077,1080,1106,1114,1124,1164,1193,1276,1291,1294,1307,1332,1333,1339,1347,1352,1383,1385,1386,1391,1410,1454,1473,1480,1494,1562,1579,1580,1593,1640,1643,1665,1670,1704,1710,1841,1845,1857,1876,1916,1920,1926,1956,1982,1997,2001,2010,2014,2037,2039,2050,2057,2086,2094,2124,2137,2153,2208,2236,2269,2270,2271,2272,2273,2274,2275,2276,2277,2279,2280,2282,2283,2284,2285,2286,2287,2288,2289,2290,2291,2292,2293,2294,2295,2296,2297,2298,2299,2301,2302
7 | 477 2437,1032,2572,1804,2190,1685,406,1046,1691,2715,1693,417,2338,419,548,1573,1955,937,1321,1325,1070,2733,2734,1970,563,308,1075,1971,185,314,2105,1340,1983,1474,2755,1733,967,1875,1109,2005,2006,2390,345,2521,2778,2141,1889,1507,612,2019,2278,1511,2661,2664,508,366,2159,1649,114,242,886,2550,2300,1662,2559
8 | 594 52,59,85,101,191,239,244,266,272,280,318,334,502,547,649,677,683,712,741,846,849,872,892,950,1043,1131,1140,1176,1253,1255,1298,1510,1514,1578,1600,1670,1708,1758,1791,1808,1820,1849,1965,2103,2359,2384,2388,2394,2469,2502,2529,2586,2591,2635,2661,2668,2700,2735,2743,2755,2814,2824,2834,2845,2849,2850,2851,2852,2853,2854,2855
9 | 866 1537,1411,1415,136,1416,1417,398,1424,1937,915,1555,1173,1429,1939,2838,2842,1435,1440,1696,1443,1444,1445,1448,1066,300,1455,1712,433,1715,185,1465,1595,2874,1725,2875,1471,1727,1090,67,1475,1731,1479,2119,2504,2634,75,203,205,206,2123,2760,2765,1363,212,1367,1368,1499,221,1123,2406,2409,2157,1902,2030,1904,2158,1906,1395,2034,2037,1398,2673,1656,1402,1404,1405
10 | 884 25,28,57,79,175,179,180,198,221,238,266,296,310,331,461,473,475,580,589,645,655,725,842,855,877,900,1052,1076,1089,1094,1097,1168,1178,1225,1303,1515,1528,1547,1572,1644,1672,1689,1697,1704,1709,1777,1839,1887,1895,1930,1931,1972,1977,2005,2155,2162,2167,2172,2181,2182,2241,2246,2269,2282,2362,2427,2473,2493,2533,2546,2547,2584,2587,2603,2629,2644,2675,2717,2721,2723,2724,2727,2729,2731,2736,2737,2738,2739,2740,2741,2749,2752,2882,2889,2901,2903,2917,2922
11 | 1089 2181,2694,2696,1170,2582,1303,1175,2585,25,2717,2718,1950,2720,1697,2721,414,2719,2722,2723,2724,1704,2473,2725,2727,2728,1709,174,2606,2729,2609,1842,2730,52,2731,2732,2733,2736,57,2737,2738,2748,2749,2741,2742,64,1985,2745,2627,1348,2628,198,2750,1742,2644,2739,1750,855,473,2740,2521,1885,2269,2743,2546,2547,2674,1270,2746,2427,2172,2747,2174
12 | 1141 2181,2694,2696,1170,2582,1303,1175,2585,25,2717,2718,1950,2720,1697,2721,414,2719,2722,2723,2724,1704,2473,2725,2727,2728,1709,174,2606,2729,2609,1842,2730,2731,2732,2733,2736,2737,57,2738,2739,2748,2749,2741,2742,64,1985,2745,2627,1348,2628,198,2750,1742,2644,1750,855,473,2740,2521,1885,2269,2546,2547,2674,1270,2746,2427,2172,2747,2174
13 | 1431 770,3,1926,2569,2570,2571,2572,909,2573,2574,2575,1553,914,1943,1048,2457,153,27,1531,797,2465,1315,2467,2086,297,555,1580,1326,1711,1328,435,564,1331,1207,952,2492,195,835,1220,1347,2501,2120,1353,1098,1994,2250,2377,1230,211,1235,1237,726,1878,2009,220,2271,2274,874,2283,2285,367,754,371,1268,1653,2291,2292,2296,505,2299,764,893,2302
14 | 1593 2,3,69,95,132,185,193,201,203,205,210,212,216,217,220,221,235,253,297,353,395,398,399,433,436,438,447,454,500,545,552,640,775,838,1161,1219,1279,1365,1374,1376,1378,1388,1408,1413,1414,1427,1431,1438,1441,1456,1467,1479,1486,1551,1558,1591,1592,1593,1635,1636,1638,1696,1710,1719,1901,1904,1905,1912,1918,1924,1926,1992,2032,2037,2043,2087,2122,2127,2147,2148,2150,2151,2156,2160,2344,2410,2412,2413,2499,2503,2581,2633,2673,2818,2838
15 | 1656 2,3,1551,2581,1558,545,2087,552,1591,1592,1593,69,2633,2122,2127,95,1635,1636,2147,1638,2148,2150,2151,2156,2160,2673,640,132,1161,1696,1710,1719,185,193,1219,201,203,205,210,212,216,217,220,235,253,1279,2818,775,2838,2344,297,838,1365,1374,1376,353,1378,2410,1388,1901,2412,2413,1904,1905,1912,1918,1408,1924,1413,1414,1926,1417,395,398,399,1427,1431,1438,1441,1456,433,436,438,1467,447,2499,454,1479,1992,2503,1486,2032,500,2037,2043
--------------------------------------------------------------------------------
/AUSH/model/attack_model/gan_attack_copy/models.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # @Time : 2020/9/18 13:52
3 | # @Author : chensi
4 | # @File : models.py
5 | # @Software : PyCharm
6 | # @Description : None
7 |
8 | try:
9 | import tensorflow.compat.v1 as tf
10 |
11 | tf.disable_v2_behavior()
12 | except:
13 | import tensorflow as tf
14 | import math
15 |
16 |
17 | # import math
18 | class CopyGanAttacker:
19 | def __init__(self, dataset_class, target_id, filler_num, attack_num, filler_method):
20 | # data set info
21 | self.dataset_class = dataset_class
22 | self.num_user = dataset_class.n_users
23 | self.num_item = dataset_class.n_items
24 | self.rating_matrix = dataset_class.train_matrix.toarray() # tf.constant()
25 |
26 | # attack info
27 | self.target_id = target_id
28 | self.filler_num = filler_num
29 | self.attack_num = attack_num
30 | self.filler_method = filler_method
31 |
32 | def build_model(self):
33 | # define place_holder
34 | # self.user_vector = tf.placeholder(tf.int32, [None, self.num_item])
35 | # self.item_vector = tf.placeholder(tf.int32, [None, self.num_item])
36 | self.sampled_template = tf.placeholder(tf.int32, [self.args.batch_size, self.num_item])
37 | self.batch_filler_index = tf.placeholder(tf.int32, [None, self.args.batch_size])
38 | # user/item embedding
39 | # c = tf.constant(c)
40 | user_embedding = self.towerMlp(self.rating_matrix, self.num_item, self.args.embedding_dim)
41 | item_embedding = self.towerMlp(self.rating_matrix.transpose(), self.num_user, self.args.embedding_dim)
42 |
43 | """
44 | copy net
45 |         p_copy(u, j) = sigmoid(user_emb(u) * w1 + item_emb(j) * w2 + b_j)"""
46 |         with tf.variable_scope("copyNet"):  # variable_scope so w1/w2/b here do not collide with genNet's below
47 | w1 = tf.get_variable('w1', [self.args.embedding_dim, self.num_item])
48 | p1 = tf.matmul(tf.nn.embedding_lookup(user_embedding, self.batch_filler_index), w1) # batch*item_num
49 | w2 = tf.get_variable('w2', [self.args.embedding_dim, 1])
50 | p2 = tf.matmul(item_embedding, w2) # item_num*1
51 |             b = tf.get_variable('b', [self.num_item])
52 | copy_prob = tf.nn.sigmoid(p1 + p2 + b) # batch*item_num
53 | """
54 | generate net
55 | p_gen(j=r)
56 | """
57 |         with tf.variable_scope("genNet"):
58 | gen_probabilitiy_list = []
59 | for i in range(5):
60 |                 with tf.variable_scope("s_%d" % i):
61 | w1 = tf.get_variable('w1', [self.args.embedding_dim, self.num_item])
62 | p1 = tf.matmul(tf.nn.embedding_lookup(user_embedding, self.batch_filler_index),
63 | w1) # batch*item_num
64 | w2 = tf.get_variable('w2', [self.args.embedding_dim, 1])
65 | p2 = tf.matmul(item_embedding, w2) # item_num*1
66 |                     b = tf.get_variable('b', [self.num_item])
67 | gen_probability = p1 + p2 + b
68 | gen_probabilitiy_list.append(tf.expand_dims(gen_probability, 2)) # batch*item_num*1
69 | gen_rating_distri = tf.nn.softmax(tf.concat(gen_probabilitiy_list, axis=2)) # batch*item_num*5
70 | """
71 | Rating
72 | rating p(r) = p_copy(j) x p_copy(j=r) + (1-p_copy(j)) x p_gen(j=r)
73 | """
74 | copy_rating_distri = tf.reshape(tf.expand_dims(tf.one_hot(self.sampled_template, 5), 3),
75 | [self.args.batch_size, -1, 5])
76 | rating_distri = copy_prob * copy_rating_distri + (1 - copy_prob) * gen_rating_distri # batch*item_num*5
77 | rating_value = tf.tile(tf.constant([[[1., 2., 3., 4., 5.]]]), [self.args.batch_size, self.num_item, 1])
78 | fake_profiles = tf.reduce_sum(rating_distri * rating_value, 2)
79 |
80 | """
81 | loss function
82 | """
83 | with tf.name_scope("Discriminator"):
84 | D_real = self.towerMlp(self.sampled_template, self.num_item, 1)
85 | D_fake = self.towerMlp(fake_profiles, self.num_item, 1)
86 |
87 | """
88 | loss function
89 | """
90 | with tf.name_scope("loss_D"):
91 | d_loss_real = tf.reduce_mean(
92 | tf.nn.sigmoid_cross_entropy_with_logits(logits=D_real, labels=tf.ones_like(D_real)),
93 | name="loss_real")
94 | d_loss_fake = tf.reduce_mean(
95 | tf.nn.sigmoid_cross_entropy_with_logits(logits=D_fake, labels=tf.zeros_like(D_fake)),
96 | name="loss_fake")
97 | loss_D = d_loss_real + d_loss_fake
98 | with tf.name_scope("loss_G"):
99 | # reconstruction loss
100 | loss_rec = tf.reduce_mean(tf.square(fake_profiles - self.sampled_template))
101 |             # adversarial loss
102 | loss_adv = tf.reduce_mean(
103 | tf.nn.sigmoid_cross_entropy_with_logits(logits=D_fake, labels=tf.ones_like(D_fake)))
104 | loss_G = loss_rec + loss_adv
105 |
106 | def towerMlp(self, input, inputDim, outputDim):
107 | dim, x = inputDim // 2, input
108 | while dim > outputDim:
109 | layer = tf.layers.dense(
110 | inputs=x,
111 | units=dim,
112 | kernel_initializer=tf.random_normal_initializer,
113 | activation=tf.nn.relu,
114 | kernel_regularizer=tf.contrib.layers.l2_regularizer(scale=self.reg_rate))
115 | dim, x = dim // 2, layer
116 | output = tf.layers.dense(
117 | inputs=x,
118 | units=outputDim,
119 | kernel_initializer=tf.random_normal_initializer,
120 | activation=tf.nn.sigmoid,
121 | kernel_regularizer=tf.contrib.layers.l2_regularizer(scale=self.reg_rate))
122 | return output
123 |
--------------------------------------------------------------------------------
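
Note (gan_attack_copy/models.py above): the rating formula p(r) = p_copy(j) x p_copy(j=r) + (1 - p_copy(j)) x p_gen(j=r) can be illustrated numerically. A minimal NumPy sketch for a single item on a 1-5 scale; all values below are made up for illustration, not taken from the model:

    import numpy as np

    p_copy = 0.8                                    # sigmoid output of the copy net for this (user, item)
    copy_dist = np.eye(5)[3]                        # template rating 4 -> one-hot [0, 0, 0, 1, 0]
    gen_dist = np.array([0.1, 0.1, 0.2, 0.3, 0.3])  # softmax output of the generate net over ratings 1..5
    p_r = p_copy * copy_dist + (1 - p_copy) * gen_dist
    expected_rating = np.sum(p_r * np.array([1., 2., 3., 4., 5.]))  # ~3.92, the fake profile entry
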
/AUSH/test_main/main_eval_attack.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # @Time : 2019/8/24 10:05
3 | # @Author : chensi
4 | # @File : main_eval_attack.py
5 | # @Software : PyCharm
6 | # @Description : None
7 | import sys, argparse
8 | import numpy as np
9 | import pandas as pd
10 |
11 | sys.path.append("../")
12 | from utils.load_data.load_data import load_data
13 | from utils.load_data.load_attack_info import *
14 |
15 |
16 | def attack_evaluate(real_preds_path, attacked_preds_file, non_rated_users, target_users):
17 | #
18 | names = ['uid', 'rating', 'HR_1', 'HR_3', 'HR_5', 'HR_10', 'HR_20', 'HR_50']
19 | real_preds = pd.read_csv(real_preds_path, sep='\t', names=names, engine='python')
20 | attacked_preds = pd.read_csv(attacked_preds_file, sep='\t', names=names, engine='python')
21 | # pred
22 | shift_target = np.mean(attacked_preds.iloc[target_users, 1].values - real_preds.iloc[target_users, 1].values)
23 | shift_all = np.mean(attacked_preds.iloc[non_rated_users, 1].values - real_preds.iloc[non_rated_users, 1].values)
24 | #
25 | HR_real_target = real_preds.iloc[target_users, range(2, 8)].mean().values
26 | HR_real_all = real_preds.iloc[non_rated_users, range(2, 8)].mean().values
27 |
28 | HR_attacked_target = attacked_preds.iloc[target_users, range(2, 8)].mean().values
29 | HR_attacked_all = attacked_preds.iloc[non_rated_users, range(2, 8)].mean().values
30 | return shift_target, HR_real_target, HR_attacked_target, shift_all, HR_real_all, HR_attacked_all
31 |
32 |
33 | def eval_attack(data_set_name, rec_model_name, attack_method, target_id):
34 |     res_dir = "../result/pred_result/"
35 |     real_preds_path = res_dir + '_'.join([rec_model_name, data_set_name, str(target_id)])
36 | attacked_preds_file = real_preds_path + "_" + attack_method
37 | """
38 | ml100k
39 | """
40 | if data_set_name == 'ml100k':
41 | path_train = "../data/data/ml100k_train.dat"
42 | path_test = "../data/data/ml100k_test.dat"
43 | attack_info_path = ["../data/data/ml100k_selected_items", "../data/data/ml100k_target_users"]
44 | elif data_set_name == 'filmTrust':
45 | path_train = "../data/data/filmTrust_train.dat"
46 | path_test = "../data/data/filmTrust_test.dat"
47 | attack_info_path = ["../data/data/filmTrust_selected_items", "../data/data/filmTrust_target_users"]
48 |
49 | else:
50 | path_train = "../data/data/" + data_set_name + "_train.dat"
51 | path_test = "../data/data/" + data_set_name + "_test.dat"
52 | attack_info_path = ["../data/data/" + data_set_name + "_selected_items",
53 | "../data/data/" + data_set_name + "_target_users"]
54 |
55 | attack_info = load_attack_info(*attack_info_path)
56 | dataset_class = load_data(path_train=path_train, path_test=path_test, header=['user_id', 'item_id', 'rating'],
57 | sep='\t', print_log=False)
58 |
59 | #
60 | target_users = attack_info[target_id][1]
61 | non_rated_users = dataset_class.get_item_nonrated_users(target_id)
62 | #
63 | res = attack_evaluate(real_preds_path, attacked_preds_file, non_rated_users, target_users)
64 | #
65 |     target, overall = res[:3], res[3:]
66 |     target_str = '\t'.join([str(target[0]), '\t'.join(map(str, target[1])), '\t'.join(map(str, target[2]))])
67 |     all_str = '\t'.join([str(overall[0]), '\t'.join(map(str, overall[1])), '\t'.join(map(str, overall[2]))])
68 |
69 | # info
70 | info = '\t'.join([rec_model_name, attack_method, str(target_id)])
71 | # print(info + '\t' + target_str + '\t' + all_str)
72 | return info + '\t' + target_str + '\t' + all_str
73 |
74 |
75 | def parse_arg():
76 | parser = argparse.ArgumentParser()
77 |
78 | parser.add_argument('--dataset', type=str, default='automotive', help='filmTrust/ml100k/office')
79 |
80 | parser.add_argument('--attack_num', type=int, default=50, help='50 for ml100k and filmTrust')
81 |
82 | parser.add_argument('--filler_num', type=int, default=4, help='90 for ml100k,36 for filmTrust')
83 |
84 | parser.add_argument('--attack_methods', type=str, default='G0,G1',
85 | help='gan,G0,G1,segment,average,random,bandwagon')
86 |
87 | parser.add_argument('--rec_model_names', type=str, default='NNMF,IAutoRec,UAutoRec,NMF_25',
88 | help='NNMF,IAutoRec,UAutoRec,NMF_25')
89 |
90 | # filmTrust:5,395,181,565,254,601,623,619,64,558 - random*5+tail*5
91 | # ml100k:62,1077,785,1419,1257,1319,1612,1509,1545,1373 - random*5+tail*5
92 | # 1166,1574,759,494,549,1272,1728,1662,450,1456,595,566,764,1187,1816,1478,1721,2294,2413,1148
93 | # 88,22,122,339,1431,1141,1656,477,1089,866
94 | parser.add_argument('--target_ids', type=str, default='88,22,122,339,1431,1141,1656,477,1089,866',
95 | help='target_id')
96 |
97 | #
98 | args = parser.parse_args()
99 | #
100 | args.attack_methods = args.attack_methods.split(',')
101 | args.rec_model_names = args.rec_model_names.split(',')
102 | args.target_ids = list(map(int, args.target_ids.split(',')))
103 | return args
104 |
105 |
106 | if __name__ == '__main__':
107 | """parse args"""
108 | args = parse_arg()
109 | """eval"""
110 | result = []
111 |
112 | for attack_method in args.attack_methods:
113 | for rec_model_name in args.rec_model_names:
114 | for target_id in args.target_ids:
115 | attack_method_ = '_'.join([attack_method, str(args.attack_num), str(args.filler_num)])
116 | try:
117 | result_ = eval_attack(args.dataset, rec_model_name, attack_method_, target_id)
118 | result.append(result_.split('\t'))
119 |                 except Exception as e:
120 |                     print('eval failed:', attack_method, rec_model_name, target_id, e)
121 |
122 | result = np.array(result).transpose()
123 | result = pd.DataFrame(dict(zip(range(result.shape[0]), result)))
124 | result.to_excel(args.dataset + '_performance_all.xls', index=False)
125 |
--------------------------------------------------------------------------------
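
Note (main_eval_attack.py above): a minimal usage sketch of eval_attack, with hypothetical arguments; it assumes the corresponding prediction files already exist under ../result/pred_result/:

    from test_main.main_eval_attack import eval_attack

    # attack method strings follow '<method>_<attack_num>_<filler_num>', as assembled in __main__
    row = eval_attack('ml100k', 'NNMF', 'average_50_90', 62)
    # tab-separated: model, attack, target id, then prediction shift and HR@{1,3,5,10,20,50}
    # before/after the attack, first for the target users and then for all non-rated users
    print(row)
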
/AUSH/test_main/data_preprocess.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding:utf-8 -*-
3 | # author:ariaschen
4 | # datetime:2020/1/12 16:11
5 | # software: PyCharm
6 |
7 | import itertools, gzip, ast
8 | import numpy as np, pandas as pd
9 | from utils.load_data.load_data import *
10 | from sklearn.model_selection import train_test_split
11 |
12 |
13 | def parse(path):
14 | g = gzip.open(path, 'rb')
15 | for l in g:
16 |         yield ast.literal_eval(l.decode('utf-8'))  # each line is a Python-dict-style review record
17 |
18 |
19 | def getDF(path):
20 | i = 0
21 | df = {}
22 | for d in parse(path):
23 | df[i] = d
24 | i += 1
25 | return pd.DataFrame.from_dict(df, orient='index')
26 |
27 |
28 | def data_preprocess(data_set, gz_path):
29 | data = getDF(gz_path)[['reviewerID', 'asin', 'overall']]
30 | data.columns = ['uid', 'iid', 'rating']
31 |
32 | uids, iids = data.uid.unique(), data.iid.unique()
33 | n_uids, n_iids, n_ratings = len(uids), len(iids), data.shape[0]
34 | print('User num:', n_uids, '\tItem num:', n_iids, '\tRating num:', n_ratings, '\t Sparsity :', n_ratings / (n_iids * n_uids))
35 | print('Number of ratings per user:', n_ratings / n_uids)
36 |
37 | uid_update = dict(zip(uids, range(n_uids)))
38 | iid_update = dict(zip(iids, range(n_iids)))
39 |
40 | data.uid = data.uid.apply(lambda x: uid_update[x])
41 | data.iid = data.iid.apply(lambda x: iid_update[x])
42 |
43 | train_idxs, test_idxs = train_test_split(list(range(n_ratings)), test_size=0.1)
44 |
45 | train_data = data.iloc[train_idxs]
46 | test_data = data.iloc[test_idxs]
47 | path_train = "../data/data/" + data_set + "_train.dat"
48 | path_test = "../data/data/" + data_set + "_test.dat"
49 | train_data.to_csv(path_train, index=False, header=None, sep='\t')
50 | test_data.to_csv(path_test, index=False, header=None, sep='\t')
51 | np.save("../data/data/" + data_set + "_id_update", [uid_update, iid_update])
52 |
53 |
54 | def exp_select(data_set, target_items, selected_num, target_user_num):
55 | path_test = "../data/data/" + data_set + "_test.dat"
56 | path_train = "../data/data/" + data_set + "_train.dat"
57 | dataset_class = load_data(path_train=path_train, path_test=path_test,
58 | header=['user_id', 'item_id', 'rating'],
59 | sep='\t', print_log=True)
60 |
61 | item_pops = dataset_class.get_item_pop()
62 |
63 | items_sorted = np.array(item_pops).argsort()[::-1]
64 |
65 | bandwagon_selected = items_sorted[:selected_num]
66 | print('bandwagon_selected:', bandwagon_selected)
67 |
68 |
69 | threshold = dataset_class.test_data.rating.mean()
70 | threshold = threshold if threshold < 3 else 3.0
71 | print('threshold:', threshold)
72 | selected_candidates = items_sorted[:20]
73 |
74 | selected_candidates = list(itertools.combinations(selected_candidates, selected_num))
75 |
76 | result = {}
77 | target_items = [j for i in range(2, 10) for j in
78 | items_sorted[i * len(items_sorted) // 10:(i * len(items_sorted) // 10) + 2]][::-1]
79 | target_items = list(
80 | np.random.choice([i for i in range(len(item_pops)) if item_pops[i] == 3], 4, replace=False)) + target_items
81 | print('target_items:', target_items)
82 | print('number of ratings:', [item_pops[i] for i in target_items])
83 | for target in target_items:
84 | target_rated = set(dataset_class.train_data[dataset_class.train_data.item_id == target].user_id.values)
85 | data_tmp = dataset_class.train_data[~dataset_class.train_data.user_id.isin(target_rated)].copy()
86 | data_tmp = data_tmp[data_tmp.rating >= threshold]
87 | np.random.shuffle(selected_candidates)
88 |
89 | for selected_items in selected_candidates:
90 | target_users = data_tmp[data_tmp.item_id.isin(selected_items)].groupby(
91 | 'user_id').size()
92 |
93 | if target_users[(target_users == selected_num)].shape[0] >= target_user_num:
94 | target_users = sorted(target_users[(target_users == selected_num)].index)
95 | result[target] = [sorted(selected_items), target_users]
96 | print('target:', target)
97 | break
98 |
99 | if target not in result:
100 | for selected_items in selected_candidates:
101 |
102 | target_users = data_tmp[data_tmp.item_id.isin(selected_items)].groupby(
103 | 'user_id').size()
104 | target_users = sorted(dict(target_users).items(), key=lambda x: x[1], reverse=True)
105 |                 min_rated = target_users[target_user_num][1]
106 |                 target_users = [i[0] for i in target_users[:target_user_num] if i[1] > selected_num // 2]
107 |                 if len(target_users) >= target_user_num:
108 |                     result[target] = [sorted(selected_items), sorted(target_users)]
109 |                     print('target:', target, 'min rated selected item num:', min_rated)
110 | break
111 |
112 | if target not in result:
113 |                 print('target:', target, 'no qualifying target users found')
114 |
115 |
116 | key = list(result.keys())
117 | selected_items = [','.join(map(str, result[k][0])) for k in key]
118 | target_users = [','.join(map(str, result[k][1])) for k in key]
119 | selected_items = pd.DataFrame(dict(zip(['id', 'selected_items'], [key, selected_items])))
120 | target_users = pd.DataFrame(dict(zip(['id', 'target_users'], [key, target_users])))
121 | selected_items.to_csv("../data/data/" + data_set + '_selected_items', index=False, header=None, sep='\t')
122 | target_users.to_csv("../data/data/" + data_set + '_target_users', index=False, header=None, sep='\t')
123 |
124 |
125 | if __name__ == '__main__':
126 | data_set = 'office'
127 | gz_path = 'C:\\Users\\ariaschen\\Downloads\\reviews_Office_Products_5.json.gz'
128 | # data_set = 'automotive'
129 | # gz_path = 'C:\\Users\\ariaschen\\Downloads\\reviews_Automotive_5.json.gz'
130 | # data_set = 'grocery'
131 | # gz_path = "../data/new_raw_data/reviews_Grocery_and_Gourmet_Food_5.json.gz"
132 |
133 |
134 | data_preprocess(data_set, gz_path)
135 |
136 | target_items = None
137 |
138 | exp_select(data_set, target_items, selected_num=2, target_user_num=30)
139 |
--------------------------------------------------------------------------------
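
Note (data_preprocess.py above): the *_selected_items and *_target_users files written by exp_select are tab-separated, with the target item id in the first column and a comma-joined id list in the second, matching the data lines near the top of this dump. A small sketch for reading one back (path hypothetical):

    import pandas as pd

    sel = pd.read_csv('../data/data/office_selected_items', sep='\t', names=['id', 'selected_items'])
    sel['selected_items'] = sel['selected_items'].apply(lambda s: list(map(int, str(s).split(','))))
    print(sel.head())
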
/AUSH/test_main/main_baseline_attack.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # @Time : 2019/8/23 11:49
3 | # @Author : chensi
4 | # @File : main_baseline_attack.py
5 | # @Software : PyCharm
6 | # @Description : None
7 |
8 | import sys, argparse
9 |
10 | sys.path.append("../")
11 | from utils.load_data.load_data import *
12 | from utils.load_data.load_attack_info import *
13 | from model.attack_model.baseline import *
14 | from utils.attack.data_to_file import *
15 | from model.attack_model.gan_attack.trainer import Train_GAN_Attacker
16 |
17 |
18 | def get_data(data_set_name):
19 | path_train = '../data/data/' + data_set_name + '_train.dat'
20 | path_test = '../data/data/' + data_set_name + '_test.dat'
21 | dataset_class = load_data(path_train=path_train, path_test=path_test,
22 | header=['user_id', 'item_id', 'rating'],
23 | sep='\t', print_log=False)
24 | attack_info_path = ["../data/data/" + data_set_name + "_selected_items",
25 | "../data/data/" + data_set_name + "_target_users"]
26 | attack_info = load_attack_info(*attack_info_path)
27 | return dataset_class, attack_info
28 |
29 |
30 | def baseline_attack(dataset_class, attack_info, attack_method, target_id, bandwagon_selected,
31 | fixed_filler_indicator=None):
32 | """load data"""
33 | selected_ids, target_users = attack_info[target_id]
34 | attack_model, attack_num, filler_num = attack_method.split('_')
35 | attack_num, filler_num = int(attack_num), int(filler_num)
36 |
37 | """attack class"""
38 | global_mean, global_std, item_means, item_stds = dataset_class.get_all_mean_std()
39 | baseline_attacker = BaselineAttack(attack_num, filler_num, dataset_class.n_items, target_id,
40 | global_mean, global_std, item_means, item_stds, 5.0, 1.0,
41 | fixed_filler_indicator=fixed_filler_indicator)
42 | # fake profile array
43 | fake_profiles = None
44 | if attack_model == "random":
45 | fake_profiles = baseline_attacker.RandomAttack()
46 | elif attack_model == "bandwagon":
47 | fake_profiles = baseline_attacker.BandwagonAttack(bandwagon_selected)
48 | elif attack_model == "average":
49 | fake_profiles = baseline_attacker.AverageAttack()
50 | elif attack_model == "segment":
51 | fake_profiles = baseline_attacker.SegmentAttack(selected_ids)
52 | else:
53 | print('attack_method error')
54 | exit()
55 | return fake_profiles
56 |
57 |
58 | def parse_arg():
59 | parser = argparse.ArgumentParser()
60 |
61 | parser.add_argument('--dataset', type=str, default='automotive', help='filmTrust/ml100k/grocery')
62 |
63 | parser.add_argument('--attack_methods', type=str, default='average',
64 | help='average,segment,random,bandwagon')
65 |
66 | # filmTrust:random = [5, 395, 181, 565, 254] tail = [601, 623, 619, 64, 558]
67 | # ml100k:random = [62, 1077, 785, 1419, 1257] tail = [1319, 1612, 1509, 1545, 1373]
68 | # 1166,1574,759,494,549,1272,1728,1662,450,1456,595,566,764,1187,1816,1478,1721,2294,2413,1148
69 | # 62,1077,785,1419,1257,1319,1612,1509,1545,1373
70 | # 88,22,122,339,1431,1141,1656,477,1089,866
71 | parser.add_argument('--targets', type=str, default='88,22,122,339,1431,1141,1656,477,1089,866',
72 | help='attack_targets')
73 |
74 | parser.add_argument('--attack_num', type=int, default=50, help='fixed 50')
75 |
76 | parser.add_argument('--filler_num', type=int, default=4, help='90 for ml100k,36 for filmTrust')
77 | parser.add_argument('--bandwagon_selected', type=str, default='180,99,49',
78 | help='180,99,49 for ml100k,103,98,115 for filmTrust')
79 | #
80 | parser.add_argument('--sample_filler', type=int, default=1, help='sample filler')
81 | #
82 |
83 | args = parser.parse_args()
84 | #
85 | args.attack_methods = args.attack_methods.split(',')
86 | args.targets = list(map(int, args.targets.split(',')))
87 | args.bandwagon_selected = list(map(int, args.bandwagon_selected.split(',')))
88 | return args
89 |
90 |
91 | if __name__ == '__main__':
92 | """parse args"""
93 | args = parse_arg()
94 |
95 | """attack"""
96 | dataset_class, attack_info = get_data(args.dataset)
97 |
98 | for target_id in args.targets:
99 |
100 | attackSetting_path = '_'.join(map(str, [args.dataset, args.attack_num, args.filler_num, target_id]))
101 | attackSetting_path = "../data/data_attacked/" + attackSetting_path + '_attackSetting'
102 | if args.sample_filler:
103 | gan_attacker = Train_GAN_Attacker(dataset_class, params_D=None, params_G=None, target_id=target_id,
104 | selected_id_list=attack_info[target_id][0],
105 | filler_num=args.filler_num, attack_num=args.attack_num, filler_method=0)
106 | _, real_profiles, filler_indicator = gan_attacker.execute(is_train=0, model_path='no',
107 | final_attack_setting=[args.attack_num,
108 | None, None])
109 |
110 | np.save(attackSetting_path, [real_profiles, filler_indicator])
111 | else:
112 |             real_profiles, filler_indicator = np.load(attackSetting_path + '.npy', allow_pickle=True)  # object array saved above
113 |
114 | # for attack_method in args.attack_methods:
115 | #
116 | # attack_model = '_'.join([attack_method, str(args.attack_num), str(args.filler_num)])
117 | # # fake_profiles = baseline_attack(dataset_class, attack_info, attack_model, target_id,
118 | # # args.bandwagon_selected, filler_indicator)
119 | # fake_profiles = baseline_attack(dataset_class, attack_info, attack_model, target_id,
120 | # args.bandwagon_selected, None)
121 | #
122 | # ori_path = '../data/data/' + args.dataset + '_train.dat'
123 | # dst_path = "../data/data_attacked/" + '_'.join([args.dataset, str(target_id), attack_model]) + "_sample.dat"
124 | # attacked_file_writer(ori_path, dst_path, fake_profiles, dataset_class.n_users)
125 |
--------------------------------------------------------------------------------
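
Note (main_baseline_attack.py above): a minimal sketch of calling baseline_attack directly, with hypothetical values; the ml100k data and attack-info files must already exist under ../data/data/:

    from test_main.main_baseline_attack import get_data, baseline_attack

    dataset_class, attack_info = get_data('ml100k')
    # attack method is '<model>_<attack_num>_<filler_num>'; 180,99,49 are the ml100k bandwagon items noted above
    fake_profiles = baseline_attack(dataset_class, attack_info, 'average_50_90', 62, [180, 99, 49])
    # fake_profiles is then passed to attacked_file_writer to build the injected training file
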
/Leg-UP/models/detector/SDLib/tool/file.py:
--------------------------------------------------------------------------------
1 | import os.path
2 | from os.path import abspath
3 | from os import makedirs, remove
4 | from re import compile, findall, split
5 | # from config import LineConfig
6 | from collections import defaultdict
7 | class Config(object):
8 | def __init__(self, fileName):
9 | self.config = {}
10 | self.readConfiguration(fileName)
11 |
12 | def __getitem__(self, item):
13 | if not self.contains(item):
14 | print('parameter ' + item + ' is invalid!')
15 | exit(-1)
16 | return self.config[item]
17 |
18 | def getOptions(self, item):
19 | if not self.contains(item):
20 | print('parameter ' + item + ' is invalid!')
21 | exit(-1)
22 | return self.config[item]
23 |
24 | def contains(self, key):
25 |         return key in self.config  # dict.has_key was removed in Python 3
26 |
27 | def readConfiguration(self, fileName):
28 | if not os.path.exists(abspath(fileName)):
29 | print('config file is not found!')
30 | raise IOError
31 | with open(fileName) as f:
32 | for ind, line in enumerate(f):
33 | if line.strip() != '':
34 | try:
35 | key, value = line.strip().split('=')
36 | self.config[key] = value
37 | except ValueError:
38 | print('config file is not in the correct format! Error Line:%d' % (ind))
39 |
40 |
41 | class LineConfig(object):
42 | def __init__(self, content):
43 | self.line = content.strip().split(' ')
44 | self.options = {}
45 | self.mainOption = False
46 | if self.line[0] == 'on':
47 | self.mainOption = True
48 | elif self.line[0] == 'off':
49 | self.mainOption = False
50 | for i, item in enumerate(self.line):
51 | if (item.startswith('-') or item.startswith('--')) and not item[1:].isdigit():
52 | ind = i + 1
53 | for j, sub in enumerate(self.line[ind:]):
54 | if (sub.startswith('-') or sub.startswith('--')) and not sub[1:].isdigit():
55 | ind = j
56 | break
57 | if j == len(self.line[ind:]) - 1:
58 | ind = j + 1
59 | break
60 | try:
61 | self.options[item] = ' '.join(self.line[i + 1:i + 1 + ind])
62 | except IndexError:
63 | self.options[item] = 1
64 |
65 | def __getitem__(self, item):
66 | if not self.contains(item):
67 | print('parameter ' + item + ' is invalid!')
68 | exit(-1)
69 | return self.options[item]
70 |
71 | def getOption(self, key):
72 | if not self.contains(key):
73 | print('parameter ' + key + ' is invalid!')
74 | exit(-1)
75 | return self.options[key]
76 |
77 | def isMainOn(self):
78 | return self.mainOption
79 |
80 | def contains(self, key):
81 | return key in self.options
82 | # return self.options.has_key(key)
83 | class FileIO(object):
84 | def __init__(self):
85 | pass
86 |
87 | # @staticmethod
88 | # def writeFile(filePath,content,op = 'w'):
89 | # reg = compile('(.+[/|\\\]).+')
90 | # dirs = findall(reg,filePath)
91 | # if not os.path.exists(filePath):
92 | # os.makedirs(dirs[0])
93 | # with open(filePath,op) as f:
94 | # f.write(str(content))
95 |
96 | @staticmethod
97 | def writeFile(dir, file, content, op='w'):
98 | if not os.path.exists(dir):
99 | os.makedirs(dir)
100 |         if isinstance(content, str):
101 | with open(dir + file, op) as f:
102 | f.write(content)
103 | else:
104 | with open(dir + file, op) as f:
105 | f.writelines(content)
106 |
107 | @staticmethod
108 | def deleteFile(filePath):
109 | if os.path.exists(filePath):
110 | remove(filePath)
111 |
112 | @staticmethod
113 | def loadDataSet(conf, file, bTest=False):
114 | trainingData = defaultdict(dict)
115 | testData = defaultdict(dict)
116 | ratingConfig = LineConfig(conf['ratings.setup'])
117 | # if not bTest:
118 | # print('loading training data...')
119 | # else:
120 | # print('loading test data...')
121 | with open(file) as f:
122 | ratings = f.readlines()
123 | # ignore the headline
124 | if ratingConfig.contains('-header'):
125 | ratings = ratings[1:]
126 | # order of the columns
127 | order = ratingConfig['-columns'].strip().split()
128 |
129 | for lineNo, line in enumerate(ratings):
130 | items = split(' |,|\t', line.strip())
131 | if not bTest and len(order) < 3:
132 | print('The rating file is not in a correct format. Error: Line num %d' % lineNo)
133 | exit(-1)
134 | try:
135 | userId = items[int(order[0])]
136 | itemId = items[int(order[1])]
137 | if bTest and len(order) < 3:
138 | rating = 1 # default value
139 | else:
140 | rating = items[int(order[2])]
141 |
142 | except ValueError:
143 | print('Error! Have you added the option -header to the rating.setup?')
144 | exit(-1)
145 | if not bTest:
146 | trainingData[userId][itemId] = float(rating)
147 | else:
148 | testData[userId][itemId] = float(rating)
149 | if not bTest:
150 | return trainingData
151 | else:
152 | return testData
153 |
154 | @staticmethod
155 | def loadRelationship(conf, filePath):
156 | socialConfig = LineConfig(conf['social.setup'])
157 | relation = []
158 | print('loading social data...')
159 | with open(filePath) as f:
160 | relations = f.readlines()
161 | # ignore the headline
162 | if socialConfig.contains('-header'):
163 | relations = relations[1:]
164 | # order of the columns
165 | order = socialConfig['-columns'].strip().split()
166 | if len(order) <= 2:
167 | print('The social file is not in a correct format.')
168 | for lineNo, line in enumerate(relations):
169 | items = split(' |,|\t', line.strip())
170 | if len(order) < 2:
171 | print('The social file is not in a correct format. Error: Line num %d' % lineNo)
172 | exit(-1)
173 | userId1 = items[int(order[0])]
174 | userId2 = items[int(order[1])]
175 | if len(order) < 3:
176 | weight = 1
177 | else:
178 | weight = float(items[int(order[2])])
179 | relation.append([userId1, userId2, weight])
180 | return relation
181 |
182 | @staticmethod
183 | def loadLabels(filePath):
184 | labels = {}
185 | with open(filePath) as f:
186 | for line in f:
187 | items = split(' |,|\t', line.strip())
188 | labels[items[0]] = items[1]
189 | return labels
190 |
--------------------------------------------------------------------------------
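
Note (SDLib/tool/file.py above): a short sketch of how Config, LineConfig and FileIO.loadDataSet fit together. The file name and the 'ratings' key are hypothetical; only 'ratings.setup' is actually read by loadDataSet:

    from models.detector.SDLib.tool.file import Config, LineConfig, FileIO

    # example config file contents (one key=value per line):
    #   ratings=../dataset/ratings.txt
    #   ratings.setup=-columns 0 1 2
    conf = Config('detector.conf')
    cols = LineConfig(conf['ratings.setup'])['-columns'].split()  # -> ['0', '1', '2']
    train = FileIO.loadDataSet(conf, conf['ratings'])             # user -> {item: rating}
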
/AUSH/test_main/dcgan.py:
--------------------------------------------------------------------------------
1 | from __future__ import division
2 | from __future__ import print_function
3 | import sys
4 |
5 | sys.path.append("../")
6 | import os, argparse, time, math
7 | import numpy as np
8 | import tensorflow as tf
9 | from glob import glob
10 | from utils.attack.data_to_file import *
11 | from test_main.utils_dcgan import *
12 | from numpy import linalg as la
13 | from model.trainer_rec import *
14 | from test_main.main_eval_attack import eval_attack
15 | import utils as ut
16 |
17 | flags = tf.app.flags
18 | flags.DEFINE_integer("epoch", 64, "Epoch to train [25]")
19 | flags.DEFINE_float("learning_rate", 0.0002, "Learning rate for adam [0.0002]")
20 | flags.DEFINE_float("beta1", 0.5, "Momentum term of adam [0.5]")
21 | flags.DEFINE_integer("batch_size", 64, "The size of batch images [64]")
22 | flags.DEFINE_integer("max_to_keep", 1, "maximum number of checkpoints to keep")
23 | flags.DEFINE_integer("z_dim", 100, "dimensions of z")
24 | #
25 | flags.DEFINE_integer("T", 10, "adv opt epoch")
26 | flags.DEFINE_integer("K", 5, "top k svd") # 5
27 | flags.DEFINE_float("alpha", 50.0, "opt param")
28 | flags.DEFINE_float("eta", 100.0, "opt param")
29 | flags.DEFINE_integer("attack_num", 50, "attack_num")
30 | flags.DEFINE_integer("filler_num", 90, "filler_num")
31 | FLAGS = flags.FLAGS
32 |
33 | # gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.333)
34 | data_set_name = 'ml100k'
35 | target_ids = [62, 1077, 785, 1419, 1257, 1319, 1612, 1509, 1545, 1373]
36 |
37 | run_config = tf.ConfigProto()
38 | run_config.gpu_options.allow_growth = True
39 | path_train = '../data/data/' + data_set_name + '_train.dat'
40 | path_test = '../data/data/' + data_set_name + '_test.dat'
41 | attack_info_path = ["../data/data/" + data_set_name + "_selected_items",
42 | "../data/data/" + data_set_name + "_target_users"]
43 | # load the selected items and target users
44 | attack_info = load_attack_info(*attack_info_path)
45 | dataset_class = ut.load_data.load_data.load_data(path_train=path_train, path_test=path_test,
46 | header=['user_id', 'item_id', 'rating'],
47 | sep='\t', print_log=False)
48 |
49 |
50 | def train_Rec_model(injected_path, injected_profiles, target_id, model_path, train_epoch,
51 | model_name='IAutoRec', warm_start=False, restore_path=None):
52 | tf.reset_default_graph()
53 |
54 | attacked_file_writer(path_train, injected_path, injected_profiles, dataset_class.n_users)
55 |
56 | dataset_class_injected = ut.load_data.load_data.load_data(path_train=injected_path,
57 | path_test=path_test,
58 | header=['user_id', 'item_id', 'rating'],
59 | sep='\t', print_log=False)
60 |
61 | # tf.reset_default_graph()
62 | tf_config = tf.ConfigProto()
63 | tf_config.gpu_options.allow_growth = True
64 | with tf.Session() as sess:
65 | rec_model = get_model_network(sess, model_name, dataset_class_injected, train_epoch)
66 | if warm_start:
67 | # print('warm start')
68 | rec_model.restore(restore_path)
69 | rec_model.execute()
70 | rec_model.save(model_path)
71 | predictions, hit_ratios = pred_for_target(rec_model, target_id)
72 | return predictions, hit_ratios
73 |
74 |
75 | def opt_adv_intent(fake_users, filler_indicators, target_id):
76 | target_users = attack_info[target_id][1]
77 | model_path = "./IAutoRec_dcgan_%d.ckpt" % target_id
78 | injected_path = "./IAutoRec_dcgan_%d.dat" % target_id
79 |
80 | # ----------------------
81 | for t in range(FLAGS.T):
82 |
83 | injected_profiles = fake_users * filler_indicators
84 | predictions, _ = train_Rec_model(injected_path, injected_profiles, target_id, model_path, 10)
85 | f_adv_0 = np.sum(predictions[target_users])
86 | f_adv_k = f_adv_0
87 | print("opt_adv_intent\tepoch-%d adv goal\t%f" % (t, f_adv_k))
88 |
89 | delta_f_Adv = []
90 | B, Sigma, V = la.svd(fake_users)
91 | for k in range(FLAGS.K):
92 |
93 | Z_k = np.matmul(np.reshape(B[k], [FLAGS.attack_num, 1]), np.reshape(V[k], [1, dataset_class.n_items]))
94 |
95 | fake_users_k = fake_users + FLAGS.alpha * Z_k
96 |
97 | injected_profiles = fake_users_k * filler_indicators
98 | predictions, _ = train_Rec_model(injected_path, injected_profiles, target_id, model_path,
99 | 5, warm_start=True, restore_path=model_path)
100 | f_adv_k_new = np.sum(predictions[target_users])
101 |
102 | delta_f_Adv.append((f_adv_k_new - f_adv_k) * Z_k)
103 |
104 | delta_f_A = FLAGS.alpha * sum(delta_f_Adv)
105 | fake_users += FLAGS.eta * delta_f_A
106 | fake_users[fake_users <= 0] = 0.5
107 | fake_users[fake_users > 5] = 5
108 | return fake_users * filler_indicators
109 |
110 |
111 |
112 | tf.reset_default_graph()
113 | with tf.Session(config=run_config) as sess:
114 | dcgan = DCGAN(sess, dataset_class)
115 | # print("build_model_ok")
116 | dcgan.train(FLAGS)
117 | # save model
118 | saver = tf.train.Saver()
119 | saver.save(sess, './dcgan.ckpt')
120 |
121 | fake_users = None
122 | while True:
123 | batch_z = gen_random(size=[FLAGS.batch_size, dcgan.z_dim]).astype(np.float32)
124 | fake_users_ = sess.run(dcgan.G, feed_dict={dcgan.z: batch_z})
125 | # reshape&[-1,1]->[0,5]
126 | fake_users_ = fake_users_.reshape([fake_users_.shape[0], -1])
127 | fake_users_ = (fake_users_ * 2.5) + 2.5
128 |         fake_users = fake_users_ if fake_users is None else np.concatenate([fake_users, fake_users_], 0)  # accumulate batches
129 | if fake_users.shape[0] >= FLAGS.attack_num: break
130 | # attack_num
131 | fake_users = fake_users[:FLAGS.attack_num]
132 | # filler_num
133 | filler_indicators = []
134 | for i in range(FLAGS.attack_num):
135 | fillers_ = np.random.choice(list(range(dataset_class.n_items)), FLAGS.filler_num, replace=False)
136 | filler_indicator_ = [1 if iid in fillers_ else 0 for iid in range(dataset_class.n_items)]
137 | filler_indicators.append(filler_indicator_)
138 | filler_indicators = np.array(filler_indicators)
139 | np.save('./fake_user_dcgan', [fake_users, filler_indicators])
140 | # fake_users, filler_indicators = np.load('./fake_user_dcgan.npy')
141 |
142 | results = {}
143 | for target_id in target_ids:
144 |
145 | injected_profiles = opt_adv_intent(fake_users, filler_indicators, target_id)
146 |
147 |
148 | model_path = "./IAutoRec_dcgan_%d.ckpt" % target_id
149 | injected_path = "../data/data/ml100k_%d_dcgan_50_90.dat" % target_id
150 | target_users = attack_info[target_id][1]
151 | predictions, hit_ratios = train_Rec_model(injected_path, injected_profiles, target_id, model_path, 500)
152 | dst_path = "../result/pred_result/" + '_'.join(['IAutoRec', 'ml100k', str(target_id), 'dcgan'])
153 | target_prediction_writer(predictions, hit_ratios, dst_path)
154 |
155 | result = eval_attack('ml100k', 'IAutoRec', 'dcgan', target_id)
156 | results[target_id] = result
157 | print(target_id, result, '\n\n')
158 |     break  # note: only the first target id is evaluated in this script
159 |
160 | for target_id in results:
161 | print(target_id, results[target_id])
162 |
--------------------------------------------------------------------------------
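
Note (dcgan.py above): opt_adv_intent is a gradient-free update: perturb the fake profiles along the top-K SVD directions, measure how the adversarial objective (sum of the target users' predicted ratings) changes, and step along the weighted combination of those directions. A standalone sketch of that step; f_adv is a placeholder standing in for retraining the recommender and summing the target predictions:

    import numpy as np

    def adv_update(fake_users, f_adv, K=5, alpha=50.0, eta=100.0):
        # f_adv(profiles) -> float stands in for train_Rec_model + np.sum(predictions[target_users])
        B, _, V = np.linalg.svd(fake_users)
        base = f_adv(fake_users)
        delta = sum((f_adv(fake_users + alpha * np.outer(B[k], V[k])) - base) * np.outer(B[k], V[k])
                    for k in range(K))
        fake_users = fake_users + eta * alpha * delta
        fake_users[fake_users <= 0] = 0.5   # keep ratings in the (0, 5] range, as in the loop above
        fake_users[fake_users > 5] = 5.0
        return fake_users
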
/Leg-UP/models/detector/SDLib/method/FAP.py:
--------------------------------------------------------------------------------
1 | from models.detector.SDLib.baseclass.SDetection import SDetection
2 | from models.detector.SDLib.tool import config
3 | from sklearn.metrics import classification_report
4 | import numpy as np
5 | import random
6 |
7 | class FAP(SDetection):
8 |
9 | def __init__(self, conf, trainingSet=None, testSet=None, labels=None, fold='[1]'):
10 | super(FAP, self).__init__(conf, trainingSet, testSet, labels, fold)
11 |
12 | def readConfiguration(self):
13 | super(FAP, self).readConfiguration()
14 |         # s is the number of seed users treated as known spammers during training
15 |         self.s = int(self.config['seedUser'])
16 | # preserve the real spammer ID
17 | self.spammer = []
18 | for i in self.dao.user:
19 | if self.labels[i] == '1':
20 | self.spammer.append(self.dao.user[i])
21 | sThreshold = int(0.5 * len(self.spammer))
22 | if self.s > sThreshold :
23 | self.s = sThreshold
24 |             print('*** seedUser exceeds half the number of spammers, so it is set to', sThreshold, '***')
25 |
26 | # # predict top-k user as spammer
27 | self.k = int(self.config['topKSpam'])
28 | # 0.5 is the ratio of spammer to dataset, it can be changed according to different datasets
29 | kThreshold = int(0.5 * (len(self.dao.user) - self.s))
30 | if self.k > kThreshold:
31 | self.k = kThreshold
32 |             print('*** the number of top-K users exceeds the threshold, so it is set to', kThreshold, '***')
33 |         # build the transition probability matrices self.TPUI and self.TPIU
34 |
35 | def __computeTProbability(self):
36 | # m--user count; n--item count
37 | m, n, tmp = self.dao.trainingSize()
38 | self.TPUI = np.zeros((m, n))
39 | self.TPIU = np.zeros((n, m))
40 |
41 | self.userUserIdDic = {}
42 | self.itemItemIdDic = {}
43 | tmpUser = list(self.dao.user.values())
44 | tmpUserId = list(self.dao.user.keys())
45 | tmpItem = list(self.dao.item.values())
46 | tmpItemId = list(self.dao.item.keys())
47 | # tmpUser = self.dao.user.values()
48 | # tmpUserId = self.dao.user.keys()
49 | # tmpItem = self.dao.item.values()
50 | # tmpItemId = self.dao.item.keys()
51 | for users in range(0, m):
52 | self.userUserIdDic[tmpUser[users]] = tmpUserId[users]
53 | for items in range(0, n):
54 | self.itemItemIdDic[tmpItem[items]] = tmpItemId[items]
55 | for i in range(0, m):
56 | for j in range(0, n):
57 | user = self.userUserIdDic[i]
58 | item = self.itemItemIdDic[j]
59 |                 # if the user-item edge exists in the graph, set its weight; otherwise leave it 0
60 | if (user not in self.bipartiteGraphUI) or (item not in self.bipartiteGraphUI[user]):
61 | continue
62 | else:
63 | w = float(self.bipartiteGraphUI[user][item])
64 |                     # to avoid positive feedback and reliability problems, polish (normalize) the weight w
65 | otherItemW = 0
66 | otherUserW = 0
67 | for otherItem in self.bipartiteGraphUI[user]:
68 | otherItemW += float(self.bipartiteGraphUI[user][otherItem])
69 | for otherUser in self.dao.trainingSet_i[item]:
70 | otherUserW += float(self.bipartiteGraphUI[otherUser][item])
71 | # wPrime = w*1.0/(otherUserW * otherItemW)
72 | wPrime = w
73 | self.TPUI[i][j] = wPrime / otherItemW
74 | self.TPIU[j][i] = wPrime / otherUserW
75 | # if i % 100 == 0:
76 | # print ('progress: %d/%d' %(i,m))
77 |
78 | def initModel(self):
79 | # construction of the bipartite graph
80 | # print ("constructing bipartite graph...")
81 | self.bipartiteGraphUI = {}
82 | for user in self.dao.trainingSet_u:
83 | tmpUserItemDic = {} # user-item-point
84 | for item in self.dao.trainingSet_u[user]:
85 | # tmpItemUserDic = {}#item-user-point
86 | recordValue = float(self.dao.trainingSet_u[user][item])
87 | w = 1 + abs((recordValue - self.dao.userMeans[user]) / self.dao.userMeans[user]) + abs(
88 | (recordValue - self.dao.itemMeans[item]) / self.dao.itemMeans[item]) + abs(
89 | (recordValue - self.dao.globalMean) / self.dao.globalMean)
90 | # tmpItemUserDic[user] = w
91 | tmpUserItemDic[item] = w
92 | # self.bipartiteGraphIU[item] = tmpItemUserDic
93 | self.bipartiteGraphUI[user] = tmpUserItemDic
94 | # we do the polish in computing the transition probability
95 | # print ("computing transition probability...")
96 | self.__computeTProbability()
97 |
98 | def isConvergence(self, PUser, PUserOld):
99 | if len(PUserOld) == 0:
100 | return True
101 | for i in range(0, len(PUser)):
102 | if (PUser[i] - PUserOld[i]) > 0.01:
103 | return True
104 | return False
105 |
106 | def buildModel(self):
107 | # -------init--------
108 | m, n, tmp = self.dao.trainingSize()
109 | PUser = np.zeros(m)
110 | PItem = np.zeros(n)
111 | self.testLabels = [0 for i in range(m)]
112 | self.predLabels = [0 for i in range(m)]
113 |
114 | # preserve seedUser Index
115 | self.seedUser = []
116 | randDict = {}
117 | for i in range(0, self.s):
118 | randNum = random.randint(0, len(self.spammer) - 1)
119 | while randNum in randDict:
120 | randNum = random.randint(0, len(self.spammer) - 1)
121 | randDict[randNum] = 0
122 | self.seedUser.append(int(self.spammer[randNum]))
123 | # print len(randDict), randDict
124 |
125 | #initial user and item spam probability
126 | for j in range(0, m):
127 | if j in self.seedUser:
128 | #print type(j),j
129 | PUser[j] = 1
130 | else:
131 | PUser[j] = random.random()
132 | for tmp in range(0, n):
133 | PItem[tmp] = random.random()
134 |
135 | # -------iterator-------
136 | PUserOld = []
137 | iterator = 0
138 | while self.isConvergence(PUser, PUserOld):
139 | #while iterator < 100:
140 | for j in self.seedUser:
141 | PUser[j] = 1
142 | PUserOld = PUser
143 | PItem = np.dot(self.TPIU, PUser)
144 | PUser = np.dot(self.TPUI, PItem)
145 | iterator += 1
146 | # print (self.foldInfo,'iteration', iterator)
147 |
148 | PUserDict = {}
149 | userId = 0
150 | for i in PUser:
151 | PUserDict[userId] = i
152 | userId += 1
153 | for j in self.seedUser:
154 | del PUserDict[j]
155 |
156 | self.PSort = sorted(PUserDict.items(), key=lambda d: d[1], reverse=True)
157 |
158 |
159 | def predict(self):
160 | # predLabels
161 | # top-k user as spammer
162 | spamList = []
163 | sIndex = 0
164 | while sIndex < self.k:
165 | spam = self.PSort[sIndex][0]
166 | spamList.append(spam)
167 | self.predLabels[spam] = 1
168 | sIndex += 1
169 |
170 | # trueLabels
171 | for user in self.dao.trainingSet_u:
172 | userInd = self.dao.user[user]
173 | # print type(user), user, userInd
174 | self.testLabels[userInd] = int(self.labels[user])
175 |
176 | # delete seedUser labels
177 | differ = 0
178 | for user in self.seedUser:
179 | user = int(user - differ)
180 | # print type(user)
181 | del self.predLabels[user]
182 | del self.testLabels[user]
183 | differ += 1
184 |
185 | return self.predLabels
186 |
--------------------------------------------------------------------------------
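
Note (FAP.py above): the loop in buildModel is a bipartite propagation: spam probability flows from users to items and back through the transition matrices, with the seed spammers clamped to 1 each round. A toy sketch with made-up transition matrices:

    import numpy as np

    TPUI = np.array([[0.5, 0.5],       # user -> item transitions (3 users x 2 items, toy values)
                     [1.0, 0.0],
                     [0.0, 1.0]])
    TPIU = np.array([[0.4, 0.4, 0.2],  # item -> user transitions (2 items x 3 users, toy values)
                     [0.3, 0.0, 0.7]])
    PUser, seed = np.random.rand(3), [0]   # user 0 is a labelled spammer seed
    for _ in range(20):
        PUser[seed] = 1.0                  # clamp seeds, as in buildModel
        PItem = TPIU.dot(PUser)            # items rated by suspicious users become suspicious
        PUser = TPUI.dot(PItem)            # users who rate suspicious items become suspicious
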
/AUSH/model/nnmf.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | """Implementation of Neural Network Matrix Factorization.
3 | Reference: Dziugaite, Gintare Karolina, and Daniel M. Roy. "Neural network matrix factorization." arXiv preprint arXiv:1511.06443 (2015).
4 | """
5 |
6 | try:
7 | import tensorflow.compat.v1 as tf
8 |
9 | tf.disable_v2_behavior()
10 | except:
11 | import tensorflow as tf
12 | import time
13 | import numpy as np
14 | import math
15 |
16 | __author__ = "Shuai Zhang"
17 | __copyright__ = "Copyright 2018, The DeepRec Project"
18 |
19 | __license__ = "GPL"
20 | __version__ = "1.0.0"
21 | __maintainer__ = "Shuai Zhang"
22 | __email__ = "cheungdaven@gmail.com"
23 | __status__ = "Development"
24 |
25 |
26 | class NNMF():
27 | def __init__(self, sess, dataset_class, num_factor_1=100, num_factor_2=10, hidden_dimension=50,
28 | learning_rate=0.001, reg_rate=0.01, epoch=500, batch_size=256,
29 | show_time=False, T=5, display_step=1000):
30 | self.learning_rate = learning_rate
31 | self.epochs = epoch
32 | self.batch_size = batch_size
33 | self.reg_rate = reg_rate
34 | self.sess = sess
35 | self.dataset_class = dataset_class
36 | self.num_user = dataset_class.n_users
37 | self.num_item = dataset_class.n_items
38 | self.dataset_class.test_matrix_dok = self.dataset_class.test_matrix.todok()
39 |
40 | self.num_factor_1 = num_factor_1
41 | self.num_factor_2 = num_factor_2
42 | self.hidden_dimension = hidden_dimension
43 | self.show_time = show_time
44 | self.T = T
45 | self.display_step = display_step
46 | print("NNMF.")
47 |
48 | self.dataset_class_train_matrix_coo = self.dataset_class.train_matrix.tocoo()
49 | self.user = self.dataset_class_train_matrix_coo.row.reshape(-1)
50 | self.item = self.dataset_class_train_matrix_coo.col.reshape(-1)
51 | self.rating = self.dataset_class_train_matrix_coo.data
52 |
53 | self._build_network()
54 | init = tf.global_variables_initializer()
55 | self.sess.run(init)
56 |
57 | def _build_network(self):
58 | print("num_factor_1=%d, num_factor_2=%d, hidden_dimension=%d" % (
59 | self.num_factor_1, self.num_factor_2, self.hidden_dimension))
60 |
61 | # model dependent arguments
62 | self.user_id = tf.placeholder(dtype=tf.int32, shape=[None], name='user_id')
63 | self.item_id = tf.placeholder(dtype=tf.int32, shape=[None], name='item_id')
64 | self.y = tf.placeholder("float", [None], 'rating')
65 | # latent feature vectors
66 | P = tf.Variable(tf.random_normal([self.num_user, self.num_factor_1], stddev=0.01))
67 | Q = tf.Variable(tf.random_normal([self.num_item, self.num_factor_1], stddev=0.01))
68 | # latent feature matrix(K=1?)
69 | U = tf.Variable(tf.random_normal([self.num_user, self.num_factor_2], stddev=0.01))
70 | V = tf.Variable(tf.random_normal([self.num_item, self.num_factor_2], stddev=0.01))
71 |
72 | input = tf.concat(values=[tf.nn.embedding_lookup(P, self.user_id),
73 | tf.nn.embedding_lookup(Q, self.item_id),
74 | tf.multiply(tf.nn.embedding_lookup(U, self.user_id),
75 | tf.nn.embedding_lookup(V, self.item_id))
76 | ], axis=1)
77 | #
78 | # tf1->tf2
79 | # regularizer = tf.contrib.layers.l2_regularizer(scale=self.reg_rate)
80 | regularizer = tf.keras.regularizers.l2(self.reg_rate)
81 | layer_1 = tf.layers.dense(inputs=input, units=2 * self.num_factor_1 + self.num_factor_2,
82 | bias_initializer=tf.random_normal_initializer,
83 | kernel_initializer=tf.random_normal_initializer, activation=tf.sigmoid,
84 | kernel_regularizer=regularizer)
85 | layer_2 = tf.layers.dense(inputs=layer_1, units=self.hidden_dimension, activation=tf.sigmoid,
86 | bias_initializer=tf.random_normal_initializer,
87 | kernel_initializer=tf.random_normal_initializer,
88 | kernel_regularizer=regularizer)
89 | layer_3 = tf.layers.dense(inputs=layer_2, units=self.hidden_dimension, activation=tf.sigmoid,
90 | bias_initializer=tf.random_normal_initializer,
91 | kernel_initializer=tf.random_normal_initializer,
92 | kernel_regularizer=regularizer)
93 | layer_4 = tf.layers.dense(inputs=layer_3, units=self.hidden_dimension, activation=tf.sigmoid,
94 | bias_initializer=tf.random_normal_initializer,
95 | kernel_initializer=tf.random_normal_initializer,
96 | kernel_regularizer=regularizer)
97 | output = tf.layers.dense(inputs=layer_4, units=1, activation=None,
98 | bias_initializer=tf.random_normal_initializer,
99 | kernel_initializer=tf.random_normal_initializer,
100 | kernel_regularizer=regularizer)
101 | self.pred_rating = tf.reshape(output, [-1])
102 | self.loss = tf.reduce_sum(tf.square(self.y - self.pred_rating)) \
103 | + tf.losses.get_regularization_loss() + self.reg_rate * (
104 | tf.norm(U) + tf.norm(V) + tf.norm(P) + tf.norm(Q))
105 | self.optimizer = tf.train.RMSPropOptimizer(learning_rate=self.learning_rate).minimize(self.loss)
106 |
107 | def train(self):
108 | self.num_training = len(self.rating)
109 | total_batch = int(self.num_training / self.batch_size)
110 | idxs = np.random.permutation(self.num_training) # shuffled ordering
111 | user_random = list(self.user[idxs])
112 | item_random = list(self.item[idxs])
113 | rating_random = list(self.rating[idxs])
114 | # train
115 | for i in range(total_batch):
116 | batch_user = user_random[i * self.batch_size:(i + 1) * self.batch_size]
117 | batch_item = item_random[i * self.batch_size:(i + 1) * self.batch_size]
118 | batch_rating = rating_random[i * self.batch_size:(i + 1) * self.batch_size]
119 |
120 | _, loss = self.sess.run([self.optimizer, self.loss], feed_dict={self.user_id: batch_user,
121 | self.item_id: batch_item,
122 | self.y: batch_rating
123 | })
124 | return loss
125 |
126 | def test(self, test_data):
127 | error = 0
128 | error_mae = 0
129 | test_set = list(test_data.keys())
130 | for (u, i) in test_set:
131 | pred_rating_test = self.predict([u], [i])[0]
132 | error += (float(test_data.get((u, i))) - pred_rating_test) ** 2
133 | error_mae += (np.abs(float(test_data.get((u, i))) - pred_rating_test))
134 | rmse = np.sqrt(error / len(test_set))
135 | mae = error_mae / len(test_set)
136 | return rmse, mae
137 |
138 | def execute(self):
139 | loss_prev = float("inf")
140 | for epoch in range(self.epochs):
141 | loss_cur = self.train()
142 | if epoch % self.T == 0:
143 | print("epoch:\t", epoch, "\tloss:\t", loss_cur)
144 | if abs(loss_cur - loss_prev) < math.exp(-5):
145 | break
146 | loss_prev = loss_cur
147 | rmse, mae = self.test(self.dataset_class.test_matrix_dok)
148 | print("training done\tRMSE : ", rmse, "\tMAE : ", mae)
149 |
150 | def save(self, path):
151 | saver = tf.train.Saver()
152 | saver.save(self.sess, path)
153 |
154 | def restore(self, path):
155 | init = tf.global_variables_initializer()
156 | self.sess.run(init)
157 | saver = tf.train.Saver()
158 | saver.restore(self.sess, path)
159 |
160 | def predict(self, user_id, item_id):
161 | if type(item_id) != list:
162 | item_id = [item_id]
163 | if type(user_id) != list:
164 | user_id = [user_id] * len(item_id)
165 | return self.sess.run([self.pred_rating], feed_dict={self.user_id: user_id, self.item_id: item_id})[0]
166 |
--------------------------------------------------------------------------------
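
Note (nnmf.py above): NNMF predicts a rating with an MLP over the concatenation of P_u, Q_i and the element-wise product of U_u and V_i. A minimal usage sketch, assuming the AUSH root is on sys.path (as in the test_main scripts) and that dataset_class was produced by utils.load_data.load_data.load_data:

    try:
        import tensorflow.compat.v1 as tf
        tf.disable_v2_behavior()
    except ImportError:
        import tensorflow as tf
    from model.nnmf import NNMF

    with tf.Session() as sess:
        model = NNMF(sess, dataset_class, epoch=100)  # dataset_class assumed to be loaded beforehand
        model.execute()                               # trains and reports test RMSE/MAE
        print(model.predict([0], [10, 11]))           # predicted ratings of user 0 for items 10 and 11
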
/Leg-UP/models/detector/SDLib/data/rating.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | # from structure import sparseMatrix,new_sparseMatrix
3 | from models.detector.SDLib.tool.config import Config, LineConfig
4 | from models.detector.SDLib.tool.qmath import normalize
5 | from models.detector.SDLib.tool.dataSplit import DataSplit
6 | import os.path
7 | from re import split
8 | from collections import defaultdict
9 |
10 |
11 | class RatingDAO(object):
12 | 'data access control'
13 |
14 | def __init__(self, config, trainingData, testData):
15 | self.config = config
16 | self.ratingConfig = LineConfig(config['ratings.setup'])
17 | self.user = {} # used to store the order of users in the training set
18 | self.item = {} # used to store the order of items in the training set
19 | self.id2user = {}
20 | self.id2item = {}
21 | self.all_Item = {}
22 | self.all_User = {}
23 |         self.userMeans = {}  # used to store the mean values of users' ratings
24 |         self.itemMeans = {}  # used to store the mean values of items' ratings
25 |
26 | self.globalMean = 0
27 | self.timestamp = {}
28 | # self.trainingMatrix = None
29 | # self.validationMatrix = None
30 | self.testSet_u = testData.copy() # used to store the test set by hierarchy user:[item,rating]
31 | self.testSet_i = defaultdict(dict) # used to store the test set by hierarchy item:[user,rating]
32 | self.trainingSet_u = trainingData.copy()
33 | self.trainingSet_i = defaultdict(dict)
34 | # self.rScale = []
35 |
36 | self.trainingData = trainingData
37 | self.testData = testData
38 | self.__generateSet()
39 | self.__computeItemMean()
40 | self.__computeUserMean()
41 | self.__globalAverage()
42 |
43 | def __generateSet(self):
44 | scale = set()
45 | # find the maximum rating and minimum value
46 | # for i, entry in enumerate(self.trainingData):
47 | # userName, itemName, rating = entry
48 | # scale.add(float(rating))
49 | # self.rScale = list(scale)
50 | # self.rScale.sort()
51 |
52 | for i, user in enumerate(self.trainingData):
53 | for item in self.trainingData[user]:
54 |
55 | # makes the rating within the range [0, 1].
56 | # rating = normalize(float(rating), self.rScale[-1], self.rScale[0])
57 | # self.trainingSet_u[userName][itemName] = float(rating)
58 | self.trainingSet_i[item][user] = self.trainingData[user][item]
59 | # order the user
60 | # if not self.user.has_key(user):
61 | if user not in self.user:
62 | self.user[user] = len(self.user)
63 | self.id2user[self.user[user]] = user
64 | # order the item
65 | # if not self.item.has_key(item):
66 | if item not in self.item:
67 | self.item[item] = len(self.item)
68 | self.id2item[self.item[item]] = item
69 | self.trainingSet_i[item][user] = self.trainingData[user][item]
70 | # userList.append
71 | # triple.append([self.user[userName], self.item[itemName], rating])
72 | # self.trainingMatrix = new_sparseMatrix.SparseMatrix(triple)
73 |
74 | self.all_User.update(self.user)
75 | self.all_Item.update(self.item)
76 |
77 | for i, user in enumerate(self.testData):
78 | # order the user
79 | # if not self.user.has_key(user):
80 | if user not in self.user:
81 | self.all_User[user] = len(self.all_User)
82 | for item in self.testData[user]:
83 | # order the item
84 | # if not self.item.has_key(item):
85 | if item not in self.item:
86 | self.all_Item[item] = len(self.all_Item)
87 | # self.testSet_u[userName][itemName] = float(rating)
88 | self.testSet_i[item][user] = self.testData[user][item]
89 |
90 | def __globalAverage(self):
91 | total = sum(self.userMeans.values())
92 | if total == 0:
93 | self.globalMean = 0
94 | else:
95 | self.globalMean = total / len(self.userMeans)
96 |
97 | def __computeUserMean(self):
98 | # for u in self.user:
99 | # n = self.row(u) > 0
100 | # mean = 0
101 | #
102 | # if not self.containsUser(u): # no data about current user in training set
103 | # pass
104 | # else:
105 | # sum = float(self.row(u)[0].sum())
106 | # try:
107 | # mean = sum/ n[0].sum()
108 | # except ZeroDivisionError:
109 | # mean = 0
110 | # self.userMeans[u] = mean
111 | for u in self.trainingSet_u:
112 | self.userMeans[u] = sum(self.trainingSet_u[u].values()) / (len(self.trainingSet_u[u].values()) + 0.0)
113 | for u in self.testSet_u:
114 | self.userMeans[u] = sum(self.testSet_u[u].values()) / (len(self.testSet_u[u].values()) + 0.0)
115 |
116 | def __computeItemMean(self):
117 | # for c in self.item:
118 | # n = self.col(c) > 0
119 | # mean = 0
120 | # if not self.containsItem(c): # no data about current user in training set
121 | # pass
122 | # else:
123 | # sum = float(self.col(c)[0].sum())
124 | # try:
125 | # mean = sum / n[0].sum()
126 | # except ZeroDivisionError:
127 | # mean = 0
128 | # self.itemMeans[c] = mean
129 | for item in self.trainingSet_i:
130 | self.itemMeans[item] = sum(self.trainingSet_i[item].values()) / (
131 | len(self.trainingSet_i[item].values()) + 0.0)
132 | for item in self.testSet_i:
133 | self.itemMeans[item] = sum(self.testSet_i[item].values()) / (len(self.testSet_i[item].values()) + 0.0)
134 |
135 | def getUserId(self, u):
136 |         if u in self.user:
137 | return self.user[u]
138 | else:
139 | return -1
140 |
141 | def getItemId(self, i):
142 |         if i in self.item:
143 | return self.item[i]
144 | else:
145 | return -1
146 |
147 | def trainingSize(self):
148 | recordCount = 0
149 | for user in self.trainingData:
150 | recordCount += len(self.trainingData[user])
151 | return (len(self.trainingSet_u), len(self.trainingSet_i), recordCount)
152 |
153 | def testSize(self):
154 | recordCount = 0
155 | for user in self.testData:
156 | recordCount += len(self.testData[user])
157 | return (len(self.testSet_u), len(self.testSet_i), recordCount)
158 |
159 | def contains(self, u, i):
160 | 'whether user u rated item i'
161 |         if u in self.trainingSet_u and i in self.trainingSet_u[u]:
162 | return True
163 | return False
164 |
165 | def containsUser(self, u):
166 | 'whether user is in training set'
167 |         return u in self.trainingSet_u
168 |
169 | def containsItem(self, i):
170 | 'whether item is in training set'
171 |         return i in self.trainingSet_i
172 |
173 | def allUserRated(self, u):
174 | if u in self.user:
175 | return self.trainingSet_u[u].keys(), self.trainingSet_u[u].values()
176 | else:
177 | return self.testSet_u[u].keys(), self.testSet_u[u].values()
178 | # def userRated(self,u):
179 | # if self.trainingMatrix.matrix_User.has_key(self.getUserId(u)):
180 | # itemIndex = self.trainingMatrix.matrix_User[self.user[u]].keys()
181 | # rating = self.trainingMatrix.matrix_User[self.user[u]].values()
182 | # return (itemIndex,rating)
183 | # return ([],[])
184 | #
185 | # def itemRated(self,i):
186 | # if self.trainingMatrix.matrix_Item.has_key(self.getItemId(i)):
187 | # userIndex = self.trainingMatrix.matrix_Item[self.item[i]].keys()
188 | # rating = self.trainingMatrix.matrix_Item[self.item[i]].values()
189 | # return (userIndex,rating)
190 | # return ([],[])
191 |
192 | # def row(self,u):
193 | # return self.trainingMatrix.row(self.getUserId(u))
194 | #
195 | # def col(self,c):
196 | # return self.trainingMatrix.col(self.getItemId(c))
197 | #
198 | # def sRow(self,u):
199 | # return self.trainingMatrix.sRow(self.getUserId(u))
200 | #
201 | # def sCol(self,c):
202 | # return self.trainingMatrix.sCol(self.getItemId(c))
203 | #
204 | # def rating(self,u,c):
205 | # return self.trainingMatrix.elem(self.getUserId(u),self.getItemId(c))
206 | #
207 | # def ratingScale(self):
208 | # return (self.rScale[0],self.rScale[1])
209 |
210 | # def elemCount(self):
211 | # return self.trainingMatrix.elemCount()
212 |
--------------------------------------------------------------------------------
/AUSH/test_main/main_eval_similarity_foryangqian.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # @Time : 2019/8/25 19:38
3 | # @Author : chensi
4 | # @File : main_eval_similarity_foryangqian.py
5 | # @Software : PyCharm
6 | # @Desciption : None
7 |
8 | import numpy as np
9 | from numpy.linalg import *
10 | import scipy.stats
11 | import sys, os, argparse
12 | import pandas as pd
13 |
14 | sys.path.append("../")
15 | from test_main.main_baseline_attack import baseline_attack
16 | from test_main.main_gan_attack import gan_attack
17 | from test_main.main_gan_attack_baseline import gan_attack as gan_attack_baseline
18 | from utils.load_data.load_data import *
19 | from utils.load_data.load_attack_info import load_attack_info
20 | from model.attack_model.gan_attack.trainer import Train_GAN_Attacker
21 |
22 |
23 | def eval_eigen_value(profiles):
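   |     # eigenvalues of the co-rating matrix U^T U; the first ten returned by numpy.linalg.eig are reported (eig does not sort them)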
24 | U_T_U = np.dot(profiles.transpose(), profiles)
25 | eig_val, _ = eig(U_T_U)
26 | top_10 = [i.real for i in eig_val[:10]]
27 | return top_10
28 |
29 |
30 | def get_item_distribution(profiles):
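   |     # per-item distribution over the integer rating levels 0..5 (ratings are rounded and clipped), normalised to sum to 1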
31 | # [min(max(0, round(i)), 5) for i in a]
32 | profiles_T = profiles.transpose()
33 | fn_count = lambda item_vec: np.array(
34 | [sum([1 if (min(max(0, round(j)), 5) == i) else 0 for j in item_vec]) for i in range(6)])
35 | fn_norm = lambda item_vec: item_vec / sum(item_vec)
36 | item_distribution = np.array(list(map(fn_count, profiles_T)))
37 | item_distribution = np.array(list(map(fn_norm, item_distribution)))
38 | return item_distribution
39 |
40 |
41 | def eval_TVD_JS(P, Q):
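   |     # average total variation distance and Jensen-Shannon divergence between the per-item rating distributions P and Q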
42 | # TVD
43 | dis_TVD = np.mean(np.sum(np.abs(P - Q) / 2, 1))
44 | # JS
45 | fn_KL = lambda p, q: scipy.stats.entropy(p, q)
46 | M = (P + Q) / 2
47 | js_vec = []
48 | for iid in range(P.shape[0]):
49 | p, q, m = P[iid], Q[iid], M[iid]
50 | js_vec.append((fn_KL(p, m) + fn_KL(q, m)) / 2)
51 | dis_JS = np.mean(np.array(js_vec))
52 | return dis_TVD, dis_JS
53 |
54 |
55 | def print_eigen_result(real_profiles, fake_profiles_gan, baseline_fake_profiles, baseline_methods):
56 | top_10_res = []
57 | top_10_real = eval_eigen_value(real_profiles)
58 | top_10_res.append("real\t" + '\t'.join(map(str, top_10_real)))
59 | top_10_baseline = []
60 | for idx in range(len(baseline_fake_profiles)):
61 | top_10_baseline.append(eval_eigen_value(baseline_fake_profiles[idx]))
62 | top_10_res.append(baseline_methods[idx] + "\t" + '\t'.join(map(str, top_10_baseline[-1])))
63 | top_10_gan = eval_eigen_value(fake_profiles_gan)
64 | # top_10_sample_5 = eval_eigen_value(fake_profiles_sample_5)
65 | # top_10_real_sample = eval_eigen_value(real_profiles_gan)
66 | top_10_res.append("gan\t" + '\t'.join(map(str, top_10_gan)))
67 | # top_10_res.append("sample_5\t" + '\t'.join(map(str, top_10_sample_5)))
68 | # top_10_res.append("real_sample\t" + '\t'.join(map(str, top_10_real_sample)))
69 | print("\n".join(top_10_res))
70 |
71 |
72 | def get_distance_result(target_id, real_profiles, fake_profiles_list, method_name):
73 | k = ['target_id', 'attack_method', 'dis_TVD', 'dis_JS']
74 | v = [[], [], [], []]
75 | res_dis = []
76 | real_item_distribution = get_item_distribution(real_profiles)
77 | for idx in range(len(fake_profiles_list)):
78 | dis_TVD, dis_JS = eval_TVD_JS(real_item_distribution, get_item_distribution(fake_profiles_list[idx]))
79 | v[1] += [method_name[idx]]
80 | v[2] += [dis_TVD]
81 | v[3] += [dis_JS]
82 | v[0] = [target_id] * len(v[1])
83 | result = pd.DataFrame(dict(zip(k, v)))
84 | return result
85 |
86 |
87 | def profiles_generator(target_id, dataset_class, attack_info, bandwagon_selected, sample_num, args, real_profiles,
88 | filler_indicator, pre_fix, has_G=False):
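   |     # builds fake profiles for the classic baselines, once with the shared filler template and once without it (the *_rand variants), plus the optional G0/G1 variants and the GAN attacker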
89 | # baseline fake profiles
90 | baseline_methods = ["segment", "average", "random", "bandwagon"]
91 | baseline_fake_profiles = []
92 | for attack_method in baseline_methods:
93 | attack_model = '_'.join([attack_method, str(sample_num), str(args.filler_num)])
94 | fake_profiles = baseline_attack(dataset_class, attack_info, attack_model, target_id,
95 | bandwagon_selected, filler_indicator)
96 | baseline_fake_profiles.append(fake_profiles)
97 |
98 | for attack_method in baseline_methods:
99 | attack_model = '_'.join([attack_method, str(sample_num), str(args.filler_num)])
100 | fake_profiles = baseline_attack(dataset_class, attack_info, attack_model, target_id,
101 | bandwagon_selected, None)
102 | baseline_fake_profiles.append(fake_profiles)
103 | baseline_methods = baseline_methods + [i + '_rand' for i in baseline_methods]
104 |
105 | final_attack_setting = [sample_num, real_profiles, filler_indicator]
106 | # new_baseline
107 | if has_G:
108 | for attack_method in ['G0' + pre_fix, 'G1' + pre_fix]:
109 | baseline_methods.append(attack_method)
110 | fake_profiles_G, _, _ = gan_attack_baseline(args.dataset, attack_method, target_id, False, 0,
111 | final_attack_setting=final_attack_setting)
112 | baseline_fake_profiles.append(fake_profiles_G)
113 |
114 | # gan profiles
115 | attack_method = "gan" + pre_fix
116 | fake_profiles_gan, _, _ = gan_attack(args.dataset, attack_method, target_id, False, write_to_file=0,
117 | final_attack_setting=final_attack_setting)
118 | return fake_profiles_gan, baseline_fake_profiles, baseline_methods
119 |
120 |
121 | def parse_arg():
122 | parser = argparse.ArgumentParser()
123 |
124 | parser.add_argument('--dataset', type=str, default='ml100k',
125 | help='input data_set_name,filmTrust or ml100k grocery')
126 |
127 | parser.add_argument('--attack_num', type=int, default=50,
128 | help='num of attack fake user,50 for ml100k and filmTrust')
129 |
130 | parser.add_argument('--filler_num', type=int, default=90,
131 | help='num of filler items each fake user,90 for ml100k,36 for filmTrust')
132 | # filmTrust:5,395,181,565,254,601,623,619,64,558 - random*5+tail*5
133 | # ml100k:62,1077,785,1419,1257,1319,1612,1509,1545,1373 - random*5+tail*5
134 | parser.add_argument('--targets', type=str, default='62,1077,785,1419,1257,1319,1612,1509,1545,1373',
135 | help='attack_targets')
136 | parser.add_argument('--bandwagon_selected', type=str, default='180,99,49',
137 | help='180,99,49 for ml100k,103,98,115 for filmTrust')
138 | #
139 | args = parser.parse_args()
140 | #
141 | args.targets = list(map(int, args.targets.split(',')))
142 | args.bandwagon_selected = list(map(int, args.bandwagon_selected.split(',')))
143 | return args
144 |
145 |
146 | if __name__ == '__main__':
147 | """
148 | step1 - load data
149 |     step2 - sample real-profile templates and load the DC-GAN / W-GAN attacked profiles
150 |     step3 - compute TVD/JS distances between the real and fake rating distributions and export the result
151 | """
152 |
153 | #
154 | """parse args"""
155 | args = parse_arg()
156 | pre_fix = '_' + str(args.attack_num) + '_' + str(args.filler_num)
157 |
158 | """step1 - load data"""
159 | path_train = "../data/data/" + args.dataset + "_train.dat"
160 | path_test = "../data/data/" + args.dataset + "_test.dat"
161 | attack_info_path = ["../data/data/" + args.dataset + "_selected_items",
162 | "../data/data/" + args.dataset + "_target_users"]
163 | dataset_class = load_data(path_train=path_train, path_test=path_test, header=['user_id', 'item_id', 'rating'],
164 | sep='\t', print_log=False)
165 | attack_info = load_attack_info(*attack_info_path)
166 |
167 | sample_num = dataset_class.n_users
168 | result = None
169 | for target_id in args.targets:
170 | selected = attack_info[target_id][0]
171 | """step2.1 - real_profiles"""
172 | gan_attacker = Train_GAN_Attacker(dataset_class, params_D=None, params_G=None, target_id=target_id,
173 | selected_id_list=selected, filler_num=args.filler_num,
174 | attack_num=args.attack_num, filler_method=0)
175 | _, real_profiles, filler_indicator = gan_attacker.execute(is_train=0, model_path='no',
176 | final_attack_setting=[sample_num, None, None])
177 |         """step2.2 - load the DC-GAN / W-GAN fake profiles"""
178 |
179 |         dir = None  # directory holding the DC-GAN / W-GAN attacked rating files; set this path before running
180 | fake_profiles_list = []
181 | method_list = []
182 | for attack_method in ['IAutoRec', 'UAutoRec', 'NNMF', 'NMF_25']:
183 | path_dcgan = dir + 'D-%s-ml100k\\ml100k_%d_dcgan_50_90.dat' % (attack_method, target_id)
184 | dataset_class_dcgan = load_data(path_train=path_dcgan, path_test=path_test,
185 | header=['user_id', 'item_id', 'rating'],
186 | sep='\t', print_log=False)
187 | fake_profiles_ = dataset_class_dcgan.train_matrix.toarray()[dataset_class.n_users:]
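    |             # tile the fake profiles until their count matches the number of real users, then truncate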
188 | while fake_profiles_.shape[0] < dataset_class.n_users:
189 | fake_profiles_ = np.concatenate([fake_profiles_, fake_profiles_])
190 | fake_profiles_ = fake_profiles_[:dataset_class.n_users]
191 |
192 |             path_wgan = dir + 'W-%s-ml100k\\ml100k_%d_wgan_50_90.dat' % (attack_method, target_id)
193 |             dataset_class_wgan = load_data(path_train=path_wgan, path_test=path_test,
194 |                                            header=['user_id', 'item_id', 'rating'],
195 |                                            sep='\t', print_log=False)
196 |             fake_profiles_w = dataset_class_wgan.train_matrix.toarray()[dataset_class.n_users:]
197 | while fake_profiles_w.shape[0] < dataset_class.n_users:
198 | fake_profiles_w = np.concatenate([fake_profiles_w, fake_profiles_w])
199 | fake_profiles_w = fake_profiles_w[:dataset_class.n_users]
200 | #
201 | fake_profiles_list += [fake_profiles_, fake_profiles_w]
202 | method_list += ['dcgan', 'wgan']
203 | """step3 """
204 | result_ = get_distance_result(target_id, real_profiles, fake_profiles_list, method_list)
205 | result = result_ if result is None else pd.concat([result, result_])
206 | print(result)
207 |     result.groupby('attack_method').mean().to_excel(args.dataset + '_distance_new.xls')  # keep attack_method as the index column
208 |
--------------------------------------------------------------------------------
/AUSH/test_main/main_eval_similarity.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # @Time : 2019/8/25 19:38
3 | # @Author : chensi
4 | # @File : main_eval_similarity.py
5 | # @Software : PyCharm
6 | # @Desciption : None
7 |
8 | import numpy as np
9 | from numpy.linalg import *
10 | import scipy.stats
11 | import sys, os, argparse
12 | import pandas as pd
13 |
14 | sys.path.append("../")
15 | from test_main.main_baseline_attack import baseline_attack
16 | from test_main.main_gan_attack import gan_attack
17 | from test_main.main_gan_attack_baseline import gan_attack as gan_attack_baseline
18 | from utils.load_data.load_data import *
19 | from utils.load_data.load_attack_info import load_attack_info
20 | from model.attack_model.gan_attack.trainer import Train_GAN_Attacker
21 |
22 |
23 | def eval_eigen_value(profiles):
24 | U_T_U = np.dot(profiles.transpose(), profiles)
25 | eig_val, _ = eig(U_T_U)
26 | top_10 = [i.real for i in eig_val[:10]]
27 | return top_10
28 |
29 |
30 | def get_item_distribution(profiles):
31 | # [min(max(0, round(i)), 5) for i in a]
32 | profiles_T = profiles.transpose()
33 | fn_count = lambda item_vec: np.array(
34 | [sum([1 if (min(max(0, round(j)), 5) == i) else 0 for j in item_vec]) for i in range(6)])
35 | fn_norm = lambda item_vec: item_vec / sum(item_vec)
36 | item_distribution = np.array(list(map(fn_count, profiles_T)))
37 | item_distribution = np.array(list(map(fn_norm, item_distribution)))
38 | return item_distribution
39 |
40 |
41 | def eval_TVD_JS(P, Q):
42 | # TVD
43 | dis_TVD = np.mean(np.sum(np.abs(P - Q) / 2, 1))
44 | # JS
45 | fn_KL = lambda p, q: scipy.stats.entropy(p, q)
46 | M = (P + Q) / 2
47 | js_vec = []
48 | for iid in range(P.shape[0]):
49 | p, q, m = P[iid], Q[iid], M[iid]
50 | js_vec.append((fn_KL(p, m) + fn_KL(q, m)) / 2)
51 | dis_JS = np.mean(np.array(js_vec))
52 | return dis_TVD, dis_JS
53 |
54 |
55 | def print_eigen_result(real_profiles, fake_profiles_gan, baseline_fake_profiles, baseline_methods):
56 | top_10_res = []
57 | top_10_real = eval_eigen_value(real_profiles)
58 | top_10_res.append("real\t" + '\t'.join(map(str, top_10_real)))
59 | top_10_baseline = []
60 | for idx in range(len(baseline_fake_profiles)):
61 | top_10_baseline.append(eval_eigen_value(baseline_fake_profiles[idx]))
62 | top_10_res.append(baseline_methods[idx] + "\t" + '\t'.join(map(str, top_10_baseline[-1])))
63 | top_10_gan = eval_eigen_value(fake_profiles_gan)
64 | # top_10_sample_5 = eval_eigen_value(fake_profiles_sample_5)
65 | # top_10_real_sample = eval_eigen_value(real_profiles_gan)
66 | top_10_res.append("gan\t" + '\t'.join(map(str, top_10_gan)))
67 | # top_10_res.append("sample_5\t" + '\t'.join(map(str, top_10_sample_5)))
68 | # top_10_res.append("real_sample\t" + '\t'.join(map(str, top_10_real_sample)))
69 | print("\n".join(top_10_res))
70 |
71 |
72 | def get_distance_result(target_id, real_profiles, fake_profiles_gan, baseline_fake_profiles, baseline_methods):
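   |     # collects TVD/JS distances of the GAN profiles and of every baseline against the real item-rating distribution in one DataFrame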
73 | k = ['target_id', 'attack_method', 'dis_TVD', 'dis_JS']
74 | v = [[], [], [], []]
75 | res_dis = []
76 | real_item_distribution = get_item_distribution(real_profiles)
77 | # real_gan_item_distribution = get_item_distribution(real_profiles_gan)
78 | fake_gan_distribution = get_item_distribution(fake_profiles_gan)
79 | # fake_sample_5_distribution = get_item_distribution(fake_profiles_sample_5)
80 | # dis_TVD, dis_JS = eval_TVD_JS(real_item_distribution, real_gan_item_distribution)
81 | # res_dis.append('\t'.join(map(str, ["real", "real_gan", dis_TVD, dis_JS])))
82 | # dis_TVD, dis_JS = eval_TVD_JS(real_gan_item_distribution, fake_gan_distribution)
83 | # res_dis.append('\t'.join(map(str, ["real_gan", "gan", dis_TVD, dis_JS])))
84 | # dis_TVD, dis_JS = eval_TVD_JS(real_item_distribution, fake_sample_5_distribution)
85 | # res_dis.append('\t'.join(map(str, ["real", "sample_5", dis_TVD, dis_JS])))
86 | # dis_TVD, dis_JS = eval_TVD_JS(real_gan_item_distribution, fake_sample_5_distribution)
87 | # res_dis.append('\t'.join(map(str, ["real_gan", "sample_5", dis_TVD, dis_JS])))
88 | dis_TVD, dis_JS = eval_TVD_JS(real_item_distribution, fake_gan_distribution)
89 | v[1] += ['gan']
90 | v[2] += [dis_TVD]
91 | v[3] += [dis_JS]
92 | # res_dis.append('\t'.join(map(str, [target_id, "gan", dis_TVD, dis_JS])))
93 | for idx in range(len(baseline_fake_profiles)):
94 | dis_TVD, dis_JS = eval_TVD_JS(real_item_distribution, get_item_distribution(baseline_fake_profiles[idx]))
95 | v[1] += [baseline_methods[idx]]
96 | v[2] += [dis_TVD]
97 | v[3] += [dis_JS]
98 | # res_dis.append('\t'.join(map(str, [target_id, baseline_methods[idx], dis_TVD, dis_JS])))
99 | v[0] = [target_id] * len(v[1])
100 | result = pd.DataFrame(dict(zip(k, v)))
101 | # print('\n'.join(res_dis))
102 | return result
103 |
104 |
105 | def profiles_generator(target_id, dataset_class, attack_info, bandwagon_selected, sample_num, args, real_profiles,
106 | filler_indicator, pre_fix, has_G=False):
107 | # baseline fake profiles
108 | baseline_methods = ["segment", "average", "random", "bandwagon"]
109 | baseline_fake_profiles = []
110 | for attack_method in baseline_methods:
111 | attack_model = '_'.join([attack_method, str(sample_num), str(args.filler_num)])
112 | fake_profiles = baseline_attack(dataset_class, attack_info, attack_model, target_id,
113 | bandwagon_selected, filler_indicator)
114 | baseline_fake_profiles.append(fake_profiles)
115 |
116 | for attack_method in baseline_methods:
117 | attack_model = '_'.join([attack_method, str(sample_num), str(args.filler_num)])
118 | fake_profiles = baseline_attack(dataset_class, attack_info, attack_model, target_id,
119 | bandwagon_selected, None)
120 | baseline_fake_profiles.append(fake_profiles)
121 | baseline_methods = baseline_methods + [i + '_rand' for i in baseline_methods]
122 |
123 | final_attack_setting = [sample_num, real_profiles, filler_indicator]
124 | # new_baseline
125 | if has_G:
126 | for attack_method in ['G0' + pre_fix, 'G1' + pre_fix]:
127 | baseline_methods.append(attack_method)
128 | fake_profiles_G, _, _ = gan_attack_baseline(args.dataset, attack_method, target_id, False, 0,
129 | final_attack_setting=final_attack_setting)
130 | baseline_fake_profiles.append(fake_profiles_G)
131 |
132 | # gan profiles
133 | attack_method = "gan" + pre_fix
134 | fake_profiles_gan, _, _ = gan_attack(args.dataset, attack_method, target_id, False, write_to_file=0,
135 | final_attack_setting=final_attack_setting)
136 | return fake_profiles_gan, baseline_fake_profiles, baseline_methods
137 |
138 |
139 | def parse_arg():
140 | parser = argparse.ArgumentParser()
141 |
142 | parser.add_argument('--dataset', type=str, default='ml100k',
143 | help='input data_set_name,filmTrust or ml100k grocery')
144 |
145 | parser.add_argument('--attack_num', type=int, default=50,
146 | help='num of attack fake user,50 for ml100k and filmTrust')
147 |
148 | parser.add_argument('--filler_num', type=int, default=90,
149 | help='num of filler items each fake user,90 for ml100k,36 for filmTrust')
150 | # filmTrust:5,395,181,565,254,601,623,619,64,558 - random*5+tail*5
151 | # ml100k:62,1077,785,1419,1257,1319,1612,1509,1545,1373 - random*5+tail*5
152 | parser.add_argument('--targets', type=str, default='62,1077,785,1419,1257,1319,1612,1509,1545,1373', help='attack_targets')
153 | parser.add_argument('--bandwagon_selected', type=str, default='180,99,49',
154 | help='180,99,49 for ml100k,103,98,115 for filmTrust')
155 | #
156 | args = parser.parse_args()
157 | #
158 | args.targets = list(map(int, args.targets.split(',')))
159 | args.bandwagon_selected = list(map(int, args.bandwagon_selected.split(',')))
160 | return args
161 |
162 |
163 | if __name__ == '__main__':
164 | """
165 | step1 - load data
166 |     step2 - generate fake profiles (baselines, G0/G1 variants and the GAN attacker) from shared real-profile templates
167 |     step3 - compute TVD/JS distances against the real rating matrix and export the result
168 | """
169 |
170 | #
171 | """parse args"""
172 | args = parse_arg()
173 | pre_fix = '_' + str(args.attack_num) + '_' + str(args.filler_num)
174 |
175 | """step1 - load data"""
176 | path_train = "../data/data/" + args.dataset + "_train.dat"
177 | path_test = "../data/data/" + args.dataset + "_test.dat"
178 | attack_info_path = ["../data/data/" + args.dataset + "_selected_items",
179 | "../data/data/" + args.dataset + "_target_users"]
180 | dataset_class = load_data(path_train=path_train, path_test=path_test, header=['user_id', 'item_id', 'rating'],
181 | sep='\t', print_log=False)
182 | attack_info = load_attack_info(*attack_info_path)
183 |
184 | sample_num = dataset_class.n_users
185 | result = None
186 | for target_id in args.targets:
187 | selected = attack_info[target_id][0]
188 |
189 | attackSetting_path = '_'.join(map(str, [args.dataset, sample_num, args.filler_num, target_id]))
190 | attackSetting_path = "../data/data_attacked/" + attackSetting_path + '_attackSetting'
191 | gan_attacker = Train_GAN_Attacker(dataset_class, params_D=None, params_G=None, target_id=target_id,
192 | selected_id_list=selected, filler_num=args.filler_num,
193 | attack_num=args.attack_num, filler_method=0)
194 | _, real_profiles, filler_indicator = gan_attacker.execute(is_train=0, model_path='no',
195 | final_attack_setting=[sample_num, None, None])
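    |         # save the sampled real-profile templates and filler indicators; the same templates are passed to every attack method below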
196 | np.save(attackSetting_path, [real_profiles, filler_indicator])
197 |
198 | fake_profiles_gan, baseline_fake_profiles, baseline_methods \
199 | = profiles_generator(target_id, dataset_class, attack_info, args.bandwagon_selected, sample_num, args,
200 | real_profiles, filler_indicator, pre_fix, has_G=True)
201 |
202 |
203 | # result_ = get_distance_result(target_id, real_profiles, fake_profiles_gan, baseline_fake_profiles,
204 | # baseline_methods)
205 | result_ = get_distance_result(target_id, dataset_class.train_matrix.toarray(), fake_profiles_gan,
206 | baseline_fake_profiles,
207 | baseline_methods)
208 |
209 | result = result_ if result is None else pd.concat([result, result_])
210 | print(result)
211 | result.to_excel(args.dataset + '_distance_lianyun.xls', index=False)
212 |
--------------------------------------------------------------------------------
/AUSH/test_main/WGAN_yangqian.py:
--------------------------------------------------------------------------------
1 | import math
2 | import numpy as np
3 | import tensorflow as tf
4 | import sys
5 |
6 | sys.path.append("../")
7 | from tensorflow.python.framework import ops
8 | from six.moves import xrange
9 | from utils.load_data.load_data import load_data
10 | from utils.load_data.load_attack_info import load_attack_info
11 | import utils as ut
12 |
13 |
14 | if "concat_v2" in dir(tf):
15 | def concat(tensors, axis, *args, **kwargs):
16 | return tf.concat_v2(tensors, axis, *args, **kwargs)
17 | else:
18 | def concat(tensors, axis, *args, **kwargs):
19 | return tf.concat(tensors, axis, *args, **kwargs)
20 |
21 |
22 | class batch_norm(object):
23 | def __init__(self, epsilon=1e-5, momentum=0.9, name="batch_norm"):
24 | with tf.variable_scope(name):
25 | self.epsilon = epsilon
26 | self.momentum = momentum
27 | self.name = name
28 |
29 | def __call__(self, x, train=True):
30 | return tf.contrib.layers.batch_norm(x,
31 | decay=self.momentum,
32 | updates_collections=None,
33 | epsilon=self.epsilon,
34 | scale=True,
35 | is_training=train,
36 | scope=self.name)
37 |
38 |
39 | def conv_cond_concat(x, y):
40 | """Concatenate conditioning vector on feature map axis."""
41 | x_shapes = x.get_shape()
42 | y_shapes = y.get_shape()
43 | return concat([
44 | x, y * tf.ones([x_shapes[0], x_shapes[1], x_shapes[2], y_shapes[3]])], 3)
45 |
46 |
47 | def conv2d(input_, output_dim,
48 | k_h=5, k_w=5, d_h=2, d_w=2, stddev=0.02,
49 | name="conv2d"):
50 | with tf.variable_scope(name):
51 | w = tf.get_variable('w', [k_h, k_w, input_.get_shape()[-1], output_dim],
52 | initializer=tf.truncated_normal_initializer(stddev=stddev))
53 | conv = tf.nn.conv2d(input_, w, strides=[1, d_h, d_w, 1], padding='SAME')
54 |
55 | biases = tf.get_variable('biases', [output_dim], initializer=tf.constant_initializer(0.0))
56 | conv = tf.reshape(tf.nn.bias_add(conv, biases), conv.get_shape())
57 |
58 | return conv
59 |
60 |
61 | # kernel_size = 5 * 5
62 | def deconv2d(input_, output_shape,
63 | k_h=5, k_w=5, d_h=2, d_w=2, stddev=0.02,
64 | name="deconv2d", with_w=False):
65 | with tf.variable_scope(name):
66 | # filter : [height, width, output_channels, in_channels]
67 | w = tf.get_variable('w', [k_h, k_w, output_shape[-1], input_.get_shape()[-1]],
68 | initializer=tf.random_normal_initializer(stddev=stddev))
69 |
70 | try:
71 | deconv = tf.nn.conv2d_transpose(input_, w, output_shape=output_shape,
72 | strides=[1, d_h, d_w, 1])
73 |
74 |             # Support for versions of TensorFlow before 0.7.0
75 | except AttributeError:
76 | deconv = tf.nn.deconv2d(input_, w, output_shape=output_shape,
77 | strides=[1, d_h, d_w, 1])
78 |
79 | biases = tf.get_variable('biases', [output_shape[-1]], initializer=tf.constant_initializer(0.0))
80 | deconv = tf.reshape(tf.nn.bias_add(deconv, biases), deconv.get_shape())
81 |
82 | if with_w:
83 | return deconv, w, biases
84 | else:
85 | return deconv
86 |
87 |
88 | def lrelu(x, leak=0.2, name="lrelu"):
89 | return tf.maximum(x, leak * x)
90 |
91 |
92 | def linear(input_, output_size, scope=None, stddev=0.02, bias_start=0.0, with_w=False):
93 | shape = input_.get_shape().as_list()
94 |
95 | with tf.variable_scope(scope or "Linear"):
96 | try:
97 | matrix = tf.get_variable("Matrix", [shape[1], output_size], tf.float32,
98 | tf.random_normal_initializer(stddev=stddev))
99 | except ValueError as err:
100 | msg = "NOTE: Usually, this is due to an issue with the image dimensions. Did you correctly set '--crop' or '--input_height' or '--output_height'?"
101 | err.args = err.args + (msg,)
102 | raise
103 | bias = tf.get_variable("bias", [output_size],
104 | initializer=tf.constant_initializer(bias_start))
105 | if with_w:
106 | return tf.matmul(input_, matrix) + bias, matrix, bias
107 | else:
108 | return tf.matmul(input_, matrix) + bias
109 |
110 |
111 | def conv_out_size_same(size, stride):
112 | return int(math.ceil(float(size) / float(stride)))
113 |
114 |
115 | def gen_random(size):
116 |     # z: noise drawn from a normal distribution with mean 0 and standard deviation 100
117 | return np.random.normal(0, 100, size=size)
118 |
119 |
120 | class WGAN(object):
121 | def __init__(self, sess, dataset_class,batch_size=64, height=29, width=58, z_dim=100, gf_dim=64, df_dim=64,
122 | gfc_dim=1024, dfc_dim=1024, max_to_keep=1):
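    |         # with the defaults, height * width = 29 * 58 = 1682, matching the 1682 items of ml100k, so each rating vector can be reshaped into a 2-D map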
123 | self.sess = sess
124 | self.dataset_class = dataset_class
125 | self.batch_size = batch_size
126 |
127 | self.height = height
128 | self.width = width
129 | self.z_dim = z_dim
130 | self.gf_dim = gf_dim
131 | self.df_dim = df_dim
132 | self.gfc_dim = gfc_dim
133 | self.dfc_dim = dfc_dim
134 | # batch normalization : deals with poor initialization helps gradient flow
135 | self.d_bn1 = batch_norm(name='d_bn1')
136 | self.d_bn2 = batch_norm(name='d_bn2')
137 | self.d_bn3 = batch_norm(name='d_bn3')
138 | self.g_bn0 = batch_norm(name='g_bn0')
139 | self.g_bn1 = batch_norm(name='g_bn1')
140 | self.g_bn2 = batch_norm(name='g_bn2')
141 | self.g_bn3 = batch_norm(name='g_bn3')
142 |
143 | self.max_to_keep = max_to_keep
144 |
145 | self.build_model()
146 |
147 | def build_model(self):
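    |         # real profiles enter as height x width "images"; the generator maps noise z to profiles of the same shape and the discriminator scores both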
148 | self.inputs = tf.placeholder(tf.float32,
149 | [self.batch_size, self.height, self.width, 1],
150 | name='real_images')
151 | inputs = self.inputs
152 |
153 | self.z = tf.placeholder(tf.float32, [None, self.z_dim], name='z')
154 | self.G = self.generator(self.z)
155 |
156 | self.D, self.D_logits = self.discriminator(inputs, reuse=False)
157 | self.D_, self.D_logits_ = self.discriminator(self.G, reuse=True)
158 |
159 | # def _cross_entropy_loss(self, logits, labels):
160 | # xentropy = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits, labels))
161 | # return xentropy
162 | self.d_loss = tf.reduce_mean(tf.square(self.D_logits - self.D_logits_))
163 | self.g_loss = tf.reduce_mean(tf.square(self.D_logits_))
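    |         # note: these are least-squares style losses on the discriminator logits rather than the standard WGAN critic loss with weight clipping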
164 | # self.d_loss_real = tf.reduce_mean(
165 | # _cross_entropy_loss(self.D_logits, tf.ones_like(self.D)))
166 | # self.d_loss_fake = tf.reduce_mean(
167 | # _cross_entropy_loss(self.D_logits_, tf.zeros_like(self.D_)))
168 | #
169 | # self.g_loss = tf.reduce_mean(
170 | # _cross_entropy_loss(self.D_logits_, tf.ones_like(self.D_)))
171 | # self.d_loss = self.d_loss_real + self.d_loss_fake
172 | #
173 | t_vars = tf.trainable_variables()
174 | self.d_vars = [var for var in t_vars if 'd_' in var.name]
175 | self.g_vars = [var for var in t_vars if 'g_' in var.name]
176 |
177 | self.saver = tf.train.Saver(max_to_keep=self.max_to_keep)
178 |
179 | def train(self, config):
180 | d_optim = tf.train.RMSPropOptimizer(config.learning_rate, decay=config.beta1) \
181 | .minimize(self.d_loss, var_list=self.d_vars)
182 | g_optim =tf.train.RMSPropOptimizer(config.learning_rate, decay=config.beta1) \
183 | .minimize(self.g_loss, var_list=self.g_vars)
184 | try:
185 | tf.global_variables_initializer().run()
186 | except:
187 | tf.initialize_all_variables().run()
188 | train_idxs = list(range(self.dataset_class.train_matrix.shape[0]))
189 | for epoch in xrange(config.epoch):
190 | np.random.shuffle(train_idxs)
191 | for i in range(len(train_idxs) // self.batch_size):
192 | cur_idxs = train_idxs[i * self.batch_size:(i + 1) * self.batch_size]
193 | batch_inputs = self.dataset_class.train_matrix[cur_idxs].toarray()
194 |                 # map ratings in [0, 5] to [-1, 1] (to match the generator's tanh output) and reshape into height x width "images"
195 | batch_inputs = (batch_inputs - 2.5) / 2.5
196 | batch_inputs = np.reshape(batch_inputs, [self.batch_size, self.height, self.width, 1])
197 | # batch_inputs = np.random.random_sample([self.batch_size, self.height, self.width, 1])
198 | batch_z = gen_random(size=[config.batch_size, self.z_dim]).astype(np.float32)
199 |
200 | # Update D network
201 | _ = self.sess.run(d_optim, feed_dict={self.inputs: batch_inputs, self.z: batch_z})
202 |
203 | # Update G network
204 | _ = self.sess.run(g_optim, feed_dict={self.z: batch_z})
205 |
206 |                 # (the reference DCGAN code runs g_optim a second time here so that d_loss does not go to zero; this script updates G once per batch)
207 |
208 | errD= self.d_loss.eval({self.inputs: batch_inputs,self.z: batch_z})
209 | # errD_real = self.d_loss_real.eval({self.inputs: batch_inputs})
210 | errG = self.g_loss.eval({self.z: batch_z})
211 |
212 | print("Epoch:[%2d/%2d]d_loss: %.8f, g_loss: %.8f" \
213 | % (epoch, config.epoch, errD, errG))
214 |
215 | def discriminator(self, image, reuse=False):
216 | with tf.variable_scope("discriminator") as scope:
217 | if reuse:
218 | scope.reuse_variables()
219 | # [conv+BN+LeakyRelu[64,128,256,512]]+[FC]+[sigmoid]
220 | h0 = lrelu(conv2d(image, self.df_dim, name='d_h0_conv'))
221 | h1 = lrelu(self.d_bn1(conv2d(h0, self.df_dim * 2, name='d_h1_conv')))
222 | h2 = lrelu(self.d_bn2(conv2d(h1, self.df_dim * 4, name='d_h2_conv')))
223 | h3 = lrelu(self.d_bn3(conv2d(h2, self.df_dim * 8, name='d_h3_conv')))
224 | h4 = linear(tf.reshape(h3, [self.batch_size, -1]), 1, 'd_h4_lin')
225 |
226 | return tf.nn.sigmoid(h4), h4
227 |
228 | def generator(self, z):
229 | with tf.variable_scope("generator") as scope:
230 | s_h, s_w = self.height, self.width
231 | # CONV stride=2
232 | s_h2, s_w2 = conv_out_size_same(s_h, 2), conv_out_size_same(s_w, 2)
233 | s_h4, s_w4 = conv_out_size_same(s_h2, 2), conv_out_size_same(s_w2, 2)
234 | s_h8, s_w8 = conv_out_size_same(s_h4, 2), conv_out_size_same(s_w4, 2)
235 | s_h16, s_w16 = conv_out_size_same(s_h8, 2), conv_out_size_same(s_w8, 2)
236 |
237 | # FC of 2*4*512&ReLU&BN
238 | self.z_, self.h0_w, self.h0_b = linear(
239 | z, self.gf_dim * 8 * s_h16 * s_w16, 'g_h0_lin', with_w=True)
240 | self.h0 = tf.reshape(
241 | self.z_, [-1, s_h16, s_w16, self.gf_dim * 8])
242 | h0 = tf.nn.relu(self.g_bn0(self.h0))
243 |
244 |             # three transposed CONVs with [gf_dim*4, gf_dim*2, gf_dim] channels (256, 128, 64 by default), ReLU + BN, 5*5 kernels
245 | self.h1, self.h1_w, self.h1_b = deconv2d(
246 | h0, [self.batch_size, s_h8, s_w8, self.gf_dim * 4], name='g_h1', with_w=True)
247 | h1 = tf.nn.relu(self.g_bn1(self.h1))
248 | h2, self.h2_w, self.h2_b = deconv2d(
249 | h1, [self.batch_size, s_h4, s_w4, self.gf_dim * 2], name='g_h2', with_w=True)
250 | h2 = tf.nn.relu(self.g_bn2(h2))
251 | h3, self.h3_w, self.h3_b = deconv2d(
252 | h2, [self.batch_size, s_h2, s_w2, self.gf_dim * 1], name='g_h3', with_w=True)
253 | h3 = tf.nn.relu(self.g_bn3(h3))
254 |
255 | # transposed CONV of [1] &tanh
256 | h4, self.h4_w, self.h4_b = deconv2d(
257 | h3, [self.batch_size, s_h, s_w, 1], name='g_h4', with_w=True)
258 |
259 | return tf.nn.tanh(h4)
--------------------------------------------------------------------------------