├── .gitignore ├── README.md ├── eval ├── eval1.sh ├── eval_1v1.py ├── eval_1vn.py ├── evalijbx.sh └── ijbx_template_feature.py ├── libs ├── ASE │ ├── enrollment.py │ └── gen_sim.py ├── IronMask │ ├── enrollment.py │ └── gen_sim.py ├── SFM │ ├── enrollment.py │ └── gen_sim.py ├── SecureVector │ ├── crypto_system.py │ └── enrollment.py └── baseline │ └── gen_sim.py └── requirements.txt /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | 131 | # Mac file 132 | *DS_Store 133 | 134 | # keys 135 | libs/SFM/keys/ 136 | libs/SecureVector/keys/ 137 | 138 | # data 139 | data/ 140 | *.tar 141 | # results 142 | results/ 143 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## SecureVector 2 | 3 | An official implementation of SecureVector ([Towards Privacy-Preserving, Real-Time and Lossless Feature Matching](https://arxiv.org/abs/2208.00214)) and of the template-protection baselines it is compared against. 4 | 5 | 6 | ### Usage 7 | 1. Download the data for lfw/cfp/agedb from [Gdrive](https://drive.google.com/file/d/1iwDNUw6e1dOBaTTheBlXjedB67D_ThMj/view?usp=sharing) or [BaiduDrive](https://pan.baidu.com/s/1vk5lV8m-fgIxvQf3q8ophA?pwd=q58b). 8 | 9 | 2. Download IJB from BaiduDrive [part1](https://pan.baidu.com/s/1ykRaraO4PTmyMigoj2qNZA?pwd=zcja) and [part2](https://pan.baidu.com/s/1JJiUIXdB0tsyeY81usO2fw?pwd=nasq). Merge the parts with the command `cat data2a* > data2.tar`. 10 | 11 | 3. Extract the archives in the root directory. You should have the following structure: 12 | 13 | **Note**: Features are extracted by [MagFace](https://github.com/IrvingMeng/MagFace). Replace the feat.list files if you use another model. 
14 | ``` 15 | data/ 16 | ├── agedb 17 | │ ├── agedb_feat.list 18 | │ └── pair.list 19 | ├── cfp 20 | │ ├── cfp_feat.list 21 | │ └── pair.list 22 | ├── ijb 23 | │ ├── ijbb_feat.list 24 | │ ├── ijbc_feat.list 25 | │ └── meta 26 | │ ├── ijbb_face_tid_mid.txt 27 | │ ├── ijbb_template_pair_label.txt 28 | │ ├── ijbc_face_tid_mid.txt 29 | │ └── ijbc_template_pair_label.txt 30 | └── lfw 31 | ├── lfw_feat.list 32 | └── pair.list 33 | ``` 34 | 35 | 4. Run the evaluations on the face tasks with: 36 | ``` 37 | # [key] for method 38 | # 0. baseline 39 | # 1. SecureVector [1] 40 | # 2. ase [2] 41 | # 3. ironmask [3] 42 | # 4. sfm [4] 43 | 44 | export key=1 45 | 46 | # LFW/CFP/AgeDB 47 | eval/eval1.sh $key 48 | 49 | # IJB 50 | eval/evalijbx.sh $key 51 | ``` 52 | 53 | 54 | ### References 55 | [1] Qiang Meng, et al., "Towards Privacy-Preserving, Real-Time and Lossless Feature Matching", arXiv 2022. 56 | 57 | [2] Dusmanu, Mihai, et al. "Privacy-preserving image features via adversarial affine subspace embeddings." *Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition.* 2021. 58 | 59 | [3] Kim, Sunpill, et al. "Ironmask: Modular architecture for protecting deep face template." *Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition.* 2021. 60 | 61 | [4] Boddeti, Vishnu Naresh. "Secure face matching using fully homomorphic encryption." *2018 IEEE 9th International Conference on Biometrics Theory, Applications and Systems (BTAS).* IEEE, 2018. 
62 | -------------------------------------------------------------------------------- /eval/eval1.sh: -------------------------------------------------------------------------------- 1 | M=$1 2 | 3 | METHOD_LIST=('baseline' 'securevector' 'ase' 'ironmask' 'sfm') 4 | METHOD=${METHOD_LIST[$M]} 5 | 6 | 7 | for BM in 'lfw' #'cfp' 'agedb' 8 | do 9 | FEAT_LIST=data/${BM}/${BM}_feat.list 10 | PAIR_LIST=data/${BM}/pair.list 11 | BASE_FOLD=results/ 12 | 13 | FOLD=${BASE_FOLD}/${METHOD}/${BM} 14 | SCORE_LIST=${FOLD}/score.list 15 | 16 | if [[ $M == 0 ]] 17 | then 18 | # generate similarities 19 | python3 libs/baseline/gen_sim.py --feat_list ${FEAT_LIST} --pair_list ${PAIR_LIST} --score_list ${SCORE_LIST} 20 | 21 | elif [[ $M == 1 ]] 22 | then 23 | KS=512 24 | K=64 25 | # enrollment 26 | if [ ! -f libs/SecureVector/keys/privatekey_{KS} ]; then 27 | echo 'generate paillier keys...' 28 | mkdir libs/SecureVector/keys/ 29 | python libs/SecureVector/crypto_system.py --genkey 1 --key_size ${KS} 30 | fi 31 | python3 libs/SecureVector/enrollment.py --feat_list ${FEAT_LIST} --key_size ${KS} --K ${K} --folder ${FOLD} 32 | # generate similarities 33 | python libs/SecureVector/crypto_system.py --key_size ${KS} --K ${K} --folder ${FOLD} --pair_list ${PAIR_LIST} --score_list ${SCORE_LIST} 34 | 35 | elif [[ $M == 2 ]] 36 | then 37 | ASE_DIM=4 38 | # enrollment 39 | python3 libs/ASE/enrollment.py --feat_list ${FEAT_LIST} --folder ${FOLD} --ase_dim ${ASE_DIM} 40 | # generate similarities 41 | python libs/ASE/gen_sim.py --folder ${FOLD} --pair_list ${PAIR_LIST} --score_list ${SCORE_LIST} 42 | 43 | elif [[ $M == 3 ]] 44 | then 45 | ALPHA=16 46 | # enrollment 47 | python3 libs/IronMask/enrollment.py --feat_list ${FEAT_LIST} --folder ${FOLD} --alpha ${ALPHA} 48 | # generate similarities 49 | python libs/IronMask/gen_sim.py --folder ${FOLD} --pair_list ${PAIR_LIST} --score_list ${SCORE_LIST} --alpha ${ALPHA} --feat_list ${FEAT_LIST} 50 | 51 | elif [[ $M == 4 ]] 52 | then 53 | PRECISION=125 54 | if 
[ ! -f libs/SFM/keys/gal_key ]; then 55 | echo 'generate SFM keys...' 56 | mkdir libs/SFM/keys/ 57 | python libs/SFM/gen_sim.py --genkey 1 58 | fi 59 | # enrollment 60 | python3 libs/SFM/enrollment.py --feat_list ${FEAT_LIST} --folder ${FOLD} --precision ${PRECISION} 61 | # generate similarities 62 | python libs/SFM/gen_sim.py --folder ${FOLD} --pair_list ${PAIR_LIST} --score_list ${SCORE_LIST} --precision ${PRECISION} 63 | 64 | else 65 | echo 'key error' 66 | fi 67 | done 68 | 69 | for BM in 'lfw' 'cfp' 'agedb' 70 | do 71 | echo [${METHOD}]: ${BM} 72 | PAIR_LIST=data/${BM}/pair.list 73 | FOLD=${BASE_FOLD}/${METHOD}/${BM} 74 | SCORE_LIST=${FOLD}/score.list 75 | 76 | # eval for lfw/cfp/agedb 77 | python eval/eval_1v1.py --pair_list ${PAIR_LIST} --score_list ${SCORE_LIST} 78 | done -------------------------------------------------------------------------------- /eval/eval_1v1.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """ 3 | 1v1 evaluation. 4 | 1. use 10-fold cross validation. 5 | 2. [NOT] use mean substraction. 6 | """ 7 | import math 8 | import argparse 9 | import numpy as np 10 | from sklearn.model_selection import KFold 11 | from sklearn import metrics 12 | from scipy.optimize import brentq 13 | from scipy import interpolate 14 | from joblib import Parallel, delayed 15 | 16 | parser = argparse.ArgumentParser(description='Evaluation') 17 | parser.add_argument('--pair_list', type=str, 18 | help='opensource pair list.') 19 | parser.add_argument('--score_list', type=str, 20 | help='opensource score list.') 21 | 22 | 23 | def distance_(embeddings0, embeddings1): 24 | # Distance based on cosine similarity 25 | dot = np.sum(np.multiply(embeddings0, embeddings1), axis=1) 26 | norm = np.linalg.norm(embeddings0, axis=1) * \ 27 | np.linalg.norm(embeddings1, axis=1) 28 | # shaving 29 | similarity = dot/norm 30 | similarity = np.clip(similarity, -1., 1.) 
31 | dist = np.arccos(similarity) / math.pi 32 | return dist 33 | 34 | 35 | def calculate_roc(thresholds, dist, actual_issame, nrof_folds=1): 36 | nrof_pairs = len(actual_issame) 37 | nrof_thresholds = len(thresholds) 38 | 39 | tprs = np.zeros((nrof_folds, nrof_thresholds)) 40 | fprs = np.zeros((nrof_folds, nrof_thresholds)) 41 | accuracy = np.zeros((nrof_folds)) 42 | 43 | # Find the best threshold for the fold 44 | acc_train = np.zeros((nrof_thresholds)) 45 | for threshold_idx, threshold in enumerate(thresholds): 46 | _, _, acc_train[threshold_idx] = calculate_accuracy( 47 | threshold, dist, actual_issame) 48 | best_threshold_index = np.argmax(acc_train) 49 | fold_idx = 0 50 | for threshold_idx, threshold in enumerate(thresholds): 51 | tprs[fold_idx, threshold_idx], fprs[fold_idx, 52 | threshold_idx], _ = calculate_accuracy(threshold, dist, actual_issame) 53 | _, _, accuracy[fold_idx] = calculate_accuracy( 54 | thresholds[best_threshold_index], dist, actual_issame) 55 | 56 | tpr = np.mean(tprs, 0) 57 | fpr = np.mean(fprs, 0) 58 | return tpr, fpr, accuracy 59 | 60 | 61 | def calculate_accuracy(threshold, dist, actual_issame): 62 | predict_issame = np.less(dist, threshold) 63 | tp = np.sum(np.logical_and(predict_issame, actual_issame)) 64 | fp = np.sum(np.logical_and(predict_issame, np.logical_not(actual_issame))) 65 | tn = np.sum(np.logical_and(np.logical_not( 66 | predict_issame), np.logical_not(actual_issame))) 67 | fn = np.sum(np.logical_and(np.logical_not(predict_issame), actual_issame)) 68 | tpr = 0 if (tp+fn == 0) else float(tp) / float(tp+fn) 69 | fpr = 0 if (fp+tn == 0) else float(fp) / float(fp+tn) 70 | acc = float(tp + tn) / dist.size 71 | return tpr, fpr, acc 72 | 73 | 74 | class LFold: 75 | def __init__(self, n_splits=2, shuffle=False): 76 | self.n_splits = n_splits 77 | if self.n_splits > 1: 78 | self.k_fold = KFold(n_splits=n_splits, shuffle=shuffle) 79 | 80 | def split(self, indices): 81 | if self.n_splits > 1: 82 | return self.k_fold.split(indices) 
83 | else: 84 | return [(indices, indices)] 85 | 86 | 87 | def perform_1v1_eval(targets, dists): 88 | targets = np.vstack(targets).reshape(-1,) 89 | dists = np.vstack(dists).reshape(-1,) 90 | 91 | thresholds = np.arange(0, 4, 0.01) 92 | tpr, fpr, accuracy = calculate_roc(thresholds, dists, targets) 93 | print(' Accuracy: %2.5f+-%2.5f' % (np.mean(accuracy), np.std(accuracy))) 94 | resultline = '%2.5f+-%2.5f' % (np.mean(accuracy), np.std(accuracy)) 95 | 96 | 97 | def load_pair_score(pair_list, score_list): 98 | with open(pair_list, 'r') as f: 99 | pair_lines = f.readlines() 100 | with open(score_list, 'r') as f: 101 | score_lines = f.readlines() 102 | assert len(pair_lines) == len(score_lines) 103 | 104 | # load pair score 105 | targets, dists = [], [] 106 | for i in range(len(pair_lines)): 107 | parts1 = pair_lines[i].strip().split(' ') 108 | parts2 = score_lines[i].strip().split(' ') 109 | assert parts1[0] == parts2[0] 110 | assert parts1[1] == parts2[1] 111 | is_same = int(parts1[2]) 112 | dist = np.arccos(float(parts2[2])) / math.pi 113 | # collect 114 | targets.append(is_same) 115 | dists.append(dist) 116 | return targets, dists 117 | 118 | 119 | def eval(pair_list, score_list): 120 | targets, dists = load_pair_score(pair_list, score_list) 121 | perform_1v1_eval(targets, dists) 122 | 123 | 124 | def main(): 125 | args = parser.parse_args() 126 | eval(args.pair_list, args.score_list) 127 | 128 | 129 | if __name__ == '__main__': 130 | main() 131 | -------------------------------------------------------------------------------- /eval/eval_1vn.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import argparse 3 | import os 4 | import cv2 5 | import math 6 | import numpy as np 7 | from tqdm import tqdm 8 | from sklearn import metrics 9 | from scipy.optimize import brentq 10 | from scipy import interpolate 11 | from sklearn.metrics import roc_curve, auc 12 | 13 | import torch 14 | import torch.nn as nn 15 
| import torch.utils.data as data 16 | import torch.nn.functional as F 17 | import torch.backends.cudnn as cudnn 18 | parser = argparse.ArgumentParser(description='Evaluation') 19 | parser.add_argument('--pair_list', type=str, 20 | help='opensource pair list.') 21 | parser.add_argument('--score_list', type=str, 22 | help='opensource score list.') 23 | 24 | 25 | def perform_1vn_eval(label, scores): 26 | fpr, tpr, _ = roc_curve(label, scores) 27 | roc_auc = auc(fpr, tpr) 28 | fpr = np.flipud(fpr) 29 | tpr = np.flipud(tpr) 30 | x_labels = [10**-6, 10**-5, 10**-4, 10**-3, 10**-2, 10**-1] 31 | to_print = '' 32 | for fpr_iter in np.arange(len(x_labels)): 33 | _, min_index = min( 34 | list(zip(abs(fpr-x_labels[fpr_iter]), range(len(fpr))))) 35 | 36 | print(' {:0.4f}'.format(tpr[min_index])) 37 | 38 | to_print = to_print + ' {:0.4f}'.format(tpr[min_index]) 39 | 40 | print(to_print) 41 | 42 | 43 | def load_pair_score(pair_list, score_list): 44 | with open(pair_list, 'r') as f: 45 | pair_lines = f.readlines() 46 | with open(score_list, 'r') as f: 47 | score_lines = f.readlines() 48 | assert len(pair_lines) == len(score_lines) 49 | 50 | targets, scores = [], [] 51 | for i in range(len(pair_lines)): 52 | parts1 = pair_lines[i].strip().split(' ') 53 | parts2 = score_lines[i].strip().split(' ') 54 | assert parts1[0] == parts2[0] 55 | assert parts1[1] == parts2[1] 56 | is_same = int(parts1[2]) 57 | score = float(parts2[2]) 58 | targets.append(is_same) 59 | scores.append(score) 60 | return targets, scores 61 | 62 | 63 | def eval(pair_list, score_list): 64 | labels, scores = load_pair_score(pair_list, score_list) 65 | perform_1vn_eval(labels, scores) 66 | 67 | 68 | def main(): 69 | args = parser.parse_args() 70 | eval(args.pair_list, args.score_list) 71 | 72 | 73 | if __name__ == '__main__': 74 | main() 75 | -------------------------------------------------------------------------------- /eval/evalijbx.sh: 
-------------------------------------------------------------------------------- 1 | M=$1 2 | 3 | METHOD_LIST=('baseline' 'securevector' 'ase' 'ironmask' 'sfm') 4 | METHOD=${METHOD_LIST[$M]} 5 | 6 | # cd ../ 7 | for BM in 'b' 'c' 8 | do 9 | # Convert ijbx feature to id-template feature 10 | IJBX_BASE_FOLD=data/ijb/ 11 | 12 | FEAT_LIST=${IJBX_BASE_FOLD}/ijb${BM}_feat.list 13 | TEMP_FEAT_LIST=${IJBX_BASE_FOLD}/ijb${BM}_template.list 14 | PAIR_LIST=${IJBX_BASE_FOLD}/ijb${BM}.pair.list 15 | 16 | BASE_FOLD=results/ 17 | FOLD=${BASE_FOLD}/${METHOD}/${BM} 18 | SCORE_LIST=${FOLD}/score.list 19 | 20 | if [ ! -f ${TEMP_FEAT_LIST} ]; then 21 | python eval/ijbx_template_feature.py \ 22 | --feat_list ${FEAT_LIST} \ 23 | --base_dir ${IJBX_BASE_FOLD} \ 24 | --type ${BM} \ 25 | --template_feature ${TEMP_FEAT_LIST} \ 26 | --pair_list ${PAIR_LIST} 27 | 28 | fi 29 | # from here on use the converted id-template feature file; the pair list is indexed by template id. 30 | 31 | if [[ $M == 0 ]] 32 | then 33 | # generate similarities 34 | python3 libs/baseline/gen_sim.py --feat_list ${TEMP_FEAT_LIST} --pair_list ${PAIR_LIST} --score_list ${SCORE_LIST} 35 | 36 | elif [[ $M == 1 ]] 37 | then 38 | KS=512 39 | K=64 40 | # enrollment (generate the paillier keys first when the key file for this key size is missing) 41 | if [ ! -f libs/SecureVector/keys/privatekey_${KS} ]; then 42 | echo 'generate paillier keys...' 
43 | mkdir -p libs/SecureVector/keys/ 44 | python libs/SecureVector/crypto_system.py --genkey 1 --key_size ${KS} 45 | fi 46 | python3 libs/SecureVector/enrollment.py --feat_list ${TEMP_FEAT_LIST} --key_size ${KS} --K ${K} --folder ${FOLD} 47 | # generate similarities 48 | python libs/SecureVector/crypto_system.py --key_size ${KS} --K ${K} --folder ${FOLD} --pair_list ${PAIR_LIST} --score_list ${SCORE_LIST} 49 | 50 | elif [[ $M == 2 ]] 51 | then 52 | ASE_DIM=4 53 | # enrollment 54 | python3 libs/ASE/enrollment.py --feat_list ${TEMP_FEAT_LIST} --folder ${FOLD} --ase_dim ${ASE_DIM} 55 | # generate similarities 56 | python libs/ASE/gen_sim.py --folder ${FOLD} --pair_list ${PAIR_LIST} --score_list ${SCORE_LIST} 57 | 58 | elif [[ $M == 3 ]] 59 | then 60 | ALPHA=16 61 | # enrollment 62 | python3 libs/IronMask/enrollment.py --feat_list ${TEMP_FEAT_LIST} --folder ${FOLD} --alpha ${ALPHA} 63 | # generate similarities (same template feature list as the enrollment above) 64 | python libs/IronMask/gen_sim.py --folder ${FOLD} --pair_list ${PAIR_LIST} --score_list ${SCORE_LIST} --alpha ${ALPHA} --feat_list ${TEMP_FEAT_LIST} 65 | 66 | elif [[ $M == 4 ]] 67 | then 68 | PRECISION=125 69 | if [ ! -f libs/SFM/keys/gal_key ]; then 70 | echo 'generate SFM keys...' 
71 | mkdir -p libs/SFM/keys/ 72 | python libs/SFM/gen_sim.py --genkey 1 73 | fi 74 | # enrollment (template features, like every other method in this script) 75 | python3 libs/SFM/enrollment.py --feat_list ${TEMP_FEAT_LIST} --folder ${FOLD} --precision ${PRECISION} 76 | # generate similarities 77 | python libs/SFM/gen_sim.py --folder ${FOLD} --pair_list ${PAIR_LIST} --score_list ${SCORE_LIST} --precision ${PRECISION} 78 | 79 | else 80 | echo 'key error' 81 | fi 82 | done 83 | 84 | for BM in 'b' 'c' 85 | do 86 | echo "[${METHOD}]: ${BM}" 87 | IJBX_BASE_FOLD=data/ijb/ 88 | PAIR_LIST=${IJBX_BASE_FOLD}/ijb${BM}.pair.list 89 | BASE_FOLD=results/ 90 | FOLD=${BASE_FOLD}/${METHOD}/${BM} 91 | SCORE_LIST=${FOLD}/score.list 92 | 93 | python eval/eval_1vn.py --pair_list ${PAIR_LIST} --score_list ${SCORE_LIST} 94 | done -------------------------------------------------------------------------------- /eval/ijbx_template_feature.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # Generate feature.list and pair.list for ijbx 3 | 4 | import argparse 5 | import os 6 | import numpy as np 7 | import torch 8 | import torch.nn.functional as F 9 | 10 | # basic args 11 | parser = argparse.ArgumentParser(description='Evaluation') 12 | parser.add_argument('--feat_list', type=str, 13 | help='the ijbx feature list') 14 | parser.add_argument('--base_dir', default='/ssd/irving/data/IJB_release/IJBC/') 15 | parser.add_argument('--type', default='c') 16 | parser.add_argument('--embedding_size', default=512, type=int) 17 | parser.add_argument('--template_feature', type=str, 18 | help='the template feature file') 19 | parser.add_argument('--pair_list', type=str, help='the pair list file') 20 | 21 | 22 | def read_template_media_list(path): 23 | ijb_meta, templates, medias = [], [], [] 24 | with open(path, 'r') as f: 25 | lines = f.readlines() 26 | for line in lines: 27 | parts = line.strip().split(' ') 28 | ijb_meta.append(parts[0]) 29 | templates.append(int(parts[1])) 30 | medias.append(int(parts[2])) 
31 | return np.array(templates), np.array(medias) 32 | 33 | 34 | def read_template_pair_list(path): 35 | t1, t2, label = [], [], [] 36 | with open(path, 'r') as f: 37 | lines = f.readlines() 38 | for line in lines: 39 | data = line.strip().split(' ') 40 | t1.append(int(data[0])) 41 | t2.append(int(data[1])) 42 | label.append(int(data[2])) 43 | return np.array(t1), np.array(t2), np.array(label) 44 | 45 | 46 | def read_feats(args): 47 | with open(args.feat_list, 'r') as f: 48 | lines = f.readlines() 49 | img_feats = [] 50 | for line in lines: 51 | data = line.strip().split(' ') 52 | img_feats.append([float(ele) for ele in data[1:1+args.embedding_size]]) 53 | img_feats = np.array(img_feats).astype(np.float32) 54 | return img_feats 55 | 56 | 57 | def image2template_feature(img_feats=None, 58 | templates=None, 59 | medias=None): 60 | # ========================================================== 61 | # 1. face image feature l2 normalization. img_feats:[number_image x feats_dim] 62 | # 2. compute media feature. 63 | # 3. compute template feature. 
64 | # ========================================================== 65 | unique_templates = np.unique(templates) 66 | # template_feats = np.zeros((len(unique_templates), img_feats.shape[1])) 67 | template_feats = torch.zeros((len(unique_templates), img_feats.shape[1])) 68 | 69 | for count_template, uqt in enumerate(unique_templates): 70 | (ind_t,) = np.where(templates == uqt) 71 | face_norm_feats = img_feats[ind_t] 72 | 73 | face_medias = medias[ind_t] 74 | unique_medias, unique_media_counts = np.unique( 75 | face_medias, return_counts=True) 76 | media_norm_feats = [] 77 | for u, ct in zip(unique_medias, unique_media_counts): 78 | (ind_m,) = np.where(face_medias == u) 79 | media_norm_feats += [np.mean(face_norm_feats[ind_m], 80 | 0, keepdims=False)] 81 | 82 | # media_norm_feats = np.array(media_norm_feats) 83 | media_norm_feats = torch.tensor(media_norm_feats) 84 | media_norm_feats = F.normalize(media_norm_feats) 85 | # template_feats[count_template] = np.mean(media_norm_feats, 0) 86 | template_feats[count_template] = torch.mean(media_norm_feats, 0) 87 | if count_template % 2000 == 0: 88 | print('Finish Calculating {} template features.'.format(count_template)) 89 | # template_norm_feats = template_feats / np.sqrt(np.sum(template_feats ** 2, -1, keepdims=True)) 90 | return template_feats, unique_templates 91 | 92 | 93 | def gather_pair_features(args): 94 | 95 | templates, medias = read_template_media_list( 96 | '{}/meta/ijb{}_face_tid_mid.txt'.format(args.base_dir, args.type) 97 | ) 98 | p1, p2, label = read_template_pair_list( 99 | '{}/meta/ijb{}_template_pair_label.txt'.format( 100 | args.base_dir, args.type) 101 | ) 102 | img_feats = read_feats(args) 103 | template_feats, unique_templates = image2template_feature(img_feats, 104 | templates, 105 | medias) 106 | 107 | template2id = np.zeros((max(unique_templates)+1), dtype=int) 108 | for count_template, uqt in enumerate(unique_templates): 109 | template2id[uqt] = count_template 110 | 111 | with 
open(args.template_feature, 'w') as f: 112 | for i, feat in enumerate(template_feats): 113 | featlist = [str(b) for b in feat.tolist()] 114 | f.write('{} {}\n'.format(i, ' '.join(featlist))) 115 | 116 | with open(args.pair_list, 'w') as f: 117 | for i in range(len(p1)): 118 | issame = label[i] 119 | f.write('{} {} {}\n'.format( 120 | template2id[p1[i]], template2id[p2[i]], issame)) 121 | 122 | 123 | def main(): 124 | args = parser.parse_args() 125 | gather_pair_features(args) 126 | 127 | 128 | if __name__ == '__main__': 129 | main() 130 | -------------------------------------------------------------------------------- /libs/ASE/enrollment.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import sys 3 | import numpy as np 4 | import argparse 5 | import os 6 | import time 7 | import shutil 8 | from numpy.random import default_rng 9 | import resource 10 | 11 | rng = default_rng() 12 | 13 | # parse the args 14 | parser = argparse.ArgumentParser(description='Enrollment in ASE') 15 | parser.add_argument('--feat_list', type=str) 16 | parser.add_argument('--folder', type=str, 17 | help='use to store the keys and encrypted features') 18 | parser.add_argument('--ase_dim', type=int, default=4) 19 | args = parser.parse_args() 20 | 21 | 22 | def load_features(feature_list): 23 | """ 24 | load the features. 
25 | index (0,1,2,...), features 26 | """ 27 | features = [] 28 | with open(feature_list, 'r') as f: 29 | lines = f.readlines() 30 | for line in lines: 31 | parts = line.strip().split(' ') 32 | feature = [float(e) for e in parts[1:]] 33 | feature = feature/np.linalg.norm(np.array(feature)) 34 | features.append(feature) 35 | return features 36 | 37 | 38 | def gen_random_basis(n=1, dim=512): 39 | """ 40 | generate n [-1, 1]^dim basis 41 | """ 42 | basis = [] 43 | for i in range(n): 44 | base = rng.choice([-1, 1], size=dim, replace=True) 45 | basis.append(base) 46 | return basis 47 | 48 | 49 | def gen_adversarial_basis(cand, d, n=1): 50 | """ 51 | select n basis from candidates 52 | cand: a list of features 53 | d: the translation vector 54 | """ 55 | total_num = len(cand) 56 | assert total_num > n 57 | 58 | rng = default_rng() 59 | chosen_idxes = rng.choice(total_num, size=n, replace=False) 60 | 61 | return [cand[idx] - d for idx in chosen_idxes] 62 | 63 | 64 | def convert_orth_basis(basis): 65 | """ 66 | generate orthogonal basis 67 | """ 68 | u_list = [] 69 | for base in basis: 70 | proj_base = base 71 | for u in u_list: 72 | proj_base = proj_base - np.dot(proj_base, u)*u 73 | proj_base = proj_base / np.linalg.norm(proj_base) 74 | u_list.append(proj_base) 75 | return u_list 76 | 77 | 78 | def ortho_proj(e, d, basis): 79 | """ 80 | calculate orthogonal projection of e onto d + span(basis) 81 | """ 82 | u_list = convert_orth_basis(basis) 83 | relative_ = e - d 84 | proj_e = d 85 | for u in u_list: 86 | proj_e = proj_e + np.dot(relative_, u) * u 87 | return proj_e 88 | 89 | 90 | def check_valid(basis): 91 | """ 92 | check if all basis are linearly independent. 
93 | """ 94 | u_list = [] 95 | for base in basis: 96 | proj_base = base 97 | for u in u_list: 98 | proj_base = proj_base - np.dot(proj_base, u)*u 99 | if np.linalg.norm(proj_base) < 1e-10: 100 | return 0 101 | return 1 102 | 103 | 104 | def generate_subspace(d, dim, ase_dim, adv_features): 105 | """ 106 | generate a subspace on d 107 | """ 108 | # generate source basis 109 | start = time.time() 110 | rand_dim = int(ase_dim/2) 111 | adv_dim = ase_dim - rand_dim 112 | while 1: 113 | basis = gen_random_basis(n=rand_dim, dim=dim) + \ 114 | gen_adversarial_basis(adv_features, d, adv_dim) 115 | if check_valid(basis) == 1: 116 | break 117 | 118 | # permute translation vector 119 | e = gen_random_basis(1, dim=dim) 120 | d_1 = ortho_proj(e, d, basis) 121 | 122 | # permute basis 123 | basis_1 = [] 124 | for _ in range(ase_dim): 125 | e = gen_random_basis(1, dim=dim) 126 | proj_e = ortho_proj(e, d, basis) 127 | base_1 = proj_e - d_1 128 | basis_1.append(base_1) 129 | return [d_1, basis_1], time.time() - start 130 | 131 | 132 | def main(feature_list, folder, ase_dim): 133 | """ 134 | enrollment 135 | """ 136 | # print('loading features...') 137 | features = load_features(feature_list) 138 | n, dim = len(features), len(features[0]) 139 | # L_list = [i for i in range(0, 2*L)] 140 | 141 | print('[ASE] Encrypting features...') 142 | start = time.time() 143 | duration_plain = [] 144 | for i, feature in enumerate(features): 145 | ase_result, duration = generate_subspace( 146 | feature, dim, ase_dim, features) 147 | np.save('{}/{}.npy'.format(folder, i), 148 | np.array(ase_result, np.dtype(object))) 149 | # measure time 150 | duration_plain.append(duration) 151 | if i % 1000 == 0: 152 | print('{}/{}'.format(i, n)) 153 | duration = time.time() - start 154 | print('total duration {}, ase duration {}, encrypted {} features.\n'.format( 155 | duration, sum(duration_plain), n)) 156 | 157 | 158 | if __name__ == '__main__': 159 | if os.path.exists(args.folder): 160 | 
shutil.rmtree(args.folder) 161 | os.makedirs(args.folder) 162 | 163 | main(args.feat_list, args.folder, args.ase_dim) 164 | -------------------------------------------------------------------------------- /libs/ASE/gen_sim.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import sys 3 | import math 4 | import numpy as np 5 | import argparse 6 | import os 7 | import time 8 | import random 9 | import resource 10 | from joblib import Parallel, delayed 11 | 12 | # parse the args 13 | parser = argparse.ArgumentParser(description='Match in ASE') 14 | parser.add_argument('--folder', default='', type=str, 15 | help='fold which stores the encrypted features') 16 | parser.add_argument('--pair_list', default='', type=str, help='pair file') 17 | parser.add_argument('--score_list', type=str, 18 | help='a file which stores the scores') 19 | args = parser.parse_args() 20 | 21 | 22 | def load_enrolled_file(file): 23 | d, basis_d = np.load(file, allow_pickle=True) 24 | return d, basis_d 25 | 26 | 27 | def convert_orth_basis(basis): 28 | """ 29 | generate orthogonal basis 30 | """ 31 | u_list = [] 32 | for base in basis: 33 | proj_base = base 34 | for u in u_list: 35 | proj_base = proj_base - np.dot(proj_base, u)*u 36 | proj_base = proj_base / np.linalg.norm(proj_base) 37 | u_list.append(proj_base) 38 | return u_list 39 | 40 | 41 | def ortho_proj(e, d, basis): 42 | """ 43 | calculate orthogonal projection of e onto d + span(basis) 44 | """ 45 | u_list = convert_orth_basis(basis) 46 | relative_ = e - d 47 | proj_e = d 48 | for u in u_list: 49 | proj_e = proj_e + np.dot(relative_, u) * u 50 | return proj_e 51 | 52 | 53 | def dist_p_to_s(e, d, basis_d): 54 | """ 55 | point-to-subspace distance 56 | """ 57 | proj_e = ortho_proj(e, d, basis_d) 58 | dist = np.linalg.norm(e - proj_e) 59 | return dist 60 | 61 | 62 | def dist_s_to_s(d, basis_d, e, basis_e): 63 | """ 64 | subspace-to-subspace distance 65 | """ 66 | assert 
len(basis_d) == len(basis_e) 67 | start = time.time() 68 | num_basis = len(basis_d) 69 | # generate orthogonal basis 70 | D = np.array(basis_d) 71 | E = np.array(basis_e) 72 | 73 | # the big left matrix 74 | tmp = np.zeros((2*num_basis, 2*num_basis)) 75 | tmp[:num_basis, :num_basis] = np.dot(D, D.T) 76 | tmp[:num_basis, num_basis:] = -np.dot(D, E.T) 77 | tmp[num_basis:, :num_basis] = np.dot(E, D.T) 78 | tmp[num_basis:, num_basis:] = -np.dot(E, E.T) 79 | 80 | # calculate alpha, beta 81 | alpha_beta = np.dot( 82 | np.dot(np.linalg.inv(tmp), np.concatenate([D, E], axis=0)), 83 | e-d) 84 | 85 | # calculate x_star, y_star 86 | x_star = d + np.dot(alpha_beta[:num_basis], basis_d) 87 | y_star = e + np.dot(alpha_beta[num_basis:], basis_e) 88 | 89 | dist = np.linalg.norm(x_star - y_star) 90 | return dist, time.time() - start 91 | 92 | 93 | def chunkify(fname, size=1024*1024): 94 | fileEnd = os.path.getsize(fname) 95 | with open(fname, 'rb') as f: 96 | chunkEnd = f.tell() 97 | while True: 98 | chunkStart = chunkEnd 99 | f.seek(size, 1) 100 | f.readline() 101 | chunkEnd = f.tell() 102 | yield chunkStart, chunkEnd - chunkStart 103 | if chunkEnd > fileEnd: 104 | break 105 | 106 | 107 | def process_lines(chunk_info_list, pair_list, folder, i): 108 | score_list = [] 109 | durations_list = [] 110 | partid_lineinfo_map = {} 111 | with open(pair_list, 'r') as f: 112 | for j in range(len(chunk_info_list)): 113 | chunkStart, chunkSize = chunk_info_list[j] 114 | f.seek(chunkStart) 115 | lines = f.read(chunkSize).splitlines() 116 | for line in lines: 117 | file1, file2, _ = line.strip().split(' ') 118 | # load files 119 | d, basis_d = load_enrolled_file( 120 | '{}/{}.npy'.format(folder, file1)) 121 | e, basis_e = load_enrolled_file( 122 | '{}/{}.npy'.format(folder, file2)) 123 | dist, duration = dist_s_to_s(d, basis_d, e, basis_e) 124 | score = (2 - dist**2)/2 125 | score = min(max(score, -1), 1) 126 | 127 | score_list.append((file1, file2, score)) 128 | durations_list.append((file1, 
def main(folder, pair_list, score_list):
    """
    Score every pair listed in ``pair_list`` and write the results.

    Args:
        folder: directory holding the enrolled ``<id>.npy`` files.
        pair_list: text file with one ``file1 file2 label`` triple per line,
            separated by single spaces.
        score_list: output path; receives one ``file1 file2 score`` line per
            pair, in the order of ``pair_list``.

    Lists shorter than 100000 pairs are scored serially. Longer lists are
    split into byte chunks (see ``chunkify``) that are handed to at most 10
    joblib workers running ``process_lines``.
    """
    # load pair_file
    with open(pair_list, 'r') as f:
        lines = f.readlines()
    n = len(lines)

    print('[ASE] Decrypting features...')
    duration_plain = []

    # the context manager closes the score file even if a pair fails to load
    with open(score_list, 'w') as fw:
        start = time.time()
        if n < 100000:
            for i, line in enumerate(lines):
                file1, file2, _ = line.strip().split(' ')
                # load files
                d, basis_d = load_enrolled_file(
                    '{}/{}.npy'.format(folder, file1))
                e, basis_e = load_enrolled_file(
                    '{}/{}.npy'.format(folder, file2))

                dist, duration = dist_s_to_s(d, basis_d, e, basis_e)
                # map the distance to a score and clamp it to [-1, 1]
                score = (2 - dist**2)/2
                score = min(max(score, -1), 1)
                duration_plain.append(duration)
                fw.write('{} {} {}\n'.format(file1, file2, score))
                if i % 1000 == 0:
                    print('{}/{}'.format(i, n))
        else:
            # Generate the scores in parallel.
            chunk_info_list = list(chunkify(pair_list, 1024*1024))
            lnum = len(chunk_info_list)
            num_jobs = min(lnum, 10)
            idxs = list(range(0, lnum, math.ceil(lnum/num_jobs)))
            idxs.append(lnum)
            # the rounding above can produce fewer slices than requested
            num_jobs = len(idxs) - 1
            result_list = Parallel(n_jobs=num_jobs, verbose=100)(
                delayed(process_lines)(
                    chunk_info_list[idxs[i]:idxs[i+1]], pair_list, folder, i)
                for i in range(num_jobs))

            # index the per-worker results by part id so they can be
            # written back in input order
            all_partid_lineinfo_map = {}
            for partid_lineinfo_map in result_list:
                all_partid_lineinfo_map.update(partid_lineinfo_map)

            i = 0
            for j in range(num_jobs):
                # local names distinct from the `score_list` path parameter
                part_scores, part_durations = all_partid_lineinfo_map[j]
                assert len(part_scores) == len(part_durations)
                for scoreinfo, durationinfo in zip(part_scores,
                                                   part_durations):
                    file1, file2, score = scoreinfo
                    fw.write('{} {} {}\n'.format(file1, file2, score))
                    _, _, duration = durationinfo
                    duration_plain.append(duration)
                    if i % 1000 == 0:
                        print('{}/{}'.format(i, n))
                    i += 1

    duration = time.time() - start
    print('total duration {}, ase duration {}, calculate {} pairs.\n'.format(
        duration, sum(duration_plain), n))
def compute_rotation(t, c):
    """
    Compute a rotation matrix R such that ``R @ t == c``.

    The rotation acts in the plane spanned by ``t`` and ``c`` and leaves the
    orthogonal complement of that plane untouched.

    Args:
        t: unit vector (1-D array-like).
        c: unit vector of the same length as ``t``.

    Returns:
        ``(n, n)`` numpy array, orthogonal with determinant +1.

    Raises:
        AssertionError: if the inputs are not unit length (tolerance 1e-5)
            or differ in length.
        ValueError: if ``c == -t`` in one dimension, where no rotation
            exists.
    """
    # check if normalized and dim is the same
    assert_error = 1e-5
    t = np.asarray(t, dtype=float)
    c = np.asarray(c, dtype=float)
    assert abs(np.linalg.norm(t) - 1) < assert_error
    assert abs(np.linalg.norm(c) - 1) < assert_error
    assert len(t) == len(c)

    dim = len(t)
    # rounding can push the dot product slightly outside [-1, 1], which
    # would make math.acos raise ValueError
    cos_theta = float(np.clip(np.dot(t, c), -1.0, 1.0))

    # component of c orthogonal to t: second axis of the rotation plane
    w = c - cos_theta*t
    w_norm = np.linalg.norm(w)
    if w_norm < 1e-12:
        # t and c are (anti)parallel, so the plane is not defined by them
        if cos_theta > 0:
            return np.identity(dim)
        if dim < 2:
            raise ValueError('no rotation maps t to -t in one dimension')
        # rotate by pi in a plane through t and an arbitrary orthogonal
        # direction; start from the axis least aligned with t
        helper = np.zeros(dim)
        helper[np.argmin(np.abs(t))] = 1.0
        w = helper - np.dot(helper, t)*t
        w_norm = np.linalg.norm(w)
        cos_theta = -1.0
    w = w / w_norm

    sin_theta = math.sin(math.acos(cos_theta))

    tt = np.outer(t, t)
    ww = np.outer(w, w)
    R = np.identity(dim) - tt - ww + \
        (tt + ww) * cos_theta + \
        (np.outer(w, t) - np.outer(t, w)) * sin_theta
    return R
main(feature_list, folder, alpha): 96 | """ 97 | enrollment 98 | """ 99 | # print('loading features...') 100 | features = load_features(feature_list) 101 | n, dim = len(features), len(features[0]) 102 | # L_list = [i for i in range(0, 2*L)] 103 | 104 | print('[IronMask] Encrypting features...') 105 | start = time.time() 106 | duration_plain = [] 107 | for i, feature in enumerate(features): 108 | result, duration = enroll_ironmask(feature, alpha) 109 | np.save('{}/{}.npy'.format(folder, i), 110 | np.array(result, np.dtype(object))) 111 | # measure time 112 | duration_plain.append(duration) 113 | if i % 1000 == 0: 114 | print('{}/{}'.format(i, n)) 115 | duration = time.time() - start 116 | print('total duration {}, ironmask duration {}, encrypted {} features.\n'.format( 117 | duration, sum(duration_plain), n)) 118 | 119 | 120 | if __name__ == '__main__': 121 | if os.path.exists(args.folder): 122 | shutil.rmtree(args.folder) 123 | os.makedirs(args.folder) 124 | 125 | main(args.feat_list, args.folder, args.alpha) 126 | -------------------------------------------------------------------------------- /libs/IronMask/gen_sim.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import sys 3 | import numpy as np 4 | import argparse 5 | import os 6 | import time 7 | import random 8 | import hashlib 9 | import resource 10 | 11 | # parse the args 12 | parser = argparse.ArgumentParser(description='Match in IronMask') 13 | parser.add_argument('--folder', default='', type=str, 14 | help='fold which stores the encrypted features') 15 | parser.add_argument('--pair_list', default='', type=str, help='pair file') 16 | parser.add_argument('--score_list', type=str, 17 | help='a file which stores the scores') 18 | parser.add_argument('--alpha', type=int, default=16) 19 | parser.add_argument('--feat_list', type=str) 20 | args = parser.parse_args() 21 | 22 | 23 | def load_features(feature_list): 24 | """ 25 | load the features. 
def decode(feature, alpha=16):
    """
    Map a feature to its code word.

    The ``alpha`` entries of ``feature`` with the largest absolute value are
    set to ``+-1/sqrt(alpha)``, carrying the sign of the original entry; all
    other entries are zero.

    Args:
        feature: 1-D sequence of numbers.
        alpha: number of non-zero entries in the code.

    Returns:
        numpy array with the same length as ``feature``.
    """
    magnitude = 1/np.sqrt(alpha)
    values = np.asarray(feature)
    # positions ordered from largest to smallest absolute value
    ranked = np.argsort(np.abs(values))[::-1]
    # explicit index array: out-of-range alpha still raises IndexError
    top = ranked[np.arange(alpha)]
    code = np.zeros(len(feature))
    code[top] = np.where(values[top] < 0, -magnitude, magnitude)
    return code
def load_features(feature_list):
    """
    Load L2-normalised feature vectors from a text file.

    Each line holds an index followed by the feature values. Fields may be
    separated by any run of whitespace (spaces or tabs). The index column is
    ignored: a feature's position in the returned list is its line number.

    Args:
        feature_list: path of the feature file.

    Returns:
        list of 1-D numpy arrays, one per line, each divided by its L2 norm.
    """
    features = []
    with open(feature_list, 'r') as f:
        # stream the file instead of materialising every line up front
        for line in f:
            # split() tolerates tabs and repeated spaces, where split(' ')
            # produced empty fields that float() rejects
            parts = line.split()
            feature = np.array([float(e) for e in parts[1:]])
            features.append(feature / np.linalg.norm(feature))
    return features
resource.getrusage(resource.RUSAGE_SELF).ru_maxrss 92 | for i, feature in enumerate(features): 93 | result, duration = enroll(feature, precision, encryptor, batchenc, ctx) 94 | result.save('{}/{}'.format(folder, i)) 95 | # measure time 96 | duration_sfm.append(duration) 97 | if i % 1000 == 0: 98 | print('{}/{}'.format(i, n)) 99 | duration = time.time() - start 100 | print('total duration {}, sfm duration {}, encrypted {} features.\n'.format( 101 | duration, sum(duration_sfm), n)) 102 | 103 | 104 | if __name__ == '__main__': 105 | if os.path.exists(args.folder): 106 | shutil.rmtree(args.folder) 107 | os.makedirs(args.folder) 108 | 109 | main(args.feat_list, args.folder, args.precision, args.public_key) 110 | -------------------------------------------------------------------------------- /libs/SFM/gen_sim.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import sys 3 | import numpy as np 4 | import argparse 5 | import os 6 | import time 7 | import random 8 | import resource 9 | from itertools import repeat 10 | import tenseal as ts 11 | import tenseal.sealapi as sealapi 12 | 13 | # parse the args 14 | parser = argparse.ArgumentParser(description='Match in SecureFaceMatching') 15 | parser.add_argument('--folder', default='', type=str, 16 | help='fold which stores the encrypted features') 17 | parser.add_argument('--pair_list', default='', type=str, help='pair file') 18 | parser.add_argument('--score_list', type=str, 19 | help='a file which stores the scores') 20 | parser.add_argument('--genkey', default=0, type=int) 21 | parser.add_argument('--precision', default=125, type=int) 22 | args = parser.parse_args() 23 | 24 | poly_modulus_degree = 4096 25 | plain_modulus = 1032193 26 | # Setup TenSEAL context 27 | parms = sealapi.EncryptionParameters(sealapi.SCHEME_TYPE.BFV) 28 | parms.set_poly_modulus_degree(poly_modulus_degree) 29 | parms.set_plain_modulus(plain_modulus) 30 | coeff = 
def calculate_sim(cipher1, cipher2, ctx, batchenc, encryptor, decryptor, evaluator, precision):
    """
    Compute the similarity score of two encrypted features.

    The two ciphertexts are multiplied homomorphically, the product is
    folded with a sequence of rotate-and-add steps, and slot 0 of the
    decrypted result is divided by ``precision**2``.

    NOTE: ``cipher1`` is modified in place. It first receives the product
    and is then reused as the buffer for the rotated ciphertext. The
    relinearisation and Galois keys are read from the module-level names
    ``relin_key`` and ``gal_key``.

    Args:
        cipher1, cipher2: the two ciphertexts to compare.
        ctx: SEAL context used to allocate the result ciphertext.
        batchenc: batch encoder (slot count and decoding).
        encryptor: used to produce an encryption of zero.
        decryptor: used to decrypt the accumulated result.
        evaluator: performs the homomorphic operations.
        precision: scale factor; the decoded value is divided by its square
            (presumably the quantisation factor used at enrollment - TODO
            confirm against the enrollment script).

    Returns:
        ``(score, seconds)``: the score and the wall-clock time spent here.
    """
    start = time.time()
    # fresh encryption of zero, added to the product below
    cipher0 = cipher_zero(ctx, batchenc, encryptor)

    # slot-wise product, stored back into cipher1
    evaluator.multiply_inplace(cipher1, cipher2)
    evaluator.relinearize_inplace(cipher1, relin_key)
    encrypted_result = sealapi.Ciphertext(ctx)
    evaluator.add(cipher1, cipher0, encrypted_result)

    slot_count = batchenc.slot_count()
    row_size = int(slot_count / 2)
    # rotate by 1, 2, 4, ... and add each time; presumably this leaves the
    # sum of a whole row in slot 0 - verify for non power-of-two row sizes
    for i in range(int(np.log2(row_size))):
        # cipher1 is only a scratch buffer from here on
        evaluator.rotate_rows(encrypted_result, pow(2, i), gal_key, cipher1)
        evaluator.add_inplace(encrypted_result, cipher1)

    plaintext_result = sealapi.Plaintext()
    decryptor.decrypt(encrypted_result, plaintext_result)
    # only slot 0 is read; undo the scaling applied to both operands
    score = batchenc.decode_int64(plaintext_result)[0]/(precision*precision)
    return score, time.time() - start
def decode_uvw(C_f, K, L):
    """
    Unpack an integer into its ``u``, ``v`` and ``w`` components.

    ``C_f`` is read as a base-``4*L`` number: the lowest ``K`` digits form
    ``u``, the next ``K`` digits form ``v``, and everything above is ``w``.

    Args:
        C_f: non-negative integer to unpack.
        K: number of digits in each of ``u`` and ``v``.
        L: the digit radix is ``4*L``.

    Returns:
        ``(u_list, v_list, w)``: both lists hold ``K`` digits with the most
        significant digit first; ``w`` is returned as ``int``.
    """
    radix = 4*L
    remaining = C_f
    digits = []
    # peel off 2*K digits, least significant first
    for _ in range(2*K):
        remaining, digit = divmod(remaining, radix)
        digits.append(digit)
    u_list = digits[:K][::-1]
    v_list = digits[K:][::-1]
    return u_list, v_list, int(remaining)
def main(folder, pair_list, score_list, K, L, M, parallel=False):
    """
    Score every pair listed in ``pair_list`` and write the results.

    Args:
        folder: directory holding the enrolled ``<id>.npy`` files.
        pair_list: text file with one ``file1 file2 label`` triple per line,
            separated by single spaces.
        score_list: output path; receives one ``file1 file2 score`` line per
            pair, in the order of ``pair_list``.
        K, L, M: scheme parameters forwarded to ``calculate_sim``.
        parallel: when true, split the pair list into byte chunks and score
            them with up to 12 joblib workers. The default keeps the serial
            path, which is the only one the previous ``if True:`` version
            could reach.
    """
    # load pair_file
    with open(pair_list, 'r') as f:
        lines = f.readlines()
    n = len(lines)

    print('[SecureVector] Decrypting features...')
    duration_plain = []
    duration_cypher = []

    # the context manager closes the score file even if a pair fails
    with open(score_list, 'w') as fw:
        start = time.time()
        if not parallel or n == 0:
            for i, line in enumerate(lines):
                file1, file2, _ = line.strip().split(' ')
                # load files
                c_x, C_tilde_x = load_enrolled_file(
                    '{}/{}.npy'.format(folder, file1))
                c_y, C_tilde_y = load_enrolled_file(
                    '{}/{}.npy'.format(folder, file2))
                # here you need to check if c_x, c_y, C_tilde_x, C_tilde_y
                # are similar; if they are similar, the code should refuse
                # to encrypt the results
                score, durations = calculate_sim(
                    c_x, c_y, C_tilde_x, C_tilde_y, K, L, M)
                # measure time
                duration_plain.append(durations[0])
                duration_cypher.append(durations[1])
                fw.write('{} {} {}\n'.format(file1, file2, score))
                if i % 1000 == 0:
                    print('{}/{}'.format(i, n))
        else:
            # Generate the scores in parallel.
            num_jobs = 12
            chunk_info_list = list(chunkify(pair_list))
            lnum = len(chunk_info_list)
            idxs = list(range(0, lnum, math.ceil(lnum/num_jobs)))
            idxs.append(lnum)
            # the rounding above can produce fewer slices than requested
            num_jobs = len(idxs) - 1
            result_list = Parallel(n_jobs=num_jobs, verbose=100)(
                delayed(process_lines)(
                    chunk_info_list[idxs[i]:idxs[i+1]],
                    pair_list, folder, K, L, M, i)
                for i in range(num_jobs))

            # index the per-worker results by part id so they can be
            # written back in input order
            all_partid_lineinfo_map = {}
            for partid_lineinfo_map in result_list:
                all_partid_lineinfo_map.update(partid_lineinfo_map)

            i = 0
            for j in range(num_jobs):
                # local names distinct from the `score_list` path parameter
                part_scores, part_durations = all_partid_lineinfo_map[j]
                assert len(part_scores) == len(part_durations)
                for scoreinfo, durationinfo in zip(part_scores,
                                                   part_durations):
                    file1, file2, score = scoreinfo
                    fw.write('{} {} {}\n'.format(file1, file2, score))
                    _, _, durations = durationinfo
                    duration_plain.append(durations[0])
                    duration_cypher.append(durations[1])
                    if i % 1000 == 0:
                        print('{}/{}'.format(i, n))
                    i += 1

    duration = time.time() - start
    print('total duration {}, permutation duration {}, paillier duration {}, calculate {} pairs.\n'.format(
        duration, sum(duration_plain), sum(duration_cypher), n))
def load_features(feature_list):
    """
    load the features.
    index (0,1,2,...), features

    Every line of `feature_list` is split on single spaces; the first
    field is dropped and the remaining ones are parsed as floats and
    divided by their Euclidean norm. Returns a list with one numpy array
    per line, in file order.
    """
    features = []
    with open(feature_list, 'r') as f:
        for line in f:
            values = np.array([float(e) for e in line.strip().split(' ')[1:]])
            features.append(values / np.linalg.norm(values))
    return features


def enroll(feature, K, L, M, public_key):
    """
    enroll a feature

    Draws K random integers u_i and K random integers v_i in [0, 2L),
    derives a sign s_i from the parity of v_i, multiplies the i-th run of
    len(feature)//K consecutive components by s_i * e**((u_i - L)/M) and
    normalises the result to unit length (c_f). The u_i, the v_i and a
    quantised logarithm of the pre-normalisation norm are packed into one
    integer using radix 4L, which is passed to `public_key.encrypt`.

    Returns ([c_f, C_tilde_f], [duration_plain, duration_cypher]).
    Raises ValueError if len(feature) is not a positive multiple of K.
    """
    n = len(feature)
    if n == 0 or n % K != 0:
        # previously surfaced as an opaque broadcasting/overflow error
        raise ValueError(
            'feature length {} must be a positive multiple of K={}'.format(n, K))

    start = time.time()
    # NOTE(review): np.random is not a cryptographically secure generator.
    # It is kept so seeded runs stay reproducible - confirm whether these
    # values need a CSPRNG.
    u_list = [int(e) for e in np.random.rand(K)*(2*L)]
    v_list = [int(e) for e in np.random.rand(K)*(2*L)]
    s_list = [1 if v % 2 == 0 else -1 for v in v_list]

    # generate c_f
    scale = [s * np.e**((u - L)/M) for s, u in zip(s_list, u_list)]
    b_f = np.repeat(scale, n//K) * feature
    W_f = np.linalg.norm(b_f)
    c_f = b_f/W_f

    # encrypt
    # The packing is done with Python ints on purpose: np.dot on plain
    # lists picks int64 whenever the powers happen to fit, and the sum
    # then wraps around silently for mid-sized K/L.
    radix = 4*L
    base = [radix**(K-1-i) for i in range(K)]
    ratio = (float(np.log(W_f)) + L/M)/(2*L/M)
    w_f = int(ratio * 2**15 * L**8)
    packed_u = sum(u*b for u, b in zip(u_list, base))
    packed_v = sum(v*b for v, b in zip(v_list, base))
    C_f = packed_u + packed_v * radix**K + w_f * radix**(2*K)
    duration_plain = time.time() - start

    start = time.time()
    C_tilde_f = public_key.encrypt(C_f)
    duration_cypher = time.time() - start
    return [c_f, C_tilde_f], [duration_plain, duration_cypher]


def main(K, L, M, feature_list, folder, public_key):
    """
    enrollment in SecureVector

    Loads the features from `feature_list`, enrolls each one with the key
    stored at `public_key` and saves the i-th result as '<folder>/<i>.npy'.
    Timing totals are printed at the end.
    """
    # print('loading features...')
    features = load_features(feature_list)
    # only the count is needed; indexing features[0] here used to crash
    # on an empty feature list
    n = len(features)

    print('[SecureVector] Encrypting features...')
    # NOTE(review): allow_pickle=True unpickles the file content, so only
    # key files from a trusted source should be loaded here.
    publickey = np.load(public_key, allow_pickle=True)[0]

    start = time.time()
    duration_plain = []
    duration_cypher = []
    # r_init = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss
    for i, feature in enumerate(features):
        result, durations = enroll(feature, K, L, M, publickey)
        np.save('{}/{}.npy'.format(folder, i),
                np.array(result, np.dtype(object)))
        # measure time
        duration_plain.append(durations[0])
        duration_cypher.append(durations[1])
        if i % 1000 == 0:
            print('{}/{}'.format(i, n))
    duration = time.time() - start
    print('total duration {}, permutation duration {}, paillier duration {}, encrypted {} features.\n'.format(
        duration, sum(duration_plain), sum(duration_cypher), n))


if __name__ == '__main__':
    # L and M are derived from the key size and K
    L = int(np.ceil(2**(args.key_size/(2*args.K+9)-2) - 1))
    M = L/128
    security_level = 2*args.K + args.K*np.log2(L)

    print('K: {} L: {} M: {}'.format(args.K, L, M))
    print('the security level is: {}'.format(security_level))
    assert L > 1
    # an existing output folder is wiped so it only holds this run's files
    if os.path.exists(args.folder):
        shutil.rmtree(args.folder)
    os.makedirs(args.folder)

    main(args.K, L, M, args.feat_list, args.folder,
         '{}_{}.npy'.format(args.public_key, args.key_size))
def load_features(feature_list):
    """
    load the features.
    index (0,1,2,...), features

    Every line of `feature_list` is split on single spaces; the first
    field is dropped and the remaining ones are parsed as floats and
    divided by their Euclidean norm. Returns a list with one numpy array
    per line, in file order.
    """
    features = []
    with open(feature_list, 'r') as f:
        for line in f:
            values = np.array([float(e) for e in line.strip().split(' ')[1:]])
            features.append(values / np.linalg.norm(values))
    return features


def main(feat_list, pair_list, score_list):
    """
    Write the dot product of the normalised features of every listed pair.

    Each non-blank line of `pair_list` holds three whitespace-separated
    fields: the first two are integer positions into the feature list,
    the third is ignored here. One line '<file1> <file2> <score>' is
    written to `score_list` per pair.
    """
    features = load_features(feat_list)
    # blank lines are dropped so they cannot break the three-field unpack
    with open(pair_list, 'r') as f:
        lines = [line for line in f if line.strip()]
    n = len(lines)

    # `with` closes the score file even if a pair refers to a missing feature
    with open(score_list, 'w') as fw:
        for i, line in enumerate(lines):
            file1, file2, _ = line.split()
            score = np.dot(features[int(file1)], features[int(file2)])
            fw.write('{} {} {}\n'.format(file1, file2, score))
            if i % 1000 == 0:
                print('{}/{}'.format(i, n))


if __name__ == '__main__':
    folder = os.path.dirname(args.score_list)
    # a bare file name has no directory part, and os.makedirs('') raises
    if folder and not os.path.exists(folder):
        os.makedirs(folder)
    main(args.feat_list, args.pair_list, args.score_list)
click>=6.7 6 | phe==1.4.0 7 | tenseal==0.3.4 8 | scipy 9 | hashlib 10 | resource 11 | --------------------------------------------------------------------------------