├── README.md ├── AdaFS ├── README.md ├── models │ ├── layer.py │ └── modules.py ├── utils │ └── train_help.py ├── dataloader │ └── tfloader.py └── train.py ├── LPFS ├── README.md ├── models │ ├── layer.py │ └── modules.py ├── utils │ └── train_help.py ├── dataloader │ └── tfloader.py └── train.py ├── AutoField ├── README.md ├── models │ ├── layer.py │ ├── modules_retrain.py │ └── modules_search.py ├── dataloader │ └── tfloader.py ├── utils │ └── train_help.py ├── retrain.py └── search.py ├── LICENSE └── .gitignore /README.md: -------------------------------------------------------------------------------- 1 | # AutoField 2 | This repository contains several automated feature selection methods for CTR prediction: 3 | - [AutoField: Automating Feature Selection in Deep Recommender Systems](https://dl.acm.org/doi/10.1145/3485447.3512071) 4 | - [AdaFS: Adaptive Feature Selection in Deep Recommender System](https://dl.acm.org/doi/abs/10.1145/3534678.3539204) 5 | - [LPFS: Learnable Polarizing Feature Selection for Click-Through Rate Prediction](https://arxiv.org/abs/2206.00267) 6 | -------------------------------------------------------------------------------- /AdaFS/README.md: -------------------------------------------------------------------------------- 1 | # AdaFS 2 | This repository contains the PyTorch implementation of the KDD 2022 paper: 3 | - [AdaFS: Adaptive Feature Selection in Deep Recommender System](https://dl.acm.org/doi/abs/10.1145/3534678.3539204) 4 | 5 | ### Run 6 | 7 | Running AdaFS: 8 | ``` 9 | python -u train.py --dataset $YOUR_DATASET --model $YOUR_MODEL \ 10 | --gpu $GPU --lr $LR --l2 $L2 --arch_lr $ARCH_LR \ 11 | --pretrain $PRETRAIN 12 | ``` 13 | 14 | You can choose `YOUR_DATASET` from \{Criteo, Avazu, KDD12\} and `YOUR_MODEL` from \{FM, DeepFM, DCN, IPNN\}. Here we empirically set $ARCH_LR = $LR and choose $PRETRAIN from \{1, 2, 5\}. 15 | -------------------------------------------------------------------------------- /LPFS/README.md: -------------------------------------------------------------------------------- 1 | # LPFS 2 | This repository contains the PyTorch implementation of the paper: 3 | - [LPFS: Learnable Polarizing Feature Selection for Click-Through Rate Prediction](https://arxiv.org/abs/2206.00267) 4 | 5 | ### Run 6 | 7 | Running LPFS: 8 | ``` 9 | python -u train.py --dataset $YOUR_DATASET --model $YOUR_MODEL \ 10 | --gpu $GPU --lr $LR --l2 $L2 \ 11 | --epsilon $EPSILON --lam $LAM 12 | ``` 13 | 14 | You can choose `YOUR_DATASET` from \{Criteo, Avazu, KDD12\} and `YOUR_MODEL` from \{FM, DeepFM, DCN, IPNN\}. Here we empirically set $EPSILON and $LAM per dataset as shown in the following table.
15 | 16 | 17 | | Dataset | Criteo | Avazu | KDD12 | 18 | | ---------- | ---------- | ---------- | ---------- | 19 | | EPSILON | 1e-1 | 4e-3 | 1e-3 | 20 | | LAM | 1e-1 | 25e-2 | 3e-1 | -------------------------------------------------------------------------------- /AutoField/README.md: -------------------------------------------------------------------------------- 1 | # AutoField 2 | This repository contains the PyTorch implementation of the WWW 2022 paper: 3 | - [AutoField: Automating Feature Selection in Deep Recommender Systems](https://dl.acm.org/doi/10.1145/3485447.3512071) 4 | 5 | ### Run 6 | 7 | Running the Search Process: 8 | ``` 9 | python -u search.py --dataset $YOUR_DATASET --model $YOUR_MODEL \ 10 | --debug_mode 1 --gpu $GPU --lr $LR --l2 $L2 --arch_lr $ARCH_LR \ 11 | --save_name $PATH_TO_SEARCH_RESULT 12 | ``` 13 | 14 | You can choose `YOUR_DATASET` from \{Criteo, Avazu, KDD12\} and `YOUR_MODEL` from \{FM, DeepFM, DCN, IPNN\}. 15 | 16 | 17 | Running the Retrain Process: 18 | ``` 19 | python -u retrain.py --dataset $YOUR_DATASET --model $YOUR_MODEL \ 20 | --debug_mode 0 --gpu $GPU --lr $LR --l2 $L2 \ 21 | --arch_file $PATH_TO_SEARCH_RESULT 22 | ``` 23 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 Fuyuan Lyu Tommy 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | 131 | data/* 132 | *.out 133 | *.sh 134 | .vscode/* 135 | result/ 136 | sh/ -------------------------------------------------------------------------------- /LPFS/models/layer.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | class FeaturesLinear(torch.nn.Module): 4 | 5 | def __init__(self, field_dims, output_dim=1): 6 | super().__init__() 7 | self.fc = torch.nn.Embedding(sum(field_dims), output_dim) 8 | self.bias = torch.nn.Parameter(torch.zeros((output_dim,))) 9 | 10 | def forward(self, x): 11 | """ 12 | :param x: Long tensor of size ``(batch_size, num_fields)`` 13 | """ 14 | return torch.sum(self.fc(x), dim=1) + self.bias 15 | 16 | class FactorizationMachine(torch.nn.Module): 17 | 18 | def __init__(self, reduce_sum=True): 19 | super().__init__() 20 | self.reduce_sum = reduce_sum 21 | 22 | def forward(self, x): 23 | """ 24 | :param x: Float tensor of size ``(batch_size, num_fields, embed_dim)`` 25 | """ 26 | square_of_sum = torch.sum(x, dim=1) ** 2 27 | sum_of_square = torch.sum(x ** 2, dim=1) 28 | ix = square_of_sum - sum_of_square 29 | if self.reduce_sum: 30 | ix = torch.sum(ix, dim=1, keepdim=True) 31 | return 0.5 * ix 32 | 33 | class MultiLayerPerceptron(torch.nn.Module): 34 | 35 | def __init__(self, input_dim, embed_dims, dropout, output_layer=True): 36 | super().__init__() 37 | layers = list() 38 | for embed_dim in embed_dims: 39 | layers.append(torch.nn.Linear(input_dim, embed_dim)) 40 | # layers.append(torch.nn.BatchNorm1d(embed_dim)) 41 | layers.append(torch.nn.ReLU()) 42 | layers.append(torch.nn.Dropout(p=dropout)) 43 | input_dim = embed_dim 44 | if output_layer: 45 | layers.append(torch.nn.Linear(input_dim, 1)) 46 | self.mlp = 
torch.nn.Sequential(*layers) 47 | 48 | def forward(self, x): 49 | """ 50 | :param x: Float tensor of size ``(batch_size, embed_dim)`` 51 | """ 52 | return self.mlp(x) 53 | 54 | class CrossNetwork(torch.nn.Module): 55 | def __init__(self, input_dim, num_layers): 56 | super().__init__() 57 | self.num_layers = num_layers 58 | self.w = torch.nn.ModuleList([ 59 | torch.nn.Linear(input_dim, 1, bias=False) for _ in range(num_layers) 60 | ]) 61 | self.b = torch.nn.ParameterList([ 62 | torch.nn.Parameter(torch.zeros((input_dim,))) for _ in range(num_layers) 63 | ]) 64 | 65 | def forward(self, x): 66 | """ 67 | :param x: Float tensor of size ``(batch_size, num_fields, embed_dim)`` 68 | """ 69 | x0 = x 70 | for i in range(self.num_layers): 71 | xw = self.w[i](x) 72 | x = x0 * xw + self.b[i] + x 73 | return x 74 | -------------------------------------------------------------------------------- /AdaFS/models/layer.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | class FeaturesLinear(torch.nn.Module): 4 | 5 | def __init__(self, field_dims, output_dim=1): 6 | super().__init__() 7 | self.fc = torch.nn.Embedding(sum(field_dims), output_dim) 8 | self.bias = torch.nn.Parameter(torch.zeros((output_dim,))) 9 | 10 | def forward(self, x): 11 | """ 12 | :param x: Long tensor of size ``(batch_size, num_fields)`` 13 | """ 14 | return torch.sum(self.fc(x), dim=1) + self.bias 15 | 16 | class FactorizationMachine(torch.nn.Module): 17 | 18 | def __init__(self, reduce_sum=True): 19 | super().__init__() 20 | self.reduce_sum = reduce_sum 21 | 22 | def forward(self, x): 23 | """ 24 | :param x: Float tensor of size ``(batch_size, num_fields, embed_dim)`` 25 | """ 26 | square_of_sum = torch.sum(x, dim=1) ** 2 27 | sum_of_square = torch.sum(x ** 2, dim=1) 28 | ix = square_of_sum - sum_of_square 29 | if self.reduce_sum: 30 | ix = torch.sum(ix, dim=1, keepdim=True) 31 | return 0.5 * ix 32 | 33 | class MultiLayerPerceptron(torch.nn.Module): 34 | 35 | def __init__(self, input_dim, embed_dims, dropout, output_layer=True): 36 | super().__init__() 37 | layers = list() 38 | for embed_dim in embed_dims: 39 | layers.append(torch.nn.Linear(input_dim, embed_dim)) 40 | # layers.append(torch.nn.BatchNorm1d(embed_dim)) 41 | layers.append(torch.nn.ReLU()) 42 | layers.append(torch.nn.Dropout(p=dropout)) 43 | input_dim = embed_dim 44 | if output_layer: 45 | layers.append(torch.nn.Linear(input_dim, 1)) 46 | self.mlp = torch.nn.Sequential(*layers) 47 | 48 | def forward(self, x): 49 | """ 50 | :param x: Float tensor of size ``(batch_size, embed_dim)`` 51 | """ 52 | return self.mlp(x) 53 | 54 | class CrossNetwork(torch.nn.Module): 55 | def __init__(self, input_dim, num_layers): 56 | super().__init__() 57 | self.num_layers = num_layers 58 | self.w = torch.nn.ModuleList([ 59 | torch.nn.Linear(input_dim, 1, bias=False) for _ in range(num_layers) 60 | ]) 61 | self.b = torch.nn.ParameterList([ 62 | torch.nn.Parameter(torch.zeros((input_dim,))) for _ in range(num_layers) 63 | ]) 64 | 65 | def forward(self, x): 66 | """ 67 | :param x: Float tensor of size ``(batch_size, num_fields, embed_dim)`` 68 | """ 69 | x0 = x 70 | for i in range(self.num_layers): 71 | xw = self.w[i](x) 72 | x = x0 * xw + self.b[i] + x 73 | return x 74 | -------------------------------------------------------------------------------- /AutoField/models/layer.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | class FeaturesLinear(torch.nn.Module): 4 | 5 | def 
__init__(self, field_dims, output_dim=1): 6 | super().__init__() 7 | self.fc = torch.nn.Embedding(sum(field_dims), output_dim) 8 | self.bias = torch.nn.Parameter(torch.zeros((output_dim,))) 9 | 10 | def forward(self, x): 11 | """ 12 | :param x: Long tensor of size ``(batch_size, num_fields)`` 13 | """ 14 | return torch.sum(self.fc(x), dim=1) + self.bias 15 | 16 | class FactorizationMachine(torch.nn.Module): 17 | 18 | def __init__(self, reduce_sum=True): 19 | super().__init__() 20 | self.reduce_sum = reduce_sum 21 | 22 | def forward(self, x): 23 | """ 24 | :param x: Float tensor of size ``(batch_size, num_fields, embed_dim)`` 25 | """ 26 | square_of_sum = torch.sum(x, dim=1) ** 2 27 | sum_of_square = torch.sum(x ** 2, dim=1) 28 | ix = square_of_sum - sum_of_square 29 | if self.reduce_sum: 30 | ix = torch.sum(ix, dim=1, keepdim=True) 31 | return 0.5 * ix 32 | 33 | class MultiLayerPerceptron(torch.nn.Module): 34 | 35 | def __init__(self, input_dim, embed_dims, dropout, output_layer=True): 36 | super().__init__() 37 | layers = list() 38 | for embed_dim in embed_dims: 39 | layers.append(torch.nn.Linear(input_dim, embed_dim)) 40 | # layers.append(torch.nn.BatchNorm1d(embed_dim)) 41 | layers.append(torch.nn.ReLU()) 42 | layers.append(torch.nn.Dropout(p=dropout)) 43 | input_dim = embed_dim 44 | if output_layer: 45 | layers.append(torch.nn.Linear(input_dim, 1)) 46 | self.mlp = torch.nn.Sequential(*layers) 47 | 48 | def forward(self, x): 49 | """ 50 | :param x: Float tensor of size ``(batch_size, embed_dim)`` 51 | """ 52 | return self.mlp(x) 53 | 54 | class CrossNetwork(torch.nn.Module): 55 | def __init__(self, input_dim, num_layers): 56 | super().__init__() 57 | self.num_layers = num_layers 58 | self.w = torch.nn.ModuleList([ 59 | torch.nn.Linear(input_dim, 1, bias=False) for _ in range(num_layers) 60 | ]) 61 | self.b = torch.nn.ParameterList([ 62 | torch.nn.Parameter(torch.zeros((input_dim,))) for _ in range(num_layers) 63 | ]) 64 | 65 | def forward(self, x): 66 | """ 67 | :param x: Float tensor of size ``(batch_size, num_fields, embed_dim)`` 68 | """ 69 | x0 = x 70 | for i in range(self.num_layers): 71 | xw = self.w[i](x) 72 | x = x0 * xw + self.b[i] + x 73 | return x 74 | -------------------------------------------------------------------------------- /AdaFS/utils/train_help.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import sys 3 | from models.modules import * 4 | from dataloader.tfloader import CriteoLoader, Avazuloader, KDD12loader 5 | import torch 6 | import pickle 7 | import os 8 | 9 | def get_dataloader(dataset, path): 10 | if dataset == 'Criteo': 11 | return CriteoLoader(path) 12 | elif dataset == 'Avazu': 13 | return Avazuloader(path) 14 | elif dataset == 'KDD12': 15 | return KDD12loader(path) 16 | 17 | def get_stats(path): 18 | defaults_path = os.path.join(path + "/defaults.pkl") 19 | with open(defaults_path, 'rb') as fi: 20 | defaults = pickle.load(fi) 21 | return [i+1 for i in list(defaults.values())] 22 | 23 | def get_model(opt): 24 | name = opt["model"] 25 | if name == "fm": 26 | model = FM(opt) 27 | elif name == "deepfm": 28 | model = DeepFM(opt) 29 | elif name == "fnn": 30 | model = FNN(opt) 31 | elif name == "ipnn": 32 | model = IPNN(opt) 33 | elif name == "dcn": 34 | model = DCN(opt) 35 | else: 36 | raise ValueError("Invalid model type: {}".format(name)) 37 | return model 38 | 39 | def get_optimizer(network, opt): 40 | arch_params, trans_params, network_params, embedding_params = [], [], [], [] 41 | arch_names, 
trans_names, network_names, embedding_names = [], [], [], [] 42 | for name, param in network.named_parameters(): 43 | if name == "arch": 44 | arch_params.append(param) 45 | arch_names.append(name) 46 | elif "trans" in name: 47 | trans_params.append(param) 48 | trans_names.append(name) 49 | elif name == "embedding": 50 | embedding_params.append(param) 51 | embedding_names.append(name) 52 | else: 53 | network_params.append(param) 54 | network_names.append(name) 55 | 56 | arch_group = { 57 | "params": arch_params, 58 | "lr": opt["arch_lr"] 59 | } 60 | arch_optimizer = torch.optim.SGD([arch_group]) 61 | 62 | embedding_group = { 63 | 'params': embedding_params, 64 | 'weight_decay': opt['wd'], 65 | 'lr': opt['lr'] 66 | } 67 | network_group = { 68 | 'params': network_params, 69 | 'weight_decay': opt['wd'], 70 | 'lr': opt['lr'] 71 | } 72 | if opt['optimizer'] == 'sgd': 73 | optimizer = torch.optim.SGD([network_group, embedding_group]) 74 | elif opt['optimizer'] == 'adam': 75 | optimizer = torch.optim.Adam([network_group, embedding_group]) 76 | else: 77 | print("Optimizer not supported.") 78 | sys.exit(-1) 79 | 80 | return arch_optimizer, optimizer 81 | 82 | def get_cuda(enabled, device_id=0): 83 | if enabled: 84 | assert torch.cuda.is_available(), 'CUDA is not available' 85 | torch.cuda.set_device(device_id) 86 | 87 | def get_log(name=""): 88 | FORMATTER = logging.Formatter(fmt="[{asctime}]:{message}", style= '{') 89 | logger = logging.getLogger(name) 90 | logger.setLevel(logging.DEBUG) 91 | ch = logging.StreamHandler(stream=sys.stdout) 92 | ch.setFormatter(FORMATTER) 93 | logger.addHandler(ch) 94 | return logger 95 | -------------------------------------------------------------------------------- /LPFS/utils/train_help.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import sys 3 | from models.modules import * 4 | from dataloader.tfloader import CriteoLoader, Avazuloader, KDD12loader 5 | import torch 6 | import pickle 7 | import os 8 | 9 | def get_dataloader(dataset, path): 10 | if dataset == 'Criteo': 11 | return CriteoLoader(path) 12 | elif dataset == 'Avazu': 13 | return Avazuloader(path) 14 | elif dataset == 'KDD12': 15 | return KDD12loader(path) 16 | 17 | def get_stats(path): 18 | defaults_path = os.path.join(path + "/defaults.pkl") 19 | with open(defaults_path, 'rb') as fi: 20 | defaults = pickle.load(fi) 21 | return [i+1 for i in list(defaults.values())] 22 | 23 | def get_model(opt): 24 | name = opt["model"] 25 | if name == "fm": 26 | model = FM(opt) 27 | elif name == "deepfm": 28 | model = DeepFM(opt) 29 | elif name == "fnn": 30 | model = FNN(opt) 31 | elif name == "ipnn": 32 | model = IPNN(opt) 33 | elif name == "dcn": 34 | model = DCN(opt) 35 | else: 36 | raise ValueError("Invalid model type: {}".format(name)) 37 | return model 38 | 39 | def get_optimizer(network, opt): 40 | arch_params, trans_params, network_params, embedding_params = [], [], [], [] 41 | arch_names, trans_names, network_names, embedding_names = [], [], [], [] 42 | for name, param in network.named_parameters(): 43 | if name == "arch": 44 | arch_params.append(param) 45 | arch_names.append(name) 46 | elif "trans" in name: 47 | trans_params.append(param) 48 | trans_names.append(name) 49 | elif name == "embedding": 50 | embedding_params.append(param) 51 | embedding_names.append(name) 52 | else: 53 | network_params.append(param) 54 | network_names.append(name) 55 | 56 | arch_group = { 57 | "params": arch_params, 58 | "lr": opt["arch_lr"] 59 | } 60 | arch_optimizer = 
torch.optim.SGD([arch_group]) 61 | 62 | embedding_group = { 63 | 'params': embedding_params, 64 | 'weight_decay': opt['wd'], 65 | 'lr': opt['lr'] 66 | } 67 | network_group = { 68 | 'params': network_params, 69 | 'weight_decay': opt['wd'], 70 | 'lr': opt['lr'] 71 | } 72 | if opt['optimizer'] == 'sgd': 73 | optimizer = torch.optim.SGD([network_group, embedding_group]) 74 | elif opt['optimizer'] == 'adam': 75 | optimizer = torch.optim.Adam([network_group, embedding_group]) 76 | else: 77 | print("Optimizer not supported.") 78 | sys.exit(-1) 79 | 80 | return arch_optimizer, optimizer 81 | 82 | def get_cuda(enabled, device_id=0): 83 | if enabled: 84 | assert torch.cuda.is_available(), 'CUDA is not available' 85 | torch.cuda.set_device(device_id) 86 | 87 | def get_log(name=""): 88 | FORMATTER = logging.Formatter(fmt="[{asctime}]:{message}", style= '{') 89 | logger = logging.getLogger(name) 90 | logger.setLevel(logging.DEBUG) 91 | ch = logging.StreamHandler(stream=sys.stdout) 92 | ch.setFormatter(FORMATTER) 93 | logger.addHandler(ch) 94 | return logger 95 | -------------------------------------------------------------------------------- /AdaFS/dataloader/tfloader.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import glob 3 | import torch 4 | import os 5 | 6 | repo_path = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) 7 | 8 | class CriteoLoader(object): 9 | def __init__(self, tfrecord_path): 10 | self.SAMPLES = 1 11 | self.FIELDS = 39 12 | self.tfrecord_path = tfrecord_path 13 | self.description = { 14 | "label": tf.io.FixedLenFeature([self.SAMPLES], tf.float32), 15 | "feature": tf.io.FixedLenFeature([self.FIELDS], tf.int64), 16 | } 17 | 18 | def get_data(self, data_type, batch_size = 1): 19 | @tf.autograph.experimental.do_not_convert 20 | def read_data(raw_rec): 21 | example = tf.io.parse_single_example(raw_rec, self.description) 22 | return example['feature'], example['label'] 23 | files = glob.glob(repo_path + '/' + self.tfrecord_path + '/' + "{}*".format(data_type)) 24 | ds = tf.data.TFRecordDataset(files).map(read_data, num_parallel_calls=tf.data.experimental.AUTOTUNE).\ 25 | batch(batch_size).prefetch(tf.data.experimental.AUTOTUNE) 26 | for x,y in ds: 27 | x = torch.from_numpy(x.numpy()) 28 | y = torch.from_numpy(y.numpy()) 29 | yield x, y 30 | 31 | class Avazuloader(object): 32 | def __init__(self, tfrecord_path): 33 | self.SAMPLES = 1 34 | self.FIELDS = 24 35 | self.tfrecord_path = tfrecord_path 36 | self.description = { 37 | "label": tf.io.FixedLenFeature([self.SAMPLES], tf.float32), 38 | "feature": tf.io.FixedLenFeature([self.FIELDS], tf.int64), 39 | } 40 | 41 | def get_data(self, data_type, batch_size = 1): 42 | @tf.autograph.experimental.do_not_convert 43 | def read_data(raw_rec): 44 | example = tf.io.parse_single_example(raw_rec, self.description) 45 | return example['feature'], example['label'] 46 | files = glob.glob(repo_path + '/' + self.tfrecord_path + '/' + "{}*".format(data_type)) 47 | ds = tf.data.TFRecordDataset(files).map(read_data, num_parallel_calls=tf.data.experimental.AUTOTUNE).\ 48 | batch(batch_size).prefetch(tf.data.experimental.AUTOTUNE) 49 | for x,y in ds: 50 | x = torch.from_numpy(x.numpy()) 51 | y = torch.from_numpy(y.numpy()) 52 | yield x, y 53 | 54 | class KDD12loader(object): 55 | def __init__(self, tfrecord_path): 56 | self.SAMPLES = 1 57 | self.FIELDS = 11 58 | self.tfrecord_path = tfrecord_path 59 | self.description = { 60 | "label": tf.io.FixedLenFeature([self.SAMPLES], 
tf.float32), 61 | "feature": tf.io.FixedLenFeature([self.FIELDS], tf.int64), 62 | } 63 | 64 | def get_data(self, data_type, batch_size = 1): 65 | @tf.autograph.experimental.do_not_convert 66 | def read_data(raw_rec): 67 | example = tf.io.parse_single_example(raw_rec, self.description) 68 | return example['feature'], example['label'] 69 | files = glob.glob(repo_path + '/' + self.tfrecord_path + '/' + "{}*".format(data_type)) 70 | ds = tf.data.TFRecordDataset(files).map(read_data, num_parallel_calls=tf.data.experimental.AUTOTUNE).\ 71 | batch(batch_size).prefetch(tf.data.experimental.AUTOTUNE) 72 | for x,y in ds: 73 | x = torch.from_numpy(x.numpy()) 74 | y = torch.from_numpy(y.numpy()) 75 | yield x, y -------------------------------------------------------------------------------- /LPFS/dataloader/tfloader.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import glob 3 | import torch 4 | import os 5 | 6 | repo_path = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) 7 | 8 | class CriteoLoader(object): 9 | def __init__(self, tfrecord_path): 10 | self.SAMPLES = 1 11 | self.FIELDS = 39 12 | self.tfrecord_path = tfrecord_path 13 | self.description = { 14 | "label": tf.io.FixedLenFeature([self.SAMPLES], tf.float32), 15 | "feature": tf.io.FixedLenFeature([self.FIELDS], tf.int64), 16 | } 17 | 18 | def get_data(self, data_type, batch_size = 1): 19 | @tf.autograph.experimental.do_not_convert 20 | def read_data(raw_rec): 21 | example = tf.io.parse_single_example(raw_rec, self.description) 22 | return example['feature'], example['label'] 23 | files = glob.glob(repo_path + '/' + self.tfrecord_path + '/' + "{}*".format(data_type)) 24 | ds = tf.data.TFRecordDataset(files).map(read_data, num_parallel_calls=tf.data.experimental.AUTOTUNE).\ 25 | batch(batch_size).prefetch(tf.data.experimental.AUTOTUNE) 26 | for x,y in ds: 27 | x = torch.from_numpy(x.numpy()) 28 | y = torch.from_numpy(y.numpy()) 29 | yield x, y 30 | 31 | class Avazuloader(object): 32 | def __init__(self, tfrecord_path): 33 | self.SAMPLES = 1 34 | self.FIELDS = 24 35 | self.tfrecord_path = tfrecord_path 36 | self.description = { 37 | "label": tf.io.FixedLenFeature([self.SAMPLES], tf.float32), 38 | "feature": tf.io.FixedLenFeature([self.FIELDS], tf.int64), 39 | } 40 | 41 | def get_data(self, data_type, batch_size = 1): 42 | @tf.autograph.experimental.do_not_convert 43 | def read_data(raw_rec): 44 | example = tf.io.parse_single_example(raw_rec, self.description) 45 | return example['feature'], example['label'] 46 | files = glob.glob(repo_path + '/' + self.tfrecord_path + '/' + "{}*".format(data_type)) 47 | ds = tf.data.TFRecordDataset(files).map(read_data, num_parallel_calls=tf.data.experimental.AUTOTUNE).\ 48 | batch(batch_size).prefetch(tf.data.experimental.AUTOTUNE) 49 | for x,y in ds: 50 | x = torch.from_numpy(x.numpy()) 51 | y = torch.from_numpy(y.numpy()) 52 | yield x, y 53 | 54 | class KDD12loader(object): 55 | def __init__(self, tfrecord_path): 56 | self.SAMPLES = 1 57 | self.FIELDS = 11 58 | self.tfrecord_path = tfrecord_path 59 | self.description = { 60 | "label": tf.io.FixedLenFeature([self.SAMPLES], tf.float32), 61 | "feature": tf.io.FixedLenFeature([self.FIELDS], tf.int64), 62 | } 63 | 64 | def get_data(self, data_type, batch_size = 1): 65 | @tf.autograph.experimental.do_not_convert 66 | def read_data(raw_rec): 67 | example = tf.io.parse_single_example(raw_rec, self.description) 68 | return example['feature'], example['label'] 69 | files = 
glob.glob(repo_path + '/' + self.tfrecord_path + '/' + "{}*".format(data_type)) 70 | ds = tf.data.TFRecordDataset(files).map(read_data, num_parallel_calls=tf.data.experimental.AUTOTUNE).\ 71 | batch(batch_size).prefetch(tf.data.experimental.AUTOTUNE) 72 | for x,y in ds: 73 | x = torch.from_numpy(x.numpy()) 74 | y = torch.from_numpy(y.numpy()) 75 | yield x, y -------------------------------------------------------------------------------- /AutoField/dataloader/tfloader.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import glob 3 | import torch 4 | import os 5 | 6 | repo_path = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) 7 | 8 | class CriteoLoader(object): 9 | def __init__(self, tfrecord_path): 10 | self.SAMPLES = 1 11 | self.FIELDS = 39 12 | self.tfrecord_path = tfrecord_path 13 | self.description = { 14 | "label": tf.io.FixedLenFeature([self.SAMPLES], tf.float32), 15 | "feature": tf.io.FixedLenFeature([self.FIELDS], tf.int64), 16 | } 17 | 18 | def get_data(self, data_type, batch_size = 1): 19 | @tf.autograph.experimental.do_not_convert 20 | def read_data(raw_rec): 21 | example = tf.io.parse_single_example(raw_rec, self.description) 22 | return example['feature'], example['label'] 23 | files = glob.glob(repo_path + '/' + self.tfrecord_path + '/' + "{}*".format(data_type)) 24 | ds = tf.data.TFRecordDataset(files).map(read_data, num_parallel_calls=tf.data.experimental.AUTOTUNE).\ 25 | batch(batch_size).prefetch(tf.data.experimental.AUTOTUNE) 26 | for x,y in ds: 27 | x = torch.from_numpy(x.numpy()) 28 | y = torch.from_numpy(y.numpy()) 29 | yield x, y 30 | 31 | class Avazuloader(object): 32 | def __init__(self, tfrecord_path): 33 | self.SAMPLES = 1 34 | self.FIELDS = 24 35 | self.tfrecord_path = tfrecord_path 36 | self.description = { 37 | "label": tf.io.FixedLenFeature([self.SAMPLES], tf.float32), 38 | "feature": tf.io.FixedLenFeature([self.FIELDS], tf.int64), 39 | } 40 | 41 | def get_data(self, data_type, batch_size = 1): 42 | @tf.autograph.experimental.do_not_convert 43 | def read_data(raw_rec): 44 | example = tf.io.parse_single_example(raw_rec, self.description) 45 | return example['feature'], example['label'] 46 | files = glob.glob(repo_path + '/' + self.tfrecord_path + '/' + "{}*".format(data_type)) 47 | ds = tf.data.TFRecordDataset(files).map(read_data, num_parallel_calls=tf.data.experimental.AUTOTUNE).\ 48 | batch(batch_size).prefetch(tf.data.experimental.AUTOTUNE) 49 | for x,y in ds: 50 | x = torch.from_numpy(x.numpy()) 51 | y = torch.from_numpy(y.numpy()) 52 | yield x, y 53 | 54 | class KDD12loader(object): 55 | def __init__(self, tfrecord_path): 56 | self.SAMPLES = 1 57 | self.FIELDS = 11 58 | self.tfrecord_path = tfrecord_path 59 | self.description = { 60 | "label": tf.io.FixedLenFeature([self.SAMPLES], tf.float32), 61 | "feature": tf.io.FixedLenFeature([self.FIELDS], tf.int64), 62 | } 63 | 64 | def get_data(self, data_type, batch_size = 1): 65 | @tf.autograph.experimental.do_not_convert 66 | def read_data(raw_rec): 67 | example = tf.io.parse_single_example(raw_rec, self.description) 68 | return example['feature'], example['label'] 69 | files = glob.glob(repo_path + '/' + self.tfrecord_path + '/' + "{}*".format(data_type)) 70 | ds = tf.data.TFRecordDataset(files).map(read_data, num_parallel_calls=tf.data.experimental.AUTOTUNE).\ 71 | batch(batch_size).prefetch(tf.data.experimental.AUTOTUNE) 72 | for x,y in ds: 73 | x = torch.from_numpy(x.numpy()) 74 | y = torch.from_numpy(y.numpy()) 75 | yield x, y 
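Usage note: the loaders above all expose the same generator-style API. `get_data(data_type, batch_size)` globs TFRecord shards whose file names start with `data_type` (e.g. `train`, `val`, `test`) inside the configured directory and yields `(feature, label)` batches already converted to PyTorch tensors. A minimal consumption sketch, assuming Criteo-style shards under a placeholder `data/criteo` directory relative to the method's root (adjust to your own layout):
```
# Minimal sketch: consuming the TFRecord loaders ("data/criteo" is a placeholder path).
from dataloader.tfloader import CriteoLoader

loader = CriteoLoader("data/criteo")  # directory holding train*/val*/test* TFRecord shards
for feature, label in loader.get_data("train", batch_size=4096):
    # feature: int64 tensor of shape (batch_size, 39); label: float32 tensor of shape (batch_size, 1)
    print(feature.shape, label.shape)
    break
```
`retrain.py`, for example, iterates these generators in exactly this way after building the loader via `get_dataloader` in `utils/train_help.py`.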
-------------------------------------------------------------------------------- /AutoField/utils/train_help.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import sys 3 | from models.modules_search import * 4 | from models.modules_retrain import * 5 | from dataloader.tfloader import CriteoLoader, Avazuloader, KDD12loader 6 | import torch 7 | import pickle 8 | import os 9 | 10 | def get_dataloader(dataset, path): 11 | if dataset == 'Criteo': 12 | return CriteoLoader(path) 13 | elif dataset == 'Avazu': 14 | return Avazuloader(path) 15 | elif dataset == 'KDD12': 16 | return KDD12loader(path) 17 | 18 | def get_stats(path): 19 | defaults_path = os.path.join(path + "/defaults.pkl") 20 | with open(defaults_path, 'rb') as fi: 21 | defaults = pickle.load(fi) 22 | return [i+1 for i in list(defaults.values())] 23 | 24 | def get_search(opt): 25 | name = opt["model"] 26 | if name == "fm": 27 | model = FM_search(opt) 28 | elif name == "deepfm": 29 | model = DeepFM_search(opt) 30 | elif name == "fnn": 31 | model = FNN_search(opt) 32 | elif name == "ipnn": 33 | model = IPNN_search(opt) 34 | elif name == "dcn": 35 | model = DCN_search(opt) 36 | else: 37 | raise ValueError("Invalid model type: {}".format(name)) 38 | return model 39 | 40 | def get_retrain(opt, arch): 41 | name = opt["model"] 42 | if name == "fm": 43 | model = FM_retrain(opt, arch) 44 | elif name == "deepfm": 45 | model = DeepFM_retrain(opt, arch) 46 | elif name == "fnn": 47 | model = FNN_retrain(opt, arch) 48 | elif name == "ipnn": 49 | model = IPNN_retrain(opt, arch) 50 | elif name == "dcn": 51 | model = DCN_retrain(opt, arch) 52 | else: 53 | raise ValueError("Invalid model type: {}".format(name)) 54 | return model 55 | 56 | def get_optimizer(network, opt): 57 | arch_params, trans_params, network_params, embedding_params = [], [], [], [] 58 | arch_names, trans_names, network_names, embedding_names = [], [], [], [] 59 | for name, param in network.named_parameters(): 60 | if name == "arch": 61 | arch_params.append(param) 62 | arch_names.append(name) 63 | elif "trans" in name: 64 | trans_params.append(param) 65 | trans_names.append(name) 66 | elif name == "embedding": 67 | embedding_params.append(param) 68 | embedding_names.append(name) 69 | else: 70 | network_params.append(param) 71 | network_names.append(name) 72 | 73 | # if opt['transform'] == 'linear': 74 | # for name, param in network.trans_b.named_parameters(): 75 | # trans_params.append(param) 76 | # trans_names.append(name) 77 | # for name, param in network.trans_bn.named_parameters(): 78 | # trans_params.append(param) 79 | # trans_names.append(name) 80 | 81 | print("arch_names:", arch_names) 82 | print("_"*30) 83 | print("trans_names:", trans_names) 84 | print("_"*30) 85 | print("embedding_names:", embedding_names) 86 | print("_"*30) 87 | print("network_names:", network_names) 88 | print("_"*30) 89 | 90 | arch_group = { 91 | "params": arch_params, 92 | "lr": opt["arch_lr"] 93 | } 94 | arch_optimizer = torch.optim.SGD([arch_group]) 95 | 96 | embedding_group = { 97 | 'params': embedding_params, 98 | 'weight_decay': opt['wd'], 99 | 'lr': opt['lr'] 100 | } 101 | trans_group = { 102 | 'params': trans_params, 103 | 'lr': opt['lr'] 104 | } 105 | network_group = { 106 | 'params': network_params, 107 | 'weight_decay': opt['wd'], 108 | 'lr': opt['lr'] 109 | } 110 | if opt['optimizer'] == 'sgd': 111 | optimizer = torch.optim.SGD([network_group, trans_group, embedding_group]) 112 | elif opt['optimizer'] == 'adam': 113 | optimizer = 
torch.optim.Adam([network_group, trans_group, embedding_group]) 114 | else: 115 | print("Optimizer not supported.") 116 | sys.exit(-1) 117 | 118 | return arch_optimizer, optimizer 119 | 120 | def get_cuda(enabled, device_id=0): 121 | if enabled: 122 | assert torch.cuda.is_available(), 'CUDA is not available' 123 | torch.cuda.set_device(device_id) 124 | 125 | def get_log(name=""): 126 | FORMATTER = logging.Formatter(fmt="[{asctime}]:{message}", style= '{') 127 | logger = logging.getLogger(name) 128 | logger.setLevel(logging.DEBUG) 129 | ch = logging.StreamHandler(stream=sys.stdout) 130 | ch.setFormatter(FORMATTER) 131 | logger.addHandler(ch) 132 | return logger 133 | -------------------------------------------------------------------------------- /AutoField/models/modules_retrain.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import numpy as np 3 | import torch 4 | import torch.nn as nn 5 | import torch.nn.functional as F 6 | from models.layer import * 7 | 8 | class BasicModel(torch.nn.Module): 9 | def __init__(self, opt, arch): 10 | super(BasicModel, self).__init__() 11 | self.device = torch.device("cuda:0" if opt.get('use_cuda') else "cpu") 12 | self.latent_dim = opt['latent_dim'] 13 | self.field_num = len(opt['field_dim']) 14 | self.feature_num = sum(opt['field_dim']) 15 | self.field_dim = opt['field_dim'] 16 | self.embedding = self.init_embedding() 17 | self.init_arch(arch) 18 | 19 | def init_embedding(self): 20 | e = nn.Parameter(torch.rand([self.feature_num, self.latent_dim])) 21 | torch.nn.init.xavier_uniform_(e) 22 | return e 23 | 24 | def init_arch(self, arch): 25 | dis_arch = np.zeros_like(arch) 26 | dis_arch = np.where(arch > 0, 1, 0) 27 | self.arch = torch.from_numpy(dis_arch).to(self.device) 28 | 29 | def calc_sparsity(self): 30 | base = self.latent_dim * self.feature_num 31 | params = 0 32 | for i, num_i in enumerate(self.field_dim): 33 | params += self.arch[i] * num_i * self.latent_dim 34 | return params, (1 - params/base) 35 | 36 | def calc_input(self, x): 37 | xv = F.embedding(x, self.embedding) 38 | xe = torch.mul(xv, self.arch.unsqueeze(0).unsqueeze(2)) 39 | return xe 40 | 41 | class FM_retrain(BasicModel): 42 | def __init__(self, opt, arch): 43 | super(FM_retrain, self).__init__(opt, arch) 44 | self.linear = FeaturesLinear(opt['field_dim']) 45 | self.fm = FactorizationMachine(reduce_sum=True) 46 | 47 | def forward(self, x): 48 | linear_score = self.linear.forward(x) 49 | xe = self.calc_input(x) 50 | fm_score = self.fm.forward(xe) 51 | score = linear_score + fm_score 52 | return score.squeeze(1) 53 | 54 | class DeepFM_retrain(FM_retrain): 55 | def __init__(self, opt, arch): 56 | super(DeepFM_retrain, self).__init__(opt, arch) 57 | self.embed_output_dim = self.field_num * self.latent_dim 58 | self.mlp_dims = opt['mlp_dims'] 59 | self.dropout = opt['mlp_dropout'] 60 | self.mlp = MultiLayerPerceptron(self.embed_output_dim, self.mlp_dims, dropout=self.dropout) 61 | 62 | def forward(self, x): 63 | linear_score = self.linear.forward(x) 64 | xe = self.calc_input(x) 65 | fm_score = self.fm.forward(xe) 66 | dnn_score = self.mlp.forward(xe.view(-1, self.embed_output_dim)) 67 | score = linear_score + fm_score + dnn_score 68 | return score.squeeze(1) 69 | 70 | class FNN_retrain(BasicModel): 71 | def __init__(self, opt, arch): 72 | super(FNN_retrain, self).__init__(opt, arch) 73 | self.embed_output_dim = self.field_num * self.latent_dim 74 | self.mlp_dims = opt['mlp_dims'] 75 | self.dropout = opt['mlp_dropout'] 76 | self.mlp = 
MultiLayerPerceptron(self.embed_output_dim, self.mlp_dims, dropout=self.dropout) 77 | 78 | def forward(self, x): 79 | xe = self.calc_input(x) 80 | score = self.mlp.forward(xe.view(-1, self.embed_output_dim)) 81 | return score.squeeze(1) 82 | 83 | class IPNN_retrain(BasicModel): 84 | def __init__(self, opt, arch): 85 | super(IPNN_retrain, self).__init__(opt, arch) 86 | self.embed_output_dim = self.field_num * self.latent_dim 87 | self.product_output_dim = int(self.field_num * (self.field_num - 1) / 2) 88 | self.dnn_input_dim = self.embed_output_dim + self.product_output_dim 89 | self.mlp_dims = opt['mlp_dims'] 90 | self.dropout = opt['mlp_dropout'] 91 | self.mlp = MultiLayerPerceptron(self.dnn_input_dim, self.mlp_dims, dropout=self.dropout) 92 | 93 | # Create indexes 94 | rows = [] 95 | cols = [] 96 | for i in range(self.field_num): 97 | for j in range(i+1, self.field_num): 98 | rows.append(i) 99 | cols.append(j) 100 | self.rows = torch.tensor(rows, device=self.device) 101 | self.cols = torch.tensor(cols, device=self.device) 102 | 103 | def calc_product(self, xe): 104 | batch_size = xe.shape[0] 105 | trans = torch.transpose(xe, 1, 2) 106 | gather_rows = torch.gather(trans, 2, self.rows.expand(batch_size, trans.shape[1], self.rows.shape[0])) 107 | gather_cols = torch.gather(trans, 2, self.cols.expand(batch_size, trans.shape[1], self.rows.shape[0])) 108 | p = torch.transpose(gather_rows, 1, 2) 109 | q = torch.transpose(gather_cols, 1, 2) 110 | product_embedding = torch.mul(p, q) 111 | product_embedding = torch.sum(product_embedding, 2) 112 | return product_embedding 113 | 114 | def forward(self, x): 115 | xv = self.calc_input(x) 116 | product = self.calc_product(xv) 117 | xv = xv.view(-1, self.embed_output_dim) 118 | xe = torch.cat((xv, product), 1) 119 | score = self.mlp.forward(xe) 120 | return score.squeeze(1) 121 | 122 | class DCN_retrain(BasicModel): 123 | def __init__(self, opt, arch): 124 | super(DCN_retrain, self).__init__(opt, arch) 125 | self.embed_output_dim = self.field_num * self.latent_dim 126 | self.mlp_dims = opt['mlp_dims'] 127 | self.dropout = opt['mlp_dropout'] 128 | self.mlp = MultiLayerPerceptron(self.embed_output_dim, self.mlp_dims, dropout=self.dropout, output_layer=False) 129 | self.cross = CrossNetwork(self.embed_output_dim, opt['cross_layer_num']) 130 | self.combine = torch.nn.Linear(self.mlp_dims[-1] + self.embed_output_dim, 1) 131 | 132 | def forward(self, x): 133 | xe = self.calc_input(x) 134 | dnn_score = self.mlp.forward(xe.view(-1, self.embed_output_dim)) 135 | cross_score = self.cross.forward(xe.view(-1, self.embed_output_dim)) 136 | stacked = torch.cat((dnn_score, cross_score), 1) 137 | score = self.combine(stacked) 138 | return score.squeeze(1) 139 | -------------------------------------------------------------------------------- /AdaFS/models/modules.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import numpy as np 3 | import torch 4 | import torch.nn as nn 5 | import torch.nn.functional as F 6 | from models.layer import * 7 | 8 | class BasicModel(torch.nn.Module): 9 | def __init__(self, opt): 10 | super(BasicModel, self).__init__() 11 | self.device = torch.device("cuda:0" if opt.get('use_cuda') else "cpu") 12 | self.latent_dim = opt['latent_dim'] 13 | self.field_num = len(opt['field_dim']) 14 | self.feature_num = sum(opt['field_dim']) 15 | self.field_dim = opt['field_dim'] 16 | self.embedding = self.init_embedding() 17 | self.embedding_bn = torch.nn.LayerNorm(opt['latent_dim'], 
elementwise_affine=False) 18 | 19 | def init_embedding(self): 20 | e = nn.Parameter(torch.rand([self.feature_num, self.latent_dim])) 21 | nn.init.xavier_uniform_(e) 22 | return e 23 | 24 | def get_arch(self, arch): 25 | my_arch = arch.detach().cpu().numpy() 26 | current_arch = np.zeros_like(my_arch) 27 | current_arch = np.where(my_arch > 0, 1, 0) 28 | return current_arch 29 | 30 | def calc_sparsity(self, arch): 31 | base = self.latent_dim * self.feature_num 32 | current_arch = self.get_arch(arch) 33 | params = 0 34 | for i, num_i in enumerate(self.field_dim): 35 | params += num_i * current_arch[i] * self.latent_dim 36 | return params, (1 - params/base) 37 | 38 | def calc_input(self, x, controller): 39 | xv = F.embedding(x, self.embedding) 40 | norm_xv = self.embedding_bn(xv) 41 | if controller is not None: 42 | alpha = controller(norm_xv) 43 | xe = torch.mul(xv, alpha) 44 | else: 45 | xe = norm_xv 46 | return xe 47 | 48 | class FM(BasicModel): 49 | def __init__(self, opt): 50 | super(FM, self).__init__(opt) 51 | self.linear = FeaturesLinear(opt['field_dim']) 52 | self.fm = FactorizationMachine(reduce_sum=True) 53 | 54 | def forward(self, x, controller=None): 55 | linear_score = self.linear.forward(x) 56 | xe = self.calc_input(x, controller) 57 | fm_score = self.fm.forward(xe) 58 | score = linear_score + fm_score 59 | return score.squeeze(1) 60 | 61 | class DeepFM(FM): 62 | def __init__(self, opt): 63 | super(DeepFM, self).__init__(opt) 64 | self.embed_output_dim = self.field_num * self.latent_dim 65 | self.mlp_dims = opt['mlp_dims'] 66 | self.dropout = opt['mlp_dropout'] 67 | self.mlp = MultiLayerPerceptron(self.embed_output_dim, self.mlp_dims, dropout=self.dropout) 68 | 69 | def forward(self, x, controller=None): 70 | linear_score = self.linear.forward(x) 71 | xe = self.calc_input(x, controller) 72 | fm_score = self.fm.forward(xe) 73 | dnn_score = self.mlp.forward(xe.view(-1, self.embed_output_dim)) 74 | score = linear_score + fm_score + dnn_score 75 | return score.squeeze(1) 76 | 77 | class FNN(BasicModel): 78 | def __init__(self, opt): 79 | super(FNN, self).__init__(opt) 80 | self.embed_output_dim = self.field_num * self.latent_dim 81 | self.mlp_dims = opt['mlp_dims'] 82 | self.dropout = opt['mlp_dropout'] 83 | self.mlp = MultiLayerPerceptron(self.embed_output_dim, self.mlp_dims, dropout=self.dropout) 84 | 85 | def forward(self, x, controller=None): 86 | xv = self.calc_input(x, controller) 87 | score = self.mlp.forward(xv.view(-1, self.embed_output_dim)) 88 | return score.squeeze(1) 89 | 90 | class IPNN(BasicModel): 91 | def __init__(self, opt): 92 | super(IPNN, self).__init__(opt) 93 | self.embed_output_dim = self.field_num * self.latent_dim 94 | self.product_output_dim = int(self.field_num * (self.field_num - 1) / 2) 95 | self.dnn_input_dim = self.embed_output_dim + self.product_output_dim 96 | self.mlp_dims = opt['mlp_dims'] 97 | self.dropout = opt['mlp_dropout'] 98 | self.mlp = MultiLayerPerceptron(self.dnn_input_dim, self.mlp_dims, dropout=self.dropout) 99 | 100 | # Create indexes 101 | rows = [] 102 | cols = [] 103 | for i in range(self.field_num): 104 | for j in range(i+1, self.field_num): 105 | rows.append(i) 106 | cols.append(j) 107 | self.rows = torch.tensor(rows, device=self.device) 108 | self.cols = torch.tensor(cols, device=self.device) 109 | 110 | def calc_product(self, xe): 111 | batch_size = xe.shape[0] 112 | trans = torch.transpose(xe, 1, 2) 113 | gather_rows = torch.gather(trans, 2, self.rows.expand(batch_size, trans.shape[1], self.rows.shape[0])) 114 | gather_cols = 
torch.gather(trans, 2, self.cols.expand(batch_size, trans.shape[1], self.rows.shape[0])) 115 | p = torch.transpose(gather_rows, 1, 2) 116 | q = torch.transpose(gather_cols, 1, 2) 117 | product_embedding = torch.mul(p, q) 118 | product_embedding = torch.sum(product_embedding, 2) 119 | return product_embedding 120 | 121 | def forward(self, x, controller=None): 122 | xv = self.calc_input(x, controller) 123 | product = self.calc_product(xv) 124 | xv = xv.view(-1, self.embed_output_dim) 125 | xe = torch.cat((xv, product), 1) 126 | score = self.mlp.forward(xe) 127 | return score.squeeze(1) 128 | 129 | class DCN(BasicModel): 130 | def __init__(self, opt): 131 | super(DCN, self).__init__(opt) 132 | self.embed_output_dim = self.field_num * self.latent_dim 133 | self.mlp_dims = opt['mlp_dims'] 134 | self.dropout = opt['mlp_dropout'] 135 | self.mlp = MultiLayerPerceptron(self.embed_output_dim, self.mlp_dims, dropout=self.dropout, output_layer=False) 136 | self.cross = CrossNetwork(self.embed_output_dim, opt['cross_layer_num']) 137 | self.combine = torch.nn.Linear(self.mlp_dims[-1] + self.embed_output_dim, 1) 138 | 139 | def forward(self, x, controller=None): 140 | xe = self.calc_input(x, controller) 141 | dnn_score = self.mlp.forward(xe.view(-1, self.embed_output_dim)) 142 | cross_score = self.cross.forward(xe.view(-1, self.embed_output_dim)) 143 | stacked = torch.cat((dnn_score, cross_score), 1) 144 | score = self.combine(stacked) 145 | return score.squeeze(1) 146 | 147 | -------------------------------------------------------------------------------- /AutoField/models/modules_search.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import numpy as np 3 | import torch 4 | import torch.nn as nn 5 | import torch.nn.functional as F 6 | from models.layer import * 7 | 8 | class BasicModel(torch.nn.Module): 9 | def __init__(self, opt): 10 | super(BasicModel, self).__init__() 11 | self.device = torch.device("cuda:0" if opt.get('use_cuda') else "cpu") 12 | self.latent_dim = opt['latent_dim'] 13 | self.field_num = len(opt['field_dim']) 14 | self.feature_num = sum(opt['field_dim']) 15 | self.field_dim = opt['field_dim'] 16 | self.embedding = self.init_embedding() 17 | 18 | def init_embedding(self): 19 | e = nn.Parameter(torch.rand([self.feature_num, self.latent_dim])) 20 | nn.init.xavier_uniform_(e) 21 | return e 22 | 23 | def calc_arch_prob(self, beta, arch): 24 | return F.softmax(arch / beta, dim=0) 25 | 26 | def get_arch(self, arch): 27 | my_arch = arch.detach().cpu().numpy() 28 | current_arch = np.zeros_like(my_arch) 29 | current_arch = np.where(my_arch > 0, 1, 0) 30 | return current_arch 31 | 32 | def calc_sparsity(self, arch): 33 | base = self.latent_dim * self.feature_num 34 | current_arch = self.get_arch(arch) 35 | params = 0 36 | for i, num_i in enumerate(self.field_dim): 37 | params += num_i * current_arch[i] * self.latent_dim 38 | return params, (1 - params/base) 39 | 40 | def calc_input(self, x, beta, arch): 41 | xv = F.embedding(x, self.embedding) 42 | prob = self.calc_arch_prob(beta, arch) 43 | prob = prob.unsqueeze(0).unsqueeze(2) 44 | xe = torch.mul(xv, prob) 45 | return xe 46 | 47 | class FM_search(BasicModel): 48 | def __init__(self, opt): 49 | super(FM_search, self).__init__(opt) 50 | self.linear = FeaturesLinear(opt['field_dim']) 51 | self.fm = FactorizationMachine(reduce_sum=True) 52 | 53 | def forward(self, x, beta=1, arch=None): 54 | linear_score = self.linear.forward(x) 55 | xe = self.calc_input(x, beta, arch) 56 | fm_score = 
self.fm.forward(xe) 57 | score = linear_score + fm_score 58 | return score.squeeze(1) 59 | 60 | class DeepFM_search(FM_search): 61 | def __init__(self, opt): 62 | super(DeepFM_search, self).__init__(opt) 63 | self.embed_output_dim = self.field_num * self.latent_dim 64 | self.mlp_dims = opt['mlp_dims'] 65 | self.dropout = opt['mlp_dropout'] 66 | self.mlp = MultiLayerPerceptron(self.embed_output_dim, self.mlp_dims, dropout=self.dropout) 67 | 68 | def forward(self, x, beta=1, arch=None): 69 | linear_score = self.linear.forward(x) 70 | xe = self.calc_input(x, beta, arch) 71 | fm_score = self.fm.forward(xe) 72 | dnn_score = self.mlp.forward(xe.view(-1, self.embed_output_dim)) 73 | score = linear_score + fm_score + dnn_score 74 | return score.squeeze(1) 75 | 76 | class FNN_search(BasicModel): 77 | def __init__(self, opt): 78 | super(FNN_search, self).__init__(opt) 79 | self.embed_output_dim = self.field_num * self.latent_dim 80 | self.mlp_dims = opt['mlp_dims'] 81 | self.dropout = opt['mlp_dropout'] 82 | self.mlp = MultiLayerPerceptron(self.embed_output_dim, self.mlp_dims, dropout=self.dropout) 83 | 84 | def forward(self, x, beta=1, arch=None): 85 | xv = self.calc_input(x, beta, arch) 86 | score = self.mlp.forward(xv.view(-1, self.embed_output_dim)) 87 | return score.squeeze(1) 88 | 89 | class IPNN_search(BasicModel): 90 | def __init__(self, opt): 91 | super(IPNN_search, self).__init__(opt) 92 | self.embed_output_dim = self.field_num * self.latent_dim 93 | self.product_output_dim = int(self.field_num * (self.field_num - 1) / 2) 94 | self.dnn_input_dim = self.embed_output_dim + self.product_output_dim 95 | self.mlp_dims = opt['mlp_dims'] 96 | self.dropout = opt['mlp_dropout'] 97 | self.mlp = MultiLayerPerceptron(self.dnn_input_dim, self.mlp_dims, dropout=self.dropout) 98 | 99 | # Create indexes 100 | rows = [] 101 | cols = [] 102 | for i in range(self.field_num): 103 | for j in range(i+1, self.field_num): 104 | rows.append(i) 105 | cols.append(j) 106 | self.rows = torch.tensor(rows, device=self.device) 107 | self.cols = torch.tensor(cols, device=self.device) 108 | 109 | def calc_product(self, xe): 110 | batch_size = xe.shape[0] 111 | trans = torch.transpose(xe, 1, 2) 112 | gather_rows = torch.gather(trans, 2, self.rows.expand(batch_size, trans.shape[1], self.rows.shape[0])) 113 | gather_cols = torch.gather(trans, 2, self.cols.expand(batch_size, trans.shape[1], self.rows.shape[0])) 114 | p = torch.transpose(gather_rows, 1, 2) 115 | q = torch.transpose(gather_cols, 1, 2) 116 | product_embedding = torch.mul(p, q) 117 | product_embedding = torch.sum(product_embedding, 2) 118 | return product_embedding 119 | 120 | def forward(self, x, beta=1, arch=None): 121 | xv = self.calc_input(x, beta, arch) 122 | product = self.calc_product(xv) 123 | xv = xv.view(-1, self.embed_output_dim) 124 | xe = torch.cat((xv, product), 1) 125 | score = self.mlp.forward(xe) 126 | return score.squeeze(1) 127 | 128 | class DCN_search(BasicModel): 129 | def __init__(self, opt): 130 | super(DCN_search, self).__init__(opt) 131 | self.embed_output_dim = self.field_num * self.latent_dim 132 | self.mlp_dims = opt['mlp_dims'] 133 | self.dropout = opt['mlp_dropout'] 134 | self.mlp = MultiLayerPerceptron(self.embed_output_dim, self.mlp_dims, dropout=self.dropout, output_layer=False) 135 | self.cross = CrossNetwork(self.embed_output_dim, opt['cross_layer_num']) 136 | self.combine = torch.nn.Linear(self.mlp_dims[-1] + self.embed_output_dim, 1) 137 | 138 | def forward(self, x, beta=1, arch=None): 139 | xe = self.calc_input(x, beta, arch) 
140 | dnn_score = self.mlp.forward(xe.view(-1, self.embed_output_dim)) 141 | cross_score = self.cross.forward(xe.view(-1, self.embed_output_dim)) 142 | stacked = torch.cat((dnn_score, cross_score), 1) 143 | score = self.combine(stacked) 144 | return score.squeeze(1) 145 | 146 | -------------------------------------------------------------------------------- /LPFS/models/modules.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import numpy as np 3 | import torch 4 | import torch.nn as nn 5 | import torch.nn.functional as F 6 | from models.layer import * 7 | 8 | def lpfs_pp(x, epsilon, alpha=100, tao=2, init_val=1.0): 9 | g1 = x*x / (x*x + epsilon) 10 | g2 = alpha * epsilon ** (1.0 / tao) * torch.atan(x) 11 | g = torch.where(x>0, g2+g1, g2-g1) / init_val 12 | return g 13 | 14 | def lpfs(x, epsilon): 15 | g = x*x / (x*x + epsilon) 16 | return g 17 | 18 | class BasicModel(torch.nn.Module): 19 | def __init__(self, opt): 20 | super(BasicModel, self).__init__() 21 | self.device = torch.device("cuda:0" if opt.get('use_cuda') else "cpu") 22 | self.latent_dim = opt['latent_dim'] 23 | self.field_num = len(opt['field_dim']) 24 | self.feature_num = sum(opt['field_dim']) 25 | self.field_dim = opt['field_dim'] 26 | self.embedding = self.init_embedding() 27 | 28 | if opt['selector'] == 'lpfs': 29 | self.selector = lpfs 30 | elif opt['selector'] == 'lpfs++': 31 | self.selector = lpfs_pp 32 | else: 33 | raise ValueError("Wrong feature selector!!") 34 | self.epsilon = opt['epsilon'] 35 | 36 | def init_embedding(self): 37 | e = nn.Parameter(torch.rand([self.feature_num, self.latent_dim])) 38 | nn.init.xavier_uniform_(e) 39 | return e 40 | 41 | def get_arch(self, arch): 42 | my_arch = arch.detach().cpu().numpy() 43 | current_arch = np.zeros_like(my_arch) 44 | current_arch = np.where(my_arch > 1e-10, 1, 0) 45 | return current_arch 46 | 47 | def calc_sparsity(self, arch): 48 | base = self.feature_num 49 | current_arch = self.get_arch(arch) 50 | params = 0 51 | for i, num_i in enumerate(self.field_dim): 52 | params += num_i * current_arch[i] 53 | return params, (1 - params/base) 54 | 55 | def calc_input(self, x, arch): 56 | xv = F.embedding(x, self.embedding) 57 | gate = self.selector(arch, self.epsilon).unsqueeze(0).unsqueeze(2) 58 | xe = torch.mul(xv, gate) 59 | return xe 60 | 61 | class FM(BasicModel): 62 | def __init__(self, opt): 63 | super(FM, self).__init__(opt) 64 | self.linear = FeaturesLinear(opt['field_dim']) 65 | self.fm = FactorizationMachine(reduce_sum=True) 66 | 67 | def forward(self, x, controller=None): 68 | linear_score = self.linear.forward(x) 69 | xe = self.calc_input(x, controller) 70 | fm_score = self.fm.forward(xe) 71 | score = linear_score + fm_score 72 | return score.squeeze(1) 73 | 74 | class DeepFM(FM): 75 | def __init__(self, opt): 76 | super(DeepFM, self).__init__(opt) 77 | self.embed_output_dim = self.field_num * self.latent_dim 78 | self.mlp_dims = opt['mlp_dims'] 79 | self.dropout = opt['mlp_dropout'] 80 | self.mlp = MultiLayerPerceptron(self.embed_output_dim, self.mlp_dims, dropout=self.dropout) 81 | 82 | def forward(self, x, controller=None): 83 | linear_score = self.linear.forward(x) 84 | xe = self.calc_input(x, controller) 85 | fm_score = self.fm.forward(xe) 86 | dnn_score = self.mlp.forward(xe.view(-1, self.embed_output_dim)) 87 | score = linear_score + fm_score + dnn_score 88 | return score.squeeze(1) 89 | 90 | class FNN(BasicModel): 91 | def __init__(self, opt): 92 | super(FNN, self).__init__(opt) 93 | 
self.embed_output_dim = self.field_num * self.latent_dim 94 | self.mlp_dims = opt['mlp_dims'] 95 | self.dropout = opt['mlp_dropout'] 96 | self.mlp = MultiLayerPerceptron(self.embed_output_dim, self.mlp_dims, dropout=self.dropout) 97 | 98 | def forward(self, x, controller=None): 99 | xv = self.calc_input(x, controller) 100 | score = self.mlp.forward(xv.view(-1, self.embed_output_dim)) 101 | return score.squeeze(1) 102 | 103 | class IPNN(BasicModel): 104 | def __init__(self, opt): 105 | super(IPNN, self).__init__(opt) 106 | self.embed_output_dim = self.field_num * self.latent_dim 107 | self.product_output_dim = int(self.field_num * (self.field_num - 1) / 2) 108 | self.dnn_input_dim = self.embed_output_dim + self.product_output_dim 109 | self.mlp_dims = opt['mlp_dims'] 110 | self.dropout = opt['mlp_dropout'] 111 | self.mlp = MultiLayerPerceptron(self.dnn_input_dim, self.mlp_dims, dropout=self.dropout) 112 | 113 | # Create indexes 114 | rows = [] 115 | cols = [] 116 | for i in range(self.field_num): 117 | for j in range(i+1, self.field_num): 118 | rows.append(i) 119 | cols.append(j) 120 | self.rows = torch.tensor(rows, device=self.device) 121 | self.cols = torch.tensor(cols, device=self.device) 122 | 123 | def calc_product(self, xe): 124 | batch_size = xe.shape[0] 125 | trans = torch.transpose(xe, 1, 2) 126 | gather_rows = torch.gather(trans, 2, self.rows.expand(batch_size, trans.shape[1], self.rows.shape[0])) 127 | gather_cols = torch.gather(trans, 2, self.cols.expand(batch_size, trans.shape[1], self.rows.shape[0])) 128 | p = torch.transpose(gather_rows, 1, 2) 129 | q = torch.transpose(gather_cols, 1, 2) 130 | product_embedding = torch.mul(p, q) 131 | product_embedding = torch.sum(product_embedding, 2) 132 | return product_embedding 133 | 134 | def forward(self, x, controller=None): 135 | xv = self.calc_input(x, controller) 136 | product = self.calc_product(xv) 137 | xv = xv.view(-1, self.embed_output_dim) 138 | xe = torch.cat((xv, product), 1) 139 | score = self.mlp.forward(xe) 140 | return score.squeeze(1) 141 | 142 | class DCN(BasicModel): 143 | def __init__(self, opt): 144 | super(DCN, self).__init__(opt) 145 | self.embed_output_dim = self.field_num * self.latent_dim 146 | self.mlp_dims = opt['mlp_dims'] 147 | self.dropout = opt['mlp_dropout'] 148 | self.mlp = MultiLayerPerceptron(self.embed_output_dim, self.mlp_dims, dropout=self.dropout, output_layer=False) 149 | self.cross = CrossNetwork(self.embed_output_dim, opt['cross_layer_num']) 150 | self.combine = torch.nn.Linear(self.mlp_dims[-1] + self.embed_output_dim, 1) 151 | 152 | def forward(self, x, controller=None): 153 | xe = self.calc_input(x, controller) 154 | dnn_score = self.mlp.forward(xe.view(-1, self.embed_output_dim)) 155 | cross_score = self.cross.forward(xe.view(-1, self.embed_output_dim)) 156 | stacked = torch.cat((dnn_score, cross_score), 1) 157 | score = self.combine(stacked) 158 | return score.squeeze(1) 159 | 160 | -------------------------------------------------------------------------------- /AutoField/retrain.py: -------------------------------------------------------------------------------- 1 | from absl import flags 2 | import sys, os 3 | import time, random, statistics 4 | import collections 5 | import numpy as np 6 | import torch 7 | import torch.nn.functional as F 8 | import torch.utils.data as data 9 | from sklearn.metrics import roc_auc_score, log_loss 10 | from utils.train_help import get_retrain, get_log, get_cuda, get_optimizer, get_stats, get_dataloader 11 | 12 | my_seed = 0 13 | 
torch.manual_seed(my_seed)
14 | torch.cuda.manual_seed_all(my_seed)
15 | np.random.seed(my_seed)
16 | random.seed(my_seed)
17 | 
18 | FLAGS = flags.FLAGS
19 | flags.DEFINE_integer("gpu", 0, "specify gpu core", lower_bound=-1, upper_bound=7)
20 | flags.DEFINE_string("dataset", "Criteo", "Criteo, Avazu or KDD12")
21 | 
22 | flags.DEFINE_string("model", "deepfm", "prediction model")
23 | flags.DEFINE_integer("batch_size", 4096, "batch size")
24 | flags.DEFINE_integer("epoch", 20, "epoch for training/pruning")
25 | flags.DEFINE_integer("latent_dim", 16, "latent dimension for embedding table")
26 | flags.DEFINE_list("mlp_dims", [1024, 512, 256], "dimension for each MLP")
27 | flags.DEFINE_float("mlp_dropout", 0.0, "dropout for MLP")
28 | flags.DEFINE_string("optimizer", "adam", "optimizer for training")
29 | flags.DEFINE_float("lr", 1e-4, "model learning rate")
30 | flags.DEFINE_float("wd", 5e-5, "model weight decay")
31 | flags.DEFINE_float("arch_lr", 1e-3, "architecture param learning rate")
32 | flags.DEFINE_integer("cross_layer_num", 3, "cross layer num") # Deep & Cross Network
33 | 
34 | # AutoDim
35 | flags.DEFINE_string("transform", "linear", "transform method: linear or zero")
36 | 
37 | # How to save model
38 | flags.DEFINE_integer("debug_mode", 0, "0 for debug mode, 1 for normal mode")
39 | flags.DEFINE_string("save_path", "save", "Path to save")
40 | flags.DEFINE_string("save_name", "retrain.pth", "Save file name")
41 | flags.DEFINE_string("arch_file", "arch.npy", "Arch file")
42 | FLAGS(sys.argv)
43 | 
44 | os.environ['CUDA_VISIBLE_DEVICES'] = str(FLAGS.gpu)
45 | os.environ['TF_XLA_FLAGS'] = '--tf_xla_enable_xla_devices'
46 | os.environ['NUMEXPR_NUM_THREADS'] = '8'
47 | os.environ['NUMEXPR_MAX_THREADS'] = '8'
48 | 
49 | class Retrainer(object):
50 |     def __init__(self, opt):
51 |         self.loader = get_dataloader(opt["dataset"], opt["data_path"])
52 |         self.save_path = os.path.join(opt["save_path"], opt['dataset'], opt['model'])
53 |         self.save_name = opt["save_name"]
54 |         self.batch_size = opt["batch_size"]
55 |         self.debug_mode = opt["debug_mode"]
56 | 
57 |         if opt['cuda'] != -1:
58 |             get_cuda(True, 0)
59 |             self.device = torch.device('cuda')
60 |             opt['train']['use_cuda']=True
61 |         else:
62 |             self.device = torch.device('cpu'); opt['train']['use_cuda'] = False  # fall back to CPU so .to(self.device) below still works
63 |         with open(os.path.join(self.save_path, opt["arch_file"]), 'rb') as f:
64 |             arch = np.load(f)
65 |         print(arch)
66 |         self.model = get_retrain(opt['train'], arch).to(self.device)
67 | 
68 |         self.criterion = F.binary_cross_entropy_with_logits
69 |         self.optimizer = get_optimizer(self.model, opt["train"])
70 |         self.logger = get_log()
71 | 
72 |     def __update(self, label, data):
73 |         self.model.train()
74 |         for opt in self.optimizer:
75 |             opt.zero_grad()
76 |         data, label = data.to(self.device), label.to(self.device)
77 |         prob = self.model.forward(data)
78 |         loss = self.criterion(prob, label.squeeze())
79 |         loss.backward()
80 |         for opt in self.optimizer:
81 |             opt.step()
82 |         return loss.item()
83 | 
84 |     def __evaluate(self, label, data):
85 |         self.model.eval()
86 |         data, label = data.to(self.device), label.to(self.device)
87 |         prob = self.model.forward(data)
88 |         prob = torch.sigmoid(prob).detach().cpu().numpy()
89 |         label = label.detach().cpu().numpy()
90 |         return prob, label
91 | 
92 |     def eval_one_part(self, name):
93 |         preds, trues = [], []
94 |         for feature,label in self.loader.get_data(name, batch_size=self.batch_size):
95 |             pred, label = self.__evaluate(label, feature)
96 |             preds.append(pred)
97 |             trues.append(label)
98 |         y_pred = np.concatenate(preds).astype("float64")
99 | 
y_true = np.concatenate(trues).astype("float64") 100 | auc = roc_auc_score(y_true, y_pred) 101 | loss = log_loss(y_true, y_pred) 102 | return auc, loss 103 | 104 | def __save_model(self): 105 | os.makedirs(self.save_path, exist_ok=True) 106 | torch.save(self.model.state_dict(), os.path.join(self.save_path, self.save_name)) 107 | 108 | def train_epoch(self, max_epoch): 109 | print('-' * 80) 110 | print('Begin Training ...') 111 | params, sparsity = self.model.calc_sparsity() 112 | self.logger.info("[Params {} | Sparsity {}]".format(params, sparsity)) 113 | step_idx = 0 114 | best_auc = 0.0 115 | for epoch_idx in range(int(max_epoch)): 116 | epoch_step = 0 117 | train_loss = 0.0 118 | for feature, label in self.loader.get_data("train", batch_size = self.batch_size): 119 | step_idx += 1 120 | epoch_step += 1 121 | update_loss = self.__update(label, feature) 122 | train_loss += update_loss 123 | train_loss /= epoch_step 124 | val_auc, val_loss = self.eval_one_part(name='val') 125 | test_auc, test_loss = self.eval_one_part(name='test') 126 | self.logger.info("[Epoch {} | Train Loss:{}]".format(epoch_idx, train_loss)) 127 | self.logger.info("[Epoch {} | Val Loss:{} | Val AUC: {}]".format(epoch_idx, val_loss, val_auc)) 128 | self.logger.info("[Epoch {} | Test Loss:{} | Test AUC: {}]".format(epoch_idx, test_loss, test_auc)) 129 | 130 | if best_auc < val_auc: 131 | best_auc = val_auc 132 | best_test_auc, best_test_loss = test_auc, test_loss 133 | if self.debug_mode == 1: 134 | self.__save_model() 135 | else: 136 | self.logger.info("Early stopped!!!") 137 | break 138 | self.logger.info("Most Accurate | AUC: {} | Logloss: {}".format(best_test_auc, best_test_loss)) 139 | 140 | def test_one_time(self): 141 | mytime = [] 142 | preds, trues = [], [] 143 | index = 0 144 | for feature, label in self.loader.get_data("val", batch_size=self.batch_size): 145 | starttime = time.time() 146 | pred, label = self.__evaluate(label, feature) 147 | endtime = time.time() 148 | preds.append(pred) 149 | trues.append(label) 150 | mytime.append(endtime - starttime) 151 | index += 1 152 | return (sum(mytime) * 1000 / index) 153 | 154 | def test_time(self): 155 | testtimes = [] 156 | for i in range(5): 157 | testtime = self.test_one_time() 158 | testtimes.append(testtime) 159 | print("Mean: {mean:.6f}".format(mean=statistics.mean(testtimes))) 160 | print("Std: {std:.6f}".format(std=statistics.stdev(testtimes))) 161 | 162 | def main(): 163 | sys.path.extend(["./models","./dataloader","./utils"]) 164 | if FLAGS.dataset == "Criteo": 165 | field_dim = get_stats("../../datasets/criteo_stats") 166 | data = "../../datasets/criteo" 167 | # field_dim = get_stats("../dataset/criteo/stats_2") 168 | # data = "../dataset/criteo/threshold_2" 169 | elif FLAGS.dataset == "Avazu": 170 | field_dim = get_stats("../../datasets/avazu_stats") 171 | data = "../../datasets/avazu" 172 | # field_dim = get_stats("../dataset/avazu/stats_2") 173 | # data = "../dataset/avazu/threshold_2" 174 | elif FLAGS.dataset == "KDD12": 175 | field_dim = get_stats("../../datasets/kdd12_stats") 176 | data = "../../datasets/kdd12" 177 | # field_dim = get_stats("../dataset/kdd12/stats_10") 178 | # data = "../dataset/kdd12/threshold_10" 179 | 180 | train_opt = { 181 | "model":FLAGS.model, "optimizer":FLAGS.optimizer, 182 | "lr":FLAGS.lr, "wd":FLAGS.wd, "arch_lr":FLAGS.arch_lr, 183 | "field_dim": field_dim, "latent_dim":FLAGS.latent_dim, 184 | "mlp_dims":FLAGS.mlp_dims, "mlp_dropout":FLAGS.mlp_dropout, 185 | "cross_layer_num":FLAGS.cross_layer_num, 
"transform":FLAGS.transform 186 | } 187 | opt = { 188 | "dataset":FLAGS.dataset, "data_path":data, 189 | "cuda":FLAGS.gpu, "model":FLAGS.model, "batch_size":FLAGS.batch_size, 190 | "save_path":FLAGS.save_path, "save_name":FLAGS.save_name, "debug_mode":FLAGS.debug_mode, 191 | "arch_file":FLAGS.arch_file, "train":train_opt 192 | } 193 | # print("opt:{}".format(opt)) 194 | 195 | rter = Retrainer(opt) 196 | # rter.train_epoch(FLAGS.epoch) 197 | rter.test_time() 198 | 199 | if __name__ == '__main__': 200 | try: 201 | main() 202 | os._exit(0) 203 | except: 204 | import traceback 205 | traceback.print_exc() 206 | time.sleep(1) 207 | os._exit(1) 208 | -------------------------------------------------------------------------------- /LPFS/train.py: -------------------------------------------------------------------------------- 1 | from absl import flags 2 | import sys, os 3 | import time, random, statistics 4 | import collections 5 | import numpy as np 6 | import torch 7 | import torch.nn as nn 8 | import torch.nn.functional as F 9 | import torch.utils.data as data 10 | from sklearn.metrics import roc_auc_score, log_loss 11 | from utils.train_help import get_model, get_log, get_cuda, get_optimizer, get_stats, get_dataloader 12 | 13 | my_seed = 0 14 | torch.manual_seed(my_seed) 15 | torch.cuda.manual_seed_all(my_seed) 16 | np.random.seed(my_seed) 17 | random.seed(my_seed) 18 | 19 | FLAGS = flags.FLAGS 20 | flags.DEFINE_integer("gpu", 0, "specify gpu core", lower_bound=-1, upper_bound=7) 21 | flags.DEFINE_string("dataset", "Criteo", "Criteo, Avazu or KDD12") 22 | 23 | # General Model 24 | flags.DEFINE_string("model", "deepfm", "prediction model") 25 | flags.DEFINE_integer("batch_size", 4096, "batch size") 26 | flags.DEFINE_integer("epoch", 20, "epoch for training/pruning") 27 | flags.DEFINE_integer("latent_dim", 16, "latent dimension for embedding table") 28 | flags.DEFINE_list("mlp_dims", [1024, 512, 256], "dimension for each MLP") 29 | flags.DEFINE_float("mlp_dropout", 0.0, "dropout for MLP") 30 | flags.DEFINE_string("optimizer", "adam", "optimizer for training") 31 | flags.DEFINE_float("lr", 1e-4, "model learning rate") 32 | flags.DEFINE_float("wd", 5e-5, "model weight decay") 33 | flags.DEFINE_integer("cross_layer_num", 3, "cross layer num") # Deep & Cross Network 34 | 35 | # LPFS Component 36 | flags.DEFINE_enum("selector", "lpfs", ['lpfs', 'lpfspp'], "LPFS selector") 37 | flags.DEFINE_float("epsilon", 1e-1, "epsilon term for LPFS and LPFS++") 38 | flags.DEFINE_float("lam", 2e-2, "architecture param regularization term") 39 | FLAGS(sys.argv) 40 | 41 | os.environ['CUDA_VISIBLE_DEVICES'] = str(FLAGS.gpu) 42 | os.environ['TF_XLA_FLAGS'] = '--tf_xla_enable_xla_devices' 43 | os.environ['NUMEXPR_NUM_THREADS'] = '8' 44 | os.environ['NUMEXPR_MAX_THREADS'] = '8' 45 | 46 | class Trainer(object): 47 | def __init__(self, opt): 48 | self.loader = get_dataloader(opt["dataset"], opt["data_path"]) 49 | self.batch_size = opt["batch_size"] 50 | 51 | if opt['cuda'] != -1: 52 | get_cuda(True, 0) 53 | self.device = torch.device('cuda') 54 | opt['train']['use_cuda'] = True 55 | else: 56 | self.device = torch.device('cpu') 57 | opt['train']['use_cuda'] = False 58 | self.model = get_model(opt['train']).to(self.device) 59 | self.optimizer = torch.optim.Adam(params=self.model.parameters(), lr=opt['train']['lr'], weight_decay=opt['train']['wd']) 60 | 61 | # initialize arch parameter 62 | self.selector = opt['train']['selector'] 63 | self.arch = nn.Parameter(torch.ones([len(opt['train']['field_dim'])])) 64 | 
self.arch_optimizer = torch.optim.SGD(params=[self.arch], lr=opt['train']['lr']) 65 | self.arch = self.arch.to(self.device) 66 | self.lam = opt['train']['lam'] 67 | self.lr = opt['train']['lr'] 68 | 69 | self.criterion = F.binary_cross_entropy_with_logits 70 | self.logger = get_log() 71 | 72 | def __train(self, label, data, p): 73 | data, label = data.to(self.device), label.to(self.device) 74 | 75 | # Update Model 76 | self.model.train() 77 | self.optimizer.zero_grad() 78 | self.arch_optimizer.zero_grad() 79 | logits = self.model.forward(data, self.arch) 80 | loss = self.criterion(logits, label.squeeze()) 81 | # if self.selector == 'lpfs': 82 | # regloss = self.lam * torch.sum(torch.abs(self.arch)) 83 | # loss += regloss 84 | loss.backward() 85 | self.optimizer.step() 86 | self.arch_optimizer.step() 87 | 88 | # proximal-L1 algorithm update 89 | if self.selector == 'lpfspp' or self.selector == 'lpfs': 90 | thr = 2 * self.lam * self.lr 91 | in1 = p.data > thr 92 | in2 = p.data < -thr 93 | in3 = ~(in1 | in2) 94 | p.data[in1] -= thr 95 | p.data[in2] += thr 96 | p.data[in3] = 0.0 97 | 98 | self.arch = p 99 | self.arch = self.arch.to(self.device) 100 | return loss.item() 101 | 102 | def __evaluate(self, label, data): 103 | self.model.eval() 104 | data, label = data.to(self.device), label.to(self.device) 105 | prob = self.model.forward(data, self.arch) 106 | prob = torch.sigmoid(prob).detach().cpu().numpy() 107 | label = label.detach().cpu().numpy() 108 | return prob, label 109 | 110 | def eval_one_part(self, name): 111 | preds, trues = [], [] 112 | for feature, label in self.loader.get_data(name, batch_size=self.batch_size): 113 | pred, label = self.__evaluate(label, feature) 114 | preds.append(pred) 115 | trues.append(label) 116 | y_pred = np.concatenate(preds).astype("float64") 117 | y_true = np.concatenate(trues).astype("float64") 118 | auc = roc_auc_score(y_true, y_pred) 119 | loss = log_loss(y_true, y_pred) 120 | return auc, loss 121 | 122 | def train(self, max_epoch): 123 | step_idx = 0 124 | best_auc = 0.0 125 | # Training 126 | print('-' * 80) 127 | print('Begin Training ...') 128 | p = self.arch_optimizer.param_groups[0]["params"][0] 129 | for epoch_idx in range(int(max_epoch)): 130 | epoch_step = 0 131 | train_loss = 0.0 132 | for feature, label in self.loader.get_data("train", batch_size = self.batch_size): 133 | step_idx += 1 134 | epoch_step += 1 135 | update_loss = self.__train(label, feature, p) 136 | train_loss += update_loss 137 | train_loss /= epoch_step 138 | val_auc, val_loss = self.eval_one_part('val') 139 | test_auc, test_loss = self.eval_one_part('test') 140 | params, sparsity = self.model.calc_sparsity(self.arch) 141 | print(self.arch) 142 | self.logger.info("[Epoch {epoch:d} | Train Loss:{loss:.6f} | Sparsity:{sparsity:.6f}]".format(epoch=epoch_idx, loss=train_loss, sparsity=sparsity)) 143 | self.logger.info("[Epoch {epoch:d} | Val Loss:{loss:.6f} | Val AUC:{auc:.6f}]".format(epoch=epoch_idx, loss=val_loss, auc=val_auc)) 144 | self.logger.info("[Epoch {epoch:d} | Test Loss:{loss:.6f} | Test AUC:{auc:.6f}]".format(epoch=epoch_idx, loss=test_loss, auc=test_auc)) 145 | 146 | if best_auc < val_auc: 147 | best_auc, best_sparsity = val_auc, sparsity 148 | best_test_auc, best_test_logloss = test_auc, test_loss 149 | else: 150 | self.logger.info("Early stopped!!!") 151 | break 152 | self.logger.info("Most Accurate | AUC:{auc:.6f} | Logloss:{logloss:.6f} | Sparsity:{sparsity:.6f}".format(auc=best_test_auc, logloss=best_test_logloss, sparsity=best_sparsity)) 153 | 154 | def 
test_one_time(self): 155 | mytime = [] 156 | preds, trues = [], [] 157 | index = 0 158 | for feature, label in self.loader.get_data("val", batch_size=self.batch_size): 159 | starttime = time.time() 160 | pred, label = self.__evaluate(label, feature) 161 | endtime = time.time() 162 | preds.append(pred) 163 | trues.append(label) 164 | mytime.append(endtime - starttime) 165 | index += 1 166 | return (sum(mytime) * 1000 / index) 167 | 168 | def test_time(self): 169 | testtimes = [] 170 | for i in range(5): 171 | testtime = self.test_one_time() 172 | testtimes.append(testtime) 173 | print("Mean: {mean:.6f}".format(mean=statistics.mean(testtimes))) 174 | print("Std: {std:.6f}".format(std=statistics.stdev(testtimes))) 175 | 176 | def main(): 177 | sys.path.extend(["./models","./dataloader","./utils"]) 178 | if FLAGS.dataset == "Criteo": 179 | field_dim = get_stats("../../datasets/criteo_stats") 180 | data = "../../datasets/criteo" 181 | # field_dim = get_stats("../dataset/criteo/stats_2") 182 | # data = "../dataset/criteo/threshold_2" 183 | elif FLAGS.dataset == "Avazu": 184 | field_dim = get_stats("../../datasets/avazu_stats") 185 | data = "../../datasets/avazu" 186 | # field_dim = get_stats("../dataset/avazu/stats_2") 187 | # data = "../dataset/avazu/threshold_2" 188 | elif FLAGS.dataset == "KDD12": 189 | field_dim = get_stats("../../datasets/kdd12_stats") 190 | data = "../../datasets/kdd12" 191 | # field_dim = get_stats("../dataset/kdd12/stats_10") 192 | # data = "../dataset/kdd12/threshold_10" 193 | 194 | train_opt = { 195 | "model":FLAGS.model, "optimizer":FLAGS.optimizer, "selector":FLAGS.selector, 196 | "lr":FLAGS.lr, "wd":FLAGS.wd, "lam":FLAGS.lam, "epsilon":FLAGS.epsilon, 197 | "field_dim":field_dim, "latent_dim":FLAGS.latent_dim, 198 | "mlp_dims":FLAGS.mlp_dims, "mlp_dropout":FLAGS.mlp_dropout, 199 | "cross_layer_num":FLAGS.cross_layer_num 200 | } 201 | opt = { 202 | "dataset":FLAGS.dataset, "data_path":data, "cuda":FLAGS.gpu, "model":FLAGS.model, 203 | "batch_size":FLAGS.batch_size, "train":train_opt 204 | } 205 | # print("opt:{}".format(opt)) 206 | 207 | trainer = Trainer(opt) 208 | trainer.test_time() 209 | # trainer.train(FLAGS.epoch) 210 | 211 | if __name__ == '__main__': 212 | try: 213 | main() 214 | os._exit(0) 215 | except: 216 | import traceback 217 | traceback.print_exc() 218 | time.sleep(1) 219 | os._exit(1) 220 | -------------------------------------------------------------------------------- /AutoField/search.py: -------------------------------------------------------------------------------- 1 | from absl import flags 2 | import sys, os 3 | import time, random 4 | import collections 5 | import numpy as np 6 | import torch 7 | import torch.nn.functional as F 8 | import torch.utils.data as data 9 | from sklearn.metrics import roc_auc_score, log_loss 10 | from utils.train_help import get_search, get_log, get_cuda, get_optimizer, get_stats, get_dataloader 11 | 12 | my_seed = 0 13 | torch.manual_seed(my_seed) 14 | torch.cuda.manual_seed_all(my_seed) 15 | np.random.seed(my_seed) 16 | random.seed(my_seed) 17 | 18 | FLAGS = flags.FLAGS 19 | flags.DEFINE_integer("gpu", 0, "specify gpu core", lower_bound=-1, upper_bound=7) 20 | flags.DEFINE_string("dataset", "Criteo", "Criteo, Avazu or KDD12") 21 | 22 | # General Model 23 | flags.DEFINE_string("model", "deepfm", "prediction model") 24 | flags.DEFINE_integer("batch_size", 4096, "batch size") 25 | flags.DEFINE_integer("epoch", 20, "epoch for training/pruning") 26 | flags.DEFINE_integer("latent_dim", 16, "latent dimension for embedding 
table") 27 | flags.DEFINE_list("mlp_dims", [1024, 512, 256], "dimension for each MLP") 28 | flags.DEFINE_float("mlp_dropout", 0.0, "dropout for MLP") 29 | flags.DEFINE_string("optimizer", "adam", "optimizer for training") 30 | flags.DEFINE_float("lr", 1e-4, "model learning rate") 31 | flags.DEFINE_float("wd", 5e-5, "model weight decay") 32 | flags.DEFINE_float("arch_lr", 1e-3, "architecture param learning rate") 33 | flags.DEFINE_integer("cross_layer_num", 3, "cross layer num") # Deep & Cross Network 34 | 35 | # How to save model 36 | flags.DEFINE_integer("debug_mode", 0, "0 for debug mode, 1 for noraml mode") 37 | flags.DEFINE_string("save_path", "save/", "Path to save") 38 | flags.DEFINE_string("save_name", "arch.npy", "Save file name") 39 | FLAGS(sys.argv) 40 | 41 | os.environ['CUDA_VISIBLE_DEVICES'] = str(FLAGS.gpu) 42 | os.environ['TF_XLA_FLAGS'] = '--tf_xla_enable_xla_devices' 43 | os.environ['NUMEXPR_NUM_THREADS'] = '8' 44 | os.environ['NUMEXPR_MAX_THREADS'] = '8' 45 | 46 | class Searcher(object): 47 | def __init__(self, opt): 48 | self.loader = get_dataloader(opt["dataset"], opt["data_path"]) 49 | self.save_path = os.path.join(opt["save_path"], opt['dataset'], opt['model']) 50 | self.save_name = opt["save_name"] 51 | self.batch_size = opt["batch_size"] 52 | self.debug_mode = opt["debug_mode"] 53 | 54 | if opt['cuda'] != -1: 55 | get_cuda(True, 0) 56 | self.device = torch.device('cuda') 57 | opt['train']['use_cuda']=True 58 | else: 59 | self.device = torch.device('cpu') 60 | opt['train']['use_cuda'] = False 61 | self.model = get_search(opt['train']).to(self.device) 62 | self.optimizer = get_optimizer(self.model, opt["train"])[1] 63 | 64 | self.arch = torch.zeros(len(opt['train']['field_dim']), device=self.device, requires_grad=True) 65 | arch_optim_config = {'params': self.arch, 'lr': opt['train']['arch_lr']} 66 | self.arch_optimizer = torch.optim.Adam([arch_optim_config]) 67 | 68 | self.proxy_model = get_search(opt['train']).to(self.device) 69 | self.proxy_optimizer = torch.optim.SGD(params=self.proxy_model.parameters(), lr=opt['train']['lr']) 70 | 71 | self.criterion = F.binary_cross_entropy_with_logits 72 | self.logger = get_log() 73 | 74 | def __get_beta(self, t_gs): 75 | beta = max(0.01, 1 - 5e-5 * t_gs) 76 | return beta 77 | 78 | def __update(self, label, data, beta=1, index=0): 79 | data, label = data.to(self.device), label.to(self.device) 80 | 81 | # Copy to proxy model 82 | for x, y in zip(self.proxy_model.parameters(), self.model.parameters()): 83 | x.data.copy_(y.data) 84 | 85 | # Update Model 86 | self.model.train() 87 | self.optimizer.zero_grad() 88 | in_logits = self.model.forward(data, beta, self.arch) 89 | in_loss = self.criterion(in_logits, label.squeeze()) 90 | in_loss.backward() 91 | self.optimizer.step() 92 | 93 | # Compute Proxy 94 | self.proxy_model.train() 95 | self.proxy_optimizer.zero_grad() 96 | proxy_logits = self.proxy_model.forward(data, beta, self.arch) 97 | proxy_loss = self.criterion(proxy_logits, label.squeeze()) 98 | proxy_loss.backward() 99 | self.proxy_optimizer.step() 100 | 101 | # Update Arch 102 | self.arch_optimizer.zero_grad() 103 | out_logits = self.proxy_model.forward(data, beta, self.arch) 104 | out_loss = self.criterion(out_logits, label.squeeze()) 105 | out_loss.backward() 106 | self.arch_optimizer.step() 107 | 108 | return in_loss.item() 109 | 110 | def __evaluate(self, label, data, beta=1): 111 | self.model.eval() 112 | data, label = data.to(self.device), label.to(self.device) 113 | prob = self.model.forward(data, beta, self.arch) 114 
| prob = torch.sigmoid(prob).detach().cpu().numpy() 115 | label = label.detach().cpu().numpy() 116 | return prob, label 117 | 118 | def eval_one_part(self, name, beta): 119 | preds, trues = [], [] 120 | for feature,label in self.loader.get_data(name, batch_size=self.batch_size): 121 | pred, label = self.__evaluate(label, feature, beta) 122 | preds.append(pred) 123 | trues.append(label) 124 | y_pred = np.concatenate(preds).astype("float64") 125 | y_true = np.concatenate(trues).astype("float64") 126 | auc = roc_auc_score(y_true, y_pred) 127 | loss = log_loss(y_true, y_pred) 128 | return auc, loss 129 | 130 | def __save_model(self): 131 | os.makedirs(self.save_path, exist_ok=True) 132 | arch = self.arch.detach().cpu().numpy() 133 | dis_arch = np.zeros_like(arch) 134 | dis_arch = np.where(arch > 0, 1, 0) 135 | with open(os.path.join(self.save_path, self.save_name), 'wb') as f: 136 | np.save(f, dis_arch) 137 | 138 | def search(self, max_epoch): 139 | print('-' * 80) 140 | print('Begin Searching ...') 141 | step_idx = 0 142 | best_auc = 0.0 143 | for epoch_idx in range(int(max_epoch)): 144 | epoch_step = 0 145 | train_loss = 0.0 146 | for feature, label in self.loader.get_data("train", batch_size = self.batch_size): 147 | step_idx += 1 148 | epoch_step += 1 149 | beta = self.__get_beta(step_idx) 150 | update_loss = self.__update(label, feature, beta, epoch_step) 151 | train_loss += update_loss 152 | train_loss /= epoch_step 153 | val_auc, val_loss = self.eval_one_part(name='val', beta=beta) 154 | test_auc, test_loss = self.eval_one_part(name='test', beta=beta) 155 | params, sparsity = self.model.calc_sparsity(self.arch) 156 | self.logger.info("[Epoch {epoch:d} | Train Loss:{loss:.6f} | Sparsity:{sparsity:.6f}]".format(epoch=epoch_idx, loss=train_loss, sparsity=sparsity)) 157 | self.logger.info("[Epoch {epoch:d} | Val Loss:{loss:.6f} | Val AUC:{auc:.6f}]".format(epoch=epoch_idx, loss=val_loss, auc=val_auc)) 158 | self.logger.info("[Epoch {epoch:d} | Test Loss:{loss:.6f} | Test AUC:{auc:.6f}]".format(epoch=epoch_idx, loss=test_loss, auc=test_auc)) 159 | 160 | if best_auc < val_auc: 161 | best_auc, best_sparsity = val_auc, sparsity 162 | best_test_auc, best_test_logloss = test_auc, test_loss 163 | if self.debug_mode == 1: 164 | self.__save_model() 165 | else: 166 | self.logger.info("Early stopped!!!") 167 | break 168 | self.logger.info("Most Accurate | AUC:{auc:.6f} | Logloss:{logloss:.6f} | Sparsity:{sparsity:.6f}".format(auc=best_test_auc, logloss=best_test_logloss, sparsity=best_sparsity)) 169 | 170 | def main(): 171 | sys.path.extend(["./models","./dataloader","./utils"]) 172 | if FLAGS.dataset == "Criteo": 173 | field_dim = get_stats("../../datasets/criteo_stats") 174 | data = "../../datasets/criteo" 175 | # field_dim = get_stats("../dataset/criteo/stats_2") 176 | # data = "../dataset/criteo/threshold_2" 177 | elif FLAGS.dataset == "Avazu": 178 | field_dim = get_stats("../../datasets/avazu_stats") 179 | data = "../../datasets/avazu" 180 | # field_dim = get_stats("../dataset/avazu/stats_2") 181 | # data = "../dataset/avazu/threshold_2" 182 | elif FLAGS.dataset == "KDD12": 183 | field_dim = get_stats("../../datasets/kdd12_stats") 184 | data = "../../datasets/kdd12" 185 | # field_dim = get_stats("../dataset/kdd12/stats_10") 186 | # data = "../dataset/kdd12/threshold_10" 187 | 188 | train_opt = { 189 | "model":FLAGS.model, "optimizer":FLAGS.optimizer, 190 | "lr":FLAGS.lr, "wd":FLAGS.wd, "arch_lr":FLAGS.arch_lr, 191 | "field_dim":field_dim, "latent_dim":FLAGS.latent_dim, 192 | 
"mlp_dims":FLAGS.mlp_dims, "mlp_dropout":FLAGS.mlp_dropout, 193 | "cross_layer_num":FLAGS.cross_layer_num 194 | } 195 | opt = { 196 | "dataset":FLAGS.dataset, "data_path":data, "cuda":FLAGS.gpu, "model":FLAGS.model, 197 | "batch_size":FLAGS.batch_size, "save_path":FLAGS.save_path, "save_name":FLAGS.save_name, 198 | "debug_mode":FLAGS.debug_mode, "train":train_opt 199 | } 200 | print("opt:{}".format(opt)) 201 | 202 | searcher = Searcher(opt) 203 | searcher.search(FLAGS.epoch) 204 | 205 | if __name__ == '__main__': 206 | try: 207 | main() 208 | os._exit(0) 209 | except: 210 | import traceback 211 | traceback.print_exc() 212 | time.sleep(1) 213 | os._exit(1) 214 | -------------------------------------------------------------------------------- /AdaFS/train.py: -------------------------------------------------------------------------------- 1 | from absl import flags 2 | import sys, os 3 | import time, random, statistics 4 | import collections 5 | import numpy as np 6 | import torch 7 | import torch.nn.functional as F 8 | import torch.utils.data as data 9 | from sklearn.metrics import roc_auc_score, log_loss 10 | from utils.train_help import get_model, get_log, get_cuda, get_optimizer, get_stats, get_dataloader 11 | 12 | my_seed = 0 13 | torch.manual_seed(my_seed) 14 | torch.cuda.manual_seed_all(my_seed) 15 | np.random.seed(my_seed) 16 | random.seed(my_seed) 17 | 18 | FLAGS = flags.FLAGS 19 | flags.DEFINE_integer("gpu", 0, "specify gpu core", lower_bound=-1, upper_bound=7) 20 | flags.DEFINE_string("dataset", "Criteo", "Criteo, Avazu or KDD12") 21 | 22 | # General Model 23 | flags.DEFINE_string("model", "deepfm", "prediction model") 24 | flags.DEFINE_integer("batch_size", 4096, "batch size") 25 | flags.DEFINE_integer("epoch", 20, "epoch for training/pruning") 26 | flags.DEFINE_integer("pretrain", 2, "epoch for pretraining") 27 | flags.DEFINE_integer("latent_dim", 16, "latent dimension for embedding table") 28 | flags.DEFINE_list("mlp_dims", [1024, 512, 256], "dimension for each MLP") 29 | flags.DEFINE_float("mlp_dropout", 0.0, "dropout for MLP") 30 | flags.DEFINE_string("optimizer", "adam", "optimizer for training") 31 | flags.DEFINE_float("lr", 1e-4, "model learning rate") 32 | flags.DEFINE_float("wd", 5e-5, "model weight decay") 33 | flags.DEFINE_float("arch_lr", 1e-3, "architecture param learning rate") 34 | flags.DEFINE_integer("cross_layer_num", 3, "cross layer num") # Deep & Cross Network 35 | FLAGS(sys.argv) 36 | 37 | os.environ['CUDA_VISIBLE_DEVICES'] = str(FLAGS.gpu) 38 | os.environ['TF_XLA_FLAGS'] = '--tf_xla_enable_xla_devices' 39 | os.environ['NUMEXPR_NUM_THREADS'] = '8' 40 | os.environ['NUMEXPR_MAX_THREADS'] = '8' 41 | 42 | class Trainer(object): 43 | def __init__(self, opt): 44 | self.loader = get_dataloader(opt["dataset"], opt["data_path"]) 45 | self.batch_size = opt["batch_size"] 46 | 47 | if opt['cuda'] != -1: 48 | get_cuda(True, 0) 49 | self.device = torch.device('cuda') 50 | opt['train']['use_cuda']=True 51 | else: 52 | self.device = torch.device('cpu') 53 | opt['train']['use_cuda'] = False 54 | self.model = get_model(opt['train']).to(self.device) 55 | self.optimizer = torch.optim.Adam(params=self.model.parameters(), lr=opt['train']['lr'], weight_decay=opt['train']['wd']) 56 | 57 | # initialize controller 58 | layer = list() 59 | layer.append(torch.nn.Linear(opt['train']["latent_dim"], 1)) 60 | layer.append(torch.nn.Softmax(dim=1)) 61 | self.controller = torch.nn.Sequential(*layer).to(self.device) 62 | self.controller_optimizer = 
torch.optim.Adam(params=self.controller.parameters(), lr=opt['train']['lr']) 63 | 64 | self.proxy_model = get_model(opt['train']).to(self.device) 65 | self.proxy_optimizer = torch.optim.SGD(params=self.proxy_model.parameters(), lr=opt['train']['lr']) 66 | 67 | self.criterion = F.binary_cross_entropy_with_logits 68 | self.logger = get_log() 69 | 70 | def __pretrain(self, label, data): 71 | data, label = data.to(self.device), label.to(self.device) 72 | 73 | # Update Model 74 | self.model.train() 75 | self.optimizer.zero_grad() 76 | logits = self.model.forward(data) 77 | loss = self.criterion(logits, label.squeeze()) 78 | loss.backward() 79 | self.optimizer.step() 80 | 81 | return loss.item() 82 | 83 | def __train(self, label, data): 84 | data, label = data.to(self.device), label.to(self.device) 85 | 86 | # Copy to proxy model 87 | for x, y in zip(self.proxy_model.parameters(), self.model.parameters()): 88 | x.data.copy_(y.data) 89 | 90 | # Update Model 91 | self.model.train() 92 | self.optimizer.zero_grad() 93 | in_logits = self.model.forward(data, self.controller) 94 | in_loss = self.criterion(in_logits, label.squeeze()) 95 | in_loss.backward() 96 | self.optimizer.step() 97 | 98 | # Compute Proxy 99 | self.proxy_model.train() 100 | self.proxy_optimizer.zero_grad() 101 | proxy_logits = self.proxy_model.forward(data, self.controller) 102 | proxy_loss = self.criterion(proxy_logits, label.squeeze()) 103 | proxy_loss.backward() 104 | self.proxy_optimizer.step() 105 | 106 | # Update Controller 107 | self.controller_optimizer.zero_grad() 108 | out_logits = self.proxy_model.forward(data, self.controller) 109 | out_loss = self.criterion(out_logits, label.squeeze()) 110 | out_loss.backward() 111 | self.controller_optimizer.step() 112 | 113 | return in_loss.item() 114 | 115 | def __evaluate(self, label, data, controller=None): 116 | self.model.eval() 117 | data, label = data.to(self.device), label.to(self.device) 118 | if controller is None: 119 | prob = self.model.forward(data) 120 | else: 121 | prob = self.model.forward(data, controller) 122 | prob = torch.sigmoid(prob).detach().cpu().numpy() 123 | label = label.detach().cpu().numpy() 124 | return prob, label 125 | 126 | def eval_one_part(self, name, controller=None): 127 | preds, trues = [], [] 128 | for feature,label in self.loader.get_data(name, batch_size=self.batch_size): 129 | pred, label = self.__evaluate(label, feature, controller) 130 | preds.append(pred) 131 | trues.append(label) 132 | y_pred = np.concatenate(preds).astype("float64") 133 | y_true = np.concatenate(trues).astype("float64") 134 | auc = roc_auc_score(y_true, y_pred) 135 | loss = log_loss(y_true, y_pred) 136 | return auc, loss 137 | 138 | def pretrain(self, max_epoch): 139 | step_idx = 0 140 | # Pre-training 141 | print('-' * 80) 142 | print('Begin Pretraining ...') 143 | for epoch_idx in range(max_epoch): 144 | epoch_step = 0 145 | train_loss = 0.0 146 | for feature, label in self.loader.get_data("train", batch_size = self.batch_size): 147 | step_idx += 1 148 | epoch_step += 1 149 | update_loss = self.__pretrain(label, feature) 150 | train_loss += update_loss 151 | train_loss /= epoch_step 152 | val_auc, val_loss = self.eval_one_part('val') 153 | test_auc, test_loss = self.eval_one_part('test') 154 | self.logger.info("[Epoch {epoch:d} | Train Loss:{loss:.6f}]".format(epoch=epoch_idx, loss=train_loss)) 155 | self.logger.info("[Epoch {epoch:d} | Val Loss:{loss:.6f} | Val AUC:{auc:.6f}]".format(epoch=epoch_idx, loss=val_loss, auc=val_auc)) 156 | self.logger.info("[Epoch {epoch:d} 
| Test Loss:{loss:.6f} | Test AUC:{auc:.6f}]".format(epoch=epoch_idx, loss=test_loss, auc=test_auc)) 157 | 158 | def train(self, max_epoch): 159 | step_idx = 0 160 | best_auc = 0.0 161 | # Training 162 | print('-' * 80) 163 | print('Begin Training ...') 164 | for epoch_idx in range(int(max_epoch)): 165 | epoch_step = 0 166 | train_loss = 0.0 167 | for feature, label in self.loader.get_data("train", batch_size = self.batch_size): 168 | step_idx += 1 169 | epoch_step += 1 170 | update_loss = self.__train(label, feature) 171 | train_loss += update_loss 172 | train_loss /= epoch_step 173 | val_auc, val_loss = self.eval_one_part('val', self.controller) 174 | test_auc, test_loss = self.eval_one_part('test', self.controller) 175 | self.logger.info("[Epoch {epoch:d} | Train Loss:{loss:.6f}]".format(epoch=epoch_idx, loss=train_loss)) 176 | self.logger.info("[Epoch {epoch:d} | Val Loss:{loss:.6f} | Val AUC:{auc:.6f}]".format(epoch=epoch_idx, loss=val_loss, auc=val_auc)) 177 | self.logger.info("[Epoch {epoch:d} | Test Loss:{loss:.6f} | Test AUC:{auc:.6f}]".format(epoch=epoch_idx, loss=test_loss, auc=test_auc)) 178 | 179 | if best_auc < val_auc: 180 | best_auc = val_auc 181 | best_test_auc, best_test_logloss = test_auc, test_loss 182 | else: 183 | self.logger.info("Early stopped!!!") 184 | break 185 | self.logger.info("Most Accurate | AUC:{auc:.6f} | Logloss:{logloss:.6f}".format(auc=best_test_auc, logloss=best_test_logloss)) 186 | 187 | def test_one_time(self): 188 | mytime = [] 189 | preds, trues = [], [] 190 | index = 0 191 | for feature, label in self.loader.get_data("val", batch_size=self.batch_size): 192 | starttime = time.time() 193 | pred, label = self.__evaluate(label, feature) 194 | endtime = time.time() 195 | preds.append(pred) 196 | trues.append(label) 197 | mytime.append(endtime - starttime) 198 | index += 1 199 | return (sum(mytime) * 1000 / index) 200 | 201 | def test_time(self): 202 | testtimes = [] 203 | for i in range(5): 204 | testtime = self.test_one_time() 205 | testtimes.append(testtime) 206 | print("Mean: {mean:.6f}".format(mean=statistics.mean(testtimes))) 207 | print("Std: {std:.6f}".format(std=statistics.stdev(testtimes))) 208 | 209 | def main(): 210 | sys.path.extend(["./models","./dataloader","./utils"]) 211 | if FLAGS.dataset == "Criteo": 212 | field_dim = get_stats("../../datasets/criteo_stats") 213 | data = "../../datasets/criteo" 214 | elif FLAGS.dataset == "Avazu": 215 | field_dim = get_stats("../../datasets/avazu_stats") 216 | data = "../../datasets/avazu" 217 | elif FLAGS.dataset == "KDD12": 218 | field_dim = get_stats("../../datasets/kdd12_stats") 219 | data = "../../datasets/kdd12" 220 | 221 | train_opt = { 222 | "model":FLAGS.model, "optimizer":FLAGS.optimizer, 223 | "lr":FLAGS.lr, "wd":FLAGS.wd, "arch_lr":FLAGS.arch_lr, 224 | "field_dim":field_dim, "latent_dim":FLAGS.latent_dim, 225 | "mlp_dims":FLAGS.mlp_dims, "mlp_dropout":FLAGS.mlp_dropout, 226 | "cross_layer_num":FLAGS.cross_layer_num 227 | } 228 | opt = { 229 | "dataset":FLAGS.dataset, "data_path":data, "cuda":FLAGS.gpu, "model":FLAGS.model, 230 | "batch_size":FLAGS.batch_size, "train":train_opt 231 | } 232 | # print("opt:{}".format(opt)) 233 | 234 | trainer = Trainer(opt) 235 | trainer.test_time() 236 | # trainer.pretrain(FLAGS.pretrain) 237 | # trainer.train(FLAGS.epoch) 238 | 239 | if __name__ == '__main__': 240 | try: 241 | main() 242 | os._exit(0) 243 | except: 244 | import traceback 245 | traceback.print_exc() 246 | time.sleep(1) 247 | os._exit(1) 248 | 
--------------------------------------------------------------------------------
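
The LPFS code above combines two pieces: the smoothed gate (`lpfs` / `lpfs_pp` in `LPFS/models/modules.py`) scales each field's embedding by a learned gate value, and the proximal-L1 (soft-thresholding) step in `Trainer.__train` of `LPFS/train.py` pushes small architecture weights to exactly zero, which is what `calc_sparsity` counts. Below is a minimal illustrative sketch of that interaction, not code from the repository; the learning rate and weight values are exaggerated so the zeroing is visible after a single step (with the repository defaults it accumulates over many steps).

```python
import torch

def lpfs(x, epsilon):
    # Smoothed step: ~0 when |x| << sqrt(epsilon), ~1 when |x| >> sqrt(epsilon).
    return x * x / (x * x + epsilon)

# Hypothetical values chosen only to make the effect visible in one step.
epsilon, lam, lr = 1e-1, 1e-1, 5e-2
arch = torch.tensor([1.02, 0.004, 0.98, -0.006, 1.03])  # per-field arch weights after some training

# Proximal-L1 (soft-thresholding) update, mirroring Trainer.__train:
thr = 2 * lam * lr
arch = torch.where(arch > thr, arch - thr,
                   torch.where(arch < -thr, arch + thr, torch.zeros_like(arch)))

gate = lpfs(arch, epsilon)   # per-field multiplier applied to the embeddings in calc_input
kept = arch > 1e-10          # mirrors the hard threshold in BasicModel.get_arch
print(gate)                  # the two small weights now get an exactly-zero gate
print("fields kept:", kept.tolist())
```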
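
`AutoField/search.py` and `AdaFS/train.py` share the same first-order bi-level update: copy the current model weights into a proxy model, advance the proxy by one SGD step, then differentiate the proxy's loss with respect to the architecture (AutoField) or controller (AdaFS) parameters only. The sketch below reproduces that control flow on a toy linear model; the sigmoid gating of raw input features is a simplifying assumption for illustration and is not the gating either repository model actually uses.

```python
import torch
import torch.nn.functional as F

torch.manual_seed(0)
x = torch.randn(32, 8)                      # toy batch: 32 examples, 8 "fields"
y = torch.randint(0, 2, (32,)).float()      # binary CTR-style labels

model = torch.nn.Linear(8, 1)
proxy = torch.nn.Linear(8, 1)
arch = torch.zeros(8, requires_grad=True)   # per-field importance, as in Searcher.__init__

optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
proxy_optimizer = torch.optim.SGD(proxy.parameters(), lr=1e-3)
arch_optimizer = torch.optim.Adam([arch], lr=1e-3)

def forward(net, inp, a):
    # Simplified gating: scale each input feature by sigmoid(arch weight).
    return net(inp * torch.sigmoid(a)).squeeze(1)

# 1) Copy current weights into the proxy model.
for p_proxy, p_model in zip(proxy.parameters(), model.parameters()):
    p_proxy.data.copy_(p_model.data)

# 2) Update the main model on the training loss.
optimizer.zero_grad()
F.binary_cross_entropy_with_logits(forward(model, x, arch), y).backward()
optimizer.step()

# 3) One SGD step on the proxy: a cheap look-ahead of the model weights.
proxy_optimizer.zero_grad()
F.binary_cross_entropy_with_logits(forward(proxy, x, arch), y).backward()
proxy_optimizer.step()

# 4) Update the architecture parameters against the look-ahead proxy
#    (arch.grad is zeroed only here, as in the repository code).
arch_optimizer.zero_grad()
F.binary_cross_entropy_with_logits(forward(proxy, x, arch), y).backward()
arch_optimizer.step()
```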