├── .gitignore ├── LICENSE ├── README.md ├── assets ├── image.png └── overview.png ├── config ├── dataset_config.yaml ├── model_config.yaml └── tuner │ └── TiktokLarge_STEM_tuner.yaml ├── data └── TiktokLarge │ └── preprocess.py ├── fuxictr ├── __init__.py ├── autotuner.py ├── datasets │ ├── __init__.py │ ├── avazu.py │ ├── criteo.py │ └── kkbox.py ├── features.py ├── metrics.py ├── preprocess │ ├── __init__.py │ ├── build_dataset.py │ ├── feature_processor.py │ ├── normalizer.py │ └── tokenizer.py ├── pytorch │ ├── __init__.py │ ├── dataloaders │ │ ├── __init__.py │ │ ├── h5_block_dataloader.py │ │ └── h5_dataloader.py │ ├── layers │ │ ├── __init__.py │ │ ├── activations.py │ │ ├── attentions │ │ │ ├── __init__.py │ │ │ ├── dot_product_attention.py │ │ │ ├── squeeze_excitation.py │ │ │ └── target_attention.py │ │ ├── blocks │ │ │ ├── __init__.py │ │ │ ├── factorization_machine.py │ │ │ ├── logistic_regression.py │ │ │ └── mlp_block.py │ │ ├── embeddings │ │ │ ├── __init__.py │ │ │ ├── feature_embedding.py │ │ │ └── pretrained_embedding.py │ │ ├── interactions │ │ │ ├── __init__.py │ │ │ ├── bilinear_interaction.py │ │ │ ├── compressed_interaction_net.py │ │ │ ├── cross_net.py │ │ │ ├── holographic_interaction.py │ │ │ ├── inner_product.py │ │ │ └── interaction_machine.py │ │ └── pooling.py │ ├── models │ │ ├── __init__.py │ │ ├── multitask_model.py │ │ └── rank_model.py │ └── torch_utils.py ├── tensorflow │ ├── __init__.py │ ├── dataloaders │ │ ├── __init__.py │ │ └── tf_dataloader.py │ ├── layers │ │ ├── __init__.py │ │ ├── blocks │ │ │ ├── __init__.py │ │ │ ├── factorization_machine.py │ │ │ ├── linear.py │ │ │ ├── logistic_regression.py │ │ │ └── mlp_block.py │ │ ├── embeddings │ │ │ ├── __init__.py │ │ │ └── feature_embedding.py │ │ ├── interactions │ │ │ ├── __init__.py │ │ │ ├── cross_net.py │ │ │ └── inner_product.py │ │ └── pooling.py │ ├── models │ │ ├── __init__.py │ │ └── rank_model.py │ └── tf_utils.py ├── utils.py └── version.py ├── models ├── AITM.py 
├── ESMM.py ├── MMoE.py ├── OMoE.py ├── PLE.py ├── STEM.py ├── SharedBottom.py └── __init__.py ├── run_expid.py ├── run_expid_list.py └── run_param_tuner.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 
92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # poetry 98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 102 | #poetry.lock 103 | 104 | # pdm 105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 106 | #pdm.lock 107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 108 | # in version control. 109 | # https://pdm.fming.dev/#use-with-ide 110 | .pdm.toml 111 | 112 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 113 | __pypackages__/ 114 | 115 | # Celery stuff 116 | celerybeat-schedule 117 | celerybeat.pid 118 | 119 | # SageMath parsed files 120 | *.sage.py 121 | 122 | # Environments 123 | .env 124 | .venv 125 | env/ 126 | venv/ 127 | ENV/ 128 | env.bak/ 129 | venv.bak/ 130 | 131 | # Spyder project settings 132 | .spyderproject 133 | .spyproject 134 | 135 | # Rope project settings 136 | .ropeproject 137 | 138 | # mkdocs documentation 139 | /site 140 | 141 | # mypy 142 | .mypy_cache/ 143 | .dmypy.json 144 | dmypy.json 145 | 146 | # Pyre type checker 147 | .pyre/ 148 | 149 | # pytype static type analyzer 150 | .pytype/ 151 | 152 | # Cython debug symbols 153 | cython_debug/ 154 | 155 | # PyCharm 156 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 157 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 158 | # and can be added to the global gitignore or merged into 
this file. For a more nuclear 159 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 160 | #.idea/ 161 | 162 | # data 163 | *.txt 164 | *.tgz 165 | *.csv 166 | data/TiktokLarge/TiktokLarge/ 167 | 168 | #checkpoints 169 | checkpoints -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # STEM: Unleashing the Power of Embeddings for Multi-Task Recommendation 2 | ![logo](assets/image.png) 3 | 4 | The open source code for "[STEM: Unleashing the Power of Embeddings for Multi-Task Recommendation](https://arxiv.org/abs/2308.13537)" presented at AAAI 2024. 5 | 6 | 7 | 🚧 **This code repository is under construction, please stay tuned!** 8 | ## Model Overview 9 | ![overview](assets/overview.png) 10 | 11 | ## Getting Started 12 | ### Data Preparation 13 | #### Tiktok 14 | Step 1. Please download the dataset ```final_track1_train.txt.tgz``` of IEEE ICME 2019 Grand Challenge Track 1 to the directory ```data/TiktokLarge/raw_data```. You can download the original dataset from [BaiduNetDisk](https://pan.baidu.com/s/1ktHIHVx6mJqKnZCNcvQ41w)(password:tk1c). Kindly be informed that the usage of this dataset is restricted to academic research purposes exclusively and it must not be utilized for any commercial or illegal activities. 15 | 16 | Step 2. Then, run the data preprocessing script as follows. 17 | ```sh 18 | cd data/TiktokLarge 19 | # Please put final_track1_train.txt under ./raw_data 20 | # or modify the data set path in preprocess.py 21 | python preprocess.py 22 | ``` 23 | 24 | ### Reproduce Steps 25 | 26 | In the next steps, we assume that the directory is ```src```. 27 | 28 | #### Model and Dataset Configuration 29 | We provided available model configuration file ```model_config.yaml``` and data configuration file ```dataset_config.yaml``` in the ```config``` directory. 
Please modify these two files according to your needs if necessary. 30 | 31 | #### Reproduce the model performance 32 | You can train a STEM model by running the following script. 33 | ```sh 34 | python run_expid.py --expid STEM_TiktokLarge --config ./config --gpu 0 35 | ``` 36 | For better performance, we provide the tuner to automatically search for optimal hyperparameters. For example, 37 | ```sh 38 | # Please modify TiktokLarge_STEM_tuner.yaml to adjust the search space 39 | python run_param_tuner.py --config ./config/tuner/TiktokLarge_STEM_tuner.yaml --gpu 0 1 40 | ``` 41 | We also provide baseline implementations for comparing model performance. For example, 42 | ```sh 43 | # Shared-Bottom 44 | python run_expid.py --expid SharedBottom_TiktokLarge --config ./config --gpu 0 45 | # MMoE 46 | python run_expid.py --expid MMoE_TiktokLarge --config ./config --gpu 0 47 | # PLE 48 | python run_expid.py --expid PLE_TiktokLarge --config ./config --gpu 0 49 | ``` 50 | 51 | ## Contact 52 | If you have any problems with this implementation, please create an issue or send us an email at: 53 | - sulc21@mails.tsinghua.edu.cn (Liangcai Su) 54 | 55 | ## Citation 56 | 57 | If you find our code or processed data helpful in your research, please kindly cite the following papers. 58 | ```bibtex 59 | @article{AAAI24_STEM, 60 | author = {Liangcai Su and 61 | Junwei Pan and 62 | Ximei Wang and 63 | Xi Xiao and 64 | Shijie Quan and 65 | Xihua Chen and 66 | Jie Jiang}, 67 | title = {{STEM:} Unleashing the Power of Embeddings for Multi-task Recommendation}, 68 | journal = {Proceedings of the 38th AAAI Conference on Artificial Intelligence (AAAI 2024)}, 69 | year = {2024}, 70 | } 71 | ``` 72 | 73 | 74 | Our code is based on FuxiCTR and BARS. 75 | 76 | > Jieming Zhu, Jinyang Liu, Shuai Yang, Qi Zhang, Xiuqiang He. [Open Benchmarking for Click-Through Rate Prediction](https://arxiv.org/abs/2009.05794). 
*The 30th ACM International Conference on Information and Knowledge Management (CIKM)*, 2021. [[Bibtex](https://dblp.org/rec/conf/cikm/ZhuLYZH21.html?view=bibtex)] 77 | 78 | > Jieming Zhu, Quanyu Dai, Liangcai Su, Rong Ma, Jinyang Liu, Guohao Cai, Xi Xiao, Rui Zhang. [BARS: Towards Open Benchmarking for Recommender Systems](https://arxiv.org/abs/2205.09626). *The 45th International ACM SIGIR Conference on Research and Development in Information Retrieval (SIGIR)*, 2022. [[Bibtex](https://dblp.org/rec/conf/sigir/ZhuDSMLCXZ22.html?view=bibtex)] 79 | -------------------------------------------------------------------------------- /assets/image.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LiangcaiSu/STEM/769e2af0d0d1a0be9f58b475b95e05f502a20df5/assets/image.png -------------------------------------------------------------------------------- /assets/overview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LiangcaiSu/STEM/769e2af0d0d1a0be9f58b475b95e05f502a20df5/assets/overview.png -------------------------------------------------------------------------------- /config/dataset_config.yaml: -------------------------------------------------------------------------------- 1 | TiktokLarge: 2 | data_root: ./data/TiktokLarge 3 | data_format: csv 4 | train_data: ./data/TiktokLarge/train.csv 5 | valid_data: ./data/TiktokLarge/valid.csv 6 | test_data: ./data/TiktokLarge/test.csv 7 | min_categr_count: 10 8 | feature_cols: 9 | [ { name: ['uid', 'item_id', 'author_id', 'item_city', 10 | 'channel', 'music_id', 'device_id', 'video_duration'], 11 | active: True, dtype: str, type: categorical } ] 12 | label_col: [ { name: finish, dtype: float }, 13 | { name: like, dtype: float } ] 14 | group_id: 'uid' 15 | 16 | -------------------------------------------------------------------------------- /config/model_config.yaml: 
-------------------------------------------------------------------------------- 1 | Base: 2 | model_root: './checkpoints' 3 | num_workers: 3 4 | verbose: 1 5 | early_stop_patience: 1 6 | pickle_feature_encoder: True 7 | save_best_only: True 8 | eval_steps: null 9 | debug_mode: False 10 | group_id: null 11 | use_features: null 12 | feature_specs: null 13 | feature_config: null 14 | 15 | SharedBottom_TiktokLarge_finish: 16 | model: SharedBottom 17 | dataset_id: TiktokLarge 18 | task_labels: ['finish'] 19 | loss: ['binary_crossentropy'] 20 | metrics: ['logloss', 'AUC', 'gAUC'] 21 | task: ['binary_classification'] 22 | num_tasks: 1 23 | optimizer: adam 24 | learning_rate: 1.e-4 25 | bottom_hidden_units: [512,512,512] 26 | tower_hidden_units: [128, 128] 27 | hidden_activations: relu 28 | net_regularizer: 0 29 | embedding_regularizer: 1.e-6 30 | batch_norm: False 31 | net_dropout: 0 32 | batch_size: 4096 33 | embedding_dim: 16 34 | epochs: 20 35 | shuffle: True 36 | seed: 2023 37 | monitor: 'AUC' 38 | monitor_mode: 'max' 39 | group_id: 'uid' 40 | 41 | SharedBottom_TiktokLarge_like: 42 | model: SharedBottom 43 | dataset_id: TiktokLarge 44 | loss: ['binary_crossentropy'] 45 | metrics: ['logloss', 'AUC', 'gAUC'] 46 | task: ['binary_classification'] 47 | task_labels: ['like'] 48 | num_tasks: 1 49 | optimizer: adam 50 | learning_rate: 1.e-4 51 | bottom_hidden_units: [512,512,512] 52 | tower_hidden_units: [128, 128] 53 | hidden_activations: relu 54 | net_regularizer: 0 55 | embedding_regularizer: 1.e-6 56 | batch_norm: False 57 | net_dropout: 0 58 | batch_size: 4096 59 | embedding_dim: 16 60 | epochs: 20 61 | shuffle: True 62 | seed: 2023 63 | monitor: 'AUC' 64 | monitor_mode: 'max' 65 | group_id: 'uid' 66 | 67 | SharedBottom_TiktokLarge: 68 | model: SharedBottom 69 | dataset_id: TiktokLarge 70 | loss: ['binary_crossentropy','binary_crossentropy'] 71 | metrics: ['logloss', 'AUC', 'gAUC'] 72 | task: ['binary_classification','binary_classification'] 73 | num_tasks: 2 74 | 
optimizer: adam 75 | learning_rate: 1.e-4 76 | bottom_hidden_units: [512,512,512] 77 | tower_hidden_units: [128, 128] 78 | hidden_activations: relu 79 | net_regularizer: 0 80 | embedding_regularizer: 1.e-6 81 | batch_norm: False 82 | net_dropout: 0 83 | batch_size: 4096 84 | embedding_dim: 16 85 | epochs: 20 86 | shuffle: True 87 | seed: 2023 88 | monitor: 'AUC' 89 | monitor_mode: 'max' 90 | group_id: 'uid' 91 | 92 | 93 | OMoE_TiktokLarge: 94 | model: OMoE 95 | dataset_id: TiktokLarge 96 | loss: ['binary_crossentropy','binary_crossentropy'] 97 | metrics: ['logloss', 'AUC', 'gAUC'] 98 | task: ['binary_classification','binary_classification'] 99 | num_tasks: 2 100 | optimizer: adam 101 | learning_rate: 1.e-4 102 | num_experts: 8 103 | expert_hidden_units: [512,512,512] 104 | gate_hidden_units: [128, 64] 105 | tower_hidden_units: [128, 64] 106 | hidden_activations: relu 107 | net_regularizer: 0 108 | embedding_regularizer: 1.e-6 109 | batch_norm: False 110 | net_dropout: 0 111 | batch_size: 4096 112 | embedding_dim: 16 113 | epochs: 20 114 | shuffle: True 115 | seed: 2023 116 | monitor: 'AUC' 117 | monitor_mode: 'max' 118 | group_id: 'uid' 119 | 120 | 121 | MMoE_TiktokLarge: 122 | model: MMoE 123 | dataset_id: TiktokLarge 124 | loss: ['binary_crossentropy','binary_crossentropy'] 125 | metrics: ['logloss', 'AUC', 'gAUC'] 126 | task: ['binary_classification','binary_classification'] 127 | num_tasks: 2 128 | optimizer: adam 129 | learning_rate: 1.e-4 130 | num_experts: 8 131 | expert_hidden_units: [512,512,512] 132 | gate_hidden_units: [128, 64] 133 | tower_hidden_units: [128, 64] 134 | hidden_activations: relu 135 | net_regularizer: 0 136 | embedding_regularizer: 1.e-6 137 | batch_norm: False 138 | net_dropout: 0 139 | batch_size: 4096 140 | embedding_dim: 16 141 | epochs: 20 142 | shuffle: True 143 | seed: 2023 144 | monitor: 'AUC' 145 | monitor_mode: 'max' 146 | group_id: 'uid' 147 | 148 | 149 | PLE_TiktokLarge: 150 | model: PLE 151 | dataset_id: TiktokLarge 152 | 
loss: ['binary_crossentropy','binary_crossentropy'] 153 | metrics: ['logloss', 'AUC', 'gAUC'] 154 | task: ['binary_classification','binary_classification'] 155 | num_tasks: 2 156 | optimizer: adam 157 | learning_rate: 1.e-4 158 | num_layers: 1 159 | num_workers: 4 160 | num_shared_experts: 8 161 | num_specific_experts: 1 162 | expert_hidden_units: [512,512,512] 163 | gate_hidden_units: [128, 64] 164 | tower_hidden_units: [128, 64] 165 | hidden_activations: relu 166 | net_regularizer: 0 167 | embedding_regularizer: 1.e-6 168 | batch_norm: False 169 | net_dropout: 0 170 | batch_size: 4096 171 | embedding_dim: 16 172 | epochs: 20 173 | shuffle: True 174 | seed: 2023 175 | monitor: 'AUC' 176 | monitor_mode: 'max' 177 | group_id: 'uid' 178 | 179 | AITM_TiktokLarge: 180 | model: AITM 181 | dataset_id: TiktokLarge 182 | loss: ['binary_crossentropy','binary_crossentropy'] 183 | metrics: ['logloss', 'AUC', 'gAUC'] 184 | task: ['binary_classification','binary_classification'] 185 | num_tasks: 2 186 | optimizer: adam 187 | learning_rate: 1.e-4 188 | bottom_hidden_units: [512,512,512] 189 | tower_hidden_units: [128, 128] 190 | hidden_activations: relu 191 | net_regularizer: 0 192 | embedding_regularizer: 1.e-6 193 | batch_norm: False 194 | net_dropout: 0 195 | batch_size: 4096 196 | embedding_dim: 16 197 | epochs: 20 198 | shuffle: True 199 | seed: 2023 200 | monitor: 'AUC' 201 | monitor_mode: 'max' 202 | group_id: 'uid' 203 | 204 | 205 | ESMM_TiktokLarge: 206 | model: ESMM 207 | dataset_id: TiktokLarge 208 | loss: ['binary_crossentropy','binary_crossentropy'] 209 | metrics: ['logloss', 'AUC'] 210 | task: ['binary_classification','binary_classification'] 211 | num_tasks: 2 212 | optimizer: adam 213 | learning_rate: 1.e-4 214 | tower_hidden_units: [512, 512, 512] 215 | hidden_activations: relu 216 | net_regularizer: 0 217 | embedding_regularizer: 1.e-6 218 | batch_norm: False 219 | net_dropout: 0 220 | batch_size: 4096 221 | embedding_dim: 16 222 | epochs: 50 223 | shuffle: 
True 224 | seed: 2023 225 | monitor: 'AUC' 226 | monitor_mode: 'max' 227 | 228 | 229 | 230 | MMoE_ME_TiktokLarge: 231 | model: MMoE_ME 232 | dataset_id: TiktokLarge 233 | loss: ['binary_crossentropy','binary_crossentropy'] 234 | metrics: ['logloss', 'AUC', 'gAUC'] 235 | task: ['binary_classification','binary_classification'] 236 | num_tasks: 2 237 | optimizer: adam 238 | learning_rate: 1.e-4 239 | num_experts: 8 240 | expert_hidden_units: [512,512,512] 241 | gate_hidden_units: [128, 64] 242 | tower_hidden_units: [128, 64] 243 | hidden_activations: relu 244 | net_regularizer: 0 245 | embedding_regularizer: 1.e-6 246 | batch_norm: False 247 | net_dropout: 0 248 | batch_size: 4096 249 | embedding_dim: 16 250 | epochs: 20 251 | shuffle: True 252 | seed: 2023 253 | monitor: 'AUC' 254 | monitor_mode: 'max' 255 | group_id: 'uid' 256 | 257 | 258 | PLE_ME_TiktokLarge: 259 | model: PLE_ME 260 | dataset_id: TiktokLarge 261 | loss: ['binary_crossentropy','binary_crossentropy'] 262 | metrics: ['logloss', 'AUC', 'gAUC'] 263 | task: ['binary_classification','binary_classification'] 264 | num_tasks: 2 265 | optimizer: adam 266 | learning_rate: 1.e-4 267 | num_layers: 1 268 | num_workers: 4 269 | num_shared_experts: 8 270 | num_specific_experts: 1 271 | expert_hidden_units: [512,512,512] 272 | gate_hidden_units: [128, 64] 273 | tower_hidden_units: [128, 64] 274 | hidden_activations: relu 275 | net_regularizer: 0 276 | embedding_regularizer: 1.e-6 277 | batch_norm: False 278 | net_dropout: 0 279 | batch_size: 4096 280 | embedding_dim: 16 281 | epochs: 20 282 | shuffle: True 283 | seed: 2023 284 | monitor: 'AUC' 285 | monitor_mode: 'max' 286 | group_id: 'uid' 287 | 288 | STEM_TiktokLarge: 289 | model: STEM 290 | dataset_id: TiktokLarge 291 | loss: ['binary_crossentropy','binary_crossentropy'] 292 | metrics: ['logloss', 'AUC', 'gAUC'] 293 | task: ['binary_classification','binary_classification'] 294 | num_tasks: 2 295 | optimizer: adam 296 | learning_rate: 1.e-3 297 | num_layers: 1 
298 | num_shared_experts: 8 299 | num_specific_experts: 1 300 | expert_hidden_units: [512,512,512] 301 | gate_hidden_units: [128, 64] 302 | tower_hidden_units: [128, 64] 303 | hidden_activations: relu 304 | net_regularizer: 0 305 | embedding_regularizer: 1.e-6 306 | batch_norm: False 307 | net_dropout: 0 308 | batch_size: 4096 309 | embedding_dim: 16 310 | epochs: 20 311 | shuffle: True 312 | seed: 2023 313 | monitor: 'AUC' 314 | monitor_mode: 'max' 315 | group_id: 'uid' 316 | -------------------------------------------------------------------------------- /config/tuner/TiktokLarge_STEM_tuner.yaml: -------------------------------------------------------------------------------- 1 | base_config: ./config 2 | base_expid: STEM_TiktokLarge 3 | dataset_id: TiktokLarge 4 | 5 | tuner_space: 6 | model_root: './checkpoints/TiktokLarge/TiktokLarge_STEM_tuner' 7 | num_shared_experts: [1,2,4,8] 8 | num_specific_experts: [1,2,4] 9 | expert_hidden_units: [[512,512,512]] 10 | gate_hidden_units: [[128, 64]] 11 | tower_hidden_units: [[128, 64]] 12 | hidden_activations: relu 13 | net_regularizer: 0 14 | embedding_regularizer: [1.e-6, 1.e-5, 1.e-4, 1.e-3, 5.e-6, 5.e-5, 5.e-4, 5.e-3] 15 | net_dropout: [0, 0.1, 0.2, 0.3] 16 | batch_norm: False 17 | learning_rate: [1.e-4] 18 | batch_size: 4096 19 | seed: [2023, 76, 525, 728, 42] 20 | group_id: uid 21 | metrics: [[gAUC, AUC, logloss]] 22 | monitor: {"gAUC": 0, "AUC": 1} 23 | -------------------------------------------------------------------------------- /data/TiktokLarge/preprocess.py: -------------------------------------------------------------------------------- 1 | # %% 2 | import numpy as np 3 | import pandas as pd 4 | # from datetime import datetime 5 | from sklearn.model_selection import train_test_split 6 | 7 | # %% 8 | data = pd.read_csv('./raw_data/final_track1_train.txt',sep='\t', 9 | names=['uid', 'user_city', 'item_id', 'author_id', 'item_city', 'channel', 'finish', 'like', 'music_id', 10 | 'device_id', 'create_time', 
'video_duration'], 11 | header=None) 12 | 13 | 14 | # %% 15 | train_valid_set, test_set = train_test_split(data, test_size=0.1, random_state=42) 16 | train_set, valid_set = train_test_split(train_valid_set, test_size=0.1, random_state=42) 17 | 18 | # %% 19 | train_set.to_csv('./train.csv',index=None) 20 | valid_set.to_csv('./valid.csv',index=None) 21 | test_set.to_csv('./test.csv',index=None) 22 | -------------------------------------------------------------------------------- /fuxictr/__init__.py: -------------------------------------------------------------------------------- 1 | from .version import __version__ -------------------------------------------------------------------------------- /fuxictr/autotuner.py: -------------------------------------------------------------------------------- 1 | # ========================================================================= 2 | # Copyright (C) 2022. Huawei Technologies Co., Ltd. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | # ========================================================================= 16 | 17 | 18 | import itertools 19 | import subprocess 20 | import yaml 21 | import os 22 | import numpy as np 23 | import time 24 | import glob 25 | import hashlib 26 | from .utils import print_to_json, load_model_config, load_dataset_config 27 | 28 | # add this line to avoid weird characters in yaml files 29 | yaml.Dumper.ignore_aliases = lambda *args : True 30 | 31 | def enumerate_params(config_file, exclude_expid=[]): 32 | with open(config_file, "r") as cfg: 33 | config_dict = yaml.load(cfg, Loader=yaml.FullLoader) 34 | # tuning space 35 | tune_dict = config_dict["tuner_space"] 36 | for k, v in tune_dict.items(): 37 | if not isinstance(v, list): 38 | tune_dict[k] = [v] 39 | experiment_id = config_dict["base_expid"] 40 | if "model_config" in config_dict: 41 | model_dict = config_dict["model_config"][experiment_id] 42 | else: 43 | base_config_dir = config_dict.get("base_config", os.path.dirname(config_file)) 44 | model_dict = load_model_config(base_config_dir, experiment_id) 45 | 46 | dataset_id = config_dict.get("dataset_id", model_dict["dataset_id"]) 47 | if "dataset_config" in config_dict: 48 | dataset_dict = config_dict["dataset_config"][dataset_id] 49 | else: 50 | dataset_dict = load_dataset_config(base_config_dir, dataset_id) 51 | 52 | if model_dict["dataset_id"] == "TBD": # rename base expid 53 | model_dict["dataset_id"] = dataset_id 54 | experiment_id = model_dict["model"] + "_" + dataset_id 55 | 56 | # key checking 57 | tuner_keys = set(tune_dict.keys()) 58 | base_keys = set(model_dict.keys()).union(set(dataset_dict.keys())) 59 | if len(tuner_keys - base_keys) > 0: 60 | raise RuntimeError("Invalid params in tuner config: {}".format(tuner_keys - base_keys)) 61 | 62 | config_dir = config_file.replace(".yaml", "") 63 | if not os.path.exists(config_dir): 64 | os.makedirs(config_dir) 65 | 66 | # enumerate dataset para combinations 67 | dataset_dict = {k: tune_dict[k] if k in 
tune_dict else [v] for k, v in dataset_dict.items()} 68 | dataset_para_keys = list(dataset_dict.keys()) 69 | dataset_para_combs = dict() 70 | for idx, values in enumerate(itertools.product(*map(dataset_dict.get, dataset_para_keys))): 71 | dataset_params = dict(zip(dataset_para_keys, values)) 72 | if dataset_params["data_format"] == "h5": 73 | dataset_para_combs[dataset_id] = dataset_params 74 | else: 75 | hash_id = hashlib.md5("".join(sorted(print_to_json(dataset_params))).encode("utf-8")).hexdigest()[0:8] 76 | dataset_para_combs[dataset_id + "_{}".format(hash_id)] = dataset_params 77 | 78 | # dump dataset para combinations to config file 79 | dataset_config = os.path.join(config_dir, "dataset_config.yaml") 80 | with open(dataset_config, "w") as fw: 81 | yaml.dump(dataset_para_combs, fw, default_flow_style=None, indent=4) 82 | 83 | # enumerate model para combinations 84 | model_dict = {k: tune_dict[k] if k in tune_dict else [v] for k, v in model_dict.items()} 85 | model_para_keys = list(model_dict.keys()) 86 | model_param_combs = dict() 87 | for idx, values in enumerate(itertools.product(*map(model_dict.get, model_para_keys))): 88 | model_param_combs[idx + 1] = dict(zip(model_para_keys, values)) 89 | 90 | # update dataset_id into model params 91 | merged_param_combs = dict() 92 | for idx, item in enumerate(itertools.product(model_param_combs.values(), 93 | dataset_para_combs.keys())): 94 | para_dict = item[0] 95 | para_dict["dataset_id"] = item[1] 96 | del para_dict["model_id"] 97 | random_str = "" 98 | if para_dict["debug_mode"]: 99 | random_str = "{:06d}".format(np.random.randint(1e6)) # add a random number to avoid duplicate during debug 100 | hash_id = hashlib.md5(("".join(sorted(print_to_json(para_dict))) + random_str).encode("utf-8")).hexdigest()[0:8] 101 | hash_expid = experiment_id + "_{:03d}_{}".format(idx + 1, hash_id) 102 | if hash_expid not in exclude_expid: 103 | merged_param_combs[hash_expid] = para_dict.copy() 104 | 105 | # dump model para 
combinations to config file 106 | model_config = os.path.join(config_dir, "model_config.yaml") 107 | with open(model_config, "w") as fw: 108 | yaml.dump(merged_param_combs, fw, default_flow_style=None, indent=4) 109 | print("Enumerate all tuner configurations done.") 110 | return config_dir 111 | 112 | def load_experiment_ids(config_dir): 113 | model_configs = glob.glob(os.path.join(config_dir, "model_config.yaml")) 114 | if not model_configs: 115 | model_configs = glob.glob(os.path.join(config_dir, "model_config/*.yaml")) 116 | experiment_id_list = [] 117 | for config in model_configs: 118 | with open(config, "r") as cfg: 119 | config_dict = yaml.load(cfg, Loader=yaml.FullLoader) 120 | experiment_id_list += config_dict.keys() 121 | return sorted(experiment_id_list) 122 | 123 | def grid_search(config_dir, gpu_list, expid_tag=None, script='run_expid.py'): 124 | experiment_id_list = load_experiment_ids(config_dir) 125 | if expid_tag is not None: 126 | experiment_id_list = [expid for expid in experiment_id_list if str(expid_tag) in expid] 127 | assert len(experiment_id_list) > 0, "tag={} does not match any expid." 128 | gpu_list = list(gpu_list) 129 | idle_queue = list(range(len(gpu_list))) 130 | processes = dict() 131 | while len(experiment_id_list) > 0: 132 | if len(idle_queue) > 0: 133 | idle_idx = idle_queue.pop(0) 134 | gpu_id = gpu_list[idle_idx] 135 | expid = experiment_id_list.pop(0) 136 | cmd = "python -u {} --config {} --expid {} --gpu {}"\ 137 | .format(script, config_dir, expid, gpu_id) 138 | p = subprocess.Popen(cmd.split()) 139 | processes[idle_idx] = p 140 | else: 141 | time.sleep(3) 142 | for idle_idx, p in processes.items(): 143 | if p.poll() is not None: # terminated 144 | idle_queue.append(idle_idx) 145 | [p.wait() for p in processes.values()] 146 | -------------------------------------------------------------------------------- /fuxictr/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | from . 
import criteo 2 | from . import avazu 3 | from . import kkbox 4 | -------------------------------------------------------------------------------- /fuxictr/datasets/avazu.py: -------------------------------------------------------------------------------- 1 | # ========================================================================= 2 | # Copyright (C) 2022. Huawei Technologies Co., Ltd. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
class FeatureProcessor(BaseFeatureProcessor):
    """Avazu-specific preprocessor deriving time features from the 'hour'
    column, whose values are timestamps formatted as YYMMDDHH."""

    def convert_weekday(self, df, col_name):
        """Return the day of week (0=Sunday .. 6=Saturday) for each row."""
        def _weekday(ts):
            day = date(int('20' + ts[0:2]), int(ts[2:4]), int(ts[4:6]))
            return int(day.strftime('%w'))
        return df['hour'].apply(_weekday)

    def convert_weekend(self, df, col_name):
        """Return 1 for Saturday/Sunday timestamps, else 0."""
        def _is_weekend(ts):
            day = date(int('20' + ts[0:2]), int(ts[2:4]), int(ts[4:6]))
            return 1 if day.strftime('%w') in ['6', '0'] else 0
        return df['hour'].apply(_is_weekend)

    def convert_hour(self, df, col_name):
        """Extract the hour-of-day (last two digits) as an int."""
        return df['hour'].apply(lambda ts: int(ts[6:8]))
class FeatureProcessor(BaseFeatureProcessor):
    """Criteo-specific preprocessor for the numeric feature columns."""

    def convert_to_bucket(self, df, col_name):
        """Discretize a numeric column: values <= 2 map to themselves,
        larger values to floor(log(v)^2) (the classic Criteo bucketing)."""
        def _bucket(value):
            return int(np.floor(np.log(value) ** 2)) if value > 2 else int(value)
        return df[col_name].map(_bucket).astype(int)
class FeatureProcessor(BaseFeatureProcessor):
    """KKBox-specific preprocessor for ISRC and user-age columns."""

    def extract_country_code(self, df, col_name):
        """Take the leading two letters of an ISRC code; missing -> ''."""
        return df[col_name].apply(lambda isrc: "" if pd.isnull(isrc) else isrc[0:2])

    def bucketize_age(self, df, col_name):
        """Map ages into decade buckets "1".."7"; missing or out-of-range
        ages ([1, 95] is considered valid) map to ''."""
        def _bucket(age):
            if pd.isnull(age):
                return ""
            age = float(age)
            if age < 1 or age > 95:
                return ""
            # Decade upper bounds paired with their bucket labels.
            for upper, label in ((10, "1"), (20, "2"), (30, "3"),
                                 (40, "4"), (50, "5"), (60, "6")):
                if age <= upper:
                    return label
            return "7"
        return df[col_name].apply(_bucket)
class FeatureMap(object):
    """Schema of a processed dataset: the ordered feature specs, label names,
    and the mapping from each feature to its column position(s) in the packed
    data arrays."""

    def __init__(self, dataset_id, data_dir):
        self.data_dir = data_dir  # kept so the embedding layer can locate pretrained embeddings
        self.dataset_id = dataset_id
        self.num_fields = 0
        self.total_features = 0
        self.input_length = 0
        self.features = OrderedDict()
        self.labels = []
        self.column_index = dict()
        self.group_id = None
        self.default_emb_dim = None

    def load(self, json_file, params):
        """Populate this FeatureMap from a feature_map.json, applying any
        `use_features` subset and `feature_specs` overrides from params."""
        logging.info("Load feature_map from json: " + json_file)
        with io.open(json_file, "r", encoding="utf-8") as fd:
            feature_map = json.load(fd)
        if feature_map["dataset_id"] != self.dataset_id:
            raise RuntimeError("dataset_id={} does not match feature_map!".format(self.dataset_id))
        self.num_fields = feature_map["num_fields"]
        self.labels = feature_map.get("labels", [])
        self.total_features = feature_map.get("total_features", 0)
        self.input_length = feature_map.get("input_length", 0)
        self.group_id = feature_map.get("group_id", None)
        self.default_emb_dim = params.get("embedding_dim", None)
        # The json stores features as a list of single-entry dicts; flatten
        # them into one ordered mapping.
        merged = OrderedDict()
        for entry in feature_map["features"]:
            merged.update(entry)
        self.features = merged
        selected = params.get("use_features", None)
        if selected:
            self.features = OrderedDict((name, self.features[name]) for name in selected)
        if params.get("feature_specs", None):
            self.update_feature_specs(params["feature_specs"])
        self.set_column_index()

    def update_feature_specs(self, feature_specs):
        """Overlay user-provided per-feature settings onto the loaded specs."""
        for spec in feature_specs:
            names = spec["name"]
            if type(names) != list:
                names = [names]
            for name in names:
                for key, value in spec.items():
                    if key != "name":
                        self.features[name][key] = value

    def save(self, json_file):
        """Serialize this FeatureMap back to feature_map.json."""
        logging.info("Save feature_map to json: " + json_file)
        os.makedirs(os.path.dirname(json_file), exist_ok=True)
        payload = OrderedDict()
        payload["dataset_id"] = self.dataset_id
        payload["num_fields"] = self.num_fields
        payload["total_features"] = self.total_features
        payload["input_length"] = self.input_length
        payload["labels"] = self.labels
        if self.group_id is not None:
            payload["group_id"] = self.group_id
        # Stored as a list of single-entry dicts, mirroring load().
        payload["features"] = [{name: spec} for name, spec in self.features.items()]
        with open(json_file, "w") as fd:
            json.dump(payload, fd, indent=4)

    def get_num_fields(self, feature_source=[]):
        """Count non-meta features, optionally restricted to given source(s)."""
        if type(feature_source) != list:
            feature_source = [feature_source]
        return sum(
            1
            for spec in self.features.values()
            if spec["type"] != "meta"
            and (len(feature_source) == 0 or spec.get("source") in feature_source)
        )

    def sum_emb_out_dim(self, feature_source=[]):
        """Total embedding output dim over non-meta features (optionally filtered
        by source), falling back per feature to embedding_dim then the default."""
        if type(feature_source) != list:
            feature_source = [feature_source]
        total_dim = 0
        for spec in self.features.values():
            if spec["type"] == "meta":
                continue
            if len(feature_source) == 0 or spec.get("source") in feature_source:
                total_dim += spec.get("emb_output_dim",
                                      spec.get("embedding_dim", self.default_emb_dim))
        return total_dim

    def set_column_index(self):
        """Assign packed-array column positions: sequence features (with
        max_len) span a list of columns, scalars one column; labels follow
        all features. Also refreshes input_length (features only)."""
        logging.info("Set column index...")
        pos = 0
        for name, spec in self.features.items():
            if "max_len" in spec:
                self.column_index[name] = list(range(pos, pos + spec["max_len"]))
                pos += spec["max_len"]
            else:
                self.column_index[name] = pos
                pos += 1
        self.input_length = pos
        for label in self.labels:
            self.column_index[label] = pos
            pos += 1

    def get_column_index(self, feature):
        """Column position(s) of `feature`, (re)building the index if needed."""
        if feature not in self.column_index:
            self.set_column_index()
        return self.column_index[feature]
def evaluate_metrics(y_true, y_pred, metrics, group_id=None):
    """Compute the requested evaluation metrics.

    Args:
        y_true: array-like of binary ground-truth labels.
        y_pred: array-like of predicted scores/probabilities, aligned with y_true.
        metrics: list of metric names. Supports 'logloss'/'binary_crossentropy',
            'AUC', and group-wise metrics: 'gAUC', 'avgAUC', 'MRR', 'NDCG(k=...)'.
        group_id: array-like assigning each sample to a group; required when any
            group-wise metric is requested.

    Returns:
        OrderedDict mapping metric name -> value.

    Raises:
        ValueError: for an unsupported metric name.
        NotImplementedError: if a group metric name cannot be resolved.
    """
    return_dict = OrderedDict()
    group_metrics = []
    for metric in metrics:
        if metric in ['logloss', 'binary_crossentropy']:
            # FIX: scikit-learn >= 1.5 removed log_loss's `eps` argument.
            # Clip predictions explicitly to reproduce the old eps=1e-7
            # behavior on every sklearn version.
            return_dict[metric] = log_loss(y_true, np.clip(y_pred, 1e-7, 1.0 - 1e-7))
        elif metric == 'AUC':
            return_dict[metric] = roc_auc_score(y_true, y_pred)
        elif metric in ["gAUC", "avgAUC", "MRR"] or metric.startswith("NDCG"):
            return_dict[metric] = 0  # placeholder, filled from group results below
            group_metrics.append(metric)
        else:
            raise ValueError("metric={} not supported.".format(metric))
    if len(group_metrics) > 0:
        assert group_id is not None, "group_index is required."
        metric_funcs = []
        for metric in group_metrics:
            try:
                # NOTE: eval maps a trusted config string (e.g. "gAUC" or
                # "NDCG(k=5)") to a callable defined in this module. Do not
                # feed untrusted input here.
                metric_funcs.append(eval(metric))
            except Exception:  # FIX: was a bare `except:` which also swallowed KeyboardInterrupt
                raise NotImplementedError('metrics={} not implemented.'.format(metric))
        score_df = pd.DataFrame({"group_index": group_id,
                                 "y_true": y_true,
                                 "y_pred": y_pred})
        results = []
        # FIX: guard against cpu_count() < 2, where `processes=0` would raise.
        pool = mp.Pool(processes=max(1, mp.cpu_count() // 2))
        for idx, df in score_df.groupby("group_index"):
            results.append(pool.apply_async(evaluate_block, args=(df, metric_funcs)))
        pool.close()
        pool.join()
        results = [res.get() for res in results]
        # Each result is a list of (weighted_value, weight) pairs, one pair per
        # group metric; the final score is the weight-normalized sum over groups.
        sum_results = np.array(results).sum(0)
        average_result = list(sum_results[:, 0] / sum_results[:, 1])
        return_dict.update(dict(zip(group_metrics, average_result)))
    return return_dict

def evaluate_block(df, metric_funcs):
    """Apply each group-wise metric to one group's rows.

    Returns a list of (weighted_value, weight) pairs, one per metric; scalar
    metrics get weight 1 so every group counts equally."""
    res_list = []
    for fn in metric_funcs:
        v = fn(df.y_true.values, df.y_pred.values)
        if type(v) == tuple:
            res_list.append(v)
        else:  # scalar metric: add unit group weight
            res_list.append((v, 1))
    return res_list

def avgAUC(y_true, y_pred):
    """avgAUC used in MIND news recommendation: per-group AUC, groups weighted equally."""
    if np.sum(y_true) > 0 and np.sum(y_true) < len(y_true):
        auc = roc_auc_score(y_true, y_pred)
        return (auc, 1)
    else:  # all negatives or all positives: AUC undefined, exclude the group
        return (0, 0)

def gAUC(y_true, y_pred):
    """gAUC defined in the DIN paper: per-group AUC weighted by group size."""
    if np.sum(y_true) > 0 and np.sum(y_true) < len(y_true):
        auc = roc_auc_score(y_true, y_pred)
        n_samples = len(y_true)
        return (auc * n_samples, n_samples)
    else:  # all negatives or all positives: AUC undefined, exclude the group
        return (0, 0)

def MRR(y_true, y_pred):
    """Mean reciprocal rank of the positives under the score-descending ranking."""
    order = np.argsort(y_pred)[::-1]
    y_true = np.take(y_true, order)
    rr_score = y_true / (np.arange(len(y_true)) + 1)
    mrr = np.sum(rr_score) / (np.sum(y_true) + 1e-12)
    return mrr


class NDCG(object):
    """Normalized discounted cumulative gain metric at cutoff k."""
    def __init__(self, k=1):
        self.topk = k

    def dcg_score(self, y_true, y_pred):
        """DCG of the top-k items ranked by y_pred (gains 2^rel - 1)."""
        order = np.argsort(y_pred)[::-1]
        y_true = np.take(y_true, order[:self.topk])
        gains = 2 ** y_true - 1
        discounts = np.log2(np.arange(len(y_true)) + 2)
        return np.sum(gains / discounts)

    def __call__(self, y_true, y_pred):
        idcg = self.dcg_score(y_true, y_true)  # ideal ranking normalizer
        dcg = self.dcg_score(y_true, y_pred)
        return dcg / (idcg + 1e-12)
def save_h5(darray_dict, data_path):
    """Write a dict of numpy arrays to `data_path`, one h5 dataset per key."""
    logging.info("Saving data to h5: " + data_path)
    os.makedirs(os.path.dirname(data_path), exist_ok=True)
    with h5py.File(data_path, 'w') as hf:
        for key, arr in darray_dict.items():
            hf.create_dataset(key, data=arr)


def split_train_test(train_ddf=None, valid_ddf=None, test_ddf=None, valid_size=0,
                     test_size=0, split_type="sequential"):
    """Split `train_ddf` into train/valid/test DataFrames.

    `valid_size`/`test_size` are absolute row counts when >= 1, or fractions of
    the ORIGINAL training size when < 1. With split_type="random" the row order
    is shuffled first; with "sequential" the tail rows become the test split,
    then the next tail rows the validation split. Pre-supplied valid_ddf /
    test_ddf are returned untouched when the corresponding size is 0.
    """
    num_samples = len(train_ddf)
    train_size = num_samples
    instance_IDs = np.arange(num_samples)
    if split_type == "random":
        np.random.shuffle(instance_IDs)
    if test_size > 0:
        if test_size < 1:
            test_size = int(num_samples * test_size)
        train_size = train_size - test_size
        test_ddf = train_ddf.loc[instance_IDs[train_size:], :].reset_index()
        instance_IDs = instance_IDs[0:train_size]
    if valid_size > 0:
        if valid_size < 1:
            valid_size = int(num_samples * valid_size)
        train_size = train_size - valid_size
        valid_ddf = train_ddf.loc[instance_IDs[train_size:], :].reset_index()
        instance_IDs = instance_IDs[0:train_size]
    if valid_size > 0 or test_size > 0:
        train_ddf = train_ddf.loc[instance_IDs, :].reset_index()
    return train_ddf, valid_ddf, test_ddf


def transform_block(feature_encoder, df_block, filename, preprocess=False):
    """Encode one DataFrame block and save it as an h5 file under the encoder's data_dir."""
    if preprocess:
        df_block = feature_encoder.preprocess(df_block)
    darray_dict = feature_encoder.transform(df_block)
    save_h5(darray_dict, os.path.join(feature_encoder.data_dir, filename))


def transform_h5(feature_encoder, ddf, filename, preprocess=False, block_size=0):
    """Encode `ddf` to h5: as one file, or (block_size > 0) as part_*.h5 blocks
    written by a worker pool.

    NOTE(review): apply_async results are never retrieved, so worker exceptions
    are silently dropped — verify block outputs exist downstream.
    """
    if block_size > 0:
        pool = mp.Pool(mp.cpu_count() // 2)
        block_id = 0
        for idx in range(0, len(ddf), block_size):
            df_block = ddf[idx: (idx + block_size)]
            pool.apply_async(transform_block, args=(feature_encoder,
                                                    df_block,
                                                    '{}/part_{}.h5'.format(filename, block_id),
                                                    preprocess))
            block_id += 1
        pool.close()
        pool.join()
    else:
        transform_block(feature_encoder, ddf, filename + ".h5", preprocess)


def build_dataset(feature_encoder, train_data=None, valid_data=None, test_data=None, valid_size=0,
                  test_size=0, split_type="sequential", data_block_size=0, **kwargs):
    """Build feature_map and transform csv data to h5 splits.

    Skips the whole build when feature_map.json already exists. Returns the
    (train, valid, test) output paths; test is None when no test data/size is given.
    """
    feature_map_json = os.path.join(feature_encoder.data_dir, "feature_map.json")
    if os.path.exists(feature_map_json):
        # FIX: logging.warn is deprecated; use logging.warning.
        logging.warning("Skip rebuilding {}. Please delete it manually if rebuilding is required."
                        .format(feature_map_json))
    else:
        # Load csv data
        train_ddf = feature_encoder.read_csv(train_data, **kwargs)
        valid_ddf = None
        test_ddf = None

        # Split data for train/validation/test
        if valid_size > 0 or test_size > 0:
            valid_ddf = feature_encoder.read_csv(valid_data, **kwargs)
            test_ddf = feature_encoder.read_csv(test_data, **kwargs)
            train_ddf, valid_ddf, test_ddf = split_train_test(train_ddf, valid_ddf, test_ddf,
                                                              valid_size, test_size, split_type)

        # Fit on (already preprocessed) train data, then transform it
        train_ddf = feature_encoder.preprocess(train_ddf)
        feature_encoder.fit(train_ddf, **kwargs)
        transform_h5(feature_encoder, train_ddf, 'train', preprocess=False, block_size=data_block_size)
        del train_ddf
        gc.collect()

        # Transform valid_ddf
        if valid_ddf is None and (valid_data is not None):
            valid_ddf = feature_encoder.read_csv(valid_data, **kwargs)
        if valid_ddf is not None:
            transform_h5(feature_encoder, valid_ddf, 'valid', preprocess=True, block_size=data_block_size)
            del valid_ddf
            gc.collect()

        # Transform test_ddf
        if test_ddf is None and (test_data is not None):
            test_ddf = feature_encoder.read_csv(test_data, **kwargs)
        if test_ddf is not None:
            transform_h5(feature_encoder, test_ddf, 'test', preprocess=True, block_size=data_block_size)
            del test_ddf
            gc.collect()
        logging.info("Transform csv data to h5 done.")

    # Return processed data splits
    return os.path.join(feature_encoder.data_dir, "train"), \
           os.path.join(feature_encoder.data_dir, "valid"), \
           os.path.join(feature_encoder.data_dir, "test") if (
               test_data or test_size > 0) else None
class Normalizer(object):
    """Normalizes a numeric feature column.

    Accepts either a user-supplied callable (applied as-is) or the name of a
    supported sklearn scaler ('StandardScaler' or 'MinMaxScaler'), which is
    fitted on the non-NaN values and applied element-wise.
    """

    def __init__(self, normalizer):
        self.callable = callable(normalizer)
        if self.callable:
            # A plain function/lambda: no fitting needed.
            self.normalizer = normalizer
        elif normalizer in ['StandardScaler', 'MinMaxScaler']:
            self.normalizer = getattr(sklearn_preprocess, normalizer)()
        else:
            raise NotImplementedError('normalizer={}'.format(normalizer))

    def fit(self, X):
        """Fit the underlying sklearn scaler on non-NaN values; no-op for callables."""
        if self.callable:
            return
        null_index = np.isnan(X)
        self.normalizer.fit(X[~null_index].reshape(-1, 1))

    def normalize(self, X):
        """Apply the normalizer to X, preserving its 1-D shape."""
        if self.callable:
            return self.normalizer(X)
        return self.normalizer.transform(X.reshape(-1, 1)).flatten()
class Tokenizer(object):
    """Builds a token-to-index vocabulary for one feature column and encodes
    raw values (categorical, meta, or splitter-delimited sequences) to ids.

    Index conventions: 0 is reserved for "__PAD__" and the largest index for
    "__OOV__"; real tokens occupy the range in between.
    """

    def __init__(self, max_features=None, na_value="", min_freq=1, splitter=None, remap=True,
                 lower=False, max_len=0, padding="pre", num_workers=8):
        # max_features: keep only the top-k most frequent tokens (None = no cap)
        # na_value: token treated as missing; excluded from the vocabulary
        # min_freq: drop tokens occurring fewer than this many times
        # splitter: if set, values are token sequences joined by this delimiter
        # remap: True -> assign fresh contiguous ids; False -> tokens are
        #        assumed to already be integer ids and are kept verbatim
        # max_len: pad/truncate length for sequences (0 = longest observed)
        # padding: "pre" or "post"; used for both padding and truncation
        # num_workers: process count for parallel token counting
        self._max_features = max_features
        self._na_value = na_value
        self._min_freq = min_freq
        self._lower = lower
        self._splitter = splitter
        self.vocab = dict()
        self.max_len = max_len
        self.padding = padding
        self.num_workers = num_workers
        self.remap = remap

    def fit_on_texts(self, texts):
        """Count token frequencies over `texts` and build the vocabulary.

        Sequence columns (splitter set) are counted in parallel chunks; the
        observed maximum sequence length fills in max_len when it was 0.
        """
        word_counts = Counter()
        if self._splitter is not None:  # for sequence
            max_len = 0
            with ProcessPoolExecutor(max_workers=self.num_workers) as executor:
                chunks = np.array_split(texts, self.num_workers)
                tasks = [executor.submit(count_tokens, chunk, self._splitter) for chunk in chunks]
                for future in tqdm(as_completed(tasks), total=len(tasks)):
                    block_word_counts, block_max_len = future.result()
                    word_counts.update(block_word_counts)
                    max_len = max(max_len, block_max_len)
            if self.max_len == 0:  # if argument max_len not given
                self.max_len = max_len
        else:
            word_counts = Counter(list(texts))
        self.build_vocab(word_counts)

    def build_vocab(self, word_counts):
        """Turn a token->count mapping into the vocab dict, applying the
        max_features cap and min_freq cutoff, then adding __PAD__/__OOV__."""
        word_counts = word_counts.items()
        # sort to guarantee the determinism of index order
        word_counts = sorted(word_counts, key=lambda x: (-x[1], x[0]))
        if self._max_features:  # keep the most frequent features
            word_counts = word_counts[0:self._max_features]
        words = []
        for token, count in word_counts:
            if count >= self._min_freq:
                if token != self._na_value:
                    words.append(token.lower() if self._lower else token)
            else:
                break  # already sorted in decending order
        if self.remap:
            # fresh ids starting at 1 (0 is reserved for padding)
            self.vocab = dict((token, idx) for idx, token in enumerate(words, 1))
        else:
            # tokens are already integer ids; keep them as-is
            self.vocab = dict((token, int(token)) for token in words)
        self.vocab["__PAD__"] = 0  # use 0 for reserved __PAD__
        self.vocab["__OOV__"] = self.vocab_size()  # use the last index for __OOV__

    def merge_vocab(self, shared_tokenizer):
        """Merge this tokenizer's vocab into a shared tokenizer's vocab (for
        features that share embeddings) and adopt the merged vocab.

        NOTE(review): when the merged vocab is not contiguous, __OOV__ is set
        to vocab_size (not vocab_size - 1) — confirm this off-by-one is intended.
        """
        if self.remap:
            new_words = 0
            for word in self.vocab.keys():
                if word not in shared_tokenizer.vocab:
                    shared_tokenizer.vocab[word] = shared_tokenizer.vocab["__OOV__"] + new_words
                    new_words += 1
        else:
            shared_tokenizer.vocab.update(self.vocab)
        vocab_size = shared_tokenizer.vocab_size()
        if shared_tokenizer.vocab["__OOV__"] != vocab_size - 1:
            shared_tokenizer.vocab["__OOV__"] = vocab_size
        self.vocab = shared_tokenizer.vocab
        return shared_tokenizer

    def vocab_size(self):
        """Largest assigned index + 1 (indices need not be contiguous)."""
        return max(self.vocab.values()) + 1

    def update_vocab(self, word_list):
        """Append unseen words after the current __OOV__ index, then move
        __OOV__ to the new end of the vocabulary."""
        new_words = 0
        for word in word_list:
            if word not in self.vocab:
                self.vocab[word] = self.vocab["__OOV__"] + new_words
                new_words += 1
        if new_words > 0:
            self.vocab["__OOV__"] = self.vocab_size()

    def encode_meta(self, values):
        """Encode meta values to ids, growing the vocab for unseen values
        (e.g. meta data appearing only in the test split)."""
        word_counts = Counter(list(values))
        if len(self.vocab) == 0:
            self.build_vocab(word_counts)
        else:  # for considering meta data in test data
            self.update_vocab(word_counts.keys())
        meta_values = [self.vocab.get(x, self.vocab["__OOV__"]) for x in values]
        return np.array(meta_values)

    def encode_category(self, categories):
        """Encode categorical values to ids; unseen values map to __OOV__."""
        category_indices = [self.vocab.get(x, self.vocab["__OOV__"]) for x in categories]
        return np.array(category_indices)

    def encode_sequence(self, texts):
        """Encode delimiter-joined sequences to padded/truncated id rows.

        Missing/empty entries become all-__PAD__ rows; na_value tokens inside a
        sequence map to __PAD__, other unseen tokens to __OOV__.
        """
        sequence_list = []
        for text in texts:
            if pd.isnull(text) or text == '':
                sequence_list.append([])
            else:
                sequence_list.append([self.vocab.get(x, self.vocab["__OOV__"]) if x != self._na_value \
                                      else self.vocab["__PAD__"] for x in text.split(self._splitter)])
        sequence_list = pad_sequences(sequence_list, maxlen=self.max_len, value=self.vocab["__PAD__"],
                                      padding=self.padding, truncating=self.padding)
        return np.array(sequence_list)

    def load_pretrained_vocab(self, feature_dtype, pretrain_path, expand_vocab=True):
        """Extend the vocab with keys found in a pretrained-embedding h5 file."""
        with h5py.File(pretrain_path, 'r') as hf:
            keys = hf["key"][:]
            keys = keys.astype(feature_dtype)  # in case mismatch of dtype between int and str
        # Update vocab with pretrained keys in case new tokens appear in validation or test set
        # Do not update OOV index here since it is used in PretrainedEmbedding
        if expand_vocab:
            for word in keys:
                if word not in self.vocab:
                    self.vocab[word] = self.vocab_size()


def count_tokens(texts, splitter):
    """Count token frequencies and the longest sequence length over `texts`
    (module-level so it can be pickled for the process pool)."""
    word_counts = Counter()
    max_len = 0
    for text in texts:
        text_split = text.split(splitter)
        max_len = max(max_len, len(text_split))
        for token in text_split:
            word_counts[token] += 1
    return word_counts, max_len
-------------------------------------------------------------------------------- /fuxictr/pytorch/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LiangcaiSu/STEM/769e2af0d0d1a0be9f58b475b95e05f502a20df5/fuxictr/pytorch/__init__.py -------------------------------------------------------------------------------- /fuxictr/pytorch/dataloaders/__init__.py: -------------------------------------------------------------------------------- 1 | from .h5_block_dataloader import H5BlockDataLoader 2 | from .h5_dataloader import H5DataLoader -------------------------------------------------------------------------------- /fuxictr/pytorch/dataloaders/h5_block_dataloader.py: -------------------------------------------------------------------------------- 1 | # ========================================================================= 2 | # Copyright (C) 2023. FuxiCTR Authors. All rights reserved. 3 | # Copyright (C) 2022. Huawei Technologies Co., Ltd. All rights reserved. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 
class BlockIterDataPipe(data.IterDataPipe):
    """Iterable datapipe streaming samples row-by-row from a list of h5 blocks."""

    def __init__(self, block_datapipe, feature_map, verbose=0):
        self.feature_map = feature_map
        self.block_datapipe = block_datapipe  # list of h5 block file paths
        self.verbose = verbose

    def load_data(self, data_path):
        """Load one h5 block and pack all feature columns plus labels into a
        single 2-D tensor (columns follow feature_map order)."""
        data_dict = load_h5(data_path, verbose=self.verbose)
        data_arrays = []
        all_cols = list(self.feature_map.features.keys()) + self.feature_map.labels
        for col in all_cols:
            array = data_dict[col]
            if array.ndim == 1:
                # promote scalar columns to (N, 1) so hstack aligns widths
                data_arrays.append(array.reshape(-1, 1))
            else:
                data_arrays.append(array)
        data_tensor = torch.from_numpy(np.hstack(data_arrays))
        return data_tensor

    def read_block(self, data_block):
        """Yield one sample (row) at a time from a block."""
        darray = self.load_data(data_block)
        for idx in range(darray.shape[0]):
            yield darray[idx, :]

    def __iter__(self):
        worker_info = data.get_worker_info()
        if worker_info is None:  # single-process data loading
            block_list = self.block_datapipe
        else:  # in a worker process: shard blocks round-robin across workers
            block_list = [
                block
                for idx, block in enumerate(self.block_datapipe)
                if idx % worker_info.num_workers == worker_info.id
            ]
        return chain.from_iterable(map(self.read_block, block_list))


class DataLoader(data.DataLoader):
    """torch DataLoader over block-partitioned h5 data (one part_*.h5 per block)."""

    def __init__(self, feature_map, data_path, batch_size=32, shuffle=False,
                 num_workers=1, verbose=0, buffer_size=100000, **kwargs):
        data_blocks = glob.glob(data_path + "/*.h5")
        assert len(data_blocks) > 0, f"invalid data_path: {data_path}"
        if len(data_blocks) > 1:
            data_blocks.sort(key=lambda x: int(x.split("_")[-1].split(".")[0]))  # e.g. "part_1.h5"
        self.data_blocks = data_blocks
        self.num_blocks = len(self.data_blocks)
        self.feature_map = feature_map
        self.batch_size = batch_size
        self.num_batches, self.num_samples = self.count_batches_and_samples()
        datapipe = BlockIterDataPipe(data_blocks, feature_map, verbose)
        if shuffle:
            # buffered shuffle over the streamed samples
            datapipe = datapipe.shuffle(buffer_size=buffer_size)
        super(DataLoader, self).__init__(dataset=datapipe, batch_size=batch_size, num_workers=num_workers)

    def __len__(self):
        return self.num_batches

    def count_batches_and_samples(self):
        """Scan each block's first label column to count samples and batches.

        Note: batches do not span blocks, so the total is the sum of per-block
        ceil(block_len / batch_size).
        """
        num_samples = 0
        num_batches = 0
        for block_path in self.data_blocks:
            with h5py.File(block_path, 'r') as hf:
                y = hf[self.feature_map.labels[0]][:]
                num_samples += len(y)
                num_batches += int(np.ceil(len(y) * 1.0 / self.batch_size))
        return num_batches, num_samples


class H5BlockDataLoader(object):
    """Builds the train/valid/test block DataLoaders for the requested stage
    ("train", "test", or "both")."""

    def __init__(self, feature_map, stage="both", train_data=None, valid_data=None, test_data=None,
                 batch_size=32, shuffle=True, verbose=0, **kwargs):
        logging.info("Loading data...")
        train_gen = None
        valid_gen = None
        test_gen = None
        self.stage = stage
        if stage in ["both", "train"]:
            train_gen = DataLoader(feature_map, train_data, batch_size=batch_size, shuffle=shuffle, verbose=verbose, **kwargs)
            logging.info("Train samples: total/{:d}, blocks/{:d}".format(train_gen.num_samples, train_gen.num_blocks))
            if valid_data:
                valid_gen = DataLoader(feature_map, valid_data, batch_size=batch_size, shuffle=False, verbose=verbose, **kwargs)
                logging.info("Validation samples: total/{:d}, blocks/{:d}".format(valid_gen.num_samples, valid_gen.num_blocks))

        if stage in ["both", "test"]:
            if test_data:
                test_gen = DataLoader(feature_map, test_data, batch_size=batch_size, shuffle=False, verbose=verbose, **kwargs)
                logging.info("Test samples: total/{:d}, blocks/{:d}".format(test_gen.num_samples, test_gen.num_blocks))
        self.train_gen, self.valid_gen, self.test_gen = train_gen, valid_gen, test_gen

    def make_iterator(self):
        """Return the loaders matching the stage: (train, valid), test, or
        (train, valid, test) for "both"."""
        if self.stage == "train":
            logging.info("Loading train and validation data done.")
            return self.train_gen, self.valid_gen
        elif self.stage == "test":
            logging.info("Loading test data done.")
            return self.test_gen
        else:
            logging.info("Loading data done.")
            return self.train_gen, self.valid_gen, self.test_gen
# =========================================================================
# Copyright (C) 2022. Huawei Technologies Co., Ltd. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# =========================================================================


import numpy as np
from torch.utils import data
from fuxictr.utils import load_h5
import torch
import logging


class Dataset(data.Dataset):
    """Map-style dataset that loads one whole .h5 file into a single 2-D tensor.

    Each row holds all feature columns followed by the label columns, in the
    order given by feature_map.features / feature_map.labels.
    """

    def __init__(self, feature_map, data_path):
        self.feature_map = feature_map
        self.darray = self.load_data_array(data_path)

    def __getitem__(self, index):
        return self.darray[index, :]

    def __len__(self):
        return self.darray.shape[0]

    def load_data_array(self, data_path):
        """Hstack all feature/label columns from the .h5 file into an (N, D) tensor."""
        data_dict = load_h5(data_path)  # dict of arrays from h5
        data_arrays = []
        all_cols = list(self.feature_map.features.keys()) + self.feature_map.labels
        for col in all_cols:
            array = data_dict[col]
            if array.ndim == 1:
                # promote 1-D columns to (N, 1) so np.hstack aligns on axis 1
                data_arrays.append(array.reshape(-1, 1))
            else:
                data_arrays.append(array)
        # Fix: removed leftover debug prints ("lalalala1"/"lalalala2") that
        # polluted stdout on every dataset load.
        data_tensor = torch.from_numpy(np.hstack(data_arrays))
        return data_tensor


class DataLoader(data.DataLoader):
    """DataLoader over a single .h5 file (appends the .h5 suffix if missing)."""

    def __init__(self, feature_map, data_path, batch_size=32, shuffle=False, num_workers=1, **kwargs):
        if not data_path.endswith(".h5"):
            data_path += ".h5"
        self.dataset = Dataset(feature_map, data_path)
        super(DataLoader, self).__init__(dataset=self.dataset, batch_size=batch_size,
                                         shuffle=shuffle, num_workers=num_workers)
        self.num_samples = len(self.dataset)
        self.num_batches = int(np.ceil(self.num_samples * 1.0 / self.batch_size))

    def __len__(self):
        return self.num_batches


class H5DataLoader(object):
    """Factory that builds train/valid/test DataLoaders depending on the stage."""

    def __init__(self, feature_map, stage="both", train_data=None, valid_data=None, test_data=None,
                 batch_size=32, shuffle=True, **kwargs):
        logging.info("Loading data...")
        train_gen = None
        valid_gen = None
        test_gen = None
        self.stage = stage
        if stage in ["both", "train"]:
            train_gen = DataLoader(feature_map, train_data, batch_size=batch_size, shuffle=shuffle, **kwargs)
            logging.info("Train samples: total/{:d}, blocks/{:d}".format(train_gen.num_samples, 1))
            if valid_data:
                # Validation is never shuffled.
                valid_gen = DataLoader(feature_map, valid_data, batch_size=batch_size, shuffle=False, **kwargs)
                logging.info("Validation samples: total/{:d}, blocks/{:d}".format(valid_gen.num_samples, 1))
        if stage in ["both", "test"]:
            if test_data:
                test_gen = DataLoader(feature_map, test_data, batch_size=batch_size, shuffle=False, **kwargs)
                logging.info("Test samples: total/{:d}, blocks/{:d}".format(test_gen.num_samples, 1))
        self.train_gen, self.valid_gen, self.test_gen = train_gen, valid_gen, test_gen

    def make_iterator(self):
        """Return the loaders for the configured stage.

        "train" -> (train_gen, valid_gen); "test" -> test_gen;
        otherwise -> (train_gen, valid_gen, test_gen).
        """
        if self.stage == "train":
            logging.info("Loading train and validation data done.")
            return self.train_gen, self.valid_gen
        elif self.stage == "test":
            logging.info("Loading test data done.")
            return self.test_gen
        else:
            logging.info("Loading data done.")
            return self.train_gen, self.valid_gen, self.test_gen
3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | # ========================================================================= 16 | 17 | 18 | import torch 19 | from torch import nn 20 | import numpy as np 21 | 22 | 23 | class Dice(nn.Module): 24 | def __init__(self, input_dim, eps=1e-9): 25 | super(Dice, self).__init__() 26 | self.bn = nn.BatchNorm1d(input_dim, affine=False, eps=eps, momentum=0.01) 27 | self.alpha = nn.Parameter(torch.zeros(input_dim)) 28 | 29 | def forward(self, X): 30 | p = torch.sigmoid(self.bn(X)) 31 | output = p * X + self.alpha * (1 - p) * X 32 | return output 33 | 34 | 35 | class GELU(nn.Module): 36 | def __init__(self): 37 | super(GELU, self).__init__() 38 | 39 | def forward(self, x): 40 | return 0.5 * x * (1 + torch.tanh(np.sqrt(2 / np.pi) * ( x + 0.044715 * torch.pow(x, 3)))) 41 | -------------------------------------------------------------------------------- /fuxictr/pytorch/layers/attentions/__init__.py: -------------------------------------------------------------------------------- 1 | from .dot_product_attention import * 2 | from .squeeze_excitation import * 3 | from .target_attention import * 4 | 5 | -------------------------------------------------------------------------------- /fuxictr/pytorch/layers/attentions/dot_product_attention.py: -------------------------------------------------------------------------------- 1 | # 
========================================================================= 2 | # Copyright (C) 2021. Huawei Technologies Co., Ltd. All rights reserved. 3 | # Copyright (C) 2018. pengshuang@Github for ScaledDotProductAttention. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # ========================================================================= 17 | 18 | 19 | import torch 20 | from torch import nn 21 | 22 | 23 | class ScaledDotProductAttention(nn.Module): 24 | """ Scaled Dot-Product Attention 25 | Ref: https://zhuanlan.zhihu.com/p/47812375 26 | """ 27 | def __init__(self, dropout_rate=0.): 28 | super(ScaledDotProductAttention, self).__init__() 29 | self.dropout = nn.Dropout(dropout_rate) if dropout_rate > 0 else None 30 | 31 | def forward(self, Q, K, V, scale=None, mask=None): 32 | # mask: 0 for masked positions 33 | scores = torch.matmul(Q, K.transpose(-1, -2)) 34 | if scale: 35 | scores = scores / scale 36 | if mask is not None: 37 | mask = mask.view_as(scores) 38 | scores = scores.masked_fill_(mask.float() == 0, -1.e9) # fill -inf if mask=0 39 | attention = scores.softmax(dim=-1) 40 | if self.dropout is not None: 41 | attention = self.dropout(attention) 42 | output = torch.matmul(attention, V) 43 | return output, attention 44 | 45 | -------------------------------------------------------------------------------- /fuxictr/pytorch/layers/attentions/squeeze_excitation.py: 
-------------------------------------------------------------------------------- 1 | # ========================================================================= 2 | # Copyright (C) 2022. Huawei Technologies Co., Ltd. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | # ========================================================================= 16 | 17 | 18 | import torch 19 | from torch import nn 20 | 21 | 22 | class SqueezeExcitation(nn.Module): 23 | def __init__(self, num_fields, reduction_ratio=3, excitation_activation="ReLU"): 24 | super(SqueezeExcitation, self).__init__() 25 | reduced_size = max(1, int(num_fields / reduction_ratio)) 26 | excitation = [nn.Linear(num_fields, reduced_size, bias=False), 27 | nn.ReLU(), 28 | nn.Linear(reduced_size, num_fields, bias=False)] 29 | if excitation_activation.lower() == "relu": 30 | excitation.append(nn.ReLU()) 31 | elif excitation_activation.lower() == "sigmoid": 32 | excitation.append(nn.Sigmoid()) 33 | else: 34 | raise NotImplementedError 35 | self.excitation = nn.Sequential(*excitation) 36 | 37 | def forward(self, feature_emb): 38 | Z = torch.mean(feature_emb, dim=-1, out=None) 39 | A = self.excitation(Z) 40 | V = feature_emb * A.unsqueeze(-1) 41 | return V 42 | -------------------------------------------------------------------------------- /fuxictr/pytorch/layers/attentions/target_attention.py: 
# =========================================================================
# Copyright (C) 2022. Huawei Technologies Co., Ltd. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# =========================================================================


import torch
from torch import nn
from .dot_product_attention import ScaledDotProductAttention
from ..activations import Dice
from ..blocks.mlp_block import MLP_Block


class DIN_Attention(nn.Module):
    """DIN-style target attention: an MLP scores each history item against the target."""

    def __init__(self,
                 embedding_dim=64,
                 attention_units=[32],
                 hidden_activations="ReLU",
                 output_activation=None,
                 dropout_rate=0,
                 batch_norm=False,
                 use_softmax=False):
        super(DIN_Attention, self).__init__()
        self.embedding_dim = embedding_dim
        self.use_softmax = use_softmax
        if isinstance(hidden_activations, str) and hidden_activations.lower() == "dice":
            # Dice needs the layer width, so build one instance per hidden layer
            hidden_activations = [Dice(units) for units in attention_units]
        self.attention_layer = MLP_Block(input_dim=4 * embedding_dim,
                                         output_dim=1,
                                         hidden_units=attention_units,
                                         hidden_activations=hidden_activations,
                                         output_activation=output_activation,
                                         dropout_rates=dropout_rate,
                                         batch_norm=batch_norm)

    def forward(self, target_item, history_sequence, mask=None):
        """
        target_item: b x emd
        history_sequence: b x len x emb
        mask: mask of history_sequence, 0 for masked positions
        """
        seq_len = history_sequence.size(1)
        target_item = target_item.unsqueeze(1).expand(-1, seq_len, -1)
        # DIN feeds [q, k, q - k, q * k] to the attention MLP
        attention_input = torch.cat([target_item, history_sequence, target_item - history_sequence,
                                     target_item * history_sequence], dim=-1)  # b x len x 4*emb
        attention_weight = self.attention_layer(attention_input.view(-1, 4 * self.embedding_dim))
        attention_weight = attention_weight.view(-1, seq_len)  # b x len
        if mask is not None:
            # zero weights at masked positions (applied before optional softmax)
            attention_weight = attention_weight * mask.float()
        if self.use_softmax:
            if mask is not None:
                # large negative bias so masked positions receive ~0 probability
                attention_weight += -1.e9 * (1 - mask.float())
            attention_weight = attention_weight.softmax(dim=-1)
        # weighted sum over the history sequence
        output = (attention_weight.unsqueeze(-1) * history_sequence).sum(dim=1)
        return output


class MultiHeadTargetAttention(nn.Module):
    """Multi-head scaled dot-product attention with the target item as the single query."""

    def __init__(self,
                 input_dim=64,
                 attention_dim=64,
                 num_heads=1,
                 dropout_rate=0,
                 use_scale=True,
                 use_qkvo=True):
        super(MultiHeadTargetAttention, self).__init__()
        if not use_qkvo:
            # no projections: heads split the raw input dimension directly
            attention_dim = input_dim
        assert attention_dim % num_heads == 0, \
            "attention_dim={} is not divisible by num_heads={}".format(attention_dim, num_heads)
        self.num_heads = num_heads
        self.head_dim = attention_dim // num_heads
        self.scale = self.head_dim ** 0.5 if use_scale else None
        self.use_qkvo = use_qkvo
        if use_qkvo:
            self.W_q = nn.Linear(input_dim, attention_dim, bias=False)
            self.W_k = nn.Linear(input_dim, attention_dim, bias=False)
            self.W_v = nn.Linear(input_dim, attention_dim, bias=False)
            self.W_o = nn.Linear(attention_dim, input_dim, bias=False)
        self.dot_attention = ScaledDotProductAttention(dropout_rate)

    def forward(self, target_item, history_sequence, mask=None):
        """
        target_item: b x emd
        history_sequence: b x len x emb
        mask: mask of history_sequence, 0 for masked positions
        """
        # linear projection
        if self.use_qkvo:
            query = self.W_q(target_item)
            key = self.W_k(history_sequence)
            value = self.W_v(history_sequence)
        else:
            query, key, value = target_item, history_sequence, history_sequence

        # split by heads: query has seq length 1 (the single target item)
        batch_size = query.size(0)
        query = query.view(batch_size, 1, self.num_heads, self.head_dim).transpose(1, 2)
        key = key.view(batch_size, -1, self.num_heads, self.head_dim).transpose(1, 2)
        value = value.view(batch_size, -1, self.num_heads, self.head_dim).transpose(1, 2)
        if mask is not None:
            # broadcast the key-side mask over all heads
            mask = mask.view(batch_size, 1, 1, -1).expand(-1, self.num_heads, -1, -1)

        # scaled dot product attention
        output, _ = self.dot_attention(query, key, value, scale=self.scale, mask=mask)
        # concat heads (query length is 1, so flatten to b x (heads*head_dim))
        output = output.transpose(1, 2).contiguous().view(-1, self.num_heads * self.head_dim)
        if self.use_qkvo:
            output = self.W_o(output)
        return output
6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | # ========================================================================= 16 | 17 | 18 | import torch 19 | from torch import nn 20 | from .logistic_regression import LogisticRegression 21 | from ..interactions import InnerProductInteraction 22 | 23 | 24 | class FactorizationMachine(nn.Module): 25 | def __init__(self, feature_map): 26 | super(FactorizationMachine, self).__init__() 27 | self.fm_layer = InnerProductInteraction(feature_map.num_fields, output="product_sum") 28 | self.lr_layer = LogisticRegression(feature_map, use_bias=True) 29 | 30 | def forward(self, X, feature_emb): 31 | lr_out = self.lr_layer(X) 32 | fm_out = self.fm_layer(feature_emb) 33 | output = fm_out + lr_out 34 | return output 35 | 36 | -------------------------------------------------------------------------------- /fuxictr/pytorch/layers/blocks/logistic_regression.py: -------------------------------------------------------------------------------- 1 | # ========================================================================= 2 | # Copyright (C) 2022. Huawei Technologies Co., Ltd. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 
6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | # ========================================================================= 16 | 17 | 18 | import torch 19 | from torch import nn 20 | from fuxictr.pytorch.layers import FeatureEmbedding 21 | 22 | 23 | class LogisticRegression(nn.Module): 24 | def __init__(self, feature_map, use_bias=True): 25 | super(LogisticRegression, self).__init__() 26 | self.bias = nn.Parameter(torch.zeros(1), requires_grad=True) if use_bias else None 27 | # A trick for quick one-hot encoding in LR 28 | self.embedding_layer = FeatureEmbedding(feature_map, 1, use_pretrain=False, use_sharing=False) 29 | 30 | def forward(self, X): 31 | embed_weights = self.embedding_layer(X) 32 | output = embed_weights.sum(dim=1) 33 | if self.bias is not None: 34 | output += self.bias 35 | return output 36 | 37 | -------------------------------------------------------------------------------- /fuxictr/pytorch/layers/blocks/mlp_block.py: -------------------------------------------------------------------------------- 1 | # ========================================================================= 2 | # Copyright (C) 2022. Huawei Technologies Co., Ltd. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 
# =========================================================================
# Copyright (C) 2022. Huawei Technologies Co., Ltd. All rights reserved.
# Licensed under the Apache License, Version 2.0.
# =========================================================================


import numpy as np
from torch import nn
from fuxictr.pytorch.torch_utils import get_activation


class MLP_Block(nn.Module):
    """Configurable stack of Linear -> [BatchNorm] -> activation -> [Dropout] layers."""

    def __init__(self,
                 input_dim,
                 hidden_units=[],
                 hidden_activations="ReLU",
                 output_dim=None,
                 output_activation=None,
                 dropout_rates=0.0,
                 batch_norm=False,
                 bn_only_once=False,  # Set True for inference speed up
                 use_bias=True):
        super(MLP_Block, self).__init__()
        num_hidden = len(hidden_units)
        # Broadcast scalar settings to one entry per hidden layer.
        if not isinstance(dropout_rates, list):
            dropout_rates = [dropout_rates] * num_hidden
        if not isinstance(hidden_activations, list):
            hidden_activations = [hidden_activations] * num_hidden
        hidden_activations = get_activation(hidden_activations, hidden_units)
        dims = [input_dim] + hidden_units
        layers = []
        if batch_norm and bn_only_once:
            # single BatchNorm on the raw input instead of one per layer
            layers.append(nn.BatchNorm1d(input_dim))
        for idx, (fan_in, fan_out) in enumerate(zip(dims[:-1], dims[1:])):
            layers.append(nn.Linear(fan_in, fan_out, bias=use_bias))
            if batch_norm and not bn_only_once:
                layers.append(nn.BatchNorm1d(fan_out))
            if hidden_activations[idx]:
                layers.append(hidden_activations[idx])
            if dropout_rates[idx] > 0:
                layers.append(nn.Dropout(p=dropout_rates[idx]))
        if output_dim is not None:
            layers.append(nn.Linear(dims[-1], output_dim, bias=use_bias))
        if output_activation is not None:
            layers.append(get_activation(output_activation))
        self.mlp = nn.Sequential(*layers)  # * used to unpack the layer list

    def forward(self, inputs):
        return self.mlp(inputs)
# =========================================================================
# Copyright (C) 2023. Huawei Technologies Co., Ltd. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# =========================================================================


import torch
from torch import nn
import h5py
import os
import io
import json
import numpy as np
import logging


class PretrainedEmbedding(nn.Module):
    def __init__(self,
                 feature_name,
                 feature_spec,
                 pretrain_path,
                 vocab_path,
                 embedding_dim,
                 pretrain_dim,
                 pretrain_usage="init"):
        """
        Fusion pretrained embedding with ID embedding
        :param: fusion_type: init/sum/concat
        """
        super().__init__()
        assert pretrain_usage in ["init", "sum", "concat"]
        self.pretrain_usage = pretrain_usage
        padding_idx = feature_spec.get("padding_idx", None)
        self.oov_idx = feature_spec["oov_idx"]
        self.freeze_emb = feature_spec["freeze_emb"]
        self.pretrain_embedding = self.load_pretrained_embedding(feature_spec["vocab_size"],
                                                                 pretrain_dim,
                                                                 pretrain_path,
                                                                 vocab_path,
                                                                 feature_name,
                                                                 freeze=self.freeze_emb,
                                                                 padding_idx=padding_idx)
        if pretrain_usage != "init":
            # "sum"/"concat" fuse the pretrained vectors with a trainable ID embedding
            self.id_embedding = nn.Embedding(feature_spec["vocab_size"],
                                             embedding_dim,
                                             padding_idx=padding_idx)
        self.proj = None
        # Projection reconciles pretrain_dim with embedding_dim when they differ
        # ("init"/"sum") or reduces the concatenation back to embedding_dim.
        if pretrain_usage in ["init", "sum"] and embedding_dim != pretrain_dim:
            self.proj = nn.Linear(pretrain_dim, embedding_dim)
        if pretrain_usage == "concat":
            self.proj = nn.Linear(pretrain_dim + embedding_dim, embedding_dim)

    def reset_parameters(self, embedding_initializer):
        # Only the trainable ID embedding is re-initialized; the pretrained
        # table is left as loaded.
        if self.pretrain_usage in ["sum", "concat"]:
            nn.init.zeros_(self.id_embedding.weight)  # set oov token embeddings to zeros
            embedding_initializer(self.id_embedding.weight[1:self.oov_idx, :])

    def get_pretrained_embedding(self, pretrain_path):
        """Read (keys, vectors) from an .h5 file with datasets "key" and "value"."""
        with h5py.File(pretrain_path, 'r') as hf:
            keys = hf["key"][:]
            embeddings = hf["value"][:]
        logging.info("Loading pretrained_emb: {}".format(pretrain_path))
        return keys, embeddings

    def load_feature_vocab(self, vocab_path, feature_name):
        # Vocab file maps feature_name -> {token: index}.
        with io.open(vocab_path, "r", encoding="utf-8") as fd:
            vocab = json.load(fd)
        return vocab[feature_name]

    def load_pretrained_embedding(self, vocab_size, pretrain_dim, pretrain_path, vocab_path,
                                  feature_name, freeze=False, padding_idx=None):
        """Build an nn.Embedding whose rows are filled from the pretrained vectors.

        Tokens absent from the pretrained file keep zeros (frozen) or a small
        random init (trainable).
        """
        embedding_layer = nn.Embedding(vocab_size,
                                       pretrain_dim,
                                       padding_idx=padding_idx)
        if freeze:
            embedding_matrix = np.zeros((vocab_size, pretrain_dim))
        else:
            embedding_matrix = np.random.normal(loc=0, scale=1.e-4, size=(vocab_size, pretrain_dim))
            # NOTE(review): "if padding_idx:" skips the zeroing when
            # padding_idx == 0 — likely intended "is not None"; confirm.
            if padding_idx:
                embedding_matrix[padding_idx, :] = np.zeros(pretrain_dim)  # set as zero for PAD
        keys, embeddings = self.get_pretrained_embedding(pretrain_path)
        assert embeddings.shape[-1] == pretrain_dim, f"pretrain_dim={pretrain_dim} not correct."
        vocab = self.load_feature_vocab(vocab_path, feature_name)
        for idx, word in enumerate(keys):
            if word in vocab:
                embedding_matrix[vocab[word]] = embeddings[idx]
        embedding_layer.weight = torch.nn.Parameter(torch.from_numpy(embedding_matrix).float())
        if freeze:
            embedding_layer.weight.requires_grad = False
        return embedding_layer

    def forward(self, inputs):
        # mask = 1 for ids up to (and including) oov_idx, 0 beyond it
        mask = (inputs <= self.oov_idx).float()
        pretrain_emb = self.pretrain_embedding(inputs)
        if not self.freeze_emb:
            # zero out trainable pretrained vectors for ids beyond oov
            pretrain_emb = pretrain_emb * mask.unsqueeze(-1)
        if self.pretrain_usage == "init":
            if self.proj is not None:
                feature_emb = self.proj(pretrain_emb)
            else:
                feature_emb = pretrain_emb
        else:
            id_emb = self.id_embedding(inputs)
            id_emb = id_emb * mask.unsqueeze(-1)
            if self.pretrain_usage == "sum":
                if self.proj is not None:
                    feature_emb = self.proj(pretrain_emb) + id_emb
                else:
                    feature_emb = pretrain_emb + id_emb
            elif self.pretrain_usage == "concat":
                feature_emb = torch.cat([pretrain_emb, id_emb], dim=-1)
                feature_emb = self.proj(feature_emb)
        return feature_emb
15 | # ========================================================================= 16 | 17 | import torch 18 | from torch import nn 19 | from itertools import combinations 20 | 21 | 22 | class BilinearInteraction(nn.Module): 23 | def __init__(self, num_fields, embedding_dim, bilinear_type="field_interaction"): 24 | super(BilinearInteraction, self).__init__() 25 | self.bilinear_type = bilinear_type 26 | self.interact_dim = int(num_fields * (num_fields - 1) / 2) 27 | if self.bilinear_type == "field_all": 28 | self.bilinear_W = nn.Parameter(torch.Tensor(embedding_dim, embedding_dim)) 29 | elif self.bilinear_type == "field_each": 30 | self.bilinear_W = nn.Parameter(torch.Tensor(num_fields, embedding_dim, embedding_dim)) 31 | elif self.bilinear_type == "field_interaction": 32 | self.bilinear_W = nn.Parameter(torch.Tensor(self.interact_dim, embedding_dim, embedding_dim)) 33 | else: 34 | raise NotImplementedError 35 | self.reset_parameters() 36 | 37 | def reset_parameters(self): 38 | nn.init.xavier_normal_(self.bilinear_W) 39 | 40 | def forward(self, feature_emb): 41 | feature_emb_list = torch.split(feature_emb, 1, dim=1) 42 | if self.bilinear_type == "field_all": 43 | bilinear_list = [torch.matmul(v_i, self.bilinear_W) * v_j 44 | for v_i, v_j in combinations(feature_emb_list, 2)] 45 | elif self.bilinear_type == "field_each": 46 | bilinear_W_list = torch.split(self.bilinear_W, 1, dim=0) 47 | bilinear_list = [torch.matmul(feature_emb_list[i], bilinear_W_list[i]) * feature_emb_list[j] 48 | for i, j in combinations(range(len(feature_emb_list)), 2)] 49 | elif self.bilinear_type == "field_interaction": 50 | bilinear_W_list = torch.split(self.bilinear_W, 1, dim=0) 51 | bilinear_list = [torch.matmul(v[0], bilinear_W_list[i]) * v[1] 52 | for i, v in enumerate(combinations(feature_emb_list, 2))] 53 | return torch.cat(bilinear_list, dim=1) 54 | 55 | 56 | class BilinearInteractionV2(nn.Module): 57 | def __init__(self, num_fields, embedding_dim, bilinear_type="field_interaction"): 58 | 
class BilinearInteractionV2(nn.Module):
    """Vectorized bilinear interaction (FiBiNET), equivalent in output to
    BilinearInteraction but computed without per-pair Python loops.

    Input:  feature_emb of shape (batch, num_fields, embedding_dim)
    Output: (batch, num_fields*(num_fields-1)/2, embedding_dim)
    """

    def __init__(self, num_fields, embedding_dim, bilinear_type="field_interaction"):
        super(BilinearInteractionV2, self).__init__()
        self.bilinear_type = bilinear_type
        self.num_fields = num_fields
        self.embedding_dim = embedding_dim
        self.interact_dim = num_fields * (num_fields - 1) // 2
        if bilinear_type == "field_all":
            weight_shape = (embedding_dim, embedding_dim)
        elif bilinear_type == "field_each":
            weight_shape = (num_fields, embedding_dim, embedding_dim)
        elif bilinear_type == "field_interaction":
            weight_shape = (self.interact_dim, embedding_dim, embedding_dim)
        else:
            raise NotImplementedError
        self.bilinear_W = nn.Parameter(torch.Tensor(*weight_shape))
        # upper-triangular (i, j) field-pair indices; stored as a frozen
        # Parameter so it follows the module across devices
        self.triu_index = nn.Parameter(torch.triu_indices(num_fields, num_fields, offset=1),
                                       requires_grad=False)
        self.reset_parameters()

    def reset_parameters(self):
        nn.init.xavier_normal_(self.bilinear_W)

    def forward(self, feature_emb):
        row_idx, col_idx = self.triu_index[0], self.triu_index[1]
        if self.bilinear_type == "field_interaction":
            left = torch.index_select(feature_emb, 1, row_idx)
            right = torch.index_select(feature_emb, 1, col_idx)
            # per-pair matrices broadcast over the batch dimension
            return torch.matmul(left.unsqueeze(2), self.bilinear_W).squeeze(2) * right
        if self.bilinear_type == "field_all":
            projected = torch.matmul(feature_emb, self.bilinear_W)
        else:  # "field_each"
            projected = torch.matmul(feature_emb.unsqueeze(2), self.bilinear_W).squeeze(2)
        left = torch.index_select(projected, 1, row_idx)
        right = torch.index_select(feature_emb, 1, col_idx)
        return left * right
class CompressedInteractionNet(nn.Module):
    """Compressed Interaction Network (CIN) from xDeepFM.

    Each layer forms the outer Hadamard interaction between the raw field
    embeddings X_0 and the previous layer's feature maps, then compresses the
    channel dimension with a 1x1 Conv1d. Sum pooling over the embedding axis
    of every layer is concatenated and projected to `output_dim` logits.
    """

    def __init__(self, num_fields, cin_hidden_units, output_dim=1):
        super(CompressedInteractionNet, self).__init__()
        self.cin_hidden_units = cin_hidden_units
        self.fc = nn.Linear(sum(cin_hidden_units), output_dim)
        self.cin_layer = nn.ModuleDict()
        prev_maps = num_fields
        for idx, num_filters in enumerate(cin_hidden_units):
            # the interaction tensor at this depth has num_fields * prev_maps
            # channels; the 1x1 convolution compresses it to num_filters maps
            self.cin_layer["layer_" + str(idx + 1)] = nn.Conv1d(num_fields * prev_maps,
                                                                num_filters,
                                                                kernel_size=1)
            prev_maps = num_filters

    def forward(self, feature_emb):
        batch_size = feature_emb.shape[0]
        embedding_dim = feature_emb.shape[-1]
        X_0 = feature_emb
        X_i = X_0
        pooled = []
        for idx in range(len(self.cin_hidden_units)):
            # all (field h, map m) elementwise products: (b, h, m, d)
            interaction = torch.einsum("bhd,bmd->bhmd", X_0, X_i)
            interaction = interaction.view(batch_size, -1, embedding_dim)
            X_i = self.cin_layer["layer_" + str(idx + 1)](interaction)
            X_i = X_i.view(batch_size, -1, embedding_dim)
            pooled.append(X_i.sum(dim=-1))
        return self.fc(torch.cat(pooled, dim=-1))
class CrossInteraction(nn.Module):
    """One DCN cross step: (w^T x_i) * x_0 + b."""

    def __init__(self, input_dim):
        super(CrossInteraction, self).__init__()
        self.weight = nn.Linear(input_dim, 1, bias=False)
        self.bias = nn.Parameter(torch.zeros(input_dim))

    def forward(self, X_0, X_i):
        # scalar projection of x_i scales the original input x_0
        return self.bias + X_0 * self.weight(X_i)


class CrossNet(nn.Module):
    """Cross network (DCN): a stack of CrossInteraction steps, each added
    back to the running state as a residual."""

    def __init__(self, input_dim, num_layers):
        super(CrossNet, self).__init__()
        self.num_layers = num_layers
        self.cross_net = nn.ModuleList(CrossInteraction(input_dim)
                                       for _ in range(num_layers))

    def forward(self, X_0):
        X_i = X_0  # b x dim
        for layer in self.cross_net:
            X_i = X_i + layer(X_0, X_i)
        return X_i


class CrossNetV2(nn.Module):
    """Cross network v2 (DCN-V2): full-matrix cross layers with residual
    connections, X_{i+1} = X_i + X_0 * W X_i."""

    def __init__(self, input_dim, num_layers):
        super(CrossNetV2, self).__init__()
        self.num_layers = num_layers
        self.cross_layers = nn.ModuleList(nn.Linear(input_dim, input_dim)
                                          for _ in range(num_layers))

    def forward(self, X_0):
        X_i = X_0  # b x dim
        for layer in self.cross_layers:
            X_i = X_i + X_0 * layer(X_i)
        return X_i
class CrossNetMix(nn.Module):
    """CrossNetMix (DCN-V2 with a mixture of low-rank experts).

    Improves CrossNetV2 by:
      1. a mixture of experts (MoE) that learns feature interactions in
         different low-rank subspaces;
      2. nonlinear (tanh) transformations inside the low-dimensional space.

    Input:  (batch_size, in_features)
    Output: (batch_size, in_features)
    """

    def __init__(self, in_features, layer_num=2, low_rank=32, num_experts=4):
        super(CrossNetMix, self).__init__()
        self.layer_num = layer_num
        self.num_experts = num_experts

        # U: per layer (num_experts, in_features, low_rank) — projects the
        # low-rank representation back to R^d
        self.U_list = torch.nn.ParameterList([nn.Parameter(nn.init.xavier_normal_(
            torch.empty(num_experts, in_features, low_rank))) for i in range(self.layer_num)])
        # V: per layer (num_experts, in_features, low_rank) — projects the
        # input down to R^r
        self.V_list = torch.nn.ParameterList([nn.Parameter(nn.init.xavier_normal_(
            torch.empty(num_experts, in_features, low_rank))) for i in range(self.layer_num)])
        # C: per layer (num_experts, low_rank, low_rank) — mixes within the
        # low-rank space
        self.C_list = torch.nn.ParameterList([nn.Parameter(nn.init.xavier_normal_(
            torch.empty(num_experts, low_rank, low_rank))) for i in range(self.layer_num)])
        # one scalar gate per expert, shared across layers
        self.gating = nn.ModuleList([nn.Linear(in_features, 1, bias=False) for i in range(self.num_experts)])

        self.bias = torch.nn.ParameterList([nn.Parameter(nn.init.zeros_(
            torch.empty(in_features, 1))) for i in range(self.layer_num)])

    def forward(self, inputs):
        x_0 = inputs.unsqueeze(2)  # (bs, in_features, 1)
        x_l = x_0
        for i in range(self.layer_num):
            output_of_experts = []
            gating_score_of_experts = []
            for expert_id in range(self.num_experts):
                # (1) G(x_l): gating score from the current state
                gating_score_of_experts.append(self.gating[expert_id](x_l.squeeze(2)))

                # (2) E(x_l): project to the low-rank space
                v_x = torch.matmul(self.V_list[i][expert_id].t(), x_l)  # (bs, low_rank, 1)

                # nonlinear activation in low rank space
                v_x = torch.tanh(v_x)
                v_x = torch.matmul(self.C_list[i][expert_id], v_x)
                v_x = torch.tanh(v_x)

                # project back to R^d
                uv_x = torch.matmul(self.U_list[i][expert_id], v_x)  # (bs, in_features, 1)

                dot_ = uv_x + self.bias[i]
                dot_ = x_0 * dot_  # Hadamard-product with the original input

                output_of_experts.append(dot_.squeeze(2))

            # (3) mixture of low-rank experts
            output_of_experts = torch.stack(output_of_experts, 2)  # (bs, in_features, num_experts)
            gating_score_of_experts = torch.stack(gating_score_of_experts, 1)  # (bs, num_experts, 1)
            moe_out = torch.matmul(output_of_experts, gating_score_of_experts.softmax(1))
            x_l = moe_out + x_l  # (bs, in_features, 1)

        # BUGFIX: squeeze only the trailing singleton dimension; the previous
        # bare .squeeze() also dropped the batch dimension when batch_size == 1,
        # returning shape (in_features,) instead of (1, in_features).
        x_l = x_l.squeeze(-1)  # (bs, in_features)
        return x_l
class HolographicInteraction(nn.Module):
    """Holographic pairwise feature interaction.

    Supported interaction types: "hadamard_product", "circular_convolution"
    (computed via FFT), and "circular_correlation" (FFT with the left
    operand's spectrum conjugated).

    Input:  (batch, num_fields, embedding_dim)
    Output: (batch, num_fields*(num_fields-1)/2, embedding_dim)
    """

    def __init__(self, num_fields, interaction_type="circular_convolution"):
        super(HolographicInteraction, self).__init__()
        self.interaction_type = interaction_type
        if self.interaction_type == "circular_correlation":
            # multiplying the imaginary component by -1 conjugates the spectrum
            self.conj_sign = nn.Parameter(torch.tensor([1., -1.]), requires_grad=False)
        # upper-triangular (i, j) field-pair indices, frozen so they move with
        # the module across devices
        self.triu_index = nn.Parameter(torch.triu_indices(num_fields, num_fields, offset=1),
                                       requires_grad=False)

    @staticmethod
    def _spectral_product(fft1, fft2):
        # (a + bi)(c + di) = (ac - bd) + (ad + bc)i on view_as_real tensors,
        # followed by the inverse FFT; the real part is the result
        real = fft1[..., 0] * fft2[..., 0] - fft1[..., 1] * fft2[..., 1]
        imag = fft1[..., 0] * fft2[..., 1] + fft1[..., 1] * fft2[..., 0]
        product = torch.stack([real, imag], dim=-1)
        return torch.view_as_real(torch.fft.ifft(torch.view_as_complex(product)))[..., 0]

    def forward(self, feature_emb):
        emb1 = torch.index_select(feature_emb, 1, self.triu_index[0])
        emb2 = torch.index_select(feature_emb, 1, self.triu_index[1])
        if self.interaction_type == "hadamard_product":
            return emb1 * emb2
        if self.interaction_type == "circular_convolution":
            return self._spectral_product(torch.view_as_real(torch.fft.fft(emb1)),
                                          torch.view_as_real(torch.fft.fft(emb2)))
        if self.interaction_type == "circular_correlation":
            fft1_emb = torch.view_as_real(torch.fft.fft(emb1))
            fft1 = fft1_emb * self.conj_sign.expand_as(fft1_emb)
            return self._spectral_product(fft1, torch.view_as_real(torch.fft.fft(emb2)))
        raise ValueError("interaction_type={} not supported.".format(self.interaction_type))
class InnerProductInteraction(nn.Module):
    """Inner-product style interactions over field embeddings.

    `output` selects the result:
        product_sum:         (bs, 1) — sum of all pairwise inner products
        bi_interaction:      (bs, dim) — FM bi-interaction vector
        inner_product:       (bs, f*(f-1)/2) — pairwise inner products
        elementwise_product: (bs, f*(f-1)/2, dim) — pairwise Hadamard products
    """

    def __init__(self, num_fields, output="product_sum"):
        super(InnerProductInteraction, self).__init__()
        self._output_type = output
        if output not in ["product_sum", "bi_interaction", "inner_product", "elementwise_product"]:
            raise ValueError("InnerProductInteraction output={} is not supported.".format(output))
        if output == "inner_product":
            self.interaction_units = int(num_fields * (num_fields - 1) / 2)
            # boolean mask selecting the strict upper triangle of the Gram matrix
            self.triu_mask = nn.Parameter(torch.triu(torch.ones(num_fields, num_fields), 1).bool(),
                                          requires_grad=False)
        elif output == "elementwise_product":
            self.triu_index = nn.Parameter(torch.triu_indices(num_fields, num_fields, offset=1),
                                           requires_grad=False)

    def forward(self, feature_emb):
        output_type = self._output_type
        if output_type in ("product_sum", "bi_interaction"):
            # FM identity: 0.5 * ((sum_i v_i)^2 - sum_i v_i^2)
            sum_then_square = torch.sum(feature_emb, dim=1) ** 2
            square_then_sum = torch.sum(feature_emb ** 2, dim=1)
            bi_interaction = (sum_then_square - square_then_sum) * 0.5
            if output_type == "bi_interaction":
                return bi_interaction
            return bi_interaction.sum(dim=-1, keepdim=True)
        if output_type == "inner_product":
            gram = torch.bmm(feature_emb, feature_emb.transpose(1, 2))
            upper = torch.masked_select(gram, self.triu_mask)
            return upper.view(-1, self.interaction_units)
        # elementwise_product
        emb1 = torch.index_select(feature_emb, 1, self.triu_index[0])
        emb2 = torch.index_select(feature_emb, 1, self.triu_index[1])
        return emb1 * emb2
class InteractionMachine(nn.Module):
    """Interaction Machine (IM): closed-form order-k feature interactions
    derived from power sums of the field embeddings (Newton's identities).
    Supports order 1..5, with an optional BatchNorm before the final linear
    projection to a single logit.
    """

    def __init__(self, embedding_dim, order=2, batch_norm=False):
        super(InteractionMachine, self).__init__()
        assert order < 6, "order={} is not supported.".format(order)
        self.order = order
        self.bn = nn.BatchNorm1d(embedding_dim * order) if batch_norm else None
        self.fc = nn.Linear(order * embedding_dim, 1)

    def second_order(self, p1, p2):
        return (p1.pow(2) - p2) / 2

    def third_order(self, p1, p2, p3):
        return (p1.pow(3) - 3 * p1 * p2 + 2 * p3) / 6

    def fourth_order(self, p1, p2, p3, p4):
        return (p1.pow(4) - 6 * p1.pow(2) * p2 + 3 * p2.pow(2)
                + 8 * p1 * p3 - 6 * p4) / 24

    def fifth_order(self, p1, p2, p3, p4, p5):
        return (p1.pow(5) - 10 * p1.pow(3) * p2 + 20 * p1.pow(2) * p3 - 30 * p1 * p4
                - 20 * p2 * p3 + 15 * p1 * p2.pow(2) + 24 * p5) / 120

    def forward(self, X):
        features = []
        power = X  # running elementwise power X^k
        if self.order >= 1:
            p1 = power.sum(dim=1)
            features.append(p1)
        if self.order >= 2:
            power = power * X
            p2 = power.sum(dim=1)
            features.append(self.second_order(p1, p2))
        if self.order >= 3:
            power = power * X
            p3 = power.sum(dim=1)
            features.append(self.third_order(p1, p2, p3))
        if self.order >= 4:
            power = power * X
            p4 = power.sum(dim=1)
            features.append(self.fourth_order(p1, p2, p3, p4))
        if self.order == 5:
            power = power * X
            p5 = power.sum(dim=1)
            features.append(self.fifth_order(p1, p2, p3, p4, p5))
        concatenated = torch.cat(features, dim=-1)
        if self.bn is not None:
            concatenated = self.bn(concatenated)
        return self.fc(concatenated)
class MaskedAveragePooling(nn.Module):
    """Average-pool a sequence of embeddings while ignoring padded positions.

    When `mask` is not supplied, positions whose embedding sums to zero are
    treated as padding tokens.
    """

    def __init__(self):
        super(MaskedAveragePooling, self).__init__()

    def forward(self, embedding_matrix, mask=None):
        total = torch.sum(embedding_matrix, dim=1)
        if mask is None:
            # all-zero vectors are assumed to be padding
            mask = embedding_matrix.sum(dim=-1) != 0
        valid_count = mask.float().sum(-1, keepdim=True)
        # epsilon guards against division by zero for fully padded sequences
        return total / (valid_count + 1e-12)


class MaskedSumPooling(nn.Module):
    """Sum-pool a sequence of embeddings (zero padding contributes nothing)."""

    def __init__(self):
        super(MaskedSumPooling, self).__init__()

    def forward(self, embedding_matrix):
        # mask by zeros
        return torch.sum(embedding_matrix, dim=1)


class KMaxPooling(nn.Module):
    """Keep the k largest values along `dim`, preserving their original order."""

    def __init__(self, k, dim):
        super(KMaxPooling, self).__init__()
        self.k = k
        self.dim = dim

    def forward(self, X):
        topk_idx = X.topk(self.k, dim=self.dim)[1]
        # re-sort indices so selected values keep their input order
        ordered_idx = topk_idx.sort(dim=self.dim)[0]
        return X.gather(self.dim, ordered_idx)
-------------------------------------------------------------------------------- 1 | from .rank_model import BaseModel 2 | from .multitask_model import MultiTaskModel -------------------------------------------------------------------------------- /fuxictr/pytorch/models/multitask_model.py: -------------------------------------------------------------------------------- 1 | # ========================================================================= 2 | # Copyright (C) 2022. FuxiCTR Authors. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
class MultiTaskModel(BaseModel):
    """Base class for multi-task models.

    Extends BaseModel with one output head per task: a per-task output
    activation, a per-task loss function, and per-task evaluation/prediction
    results keyed by the label names in ``feature_map.labels``.
    """

    def __init__(self, 
                 feature_map, 
                 model_id="MultiTaskModel", 
                 task=["binary_classification"],  # NOTE(review): mutable default argument; shared across instantiations if ever mutated
                 num_tasks=1,
                 loss_weight='EQ',
                 gpu=-1, 
                 monitor="AUC", 
                 save_best_only=True, 
                 monitor_mode="max", 
                 early_stop_patience=2, 
                 eval_steps=None, 
                 embedding_regularizer=None, 
                 net_regularizer=None, 
                 reduce_lr_on_plateau=True, 
                 **kwargs):
        # The parent is always initialized with a single "binary_classification"
        # task; per-task behavior is realized via output_activation/loss_fn below.
        super(MultiTaskModel, self).__init__(feature_map=feature_map, 
                                             model_id=model_id, 
                                             task="binary_classification", 
                                             gpu=gpu, 
                                             loss_weight=loss_weight,
                                             monitor=monitor, 
                                             save_best_only=save_best_only, 
                                             monitor_mode=monitor_mode, 
                                             early_stop_patience=early_stop_patience, 
                                             eval_steps=eval_steps, 
                                             embedding_regularizer=embedding_regularizer, 
                                             net_regularizer=net_regularizer, 
                                             reduce_lr_on_plateau=reduce_lr_on_plateau, 
                                             **kwargs)
        self.device = get_device(gpu)
        self.num_tasks = num_tasks
        self.loss_weight = loss_weight
        # One output activation per task: either one per entry of a task list,
        # or the same task type replicated num_tasks times.
        if isinstance(task, list):
            assert len(task) == num_tasks, "the number of tasks must equal the length of \"task\""
            self.output_activation = nn.ModuleList([self.get_output_activation(str(t)) for t in task])
        else:
            self.output_activation = nn.ModuleList([self.get_output_activation(task) for _ in range(num_tasks)])

    def compile(self, optimizer, loss, lr):
        """Bind the optimizer and one loss function per task."""
        self.optimizer = get_optimizer(optimizer, self.parameters(), lr)
        if isinstance(loss, list):
            self.loss_fn = [get_loss(l) for l in loss]
        else:
            # a single loss spec is replicated for every task
            self.loss_fn = [get_loss(loss) for _ in range(self.num_tasks)]

    def get_labels(self, inputs):
        """Extract per-task label columns from a batch, as (bs, 1) float tensors."""
        labels = self.feature_map.labels
        y = [inputs[:, self.feature_map.get_column_index(labels[i])].to(self.device).float().view(-1, 1)
             for i in range(len(labels))]
        return y

    def compute_loss(self, return_dict, y_true):
        """Sum per-task losses (read from "<label>_pred" keys) plus regularization.

        NOTE(review): when loss_weight != 'EQ' the per-task losses stay a list
        and the "+=" below would not produce a scalar — presumably subclasses
        implementing other weighting schemes override this; confirm.
        """
        labels = self.feature_map.labels
        loss = [self.loss_fn[i](return_dict["{}_pred".format(labels[i])], y_true[i], reduction='mean')
                for i in range(len(labels))]
        if self.loss_weight == 'EQ':
            # Default: All losses are weighted equally
            loss = torch.sum(torch.stack(loss))
        loss += self.regularization_loss()
        return loss

    def evaluate(self, data_generator, metrics=None):
        """Run inference over `data_generator` and compute metrics per task.

        Returns a dict with "<label>_<metric>" entries for every task plus the
        across-task mean for each metric name.
        """
        self.eval()  # set to evaluation mode
        with torch.no_grad():
            y_pred_all = defaultdict(list)
            y_true_all = defaultdict(list)
            labels = self.feature_map.labels
            group_id = []
            if self._verbose > 0:
                data_generator = tqdm(data_generator, disable=False, file=sys.stdout)
            for batch_data in data_generator:
                return_dict = self.forward(batch_data)
                batch_y_true = self.get_labels(batch_data)
                for i in range(len(labels)):
                    y_pred_all[labels[i]].extend(
                        return_dict["{}_pred".format(labels[i])].data.cpu().numpy().reshape(-1))
                    y_true_all[labels[i]].extend(batch_y_true[i].data.cpu().numpy().reshape(-1))
                # group ids enable group-wise metrics (e.g. per-user AUC)
                if self.feature_map.group_id is not None:
                    group_id.extend(self.get_group_id(batch_data).numpy().reshape(-1))
            all_val_logs = {}
            mean_val_logs = defaultdict(list)
            group_id = np.array(group_id) if len(group_id) > 0 else None

            for i in range(len(labels)):
                y_pred = np.array(y_pred_all[labels[i]], np.float64)
                y_true = np.array(y_true_all[labels[i]], np.float64)
                if metrics is not None:
                    val_logs = self.evaluate_metrics(y_true, y_pred, metrics, group_id)
                else:
                    val_logs = self.evaluate_metrics(y_true, y_pred, self.validation_metrics, group_id)
                logging.info('[Metrics] [Task: {}] '.format(labels[i]) + ' - '.join(
                    '{}: {:.6f}'.format(k, v) for k, v in val_logs.items()))
                for k, v in val_logs.items():
                    # keep both the per-task value and the running list for the mean
                    all_val_logs['{}_{}'.format(labels[i], k)] = v
                    mean_val_logs[k].append(v)
            for k, v in mean_val_logs.items():
                mean_val_logs[k] = np.mean(v)
            all_val_logs.update(mean_val_logs)
            return all_val_logs

    def predict(self, data_generator):
        """Return per-task prediction lists keyed by label name."""
        self.eval()  # set to evaluation mode
        with torch.no_grad():
            y_pred_all = defaultdict(list)
            labels = self.feature_map.labels
            if self._verbose > 0:
                data_generator = tqdm(data_generator, disable=False, file=sys.stdout)
            for batch_data in data_generator:
                return_dict = self.forward(batch_data)
                for i in range(len(labels)):
                    y_pred_all[labels[i]].extend(
                        return_dict["{}_pred".format(labels[i])].data.cpu().numpy().reshape(-1))
            return y_pred_all
def seed_everything(seed=1029):
    """Seed Python, NumPy and PyTorch RNGs for reproducible runs."""
    random.seed(seed)
    os.environ["PYTHONHASHSEED"] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    # trade speed for determinism in cuDNN kernels
    torch.backends.cudnn.deterministic = True

def get_device(gpu=-1):
    """Return torch.device("cuda:<gpu>") when requested and available,
    otherwise the CPU device."""
    if gpu >= 0 and torch.cuda.is_available():
        device = torch.device("cuda:" + str(gpu))
    else:
        device = torch.device("cpu")
    return device

def get_optimizer(optimizer, params, lr):
    """Resolve `optimizer` into an instantiated torch optimizer over `params`.

    A string is treated as a torch.optim class name ("adam" is normalized to
    "Adam"). Anything that fails to resolve or instantiate raises
    NotImplementedError.
    """
    if isinstance(optimizer, str):
        if optimizer.lower() == "adam":
            optimizer = "Adam"
    try:
        optimizer = getattr(torch.optim, optimizer)(params, lr=lr)
    # narrowed from a bare `except:` so KeyboardInterrupt/SystemExit are not
    # swallowed; the cause is chained for easier debugging
    except Exception as e:
        raise NotImplementedError("optimizer={} is not supported.".format(optimizer)) from e
    return optimizer
def get_loss(loss):
    """Resolve a loss spec into a callable loss function.

    String aliases "bce"/"binary_crossentropy"/"binary_cross_entropy" map to
    torch.nn.functional.binary_cross_entropy; any other string is looked up in
    torch.nn.functional, with a legacy fallback to a project-level `losses`
    module.

    Raises:
        NotImplementedError: when no matching loss function is found.
    """
    if isinstance(loss, str):
        if loss in ["bce", "binary_crossentropy", "binary_cross_entropy"]:
            loss = "binary_cross_entropy"
    try:
        # FIX: use the public torch.nn.functional instead of the private
        # torch.functional.F alias (same functions, stable import path)
        loss_fn = getattr(torch.nn.functional, loss)
    except (AttributeError, TypeError):
        try:
            # NOTE(review): eval on a config-supplied string — only safe for
            # trusted configuration files; requires a `losses` module in scope
            loss_fn = eval("losses." + loss)
        except Exception:
            raise NotImplementedError("loss={} is not supported.".format(loss))
    return loss_fn

def get_regularizer(reg):
    """Parse a regularizer spec into a list of (p_norm, weight) tuples.

    Accepted forms: a float (interpreted as an L2 weight), "l1(w)", "l2(w)",
    and "l1_l2(w1,w2)".
    """
    reg_pair = []  # of tuples (p_norm, weight)
    if isinstance(reg, float):
        reg_pair.append((2, reg))
    elif isinstance(reg, str):
        try:
            if reg.startswith("l1(") or reg.startswith("l2("):
                # reg[1] is the norm digit ('1' or '2')
                reg_pair.append((int(reg[1]), float(reg.rstrip(")").split("(")[-1])))
            elif reg.startswith("l1_l2"):
                l1_reg, l2_reg = reg.rstrip(")").split("(")[-1].split(",")
                reg_pair.append((1, float(l1_reg)))
                reg_pair.append((2, float(l2_reg)))
            else:
                raise NotImplementedError
        # narrowed from a bare `except:`; any parse failure maps to the same error
        except Exception:
            raise NotImplementedError("regularizer={} is not supported.".format(reg))
    return reg_pair

def get_activation(activation, hidden_units=None):
    """Resolve an activation spec (string, list of strings, or module) into
    nn.Module instance(s).

    "prelu" and "dice" need `hidden_units` (an int) to size their parameters.
    A list spec returns a list of activations, paired elementwise with
    `hidden_units` when given.
    """
    if isinstance(activation, str):
        if activation.lower() in ["prelu", "dice"]:
            # FIX: isinstance instead of `type(...) == int` comparison
            assert isinstance(hidden_units, int)
        if activation.lower() == "relu":
            return nn.ReLU()
        elif activation.lower() == "sigmoid":
            return nn.Sigmoid()
        elif activation.lower() == "tanh":
            return nn.Tanh()
        elif activation.lower() == "softmax":
            return nn.Softmax(dim=-1)
        elif activation.lower() == "prelu":
            return nn.PReLU(hidden_units, init=0.1)
        elif activation.lower() == "dice":
            from fuxictr.pytorch.layers.activations import Dice
            return Dice(hidden_units)
        else:
            # fall back to any nn module with a matching class name
            return getattr(nn, activation)()
    elif isinstance(activation, list):
        if hidden_units is not None:
            assert len(activation) == len(hidden_units)
            return [get_activation(act, units) for act, units in zip(activation, hidden_units)]
        else:
            return [get_activation(act) for act in activation]
    return activation
supported."\ 118 | .format(initializer)) 119 | return initializer 120 | 121 | def save_init_embs(model, data_path="init_embs.h5"): 122 | emb_dict = dict() 123 | for k, v in model.state_dict().items(): 124 | if "embedding_layers" in k: 125 | if v.size(-1) > 1: 126 | f_name = re.findall(r"embedding_layers.(.*).weight", k)[0] 127 | emb_dict[f_name] = v.cpu().numpy() 128 | with h5py.File(data_path, 'w') as hf: 129 | for key, arr in emb_dict.items(): 130 | hf.create_dataset(key, data=arr) 131 | 132 | def load_init_embs(model, data_path="init_embs.h5"): 133 | state_dict = model.state_dict() 134 | f_name_dict = dict() 135 | for k in state_dict.keys(): 136 | if "embedding_layers" in k and state_dict[k].size(-1) > 1: 137 | f_name = re.findall(r"embedding_layers.(.*).weight", k)[0] 138 | f_name_dict[f_name] = k 139 | with h5py.File(data_path, 'r') as hf: 140 | for key in hf.keys(): 141 | if key in f_name_dict: 142 | state_dict[f_name_dict[key]] = torch.from_numpy(hf[key][:]) 143 | model.load_state_dict(state_dict) -------------------------------------------------------------------------------- /fuxictr/tensorflow/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LiangcaiSu/STEM/769e2af0d0d1a0be9f58b475b95e05f502a20df5/fuxictr/tensorflow/__init__.py -------------------------------------------------------------------------------- /fuxictr/tensorflow/dataloaders/__init__.py: -------------------------------------------------------------------------------- 1 | from .tf_dataloader import TFRecordDataLoader -------------------------------------------------------------------------------- /fuxictr/tensorflow/dataloaders/tf_dataloader.py: -------------------------------------------------------------------------------- 1 | # ========================================================================= 2 | # Copyright (C) 2023. Huawei Technologies Co., Ltd. All rights reserved. 
class TFRecordDataLoader(object):
    """Builds tf.data input pipelines over TFRecord files for train/valid/test.

    The parsing schema is derived from `feature_map`: numeric features are
    float32 scalars, categorical/meta features int64 scalars, sequence
    features int64 vectors of length max_len; labels are float32 scalars.
    """

    def __init__(self, feature_map, stage="both", train_data=None, valid_data=None, test_data=None,
                 batch_size=32, shuffle=True, drop_remainder=False, **kwargs):
        logging.info("Loading data...")
        self.stage = stage
        self.train_data = train_data
        self.valid_data = valid_data
        self.test_data = test_data
        self.batch_size = batch_size
        self.shuffle = shuffle
        self.drop_remainder = drop_remainder
        self.schema = dict()
        for feat, feat_spec in feature_map.features.items():
            if feat_spec["type"] == "numeric":
                self.schema[feat] = tf.io.FixedLenFeature(dtype=tf.float32, shape=1)
            elif feat_spec["type"] in ["categorical", "meta"]:
                self.schema[feat] = tf.io.FixedLenFeature(dtype=tf.int64, shape=1)
            elif feat_spec["type"] == "sequence":
                self.schema[feat] = tf.io.FixedLenFeature(dtype=tf.int64, shape=feat_spec["max_len"])
        for label in feature_map.labels:
            self.schema[label] = tf.io.FixedLenFeature(dtype=tf.float32, shape=1)

    def input_fn(self, filenames, batch_size=32, shuffle=True):
        """Build a parsed, batched tf.data.Dataset from TFRecord `filenames`."""
        def parse_example(example):
            example_dict = tf.io.parse_single_example(example, features=self.schema)
            return example_dict
        dataset = tf.data.TFRecordDataset(filenames).map(parse_example, num_parallel_calls=1)
        # FIX: shuffle individual examples BEFORE batching. The previous order
        # (prefetch -> batch -> shuffle) only reshuffled whole batches, which
        # weakens shuffling considerably; prefetch goes last so the pipeline
        # overlaps batch preparation with training.
        if shuffle:
            dataset = dataset.shuffle(batch_size * 10)
        dataset = dataset.batch(batch_size, drop_remainder=self.drop_remainder)
        dataset = dataset.prefetch(buffer_size=1)
        return dataset

    def make_iterator(self):
        """Return datasets by `stage`: (train, valid), test, or all three."""
        if self.stage == "train":
            logging.info("Loading train and validation data done.")
            return self.input_fn(self.train_data, batch_size=self.batch_size, shuffle=self.shuffle), \
                   self.input_fn(self.valid_data, batch_size=self.batch_size, shuffle=False)
        elif self.stage == "test":
            logging.info("Loading test data done.")
            return self.input_fn(self.test_data, batch_size=self.batch_size, shuffle=False)
        else:
            logging.info("Loading data done.")
            return self.input_fn(self.train_data, batch_size=self.batch_size, shuffle=self.shuffle), \
                   self.input_fn(self.valid_data, batch_size=self.batch_size, shuffle=False), \
                   self.input_fn(self.test_data, batch_size=self.batch_size, shuffle=False)
import tensorflow as tf
from tensorflow.keras.layers import Layer
from .logistic_regression import LogisticRegression
from ..interactions import InnerProductInteraction


class FactorizationMachine(Layer):
    """Classic FM scorer: first-order linear term + second-order interaction.

    Args:
        feature_map: project FeatureMap; only `num_fields` is read here.
        regularizer: passed through to the LR embedding weights.
    """
    def __init__(self, feature_map, regularizer=None):
        super(FactorizationMachine, self).__init__()
        # second-order term, reduced to a (batch, 1) scalar per sample
        self.fm_layer = InnerProductInteraction(feature_map.num_fields, output="product_sum")
        # first-order term with its own 1-d embeddings and bias
        self.lr_layer = LogisticRegression(feature_map, use_bias=True, regularizer=regularizer)

    def call(self, X, feature_emb):
        # X: raw feature inputs (for the LR term); feature_emb: field embeddings
        lr_out = self.lr_layer(X)
        fm_out = self.fm_layer(feature_emb)
        output = fm_out + lr_out
        return output
from fuxictr.tensorflow.tf_utils import get_initializer, get_regularizer
from tensorflow.keras.layers import Layer, Dense


class Linear(Layer):
    """A thin wrapper around keras Dense with project-style initializer and
    regularizer specs resolved from strings/floats."""

    def __init__(self,
                 output_dim,
                 use_bias=True,
                 initializer="glorot_normal",
                 regularizer=None):
        super(Linear, self).__init__()
        # resolve the specs once, then build the underlying Dense layer
        weight_init = get_initializer(initializer)
        weight_reg = get_regularizer(regularizer)
        bias_reg = get_regularizer(regularizer)
        self.linear = Dense(
            output_dim,
            use_bias=use_bias,
            kernel_initializer=weight_init,
            kernel_regularizer=weight_reg,
            bias_regularizer=bias_reg,
        )

    def call(self, inputs):
        return self.linear(inputs)
import tensorflow as tf
from tensorflow.keras.layers import Layer
from fuxictr.tensorflow.layers import FeatureEmbedding


class LogisticRegression(Layer):
    """First-order (linear) term: sum of per-feature scalar weights + bias.

    Implemented as a FeatureEmbedding with embedding_dim=1, so each feature
    value contributes a single learned scalar.
    """
    def __init__(self, feature_map, use_bias=True, regularizer=None):
        super(LogisticRegression, self).__init__()
        self.bias = tf.Variable(tf.zeros(1)) if use_bias else None
        # embedding_dim=1 -> one scalar weight per feature value
        self.embedding_layer = FeatureEmbedding(feature_map, 1, use_pretrain=False,
                                                use_sharing=False,
                                                embedding_regularizer=regularizer,
                                                name_prefix="lr_")

    def call(self, X):
        # embed_weights: presumably (batch, num_fields, 1) — summing over the
        # field axis yields the (batch, 1) linear score
        embed_weights = self.embedding_layer(X)
        output = tf.reduce_sum(embed_weights, axis=1)
        if self.bias is not None:
            output += self.bias
        return output
import tensorflow as tf
from fuxictr.tensorflow.tf_utils import get_activation, get_initializer, get_regularizer
from tensorflow.keras.layers import Layer, Dense, BatchNormalization, LayerNormalization, Dropout


class MLP_Block(Layer):
    """Stack of Dense layers with optional normalization, activation, dropout.

    Per hidden layer the order is:
        Dense -> [norm] -> activation -> [norm if not norm_before_activation]
              -> dropout
    An optional output Dense (+ output_activation) is appended when
    output_dim is set.

    Args:
        input_dim: size of the input features.
        hidden_units: list of hidden layer widths.
        hidden_activations: name/list of names, resolved via get_activation.
        dropout_rates: float or per-layer list of dropout rates.
        batch_norm / layer_norm: mutually exclusive normalization choices.
    """
    def __init__(self,
                 input_dim,
                 hidden_units=[],
                 hidden_activations="ReLU",
                 output_dim=None,
                 output_activation=None,
                 dropout_rates=0.0,
                 batch_norm=False,
                 layer_norm=False,
                 norm_before_activation=True,
                 use_bias=True,
                 initializer="glorot_normal",
                 regularizer=None):
        super(MLP_Block, self).__init__()
        self.mlp = tf.keras.Sequential()
        # broadcast scalar settings to one entry per hidden layer
        if not isinstance(dropout_rates, list):
            dropout_rates = [dropout_rates] * len(hidden_units)
        if not isinstance(hidden_activations, list):
            hidden_activations = [hidden_activations] * len(hidden_units)
        hidden_activations = [get_activation(x) for x in hidden_activations]
        hidden_units = [input_dim] + hidden_units
        for idx in range(len(hidden_units) - 1):
            self.mlp.add(Dense(hidden_units[idx + 1], use_bias=use_bias,
                               kernel_initializer=get_initializer(initializer),
                               kernel_regularizer=get_regularizer(regularizer),
                               bias_regularizer=get_regularizer(regularizer)))
            if norm_before_activation:
                if batch_norm:
                    # FIX: keras BatchNormalization's first argument is `axis`,
                    # not a feature count (that is the PyTorch nn.BatchNorm1d
                    # API); passing hidden_units here failed at build time.
                    self.mlp.add(BatchNormalization())
                elif layer_norm:
                    # FIX: same as above — keras LayerNormalization takes `axis`.
                    self.mlp.add(LayerNormalization())
            if hidden_activations[idx]:
                self.mlp.add(hidden_activations[idx])
            if not norm_before_activation:
                if batch_norm:
                    self.mlp.add(BatchNormalization())
                elif layer_norm:
                    self.mlp.add(LayerNormalization())
            if dropout_rates[idx] > 0:
                # FIX: keras Dropout uses `rate=`, not `p=` (PyTorch API);
                # the old call raised TypeError.
                self.mlp.add(Dropout(rate=dropout_rates[idx]))
        if output_dim is not None:
            self.mlp.add(Dense(output_dim, use_bias=use_bias,
                               kernel_initializer=get_initializer(initializer),
                               kernel_regularizer=get_regularizer(regularizer),
                               bias_regularizer=get_regularizer(regularizer)))
        if output_activation is not None:
            self.mlp.add(get_activation(output_activation))

    def call(self, inputs, training=None):
        return self.mlp(inputs, training=training)
import tensorflow as tf
from tensorflow.keras.layers import Layer, Dense


class CrossInteraction(Layer):
    """One DCN cross term: x0 * (w . xi) + b, with b of shape (input_dim,)."""
    def __init__(self, input_dim):
        super(CrossInteraction, self).__init__()
        self.weight = Dense(1, use_bias=False)  # projects xi to a per-sample scalar
        self.bias = tf.Variable(tf.zeros(input_dim))

    def call(self, X_0, X_i):
        # (b, 1) scalar gate broadcast over X_0 (b, dim), plus learned bias
        interact_out = self.weight(X_i) * X_0 + self.bias
        return interact_out


class CrossNet(Layer):
    """DCN-v1 cross network: x_{i+1} = x_i + cross(x_0, x_i)."""
    def __init__(self, input_dim, num_layers):
        super(CrossNet, self).__init__()
        self.num_layers = num_layers
        self.cross_net = []
        for _ in range(self.num_layers):
            self.cross_net.append(CrossInteraction(input_dim))

    def call(self, X_0):
        X_i = X_0 # b x dim
        for i in range(self.num_layers):
            # residual update keeps first-order information flowing
            X_i = X_i + self.cross_net[i](X_0, X_i)
        return X_i


class CrossNetV2(Layer):
    """DCN-v2 cross network: x_{i+1} = x_i + x_0 * W_i(x_i) with full-rank W_i."""
    def __init__(self, input_dim, num_layers):
        super(CrossNetV2, self).__init__()
        self.num_layers = num_layers
        self.cross_layers = []
        for _ in range(self.num_layers):
            self.cross_layers.append(Dense(input_dim))

    def call(self, X_0):
        X_i = X_0 # b x dim
        for i in range(self.num_layers):
            X_i = X_i + X_0 * self.cross_layers[i](X_i)
        return X_i
import tensorflow as tf
import numpy as np
from tensorflow.keras.layers import Layer


class InnerProductInteraction(Layer):
    """Pairwise inner-product interactions over field embeddings.

    output: product_sum (bs x 1),
            bi_interaction (bs x dim),
            inner_product (bs x f(f-1)/2),
            elementwise_product (bs x f(f-1)/2 x emb_dim)
    """
    def __init__(self, num_fields, output="product_sum"):
        super(InnerProductInteraction, self).__init__()
        self.output_type = output
        if output not in ["product_sum", "bi_interaction", "inner_product", "elementwise_product"]:
            raise ValueError("InnerProductInteraction output={} is not supported.".format(output))
        if output == "inner_product":
            self.interaction_units = int(num_fields * (num_fields - 1) / 2)
            # boolean mask selecting the strict upper triangle of the f x f matrix
            self.triu_mask = tf.Variable(np.triu(np.ones((num_fields, num_fields)), 1).astype(bool),
                                         trainable=False)
        elif output == "elementwise_product":
            # index pairs (i, j) with i < j, used to gather field pairs
            self.triu_index = tf.Variable(np.triu_indices(num_fields, 1), trainable=False)

    def call(self, feature_emb):
        if self.output_type in ["product_sum", "bi_interaction"]:
            sum_of_square = tf.reduce_sum(feature_emb, axis=1) ** 2 # sum then square
            square_of_sum = tf.reduce_sum(feature_emb ** 2, axis=1) # square then sum
            bi_interaction = (sum_of_square - square_of_sum) * 0.5
            if self.output_type == "bi_interaction":
                return bi_interaction
            else:
                return tf.reduce_sum(bi_interaction, axis=-1, keepdims=True)
        elif self.output_type == "inner_product":
            # FIX: TF tensors have no .transpose(1, 2) method (that is the
            # PyTorch API), and 'bij,bji->bii' does not yield the pairwise
            # product matrix. Compute <e_i, e_j> for all field pairs directly.
            inner_product_matrix = tf.einsum("bif,bjf->bij", feature_emb, feature_emb)
            # FIX: apply the (f x f) mask across dims 1-2 of the batched
            # matrix, producing (batch, f(f-1)/2) upper-triangle values.
            triu_values = tf.boolean_mask(inner_product_matrix, self.triu_mask, axis=1)
            return tf.reshape(triu_values, (-1, self.interaction_units))
        elif self.output_type == "elementwise_product":
            emb1 = tf.gather(feature_emb, self.triu_index[0], axis=1)
            emb2 = tf.gather(feature_emb, self.triu_index[1], axis=1)
            return emb1 * emb2
import tensorflow as tf
from tensorflow.keras import Model


class MaskedSumPooling(Model):
    """Sum-pool an embedding tensor over axis 1 (the field/sequence axis)."""

    def __init__(self):
        super(MaskedSumPooling, self).__init__()

    def call(self, embedding_matrix):
        # FIX: keras invokes `call`, not `forward` (a PyTorch-ism); with
        # `forward` this layer raised NotImplementedError when called.
        return tf.reduce_sum(embedding_matrix, axis=1)
import os
import sys
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Model
from fuxictr.metrics import evaluate_metrics
from fuxictr.tensorflow.tf_utils import get_optimizer, get_loss
from fuxictr.utils import Monitor
import logging
from tqdm import tqdm


class BaseModel(Model):
    """Training/evaluation harness shared by the TF ranking models.

    Subclasses implement ``call`` and return a dict containing "y_pred".
    Handles the training loop, periodic evaluation, checkpointing of the
    best weights, early stopping, and LR decay on plateau.
    """
    def __init__(self,
                 feature_map,
                 model_id="BaseModel",
                 task="binary_classification",
                 monitor="AUC",
                 save_best_only=True,
                 monitor_mode="max",
                 early_stop_patience=2,
                 eval_steps=None,
                 reduce_lr_on_plateau=True,
                 **kwargs):
        super(BaseModel, self).__init__()
        self.valid_gen = None
        self._monitor_mode = monitor_mode
        self._monitor = Monitor(kv=monitor)
        self._early_stop_patience = early_stop_patience
        self._eval_steps = eval_steps # None default, that is evaluating every epoch
        self._save_best_only = save_best_only
        self._verbose = kwargs["verbose"]
        self._reduce_lr_on_plateau = reduce_lr_on_plateau
        self.feature_map = feature_map
        self.output_activation = self.get_output_activation(task)
        self.model_id = model_id
        self.model_dir = os.path.join(kwargs["model_root"], feature_map.dataset_id)
        self.checkpoint = os.path.abspath(os.path.join(self.model_dir, self.model_id + ".model"))
        self.validation_metrics = kwargs["metrics"]

    def compile(self, optimizer, loss, lr):
        # NOTE(review): intentionally shadows keras Model.compile with a
        # simpler project-specific signature.
        self.optimizer = get_optimizer(optimizer, lr)
        self.loss_fn = get_loss(loss)

    def add_loss(self, inputs):
        """Forward pass + task loss for one batch (shadows keras add_loss)."""
        return_dict = self(inputs, training=True)
        y_true = self.get_labels(inputs)
        # FIX: keras losses are called as loss(y_true, y_pred); the arguments
        # were previously swapped (PyTorch-style loss(input, target)), which
        # computes an incorrect binary cross-entropy.
        loss = self.loss_fn(y_true, return_dict["y_pred"])
        return loss

    def get_total_loss(self, inputs):
        total_loss = self.add_loss(inputs) + sum(self.losses) # with regularization
        return total_loss

    def get_inputs(self, inputs, feature_source=None):
        """Select model inputs, optionally filtered by feature source; meta
        features are always excluded."""
        if feature_source and type(feature_source) == str:
            feature_source = [feature_source]
        X_dict = dict()
        for feature, spec in self.feature_map.features.items():
            if (feature_source is not None) and (spec["source"] not in feature_source):
                continue
            if spec["type"] == "meta":
                continue
            X_dict[feature] = inputs[feature]
        return X_dict

    def get_labels(self, inputs):
        """Return the label tensor for the single configured label."""
        labels = self.feature_map.labels
        # FIX: this assert had been swallowed into the docstring; restore it
        # as executable code so multi-label misuse fails loudly.
        assert len(labels) == 1, "Please override get_labels() when using multiple labels!"
        y = inputs[labels[0]]
        return y

    def get_group_id(self, inputs):
        return inputs[self.feature_map.group_id]

    def lr_decay(self, factor=0.1, min_lr=1e-6):
        """Multiply the learning rate by `factor`, floored at `min_lr`."""
        self.optimizer.learning_rate = max(self.optimizer.learning_rate * factor, min_lr)
        # FIX: read back via `learning_rate`; `optimizer.lr` is a deprecated
        # alias removed in newer keras releases.
        return self.optimizer.learning_rate.numpy()

    def fit(self, data_generator, epochs=1, validation_data=None,
            max_gradient_norm=10., **kwargs):
        """Run up to `epochs` passes over `data_generator`, evaluating on
        `validation_data`, early-stopping on the monitored metric."""
        self.valid_gen = validation_data
        self._max_gradient_norm = max_gradient_norm
        # FIX: np.Inf is a deprecated alias of np.inf
        self._best_metric = np.inf if self._monitor_mode == "min" else -np.inf
        self._stopping_steps = 0
        self._stop_training = False
        self._total_steps = 0
        self._batch_index = 0
        self._epoch_index = 0

        logging.info("************ Epoch=1 start ************")
        for epoch in range(epochs):
            self._epoch_index = epoch
            self.train_epoch(data_generator)
            if self._stop_training:
                break
            else:
                logging.info("************ Epoch={} end ************".format(self._epoch_index + 1))
        logging.info("Training finished.")
        logging.info("Load best model: {}".format(self.checkpoint))
        self.load_weights(self.checkpoint)

    def train_epoch(self, data_generator):
        """One pass over the training data; evaluates every `_eval_steps`
        batches, or once at epoch end when `_eval_steps` is None."""
        self._batch_index = 0
        train_loss = 0
        if self._verbose == 0:
            batch_iterator = data_generator
        else:
            batch_iterator = tqdm(data_generator, disable=False, file=sys.stdout)
        for batch_index, batch_data in enumerate(batch_iterator):
            self._batch_index = batch_index
            self._total_steps += 1
            loss = self.train_step(batch_data)
            train_loss += loss.numpy()
            if (self._eval_steps is not None) and (self._total_steps % self._eval_steps == 0):
                logging.info("Train loss: {:.6f}".format(train_loss / self._eval_steps))
                train_loss = 0
                self.eval_step()
            if self._stop_training:
                break
        if self._eval_steps is None:
            logging.info("Train loss: {:.6f}".format(train_loss / (self._batch_index + 1)))
            self.eval_step()

    @tf.function
    def train_step(self, batch_data):
        """Single optimization step with global-norm gradient clipping."""
        with tf.GradientTape() as tape:
            loss = self.get_total_loss(batch_data)
        grads = tape.gradient(loss, self.trainable_variables)
        grads, _ = tf.clip_by_global_norm(grads, self._max_gradient_norm)
        self.optimizer.apply_gradients(zip(grads, self.trainable_variables))
        return loss

    def eval_step(self):
        logging.info('Evaluation @epoch {} - batch {}: '.format(self._epoch_index + 1, self._batch_index + 1))
        val_logs = self.evaluate(self.valid_gen, metrics=self._monitor.get_metrics())
        self.checkpoint_and_earlystop(val_logs)

    def checkpoint_and_earlystop(self, logs, min_delta=1e-6):
        """Track the monitored metric; save best weights, decay LR on
        plateau, and flag early stop after `_early_stop_patience` misses."""
        monitor_value = self._monitor.get_value(logs)
        if (self._monitor_mode == "min" and monitor_value > self._best_metric - min_delta) or \
           (self._monitor_mode == "max" and monitor_value < self._best_metric + min_delta):
            self._stopping_steps += 1
            logging.info("Monitor({})={:.6f} STOP!".format(self._monitor_mode, monitor_value))
            if self._reduce_lr_on_plateau:
                current_lr = self.lr_decay()
                logging.info("Reduce learning rate on plateau: {:.6f}".format(current_lr))
        else:
            self._stopping_steps = 0
            self._best_metric = monitor_value
            if self._save_best_only:
                logging.info("Save best model: monitor({})={:.6f}"\
                             .format(self._monitor_mode, monitor_value))
                self.save_weights(self.checkpoint)
        if self._stopping_steps >= self._early_stop_patience:
            self._stop_training = True
            logging.info("********* Epoch=={} early stop *********".format(self._epoch_index + 1))
        if not self._save_best_only:
            self.save_weights(self.checkpoint)

    def evaluate(self, data_generator, metrics=None):
        """Score `data_generator` and compute validation metrics (optionally
        grouped by feature_map.group_id)."""
        y_pred = []
        y_true = []
        group_id = []
        if self._verbose > 0:
            data_generator = tqdm(data_generator, disable=False, file=sys.stdout)
        for batch_data in data_generator:
            # FIX: evaluation must run in inference mode; training=True left
            # dropout/batch-norm in training behavior during validation.
            return_dict = self(batch_data, training=False)
            y_pred.extend(return_dict["y_pred"].numpy().reshape(-1))
            y_true.extend(self.get_labels(batch_data).numpy().reshape(-1))
            if self.feature_map.group_id is not None:
                group_id.extend(self.get_group_id(batch_data).numpy().reshape(-1))
        y_pred = np.array(y_pred, np.float64)
        y_true = np.array(y_true, np.float64)
        group_id = np.array(group_id) if len(group_id) > 0 else None
        if metrics is not None:
            val_logs = self.evaluate_metrics(y_true, y_pred, metrics, group_id)
        else:
            val_logs = self.evaluate_metrics(y_true, y_pred, self.validation_metrics, group_id)
        logging.info('[Metrics] ' + ' - '.join('{}: {:.6f}'.format(k, v) for k, v in val_logs.items()))
        return val_logs

    def evaluate_metrics(self, y_true, y_pred, metrics, group_id=None):
        return evaluate_metrics(y_true, y_pred, metrics, group_id)

    def get_output_activation(self, task):
        """Map task name to the final activation (sigmoid / identity)."""
        if task == "binary_classification":
            return tf.keras.layers.Activation("sigmoid")
        elif task == "regression":
            return tf.identity
        else:
            raise NotImplementedError("task={} is not supported.".format(task))
import random
import numpy as np
import os
import tensorflow as tf
from tensorflow.keras import optimizers
# NOTE(review): tensorflow.python.* is a private namespace; these symbols
# also exist under tf.keras.regularizers / tf.keras.initializers — confirm
# before upgrading TF.
from tensorflow.python.keras.regularizers import l2, l1, l1_l2
from tensorflow.python.keras.initializers import *
import logging


def seed_everything(seed=2019):
    """Seed python, numpy and tensorflow RNGs; a negative seed disables seeding."""
    logging.info('Setting random seed={}'.format(seed))
    if seed >= 0:
        random.seed(seed)
        np.random.seed(seed)
        os.environ['PYTHONHASHSEED'] = str(seed)
        tf.random.set_seed(seed)

def get_activation(activation):
    """Map an activation name to a keras layer; non-strings pass through unchanged."""
    if isinstance(activation, str):
        if activation.lower() == "relu":
            return tf.keras.layers.Activation("relu")
        elif activation.lower() == "sigmoid":
            return tf.keras.layers.Activation("sigmoid")
        elif activation.lower() == "tanh":
            return tf.keras.layers.Activation("tanh")
        elif activation.lower() == "softmax":
            return tf.keras.layers.Softmax()
        else:
            # fall back to any tf.keras.layers class by its exact name
            return getattr(tf.keras.layers, activation)()
    else:
        return activation

def get_optimizer(optimizer, learning_rate=1.0e-3):
    """Build a keras optimizer from a name; non-strings are returned as-is."""
    if isinstance(optimizer, str):
        if optimizer.lower() == 'adam':
            return optimizers.Adam(learning_rate=learning_rate)
        elif optimizer.lower() == 'ftrl':
            return optimizers.Ftrl(learning_rate=learning_rate, l1_regularization_strength=0.1)
        elif optimizer.lower() == 'adagrad':
            return optimizers.Adagrad(learning_rate=learning_rate)
        else:
            try:
                # any other optimizers class by its exact (case-sensitive) name
                return getattr(optimizers, optimizer)(learning_rate=learning_rate)
            except:
                raise ValueError('optimizer={} is not supported.'.format(optimizer))
    # already an optimizer instance
    return optimizer

def get_loss(loss):
    """Resolve a loss spec; only binary cross-entropy names are recognized."""
    if isinstance(loss, str):
        if loss in ['bce', 'binary_crossentropy', 'binary_cross_entropy']:
            loss = tf.keras.losses.BinaryCrossentropy(from_logits=False)
        else:
            raise ValueError('loss={} is not supported.'.format(loss))
    return loss

def get_regularizer(reg):
    """Build a keras regularizer: numbers mean L2; strings like "l1(1e-5)"
    or "l1_l2(1e-4, 1e-5)" are evaluated against the imported factories."""
    if type(reg) in [int, float]:
        return l2(reg)
    elif isinstance(reg, str):
        if '(' in reg:
            try:
                # NOTE(review): eval() of config strings — trusted config only
                return eval(reg)
            except:
                raise ValueError('reg={} is not supported.'.format(reg))
    # None or a plain keras identifier string passes through
    return reg

def get_initializer(initializer, seed=20222023):
    """Build a keras initializer from a spec string, injecting a fixed seed.

    "glorot_normal" -> glorot_normal(seed=...);
    "RandomNormal(stddev=0.01)" -> RandomNormal(stddev=0.01, seed=...).
    NOTE(review): eval() of config strings — trusted config only.
    """
    if isinstance(initializer, str):
        try:
            if '(' in initializer:
                return eval(initializer.rstrip(')') + ', seed={})'.format(seed))
            else:
                return eval(initializer)(seed=seed)
        except:
            raise ValueError("initializer={} not supported.".format(initializer))
    return initializer
# =========================================================================

import os
import logging
import logging.config
import yaml
import glob
import json
from collections import OrderedDict
import h5py


def load_config(config_dir, experiment_id):
    """Load the merged experiment params: model config overlaid with its dataset config."""
    params = load_model_config(config_dir, experiment_id)
    data_params = load_dataset_config(config_dir, params['dataset_id'])
    params.update(data_params)
    return params

def load_model_config(config_dir, experiment_id):
    """Find the 'Base' and experiment-specific sections in model_config.yaml
    (or model_config/*.yaml) and merge them, expid overriding Base.

    Raises:
        RuntimeError: if no model config file exists under config_dir.
    """
    model_configs = glob.glob(os.path.join(config_dir, "model_config.yaml"))
    if not model_configs:
        # Fallback layout: one yaml per model under model_config/
        model_configs = glob.glob(os.path.join(config_dir, "model_config/*.yaml"))
    if not model_configs:
        raise RuntimeError('config_dir={} is not valid!'.format(config_dir))
    found_params = dict()
    for config in model_configs:
        with open(config, 'r') as cfg:
            config_dict = yaml.load(cfg, Loader=yaml.FullLoader)
            if 'Base' in config_dict:
                found_params['Base'] = config_dict['Base']
            if experiment_id in config_dict:
                found_params[experiment_id] = config_dict[experiment_id]
        # Stop scanning once both Base and the expid section are found.
        if len(found_params) == 2:
            break
    # Update base and exp_id settings consecutively to allow overwriting when conflicts exist
    params = found_params.get('Base', {})
    params.update(found_params.get(experiment_id, {}))
    assert "dataset_id" in params, f'expid={experiment_id} is not valid in config.'
    params["model_id"] = experiment_id
    return params

def load_dataset_config(config_dir, dataset_id):
    """Look up the dataset_id section across dataset_config.yaml
    (or dataset_config/*.yaml); returns on first match.

    Raises:
        RuntimeError: if dataset_id appears in no config file.
    """
    params = {"dataset_id": dataset_id}
    dataset_configs = glob.glob(os.path.join(config_dir, "dataset_config.yaml"))
    if not dataset_configs:
        dataset_configs = glob.glob(os.path.join(config_dir, "dataset_config/*.yaml"))
    for config in dataset_configs:
        with open(config, "r") as cfg:
            config_dict = yaml.load(cfg, Loader=yaml.FullLoader)
            if dataset_id in config_dict:
                params.update(config_dict[dataset_id])
                return params
    raise RuntimeError(f'dataset_id={dataset_id} is not found in config.')

def set_logger(params):
    """Route logging to both <model_root>/<dataset_id>/<model_id>.log and stderr."""
    dataset_id = params['dataset_id']
    model_id = params.get('model_id', '')
    log_dir = os.path.join(params.get('model_root', './checkpoints'), dataset_id)
    os.makedirs(log_dir, exist_ok=True)
    log_file = os.path.join(log_dir, model_id + '.log')

    # logs will not show in the file without the two lines.
    # Remove any pre-existing handlers so basicConfig below takes effect.
    for handler in logging.root.handlers[:]:
        logging.root.removeHandler(handler)

    logging.basicConfig(level=logging.INFO,
                        format='%(asctime)s P%(process)d %(levelname)s %(message)s',
                        handlers=[logging.FileHandler(log_file, mode='w'),
                                  logging.StreamHandler()])

def print_to_json(data, sort_keys=True):
    """Render a params dict as indented JSON with all values stringified."""
    new_data = dict((k, str(v)) for k, v in data.items())
    if sort_keys:
        new_data = OrderedDict(sorted(new_data.items(), key=lambda x: x[0]))
    return json.dumps(new_data, indent=4)

def print_to_list(data):
    """Render a metrics dict as 'name: value - name: value' with 6 decimals."""
    return ' - '.join('{}: {:.6f}'.format(k, v) for k, v in data.items())

class Monitor(object):
    """Weighted combination of metrics used as a single monitoring score."""

    def __init__(self, kv):
        # A plain string means a single metric with weight 1.
        if isinstance(kv, str):
            kv = {kv: 1}
        self.kv_pairs = kv

    def get_value(self, logs):
        """Weighted sum of the monitored metrics; missing metrics count as 0."""
        value = 0
        for k, v in self.kv_pairs.items():
            value += logs.get(k, 0) * v
        return value

    def get_metrics(self):
        """Names of the metrics this monitor reads from the logs dict."""
        return list(self.kv_pairs.keys())

def load_h5(data_path, verbose=0):
    """Load every dataset in an h5 file into an in-memory dict of arrays."""
    # NOTE(review): logs only when verbose == 0, which looks inverted
    # (usually verbose > 0 means more output) — confirm intended.
    if verbose == 0:
        logging.info('Loading data from h5: ' + data_path)
    data_dict = dict()
    with h5py.File(data_path, 'r') as hf:
        for key in hf.keys():
            data_dict[key] = hf[key][:]
    return data_dict

# --------------------------------------------------------------------------------
# /fuxictr/version.py
# --------------------------------------------------------------------------------
__version__="2.1.2"

# --------------------------------------------------------------------------------
# /models/AITM.py
# --------------------------------------------------------------------------------
import torch
from torch import nn
from fuxictr.pytorch.models import MultiTaskModel
from fuxictr.pytorch.layers import FeatureEmbedding, MLP_Block
import numpy as np

'''
Reference Code: https://github.com/easezyc/Multitask-Recommendation-Library/blob/main/models/aitm.py
'''

class AITM(MultiTaskModel):
    """Adaptive Information Transfer Multi-task model: each task's bottom
    representation receives attention-weighted information from the
    previous task's bottom output (sequential task dependence)."""

    def __init__(self,
                 feature_map,
                 model_id="AITM",
                 gpu=-1,
                 task=["binary_classification"],
                 num_tasks=1,
                 loss_weight='EQ',
                 learning_rate=1e-3,
                 embedding_dim=10,
                 bottom_hidden_units=[64, 64, 64],
                 tower_hidden_units=[64, ],
                 hidden_activations="ReLU",
                 net_dropout=0,
                 batch_norm=False,
                 embedding_regularizer=None,
                 net_regularizer=None,
                 **kwargs):
        super(AITM, self).__init__(feature_map,
                                   task=task,
                                   loss_weight=loss_weight,
                                   num_tasks=num_tasks,
                                   model_id=model_id,
                                   gpu=gpu,
                                   embedding_regularizer=embedding_regularizer,
                                   net_regularizer=net_regularizer,
                                   **kwargs)
        self.embedding_layer = FeatureEmbedding(feature_map, embedding_dim)
        self.hidden_dim = bottom_hidden_units[-1]
        # One transfer projection g per adjacent task pair (task i-1 -> task i).
        self.g = torch.nn.ModuleList([torch.nn.Linear(bottom_hidden_units[-1], bottom_hidden_units[-1]) for i in range(num_tasks - 1)])
        # h1/h2/h3 produce V/K/Q for the 2-token attention over [transferred, own].
        self.h1 = torch.nn.Linear(bottom_hidden_units[-1], bottom_hidden_units[-1])
        self.h2 = torch.nn.Linear(bottom_hidden_units[-1], bottom_hidden_units[-1])
        self.h3 = torch.nn.Linear(bottom_hidden_units[-1], bottom_hidden_units[-1])

        # One independent bottom MLP per task over the flattened embeddings.
        self.bottom = nn.ModuleList([MLP_Block(input_dim=embedding_dim * feature_map.num_fields,
                                               hidden_units=bottom_hidden_units,
                                               hidden_activations=hidden_activations,
                                               output_activation=None,
                                               dropout_rates=net_dropout,
                                               batch_norm=batch_norm) for _ in range(num_tasks)])
        self.tower = nn.ModuleList([MLP_Block(input_dim=bottom_hidden_units[-1],
                                              output_dim=1,
                                              hidden_units=tower_hidden_units,
                                              hidden_activations=hidden_activations,
                                              output_activation=None,
                                              dropout_rates=net_dropout,
                                              batch_norm=batch_norm)
                                    for _ in range(num_tasks)])
        self.compile(kwargs["optimizer"], kwargs["loss"], learning_rate)
        self.reset_parameters()
        self.model_to_device()

    def forward(self, inputs):
        X = self.get_inputs(inputs)
        feature_emb = self.embedding_layer(X)
        bottom_output = [self.bottom[i](feature_emb.flatten(start_dim=1)) for i in range(self.num_tasks)] # [(?, bottom_hidden_units[-1])]
        # Sequentially transfer information from task i-1 into task i via
        # scaled dot-product attention over the pair [transferred, own].
        for i in range(1, self.num_tasks):
            p = self.g[i - 1](bottom_output[i - 1]).unsqueeze(1)
            q = bottom_output[i].unsqueeze(1)
            x = torch.cat([p, q], dim = 1)
            V = self.h1(x)
            K = self.h2(x)
            Q = self.h3(x)
            bottom_output[i] = torch.sum(torch.nn.functional.softmax(torch.sum(K * Q, 2, True) / np.sqrt(self.hidden_dim), dim=1) * V, 1)
        tower_output = [self.tower[i](bottom_output[i]) for i in range(self.num_tasks)]
        y_pred = [self.output_activation[i](tower_output[i]) for i in range(self.num_tasks)]
        return_dict = {}
        labels = self.feature_map.labels
        for i in range(self.num_tasks):
            return_dict["{}_pred".format(labels[i])] = y_pred[i]
        return return_dict

# --------------------------------------------------------------------------------
# /models/ESMM.py
# --------------------------------------------------------------------------------
import torch
from torch import nn
from fuxictr.pytorch.models import MultiTaskModel
from fuxictr.pytorch.layers import FeatureEmbedding, MLP_Block


class ESMM(MultiTaskModel):
    """Entire-Space Multi-task Model: predicts CTR and CTCVR = CTR * CVR
    from a shared embedding with two task towers (requires exactly 2 tasks)."""

    def __init__(self,
                 feature_map,
                 model_id="ESMM",
                 gpu=-1,
                 task=["binary_classification"],
                 num_tasks=1,
                 loss_weight='EQ',
                 learning_rate=1e-3,
                 embedding_dim=10,
                 tower_hidden_units=[64, ],
                 hidden_activations="ReLU",
                 net_dropout=0,
                 batch_norm=False,
                 embedding_regularizer=None,
                 net_regularizer=None,
                 **kwargs):
        super(ESMM, self).__init__(feature_map,
                                   task=task,
                                   loss_weight=loss_weight,
                                   num_tasks=num_tasks,
                                   model_id=model_id,
                                   gpu=gpu,
                                   embedding_regularizer=embedding_regularizer,
                                   net_regularizer=net_regularizer,
                                   **kwargs)
        self.embedding_layer = FeatureEmbedding(feature_map, embedding_dim)
34 | if num_tasks != 2: 35 | raise ValueError("the number of tasks must be equal to 2!") 36 | self.tower = nn.ModuleList([MLP_Block(input_dim=embedding_dim * feature_map.num_fields, 37 | output_dim=1, 38 | hidden_units=tower_hidden_units, 39 | hidden_activations=hidden_activations, 40 | output_activation=None, 41 | dropout_rates=net_dropout, 42 | batch_norm=batch_norm) 43 | for _ in range(num_tasks)]) 44 | self.compile(kwargs["optimizer"], kwargs["loss"], learning_rate) 45 | self.reset_parameters() 46 | self.model_to_device() 47 | 48 | def forward(self, inputs): 49 | X = self.get_inputs(inputs) 50 | feature_emb = self.embedding_layer(X).flatten(start_dim=1) 51 | tower_output = [self.tower[i](feature_emb) for i in range(self.num_tasks)] 52 | cvr_pred = self.output_activation[0](tower_output[0]) 53 | ctr_pred = self.output_activation[1](tower_output[1]) 54 | ctcvr_pred = ctr_pred * cvr_pred 55 | y_pred = [ctr_pred, ctcvr_pred] 56 | return_dict = {} 57 | labels = self.feature_map.labels 58 | for i in range(self.num_tasks): 59 | return_dict["{}_pred".format(labels[i])] = y_pred[i] 60 | return return_dict 61 | -------------------------------------------------------------------------------- /models/MMoE.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | from fuxictr.pytorch.models import MultiTaskModel 4 | from fuxictr.pytorch.layers import FeatureEmbedding, MLP_Block 5 | from fuxictr.pytorch.torch_utils import get_activation 6 | import logging 7 | 8 | class MMoE_Layer(nn.Module): 9 | def __init__(self, num_experts, num_tasks, input_dim, expert_hidden_units, gate_hidden_units, hidden_activations, 10 | net_dropout, batch_norm): 11 | super(MMoE_Layer, self).__init__() 12 | self.num_experts = num_experts 13 | self.num_tasks = num_tasks 14 | self.experts = nn.ModuleList([MLP_Block(input_dim=input_dim, 15 | hidden_units=expert_hidden_units, 16 | hidden_activations=hidden_activations, 17 | 
output_activation=None, 18 | dropout_rates=net_dropout, 19 | batch_norm=batch_norm) for _ in range(self.num_experts)]) 20 | self.gate = nn.ModuleList([MLP_Block(input_dim=input_dim, 21 | hidden_units=gate_hidden_units, 22 | output_dim=num_experts, 23 | hidden_activations=hidden_activations, 24 | output_activation=None, 25 | dropout_rates=net_dropout, 26 | batch_norm=batch_norm) for _ in range(self.num_tasks)]) 27 | self.gate_activation = get_activation('softmax') 28 | 29 | def forward(self, x): 30 | experts_output = torch.stack([self.experts[i](x) for i in range(self.num_experts)], 31 | dim=1) # (?, num_experts, dim) 32 | mmoe_output = [] 33 | gate_output_list = [] 34 | for i in range(self.num_tasks): 35 | gate_output = self.gate[i](x) 36 | if self.gate_activation is not None: 37 | gate_output = self.gate_activation(gate_output) # (?, num_experts) 38 | gate_output_list.append(gate_output.mean(dim=0)) 39 | mmoe_output.append(torch.sum(torch.multiply(gate_output.unsqueeze(-1), experts_output), dim=1)) 40 | return mmoe_output 41 | 42 | 43 | class MMoE(MultiTaskModel): 44 | def __init__(self, 45 | feature_map, 46 | task=["binary_classification"], 47 | num_tasks=1, 48 | model_id="MMoE", 49 | gpu=-1, 50 | learning_rate=1e-3, 51 | embedding_dim=10, 52 | num_experts=4, 53 | expert_hidden_units=[512, 256, 128], 54 | gate_hidden_units=[128, 64], 55 | tower_hidden_units=[128, 64], 56 | hidden_activations="ReLU", 57 | net_dropout=0, 58 | batch_norm=False, 59 | embedding_regularizer=None, 60 | net_regularizer=None, 61 | **kwargs): 62 | super(MMoE, self).__init__(feature_map, 63 | task=task, 64 | num_tasks=num_tasks, 65 | model_id=model_id, 66 | gpu=gpu, 67 | embedding_regularizer=embedding_regularizer, 68 | net_regularizer=net_regularizer, 69 | **kwargs) 70 | self.embedding_layer = FeatureEmbedding(feature_map, embedding_dim) 71 | self.num_experts = num_experts 72 | self.mmoe_layer = MMoE_Layer(num_experts=num_experts, 73 | num_tasks=self.num_tasks, 74 | input_dim=embedding_dim 
* feature_map.num_fields, 75 | expert_hidden_units=expert_hidden_units, 76 | gate_hidden_units=gate_hidden_units, 77 | hidden_activations=hidden_activations, 78 | net_dropout=net_dropout, 79 | batch_norm=batch_norm) 80 | self.tower = nn.ModuleList([MLP_Block(input_dim=expert_hidden_units[-1], 81 | output_dim=1, 82 | hidden_units=tower_hidden_units, 83 | hidden_activations=hidden_activations, 84 | output_activation=None, 85 | dropout_rates=net_dropout, 86 | batch_norm=batch_norm) 87 | for _ in range(num_tasks)]) 88 | self.compile(kwargs["optimizer"], kwargs["loss"], learning_rate) 89 | self.reset_parameters() 90 | self.model_to_device() 91 | 92 | def forward(self, inputs): 93 | X = self.get_inputs(inputs) 94 | feature_emb = self.embedding_layer(X) 95 | expert_output = self.mmoe_layer(feature_emb.flatten(start_dim=1)) 96 | tower_output = [self.tower[i](expert_output[i]) for i in range(self.num_tasks)] 97 | y_pred = [self.output_activation[i](tower_output[i]) for i in range(self.num_tasks)] 98 | return_dict = {} 99 | labels = self.feature_map.labels 100 | for i in range(self.num_tasks): 101 | return_dict["{}_pred".format(labels[i])] = y_pred[i] 102 | return return_dict -------------------------------------------------------------------------------- /models/OMoE.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | from fuxictr.pytorch.models import MultiTaskModel 4 | from fuxictr.pytorch.layers import FeatureEmbedding, MLP_Block 5 | from fuxictr.pytorch.torch_utils import get_activation 6 | 7 | 8 | class OMoE_Layer(nn.Module): 9 | def __init__(self, num_experts, num_tasks, input_dim, expert_hidden_units, gate_hidden_units, hidden_activations, 10 | net_dropout, batch_norm): 11 | super(OMoE_Layer, self).__init__() 12 | self.num_experts = num_experts 13 | self.num_tasks = num_tasks 14 | self.experts = nn.ModuleList([MLP_Block(input_dim=input_dim, 15 | hidden_units=expert_hidden_units, 16 | 
hidden_activations=hidden_activations, 17 | output_activation=None, 18 | dropout_rates=net_dropout, 19 | batch_norm=batch_norm) for _ in range(self.num_experts)]) 20 | self.gate = MLP_Block(input_dim=input_dim, 21 | hidden_units=gate_hidden_units, 22 | output_dim=num_experts, 23 | hidden_activations=hidden_activations, 24 | output_activation=None, 25 | dropout_rates=net_dropout, 26 | batch_norm=batch_norm) 27 | self.gate_activation = get_activation('softmax') 28 | 29 | def forward(self, x): 30 | experts_output = torch.stack([self.experts[i](x) for i in range(self.num_experts)], 31 | dim=1) # (?, num_experts, dim) 32 | omoe_output = [] 33 | for i in range(self.num_tasks): 34 | gate_output = self.gate(x) 35 | if self.gate_activation is not None: 36 | gate_output = self.gate_activation(gate_output) # (?, num_experts) 37 | omoe_output.append(torch.sum(torch.multiply(gate_output.unsqueeze(-1), experts_output), dim=1)) 38 | return omoe_output 39 | 40 | 41 | class OMoE(MultiTaskModel): 42 | def __init__(self, 43 | feature_map, 44 | task=["binary_classification"], 45 | num_tasks=1, 46 | model_id="OMoE", 47 | gpu=-1, 48 | learning_rate=1e-3, 49 | embedding_dim=10, 50 | num_experts=4, 51 | expert_hidden_units=[512, 256, 128], 52 | gate_hidden_units=[128, 64], 53 | tower_hidden_units=[128, 64], 54 | hidden_activations="ReLU", 55 | net_dropout=0, 56 | batch_norm=False, 57 | embedding_regularizer=None, 58 | net_regularizer=None, 59 | **kwargs): 60 | super(OMoE, self).__init__(feature_map, 61 | task=task, 62 | num_tasks=num_tasks, 63 | model_id=model_id, 64 | gpu=gpu, 65 | embedding_regularizer=embedding_regularizer, 66 | net_regularizer=net_regularizer, 67 | **kwargs) 68 | self.embedding_layer = FeatureEmbedding(feature_map, embedding_dim) 69 | self.omoe_layer = OMoE_Layer(num_experts=num_experts, 70 | num_tasks=self.num_tasks, 71 | input_dim=embedding_dim * feature_map.num_fields, 72 | expert_hidden_units=expert_hidden_units, 73 | gate_hidden_units=gate_hidden_units, 74 | 
hidden_activations=hidden_activations, 75 | net_dropout=net_dropout, 76 | batch_norm=batch_norm) 77 | self.tower = nn.ModuleList([MLP_Block(input_dim=expert_hidden_units[-1], 78 | output_dim=1, 79 | hidden_units=tower_hidden_units, 80 | hidden_activations=hidden_activations, 81 | output_activation=None, 82 | dropout_rates=net_dropout, 83 | batch_norm=batch_norm) 84 | for _ in range(num_tasks)]) 85 | self.compile(kwargs["optimizer"], kwargs["loss"], learning_rate) 86 | self.reset_parameters() 87 | self.model_to_device() 88 | 89 | def forward(self, inputs): 90 | X = self.get_inputs(inputs) 91 | feature_emb = self.embedding_layer(X) 92 | expert_output = self.omoe_layer(feature_emb.flatten(start_dim=1)) 93 | tower_output = [self.tower[i](expert_output[i]) for i in range(self.num_tasks)] 94 | y_pred = [self.output_activation[i](tower_output[i]) for i in range(self.num_tasks)] 95 | return_dict = {} 96 | labels = self.feature_map.labels 97 | for i in range(self.num_tasks): 98 | return_dict["{}_pred".format(labels[i])] = y_pred[i] 99 | return return_dict 100 | -------------------------------------------------------------------------------- /models/PLE.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | from fuxictr.pytorch.models import MultiTaskModel 4 | from fuxictr.pytorch.layers import FeatureEmbedding, MLP_Block 5 | from fuxictr.pytorch.torch_utils import get_activation 6 | import logging 7 | 8 | class CGC_Layer(nn.Module): 9 | def __init__(self, num_shared_experts, num_specific_experts, num_tasks, input_dim, expert_hidden_units, gate_hidden_units, hidden_activations, 10 | net_dropout, batch_norm): 11 | super(CGC_Layer, self).__init__() 12 | self.num_shared_experts = num_shared_experts 13 | self.num_specific_experts = num_specific_experts 14 | self.num_tasks = num_tasks 15 | self.shared_experts = nn.ModuleList([MLP_Block(input_dim=input_dim, 16 | hidden_units=expert_hidden_units, 17 | 
hidden_activations=hidden_activations, 18 | output_activation=None, 19 | dropout_rates=net_dropout, 20 | batch_norm=batch_norm) for _ in range(self.num_shared_experts)]) 21 | self.specific_experts = nn.ModuleList([nn.ModuleList([MLP_Block(input_dim=input_dim, 22 | hidden_units=expert_hidden_units, 23 | hidden_activations=hidden_activations, 24 | output_activation=None, 25 | dropout_rates=net_dropout, 26 | batch_norm=batch_norm) for _ in range(self.num_specific_experts)]) for _ in range(num_tasks)]) 27 | self.gate = nn.ModuleList([MLP_Block(input_dim=input_dim, 28 | output_dim=num_specific_experts+num_shared_experts if i < num_tasks else num_shared_experts, 29 | hidden_units=gate_hidden_units, 30 | hidden_activations=hidden_activations, 31 | output_activation=None, 32 | dropout_rates=net_dropout, 33 | batch_norm=batch_norm) for i in range(self.num_tasks+1)]) 34 | self.gate_activation = get_activation('softmax') 35 | def forward(self, x, require_gate=False): 36 | """ 37 | x: list, len(x)==num_tasks+1 38 | """ 39 | specific_expert_outputs = [] 40 | shared_expert_outputs = [] 41 | # specific experts 42 | for i in range(self.num_tasks): 43 | task_expert_outputs = [] 44 | for j in range(self.num_specific_experts): 45 | task_expert_outputs.append(self.specific_experts[i][j](x[i])) 46 | specific_expert_outputs.append(task_expert_outputs) 47 | # shared experts 48 | for i in range(self.num_shared_experts): 49 | shared_expert_outputs.append(self.shared_experts[i](x[-1])) 50 | # gate 51 | cgc_outputs = [] 52 | gates = [] 53 | for i in range(self.num_tasks+1): 54 | if i < self.num_tasks: 55 | # for specific experts 56 | gate_input = torch.stack(specific_expert_outputs[i] + shared_expert_outputs, dim=1) # (?, num_specific_experts+num_shared_experts, dim) 57 | gate = self.gate_activation(self.gate[i](x[i])) # (?, num_specific_experts+num_shared_experts) 58 | gates.append(gate.mean(0)) 59 | cgc_output = torch.sum(gate.unsqueeze(-1) * gate_input, dim=1) # (?, dim) 60 | 
                cgc_outputs.append(cgc_output)
            else:
                # for shared experts
                gate_input = torch.stack(shared_expert_outputs, dim=1) # (?, num_shared_experts, dim)
                gate = self.gate_activation(self.gate[i](x[-1])) # (?, num_shared_experts)
                # gates.append(gate.mean(0))
                cgc_output = torch.sum(gate.unsqueeze(-1) * gate_input, dim=1) # (?, dim)
                cgc_outputs.append(cgc_output)
        if require_gate:
            return cgc_outputs, gates
        else:
            return cgc_outputs

class PLE(MultiTaskModel):
    """Progressive Layered Extraction: stacked CGC layers with task-specific
    and shared experts, then one prediction tower per task."""

    def __init__(self,
                 feature_map,
                 task=["binary_classification"],
                 num_tasks=1,
                 model_id="PLE",
                 gpu=-1,
                 learning_rate=1e-3,
                 embedding_dim=10,
                 num_layers=1,
                 num_shared_experts=1,
                 num_specific_experts=1,
                 expert_hidden_units=[512, 256, 128],
                 gate_hidden_units=[128, 64],
                 tower_hidden_units=[128, 64],
                 hidden_activations="ReLU",
                 net_dropout=0,
                 batch_norm=False,
                 embedding_regularizer=None,
                 net_regularizer=None,
                 **kwargs):
        super(PLE, self).__init__(feature_map,
                                  task=task,
                                  num_tasks=num_tasks,
                                  model_id=model_id,
                                  gpu=gpu,
                                  embedding_regularizer=embedding_regularizer,
                                  net_regularizer=net_regularizer,
                                  **kwargs)
        self.embedding_layer = FeatureEmbedding(feature_map, embedding_dim)
        self.num_layers = num_layers
        self.num_shared_experts = num_shared_experts
        self.num_specific_experts = num_specific_experts
        # First CGC layer consumes flattened embeddings; deeper layers consume
        # the previous layer's expert output dimension.
        self.cgc_layers = nn.ModuleList([CGC_Layer(num_shared_experts,
                                                   num_specific_experts,
                                                   num_tasks,
                                                   input_dim= embedding_dim * feature_map.num_fields if i==0 else expert_hidden_units[-1],
                                                   expert_hidden_units= expert_hidden_units,
                                                   gate_hidden_units=gate_hidden_units,
                                                   hidden_activations=hidden_activations,
                                                   net_dropout=net_dropout,
                                                   batch_norm=batch_norm) for i in range(self.num_layers)])
        self.tower = nn.ModuleList([MLP_Block(input_dim=expert_hidden_units[-1],
                                              output_dim=1,
                                              hidden_units=tower_hidden_units,
                                              hidden_activations=hidden_activations,
                                              output_activation=None,
                                              dropout_rates=net_dropout,
                                              batch_norm=batch_norm)
                                    for _ in range(num_tasks)])
        self.compile(kwargs["optimizer"], kwargs["loss"], learning_rate)
        self.reset_parameters()
        self.model_to_device()

    def forward(self, inputs):
        X = self.get_inputs(inputs)
        feature_emb = self.embedding_layer(X)
        # num_tasks task inputs + 1 shared input, all starting from the same embedding.
        cgc_inputs = [feature_emb.flatten(start_dim=1) for _ in range(self.num_tasks+1)]
        for i in range(self.num_layers):
            cgc_outputs = self.cgc_layers[i](cgc_inputs)
            cgc_inputs = cgc_outputs
        tower_output = [self.tower[i](cgc_outputs[i]) for i in range(self.num_tasks)]
        y_pred = [self.output_activation[i](tower_output[i]) for i in range(self.num_tasks)]
        return_dict = {}
        labels = self.feature_map.labels
        for i in range(self.num_tasks):
            return_dict["{}_pred".format(labels[i])] = y_pred[i]
        return return_dict

# --------------------------------------------------------------------------------
# /models/STEM.py
# --------------------------------------------------------------------------------
import torch
from torch import nn
from fuxictr.pytorch.models import MultiTaskModel
from fuxictr.pytorch.layers import FeatureEmbedding, MLP_Block
from fuxictr.pytorch.torch_utils import get_activation
import numpy as np
import logging

class STEM_Layer(nn.Module):
    """STEM layer: like CGC, but every task gate sees ALL tasks' specific
    experts (other tasks' outputs detached, i.e. stop-gradient) plus the
    shared experts."""

    def __init__(self, num_shared_experts, num_specific_experts, num_tasks, input_dim, expert_hidden_units, gate_hidden_units, hidden_activations,
                 net_dropout, batch_norm):
        super(STEM_Layer, self).__init__()
        self.num_shared_experts = num_shared_experts
        self.num_specific_experts = num_specific_experts
        self.num_tasks = num_tasks
        self.shared_experts = nn.ModuleList([MLP_Block(input_dim=input_dim,
                                                       hidden_units=expert_hidden_units,
                                                       hidden_activations=hidden_activations,
                                                       output_activation=None,
                                                       dropout_rates=net_dropout,
                                                       batch_norm=batch_norm) for _ in range(self.num_shared_experts)])
        self.specific_experts = nn.ModuleList([nn.ModuleList([MLP_Block(input_dim=input_dim,
                                                                        hidden_units=expert_hidden_units,
                                                                        hidden_activations=hidden_activations,
                                                                        output_activation=None,
                                                                        dropout_rates=net_dropout,
                                                                        batch_norm=batch_norm) for _ in range(self.num_specific_experts)]) for _ in range(num_tasks)])
        # Every gate (num_tasks task gates + 1 shared gate) scores all
        # num_specific_experts*num_tasks specific experts plus the shared ones.
        self.gate = nn.ModuleList([MLP_Block(input_dim=input_dim,
                                             output_dim=num_specific_experts*num_tasks+num_shared_experts,
                                             hidden_units=gate_hidden_units,
                                             hidden_activations=hidden_activations,
                                             output_activation=None,
                                             dropout_rates=net_dropout,
                                             batch_norm=batch_norm) for i in range(self.num_tasks+1)])
        self.gate_activation = get_activation('softmax')

    def forward(self, x, return_gate=False):
        """
        x: list, len(x)==num_tasks+1
        """
        specific_expert_outputs = []
        shared_expert_outputs = []
        # specific experts
        for i in range(self.num_tasks):
            task_expert_outputs = []
            for j in range(self.num_specific_experts):
                task_expert_outputs.append(self.specific_experts[i][j](x[i]))
            specific_expert_outputs.append(task_expert_outputs)
        # shared experts
        for i in range(self.num_shared_experts):
            shared_expert_outputs.append(self.shared_experts[i](x[-1]))

        # gate
        stem_outputs = []
        stem_gates = []
        for i in range(self.num_tasks+1):
            if i < self.num_tasks:
                # for specific experts
                gate_input = []
                for j in range(self.num_tasks):
                    if j == i:
                        gate_input.extend(specific_expert_outputs[j])
                    else:
                        # Stop-gradient on other tasks' specific experts so task i
                        # can read but not update them.
                        specific_expert_outputs_j = specific_expert_outputs[j]
                        specific_expert_outputs_j = [out.detach() for out in specific_expert_outputs_j]
                        gate_input.extend(specific_expert_outputs_j)
                gate_input.extend(shared_expert_outputs)
                gate_input = torch.stack(gate_input, dim=1) # (?, num_specific_experts*num_tasks+num_shared_experts, dim)
                # Task gate conditions on task input + shared input.
                gate = self.gate_activation(self.gate[i](x[i]+x[-1])) # (?, num_specific_experts*num_tasks+num_shared_experts)
                if return_gate:
                    # Collapse per-expert weights into per-task (+shared) totals for logging.
                    specific_gate = gate[:,:self.num_specific_experts*self.num_tasks].mean(0)
                    task_gate = torch.chunk(specific_gate, chunks=self.num_tasks)
                    specific_gate_list = []
                    for tg in task_gate:
                        specific_gate_list.append(torch.sum(tg))
                    shared_gate = gate[:,-self.num_shared_experts:].mean(0).sum()
                    target_task_gate = torch.stack(specific_gate_list+[shared_gate],dim=0).view(-1) # (num_task+1,1)
                    assert len(target_task_gate) == self.num_tasks+1
                    stem_gates.append(target_task_gate)
                stem_output = torch.sum(gate.unsqueeze(-1) * gate_input, dim=1) # (?, dim)
                stem_outputs.append(stem_output)
            else:
                # for shared experts
                gate_input = []
                for j in range(self.num_tasks):
                    gate_input.extend(specific_expert_outputs[j])
                gate_input.extend(shared_expert_outputs)
                gate_input = torch.stack(gate_input, dim=1) # (?, num_specific_experts*num_tasks+num_shared_experts, dim)
                gate = self.gate_activation(self.gate[i](x[-1])) # (?, num_specific_experts*num_tasks+num_shared_experts)
                stem_output = torch.sum(gate.unsqueeze(-1) * gate_input, dim=1) # (?, dim)
                stem_outputs.append(stem_output)

        if return_gate:
            return stem_outputs, stem_gates
        else:
            return stem_outputs


class STEM(MultiTaskModel):
    """Shared and Task-specific EMbeddings model: each task (plus the shared
    branch) gets its own embedding slice, refined by stacked STEM layers."""

    def __init__(self,
                 feature_map,
                 task=["binary_classification"],
                 num_tasks=1,
                 model_id="STEM",
                 gpu=-1,
                 learning_rate=1e-3,
                 embedding_dim=10,
                 num_layers=1,
                 num_shared_experts=1,
                 num_specific_experts=1,
                 expert_hidden_units=[512, 256, 128],
                 gate_hidden_units=[128, 64],
                 tower_hidden_units=[128, 64],
                 hidden_activations="ReLU",
                 net_dropout=0,
                 batch_norm=False,
                 embedding_regularizer=None,
                 net_regularizer=None,
                 **kwargs):
        super(STEM, self).__init__(feature_map,
                                   task=task,
                                   num_tasks=num_tasks,
                                   model_id=model_id,
                                   gpu=gpu,
                                   embedding_regularizer=embedding_regularizer,
                                   net_regularizer=net_regularizer,
                                   **kwargs)
        # One embedding slice of size embedding_dim per task plus one shared slice.
        self.embedding_layer = FeatureEmbedding(feature_map, embedding_dim * (self.num_tasks+1))
        self.num_layers = num_layers
        self.embedding_dim = embedding_dim
        self.num_specific_experts = num_specific_experts
        self.num_shared_experts = num_shared_experts
        self.stem_layers = nn.ModuleList([STEM_Layer(num_shared_experts,
                                                     num_specific_experts,
                                                     num_tasks,
                                                     input_dim= self.embedding_dim * feature_map.num_fields if i==0 else expert_hidden_units[-1],
                                                     expert_hidden_units= expert_hidden_units,
                                                     gate_hidden_units=gate_hidden_units,
                                                     hidden_activations=hidden_activations,
                                                     net_dropout=net_dropout,
                                                     batch_norm=batch_norm) for i in range(self.num_layers)])
        self.tower = nn.ModuleList([MLP_Block(input_dim=expert_hidden_units[-1],
                                              output_dim=1,
                                              hidden_units=tower_hidden_units,
                                              hidden_activations=hidden_activations,
                                              output_activation=None,
                                              dropout_rates=net_dropout,
                                              batch_norm=batch_norm)
                                    for _ in range(num_tasks)])
        self.compile(kwargs["optimizer"], kwargs["loss"], learning_rate)
        self.reset_parameters()
        self.model_to_device()

    def forward(self, inputs):
        X = self.get_inputs(inputs)
        feature_emb = self.embedding_layer(X) # (?, num_field, D)
        # Split the wide embedding into num_tasks+1 per-branch slices.
        feature_embs = feature_emb.split(self.embedding_dim, dim=2)
        stem_inputs = [feature_embs[i].flatten(start_dim=1) for i in range(self.num_tasks+1)]
        for i in range(self.num_layers):
            stem_outputs = self.stem_layers[i](stem_inputs)
            stem_inputs = stem_outputs
        tower_output = [self.tower[i](stem_outputs[i]) for i in range(self.num_tasks)]
        y_pred = [self.output_activation[i](tower_output[i]) for i in range(self.num_tasks)]
        return_dict = {}
        labels = self.feature_map.labels
        for i in range(self.num_tasks):
            return_dict["{}_pred".format(labels[i])] = y_pred[i]
        return return_dict

# --------------------------------------------------------------------------------
# /models/SharedBottom.py
# --------------------------------------------------------------------------------
import torch
from torch import nn
from fuxictr.pytorch.models import MultiTaskModel
from fuxictr.pytorch.layers import FeatureEmbedding, MLP_Block

class SharedBottom(MultiTaskModel):
    """Hard-parameter-sharing baseline: one bottom MLP shared by all tasks,
    one prediction tower per task."""

    def __init__(self,
                 feature_map,
                 model_id="SharedBottom",
                 gpu=-1,
                 task=["binary_classification"],
                 num_tasks=1,
                 loss_weight='EQ',
                 learning_rate=1e-3,
                 embedding_dim=10,
                 bottom_hidden_units=[64, 64, 64],
                 tower_hidden_units=[64, ],
                 hidden_activations="ReLU",
                 net_dropout=0,
                 batch_norm=False,
                 embedding_regularizer=None,
                 net_regularizer=None,
                 **kwargs):
        super(SharedBottom, self).__init__(feature_map,
                                           task=task,
                                           loss_weight=loss_weight,
                                           num_tasks=num_tasks,
                                           model_id=model_id,
                                           gpu=gpu,
                                           embedding_regularizer=embedding_regularizer,
                                           net_regularizer=net_regularizer,
                                           **kwargs)
        self.embedding_layer = FeatureEmbedding(feature_map, embedding_dim)
        self.bottom = MLP_Block(input_dim=embedding_dim * feature_map.num_fields,
                                hidden_units=bottom_hidden_units,
                                hidden_activations=hidden_activations,
                                output_activation=None,
                                dropout_rates=net_dropout,
                                batch_norm=batch_norm)
        self.tower = nn.ModuleList([MLP_Block(input_dim=bottom_hidden_units[-1],
                                              output_dim=1,
                                              hidden_units=tower_hidden_units,
                                              hidden_activations=hidden_activations,
                                              output_activation=None,
                                              dropout_rates=net_dropout,
                                              batch_norm=batch_norm)
                                    for _ in range(num_tasks)])
self.compile(kwargs["optimizer"], kwargs["loss"], learning_rate) 49 | self.reset_parameters() 50 | self.model_to_device() 51 | 52 | def forward(self, inputs): 53 | X = self.get_inputs(inputs) 54 | feature_emb = self.embedding_layer(X) 55 | bottom_output = self.bottom(feature_emb.flatten(start_dim=1)) # (?, bottom_hidden_units[-1]) 56 | tower_output = [self.tower[i](bottom_output) for i in range(self.num_tasks)] 57 | y_pred = [self.output_activation[i](tower_output[i]) for i in range(self.num_tasks)] 58 | return_dict = {} 59 | labels = self.feature_map.labels 60 | for i in range(self.num_tasks): 61 | return_dict["{}_pred".format(labels[i])] = y_pred[i] 62 | return return_dict -------------------------------------------------------------------------------- /models/__init__.py: -------------------------------------------------------------------------------- 1 | from .SharedBottom import SharedBottom 2 | from .MMoE import MMoE 3 | from .PLE import PLE 4 | from .AITM import AITM 5 | from .ESMM import ESMM 6 | from .OMoE import OMoE 7 | from .STEM import STEM -------------------------------------------------------------------------------- /run_expid.py: -------------------------------------------------------------------------------- 1 | import os 2 | os.chdir(os.path.dirname(os.path.realpath(__file__))) 3 | import sys 4 | import logging 5 | # import fuxictr_version 6 | from fuxictr import datasets 7 | from datetime import datetime 8 | from fuxictr.utils import load_config, set_logger, print_to_json, print_to_list 9 | from fuxictr.features import FeatureMap 10 | from fuxictr.pytorch.torch_utils import seed_everything 11 | from fuxictr.pytorch.dataloaders import H5DataLoader 12 | from fuxictr.preprocess import FeatureProcessor, build_dataset 13 | import models as model_zoo 14 | import gc 15 | import argparse 16 | import os 17 | from pathlib import Path 18 | from fuxictr.pytorch.models import MultiTaskModel, BaseModel 19 | 20 | 21 | 22 | if __name__ == '__main__': 23 | ''' 
Usage: python run_expid.py --config {config_dir} --expid {experiment_id} --gpu {gpu_device_id} 24 | ''' 25 | parser = argparse.ArgumentParser() 26 | parser.add_argument('--config', type=str, default='./config/', help='The config directory.') 27 | parser.add_argument('--expid', type=str, default='DeepFM_test', help='The experiment id to run.') 28 | parser.add_argument('--gpu', type=int, default=-1, help='The gpu index, -1 for cpu') 29 | args = vars(parser.parse_args()) 30 | 31 | experiment_id = args['expid'] 32 | params = load_config(args['config'], experiment_id) 33 | params['gpu'] = args['gpu'] 34 | set_logger(params) 35 | logging.info("Params: " + print_to_json(params)) 36 | seed_everything(seed=params['seed']) 37 | 38 | data_dir = os.path.join(params['data_root'], params['dataset_id']) 39 | feature_map_json = os.path.join(data_dir, "feature_map.json") 40 | if params["data_format"] == "csv": 41 | # Build feature_map and transform h5 data 42 | feature_encoder = FeatureProcessor(**params) 43 | params["train_data"], params["valid_data"], params["test_data"] = \ 44 | build_dataset(feature_encoder, **params) 45 | 46 | feature_map = FeatureMap(params['dataset_id'], data_dir) 47 | feature_map.load(feature_map_json, params) 48 | logging.info("Feature specs: " + print_to_json(feature_map.features)) 49 | if 'task_labels' in params: 50 | if isinstance(params['task_labels'],list): 51 | task_labels = params['task_labels'] 52 | else: 53 | task_labels = list(params['task_labels']) 54 | feature_map.labels = task_labels 55 | feature_map.set_column_index() 56 | 57 | model_class = getattr(model_zoo, params['model']) 58 | model = model_class(feature_map, **params) 59 | model.count_parameters() # print number of parameters used in model 60 | 61 | train_gen, valid_gen = H5DataLoader(feature_map, stage='train', **params).make_iterator() 62 | model.fit(train_gen, validation_data=valid_gen, **params) 63 | 64 | logging.info('****** Validation evaluation ******') 65 | valid_result = 
model.evaluate(valid_gen) 66 | del train_gen, valid_gen 67 | gc.collect() 68 | 69 | logging.info('******** Test evaluation ********') 70 | test_gen = H5DataLoader(feature_map, stage='test', **params).make_iterator() 71 | test_result = {} 72 | if test_gen: 73 | test_result = model.evaluate(test_gen) 74 | 75 | result_filename = os.path.join('results',Path(args['config']).name.replace(".yaml", "") + '.csv') 76 | with open(result_filename, 'a+') as fw: 77 | fw.write(' {},[command] python {},[exp_id] {},[dataset_id] {},[train] {},[val] {},[test] {}\n' \ 78 | .format(datetime.now().strftime('%Y%m%d-%H%M%S'), 79 | ' '.join(sys.argv), experiment_id, params['dataset_id'], 80 | "N.A.", print_to_list(valid_result), print_to_list(test_result))) 81 | -------------------------------------------------------------------------------- /run_expid_list.py: -------------------------------------------------------------------------------- 1 | # ========================================================================= 2 | # Copyright (C) 2022. Huawei Technologies Co., Ltd. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | # ========================================================================= 16 | 17 | 18 | from datetime import datetime 19 | import gc 20 | import argparse 21 | import fuxictr_version 22 | from fuxictr import autotuner 23 | 24 | if __name__ == '__main__': 25 | parser = argparse.ArgumentParser() 26 | parser.add_argument('--config', type=str, default='../config/tuner_config_LR_avazu_01/', 27 | help='The config file for para tuning.') 28 | parser.add_argument('--gpu', nargs='+', default=[-1], help='The list of gpu indexes, -1 for cpu.') 29 | args = vars(parser.parse_args()) 30 | gpu_list = args['gpu'] 31 | config_dir = args['config'] 32 | 33 | # generate parameter space combinations 34 | autotuner.grid_search(config_dir, gpu_list) 35 | 36 | -------------------------------------------------------------------------------- /run_param_tuner.py: -------------------------------------------------------------------------------- 1 | # ========================================================================= 2 | # Copyright (C) 2022. Huawei Technologies Co., Ltd. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | # ========================================================================= 16 | 17 | from datetime import datetime 18 | import gc 19 | import argparse 20 | # import fuxictr_version 21 | from fuxictr import autotuner 22 | 23 | if __name__ == '__main__': 24 | parser = argparse.ArgumentParser() 25 | parser.add_argument('--config', type=str, default='../config/tuner_config.yaml', 26 | help='The config file for para tuning.') 27 | parser.add_argument('--tag', type=str, default=None, help='Use the tag to determine which expid to run (e.g. 001 for the first expid).') 28 | parser.add_argument('--gpu', nargs='+', default=[-1], help='The list of gpu indexes, -1 for cpu.') 29 | args = vars(parser.parse_args()) 30 | gpu_list = args['gpu'] 31 | expid_tag = args['tag'] 32 | 33 | # generate parameter space combinations 34 | config_dir = autotuner.enumerate_params(args['config']) 35 | autotuner.grid_search(config_dir, gpu_list, expid_tag) 36 | 37 | --------------------------------------------------------------------------------