├── .gitattributes ├── .gitignore ├── .readthedocs.yml ├── .travis.yml ├── CONTRIBUTING.md ├── LICENSE ├── README.md ├── README_DeepCTR.md ├── assets └── cowclip.png ├── clip.py ├── data ├── .gitkeep └── criteo_kaggle │ └── criteo_sample.txt ├── data_utils.py ├── deepctr ├── __init__.py ├── contrib │ ├── __init__.py │ ├── rnn.py │ ├── rnn_v2.py │ └── utils.py ├── estimator │ ├── __init__.py │ ├── feature_column.py │ ├── inputs.py │ ├── models │ │ ├── __init__.py │ │ ├── afm.py │ │ ├── autoint.py │ │ ├── ccpm.py │ │ ├── dcn.py │ │ ├── deepfefm.py │ │ ├── deepfm.py │ │ ├── fibinet.py │ │ ├── fnn.py │ │ ├── fwfm.py │ │ ├── nfm.py │ │ ├── pnn.py │ │ ├── wdl.py │ │ └── xdeepfm.py │ └── utils.py ├── feature_column.py ├── inputs.py ├── layers │ ├── __init__.py │ ├── activation.py │ ├── core.py │ ├── interaction.py │ ├── normalization.py │ ├── sequence.py │ └── utils.py ├── models │ ├── __init__.py │ ├── afm.py │ ├── autoint.py │ ├── ccpm.py │ ├── dcn.py │ ├── dcnmix.py │ ├── deepfefm.py │ ├── deepfm.py │ ├── difm.py │ ├── fgcnn.py │ ├── fibinet.py │ ├── flen.py │ ├── fnn.py │ ├── fwfm.py │ ├── ifm.py │ ├── mlr.py │ ├── multitask │ │ ├── __init__.py │ │ ├── esmm.py │ │ ├── mmoe.py │ │ ├── ple.py │ │ └── sharedbottom.py │ ├── nfm.py │ ├── onn.py │ ├── pnn.py │ ├── sequence │ │ ├── __init__.py │ │ ├── bst.py │ │ ├── dien.py │ │ ├── din.py │ │ └── dsin.py │ ├── wdl.py │ ├── widefm.py │ └── xdeepfm.py └── utils.py ├── docs ├── Makefile ├── make.bat ├── pics │ ├── AFM.png │ ├── AutoInt.png │ ├── BST.png │ ├── CCPM.png │ ├── CIN.png │ ├── DCN-M.png │ ├── DCN-Mix.png │ ├── DCN.png │ ├── DIEN.png │ ├── DIFM.jpg │ ├── DIN.png │ ├── DSIN.png │ ├── DeepFEFM.jpg │ ├── DeepFM.png │ ├── FGCNN.png │ ├── FLEN.jpg │ ├── FNN.png │ ├── FiBiNET.png │ ├── IFM.jpg │ ├── InteractingLayer.png │ ├── MLR.png │ ├── NFM.png │ ├── ONN.png │ ├── PNN.png │ ├── WDL.png │ ├── code.png │ ├── criteo_sample.png │ ├── deepctrbot.png │ ├── fms.png │ ├── mlr1.png │ ├── mlrvsdnn.png │ ├── movielens_sample.png │ 
├── movielens_sample_with_genres.png │ ├── multitaskmodels │ │ ├── ESMM.png │ │ ├── MMOE.png │ │ ├── PLE.png │ │ └── SharedBottom.png │ ├── weichennote.png │ └── xDeepFM.png ├── requirements.readthedocs.txt └── source │ ├── Estimators.rst │ ├── Examples.md │ ├── FAQ.md │ ├── Features.md │ ├── History.md │ ├── Layers.rst │ ├── Model_Methods.md │ ├── Models.rst │ ├── Quick-Start.md │ ├── conf.py │ ├── deepctr.contrib.rnn.rst │ ├── deepctr.contrib.rst │ ├── deepctr.contrib.utils.rst │ ├── deepctr.estimator.feature_column.rst │ ├── deepctr.estimator.inputs.rst │ ├── deepctr.estimator.models.afm.rst │ ├── deepctr.estimator.models.autoint.rst │ ├── deepctr.estimator.models.ccpm.rst │ ├── deepctr.estimator.models.dcn.rst │ ├── deepctr.estimator.models.deepfefm.rst │ ├── deepctr.estimator.models.deepfm.rst │ ├── deepctr.estimator.models.fibinet.rst │ ├── deepctr.estimator.models.fnn.rst │ ├── deepctr.estimator.models.fwfm.rst │ ├── deepctr.estimator.models.nfm.rst │ ├── deepctr.estimator.models.pnn.rst │ ├── deepctr.estimator.models.rst │ ├── deepctr.estimator.models.wdl.rst │ ├── deepctr.estimator.models.xdeepfm.rst │ ├── deepctr.estimator.rst │ ├── deepctr.estimator.utils.rst │ ├── deepctr.feature_column.rst │ ├── deepctr.inputs.rst │ ├── deepctr.layers.activation.rst │ ├── deepctr.layers.core.rst │ ├── deepctr.layers.interaction.rst │ ├── deepctr.layers.normalization.rst │ ├── deepctr.layers.rst │ ├── deepctr.layers.sequence.rst │ ├── deepctr.layers.utils.rst │ ├── deepctr.models.afm.rst │ ├── deepctr.models.autoint.rst │ ├── deepctr.models.ccpm.rst │ ├── deepctr.models.dcn.rst │ ├── deepctr.models.dcnmix.rst │ ├── deepctr.models.deepfefm.rst │ ├── deepctr.models.deepfm.rst │ ├── deepctr.models.deepfwfm.rst │ ├── deepctr.models.difm.rst │ ├── deepctr.models.fgcnn.rst │ ├── deepctr.models.fibinet.rst │ ├── deepctr.models.flen.rst │ ├── deepctr.models.fnn.rst │ ├── deepctr.models.ifm.rst │ ├── deepctr.models.mlr.rst │ ├── deepctr.models.multitask.esmm.rst │ ├── 
deepctr.models.multitask.mmoe.rst │ ├── deepctr.models.multitask.ple.rst │ ├── deepctr.models.multitask.sharedbottom.rst │ ├── deepctr.models.nfm.rst │ ├── deepctr.models.onn.rst │ ├── deepctr.models.pnn.rst │ ├── deepctr.models.rst │ ├── deepctr.models.sequence.bst.rst │ ├── deepctr.models.sequence.dien.rst │ ├── deepctr.models.sequence.din.rst │ ├── deepctr.models.sequence.dsin.rst │ ├── deepctr.models.wdl.rst │ ├── deepctr.models.xdeepfm.rst │ ├── deepctr.rst │ ├── deepctr.utils.rst │ ├── index.rst │ └── modules.rst ├── examples ├── avazu_sample.txt ├── census-income.sample ├── criteo_sample.te.tfrecords ├── criteo_sample.tr.tfrecords ├── criteo_sample.txt ├── gen_tfrecords.py ├── movielens_age_vocabulary.csv ├── movielens_sample.txt ├── run_all.sh ├── run_classification_criteo.py ├── run_classification_criteo_hash.py ├── run_classification_criteo_multi_gpu.py ├── run_dien.py ├── run_din.py ├── run_dsin.py ├── run_estimator_pandas_classification.py ├── run_estimator_tfrecord_classification.py ├── run_flen.py ├── run_mtl.py ├── run_multivalue_movielens.py ├── run_multivalue_movielens_hash.py ├── run_multivalue_movielens_vocab_hash.py └── run_regression_movielens.py ├── requirements.txt ├── setup.cfg ├── setup.py ├── tests ├── README.md ├── __init__.py ├── feature_test.py ├── layers │ ├── __init__.py │ ├── activations_test.py │ ├── core_test.py │ ├── interaction_test.py │ ├── normalization_test.py │ ├── sequence_test.py │ ├── utils_test.py │ └── vocabulary_example.csv ├── models │ ├── AFM_test.py │ ├── AutoInt_test.py │ ├── BST_test.py │ ├── CCPM_test.py │ ├── DCNMix_test.py │ ├── DCN_test.py │ ├── DIEN_test.py │ ├── DIFM_test.py │ ├── DIN_test.py │ ├── DSIN_test.py │ ├── DeepFEFM_test.py │ ├── DeepFM_test.py │ ├── FGCNN_test.py │ ├── FLEN_test.py │ ├── FNN_test.py │ ├── FiBiNET_test.py │ ├── FwFM_test.py │ ├── IFM_test.py │ ├── MLR_test.py │ ├── MTL_test.py │ ├── NFM_test.py │ ├── ONN_test.py │ ├── PNN_test.py │ ├── WDL_test.py │ ├── __init__.py │ └── 
xDeepFM_test.py ├── utils.py ├── utils_mtl.py └── utils_test.py ├── train.py └── utils.py /.gitattributes: -------------------------------------------------------------------------------- 1 | # Auto detect text files and perform LF normalization 2 | * text=auto 3 | 4 | # Custom for Visual Studio 5 | *.cs diff=csharp 6 | 7 | # Standard to msysgit 8 | *.doc diff=astextplain 9 | *.DOC diff=astextplain 10 | *.docx diff=astextplain 11 | *.DOCX diff=astextplain 12 | *.dot diff=astextplain 13 | *.DOT diff=astextplain 14 | *.pdf diff=astextplain 15 | *.PDF diff=astextplain 16 | *.rtf diff=astextplain 17 | *.RTF diff=astextplain 18 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # data 2 | *.feather 3 | !data/.gitkeep 4 | !data/criteo_kaggle/criteo_sample.txt 5 | data/criteo_kaggle/train.txt 6 | data/avazu 7 | data/taobao 8 | data/criteo_terabyte 9 | logs 10 | 11 | 12 | *.h5 13 | *.ipynb 14 | .pytest_cache/ 15 | .vscode/ 16 | tests/unused/* 17 | # Byte-compiled / optimized / DLL files 18 | __pycache__/ 19 | *.py[cod] 20 | *$py.class 21 | .idea/ 22 | # C extensions 23 | *.so 24 | 25 | # Distribution / packaging 26 | .Python 27 | env/ 28 | build/ 29 | develop-eggs/ 30 | dist/ 31 | downloads/ 32 | eggs/ 33 | .eggs/ 34 | lib/ 35 | lib64/ 36 | parts/ 37 | sdist/ 38 | var/ 39 | *.egg-info/ 40 | .installed.cfg 41 | *.egg 42 | 43 | # PyInstaller 44 | # Usually these files are written by a python script from a template 45 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
46 | *.manifest 47 | *.spec 48 | 49 | # Installer logs 50 | pip-log.txt 51 | pip-delete-this-directory.txt 52 | 53 | # Unit test / coverage reports 54 | htmlcov/ 55 | .tox/ 56 | .coverage 57 | .coverage.* 58 | .cache 59 | nosetests.xml 60 | coverage.xml 61 | *,cover 62 | .hypothesis/ 63 | 64 | # Translations 65 | *.mo 66 | *.pot 67 | 68 | # Django stuff: 69 | *.log 70 | local_settings.py 71 | 72 | # Flask instance folder 73 | instance/ 74 | 75 | # Scrapy stuff: 76 | .scrapy 77 | 78 | # Sphinx documentation 79 | docs/_build/ 80 | 81 | # PyBuilder 82 | target/ 83 | 84 | # IPython Notebook 85 | .ipynb_checkpoints 86 | 87 | # pyenv 88 | .python-version 89 | 90 | # celery beat schedule file 91 | celerybeat-schedule 92 | 93 | # dotenv 94 | .env 95 | 96 | # virtualenv 97 | venv/ 98 | ENV/ 99 | 100 | # Spyder project settings 101 | .spyderproject 102 | 103 | # Rope project settings 104 | .ropeproject 105 | 106 | # ========================= 107 | # Operating System Files 108 | # ========================= 109 | 110 | # OSX 111 | # ========================= 112 | 113 | .DS_Store 114 | .AppleDouble 115 | .LSOverride 116 | 117 | # Thumbnails 118 | ._* 119 | 120 | # Files that might appear in the root of a volume 121 | .DocumentRevisions-V100 122 | .fseventsd 123 | .Spotlight-V100 124 | .TemporaryItems 125 | .Trashes 126 | .VolumeIcon.icns 127 | 128 | # Directories potentially created on remote AFP share 129 | .AppleDB 130 | .AppleDesktop 131 | Network Trash Folder 132 | Temporary Items 133 | .apdisk 134 | 135 | # Windows 136 | # ========================= 137 | 138 | # Windows image file caches 139 | Thumbs.db 140 | ehthumbs.db 141 | 142 | # Folder config file 143 | Desktop.ini 144 | 145 | # Recycle Bin used on file shares 146 | $RECYCLE.BIN/ 147 | 148 | # Windows Installer files 149 | *.cab 150 | *.msi 151 | *.msm 152 | *.msp 153 | 154 | # Windows shortcuts 155 | *.lnk 156 | -------------------------------------------------------------------------------- /.readthedocs.yml: 
-------------------------------------------------------------------------------- 1 | build: 2 | image: latest 3 | 4 | python: 5 | version: 3.6 -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | #sudo: required 2 | #dist: trusty xenial 3 | language: python 4 | 5 | python: 6 | - "2.7" #time out 7 | #- "3.4" 8 | - "3.5" 9 | - "3.6" 10 | #- "3.7" 11 | 12 | env: 13 | # - TF_VERSION=1.13.1 14 | # - TF_VERSION=1.12.2 15 | - TF_VERSION=1.4.0 16 | #Not Support- TF_VERSION=1.7.0 17 | #Not Support- TF_VERSION=1.7.1 18 | #Not Support- TF_VERSION=1.8.0 19 | #- TF_VERSION=1.8.0 20 | # - TF_VERSION=1.11.0 21 | #- TF_VERSION=1.6.0 22 | - TF_VERSION=2.0.0b1 23 | #- TF_VERSION=1.13.2 24 | - TF_VERSION=1.14.0 25 | 26 | matrix: 27 | allow_failures: 28 | - python: "2.7" 29 | env: TF_VERSION=1.6.0 # to speed up 30 | - python: "2.7" 31 | env: TF_VERSION=2.0.0b1 32 | - python: "3.4" 33 | - python: "3.5" 34 | - python: "3.7" 35 | - env: TF_VERSION=1.5.0 #local is ok,but sometimes CI is failed 36 | - env: TF_VERSION=1.7.0 37 | - env: TF_VERSION=1.7.1 38 | - env: TF_VERSION=1.8.0 39 | - env: TF_VERSION=1.12.0 # too slow 40 | - env: TF_VERSION=1.13.1 # too slow 41 | - env: TF_VERSION=1.13.2 # too slow 42 | - env: TF_VERSION=1.14.0 # too slow 43 | 44 | fast_finish: true 45 | 46 | cache: pip 47 | # command to install dependencies 48 | install: 49 | - pip install -q pytest-cov==2.4.0 #>=2.4.0,<2.6 50 | - pip install -q python-coveralls 51 | - pip install -q codacy-coverage 52 | - pip install -q tensorflow==$TF_VERSION 53 | - pip install -q pandas 54 | - pip install -q packaging 55 | - pip install -e . 
56 | # command to run tests 57 | script: 58 | - pytest --cov=deepctr 59 | 60 | notifications: 61 | recipients: 62 | - weichenswc@163.com 63 | 64 | on_success: change 65 | on_failure: change 66 | 67 | after_success: 68 | - coveralls 69 | - coverage xml 70 | - python-codacy-coverage -r coverage.xml 71 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | This project is under development and we need developers to participate. 2 | # Join us 3 | If you 4 | 5 | - are familiar with and interested in CTR prediction algorithms 6 | - are familiar with TensorFlow 7 | - have spare time to learn and develop 8 | - are familiar with git 9 | 10 | please send a brief introduction of your background and experience to weichenswc@163.com. You are welcome to join us! 11 | 12 | # Creating a pull request 13 | 1. **Become a collaborator**: Send an email with a brief introduction and your GitHub account name to weichenswc@163.com and wait for an invitation to become a collaborator. 14 | 2. **Fork&Dev**: Fork your own branch (`dev_yourname`) in `DeepCTR` from the `master` branch for development. If `master` is updated during the development process, remember to merge it into `dev_yourname` regularly. 15 | 3. **Testing**: Test the logical correctness and effect of your changes once development on the `dev_yourname` branch is finished. 16 | 4. **Pre-release**: After testing, contact weichenswc@163.com for pre-release integration; usually your branch `dev_yourname` will be merged into the `release` branch by squash merge. 17 | 5. **Release a new version**: After confirming that no further changes are needed, the `release` branch will be merged into `master` and a new Python package will be released on PyPI. 
18 | 19 | # Discussions 20 | 21 | https://github.com/shenweichen/DeepCTR/discussions -------------------------------------------------------------------------------- /assets/cowclip.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/LargeBatchCTR/12101b44d8e753a6d8d954437287f96d68895d49/assets/cowclip.png -------------------------------------------------------------------------------- /clip.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | 4 | def cow_clip(w, g, ratio=1, ids=None, cnts=None, min_w=0.03, const=False): 5 | if isinstance(g, tf.IndexedSlices): 6 | # FIXME: This part is not tested 7 | values = tf.convert_to_tensor(g.values) 8 | clipnorm = tf.norm(tf.gather(w, g.indices), axis=-1) 9 | else: 10 | values = g 11 | if const: 12 | clipnorm = tf.constant([min_w] * g.shape[0]) 13 | else: 14 | clipnorm = tf.norm(w, axis=-1) 15 | # bound weight norm by min_w 16 | clipnorm = tf.maximum(clipnorm, min_w) 17 | # scale by cnting 18 | cnts = tf.tensor_scatter_nd_update( 19 | tf.ones([clipnorm.shape[0]], dtype=tf.int32), 20 | tf.expand_dims(ids, -1), 21 | cnts, 22 | ) 23 | clipnorm = clipnorm * tf.cast(cnts, tf.float32) 24 | 25 | clip_t = ratio * clipnorm 26 | l2sum_row = tf.reduce_sum(values * values, axis=-1) 27 | pred = l2sum_row > 0 28 | l2sum_row_safe = tf.where(pred, l2sum_row, tf.ones_like(l2sum_row)) 29 | l2norm_row = tf.sqrt(l2sum_row_safe) 30 | intermediate = values * tf.expand_dims(clip_t, -1) 31 | g_clip = intermediate / tf.expand_dims(tf.maximum(l2norm_row, clip_t), -1) 32 | 33 | if isinstance(g, tf.IndexedSlices): 34 | return tf.IndexedSlices(g_clip, g.indices, g.dense_shape) 35 | else: 36 | return g_clip 37 | -------------------------------------------------------------------------------- /data/.gitkeep: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/bytedance/LargeBatchCTR/12101b44d8e753a6d8d954437287f96d68895d49/data/.gitkeep -------------------------------------------------------------------------------- /deepctr/__init__.py: -------------------------------------------------------------------------------- 1 | from .utils import check_version 2 | 3 | __version__ = '0.9.0' 4 | check_version(__version__) 5 | -------------------------------------------------------------------------------- /deepctr/contrib/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/LargeBatchCTR/12101b44d8e753a6d8d954437287f96d68895d49/deepctr/contrib/__init__.py -------------------------------------------------------------------------------- /deepctr/estimator/__init__.py: -------------------------------------------------------------------------------- 1 | from .models import * -------------------------------------------------------------------------------- /deepctr/estimator/feature_column.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from tensorflow.python.feature_column.feature_column import _EmbeddingColumn 3 | 4 | from .utils import LINEAR_SCOPE_NAME, variable_scope, get_collection, get_GraphKeys, input_layer, get_losses 5 | 6 | 7 | def linear_model(features, linear_feature_columns): 8 | if tf.__version__ >= '2.0.0': 9 | linear_logits = tf.compat.v1.feature_column.linear_model(features, linear_feature_columns) 10 | else: 11 | linear_logits = tf.feature_column.linear_model(features, linear_feature_columns) 12 | return linear_logits 13 | 14 | 15 | def get_linear_logit(features, linear_feature_columns, l2_reg_linear=0): 16 | with variable_scope(LINEAR_SCOPE_NAME): 17 | if not linear_feature_columns: 18 | linear_logits = tf.Variable([[0.0]], name='bias_weights') 19 | else: 20 | 21 | linear_logits = linear_model(features, linear_feature_columns) 22 
| 23 | if l2_reg_linear > 0: 24 | for var in get_collection(get_GraphKeys().TRAINABLE_VARIABLES, LINEAR_SCOPE_NAME)[:-1]: 25 | get_losses().add_loss(l2_reg_linear * tf.nn.l2_loss(var, name=var.name.split(":")[0] + "_l2loss"), 26 | get_GraphKeys().REGULARIZATION_LOSSES) 27 | return linear_logits 28 | 29 | 30 | def input_from_feature_columns(features, feature_columns, l2_reg_embedding=0.0): 31 | dense_value_list = [] 32 | sparse_emb_list = [] 33 | for feat in feature_columns: 34 | if is_embedding(feat): 35 | sparse_emb = tf.expand_dims(input_layer(features, [feat]), axis=1) 36 | sparse_emb_list.append(sparse_emb) 37 | if l2_reg_embedding > 0: 38 | get_losses().add_loss(l2_reg_embedding * tf.nn.l2_loss(sparse_emb, name=feat.name + "_l2loss"), 39 | get_GraphKeys().REGULARIZATION_LOSSES) 40 | 41 | else: 42 | dense_value_list.append(input_layer(features, [feat])) 43 | 44 | return sparse_emb_list, dense_value_list 45 | 46 | 47 | def is_embedding(feature_column): 48 | try: 49 | from tensorflow.python.feature_column.feature_column_v2 import EmbeddingColumn 50 | except ImportError: 51 | EmbeddingColumn = _EmbeddingColumn 52 | return isinstance(feature_column, (_EmbeddingColumn, EmbeddingColumn)) 53 | -------------------------------------------------------------------------------- /deepctr/estimator/inputs.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | 4 | def input_fn_pandas(df, features, label=None, batch_size=256, num_epochs=1, shuffle=False, queue_capacity_factor=10, 5 | num_threads=1): 6 | if label is not None: 7 | y = df[label] 8 | else: 9 | y = None 10 | if tf.__version__ >= "2.0.0": 11 | return tf.compat.v1.estimator.inputs.pandas_input_fn(df[features], y, batch_size=batch_size, 12 | num_epochs=num_epochs, 13 | shuffle=shuffle, 14 | queue_capacity=batch_size * queue_capacity_factor, 15 | num_threads=num_threads) 16 | 17 | return tf.estimator.inputs.pandas_input_fn(df[features], y, 
batch_size=batch_size, num_epochs=num_epochs, 18 | shuffle=shuffle, queue_capacity=batch_size * queue_capacity_factor, 19 | num_threads=num_threads) 20 | 21 | 22 | def input_fn_tfrecord(filenames, feature_description, label=None, batch_size=256, num_epochs=1, num_parallel_calls=8, 23 | shuffle_factor=10, prefetch_factor=1, 24 | ): 25 | def _parse_examples(serial_exmp): 26 | try: 27 | features = tf.parse_single_example(serial_exmp, features=feature_description) 28 | except AttributeError: 29 | features = tf.io.parse_single_example(serial_exmp, features=feature_description) 30 | if label is not None: 31 | labels = features.pop(label) 32 | return features, labels 33 | return features 34 | 35 | def input_fn(): 36 | dataset = tf.data.TFRecordDataset(filenames) 37 | dataset = dataset.map(_parse_examples, num_parallel_calls=num_parallel_calls) 38 | if shuffle_factor > 0: 39 | dataset = dataset.shuffle(buffer_size=batch_size * shuffle_factor) 40 | 41 | dataset = dataset.repeat(num_epochs).batch(batch_size) 42 | 43 | if prefetch_factor > 0: 44 | dataset = dataset.prefetch(buffer_size=batch_size * prefetch_factor) 45 | try: 46 | iterator = dataset.make_one_shot_iterator() 47 | except AttributeError: 48 | iterator = tf.compat.v1.data.make_one_shot_iterator(dataset) 49 | 50 | return iterator.get_next() 51 | 52 | return input_fn 53 | -------------------------------------------------------------------------------- /deepctr/estimator/models/__init__.py: -------------------------------------------------------------------------------- 1 | from .afm import AFMEstimator 2 | from .autoint import AutoIntEstimator 3 | from .ccpm import CCPMEstimator 4 | from .dcn import DCNEstimator 5 | from .deepfm import DeepFMEstimator 6 | from .fwfm import FwFMEstimator 7 | from .fibinet import FiBiNETEstimator 8 | from .fnn import FNNEstimator 9 | from .nfm import NFMEstimator 10 | from .pnn import PNNEstimator 11 | from .wdl import WDLEstimator 12 | from .xdeepfm import xDeepFMEstimator 13 | from 
.deepfefm import DeepFEFMEstimator 14 | -------------------------------------------------------------------------------- /deepctr/estimator/models/afm.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | """ 3 | 4 | Author: 5 | Weichen Shen, weichenswc@163.com 6 | 7 | Reference: 8 | [1] Xiao J, Ye H, He X, et al. Attentional factorization machines: Learning the weight of feature interactions via attention networks[J]. arXiv preprint arXiv:1708.04617, 2017. 9 | (https://arxiv.org/abs/1708.04617) 10 | 11 | """ 12 | import tensorflow as tf 13 | 14 | from ..feature_column import get_linear_logit, input_from_feature_columns 15 | from ..utils import deepctr_model_fn, DNN_SCOPE_NAME, variable_scope 16 | from ...layers.interaction import AFMLayer, FM 17 | from ...layers.utils import concat_func 18 | 19 | 20 | def AFMEstimator(linear_feature_columns, dnn_feature_columns, use_attention=True, attention_factor=8, 21 | l2_reg_linear=1e-5, l2_reg_embedding=1e-5, l2_reg_att=1e-5, afm_dropout=0, seed=1024, 22 | task='binary', model_dir=None, config=None, linear_optimizer='Ftrl', 23 | dnn_optimizer='Adagrad', training_chief_hooks=None): 24 | """Instantiates the Attentional Factorization Machine architecture. 25 | 26 | :param linear_feature_columns: An iterable containing all the features used by linear part of the model. 27 | :param dnn_feature_columns: An iterable containing all the features used by deep part of the model. 28 | :param use_attention: bool,whether use attention or not,if set to ``False``.it is the same as **standard Factorization Machine** 29 | :param attention_factor: positive integer,units in attention net 30 | :param l2_reg_linear: float. L2 regularizer strength applied to linear part 31 | :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vector 32 | :param l2_reg_att: float. 
L2 regularizer strength applied to attention net 33 | :param afm_dropout: float in [0,1), Fraction of the attention net output units to dropout. 34 | :param seed: integer ,to use as random seed. 35 | :param task: str, ``"binary"`` for binary logloss or ``"regression"`` for regression loss 36 | :param model_dir: Directory to save model parameters, graph and etc. This can 37 | also be used to load checkpoints from the directory into a estimator 38 | to continue training a previously saved model. 39 | :param config: tf.RunConfig object to configure the runtime settings. 40 | :param linear_optimizer: An instance of `tf.Optimizer` used to apply gradients to 41 | the linear part of the model. Defaults to FTRL optimizer. 42 | :param dnn_optimizer: An instance of `tf.Optimizer` used to apply gradients to 43 | the deep part of the model. Defaults to Adagrad optimizer. 44 | :param training_chief_hooks: Iterable of `tf.train.SessionRunHook` objects to 45 | run on the chief worker during training. 46 | :return: A Tensorflow Estimator instance. 
47 | 48 | """ 49 | 50 | def _model_fn(features, labels, mode, config): 51 | train_flag = (mode == tf.estimator.ModeKeys.TRAIN) 52 | 53 | linear_logits = get_linear_logit(features, linear_feature_columns, l2_reg_linear=l2_reg_linear) 54 | 55 | with variable_scope(DNN_SCOPE_NAME): 56 | sparse_embedding_list, _ = input_from_feature_columns(features, dnn_feature_columns, 57 | l2_reg_embedding=l2_reg_embedding) 58 | if use_attention: 59 | 60 | fm_logit = AFMLayer(attention_factor, l2_reg_att, afm_dropout, 61 | seed)(sparse_embedding_list, training=train_flag) 62 | else: 63 | fm_logit = FM()(concat_func(sparse_embedding_list, axis=1)) 64 | 65 | logits = linear_logits + fm_logit 66 | 67 | return deepctr_model_fn(features, mode, logits, labels, task, linear_optimizer, dnn_optimizer, 68 | training_chief_hooks=training_chief_hooks) 69 | 70 | return tf.estimator.Estimator(_model_fn, model_dir=model_dir, config=config) 71 | -------------------------------------------------------------------------------- /deepctr/layers/__init__.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | from .activation import Dice 4 | from .core import DNN, LocalActivationUnit, PredictionLayer 5 | from .interaction import (CIN, FM, AFMLayer, BiInteractionPooling, CrossNet, CrossNetMix, 6 | InnerProductLayer, InteractingLayer, 7 | OutterProductLayer, FGCNNLayer, SENETLayer, BilinearInteraction, 8 | FieldWiseBiInteraction, FwFMLayer, FEFMLayer) 9 | from .normalization import LayerNormalization 10 | from .sequence import (AttentionSequencePoolingLayer, BiasEncoding, BiLSTM, 11 | KMaxPooling, SequencePoolingLayer, WeightedSequenceLayer, 12 | Transformer, DynamicGRU,PositionEncoding) 13 | 14 | from .utils import NoMask, Hash, Linear, Add, combined_dnn_input, softmax, reduce_sum 15 | 16 | custom_objects = {'tf': tf, 17 | 'InnerProductLayer': InnerProductLayer, 18 | 'OutterProductLayer': OutterProductLayer, 19 | 'DNN': DNN, 20 | 
'PredictionLayer': PredictionLayer, 21 | 'FM': FM, 22 | 'AFMLayer': AFMLayer, 23 | 'CrossNet': CrossNet, 24 | 'CrossNetMix': CrossNetMix, 25 | 'BiInteractionPooling': BiInteractionPooling, 26 | 'LocalActivationUnit': LocalActivationUnit, 27 | 'Dice': Dice, 28 | 'SequencePoolingLayer': SequencePoolingLayer, 29 | 'AttentionSequencePoolingLayer': AttentionSequencePoolingLayer, 30 | 'CIN': CIN, 31 | 'InteractingLayer': InteractingLayer, 32 | 'LayerNormalization': LayerNormalization, 33 | 'BiLSTM': BiLSTM, 34 | 'Transformer': Transformer, 35 | 'NoMask': NoMask, 36 | 'BiasEncoding': BiasEncoding, 37 | 'KMaxPooling': KMaxPooling, 38 | 'FGCNNLayer': FGCNNLayer, 39 | 'Hash': Hash, 40 | 'Linear': Linear, 41 | 'DynamicGRU': DynamicGRU, 42 | 'SENETLayer': SENETLayer, 43 | 'BilinearInteraction': BilinearInteraction, 44 | 'WeightedSequenceLayer': WeightedSequenceLayer, 45 | 'Add': Add, 46 | 'FieldWiseBiInteraction': FieldWiseBiInteraction, 47 | 'FwFMLayer': FwFMLayer, 48 | 'softmax': softmax, 49 | 'FEFMLayer': FEFMLayer, 50 | 'reduce_sum': reduce_sum, 51 | 'PositionEncoding':PositionEncoding 52 | } 53 | -------------------------------------------------------------------------------- /deepctr/layers/activation.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | """ 3 | 4 | Author: 5 | Weichen Shen,weichenswc@163.com 6 | 7 | """ 8 | 9 | import tensorflow as tf 10 | from tensorflow.python.keras.initializers import Zeros 11 | from tensorflow.python.keras.layers import Layer 12 | 13 | try: 14 | unicode 15 | except NameError: 16 | unicode = str 17 | 18 | 19 | class Dice(Layer): 20 | """The Data Adaptive Activation Function in DIN,which can be viewed as a generalization of PReLu and can adaptively adjust the rectified point according to distribution of input data. 21 | 22 | Input shape 23 | - Arbitrary. 
Use the keyword argument `input_shape` (tuple of integers, does not include the samples axis) when using this layer as the first layer in a model. 24 | 25 | Output shape 26 | - Same shape as the input. 27 | 28 | Arguments 29 | - **axis** : Integer, the axis that should be used to compute data distribution (typically the features axis). 30 | 31 | - **epsilon** : Small float added to variance to avoid dividing by zero. 32 | 33 | References 34 | - [Zhou G, Zhu X, Song C, et al. Deep interest network for click-through rate prediction[C]//Proceedings of the 24th ACM SIGKDD International Conference on Knowledge Discovery & Data Mining. ACM, 2018: 1059-1068.](https://arxiv.org/pdf/1706.06978.pdf) 35 | """ 36 | 37 | def __init__(self, axis=-1, epsilon=1e-9, **kwargs): 38 | self.axis = axis 39 | self.epsilon = epsilon 40 | super(Dice, self).__init__(**kwargs) 41 | 42 | def build(self, input_shape): 43 | self.bn = tf.keras.layers.BatchNormalization( 44 | axis=self.axis, epsilon=self.epsilon, center=False, scale=False) 45 | self.alphas = self.add_weight(shape=(input_shape[-1],), initializer=Zeros( 46 | ), dtype=tf.float32, name='dice_alpha') # name='alpha_'+self.name 47 | super(Dice, self).build(input_shape) # Be sure to call this somewhere! 
48 | self.uses_learning_phase = True 49 | 50 | def call(self, inputs, training=None, **kwargs): 51 | inputs_normed = self.bn(inputs, training=training) 52 | # tf.layers.batch_normalization( 53 | # inputs, axis=self.axis, epsilon=self.epsilon, center=False, scale=False) 54 | x_p = tf.sigmoid(inputs_normed) 55 | return self.alphas * (1.0 - x_p) * inputs + x_p * inputs 56 | 57 | def compute_output_shape(self, input_shape): 58 | return input_shape 59 | 60 | def get_config(self, ): 61 | config = {'axis': self.axis, 'epsilon': self.epsilon} 62 | base_config = super(Dice, self).get_config() 63 | return dict(list(base_config.items()) + list(config.items())) 64 | 65 | 66 | def activation_layer(activation): 67 | if activation in ("dice", "Dice"): 68 | act_layer = Dice() 69 | elif isinstance(activation, (str, unicode)): 70 | act_layer = tf.keras.layers.Activation(activation) 71 | elif issubclass(activation, Layer): 72 | act_layer = activation() 73 | else: 74 | raise ValueError( 75 | "Invalid activation,found %s.You should use a str or a Activation Layer Class." 
% (activation)) 76 | return act_layer 77 | -------------------------------------------------------------------------------- /deepctr/layers/normalization.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | """ 3 | 4 | Author: 5 | Weichen Shen,weichenswc@163.com 6 | 7 | """ 8 | 9 | from tensorflow.python.keras import backend as K 10 | from tensorflow.python.keras.initializers import Ones, Zeros 11 | from tensorflow.python.keras.layers import Layer 12 | 13 | 14 | class LayerNormalization(Layer): 15 | def __init__(self, axis=-1, eps=1e-9, center=True, 16 | scale=True, **kwargs): 17 | self.axis = axis 18 | self.eps = eps 19 | self.center = center 20 | self.scale = scale 21 | super(LayerNormalization, self).__init__(**kwargs) 22 | 23 | def build(self, input_shape): 24 | self.gamma = self.add_weight(name='gamma', shape=input_shape[-1:], 25 | initializer=Ones(), trainable=True) 26 | self.beta = self.add_weight(name='beta', shape=input_shape[-1:], 27 | initializer=Zeros(), trainable=True) 28 | super(LayerNormalization, self).build(input_shape) 29 | 30 | def call(self, inputs): 31 | mean = K.mean(inputs, axis=self.axis, keepdims=True) 32 | variance = K.mean(K.square(inputs - mean), axis=-1, keepdims=True) 33 | std = K.sqrt(variance + self.eps) 34 | outputs = (inputs - mean) / std 35 | if self.scale: 36 | outputs *= self.gamma 37 | if self.center: 38 | outputs += self.beta 39 | return outputs 40 | 41 | def compute_output_shape(self, input_shape): 42 | return input_shape 43 | 44 | def get_config(self, ): 45 | config = {'axis': self.axis, 'eps': self.eps, 'center': self.center, 'scale': self.scale} 46 | base_config = super(LayerNormalization, self).get_config() 47 | return dict(list(base_config.items()) + list(config.items())) 48 | -------------------------------------------------------------------------------- /deepctr/models/__init__.py: 
-------------------------------------------------------------------------------- 1 | from .afm import AFM 2 | from .autoint import AutoInt 3 | from .ccpm import CCPM 4 | from .dcn import DCN 5 | from .dcnmix import DCNMix 6 | from .deepfefm import DeepFEFM 7 | from .deepfm import DeepFM 8 | from .difm import DIFM 9 | from .fgcnn import FGCNN 10 | from .fibinet import FiBiNET 11 | from .flen import FLEN 12 | from .fnn import FNN 13 | from .fwfm import FwFM 14 | from .ifm import IFM 15 | from .mlr import MLR 16 | from .multitask import SharedBottom, ESMM, MMOE, PLE 17 | from .nfm import NFM 18 | from .onn import ONN 19 | from .pnn import PNN 20 | from .sequence import DIN, DIEN, DSIN, BST 21 | from .wdl import WDL 22 | from .xdeepfm import xDeepFM 23 | from .widefm import wideFM 24 | 25 | __all__ = ["AFM", "CCPM", "DCN", "IFM", "DIFM", "DCNMix", "MLR", "DeepFM", "MLR", "NFM", "DIN", "DIEN", "FNN", "PNN", 26 | "WDL", "xDeepFM", "AutoInt", "ONN", "FGCNN", "DSIN", "FiBiNET", 'FLEN', "FwFM", "BST", "DeepFEFM", 27 | "SharedBottom", "ESMM", "MMOE", "PLE", "wideFM"] 28 | -------------------------------------------------------------------------------- /deepctr/models/afm.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | """ 3 | 4 | Author: 5 | Weichen Shen, weichenswc@163.com 6 | 7 | Reference: 8 | [1] Xiao J, Ye H, He X, et al. Attentional factorization machines: Learning the weight of feature interactions via attention networks[J]. arXiv preprint arXiv:1708.04617, 2017. 
def AFM(linear_feature_columns, dnn_feature_columns, fm_group=DEFAULT_GROUP_NAME, use_attention=True,
        attention_factor=8,
        l2_reg_linear=1e-5, l2_reg_embedding=1e-5, l2_reg_att=1e-5, afm_dropout=0, seed=1024,
        task='binary'):
    """Instantiates the Attentional Factorization Machine architecture.

    :param linear_feature_columns: An iterable containing all the features used by linear part of the model.
    :param dnn_feature_columns: An iterable containing all the features used by deep part of the model.
    :param fm_group: list of group names (or a single group name) of the features that will be used to do
        feature interactions.
    :param use_attention: bool, whether to use attention or not; if set to ``False`` the model is the same as a
        **standard Factorization Machine**
    :param attention_factor: positive integer, units in attention net
    :param l2_reg_linear: float. L2 regularizer strength applied to linear part
    :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vector
    :param l2_reg_att: float. L2 regularizer strength applied to attention net
    :param afm_dropout: float in [0,1), Fraction of the attention net output units to dropout.
    :param seed: integer, to use as random seed.
    :param task: str, ``"binary"`` for binary logloss or ``"regression"`` for regression loss
    :return: A Keras model instance.
    """
    # A bare string would turn ``k in fm_group`` below into a substring test
    # (e.g. group "a" would match fm_group "abc"); wrap it so membership is an
    # exact group-name comparison, as with the tuple default used by DeepFM/FwFM.
    if isinstance(fm_group, str):
        fm_group = (fm_group,)

    features = build_input_features(
        linear_feature_columns + dnn_feature_columns)

    inputs_list = list(features.values())

    # Dense features are not supported by this model; only grouped embeddings are used.
    group_embedding_dict, _ = input_from_feature_columns(features, dnn_feature_columns, l2_reg_embedding,
                                                         seed, support_dense=False, support_group=True)

    linear_logit = get_linear_logit(features, linear_feature_columns, seed=seed, prefix='linear',
                                    l2_reg=l2_reg_linear)

    # One interaction layer per selected group; the per-group logits are summed.
    if use_attention:
        fm_logit = add_func([AFMLayer(attention_factor, l2_reg_att, afm_dropout,
                                      seed)(list(v)) for k, v in group_embedding_dict.items() if k in fm_group])
    else:
        fm_logit = add_func([FM()(concat_func(v, axis=1))
                             for k, v in group_embedding_dict.items() if k in fm_group])

    final_logit = add_func([linear_logit, fm_logit])
    output = PredictionLayer(task)(final_logit)

    model = tf.keras.models.Model(inputs=inputs_list, outputs=output)
    return model
def DeepFM(linear_feature_columns, dnn_feature_columns, fm_group=(DEFAULT_GROUP_NAME,), dnn_hidden_units=(256, 128, 64),
           l2_reg_linear=0.00001, l2_reg_embedding=0.00001, l2_reg_dnn=0, seed=1024, dnn_dropout=0,
           dnn_activation='relu', dnn_use_bn=False, task='binary', keras_model=tf.keras.models.Model):
    """Build the DeepFM network: linear part + FM interactions + DNN.

    :param linear_feature_columns: An iterable containing all the features used by linear part of the model.
    :param dnn_feature_columns: An iterable containing all the features used by deep part of the model.
    :param fm_group: list, group names of the features that take part in the FM feature interactions.
    :param dnn_hidden_units: list of positive integers or empty list, the layer number and units in each DNN layer.
    :param l2_reg_linear: float. L2 regularizer strength applied to linear part
    :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vector
    :param l2_reg_dnn: float. L2 regularizer strength applied to DNN
    :param seed: integer, to use as random seed.
    :param dnn_dropout: float in [0,1), the probability we will drop out a given DNN coordinate.
    :param dnn_activation: Activation function to use in DNN
    :param dnn_use_bn: bool. Whether use BatchNormalization before activation or not in DNN
    :param task: str, ``"binary"`` for binary logloss or ``"regression"`` for regression loss
    :param keras_model: callable invoked as ``keras_model(inputs=..., outputs=...)`` to build the returned model.
    :return: A Keras model instance.
    """
    features = build_input_features(linear_feature_columns + dnn_feature_columns)
    model_inputs = list(features.values())

    linear_logit = get_linear_logit(features, linear_feature_columns, seed=seed, prefix='linear',
                                    l2_reg=l2_reg_linear)

    group_embedding_dict, dense_value_list = input_from_feature_columns(
        features, dnn_feature_columns, l2_reg_embedding, seed, support_group=True)

    # FM part: one second-order interaction logit per selected group, summed.
    fm_logits = []
    for group_name, embeddings in group_embedding_dict.items():
        if group_name not in fm_group:
            continue
        fm_logits.append(FM()(concat_func(embeddings, axis=1)))
    fm_logit = add_func(fm_logits)

    # Deep part: every group's embeddings plus the dense values feed the DNN.
    all_embeddings = list(chain.from_iterable(group_embedding_dict.values()))
    dnn_input = combined_dnn_input(all_embeddings, dense_value_list)
    dnn_output = DNN(dnn_hidden_units, dnn_activation, l2_reg_dnn, dnn_dropout, dnn_use_bn, seed=seed)(dnn_input)
    logit_initializer = tf.keras.initializers.glorot_normal(seed=seed)
    dnn_logit = tf.keras.layers.Dense(1, use_bias=False, kernel_initializer=logit_initializer)(dnn_output)

    summed_logit = add_func([linear_logit, fm_logit, dnn_logit])
    prediction = PredictionLayer(task)(summed_logit)

    return keras_model(inputs=model_inputs, outputs=prediction)
def FiBiNET(linear_feature_columns, dnn_feature_columns, bilinear_type='interaction', reduction_ratio=3,
            dnn_hidden_units=(256, 128, 64), l2_reg_linear=1e-5,
            l2_reg_embedding=1e-5, l2_reg_dnn=0, seed=1024, dnn_dropout=0, dnn_activation='relu',
            task='binary'):
    """Build the Feature Importance and Bilinear feature Interaction NETwork.

    :param linear_feature_columns: An iterable containing all the features used by linear part of the model.
    :param dnn_feature_columns: An iterable containing all the features used by deep part of the model.
    :param bilinear_type: str, bilinear function type used in Bilinear Interaction Layer, can be ``'all'`` ,
        ``'each'`` or ``'interaction'``
    :param reduction_ratio: integer in [1,inf), reduction ratio used in SENET Layer
    :param dnn_hidden_units: list of positive integers or empty list, the layer number and units in each DNN layer.
    :param l2_reg_linear: float. L2 regularizer strength applied to wide part
    :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vector
    :param l2_reg_dnn: float. L2 regularizer strength applied to DNN
    :param seed: integer, to use as random seed.
    :param dnn_dropout: float in [0,1), the probability we will drop out a given DNN coordinate.
    :param dnn_activation: Activation function to use in DNN
    :param task: str, ``"binary"`` for binary logloss or ``"regression"`` for regression loss
    :return: A Keras model instance.
    """
    features = build_input_features(linear_feature_columns + dnn_feature_columns)
    model_inputs = list(features.values())

    linear_logit = get_linear_logit(features, linear_feature_columns, seed=seed, prefix='linear',
                                    l2_reg=l2_reg_linear)

    sparse_embedding_list, dense_value_list = input_from_feature_columns(
        features, dnn_feature_columns, l2_reg_embedding, seed)

    # Re-weighted embeddings from the SENET layer.
    senet_embedding_list = SENETLayer(reduction_ratio, seed)(sparse_embedding_list)

    # Two independent bilinear layers: one on the SENET output, one on the raw embeddings.
    senet_bilinear_out = BilinearInteraction(bilinear_type=bilinear_type, seed=seed)(senet_embedding_list)
    bilinear_out = BilinearInteraction(bilinear_type=bilinear_type, seed=seed)(sparse_embedding_list)

    merged_bilinear = concat_func([senet_bilinear_out, bilinear_out])
    flat_bilinear = tf.keras.layers.Flatten()(merged_bilinear)
    dnn_input = combined_dnn_input([flat_bilinear], dense_value_list)

    # Batch normalization is always disabled in this model's DNN.
    dnn_out = DNN(dnn_hidden_units, dnn_activation, l2_reg_dnn, dnn_dropout, False, seed=seed)(dnn_input)
    logit_initializer = tf.keras.initializers.glorot_normal(seed)
    dnn_logit = tf.keras.layers.Dense(1, use_bias=False, kernel_initializer=logit_initializer)(dnn_out)

    summed_logit = add_func([linear_logit, dnn_logit])
    prediction = PredictionLayer(task)(summed_logit)

    return tf.keras.models.Model(inputs=model_inputs, outputs=prediction)
def FLEN(linear_feature_columns,
         dnn_feature_columns,
         dnn_hidden_units=(256, 128, 64),
         l2_reg_linear=0.00001,
         l2_reg_embedding=0.00001,
         l2_reg_dnn=0,
         seed=1024,
         dnn_dropout=0.0,
         dnn_activation='relu',
         dnn_use_bn=False,
         task='binary'):
    """Build the FLEN network (field-wise bi-interaction + DNN + linear part).

    :param linear_feature_columns: An iterable containing all the features used by linear part of the model.
    :param dnn_feature_columns: An iterable containing all the features used by deep part of the model.
    :param dnn_hidden_units: list of positive integers or empty list, the layer number and units in each layer of
        deep net
    :param l2_reg_linear: float. L2 regularizer strength applied to linear part
    :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vector
    :param l2_reg_dnn: float. L2 regularizer strength applied to DNN
    :param seed: integer, to use as random seed.
    :param dnn_dropout: float in [0,1), the probability we will drop out a given DNN coordinate.
    :param dnn_activation: Activation function to use in DNN
    :param dnn_use_bn: bool. Whether use BatchNormalization before activation or not in DNN
    :param task: str, ``"binary"`` for binary logloss or ``"regression"`` for regression loss
    :return: A Keras model instance.
    """
    features = build_input_features(linear_feature_columns + dnn_feature_columns)
    model_inputs = list(features.values())

    group_embedding_dict, dense_value_list = input_from_feature_columns(
        features, dnn_feature_columns, l2_reg_embedding, seed, support_group=True)

    linear_logit = get_linear_logit(features, linear_feature_columns, seed=seed, prefix='linear',
                                    l2_reg=l2_reg_linear)

    # Each group (field) is concatenated into one tensor before the field-wise interaction.
    field_tensors = []
    for embeddings in group_embedding_dict.values():
        field_tensors.append(concat_func(embeddings, axis=1))
    fm_mf_out = FieldWiseBiInteraction(seed=seed)(field_tensors)

    all_embeddings = list(chain.from_iterable(group_embedding_dict.values()))
    dnn_input = combined_dnn_input(all_embeddings, dense_value_list)
    dnn_output = DNN(dnn_hidden_units, dnn_activation, l2_reg_dnn, dnn_dropout, dnn_use_bn, seed=seed)(dnn_input)

    # The interaction output and the DNN output share a single projection to one logit.
    logit_layer = tf.keras.layers.Dense(1, use_bias=False,
                                        kernel_initializer=tf.keras.initializers.glorot_normal(seed))
    dnn_logit = logit_layer(concat_func([fm_mf_out, dnn_output]))

    summed_logit = add_func([linear_logit, dnn_logit])
    prediction = PredictionLayer(task)(summed_logit)

    return tf.keras.models.Model(inputs=model_inputs, outputs=prediction)
def FNN(linear_feature_columns, dnn_feature_columns, dnn_hidden_units=(256, 128, 64),
        l2_reg_embedding=1e-5, l2_reg_linear=1e-5, l2_reg_dnn=0, seed=1024, dnn_dropout=0,
        dnn_activation='relu', task='binary'):
    """Build the Factorization-supported Neural Network (linear part + DNN over embeddings).

    :param linear_feature_columns: An iterable containing all the features used by linear part of the model.
    :param dnn_feature_columns: An iterable containing all the features used by deep part of the model.
    :param dnn_hidden_units: list of positive integers or empty list, the layer number and units in each layer of
        deep net
    :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vector
    :param l2_reg_linear: float. L2 regularizer strength applied to linear weight
    :param l2_reg_dnn: float. L2 regularizer strength applied to DNN
    :param seed: integer, to use as random seed.
    :param dnn_dropout: float in [0,1), the probability we will drop out a given DNN coordinate.
    :param dnn_activation: Activation function to use in DNN
    :param task: str, ``"binary"`` for binary logloss or ``"regression"`` for regression loss
    :return: A Keras model instance.
    """
    features = build_input_features(linear_feature_columns + dnn_feature_columns)
    model_inputs = list(features.values())

    linear_logit = get_linear_logit(features, linear_feature_columns, seed=seed, prefix='linear',
                                    l2_reg=l2_reg_linear)

    sparse_embedding_list, dense_value_list = input_from_feature_columns(
        features, dnn_feature_columns, l2_reg_embedding, seed)

    dnn_input = combined_dnn_input(sparse_embedding_list, dense_value_list)
    # Batch normalization is always disabled in this model's DNN.
    deep_out = DNN(dnn_hidden_units, dnn_activation, l2_reg_dnn, dnn_dropout, False, seed=seed)(dnn_input)
    logit_initializer = tf.keras.initializers.glorot_normal(seed)
    dnn_logit = tf.keras.layers.Dense(1, use_bias=False, kernel_initializer=logit_initializer)(deep_out)

    summed_logit = add_func([dnn_logit, linear_logit])
    prediction = PredictionLayer(task)(summed_logit)

    return tf.keras.models.Model(inputs=model_inputs, outputs=prediction)
def FwFM(linear_feature_columns, dnn_feature_columns, fm_group=(DEFAULT_GROUP_NAME,), dnn_hidden_units=(256, 128, 64),
         l2_reg_linear=0.00001, l2_reg_embedding=0.00001, l2_reg_field_strength=0.00001, l2_reg_dnn=0,
         seed=1024, dnn_dropout=0, dnn_activation='relu', dnn_use_bn=False, task='binary'):
    """Build the Field-weighted Factorization Machine network, with an optional DNN part.

    :param linear_feature_columns: An iterable containing all the features used by linear part of the model.
    :param dnn_feature_columns: An iterable containing all the features used by deep part of the model.
    :param fm_group: list, group names of the features that will be used to do feature interactions.
    :param dnn_hidden_units: list of positive integers, or empty list if no DNN is wanted; the layer number and
        units in each layer of DNN
    :param l2_reg_linear: float. L2 regularizer strength applied to linear part
    :param l2_reg_field_strength: float. L2 regularizer strength applied to the field pair strength parameters
    :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vector
    :param l2_reg_dnn: float. L2 regularizer strength applied to DNN
    :param seed: integer, to use as random seed.
    :param dnn_dropout: float in [0,1), the probability we will drop out a given DNN coordinate.
    :param dnn_activation: Activation function to use in DNN
    :param dnn_use_bn: bool. Whether use BatchNormalization before activation or not in DNN
    :param task: str, ``"binary"`` for binary logloss or ``"regression"`` for regression loss
    :return: A Keras model instance.
    """
    features = build_input_features(linear_feature_columns + dnn_feature_columns)
    model_inputs = list(features.values())

    linear_logit = get_linear_logit(features, linear_feature_columns, seed=seed, prefix='linear',
                                    l2_reg=l2_reg_linear)

    group_embedding_dict, dense_value_list = input_from_feature_columns(
        features, dnn_feature_columns, l2_reg_embedding, seed, support_group=True)

    # One FwFM layer per selected group, sized by that group's number of embeddings.
    group_logits = []
    for group_name, embeddings in group_embedding_dict.items():
        if group_name not in fm_group:
            continue
        fwfm_layer = FwFMLayer(num_fields=len(embeddings), regularizer=l2_reg_field_strength)
        group_logits.append(fwfm_layer(concat_func(embeddings, axis=1)))
    fwfm_logit = add_func(group_logits)

    logit_parts = [linear_logit, fwfm_logit]

    # The deep part is only built when hidden units are given.
    if dnn_hidden_units:
        all_embeddings = list(chain.from_iterable(group_embedding_dict.values()))
        dnn_input = combined_dnn_input(all_embeddings, dense_value_list)
        dnn_output = DNN(dnn_hidden_units, dnn_activation, l2_reg_dnn, dnn_dropout, dnn_use_bn, seed=seed)(dnn_input)
        logit_initializer = tf.keras.initializers.glorot_normal(seed)
        dnn_logit = tf.keras.layers.Dense(1, use_bias=False, kernel_initializer=logit_initializer)(dnn_output)
        logit_parts.append(dnn_logit)

    summed_logit = add_func(logit_parts)
    prediction = PredictionLayer(task)(summed_logit)

    return tf.keras.models.Model(inputs=model_inputs, outputs=prediction)
def MLR(region_feature_columns, base_feature_columns=None, region_num=4,
        l2_reg_linear=1e-5, seed=1024, task='binary',
        bias_feature_columns=None):
    """Build the Mixed Logistic Regression / Piece-wise Linear Model.

    :param region_feature_columns: An iterable containing all the features used by region part of the model.
    :param base_feature_columns: An iterable containing all the features used by base part of the model;
        when ``None`` or empty, the region features are reused.
    :param region_num: integer > 1, indicate the piece number
    :param l2_reg_linear: float. L2 regularizer strength applied to weight
    :param seed: integer, to use as random seed.
    :param task: str, ``"binary"`` for binary logloss or ``"regression"`` for regression loss
    :param bias_feature_columns: An iterable containing all the features used by bias part of the model.
    :return: A Keras model instance.
    :raises ValueError: if ``region_num`` is not greater than 1.
    """
    if region_num <= 1:
        raise ValueError("region_num must > 1")

    # Fall back to the region features when no base features are supplied.
    base_missing = base_feature_columns is None or len(base_feature_columns) == 0
    if base_missing:
        base_feature_columns = region_feature_columns

    if bias_feature_columns is None:
        bias_feature_columns = []

    features = build_input_features(region_feature_columns + base_feature_columns + bias_feature_columns)
    model_inputs = list(features.values())

    region_score = get_region_score(features, region_feature_columns, region_num, l2_reg_linear, seed)
    learner_score = get_learner_score(features, base_feature_columns, region_num, l2_reg_linear, seed, task=task)

    # Region scores weight the per-region learner outputs.
    final_logit = dot([region_score, learner_score], axes=-1)

    has_bias = bias_feature_columns is not None and len(bias_feature_columns) > 0
    if has_bias:
        # The bias learner is a single learner that always uses the 'binary' task.
        bias_score = get_learner_score(features, bias_feature_columns, 1, l2_reg_linear, seed, prefix='bias_',
                                       task='binary')
        final_logit = dot([final_logit, bias_score], axes=-1)

    return Model(inputs=model_inputs, outputs=final_logit)


def get_region_score(features, feature_columns, region_number, l2_reg, seed, prefix='region_', seq_mask_zero=True):
    """Return the softmax over ``region_number`` linear logits built from ``feature_columns``.

    Region ``i`` (0-based) uses seed ``seed + i`` and name prefix ``prefix + str(i + 1)``.
    ``seq_mask_zero`` is accepted but not used here.
    """
    per_region_logits = []
    for idx in range(region_number):
        per_region_logits.append(
            get_linear_logit(features, feature_columns, seed=seed + idx,
                             prefix=prefix + str(idx + 1), l2_reg=l2_reg))
    region_logit = concat_func(per_region_logits)
    return Activation('softmax')(region_logit)


def get_learner_score(features, feature_columns, region_number, l2_reg, seed, prefix='learner_', seq_mask_zero=True,
                      task='binary'):
    """Return the concatenated outputs of ``region_number`` bias-free prediction heads.

    Learner ``i`` (0-based) uses seed ``seed + i`` and name prefix ``prefix + str(i + 1)``.
    ``seq_mask_zero`` is accepted but not used here.
    """
    learner_outputs = []
    for idx in range(region_number):
        head = PredictionLayer(task=task, use_bias=False)
        logit = get_linear_logit(features, feature_columns, seed=seed + idx, prefix=prefix + str(idx + 1),
                                 l2_reg=l2_reg)
        learner_outputs.append(head(logit))

    return concat_func(learner_outputs)
def ESMM(dnn_feature_columns, tower_dnn_hidden_units=(256, 128, 64), l2_reg_embedding=0.00001, l2_reg_dnn=0,
         seed=1024, dnn_dropout=0, dnn_activation='relu', dnn_use_bn=False, task_types=('binary', 'binary'),
         task_names=('ctr', 'ctcvr')):
    """Build the Entire Space Multi-Task Model (CTR tower and CVR tower, CTCVR = CTR * CVR).

    :param dnn_feature_columns: An iterable containing all the features used by deep part of the model.
    :param tower_dnn_hidden_units: list of positive integers or empty list, the layer number and units in each
        layer of task DNN.
    :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vector.
    :param l2_reg_dnn: float. L2 regularizer strength applied to DNN.
    :param seed: integer, to use as random seed.
    :param dnn_dropout: float in [0,1), the probability we will drop out a given DNN coordinate.
    :param dnn_activation: Activation function to use in DNN
    :param dnn_use_bn: bool. Whether use BatchNormalization before activation or not in DNN
    :param task_types: sequence of str, the loss of each task; every entry must be ``"binary"``.
    :param task_names: sequence of two str, the output names of the CTR and CTCVR predictions.
        Default value is ('ctr', 'ctcvr').

    :return: A Keras model instance with two outputs: the CTR prediction and the CTCVR prediction.
    :raises ValueError: if ``task_names`` does not have exactly two entries or a task type is not ``"binary"``.
    """
    if len(task_names) != 2:
        raise ValueError("the length of task_names must be equal to 2")

    # Report the first non-binary task type, if any.
    illegal_types = [task_type for task_type in task_types if task_type != 'binary']
    if illegal_types:
        raise ValueError("task must be binary in ESMM, {} is illegal".format(illegal_types[0]))

    ctr_name, ctcvr_name = task_names[0], task_names[1]

    features = build_input_features(dnn_feature_columns)
    model_inputs = list(features.values())

    sparse_embedding_list, dense_value_list = input_from_feature_columns(
        features, dnn_feature_columns, l2_reg_embedding, seed)

    dnn_input = combined_dnn_input(sparse_embedding_list, dense_value_list)

    # Two towers with identical hyper-parameters read the same shared input.
    ctr_output = DNN(tower_dnn_hidden_units, dnn_activation, l2_reg_dnn, dnn_dropout, dnn_use_bn, seed=seed)(
        dnn_input)
    cvr_output = DNN(tower_dnn_hidden_units, dnn_activation, l2_reg_dnn, dnn_dropout, dnn_use_bn, seed=seed)(
        dnn_input)

    ctr_logit = tf.keras.layers.Dense(1, use_bias=False, activation=None)(ctr_output)
    cvr_logit = tf.keras.layers.Dense(1, use_bias=False, activation=None)(cvr_output)

    ctr_pred = PredictionLayer('binary', name=ctr_name)(ctr_logit)
    cvr_pred = PredictionLayer('binary')(cvr_logit)

    # CTCVR = CTR * CVR; the CVR prediction itself is not a model output.
    ctcvr_pred = tf.keras.layers.Multiply(name=ctcvr_name)([ctr_pred, cvr_pred])

    return tf.keras.models.Model(inputs=model_inputs, outputs=[ctr_pred, ctcvr_pred])
def SharedBottom(dnn_feature_columns, bottom_dnn_hidden_units=(256, 128), tower_dnn_hidden_units=(64,),
                 l2_reg_embedding=0.00001, l2_reg_dnn=0, seed=1024, dnn_dropout=0, dnn_activation='relu',
                 dnn_use_bn=False, task_types=('binary', 'binary'), task_names=('ctr', 'ctcvr')):
    """Build the SharedBottom multi-task network: one shared DNN followed by one tower per task.

    :param dnn_feature_columns: An iterable containing all the features used by deep part of the model.
    :param bottom_dnn_hidden_units: list of positive integers or empty list, the layer number and units in each
        layer of shared bottom DNN.
    :param tower_dnn_hidden_units: list of positive integers or empty list, the layer number and units in each
        layer of task-specific DNN.
    :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vector
    :param l2_reg_dnn: float. L2 regularizer strength applied to DNN
    :param seed: integer, to use as random seed.
    :param dnn_dropout: float in [0,1), the probability we will drop out a given DNN coordinate.
    :param dnn_activation: Activation function to use in DNN
    :param dnn_use_bn: bool. Whether use BatchNormalization before activation or not in DNN
    :param task_types: list of str, indicating the loss of each task, ``"binary"`` for binary logloss or
        ``"regression"`` for regression loss. e.g. ['binary', 'regression']
    :param task_names: list of str, indicating the predict target of each task

    :return: A Keras model instance with one output per task.
    :raises ValueError: if fewer than two tasks are given, if ``task_types`` and ``task_names`` differ in
        length, or if a task type is neither ``"binary"`` nor ``"regression"``.
    """
    num_tasks = len(task_names)
    if num_tasks <= 1:
        raise ValueError("num_tasks must be greater than 1")
    if len(task_types) != num_tasks:
        raise ValueError("num_tasks must be equal to the length of task_types")

    # Report the first unsupported task type, if any.
    illegal_types = [task_type for task_type in task_types if task_type not in ['binary', 'regression']]
    if illegal_types:
        raise ValueError("task must be binary or regression, {} is illegal".format(illegal_types[0]))

    features = build_input_features(dnn_feature_columns)
    model_inputs = list(features.values())

    sparse_embedding_list, dense_value_list = input_from_feature_columns(
        features, dnn_feature_columns, l2_reg_embedding, seed)

    dnn_input = combined_dnn_input(sparse_embedding_list, dense_value_list)
    shared_bottom_output = DNN(bottom_dnn_hidden_units, dnn_activation, l2_reg_dnn, dnn_dropout, dnn_use_bn,
                               seed=seed)(dnn_input)

    # Each task gets its own tower, a bias-free projection to one logit and a named prediction head.
    predictions = []
    for task_type, task_name in zip(task_types, task_names):
        tower = DNN(tower_dnn_hidden_units, dnn_activation, l2_reg_dnn, dnn_dropout, dnn_use_bn, seed=seed,
                    name='tower_' + task_name)
        tower_output = tower(shared_bottom_output)

        task_logit = tf.keras.layers.Dense(1, use_bias=False, activation=None)(tower_output)
        predictions.append(PredictionLayer(task_type, name=task_name)(task_logit))

    return tf.keras.models.Model(inputs=model_inputs, outputs=predictions)
Neural factorization machines for sparse predictive analytics[C]//Proceedings of the 40th International ACM SIGIR conference on Research and Development in Information Retrieval. ACM, 2017: 355-364. (https://arxiv.org/abs/1708.05027) 8 | """ 9 | import tensorflow as tf 10 | 11 | from ..feature_column import build_input_features, get_linear_logit, input_from_feature_columns 12 | from ..layers.core import PredictionLayer, DNN 13 | from ..layers.interaction import BiInteractionPooling 14 | from ..layers.utils import concat_func, add_func, combined_dnn_input 15 | 16 | 17 | def NFM(linear_feature_columns, dnn_feature_columns, dnn_hidden_units=(256, 128, 64), 18 | l2_reg_embedding=1e-5, l2_reg_linear=1e-5, l2_reg_dnn=0, seed=1024, bi_dropout=0, 19 | dnn_dropout=0, dnn_activation='relu', task='binary'): 20 | """Instantiates the Neural Factorization Machine architecture. 21 | 22 | :param linear_feature_columns: An iterable containing all the features used by linear part of the model. 23 | :param dnn_feature_columns: An iterable containing all the features used by deep part of the model. 24 | :param dnn_hidden_units: list,list of positive integer or empty list, the layer number and units in each layer of deep net 25 | :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vector 26 | :param l2_reg_linear: float. L2 regularizer strength applied to linear part. 27 | :param l2_reg_dnn: float . L2 regularizer strength applied to DNN 28 | :param seed: integer ,to use as random seed. 29 | :param biout_dropout: When not ``None``, the probability we will drop out the output of BiInteractionPooling Layer. 30 | :param dnn_dropout: float in [0,1), the probability we will drop out a given DNN coordinate. 31 | :param dnn_activation: Activation function to use in deep net 32 | :param task: str, ``"binary"`` for binary logloss or ``"regression"`` for regression loss 33 | :return: A Keras model instance. 
34 | """ 35 | 36 | features = build_input_features( 37 | linear_feature_columns + dnn_feature_columns) 38 | 39 | inputs_list = list(features.values()) 40 | 41 | linear_logit = get_linear_logit(features, linear_feature_columns, seed=seed, prefix='linear', 42 | l2_reg=l2_reg_linear) 43 | 44 | sparse_embedding_list, dense_value_list = input_from_feature_columns(features, dnn_feature_columns, 45 | l2_reg_embedding, seed) 46 | 47 | fm_input = concat_func(sparse_embedding_list, axis=1) 48 | bi_out = BiInteractionPooling()(fm_input) 49 | if bi_dropout: 50 | bi_out = tf.keras.layers.Dropout(bi_dropout)(bi_out, training=None) 51 | dnn_input = combined_dnn_input([bi_out], dense_value_list) 52 | dnn_output = DNN(dnn_hidden_units, dnn_activation, l2_reg_dnn, dnn_dropout, False, seed=seed)(dnn_input) 53 | dnn_logit = tf.keras.layers.Dense( 54 | 1, use_bias=False, kernel_initializer=tf.keras.initializers.glorot_normal(seed))(dnn_output) 55 | 56 | final_logit = add_func([linear_logit, dnn_logit]) 57 | 58 | output = PredictionLayer(task)(final_logit) 59 | 60 | model = tf.keras.models.Model(inputs=inputs_list, outputs=output) 61 | return model 62 | -------------------------------------------------------------------------------- /deepctr/models/pnn.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | """ 3 | Author: 4 | Weichen Shen, weichenswc@163.com 5 | 6 | Reference: 7 | [1] Qu Y, Cai H, Ren K, et al. Product-based neural networks for user response prediction[C]//Data Mining (ICDM), 2016 IEEE 16th International Conference on. 
IEEE, 2016: 1149-1154.(https://arxiv.org/pdf/1611.00144.pdf) 8 | """ 9 | 10 | import tensorflow as tf 11 | 12 | from ..feature_column import build_input_features, input_from_feature_columns 13 | from ..layers.core import PredictionLayer, DNN 14 | from ..layers.interaction import InnerProductLayer, OutterProductLayer 15 | from ..layers.utils import concat_func, combined_dnn_input 16 | 17 | 18 | def PNN(dnn_feature_columns, dnn_hidden_units=(256, 128, 64), l2_reg_embedding=0.00001, l2_reg_dnn=0, 19 | seed=1024, dnn_dropout=0, dnn_activation='relu', use_inner=True, use_outter=False, kernel_type='mat', 20 | task='binary'): 21 | """Instantiates the Product-based Neural Network architecture. 22 | 23 | :param dnn_feature_columns: An iterable containing all the features used by deep part of the model. 24 | :param dnn_hidden_units: list,list of positive integer or empty list, the layer number and units in each layer of deep net 25 | :param l2_reg_embedding: float . L2 regularizer strength applied to embedding vector 26 | :param l2_reg_dnn: float. L2 regularizer strength applied to DNN 27 | :param seed: integer ,to use as random seed. 28 | :param dnn_dropout: float in [0,1), the probability we will drop out a given DNN coordinate. 29 | :param dnn_activation: Activation function to use in DNN 30 | :param use_inner: bool,whether use inner-product or not. 31 | :param use_outter: bool,whether use outter-product or not. 32 | :param kernel_type: str,kernel_type used in outter-product,can be ``'mat'`` , ``'vec'`` or ``'num'`` 33 | :param task: str, ``"binary"`` for binary logloss or ``"regression"`` for regression loss 34 | :return: A Keras model instance. 
35 | """ 36 | 37 | if kernel_type not in ['mat', 'vec', 'num']: 38 | raise ValueError("kernel_type must be mat,vec or num") 39 | 40 | features = build_input_features(dnn_feature_columns) 41 | 42 | inputs_list = list(features.values()) 43 | 44 | sparse_embedding_list, dense_value_list = input_from_feature_columns(features, dnn_feature_columns, 45 | l2_reg_embedding, seed) 46 | inner_product = tf.keras.layers.Flatten()( 47 | InnerProductLayer()(sparse_embedding_list)) 48 | outter_product = OutterProductLayer(kernel_type)(sparse_embedding_list) 49 | 50 | # ipnn deep input 51 | linear_signal = tf.keras.layers.Reshape( 52 | [sum(map(lambda x: int(x.shape[-1]), sparse_embedding_list))])(concat_func(sparse_embedding_list)) 53 | 54 | if use_inner and use_outter: 55 | deep_input = tf.keras.layers.Concatenate()( 56 | [linear_signal, inner_product, outter_product]) 57 | elif use_inner: 58 | deep_input = tf.keras.layers.Concatenate()( 59 | [linear_signal, inner_product]) 60 | elif use_outter: 61 | deep_input = tf.keras.layers.Concatenate()( 62 | [linear_signal, outter_product]) 63 | else: 64 | deep_input = linear_signal 65 | 66 | dnn_input = combined_dnn_input([deep_input], dense_value_list) 67 | dnn_out = DNN(dnn_hidden_units, dnn_activation, l2_reg_dnn, dnn_dropout, False, seed=seed)(dnn_input) 68 | dnn_logit = tf.keras.layers.Dense( 69 | 1, use_bias=False, kernel_initializer=tf.keras.initializers.glorot_normal(seed))(dnn_out) 70 | 71 | output = PredictionLayer(task)(dnn_logit) 72 | 73 | model = tf.keras.models.Model(inputs=inputs_list, 74 | outputs=output) 75 | return model 76 | -------------------------------------------------------------------------------- /deepctr/models/sequence/__init__.py: -------------------------------------------------------------------------------- 1 | from .bst import BST 2 | from .dien import DIEN 3 | from .din import DIN 4 | from .dsin import DSIN 5 | -------------------------------------------------------------------------------- 
/deepctr/models/wdl.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | """ 3 | Author: 4 | Weichen Shen, weichenswc@163.com 5 | 6 | Reference: 7 | [1] Cheng H T, Koc L, Harmsen J, et al. Wide & deep learning for recommender systems[C]//Proceedings of the 1st Workshop on Deep Learning for Recommender Systems. ACM, 2016: 7-10.(https://arxiv.org/pdf/1606.07792.pdf) 8 | """ 9 | 10 | import tensorflow as tf 11 | 12 | from ..feature_column import build_input_features, get_linear_logit, input_from_feature_columns 13 | from ..layers.core import PredictionLayer, DNN 14 | from ..layers.utils import add_func, combined_dnn_input 15 | 16 | 17 | def WDL(linear_feature_columns, dnn_feature_columns, dnn_hidden_units=(256, 128, 64), l2_reg_linear=0.00001, 18 | l2_reg_embedding=0.00001, l2_reg_dnn=0, seed=1024, dnn_dropout=0, dnn_activation='relu', 19 | task='binary', keras_model=tf.keras.models.Model): 20 | """Instantiates the Wide&Deep Learning architecture. 21 | 22 | :param linear_feature_columns: An iterable containing all the features used by linear part of the model. 23 | :param dnn_feature_columns: An iterable containing all the features used by deep part of the model. 24 | :param dnn_hidden_units: list,list of positive integer or empty list, the layer number and units in each layer of DNN 25 | :param l2_reg_linear: float. L2 regularizer strength applied to wide part 26 | :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vector 27 | :param l2_reg_dnn: float. L2 regularizer strength applied to DNN 28 | :param seed: integer ,to use as random seed. 29 | :param dnn_dropout: float in [0,1), the probability we will drop out a given DNN coordinate. 30 | :param dnn_activation: Activation function to use in DNN 31 | :param task: str, ``"binary"`` for binary logloss or ``"regression"`` for regression loss 32 | :return: A Keras model instance. 
33 | """ 34 | 35 | features = build_input_features( 36 | linear_feature_columns + dnn_feature_columns) 37 | 38 | inputs_list = list(features.values()) 39 | 40 | linear_logit = get_linear_logit(features, linear_feature_columns, seed=seed, prefix='linear', 41 | l2_reg=l2_reg_linear) 42 | 43 | sparse_embedding_list, dense_value_list = input_from_feature_columns(features, dnn_feature_columns, 44 | l2_reg_embedding, seed) 45 | 46 | dnn_input = combined_dnn_input(sparse_embedding_list, dense_value_list) 47 | dnn_out = DNN(dnn_hidden_units, dnn_activation, l2_reg_dnn, dnn_dropout, False, seed=seed)(dnn_input) 48 | dnn_logit = tf.keras.layers.Dense( 49 | 1, use_bias=False, kernel_initializer=tf.keras.initializers.glorot_normal(seed))(dnn_out) 50 | 51 | final_logit = add_func([dnn_logit, linear_logit]) 52 | 53 | output = PredictionLayer(task)(final_logit) 54 | 55 | model = keras_model(inputs=inputs_list, outputs=output) 56 | return model 57 | -------------------------------------------------------------------------------- /deepctr/models/widefm.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | from itertools import chain 3 | 4 | import tensorflow as tf 5 | 6 | from ..feature_column import build_input_features, get_linear_logit, DEFAULT_GROUP_NAME, input_from_feature_columns 7 | from ..layers.core import PredictionLayer, DNN 8 | from ..layers.interaction import FM 9 | from ..layers.utils import concat_func, add_func, combined_dnn_input 10 | 11 | 12 | def wideFM(linear_feature_columns, dnn_feature_columns, fm_group=(DEFAULT_GROUP_NAME,), dnn_hidden_units=(256, 128, 64), 13 | l2_reg_linear=0.00001, l2_reg_embedding=0.00001, l2_reg_dnn=0, seed=1024, dnn_dropout=0, 14 | dnn_activation='relu', dnn_use_bn=False, task='binary', keras_model=tf.keras.models.Model): 15 | """Instantiates the DeepFM Network architecture. 
16 | 17 | :param linear_feature_columns: An iterable containing all the features used by linear part of the model. 18 | :param dnn_feature_columns: An iterable containing all the features used by deep part of the model. 19 | :param fm_group: list, group_name of features that will be used to do feature interactions. 20 | :param dnn_hidden_units: list,list of positive integer or empty list, the layer number and units in each layer of DNN 21 | :param l2_reg_linear: float. L2 regularizer strength applied to linear part 22 | :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vector 23 | :param l2_reg_dnn: float. L2 regularizer strength applied to DNN 24 | :param seed: integer ,to use as random seed. 25 | :param dnn_dropout: float in [0,1), the probability we will drop out a given DNN coordinate. 26 | :param dnn_activation: Activation function to use in DNN 27 | :param dnn_use_bn: bool. Whether use BatchNormalization before activation or not in DNN 28 | :param task: str, ``"binary"`` for binary logloss or ``"regression"`` for regression loss 29 | :return: A Keras model instance. 
30 | """ 31 | 32 | features = build_input_features( 33 | linear_feature_columns + dnn_feature_columns) 34 | 35 | inputs_list = list(features.values()) 36 | 37 | linear_logit = get_linear_logit(features, linear_feature_columns, seed=seed, prefix='linear', 38 | l2_reg=l2_reg_linear) 39 | 40 | group_embedding_dict, dense_value_list = input_from_feature_columns(features, dnn_feature_columns, l2_reg_embedding, 41 | seed, support_group=True) 42 | 43 | fm_logit = add_func([FM()(concat_func(v, axis=1)) 44 | for k, v in group_embedding_dict.items() if k in fm_group]) 45 | final_logit = add_func([linear_logit, fm_logit]) 46 | 47 | output = PredictionLayer(task)(final_logit) 48 | model = keras_model(inputs=inputs_list, outputs=output) 49 | return model 50 | -------------------------------------------------------------------------------- /deepctr/utils.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | """ 3 | 4 | Author: 5 | Weichen Shen,weichenswc@163.com 6 | 7 | """ 8 | 9 | import json 10 | import logging 11 | from threading import Thread 12 | 13 | import requests 14 | 15 | try: 16 | from packaging.version import parse 17 | except ImportError: 18 | from pip._vendor.packaging.version import parse 19 | 20 | 21 | def check_version(version): 22 | """Return version of package on pypi.python.org using json.""" 23 | 24 | def check(version): 25 | try: 26 | url_pattern = 'https://pypi.python.org/pypi/deepctr/json' 27 | req = requests.get(url_pattern) 28 | latest_version = parse('0') 29 | version = parse(version) 30 | if req.status_code == requests.codes.ok: 31 | j = json.loads(req.text.encode('utf-8')) 32 | releases = j.get('releases', []) 33 | for release in releases: 34 | ver = parse(release) 35 | if ver.is_prerelease or ver.is_postrelease: 36 | continue 37 | latest_version = max(latest_version, ver) 38 | if latest_version > version: 39 | logging.warning( 40 | '\nDeepCTR version {0} detected. 
Your version is {1}.\nUse `pip install -U deepctr` to upgrade.Changelog: https://github.com/shenweichen/DeepCTR/releases/tag/v{0}'.format( 41 | latest_version, version)) 42 | except: 43 | print("Please check the latest version manually on https://pypi.org/project/deepctr/#history") 44 | return 45 | 46 | Thread(target=check, args=(version,)).start() 47 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | SPHINXPROJ = DeepCTR 8 | SOURCEDIR = source 9 | BUILDDIR = build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=source 11 | set BUILDDIR=build 12 | set SPHINXPROJ=DeepCTR 13 | 14 | if "%1" == "" goto help 15 | 16 | %SPHINXBUILD% >NUL 2>NUL 17 | if errorlevel 9009 ( 18 | echo. 19 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 20 | echo.installed, then set the SPHINXBUILD environment variable to point 21 | echo.to the full path of the 'sphinx-build' executable. 
Alternatively you 22 | echo.may add the Sphinx directory to PATH. 23 | echo. 24 | echo.If you don't have Sphinx installed, grab it from 25 | echo.http://sphinx-doc.org/ 26 | exit /b 1 27 | ) 28 | 29 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% 30 | goto end 31 | 32 | :help 33 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% 34 | 35 | :end 36 | popd 37 | -------------------------------------------------------------------------------- /docs/pics/AFM.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/LargeBatchCTR/12101b44d8e753a6d8d954437287f96d68895d49/docs/pics/AFM.png -------------------------------------------------------------------------------- /docs/pics/AutoInt.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/LargeBatchCTR/12101b44d8e753a6d8d954437287f96d68895d49/docs/pics/AutoInt.png -------------------------------------------------------------------------------- /docs/pics/BST.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/LargeBatchCTR/12101b44d8e753a6d8d954437287f96d68895d49/docs/pics/BST.png -------------------------------------------------------------------------------- /docs/pics/CCPM.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/LargeBatchCTR/12101b44d8e753a6d8d954437287f96d68895d49/docs/pics/CCPM.png -------------------------------------------------------------------------------- /docs/pics/CIN.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/LargeBatchCTR/12101b44d8e753a6d8d954437287f96d68895d49/docs/pics/CIN.png -------------------------------------------------------------------------------- /docs/pics/DCN-M.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/LargeBatchCTR/12101b44d8e753a6d8d954437287f96d68895d49/docs/pics/DCN-M.png -------------------------------------------------------------------------------- /docs/pics/DCN-Mix.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/LargeBatchCTR/12101b44d8e753a6d8d954437287f96d68895d49/docs/pics/DCN-Mix.png -------------------------------------------------------------------------------- /docs/pics/DCN.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/LargeBatchCTR/12101b44d8e753a6d8d954437287f96d68895d49/docs/pics/DCN.png -------------------------------------------------------------------------------- /docs/pics/DIEN.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/LargeBatchCTR/12101b44d8e753a6d8d954437287f96d68895d49/docs/pics/DIEN.png -------------------------------------------------------------------------------- /docs/pics/DIFM.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/LargeBatchCTR/12101b44d8e753a6d8d954437287f96d68895d49/docs/pics/DIFM.jpg -------------------------------------------------------------------------------- /docs/pics/DIN.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/LargeBatchCTR/12101b44d8e753a6d8d954437287f96d68895d49/docs/pics/DIN.png -------------------------------------------------------------------------------- /docs/pics/DSIN.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/bytedance/LargeBatchCTR/12101b44d8e753a6d8d954437287f96d68895d49/docs/pics/DSIN.png -------------------------------------------------------------------------------- /docs/pics/DeepFEFM.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/LargeBatchCTR/12101b44d8e753a6d8d954437287f96d68895d49/docs/pics/DeepFEFM.jpg -------------------------------------------------------------------------------- /docs/pics/DeepFM.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/LargeBatchCTR/12101b44d8e753a6d8d954437287f96d68895d49/docs/pics/DeepFM.png -------------------------------------------------------------------------------- /docs/pics/FGCNN.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/LargeBatchCTR/12101b44d8e753a6d8d954437287f96d68895d49/docs/pics/FGCNN.png -------------------------------------------------------------------------------- /docs/pics/FLEN.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/LargeBatchCTR/12101b44d8e753a6d8d954437287f96d68895d49/docs/pics/FLEN.jpg -------------------------------------------------------------------------------- /docs/pics/FNN.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/LargeBatchCTR/12101b44d8e753a6d8d954437287f96d68895d49/docs/pics/FNN.png -------------------------------------------------------------------------------- /docs/pics/FiBiNET.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/LargeBatchCTR/12101b44d8e753a6d8d954437287f96d68895d49/docs/pics/FiBiNET.png 
-------------------------------------------------------------------------------- /docs/pics/IFM.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/LargeBatchCTR/12101b44d8e753a6d8d954437287f96d68895d49/docs/pics/IFM.jpg -------------------------------------------------------------------------------- /docs/pics/InteractingLayer.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/LargeBatchCTR/12101b44d8e753a6d8d954437287f96d68895d49/docs/pics/InteractingLayer.png -------------------------------------------------------------------------------- /docs/pics/MLR.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/LargeBatchCTR/12101b44d8e753a6d8d954437287f96d68895d49/docs/pics/MLR.png -------------------------------------------------------------------------------- /docs/pics/NFM.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/LargeBatchCTR/12101b44d8e753a6d8d954437287f96d68895d49/docs/pics/NFM.png -------------------------------------------------------------------------------- /docs/pics/ONN.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/LargeBatchCTR/12101b44d8e753a6d8d954437287f96d68895d49/docs/pics/ONN.png -------------------------------------------------------------------------------- /docs/pics/PNN.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/LargeBatchCTR/12101b44d8e753a6d8d954437287f96d68895d49/docs/pics/PNN.png -------------------------------------------------------------------------------- /docs/pics/WDL.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/LargeBatchCTR/12101b44d8e753a6d8d954437287f96d68895d49/docs/pics/WDL.png -------------------------------------------------------------------------------- /docs/pics/code.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/LargeBatchCTR/12101b44d8e753a6d8d954437287f96d68895d49/docs/pics/code.png -------------------------------------------------------------------------------- /docs/pics/criteo_sample.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/LargeBatchCTR/12101b44d8e753a6d8d954437287f96d68895d49/docs/pics/criteo_sample.png -------------------------------------------------------------------------------- /docs/pics/deepctrbot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/LargeBatchCTR/12101b44d8e753a6d8d954437287f96d68895d49/docs/pics/deepctrbot.png -------------------------------------------------------------------------------- /docs/pics/fms.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/LargeBatchCTR/12101b44d8e753a6d8d954437287f96d68895d49/docs/pics/fms.png -------------------------------------------------------------------------------- /docs/pics/mlr1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/LargeBatchCTR/12101b44d8e753a6d8d954437287f96d68895d49/docs/pics/mlr1.png -------------------------------------------------------------------------------- /docs/pics/mlrvsdnn.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/bytedance/LargeBatchCTR/12101b44d8e753a6d8d954437287f96d68895d49/docs/pics/mlrvsdnn.png -------------------------------------------------------------------------------- /docs/pics/movielens_sample.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/LargeBatchCTR/12101b44d8e753a6d8d954437287f96d68895d49/docs/pics/movielens_sample.png -------------------------------------------------------------------------------- /docs/pics/movielens_sample_with_genres.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/LargeBatchCTR/12101b44d8e753a6d8d954437287f96d68895d49/docs/pics/movielens_sample_with_genres.png -------------------------------------------------------------------------------- /docs/pics/multitaskmodels/ESMM.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/LargeBatchCTR/12101b44d8e753a6d8d954437287f96d68895d49/docs/pics/multitaskmodels/ESMM.png -------------------------------------------------------------------------------- /docs/pics/multitaskmodels/MMOE.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/LargeBatchCTR/12101b44d8e753a6d8d954437287f96d68895d49/docs/pics/multitaskmodels/MMOE.png -------------------------------------------------------------------------------- /docs/pics/multitaskmodels/PLE.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/LargeBatchCTR/12101b44d8e753a6d8d954437287f96d68895d49/docs/pics/multitaskmodels/PLE.png -------------------------------------------------------------------------------- /docs/pics/multitaskmodels/SharedBottom.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/bytedance/LargeBatchCTR/12101b44d8e753a6d8d954437287f96d68895d49/docs/pics/multitaskmodels/SharedBottom.png -------------------------------------------------------------------------------- /docs/pics/weichennote.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/LargeBatchCTR/12101b44d8e753a6d8d954437287f96d68895d49/docs/pics/weichennote.png -------------------------------------------------------------------------------- /docs/pics/xDeepFM.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/LargeBatchCTR/12101b44d8e753a6d8d954437287f96d68895d49/docs/pics/xDeepFM.png -------------------------------------------------------------------------------- /docs/requirements.readthedocs.txt: -------------------------------------------------------------------------------- 1 | tensorflow==2.5.1 2 | recommonmark==0.7.1 -------------------------------------------------------------------------------- /docs/source/Estimators.rst: -------------------------------------------------------------------------------- 1 | DeepCTR Estimators API 2 | ====================== 3 | 4 | .. toctree:: 5 | CCPM 6 | FNN 7 | PNN 8 | WDL 9 | DeepFM 10 | NFM 11 | AFM 12 | DCN 13 | xDeepFM 14 | AutoInt 15 | FiBiNET 16 | -------------------------------------------------------------------------------- /docs/source/Layers.rst: -------------------------------------------------------------------------------- 1 | DeepCTR Layers API 2 | ====================== 3 | 4 | 5 | .. 
toctree:: 6 | :maxdepth: 3 7 | :caption: API: 8 | 9 | Core Layers 10 | Interaction Layers 11 | Activation Layers 12 | Normalization Layers 13 | Sequence Layers -------------------------------------------------------------------------------- /docs/source/Models.rst: -------------------------------------------------------------------------------- 1 | DeepCTR Models API 2 | ====================== 3 | 4 | .. toctree:: 5 | Model Methods 6 | CCPM 7 | FNN 8 | PNN 9 | WDL 10 | DeepFM 11 | MLR 12 | NFM 13 | AFM 14 | DCN 15 | DCNMix 16 | DIN 17 | DIEN 18 | DSIN 19 | BST 20 | xDeepFM 21 | AutoInt 22 | ONN 23 | FGCNN 24 | FiBiNET 25 | FLEN 26 | IFM 27 | DIFM 28 | DeepFEFM 29 | SharedBottom 30 | ESMM 31 | MMOE 32 | PLE 33 | 34 | -------------------------------------------------------------------------------- /docs/source/deepctr.contrib.rnn.rst: -------------------------------------------------------------------------------- 1 | deepctr.contrib.rnn module 2 | ========================== 3 | 4 | .. automodule:: deepctr.contrib.rnn 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/source/deepctr.contrib.rst: -------------------------------------------------------------------------------- 1 | deepctr.contrib package 2 | ======================= 3 | 4 | Submodules 5 | ---------- 6 | 7 | .. toctree:: 8 | 9 | deepctr.contrib.rnn 10 | deepctr.contrib.utils 11 | 12 | Module contents 13 | --------------- 14 | 15 | .. automodule:: deepctr.contrib 16 | :members: 17 | :undoc-members: 18 | :show-inheritance: 19 | -------------------------------------------------------------------------------- /docs/source/deepctr.contrib.utils.rst: -------------------------------------------------------------------------------- 1 | deepctr.contrib.utils module 2 | ============================ 3 | 4 | .. 
automodule:: deepctr.contrib.utils 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/source/deepctr.estimator.feature_column.rst: -------------------------------------------------------------------------------- 1 | deepctr.estimator.feature\_column module 2 | ======================================== 3 | 4 | .. automodule:: deepctr.estimator.feature_column 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/source/deepctr.estimator.inputs.rst: -------------------------------------------------------------------------------- 1 | deepctr.estimator.inputs module 2 | =============================== 3 | 4 | .. automodule:: deepctr.estimator.inputs 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/source/deepctr.estimator.models.afm.rst: -------------------------------------------------------------------------------- 1 | deepctr.estimator.models.afm module 2 | =================================== 3 | 4 | .. automodule:: deepctr.estimator.models.afm 5 | :members: 6 | :no-undoc-members: 7 | :no-show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/source/deepctr.estimator.models.autoint.rst: -------------------------------------------------------------------------------- 1 | deepctr.estimator.models.autoint module 2 | ======================================= 3 | 4 | .. 
automodule:: deepctr.estimator.models.autoint 5 | :members: 6 | :no-undoc-members: 7 | :no-show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/source/deepctr.estimator.models.ccpm.rst: -------------------------------------------------------------------------------- 1 | deepctr.estimator.models.ccpm module 2 | ==================================== 3 | 4 | .. automodule:: deepctr.estimator.models.ccpm 5 | :members: 6 | :no-undoc-members: 7 | :no-show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/source/deepctr.estimator.models.dcn.rst: -------------------------------------------------------------------------------- 1 | deepctr.estimator.models.dcn module 2 | =================================== 3 | 4 | .. automodule:: deepctr.estimator.models.dcn 5 | :members: 6 | :no-undoc-members: 7 | :no-show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/source/deepctr.estimator.models.deepfefm.rst: -------------------------------------------------------------------------------- 1 | deepctr.estimator.models.deepfefm module 2 | ====================================== 3 | 4 | .. automodule:: deepctr.estimator.models.deepfefm 5 | :members: 6 | :no-undoc-members: 7 | :no-show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/source/deepctr.estimator.models.deepfm.rst: -------------------------------------------------------------------------------- 1 | deepctr.estimator.models.deepfm module 2 | ====================================== 3 | 4 | .. 
automodule:: deepctr.estimator.models.deepfm 5 | :members: 6 | :no-undoc-members: 7 | :no-show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/source/deepctr.estimator.models.fibinet.rst: -------------------------------------------------------------------------------- 1 | deepctr.estimator.models.fibinet module 2 | ======================================= 3 | 4 | .. automodule:: deepctr.estimator.models.fibinet 5 | :members: 6 | :no-undoc-members: 7 | :no-show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/source/deepctr.estimator.models.fnn.rst: -------------------------------------------------------------------------------- 1 | deepctr.estimator.models.fnn module 2 | =================================== 3 | 4 | .. automodule:: deepctr.estimator.models.fnn 5 | :members: 6 | :no-undoc-members: 7 | :no-show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/source/deepctr.estimator.models.fwfm.rst: -------------------------------------------------------------------------------- 1 | deepctr.estimator.models.fwfm module 2 | ======================================== 3 | 4 | .. automodule:: deepctr.estimator.models.fwfm 5 | :members: 6 | :no-undoc-members: 7 | :no-show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/source/deepctr.estimator.models.nfm.rst: -------------------------------------------------------------------------------- 1 | deepctr.estimator.models.nfm module 2 | =================================== 3 | 4 | .. 
automodule:: deepctr.estimator.models.nfm 5 | :members: 6 | :no-undoc-members: 7 | :no-show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/source/deepctr.estimator.models.pnn.rst: -------------------------------------------------------------------------------- 1 | deepctr.estimator.models.pnn module 2 | =================================== 3 | 4 | .. automodule:: deepctr.estimator.models.pnn 5 | :members: 6 | :no-undoc-members: 7 | :no-show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/source/deepctr.estimator.models.rst: -------------------------------------------------------------------------------- 1 | deepctr.estimator.models package 2 | ================================ 3 | 4 | Submodules 5 | ---------- 6 | 7 | .. toctree:: 8 | 9 | deepctr.estimator.models.afm 10 | deepctr.estimator.models.autoint 11 | deepctr.estimator.models.ccpm 12 | deepctr.estimator.models.dcn 13 | deepctr.estimator.models.deepfm 14 | deepctr.estimator.models.fwfm 15 | deepctr.estimator.models.fibinet 16 | deepctr.estimator.models.fnn 17 | deepctr.estimator.models.nfm 18 | deepctr.estimator.models.pnn 19 | deepctr.estimator.models.wdl 20 | deepctr.estimator.models.xdeepfm 21 | 22 | Module contents 23 | --------------- 24 | 25 | .. automodule:: deepctr.estimator.models 26 | :members: 27 | :undoc-members: 28 | :show-inheritance: 29 | -------------------------------------------------------------------------------- /docs/source/deepctr.estimator.models.wdl.rst: -------------------------------------------------------------------------------- 1 | deepctr.estimator.models.wdl module 2 | =================================== 3 | 4 | ..
automodule:: deepctr.estimator.models.wdl 5 | :members: 6 | :no-undoc-members: 7 | :no-show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/source/deepctr.estimator.models.xdeepfm.rst: -------------------------------------------------------------------------------- 1 | deepctr.estimator.models.xdeepfm module 2 | ======================================= 3 | 4 | .. automodule:: deepctr.estimator.models.xdeepfm 5 | :members: 6 | :no-undoc-members: 7 | :no-show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/source/deepctr.estimator.rst: -------------------------------------------------------------------------------- 1 | deepctr.estimator package 2 | ========================= 3 | 4 | Subpackages 5 | ----------- 6 | 7 | .. toctree:: 8 | 9 | deepctr.estimator.models 10 | 11 | Submodules 12 | ---------- 13 | 14 | .. toctree:: 15 | 16 | deepctr.estimator.feature_column 17 | deepctr.estimator.inputs 18 | deepctr.estimator.utils 19 | 20 | Module contents 21 | --------------- 22 | 23 | .. automodule:: deepctr.estimator 24 | :members: 25 | :undoc-members: 26 | :show-inheritance: 27 | -------------------------------------------------------------------------------- /docs/source/deepctr.estimator.utils.rst: -------------------------------------------------------------------------------- 1 | deepctr.estimator.utils module 2 | ============================== 3 | 4 | .. automodule:: deepctr.estimator.utils 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/source/deepctr.feature_column.rst: -------------------------------------------------------------------------------- 1 | deepctr.feature\_column module 2 | ============================== 3 | 4 | .. 
automodule:: deepctr.feature_column 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/source/deepctr.inputs.rst: -------------------------------------------------------------------------------- 1 | deepctr.inputs module 2 | ===================== 3 | 4 | .. automodule:: deepctr.inputs 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/source/deepctr.layers.activation.rst: -------------------------------------------------------------------------------- 1 | deepctr.layers.activation module 2 | ================================ 3 | 4 | .. automodule:: deepctr.layers.activation 5 | :members: 6 | :no-undoc-members: 7 | :no-show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/source/deepctr.layers.core.rst: -------------------------------------------------------------------------------- 1 | deepctr.layers.core module 2 | ========================== 3 | 4 | .. automodule:: deepctr.layers.core 5 | :members: 6 | :no-undoc-members: 7 | :no-show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/source/deepctr.layers.interaction.rst: -------------------------------------------------------------------------------- 1 | deepctr.layers.interaction module 2 | ================================= 3 | 4 | .. automodule:: deepctr.layers.interaction 5 | :members: 6 | :no-undoc-members: 7 | :no-show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/source/deepctr.layers.normalization.rst: -------------------------------------------------------------------------------- 1 | deepctr.layers.normalization module 2 | =================================== 3 | 4 | .. 
automodule:: deepctr.layers.normalization 5 | :members: 6 | :no-undoc-members: 7 | :no-show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/source/deepctr.layers.rst: -------------------------------------------------------------------------------- 1 | deepctr.layers package 2 | ====================== 3 | 4 | Submodules 5 | ---------- 6 | 7 | .. toctree:: 8 | 9 | deepctr.layers.activation 10 | deepctr.layers.core 11 | deepctr.layers.interaction 12 | deepctr.layers.normalization 13 | deepctr.layers.sequence 14 | deepctr.layers.utils 15 | 16 | Module contents 17 | --------------- 18 | 19 | .. automodule:: deepctr.layers 20 | :members: 21 | :undoc-members: 22 | :show-inheritance: 23 | -------------------------------------------------------------------------------- /docs/source/deepctr.layers.sequence.rst: -------------------------------------------------------------------------------- 1 | deepctr.layers.sequence module 2 | ============================== 3 | 4 | .. automodule:: deepctr.layers.sequence 5 | :members: 6 | :no-undoc-members: 7 | :no-show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/source/deepctr.layers.utils.rst: -------------------------------------------------------------------------------- 1 | deepctr.layers.utils module 2 | =========================== 3 | 4 | .. automodule:: deepctr.layers.utils 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/source/deepctr.models.afm.rst: -------------------------------------------------------------------------------- 1 | deepctr.models.afm module 2 | ========================= 3 | 4 | .. 
automodule:: deepctr.models.afm 5 | :members: 6 | :no-undoc-members: 7 | :no-show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/source/deepctr.models.autoint.rst: -------------------------------------------------------------------------------- 1 | deepctr.models.autoint module 2 | ============================= 3 | 4 | .. automodule:: deepctr.models.autoint 5 | :members: 6 | :no-undoc-members: 7 | :no-show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/source/deepctr.models.ccpm.rst: -------------------------------------------------------------------------------- 1 | deepctr.models.ccpm module 2 | ========================== 3 | 4 | .. automodule:: deepctr.models.ccpm 5 | :members: 6 | :no-undoc-members: 7 | :no-show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/source/deepctr.models.dcn.rst: -------------------------------------------------------------------------------- 1 | deepctr.models.dcn module 2 | ========================= 3 | 4 | .. automodule:: deepctr.models.dcn 5 | :members: 6 | :no-undoc-members: 7 | :no-show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/source/deepctr.models.dcnmix.rst: -------------------------------------------------------------------------------- 1 | deepctr.models.dcnmix module 2 | ========================= 3 | 4 | .. automodule:: deepctr.models.dcnmix 5 | :members: 6 | :no-undoc-members: 7 | :no-show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/source/deepctr.models.deepfefm.rst: -------------------------------------------------------------------------------- 1 | deepctr.models.deepfefm module 2 | ============================== 3 | 4 | .. 
automodule:: deepctr.models.deepfefm 5 | :members: 6 | :no-undoc-members: 7 | :no-show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/source/deepctr.models.deepfm.rst: -------------------------------------------------------------------------------- 1 | deepctr.models.deepfm module 2 | ============================ 3 | 4 | .. automodule:: deepctr.models.deepfm 5 | :members: 6 | :no-undoc-members: 7 | :no-show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/source/deepctr.models.deepfwfm.rst: -------------------------------------------------------------------------------- 1 | deepctr.models.deepfwfm module 2 | ============================== 3 | 4 | .. automodule:: deepctr.models.deepfwfm 5 | :members: 6 | :no-undoc-members: 7 | :no-show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/source/deepctr.models.difm.rst: -------------------------------------------------------------------------------- 1 | deepctr.models.difm module 2 | ============================= 3 | 4 | .. automodule:: deepctr.models.difm 5 | :members: 6 | :no-undoc-members: 7 | :no-show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/source/deepctr.models.fgcnn.rst: -------------------------------------------------------------------------------- 1 | deepctr.models.fgcnn module 2 | =========================== 3 | 4 | .. automodule:: deepctr.models.fgcnn 5 | :members: 6 | :no-undoc-members: 7 | :no-show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/source/deepctr.models.fibinet.rst: -------------------------------------------------------------------------------- 1 | deepctr.models.fibinet module 2 | ============================= 3 | 4 | .. 
automodule:: deepctr.models.fibinet 5 | :members: 6 | :no-undoc-members: 7 | :no-show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/source/deepctr.models.flen.rst: -------------------------------------------------------------------------------- 1 | deepctr.models.flen module 2 | ============================= 3 | 4 | .. automodule:: deepctr.models.flen 5 | :members: 6 | :no-undoc-members: 7 | :no-show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/source/deepctr.models.fnn.rst: -------------------------------------------------------------------------------- 1 | deepctr.models.fnn module 2 | ========================= 3 | 4 | .. automodule:: deepctr.models.fnn 5 | :members: 6 | :no-undoc-members: 7 | :no-show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/source/deepctr.models.ifm.rst: -------------------------------------------------------------------------------- 1 | deepctr.models.ifm module 2 | ============================= 3 | 4 | .. automodule:: deepctr.models.ifm 5 | :members: 6 | :no-undoc-members: 7 | :no-show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/source/deepctr.models.mlr.rst: -------------------------------------------------------------------------------- 1 | deepctr.models.mlr module 2 | ========================= 3 | 4 | .. automodule:: deepctr.models.mlr 5 | :members: 6 | :no-undoc-members: 7 | :no-show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/source/deepctr.models.multitask.esmm.rst: -------------------------------------------------------------------------------- 1 | deepctr.models.multitask.esmm module 2 | ============================= 3 | 4 | .. 
automodule:: deepctr.models.multitask.esmm 5 | :members: 6 | :no-undoc-members: 7 | :no-show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/source/deepctr.models.multitask.mmoe.rst: -------------------------------------------------------------------------------- 1 | deepctr.models.multitask.mmoe module 2 | ============================= 3 | 4 | .. automodule:: deepctr.models.multitask.mmoe 5 | :members: 6 | :no-undoc-members: 7 | :no-show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/source/deepctr.models.multitask.ple.rst: -------------------------------------------------------------------------------- 1 | deepctr.models.multitask.ple module 2 | ============================= 3 | 4 | .. automodule:: deepctr.models.multitask.ple 5 | :members: 6 | :no-undoc-members: 7 | :no-show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/source/deepctr.models.multitask.sharedbottom.rst: -------------------------------------------------------------------------------- 1 | deepctr.models.multitask.sharedbottom module 2 | ============================= 3 | 4 | .. automodule:: deepctr.models.multitask.sharedbottom 5 | :members: 6 | :no-undoc-members: 7 | :no-show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/source/deepctr.models.nfm.rst: -------------------------------------------------------------------------------- 1 | deepctr.models.nfm module 2 | ========================= 3 | 4 | .. automodule:: deepctr.models.nfm 5 | :members: 6 | :no-undoc-members: 7 | :no-show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/source/deepctr.models.onn.rst: -------------------------------------------------------------------------------- 1 | deepctr.models.onn module 2 | ========================== 3 | 4 | .. 
automodule:: deepctr.models.onn 5 | :members: 6 | :no-undoc-members: 7 | :no-show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/source/deepctr.models.pnn.rst: -------------------------------------------------------------------------------- 1 | deepctr.models.pnn module 2 | ========================= 3 | 4 | .. automodule:: deepctr.models.pnn 5 | :members: 6 | :no-undoc-members: 7 | :no-show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/source/deepctr.models.rst: -------------------------------------------------------------------------------- 1 | deepctr.models package 2 | ====================== 3 | 4 | Submodules 5 | ---------- 6 | 7 | .. toctree:: 8 | 9 | deepctr.models.afm 10 | deepctr.models.autoint 11 | deepctr.models.ccpm 12 | deepctr.models.dcn 13 | deepctr.models.dcnmix 14 | deepctr.models.deepfm 15 | deepctr.models.dien 16 | deepctr.models.din 17 | deepctr.models.dsin 18 | deepctr.models.fgcnn 19 | deepctr.models.fibinet 20 | deepctr.models.fnn 21 | deepctr.models.mlr 22 | deepctr.models.onn 23 | deepctr.models.nfm 24 | deepctr.models.pnn 25 | deepctr.models.wdl 26 | deepctr.models.xdeepfm 27 | deepctr.models.flen 28 | deepctr.models.ifm 29 | deepctr.models.difm 30 | deepctr.models.deepfefm 31 | deepctr.models.multitask.sharedbottom 32 | deepctr.models.multitask.esmm 33 | deepctr.models.multitask.mmoe 34 | deepctr.models.multitask.ple 35 | 36 | 37 | Module contents 38 | --------------- 39 | 40 | .. automodule:: deepctr.models 41 | :members: 42 | :undoc-members: 43 | :show-inheritance: 44 | -------------------------------------------------------------------------------- /docs/source/deepctr.models.sequence.bst.rst: -------------------------------------------------------------------------------- 1 | deepctr.models.sequence.bst module 2 | ========================= 3 | 4 | .. 
automodule:: deepctr.models.sequence.bst 5 | :members: 6 | :no-undoc-members: 7 | :no-show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/source/deepctr.models.sequence.dien.rst: -------------------------------------------------------------------------------- 1 | deepctr.models.sequence.dien module 2 | ========================== 3 | 4 | .. automodule:: deepctr.models.sequence.dien 5 | :members: 6 | :no-undoc-members: 7 | :no-show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/source/deepctr.models.sequence.din.rst: -------------------------------------------------------------------------------- 1 | deepctr.models.sequence.din module 2 | ========================= 3 | 4 | .. automodule:: deepctr.models.sequence.din 5 | :members: 6 | :no-undoc-members: 7 | :no-show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/source/deepctr.models.sequence.dsin.rst: -------------------------------------------------------------------------------- 1 | deepctr.models.sequence.dsin module 2 | ========================== 3 | 4 | .. automodule:: deepctr.models.sequence.dsin 5 | :members: 6 | :no-undoc-members: 7 | :no-show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/source/deepctr.models.wdl.rst: -------------------------------------------------------------------------------- 1 | deepctr.models.wdl module 2 | ========================= 3 | 4 | .. automodule:: deepctr.models.wdl 5 | :members: 6 | :no-undoc-members: 7 | :no-show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/source/deepctr.models.xdeepfm.rst: -------------------------------------------------------------------------------- 1 | deepctr.models.xdeepfm module 2 | ============================= 3 | 4 | .. 
automodule:: deepctr.models.xdeepfm 5 | :members: 6 | :no-undoc-members: 7 | :no-show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/source/deepctr.rst: -------------------------------------------------------------------------------- 1 | deepctr package 2 | =============== 3 | 4 | Subpackages 5 | ----------- 6 | 7 | .. toctree:: 8 | 9 | deepctr.contrib 10 | deepctr.layers 11 | deepctr.models 12 | 13 | Submodules 14 | ---------- 15 | 16 | .. toctree:: 17 | 18 | deepctr.inputs 19 | deepctr.utils 20 | 21 | Module contents 22 | --------------- 23 | 24 | .. automodule:: deepctr 25 | :members: 26 | :undoc-members: 27 | :show-inheritance: 28 | -------------------------------------------------------------------------------- /docs/source/deepctr.utils.rst: -------------------------------------------------------------------------------- 1 | deepctr.utils module 2 | ==================== 3 | 4 | .. automodule:: deepctr.utils 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/source/index.rst: -------------------------------------------------------------------------------- 1 | .. DeepCTR documentation master file, created by 2 | sphinx-quickstart on Fri Nov 23 21:08:54 2018. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | Welcome to DeepCTR's documentation! 7 | =================================== 8 | 9 | |Downloads|_ |Stars|_ |Forks|_ |PyPii|_ |Issues|_ |Chat|_ 10 | 11 | .. |Downloads| image:: https://pepy.tech/badge/deepctr 12 | .. _Downloads: https://pepy.tech/project/deepctr 13 | 14 | .. |Stars| image:: https://img.shields.io/github/stars/shenweichen/deepctr.svg 15 | .. _Stars: https://github.com/shenweichen/DeepCTR 16 | 17 | .. |Forks| image:: https://img.shields.io/github/forks/shenweichen/deepctr.svg 18 | .. 
_Forks: https://github.com/shenweichen/DeepCTR/fork 19 | 20 | .. |PyPii| image:: https://img.shields.io/pypi/v/deepctr.svg 21 | .. _PyPii: https://pypi.org/project/deepctr 22 | 23 | .. |Issues| image:: https://img.shields.io/github/issues/shenweichen/deepctr.svg 24 | .. _Issues: https://github.com/shenweichen/deepctr/issues 25 | 26 | .. |Chat| image:: https://img.shields.io/badge/chat-wechat-brightgreen?style=flat 27 | .. _Chat: ./#disscussiongroup 28 | 29 | DeepCTR is an **Easy-to-use**, **Modular** and **Extendible** package of deep-learning based CTR models along with lots of core component layers which can be used to easily build custom models. You can use any complex model with ``model.fit()`` and ``model.predict()``. 30 | 31 | - Provide ``tf.keras.Model`` like interface for **quick experiment**. `example `_ 32 | - Provide ``tensorflow estimator`` interface for **large scale data** and **distributed training**. `example `_ 33 | - It is compatible with both ``tf 1.x`` and ``tf 2.x``. 34 | 35 | Let's `Get Started! <./Quick-Start.html>`_ (`Chinese Introduction `_) 36 | 37 | You can read the latest code and related projects 38 | 39 | - DeepCTR: https://github.com/shenweichen/DeepCTR 40 | - DeepMatch: https://github.com/shenweichen/DeepMatch 41 | - DeepCTR-Torch: https://github.com/shenweichen/DeepCTR-Torch 42 | 43 | News 44 | ----- 45 | 09/03/2021 : Add multitask learning models: `SharedBottom <./Features.html#sharedbottom>`_ , `ESMM <./Features.html#esmm-entire-space-multi-task-model>`_ , `MMOE <./Features.html#mmoe-multi-gate-mixture-of-experts>`_ , `PLE <./Features.html#ple-progressive-layered-extraction>`_ . `running example <./Examples.html#multitask-learning-mmoe>`_ `Changelog `_ 46 | 47 | 07/18/2021 : Support pre-defined key-value vocabulary in `Hash` Layer.
`example <./Examples.html#hash-layer-with-pre-defined-key-value-vocabulary>`_ `Changelog `_ 48 | 49 | 06/14/2021 : Add `IFM <./Features.html#ifm-input-aware-factorization-machine>`_ , `DIFM <./Features.html#difm-dual-input-aware-factorization-machine>`_ and `DeepFEFM <./Features.html#deepfefm-deep-field-embedded-factorization-machine>`_ . `Changelog `_ 50 | 51 | DisscussionGroup 52 | ----------------------- 53 | 54 | `Discussions `_ 公众号:**浅梦学习笔记** wechat ID: **deepctrbot** 55 | 56 | .. image:: ../pics/code.png 57 | 58 | .. toctree:: 59 | :maxdepth: 2 60 | :caption: Home: 61 | 62 | Quick-Start 63 | Features 64 | Examples 65 | FAQ 66 | History 67 | 68 | .. toctree:: 69 | :maxdepth: 3 70 | :caption: API: 71 | 72 | Models 73 | Estimators 74 | Layers 75 | 76 | 77 | 78 | 79 | Indices and tables 80 | ================== 81 | 82 | * :ref:`genindex` 83 | * :ref:`modindex` 84 | * :ref:`search` -------------------------------------------------------------------------------- /docs/source/modules.rst: -------------------------------------------------------------------------------- 1 | deepctr 2 | ======= 3 | 4 | .. 
def make_example(line, sparse_feature_name, dense_feature_name, label_name):
    """Build a ``tf.train.Example`` from one item of ``DataFrame.iterrows()``.

    :param line: ``(index, row)`` pair; only the row (``line[1]``) is read.
    :param sparse_feature_name: iterable of column names written as int64
        features (each value is passed through ``int()``).
    :param dense_feature_name: iterable of column names written as float
        features.
    :param label_name: column name of the label, written as a float feature.
    :return: the assembled ``tf.train.Example``.
    """
    row = line[1]
    features = {
        feat: tf.train.Feature(int64_list=tf.train.Int64List(value=[int(row[feat])]))
        for feat in sparse_feature_name
    }
    features.update({
        feat: tf.train.Feature(float_list=tf.train.FloatList(value=[row[feat]]))
        for feat in dense_feature_name
    })
    features[label_name] = tf.train.Feature(
        float_list=tf.train.FloatList(value=[row[label_name]]))
    return tf.train.Example(features=tf.train.Features(feature=features))


def write_tfrecord(filename, df, sparse_feature_names, dense_feature_names, label_name):
    """Serialize every row of ``df`` into the TFRecord file ``filename``.

    Example::

        write_tfrecord('./criteo_sample.tr.tfrecords', train,
                       sparse_features, dense_features, 'label')

    :param filename: path of the TFRecord file to create.
    :param df: object whose ``iterrows()`` yields ``(index, row)`` pairs
        (presumably a pandas DataFrame -- confirm against callers).
    :param sparse_feature_names: column names written as int64 features.
    :param dense_feature_names: column names written as float features.
    :param label_name: column name of the label.
    """
    # ``tf.python_io`` only exists in TF 1.x; ``tf.io.TFRecordWriter`` is the
    # TF 2.x location. Prefer the new path and fall back to the legacy one so
    # the script keeps working on both major versions.
    writer_cls = getattr(getattr(tf, "io", None), "TFRecordWriter", None)
    if writer_cls is None:
        writer_cls = tf.python_io.TFRecordWriter

    # The context manager closes the file even when a row fails to convert;
    # the previous explicit ``writer.close()`` was skipped on any exception.
    with writer_cls(filename) as writer:
        for line in df.iterrows():
            example = make_example(line, sparse_feature_names, dense_feature_names, label_name)
            writer.write(example.SerializeToString())
#!/usr/bin/env bash
# Install the package and run every example script, first in the python3
# conda environment ("base"), then in the python2 one ("py27"). The script
# stops at the first example that fails.

# Run each *.py file of the current directory with the active interpreter.
# $python_version is only used to label the log lines. Exits the whole
# script with a non-zero status as soon as one example fails.
function run_py(){

    code_path=./
    # Glob instead of parsing `ls`: the old test `[[ $file =~ .py ]]` was an
    # unanchored regex whose "." matches any character, so it also selected
    # names such as "foo.pyc" or "copy.txt"; `$(ls)` additionally split
    # file names containing whitespace.
    for file in "$code_path"*.py
    do
        # When nothing matches, the pattern is left as a literal string.
        [ -e "$file" ] || continue
        if python "$file"
        then
            echo run "$file" succeed in "$python_version"
        else
            echo run "$file" failed in "$python_version"
            # `exit -1` is not portable; any non-zero status signals failure.
            exit 1
        fi
    done

}

## python3
python_version=python3
source activate base
cd ..
python setup.py install
cd ./examples
run_py
# Reported right after the python3 run (it used to be printed only after
# the python2 run had finished).
echo "all examples run succeed in python3.6"

#python2
python_version=python2
source activate py27
cd ..
python setup.py install
cd ./examples
run_py
echo "all examples run succeed in python2.7"

echo "all examples run succeed in python2.7 and python3.6"
| 39 | train, test = train_test_split(data, test_size=0.2, random_state=2020) 40 | train_model_input = {name: train[name] for name in feature_names} 41 | test_model_input = {name: test[name] for name in feature_names} 42 | 43 | # 4.Define Model,train,predict and evaluate 44 | model = DeepFM(linear_feature_columns, dnn_feature_columns, task='binary') 45 | model.compile("adam", "binary_crossentropy", 46 | metrics=['binary_crossentropy'], ) 47 | 48 | history = model.fit(train_model_input, train[target].values, 49 | batch_size=256, epochs=10, verbose=2, validation_split=0.2, ) 50 | pred_ans = model.predict(test_model_input, batch_size=256) 51 | print("test LogLoss", round(log_loss(test[target].values, pred_ans), 4)) 52 | print("test AUC", round(roc_auc_score(test[target].values, pred_ans), 4)) 53 | -------------------------------------------------------------------------------- /examples/run_classification_criteo_hash.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | from sklearn.metrics import log_loss, roc_auc_score 3 | from sklearn.model_selection import train_test_split 4 | from sklearn.preprocessing import MinMaxScaler 5 | 6 | from deepctr.models import DeepFM 7 | from deepctr.feature_column import SparseFeat, DenseFeat,get_feature_names 8 | 9 | if __name__ == "__main__": 10 | data = pd.read_csv('./criteo_sample.txt') 11 | 12 | sparse_features = ['C' + str(i) for i in range(1, 27)] 13 | dense_features = ['I' + str(i) for i in range(1, 14)] 14 | 15 | data[sparse_features] = data[sparse_features].fillna('-1', ) 16 | data[dense_features] = data[dense_features].fillna(0, ) 17 | target = ['label'] 18 | 19 | # 1.do simple Transformation for dense features 20 | mms = MinMaxScaler(feature_range=(0, 1)) 21 | data[dense_features] = mms.fit_transform(data[dense_features]) 22 | 23 | # 2.set hashing space for each sparse field,and record dense feature field name 24 | 25 | fixlen_feature_columns = [SparseFeat(feat, 
vocabulary_size=1000,embedding_dim=4, use_hash=True, dtype='string') # since the input is string 26 | for feat in sparse_features] + [DenseFeat(feat, 1, ) 27 | for feat in dense_features] 28 | 29 | linear_feature_columns = fixlen_feature_columns 30 | dnn_feature_columns = fixlen_feature_columns 31 | feature_names = get_feature_names(linear_feature_columns + dnn_feature_columns, ) 32 | 33 | # 3.generate input data for model 34 | 35 | train, test = train_test_split(data, test_size=0.2, random_state=2020) 36 | 37 | train_model_input = {name:train[name] for name in feature_names} 38 | test_model_input = {name:test[name] for name in feature_names} 39 | 40 | 41 | # 4.Define Model,train,predict and evaluate 42 | model = DeepFM(linear_feature_columns,dnn_feature_columns, task='binary') 43 | model.compile("adam", "binary_crossentropy", 44 | metrics=['binary_crossentropy'], ) 45 | 46 | history = model.fit(train_model_input, train[target].values, 47 | batch_size=256, epochs=10, verbose=2, validation_split=0.2, ) 48 | pred_ans = model.predict(test_model_input, batch_size=256) 49 | print("test LogLoss", round(log_loss(test[target].values, pred_ans), 4)) 50 | print("test AUC", round(roc_auc_score(test[target].values, pred_ans), 4)) 51 | -------------------------------------------------------------------------------- /examples/run_classification_criteo_multi_gpu.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | from sklearn.metrics import log_loss, roc_auc_score 3 | from sklearn.model_selection import train_test_split 4 | from sklearn.preprocessing import LabelEncoder, MinMaxScaler 5 | from tensorflow.python.keras.utils import multi_gpu_model 6 | 7 | from deepctr.feature_column import SparseFeat, DenseFeat,get_feature_names 8 | from deepctr.models import DeepFM 9 | 10 | if __name__ == "__main__": 11 | data = pd.read_csv('./criteo_sample.txt') 12 | 13 | sparse_features = ['C' + str(i) for i in range(1, 27)] 14 | 
dense_features = ['I' + str(i) for i in range(1, 14)] 15 | 16 | data[sparse_features] = data[sparse_features].fillna('-1', ) 17 | data[dense_features] = data[dense_features].fillna(0, ) 18 | target = ['label'] 19 | 20 | # 1.Label Encoding for sparse features,and do simple Transformation for dense features 21 | for feat in sparse_features: 22 | lbe = LabelEncoder() 23 | data[feat] = lbe.fit_transform(data[feat]) 24 | mms = MinMaxScaler(feature_range=(0, 1)) 25 | data[dense_features] = mms.fit_transform(data[dense_features]) 26 | 27 | # 2.count #unique features for each sparse field,and record dense feature field name 28 | 29 | fixlen_feature_columns = [SparseFeat(feat, vocabulary_size=data[feat].max() + 1, embedding_dim=4) 30 | for feat in sparse_features] + [DenseFeat(feat, 1, ) 31 | for feat in dense_features] 32 | 33 | dnn_feature_columns = fixlen_feature_columns 34 | linear_feature_columns = fixlen_feature_columns 35 | 36 | feature_names = get_feature_names(linear_feature_columns + dnn_feature_columns) 37 | 38 | # 3.generate input data for model 39 | 40 | train, test = train_test_split(data, test_size=0.2, random_state=2020) 41 | 42 | train_model_input = {name: train[name] for name in feature_names} 43 | test_model_input = {name: test[name] for name in feature_names} 44 | 45 | # 4.Define Model,train,predict and evaluate 46 | model = DeepFM(linear_feature_columns, dnn_feature_columns, task='binary') 47 | model = multi_gpu_model(model, gpus=2) 48 | 49 | model.compile("adam", "binary_crossentropy", 50 | metrics=['binary_crossentropy'], ) 51 | 52 | history = model.fit(train_model_input, train[target].values, 53 | batch_size=256, epochs=10, verbose=2, validation_split=0.2, ) 54 | pred_ans = model.predict(test_model_input, batch_size=256) 55 | print("test LogLoss", round(log_loss(test[target].values, pred_ans), 4)) 56 | print("test AUC", round(roc_auc_score(test[target].values, pred_ans), 4)) 57 | 
-------------------------------------------------------------------------------- /examples/run_dien.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | 4 | from deepctr.feature_column import SparseFeat, VarLenSparseFeat, DenseFeat,get_feature_names 5 | from deepctr.models import DIEN 6 | 7 | 8 | def get_xy_fd(use_neg=False, hash_flag=False): 9 | feature_columns = [SparseFeat('user', 3, embedding_dim=10, use_hash=hash_flag), 10 | SparseFeat('gender', 2, embedding_dim=4, use_hash=hash_flag), 11 | SparseFeat('item_id', 3 + 1, embedding_dim=8, use_hash=hash_flag), 12 | SparseFeat('cate_id', 2 + 1, embedding_dim=4, use_hash=hash_flag), 13 | DenseFeat('pay_score', 1)] 14 | 15 | feature_columns += [ 16 | VarLenSparseFeat(SparseFeat('hist_item_id', vocabulary_size=3 + 1, embedding_dim=8, embedding_name='item_id'), 17 | maxlen=4, length_name="seq_length"), 18 | VarLenSparseFeat(SparseFeat('hist_cate_id', 2 + 1, embedding_dim=4, embedding_name='cate_id'), maxlen=4, 19 | length_name="seq_length")] 20 | 21 | behavior_feature_list = ["item_id", "cate_id"] 22 | uid = np.array([0, 1, 2]) 23 | ugender = np.array([0, 1, 0]) 24 | iid = np.array([1, 2, 3]) # 0 is mask value 25 | cate_id = np.array([1, 2, 2]) # 0 is mask value 26 | score = np.array([0.1, 0.2, 0.3]) 27 | 28 | hist_iid = np.array([[1, 2, 3, 0], [1, 2, 3, 0], [1, 2, 0, 0]]) 29 | hist_cate_id = np.array([[1, 2, 2, 0], [1, 2, 2, 0], [1, 2, 0, 0]]) 30 | 31 | behavior_length = np.array([3, 3, 2]) 32 | 33 | feature_dict = {'user': uid, 'gender': ugender, 'item_id': iid, 'cate_id': cate_id, 34 | 'hist_item_id': hist_iid, 'hist_cate_id': hist_cate_id, 35 | 'pay_score': score, "seq_length": behavior_length} 36 | 37 | if use_neg: 38 | feature_dict['neg_hist_item_id'] = np.array([[1, 2, 3, 0], [1, 2, 3, 0], [1, 2, 0, 0]]) 39 | feature_dict['neg_hist_cate_id'] = np.array([[1, 2, 2, 0], [1, 2, 2, 0], [1, 2, 0, 0]]) 40 | feature_columns += [ 41 | 
VarLenSparseFeat(SparseFeat('neg_hist_item_id', vocabulary_size=3 + 1, embedding_dim=8, embedding_name='item_id'), 42 | maxlen=4, length_name="seq_length"), 43 | VarLenSparseFeat(SparseFeat('neg_hist_cate_id', 2 + 1, embedding_dim=4, embedding_name='cate_id'), 44 | maxlen=4, length_name="seq_length")] 45 | 46 | x = {name: feature_dict[name] for name in get_feature_names(feature_columns)} 47 | y = np.array([1, 0, 1]) 48 | return x, y, feature_columns, behavior_feature_list 49 | 50 | 51 | if __name__ == "__main__": 52 | if tf.__version__ >= '2.0.0': 53 | tf.compat.v1.disable_eager_execution() 54 | USE_NEG = True 55 | x, y, feature_columns, behavior_feature_list = get_xy_fd(use_neg=USE_NEG) 56 | 57 | model = DIEN(feature_columns, behavior_feature_list, 58 | dnn_hidden_units=[4, 4, 4], dnn_dropout=0.6, gru_type="AUGRU", use_negsampling=USE_NEG) 59 | 60 | model.compile('adam', 'binary_crossentropy', 61 | metrics=['binary_crossentropy']) 62 | history = model.fit(x, y, verbose=1, epochs=10, validation_split=0.5) 63 | -------------------------------------------------------------------------------- /examples/run_din.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from deepctr.models import DIN 4 | from deepctr.feature_column import SparseFeat, VarLenSparseFeat, DenseFeat, get_feature_names 5 | 6 | 7 | def get_xy_fd(): 8 | feature_columns = [SparseFeat('user', 3, embedding_dim=10), SparseFeat( 9 | 'gender', 2, embedding_dim=4), SparseFeat('item_id', 3 + 1, embedding_dim=8), 10 | SparseFeat('cate_id', 2 + 1, embedding_dim=4), DenseFeat('pay_score', 1)] 11 | feature_columns += [ 12 | VarLenSparseFeat(SparseFeat('hist_item_id', vocabulary_size=3 + 1, embedding_dim=8, embedding_name='item_id'), 13 | maxlen=4, length_name="seq_length"), 14 | VarLenSparseFeat(SparseFeat('hist_cate_id', 2 + 1, embedding_dim=4, embedding_name='cate_id'), maxlen=4, 15 | length_name="seq_length")] 16 | # Notice: History behavior 
sequence feature name must start with "hist_". 17 | behavior_feature_list = ["item_id", "cate_id"] 18 | uid = np.array([0, 1, 2]) 19 | ugender = np.array([0, 1, 0]) 20 | iid = np.array([1, 2, 3]) # 0 is mask value 21 | cate_id = np.array([1, 2, 2]) # 0 is mask value 22 | pay_score = np.array([0.1, 0.2, 0.3]) 23 | 24 | hist_iid = np.array([[1, 2, 3, 0], [3, 2, 1, 0], [1, 2, 0, 0]]) 25 | hist_cate_id = np.array([[1, 2, 2, 0], [2, 2, 1, 0], [1, 2, 0, 0]]) 26 | seq_length = np.array([3, 3, 2]) # the actual length of the behavior sequence 27 | 28 | feature_dict = {'user': uid, 'gender': ugender, 'item_id': iid, 'cate_id': cate_id, 29 | 'hist_item_id': hist_iid, 'hist_cate_id': hist_cate_id, 30 | 'pay_score': pay_score, 'seq_length': seq_length} 31 | x = {name: feature_dict[name] for name in get_feature_names(feature_columns)} 32 | y = np.array([1, 0, 1]) 33 | return x, y, feature_columns, behavior_feature_list 34 | 35 | 36 | if __name__ == "__main__": 37 | x, y, feature_columns, behavior_feature_list = get_xy_fd() 38 | model = DIN(feature_columns, behavior_feature_list) 39 | # model = BST(feature_columns, behavior_feature_list,att_head_num=4) 40 | model.compile('adam', 'binary_crossentropy', 41 | metrics=['binary_crossentropy']) 42 | history = model.fit(x, y, verbose=1, epochs=10, validation_split=0.5) 43 | -------------------------------------------------------------------------------- /examples/run_dsin.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | 4 | from deepctr.feature_column import SparseFeat, VarLenSparseFeat, DenseFeat,get_feature_names 5 | from deepctr.models import DSIN 6 | 7 | 8 | def get_xy_fd(hash_flag=False): 9 | feature_columns = [SparseFeat('user', 3, embedding_dim=10, use_hash=hash_flag), 10 | SparseFeat('gender', 2, embedding_dim=4, use_hash=hash_flag), 11 | SparseFeat('item', 3 + 1, embedding_dim=4, use_hash=hash_flag), 12 | SparseFeat('cate_id', 2 + 1, 
embedding_dim=4, use_hash=hash_flag), 13 | DenseFeat('pay_score', 1)] 14 | feature_columns += [ 15 | VarLenSparseFeat(SparseFeat('sess_0_item', 3 + 1, embedding_dim=4, use_hash=hash_flag, embedding_name='item'), 16 | maxlen=4), VarLenSparseFeat( 17 | SparseFeat('sess_0_cate_id', 2 + 1, embedding_dim=4, use_hash=hash_flag, embedding_name='cate_id'), 18 | maxlen=4)] 19 | feature_columns += [ 20 | VarLenSparseFeat(SparseFeat('sess_1_item', 3 + 1, embedding_dim=4, use_hash=hash_flag, embedding_name='item'), 21 | maxlen=4), VarLenSparseFeat( 22 | SparseFeat('sess_1_cate_id', 2 + 1, embedding_dim=4, use_hash=hash_flag, embedding_name='cate_id'), 23 | maxlen=4)] 24 | 25 | behavior_feature_list = ["item", "cate_id"] 26 | uid = np.array([0, 1, 2]) 27 | ugender = np.array([0, 1, 0]) 28 | iid = np.array([1, 2, 3]) # 0 is mask value 29 | cateid = np.array([1, 2, 2]) # 0 is mask value 30 | score = np.array([0.1, 0.2, 0.3]) 31 | 32 | sess1_iid = np.array([[1, 2, 3, 0], [3, 2, 1, 0], [0, 0, 0, 0]]) 33 | sess1_cate_id = np.array([[1, 2, 2, 0], [2, 2, 1, 0], [0, 0, 0, 0]]) 34 | 35 | sess2_iid = np.array([[1, 2, 3, 0], [0, 0, 0, 0], [0, 0, 0, 0]]) 36 | sess2_cate_id = np.array([[1, 2, 2, 0], [0, 0, 0, 0], [0, 0, 0, 0]]) 37 | 38 | sess_number = np.array([2, 1, 0]) 39 | 40 | feature_dict = {'user': uid, 'gender': ugender, 'item': iid, 'cate_id': cateid, 41 | 'sess_0_item': sess1_iid, 'sess_0_cate_id': sess1_cate_id, 'pay_score': score, 42 | 'sess_1_item': sess2_iid, 'sess_1_cate_id': sess2_cate_id, } 43 | 44 | x = {name: feature_dict[name] for name in get_feature_names(feature_columns)} 45 | x["sess_length"] = sess_number 46 | y = np.array([1, 0, 1]) 47 | return x, y, feature_columns, behavior_feature_list 48 | 49 | 50 | if __name__ == "__main__": 51 | if tf.__version__ >= '2.0.0': 52 | tf.compat.v1.disable_eager_execution() 53 | 54 | x, y, feature_columns, behavior_feature_list = get_xy_fd(True) 55 | 56 | model = DSIN(feature_columns, behavior_feature_list, sess_max_count=2, 57 | 
dnn_hidden_units=[4, 4, 4], dnn_dropout=0.5, ) 58 | 59 | model.compile('adam', 'binary_crossentropy', 60 | metrics=['binary_crossentropy']) 61 | history = model.fit(x, y, verbose=1, epochs=10, validation_split=0.5) 62 | -------------------------------------------------------------------------------- /examples/run_estimator_pandas_classification.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import tensorflow as tf 3 | from sklearn.metrics import log_loss, roc_auc_score 4 | from sklearn.model_selection import train_test_split 5 | from sklearn.preprocessing import LabelEncoder, MinMaxScaler 6 | 7 | from deepctr.estimator import DeepFMEstimator 8 | from deepctr.estimator.inputs import input_fn_pandas 9 | 10 | if __name__ == "__main__": 11 | data = pd.read_csv('./criteo_sample.txt') 12 | 13 | sparse_features = ['C' + str(i) for i in range(1, 27)] 14 | dense_features = ['I' + str(i) for i in range(1, 14)] 15 | 16 | data[sparse_features] = data[sparse_features].fillna('-1', ) 17 | data[dense_features] = data[dense_features].fillna(0, ) 18 | target = ['label'] 19 | 20 | # 1.Label Encoding for sparse features,and do simple Transformation for dense features 21 | for feat in sparse_features: 22 | lbe = LabelEncoder() 23 | data[feat] = lbe.fit_transform(data[feat]) 24 | mms = MinMaxScaler(feature_range=(0, 1)) 25 | data[dense_features] = mms.fit_transform(data[dense_features]) 26 | 27 | # 2.count #unique features for each sparse field,and record dense feature field name 28 | 29 | dnn_feature_columns = [] 30 | linear_feature_columns = [] 31 | 32 | for i, feat in enumerate(sparse_features): 33 | dnn_feature_columns.append(tf.feature_column.embedding_column( 34 | tf.feature_column.categorical_column_with_identity(feat, data[feat].max() + 1), 4)) 35 | linear_feature_columns.append(tf.feature_column.categorical_column_with_identity(feat, data[feat].max() + 1)) 36 | for feat in dense_features: 37 | 
dnn_feature_columns.append(tf.feature_column.numeric_column(feat)) 38 | linear_feature_columns.append(tf.feature_column.numeric_column(feat)) 39 | 40 | # 3.generate input data for model 41 | 42 | train, test = train_test_split(data, test_size=0.2, random_state=2021) 43 | 44 | # Not setting default value for continuous feature. filled with mean. 45 | 46 | train_model_input = input_fn_pandas(train, sparse_features + dense_features, 'label', shuffle=True) 47 | test_model_input = input_fn_pandas(test, sparse_features + dense_features, None, shuffle=False) 48 | 49 | # 4.Define Model,train,predict and evaluate 50 | model = DeepFMEstimator(linear_feature_columns, dnn_feature_columns, task='binary', 51 | config=tf.estimator.RunConfig(tf_random_seed=2021)) 52 | 53 | model.train(train_model_input) 54 | pred_ans_iter = model.predict(test_model_input) 55 | pred_ans = list(map(lambda x: x['pred'], pred_ans_iter)) 56 | # 57 | print("test LogLoss", round(log_loss(test[target].values, pred_ans), 4)) 58 | print("test AUC", round(roc_auc_score(test[target].values, pred_ans), 4)) 59 | -------------------------------------------------------------------------------- /examples/run_estimator_tfrecord_classification.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | from tensorflow.python.ops.parsing_ops import FixedLenFeature 4 | from deepctr.estimator import DeepFMEstimator 5 | from deepctr.estimator.inputs import input_fn_tfrecord 6 | 7 | if __name__ == "__main__": 8 | 9 | # 1.generate feature_column for linear part and dnn part 10 | 11 | sparse_features = ['C' + str(i) for i in range(1, 27)] 12 | dense_features = ['I' + str(i) for i in range(1, 14)] 13 | 14 | dnn_feature_columns = [] 15 | linear_feature_columns = [] 16 | 17 | for i, feat in enumerate(sparse_features): 18 | dnn_feature_columns.append(tf.feature_column.embedding_column( 19 | tf.feature_column.categorical_column_with_identity(feat, 1000), 4)) 20 | 
linear_feature_columns.append(tf.feature_column.categorical_column_with_identity(feat, 1000)) 21 | for feat in dense_features: 22 | dnn_feature_columns.append(tf.feature_column.numeric_column(feat)) 23 | linear_feature_columns.append(tf.feature_column.numeric_column(feat)) 24 | 25 | # 2.generate input data for model 26 | 27 | feature_description = {k: FixedLenFeature(dtype=tf.int64, shape=1) for k in sparse_features} 28 | feature_description.update( 29 | {k: FixedLenFeature(dtype=tf.float32, shape=1) for k in dense_features}) 30 | feature_description['label'] = FixedLenFeature(dtype=tf.float32, shape=1) 31 | 32 | train_model_input = input_fn_tfrecord('./criteo_sample.tr.tfrecords', feature_description, 'label', batch_size=256, 33 | num_epochs=1, shuffle_factor=10) 34 | test_model_input = input_fn_tfrecord('./criteo_sample.te.tfrecords', feature_description, 'label', 35 | batch_size=2 ** 14, num_epochs=1, shuffle_factor=0) 36 | 37 | # 3.Define Model,train,predict and evaluate 38 | model = DeepFMEstimator(linear_feature_columns, dnn_feature_columns, task='binary', 39 | config=tf.estimator.RunConfig(tf_random_seed=2021)) 40 | 41 | model.train(train_model_input) 42 | eval_result = model.evaluate(test_model_input) 43 | 44 | print(eval_result) 45 | -------------------------------------------------------------------------------- /examples/run_flen.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | from sklearn.metrics import log_loss, roc_auc_score 3 | from sklearn.model_selection import train_test_split 4 | from sklearn.preprocessing import LabelEncoder 5 | 6 | from deepctr.feature_column import SparseFeat,get_feature_names 7 | from deepctr.models import FLEN 8 | 9 | if __name__ == "__main__": 10 | data = pd.read_csv('./avazu_sample.txt') 11 | data['day'] = data['hour'].apply(lambda x: str(x)[4:6]) 12 | data['hour'] = data['hour'].apply(lambda x: str(x)[6:]) 13 | 14 | sparse_features = ['hour', 'C1', 
'banner_pos', 'site_id', 'site_domain', 15 | 'site_category', 'app_id', 'app_domain', 'app_category', 'device_id', 16 | 'device_model', 'device_type', 'device_conn_type', # 'device_ip', 17 | 'C14', 18 | 'C15', 'C16', 'C17', 'C18', 'C19', 'C20', 'C21', ] 19 | 20 | data[sparse_features] = data[sparse_features].fillna('-1', ) 21 | target = ['click'] 22 | 23 | # 1.Label Encoding for sparse features,and do simple Transformation for dense features 24 | for feat in sparse_features: 25 | lbe = LabelEncoder() 26 | data[feat] = lbe.fit_transform(data[feat]) 27 | 28 | # 2.count #unique features for each sparse field,and record dense feature field name 29 | 30 | field_info = dict(C14='user', C15='user', C16='user', C17='user', 31 | C18='user', C19='user', C20='user', C21='user', C1='user', 32 | banner_pos='context', site_id='context', 33 | site_domain='context', site_category='context', 34 | app_id='item', app_domain='item', app_category='item', 35 | device_model='user', device_type='user', 36 | device_conn_type='context', hour='context', 37 | device_id='user' 38 | ) 39 | 40 | fixlen_feature_columns = [ 41 | SparseFeat(name, vocabulary_size=data[name].max() + 1, embedding_dim=16, use_hash=False, dtype='int32', 42 | group_name=field_info[name]) for name in sparse_features] 43 | 44 | dnn_feature_columns = fixlen_feature_columns 45 | linear_feature_columns = fixlen_feature_columns 46 | 47 | feature_names = get_feature_names(linear_feature_columns + dnn_feature_columns) 48 | 49 | # 3.generate input data for model 50 | 51 | train, test = train_test_split(data, test_size=0.2, random_state=2020) 52 | train_model_input = {name: train[name] for name in feature_names} 53 | test_model_input = {name: test[name] for name in feature_names} 54 | 55 | # 4.Define Model,train,predict and evaluate 56 | model = FLEN(linear_feature_columns, dnn_feature_columns, task='binary') 57 | model.compile("adam", "binary_crossentropy", 58 | metrics=['binary_crossentropy'], ) 59 | 60 | history = 
model.fit(train_model_input, train[target].values, 61 | batch_size=256, epochs=10, verbose=2, validation_split=0.2, ) 62 | pred_ans = model.predict(test_model_input, batch_size=256) 63 | print("test LogLoss", round(log_loss(test[target].values, pred_ans), 4)) 64 | print("test AUC", round(roc_auc_score(test[target].values, pred_ans), 4)) 65 | -------------------------------------------------------------------------------- /examples/run_multivalue_movielens.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | from sklearn.preprocessing import LabelEncoder 4 | from tensorflow.python.keras.preprocessing.sequence import pad_sequences 5 | 6 | from deepctr.feature_column import SparseFeat, VarLenSparseFeat,get_feature_names 7 | from deepctr.models import DeepFM 8 | 9 | 10 | def split(x): 11 | key_ans = x.split('|') 12 | for key in key_ans: 13 | if key not in key2index: 14 | # Notice : input value 0 is a special "padding",so we do not use 0 to encode valid feature for sequence input 15 | key2index[key] = len(key2index) + 1 16 | return list(map(lambda x: key2index[x], key_ans)) 17 | 18 | 19 | if __name__ == "__main__": 20 | data = pd.read_csv("./movielens_sample.txt") 21 | sparse_features = ["movie_id", "user_id", 22 | "gender", "age", "occupation", "zip", ] 23 | target = ['rating'] 24 | 25 | # 1.Label Encoding for sparse features,and process sequence features 26 | for feat in sparse_features: 27 | lbe = LabelEncoder() 28 | data[feat] = lbe.fit_transform(data[feat]) 29 | # preprocess the sequence feature 30 | 31 | key2index = {} 32 | genres_list = list(map(split, data['genres'].values)) 33 | genres_length = np.array(list(map(len, genres_list))) 34 | max_len = max(genres_length) 35 | # Notice : padding=`post` 36 | genres_list = pad_sequences(genres_list, maxlen=max_len, padding='post', ) 37 | 38 | # 2.count #unique features for each sparse field and generate feature config for sequence feature 
39 | 40 | fixlen_feature_columns = [SparseFeat(feat, data[feat].max() + 1, embedding_dim=4) 41 | for feat in sparse_features] 42 | 43 | use_weighted_sequence = False 44 | if use_weighted_sequence: 45 | varlen_feature_columns = [VarLenSparseFeat(SparseFeat('genres', vocabulary_size=len( 46 | key2index) + 1, embedding_dim=4), maxlen=max_len, combiner='mean', 47 | weight_name='genres_weight')] # Notice : value 0 is for padding for sequence input feature 48 | else: 49 | varlen_feature_columns = [VarLenSparseFeat(SparseFeat('genres', vocabulary_size=len( 50 | key2index) + 1, embedding_dim=4), maxlen=max_len, combiner='mean', 51 | weight_name=None)] # Notice : value 0 is for padding for sequence input feature 52 | 53 | linear_feature_columns = fixlen_feature_columns + varlen_feature_columns 54 | dnn_feature_columns = fixlen_feature_columns + varlen_feature_columns 55 | 56 | feature_names = get_feature_names(linear_feature_columns + dnn_feature_columns) 57 | 58 | # 3.generate input data for model 59 | model_input = {name: data[name] for name in sparse_features} # 60 | model_input["genres"] = genres_list 61 | model_input["genres_weight"] = np.random.randn(data.shape[0], max_len, 1) 62 | 63 | # 4.Define Model,compile and train 64 | model = DeepFM(linear_feature_columns, dnn_feature_columns, task='regression') 65 | 66 | model.compile("adam", "mse", metrics=['mse'], ) 67 | history = model.fit(model_input, data[target].values, 68 | batch_size=256, epochs=10, verbose=2, validation_split=0.2, ) 69 | -------------------------------------------------------------------------------- /examples/run_multivalue_movielens_hash.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | from tensorflow.python.keras.preprocessing.sequence import pad_sequences 4 | 5 | from deepctr.feature_column import SparseFeat, VarLenSparseFeat,get_feature_names 6 | from deepctr.models import DeepFM 7 | 8 | if __name__ == 
"__main__": 9 | data = pd.read_csv("./movielens_sample.txt") 10 | sparse_features = ["movie_id", "user_id", 11 | "gender", "age", "occupation", "zip", ] 12 | 13 | data[sparse_features] = data[sparse_features].astype(str) 14 | target = ['rating'] 15 | 16 | # 1.Use hashing encoding on the fly for sparse features,and process sequence features 17 | 18 | genres_list = list(map(lambda x: x.split('|'), data['genres'].values)) 19 | genres_length = np.array(list(map(len, genres_list))) 20 | max_len = max(genres_length) 21 | 22 | # Notice : padding=`post` 23 | genres_list = pad_sequences(genres_list, maxlen=max_len, padding='post', dtype=object, value=0).astype(str) 24 | # 2.set hashing space for each sparse field and generate feature config for sequence feature 25 | 26 | fixlen_feature_columns = [SparseFeat(feat, data[feat].nunique() * 5, embedding_dim=4, use_hash=True, dtype='string') 27 | for feat in sparse_features] 28 | varlen_feature_columns = [ 29 | VarLenSparseFeat(SparseFeat('genres', vocabulary_size=100, embedding_dim=4, use_hash=True, dtype="string"), 30 | maxlen=max_len, combiner='mean', 31 | )] # Notice : value 0 is for padding for sequence input feature 32 | linear_feature_columns = fixlen_feature_columns + varlen_feature_columns 33 | dnn_feature_columns = fixlen_feature_columns + varlen_feature_columns 34 | feature_names = get_feature_names(linear_feature_columns + dnn_feature_columns) 35 | 36 | # 3.generate input data for model 37 | model_input = {name: data[name] for name in feature_names} 38 | model_input['genres'] = genres_list 39 | 40 | # 4.Define Model,compile and train 41 | model = DeepFM(linear_feature_columns, dnn_feature_columns, task='regression') 42 | 43 | model.compile("adam", "mse", metrics=['mse'], ) 44 | history = model.fit(model_input, data[target].values, 45 | batch_size=256, epochs=10, verbose=2, validation_split=0.2, ) 46 | -------------------------------------------------------------------------------- 
/examples/run_multivalue_movielens_vocab_hash.py: -------------------------------------------------------------------------------- 1 | from deepctr.models import DeepFM 2 | from deepctr.feature_column import SparseFeat, VarLenSparseFeat, get_feature_names 3 | import numpy as np 4 | import pandas as pd 5 | from tensorflow.python.keras.preprocessing.sequence import pad_sequences 6 | 7 | try: 8 | import tensorflow.compat.v1 as tf 9 | except ImportError as e: 10 | import tensorflow as tf 11 | 12 | if __name__ == "__main__": 13 | data = pd.read_csv("./movielens_sample.txt") 14 | sparse_features = ["movie_id", "user_id", 15 | "gender", "age", "occupation", "zip", ] 16 | 17 | data[sparse_features] = data[sparse_features].astype(str) 18 | target = ['rating'] 19 | 20 | # 1.Use hashing encoding on the fly for sparse features,and process sequence features 21 | 22 | genres_list = list(map(lambda x: x.split('|'), data['genres'].values)) 23 | genres_length = np.array(list(map(len, genres_list))) 24 | max_len = max(genres_length) 25 | 26 | # Notice : padding=`post` 27 | genres_list = pad_sequences(genres_list, maxlen=max_len, padding='post', dtype=object, value=0).astype(str) 28 | # 2.set hashing space for each sparse field and generate feature config for sequence feature 29 | 30 | fixlen_feature_columns = [SparseFeat(feat, data[feat].nunique() * 5, embedding_dim=4, use_hash=True, 31 | vocabulary_path='./movielens_age_vocabulary.csv' if feat == 'age' else None, 32 | dtype='string') 33 | for feat in sparse_features] 34 | varlen_feature_columns = [ 35 | VarLenSparseFeat(SparseFeat('genres', vocabulary_size=100, embedding_dim=4, 36 | use_hash=True, dtype="string"), 37 | maxlen=max_len, combiner='mean', 38 | )] # Notice : value 0 is for padding for sequence input feature 39 | linear_feature_columns = fixlen_feature_columns + varlen_feature_columns 40 | dnn_feature_columns = fixlen_feature_columns + varlen_feature_columns 41 | feature_names = get_feature_names(linear_feature_columns + 
dnn_feature_columns) 42 | 43 | # 3.generate input data for model 44 | model_input = {name: data[name] for name in feature_names} 45 | model_input['genres'] = genres_list 46 | 47 | # 4.Define Model,compile and train 48 | model = DeepFM(linear_feature_columns, dnn_feature_columns, task='regression') 49 | model.compile("adam", "mse", metrics=['mse'], ) 50 | if not hasattr(tf, 'version') or tf.version.VERSION < '2.0.0': 51 | with tf.Session() as sess: 52 | sess.run(tf.tables_initializer()) 53 | history = model.fit(model_input, data[target].values, 54 | batch_size=256, epochs=10, verbose=2, validation_split=0.2, ) 55 | else: 56 | history = model.fit(model_input, data[target].values, 57 | batch_size=256, epochs=10, verbose=2, validation_split=0.2, ) 58 | -------------------------------------------------------------------------------- /examples/run_regression_movielens.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | from sklearn.metrics import mean_squared_error 3 | from sklearn.model_selection import train_test_split 4 | from sklearn.preprocessing import LabelEncoder 5 | 6 | from deepctr.models import DeepFM 7 | from deepctr.feature_column import SparseFeat,get_feature_names 8 | 9 | if __name__ == "__main__": 10 | 11 | data = pd.read_csv("./movielens_sample.txt") 12 | sparse_features = ["movie_id", "user_id", 13 | "gender", "age", "occupation", "zip"] 14 | target = ['rating'] 15 | 16 | # 1.Label Encoding for sparse features,and do simple Transformation for dense features 17 | for feat in sparse_features: 18 | lbe = LabelEncoder() 19 | data[feat] = lbe.fit_transform(data[feat]) 20 | # 2.count #unique features for each sparse field 21 | fixlen_feature_columns = [SparseFeat(feat, data[feat].max() + 1,embedding_dim=4) 22 | for feat in sparse_features] 23 | linear_feature_columns = fixlen_feature_columns 24 | dnn_feature_columns = fixlen_feature_columns 25 | feature_names = 
get_feature_names(linear_feature_columns + dnn_feature_columns) 26 | 27 | # 3.generate input data for model 28 | train, test = train_test_split(data, test_size=0.2, random_state=2020) 29 | train_model_input = {name:train[name].values for name in feature_names} 30 | test_model_input = {name:test[name].values for name in feature_names} 31 | 32 | # 4.Define Model,train,predict and evaluate 33 | model = DeepFM(linear_feature_columns, dnn_feature_columns, task='regression') 34 | model.compile("adam", "mse", metrics=['mse'], ) 35 | 36 | history = model.fit(train_model_input, train[target].values, 37 | batch_size=256, epochs=10, verbose=2, validation_split=0.2, ) 38 | pred_ans = model.predict(test_model_input, batch_size=256) 39 | print("test MSE", round(mean_squared_error( 40 | test[target].values, pred_ans), 4)) 41 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | tensorflow-gpu==2.4.0 2 | tensorflow-addons==0.12.0 3 | tensorboard_plugin_profile 4 | pandas 5 | scikit-learn 6 | pyarrow 7 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [metadata] 2 | desciption-file = README.md 3 | 4 | #[coverage:run] 5 | #branch = True 6 | 7 | [coverage:report] 8 | exclude_lines = 9 | # Have to re-enable the standard pragma 10 | pragma: no cover 11 | # Don't complain about missing debug-only code: 12 | def __repr__ 13 | if self\.debug 14 | 15 | # Don't complain if tests don't hit defensive assertion code: 16 | raise ValueError 17 | raise AssertionError 18 | raise NotImplementedError 19 | 20 | # Don't complain if non-runnable code isn't run: 21 | if 0: 22 | if False: 23 | if __name__ == .__main__.: 24 | 25 | [coverage:run] 26 | omit = 27 | # omit anything in a .local directory anywhere 28 | #*/.local/* 29 | # omit everything 
in /usr 30 | deepctr/contrib/* 31 | # omit this single file 32 | #utils/tirefire.py -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import setuptools 2 | 3 | with open("README.md", "r") as fh: 4 | long_description = fh.read() 5 | 6 | REQUIRED_PACKAGES = [ 7 | 'h5py==2.10.0', 'requests' 8 | ] 9 | 10 | setuptools.setup( 11 | name="deepctr", 12 | version="0.9.0", 13 | author="Weichen Shen", 14 | author_email="weichenswc@163.com", 15 | description="Easy-to-use,Modular and Extendible package of deep learning based CTR(Click Through Rate) prediction models with tensorflow 1.x and 2.x .", 16 | long_description=long_description, 17 | long_description_content_type="text/markdown", 18 | url="https://github.com/shenweichen/deepctr", 19 | download_url='https://github.com/shenweichen/deepctr/tags', 20 | packages=setuptools.find_packages( 21 | exclude=["tests", "tests.models", "tests.layers"]), 22 | python_requires=">=2.7,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*", # '>=3.4', # 3.4.6 23 | install_requires=REQUIRED_PACKAGES, 24 | extras_require={ 25 | "cpu": ["tensorflow>=1.4.0,!=1.7.*,!=1.8.*"], 26 | "gpu": ["tensorflow-gpu>=1.4.0,!=1.7.*,!=1.8.*"], 27 | }, 28 | entry_points={ 29 | }, 30 | classifiers=( 31 | "License :: OSI Approved :: Apache Software License", 32 | "Operating System :: OS Independent", 33 | 'Intended Audience :: Developers', 34 | 'Intended Audience :: Education', 35 | 'Intended Audience :: Science/Research', 36 | 'Programming Language :: Python :: 3', 37 | 'Programming Language :: Python :: 2.7', 38 | 'Programming Language :: Python :: 3.5', 39 | 'Programming Language :: Python :: 3.6', 40 | 'Programming Language :: Python :: 3.7', 41 | 'Topic :: Scientific/Engineering', 42 | 'Topic :: Scientific/Engineering :: Artificial Intelligence', 43 | 'Topic :: Software Development', 44 | 'Topic :: Software Development :: Libraries', 45 | 'Topic 
:: Software Development :: Libraries :: Python Modules', 46 | ), 47 | license="Apache-2.0", 48 | keywords=['ctr', 'click through rate', 49 | 'deep learning', 'tensorflow', 'tensor', 'keras'], 50 | ) 51 | -------------------------------------------------------------------------------- /tests/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/LargeBatchCTR/12101b44d8e753a6d8d954437287f96d68895d49/tests/README.md -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/LargeBatchCTR/12101b44d8e753a6d8d954437287f96d68895d49/tests/__init__.py -------------------------------------------------------------------------------- /tests/feature_test.py: -------------------------------------------------------------------------------- 1 | from deepctr.models import DeepFM 2 | from deepctr.feature_column import SparseFeat, DenseFeat, VarLenSparseFeat, get_feature_names 3 | import numpy as np 4 | 5 | 6 | def test_long_dense_vector(): 7 | feature_columns = [SparseFeat('user_id', 4, ), SparseFeat('item_id', 5, ), DenseFeat("pic_vec", 5)] 8 | fixlen_feature_names = get_feature_names(feature_columns) 9 | 10 | user_id = np.array([[1], [0], [1]]) 11 | item_id = np.array([[3], [2], [1]]) 12 | pic_vec = np.array([[0.1, 0.5, 0.4, 0.3, 0.2], [0.1, 0.5, 0.4, 0.3, 0.2], [0.1, 0.5, 0.4, 0.3, 0.2]]) 13 | label = np.array([1, 0, 1]) 14 | 15 | input_dict = {'user_id': user_id, 'item_id': item_id, 'pic_vec': pic_vec} 16 | model_input = [input_dict[name] for name in fixlen_feature_names] 17 | 18 | model = DeepFM(feature_columns, feature_columns[:-1]) 19 | model.compile('adagrad', 'binary_crossentropy') 20 | model.fit(model_input, label) 21 | 22 | 23 | def test_feature_column_sparsefeat_vocabulary_path(): 24 | vocab_path = "./dummy_test.csv" 25 | sf = 
SparseFeat('user_id', 4, vocabulary_path=vocab_path) 26 | if sf.vocabulary_path != vocab_path: 27 | raise ValueError("sf.vocabulary_path is invalid") 28 | vlsf = VarLenSparseFeat(sf, 6) 29 | if vlsf.vocabulary_path != vocab_path: 30 | raise ValueError("vlsf.vocabulary_path is invalid") 31 | -------------------------------------------------------------------------------- /tests/layers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bytedance/LargeBatchCTR/12101b44d8e753a6d8d954437287f96d68895d49/tests/layers/__init__.py -------------------------------------------------------------------------------- /tests/layers/activations_test.py: -------------------------------------------------------------------------------- 1 | from deepctr.layers import activation 2 | 3 | try: 4 | from tensorflow.python.keras.utils import CustomObjectScope 5 | except ImportError: 6 | from tensorflow.keras.utils import CustomObjectScope 7 | from tests.utils import layer_test 8 | 9 | 10 | def test_dice(): 11 | with CustomObjectScope({'Dice': activation.Dice}): 12 | layer_test(activation.Dice, kwargs={}, 13 | input_shape=(2, 3)) 14 | -------------------------------------------------------------------------------- /tests/layers/core_test.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import tensorflow as tf 3 | from tensorflow.python.keras.layers import PReLU 4 | 5 | try: 6 | from tensorflow.python.keras.utils import CustomObjectScope 7 | except ImportError: 8 | from tensorflow.keras.utils import CustomObjectScope 9 | from deepctr import layers 10 | from deepctr.layers import Dice 11 | from tests.layers.interaction_test import BATCH_SIZE, EMBEDDING_SIZE, SEQ_LENGTH 12 | from tests.utils import layer_test 13 | 14 | 15 | @pytest.mark.parametrize( 16 | 'hidden_units,activation', 17 | [(hidden_units, activation) 18 | for hidden_units in [(), (10,)] 19 | for 
activation in ['sigmoid', Dice, PReLU] 20 | ] 21 | ) 22 | def test_LocalActivationUnit(hidden_units, activation): 23 | if tf.__version__ >= '1.13.0' and activation != 'sigmoid': 24 | return 25 | 26 | with CustomObjectScope({'LocalActivationUnit': layers.LocalActivationUnit}): 27 | layer_test(layers.LocalActivationUnit, 28 | kwargs={'hidden_units': hidden_units, 'activation': activation, 'dropout_rate': 0.5}, 29 | input_shape=[(BATCH_SIZE, 1, EMBEDDING_SIZE), (BATCH_SIZE, SEQ_LENGTH, EMBEDDING_SIZE)]) 30 | 31 | 32 | @pytest.mark.parametrize( 33 | 'hidden_units,use_bn', 34 | [(hidden_units, use_bn) 35 | for hidden_units in [(), (10,)] 36 | for use_bn in [True, False] 37 | ] 38 | ) 39 | def test_DNN(hidden_units, use_bn): 40 | with CustomObjectScope({'DNN': layers.DNN}): 41 | layer_test(layers.DNN, kwargs={'hidden_units': hidden_units, 'use_bn': use_bn, 'dropout_rate': 0.5}, 42 | input_shape=( 43 | BATCH_SIZE, EMBEDDING_SIZE)) 44 | 45 | 46 | @pytest.mark.parametrize( 47 | 'task,use_bias', 48 | [(task, use_bias) 49 | for task in ['binary', 'regression'] 50 | for use_bias in [True, False] 51 | ] 52 | ) 53 | def test_PredictionLayer(task, use_bias): 54 | with CustomObjectScope({'PredictionLayer': layers.PredictionLayer}): 55 | layer_test(layers.PredictionLayer, kwargs={'task': task, 'use_bias': use_bias 56 | }, input_shape=(BATCH_SIZE, 1)) 57 | 58 | 59 | @pytest.mark.xfail(reason="dim size must be 1 except for the batch size dim") 60 | def test_test_PredictionLayer_invalid(): 61 | # with pytest.raises(ValueError): 62 | with CustomObjectScope({'PredictionLayer': layers.PredictionLayer}): 63 | layer_test(layers.PredictionLayer, kwargs={'use_bias': True, 64 | }, input_shape=(BATCH_SIZE, 2, 1)) 65 | -------------------------------------------------------------------------------- /tests/layers/normalization_test.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | try: 4 | from tensorflow.python.keras.utils import 
CustomObjectScope 5 | except ImportError: 6 | from tensorflow.keras.utils import CustomObjectScope 7 | from deepctr import layers 8 | from tests.layers.interaction_test import BATCH_SIZE, FIELD_SIZE, EMBEDDING_SIZE 9 | from tests.utils import layer_test 10 | 11 | 12 | @pytest.mark.parametrize( 13 | 'axis', 14 | [-1, -2 15 | ] 16 | ) 17 | def test_LayerNormalization(axis): 18 | with CustomObjectScope({'LayerNormalization': layers.LayerNormalization}): 19 | layer_test(layers.LayerNormalization, kwargs={"axis": axis, }, input_shape=( 20 | BATCH_SIZE, FIELD_SIZE, EMBEDDING_SIZE)) 21 | -------------------------------------------------------------------------------- /tests/layers/utils_test.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pytest 3 | import tensorflow as tf 4 | 5 | from deepctr.layers.utils import Hash, Linear 6 | from tests.layers.interaction_test import BATCH_SIZE, EMBEDDING_SIZE 7 | from tests.utils import layer_test 8 | 9 | try: 10 | from tensorflow.python.keras.utils import CustomObjectScope 11 | except ImportError: 12 | from tensorflow.keras.utils import CustomObjectScope 13 | 14 | 15 | @pytest.mark.parametrize( 16 | 'num_buckets,mask_zero,vocabulary_path,input_data,expected_output', 17 | [ 18 | (3 + 1, False, None, ['lakemerson'], None), 19 | (3 + 1, True, None, ['lakemerson'], None), 20 | ( 21 | 3 + 1, False, "./tests/layers/vocabulary_example.csv", [['lake'], ['johnson'], ['lakemerson']], 22 | [[1], [3], [0]]) 23 | ] 24 | ) 25 | def test_Hash(num_buckets, mask_zero, vocabulary_path, input_data, expected_output): 26 | if not hasattr(tf, 'version') or tf.version.VERSION < '2.0.0': 27 | return 28 | 29 | with CustomObjectScope({'Hash': Hash}): 30 | layer_test(Hash, 31 | kwargs={'num_buckets': num_buckets, 'mask_zero': mask_zero, 'vocabulary_path': vocabulary_path}, 32 | input_dtype=tf.string, input_data=np.array(input_data, dtype='str'), 33 | expected_output_dtype=tf.int64, 
expected_output=expected_output) 34 | 35 | 36 | def test_Linear(): 37 | with CustomObjectScope({'Linear': Linear}): 38 | layer_test(Linear, 39 | kwargs={'mode': 1, 'use_bias': True}, input_shape=(BATCH_SIZE, EMBEDDING_SIZE)) 40 | -------------------------------------------------------------------------------- /tests/layers/vocabulary_example.csv: -------------------------------------------------------------------------------- 1 | 1,lake 2 | 2,merson 3 | 3,johnson -------------------------------------------------------------------------------- /tests/models/AFM_test.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import tensorflow as tf 3 | from packaging import version 4 | 5 | from deepctr.estimator import AFMEstimator 6 | from deepctr.models import AFM 7 | from ..utils import check_model, check_estimator, get_test_data, get_test_data_estimator, SAMPLE_SIZE, \ 8 | Estimator_TEST_TF1 9 | 10 | 11 | @pytest.mark.parametrize( 12 | 'use_attention,sparse_feature_num,dense_feature_num', 13 | [(True, 3, 0), 14 | ] 15 | ) 16 | def test_AFM(use_attention, sparse_feature_num, dense_feature_num): 17 | model_name = "AFM" 18 | sample_size = SAMPLE_SIZE 19 | x, y, feature_columns = get_test_data(sample_size, sparse_feature_num=sparse_feature_num, 20 | dense_feature_num=dense_feature_num) 21 | 22 | model = AFM(feature_columns, feature_columns, use_attention=use_attention, afm_dropout=0.5) 23 | 24 | check_model(model, model_name, x, y) 25 | 26 | 27 | @pytest.mark.parametrize( 28 | 'use_attention,sparse_feature_num,dense_feature_num', 29 | [(True, 3, 0), 30 | ] 31 | ) 32 | def test_AFMEstimator(use_attention, sparse_feature_num, dense_feature_num): 33 | if not Estimator_TEST_TF1 and version.parse(tf.__version__) < version.parse('2.2.0'): 34 | return 35 | 36 | sample_size = SAMPLE_SIZE 37 | 38 | linear_feature_columns, dnn_feature_columns, input_fn = get_test_data_estimator(sample_size, 39 | 
sparse_feature_num=sparse_feature_num, 40 | dense_feature_num=dense_feature_num) 41 | model = AFMEstimator(linear_feature_columns, dnn_feature_columns, use_attention=use_attention, afm_dropout=0.5) 42 | check_estimator(model, input_fn) 43 | 44 | 45 | if __name__ == "__main__": 46 | pass 47 | -------------------------------------------------------------------------------- /tests/models/AutoInt_test.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import tensorflow as tf 3 | from packaging import version 4 | 5 | from deepctr.estimator import AutoIntEstimator 6 | from deepctr.models import AutoInt 7 | from ..utils import check_model, get_test_data, SAMPLE_SIZE, get_test_data_estimator, check_estimator, \ 8 | Estimator_TEST_TF1 9 | 10 | 11 | @pytest.mark.parametrize( 12 | 'att_layer_num,dnn_hidden_units,sparse_feature_num', 13 | [(1, (), 1), (1, (4,), 1)] # (0, (4,), 2), (2, (4, 4,), 2) 14 | ) 15 | def test_AutoInt(att_layer_num, dnn_hidden_units, sparse_feature_num): 16 | if version.parse(tf.__version__) >= version.parse("1.14.0") and len(dnn_hidden_units) == 0: # todo check version 17 | return 18 | model_name = "AutoInt" 19 | sample_size = SAMPLE_SIZE 20 | x, y, feature_columns = get_test_data(sample_size, sparse_feature_num=sparse_feature_num, 21 | dense_feature_num=sparse_feature_num) 22 | 23 | model = AutoInt(feature_columns, feature_columns, att_layer_num=att_layer_num, 24 | dnn_hidden_units=dnn_hidden_units, dnn_dropout=0.5, ) 25 | check_model(model, model_name, x, y) 26 | 27 | 28 | @pytest.mark.parametrize( 29 | 'att_layer_num,dnn_hidden_units,sparse_feature_num', 30 | [(1, (4,), 1)] # (0, (4,), 2), (2, (4, 4,), 2) 31 | ) 32 | def test_AutoIntEstimator(att_layer_num, dnn_hidden_units, sparse_feature_num): 33 | if not Estimator_TEST_TF1 and version.parse(tf.__version__) < version.parse('2.2.0'): 34 | return 35 | sample_size = SAMPLE_SIZE 36 | linear_feature_columns, dnn_feature_columns, input_fn = 
get_test_data_estimator(sample_size, 37 | sparse_feature_num=sparse_feature_num, 38 | dense_feature_num=sparse_feature_num) 39 | 40 | model = AutoIntEstimator(linear_feature_columns, dnn_feature_columns, att_layer_num=att_layer_num, 41 | dnn_hidden_units=dnn_hidden_units, dnn_dropout=0.5, ) 42 | check_estimator(model, input_fn) 43 | 44 | 45 | if __name__ == "__main__": 46 | pass 47 | -------------------------------------------------------------------------------- /tests/models/BST_test.py: -------------------------------------------------------------------------------- 1 | from deepctr.models import BST 2 | from ..utils import check_model 3 | from .DIN_test import get_xy_fd 4 | 5 | 6 | def test_BST(): 7 | model_name = "BST" 8 | 9 | x, y, feature_columns, behavior_feature_list = get_xy_fd(hash_flag=True) 10 | 11 | model = BST(dnn_feature_columns=feature_columns, 12 | history_feature_list=behavior_feature_list, 13 | att_head_num=4) 14 | 15 | check_model(model, model_name, x, y, 16 | check_model_io=True) 17 | 18 | 19 | if __name__ == "__main__": 20 | pass 21 | -------------------------------------------------------------------------------- /tests/models/CCPM_test.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import tensorflow as tf 3 | 4 | from deepctr.estimator import CCPMEstimator 5 | from deepctr.models import CCPM 6 | from ..utils import check_model, get_test_data, SAMPLE_SIZE, check_estimator, get_test_data_estimator, \ 7 | Estimator_TEST_TF1 8 | 9 | 10 | @pytest.mark.parametrize( 11 | 'sparse_feature_num,dense_feature_num', 12 | [(3, 0) 13 | ] 14 | ) 15 | def test_CCPM(sparse_feature_num, dense_feature_num): 16 | if tf.__version__ >= "2.0.0": # todo 17 | return 18 | model_name = "CCPM" 19 | 20 | sample_size = SAMPLE_SIZE 21 | x, y, feature_columns = get_test_data(sample_size, sparse_feature_num=sparse_feature_num, 22 | dense_feature_num=dense_feature_num) 23 | 24 | model = CCPM(feature_columns, 
feature_columns, conv_kernel_width=(3, 2), conv_filters=( 25 | 2, 1), dnn_hidden_units=[32, ], dnn_dropout=0.5) 26 | check_model(model, model_name, x, y) 27 | 28 | 29 | @pytest.mark.parametrize( 30 | 'sparse_feature_num,dense_feature_num', 31 | [(2, 0), 32 | ] 33 | ) 34 | def test_CCPM_without_seq(sparse_feature_num, dense_feature_num): 35 | if tf.__version__ >= "2.0.0": 36 | return 37 | model_name = "CCPM" 38 | 39 | sample_size = SAMPLE_SIZE 40 | x, y, feature_columns = get_test_data(sample_size, sparse_feature_num=sparse_feature_num, 41 | dense_feature_num=dense_feature_num, sequence_feature=()) 42 | 43 | model = CCPM(feature_columns, feature_columns, conv_kernel_width=(3, 2), conv_filters=( 44 | 2, 1), dnn_hidden_units=[32, ], dnn_dropout=0.5) 45 | check_model(model, model_name, x, y) 46 | 47 | 48 | @pytest.mark.parametrize( 49 | 'sparse_feature_num,dense_feature_num', 50 | [(2, 0), 51 | ] 52 | ) 53 | def test_CCPMEstimator_without_seq(sparse_feature_num, dense_feature_num): 54 | if not Estimator_TEST_TF1 and tf.__version__ < "2.2.0": 55 | return 56 | 57 | sample_size = SAMPLE_SIZE 58 | linear_feature_columns, dnn_feature_columns, input_fn = get_test_data_estimator(sample_size, 59 | sparse_feature_num=sparse_feature_num, 60 | dense_feature_num=sparse_feature_num) 61 | 62 | model = CCPMEstimator(linear_feature_columns, dnn_feature_columns, conv_kernel_width=(3, 2), conv_filters=( 63 | 2, 1), dnn_hidden_units=[32, ], dnn_dropout=0.5) 64 | check_estimator(model, input_fn) 65 | 66 | 67 | if __name__ == "__main__": 68 | pass 69 | -------------------------------------------------------------------------------- /tests/models/DCNMix_test.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from deepctr.models import DCNMix 4 | from ..utils import check_model, get_test_data, SAMPLE_SIZE 5 | 6 | 7 | @pytest.mark.parametrize( 8 | 'cross_num,hidden_size,sparse_feature_num', 9 | [(0, (8,), 2), (1, (), 1), (1, (8,), 3) 
10 | ] 11 | ) 12 | def test_DCNMix(cross_num, hidden_size, sparse_feature_num): 13 | model_name = "DCNMix" 14 | 15 | sample_size = SAMPLE_SIZE 16 | x, y, feature_columns = get_test_data(sample_size, sparse_feature_num=sparse_feature_num, 17 | dense_feature_num=sparse_feature_num) 18 | 19 | model = DCNMix(feature_columns, feature_columns, cross_num=cross_num, dnn_hidden_units=hidden_size, dnn_dropout=0.5) 20 | check_model(model, model_name, x, y) 21 | 22 | 23 | if __name__ == "__main__": 24 | pass 25 | -------------------------------------------------------------------------------- /tests/models/DCN_test.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import tensorflow as tf 3 | 4 | from deepctr.estimator import DCNEstimator 5 | from deepctr.models import DCN 6 | from ..utils import check_model, get_test_data, SAMPLE_SIZE, get_test_data_estimator, check_estimator, \ 7 | Estimator_TEST_TF1 8 | 9 | 10 | @pytest.mark.parametrize( 11 | 'cross_num,hidden_size,sparse_feature_num,cross_parameterization', 12 | [(0, (8,), 2, 'vector'), (1, (), 1, 'vector'), (1, (8,), 3, 'vector'), 13 | (0, (8,), 2, 'matrix'), (1, (), 1, 'matrix'), (1, (8,), 3, 'matrix'), 14 | ] 15 | ) 16 | def test_DCN(cross_num, hidden_size, sparse_feature_num, cross_parameterization): 17 | model_name = "DCN" 18 | 19 | sample_size = SAMPLE_SIZE 20 | x, y, feature_columns = get_test_data(sample_size, sparse_feature_num=sparse_feature_num, 21 | dense_feature_num=sparse_feature_num) 22 | 23 | model = DCN(feature_columns, feature_columns, cross_num=cross_num, cross_parameterization=cross_parameterization, 24 | dnn_hidden_units=hidden_size, dnn_dropout=0.5) 25 | check_model(model, model_name, x, y) 26 | 27 | 28 | def test_DCN_2(): 29 | model_name = "DCN" 30 | 31 | sample_size = SAMPLE_SIZE 32 | x, y, feature_columns = get_test_data(sample_size, sparse_feature_num=3, 33 | dense_feature_num=2) 34 | 35 | model = DCN([], feature_columns, cross_num=1, 
dnn_hidden_units=(8,), dnn_dropout=0.5) 36 | check_model(model, model_name, x, y) 37 | 38 | 39 | @pytest.mark.parametrize( 40 | 'cross_num,hidden_size,sparse_feature_num', 41 | [(1, (8,), 3) 42 | ] 43 | ) 44 | def test_DCNEstimator(cross_num, hidden_size, sparse_feature_num): 45 | if not Estimator_TEST_TF1 and tf.__version__ < "2.2.0": 46 | return 47 | sample_size = SAMPLE_SIZE 48 | linear_feature_columns, dnn_feature_columns, input_fn = get_test_data_estimator(sample_size, 49 | sparse_feature_num=sparse_feature_num, 50 | dense_feature_num=sparse_feature_num) 51 | 52 | model = DCNEstimator(linear_feature_columns, dnn_feature_columns, cross_num=cross_num, dnn_hidden_units=hidden_size, 53 | dnn_dropout=0.5) 54 | check_estimator(model, input_fn) 55 | 56 | 57 | # def test_DCN_invalid(embedding_size=8, cross_num=0, hidden_size=()): 58 | # feature_dim_dict = {'sparse': [SparseFeat('sparse_1', 2), SparseFeat('sparse_2', 5), SparseFeat('sparse_3', 10)], 59 | # 'dense': [SparseFeat('dense_1', 1), SparseFeat('dense_1', 1), SparseFeat('dense_1', 1)]} 60 | # with pytest.raises(ValueError): 61 | # _ = DCN(None, embedding_size=embedding_size, cross_num=cross_num, dnn_hidden_units=hidden_size, dnn_dropout=0.5) 62 | 63 | 64 | if __name__ == "__main__": 65 | pass 66 | -------------------------------------------------------------------------------- /tests/models/DIEN_test.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pytest 3 | import tensorflow as tf 4 | from packaging import version 5 | 6 | from deepctr.feature_column import SparseFeat, VarLenSparseFeat, DenseFeat, get_feature_names 7 | from deepctr.models import DIEN 8 | from ..utils import check_model 9 | 10 | 11 | def get_xy_fd(use_neg=False, hash_flag=False): 12 | feature_columns = [SparseFeat('user', 3, hash_flag), 13 | SparseFeat('gender', 2, hash_flag), 14 | SparseFeat('item', 3 + 1, hash_flag), 15 | SparseFeat('item_gender', 2 + 1, hash_flag), 16 | 
DenseFeat('score', 1)] 17 | 18 | feature_columns += [ 19 | VarLenSparseFeat(SparseFeat('hist_item', vocabulary_size=3 + 1, embedding_dim=8, embedding_name='item'), 20 | maxlen=4, length_name="seq_length"), 21 | VarLenSparseFeat(SparseFeat('hist_item_gender', 2 + 1, embedding_dim=4, embedding_name='item_gender'), 22 | maxlen=4, length_name="seq_length")] 23 | 24 | behavior_feature_list = ["item", "item_gender"] 25 | uid = np.array([0, 1, 2]) 26 | ugender = np.array([0, 1, 0]) 27 | iid = np.array([1, 2, 3]) # 0 is mask value 28 | igender = np.array([1, 2, 1]) # 0 is mask value 29 | score = np.array([0.1, 0.2, 0.3]) 30 | 31 | hist_iid = np.array([[1, 2, 3, 0], [1, 2, 3, 0], [1, 2, 0, 0]]) 32 | hist_igender = np.array([[1, 1, 2, 0], [2, 1, 1, 0], [2, 1, 0, 0]]) 33 | 34 | behavior_length = np.array([3, 3, 2]) 35 | 36 | feature_dict = {'user': uid, 'gender': ugender, 'item': iid, 'item_gender': igender, 37 | 'hist_item': hist_iid, 'hist_item_gender': hist_igender, 38 | 'score': score,"seq_length":behavior_length} 39 | 40 | if use_neg: 41 | feature_dict['neg_hist_item'] = np.array([[1, 2, 3, 0], [1, 2, 3, 0], [1, 2, 0, 0]]) 42 | feature_dict['neg_hist_item_gender'] = np.array([[1, 1, 2, 0], [2, 1, 1, 0], [2, 1, 0, 0]]) 43 | feature_columns += [ 44 | VarLenSparseFeat(SparseFeat('neg_hist_item', vocabulary_size=3 + 1, embedding_dim=8, embedding_name='item'), 45 | maxlen=4, length_name="seq_length"), 46 | VarLenSparseFeat(SparseFeat('neg_hist_item_gender', 2 + 1, embedding_dim=4, embedding_name='item_gender'), 47 | maxlen=4, length_name="seq_length")] 48 | 49 | feature_names = get_feature_names(feature_columns) 50 | x = {name: feature_dict[name] for name in feature_names} 51 | y = np.array([1, 0, 1]) 52 | return x, y, feature_columns, behavior_feature_list 53 | 54 | 55 | # @pytest.mark.xfail(reason="There is a bug when save model use Dice") 56 | # @pytest.mark.skip(reason="misunderstood the API") 57 | 58 | @pytest.mark.parametrize( 59 | 'gru_type', 60 | ['GRU', 'AIGRU', 
'AGRU' # ,'AUGRU', 61 | ] 62 | ) 63 | def test_DIEN(gru_type): 64 | if version.parse(tf.__version__) >= version.parse('2.0.0'): 65 | tf.compat.v1.disable_eager_execution() # todo 66 | model_name = "DIEN_" + gru_type 67 | 68 | x, y, feature_columns, behavior_feature_list = get_xy_fd(hash_flag=True) 69 | 70 | model = DIEN(feature_columns, behavior_feature_list, 71 | dnn_hidden_units=[4, 4, 4], dnn_dropout=0.5, gru_type=gru_type) 72 | 73 | check_model(model, model_name, x, y, 74 | check_model_io=(gru_type == "GRU")) # TODO:fix bugs when load model in other type 75 | 76 | 77 | def test_DIEN_neg(): 78 | model_name = "DIEN_neg" 79 | if version.parse(tf.__version__) >= version.parse("1.14.0"): 80 | return 81 | 82 | x, y, feature_dim_dict, behavior_feature_list = get_xy_fd(use_neg=True) 83 | 84 | model = DIEN(feature_dim_dict, behavior_feature_list, 85 | dnn_hidden_units=[4, 4, 4], dnn_dropout=0.5, gru_type="AUGRU", use_negsampling=True) 86 | check_model(model, model_name, x, y) 87 | 88 | 89 | if __name__ == "__main__": 90 | pass 91 | -------------------------------------------------------------------------------- /tests/models/DIFM_test.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from deepctr.models import DIFM 4 | from ..utils import check_model, get_test_data, SAMPLE_SIZE 5 | 6 | 7 | @pytest.mark.parametrize( 8 | 'att_head_num,dnn_hidden_units,sparse_feature_num', 9 | [(1, (4,), 2), (2, (4, 4,), 2), (1, (4,), 1)] 10 | ) 11 | def test_DIFM(att_head_num, dnn_hidden_units, sparse_feature_num): 12 | model_name = "DIFM" 13 | sample_size = SAMPLE_SIZE 14 | x, y, feature_columns = get_test_data(sample_size, sparse_feature_num=sparse_feature_num, 15 | dense_feature_num=sparse_feature_num) 16 | 17 | model = DIFM(feature_columns, feature_columns, att_head_num=att_head_num, dnn_hidden_units=dnn_hidden_units, dnn_dropout=0.5) # forward the parametrized head count; it was previously accepted but never used 18 | check_model(model, model_name, x, y) 19 | 20 | 21 | if __name__ == "__main__": 22 | pass 23 |
-------------------------------------------------------------------------------- /tests/models/DIN_test.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from deepctr.feature_column import SparseFeat, VarLenSparseFeat, DenseFeat, get_feature_names 4 | from deepctr.models.sequence.din import DIN 5 | from ..utils import check_model 6 | 7 | 8 | def get_xy_fd(hash_flag=False): 9 | feature_columns = [SparseFeat('user', 3, embedding_dim=10), SparseFeat( 10 | 'gender', 2, embedding_dim=4), SparseFeat('item_id', 3 + 1, embedding_dim=8), 11 | SparseFeat('cate_id', 2 + 1, embedding_dim=4), DenseFeat('pay_score', 1)] 12 | feature_columns += [ 13 | VarLenSparseFeat(SparseFeat('hist_item_id', vocabulary_size=3 + 1, embedding_dim=8, embedding_name='item_id'), 14 | maxlen=4, length_name="seq_length"), 15 | VarLenSparseFeat(SparseFeat('hist_cate_id', 2 + 1, embedding_dim=4, embedding_name='cate_id'), maxlen=4, 16 | length_name="seq_length")] 17 | # Notice: History behavior sequence feature name must start with "hist_". 
18 | behavior_feature_list = ["item_id", "cate_id"] 19 | uid = np.array([0, 1, 2]) 20 | ugender = np.array([0, 1, 0]) 21 | iid = np.array([1, 2, 3]) # 0 is mask value 22 | cate_id = np.array([1, 2, 2]) # 0 is mask value 23 | pay_score = np.array([0.1, 0.2, 0.3]) 24 | 25 | hist_iid = np.array([[1, 2, 3, 0], [3, 2, 1, 0], [1, 2, 0, 0]]) 26 | hist_cate_id = np.array([[1, 2, 2, 0], [2, 2, 1, 0], [1, 2, 0, 0]]) 27 | seq_length = np.array([3, 3, 2]) # the actual length of the behavior sequence 28 | 29 | feature_dict = {'user': uid, 'gender': ugender, 'item_id': iid, 'cate_id': cate_id, 30 | 'hist_item_id': hist_iid, 'hist_cate_id': hist_cate_id, 31 | 'pay_score': pay_score, 'seq_length': seq_length} 32 | x = {name: feature_dict[name] for name in get_feature_names(feature_columns)} 33 | y = np.array([1, 0, 1]) 34 | return x, y, feature_columns, behavior_feature_list 35 | 36 | 37 | # @pytest.mark.xfail(reason="There is a bug when save model use Dice") 38 | # @pytest.mark.skip(reason="misunderstood the API") 39 | 40 | 41 | def test_DIN(): 42 | model_name = "DIN" 43 | 44 | x, y, feature_columns, behavior_feature_list = get_xy_fd(True) 45 | 46 | model = DIN(feature_columns, behavior_feature_list, dnn_hidden_units=[4, 4, 4], 47 | dnn_dropout=0.5) 48 | # todo test dice 49 | 50 | check_model(model, model_name, x, y) 51 | 52 | 53 | if __name__ == "__main__": 54 | pass 55 | -------------------------------------------------------------------------------- /tests/models/DSIN_test.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pytest 3 | 4 | from deepctr.feature_column import SparseFeat, VarLenSparseFeat, DenseFeat, get_feature_names 5 | from deepctr.models.sequence.dsin import DSIN 6 | from ..utils import check_model 7 | 8 | 9 | def get_xy_fd(hash_flag=False): 10 | feature_columns = [SparseFeat('user', 3, use_hash=hash_flag), 11 | SparseFeat('gender', 2, use_hash=hash_flag), 12 | SparseFeat('item', 3 + 1, 
use_hash=hash_flag), 13 | SparseFeat('item_gender', 2 + 1, use_hash=hash_flag), 14 | DenseFeat('score', 1)] 15 | feature_columns += [ 16 | VarLenSparseFeat(SparseFeat('sess_0_item', 3 + 1, embedding_dim=4, use_hash=hash_flag, embedding_name='item'), 17 | maxlen=4), VarLenSparseFeat( 18 | SparseFeat('sess_0_item_gender', 2 + 1, embedding_dim=4, use_hash=hash_flag, embedding_name='item_gender'), 19 | maxlen=4)] 20 | feature_columns += [ 21 | VarLenSparseFeat(SparseFeat('sess_1_item', 3 + 1, embedding_dim=4, use_hash=hash_flag, embedding_name='item'), 22 | maxlen=4), VarLenSparseFeat( 23 | SparseFeat('sess_1_item_gender', 2 + 1, embedding_dim=4, use_hash=hash_flag, embedding_name='item_gender'), 24 | maxlen=4)] 25 | 26 | behavior_feature_list = ["item", "item_gender"] 27 | uid = np.array([0, 1, 2]) 28 | ugender = np.array([0, 1, 0]) 29 | iid = np.array([1, 2, 3]) # 0 is mask value 30 | igender = np.array([1, 2, 1]) # 0 is mask value 31 | score = np.array([0.1, 0.2, 0.3]) 32 | 33 | sess1_iid = np.array([[1, 2, 3, 0], [1, 2, 3, 0], [0, 0, 0, 0]]) 34 | sess1_igender = np.array([[1, 1, 2, 0], [2, 1, 1, 0], [0, 0, 0, 0]]) 35 | 36 | sess2_iid = np.array([[1, 2, 3, 0], [0, 0, 0, 0], [0, 0, 0, 0]]) 37 | sess2_igender = np.array([[1, 1, 2, 0], [0, 0, 0, 0], [0, 0, 0, 0]]) 38 | 39 | sess_number = np.array([2, 1, 0]) 40 | 41 | feature_dict = {'user': uid, 'gender': ugender, 'item': iid, 'item_gender': igender, 42 | 'sess_0_item': sess1_iid, 'sess_0_item_gender': sess1_igender, 'score': score, 43 | 'sess_1_item': sess2_iid, 'sess_1_item_gender': sess2_igender, } 44 | 45 | x = {name: feature_dict[name] for name in get_feature_names(feature_columns)} 46 | x["sess_length"] = sess_number 47 | 48 | y = np.array([1, 0, 1]) 49 | return x, y, feature_columns, behavior_feature_list 50 | 51 | 52 | @pytest.mark.parametrize( 53 | 'bias_encoding', 54 | [True, False] 55 | ) 56 | def test_DSIN(bias_encoding): 57 | model_name = "DSIN" 58 | 59 | x, y, feature_columns, behavior_feature_list = 
get_xy_fd(True) 60 | 61 | model = DSIN(feature_columns, behavior_feature_list, sess_max_count=2, bias_encoding=bias_encoding, 62 | dnn_hidden_units=[4, 4], dnn_dropout=0.5, ) 63 | check_model(model, model_name, x, y) 64 | 65 | 66 | if __name__ == "__main__": 67 | pass 68 | -------------------------------------------------------------------------------- /tests/models/DeepFEFM_test.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import tensorflow as tf 3 | 4 | from deepctr.estimator import DeepFEFMEstimator 5 | from deepctr.models import DeepFEFM 6 | from ..utils import check_model, get_test_data, SAMPLE_SIZE, get_test_data_estimator, check_estimator, \ 7 | Estimator_TEST_TF1 8 | 9 | 10 | @pytest.mark.parametrize( 11 | 'hidden_size,sparse_feature_num,use_fefm,use_linear,use_fefm_embed_in_dnn', 12 | [((2,), 1, True, True, True), 13 | ((2,), 1, True, True, False), 14 | ((2,), 1, True, False, True), 15 | ((2,), 1, False, True, True), 16 | ((2,), 1, True, False, False), 17 | ((2,), 1, False, True, False), 18 | ((2,), 1, False, False, True), 19 | ((2,), 1, False, False, False), 20 | ((), 1, True, True, True) 21 | ] 22 | ) 23 | def test_DeepFEFM(hidden_size, sparse_feature_num, use_fefm, use_linear, use_fefm_embed_in_dnn): 24 | if tf.__version__ == "1.15.0" or tf.__version__ == "1.4.0": # slow in tf 1.15 25 | return 26 | model_name = "DeepFEFM" 27 | sample_size = SAMPLE_SIZE 28 | x, y, feature_columns = get_test_data(sample_size, sparse_feature_num=sparse_feature_num, 29 | dense_feature_num=sparse_feature_num) 30 | model = DeepFEFM(feature_columns, feature_columns, dnn_hidden_units=hidden_size, dnn_dropout=0.5, 31 | use_linear=use_linear, use_fefm=use_fefm, use_fefm_embed_in_dnn=use_fefm_embed_in_dnn) 32 | 33 | check_model(model, model_name, x, y) 34 | 35 | 36 | @pytest.mark.parametrize( 37 | 'hidden_size,sparse_feature_num', 38 | [((2,), 2), 39 | ((), 2), 40 | ] 41 | ) 42 | def test_DeepFEFMEstimator(hidden_size, 
# ---- tests/models/DeepFM_test.py ----
@pytest.mark.parametrize(
    'hidden_size,sparse_feature_num',
    [((2,), 1),
     ((3,), 2)
     ]
)
def test_DeepFM(hidden_size, sparse_feature_num):
    """Build a DeepFM Keras model and run ``check_model`` on it.

    The number of dense features equals ``sparse_feature_num``.
    """
    model_name = "DeepFM"
    x, y, feature_columns = get_test_data(SAMPLE_SIZE, sparse_feature_num=sparse_feature_num,
                                          dense_feature_num=sparse_feature_num)

    model = DeepFM(feature_columns, feature_columns, dnn_hidden_units=hidden_size, dnn_dropout=0.5)

    check_model(model, model_name, x, y)


@pytest.mark.parametrize(
    'hidden_size,sparse_feature_num',
    [
        ((3,), 2)
    ]
)
def test_DeepFMEstimator(hidden_size, sparse_feature_num):
    """Build a regression DeepFMEstimator and run ``check_estimator`` on it.

    Returns early (test passes without running) when ``Estimator_TEST_TF1`` is
    falsy and the installed TensorFlow is older than 2.2.0.
    """
    # Local import keeps this block self-contained; other test modules in the
    # repository already depend on ``packaging``.
    from packaging import version

    # Parsed comparison: a plain string compare ranks "2.10.0" below "2.2.0".
    if not Estimator_TEST_TF1 and version.parse(tf.__version__) < version.parse("2.2.0"):
        return
    linear_feature_columns, dnn_feature_columns, input_fn = get_test_data_estimator(
        SAMPLE_SIZE,
        sparse_feature_num=sparse_feature_num,
        dense_feature_num=sparse_feature_num,
        classification=False)

    model = DeepFMEstimator(linear_feature_columns, dnn_feature_columns, dnn_hidden_units=hidden_size,
                            dnn_dropout=0.5, task="regression")

    check_estimator(model, input_fn)
# ---- tests/models/FGCNN_test.py ----
@pytest.mark.parametrize(
    'sparse_feature_num,dense_feature_num',
    [(1, 1), (3, 3)],
)
def test_FGCNN(sparse_feature_num, dense_feature_num):
    """Build an FGCNN model on random data and run ``check_model`` without the model-IO step."""
    features, labels, columns = get_test_data(
        SAMPLE_SIZE,
        embedding_size=8,
        sparse_feature_num=sparse_feature_num,
        dense_feature_num=dense_feature_num,
    )

    fgcnn = FGCNN(
        columns,
        columns,
        conv_kernel_width=(3, 2),
        conv_filters=(2, 1),
        new_maps=(2, 2),
        pooling_width=(2, 2),
        dnn_hidden_units=(32,),
        dnn_dropout=0.5,
    )
    # TODO: add model_io check
    check_model(fgcnn, "FGCNN", features, labels, check_model_io=False)
# ---- tests/models/FLEN_test.py ----
@pytest.mark.parametrize(
    'hidden_size,sparse_feature_num',
    [
        ((3,), 6)
    ]
)
def test_DeepFM(hidden_size, sparse_feature_num):
    """Build a FLEN model on grouped features and run ``check_model`` on it."""
    # NOTE(review): the name looks copied from the DeepFM tests although this
    # exercises FLEN; kept unchanged so existing test selections still match.
    model_name = "FLEN"
    x, y, feature_columns = get_test_data(SAMPLE_SIZE, embedding_size=2, sparse_feature_num=sparse_feature_num,
                                          dense_feature_num=sparse_feature_num, use_group=True)

    model = FLEN(feature_columns, feature_columns, dnn_hidden_units=hidden_size, dnn_dropout=0.5)

    check_model(model, model_name, x, y)


# ---- tests/models/FNN_test.py ----
@pytest.mark.parametrize(
    'sparse_feature_num,dense_feature_num',
    [(1, 1), (3, 3)
     ]
)
def test_FNN(sparse_feature_num, dense_feature_num):
    """Build an FNN Keras model and run ``check_model`` on it.

    Returns early (test passes without running) on TensorFlow 2.0.0 or newer.
    """
    # Local import keeps this block self-contained; other test modules in the
    # repository already depend on ``packaging``.
    from packaging import version

    # Parsed comparison, consistent with the ONN and WDL tests.
    if version.parse(tf.__version__) >= version.parse("2.0.0"):
        return
    model_name = "FNN"

    x, y, feature_columns = get_test_data(SAMPLE_SIZE, sparse_feature_num=sparse_feature_num,
                                          dense_feature_num=dense_feature_num)

    model = FNN(feature_columns, feature_columns, dnn_hidden_units=[8, 8], dnn_dropout=0.5)
    check_model(model, model_name, x, y)
# ---- tests/models/FNN_test.py (continued) ----
@pytest.mark.parametrize(
    'sparse_feature_num,dense_feature_num',
    [(2, 2),
     ]
)
def test_FNNEstimator(sparse_feature_num, dense_feature_num):
    """Build an FNNEstimator and run ``check_estimator`` on it.

    Returns early (test passes without running) when ``Estimator_TEST_TF1`` is
    falsy and the installed TensorFlow is older than 2.2.0.
    """
    # Local import keeps this block self-contained; other test modules in the
    # repository already depend on ``packaging``.
    from packaging import version

    # Parsed comparison: a plain string compare ranks "2.10.0" below "2.2.0".
    if not Estimator_TEST_TF1 and version.parse(tf.__version__) < version.parse("2.2.0"):
        return
    linear_feature_columns, dnn_feature_columns, input_fn = get_test_data_estimator(
        SAMPLE_SIZE,
        sparse_feature_num=sparse_feature_num,
        dense_feature_num=dense_feature_num)

    model = FNNEstimator(linear_feature_columns, dnn_feature_columns, dnn_hidden_units=[8, 8], dnn_dropout=0.5)

    check_estimator(model, input_fn)


# ---- tests/models/FiBiNET_test.py ----
@pytest.mark.parametrize(
    'bilinear_type',
    ["each",
     "all", "interaction"]
)
def test_FiBiNET(bilinear_type):
    """Build a FiBiNET Keras model for one ``bilinear_type`` and run ``check_model`` on it."""
    model_name = "FiBiNET"
    x, y, feature_columns = get_test_data(SAMPLE_SIZE, sparse_feature_num=2, dense_feature_num=2)

    model = FiBiNET(feature_columns, feature_columns, bilinear_type=bilinear_type, dnn_hidden_units=[4, ],
                    dnn_dropout=0.5, )
    check_model(model, model_name, x, y)
# ---- tests/models/FiBiNET_test.py (continued) ----
@pytest.mark.parametrize(
    'bilinear_type',
    ["interaction"]
)
def test_FiBiNETEstimator(bilinear_type):
    """Build a FiBiNETEstimator and run ``check_estimator`` on it.

    Returns early (test passes without running) when ``Estimator_TEST_TF1`` is
    falsy and the installed TensorFlow is older than 2.2.0.
    """
    # Local import keeps this block self-contained; other test modules in the
    # repository already depend on ``packaging``.
    from packaging import version

    # Parsed comparison: a plain string compare ranks "2.10.0" below "2.2.0".
    if not Estimator_TEST_TF1 and version.parse(tf.__version__) < version.parse("2.2.0"):
        return
    linear_feature_columns, dnn_feature_columns, input_fn = get_test_data_estimator(
        SAMPLE_SIZE, sparse_feature_num=2, dense_feature_num=2)

    model = FiBiNETEstimator(linear_feature_columns, dnn_feature_columns, bilinear_type=bilinear_type,
                             dnn_hidden_units=[4, ], dnn_dropout=0.5, )

    check_estimator(model, input_fn)


# ---- tests/models/FwFM_test.py ----
@pytest.mark.parametrize(
    'hidden_size,sparse_feature_num',
    [((2,), 1),
     ((), 1),
     ]
)
def test_FwFM(hidden_size, sparse_feature_num):
    """Build a FwFM Keras model and run ``check_model`` on it.

    The number of dense features equals ``sparse_feature_num``.
    """
    model_name = "FwFM"
    x, y, feature_columns = get_test_data(SAMPLE_SIZE, sparse_feature_num=sparse_feature_num,
                                          dense_feature_num=sparse_feature_num)
    model = FwFM(feature_columns, feature_columns, dnn_hidden_units=hidden_size, dnn_dropout=0.5)

    check_model(model, model_name, x, y)
# ---- tests/models/FwFM_test.py (continued) ----
@pytest.mark.parametrize(
    'hidden_size,sparse_feature_num',
    [((2,), 2),
     ]
)
def test_FwFMEstimator(hidden_size, sparse_feature_num):
    """Build a FwFMEstimator and run ``check_estimator`` on it.

    Returns early (test passes without running) when ``Estimator_TEST_TF1`` is
    falsy and the installed TensorFlow is older than 2.2.0.
    """
    # Local import keeps this block self-contained; other test modules in the
    # repository already depend on ``packaging``.
    from packaging import version

    # Parsed comparison: a plain string compare ranks "2.10.0" below "2.2.0".
    if not Estimator_TEST_TF1 and version.parse(tf.__version__) < version.parse("2.2.0"):
        return
    linear_feature_columns, dnn_feature_columns, input_fn = get_test_data_estimator(
        SAMPLE_SIZE,
        sparse_feature_num=sparse_feature_num,
        dense_feature_num=sparse_feature_num)

    model = FwFMEstimator(linear_feature_columns, dnn_feature_columns, dnn_hidden_units=hidden_size,
                          dnn_dropout=0.5)

    check_estimator(model, input_fn)


# ---- tests/models/IFM_test.py ----
@pytest.mark.parametrize(
    'hidden_size,sparse_feature_num',
    [((2,), 1),
     ((3,), 2)
     ]
)
def test_IFM(hidden_size, sparse_feature_num):
    """Build an IFM Keras model and run ``check_model`` on it.

    The number of dense features equals ``sparse_feature_num``.
    """
    model_name = "IFM"
    x, y, feature_columns = get_test_data(SAMPLE_SIZE, sparse_feature_num=sparse_feature_num,
                                          dense_feature_num=sparse_feature_num)

    model = IFM(feature_columns, feature_columns, dnn_hidden_units=hidden_size, dnn_dropout=0.5)
    check_model(model, model_name, x, y)
# ---- tests/models/MLR_test.py ----
@pytest.mark.parametrize(

    'region_sparse,region_dense,base_sparse,base_dense,bias_sparse,bias_dense',

    [(0, 2, 0, 2, 0, 1), (0, 2, 0, 1, 0, 2), (0, 2, 0, 0, 1, 0),
     #     (0, 1, 1, 2, 1, 1,), (0, 1, 1, 1, 1, 2), (0, 1, 1, 0, 2, 0),
     #     (1, 0, 2, 2, 2, 1), (2, 0, 2, 1, 2, 2), (2, 0, 2, 0, 0, 0)
     ]

)
def test_MLRs(region_sparse, region_dense, base_sparse, base_dense, bias_sparse, bias_dense):
    """Construct and compile an MLR model with separate region, base and bias columns.

    Only construction and ``compile`` are exercised; the model is not trained.
    """
    model_name = "MLRs"
    _, _, region_feature_columns = get_test_data(SAMPLE_SIZE, sparse_feature_num=region_sparse,
                                                 dense_feature_num=region_dense, prefix='region')
    # Base and bias columns use their own counts. Previously both reused the
    # region_* values, so the base_*/bias_* parameters had no effect.
    _, _, base_feature_columns = get_test_data(SAMPLE_SIZE, sparse_feature_num=base_sparse,
                                               dense_feature_num=base_dense, prefix='base')
    _, _, bias_feature_columns = get_test_data(SAMPLE_SIZE, sparse_feature_num=bias_sparse,
                                               dense_feature_num=bias_dense, prefix='bias')

    model = MLR(region_feature_columns, base_feature_columns, bias_feature_columns=bias_feature_columns)
    model.compile('adam', 'binary_crossentropy',
                  metrics=['binary_crossentropy'])
    print(model_name + " test pass!")


def test_MLR():
    """Build an MLR model from region columns only, compile it and run ``check_model`` on it."""
    model_name = "MLR"
    # Only region data is generated: the base/bias data built here before was
    # never passed to the model or to ``check_model``.
    region_x, y, region_feature_columns = get_test_data(SAMPLE_SIZE, sparse_feature_num=3, dense_feature_num=3,
                                                        prefix='region')

    model = MLR(region_feature_columns)
    model.compile('adam', 'binary_crossentropy',
                  metrics=['binary_crossentropy'])

    check_model(model, model_name, region_x, y)
    print(model_name + " test pass!")
# ---- tests/models/MTL_test.py ----
def test_SharedBottom():
    """Train a two-task SharedBottom model on random data via ``check_mtl_model``."""
    if tf.__version__ == "1.15.0":  # slow in tf 1.15
        return
    inputs, labels, columns = get_mtl_test_data()

    shared_bottom = SharedBottom(
        columns,
        bottom_dnn_hidden_units=(8,),
        tower_dnn_hidden_units=(8,),
        task_types=['binary', 'binary'],
        task_names=['label_income', 'label_marital'],
    )
    check_mtl_model(shared_bottom, "SharedBottom", inputs, labels, task_types=['binary', 'binary'])


def test_ESMM():
    """Train a two-task ESMM model on random data via ``check_mtl_model``."""
    if tf.__version__ == "1.15.0":  # slow in tf 1.15
        return
    inputs, labels, columns = get_mtl_test_data()

    esmm = ESMM(
        columns,
        tower_dnn_hidden_units=(8,),
        task_types=['binary', 'binary'],
        task_names=['label_marital', 'label_income'],
    )
    check_mtl_model(esmm, "ESMM", inputs, labels, task_types=['binary', 'binary'])


def test_MMOE():
    """Train a two-task MMOE model with three experts on random data via ``check_mtl_model``."""
    if tf.__version__ == "1.15.0":  # slow in tf 1.15
        return
    inputs, labels, columns = get_mtl_test_data()

    mmoe = MMOE(
        columns,
        num_experts=3,
        expert_dnn_hidden_units=(8,),
        tower_dnn_hidden_units=(8,),
        gate_dnn_hidden_units=(),
        task_types=['binary', 'binary'],
        task_names=['income', 'marital'],
    )
    check_mtl_model(mmoe, "MMOE", inputs, labels, task_types=['binary', 'binary'])


@pytest.mark.parametrize(
    'num_levels,gate_dnn_hidden_units',
    [(2, ()),
     (1, (4,))],
)
def test_PLE(num_levels, gate_dnn_hidden_units):
    """Train a two-task PLE model for one (levels, gate units) setting via ``check_mtl_model``."""
    if tf.__version__ == "1.15.0":  # slow in tf 1.15
        return
    inputs, labels, columns = get_mtl_test_data()

    ple = PLE(
        columns,
        num_levels=num_levels,
        expert_dnn_hidden_units=(8,),
        tower_dnn_hidden_units=(8,),
        gate_dnn_hidden_units=gate_dnn_hidden_units,
        task_types=['binary', 'binary'],
        task_names=['income', 'marital'],
    )
    check_mtl_model(ple, "PLE", inputs, labels, task_types=['binary', 'binary'])
# ---- tests/models/NFM_test.py ----
@pytest.mark.parametrize(
    'hidden_size,sparse_feature_num',
    [((8,), 1), ((8, 8,), 2)]
)
def test_NFM(hidden_size, sparse_feature_num):
    """Build an NFM Keras model and run ``check_model`` on it.

    The number of dense features equals ``sparse_feature_num``.
    """
    model_name = "NFM"

    x, y, feature_columns = get_test_data(SAMPLE_SIZE, sparse_feature_num=sparse_feature_num,
                                          dense_feature_num=sparse_feature_num)

    # Use the parametrized hidden layout. It was previously ignored in favour
    # of a hard-coded [8, 8], so both cases tested the same network.
    model = NFM(feature_columns, feature_columns, dnn_hidden_units=hidden_size, dnn_dropout=0.5)
    check_model(model, model_name, x, y)


@pytest.mark.parametrize(
    'hidden_size,sparse_feature_num',
    [((8,), 1), ((8, 8,), 2)]
)
def test_FNNEstimator(hidden_size, sparse_feature_num):
    """Build an NFMEstimator and run ``check_estimator`` on it.

    Returns early (test passes without running) when ``Estimator_TEST_TF1`` is
    falsy and the installed TensorFlow is older than 2.2.0.
    """
    # NOTE(review): the name looks copied from the FNN tests although this
    # exercises NFMEstimator; kept unchanged so existing test selections still match.
    # Local import keeps this block self-contained; other test modules in the
    # repository already depend on ``packaging``.
    from packaging import version

    # Parsed comparison: a plain string compare ranks "2.10.0" below "2.2.0".
    if not Estimator_TEST_TF1 and version.parse(tf.__version__) < version.parse("2.2.0"):
        return
    linear_feature_columns, dnn_feature_columns, input_fn = get_test_data_estimator(
        SAMPLE_SIZE,
        sparse_feature_num=sparse_feature_num,
        dense_feature_num=sparse_feature_num)

    # Same fix as in test_NFM: honour the parametrized ``hidden_size``.
    model = NFMEstimator(linear_feature_columns, dnn_feature_columns, dnn_hidden_units=hidden_size,
                         dnn_dropout=0.5)

    check_estimator(model, input_fn)
# ---- tests/models/ONN_test.py ----
@pytest.mark.parametrize(
    'sparse_feature_num',
    [2],
)
def test_ONN(sparse_feature_num):
    """Build an ONN model on hashed features with sequence inputs and run ``check_model`` on it.

    Returns early (test passes without running) on TensorFlow 2.0.0 or newer.
    """
    running_tf2 = version.parse(tf.__version__) >= version.parse('2.0.0')
    if running_tf2:
        return

    features, labels, columns = get_test_data(
        SAMPLE_SIZE,
        sparse_feature_num=sparse_feature_num,
        dense_feature_num=sparse_feature_num,
        sequence_feature=('sum', 'mean', 'max',),
        hash_flag=True,
    )

    onn = ONN(columns, columns, dnn_hidden_units=[4, 4], dnn_dropout=0.5)
    check_model(onn, "ONN", features, labels)


# ---- tests/models/PNN_test.py ----
@pytest.mark.parametrize(
    'use_inner, use_outter,sparse_feature_num',
    [(True, True, 3), (False, False, 1)],
)
def test_PNN(use_inner, use_outter, sparse_feature_num):
    """Build a PNN model for one inner/outer product setting and run ``check_model`` on it."""
    features, labels, columns = get_test_data(
        SAMPLE_SIZE,
        sparse_feature_num=sparse_feature_num,
        dense_feature_num=sparse_feature_num,
    )
    pnn = PNN(
        columns,
        dnn_hidden_units=[4, 4],
        dnn_dropout=0.5,
        use_inner=use_inner,
        use_outter=use_outter,
    )
    check_model(pnn, "PNN", features, labels)
# ---- tests/models/PNN_test.py (continued) ----
@pytest.mark.parametrize(
    'use_inner, use_outter,sparse_feature_num',
    [(True, True, 2)
     ]
)
def test_PNNEstimator(use_inner, use_outter, sparse_feature_num):
    """Build a PNNEstimator and run ``check_estimator`` on it.

    Returns early (test passes without running) when ``Estimator_TEST_TF1`` is
    falsy and the installed TensorFlow is older than 2.2.0.
    """
    # Local import keeps this block self-contained; the WDL tests below
    # already depend on ``packaging``.
    from packaging import version

    # Parsed comparison: a plain string compare ranks "2.10.0" below "2.2.0".
    if not Estimator_TEST_TF1 and version.parse(tf.__version__) < version.parse("2.2.0"):
        return
    _, dnn_feature_columns, input_fn = get_test_data_estimator(SAMPLE_SIZE,
                                                               sparse_feature_num=sparse_feature_num,
                                                               dense_feature_num=sparse_feature_num)

    model = PNNEstimator(dnn_feature_columns, dnn_hidden_units=[4, 4], dnn_dropout=0.5, use_inner=use_inner,
                         use_outter=use_outter)

    check_estimator(model, input_fn)


# ---- tests/models/WDL_test.py ----
@pytest.mark.parametrize(
    'sparse_feature_num,dense_feature_num',
    [(2, 0), (0, 2)  # ,(2, 2)
     ]
)
def test_WDL(sparse_feature_num, dense_feature_num):
    """Build a WDL Keras model on hashed features and run ``check_model`` on it.

    Returns early (test passes without running) on TensorFlow 2.0.0 or newer.
    """
    from packaging import version

    if version.parse(tf.__version__) >= version.parse('2.0.0'):
        return
    model_name = "WDL"
    x, y, feature_columns = get_test_data(SAMPLE_SIZE, sparse_feature_num=sparse_feature_num,
                                          dense_feature_num=dense_feature_num, hash_flag=True)

    model = WDL(feature_columns, feature_columns,
                dnn_hidden_units=[4, 4], dnn_dropout=0.5)
    check_model(model, model_name, x, y)


@pytest.mark.parametrize(
    'sparse_feature_num,dense_feature_num',
    [(2, 1),  # (0, 2)#,(2, 2)
     ]
)
def test_WDLEstimator(sparse_feature_num, dense_feature_num):
    """Build a WDLEstimator and run ``check_estimator`` on it.

    Returns early (test passes without running) when ``Estimator_TEST_TF1`` is
    falsy and the installed TensorFlow is older than 2.2.0.
    """
    from packaging import version

    if not Estimator_TEST_TF1 and version.parse(tf.__version__) < version.parse('2.2.0'):
        return

    linear_feature_columns, dnn_feature_columns, input_fn = get_test_data_estimator(SAMPLE_SIZE,
                                                                                    sparse_feature_num,
                                                                                    dense_feature_num)
    model = WDLEstimator(linear_feature_columns, dnn_feature_columns,
                         dnn_hidden_units=[4, 4], dnn_dropout=0.5)
    check_estimator(model, input_fn)
# ---- tests/models/xDeepFM_test.py ----
@pytest.mark.parametrize(
    'dnn_hidden_units,cin_layer_size,cin_split_half,cin_activation,sparse_feature_num,dense_feature_dim',
    [  # ((), (), True, 'linear', 1, 2),
        ((8,), (), True, 'linear', 1, 1),
        ((), (8,), True, 'linear', 2, 2),
        ((8,), (8,), False, 'relu', 1, 0)
    ]
)
def test_xDeepFM(dnn_hidden_units, cin_layer_size, cin_split_half, cin_activation, sparse_feature_num,
                 dense_feature_dim):
    """Build an xDeepFM Keras model for one DNN/CIN configuration and run ``check_model`` on it."""
    model_name = "xDeepFM"

    # ``dense_feature_dim`` was parametrized but never used (the dense count was
    # tied to ``sparse_feature_num``). It is now passed as the dense feature count.
    # NOTE(review): assumes the parameter means "number of dense features" — confirm.
    x, y, feature_columns = get_test_data(SAMPLE_SIZE, sparse_feature_num=sparse_feature_num,
                                          dense_feature_num=dense_feature_dim)

    model = xDeepFM(feature_columns, feature_columns, dnn_hidden_units=dnn_hidden_units,
                    cin_layer_size=cin_layer_size,
                    cin_split_half=cin_split_half, cin_activation=cin_activation, dnn_dropout=0.5)
    check_model(model, model_name, x, y)
# ---- tests/models/xDeepFM_test.py (continued) ----
@pytest.mark.parametrize(
    'dnn_hidden_units,cin_layer_size,cin_split_half,cin_activation,sparse_feature_num,dense_feature_dim',
    [  # ((), (), True, 'linear', 1, 2),
        ((8,), (8,), False, 'relu', 2, 1)
    ]
)
def test_xDeepFMEstimator(dnn_hidden_units, cin_layer_size, cin_split_half, cin_activation, sparse_feature_num,
                          dense_feature_dim):
    """Build an xDeepFMEstimator and run ``check_estimator`` on it.

    Returns early (test passes without running) when ``Estimator_TEST_TF1`` is
    falsy and the installed TensorFlow is older than 2.2.0.
    """
    # Local import keeps this block self-contained; other test modules in the
    # repository already depend on ``packaging``.
    from packaging import version

    # Parsed comparison: a plain string compare ranks "2.10.0" below "2.2.0".
    if not Estimator_TEST_TF1 and version.parse(tf.__version__) < version.parse("2.2.0"):
        return
    # ``dense_feature_dim`` was parametrized but never used; it is now passed
    # as the dense feature count.
    # NOTE(review): assumes the parameter means "number of dense features" — confirm.
    linear_feature_columns, dnn_feature_columns, input_fn = get_test_data_estimator(
        SAMPLE_SIZE,
        sparse_feature_num=sparse_feature_num,
        dense_feature_num=dense_feature_dim)

    model = xDeepFMEstimator(linear_feature_columns, dnn_feature_columns, dnn_hidden_units=dnn_hidden_units,
                             cin_layer_size=cin_layer_size,
                             cin_split_half=cin_split_half, cin_activation=cin_activation, dnn_dropout=0.5)

    check_estimator(model, input_fn)
# ---- tests/utils_mtl.py ----
def get_mtl_test_data(sample_size=10, embedding_size=4, sparse_feature_num=1,
                      dense_feature_num=1, task_types=('binary', 'binary'),
                      hash_flag=False, prefix='', use_group=False):
    """Generate random inputs, labels and feature columns for multi-task tests.

    :param sample_size: number of samples drawn for every input and label array
    :param embedding_size: embedding dimension passed to each ``SparseFeat``
    :param sparse_feature_num: number of sparse features to create
    :param dense_feature_num: number of dense features to create
    :param task_types: one entry per task; ``'binary'`` yields random 0/1 labels,
        any other value yields random floats in [0, 1)
    :param hash_flag: forwarded to ``SparseFeat`` as ``use_hash``
    :param prefix: string prepended to every generated feature name
    :param use_group: if true, sparse feature ``i`` gets group name ``str(i % 3)``,
        otherwise ``DEFAULT_GROUP_NAME``
    :return: ``(model_input, y_list, feature_columns)`` where ``model_input`` maps
        feature name to a numpy array and ``y_list`` holds one label array per task
    """
    feature_columns = []
    model_input = {}

    for i in range(sparse_feature_num):
        group_name = str(i % 3) if use_group else DEFAULT_GROUP_NAME
        # Random vocabulary size in [1, 10).
        dim = np.random.randint(1, 10)
        feature_columns.append(
            SparseFeat(prefix + 'sparse_feature_' + str(i), dim, embedding_size, use_hash=hash_flag, dtype=tf.int32,
                       group_name=group_name))

    for i in range(dense_feature_num):
        # Identity-like transform; defined per feature as in the original code.
        def transform_fn(x): return (x - 0.0) / 1.0

        feature_columns.append(
            DenseFeat(
                prefix + 'dense_feature_' + str(i),
                1,
                dtype=tf.float32,
                transform_fn=transform_fn
            )
        )

    for fc in feature_columns:
        if isinstance(fc, SparseFeat):
            model_input[fc.name] = np.random.randint(0, fc.vocabulary_size, sample_size)
        elif isinstance(fc, DenseFeat):
            model_input[fc.name] = np.random.random(sample_size)

    y_list = []  # multi label
    for task in task_types:
        if task == 'binary':
            y_list.append(np.random.randint(0, 2, sample_size))
        else:
            y_list.append(np.random.random(sample_size))

    return model_input, y_list, feature_columns


def check_mtl_model(model, model_name, x, y_list, task_types, check_model_io=True):
    """
    Compile the model, train it for one epoch, then save/load weights and
    (optionally) the whole model file.

    Temporary ``.h5`` files are written to the current working directory and
    removed afterwards, even when saving or loading raises.

    :param model: object exposing ``compile``, ``fit``, ``save_weights`` and ``load_weights``
    :param model_name: used as file-name prefix and in progress messages
    :param x: model inputs passed to ``fit``
    :param y_list: multi label of y, one entry per task
    :param task_types: one entry per task; ``'binary'`` selects binary cross-entropy,
        ``'regression'`` selects mean squared error, other values add no loss
    :param check_model_io: test save/load model file or not
    :return: None
    """
    loss_by_task = {'binary': 'binary_crossentropy', 'regression': 'mean_squared_error'}
    loss_list = [loss_by_task[task_type] for task_type in task_types if task_type in loss_by_task]
    metric_list = []
    print('loss:', loss_list)
    print('metric:', metric_list)
    model.compile('adam', loss=loss_list, metrics=metric_list)
    model.fit(x, y_list, batch_size=100, epochs=1, validation_split=0.5)

    print(model_name + " test train valid pass!")
    weights_path = model_name + '_weights.h5'
    try:
        model.save_weights(weights_path)
        model.load_weights(weights_path)
    finally:
        # Do not leave artefacts behind when save/load fails.
        if os.path.exists(weights_path):
            os.remove(weights_path)
    print(model_name + " test save load weight pass!")
    if check_model_io:
        model_path = model_name + '.h5'
        try:
            save_model(model, model_path)
            model = load_model(model_path, custom_objects)
        finally:
            if os.path.exists(model_path):
                os.remove(model_path)
        print(model_name + " test save load model pass!")

    print(model_name + " test pass!")
# ---- tests/utils_test.py ----
def test_check_version():
    """Call ``check_version`` once with a version string and once with an integer."""
    for candidate in ('0.1.0', 20191231):
        check_version(candidate)


# ---- utils.py ----
def print_curtime(note=None):
    """Print the current wall-clock time as ``HH:MM:SS``.

    The line is prefixed with ``note`` when given, otherwise with
    ``Current time``.
    """
    stamp = datetime.datetime.now().strftime("%H:%M:%S")
    if note is None:
        print(f"Current time: {stamp}")
        return
    print(f"{note}: {stamp}")
# ---- utils.py (continued) ----
def tf_allow_growth():
    """Enable memory growth on every GPU device that TensorFlow lists."""
    for gpu in tf.config.experimental.list_physical_devices("GPU"):
        tf.config.experimental.set_memory_growth(gpu, True)


def create_logdir(root="logs/", args=None):
    """Create a timestamped log directory and install a default summary writer.

    Writes ``str(args)`` to ``config.txt`` inside the new directory and makes a
    file writer for its ``train`` sub-directory the default summary writer.

    :param root: string prefix for the directory name
    :param args: object whose printed form is stored in ``config.txt``
    :return: path of the created directory
    """
    # ``root`` is concatenated as a plain string, so it must end with a path
    # separator for the timestamp to become a sub-directory of it.
    log_dir = root + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
    Path(log_dir).mkdir(parents=True, exist_ok=True)
    with open(os.path.join(log_dir, "config.txt"), "w") as f:
        print(args, file=f)
    print(f"LOG_DIR: {log_dir}")
    summary_writer = tf.summary.create_file_writer(
        os.path.join(log_dir, "train"))
    summary_writer.set_as_default()
    return log_dir


def auc_score(y_true, y_pred):
    """Return ROC AUC, or 0.5 when it is undefined for the batch.

    AUC is undefined when the labels hold fewer than two distinct values
    (including an empty batch); 0.5 is returned in that case. For 2-D labels
    only the first column is inspected, as before; 1-D labels are now accepted.
    """
    labels = np.asarray(y_true)
    first_column = labels[:, 0] if labels.ndim > 1 else labels
    if len(np.unique(first_column)) <= 1:
        return 0.5
    return roc_auc_score(y_true, y_pred)


def auc(y_true, y_pred):
    """Wrap ``auc_score`` with ``tf.numpy_function`` and a ``tf.double`` output."""
    return tf.numpy_function(auc_score, (y_true, y_pred), tf.double)


def num_params(model):
    """Print and count the trainable parameters of ``model``.

    Variables whose name contains ``'embedding'`` are reported as embedding
    parameters, all others as dense parameters.

    :param model: object exposing ``trainable_variables``; each variable needs
        ``get_shape()`` and ``name``
    :return: total number of trainable parameters
    """
    embed_parameters = 0
    dense_parameters = 0
    for variable in model.trainable_variables:
        variable_parameters = 1
        for dim in variable.get_shape():
            variable_parameters *= dim
        if 'embedding' in variable.name:
            embed_parameters += variable_parameters
        else:
            dense_parameters += variable_parameters
    total_parameters = embed_parameters + dense_parameters

    print(f"Total Params: {total_parameters}")
    print(f"Dense Params: {dense_parameters}")
    print(f"Embed Params: {embed_parameters}")

    return total_parameters