├── .gitattributes
├── .gitignore
├── .readthedocs.yml
├── .travis.yml
├── CONTRIBUTING.md
├── LICENSE
├── README.md
├── README_DeepCTR.md
├── assets
    └── cowclip.png
├── clip.py
├── data
    ├── .gitkeep
    └── criteo_kaggle
    │   └── criteo_sample.txt
├── data_utils.py
├── deepctr
    ├── __init__.py
    ├── contrib
    │   ├── __init__.py
    │   ├── rnn.py
    │   ├── rnn_v2.py
    │   └── utils.py
    ├── estimator
    │   ├── __init__.py
    │   ├── feature_column.py
    │   ├── inputs.py
    │   ├── models
    │   │   ├── __init__.py
    │   │   ├── afm.py
    │   │   ├── autoint.py
    │   │   ├── ccpm.py
    │   │   ├── dcn.py
    │   │   ├── deepfefm.py
    │   │   ├── deepfm.py
    │   │   ├── fibinet.py
    │   │   ├── fnn.py
    │   │   ├── fwfm.py
    │   │   ├── nfm.py
    │   │   ├── pnn.py
    │   │   ├── wdl.py
    │   │   └── xdeepfm.py
    │   └── utils.py
    ├── feature_column.py
    ├── inputs.py
    ├── layers
    │   ├── __init__.py
    │   ├── activation.py
    │   ├── core.py
    │   ├── interaction.py
    │   ├── normalization.py
    │   ├── sequence.py
    │   └── utils.py
    ├── models
    │   ├── __init__.py
    │   ├── afm.py
    │   ├── autoint.py
    │   ├── ccpm.py
    │   ├── dcn.py
    │   ├── dcnmix.py
    │   ├── deepfefm.py
    │   ├── deepfm.py
    │   ├── difm.py
    │   ├── fgcnn.py
    │   ├── fibinet.py
    │   ├── flen.py
    │   ├── fnn.py
    │   ├── fwfm.py
    │   ├── ifm.py
    │   ├── mlr.py
    │   ├── multitask
    │   │   ├── __init__.py
    │   │   ├── esmm.py
    │   │   ├── mmoe.py
    │   │   ├── ple.py
    │   │   └── sharedbottom.py
    │   ├── nfm.py
    │   ├── onn.py
    │   ├── pnn.py
    │   ├── sequence
    │   │   ├── __init__.py
    │   │   ├── bst.py
    │   │   ├── dien.py
    │   │   ├── din.py
    │   │   └── dsin.py
    │   ├── wdl.py
    │   ├── widefm.py
    │   └── xdeepfm.py
    └── utils.py
├── docs
    ├── Makefile
    ├── make.bat
    ├── pics
    │   ├── AFM.png
    │   ├── AutoInt.png
    │   ├── BST.png
    │   ├── CCPM.png
    │   ├── CIN.png
    │   ├── DCN-M.png
    │   ├── DCN-Mix.png
    │   ├── DCN.png
    │   ├── DIEN.png
    │   ├── DIFM.jpg
    │   ├── DIN.png
    │   ├── DSIN.png
    │   ├── DeepFEFM.jpg
    │   ├── DeepFM.png
    │   ├── FGCNN.png
    │   ├── FLEN.jpg
    │   ├── FNN.png
    │   ├── FiBiNET.png
    │   ├── IFM.jpg
    │   ├── InteractingLayer.png
    │   ├── MLR.png
    │   ├── NFM.png
    │   ├── ONN.png
    │   ├── PNN.png
    │   ├── WDL.png
    │   ├── code.png
    │   ├── criteo_sample.png
    │   ├── deepctrbot.png
    │   ├── fms.png
    │   ├── mlr1.png
    │   ├── mlrvsdnn.png
    │   ├── movielens_sample.png
    │   ├── movielens_sample_with_genres.png
    │   ├── multitaskmodels
    │   │   ├── ESMM.png
    │   │   ├── MMOE.png
    │   │   ├── PLE.png
    │   │   └── SharedBottom.png
    │   ├── weichennote.png
    │   └── xDeepFM.png
    ├── requirements.readthedocs.txt
    └── source
    │   ├── Estimators.rst
    │   ├── Examples.md
    │   ├── FAQ.md
    │   ├── Features.md
    │   ├── History.md
    │   ├── Layers.rst
    │   ├── Model_Methods.md
    │   ├── Models.rst
    │   ├── Quick-Start.md
    │   ├── conf.py
    │   ├── deepctr.contrib.rnn.rst
    │   ├── deepctr.contrib.rst
    │   ├── deepctr.contrib.utils.rst
    │   ├── deepctr.estimator.feature_column.rst
    │   ├── deepctr.estimator.inputs.rst
    │   ├── deepctr.estimator.models.afm.rst
    │   ├── deepctr.estimator.models.autoint.rst
    │   ├── deepctr.estimator.models.ccpm.rst
    │   ├── deepctr.estimator.models.dcn.rst
    │   ├── deepctr.estimator.models.deepfefm.rst
    │   ├── deepctr.estimator.models.deepfm.rst
    │   ├── deepctr.estimator.models.fibinet.rst
    │   ├── deepctr.estimator.models.fnn.rst
    │   ├── deepctr.estimator.models.fwfm.rst
    │   ├── deepctr.estimator.models.nfm.rst
    │   ├── deepctr.estimator.models.pnn.rst
    │   ├── deepctr.estimator.models.rst
    │   ├── deepctr.estimator.models.wdl.rst
    │   ├── deepctr.estimator.models.xdeepfm.rst
    │   ├── deepctr.estimator.rst
    │   ├── deepctr.estimator.utils.rst
    │   ├── deepctr.feature_column.rst
    │   ├── deepctr.inputs.rst
    │   ├── deepctr.layers.activation.rst
    │   ├── deepctr.layers.core.rst
    │   ├── deepctr.layers.interaction.rst
    │   ├── deepctr.layers.normalization.rst
    │   ├── deepctr.layers.rst
    │   ├── deepctr.layers.sequence.rst
    │   ├── deepctr.layers.utils.rst
    │   ├── deepctr.models.afm.rst
    │   ├── deepctr.models.autoint.rst
    │   ├── deepctr.models.ccpm.rst
    │   ├── deepctr.models.dcn.rst
    │   ├── deepctr.models.dcnmix.rst
    │   ├── deepctr.models.deepfefm.rst
    │   ├── deepctr.models.deepfm.rst
    │   ├── deepctr.models.deepfwfm.rst
    │   ├── deepctr.models.difm.rst
    │   ├── deepctr.models.fgcnn.rst
    │   ├── deepctr.models.fibinet.rst
    │   ├── deepctr.models.flen.rst
    │   ├── deepctr.models.fnn.rst
    │   ├── deepctr.models.ifm.rst
    │   ├── deepctr.models.mlr.rst
    │   ├── deepctr.models.multitask.esmm.rst
    │   ├── deepctr.models.multitask.mmoe.rst
    │   ├── deepctr.models.multitask.ple.rst
    │   ├── deepctr.models.multitask.sharedbottom.rst
    │   ├── deepctr.models.nfm.rst
    │   ├── deepctr.models.onn.rst
    │   ├── deepctr.models.pnn.rst
    │   ├── deepctr.models.rst
    │   ├── deepctr.models.sequence.bst.rst
    │   ├── deepctr.models.sequence.dien.rst
    │   ├── deepctr.models.sequence.din.rst
    │   ├── deepctr.models.sequence.dsin.rst
    │   ├── deepctr.models.wdl.rst
    │   ├── deepctr.models.xdeepfm.rst
    │   ├── deepctr.rst
    │   ├── deepctr.utils.rst
    │   ├── index.rst
    │   └── modules.rst
├── examples
    ├── avazu_sample.txt
    ├── census-income.sample
    ├── criteo_sample.te.tfrecords
    ├── criteo_sample.tr.tfrecords
    ├── criteo_sample.txt
    ├── gen_tfrecords.py
    ├── movielens_age_vocabulary.csv
    ├── movielens_sample.txt
    ├── run_all.sh
    ├── run_classification_criteo.py
    ├── run_classification_criteo_hash.py
    ├── run_classification_criteo_multi_gpu.py
    ├── run_dien.py
    ├── run_din.py
    ├── run_dsin.py
    ├── run_estimator_pandas_classification.py
    ├── run_estimator_tfrecord_classification.py
    ├── run_flen.py
    ├── run_mtl.py
    ├── run_multivalue_movielens.py
    ├── run_multivalue_movielens_hash.py
    ├── run_multivalue_movielens_vocab_hash.py
    └── run_regression_movielens.py
├── requirements.txt
├── setup.cfg
├── setup.py
├── tests
    ├── README.md
    ├── __init__.py
    ├── feature_test.py
    ├── layers
    │   ├── __init__.py
    │   ├── activations_test.py
    │   ├── core_test.py
    │   ├── interaction_test.py
    │   ├── normalization_test.py
    │   ├── sequence_test.py
    │   ├── utils_test.py
    │   └── vocabulary_example.csv
    ├── models
    │   ├── AFM_test.py
    │   ├── AutoInt_test.py
    │   ├── BST_test.py
    │   ├── CCPM_test.py
    │   ├── DCNMix_test.py
    │   ├── DCN_test.py
    │   ├── DIEN_test.py
    │   ├── DIFM_test.py
    │   ├── DIN_test.py
    │   ├── DSIN_test.py
    │   ├── DeepFEFM_test.py
    │   ├── DeepFM_test.py
    │   ├── FGCNN_test.py
    │   ├── FLEN_test.py
    │   ├── FNN_test.py
    │   ├── FiBiNET_test.py
    │   ├── FwFM_test.py
    │   ├── IFM_test.py
    │   ├── MLR_test.py
    │   ├── MTL_test.py
    │   ├── NFM_test.py
    │   ├── ONN_test.py
    │   ├── PNN_test.py
    │   ├── WDL_test.py
    │   ├── __init__.py
    │   └── xDeepFM_test.py
    ├── utils.py
    ├── utils_mtl.py
    └── utils_test.py
├── train.py
└── utils.py


/.gitattributes:
--------------------------------------------------------------------------------
 1 | # Auto detect text files and perform LF normalization
 2 | * text=auto
 3 | 
 4 | # Custom for Visual Studio
 5 | *.cs     diff=csharp
 6 | 
 7 | # Standard to msysgit
 8 | *.doc	 diff=astextplain
 9 | *.DOC	 diff=astextplain
10 | *.docx diff=astextplain
11 | *.DOCX diff=astextplain
12 | *.dot  diff=astextplain
13 | *.DOT  diff=astextplain
14 | *.pdf  diff=astextplain
15 | *.PDF	 diff=astextplain
16 | *.rtf	 diff=astextplain
17 | *.RTF	 diff=astextplain
18 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
  1 | # data
  2 | *.feather
  3 | !data/.gitkeep
  4 | !data/criteo_kaggle/criteo_sample.txt
  5 | data/criteo_kaggle/train.txt
  6 | data/avazu
  7 | data/taobao
  8 | data/criteo_terabyte
  9 | logs
 10 | 
 11 | 
 12 | *.h5
 13 | *.ipynb
 14 | .pytest_cache/
 15 | .vscode/
 16 | tests/unused/*
 17 | # Byte-compiled / optimized / DLL files
 18 | __pycache__/
 19 | *.py[cod]
 20 | *$py.class
 21 | .idea/
 22 | # C extensions
 23 | *.so
 24 | 
 25 | # Distribution / packaging
 26 | .Python
 27 | env/
 28 | build/
 29 | develop-eggs/
 30 | dist/
 31 | downloads/
 32 | eggs/
 33 | .eggs/
 34 | lib/
 35 | lib64/
 36 | parts/
 37 | sdist/
 38 | var/
 39 | *.egg-info/
 40 | .installed.cfg
 41 | *.egg
 42 | 
 43 | # PyInstaller
 44 | #  Usually these files are written by a python script from a template
 45 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 46 | *.manifest
 47 | *.spec
 48 | 
 49 | # Installer logs
 50 | pip-log.txt
 51 | pip-delete-this-directory.txt
 52 | 
 53 | # Unit test / coverage reports
 54 | htmlcov/
 55 | .tox/
 56 | .coverage
 57 | .coverage.*
 58 | .cache
 59 | nosetests.xml
 60 | coverage.xml
 61 | *,cover
 62 | .hypothesis/
 63 | 
 64 | # Translations
 65 | *.mo
 66 | *.pot
 67 | 
 68 | # Django stuff:
 69 | *.log
 70 | local_settings.py
 71 | 
 72 | # Flask instance folder
 73 | instance/
 74 | 
 75 | # Scrapy stuff:
 76 | .scrapy
 77 | 
 78 | # Sphinx documentation
 79 | docs/_build/
 80 | 
 81 | # PyBuilder
 82 | target/
 83 | 
 84 | # IPython Notebook
 85 | .ipynb_checkpoints
 86 | 
 87 | # pyenv
 88 | .python-version
 89 | 
 90 | # celery beat schedule file
 91 | celerybeat-schedule
 92 | 
 93 | # dotenv
 94 | .env
 95 | 
 96 | # virtualenv
 97 | venv/
 98 | ENV/
 99 | 
100 | # Spyder project settings
101 | .spyderproject
102 | 
103 | # Rope project settings
104 | .ropeproject
105 | 
106 | # =========================
107 | # Operating System Files
108 | # =========================
109 | 
110 | # OSX
111 | # =========================
112 | 
113 | .DS_Store
114 | .AppleDouble
115 | .LSOverride
116 | 
117 | # Thumbnails
118 | ._*
119 | 
120 | # Files that might appear in the root of a volume
121 | .DocumentRevisions-V100
122 | .fseventsd
123 | .Spotlight-V100
124 | .TemporaryItems
125 | .Trashes
126 | .VolumeIcon.icns
127 | 
128 | # Directories potentially created on remote AFP share
129 | .AppleDB
130 | .AppleDesktop
131 | Network Trash Folder
132 | Temporary Items
133 | .apdisk
134 | 
135 | # Windows
136 | # =========================
137 | 
138 | # Windows image file caches
139 | Thumbs.db
140 | ehthumbs.db
141 | 
142 | # Folder config file
143 | Desktop.ini
144 | 
145 | # Recycle Bin used on file shares
146 | $RECYCLE.BIN/
147 | 
148 | # Windows Installer files
149 | *.cab
150 | *.msi
151 | *.msm
152 | *.msp
153 | 
154 | # Windows shortcuts
155 | *.lnk
156 | 


--------------------------------------------------------------------------------
/.readthedocs.yml:
--------------------------------------------------------------------------------
1 | build:
2 |     image: latest
3 | 
4 | python:
5 |     version: 3.6


--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
 1 | #sudo: required
 2 | #dist: trusty xenial
 3 | language: python
 4 | 
 5 | python:
 6 |   - "2.7" #time out
 7 |   #- "3.4"
 8 |   - "3.5"
 9 |   - "3.6"
10 |   #- "3.7"
11 | 
12 | env:
13 | #  - TF_VERSION=1.13.1
14 | #  - TF_VERSION=1.12.2
15 |   - TF_VERSION=1.4.0
16 |   #Not Support- TF_VERSION=1.7.0
17 |   #Not Support- TF_VERSION=1.7.1
18 |   #Not Support- TF_VERSION=1.8.0
19 |   #- TF_VERSION=1.8.0
20 |   #  - TF_VERSION=1.11.0
21 |   #- TF_VERSION=1.6.0
22 |   - TF_VERSION=2.0.0b1
23 |   #- TF_VERSION=1.13.2
24 |   - TF_VERSION=1.14.0
25 | 
26 | matrix:
27 |   allow_failures:
28 |     - python: "2.7"
29 |       env: TF_VERSION=1.6.0 # to speed up
30 |     - python: "2.7"
31 |       env: TF_VERSION=2.0.0b1
32 |     - python: "3.4"
33 |     - python: "3.5"
34 |     - python: "3.7"
35 |     - env: TF_VERSION=1.5.0 #local is ok,but sometimes CI is failed
36 |     - env: TF_VERSION=1.7.0
37 |     - env: TF_VERSION=1.7.1
38 |     - env: TF_VERSION=1.8.0
39 |     - env: TF_VERSION=1.12.0 # too slow
40 |     - env: TF_VERSION=1.13.1 # too slow
41 |     - env: TF_VERSION=1.13.2 # too slow
42 |     - env: TF_VERSION=1.14.0 # too slow
43 | 
44 |   fast_finish: true
45 | 
46 | cache: pip
47 | # command to install dependencies
48 | install:
49 |   - pip install -q pytest-cov==2.4.0 #>=2.4.0,<2.6
50 |   - pip install -q python-coveralls
51 |   - pip install -q codacy-coverage
52 |   - pip install -q tensorflow==$TF_VERSION
53 |   - pip install -q pandas
54 |   - pip install -q packaging
55 |   - pip install -e .
56 | # command to run tests
57 | script:
58 |   - pytest --cov=deepctr
59 | 
60 | notifications:
61 |   recipients:
62 |     - weichenswc@163.com
63 | 
64 |   on_success: change
65 |   on_failure: change
66 | 
67 | after_success:
68 |   - coveralls
69 |   - coverage xml
70 |   - python-codacy-coverage -r coverage.xml
71 | 


--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
 1 | This project is under development and we need developers to participate.
 2 | # Join us
 3 | If you
 4 | 
 5 | - are familiar with and interested in CTR prediction algorithms
 6 | - are familiar with TensorFlow
 7 | - have spare time to learn and develop
 8 | - are familiar with git
 9 | 
10 | please send a brief introduction of your background and experience to weichenswc@163.com. You are welcome to join us!
11 | 
12 | # Creating a pull request
13 | 1.  **Become a collaborator**: Send an email with an introduction and your GitHub account name to weichenswc@163.com and wait for an invitation to become a collaborator.
14 | 2.  **Fork&Dev**: Fork your own branch (`dev_yourname`) in `DeepCTR` from the `master` branch for development. If `master` is updated during the development process, remember to merge it into `dev_yourname` regularly.
15 | 3.  **Testing**: Test the logical correctness and effect of your changes once code development on the `dev_yourname` branch is finished.
16 | 4.  **Pre-release**: After testing, contact weichenswc@163.com for pre-release integration; usually your branch `dev_yourname` will be merged into the `release` branch by squash merge.
17 | 5.  **Release a new version**: After confirming that the change is no longer needed, `release` branch will be merged into `master` and a new python package will be released on pypi.
18 | 
19 | # Discussions
20 | 
21 | https://github.com/shenweichen/DeepCTR/discussions


--------------------------------------------------------------------------------
/assets/cowclip.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bytedance/LargeBatchCTR/12101b44d8e753a6d8d954437287f96d68895d49/assets/cowclip.png


--------------------------------------------------------------------------------
/clip.py:
--------------------------------------------------------------------------------
 1 | import tensorflow as tf
 2 | 
 3 | 
 4 | def cow_clip(w, g, ratio=1, ids=None, cnts=None, min_w=0.03, const=False):
 5 |     if isinstance(g, tf.IndexedSlices):
 6 |         # FIXME: This part is not tested
 7 |         values = tf.convert_to_tensor(g.values)
 8 |         clipnorm = tf.norm(tf.gather(w, g.indices), axis=-1)
 9 |     else:
10 |         values = g
11 |         if const:
12 |             clipnorm = tf.constant([min_w] * g.shape[0])
13 |         else:
14 |             clipnorm = tf.norm(w, axis=-1)
15 |             # bound weight norm by min_w
16 |             clipnorm = tf.maximum(clipnorm, min_w)
17 |         # scale by cnting
18 |         cnts = tf.tensor_scatter_nd_update(
19 |             tf.ones([clipnorm.shape[0]], dtype=tf.int32),
20 |             tf.expand_dims(ids, -1),
21 |             cnts,
22 |         )
23 |         clipnorm = clipnorm * tf.cast(cnts, tf.float32)
24 | 
25 |     clip_t = ratio * clipnorm
26 |     l2sum_row = tf.reduce_sum(values * values, axis=-1)
27 |     pred = l2sum_row > 0
28 |     l2sum_row_safe = tf.where(pred, l2sum_row, tf.ones_like(l2sum_row))
29 |     l2norm_row = tf.sqrt(l2sum_row_safe)
30 |     intermediate = values * tf.expand_dims(clip_t, -1)
31 |     g_clip = intermediate / tf.expand_dims(tf.maximum(l2norm_row, clip_t), -1)
32 | 
33 |     if isinstance(g, tf.IndexedSlices):
34 |         return tf.IndexedSlices(g_clip, g.indices, g.dense_shape)
35 |     else:
36 |         return g_clip
37 | 


--------------------------------------------------------------------------------
/data/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bytedance/LargeBatchCTR/12101b44d8e753a6d8d954437287f96d68895d49/data/.gitkeep


--------------------------------------------------------------------------------
/deepctr/__init__.py:
--------------------------------------------------------------------------------
1 | from .utils import check_version
2 | 
# Package version string; it is handed to check_version() when the package
# is imported.
__version__ = '0.9.0'
check_version(__version__)
5 | 


--------------------------------------------------------------------------------
/deepctr/contrib/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bytedance/LargeBatchCTR/12101b44d8e753a6d8d954437287f96d68895d49/deepctr/contrib/__init__.py


--------------------------------------------------------------------------------
/deepctr/estimator/__init__.py:
--------------------------------------------------------------------------------
1 | from .models import *


--------------------------------------------------------------------------------
/deepctr/estimator/feature_column.py:
--------------------------------------------------------------------------------
 1 | import tensorflow as tf
 2 | from tensorflow.python.feature_column.feature_column import _EmbeddingColumn
 3 | 
 4 | from .utils import LINEAR_SCOPE_NAME, variable_scope, get_collection, get_GraphKeys, input_layer, get_losses
 5 | 
 6 | 
 7 | def linear_model(features, linear_feature_columns):
 8 |     if tf.__version__ >= '2.0.0':
 9 |         linear_logits = tf.compat.v1.feature_column.linear_model(features, linear_feature_columns)
10 |     else:
11 |         linear_logits = tf.feature_column.linear_model(features, linear_feature_columns)
12 |     return linear_logits
13 | 
14 | 
def get_linear_logit(features, linear_feature_columns, l2_reg_linear=0):
    """Compute the logit of the linear part inside ``LINEAR_SCOPE_NAME``.

    With no linear feature columns the result is a fresh ``[[0.0]]``
    variable named ``bias_weights``. Otherwise the logit comes from
    ``linear_model``; when ``l2_reg_linear`` is positive, an L2 penalty is
    registered as a regularization loss for every trainable variable of the
    linear scope except the last one.

    :param features: feature input forwarded to ``linear_model``.
    :param linear_feature_columns: feature columns of the linear part.
    :param l2_reg_linear: L2 strength; penalties are added only when > 0.
    :return: the linear logit.
    """
    with variable_scope(LINEAR_SCOPE_NAME):
        if not linear_feature_columns:
            return tf.Variable([[0.0]], name='bias_weights')

        logit = linear_model(features, linear_feature_columns)
        if l2_reg_linear > 0:
            scope_vars = get_collection(get_GraphKeys().TRAINABLE_VARIABLES, LINEAR_SCOPE_NAME)
            # The last variable is left unregularized
            # (presumably the bias term -- TODO confirm).
            for weight in scope_vars[:-1]:
                penalty_name = weight.name.split(":")[0] + "_l2loss"
                penalty = l2_reg_linear * tf.nn.l2_loss(weight, name=penalty_name)
                get_losses().add_loss(penalty, get_GraphKeys().REGULARIZATION_LOSSES)
        return logit
28 | 
29 | 
def input_from_feature_columns(features, feature_columns, l2_reg_embedding=0.0):
    """Split feature columns into embedding tensors and dense tensors.

    Each column is fed through ``input_layer`` on its own. Columns for which
    ``is_embedding`` is true get an extra axis inserted at position 1 and go
    into the first list; all other columns go into the second list
    unchanged. When ``l2_reg_embedding`` is positive, an L2 penalty on each
    embedding output is registered as a regularization loss.

    :param features: feature input forwarded to ``input_layer``.
    :param feature_columns: iterable of feature columns to process.
    :param l2_reg_embedding: L2 strength for embedding outputs; used if > 0.
    :return: ``(sparse_emb_list, dense_value_list)``.
    """
    embeddings, dense_values = [], []
    for column in feature_columns:
        if not is_embedding(column):
            dense_values.append(input_layer(features, [column]))
            continue

        embedded = tf.expand_dims(input_layer(features, [column]), axis=1)
        embeddings.append(embedded)
        if l2_reg_embedding > 0:
            penalty = l2_reg_embedding * tf.nn.l2_loss(embedded, name=column.name + "_l2loss")
            get_losses().add_loss(penalty, get_GraphKeys().REGULARIZATION_LOSSES)

    return embeddings, dense_values
45 | 
46 | 
def is_embedding(feature_column):
    """Tell whether ``feature_column`` is an embedding column.

    Accepts both the legacy ``_EmbeddingColumn`` and, when it can be
    imported, the ``EmbeddingColumn`` class from ``feature_column_v2``.

    :param feature_column: object to test.
    :return: ``True`` if it is an instance of either embedding class.
    """
    try:
        from tensorflow.python.feature_column.feature_column_v2 import EmbeddingColumn as embedding_v2
    except ImportError:
        # No v2 module available: only the legacy class can match.
        embedding_v2 = _EmbeddingColumn
    embedding_types = (_EmbeddingColumn, embedding_v2)
    return isinstance(feature_column, embedding_types)
53 | 


--------------------------------------------------------------------------------
/deepctr/estimator/inputs.py:
--------------------------------------------------------------------------------
 1 | import tensorflow as tf
 2 | 
 3 | 
 4 | def input_fn_pandas(df, features, label=None, batch_size=256, num_epochs=1, shuffle=False, queue_capacity_factor=10,
 5 |                     num_threads=1):
 6 |     if label is not None:
 7 |         y = df[label]
 8 |     else:
 9 |         y = None
10 |     if tf.__version__ >= "2.0.0":
11 |         return tf.compat.v1.estimator.inputs.pandas_input_fn(df[features], y, batch_size=batch_size,
12 |                                                              num_epochs=num_epochs,
13 |                                                              shuffle=shuffle,
14 |                                                              queue_capacity=batch_size * queue_capacity_factor,
15 |                                                              num_threads=num_threads)
16 | 
17 |     return tf.estimator.inputs.pandas_input_fn(df[features], y, batch_size=batch_size, num_epochs=num_epochs,
18 |                                                shuffle=shuffle, queue_capacity=batch_size * queue_capacity_factor,
19 |                                                num_threads=num_threads)
20 | 
21 | 
def input_fn_tfrecord(filenames, feature_description, label=None, batch_size=256, num_epochs=1, num_parallel_calls=8,
                      shuffle_factor=10, prefetch_factor=1,
                      ):
    """Create an estimator input function that reads TFRecord files.

    The returned callable builds a ``TFRecordDataset``, parses every record
    with ``feature_description``, optionally shuffles, then repeats, batches
    and optionally prefetches, and returns the next element of a one-shot
    iterator.

    :param filenames: passed to ``tf.data.TFRecordDataset``.
    :param feature_description: ``features`` argument of
        ``parse_single_example``.
    :param label: key popped from the parsed features and returned as the
        target; ``None`` yields features only.
    :param batch_size: batch size.
    :param num_epochs: argument of ``Dataset.repeat``.
    :param num_parallel_calls: parallelism of the parsing ``map``.
    :param shuffle_factor: if > 0, shuffle with a buffer of
        ``batch_size * shuffle_factor``.
    :param prefetch_factor: if > 0, prefetch with a buffer of
        ``batch_size * prefetch_factor``.
    :return: a zero-argument ``input_fn``.
    """

    def _parse_examples(serial_exmp):
        # Older TF exposes parse_single_example at the top level; fall back
        # to tf.io when that attribute is missing.
        try:
            parsed = tf.parse_single_example(serial_exmp, features=feature_description)
        except AttributeError:
            parsed = tf.io.parse_single_example(serial_exmp, features=feature_description)
        if label is None:
            return parsed
        target = parsed.pop(label)
        return parsed, target

    def input_fn():
        records = tf.data.TFRecordDataset(filenames)
        records = records.map(_parse_examples, num_parallel_calls=num_parallel_calls)

        if shuffle_factor > 0:
            records = records.shuffle(buffer_size=batch_size * shuffle_factor)

        records = records.repeat(num_epochs)
        records = records.batch(batch_size)

        if prefetch_factor > 0:
            records = records.prefetch(buffer_size=batch_size * prefetch_factor)

        # Dataset.make_one_shot_iterator is not available on every TF
        # version; use the compat.v1 helper when it is missing.
        try:
            one_shot = records.make_one_shot_iterator()
        except AttributeError:
            one_shot = tf.compat.v1.data.make_one_shot_iterator(records)

        return one_shot.get_next()

    return input_fn
53 | 


--------------------------------------------------------------------------------
/deepctr/estimator/models/__init__.py:
--------------------------------------------------------------------------------
 1 | from .afm import AFMEstimator
 2 | from .autoint import AutoIntEstimator
 3 | from .ccpm import CCPMEstimator
 4 | from .dcn import DCNEstimator
 5 | from .deepfm import DeepFMEstimator
 6 | from .fwfm import FwFMEstimator
 7 | from .fibinet import FiBiNETEstimator
 8 | from .fnn import FNNEstimator
 9 | from .nfm import NFMEstimator
10 | from .pnn import PNNEstimator
11 | from .wdl import WDLEstimator
12 | from .xdeepfm import xDeepFMEstimator
13 | from .deepfefm import DeepFEFMEstimator
14 | 


--------------------------------------------------------------------------------
/deepctr/estimator/models/afm.py:
--------------------------------------------------------------------------------
 1 | # -*- coding:utf-8 -*-
 2 | """
 3 | 
 4 | Author:
 5 |     Weichen Shen, weichenswc@163.com
 6 | 
 7 | Reference:
 8 |     [1] Xiao J, Ye H, He X, et al. Attentional factorization machines: Learning the weight of feature interactions via attention networks[J]. arXiv preprint arXiv:1708.04617, 2017.
 9 |     (https://arxiv.org/abs/1708.04617)
10 | 
11 | """
12 | import tensorflow as tf
13 | 
14 | from ..feature_column import get_linear_logit, input_from_feature_columns
15 | from ..utils import deepctr_model_fn, DNN_SCOPE_NAME, variable_scope
16 | from ...layers.interaction import AFMLayer, FM
17 | from ...layers.utils import concat_func
18 | 
19 | 
def AFMEstimator(linear_feature_columns, dnn_feature_columns, use_attention=True, attention_factor=8,
                 l2_reg_linear=1e-5, l2_reg_embedding=1e-5, l2_reg_att=1e-5, afm_dropout=0, seed=1024,
                 task='binary', model_dir=None, config=None, linear_optimizer='Ftrl',
                 dnn_optimizer='Adagrad', training_chief_hooks=None):
    """Build a ``tf.estimator.Estimator`` for the Attentional Factorization Machine.

    The model logit is the sum of a linear logit and a pairwise-interaction
    logit. The interaction logit comes from ``AFMLayer`` when
    ``use_attention`` is true, otherwise from a plain ``FM`` layer applied to
    the concatenated embeddings.

    :param linear_feature_columns: An iterable of the features used by the linear part of the model.
    :param dnn_feature_columns: An iterable of the features used by the deep part of the model.
    :param use_attention: bool. ``False`` turns the model into a **standard Factorization Machine**.
    :param attention_factor: positive integer, number of units in the attention net.
    :param l2_reg_linear: float. L2 regularizer strength for the linear part.
    :param l2_reg_embedding: float. L2 regularizer strength for the embedding vectors.
    :param l2_reg_att: float. L2 regularizer strength for the attention net.
    :param afm_dropout: float in [0,1). Fraction of the attention net output units to drop.
    :param seed: integer used as random seed.
    :param task: str, ``"binary"`` for binary logloss or ``"regression"`` for regression loss.
    :param model_dir: Directory where model parameters, graph, etc. are saved. It can
        also be used to load checkpoints into an estimator to continue training
        a previously saved model.
    :param config: tf.RunConfig object that configures the runtime settings.
    :param linear_optimizer: An instance of `tf.Optimizer` used to apply gradients to
        the linear part of the model. Defaults to FTRL optimizer.
    :param dnn_optimizer: An instance of `tf.Optimizer` used to apply gradients to
        the deep part of the model. Defaults to Adagrad optimizer.
    :param training_chief_hooks: Iterable of `tf.train.SessionRunHook` objects to
        run on the chief worker during training.
    :return: A Tensorflow Estimator instance.

    """

    def _model_fn(features, labels, mode, config):
        is_training = mode == tf.estimator.ModeKeys.TRAIN

        linear_part = get_linear_logit(features, linear_feature_columns, l2_reg_linear=l2_reg_linear)

        with variable_scope(DNN_SCOPE_NAME):
            # Only the embedding outputs are used; dense values are ignored.
            embeddings, _ = input_from_feature_columns(features, dnn_feature_columns,
                                                       l2_reg_embedding=l2_reg_embedding)
            if not use_attention:
                interaction_part = FM()(concat_func(embeddings, axis=1))
            else:
                attention_layer = AFMLayer(attention_factor, l2_reg_att, afm_dropout, seed)
                interaction_part = attention_layer(embeddings, training=is_training)

        combined_logits = linear_part + interaction_part

        return deepctr_model_fn(features, mode, combined_logits, labels, task, linear_optimizer, dnn_optimizer,
                                training_chief_hooks=training_chief_hooks)

    return tf.estimator.Estimator(_model_fn, model_dir=model_dir, config=config)
71 | 


--------------------------------------------------------------------------------
/deepctr/layers/__init__.py:
--------------------------------------------------------------------------------
 1 | import tensorflow as tf
 2 | 
 3 | from .activation import Dice
 4 | from .core import DNN, LocalActivationUnit, PredictionLayer
 5 | from .interaction import (CIN, FM, AFMLayer, BiInteractionPooling, CrossNet, CrossNetMix,
 6 |                           InnerProductLayer, InteractingLayer,
 7 |                           OutterProductLayer, FGCNNLayer, SENETLayer, BilinearInteraction,
 8 |                           FieldWiseBiInteraction, FwFMLayer, FEFMLayer)
 9 | from .normalization import LayerNormalization
10 | from .sequence import (AttentionSequencePoolingLayer, BiasEncoding, BiLSTM,
11 |                        KMaxPooling, SequencePoolingLayer, WeightedSequenceLayer,
12 |                        Transformer, DynamicGRU,PositionEncoding)
13 | 
14 | from .utils import NoMask, Hash, Linear, Add, combined_dnn_input, softmax, reduce_sum
15 | 
# Lookup table from a name to the matching layer class or helper function
# imported above, plus the `tf` module itself under the key 'tf'.
# NOTE(review): presumably meant to be passed as `custom_objects` when
# loading saved Keras models that use these layers -- confirm against callers.
custom_objects = {'tf': tf,
                  'InnerProductLayer': InnerProductLayer,
                  'OutterProductLayer': OutterProductLayer,
                  'DNN': DNN,
                  'PredictionLayer': PredictionLayer,
                  'FM': FM,
                  'AFMLayer': AFMLayer,
                  'CrossNet': CrossNet,
                  'CrossNetMix': CrossNetMix,
                  'BiInteractionPooling': BiInteractionPooling,
                  'LocalActivationUnit': LocalActivationUnit,
                  'Dice': Dice,
                  'SequencePoolingLayer': SequencePoolingLayer,
                  'AttentionSequencePoolingLayer': AttentionSequencePoolingLayer,
                  'CIN': CIN,
                  'InteractingLayer': InteractingLayer,
                  'LayerNormalization': LayerNormalization,
                  'BiLSTM': BiLSTM,
                  'Transformer': Transformer,
                  'NoMask': NoMask,
                  'BiasEncoding': BiasEncoding,
                  'KMaxPooling': KMaxPooling,
                  'FGCNNLayer': FGCNNLayer,
                  'Hash': Hash,
                  'Linear': Linear,
                  'DynamicGRU': DynamicGRU,
                  'SENETLayer': SENETLayer,
                  'BilinearInteraction': BilinearInteraction,
                  'WeightedSequenceLayer': WeightedSequenceLayer,
                  'Add': Add,
                  'FieldWiseBiInteraction': FieldWiseBiInteraction,
                  'FwFMLayer': FwFMLayer,
                  'softmax': softmax,
                  'FEFMLayer': FEFMLayer,
                  'reduce_sum': reduce_sum,
                  'PositionEncoding':PositionEncoding
                  }
53 | 


--------------------------------------------------------------------------------
/deepctr/layers/activation.py:
--------------------------------------------------------------------------------
 1 | # -*- coding:utf-8 -*-
 2 | """
 3 | 
 4 | Author:
 5 |     Weichen Shen,weichenswc@163.com
 6 | 
 7 | """
 8 | 
 9 | import tensorflow as tf
10 | from tensorflow.python.keras.initializers import Zeros
11 | from tensorflow.python.keras.layers import Layer
12 | 
# Python 2/3 compatibility shim: when the bare ``unicode`` name lookup raises
# NameError (as it does on Python 3, which has no such builtin), alias it to
# ``str`` so the isinstance check in ``activation_layer`` works on both.
try:
    unicode
except NameError:
    unicode = str
17 | 
18 | 
class Dice(Layer):
    """Data Adaptive Activation Function (Dice) introduced with DIN.

    It can be seen as a generalization of PReLU whose rectified point adapts to the
    distribution of the input data. The input is batch-normalized (without learned
    scale/offset), passed through a sigmoid to obtain a gate ``p`` and the output is
    ``alphas * (1 - p) * x + p * x`` where ``alphas`` is a learned vector that starts at zero.

      Input shape
        - Arbitrary. Use the keyword argument `input_shape` (tuple of integers, does not include the samples axis) when using this layer as the first layer in a model.

      Output shape
        - Same shape as the input.

      Arguments
        - **axis** : Integer, the axis handed to the internal BatchNormalization (typically the features axis).

        - **epsilon** : Small float added to variance to avoid dividing by zero.

      References
        - [Zhou G, Zhu X, Song C, et al. Deep interest network for click-through rate prediction[C]//Proceedings of the 24th ACM SIGKDD International Conference on Knowledge Discovery & Data Mining. ACM, 2018: 1059-1068.](https://arxiv.org/pdf/1706.06978.pdf)
    """

    def __init__(self, axis=-1, epsilon=1e-9, **kwargs):
        """Store the normalization settings and forward ``kwargs`` to ``Layer``."""
        self.axis = axis
        self.epsilon = epsilon
        super(Dice, self).__init__(**kwargs)

    def build(self, input_shape):
        """Create the batch-norm sub-layer and the per-feature ``alphas`` weight."""
        # Pure normalization: the batch norm has no trainable gamma/beta.
        self.bn = tf.keras.layers.BatchNormalization(
            axis=self.axis, epsilon=self.epsilon, center=False, scale=False)
        # One alpha per entry of the last input dimension, initialised to zero.
        alpha_shape = (input_shape[-1],)
        self.alphas = self.add_weight(
            shape=alpha_shape, initializer=Zeros(), dtype=tf.float32, name='dice_alpha')
        super(Dice, self).build(input_shape)  # marks the layer as built
        self.uses_learning_phase = True

    def call(self, inputs, training=None, **kwargs):
        """Apply the Dice gate to ``inputs``; ``training`` is passed to the batch norm."""
        gate = tf.sigmoid(self.bn(inputs, training=training))
        return self.alphas * (1.0 - gate) * inputs + gate * inputs

    def compute_output_shape(self, input_shape):
        """The activation is element-wise, so the shape is unchanged."""
        return input_shape

    def get_config(self, ):
        """Return the base layer config extended with ``axis`` and ``epsilon``."""
        merged = dict(super(Dice, self).get_config())
        merged.update({'axis': self.axis, 'epsilon': self.epsilon})
        return merged
64 | 
65 | 
def activation_layer(activation):
    """Build an activation layer from a name or a ``Layer`` subclass.

    :param activation: ``"dice"`` / ``"Dice"`` to get a :class:`Dice` layer, any other string
        (forwarded to ``tf.keras.layers.Activation``), or a ``Layer`` subclass, which is
        instantiated without arguments.
    :return: The instantiated activation layer.
    :raises ValueError: if ``activation`` is neither a string nor a ``Layer`` subclass.
    """
    if activation in ("dice", "Dice"):
        act_layer = Dice()
    elif isinstance(activation, (str, unicode)):
        act_layer = tf.keras.layers.Activation(activation)
    # issubclass() raises TypeError when its first argument is not a class, which
    # used to hide the ValueError below for inputs such as None or a Layer instance.
    elif isinstance(activation, type) and issubclass(activation, Layer):
        act_layer = activation()
    else:
        raise ValueError(
            "Invalid activation,found %s.You should use a str or a Activation Layer Class." % (activation))
    return act_layer
77 | 


--------------------------------------------------------------------------------
/deepctr/layers/normalization.py:
--------------------------------------------------------------------------------
 1 | # -*- coding:utf-8 -*-
 2 | """
 3 | 
 4 | Author:
 5 |     Weichen Shen,weichenswc@163.com
 6 | 
 7 | """
 8 | 
 9 | from tensorflow.python.keras import backend as K
10 | from tensorflow.python.keras.initializers import Ones, Zeros
11 | from tensorflow.python.keras.layers import Layer
12 | 
13 | 
class LayerNormalization(Layer):
    """Layer normalization with optional learned scale (``gamma``) and offset (``beta``).

      Arguments
        - **axis** : Integer, the axis over which mean and variance are computed.

        - **eps** : Small float added to the variance before taking the square root.

        - **center** : bool. If True, add ``beta`` to the normalized output.

        - **scale** : bool. If True, multiply the normalized output by ``gamma``.
    """

    def __init__(self, axis=-1, eps=1e-9, center=True,
                 scale=True, **kwargs):
        self.axis = axis
        self.eps = eps
        self.center = center
        self.scale = scale
        super(LayerNormalization, self).__init__(**kwargs)

    def build(self, input_shape):
        # gamma and beta are always sized by the last input dimension.
        self.gamma = self.add_weight(name='gamma', shape=input_shape[-1:],
                                     initializer=Ones(), trainable=True)
        self.beta = self.add_weight(name='beta', shape=input_shape[-1:],
                                    initializer=Zeros(), trainable=True)
        super(LayerNormalization, self).build(input_shape)

    def call(self, inputs):
        mean = K.mean(inputs, axis=self.axis, keepdims=True)
        # Reduce over the same axis as the mean; this was hard-coded to -1, which
        # disagreed with ``mean`` whenever a non-default ``axis`` was configured.
        variance = K.mean(K.square(inputs - mean), axis=self.axis, keepdims=True)
        std = K.sqrt(variance + self.eps)
        outputs = (inputs - mean) / std
        if self.scale:
            outputs *= self.gamma
        if self.center:
            outputs += self.beta
        return outputs

    def compute_output_shape(self, input_shape):
        # Normalization is element-wise, so the shape is unchanged.
        return input_shape

    def get_config(self, ):
        config = {'axis': self.axis, 'eps': self.eps, 'center': self.center, 'scale': self.scale}
        base_config = super(LayerNormalization, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))
48 | 


--------------------------------------------------------------------------------
/deepctr/models/__init__.py:
--------------------------------------------------------------------------------
 1 | from .afm import AFM
 2 | from .autoint import AutoInt
 3 | from .ccpm import CCPM
 4 | from .dcn import DCN
 5 | from .dcnmix import DCNMix
 6 | from .deepfefm import DeepFEFM
 7 | from .deepfm import DeepFM
 8 | from .difm import DIFM
 9 | from .fgcnn import FGCNN
10 | from .fibinet import FiBiNET
11 | from .flen import FLEN
12 | from .fnn import FNN
13 | from .fwfm import FwFM
14 | from .ifm import IFM
15 | from .mlr import MLR
16 | from .multitask import SharedBottom, ESMM, MMOE, PLE
17 | from .nfm import NFM
18 | from .onn import ONN
19 | from .pnn import PNN
20 | from .sequence import DIN, DIEN, DSIN, BST
21 | from .wdl import WDL
22 | from .xdeepfm import xDeepFM
23 | from .widefm import wideFM
24 | 
# Public model constructors re-exported by ``deepctr.models`` (each name listed once).
__all__ = ["AFM", "CCPM", "DCN", "IFM", "DIFM", "DCNMix", "MLR", "DeepFM", "NFM", "DIN", "DIEN", "FNN", "PNN",
           "WDL", "xDeepFM", "AutoInt", "ONN", "FGCNN", "DSIN", "FiBiNET", "FLEN", "FwFM", "BST", "DeepFEFM",
           "SharedBottom", "ESMM", "MMOE", "PLE", "wideFM"]
28 | 


--------------------------------------------------------------------------------
/deepctr/models/afm.py:
--------------------------------------------------------------------------------
 1 | # -*- coding:utf-8 -*-
 2 | """
 3 | 
 4 | Author:
 5 |     Weichen Shen, weichenswc@163.com
 6 | 
 7 | Reference:
 8 |     [1] Xiao J, Ye H, He X, et al. Attentional factorization machines: Learning the weight of feature interactions via attention networks[J]. arXiv preprint arXiv:1708.04617, 2017.
 9 |     (https://arxiv.org/abs/1708.04617)
10 | 
11 | """
12 | import tensorflow as tf
13 | 
14 | from ..feature_column import build_input_features, get_linear_logit, DEFAULT_GROUP_NAME, input_from_feature_columns
15 | from ..layers.core import PredictionLayer
16 | from ..layers.interaction import AFMLayer, FM
17 | from ..layers.utils import concat_func, add_func
18 | 
19 | 
def AFM(linear_feature_columns, dnn_feature_columns, fm_group=DEFAULT_GROUP_NAME, use_attention=True,
        attention_factor=8,
        l2_reg_linear=1e-5, l2_reg_embedding=1e-5, l2_reg_att=1e-5, afm_dropout=0, seed=1024,
        task='binary'):
    """Instantiates the Attentional Factorization Machine architecture.

    :param linear_feature_columns: An iterable containing all the features used by linear part of the model.
    :param dnn_feature_columns: An iterable containing all the features used by deep part of the model.
    :param fm_group: str or list, group_name(s) of features that will be used to do feature interactions.
        A single string is treated as one group name.
    :param use_attention: bool,whether use attention or not,if set to ``False``.it is the same as **standard Factorization Machine**
    :param attention_factor: positive integer,units in attention net
    :param l2_reg_linear: float. L2 regularizer strength applied to linear part
    :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vector
    :param l2_reg_att: float. L2 regularizer strength applied to attention net
    :param afm_dropout: float in [0,1), Fraction of the attention net output units to dropout.
    :param seed: integer ,to use as random seed.
    :param task: str, ``"binary"`` for  binary logloss or  ``"regression"`` for regression loss
    :return: A Keras model instance.
    """

    # With a bare string, ``k in fm_group`` below would be a substring test, so a
    # group whose name is merely contained in the string would be selected as well.
    # Wrap it so the check is exact membership, as in DeepFM / FwFM.
    if isinstance(fm_group, str):
        fm_group = (fm_group,)

    features = build_input_features(
        linear_feature_columns + dnn_feature_columns)

    inputs_list = list(features.values())

    group_embedding_dict, _ = input_from_feature_columns(features, dnn_feature_columns, l2_reg_embedding,
                                                         seed, support_dense=False, support_group=True)

    linear_logit = get_linear_logit(features, linear_feature_columns, seed=seed, prefix='linear',
                                    l2_reg=l2_reg_linear)

    # Embedding lists of the groups that take part in the pairwise interactions.
    selected_groups = [v for k, v in group_embedding_dict.items() if k in fm_group]

    if use_attention:
        fm_logit = add_func([AFMLayer(attention_factor, l2_reg_att, afm_dropout,
                                      seed)(list(v)) for v in selected_groups])
    else:
        # Without attention the interaction part is a plain FM over each group.
        fm_logit = add_func([FM()(concat_func(v, axis=1)) for v in selected_groups])

    final_logit = add_func([linear_logit, fm_logit])
    output = PredictionLayer(task)(final_logit)

    model = tf.keras.models.Model(inputs=inputs_list, outputs=output)
    return model
63 | 


--------------------------------------------------------------------------------
/deepctr/models/deepfm.py:
--------------------------------------------------------------------------------
 1 | # -*- coding:utf-8 -*-
 2 | """
 3 | Author:
 4 |     Weichen Shen, weichenswc@163.com
 5 | 
 6 | Reference:
 7 |     [1] Guo H, Tang R, Ye Y, et al. Deepfm: a factorization-machine based neural network for ctr prediction[J]. arXiv preprint arXiv:1703.04247, 2017.(https://arxiv.org/abs/1703.04247)
 8 | 
 9 | """
10 | 
11 | from itertools import chain
12 | 
13 | import tensorflow as tf
14 | 
15 | from ..feature_column import build_input_features, get_linear_logit, DEFAULT_GROUP_NAME, input_from_feature_columns
16 | from ..layers.core import PredictionLayer, DNN
17 | from ..layers.interaction import FM
18 | from ..layers.utils import concat_func, add_func, combined_dnn_input
19 | 
20 | 
def DeepFM(linear_feature_columns, dnn_feature_columns, fm_group=(DEFAULT_GROUP_NAME,), dnn_hidden_units=(256, 128, 64),
           l2_reg_linear=0.00001, l2_reg_embedding=0.00001, l2_reg_dnn=0, seed=1024, dnn_dropout=0,
           dnn_activation='relu', dnn_use_bn=False, task='binary', keras_model=tf.keras.models.Model):
    """Instantiates the DeepFM Network architecture.

    The final logit is the sum of a linear logit, an FM logit (one FM per selected
    embedding group) and the logit of a DNN fed with all embeddings and dense values.

    :param linear_feature_columns: An iterable containing all the features used by linear part of the model.
    :param dnn_feature_columns: An iterable containing all the features used by deep part of the model.
    :param fm_group: list, group_name of features that will be used to do feature interactions.
    :param dnn_hidden_units: list,list of positive integer or empty list, the layer number and units in each layer of DNN
    :param l2_reg_linear: float. L2 regularizer strength applied to linear part
    :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vector
    :param l2_reg_dnn: float. L2 regularizer strength applied to DNN
    :param seed: integer ,to use as random seed.
    :param dnn_dropout: float in [0,1), the probability we will drop out a given DNN coordinate.
    :param dnn_activation: Activation function to use in DNN
    :param dnn_use_bn: bool. Whether use BatchNormalization before activation or not in DNN
    :param task: str, ``"binary"`` for  binary logloss or  ``"regression"`` for regression loss
    :param keras_model: callable invoked as ``keras_model(inputs=..., outputs=...)`` to build the
        returned model; defaults to ``tf.keras.models.Model``.
    :return: The model instance produced by ``keras_model``.
    """

    input_features = build_input_features(linear_feature_columns + dnn_feature_columns)
    model_inputs = list(input_features.values())

    # First-order (linear) part.
    linear_logit = get_linear_logit(input_features, linear_feature_columns, seed=seed, prefix='linear',
                                    l2_reg=l2_reg_linear)

    embeddings_by_group, dense_values = input_from_feature_columns(
        input_features, dnn_feature_columns, l2_reg_embedding, seed, support_group=True)

    # Second-order part: one FM per embedding group listed in ``fm_group``.
    fm_terms = []
    for group_name, group_embeddings in embeddings_by_group.items():
        if group_name not in fm_group:
            continue
        fm_layer = FM()
        fm_terms.append(fm_layer(concat_func(group_embeddings, axis=1)))
    fm_logit = add_func(fm_terms)

    # Deep part: every embedding (regardless of group) plus the dense values.
    all_embeddings = list(chain.from_iterable(embeddings_by_group.values()))
    dnn_input = combined_dnn_input(all_embeddings, dense_values)
    dnn_tower = DNN(dnn_hidden_units, dnn_activation, l2_reg_dnn, dnn_dropout, dnn_use_bn, seed=seed)
    dnn_output = dnn_tower(dnn_input)
    logit_projection = tf.keras.layers.Dense(
        1, use_bias=False, kernel_initializer=tf.keras.initializers.glorot_normal(seed=seed))
    dnn_logit = logit_projection(dnn_output)

    final_logit = add_func([linear_logit, fm_logit, dnn_logit])
    prediction = PredictionLayer(task)(final_logit)

    return keras_model(inputs=model_inputs, outputs=prediction)
66 | 


--------------------------------------------------------------------------------
/deepctr/models/fibinet.py:
--------------------------------------------------------------------------------
 1 | # -*- coding:utf-8 -*-
 2 | """
 3 | Author:
 4 |     Weichen Shen, weichenswc@163.com
 5 | 
 6 | Reference:
 7 |     [1] Huang T, Zhang Z, Zhang J. FiBiNET: Combining Feature Importance and Bilinear feature Interaction for Click-Through Rate Prediction[J]. arXiv preprint arXiv:1905.09433, 2019.
 8 | """
 9 | 
10 | import tensorflow as tf
11 | 
12 | from ..feature_column import build_input_features, get_linear_logit, input_from_feature_columns
13 | from ..layers.core import PredictionLayer, DNN
14 | from ..layers.interaction import SENETLayer, BilinearInteraction
15 | from ..layers.utils import concat_func, add_func, combined_dnn_input
16 | 
17 | 
def FiBiNET(linear_feature_columns, dnn_feature_columns, bilinear_type='interaction', reduction_ratio=3,
            dnn_hidden_units=(256, 128, 64), l2_reg_linear=1e-5,
            l2_reg_embedding=1e-5, l2_reg_dnn=0, seed=1024, dnn_dropout=0, dnn_activation='relu',
            task='binary'):
    """Instantiates the Feature Importance and Bilinear feature Interaction NETwork architecture.

    The embeddings go through a bilinear interaction twice, once re-weighted by a SENET
    layer and once as they are; both results are concatenated, flattened and fed
    (together with the dense values) to a DNN whose logit is added to the linear logit.

    :param linear_feature_columns: An iterable containing all the features used by linear part of the model.
    :param dnn_feature_columns: An iterable containing all the features used by deep part of the model.
    :param bilinear_type: str,bilinear function type used in Bilinear Interaction Layer,can be ``'all'`` , ``'each'`` or ``'interaction'``
    :param reduction_ratio: integer in [1,inf), reduction ratio used in SENET Layer
    :param dnn_hidden_units: list,list of positive integer or empty list, the layer number and units in each layer of DNN
    :param l2_reg_linear: float. L2 regularizer strength applied to wide part
    :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vector
    :param l2_reg_dnn: float. L2 regularizer strength applied to DNN
    :param seed: integer ,to use as random seed.
    :param dnn_dropout: float in [0,1), the probability we will drop out a given DNN coordinate.
    :param dnn_activation: Activation function to use in DNN
    :param task: str, ``"binary"`` for  binary logloss or  ``"regression"`` for regression loss
    :return: A Keras model instance.
    """

    input_features = build_input_features(linear_feature_columns + dnn_feature_columns)
    model_inputs = list(input_features.values())

    linear_logit = get_linear_logit(input_features, linear_feature_columns, seed=seed, prefix='linear',
                                    l2_reg=l2_reg_linear)

    embeddings, dense_values = input_from_feature_columns(
        input_features, dnn_feature_columns, l2_reg_embedding, seed)

    # Branch 1: SENET re-weighted embeddings -> bilinear interaction.
    senet_embeddings = SENETLayer(reduction_ratio, seed)(embeddings)
    senet_interaction = BilinearInteraction(bilinear_type=bilinear_type, seed=seed)(senet_embeddings)

    # Branch 2: raw embeddings -> bilinear interaction (separate layer instance).
    raw_interaction = BilinearInteraction(bilinear_type=bilinear_type, seed=seed)(embeddings)

    flatten = tf.keras.layers.Flatten()
    merged_interactions = concat_func([senet_interaction, raw_interaction])
    dnn_input = combined_dnn_input([flatten(merged_interactions)], dense_values)

    # Batch normalization is always disabled in this model's DNN.
    dnn_tower = DNN(dnn_hidden_units, dnn_activation, l2_reg_dnn, dnn_dropout, False, seed=seed)
    dnn_out = dnn_tower(dnn_input)
    logit_projection = tf.keras.layers.Dense(
        1, use_bias=False, kernel_initializer=tf.keras.initializers.glorot_normal(seed))
    dnn_logit = logit_projection(dnn_out)

    final_logit = add_func([linear_logit, dnn_logit])
    prediction = PredictionLayer(task)(final_logit)

    return tf.keras.models.Model(inputs=model_inputs, outputs=prediction)
68 | 


--------------------------------------------------------------------------------
/deepctr/models/flen.py:
--------------------------------------------------------------------------------
 1 | # -*- coding:utf-8 -*-
 2 | """
 3 | Author:
 4 |     Tingyi Tan, 5636374@qq.com
 5 | 
 6 | Reference:
 7 |     [1] Chen W, Zhan L, Ci Y, Lin C. FLEN: Leveraging Field for Scalable CTR Prediction . arXiv preprint arXiv:1911.04690, 2019.(https://arxiv.org/pdf/1911.04690)
 8 | 
 9 | """
10 | 
11 | from itertools import chain
12 | 
13 | import tensorflow as tf
14 | 
15 | from ..feature_column import build_input_features, get_linear_logit, input_from_feature_columns
16 | from ..layers.core import PredictionLayer, DNN
17 | from ..layers.interaction import FieldWiseBiInteraction
18 | from ..layers.utils import concat_func, add_func, combined_dnn_input
19 | 
20 | 
def FLEN(linear_feature_columns,
         dnn_feature_columns,
         dnn_hidden_units=(256, 128, 64),
         l2_reg_linear=0.00001,
         l2_reg_embedding=0.00001,
         l2_reg_dnn=0,
         seed=1024,
         dnn_dropout=0.0,
         dnn_activation='relu',
         dnn_use_bn=False,
         task='binary'):
    """Instantiates the FLEN Network architecture.

    A field-wise bi-interaction over the per-group embeddings is concatenated with the
    DNN output, projected to a single logit and added to the linear logit.

    :param linear_feature_columns: An iterable containing all the features used by linear part of the model.
    :param dnn_feature_columns: An iterable containing all the features used by deep part of the model.
    :param dnn_hidden_units: list,list of positive integer or empty list, the layer number and units in each layer of deep net
    :param l2_reg_linear: float. L2 regularizer strength applied to linear part
    :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vector
    :param l2_reg_dnn: float. L2 regularizer strength applied to DNN
    :param seed: integer ,to use as random seed.
    :param dnn_dropout: float in [0,1), the probability we will drop out a given DNN coordinate.
    :param dnn_activation: Activation function to use in DNN
    :param dnn_use_bn: bool. Whether use BatchNormalization before activation or not in DNN
    :param task: str, ``"binary"`` for  binary logloss or  ``"regression"`` for regression loss
    :return: A Keras model instance.
    """

    input_features = build_input_features(linear_feature_columns + dnn_feature_columns)
    model_inputs = list(input_features.values())

    embeddings_by_group, dense_values = input_from_feature_columns(
        input_features, dnn_feature_columns, l2_reg_embedding, seed, support_group=True)

    linear_logit = get_linear_logit(input_features, linear_feature_columns, seed=seed, prefix='linear',
                                    l2_reg=l2_reg_linear)

    # Field-wise interaction takes one concatenated tensor per embedding group.
    field_interaction = FieldWiseBiInteraction(seed=seed)
    per_group_tensors = []
    for group_embeddings in embeddings_by_group.values():
        per_group_tensors.append(concat_func(group_embeddings, axis=1))
    fm_mf_out = field_interaction(per_group_tensors)

    all_embeddings = list(chain.from_iterable(embeddings_by_group.values()))
    dnn_input = combined_dnn_input(all_embeddings, dense_values)
    dnn_tower = DNN(dnn_hidden_units, dnn_activation, l2_reg_dnn, dnn_dropout, dnn_use_bn, seed=seed)
    dnn_output = dnn_tower(dnn_input)

    # A single projection scores the interaction output and the DNN output together.
    logit_projection = tf.keras.layers.Dense(
        1, use_bias=False, kernel_initializer=tf.keras.initializers.glorot_normal(seed))
    dnn_logit = logit_projection(concat_func([fm_mf_out, dnn_output]))

    final_logit = add_func([linear_logit, dnn_logit])
    prediction = PredictionLayer(task)(final_logit)

    return tf.keras.models.Model(inputs=model_inputs, outputs=prediction)
82 | 


--------------------------------------------------------------------------------
/deepctr/models/fnn.py:
--------------------------------------------------------------------------------
 1 | # -*- coding:utf-8 -*-
 2 | """
 3 | Author:
 4 |     Weichen Shen, weichenswc@163.com
 5 | 
 6 | Reference:
 7 |     [1] Zhang W, Du T, Wang J. Deep learning over multi-field categorical data[C]//European conference on information retrieval. Springer, Cham, 2016: 45-57.(https://arxiv.org/pdf/1601.02376.pdf)
 8 | """
 9 | import tensorflow as tf
10 | 
11 | from ..feature_column import build_input_features, get_linear_logit, input_from_feature_columns
12 | from ..layers.core import PredictionLayer, DNN
13 | from ..layers.utils import add_func, combined_dnn_input
14 | 
15 | 
def FNN(linear_feature_columns, dnn_feature_columns, dnn_hidden_units=(256, 128, 64),
        l2_reg_embedding=1e-5, l2_reg_linear=1e-5, l2_reg_dnn=0, seed=1024, dnn_dropout=0,
        dnn_activation='relu', task='binary'):
    """Instantiates the Factorization-supported Neural Network architecture.

    The prediction is made from the sum of a DNN logit (over embeddings and dense
    values) and a linear logit.

    :param linear_feature_columns: An iterable containing all the features used by linear part of the model.
    :param dnn_feature_columns: An iterable containing all the features used by deep part of the model.
    :param dnn_hidden_units: list,list of positive integer or empty list, the layer number and units in each layer of deep net
    :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vector
    :param l2_reg_linear: float. L2 regularizer strength applied to linear weight
    :param l2_reg_dnn: float . L2 regularizer strength applied to DNN
    :param seed: integer ,to use as random seed.
    :param dnn_dropout: float in [0,1), the probability we will drop out a given DNN coordinate.
    :param dnn_activation: Activation function to use in DNN
    :param task: str, ``"binary"`` for  binary logloss or  ``"regression"`` for regression loss
    :return: A Keras model instance.
    """
    input_features = build_input_features(linear_feature_columns + dnn_feature_columns)
    model_inputs = list(input_features.values())

    linear_logit = get_linear_logit(input_features, linear_feature_columns, seed=seed, prefix='linear',
                                    l2_reg=l2_reg_linear)

    embeddings, dense_values = input_from_feature_columns(
        input_features, dnn_feature_columns, l2_reg_embedding, seed)

    dnn_input = combined_dnn_input(embeddings, dense_values)
    # Batch normalization is always disabled in this model's DNN.
    dnn_tower = DNN(dnn_hidden_units, dnn_activation, l2_reg_dnn, dnn_dropout, False, seed=seed)
    deep_out = dnn_tower(dnn_input)
    logit_projection = tf.keras.layers.Dense(
        1, use_bias=False, kernel_initializer=tf.keras.initializers.glorot_normal(seed))
    dnn_logit = logit_projection(deep_out)

    final_logit = add_func([dnn_logit, linear_logit])
    prediction = PredictionLayer(task)(final_logit)

    return tf.keras.models.Model(inputs=model_inputs, outputs=prediction)
55 | 


--------------------------------------------------------------------------------
/deepctr/models/fwfm.py:
--------------------------------------------------------------------------------
 1 | # -*- coding:utf-8 -*-
 2 | """
 3 | Author:
 4 |     Harshit Pande
 5 | 
 6 | Reference:
 7 |     [1] Field-weighted Factorization Machines for Click-Through Rate Prediction in Display Advertising
 8 |     (https://arxiv.org/pdf/1806.03514.pdf)
 9 | 
10 | """
11 | 
12 | from itertools import chain
13 | 
14 | import tensorflow as tf
15 | 
16 | from ..feature_column import build_input_features, get_linear_logit, DEFAULT_GROUP_NAME, input_from_feature_columns
17 | from ..layers.core import PredictionLayer, DNN
18 | from ..layers.interaction import FwFMLayer
19 | from ..layers.utils import concat_func, add_func, combined_dnn_input
20 | 
21 | 
def FwFM(linear_feature_columns, dnn_feature_columns, fm_group=(DEFAULT_GROUP_NAME,), dnn_hidden_units=(256, 128, 64),
         l2_reg_linear=0.00001, l2_reg_embedding=0.00001, l2_reg_field_strength=0.00001, l2_reg_dnn=0,
         seed=1024, dnn_dropout=0, dnn_activation='relu', dnn_use_bn=False, task='binary'):
    """Instantiates the FwFM Network architecture.

    The final logit sums the linear logit, the field-weighted FM logit and, only when
    ``dnn_hidden_units`` is non-empty, a DNN logit.

    :param linear_feature_columns: An iterable containing all the features used by linear part of the model.
    :param dnn_feature_columns: An iterable containing all the features used by deep part of the model.
    :param fm_group: list, group_name of features that will be used to do feature interactions.
    :param dnn_hidden_units: list,list of positive integer or empty list if do not want DNN, the layer number and units
        in each layer of DNN
    :param l2_reg_linear: float. L2 regularizer strength applied to linear part
    :param l2_reg_field_strength: float. L2 regularizer strength applied to the field pair strength parameters
    :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vector
    :param l2_reg_dnn: float. L2 regularizer strength applied to DNN
    :param seed: integer ,to use as random seed.
    :param dnn_dropout: float in [0,1), the probability we will drop out a given DNN coordinate.
    :param dnn_activation: Activation function to use in DNN
    :param dnn_use_bn: bool. Whether use BatchNormalization before activation or not in DNN
    :param task: str, ``"binary"`` for  binary logloss or  ``"regression"`` for regression loss
    :return: A Keras model instance.
    """

    input_features = build_input_features(linear_feature_columns + dnn_feature_columns)
    model_inputs = list(input_features.values())

    linear_logit = get_linear_logit(input_features, linear_feature_columns, seed=seed, prefix='linear',
                                    l2_reg=l2_reg_linear)

    embeddings_by_group, dense_values = input_from_feature_columns(
        input_features, dnn_feature_columns, l2_reg_embedding, seed, support_group=True)

    # One FwFM layer per selected group; the field count is that group's embedding count.
    fwfm_terms = []
    for group_name, group_embeddings in embeddings_by_group.items():
        if group_name not in fm_group:
            continue
        fwfm_layer = FwFMLayer(num_fields=len(group_embeddings), regularizer=l2_reg_field_strength)
        fwfm_terms.append(fwfm_layer(concat_func(group_embeddings, axis=1)))
    fwfm_logit = add_func(fwfm_terms)

    logit_parts = [linear_logit, fwfm_logit]

    # The deep component is optional: an empty ``dnn_hidden_units`` skips it entirely.
    if dnn_hidden_units:
        all_embeddings = list(chain.from_iterable(embeddings_by_group.values()))
        dnn_input = combined_dnn_input(all_embeddings, dense_values)
        dnn_tower = DNN(dnn_hidden_units, dnn_activation, l2_reg_dnn, dnn_dropout, dnn_use_bn, seed=seed)
        dnn_output = dnn_tower(dnn_input)
        logit_projection = tf.keras.layers.Dense(
            1, use_bias=False, kernel_initializer=tf.keras.initializers.glorot_normal(seed))
        logit_parts.append(logit_projection(dnn_output))

    final_logit = add_func(logit_parts)
    prediction = PredictionLayer(task)(final_logit)

    return tf.keras.models.Model(inputs=model_inputs, outputs=prediction)
73 | 


--------------------------------------------------------------------------------
/deepctr/models/mlr.py:
--------------------------------------------------------------------------------
 1 | # -*- coding:utf-8 -*-
 2 | """
 3 | Author:
 4 |     Weichen Shen, weichenswc@163.com
 5 | 
 6 | Reference:
 7 |     [1] Gai K, Zhu X, Li H, et al. Learning Piece-wise Linear Models from Large Scale Data for Ad Click Prediction[J]. arXiv preprint arXiv:1704.05194, 2017.(https://arxiv.org/abs/1704.05194)
 8 | """
 9 | from tensorflow.python.keras.layers import Activation, dot
10 | from tensorflow.python.keras.models import Model
11 | 
12 | from ..feature_column import build_input_features, get_linear_logit
13 | from ..layers.core import PredictionLayer
14 | from ..layers.utils import concat_func
15 | 
16 | 
def MLR(region_feature_columns, base_feature_columns=None, region_num=4,
        l2_reg_linear=1e-5, seed=1024, task='binary',
        bias_feature_columns=None):
    """Instantiates the Mixed Logistic Regression/Piece-wise Linear Model.

    The output is the dot product of the softmax region scores with the per-region
    learner scores; when bias features are given it is further combined (dot product)
    with a single bias score that always uses the ``'binary'`` task.

    :param region_feature_columns: An iterable containing all the features used by region part of the model.
    :param base_feature_columns: An iterable containing all the features used by base part of the model.
        Falls back to ``region_feature_columns`` when ``None`` or empty.
    :param region_num: integer > 1,indicate the piece number
    :param l2_reg_linear: float. L2 regularizer strength applied to weight
    :param seed: integer ,to use as random seed.
    :param task: str, ``"binary"`` for  binary logloss or  ``"regression"`` for regression loss
    :param bias_feature_columns: An iterable containing all the features used by bias part of the model.
    :return: A Keras model instance.
    :raises ValueError: if ``region_num`` is not greater than 1.
    """

    if region_num <= 1:
        raise ValueError("region_num must > 1")

    # Fill in the optional column lists before they are concatenated below.
    if base_feature_columns is None or len(base_feature_columns) == 0:
        base_feature_columns = region_feature_columns
    if bias_feature_columns is None:
        bias_feature_columns = []

    all_columns = region_feature_columns + base_feature_columns + bias_feature_columns
    input_features = build_input_features(all_columns)
    model_inputs = list(input_features.values())

    region_score = get_region_score(input_features, region_feature_columns, region_num, l2_reg_linear, seed)
    learner_score = get_learner_score(input_features, base_feature_columns, region_num, l2_reg_linear, seed,
                                      task=task)

    # Mixture: weight every learner by its region score and sum them up.
    final_logit = dot([region_score, learner_score], axes=-1)

    # ``bias_feature_columns`` is never None here, so the length check is sufficient.
    if len(bias_feature_columns) > 0:
        bias_score = get_learner_score(input_features, bias_feature_columns, 1, l2_reg_linear, seed,
                                       prefix='bias_', task='binary')
        final_logit = dot([final_logit, bias_score], axes=-1)

    return Model(inputs=model_inputs, outputs=final_logit)
58 | 
59 | 
def get_region_score(features, feature_columns, region_number, l2_reg, seed, prefix='region_', seq_mask_zero=True):
    """Build the softmax-normalised region (gating) scores.

    One linear logit is created per region via ``get_linear_logit``. The k-th region (0-based) uses random seed
    ``seed + k`` and the name prefix ``prefix + str(k + 1)``. The logits are joined with ``concat_func`` and passed
    through a ``softmax`` activation.

    :param features: feature inputs forwarded unchanged to ``get_linear_logit``.
    :param feature_columns: feature columns forwarded unchanged to ``get_linear_logit``.
    :param region_number: integer, how many per-region logits to build.
    :param l2_reg: L2 regularizer strength forwarded to ``get_linear_logit``.
    :param seed: integer, base random seed; region k uses ``seed + k``.
    :param prefix: str, name prefix of each per-region linear logit.
    :param seq_mask_zero: accepted for signature compatibility; not used by this function.
    :return: the softmax activation applied to the concatenated region logits.
    """
    per_region_logits = []
    for region_idx in range(region_number):
        region_name = prefix + str(region_idx + 1)
        per_region_logits.append(
            get_linear_logit(features, feature_columns, seed=seed + region_idx, prefix=region_name, l2_reg=l2_reg))

    stacked_logits = concat_func(per_region_logits)
    softmax_gate = Activation('softmax')
    return softmax_gate(stacked_logits)
65 | 
66 | 
def get_learner_score(features, feature_columns, region_number, l2_reg, seed, prefix='learner_', seq_mask_zero=True,
                      task='binary'):
    """Build one learner prediction per region and concatenate them.

    For the k-th region (0-based) a linear logit is created with seed ``seed + k`` and name prefix
    ``prefix + str(k + 1)``, then passed through its own ``PredictionLayer(task=task, use_bias=False)``.

    :param features: feature inputs forwarded unchanged to ``get_linear_logit``.
    :param feature_columns: feature columns forwarded unchanged to ``get_linear_logit``.
    :param region_number: integer, how many learners to build.
    :param l2_reg: L2 regularizer strength forwarded to ``get_linear_logit``.
    :param seed: integer, base random seed; learner k uses ``seed + k``.
    :param prefix: str, name prefix of each per-learner linear logit.
    :param seq_mask_zero: accepted for signature compatibility; not used by this function.
    :param task: str, task type handed to every ``PredictionLayer``.
    :return: the result of ``concat_func`` over the per-learner predictions.
    """
    learner_outputs = []
    for learner_idx in range(region_number):
        # The prediction layer is instantiated before its logit, as in the one-expression form.
        predictor = PredictionLayer(task=task, use_bias=False)
        learner_logit = get_linear_logit(features, feature_columns, seed=seed + learner_idx,
                                         prefix=prefix + str(learner_idx + 1), l2_reg=l2_reg)
        learner_outputs.append(predictor(learner_logit))

    return concat_func(learner_outputs)
75 | 


--------------------------------------------------------------------------------
/deepctr/models/multitask/__init__.py:
--------------------------------------------------------------------------------
1 | from .esmm import ESMM
2 | from .mmoe import MMOE
3 | from .ple import PLE
4 | from .sharedbottom import SharedBottom


--------------------------------------------------------------------------------
/deepctr/models/multitask/esmm.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Author:
 3 |     Mincai Lai, laimc@shanghaitech.edu.cn
 4 | 
 5 |     Weichen Shen, weichenswc@163.com
 6 | 
 7 | Reference:
 8 |     [1] Ma X, Zhao L, Huang G, et al. Entire space multi-task model: An effective approach for estimating post-click conversion rate[C]//The 41st International ACM SIGIR Conference on Research & Development in Information Retrieval. 2018.(https://arxiv.org/abs/1804.07931)
 9 | """
10 | 
11 | import tensorflow as tf
12 | 
13 | from ...feature_column import build_input_features, input_from_feature_columns
14 | from ...layers.core import PredictionLayer, DNN
15 | from ...layers.utils import combined_dnn_input
16 | 
17 | 
def ESMM(dnn_feature_columns, tower_dnn_hidden_units=(256, 128, 64), l2_reg_embedding=0.00001, l2_reg_dnn=0,
         seed=1024, dnn_dropout=0, dnn_activation='relu', dnn_use_bn=False, task_types=('binary', 'binary'),
         task_names=('ctr', 'ctcvr')):
    """Instantiates the Entire Space Multi-Task Model architecture.

    Two DNN towers with identical hyper-parameters read the same combined input. Each tower output is projected to a
    single logit and turned into a binary prediction (CTR and CVR). The model outputs the CTR prediction and the
    element-wise product CTR * CVR (CTCVR).

    :param dnn_feature_columns: An iterable containing all the features used by deep part of the model.
    :param tower_dnn_hidden_units: list,list of positive integer or empty list, the layer number and units in each
        layer of task DNN.
    :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vector.
    :param l2_reg_dnn: float. L2 regularizer strength applied to DNN.
    :param seed: integer ,to use as random seed.
    :param dnn_dropout: float in [0,1), the probability we will drop out a given DNN coordinate.
    :param dnn_activation: Activation function to use in DNN
    :param dnn_use_bn: bool. Whether use BatchNormalization before activation or not in DNN
    :param task_types: iterable of str, the loss of each task. Every entry must be ``"binary"``.
    :param task_names: sequence of exactly two str. The first names the CTR output layer, the second names the
        CTCVR output layer. default value is ('ctr', 'ctcvr')

    :raises ValueError: if ``task_names`` does not have length 2, or any entry of ``task_types`` is not ``'binary'``.
    :return: A Keras model instance with outputs ``[ctr_pred, ctcvr_pred]``.
    """
    if len(task_names) != 2:
        raise ValueError("the length of task_names must be equal to 2")

    # Report the first non-binary task type, if there is one.
    non_binary_types = [task_type for task_type in task_types if task_type != 'binary']
    if non_binary_types:
        raise ValueError("task must be binary in ESMM, {} is illegal".format(non_binary_types[0]))

    features = build_input_features(dnn_feature_columns)

    embeddings, dense_values = input_from_feature_columns(features, dnn_feature_columns, l2_reg_embedding, seed)
    dnn_input = combined_dnn_input(embeddings, dense_values)

    def new_tower():
        # Both towers share hyper-parameters (including the seed) but are separate layers.
        return DNN(tower_dnn_hidden_units, dnn_activation, l2_reg_dnn, dnn_dropout, dnn_use_bn, seed=seed)

    def new_logit_layer():
        return tf.keras.layers.Dense(1, use_bias=False, activation=None)

    ctr_hidden = new_tower()(dnn_input)
    cvr_hidden = new_tower()(dnn_input)

    ctr_logit = new_logit_layer()(ctr_hidden)
    cvr_logit = new_logit_layer()(cvr_hidden)

    ctr_pred = PredictionLayer('binary', name=task_names[0])(ctr_logit)
    cvr_pred = PredictionLayer('binary')(cvr_logit)

    # CTCVR = CTR * CVR
    ctcvr_layer = tf.keras.layers.Multiply(name=task_names[1])
    ctcvr_pred = ctcvr_layer([ctr_pred, cvr_pred])

    inputs_list = list(features.values())
    return tf.keras.models.Model(inputs=inputs_list, outputs=[ctr_pred, ctcvr_pred])
66 | 


--------------------------------------------------------------------------------
/deepctr/models/multitask/sharedbottom.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Author:
 3 |     Mincai Lai, laimc@shanghaitech.edu.cn
 4 | 
 5 |     Weichen Shen, weichenswc@163.com
 6 | 
 7 | Reference:
 8 |     [1] Ruder S. An overview of multi-task learning in deep neural networks[J]. arXiv preprint arXiv:1706.05098, 2017.(https://arxiv.org/pdf/1706.05098.pdf)
 9 | """
10 | 
11 | import tensorflow as tf
12 | 
13 | from ...feature_column import build_input_features, input_from_feature_columns
14 | from ...layers.core import PredictionLayer, DNN
15 | from ...layers.utils import combined_dnn_input
16 | 
17 | 
def SharedBottom(dnn_feature_columns, bottom_dnn_hidden_units=(256, 128), tower_dnn_hidden_units=(64,),
                 l2_reg_embedding=0.00001, l2_reg_dnn=0, seed=1024, dnn_dropout=0, dnn_activation='relu',
                 dnn_use_bn=False, task_types=('binary', 'binary'), task_names=('ctr', 'ctcvr')):
    """Instantiates the SharedBottom multi-task learning Network architecture.

    A single bottom DNN is shared by all tasks. On top of it every task gets its own tower DNN (named
    ``'tower_' + task_name``), a one-unit bias-free dense layer and a ``PredictionLayer`` named after the task.

    :param dnn_feature_columns: An iterable containing all the features used by deep part of the model.
    :param bottom_dnn_hidden_units: list,list of positive integer or empty list, the layer number and units in each
        layer of shared bottom DNN.
    :param tower_dnn_hidden_units: list,list of positive integer or empty list, the layer number and units in each
        layer of task-specific DNN.
    :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vector
    :param l2_reg_dnn: float. L2 regularizer strength applied to DNN
    :param seed: integer ,to use as random seed.
    :param dnn_dropout: float in [0,1), the probability we will drop out a given DNN coordinate.
    :param dnn_activation: Activation function to use in DNN
    :param dnn_use_bn: bool. Whether use BatchNormalization before activation or not in DNN
    :param task_types: list of str, indicating the loss of each tasks, ``"binary"`` for  binary logloss or
        ``"regression"`` for regression loss. e.g. ['binary', 'regression']
    :param task_names: list of str, indicating the predict target of each tasks

    :raises ValueError: if fewer than two task names are given, if ``task_types`` and ``task_names`` differ in
        length, or if a task type is neither ``'binary'`` nor ``'regression'``.
    :return: A Keras model instance with one output per task, in ``task_names`` order.
    """
    num_tasks = len(task_names)
    if num_tasks <= 1:
        raise ValueError("num_tasks must be greater than 1")
    if len(task_types) != num_tasks:
        raise ValueError("num_tasks must be equal to the length of task_types")

    # Report the first unsupported task type, if there is one.
    unsupported_types = [task_type for task_type in task_types if task_type not in ['binary', 'regression']]
    if unsupported_types:
        raise ValueError("task must be binary or regression, {} is illegal".format(unsupported_types[0]))

    features = build_input_features(dnn_feature_columns)

    embeddings, dense_values = input_from_feature_columns(features, dnn_feature_columns, l2_reg_embedding, seed)
    dnn_input = combined_dnn_input(embeddings, dense_values)

    bottom_dnn = DNN(bottom_dnn_hidden_units, dnn_activation, l2_reg_dnn, dnn_dropout, dnn_use_bn, seed=seed)
    shared_bottom_output = bottom_dnn(dnn_input)

    def build_task_head(task_type, task_name):
        # tower DNN -> single logit -> task-specific prediction layer
        tower_dnn = DNN(tower_dnn_hidden_units, dnn_activation, l2_reg_dnn, dnn_dropout, dnn_use_bn, seed=seed,
                        name='tower_' + task_name)
        tower_hidden = tower_dnn(shared_bottom_output)
        task_logit = tf.keras.layers.Dense(1, use_bias=False, activation=None)(tower_hidden)
        return PredictionLayer(task_type, name=task_name)(task_logit)

    tasks_output = [build_task_head(task_type, task_name) for task_type, task_name in zip(task_types, task_names)]

    inputs_list = list(features.values())
    return tf.keras.models.Model(inputs=inputs_list, outputs=tasks_output)
68 | 


--------------------------------------------------------------------------------
/deepctr/models/nfm.py:
--------------------------------------------------------------------------------
 1 | # -*- coding:utf-8 -*-
 2 | """
 3 | Author:
 4 |     Weichen Shen, weichenswc@163.com
 5 | 
 6 | Reference:
 7 |     [1] He X, Chua T S. Neural factorization machines for sparse predictive analytics[C]//Proceedings of the 40th International ACM SIGIR conference on Research and Development in Information Retrieval. ACM, 2017: 355-364. (https://arxiv.org/abs/1708.05027)
 8 | """
 9 | import tensorflow as tf
10 | 
11 | from ..feature_column import build_input_features, get_linear_logit, input_from_feature_columns
12 | from ..layers.core import PredictionLayer, DNN
13 | from ..layers.interaction import BiInteractionPooling
14 | from ..layers.utils import concat_func, add_func, combined_dnn_input
15 | 
16 | 
def NFM(linear_feature_columns, dnn_feature_columns, dnn_hidden_units=(256, 128, 64),
        l2_reg_embedding=1e-5, l2_reg_linear=1e-5, l2_reg_dnn=0, seed=1024, bi_dropout=0,
        dnn_dropout=0, dnn_activation='relu', task='binary'):
    """Instantiates the Neural Factorization Machine architecture.

    The final logit is the sum of a linear logit and a DNN logit. The DNN reads the ``BiInteractionPooling`` of the
    concatenated sparse embeddings (optionally followed by dropout) combined with the dense values.

    :param linear_feature_columns: An iterable containing all the features used by linear part of the model.
    :param dnn_feature_columns: An iterable containing all the features used by deep part of the model.
    :param dnn_hidden_units: list,list of positive integer or empty list, the layer number and units in each layer
        of deep net
    :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vector
    :param l2_reg_linear: float. L2 regularizer strength applied to linear part.
    :param l2_reg_dnn: float . L2 regularizer strength applied to DNN
    :param seed: integer ,to use as random seed.
    :param bi_dropout: dropout rate applied to the output of the BiInteractionPooling layer. The dropout layer is
        only added when this value is truthy.
    :param dnn_dropout: float in [0,1), the probability we will drop out a given DNN coordinate.
    :param dnn_activation: Activation function to use in deep net
    :param task: str, ``"binary"`` for  binary logloss or  ``"regression"`` for regression loss
    :return: A Keras model instance.
    """
    all_feature_columns = linear_feature_columns + dnn_feature_columns
    features = build_input_features(all_feature_columns)

    linear_logit = get_linear_logit(features, linear_feature_columns, seed=seed, prefix='linear',
                                    l2_reg=l2_reg_linear)

    embeddings, dense_values = input_from_feature_columns(features, dnn_feature_columns, l2_reg_embedding, seed)

    fm_input = concat_func(embeddings, axis=1)
    pooling_layer = BiInteractionPooling()
    bi_out = pooling_layer(fm_input)
    if bi_dropout:
        bi_dropout_layer = tf.keras.layers.Dropout(bi_dropout)
        bi_out = bi_dropout_layer(bi_out, training=None)

    dnn_input = combined_dnn_input([bi_out], dense_values)
    # The positional ``False`` is the DNN's batch-normalisation switch (same position ESMM passes ``dnn_use_bn``).
    deep_net = DNN(dnn_hidden_units, dnn_activation, l2_reg_dnn, dnn_dropout, False, seed=seed)
    dnn_output = deep_net(dnn_input)

    logit_initializer = tf.keras.initializers.glorot_normal(seed)
    logit_layer = tf.keras.layers.Dense(1, use_bias=False, kernel_initializer=logit_initializer)
    dnn_logit = logit_layer(dnn_output)

    final_logit = add_func([linear_logit, dnn_logit])
    output = PredictionLayer(task)(final_logit)

    inputs_list = list(features.values())
    return tf.keras.models.Model(inputs=inputs_list, outputs=output)
62 | 


--------------------------------------------------------------------------------
/deepctr/models/pnn.py:
--------------------------------------------------------------------------------
 1 | # -*- coding:utf-8 -*-
 2 | """
 3 | Author:
 4 |     Weichen Shen, weichenswc@163.com
 5 | 
 6 | Reference:
 7 |     [1] Qu Y, Cai H, Ren K, et al. Product-based neural networks for user response prediction[C]//Data Mining (ICDM), 2016 IEEE 16th International Conference on. IEEE, 2016: 1149-1154.(https://arxiv.org/pdf/1611.00144.pdf)
 8 | """
 9 | 
10 | import tensorflow as tf
11 | 
12 | from ..feature_column import build_input_features, input_from_feature_columns
13 | from ..layers.core import PredictionLayer, DNN
14 | from ..layers.interaction import InnerProductLayer, OutterProductLayer
15 | from ..layers.utils import concat_func, combined_dnn_input
16 | 
17 | 
def PNN(dnn_feature_columns, dnn_hidden_units=(256, 128, 64), l2_reg_embedding=0.00001, l2_reg_dnn=0,
        seed=1024, dnn_dropout=0, dnn_activation='relu', use_inner=True, use_outter=False, kernel_type='mat',
        task='binary'):
    """Instantiates the Product-based Neural Network architecture.

    The DNN input always contains the reshaped concatenation of the sparse embeddings (the "linear signal"). The
    flattened inner product and/or the outer product of the embeddings are appended according to ``use_inner`` and
    ``use_outter``.

    :param dnn_feature_columns: An iterable containing all the features used by deep part of the model.
    :param dnn_hidden_units: list,list of positive integer or empty list, the layer number and units in each layer
        of deep net
    :param l2_reg_embedding: float . L2 regularizer strength applied to embedding vector
    :param l2_reg_dnn: float. L2 regularizer strength applied to DNN
    :param seed: integer ,to use as random seed.
    :param dnn_dropout: float in [0,1), the probability we will drop out a given DNN coordinate.
    :param dnn_activation: Activation function to use in DNN
    :param use_inner: bool,whether use inner-product or not.
    :param use_outter: bool,whether use outter-product or not.
    :param kernel_type: str,kernel_type used in outter-product,can be ``'mat'`` , ``'vec'`` or ``'num'``
    :param task: str, ``"binary"`` for  binary logloss or  ``"regression"`` for regression loss
    :raises ValueError: if ``kernel_type`` is not one of ``'mat'``, ``'vec'`` or ``'num'``.
    :return: A Keras model instance.
    """

    if kernel_type not in ['mat', 'vec', 'num']:
        raise ValueError("kernel_type must be mat,vec or num")

    features = build_input_features(dnn_feature_columns)

    embeddings, dense_values = input_from_feature_columns(features, dnn_feature_columns, l2_reg_embedding, seed)

    # Both product branches are always built here; the flags below only decide whether they feed the DNN.
    flatten_layer = tf.keras.layers.Flatten()
    inner_product = flatten_layer(InnerProductLayer()(embeddings))
    outter_product = OutterProductLayer(kernel_type)(embeddings)

    # ipnn deep input: all embeddings side by side, reshaped to the summed size of their last dimensions
    total_embedding_size = sum(int(embedding.shape[-1]) for embedding in embeddings)
    reshape_layer = tf.keras.layers.Reshape([total_embedding_size])
    linear_signal = reshape_layer(concat_func(embeddings))

    deep_parts = [linear_signal]
    if use_inner:
        deep_parts.append(inner_product)
    if use_outter:
        deep_parts.append(outter_product)

    if len(deep_parts) > 1:
        deep_input = tf.keras.layers.Concatenate()(deep_parts)
    else:
        deep_input = linear_signal

    dnn_input = combined_dnn_input([deep_input], dense_values)
    deep_net = DNN(dnn_hidden_units, dnn_activation, l2_reg_dnn, dnn_dropout, False, seed=seed)
    dnn_out = deep_net(dnn_input)

    logit_initializer = tf.keras.initializers.glorot_normal(seed)
    dnn_logit = tf.keras.layers.Dense(1, use_bias=False, kernel_initializer=logit_initializer)(dnn_out)

    output = PredictionLayer(task)(dnn_logit)

    inputs_list = list(features.values())
    return tf.keras.models.Model(inputs=inputs_list, outputs=output)
76 | 


--------------------------------------------------------------------------------
/deepctr/models/sequence/__init__.py:
--------------------------------------------------------------------------------
1 | from .bst import BST
2 | from .dien import DIEN
3 | from .din import DIN
4 | from .dsin import DSIN
5 | 


--------------------------------------------------------------------------------
/deepctr/models/wdl.py:
--------------------------------------------------------------------------------
 1 | # -*- coding:utf-8 -*-
 2 | """
 3 | Author:
 4 |     Weichen Shen, weichenswc@163.com
 5 | 
 6 | Reference:
 7 |     [1] Cheng H T, Koc L, Harmsen J, et al. Wide & deep learning for recommender systems[C]//Proceedings of the 1st Workshop on Deep Learning for Recommender Systems. ACM, 2016: 7-10.(https://arxiv.org/pdf/1606.07792.pdf)
 8 | """
 9 | 
10 | import tensorflow as tf
11 | 
12 | from ..feature_column import build_input_features, get_linear_logit, input_from_feature_columns
13 | from ..layers.core import PredictionLayer, DNN
14 | from ..layers.utils import add_func, combined_dnn_input
15 | 
16 | 
def WDL(linear_feature_columns, dnn_feature_columns, dnn_hidden_units=(256, 128, 64), l2_reg_linear=0.00001,
        l2_reg_embedding=0.00001, l2_reg_dnn=0, seed=1024, dnn_dropout=0, dnn_activation='relu',
        task='binary', keras_model=tf.keras.models.Model):
    """Instantiates the Wide&Deep Learning architecture.

    The final logit is the sum of the DNN logit (deep part) and the linear logit (wide part).

    :param linear_feature_columns: An iterable containing all the features used by linear part of the model.
    :param dnn_feature_columns: An iterable containing all the features used by deep part of the model.
    :param dnn_hidden_units: list,list of positive integer or empty list, the layer number and units in each layer
        of DNN
    :param l2_reg_linear: float. L2 regularizer strength applied to wide part
    :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vector
    :param l2_reg_dnn: float. L2 regularizer strength applied to DNN
    :param seed: integer ,to use as random seed.
    :param dnn_dropout: float in [0,1), the probability we will drop out a given DNN coordinate.
    :param dnn_activation: Activation function to use in DNN
    :param task: str, ``"binary"`` for  binary logloss or  ``"regression"`` for regression loss
    :param keras_model: callable invoked as ``keras_model(inputs=..., outputs=...)`` to build the returned model.
        Defaults to ``tf.keras.models.Model``.
    :return: the object returned by ``keras_model`` (a Keras model instance by default).
    """
    all_feature_columns = linear_feature_columns + dnn_feature_columns
    features = build_input_features(all_feature_columns)

    # wide part
    linear_logit = get_linear_logit(features, linear_feature_columns, seed=seed, prefix='linear',
                                    l2_reg=l2_reg_linear)

    # deep part
    embeddings, dense_values = input_from_feature_columns(features, dnn_feature_columns, l2_reg_embedding, seed)
    dnn_input = combined_dnn_input(embeddings, dense_values)
    deep_net = DNN(dnn_hidden_units, dnn_activation, l2_reg_dnn, dnn_dropout, False, seed=seed)
    dnn_out = deep_net(dnn_input)

    logit_initializer = tf.keras.initializers.glorot_normal(seed)
    dnn_logit = tf.keras.layers.Dense(1, use_bias=False, kernel_initializer=logit_initializer)(dnn_out)

    final_logit = add_func([dnn_logit, linear_logit])
    output = PredictionLayer(task)(final_logit)

    inputs_list = list(features.values())
    return keras_model(inputs=inputs_list, outputs=output)
57 | 


--------------------------------------------------------------------------------
/deepctr/models/widefm.py:
--------------------------------------------------------------------------------
 1 | # -*- coding:utf-8 -*-
 2 | from itertools import chain
 3 | 
 4 | import tensorflow as tf
 5 | 
 6 | from ..feature_column import build_input_features, get_linear_logit, DEFAULT_GROUP_NAME, input_from_feature_columns
 7 | from ..layers.core import PredictionLayer, DNN
 8 | from ..layers.interaction import FM
 9 | from ..layers.utils import concat_func, add_func, combined_dnn_input
10 | 
11 | 
def wideFM(linear_feature_columns, dnn_feature_columns, fm_group=(DEFAULT_GROUP_NAME,), dnn_hidden_units=(256, 128, 64),
           l2_reg_linear=0.00001, l2_reg_embedding=0.00001, l2_reg_dnn=0, seed=1024, dnn_dropout=0,
           dnn_activation='relu', dnn_use_bn=False, task='binary', keras_model=tf.keras.models.Model):
    """Instantiates a wide + FM network (linear logit plus FM interaction logits).

    No DNN is built by this function. ``dnn_hidden_units``, ``l2_reg_dnn``, ``dnn_dropout``, ``dnn_activation`` and
    ``dnn_use_bn`` are accepted but not used.

    :param linear_feature_columns: An iterable containing all the features used by linear part of the model.
    :param dnn_feature_columns: An iterable containing all the features whose embeddings feed the FM part.
    :param fm_group: list, group_name of features that will be used to do feature interactions.
    :param dnn_hidden_units: not used by this function.
    :param l2_reg_linear: float. L2 regularizer strength applied to linear part
    :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vector
    :param l2_reg_dnn: not used by this function.
    :param seed: integer ,to use as random seed.
    :param dnn_dropout: not used by this function.
    :param dnn_activation: not used by this function.
    :param dnn_use_bn: not used by this function.
    :param task: str, ``"binary"`` for  binary logloss or  ``"regression"`` for regression loss
    :param keras_model: callable invoked as ``keras_model(inputs=..., outputs=...)`` to build the returned model.
        Defaults to ``tf.keras.models.Model``.
    :return: the object returned by ``keras_model`` (a Keras model instance by default).
    """
    all_feature_columns = linear_feature_columns + dnn_feature_columns
    features = build_input_features(all_feature_columns)

    linear_logit = get_linear_logit(features, linear_feature_columns, seed=seed, prefix='linear',
                                    l2_reg=l2_reg_linear)

    group_embedding_dict, dense_value_list = input_from_feature_columns(features, dnn_feature_columns,
                                                                        l2_reg_embedding, seed, support_group=True)

    # One FM term per embedding group that was selected through ``fm_group``.
    fm_terms = []
    for group_name, group_embeddings in group_embedding_dict.items():
        if group_name not in fm_group:
            continue
        fm_layer = FM()
        fm_terms.append(fm_layer(concat_func(group_embeddings, axis=1)))

    fm_logit = add_func(fm_terms)
    final_logit = add_func([linear_logit, fm_logit])

    output = PredictionLayer(task)(final_logit)

    inputs_list = list(features.values())
    return keras_model(inputs=inputs_list, outputs=output)
50 | 


--------------------------------------------------------------------------------
/deepctr/utils.py:
--------------------------------------------------------------------------------
 1 | # -*- coding:utf-8 -*-
 2 | """
 3 | 
 4 | Author:
 5 |     Weichen Shen,weichenswc@163.com
 6 | 
 7 | """
 8 | 
 9 | import json
10 | import logging
11 | from threading import Thread
12 | 
13 | import requests
14 | 
15 | try:
16 |     from packaging.version import parse
17 | except ImportError:
18 |     from pip._vendor.packaging.version import parse
19 | 
20 | 
def check_version(version):
    """Check PyPI in a background thread for a newer stable release of deepctr.

    The check is best-effort. When a release newer than ``version`` exists (pre- and post-releases are ignored) a
    warning is logged. Any failure only prints a hint to check manually; nothing is raised to the caller.

    :param version: str, the currently installed version, parsed with ``packaging.version.parse``.
    :return: None. The function returns as soon as the background thread has been started.
    """

    def check(version):
        try:
            url_pattern = 'https://pypi.python.org/pypi/deepctr/json'
            # Bound the request: without a timeout a stalled connection would keep this
            # non-daemon thread (and therefore interpreter shutdown) waiting indefinitely.
            req = requests.get(url_pattern, timeout=10)
            latest_version = parse('0')
            version = parse(version)
            if req.status_code == requests.codes.ok:
                j = json.loads(req.text.encode('utf-8'))
                releases = j.get('releases', [])
                for release in releases:
                    ver = parse(release)
                    # Only stable releases count as upgrade candidates.
                    if ver.is_prerelease or ver.is_postrelease:
                        continue
                    latest_version = max(latest_version, ver)
                if latest_version > version:
                    logging.warning(
                        '\nDeepCTR version {0} detected. Your version is {1}.\nUse `pip install -U deepctr` to upgrade.Changelog: https://github.com/shenweichen/DeepCTR/releases/tag/v{0}'.format(
                            latest_version, version))
        except Exception:
            # Deliberately broad: network, JSON and version-parsing errors must never break the
            # importing program. ``Exception`` (not a bare ``except``) lets SystemExit and
            # KeyboardInterrupt propagate.
            print("Please check the latest version manually on https://pypi.org/project/deepctr/#history")
            return

    Thread(target=check, args=(version,)).start()
47 | 


--------------------------------------------------------------------------------
/docs/Makefile:
--------------------------------------------------------------------------------
# Minimal makefile for Sphinx documentation
#
# Every target is forwarded to sphinx-build in "make mode" (-M <target>),
# reading from $(SOURCEDIR) and writing to $(BUILDDIR).

# You can set these variables from the command line.
# NOTE: SPHINXPROJ is defined here but not referenced by any rule in this file.
SPHINXOPTS    =
SPHINXBUILD   = sphinx-build
SPHINXPROJ    = DeepCTR
SOURCEDIR     = source
BUILDDIR      = build

# Put it first so that "make" without argument is like "make help".
help:
	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)

# "Makefile" is the prerequisite of the catch-all rule below; presumably it is
# listed as phony so the catch-all pattern is never used to rebuild it.
.PHONY: help Makefile

# Catch-all target: route all unknown targets to Sphinx using the new
# "make mode" option.  $(O) is meant as a shortcut for $(SPHINXOPTS).
%: Makefile
	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)


--------------------------------------------------------------------------------
/docs/make.bat:
--------------------------------------------------------------------------------
@ECHO OFF

REM Switch to the directory that contains this script; undone by popd at :end.
pushd %~dp0

REM Command file for Sphinx documentation

REM Fall back to the sphinx-build found on PATH unless SPHINXBUILD is already set.
if "%SPHINXBUILD%" == "" (
	set SPHINXBUILD=sphinx-build
)
set SOURCEDIR=source
set BUILDDIR=build
set SPHINXPROJ=DeepCTR

REM Without a target argument, show the Sphinx help.
if "%1" == "" goto help

REM Probe that the Sphinx command can be started (all output discarded).
REM The errorlevel 9009 branch reports "command not found" and exits with
REM code 1 without reaching popd at :end.
%SPHINXBUILD% >NUL 2>NUL
if errorlevel 9009 (
	echo.
	echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
	echo.installed, then set the SPHINXBUILD environment variable to point
	echo.to the full path of the 'sphinx-build' executable. Alternatively you
	echo.may add the Sphinx directory to PATH.
	echo.
	echo.If you don't have Sphinx installed, grab it from
	echo.http://sphinx-doc.org/
	exit /b 1
)

REM Forward the requested target (first argument) to Sphinx "make mode".
%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS%
goto end

:help
%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS%

:end
popd
37 | 


--------------------------------------------------------------------------------
/docs/pics/AFM.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bytedance/LargeBatchCTR/12101b44d8e753a6d8d954437287f96d68895d49/docs/pics/AFM.png


--------------------------------------------------------------------------------
/docs/pics/AutoInt.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bytedance/LargeBatchCTR/12101b44d8e753a6d8d954437287f96d68895d49/docs/pics/AutoInt.png


--------------------------------------------------------------------------------
/docs/pics/BST.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bytedance/LargeBatchCTR/12101b44d8e753a6d8d954437287f96d68895d49/docs/pics/BST.png


--------------------------------------------------------------------------------
/docs/pics/CCPM.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bytedance/LargeBatchCTR/12101b44d8e753a6d8d954437287f96d68895d49/docs/pics/CCPM.png


--------------------------------------------------------------------------------
/docs/pics/CIN.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bytedance/LargeBatchCTR/12101b44d8e753a6d8d954437287f96d68895d49/docs/pics/CIN.png


--------------------------------------------------------------------------------
/docs/pics/DCN-M.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bytedance/LargeBatchCTR/12101b44d8e753a6d8d954437287f96d68895d49/docs/pics/DCN-M.png


--------------------------------------------------------------------------------
/docs/pics/DCN-Mix.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bytedance/LargeBatchCTR/12101b44d8e753a6d8d954437287f96d68895d49/docs/pics/DCN-Mix.png


--------------------------------------------------------------------------------
/docs/pics/DCN.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bytedance/LargeBatchCTR/12101b44d8e753a6d8d954437287f96d68895d49/docs/pics/DCN.png


--------------------------------------------------------------------------------
/docs/pics/DIEN.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bytedance/LargeBatchCTR/12101b44d8e753a6d8d954437287f96d68895d49/docs/pics/DIEN.png


--------------------------------------------------------------------------------
/docs/pics/DIFM.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bytedance/LargeBatchCTR/12101b44d8e753a6d8d954437287f96d68895d49/docs/pics/DIFM.jpg


--------------------------------------------------------------------------------
/docs/pics/DIN.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bytedance/LargeBatchCTR/12101b44d8e753a6d8d954437287f96d68895d49/docs/pics/DIN.png


--------------------------------------------------------------------------------
/docs/pics/DSIN.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bytedance/LargeBatchCTR/12101b44d8e753a6d8d954437287f96d68895d49/docs/pics/DSIN.png


--------------------------------------------------------------------------------
/docs/pics/DeepFEFM.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bytedance/LargeBatchCTR/12101b44d8e753a6d8d954437287f96d68895d49/docs/pics/DeepFEFM.jpg


--------------------------------------------------------------------------------
/docs/pics/DeepFM.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bytedance/LargeBatchCTR/12101b44d8e753a6d8d954437287f96d68895d49/docs/pics/DeepFM.png


--------------------------------------------------------------------------------
/docs/pics/FGCNN.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bytedance/LargeBatchCTR/12101b44d8e753a6d8d954437287f96d68895d49/docs/pics/FGCNN.png


--------------------------------------------------------------------------------
/docs/pics/FLEN.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bytedance/LargeBatchCTR/12101b44d8e753a6d8d954437287f96d68895d49/docs/pics/FLEN.jpg


--------------------------------------------------------------------------------
/docs/pics/FNN.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bytedance/LargeBatchCTR/12101b44d8e753a6d8d954437287f96d68895d49/docs/pics/FNN.png


--------------------------------------------------------------------------------
/docs/pics/FiBiNET.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bytedance/LargeBatchCTR/12101b44d8e753a6d8d954437287f96d68895d49/docs/pics/FiBiNET.png


--------------------------------------------------------------------------------
/docs/pics/IFM.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bytedance/LargeBatchCTR/12101b44d8e753a6d8d954437287f96d68895d49/docs/pics/IFM.jpg


--------------------------------------------------------------------------------
/docs/pics/InteractingLayer.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bytedance/LargeBatchCTR/12101b44d8e753a6d8d954437287f96d68895d49/docs/pics/InteractingLayer.png


--------------------------------------------------------------------------------
/docs/pics/MLR.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bytedance/LargeBatchCTR/12101b44d8e753a6d8d954437287f96d68895d49/docs/pics/MLR.png


--------------------------------------------------------------------------------
/docs/pics/NFM.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bytedance/LargeBatchCTR/12101b44d8e753a6d8d954437287f96d68895d49/docs/pics/NFM.png


--------------------------------------------------------------------------------
/docs/pics/ONN.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bytedance/LargeBatchCTR/12101b44d8e753a6d8d954437287f96d68895d49/docs/pics/ONN.png


--------------------------------------------------------------------------------
/docs/pics/PNN.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bytedance/LargeBatchCTR/12101b44d8e753a6d8d954437287f96d68895d49/docs/pics/PNN.png


--------------------------------------------------------------------------------
/docs/pics/WDL.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bytedance/LargeBatchCTR/12101b44d8e753a6d8d954437287f96d68895d49/docs/pics/WDL.png


--------------------------------------------------------------------------------
/docs/pics/code.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bytedance/LargeBatchCTR/12101b44d8e753a6d8d954437287f96d68895d49/docs/pics/code.png


--------------------------------------------------------------------------------
/docs/pics/criteo_sample.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bytedance/LargeBatchCTR/12101b44d8e753a6d8d954437287f96d68895d49/docs/pics/criteo_sample.png


--------------------------------------------------------------------------------
/docs/pics/deepctrbot.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bytedance/LargeBatchCTR/12101b44d8e753a6d8d954437287f96d68895d49/docs/pics/deepctrbot.png


--------------------------------------------------------------------------------
/docs/pics/fms.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bytedance/LargeBatchCTR/12101b44d8e753a6d8d954437287f96d68895d49/docs/pics/fms.png


--------------------------------------------------------------------------------
/docs/pics/mlr1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bytedance/LargeBatchCTR/12101b44d8e753a6d8d954437287f96d68895d49/docs/pics/mlr1.png


--------------------------------------------------------------------------------
/docs/pics/mlrvsdnn.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bytedance/LargeBatchCTR/12101b44d8e753a6d8d954437287f96d68895d49/docs/pics/mlrvsdnn.png


--------------------------------------------------------------------------------
/docs/pics/movielens_sample.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bytedance/LargeBatchCTR/12101b44d8e753a6d8d954437287f96d68895d49/docs/pics/movielens_sample.png


--------------------------------------------------------------------------------
/docs/pics/movielens_sample_with_genres.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bytedance/LargeBatchCTR/12101b44d8e753a6d8d954437287f96d68895d49/docs/pics/movielens_sample_with_genres.png


--------------------------------------------------------------------------------
/docs/pics/multitaskmodels/ESMM.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bytedance/LargeBatchCTR/12101b44d8e753a6d8d954437287f96d68895d49/docs/pics/multitaskmodels/ESMM.png


--------------------------------------------------------------------------------
/docs/pics/multitaskmodels/MMOE.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bytedance/LargeBatchCTR/12101b44d8e753a6d8d954437287f96d68895d49/docs/pics/multitaskmodels/MMOE.png


--------------------------------------------------------------------------------
/docs/pics/multitaskmodels/PLE.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bytedance/LargeBatchCTR/12101b44d8e753a6d8d954437287f96d68895d49/docs/pics/multitaskmodels/PLE.png


--------------------------------------------------------------------------------
/docs/pics/multitaskmodels/SharedBottom.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bytedance/LargeBatchCTR/12101b44d8e753a6d8d954437287f96d68895d49/docs/pics/multitaskmodels/SharedBottom.png


--------------------------------------------------------------------------------
/docs/pics/weichennote.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bytedance/LargeBatchCTR/12101b44d8e753a6d8d954437287f96d68895d49/docs/pics/weichennote.png


--------------------------------------------------------------------------------
/docs/pics/xDeepFM.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bytedance/LargeBatchCTR/12101b44d8e753a6d8d954437287f96d68895d49/docs/pics/xDeepFM.png


--------------------------------------------------------------------------------
/docs/requirements.readthedocs.txt:
--------------------------------------------------------------------------------
1 | tensorflow==2.5.1
2 | recommonmark==0.7.1


--------------------------------------------------------------------------------
/docs/source/Estimators.rst:
--------------------------------------------------------------------------------
 1 | DeepCTR Estimators API
 2 | ======================
 3 | 
 4 | .. toctree::
 5 |    CCPM<deepctr.estimator.models.ccpm>
 6 |    FNN<deepctr.estimator.models.fnn>
 7 |    PNN<deepctr.estimator.models.pnn>
 8 |    WDL<deepctr.estimator.models.wdl>
 9 |    DeepFM<deepctr.estimator.models.deepfm>
10 |    NFM<deepctr.estimator.models.nfm>
11 |    AFM<deepctr.estimator.models.afm>
12 |    DCN<deepctr.estimator.models.dcn>
13 |    xDeepFM<deepctr.estimator.models.xdeepfm>
14 |    AutoInt<deepctr.estimator.models.autoint>
15 |    FiBiNET<deepctr.estimator.models.fibinet>
16 | 


--------------------------------------------------------------------------------
/docs/source/Layers.rst:
--------------------------------------------------------------------------------
 1 | DeepCTR Layers API
 2 | ======================
 3 | 
 4 | 
 5 | .. toctree::
 6 |    :maxdepth: 3
 7 |    :caption: API:
 8 | 
 9 |    Core Layers<deepctr.layers.core>
10 |    Interaction Layers<deepctr.layers.interaction>
11 |    Activation Layers<deepctr.layers.activation>
12 |    Normalization Layers<deepctr.layers.normalization>
13 |    Sequence Layers<deepctr.layers.sequence>


--------------------------------------------------------------------------------
/docs/source/Models.rst:
--------------------------------------------------------------------------------
 1 | DeepCTR Models API
 2 | ======================
 3 | 
 4 | .. toctree::
 5 |    Model Methods<Model_Methods.md>
 6 |    CCPM<deepctr.models.ccpm>
 7 |    FNN<deepctr.models.fnn>
 8 |    PNN<deepctr.models.pnn>
 9 |    WDL<deepctr.models.wdl>
10 |    DeepFM<deepctr.models.deepfm>
11 |    MLR<deepctr.models.mlr>
12 |    NFM<deepctr.models.nfm>
13 |    AFM<deepctr.models.afm>
14 |    DCN<deepctr.models.dcn>
15 |    DCNMix<deepctr.models.dcnmix>
16 |    DIN<deepctr.models.sequence.din>
17 |    DIEN<deepctr.models.sequence.dien>
18 |    DSIN<deepctr.models.sequence.dsin>
19 |    BST<deepctr.models.sequence.bst>
20 |    xDeepFM<deepctr.models.xdeepfm>
21 |    AutoInt<deepctr.models.autoint>
22 |    ONN<deepctr.models.onn>
23 |    FGCNN<deepctr.models.fgcnn>
24 |    FiBiNET<deepctr.models.fibinet>
25 |    FLEN<deepctr.models.flen>
26 |    IFM<deepctr.models.ifm>
27 |    DIFM<deepctr.models.difm>
28 |    DeepFEFM<deepctr.models.deepfefm>
29 |    SharedBottom<deepctr.models.multitask.sharedbottom>
30 |    ESMM<deepctr.models.multitask.esmm>
31 |    MMOE<deepctr.models.multitask.mmoe>
32 |    PLE<deepctr.models.multitask.ple>
33 | 
34 |    


--------------------------------------------------------------------------------
/docs/source/deepctr.contrib.rnn.rst:
--------------------------------------------------------------------------------
1 | deepctr.contrib.rnn module
2 | ==========================
3 | 
4 | .. automodule:: deepctr.contrib.rnn
5 |     :members:
6 |     :undoc-members:
7 |     :show-inheritance:
8 | 


--------------------------------------------------------------------------------
/docs/source/deepctr.contrib.rst:
--------------------------------------------------------------------------------
 1 | deepctr.contrib package
 2 | =======================
 3 | 
 4 | Submodules
 5 | ----------
 6 | 
 7 | .. toctree::
 8 | 
 9 |    deepctr.contrib.rnn
10 |    deepctr.contrib.utils
11 | 
12 | Module contents
13 | ---------------
14 | 
15 | .. automodule:: deepctr.contrib
16 |     :members:
17 |     :undoc-members:
18 |     :show-inheritance:
19 | 


--------------------------------------------------------------------------------
/docs/source/deepctr.contrib.utils.rst:
--------------------------------------------------------------------------------
1 | deepctr.contrib.utils module
2 | ============================
3 | 
4 | .. automodule:: deepctr.contrib.utils
5 |     :members:
6 |     :undoc-members:
7 |     :show-inheritance:
8 | 


--------------------------------------------------------------------------------
/docs/source/deepctr.estimator.feature_column.rst:
--------------------------------------------------------------------------------
1 | deepctr.estimator.feature\_column module
2 | ========================================
3 | 
4 | .. automodule:: deepctr.estimator.feature_column
5 |     :members:
6 |     :undoc-members:
7 |     :show-inheritance:
8 | 


--------------------------------------------------------------------------------
/docs/source/deepctr.estimator.inputs.rst:
--------------------------------------------------------------------------------
1 | deepctr.estimator.inputs module
2 | ===============================
3 | 
4 | .. automodule:: deepctr.estimator.inputs
5 |     :members:
6 |     :undoc-members:
7 |     :show-inheritance:
8 | 


--------------------------------------------------------------------------------
/docs/source/deepctr.estimator.models.afm.rst:
--------------------------------------------------------------------------------
1 | deepctr.estimator.models.afm module
2 | ===================================
3 | 
4 | .. automodule:: deepctr.estimator.models.afm
5 |     :members:
6 |     :no-undoc-members:
7 |     :no-show-inheritance:
8 | 


--------------------------------------------------------------------------------
/docs/source/deepctr.estimator.models.autoint.rst:
--------------------------------------------------------------------------------
1 | deepctr.estimator.models.autoint module
2 | =======================================
3 | 
4 | .. automodule:: deepctr.estimator.models.autoint
5 |     :members:
6 |     :no-undoc-members:
7 |     :no-show-inheritance:
8 | 


--------------------------------------------------------------------------------
/docs/source/deepctr.estimator.models.ccpm.rst:
--------------------------------------------------------------------------------
1 | deepctr.estimator.models.ccpm module
2 | ====================================
3 | 
4 | .. automodule:: deepctr.estimator.models.ccpm
5 |     :members:
6 |     :no-undoc-members:
7 |     :no-show-inheritance:
8 | 


--------------------------------------------------------------------------------
/docs/source/deepctr.estimator.models.dcn.rst:
--------------------------------------------------------------------------------
1 | deepctr.estimator.models.dcn module
2 | ===================================
3 | 
4 | .. automodule:: deepctr.estimator.models.dcn
5 |     :members:
6 |     :no-undoc-members:
7 |     :no-show-inheritance:
8 | 


--------------------------------------------------------------------------------
/docs/source/deepctr.estimator.models.deepfefm.rst:
--------------------------------------------------------------------------------
1 | deepctr.estimator.models.deepfefm module
2 | ========================================
3 | 
4 | .. automodule:: deepctr.estimator.models.deepfefm
5 |     :members:
6 |     :no-undoc-members:
7 |     :no-show-inheritance:
8 | 


--------------------------------------------------------------------------------
/docs/source/deepctr.estimator.models.deepfm.rst:
--------------------------------------------------------------------------------
1 | deepctr.estimator.models.deepfm module
2 | ======================================
3 | 
4 | .. automodule:: deepctr.estimator.models.deepfm
5 |     :members:
6 |     :no-undoc-members:
7 |     :no-show-inheritance:
8 | 


--------------------------------------------------------------------------------
/docs/source/deepctr.estimator.models.fibinet.rst:
--------------------------------------------------------------------------------
1 | deepctr.estimator.models.fibinet module
2 | =======================================
3 | 
4 | .. automodule:: deepctr.estimator.models.fibinet
5 |     :members:
6 |     :no-undoc-members:
7 |     :no-show-inheritance:
8 | 


--------------------------------------------------------------------------------
/docs/source/deepctr.estimator.models.fnn.rst:
--------------------------------------------------------------------------------
1 | deepctr.estimator.models.fnn module
2 | ===================================
3 | 
4 | .. automodule:: deepctr.estimator.models.fnn
5 |     :members:
6 |     :no-undoc-members:
7 |     :no-show-inheritance:
8 | 


--------------------------------------------------------------------------------
/docs/source/deepctr.estimator.models.fwfm.rst:
--------------------------------------------------------------------------------
1 | deepctr.estimator.models.fwfm module
2 | ========================================
3 | 
4 | .. automodule:: deepctr.estimator.models.fwfm
5 |     :members:
6 |     :no-undoc-members:
7 |     :no-show-inheritance:
8 | 


--------------------------------------------------------------------------------
/docs/source/deepctr.estimator.models.nfm.rst:
--------------------------------------------------------------------------------
1 | deepctr.estimator.models.nfm module
2 | ===================================
3 | 
4 | .. automodule:: deepctr.estimator.models.nfm
5 |     :members:
6 |     :no-undoc-members:
7 |     :no-show-inheritance:
8 | 


--------------------------------------------------------------------------------
/docs/source/deepctr.estimator.models.pnn.rst:
--------------------------------------------------------------------------------
1 | deepctr.estimator.models.pnn module
2 | ===================================
3 | 
4 | .. automodule:: deepctr.estimator.models.pnn
5 |     :members:
6 |     :no-undoc-members:
7 |     :no-show-inheritance:
8 | 


--------------------------------------------------------------------------------
/docs/source/deepctr.estimator.models.rst:
--------------------------------------------------------------------------------
 1 | deepctr.estimator.models package
 2 | ================================
 3 | 
 4 | Submodules
 5 | ----------
 6 | 
 7 | .. toctree::
 8 | 
 9 |    deepctr.estimator.models.afm
10 |    deepctr.estimator.models.autoint
11 |    deepctr.estimator.models.ccpm
12 |    deepctr.estimator.models.dcn
13 |    deepctr.estimator.models.deepfm
14 |    deepctr.estimator.models.fwfm
15 |    deepctr.estimator.models.fibinet
16 |    deepctr.estimator.models.fnn
17 |    deepctr.estimator.models.nfm
18 |    deepctr.estimator.models.pnn
19 |    deepctr.estimator.models.wdl
20 |    deepctr.estimator.models.xdeepfm
21 | 
22 | Module contents
23 | ---------------
24 | 
25 | .. automodule:: deepctr.estimator.models
26 |     :members:
27 |     :undoc-members:
28 |     :show-inheritance:
29 | 


--------------------------------------------------------------------------------
/docs/source/deepctr.estimator.models.wdl.rst:
--------------------------------------------------------------------------------
1 | deepctr.estimator.models.wdl module
2 | ===================================
3 | 
4 | .. automodule:: deepctr.estimator.models.wdl
5 |     :members:
6 |     :no-undoc-members:
7 |     :no-show-inheritance:
8 | 


--------------------------------------------------------------------------------
/docs/source/deepctr.estimator.models.xdeepfm.rst:
--------------------------------------------------------------------------------
1 | deepctr.estimator.models.xdeepfm module
2 | =======================================
3 | 
4 | .. automodule:: deepctr.estimator.models.xdeepfm
5 |     :members:
6 |     :no-undoc-members:
7 |     :no-show-inheritance:
8 | 


--------------------------------------------------------------------------------
/docs/source/deepctr.estimator.rst:
--------------------------------------------------------------------------------
 1 | deepctr.estimator package
 2 | =========================
 3 | 
 4 | Subpackages
 5 | -----------
 6 | 
 7 | .. toctree::
 8 | 
 9 |     deepctr.estimator.models
10 | 
11 | Submodules
12 | ----------
13 | 
14 | .. toctree::
15 | 
16 |    deepctr.estimator.feature_column
17 |    deepctr.estimator.inputs
18 |    deepctr.estimator.utils
19 | 
20 | Module contents
21 | ---------------
22 | 
23 | .. automodule:: deepctr.estimator
24 |     :members:
25 |     :undoc-members:
26 |     :show-inheritance:
27 | 


--------------------------------------------------------------------------------
/docs/source/deepctr.estimator.utils.rst:
--------------------------------------------------------------------------------
1 | deepctr.estimator.utils module
2 | ==============================
3 | 
4 | .. automodule:: deepctr.estimator.utils
5 |     :members:
6 |     :undoc-members:
7 |     :show-inheritance:
8 | 


--------------------------------------------------------------------------------
/docs/source/deepctr.feature_column.rst:
--------------------------------------------------------------------------------
1 | deepctr.feature\_column module
2 | ==============================
3 | 
4 | .. automodule:: deepctr.feature_column
5 |     :members:
6 |     :undoc-members:
7 |     :show-inheritance:
8 | 


--------------------------------------------------------------------------------
/docs/source/deepctr.inputs.rst:
--------------------------------------------------------------------------------
1 | deepctr.inputs module
2 | =====================
3 | 
4 | .. automodule:: deepctr.inputs
5 |     :members:
6 |     :undoc-members:
7 |     :show-inheritance:
8 | 


--------------------------------------------------------------------------------
/docs/source/deepctr.layers.activation.rst:
--------------------------------------------------------------------------------
1 | deepctr.layers.activation module
2 | ================================
3 | 
4 | .. automodule:: deepctr.layers.activation
5 |     :members:
6 |     :no-undoc-members:
7 |     :no-show-inheritance:
8 | 


--------------------------------------------------------------------------------
/docs/source/deepctr.layers.core.rst:
--------------------------------------------------------------------------------
1 | deepctr.layers.core module
2 | ==========================
3 | 
4 | .. automodule:: deepctr.layers.core
5 |     :members:
6 |     :no-undoc-members:
7 |     :no-show-inheritance:
8 | 


--------------------------------------------------------------------------------
/docs/source/deepctr.layers.interaction.rst:
--------------------------------------------------------------------------------
1 | deepctr.layers.interaction module
2 | =================================
3 | 
4 | .. automodule:: deepctr.layers.interaction
5 |     :members:
6 |     :no-undoc-members:
7 |     :no-show-inheritance:
8 | 


--------------------------------------------------------------------------------
/docs/source/deepctr.layers.normalization.rst:
--------------------------------------------------------------------------------
1 | deepctr.layers.normalization module
2 | ===================================
3 | 
4 | .. automodule:: deepctr.layers.normalization
5 |     :members:
6 |     :no-undoc-members:
7 |     :no-show-inheritance:
8 | 


--------------------------------------------------------------------------------
/docs/source/deepctr.layers.rst:
--------------------------------------------------------------------------------
 1 | deepctr.layers package
 2 | ======================
 3 | 
 4 | Submodules
 5 | ----------
 6 | 
 7 | .. toctree::
 8 | 
 9 |    deepctr.layers.activation
10 |    deepctr.layers.core
11 |    deepctr.layers.interaction
12 |    deepctr.layers.normalization
13 |    deepctr.layers.sequence
14 |    deepctr.layers.utils
15 | 
16 | Module contents
17 | ---------------
18 | 
19 | .. automodule:: deepctr.layers
20 |     :members:
21 |     :undoc-members:
22 |     :show-inheritance:
23 | 


--------------------------------------------------------------------------------
/docs/source/deepctr.layers.sequence.rst:
--------------------------------------------------------------------------------
1 | deepctr.layers.sequence module
2 | ==============================
3 | 
4 | .. automodule:: deepctr.layers.sequence
5 |     :members:
6 |     :no-undoc-members:
7 |     :no-show-inheritance:
8 | 


--------------------------------------------------------------------------------
/docs/source/deepctr.layers.utils.rst:
--------------------------------------------------------------------------------
1 | deepctr.layers.utils module
2 | ===========================
3 | 
4 | .. automodule:: deepctr.layers.utils
5 |     :members:
6 |     :undoc-members:
7 |     :show-inheritance:
8 | 


--------------------------------------------------------------------------------
/docs/source/deepctr.models.afm.rst:
--------------------------------------------------------------------------------
1 | deepctr.models.afm module
2 | =========================
3 | 
4 | .. automodule:: deepctr.models.afm
5 |     :members:
6 |     :no-undoc-members:
7 |     :no-show-inheritance:
8 | 


--------------------------------------------------------------------------------
/docs/source/deepctr.models.autoint.rst:
--------------------------------------------------------------------------------
1 | deepctr.models.autoint module
2 | =============================
3 | 
4 | .. automodule:: deepctr.models.autoint
5 |     :members:
6 |     :no-undoc-members:
7 |     :no-show-inheritance:
8 | 


--------------------------------------------------------------------------------
/docs/source/deepctr.models.ccpm.rst:
--------------------------------------------------------------------------------
1 | deepctr.models.ccpm module
2 | ==========================
3 | 
4 | .. automodule:: deepctr.models.ccpm
5 |     :members:
6 |     :no-undoc-members:
7 |     :no-show-inheritance:
8 | 


--------------------------------------------------------------------------------
/docs/source/deepctr.models.dcn.rst:
--------------------------------------------------------------------------------
1 | deepctr.models.dcn module
2 | =========================
3 | 
4 | .. automodule:: deepctr.models.dcn
5 |     :members:
6 |     :no-undoc-members:
7 |     :no-show-inheritance:
8 | 


--------------------------------------------------------------------------------
/docs/source/deepctr.models.dcnmix.rst:
--------------------------------------------------------------------------------
1 | deepctr.models.dcnmix module
2 | ============================
3 | 
4 | .. automodule:: deepctr.models.dcnmix
5 |     :members:
6 |     :no-undoc-members:
7 |     :no-show-inheritance:
8 | 


--------------------------------------------------------------------------------
/docs/source/deepctr.models.deepfefm.rst:
--------------------------------------------------------------------------------
1 | deepctr.models.deepfefm module
2 | ==============================
3 | 
4 | .. automodule:: deepctr.models.deepfefm
5 |     :members:
6 |     :no-undoc-members:
7 |     :no-show-inheritance:
8 | 


--------------------------------------------------------------------------------
/docs/source/deepctr.models.deepfm.rst:
--------------------------------------------------------------------------------
1 | deepctr.models.deepfm module
2 | ============================
3 | 
4 | .. automodule:: deepctr.models.deepfm
5 |     :members:
6 |     :no-undoc-members:
7 |     :no-show-inheritance:
8 | 


--------------------------------------------------------------------------------
/docs/source/deepctr.models.deepfwfm.rst:
--------------------------------------------------------------------------------
1 | deepctr.models.fwfm module
2 | ==========================
3 | 
4 | .. automodule:: deepctr.models.fwfm
5 |     :members:
6 |     :no-undoc-members:
7 |     :no-show-inheritance:
8 | 


--------------------------------------------------------------------------------
/docs/source/deepctr.models.difm.rst:
--------------------------------------------------------------------------------
1 | deepctr.models.difm module
2 | =============================
3 | 
4 | .. automodule:: deepctr.models.difm
5 |     :members:
6 |     :no-undoc-members:
7 |     :no-show-inheritance:
8 | 


--------------------------------------------------------------------------------
/docs/source/deepctr.models.fgcnn.rst:
--------------------------------------------------------------------------------
1 | deepctr.models.fgcnn module
2 | ===========================
3 | 
4 | .. automodule:: deepctr.models.fgcnn
5 |     :members:
6 |     :no-undoc-members:
7 |     :no-show-inheritance:
8 | 


--------------------------------------------------------------------------------
/docs/source/deepctr.models.fibinet.rst:
--------------------------------------------------------------------------------
1 | deepctr.models.fibinet module
2 | =============================
3 | 
4 | .. automodule:: deepctr.models.fibinet
5 |     :members:
6 |     :no-undoc-members:
7 |     :no-show-inheritance:
8 | 


--------------------------------------------------------------------------------
/docs/source/deepctr.models.flen.rst:
--------------------------------------------------------------------------------
1 | deepctr.models.flen module
2 | =============================
3 | 
4 | .. automodule:: deepctr.models.flen
5 |     :members:
6 |     :no-undoc-members:
7 |     :no-show-inheritance:
8 | 


--------------------------------------------------------------------------------
/docs/source/deepctr.models.fnn.rst:
--------------------------------------------------------------------------------
1 | deepctr.models.fnn module
2 | =========================
3 | 
4 | .. automodule:: deepctr.models.fnn
5 |     :members:
6 |     :no-undoc-members:
7 |     :no-show-inheritance:
8 | 


--------------------------------------------------------------------------------
/docs/source/deepctr.models.ifm.rst:
--------------------------------------------------------------------------------
1 | deepctr.models.ifm module
2 | =============================
3 | 
4 | .. automodule:: deepctr.models.ifm
5 |     :members:
6 |     :no-undoc-members:
7 |     :no-show-inheritance:
8 | 


--------------------------------------------------------------------------------
/docs/source/deepctr.models.mlr.rst:
--------------------------------------------------------------------------------
1 | deepctr.models.mlr module
2 | =========================
3 | 
4 | .. automodule:: deepctr.models.mlr
5 |     :members:
6 |     :no-undoc-members:
7 |     :no-show-inheritance:
8 | 


--------------------------------------------------------------------------------
/docs/source/deepctr.models.multitask.esmm.rst:
--------------------------------------------------------------------------------
1 | deepctr.models.multitask.esmm module
2 | ====================================
3 | 
4 | .. automodule:: deepctr.models.multitask.esmm
5 |     :members:
6 |     :no-undoc-members:
7 |     :no-show-inheritance:
8 | 


--------------------------------------------------------------------------------
/docs/source/deepctr.models.multitask.mmoe.rst:
--------------------------------------------------------------------------------
1 | deepctr.models.multitask.mmoe module
2 | ====================================
3 | 
4 | .. automodule:: deepctr.models.multitask.mmoe
5 |     :members:
6 |     :no-undoc-members:
7 |     :no-show-inheritance:
8 | 


--------------------------------------------------------------------------------
/docs/source/deepctr.models.multitask.ple.rst:
--------------------------------------------------------------------------------
1 | deepctr.models.multitask.ple module
2 | ===================================
3 | 
4 | .. automodule:: deepctr.models.multitask.ple
5 |     :members:
6 |     :no-undoc-members:
7 |     :no-show-inheritance:
8 | 


--------------------------------------------------------------------------------
/docs/source/deepctr.models.multitask.sharedbottom.rst:
--------------------------------------------------------------------------------
1 | deepctr.models.multitask.sharedbottom module
2 | ============================================
3 | 
4 | .. automodule:: deepctr.models.multitask.sharedbottom
5 |     :members:
6 |     :no-undoc-members:
7 |     :no-show-inheritance:
8 | 


--------------------------------------------------------------------------------
/docs/source/deepctr.models.nfm.rst:
--------------------------------------------------------------------------------
1 | deepctr.models.nfm module
2 | =========================
3 | 
4 | .. automodule:: deepctr.models.nfm
5 |     :members:
6 |     :no-undoc-members:
7 |     :no-show-inheritance:
8 | 


--------------------------------------------------------------------------------
/docs/source/deepctr.models.onn.rst:
--------------------------------------------------------------------------------
1 | deepctr.models.onn module
2 | ==========================
3 | 
4 | .. automodule:: deepctr.models.onn
5 |     :members:
6 |     :no-undoc-members:
7 |     :no-show-inheritance:
8 | 


--------------------------------------------------------------------------------
/docs/source/deepctr.models.pnn.rst:
--------------------------------------------------------------------------------
1 | deepctr.models.pnn module
2 | =========================
3 | 
4 | .. automodule:: deepctr.models.pnn
5 |     :members:
6 |     :no-undoc-members:
7 |     :no-show-inheritance:
8 | 


--------------------------------------------------------------------------------
/docs/source/deepctr.models.rst:
--------------------------------------------------------------------------------
 1 | deepctr.models package
 2 | ======================
 3 | 
 4 | Submodules
 5 | ----------
 6 | 
 7 | .. toctree::
 8 | 
 9 |    deepctr.models.afm
10 |    deepctr.models.autoint
11 |    deepctr.models.ccpm
12 |    deepctr.models.dcn
13 |    deepctr.models.dcnmix
14 |    deepctr.models.deepfm
15 |    deepctr.models.dien
16 |    deepctr.models.din
17 |    deepctr.models.dsin
18 |    deepctr.models.fgcnn
19 |    deepctr.models.fibinet
20 |    deepctr.models.fnn
21 |    deepctr.models.mlr
22 |    deepctr.models.onn
23 |    deepctr.models.nfm
24 |    deepctr.models.pnn
25 |    deepctr.models.wdl
26 |    deepctr.models.xdeepfm
27 |    deepctr.models.flen
28 |    deepctr.models.ifm
29 |    deepctr.models.difm
30 |    deepctr.models.deepfefm
31 |    deepctr.models.multitask.sharedbottom
32 |    deepctr.models.multitask.esmm
33 |    deepctr.models.multitask.mmoe
34 |    deepctr.models.multitask.ple
35 | 
36 | 
37 | Module contents
38 | ---------------
39 | 
40 | .. automodule:: deepctr.models
41 |     :members:
42 |     :undoc-members:
43 |     :show-inheritance:
44 | 


--------------------------------------------------------------------------------
/docs/source/deepctr.models.sequence.bst.rst:
--------------------------------------------------------------------------------
1 | deepctr.models.sequence.bst module
2 | ==================================
3 | 
4 | .. automodule:: deepctr.models.sequence.bst
5 |     :members:
6 |     :no-undoc-members:
7 |     :no-show-inheritance:
8 | 


--------------------------------------------------------------------------------
/docs/source/deepctr.models.sequence.dien.rst:
--------------------------------------------------------------------------------
1 | deepctr.models.sequence.dien module
2 | ===================================
3 | 
4 | .. automodule:: deepctr.models.sequence.dien
5 |     :members:
6 |     :no-undoc-members:
7 |     :no-show-inheritance:
8 | 


--------------------------------------------------------------------------------
/docs/source/deepctr.models.sequence.din.rst:
--------------------------------------------------------------------------------
1 | deepctr.models.sequence.din module
2 | ==================================
3 | 
4 | .. automodule:: deepctr.models.sequence.din
5 |     :members:
6 |     :no-undoc-members:
7 |     :no-show-inheritance:
8 | 


--------------------------------------------------------------------------------
/docs/source/deepctr.models.sequence.dsin.rst:
--------------------------------------------------------------------------------
1 | deepctr.models.sequence.dsin module
2 | ===================================
3 | 
4 | .. automodule:: deepctr.models.sequence.dsin
5 |     :members:
6 |     :no-undoc-members:
7 |     :no-show-inheritance:
8 | 


--------------------------------------------------------------------------------
/docs/source/deepctr.models.wdl.rst:
--------------------------------------------------------------------------------
1 | deepctr.models.wdl module
2 | =========================
3 | 
4 | .. automodule:: deepctr.models.wdl
5 |     :members:
6 |     :no-undoc-members:
7 |     :no-show-inheritance:
8 | 


--------------------------------------------------------------------------------
/docs/source/deepctr.models.xdeepfm.rst:
--------------------------------------------------------------------------------
1 | deepctr.models.xdeepfm module
2 | =============================
3 | 
4 | .. automodule:: deepctr.models.xdeepfm
5 |     :members:
6 |     :no-undoc-members:
7 |     :no-show-inheritance:
8 | 


--------------------------------------------------------------------------------
/docs/source/deepctr.rst:
--------------------------------------------------------------------------------
 1 | deepctr package
 2 | ===============
 3 | 
 4 | Subpackages
 5 | -----------
 6 | 
 7 | .. toctree::
 8 | 
 9 |     deepctr.contrib
10 |     deepctr.layers
11 |     deepctr.models
12 | 
13 | Submodules
14 | ----------
15 | 
16 | .. toctree::
17 | 
18 |    deepctr.inputs
19 |    deepctr.utils
20 | 
21 | Module contents
22 | ---------------
23 | 
24 | .. automodule:: deepctr
25 |     :members:
26 |     :undoc-members:
27 |     :show-inheritance:
28 | 


--------------------------------------------------------------------------------
/docs/source/deepctr.utils.rst:
--------------------------------------------------------------------------------
1 | deepctr.utils module
2 | ====================
3 | 
4 | .. automodule:: deepctr.utils
5 |     :members:
6 |     :undoc-members:
7 |     :show-inheritance:
8 | 


--------------------------------------------------------------------------------
/docs/source/index.rst:
--------------------------------------------------------------------------------
 1 | .. DeepCTR documentation master file, created by
 2 |    sphinx-quickstart on Fri Nov 23 21:08:54 2018.
 3 |    You can adapt this file completely to your liking, but it should at least
 4 |    contain the root `toctree` directive.
 5 | 
 6 | Welcome to DeepCTR's documentation!
 7 | ===================================
 8 | 
 9 | |Downloads|_ |Stars|_ |Forks|_ |PyPii|_ |Issues|_ |Chat|_
10 | 
11 | .. |Downloads| image:: https://pepy.tech/badge/deepctr
12 | .. _Downloads: https://pepy.tech/project/deepctr
13 | 
14 | .. |Stars| image:: https://img.shields.io/github/stars/shenweichen/deepctr.svg
15 | .. _Stars: https://github.com/shenweichen/DeepCTR
16 | 
17 | .. |Forks| image:: https://img.shields.io/github/forks/shenweichen/deepctr.svg
18 | .. _Forks: https://github.com/shenweichen/DeepCTR/fork
19 | 
20 | .. |PyPii| image:: https://img.shields.io/pypi/v/deepctr.svg
21 | .. _PyPii: https://pypi.org/project/deepctr
22 | 
23 | .. |Issues| image:: https://img.shields.io/github/issues/shenweichen/deepctr.svg
24 | .. _Issues: https://github.com/shenweichen/deepctr/issues
25 | 
26 | .. |Chat| image:: https://img.shields.io/badge/chat-wechat-brightgreen?style=flat
27 | .. _Chat: ./#disscussiongroup
28 | 
29 | DeepCTR is an **Easy-to-use**, **Modular** and **Extendible** package of deep-learning based CTR models along with lots of core component layers which can be used to easily build custom models. You can use any complex model with ``model.fit()`` and ``model.predict()``.
30 | 
31 | - Provide ``tf.keras.Model`` like interface for **quick experiment**. `example <https://deepctr-doc.readthedocs.io/en/latest/Quick-Start.html#getting-started-4-steps-to-deepctr>`_
32 | - Provide  ``tensorflow estimator`` interface for **large scale data** and **distributed training**. `example <https://deepctr-doc.readthedocs.io/en/latest/Quick-Start.html#getting-started-4-steps-to-deepctr-estimator-with-tfrecord>`_
33 | - It is compatible with both ``tf 1.x``  and ``tf 2.x``.
34 | 
35 | Let's `Get Started! <./Quick-Start.html>`_ (`Chinese Introduction <https://zhuanlan.zhihu.com/p/53231955>`_)
36 | 
37 | You can read the latest code and related projects
38 | 
39 | - DeepCTR: https://github.com/shenweichen/DeepCTR
40 | - DeepMatch: https://github.com/shenweichen/DeepMatch
41 | - DeepCTR-Torch: https://github.com/shenweichen/DeepCTR-Torch
42 | 
43 | News
44 | -----
45 | 09/03/2021 : Add multitask learning models: `SharedBottom <./Features.html#sharedbottom>`_ , `ESMM <./Features.html#esmm-entire-space-multi-task-model>`_ , `MMOE <./Features.html#mmoe-multi-gate-mixture-of-experts>`_ , `PLE <./Features.html#ple-progressive-layered-extraction>`_ .  `running example <./Examples.html#multitask-learning-mmoe>`_ `Changelog <https://github.com/shenweichen/DeepCTR/releases/tag/v0.9.0>`_
46 | 
47 | 07/18/2021 : Support pre-defined key-value vocabulary in `Hash` Layer. `example <./Examples.html#hash-layer-with-pre-defined-key-value-vocabulary>`_ `Changelog <https://github.com/shenweichen/DeepCTR/releases/tag/v0.8.7>`_
48 | 
49 | 06/14/2021 : Add `IFM <./Features.html#ifm-input-aware-factorization-machine>`_ , `DIFM <./Features.html#difm-dual-input-aware-factorization-machine>`_ and `DeepFEFM <./Features.html#deepfefm-deep-field-embedded-factorization-machine>`_ . `Changelog <https://github.com/shenweichen/DeepCTR/releases/tag/v0.8.6>`_
50 | 
51 | DisscussionGroup
52 | -----------------------
53 | 
54 | `Discussions <https://github.com/shenweichen/DeepCTR/discussions>`_  公众号：**浅梦学习笔记**  wechat ID: **deepctrbot**
55 | 
56 | .. image:: ../pics/code.png
57 | 
58 | .. toctree::
59 |    :maxdepth: 2
60 |    :caption: Home:
61 | 
62 |    Quick-Start<Quick-Start.md>
63 |    Features<Features.md>
64 |    Examples<Examples.md>
65 |    FAQ<FAQ.md>
66 |    History<History.md>
67 | 
68 | .. toctree::
69 |    :maxdepth: 3
70 |    :caption: API:
71 | 
72 |    Models<Models>
73 |    Estimators<Estimators>
74 |    Layers<Layers>
75 | 
76 | 
77 | 
78 | 
79 | Indices and tables
80 | ==================
81 | 
82 | * :ref:`genindex`
83 | * :ref:`modindex`
84 | * :ref:`search`


--------------------------------------------------------------------------------
/docs/source/modules.rst:
--------------------------------------------------------------------------------
1 | deepctr
2 | =======
3 | 
4 | .. toctree::
5 |    :maxdepth: 4
6 | 
7 |    deepctr
8 | 


--------------------------------------------------------------------------------
/examples/criteo_sample.te.tfrecords:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bytedance/LargeBatchCTR/12101b44d8e753a6d8d954437287f96d68895d49/examples/criteo_sample.te.tfrecords


--------------------------------------------------------------------------------
/examples/criteo_sample.tr.tfrecords:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bytedance/LargeBatchCTR/12101b44d8e753a6d8d954437287f96d68895d49/examples/criteo_sample.tr.tfrecords


--------------------------------------------------------------------------------
/examples/gen_tfrecords.py:
--------------------------------------------------------------------------------
 1 | import tensorflow as tf
 2 | 
 3 | def make_example(line, sparse_feature_name, dense_feature_name, label_name):
 4 |     features = {feat: tf.train.Feature(int64_list=tf.train.Int64List(value=[int(line[1][feat])])) for feat in
 5 |                 sparse_feature_name}
 6 |     features.update(
 7 |         {feat: tf.train.Feature(float_list=tf.train.FloatList(value=[line[1][feat]])) for feat in dense_feature_name})
 8 |     features[label_name] = tf.train.Feature(float_list=tf.train.FloatList(value=[line[1][label_name]]))
 9 |     return tf.train.Example(features=tf.train.Features(feature=features))
10 | 
11 | 
def write_tfrecord(filename, df, sparse_feature_names, dense_feature_names, label_name):
    """Serialize every row of ``df`` into a TFRecord file.

    :param filename: path of the TFRecord file to write.
    :param df: object whose ``iterrows()`` yields the rows to serialize.
    :param sparse_feature_names: names written as int64 features.
    :param dense_feature_names: names written as float features.
    :param label_name: name of the label column, written as a float feature.
    """
    # ``tf.python_io`` only exists in TF 1.x; prefer ``tf.io.TFRecordWriter``
    # when it is available and fall back to the legacy location otherwise.
    io_module = getattr(tf, "io", None)
    writer_cls = getattr(io_module, "TFRecordWriter", None) or tf.python_io.TFRecordWriter

    # The context manager closes the file even if serializing a row raises.
    with writer_cls(filename) as writer:
        for line in df.iterrows():
            ex = make_example(line, sparse_feature_names, dense_feature_names, label_name)
            writer.write(ex.SerializeToString())
18 | 
19 | # write_tfrecord('./criteo_sample.tr.tfrecords',train,sparse_features,dense_features,'label')
20 | # write_tfrecord('./criteo_sample.te.tfrecords',test,sparse_features,dense_features,'label')
21 | 


--------------------------------------------------------------------------------
/examples/movielens_age_vocabulary.csv:
--------------------------------------------------------------------------------
1 | 1,1
2 | 2,18
3 | 3,25
4 | 4,35
5 | 5,45
6 | 6,50
7 | 7,56
8 | 


--------------------------------------------------------------------------------
/examples/run_all.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | function run_py(){
 4 | 
 5 |     code_path=./
 6 |     for file in $(ls)
 7 |     do
 8 |       if [[ $file =~ .py ]]
 9 |         then
10 |           python $code_path$file
11 |           if [ $? -eq 0 ]
12 |             then
13 |               echo run $code_path$file succeed in $python_version
14 |             else
15 |               echo run $code_path$file failed in $python_version
16 |               exit -1
17 |           fi
18 |       fi
19 |     done
20 | 
21 | 
22 | }
23 | 
# Install the package and run all examples under each interpreter.
# run_py exits the script on the first failing example, so each success
# message below is only reached when that interpreter's run passed.

## python3
python_version=python3
source activate base
cd ..
python setup.py install
cd ./examples
run_py
echo "all examples run succeed in python3.6"

#python2
python_version=python2
source activate py27
cd ..
python setup.py install
cd ./examples
run_py
echo "all examples run succeed in python2.7"

echo "all examples run succeed in python2.7 and python3.6"


--------------------------------------------------------------------------------
/examples/run_classification_criteo.py:
--------------------------------------------------------------------------------
 1 | import pandas as pd
 2 | from sklearn.metrics import log_loss, roc_auc_score
 3 | from sklearn.model_selection import train_test_split
 4 | from sklearn.preprocessing import LabelEncoder, MinMaxScaler
 5 | 
 6 | from deepctr.models import DeepFM
 7 | from deepctr.feature_column import SparseFeat, DenseFeat, get_feature_names
 8 | 
 9 | if __name__ == "__main__":
10 |     data = pd.read_csv('./criteo_sample.txt')
11 | 
12 |     sparse_features = ['C' + str(i) for i in range(1, 27)]
13 |     dense_features = ['I' + str(i) for i in range(1, 14)]
14 | 
15 |     data[sparse_features] = data[sparse_features].fillna('-1', )
16 |     data[dense_features] = data[dense_features].fillna(0, )
17 |     target = ['label']
18 | 
19 |     # 1.Label Encoding for sparse features,and do simple Transformation for dense features
20 |     for feat in sparse_features:
21 |         lbe = LabelEncoder()
22 |         data[feat] = lbe.fit_transform(data[feat])
23 |     mms = MinMaxScaler(feature_range=(0, 1))
24 |     data[dense_features] = mms.fit_transform(data[dense_features])
25 | 
26 |     # 2.count #unique features for each sparse field,and record dense feature field name
27 | 
28 |     fixlen_feature_columns = [SparseFeat(feat, vocabulary_size=data[feat].max() + 1, embedding_dim=4)
29 |                               for i, feat in enumerate(sparse_features)] + [DenseFeat(feat, 1, )
30 |                                                                             for feat in dense_features]
31 | 
32 |     dnn_feature_columns = fixlen_feature_columns
33 |     linear_feature_columns = fixlen_feature_columns
34 | 
35 |     feature_names = get_feature_names(linear_feature_columns + dnn_feature_columns)
36 | 
37 |     # 3.generate input data for model
38 | 
39 |     train, test = train_test_split(data, test_size=0.2, random_state=2020)
40 |     train_model_input = {name: train[name] for name in feature_names}
41 |     test_model_input = {name: test[name] for name in feature_names}
42 | 
43 |     # 4.Define Model,train,predict and evaluate
44 |     model = DeepFM(linear_feature_columns, dnn_feature_columns, task='binary')
45 |     model.compile("adam", "binary_crossentropy",
46 |                   metrics=['binary_crossentropy'], )
47 | 
48 |     history = model.fit(train_model_input, train[target].values,
49 |                         batch_size=256, epochs=10, verbose=2, validation_split=0.2, )
50 |     pred_ans = model.predict(test_model_input, batch_size=256)
51 |     print("test LogLoss", round(log_loss(test[target].values, pred_ans), 4))
52 |     print("test AUC", round(roc_auc_score(test[target].values, pred_ans), 4))
53 | 


--------------------------------------------------------------------------------
/examples/run_classification_criteo_hash.py:
--------------------------------------------------------------------------------
 1 | import pandas as pd
 2 | from sklearn.metrics import log_loss, roc_auc_score
 3 | from sklearn.model_selection import train_test_split
 4 | from sklearn.preprocessing import MinMaxScaler
 5 | 
 6 | from deepctr.models import DeepFM
 7 | from deepctr.feature_column import SparseFeat, DenseFeat,get_feature_names
 8 | 
 9 | if __name__ == "__main__":
10 |     data = pd.read_csv('./criteo_sample.txt')
11 | 
12 |     sparse_features = ['C' + str(i) for i in range(1, 27)]
13 |     dense_features = ['I' + str(i) for i in range(1, 14)]
14 | 
15 |     data[sparse_features] = data[sparse_features].fillna('-1', )
16 |     data[dense_features] = data[dense_features].fillna(0, )
17 |     target = ['label']
18 | 
19 |     # 1.do simple Transformation for dense features
20 |     mms = MinMaxScaler(feature_range=(0, 1))
21 |     data[dense_features] = mms.fit_transform(data[dense_features])
22 | 
23 |     # 2.set hashing space for each sparse field,and record dense feature field name
24 | 
25 |     fixlen_feature_columns = [SparseFeat(feat, vocabulary_size=1000,embedding_dim=4, use_hash=True, dtype='string')  # since the input is string
26 |                               for feat in sparse_features] + [DenseFeat(feat, 1, )
27 |                           for feat in dense_features]
28 | 
29 |     linear_feature_columns = fixlen_feature_columns
30 |     dnn_feature_columns = fixlen_feature_columns
31 |     feature_names = get_feature_names(linear_feature_columns + dnn_feature_columns, )
32 | 
33 |     # 3.generate input data for model
34 | 
35 |     train, test = train_test_split(data, test_size=0.2, random_state=2020)
36 | 
37 |     train_model_input = {name:train[name] for name in feature_names}
38 |     test_model_input = {name:test[name] for name in feature_names}
39 | 
40 | 
41 |     # 4.Define Model,train,predict and evaluate
42 |     model = DeepFM(linear_feature_columns,dnn_feature_columns, task='binary')
43 |     model.compile("adam", "binary_crossentropy",
44 |                   metrics=['binary_crossentropy'], )
45 | 
46 |     history = model.fit(train_model_input, train[target].values,
47 |                         batch_size=256, epochs=10, verbose=2, validation_split=0.2, )
48 |     pred_ans = model.predict(test_model_input, batch_size=256)
49 |     print("test LogLoss", round(log_loss(test[target].values, pred_ans), 4))
50 |     print("test AUC", round(roc_auc_score(test[target].values, pred_ans), 4))
51 | 


--------------------------------------------------------------------------------
/examples/run_classification_criteo_multi_gpu.py:
--------------------------------------------------------------------------------
 1 | import pandas as pd
 2 | from sklearn.metrics import log_loss, roc_auc_score
 3 | from sklearn.model_selection import train_test_split
 4 | from sklearn.preprocessing import LabelEncoder, MinMaxScaler
 5 | from tensorflow.python.keras.utils import multi_gpu_model
 6 | 
 7 | from deepctr.feature_column import SparseFeat, DenseFeat,get_feature_names
 8 | from deepctr.models import DeepFM
 9 | 
if __name__ == "__main__":
    # Load the Criteo sample; the path is relative to the working directory.
    data = pd.read_csv('./criteo_sample.txt')

    sparse_features = ['C' + str(i) for i in range(1, 27)]
    dense_features = ['I' + str(i) for i in range(1, 14)]

    # Missing values: the string '-1' for sparse columns, 0 for dense columns.
    data[sparse_features] = data[sparse_features].fillna('-1')
    data[dense_features] = data[dense_features].fillna(0)
    target = ['label']

    # Step 1: label-encode each sparse feature and scale dense features to [0, 1].
    for feat in sparse_features:
        lbe = LabelEncoder()
        data[feat] = lbe.fit_transform(data[feat])
    mms = MinMaxScaler(feature_range=(0, 1))
    data[dense_features] = mms.fit_transform(data[dense_features])

    # Step 2: vocabulary size per sparse field is the largest encoded id + 1;
    # dense fields are recorded by name.
    sparse_columns = []
    for feat in sparse_features:
        sparse_columns.append(
            SparseFeat(feat, vocabulary_size=data[feat].max() + 1, embedding_dim=4))
    dense_columns = [DenseFeat(feat, 1) for feat in dense_features]
    fixlen_feature_columns = sparse_columns + dense_columns

    # The same columns feed both the linear and the DNN part.
    dnn_feature_columns = fixlen_feature_columns
    linear_feature_columns = fixlen_feature_columns

    feature_names = get_feature_names(linear_feature_columns + dnn_feature_columns)

    # Step 3: build the model inputs, one entry per feature name.
    train, test = train_test_split(data, test_size=0.2, random_state=2020)

    train_model_input = {}
    for name in feature_names:
        train_model_input[name] = train[name]
    test_model_input = {}
    for name in feature_names:
        test_model_input[name] = test[name]

    # Step 4: define the model, wrap it for two GPUs, then train and evaluate.
    model = DeepFM(linear_feature_columns, dnn_feature_columns, task='binary')
    model = multi_gpu_model(model, gpus=2)

    model.compile("adam", "binary_crossentropy", metrics=['binary_crossentropy'])

    history = model.fit(
        train_model_input,
        train[target].values,
        batch_size=256,
        epochs=10,
        verbose=2,
        validation_split=0.2,
    )
    pred_ans = model.predict(test_model_input, batch_size=256)

    test_labels = test[target].values
    test_logloss = round(log_loss(test_labels, pred_ans), 4)
    print("test LogLoss", test_logloss)
    test_auc = round(roc_auc_score(test_labels, pred_ans), 4)
    print("test AUC", test_auc)
57 | 


--------------------------------------------------------------------------------
/examples/run_dien.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import tensorflow as tf
 3 | 
 4 | from deepctr.feature_column import SparseFeat, VarLenSparseFeat, DenseFeat,get_feature_names
 5 | from deepctr.models import DIEN
 6 | 
 7 | 
 8 | def get_xy_fd(use_neg=False, hash_flag=False):
 9 |     feature_columns = [SparseFeat('user', 3, embedding_dim=10, use_hash=hash_flag),
10 |                        SparseFeat('gender', 2, embedding_dim=4, use_hash=hash_flag),
11 |                        SparseFeat('item_id', 3 + 1, embedding_dim=8, use_hash=hash_flag),
12 |                        SparseFeat('cate_id', 2 + 1, embedding_dim=4, use_hash=hash_flag),
13 |                        DenseFeat('pay_score', 1)]
14 | 
15 |     feature_columns += [
16 |         VarLenSparseFeat(SparseFeat('hist_item_id', vocabulary_size=3 + 1, embedding_dim=8, embedding_name='item_id'),
17 |                          maxlen=4, length_name="seq_length"),
18 |         VarLenSparseFeat(SparseFeat('hist_cate_id', 2 + 1, embedding_dim=4, embedding_name='cate_id'), maxlen=4,
19 |                          length_name="seq_length")]
20 | 
21 |     behavior_feature_list = ["item_id", "cate_id"]
22 |     uid = np.array([0, 1, 2])
23 |     ugender = np.array([0, 1, 0])
24 |     iid = np.array([1, 2, 3])  # 0 is mask value
25 |     cate_id = np.array([1, 2, 2])  # 0 is mask value
26 |     score = np.array([0.1, 0.2, 0.3])
27 | 
28 |     hist_iid = np.array([[1, 2, 3, 0], [1, 2, 3, 0], [1, 2, 0, 0]])
29 |     hist_cate_id = np.array([[1, 2, 2, 0], [1, 2, 2, 0], [1, 2, 0, 0]])
30 | 
31 |     behavior_length = np.array([3, 3, 2])
32 | 
33 |     feature_dict = {'user': uid, 'gender': ugender, 'item_id': iid, 'cate_id': cate_id,
34 |                     'hist_item_id': hist_iid, 'hist_cate_id': hist_cate_id,
35 |                     'pay_score': score, "seq_length": behavior_length}
36 | 
37 |     if use_neg:
38 |         feature_dict['neg_hist_item_id'] = np.array([[1, 2, 3, 0], [1, 2, 3, 0], [1, 2, 0, 0]])
39 |         feature_dict['neg_hist_cate_id'] = np.array([[1, 2, 2, 0], [1, 2, 2, 0], [1, 2, 0, 0]])
40 |         feature_columns += [
41 |             VarLenSparseFeat(SparseFeat('neg_hist_item_id', vocabulary_size=3 + 1, embedding_dim=8, embedding_name='item_id'),
42 |                              maxlen=4, length_name="seq_length"),
43 |             VarLenSparseFeat(SparseFeat('neg_hist_cate_id', 2 + 1, embedding_dim=4, embedding_name='cate_id'),
44 |                              maxlen=4, length_name="seq_length")]
45 | 
46 |     x = {name: feature_dict[name] for name in get_feature_names(feature_columns)}
47 |     y = np.array([1, 0, 1])
48 |     return x, y, feature_columns, behavior_feature_list
49 | 
50 | 
if __name__ == "__main__":
    # Compare the numeric major version: comparing version strings is
    # lexicographic and would misorder e.g. '10.0.0' against '2.0.0'.
    if int(tf.__version__.split('.')[0]) >= 2:
        tf.compat.v1.disable_eager_execution()
    USE_NEG = True
    x, y, feature_columns, behavior_feature_list = get_xy_fd(use_neg=USE_NEG)

    # Negative sampling is switched on together with the neg_hist_* inputs.
    model = DIEN(feature_columns, behavior_feature_list,
                 dnn_hidden_units=[4, 4, 4], dnn_dropout=0.6, gru_type="AUGRU", use_negsampling=USE_NEG)

    model.compile('adam', 'binary_crossentropy',
                  metrics=['binary_crossentropy'])
    history = model.fit(x, y, verbose=1, epochs=10, validation_split=0.5)
63 | 


--------------------------------------------------------------------------------
/examples/run_din.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | 
 3 | from deepctr.models import DIN
 4 | from deepctr.feature_column import SparseFeat, VarLenSparseFeat, DenseFeat, get_feature_names
 5 | 
 6 | 
 7 | def get_xy_fd():
 8 |     feature_columns = [SparseFeat('user', 3, embedding_dim=10), SparseFeat(
 9 |         'gender', 2, embedding_dim=4), SparseFeat('item_id', 3 + 1, embedding_dim=8),
10 |                        SparseFeat('cate_id', 2 + 1, embedding_dim=4), DenseFeat('pay_score', 1)]
11 |     feature_columns += [
12 |         VarLenSparseFeat(SparseFeat('hist_item_id', vocabulary_size=3 + 1, embedding_dim=8, embedding_name='item_id'),
13 |                          maxlen=4, length_name="seq_length"),
14 |         VarLenSparseFeat(SparseFeat('hist_cate_id', 2 + 1, embedding_dim=4, embedding_name='cate_id'), maxlen=4,
15 |                          length_name="seq_length")]
16 |     # Notice: History behavior sequence feature name must start with "hist_".
17 |     behavior_feature_list = ["item_id", "cate_id"]
18 |     uid = np.array([0, 1, 2])
19 |     ugender = np.array([0, 1, 0])
20 |     iid = np.array([1, 2, 3])  # 0 is mask value
21 |     cate_id = np.array([1, 2, 2])  # 0 is mask value
22 |     pay_score = np.array([0.1, 0.2, 0.3])
23 | 
24 |     hist_iid = np.array([[1, 2, 3, 0], [3, 2, 1, 0], [1, 2, 0, 0]])
25 |     hist_cate_id = np.array([[1, 2, 2, 0], [2, 2, 1, 0], [1, 2, 0, 0]])
26 |     seq_length = np.array([3, 3, 2])  # the actual length of the behavior sequence
27 | 
28 |     feature_dict = {'user': uid, 'gender': ugender, 'item_id': iid, 'cate_id': cate_id,
29 |                     'hist_item_id': hist_iid, 'hist_cate_id': hist_cate_id,
30 |                     'pay_score': pay_score, 'seq_length': seq_length}
31 |     x = {name: feature_dict[name] for name in get_feature_names(feature_columns)}
32 |     y = np.array([1, 0, 1])
33 |     return x, y, feature_columns, behavior_feature_list
34 | 
35 | 
if __name__ == "__main__":
    # Build the toy data, then train DIN on it for a few epochs.
    x, y, feature_columns, behavior_feature_list = get_xy_fd()

    model = DIN(feature_columns, behavior_feature_list)
    # model = BST(feature_columns, behavior_feature_list,att_head_num=4)

    tracked_metrics = ['binary_crossentropy']
    model.compile('adam', 'binary_crossentropy', metrics=tracked_metrics)

    history = model.fit(
        x,
        y,
        verbose=1,
        epochs=10,
        validation_split=0.5,
    )
43 | 


--------------------------------------------------------------------------------
/examples/run_dsin.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import tensorflow as tf
 3 | 
 4 | from deepctr.feature_column import SparseFeat, VarLenSparseFeat, DenseFeat,get_feature_names
 5 | from deepctr.models import DSIN
 6 | 
 7 | 
 8 | def get_xy_fd(hash_flag=False):
 9 |     feature_columns = [SparseFeat('user', 3, embedding_dim=10, use_hash=hash_flag),
10 |                        SparseFeat('gender', 2, embedding_dim=4, use_hash=hash_flag),
11 |                        SparseFeat('item', 3 + 1, embedding_dim=4, use_hash=hash_flag),
12 |                        SparseFeat('cate_id', 2 + 1, embedding_dim=4, use_hash=hash_flag),
13 |                        DenseFeat('pay_score', 1)]
14 |     feature_columns += [
15 |         VarLenSparseFeat(SparseFeat('sess_0_item', 3 + 1, embedding_dim=4, use_hash=hash_flag, embedding_name='item'),
16 |                          maxlen=4), VarLenSparseFeat(
17 |             SparseFeat('sess_0_cate_id', 2 + 1, embedding_dim=4, use_hash=hash_flag, embedding_name='cate_id'),
18 |             maxlen=4)]
19 |     feature_columns += [
20 |         VarLenSparseFeat(SparseFeat('sess_1_item', 3 + 1, embedding_dim=4, use_hash=hash_flag, embedding_name='item'),
21 |                          maxlen=4), VarLenSparseFeat(
22 |             SparseFeat('sess_1_cate_id', 2 + 1, embedding_dim=4, use_hash=hash_flag, embedding_name='cate_id'),
23 |             maxlen=4)]
24 | 
25 |     behavior_feature_list = ["item", "cate_id"]
26 |     uid = np.array([0, 1, 2])
27 |     ugender = np.array([0, 1, 0])
28 |     iid = np.array([1, 2, 3])  # 0 is mask value
29 |     cateid = np.array([1, 2, 2])  # 0 is mask value
30 |     score = np.array([0.1, 0.2, 0.3])
31 | 
32 |     sess1_iid = np.array([[1, 2, 3, 0], [3, 2, 1, 0], [0, 0, 0, 0]])
33 |     sess1_cate_id = np.array([[1, 2, 2, 0], [2, 2, 1, 0], [0, 0, 0, 0]])
34 | 
35 |     sess2_iid = np.array([[1, 2, 3, 0], [0, 0, 0, 0], [0, 0, 0, 0]])
36 |     sess2_cate_id = np.array([[1, 2, 2, 0], [0, 0, 0, 0], [0, 0, 0, 0]])
37 | 
38 |     sess_number = np.array([2, 1, 0])
39 | 
40 |     feature_dict = {'user': uid, 'gender': ugender, 'item': iid, 'cate_id': cateid,
41 |                     'sess_0_item': sess1_iid, 'sess_0_cate_id': sess1_cate_id, 'pay_score': score,
42 |                     'sess_1_item': sess2_iid, 'sess_1_cate_id': sess2_cate_id, }
43 | 
44 |     x = {name: feature_dict[name] for name in get_feature_names(feature_columns)}
45 |     x["sess_length"] = sess_number
46 |     y = np.array([1, 0, 1])
47 |     return x, y, feature_columns, behavior_feature_list
48 | 
49 | 
if __name__ == "__main__":
    # Compare the numeric major version: comparing version strings is
    # lexicographic and would misorder e.g. '10.0.0' against '2.0.0'.
    if int(tf.__version__.split('.')[0]) >= 2:
        tf.compat.v1.disable_eager_execution()

    # True is passed as hash_flag, so the sparse features use hashing.
    x, y, feature_columns, behavior_feature_list = get_xy_fd(True)

    # sess_max_count matches the two sess_0_* / sess_1_* feature groups.
    model = DSIN(feature_columns, behavior_feature_list, sess_max_count=2,
                 dnn_hidden_units=[4, 4, 4], dnn_dropout=0.5, )

    model.compile('adam', 'binary_crossentropy',
                  metrics=['binary_crossentropy'])
    history = model.fit(x, y, verbose=1, epochs=10, validation_split=0.5)
62 | 


--------------------------------------------------------------------------------
/examples/run_estimator_pandas_classification.py:
--------------------------------------------------------------------------------
 1 | import pandas as pd
 2 | import tensorflow as tf
 3 | from sklearn.metrics import log_loss, roc_auc_score
 4 | from sklearn.model_selection import train_test_split
 5 | from sklearn.preprocessing import LabelEncoder, MinMaxScaler
 6 | 
 7 | from deepctr.estimator import DeepFMEstimator
 8 | from deepctr.estimator.inputs import input_fn_pandas
 9 | 
if __name__ == "__main__":
    data = pd.read_csv('./criteo_sample.txt')

    # Criteo layout used here: categorical columns C1..C26, numeric columns I1..I13.
    sparse_features = ['C{}'.format(idx) for idx in range(1, 27)]
    dense_features = ['I{}'.format(idx) for idx in range(1, 14)]
    target = ['label']

    # Missing categorical values become the token '-1', missing numeric values become 0.
    data[sparse_features] = data[sparse_features].fillna('-1', )
    data[dense_features] = data[dense_features].fillna(0, )

    # 1. Integer-encode each sparse column and min-max scale the dense columns to [0, 1].
    for column in sparse_features:
        data[column] = LabelEncoder().fit_transform(data[column])
    scaler = MinMaxScaler(feature_range=(0, 1))
    data[dense_features] = scaler.fit_transform(data[dense_features])

    # 2. Describe every field as a tf.feature_column for the linear and the DNN part.
    linear_feature_columns = []
    dnn_feature_columns = []

    for column in sparse_features:
        # Label-encoded ids run from 0 to max, hence max + 1 buckets.
        num_buckets = data[column].max() + 1
        dnn_feature_columns.append(tf.feature_column.embedding_column(
            tf.feature_column.categorical_column_with_identity(column, num_buckets), 4))
        linear_feature_columns.append(
            tf.feature_column.categorical_column_with_identity(column, num_buckets))
    for column in dense_features:
        dnn_feature_columns.append(tf.feature_column.numeric_column(column))
        linear_feature_columns.append(tf.feature_column.numeric_column(column))

    # 3. Split the frame and wrap both parts as estimator input functions.
    train, test = train_test_split(data, test_size=0.2, random_state=2021)

    train_model_input = input_fn_pandas(train, sparse_features + dense_features, 'label', shuffle=True)
    # No label column is passed for the prediction input.
    test_model_input = input_fn_pandas(test, sparse_features + dense_features, None, shuffle=False)

    # 4. Define the model, train it, predict and evaluate.
    run_config = tf.estimator.RunConfig(tf_random_seed=2021)
    model = DeepFMEstimator(linear_feature_columns, dnn_feature_columns, task='binary',
                            config=run_config)

    model.train(train_model_input)
    # predict() yields one dict per sample; only the 'pred' entry is kept.
    pred_ans = [row['pred'] for row in model.predict(test_model_input)]

    labels = test[target].values
    print("test LogLoss", round(log_loss(labels, pred_ans), 4))
    print("test AUC", round(roc_auc_score(labels, pred_ans), 4))
59 | 


--------------------------------------------------------------------------------
/examples/run_estimator_tfrecord_classification.py:
--------------------------------------------------------------------------------
 1 | import tensorflow as tf
 2 | 
 3 | from tensorflow.python.ops.parsing_ops import FixedLenFeature
 4 | from deepctr.estimator import DeepFMEstimator
 5 | from deepctr.estimator.inputs import input_fn_tfrecord
 6 | 
 7 | if __name__ == "__main__":
 8 | 
 9 |     # 1.generate feature_column for linear part and dnn part
10 | 
11 |     sparse_features = ['C' + str(i) for i in range(1, 27)]
12 |     dense_features = ['I' + str(i) for i in range(1, 14)]
13 | 
14 |     dnn_feature_columns = []
15 |     linear_feature_columns = []
16 | 
17 |     for i, feat in enumerate(sparse_features):
18 |         dnn_feature_columns.append(tf.feature_column.embedding_column(
19 |             tf.feature_column.categorical_column_with_identity(feat, 1000), 4))
20 |         linear_feature_columns.append(tf.feature_column.categorical_column_with_identity(feat, 1000))
21 |     for feat in dense_features:
22 |         dnn_feature_columns.append(tf.feature_column.numeric_column(feat))
23 |         linear_feature_columns.append(tf.feature_column.numeric_column(feat))
24 | 
25 |     # 2.generate input data for model
26 | 
27 |     feature_description = {k: FixedLenFeature(dtype=tf.int64, shape=1) for k in sparse_features}
28 |     feature_description.update(
29 |         {k: FixedLenFeature(dtype=tf.float32, shape=1) for k in dense_features})
30 |     feature_description['label'] = FixedLenFeature(dtype=tf.float32, shape=1)
31 | 
32 |     train_model_input = input_fn_tfrecord('./criteo_sample.tr.tfrecords', feature_description, 'label', batch_size=256,
33 |                                           num_epochs=1, shuffle_factor=10)
34 |     test_model_input = input_fn_tfrecord('./criteo_sample.te.tfrecords', feature_description, 'label',
35 |                                          batch_size=2 ** 14, num_epochs=1, shuffle_factor=0)
36 | 
37 |     # 3.Define Model,train,predict and evaluate
38 |     model = DeepFMEstimator(linear_feature_columns, dnn_feature_columns, task='binary',
39 |                             config=tf.estimator.RunConfig(tf_random_seed=2021))
40 | 
41 |     model.train(train_model_input)
42 |     eval_result = model.evaluate(test_model_input)
43 | 
44 |     print(eval_result)
45 | 


--------------------------------------------------------------------------------
/examples/run_flen.py:
--------------------------------------------------------------------------------
 1 | import pandas as pd
 2 | from sklearn.metrics import log_loss, roc_auc_score
 3 | from sklearn.model_selection import train_test_split
 4 | from sklearn.preprocessing import LabelEncoder
 5 | 
 6 | from deepctr.feature_column import SparseFeat,get_feature_names
 7 | from deepctr.models import FLEN
 8 | 
 9 | if __name__ == "__main__":
10 |     data = pd.read_csv('./avazu_sample.txt')
11 |     data['day'] = data['hour'].apply(lambda x: str(x)[4:6])
12 |     data['hour'] = data['hour'].apply(lambda x: str(x)[6:])
13 | 
14 |     sparse_features = ['hour', 'C1', 'banner_pos', 'site_id', 'site_domain',
15 |                        'site_category', 'app_id', 'app_domain', 'app_category', 'device_id',
16 |                        'device_model', 'device_type', 'device_conn_type',  # 'device_ip',
17 |                        'C14',
18 |                        'C15', 'C16', 'C17', 'C18', 'C19', 'C20', 'C21', ]
19 | 
20 |     data[sparse_features] = data[sparse_features].fillna('-1', )
21 |     target = ['click']
22 | 
23 |     # 1.Label Encoding for sparse features,and do simple Transformation for dense features
24 |     for feat in sparse_features:
25 |         lbe = LabelEncoder()
26 |         data[feat] = lbe.fit_transform(data[feat])
27 | 
28 |     # 2.count #unique features for each sparse field,and record dense feature field name
29 | 
30 |     field_info = dict(C14='user', C15='user', C16='user', C17='user',
31 |                       C18='user', C19='user', C20='user', C21='user', C1='user',
32 |                       banner_pos='context', site_id='context',
33 |                       site_domain='context', site_category='context',
34 |                       app_id='item', app_domain='item', app_category='item',
35 |                       device_model='user', device_type='user',
36 |                       device_conn_type='context', hour='context',
37 |                       device_id='user'
38 |                       )
39 | 
40 |     fixlen_feature_columns = [
41 |         SparseFeat(name, vocabulary_size=data[name].max() + 1, embedding_dim=16, use_hash=False, dtype='int32',
42 |                    group_name=field_info[name]) for name in sparse_features]
43 | 
44 |     dnn_feature_columns = fixlen_feature_columns
45 |     linear_feature_columns = fixlen_feature_columns
46 | 
47 |     feature_names = get_feature_names(linear_feature_columns + dnn_feature_columns)
48 | 
49 |     # 3.generate input data for model
50 | 
51 |     train, test = train_test_split(data, test_size=0.2, random_state=2020)
52 |     train_model_input = {name: train[name] for name in feature_names}
53 |     test_model_input = {name: test[name] for name in feature_names}
54 | 
55 |     # 4.Define Model,train,predict and evaluate
56 |     model = FLEN(linear_feature_columns, dnn_feature_columns, task='binary')
57 |     model.compile("adam", "binary_crossentropy",
58 |                   metrics=['binary_crossentropy'], )
59 | 
60 |     history = model.fit(train_model_input, train[target].values,
61 |                         batch_size=256, epochs=10, verbose=2, validation_split=0.2, )
62 |     pred_ans = model.predict(test_model_input, batch_size=256)
63 |     print("test LogLoss", round(log_loss(test[target].values, pred_ans), 4))
64 |     print("test AUC", round(roc_auc_score(test[target].values, pred_ans), 4))
65 | 


--------------------------------------------------------------------------------
/examples/run_multivalue_movielens.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import pandas as pd
 3 | from sklearn.preprocessing import LabelEncoder
 4 | from tensorflow.python.keras.preprocessing.sequence import pad_sequences
 5 | 
 6 | from deepctr.feature_column import SparseFeat, VarLenSparseFeat,get_feature_names
 7 | from deepctr.models import DeepFM
 8 | 
 9 | 
def split(x):
    """Encode a '|'-separated string as a list of integer ids.

    Unseen tokens are registered in the module-level ``key2index`` dict with
    the next free id. Ids start at 1 because 0 is reserved as the padding
    value for sequence inputs.
    """
    tokens = x.split('|')
    for token in tokens:
        # The default is evaluated before insertion, so a new token receives
        # len(key2index) + 1; an already known token keeps its id.
        key2index.setdefault(token, len(key2index) + 1)
    return [key2index[token] for token in tokens]
17 | 
18 | 
if __name__ == "__main__":
    data = pd.read_csv("./movielens_sample.txt")
    sparse_features = ["movie_id", "user_id",
                       "gender", "age", "occupation", "zip", ]
    target = ['rating']

    # 1. Label-encode the sparse features.
    for feat in sparse_features:
        lbe = LabelEncoder()
        data[feat] = lbe.fit_transform(data[feat])

    # Encode the multi-valued `genres` field; split() fills key2index as a side effect.
    key2index = {}
    genres_list = list(map(split, data['genres'].values))
    genres_length = np.array(list(map(len, genres_list)))
    max_len = max(genres_length)
    # Notice : padding=`post`, so the padding value 0 is appended after the real ids.
    genres_list = pad_sequences(genres_list, maxlen=max_len, padding='post', )

    # 2. Count unique values per sparse field and configure the sequence feature.
    fixlen_feature_columns = [SparseFeat(feat, data[feat].max() + 1, embedding_dim=4)
                              for feat in sparse_features]

    use_weighted_sequence = False
    # The weight input is only declared when weighting is switched on.
    weight_name = 'genres_weight' if use_weighted_sequence else None
    # Notice : value 0 is for padding for sequence input feature, hence the
    # vocabulary holds len(key2index) + 1 entries.
    varlen_feature_columns = [VarLenSparseFeat(
        SparseFeat('genres', vocabulary_size=len(key2index) + 1, embedding_dim=4),
        maxlen=max_len, combiner='mean', weight_name=weight_name)]

    linear_feature_columns = fixlen_feature_columns + varlen_feature_columns
    dnn_feature_columns = fixlen_feature_columns + varlen_feature_columns

    feature_names = get_feature_names(linear_feature_columns + dnn_feature_columns)

    # 3. Generate input data for the model.
    model_input = {name: data[name] for name in sparse_features}
    model_input["genres"] = genres_list
    if use_weighted_sequence:
        # Only feed the weights when the feature columns declare a matching
        # input; otherwise the dict would carry a key no model input uses.
        model_input["genres_weight"] = np.random.randn(data.shape[0], max_len, 1)

    # 4. Define the model, compile and train.
    model = DeepFM(linear_feature_columns, dnn_feature_columns, task='regression')

    model.compile("adam", "mse", metrics=['mse'], )
    history = model.fit(model_input, data[target].values,
                        batch_size=256, epochs=10, verbose=2, validation_split=0.2, )
69 | 


--------------------------------------------------------------------------------
/examples/run_multivalue_movielens_hash.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import pandas as pd
 3 | from tensorflow.python.keras.preprocessing.sequence import pad_sequences
 4 | 
 5 | from deepctr.feature_column import SparseFeat, VarLenSparseFeat,get_feature_names
 6 | from deepctr.models import DeepFM
 7 | 
 8 | if __name__ == "__main__":
 9 |     data = pd.read_csv("./movielens_sample.txt")
10 |     sparse_features = ["movie_id", "user_id",
11 |                        "gender", "age", "occupation", "zip", ]
12 | 
13 |     data[sparse_features] = data[sparse_features].astype(str)
14 |     target = ['rating']
15 | 
16 |     # 1.Use hashing encoding on the fly for sparse features,and process sequence features
17 | 
18 |     genres_list = list(map(lambda x: x.split('|'), data['genres'].values))
19 |     genres_length = np.array(list(map(len, genres_list)))
20 |     max_len = max(genres_length)
21 | 
22 |     # Notice : padding=`post`
23 |     genres_list = pad_sequences(genres_list, maxlen=max_len, padding='post', dtype=object, value=0).astype(str)
24 |     # 2.set hashing space for each sparse field and generate feature config for sequence feature
25 | 
26 |     fixlen_feature_columns = [SparseFeat(feat, data[feat].nunique() * 5, embedding_dim=4, use_hash=True, dtype='string')
27 |                               for feat in sparse_features]
28 |     varlen_feature_columns = [
29 |         VarLenSparseFeat(SparseFeat('genres', vocabulary_size=100, embedding_dim=4, use_hash=True, dtype="string"),
30 |                          maxlen=max_len, combiner='mean',
31 |                          )]  # Notice : value 0 is for padding for sequence input feature
32 |     linear_feature_columns = fixlen_feature_columns + varlen_feature_columns
33 |     dnn_feature_columns = fixlen_feature_columns + varlen_feature_columns
34 |     feature_names = get_feature_names(linear_feature_columns + dnn_feature_columns)
35 | 
36 |     # 3.generate input data for model
37 |     model_input = {name: data[name] for name in feature_names}
38 |     model_input['genres'] = genres_list
39 | 
40 |     # 4.Define Model,compile and train
41 |     model = DeepFM(linear_feature_columns, dnn_feature_columns, task='regression')
42 | 
43 |     model.compile("adam", "mse", metrics=['mse'], )
44 |     history = model.fit(model_input, data[target].values,
45 |                         batch_size=256, epochs=10, verbose=2, validation_split=0.2, )
46 | 


--------------------------------------------------------------------------------
/examples/run_multivalue_movielens_vocab_hash.py:
--------------------------------------------------------------------------------
 1 | from deepctr.models import DeepFM
 2 | from deepctr.feature_column import SparseFeat, VarLenSparseFeat, get_feature_names
 3 | import numpy as np
 4 | import pandas as pd
 5 | from tensorflow.python.keras.preprocessing.sequence import pad_sequences
 6 | 
 7 | try:
 8 |     import tensorflow.compat.v1 as tf
 9 | except ImportError as e:
10 |     import tensorflow as tf
11 | 
if __name__ == "__main__":
    data = pd.read_csv("./movielens_sample.txt")
    sparse_features = ["movie_id", "user_id",
                       "gender", "age", "occupation", "zip", ]
    target = ['rating']

    # The hashed features are declared with dtype 'string', so the raw
    # columns are converted to str first.
    data[sparse_features] = data[sparse_features].astype(str)

    # 1. Hash encoding happens on the fly; only the sequence field needs preparing.
    genres_list = [genres.split('|') for genres in data['genres'].values]
    genres_length = np.array([len(genres) for genres in genres_list])
    max_len = max(genres_length)

    # Notice : padding=`post`; the padding value 0 becomes the string '0' after astype(str).
    padded = pad_sequences(genres_list, maxlen=max_len, padding='post', dtype=object, value=0)
    genres_list = padded.astype(str)

    # 2. Set the hashing space for each sparse field and configure the sequence feature.
    fixlen_feature_columns = []
    for feat in sparse_features:
        # Only the `age` field is given a vocabulary file; every other field
        # passes vocabulary_path=None.
        vocab_file = './movielens_age_vocabulary.csv' if feat == 'age' else None
        fixlen_feature_columns.append(
            SparseFeat(feat, data[feat].nunique() * 5, embedding_dim=4, use_hash=True,
                       vocabulary_path=vocab_file,
                       dtype='string'))

    genres_feat = SparseFeat('genres', vocabulary_size=100, embedding_dim=4,
                             use_hash=True, dtype="string")
    varlen_feature_columns = [VarLenSparseFeat(genres_feat, maxlen=max_len, combiner='mean', )]

    linear_feature_columns = fixlen_feature_columns + varlen_feature_columns
    dnn_feature_columns = fixlen_feature_columns + varlen_feature_columns
    feature_names = get_feature_names(linear_feature_columns + dnn_feature_columns)

    # 3. Generate input data for the model; the raw `genres` column is
    #    replaced by the padded token matrix.
    model_input = {name: data[name] for name in feature_names}
    model_input['genres'] = genres_list

    # 4. Define the model, compile and train.
    model = DeepFM(linear_feature_columns, dnn_feature_columns, task='regression')
    model.compile("adam", "mse", metrics=['mse'], )

    labels = data[target].values
    fit_options = dict(batch_size=256, epochs=10, verbose=2, validation_split=0.2)
    if not hasattr(tf, 'version') or tf.version.VERSION < '2.0.0':
        # TF 1.x path: the tables initializer is run in a session before fitting.
        with tf.Session() as sess:
            sess.run(tf.tables_initializer())
            history = model.fit(model_input, labels, **fit_options)
    else:
        history = model.fit(model_input, labels, **fit_options)
58 | 


--------------------------------------------------------------------------------
/examples/run_regression_movielens.py:
--------------------------------------------------------------------------------
 1 | import pandas as pd
 2 | from sklearn.metrics import mean_squared_error
 3 | from sklearn.model_selection import train_test_split
 4 | from sklearn.preprocessing import LabelEncoder
 5 | 
 6 | from deepctr.models import DeepFM
 7 | from deepctr.feature_column import SparseFeat,get_feature_names
 8 | 
 9 | if __name__ == "__main__":
10 | 
11 |     data = pd.read_csv("./movielens_sample.txt")
12 |     sparse_features = ["movie_id", "user_id",
13 |                        "gender", "age", "occupation", "zip"]
14 |     target = ['rating']
15 | 
16 |     # 1.Label Encoding for sparse features,and do simple Transformation for dense features
17 |     for feat in sparse_features:
18 |         lbe = LabelEncoder()
19 |         data[feat] = lbe.fit_transform(data[feat])
20 |     # 2.count #unique features for each sparse field
21 |     fixlen_feature_columns = [SparseFeat(feat, data[feat].max() + 1,embedding_dim=4)
22 |                               for feat in sparse_features]
23 |     linear_feature_columns = fixlen_feature_columns
24 |     dnn_feature_columns = fixlen_feature_columns
25 |     feature_names = get_feature_names(linear_feature_columns + dnn_feature_columns)
26 | 
27 |     # 3.generate input data for model
28 |     train, test = train_test_split(data, test_size=0.2, random_state=2020)
29 |     train_model_input = {name:train[name].values for name in feature_names}
30 |     test_model_input = {name:test[name].values for name in feature_names}
31 | 
32 |     # 4.Define Model,train,predict and evaluate
33 |     model = DeepFM(linear_feature_columns, dnn_feature_columns, task='regression')
34 |     model.compile("adam", "mse", metrics=['mse'], )
35 | 
36 |     history = model.fit(train_model_input, train[target].values,
37 |                         batch_size=256, epochs=10, verbose=2, validation_split=0.2, )
38 |     pred_ans = model.predict(test_model_input, batch_size=256)
39 |     print("test MSE", round(mean_squared_error(
40 |         test[target].values, pred_ans), 4))
41 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | tensorflow-gpu==2.4.0
2 | tensorflow-addons==0.12.0
3 | tensorboard_plugin_profile
4 | pandas
5 | scikit-learn
6 | pyarrow
7 | 


--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
 1 | [metadata]
 2 | description-file = README.md
 3 | 
 4 | #[coverage:run]
 5 | #branch = True
 6 | 
 7 | [coverage:report]
 8 | exclude_lines =
 9 |     # Have to re-enable the standard pragma
10 |     pragma: no cover
11 |     # Don't complain about missing debug-only code:
12 |     def __repr__
13 |     if self\.debug
14 | 
15 |     # Don't complain if tests don't hit defensive assertion code:
16 |     raise ValueError
17 |     raise AssertionError
18 |     raise NotImplementedError
19 | 
20 |     # Don't complain if non-runnable code isn't run:
21 |     if 0:
22 |     if False:
23 |     if __name__ == .__main__.:
24 | 
25 | [coverage:run]
26 | omit =
27 |     # omit anything in a .local directory anywhere
28 |     #*/.local/*
29 |     # omit everything in /usr
30 |     deepctr/contrib/*
31 |     # omit this single file
32 |     #utils/tirefire.py


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | import setuptools
 2 | 
 3 | with open("README.md", "r") as fh:
 4 |     long_description = fh.read()
 5 | 
 6 | REQUIRED_PACKAGES = [
 7 |     'h5py==2.10.0', 'requests'
 8 | ]
 9 | 
10 | setuptools.setup(
11 |     name="deepctr",
12 |     version="0.9.0",
13 |     author="Weichen Shen",
14 |     author_email="weichenswc@163.com",
15 |     description="Easy-to-use,Modular and Extendible package of deep learning based CTR(Click Through Rate) prediction models with tensorflow 1.x and 2.x .",
16 |     long_description=long_description,
17 |     long_description_content_type="text/markdown",
18 |     url="https://github.com/shenweichen/deepctr",
19 |     download_url='https://github.com/shenweichen/deepctr/tags',
20 |     packages=setuptools.find_packages(
21 |         exclude=["tests", "tests.models", "tests.layers"]),
22 |     python_requires=">=2.7,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*",  # '>=3.4',  # 3.4.6
23 |     install_requires=REQUIRED_PACKAGES,
24 |     extras_require={
25 |         "cpu": ["tensorflow>=1.4.0,!=1.7.*,!=1.8.*"],
26 |         "gpu": ["tensorflow-gpu>=1.4.0,!=1.7.*,!=1.8.*"],
27 |     },
28 |     entry_points={
29 |     },
30 |     classifiers=(
31 |         "License :: OSI Approved :: Apache Software License",
32 |         "Operating System :: OS Independent",
33 |         'Intended Audience :: Developers',
34 |         'Intended Audience :: Education',
35 |         'Intended Audience :: Science/Research',
36 |         'Programming Language :: Python :: 3',
37 |         'Programming Language :: Python :: 2.7',
38 |         'Programming Language :: Python :: 3.5',
39 |         'Programming Language :: Python :: 3.6',
40 |         'Programming Language :: Python :: 3.7',
41 |         'Topic :: Scientific/Engineering',
42 |         'Topic :: Scientific/Engineering :: Artificial Intelligence',
43 |         'Topic :: Software Development',
44 |         'Topic :: Software Development :: Libraries',
45 |         'Topic :: Software Development :: Libraries :: Python Modules',
46 |     ),
47 |     license="Apache-2.0",
48 |     keywords=['ctr', 'click through rate',
49 |               'deep learning', 'tensorflow', 'tensor', 'keras'],
50 | )
51 | 


--------------------------------------------------------------------------------
/tests/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bytedance/LargeBatchCTR/12101b44d8e753a6d8d954437287f96d68895d49/tests/README.md


--------------------------------------------------------------------------------
/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bytedance/LargeBatchCTR/12101b44d8e753a6d8d954437287f96d68895d49/tests/__init__.py


--------------------------------------------------------------------------------
/tests/feature_test.py:
--------------------------------------------------------------------------------
 1 | from deepctr.models import DeepFM
 2 | from deepctr.feature_column import SparseFeat, DenseFeat, VarLenSparseFeat, get_feature_names
 3 | import numpy as np
 4 | 
 5 | 
 6 | def test_long_dense_vector():
 7 |     feature_columns = [SparseFeat('user_id', 4, ), SparseFeat('item_id', 5, ), DenseFeat("pic_vec", 5)]
 8 |     fixlen_feature_names = get_feature_names(feature_columns)
 9 | 
10 |     user_id = np.array([[1], [0], [1]])
11 |     item_id = np.array([[3], [2], [1]])
12 |     pic_vec = np.array([[0.1, 0.5, 0.4, 0.3, 0.2], [0.1, 0.5, 0.4, 0.3, 0.2], [0.1, 0.5, 0.4, 0.3, 0.2]])
13 |     label = np.array([1, 0, 1])
14 | 
15 |     input_dict = {'user_id': user_id, 'item_id': item_id, 'pic_vec': pic_vec}
16 |     model_input = [input_dict[name] for name in fixlen_feature_names]
17 | 
18 |     model = DeepFM(feature_columns, feature_columns[:-1])
19 |     model.compile('adagrad', 'binary_crossentropy')
20 |     model.fit(model_input, label)
21 | 
22 | 
def test_feature_column_sparsefeat_vocabulary_path():
    """Check that vocabulary_path is exposed by SparseFeat and by a wrapping VarLenSparseFeat."""
    expected_path = "./dummy_test.csv"

    sparse_feat = SparseFeat('user_id', 4, vocabulary_path=expected_path)
    if sparse_feat.vocabulary_path != expected_path:
        raise ValueError("sf.vocabulary_path is invalid")

    # The variable-length wrapper is built from the sparse feature above.
    varlen_feat = VarLenSparseFeat(sparse_feat, 6)
    if varlen_feat.vocabulary_path != expected_path:
        raise ValueError("vlsf.vocabulary_path is invalid")
31 | 


--------------------------------------------------------------------------------
/tests/layers/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bytedance/LargeBatchCTR/12101b44d8e753a6d8d954437287f96d68895d49/tests/layers/__init__.py


--------------------------------------------------------------------------------
/tests/layers/activations_test.py:
--------------------------------------------------------------------------------
 1 | from deepctr.layers import activation
 2 | 
 3 | try:
 4 |     from tensorflow.python.keras.utils import CustomObjectScope
 5 | except ImportError:
 6 |     from tensorflow.keras.utils import CustomObjectScope
 7 | from tests.utils import layer_test
 8 | 
 9 | 
def test_dice():
    """Run layer_test on the Dice activation with a (2, 3) input shape."""
    custom_objects = {'Dice': activation.Dice}
    with CustomObjectScope(custom_objects):
        layer_test(activation.Dice, kwargs={}, input_shape=(2, 3))
14 | 


--------------------------------------------------------------------------------
/tests/layers/core_test.py:
--------------------------------------------------------------------------------
 1 | import pytest
 2 | import tensorflow as tf
 3 | from tensorflow.python.keras.layers import PReLU
 4 | 
 5 | try:
 6 |     from tensorflow.python.keras.utils import CustomObjectScope
 7 | except ImportError:
 8 |     from tensorflow.keras.utils import CustomObjectScope
 9 | from deepctr import layers
10 | from deepctr.layers import Dice
11 | from tests.layers.interaction_test import BATCH_SIZE, EMBEDDING_SIZE, SEQ_LENGTH
12 | from tests.utils import layer_test
13 | 
14 | 
@pytest.mark.parametrize(
    'hidden_units,activation',
    [(hidden_units, activation)
     for hidden_units in [(), (10,)]
     for activation in ['sigmoid', Dice, PReLU]
     ]
)
def test_LocalActivationUnit(hidden_units, activation):
    """Run layer_test on LocalActivationUnit for each hidden_units/activation pair.

    From TensorFlow 1.13 on, only the 'sigmoid' case is exercised; the Dice
    and PReLU cases return early.
    """
    # Compare (major, minor) numerically. A plain string comparison is
    # lexicographic, so '1.4.0' .. '1.9.0' would wrongly count as >= '1.13.0'.
    tf_major_minor = tuple(int(part) for part in tf.__version__.split('.')[:2])
    if tf_major_minor >= (1, 13) and activation != 'sigmoid':
        return

    with CustomObjectScope({'LocalActivationUnit': layers.LocalActivationUnit}):
        # Two inputs: a (batch, 1, embedding) tensor and a (batch, seq, embedding) tensor.
        layer_test(layers.LocalActivationUnit,
                   kwargs={'hidden_units': hidden_units, 'activation': activation, 'dropout_rate': 0.5},
                   input_shape=[(BATCH_SIZE, 1, EMBEDDING_SIZE), (BATCH_SIZE, SEQ_LENGTH, EMBEDDING_SIZE)])
30 | 
31 | 
@pytest.mark.parametrize(
    'hidden_units,use_bn',
    [
        ((), True),
        ((), False),
        ((10,), True),
        ((10,), False),
    ]
)
def test_DNN(hidden_units, use_bn):
    """Run layer_test on DNN with and without batch norm, for empty and one-layer hidden_units."""
    layer_kwargs = {'hidden_units': hidden_units, 'use_bn': use_bn, 'dropout_rate': 0.5}
    with CustomObjectScope({'DNN': layers.DNN}):
        layer_test(layers.DNN, kwargs=layer_kwargs,
                   input_shape=(BATCH_SIZE, EMBEDDING_SIZE))
44 | 
45 | 
@pytest.mark.parametrize(
    'task,use_bias',
    [
        ('binary', True),
        ('binary', False),
        ('regression', True),
        ('regression', False),
    ]
)
def test_PredictionLayer(task, use_bias):
    """Run layer_test on PredictionLayer for both tasks, with and without bias."""
    layer_kwargs = {'task': task, 'use_bias': use_bias}
    with CustomObjectScope({'PredictionLayer': layers.PredictionLayer}):
        layer_test(layers.PredictionLayer, kwargs=layer_kwargs,
                   input_shape=(BATCH_SIZE, 1))
57 | 
58 | 
@pytest.mark.xfail(reason="dim size must be 1 except for the batch size dim")
def test_test_PredictionLayer_invalid():
    """Feed PredictionLayer a (batch, 2, 1) input; the test is marked as an expected failure."""
    layer_kwargs = {'use_bias': True, }
    with CustomObjectScope({'PredictionLayer': layers.PredictionLayer}):
        layer_test(layers.PredictionLayer, kwargs=layer_kwargs,
                   input_shape=(BATCH_SIZE, 2, 1))
65 | 


--------------------------------------------------------------------------------
/tests/layers/normalization_test.py:
--------------------------------------------------------------------------------
 1 | import pytest
 2 | 
 3 | try:
 4 |     from tensorflow.python.keras.utils import CustomObjectScope
 5 | except ImportError:
 6 |     from tensorflow.keras.utils import CustomObjectScope
 7 | from deepctr import layers
 8 | from tests.layers.interaction_test import BATCH_SIZE, FIELD_SIZE, EMBEDDING_SIZE
 9 | from tests.utils import layer_test
10 | 
11 | 
@pytest.mark.parametrize('axis', [-1, -2])
def test_LayerNormalization(axis):
    """Run layer_test on LayerNormalization over the last and the second-to-last axis."""
    input_shape = (BATCH_SIZE, FIELD_SIZE, EMBEDDING_SIZE)
    with CustomObjectScope({'LayerNormalization': layers.LayerNormalization}):
        layer_test(layers.LayerNormalization, kwargs={"axis": axis, },
                   input_shape=input_shape)
21 | 


--------------------------------------------------------------------------------
/tests/layers/utils_test.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import pytest
 3 | import tensorflow as tf
 4 | 
 5 | from deepctr.layers.utils import Hash, Linear
 6 | from tests.layers.interaction_test import BATCH_SIZE, EMBEDDING_SIZE
 7 | from tests.utils import layer_test
 8 | 
 9 | try:
10 |     from tensorflow.python.keras.utils import CustomObjectScope
11 | except ImportError:
12 |     from tensorflow.keras.utils import CustomObjectScope
13 | 
14 | 
@pytest.mark.parametrize(
    'num_buckets,mask_zero,vocabulary_path,input_data,expected_output',
    [
        # No vocabulary file: only dtype and execution are checked (no expected output).
        (3 + 1, False, None, ['lakemerson'], None),
        (3 + 1, True, None, ['lakemerson'], None),
        # With a vocabulary file: 'lake' -> 1, 'johnson' -> 3, and 'lakemerson',
        # which is not listed in the file, -> 0.
        (3 + 1, False, "./tests/layers/vocabulary_example.csv",
         [['lake'], ['johnson'], ['lakemerson']], [[1], [3], [0]]),
    ]
)
def test_Hash(num_buckets, mask_zero, vocabulary_path, input_data, expected_output):
    """Run layer_test on the Hash layer with string input; returns early unless TF reports a 2.x version."""
    has_tf2 = hasattr(tf, 'version') and tf.version.VERSION >= '2.0.0'
    if not has_tf2:
        return

    layer_kwargs = {'num_buckets': num_buckets, 'mask_zero': mask_zero, 'vocabulary_path': vocabulary_path}
    string_input = np.array(input_data, dtype='str')
    with CustomObjectScope({'Hash': Hash}):
        layer_test(Hash,
                   kwargs=layer_kwargs,
                   input_dtype=tf.string, input_data=string_input,
                   expected_output_dtype=tf.int64, expected_output=expected_output)
34 | 
35 | 
36 | def test_Linear():
37 |     with CustomObjectScope({'Linear': Linear}):
38 |         layer_test(Linear,
39 |                    kwargs={'mode': 1, 'use_bias': True}, input_shape=(BATCH_SIZE, EMBEDDING_SIZE))
40 | 


--------------------------------------------------------------------------------
/tests/layers/vocabulary_example.csv:
--------------------------------------------------------------------------------
1 | 1,lake
2 | 2,merson
3 | 3,johnson


--------------------------------------------------------------------------------
/tests/models/AFM_test.py:
--------------------------------------------------------------------------------
 1 | import pytest
 2 | import tensorflow as tf
 3 | from packaging import version
 4 | 
 5 | from deepctr.estimator import AFMEstimator
 6 | from deepctr.models import AFM
 7 | from ..utils import check_model, check_estimator, get_test_data, get_test_data_estimator, SAMPLE_SIZE, \
 8 |     Estimator_TEST_TF1
 9 | 
10 | 
11 | @pytest.mark.parametrize(
12 |     'use_attention,sparse_feature_num,dense_feature_num',
13 |     [(True, 3, 0),
14 |      ]
15 | )
16 | def test_AFM(use_attention, sparse_feature_num, dense_feature_num):
17 |     model_name = "AFM"
18 |     sample_size = SAMPLE_SIZE
19 |     x, y, feature_columns = get_test_data(sample_size, sparse_feature_num=sparse_feature_num,
20 |                                           dense_feature_num=dense_feature_num)
21 | 
22 |     model = AFM(feature_columns, feature_columns, use_attention=use_attention, afm_dropout=0.5)
23 | 
24 |     check_model(model, model_name, x, y)
25 | 
26 | 
27 | @pytest.mark.parametrize(
28 |     'use_attention,sparse_feature_num,dense_feature_num',
29 |     [(True, 3, 0),
30 |      ]
31 | )
32 | def test_AFMEstimator(use_attention, sparse_feature_num, dense_feature_num):
33 |     if not Estimator_TEST_TF1 and version.parse(tf.__version__) < version.parse('2.2.0'):
34 |         return
35 | 
36 |     sample_size = SAMPLE_SIZE
37 | 
38 |     linear_feature_columns, dnn_feature_columns, input_fn = get_test_data_estimator(sample_size,
39 |                                                                                     sparse_feature_num=sparse_feature_num,
40 |                                                                                     dense_feature_num=dense_feature_num)
41 |     model = AFMEstimator(linear_feature_columns, dnn_feature_columns, use_attention=use_attention, afm_dropout=0.5)
42 |     check_estimator(model, input_fn)
43 | 
44 | 
45 | if __name__ == "__main__":
46 |     pass  # no-op: nothing runs when this module is executed directly
47 | 


--------------------------------------------------------------------------------
/tests/models/AutoInt_test.py:
--------------------------------------------------------------------------------
 1 | import pytest
 2 | import tensorflow as tf
 3 | from packaging import version
 4 | 
 5 | from deepctr.estimator import AutoIntEstimator
 6 | from deepctr.models import AutoInt
 7 | from ..utils import check_model, get_test_data, SAMPLE_SIZE, get_test_data_estimator, check_estimator, \
 8 |     Estimator_TEST_TF1
 9 | 
10 | 
11 | @pytest.mark.parametrize(
12 |     'att_layer_num,dnn_hidden_units,sparse_feature_num',
13 |     [(1, (), 1), (1, (4,), 1)]  # (0, (4,), 2), (2, (4, 4,), 2)
14 | )
15 | def test_AutoInt(att_layer_num, dnn_hidden_units, sparse_feature_num):
16 |     if version.parse(tf.__version__) >= version.parse("1.14.0") and len(dnn_hidden_units) == 0:  # todo check version
17 |         return
18 |     model_name = "AutoInt"
19 |     sample_size = SAMPLE_SIZE
20 |     x, y, feature_columns = get_test_data(sample_size, sparse_feature_num=sparse_feature_num,
21 |                                           dense_feature_num=sparse_feature_num)
22 | 
23 |     model = AutoInt(feature_columns, feature_columns, att_layer_num=att_layer_num,
24 |                     dnn_hidden_units=dnn_hidden_units, dnn_dropout=0.5, )
25 |     check_model(model, model_name, x, y)
26 | 
27 | 
28 | @pytest.mark.parametrize(
29 |     'att_layer_num,dnn_hidden_units,sparse_feature_num',
30 |     [(1, (4,), 1)]  # (0, (4,), 2), (2, (4, 4,), 2)
31 | )
32 | def test_AutoIntEstimator(att_layer_num, dnn_hidden_units, sparse_feature_num):
33 |     if not Estimator_TEST_TF1 and version.parse(tf.__version__) < version.parse('2.2.0'):
34 |         return
35 |     sample_size = SAMPLE_SIZE
36 |     linear_feature_columns, dnn_feature_columns, input_fn = get_test_data_estimator(sample_size,
37 |                                                                                     sparse_feature_num=sparse_feature_num,
38 |                                                                                     dense_feature_num=sparse_feature_num)
39 | 
40 |     model = AutoIntEstimator(linear_feature_columns, dnn_feature_columns, att_layer_num=att_layer_num,
41 |                              dnn_hidden_units=dnn_hidden_units, dnn_dropout=0.5, )
42 |     check_estimator(model, input_fn)
43 | 
44 | 
45 | if __name__ == "__main__":
46 |     pass  # no-op: nothing runs when this module is executed directly
47 | 


--------------------------------------------------------------------------------
/tests/models/BST_test.py:
--------------------------------------------------------------------------------
 1 | from deepctr.models import BST
 2 | from ..utils import check_model
 3 | from .DIN_test import get_xy_fd
 4 | 
 5 | 
 6 | def test_BST():
 7 |     model_name = "BST"
 8 | 
 9 |     x, y, feature_columns, behavior_feature_list = get_xy_fd(hash_flag=True)
10 | 
11 |     model = BST(dnn_feature_columns=feature_columns,
12 |                 history_feature_list=behavior_feature_list,
13 |                 att_head_num=4)
14 | 
15 |     check_model(model, model_name, x, y,
16 |                 check_model_io=True)
17 | 
18 | 
19 | if __name__ == "__main__":
20 |     pass  # no-op: nothing runs when this module is executed directly
21 | 


--------------------------------------------------------------------------------
/tests/models/CCPM_test.py:
--------------------------------------------------------------------------------
 1 | import pytest
 2 | import tensorflow as tf
 3 | 
 4 | from deepctr.estimator import CCPMEstimator
 5 | from deepctr.models import CCPM
 6 | from ..utils import check_model, get_test_data, SAMPLE_SIZE, check_estimator, get_test_data_estimator, \
 7 |     Estimator_TEST_TF1
 8 | 
 9 | 
10 | @pytest.mark.parametrize(
11 |     'sparse_feature_num,dense_feature_num',
12 |     [(3, 0)
13 |      ]
14 | )
15 | def test_CCPM(sparse_feature_num, dense_feature_num):
16 |     if tf.__version__ >= "2.0.0":  # todo
17 |         return
18 |     model_name = "CCPM"
19 | 
20 |     sample_size = SAMPLE_SIZE
21 |     x, y, feature_columns = get_test_data(sample_size, sparse_feature_num=sparse_feature_num,
22 |                                           dense_feature_num=dense_feature_num)
23 | 
24 |     model = CCPM(feature_columns, feature_columns, conv_kernel_width=(3, 2), conv_filters=(
25 |         2, 1), dnn_hidden_units=[32, ], dnn_dropout=0.5)
26 |     check_model(model, model_name, x, y)
27 | 
28 | 
29 | @pytest.mark.parametrize(
30 |     'sparse_feature_num,dense_feature_num',
31 |     [(2, 0),
32 |      ]
33 | )
34 | def test_CCPM_without_seq(sparse_feature_num, dense_feature_num):
35 |     if tf.__version__ >= "2.0.0":
36 |         return
37 |     model_name = "CCPM"
38 | 
39 |     sample_size = SAMPLE_SIZE
40 |     x, y, feature_columns = get_test_data(sample_size, sparse_feature_num=sparse_feature_num,
41 |                                           dense_feature_num=dense_feature_num, sequence_feature=())
42 | 
43 |     model = CCPM(feature_columns, feature_columns, conv_kernel_width=(3, 2), conv_filters=(
44 |         2, 1), dnn_hidden_units=[32, ], dnn_dropout=0.5)
45 |     check_model(model, model_name, x, y)
46 | 
47 | 
48 | @pytest.mark.parametrize(
49 |     'sparse_feature_num,dense_feature_num',
50 |     [(2, 0),
51 |      ]
52 | )
53 | def test_CCPMEstimator_without_seq(sparse_feature_num, dense_feature_num):
54 |     if not Estimator_TEST_TF1 and tf.__version__ < "2.2.0":
55 |         return
56 | 
57 |     sample_size = SAMPLE_SIZE
58 |     linear_feature_columns, dnn_feature_columns, input_fn = get_test_data_estimator(sample_size,
59 |                                                                                     sparse_feature_num=sparse_feature_num,
60 |                                                                                     dense_feature_num=sparse_feature_num)
61 | 
62 |     model = CCPMEstimator(linear_feature_columns, dnn_feature_columns, conv_kernel_width=(3, 2), conv_filters=(
63 |         2, 1), dnn_hidden_units=[32, ], dnn_dropout=0.5)
64 |     check_estimator(model, input_fn)
65 | 
66 | 
67 | if __name__ == "__main__":
68 |     pass  # no-op: nothing runs when this module is executed directly
69 | 


--------------------------------------------------------------------------------
/tests/models/DCNMix_test.py:
--------------------------------------------------------------------------------
 1 | import pytest
 2 | 
 3 | from deepctr.models import DCNMix
 4 | from ..utils import check_model, get_test_data, SAMPLE_SIZE
 5 | 
 6 | 
 7 | @pytest.mark.parametrize(
 8 |     'cross_num,hidden_size,sparse_feature_num',
 9 |     [(0, (8,), 2), (1, (), 1), (1, (8,), 3)
10 |      ]
11 | )
12 | def test_DCNMix(cross_num, hidden_size, sparse_feature_num):
13 |     model_name = "DCNMix"
14 | 
15 |     sample_size = SAMPLE_SIZE
16 |     x, y, feature_columns = get_test_data(sample_size, sparse_feature_num=sparse_feature_num,
17 |                                           dense_feature_num=sparse_feature_num)
18 | 
19 |     model = DCNMix(feature_columns, feature_columns, cross_num=cross_num, dnn_hidden_units=hidden_size, dnn_dropout=0.5)
20 |     check_model(model, model_name, x, y)
21 | 
22 | 
23 | if __name__ == "__main__":
24 |     pass  # no-op: nothing runs when this module is executed directly
25 | 


--------------------------------------------------------------------------------
/tests/models/DCN_test.py:
--------------------------------------------------------------------------------
 1 | import pytest
 2 | import tensorflow as tf
 3 | 
 4 | from deepctr.estimator import DCNEstimator
 5 | from deepctr.models import DCN
 6 | from ..utils import check_model, get_test_data, SAMPLE_SIZE, get_test_data_estimator, check_estimator, \
 7 |     Estimator_TEST_TF1
 8 | 
 9 | 
10 | @pytest.mark.parametrize(
11 |     'cross_num,hidden_size,sparse_feature_num,cross_parameterization',
12 |     [(0, (8,), 2, 'vector'), (1, (), 1, 'vector'), (1, (8,), 3, 'vector'),
13 |      (0, (8,), 2, 'matrix'), (1, (), 1, 'matrix'), (1, (8,), 3, 'matrix'),
14 |      ]
15 | )
16 | def test_DCN(cross_num, hidden_size, sparse_feature_num, cross_parameterization):
17 |     model_name = "DCN"
18 | 
19 |     sample_size = SAMPLE_SIZE
20 |     x, y, feature_columns = get_test_data(sample_size, sparse_feature_num=sparse_feature_num,
21 |                                           dense_feature_num=sparse_feature_num)
22 | 
23 |     model = DCN(feature_columns, feature_columns, cross_num=cross_num, cross_parameterization=cross_parameterization,
24 |                 dnn_hidden_units=hidden_size, dnn_dropout=0.5)
25 |     check_model(model, model_name, x, y)
26 | 
27 | 
28 | def test_DCN_2():
29 |     model_name = "DCN"
30 | 
31 |     sample_size = SAMPLE_SIZE
32 |     x, y, feature_columns = get_test_data(sample_size, sparse_feature_num=3,
33 |                                           dense_feature_num=2)
34 | 
35 |     model = DCN([], feature_columns, cross_num=1, dnn_hidden_units=(8,), dnn_dropout=0.5)
36 |     check_model(model, model_name, x, y)
37 | 
38 | 
39 | @pytest.mark.parametrize(
40 |     'cross_num,hidden_size,sparse_feature_num',
41 |     [(1, (8,), 3)
42 |      ]
43 | )
44 | def test_DCNEstimator(cross_num, hidden_size, sparse_feature_num):
45 |     if not Estimator_TEST_TF1 and tf.__version__ < "2.2.0":
46 |         return
47 |     sample_size = SAMPLE_SIZE
48 |     linear_feature_columns, dnn_feature_columns, input_fn = get_test_data_estimator(sample_size,
49 |                                                                                     sparse_feature_num=sparse_feature_num,
50 |                                                                                     dense_feature_num=sparse_feature_num)
51 | 
52 |     model = DCNEstimator(linear_feature_columns, dnn_feature_columns, cross_num=cross_num, dnn_hidden_units=hidden_size,
53 |                          dnn_dropout=0.5)
54 |     check_estimator(model, input_fn)
55 | 
56 | 
57 | # def test_DCN_invalid(embedding_size=8, cross_num=0, hidden_size=()):
58 | #     feature_dim_dict = {'sparse': [SparseFeat('sparse_1', 2), SparseFeat('sparse_2', 5), SparseFeat('sparse_3', 10)],
59 | #                         'dense': [SparseFeat('dense_1', 1), SparseFeat('dense_1', 1), SparseFeat('dense_1', 1)]}
60 | #     with pytest.raises(ValueError):
61 | #         _ = DCN(None, embedding_size=embedding_size, cross_num=cross_num, dnn_hidden_units=hidden_size, dnn_dropout=0.5)
62 | 
63 | 
64 | if __name__ == "__main__":
65 |     pass  # no-op: nothing runs when this module is executed directly
66 | 


--------------------------------------------------------------------------------
/tests/models/DIEN_test.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import pytest
 3 | import tensorflow as tf
 4 | from packaging import version
 5 | 
 6 | from deepctr.feature_column import SparseFeat, VarLenSparseFeat, DenseFeat, get_feature_names
 7 | from deepctr.models import DIEN
 8 | from ..utils import check_model
 9 | 
10 | 
11 | def get_xy_fd(use_neg=False, hash_flag=False):  # Build a fixed 3-sample fixture; returns (x, y, feature_columns, behavior_feature_list).
12 |     feature_columns = [SparseFeat('user', 3, hash_flag),  # NOTE(review): hash_flag is the third positional argument here (the DSIN test helper passes use_hash= by keyword) — confirm that slot is SparseFeat's hash flag
13 |                        SparseFeat('gender', 2, hash_flag),
14 |                        SparseFeat('item', 3 + 1, hash_flag),
15 |                        SparseFeat('item_gender', 2 + 1, hash_flag),
16 |                        DenseFeat('score', 1)]
17 |     # History sequences (maxlen=4) declared with embedding_name 'item' / 'item_gender'.
18 |     feature_columns += [
19 |         VarLenSparseFeat(SparseFeat('hist_item', vocabulary_size=3 + 1, embedding_dim=8, embedding_name='item'),
20 |                          maxlen=4, length_name="seq_length"),
21 |         VarLenSparseFeat(SparseFeat('hist_item_gender', 2 + 1, embedding_dim=4, embedding_name='item_gender'),
22 |                          maxlen=4, length_name="seq_length")]
23 | 
24 |     behavior_feature_list = ["item", "item_gender"]
25 |     uid = np.array([0, 1, 2])
26 |     ugender = np.array([0, 1, 0])
27 |     iid = np.array([1, 2, 3])  # 0 is mask value
28 |     igender = np.array([1, 2, 1])  # 0 is mask value
29 |     score = np.array([0.1, 0.2, 0.3])
30 |     # Per-sample histories: three rows of four slots each, matching maxlen=4 above.
31 |     hist_iid = np.array([[1, 2, 3, 0], [1, 2, 3, 0], [1, 2, 0, 0]])
32 |     hist_igender = np.array([[1, 1, 2, 0], [2, 1, 1, 0], [2, 1, 0, 0]])
33 |     # behavior_length is supplied under the "seq_length" key of feature_dict below.
34 |     behavior_length = np.array([3, 3, 2])
35 | 
36 |     feature_dict = {'user': uid, 'gender': ugender, 'item': iid, 'item_gender': igender,
37 |                     'hist_item': hist_iid, 'hist_item_gender': hist_igender,
38 |                     'score': score,"seq_length":behavior_length}
39 |     # Optional negative histories: arrays with the same values plus matching neg_hist_* columns.
40 |     if use_neg:
41 |         feature_dict['neg_hist_item'] = np.array([[1, 2, 3, 0], [1, 2, 3, 0], [1, 2, 0, 0]])
42 |         feature_dict['neg_hist_item_gender'] = np.array([[1, 1, 2, 0], [2, 1, 1, 0], [2, 1, 0, 0]])
43 |         feature_columns += [
44 |             VarLenSparseFeat(SparseFeat('neg_hist_item', vocabulary_size=3 + 1, embedding_dim=8, embedding_name='item'),
45 |                              maxlen=4, length_name="seq_length"),
46 |             VarLenSparseFeat(SparseFeat('neg_hist_item_gender', 2 + 1, embedding_dim=4, embedding_name='item_gender'),
47 |                              maxlen=4, length_name="seq_length")]
48 |     # x holds only the entries named by get_feature_names(feature_columns).
49 |     feature_names = get_feature_names(feature_columns)
50 |     x = {name: feature_dict[name] for name in feature_names}
51 |     y = np.array([1, 0, 1])
52 |     return x, y, feature_columns, behavior_feature_list
53 | 
54 | 
55 | # @pytest.mark.xfail(reason="There is a bug when save model use Dice")
56 | # @pytest.mark.skip(reason="misunderstood the API")
57 | 
58 | @pytest.mark.parametrize(
59 |     'gru_type',
60 |     ['GRU', 'AIGRU', 'AGRU'  # ,'AUGRU',
61 |      ]
62 | )
63 | def test_DIEN(gru_type):
64 |     if version.parse(tf.__version__) >= version.parse('2.0.0'):
65 |         tf.compat.v1.disable_eager_execution()  # todo
66 |     model_name = "DIEN_" + gru_type
67 | 
68 |     x, y, feature_columns, behavior_feature_list = get_xy_fd(hash_flag=True)
69 | 
70 |     model = DIEN(feature_columns, behavior_feature_list,
71 |                  dnn_hidden_units=[4, 4, 4], dnn_dropout=0.5, gru_type=gru_type)
72 | 
73 |     check_model(model, model_name, x, y,
74 |                 check_model_io=(gru_type == "GRU"))  # TODO:fix bugs when load model in other type
75 | 
76 | 
77 | def test_DIEN_neg():
78 |     model_name = "DIEN_neg"
79 |     if version.parse(tf.__version__) >= version.parse("1.14.0"):
80 |         return
81 | 
82 |     x, y, feature_dim_dict, behavior_feature_list = get_xy_fd(use_neg=True)
83 | 
84 |     model = DIEN(feature_dim_dict, behavior_feature_list,
85 |                  dnn_hidden_units=[4, 4, 4], dnn_dropout=0.5, gru_type="AUGRU", use_negsampling=True)
86 |     check_model(model, model_name, x, y)
87 | 
88 | 
89 | if __name__ == "__main__":
90 |     pass  # no-op: nothing runs when this module is executed directly
91 | 


--------------------------------------------------------------------------------
/tests/models/DIFM_test.py:
--------------------------------------------------------------------------------
 1 | import pytest
 2 | 
 3 | from deepctr.models import DIFM
 4 | from ..utils import check_model, get_test_data, SAMPLE_SIZE
 5 | 
 6 | 
 7 | @pytest.mark.parametrize(
 8 |     'att_head_num,dnn_hidden_units,sparse_feature_num',
 9 |     [(1, (4,), 2), (2, (4, 4,), 2), (1, (4,), 1)]
10 | )
11 | def test_DIFM(att_head_num, dnn_hidden_units, sparse_feature_num):
12 |     model_name = "DIFM"
13 |     sample_size = SAMPLE_SIZE
14 |     x, y, feature_columns = get_test_data(sample_size, sparse_feature_num=sparse_feature_num,
15 |                                           dense_feature_num=sparse_feature_num)
16 | 
17 |     model = DIFM(feature_columns, feature_columns, dnn_hidden_units=dnn_hidden_units, dnn_dropout=0.5)
18 |     check_model(model, model_name, x, y)
19 | 
20 | 
21 | if __name__ == "__main__":
22 |     pass  # no-op: nothing runs when this module is executed directly
23 | 


--------------------------------------------------------------------------------
/tests/models/DIN_test.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | 
 3 | from deepctr.feature_column import SparseFeat, VarLenSparseFeat, DenseFeat, get_feature_names
 4 | from deepctr.models.sequence.din import DIN
 5 | from ..utils import check_model
 6 | 
 7 | 
 8 | def get_xy_fd(hash_flag=False):  # Build a fixed 3-sample fixture; returns (x, y, feature_columns, behavior_feature_list). NOTE(review): hash_flag is never read in this body.
 9 |     feature_columns = [SparseFeat('user', 3, embedding_dim=10), SparseFeat(
10 |         'gender', 2, embedding_dim=4), SparseFeat('item_id', 3 + 1, embedding_dim=8),
11 |                        SparseFeat('cate_id', 2 + 1, embedding_dim=4), DenseFeat('pay_score', 1)]
12 |     feature_columns += [  # history sequences declared with embedding_name 'item_id' / 'cate_id'
13 |         VarLenSparseFeat(SparseFeat('hist_item_id', vocabulary_size=3 + 1, embedding_dim=8, embedding_name='item_id'),
14 |                          maxlen=4, length_name="seq_length"),
15 |         VarLenSparseFeat(SparseFeat('hist_cate_id', 2 + 1, embedding_dim=4, embedding_name='cate_id'), maxlen=4,
16 |                          length_name="seq_length")]
17 |     # Notice: History behavior sequence feature name must start with "hist_".
18 |     behavior_feature_list = ["item_id", "cate_id"]
19 |     uid = np.array([0, 1, 2])
20 |     ugender = np.array([0, 1, 0])
21 |     iid = np.array([1, 2, 3])  # 0 is mask value
22 |     cate_id = np.array([1, 2, 2])  # 0 is mask value
23 |     pay_score = np.array([0.1, 0.2, 0.3])
24 |     # Per-sample histories: three rows of four slots each, matching maxlen=4 above.
25 |     hist_iid = np.array([[1, 2, 3, 0], [3, 2, 1, 0], [1, 2, 0, 0]])
26 |     hist_cate_id = np.array([[1, 2, 2, 0], [2, 2, 1, 0], [1, 2, 0, 0]])
27 |     seq_length = np.array([3, 3, 2])  # the actual length of the behavior sequence
28 | 
29 |     feature_dict = {'user': uid, 'gender': ugender, 'item_id': iid, 'cate_id': cate_id,
30 |                     'hist_item_id': hist_iid, 'hist_cate_id': hist_cate_id,
31 |                     'pay_score': pay_score, 'seq_length': seq_length}
32 |     x = {name: feature_dict[name] for name in get_feature_names(feature_columns)}  # keep only the entries named by get_feature_names
33 |     y = np.array([1, 0, 1])
34 |     return x, y, feature_columns, behavior_feature_list
35 | 
36 | 
37 | # @pytest.mark.xfail(reason="There is a bug when save model use Dice")
38 | # @pytest.mark.skip(reason="misunderstood the API")
39 | 
40 | 
41 | def test_DIN():
42 |     model_name = "DIN"
43 | 
44 |     x, y, feature_columns, behavior_feature_list = get_xy_fd(True)
45 | 
46 |     model = DIN(feature_columns, behavior_feature_list, dnn_hidden_units=[4, 4, 4],
47 |                 dnn_dropout=0.5)
48 |     # todo test dice
49 | 
50 |     check_model(model, model_name, x, y)
51 | 
52 | 
53 | if __name__ == "__main__":
54 |     pass  # no-op: nothing runs when this module is executed directly
55 | 


--------------------------------------------------------------------------------
/tests/models/DSIN_test.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import pytest
 3 | 
 4 | from deepctr.feature_column import SparseFeat, VarLenSparseFeat, DenseFeat, get_feature_names
 5 | from deepctr.models.sequence.dsin import DSIN
 6 | from ..utils import check_model
 7 | 
 8 | 
 9 | def get_xy_fd(hash_flag=False):  # Build a fixed 3-sample, 2-session fixture; returns (x, y, feature_columns, behavior_feature_list).
10 |     feature_columns = [SparseFeat('user', 3, use_hash=hash_flag),
11 |                        SparseFeat('gender', 2, use_hash=hash_flag),
12 |                        SparseFeat('item', 3 + 1, use_hash=hash_flag),
13 |                        SparseFeat('item_gender', 2 + 1, use_hash=hash_flag),
14 |                        DenseFeat('score', 1)]
15 |     feature_columns += [  # session 0 sequence columns
16 |         VarLenSparseFeat(SparseFeat('sess_0_item', 3 + 1, embedding_dim=4, use_hash=hash_flag, embedding_name='item'),
17 |                          maxlen=4), VarLenSparseFeat(
18 |             SparseFeat('sess_0_item_gender', 2 + 1, embedding_dim=4, use_hash=hash_flag, embedding_name='item_gender'),
19 |             maxlen=4)]
20 |     feature_columns += [  # session 1 sequence columns
21 |         VarLenSparseFeat(SparseFeat('sess_1_item', 3 + 1, embedding_dim=4, use_hash=hash_flag, embedding_name='item'),
22 |                          maxlen=4), VarLenSparseFeat(
23 |             SparseFeat('sess_1_item_gender', 2 + 1, embedding_dim=4, use_hash=hash_flag, embedding_name='item_gender'),
24 |             maxlen=4)]
25 |     # behavior_feature_list names the base features that the sess_<i>_* columns use as embedding_name.
26 |     behavior_feature_list = ["item", "item_gender"]
27 |     uid = np.array([0, 1, 2])
28 |     ugender = np.array([0, 1, 0])
29 |     iid = np.array([1, 2, 3])  # 0 is mask value
30 |     igender = np.array([1, 2, 1])  # 0 is mask value
31 |     score = np.array([0.1, 0.2, 0.3])
32 | 
33 |     sess1_iid = np.array([[1, 2, 3, 0], [1, 2, 3, 0], [0, 0, 0, 0]])
34 |     sess1_igender = np.array([[1, 1, 2, 0], [2, 1, 1, 0], [0, 0, 0, 0]])
35 | 
36 |     sess2_iid = np.array([[1, 2, 3, 0], [0, 0, 0, 0], [0, 0, 0, 0]])
37 |     sess2_igender = np.array([[1, 1, 2, 0], [0, 0, 0, 0], [0, 0, 0, 0]])
38 |     # sess_number is attached to x further down under the key "sess_length".
39 |     sess_number = np.array([2, 1, 0])
40 |     # Naming is offset by one: sess1_* arrays go under 'sess_0_*' keys, sess2_* under 'sess_1_*'.
41 |     feature_dict = {'user': uid, 'gender': ugender, 'item': iid, 'item_gender': igender,
42 |                     'sess_0_item': sess1_iid, 'sess_0_item_gender': sess1_igender, 'score': score,
43 |                     'sess_1_item': sess2_iid, 'sess_1_item_gender': sess2_igender, }
44 |     # Start from the entries named by get_feature_names(feature_columns), then add sess_length by hand.
45 |     x = {name: feature_dict[name] for name in get_feature_names(feature_columns)}
46 |     x["sess_length"] = sess_number
47 | 
48 |     y = np.array([1, 0, 1])
49 |     return x, y, feature_columns, behavior_feature_list
50 | 
51 | 
52 | @pytest.mark.parametrize(
53 |     'bias_encoding',
54 |     [True, False]
55 | )
56 | def test_DSIN(bias_encoding):
57 |     model_name = "DSIN"
58 | 
59 |     x, y, feature_columns, behavior_feature_list = get_xy_fd(True)
60 | 
61 |     model = DSIN(feature_columns, behavior_feature_list, sess_max_count=2, bias_encoding=bias_encoding,
62 |                  dnn_hidden_units=[4, 4], dnn_dropout=0.5, )
63 |     check_model(model, model_name, x, y)
64 | 
65 | 
66 | if __name__ == "__main__":
67 |     pass  # no-op: nothing runs when this module is executed directly
68 | 


--------------------------------------------------------------------------------
/tests/models/DeepFEFM_test.py:
--------------------------------------------------------------------------------
 1 | import pytest
 2 | import tensorflow as tf
 3 | 
 4 | from deepctr.estimator import DeepFEFMEstimator
 5 | from deepctr.models import DeepFEFM
 6 | from ..utils import check_model, get_test_data, SAMPLE_SIZE, get_test_data_estimator, check_estimator, \
 7 |     Estimator_TEST_TF1
 8 | 
 9 | 
10 | @pytest.mark.parametrize(
11 |     'hidden_size,sparse_feature_num,use_fefm,use_linear,use_fefm_embed_in_dnn',
12 |     [((2,), 1, True, True, True),
13 |      ((2,), 1, True, True, False),
14 |      ((2,), 1, True, False, True),
15 |      ((2,), 1, False, True, True),
16 |      ((2,), 1, True, False, False),
17 |      ((2,), 1, False, True, False),
18 |      ((2,), 1, False, False, True),
19 |      ((2,), 1, False, False, False),
20 |      ((), 1, True, True, True)
21 |      ]
22 | )
23 | def test_DeepFEFM(hidden_size, sparse_feature_num, use_fefm, use_linear, use_fefm_embed_in_dnn):
24 |     if tf.__version__ == "1.15.0" or tf.__version__ == "1.4.0":  # slow in tf 1.15
25 |         return
26 |     model_name = "DeepFEFM"
27 |     sample_size = SAMPLE_SIZE
28 |     x, y, feature_columns = get_test_data(sample_size, sparse_feature_num=sparse_feature_num,
29 |                                           dense_feature_num=sparse_feature_num)
30 |     model = DeepFEFM(feature_columns, feature_columns, dnn_hidden_units=hidden_size, dnn_dropout=0.5,
31 |                      use_linear=use_linear, use_fefm=use_fefm, use_fefm_embed_in_dnn=use_fefm_embed_in_dnn)
32 | 
33 |     check_model(model, model_name, x, y)
34 | 
35 | 
36 | @pytest.mark.parametrize(
37 |     'hidden_size,sparse_feature_num',
38 |     [((2,), 2),
39 |      ((), 2),
40 |      ]
41 | )
42 | def test_DeepFEFMEstimator(hidden_size, sparse_feature_num):
43 |     if not Estimator_TEST_TF1 and tf.__version__ < "2.2.0":
44 |         return
45 |     sample_size = SAMPLE_SIZE
46 |     linear_feature_columns, dnn_feature_columns, input_fn = get_test_data_estimator(sample_size,
47 |                                                                                     sparse_feature_num=sparse_feature_num,
48 |                                                                                     dense_feature_num=sparse_feature_num)
49 | 
50 |     model = DeepFEFMEstimator(linear_feature_columns, dnn_feature_columns,
51 |                               dnn_hidden_units=hidden_size, dnn_dropout=0.5)
52 | 
53 |     check_estimator(model, input_fn)
54 | 
55 | 
56 | if __name__ == "__main__":
57 |     pass  # no-op: nothing runs when this module is executed directly
58 | 


--------------------------------------------------------------------------------
/tests/models/DeepFM_test.py:
--------------------------------------------------------------------------------
 1 | import pytest
 2 | import tensorflow as tf
 3 | 
 4 | from deepctr.estimator import DeepFMEstimator
 5 | from deepctr.models import DeepFM
 6 | from ..utils import check_model, get_test_data, SAMPLE_SIZE, get_test_data_estimator, check_estimator, \
 7 |     Estimator_TEST_TF1
 8 | 
 9 | 
10 | @pytest.mark.parametrize(
11 |     'hidden_size,sparse_feature_num',
12 |     [((2,), 1),  #
13 |      ((3,), 2)
14 |      ]  # (True, (32,), 3), (False, (32,), 1)
15 | )
16 | def test_DeepFM(hidden_size, sparse_feature_num):
17 |     model_name = "DeepFM"
18 |     sample_size = SAMPLE_SIZE
19 |     x, y, feature_columns = get_test_data(sample_size, sparse_feature_num=sparse_feature_num,
20 |                                           dense_feature_num=sparse_feature_num)
21 | 
22 |     model = DeepFM(feature_columns, feature_columns, dnn_hidden_units=hidden_size, dnn_dropout=0.5)
23 | 
24 |     check_model(model, model_name, x, y)
25 | 
26 | 
27 | @pytest.mark.parametrize(
28 |     'hidden_size,sparse_feature_num',
29 |     [
30 |         ((3,), 2)
31 |     ]  # (True, (32,), 3), (False, (32,), 1)
32 | )
33 | def test_DeepFMEstimator(hidden_size, sparse_feature_num):
34 |     if not Estimator_TEST_TF1 and tf.__version__ < "2.2.0":
35 |         return
36 |     sample_size = SAMPLE_SIZE
37 |     linear_feature_columns, dnn_feature_columns, input_fn = get_test_data_estimator(sample_size,
38 |                                                                                     sparse_feature_num=sparse_feature_num,
39 |                                                                                     dense_feature_num=sparse_feature_num,
40 |                                                                                     classification=False)
41 | 
42 |     model = DeepFMEstimator(linear_feature_columns, dnn_feature_columns, dnn_hidden_units=hidden_size, dnn_dropout=0.5,
43 |                             task="regression")
44 | 
45 |     check_estimator(model, input_fn)
46 | 
47 | 
48 | if __name__ == "__main__":
49 |     pass  # no-op: nothing runs when this module is executed directly
50 | 


--------------------------------------------------------------------------------
/tests/models/FGCNN_test.py:
--------------------------------------------------------------------------------
 1 | import pytest
 2 | 
 3 | from deepctr.models import FGCNN
 4 | from tests.utils import check_model, get_test_data, SAMPLE_SIZE
 5 | 
 6 | 
 7 | @pytest.mark.parametrize(
 8 |     'sparse_feature_num,dense_feature_num',
 9 |     [(1, 1), (3, 3)
10 |      ]
11 | )
12 | def test_FGCNN(sparse_feature_num, dense_feature_num):
13 |     model_name = "FGCNN"
14 | 
15 |     sample_size = SAMPLE_SIZE
16 |     x, y, feature_columns = get_test_data(sample_size, embedding_size=8, sparse_feature_num=sparse_feature_num,
17 |                                           dense_feature_num=dense_feature_num)
18 | 
19 |     model = FGCNN(feature_columns, feature_columns, conv_kernel_width=(3, 2), conv_filters=(2, 1), new_maps=(
20 |         2, 2), pooling_width=(2, 2), dnn_hidden_units=(32,), dnn_dropout=0.5, )
21 |     # TODO: add model_io check
22 |     check_model(model, model_name, x, y, check_model_io=False)
23 | 
24 | 
25 | # @pytest.mark.parametrize(
26 | #     'sparse_feature_num,dense_feature_num',
27 | #     [(2, 1),
28 | #      ]
29 | # )
30 | # def test_FGCNN_without_seq(sparse_feature_num, dense_feature_num):
31 | #     model_name = "FGCNN_noseq"
32 | #
33 | #     sample_size = SAMPLE_SIZE
34 | #     x, y, feature_columns = get_test_data(sample_size, sparse_feature_num=sparse_feature_num,
35 | #                                           dense_feature_num=dense_feature_num, sequence_feature=())
36 | #
37 | #     model = FGCNN(feature_columns, feature_columns, conv_kernel_width=(), conv_filters=(
38 | #     ), new_maps=(), pooling_width=(), dnn_hidden_units=(32,), dnn_dropout=0.5, )
39 | #     # TODO: add model_io check
40 | #     check_model(model, model_name, x, y, check_model_io=False)
41 | 
42 | 
43 | if __name__ == "__main__":
44 |     pass  # no-op: nothing runs when this module is executed directly
45 | 


--------------------------------------------------------------------------------
/tests/models/FLEN_test.py:
--------------------------------------------------------------------------------
 1 | import pytest
 2 | 
 3 | from deepctr.models import FLEN
 4 | from ..utils import check_model, get_test_data, SAMPLE_SIZE
 5 | 
 6 | 
 7 | @pytest.mark.parametrize(
 8 |     'hidden_size,sparse_feature_num',
 9 |     [
10 |         ((3,), 6)
11 |     ]  # (True, (32,), 3), (False, (32,), 1)
12 | )
13 | def test_DeepFM(hidden_size, sparse_feature_num):
14 |     model_name = "FLEN"
15 |     sample_size = SAMPLE_SIZE
16 |     x, y, feature_columns = get_test_data(sample_size, embedding_size=2, sparse_feature_num=sparse_feature_num,
17 |                                           dense_feature_num=sparse_feature_num, use_group=True)
18 | 
19 |     model = FLEN(feature_columns, feature_columns, dnn_hidden_units=hidden_size, dnn_dropout=0.5)
20 | 
21 |     check_model(model, model_name, x, y)
22 | 
23 | 
24 | if __name__ == "__main__":
25 |     pass
26 | 


--------------------------------------------------------------------------------
/tests/models/FNN_test.py:
--------------------------------------------------------------------------------
 1 | import pytest
 2 | import tensorflow as tf
 3 | 
 4 | from deepctr.estimator import FNNEstimator
 5 | from deepctr.models import FNN
 6 | from ..utils import check_model, get_test_data, SAMPLE_SIZE, get_test_data_estimator, check_estimator, \
 7 |     Estimator_TEST_TF1
 8 | 
 9 | 
@pytest.mark.parametrize(
    'sparse_feature_num,dense_feature_num',
    [(1, 1), (3, 3)
     ]
)
def test_FNN(sparse_feature_num, dense_feature_num):
    """Build an FNN model on generated test data and pass it to ``check_model``.

    Returns early (without exercising the model) on TensorFlow 2.0.0 and
    later.
    """
    # Local import: this module does not import ``packaging`` at file level.
    from packaging import version

    # Compare parsed versions, not raw strings: string comparison is
    # lexicographic and mis-orders multi-digit components.
    if version.parse(tf.__version__) >= version.parse("2.0.0"):
        return
    model_name = "FNN"

    sample_size = SAMPLE_SIZE
    x, y, feature_columns = get_test_data(sample_size, sparse_feature_num=sparse_feature_num,
                                          dense_feature_num=dense_feature_num)

    model = FNN(feature_columns, feature_columns, dnn_hidden_units=[8, 8], dnn_dropout=0.5)
    check_model(model, model_name, x, y)
26 | 
27 | 
28 | # @pytest.mark.parametrize(
29 | #     'sparse_feature_num,dense_feature_num',
30 | #     [(0, 1), (1, 0)
31 | #      ]
32 | # )
33 | # def test_FNN_without_seq(sparse_feature_num, dense_feature_num):
34 | #     model_name = "FNN"
35 | #
36 | #     sample_size = SAMPLE_SIZE
37 | #     x, y, feature_columns = get_test_data(sample_size, sparse_feature_num, dense_feature_num, sequence_feature=())
38 | #
39 | #     model = FNN(feature_columns,feature_columns, dnn_hidden_units=[32, 32], dnn_dropout=0.5)
40 | #     check_model(model, model_name, x, y)
41 | 
@pytest.mark.parametrize(
    'sparse_feature_num,dense_feature_num',
    [(2, 2),
     ]
)
def test_FNNEstimator(sparse_feature_num, dense_feature_num):
    """Build an ``FNNEstimator`` and pass it to ``check_estimator``.

    Returns early when ``Estimator_TEST_TF1`` is falsy and TensorFlow is
    older than 2.2.0.
    """
    # Local import: this module does not import ``packaging`` at file level.
    from packaging import version

    # As plain strings "2.10.0" < "2.2.0", which would silently skip this
    # test on newer TensorFlow; compare parsed versions instead.
    if not Estimator_TEST_TF1 and version.parse(tf.__version__) < version.parse("2.2.0"):
        return
    sample_size = SAMPLE_SIZE
    linear_feature_columns, dnn_feature_columns, input_fn = get_test_data_estimator(
        sample_size,
        sparse_feature_num=sparse_feature_num,
        dense_feature_num=dense_feature_num)

    model = FNNEstimator(linear_feature_columns, dnn_feature_columns, dnn_hidden_units=[8, 8], dnn_dropout=0.5)

    check_estimator(model, input_fn)
58 | 
59 | 
60 | if __name__ == "__main__":
61 |     pass
62 | 


--------------------------------------------------------------------------------
/tests/models/FiBiNET_test.py:
--------------------------------------------------------------------------------
 1 | import pytest
 2 | import tensorflow as tf
 3 | 
 4 | from deepctr.estimator import FiBiNETEstimator
 5 | from deepctr.models import FiBiNET
 6 | from ..utils import check_model, SAMPLE_SIZE, get_test_data, get_test_data_estimator, check_estimator, \
 7 |     Estimator_TEST_TF1
 8 | 
 9 | 
@pytest.mark.parametrize(
    'bilinear_type',
    ["each", "all", "interaction"],
)
def test_FiBiNET(bilinear_type):
    """Build a FiBiNET model with the given ``bilinear_type`` and pass it to ``check_model``."""
    features, labels, columns = get_test_data(SAMPLE_SIZE, sparse_feature_num=2, dense_feature_num=2)

    fibinet = FiBiNET(
        columns,
        columns,
        bilinear_type=bilinear_type,
        dnn_hidden_units=[4, ],
        dnn_dropout=0.5,
    )
    check_model(fibinet, "FiBiNET", features, labels)
23 | 
24 | 
@pytest.mark.parametrize(
    'bilinear_type',
    ["interaction"]
)
def test_FiBiNETEstimator(bilinear_type):
    """Build a ``FiBiNETEstimator`` and pass it to ``check_estimator``.

    Returns early when ``Estimator_TEST_TF1`` is falsy and TensorFlow is
    older than 2.2.0.
    """
    # Local import: this module does not import ``packaging`` at file level.
    from packaging import version

    # As plain strings "2.10.0" < "2.2.0", which would silently skip this
    # test on newer TensorFlow; compare parsed versions instead.
    if not Estimator_TEST_TF1 and version.parse(tf.__version__) < version.parse("2.2.0"):
        return
    sample_size = SAMPLE_SIZE
    linear_feature_columns, dnn_feature_columns, input_fn = get_test_data_estimator(sample_size, sparse_feature_num=2,
                                                                                    dense_feature_num=2)

    model = FiBiNETEstimator(linear_feature_columns, dnn_feature_columns, bilinear_type=bilinear_type,
                             dnn_hidden_units=[4, ], dnn_dropout=0.5, )

    check_estimator(model, input_fn)
40 | 
41 | 
42 | if __name__ == "__main__":
43 |     pass
44 | 


--------------------------------------------------------------------------------
/tests/models/FwFM_test.py:
--------------------------------------------------------------------------------
 1 | import pytest
 2 | import tensorflow as tf
 3 | 
 4 | from deepctr.estimator import FwFMEstimator
 5 | from deepctr.models import FwFM
 6 | from ..utils import check_model, get_test_data, SAMPLE_SIZE, get_test_data_estimator, check_estimator, \
 7 |     Estimator_TEST_TF1
 8 | 
 9 | 
@pytest.mark.parametrize(
    'hidden_size,sparse_feature_num',
    [
        ((2,), 1),
        ((), 1),
    ],
)
def test_FwFM(hidden_size, sparse_feature_num):
    """Build a FwFM model (with and without DNN layers) and pass it to ``check_model``.

    The dense feature count is set equal to ``sparse_feature_num``.
    """
    features, labels, columns = get_test_data(
        SAMPLE_SIZE,
        sparse_feature_num=sparse_feature_num,
        dense_feature_num=sparse_feature_num,
    )
    fwfm = FwFM(columns, columns, dnn_hidden_units=hidden_size, dnn_dropout=0.5)

    check_model(fwfm, "FwFM", features, labels)
24 | 
25 | 
@pytest.mark.parametrize(
    'hidden_size,sparse_feature_num',
    [((2,), 2),
     ]
)
def test_FwFMEstimator(hidden_size, sparse_feature_num):
    """Build a ``FwFMEstimator`` and pass it to ``check_estimator``.

    Returns early when ``Estimator_TEST_TF1`` is falsy and TensorFlow is
    older than 2.2.0. The dense feature count equals ``sparse_feature_num``.
    """
    # Local import: this module does not import ``packaging`` at file level.
    from packaging import version

    # As plain strings "2.10.0" < "2.2.0", which would silently skip this
    # test on newer TensorFlow; compare parsed versions instead.
    if not Estimator_TEST_TF1 and version.parse(tf.__version__) < version.parse("2.2.0"):
        return
    sample_size = SAMPLE_SIZE
    linear_feature_columns, dnn_feature_columns, input_fn = get_test_data_estimator(
        sample_size,
        sparse_feature_num=sparse_feature_num,
        dense_feature_num=sparse_feature_num)

    model = FwFMEstimator(linear_feature_columns, dnn_feature_columns, dnn_hidden_units=hidden_size, dnn_dropout=0.5)

    check_estimator(model, input_fn)
42 | 
43 | 
44 | if __name__ == "__main__":
45 |     pass
46 | 


--------------------------------------------------------------------------------
/tests/models/IFM_test.py:
--------------------------------------------------------------------------------
 1 | import pytest
 2 | 
 3 | from deepctr.models import IFM
 4 | from ..utils import check_model, get_test_data, SAMPLE_SIZE
 5 | 
 6 | 
 7 | @pytest.mark.parametrize(
 8 |     'hidden_size,sparse_feature_num',
 9 |     [((2,), 1),
10 |      ((3,), 2)
11 |      ]
12 | )
13 | def test_IFM(hidden_size, sparse_feature_num):
14 |     model_name = "IFM"
15 |     sample_size = SAMPLE_SIZE
16 |     x, y, feature_columns = get_test_data(sample_size, sparse_feature_num=sparse_feature_num,
17 |                                           dense_feature_num=sparse_feature_num)
18 | 
19 |     model = IFM(feature_columns, feature_columns, dnn_hidden_units=hidden_size, dnn_dropout=0.5)
20 |     check_model(model, model_name, x, y)
21 | 
22 | 
23 | if __name__ == "__main__":
24 |     pass
25 | 


--------------------------------------------------------------------------------
/tests/models/MLR_test.py:
--------------------------------------------------------------------------------
 1 | import pytest
 2 | 
 3 | from deepctr.models import MLR
 4 | from ..utils import check_model, SAMPLE_SIZE, get_test_data
 5 | 
 6 | 
 7 | @pytest.mark.parametrize(
 8 | 
 9 |     'region_sparse,region_dense,base_sparse,base_dense,bias_sparse,bias_dense',
10 | 
11 |     [(0, 2, 0, 2, 0, 1), (0, 2, 0, 1, 0, 2), (0, 2, 0, 0, 1, 0),
12 |      #     (0, 1, 1, 2, 1, 1,), (0, 1, 1, 1, 1, 2), (0, 1, 1, 0, 2, 0),
13 |      #     (1, 0, 2, 2, 2, 1), (2, 0, 2, 1, 2, 2), (2, 0, 2, 0, 0, 0)
14 |      ]
15 | 
16 | )
17 | def test_MLRs(region_sparse, region_dense, base_sparse, base_dense, bias_sparse, bias_dense):
18 |     model_name = "MLRs"
19 |     _, y, region_feature_columns = get_test_data(SAMPLE_SIZE, sparse_feature_num=region_sparse,
20 |                                                  dense_feature_num=region_dense, prefix='region')
21 |     base_x, y, base_feature_columns = get_test_data(SAMPLE_SIZE, sparse_feature_num=region_sparse,
22 |                                                     dense_feature_num=region_dense, prefix='base')
23 |     bias_x, y, bias_feature_columns = get_test_data(SAMPLE_SIZE, sparse_feature_num=region_sparse,
24 |                                                     dense_feature_num=region_dense, prefix='bias')
25 | 
26 |     model = MLR(region_feature_columns, base_feature_columns, bias_feature_columns=bias_feature_columns)
27 |     model.compile('adam', 'binary_crossentropy',
28 |                   metrics=['binary_crossentropy'])
29 |     print(model_name + " test pass!")
30 | 
31 | 
def test_MLR():
    """Build an MLR model from region features only, compile it and pass it to ``check_model``.

    Test data is also generated with the ``base`` and ``bias`` prefixes; of
    those calls only the labels from the last one are used.
    """
    model_name = "MLR"
    shared_kwargs = dict(sparse_feature_num=3, dense_feature_num=3)

    region_x, _, region_feature_columns = get_test_data(SAMPLE_SIZE, prefix='region', **shared_kwargs)
    get_test_data(SAMPLE_SIZE, prefix='base', **shared_kwargs)
    # The labels handed to check_model come from this final call.
    _, y, _ = get_test_data(SAMPLE_SIZE, prefix='bias', **shared_kwargs)

    mlr = MLR(region_feature_columns)
    mlr.compile('adam', 'binary_crossentropy', metrics=['binary_crossentropy'])

    check_model(mlr, model_name, region_x, y)
    print(model_name + " test pass!")
47 | 
48 | 
49 | if __name__ == "__main__":
50 |     pass
51 | 


--------------------------------------------------------------------------------
/tests/models/MTL_test.py:
--------------------------------------------------------------------------------
 1 | import pytest
 2 | import tensorflow as tf
 3 | 
 4 | from deepctr.models.multitask import SharedBottom, ESMM, MMOE, PLE
 5 | from ..utils_mtl import get_mtl_test_data, check_mtl_model
 6 | 
 7 | 
 8 | def test_SharedBottom():
 9 |     if tf.__version__ == "1.15.0":  # slow in tf 1.15
10 |         return
11 |     model_name = "SharedBottom"
12 |     x, y_list, dnn_feature_columns = get_mtl_test_data()
13 | 
14 |     model = SharedBottom(dnn_feature_columns, bottom_dnn_hidden_units=(8,), tower_dnn_hidden_units=(8,),
15 |                          task_types=['binary', 'binary'], task_names=['label_income', 'label_marital'])
16 |     check_mtl_model(model, model_name, x, y_list, task_types=['binary', 'binary'])
17 | 
18 | 
def test_ESMM():
    """Build a two-task ESMM model and pass it to ``check_mtl_model``."""
    # slow in tf 1.15, so nothing is exercised on that exact release
    if tf.__version__ == "1.15.0":
        return
    features, labels, columns = get_mtl_test_data()

    esmm = ESMM(
        columns,
        tower_dnn_hidden_units=(8,),
        task_types=['binary', 'binary'],
        task_names=['label_marital', 'label_income'],
    )
    check_mtl_model(esmm, "ESMM", features, labels, task_types=['binary', 'binary'])
28 | 
29 | 
def test_MMOE():
    """Build a two-task MMOE model with three experts and pass it to ``check_mtl_model``."""
    # slow in tf 1.15, so nothing is exercised on that exact release
    if tf.__version__ == "1.15.0":
        return
    features, labels, columns = get_mtl_test_data()

    mmoe = MMOE(
        columns,
        num_experts=3,
        expert_dnn_hidden_units=(8,),
        tower_dnn_hidden_units=(8,),
        gate_dnn_hidden_units=(),
        task_types=['binary', 'binary'],
        task_names=['income', 'marital'],
    )
    check_mtl_model(mmoe, "MMOE", features, labels, task_types=['binary', 'binary'])
41 | 
42 | 
@pytest.mark.parametrize(
    'num_levels,gate_dnn_hidden_units',
    [
        (2, ()),
        (1, (4,)),
    ],
)
def test_PLE(num_levels, gate_dnn_hidden_units):
    """Build a two-task PLE model for the given levels/gate units and pass it to ``check_mtl_model``."""
    # slow in tf 1.15, so nothing is exercised on that exact release
    if tf.__version__ == "1.15.0":
        return
    features, labels, columns = get_mtl_test_data()

    ple = PLE(
        columns,
        num_levels=num_levels,
        expert_dnn_hidden_units=(8,),
        tower_dnn_hidden_units=(8,),
        gate_dnn_hidden_units=gate_dnn_hidden_units,
        task_types=['binary', 'binary'],
        task_names=['income', 'marital'],
    )
    check_mtl_model(ple, "PLE", features, labels, task_types=['binary', 'binary'])
58 | 
59 | 
60 | if __name__ == "__main__":
61 |     pass
62 | 


--------------------------------------------------------------------------------
/tests/models/NFM_test.py:
--------------------------------------------------------------------------------
 1 | import pytest
 2 | import tensorflow as tf
 3 | 
 4 | from deepctr.estimator import NFMEstimator
 5 | from deepctr.models import NFM
 6 | from ..utils import check_model, get_test_data, SAMPLE_SIZE, get_test_data_estimator, check_estimator, \
 7 |     Estimator_TEST_TF1
 8 | 
 9 | 
@pytest.mark.parametrize(
    'hidden_size,sparse_feature_num',
    [((8,), 1), ((8, 8,), 2)]
)
def test_NFM(hidden_size, sparse_feature_num):
    """Build an NFM model on generated test data and pass it to ``check_model``.

    ``hidden_size`` is used as the model's ``dnn_hidden_units``; the dense
    feature count is set equal to ``sparse_feature_num``.
    """
    model_name = "NFM"

    sample_size = SAMPLE_SIZE
    x, y, feature_columns = get_test_data(sample_size, sparse_feature_num=sparse_feature_num,
                                          dense_feature_num=sparse_feature_num)

    # Use the parametrized hidden layout (it was previously ignored in
    # favour of a hard-coded [8, 8]).
    model = NFM(feature_columns, feature_columns, dnn_hidden_units=hidden_size, dnn_dropout=0.5)
    check_model(model, model_name, x, y)
23 | 
24 | 
@pytest.mark.parametrize(
    'hidden_size,sparse_feature_num',
    [((8,), 1), ((8, 8,), 2)]
)
def test_FNNEstimator(hidden_size, sparse_feature_num):
    """Build an ``NFMEstimator`` and pass it to ``check_estimator``.

    Returns early when ``Estimator_TEST_TF1`` is falsy and TensorFlow is
    older than 2.2.0. ``hidden_size`` is used as ``dnn_hidden_units``.

    NOTE(review): the function is named ``test_FNNEstimator`` although it
    builds ``NFMEstimator``; presumably a copy-paste leftover. The name is
    kept so existing test ids do not change.
    """
    # Local import: this module does not import ``packaging`` at file level.
    from packaging import version

    # As plain strings "2.10.0" < "2.2.0", which would silently skip this
    # test on newer TensorFlow; compare parsed versions instead.
    if not Estimator_TEST_TF1 and version.parse(tf.__version__) < version.parse("2.2.0"):
        return
    sample_size = SAMPLE_SIZE
    linear_feature_columns, dnn_feature_columns, input_fn = get_test_data_estimator(
        sample_size,
        sparse_feature_num=sparse_feature_num,
        dense_feature_num=sparse_feature_num)

    # Use the parametrized hidden layout (it was previously ignored in
    # favour of a hard-coded [8, 8]).
    model = NFMEstimator(linear_feature_columns, dnn_feature_columns, dnn_hidden_units=hidden_size, dnn_dropout=0.5)

    check_estimator(model, input_fn)
40 | 
41 | 
42 | if __name__ == "__main__":
43 |     pass
44 | 


--------------------------------------------------------------------------------
/tests/models/ONN_test.py:
--------------------------------------------------------------------------------
 1 | import pytest
 2 | import tensorflow as tf
 3 | from packaging import version
 4 | 
 5 | from deepctr.models import ONN
 6 | from ..utils import check_model, get_test_data, SAMPLE_SIZE
 7 | 
 8 | 
 9 | @pytest.mark.parametrize(
10 |     'sparse_feature_num',
11 |     [2]
12 | )
13 | def test_ONN(sparse_feature_num):
14 |     if version.parse(tf.__version__) >= version.parse('2.0.0'):
15 |         return
16 |     model_name = "ONN"
17 | 
18 |     sample_size = SAMPLE_SIZE
19 |     x, y, feature_columns = get_test_data(sample_size, sparse_feature_num=sparse_feature_num,
20 |                                           dense_feature_num=sparse_feature_num,
21 |                                           sequence_feature=('sum', 'mean', 'max',), hash_flag=True)
22 | 
23 |     model = ONN(feature_columns, feature_columns,
24 |                 dnn_hidden_units=[4, 4], dnn_dropout=0.5)
25 |     check_model(model, model_name, x, y)
26 | 
27 | 
28 | if __name__ == "__main__":
29 |     pass
30 | 


--------------------------------------------------------------------------------
/tests/models/PNN_test.py:
--------------------------------------------------------------------------------
 1 | import pytest
 2 | import tensorflow as tf
 3 | 
 4 | from deepctr.estimator import PNNEstimator
 5 | from deepctr.models import PNN
 6 | from ..utils import check_model, get_test_data, SAMPLE_SIZE, get_test_data_estimator, check_estimator, \
 7 |     Estimator_TEST_TF1
 8 | 
 9 | 
@pytest.mark.parametrize(
    'use_inner, use_outter,sparse_feature_num',
    [
        (True, True, 3),
        (False, False, 1),
    ],
)
def test_PNN(use_inner, use_outter, sparse_feature_num):
    """Build a PNN model with the given product settings and pass it to ``check_model``.

    The dense feature count is set equal to ``sparse_feature_num``.
    """
    features, labels, columns = get_test_data(
        SAMPLE_SIZE,
        sparse_feature_num=sparse_feature_num,
        dense_feature_num=sparse_feature_num,
    )
    pnn = PNN(
        columns,
        dnn_hidden_units=[4, 4],
        dnn_dropout=0.5,
        use_inner=use_inner,
        use_outter=use_outter,
    )
    check_model(pnn, "PNN", features, labels)
22 | 
23 | 
@pytest.mark.parametrize(
    'use_inner, use_outter,sparse_feature_num',
    [(True, True, 2)
     ]
)
def test_PNNEstimator(use_inner, use_outter, sparse_feature_num):
    """Build a ``PNNEstimator`` and pass it to ``check_estimator``.

    Returns early when ``Estimator_TEST_TF1`` is falsy and TensorFlow is
    older than 2.2.0. Only the DNN feature columns are used; the linear
    columns returned by ``get_test_data_estimator`` are discarded.
    """
    # Local import: this module does not import ``packaging`` at file level.
    from packaging import version

    # As plain strings "2.10.0" < "2.2.0", which would silently skip this
    # test on newer TensorFlow; compare parsed versions instead.
    if not Estimator_TEST_TF1 and version.parse(tf.__version__) < version.parse("2.2.0"):
        return
    sample_size = SAMPLE_SIZE
    _, dnn_feature_columns, input_fn = get_test_data_estimator(sample_size,
                                                               sparse_feature_num=sparse_feature_num,
                                                               dense_feature_num=sparse_feature_num)

    model = PNNEstimator(dnn_feature_columns, dnn_hidden_units=[4, 4], dnn_dropout=0.5, use_inner=use_inner,
                         use_outter=use_outter)

    check_estimator(model, input_fn)
41 | 
42 | 
43 | if __name__ == "__main__":
44 |     pass
45 | 


--------------------------------------------------------------------------------
/tests/models/WDL_test.py:
--------------------------------------------------------------------------------
 1 | import pytest
 2 | import tensorflow as tf
 3 | from packaging import version
 4 | 
 5 | from deepctr.estimator import WDLEstimator
 6 | from deepctr.models import WDL
 7 | from ..utils import check_model, check_estimator, SAMPLE_SIZE, get_test_data, get_test_data_estimator, \
 8 |     Estimator_TEST_TF1
 9 | 
10 | 
@pytest.mark.parametrize(
    'sparse_feature_num,dense_feature_num',
    [
        (2, 0),
        (0, 2),
    ],
)
def test_WDL(sparse_feature_num, dense_feature_num):
    """Build a WDL model on hashed test data and pass it to ``check_model``.

    Returns early on TensorFlow 2.0.0 and later.
    """
    tf_version = version.parse(tf.__version__)
    if tf_version >= version.parse('2.0.0'):
        return

    features, labels, columns = get_test_data(
        SAMPLE_SIZE,
        sparse_feature_num=sparse_feature_num,
        dense_feature_num=dense_feature_num,
        hash_flag=True,
    )

    wdl = WDL(columns, columns, dnn_hidden_units=[4, 4], dnn_dropout=0.5)
    check_model(wdl, "WDL", features, labels)
27 | 
28 | 
@pytest.mark.parametrize(
    'sparse_feature_num,dense_feature_num',
    [(2, 1),  # (0, 2)#,(2, 2)
     ]
)
def test_WDLEstimator(sparse_feature_num, dense_feature_num):
    """Build a ``WDLEstimator`` and pass it to ``check_estimator``.

    Returns early when ``Estimator_TEST_TF1`` is falsy and TensorFlow is
    older than 2.2.0.
    """
    if not Estimator_TEST_TF1 and version.parse(tf.__version__) < version.parse('2.2.0'):
        return
    sample_size = SAMPLE_SIZE

    # Pass the feature counts by keyword, like every other estimator test,
    # so they cannot bind to a different positional parameter of the helper.
    linear_feature_columns, dnn_feature_columns, input_fn = get_test_data_estimator(
        sample_size,
        sparse_feature_num=sparse_feature_num,
        dense_feature_num=dense_feature_num)
    model = WDLEstimator(linear_feature_columns, dnn_feature_columns,
                         dnn_hidden_units=[4, 4], dnn_dropout=0.5)
    check_estimator(model, input_fn)
44 | 
45 | 
46 | if __name__ == "__main__":
47 |     pass
48 | 


--------------------------------------------------------------------------------
/tests/models/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bytedance/LargeBatchCTR/12101b44d8e753a6d8d954437287f96d68895d49/tests/models/__init__.py


--------------------------------------------------------------------------------
/tests/models/xDeepFM_test.py:
--------------------------------------------------------------------------------
 1 | import pytest
 2 | import tensorflow as tf
 3 | 
 4 | from deepctr.estimator import xDeepFMEstimator
 5 | from deepctr.models import xDeepFM
 6 | from ..utils import check_model, get_test_data, SAMPLE_SIZE, get_test_data_estimator, check_estimator, \
 7 |     Estimator_TEST_TF1
 8 | 
 9 | 
@pytest.mark.parametrize(
    'dnn_hidden_units,cin_layer_size,cin_split_half,cin_activation,sparse_feature_num,dense_feature_dim',
    [
        ((8,), (), True, 'linear', 1, 1),
        ((), (8,), True, 'linear', 2, 2),
        ((8,), (8,), False, 'relu', 1, 0),
    ],
)
def test_xDeepFM(dnn_hidden_units, cin_layer_size, cin_split_half, cin_activation, sparse_feature_num,
                 dense_feature_dim):
    """Build an xDeepFM model for the given DNN/CIN settings and pass it to ``check_model``.

    NOTE(review): ``dense_feature_dim`` is accepted from the parametrization
    but not used; the dense feature count is taken from
    ``sparse_feature_num``.
    """
    features, labels, columns = get_test_data(
        SAMPLE_SIZE,
        sparse_feature_num=sparse_feature_num,
        dense_feature_num=sparse_feature_num,
    )

    xdeepfm = xDeepFM(
        columns,
        columns,
        dnn_hidden_units=dnn_hidden_units,
        cin_layer_size=cin_layer_size,
        cin_split_half=cin_split_half,
        cin_activation=cin_activation,
        dnn_dropout=0.5,
    )
    check_model(xdeepfm, "xDeepFM", features, labels)
29 | 
30 | 
31 | # @pytest.mark.parametrize(
32 | #     'hidden_size,cin_layer_size,',
33 | #     [((8,), (3, 8)),
34 | #      ]
35 | # )
36 | # def test_xDeepFM_invalid(hidden_size, cin_layer_size):
37 | #     feature_dim_dict = {'sparse': {'sparse_1': 2, 'sparse_2': 5,
38 | #                                    'sparse_3': 10}, 'dense': ['dense_1', 'dense_2', 'dense_3']}
39 | #     with pytest.raises(ValueError):
40 | #         _ = xDeepFM(feature_dim_dict, None, dnn_hidden_units=hidden_size, cin_layer_size=cin_layer_size)
@pytest.mark.parametrize(
    'dnn_hidden_units,cin_layer_size,cin_split_half,cin_activation,sparse_feature_num,dense_feature_dim',
    [  # ((), (), True, 'linear', 1, 2),
        ((8,), (8,), False, 'relu', 2, 1)
    ]
)
def test_xDeepFMEstimator(dnn_hidden_units, cin_layer_size, cin_split_half, cin_activation, sparse_feature_num,
                          dense_feature_dim):
    """Build an ``xDeepFMEstimator`` and pass it to ``check_estimator``.

    Returns early when ``Estimator_TEST_TF1`` is falsy and TensorFlow is
    older than 2.2.0.

    NOTE(review): ``dense_feature_dim`` is accepted from the parametrization
    but not used; the dense feature count is taken from
    ``sparse_feature_num``.
    """
    # Local import: this module does not import ``packaging`` at file level.
    from packaging import version

    # As plain strings "2.10.0" < "2.2.0", which would silently skip this
    # test on newer TensorFlow; compare parsed versions instead.
    if not Estimator_TEST_TF1 and version.parse(tf.__version__) < version.parse("2.2.0"):
        return
    sample_size = SAMPLE_SIZE
    linear_feature_columns, dnn_feature_columns, input_fn = get_test_data_estimator(
        sample_size,
        sparse_feature_num=sparse_feature_num,
        dense_feature_num=sparse_feature_num)

    model = xDeepFMEstimator(linear_feature_columns, dnn_feature_columns, dnn_hidden_units=dnn_hidden_units,
                             cin_layer_size=cin_layer_size,
                             cin_split_half=cin_split_half, cin_activation=cin_activation, dnn_dropout=0.5)

    check_estimator(model, input_fn)
61 | 
62 | 
63 | if __name__ == "__main__":
64 |     pass
65 | 


--------------------------------------------------------------------------------
/tests/utils_mtl.py:
--------------------------------------------------------------------------------
 1 | # test utils for multi task learning
 2 | 
 3 | import os
 4 | 
 5 | import numpy as np
 6 | import tensorflow as tf
 7 | from tensorflow.python.keras.models import load_model, save_model
 8 | 
 9 | from deepctr.feature_column import SparseFeat, DenseFeat, DEFAULT_GROUP_NAME
10 | from deepctr.layers import custom_objects
11 | 
12 | 
def get_mtl_test_data(sample_size=10, embedding_size=4, sparse_feature_num=1,
                      dense_feature_num=1, task_types=('binary', 'binary'),
                      hash_flag=False, prefix='', use_group=False):
    """
    generate random feature columns, model inputs and one label array per task.
    :param sample_size: number of samples drawn for every input and label array
    :param embedding_size: passed to each SparseFeat
    :param sparse_feature_num: number of SparseFeat columns to create
    :param dense_feature_num: number of DenseFeat columns to create
    :param task_types: one entry per task; 'binary' yields 0/1 integer labels,
        anything else yields random floats
    :param hash_flag: passed to each SparseFeat as ``use_hash``
    :param prefix: prepended to every feature name
    :param use_group: when true, sparse feature ``i`` gets group name ``str(i % 3)``,
        otherwise DEFAULT_GROUP_NAME
    :return: (model_input dict keyed by feature name, list of label arrays, feature columns)
    """
    feature_columns = []

    for idx in range(sparse_feature_num):
        group_name = str(idx % 3) if use_group else DEFAULT_GROUP_NAME
        vocab_size = np.random.randint(1, 10)
        sparse_column = SparseFeat(prefix + 'sparse_feature_' + str(idx), vocab_size, embedding_size,
                                   use_hash=hash_flag, dtype=tf.int32, group_name=group_name)
        feature_columns.append(sparse_column)

    for idx in range(dense_feature_num):
        def transform_fn(x): return (x - 0.0) / 1.0

        dense_column = DenseFeat(prefix + 'dense_feature_' + str(idx), 1, dtype=tf.float32,
                                 transform_fn=transform_fn)
        feature_columns.append(dense_column)

    # Inputs are drawn after all columns exist, in column order.
    model_input = {}
    for column in feature_columns:
        if isinstance(column, SparseFeat):
            values = np.random.randint(0, column.vocabulary_size, sample_size)
        elif isinstance(column, DenseFeat):
            values = np.random.random(sample_size)
        else:
            continue
        model_input[column.name] = values

    # multi label: one array per task, in task order
    y_list = [
        np.random.randint(0, 2, sample_size) if task == 'binary' else np.random.random(sample_size)
        for task in task_types
    ]

    return model_input, y_list, feature_columns
56 | 
57 | 
def check_mtl_model(model, model_name, x, y_list, task_types, check_model_io=True):
    """
    compile model, fit it for one epoch, then save/load weight and model file.
    Temporary ``.h5`` files are written to the current working directory and
    removed again, also when saving or loading raises.
    :param model: model object providing compile/fit/save_weights/load_weights
    :param model_name: used in progress messages and as the temp file name stem
    :param x: model input passed to ``model.fit``
    :param y_list: multi label of y, passed to ``model.fit``
    :param task_types: one entry per task; 'binary' adds a binary_crossentropy
        loss, 'regression' adds mean_squared_error, other values add no loss
    :param check_model_io: test save/load model file or not
    :return: None
    """
    loss_list = []
    metric_list = []
    for task_type in task_types:
        if task_type == 'binary':
            loss_list.append('binary_crossentropy')
            # metric_list.append('accuracy')
        elif task_type == 'regression':
            loss_list.append('mean_squared_error')
            # metric_list.append('mae')
    print('loss:', loss_list)
    print('metric:', metric_list)
    model.compile('adam', loss=loss_list, metrics=metric_list)
    model.fit(x, y_list, batch_size=100, epochs=1, validation_split=0.5)

    print(model_name + " test train valid pass!")
    weights_file = model_name + '_weights.h5'
    try:
        model.save_weights(weights_file)
        model.load_weights(weights_file)
    finally:
        # Clean up even if save/load failed, so a failing run leaves no file behind.
        if os.path.exists(weights_file):
            os.remove(weights_file)
    print(model_name + " test save load weight pass!")
    if check_model_io:
        model_file = model_name + '.h5'
        try:
            save_model(model, model_file)
            model = load_model(model_file, custom_objects)
        finally:
            if os.path.exists(model_file):
                os.remove(model_file)
        print(model_name + " test save load model pass!")

    print(model_name + " test pass!")
94 | 


--------------------------------------------------------------------------------
/tests/utils_test.py:
--------------------------------------------------------------------------------
1 | from deepctr.utils import check_version
2 | 
3 | 
def test_check_version():
    """Call ``check_version`` once with a string version and once with an integer."""
    for candidate in ('0.1.0', 20191231):
        check_version(candidate)
7 | 


--------------------------------------------------------------------------------
/utils.py:
--------------------------------------------------------------------------------
 1 | import datetime
 2 | import os
 3 | from pathlib import Path
 4 | 
 5 | import numpy as np
 6 | import tensorflow as tf
 7 | from sklearn.metrics import roc_auc_score
 8 | 
 9 | 
def print_curtime(note=None):
    """Print the current local time formatted as ``HH:MM:SS``.

    :param note: label printed in front of the time; when ``None`` the
        label ``Current time`` is used instead.
    """
    label = "Current time" if note is None else note
    stamp = datetime.datetime.now().strftime("%H:%M:%S")
    print(f"{label}: {stamp}")
17 | 
18 | 
def tf_allow_growth():
    """Enable memory growth on every GPU device TensorFlow lists."""
    for device in tf.config.experimental.list_physical_devices("GPU"):
        tf.config.experimental.set_memory_growth(device, True)
23 | 
24 | 
def create_logdir(root="logs/", args=None):
    """Create a timestamped log directory and make it the default summary target.

    The directory name is ``root`` followed directly by a
    ``YYYYmmdd-HHMMSS`` timestamp (plain string concatenation). ``args`` is
    printed into ``config.txt`` inside it, and a summary writer for the
    ``train`` subdirectory is set as default.

    :param root: string prefix of the directory path
    :param args: object whose printed form is stored in ``config.txt``
    :return: the path of the created directory
    """
    stamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
    log_dir = root + stamp
    Path(log_dir).mkdir(parents=True, exist_ok=True)

    config_path = os.path.join(log_dir, "config.txt")
    with open(config_path, "w") as f:
        print(args, file=f)
    print(f"LOG_DIR: {log_dir}")

    train_dir = os.path.join(log_dir, "train")
    tf.summary.create_file_writer(train_dir).set_as_default()
    return log_dir
35 | 
36 | 
def auc_score(y_true, y_pred):
    """Return the ROC AUC, or 0.5 when only one distinct label is present.

    The distinct-label check looks at the first column of ``y_true`` only
    (``y_true[:, 0]``), so ``y_true`` must be indexable along two axes.
    """
    first_column = y_true[:, 0]
    if len(np.unique(first_column)) != 1:
        return roc_auc_score(y_true, y_pred)
    return 0.5
42 | 
43 | 
def auc(y_true, y_pred):
    """Wrap ``auc_score`` with ``tf.numpy_function``, declaring a ``tf.double`` output."""
    inputs = (y_true, y_pred)
    return tf.numpy_function(auc_score, inputs, tf.double)
46 | 
47 | 
def num_params(model):
    """Count and print the trainable parameters of ``model``.

    Variables whose name contains ``'embedding'`` are counted as embedding
    parameters, all others as dense parameters. Three lines (total, dense,
    embed) are printed.

    :param model: object exposing ``trainable_variables``; each variable
        provides ``get_shape()`` and ``name``
    :return: total number of trainable parameters
    """
    embed_parameters = 0
    dense_parameters = 0
    for variable in model.trainable_variables:
        # Number of elements = product of the shape's dimensions.
        size = 1
        for dim in variable.get_shape():
            size *= dim
        if 'embedding' in variable.name:
            embed_parameters += size
        else:
            dense_parameters += size
    total_parameters = embed_parameters + dense_parameters

    print(f"Total Params: {total_parameters}")
    print(f"Dense Params: {dense_parameters}")
    print(f"Embed Params: {embed_parameters}")

    return total_parameters
68 | 


--------------------------------------------------------------------------------