├── tests ├── __init__.py ├── models │ ├── __init__.py │ ├── FNN_test.py │ ├── NFM_test.py │ ├── AFM_test.py │ ├── DeepFM_test.py │ ├── AutoInt_test.py │ ├── PNN_test.py │ ├── DCN_test.py │ ├── WDL_test.py │ ├── xDeepFM_test.py │ ├── MLR_test.py │ └── DIN_test.py ├── README.md ├── utils_test.py ├── activations_test.py ├── sequence_test.py ├── layers_test.py └── utils.py ├── README.md ├── docs ├── requirements.readthedocs.txt ├── pics │ ├── AFM.png │ ├── CIN.png │ ├── DCN.png │ ├── DIN.png │ ├── FNN.png │ ├── MLR.png │ ├── NFM.png │ ├── PNN.png │ ├── WDL.png │ ├── fms.png │ ├── mlr1.png │ ├── AutoInt.png │ ├── DeepFM.png │ ├── xDeepFM.png │ ├── mlrvsdnn.png │ ├── criteo_sample.png │ ├── InteractingLayer.png │ └── movielens_sample.png ├── source │ ├── modules.rst │ ├── deepctr.utils.rst │ ├── deepctr.layers.rst │ ├── deepctr.sequence.rst │ ├── deepctr.models.afm.rst │ ├── deepctr.models.dcn.rst │ ├── deepctr.models.din.rst │ ├── deepctr.models.fnn.rst │ ├── deepctr.models.mlr.rst │ ├── deepctr.models.nfm.rst │ ├── deepctr.models.pnn.rst │ ├── deepctr.models.wdl.rst │ ├── deepctr.activations.rst │ ├── deepctr.models.deepfm.rst │ ├── deepctr.models.autoint.rst │ ├── deepctr.models.xdeepfm.rst │ ├── Models-API.rst │ ├── deepctr.rst │ ├── deepctr.models.rst │ ├── History.md │ ├── index.rst │ ├── FAQ.rst │ ├── Quick-Start.rst │ ├── Demo.rst │ ├── conf.py │ └── Features.rst ├── Makefile └── make.bat ├── deepRS ├── __init__.py ├── models │ ├── __init__.py │ ├── fnn.py │ ├── wdl.py │ ├── nfm.py │ ├── afm.py │ ├── pnn.py │ ├── deepfm.py │ ├── xdeepfm.py │ ├── autoint.py │ ├── dcn.py │ ├── din.py │ └── mlr.py ├── activations.py ├── utils.py └── sequence.py ├── .gitattributes ├── setup.cfg ├── LICENSE ├── examples ├── run_regression_movielens.py ├── run_classification_criteo.py ├── run_din.py └── movielens_sample.txt ├── .travis.yml ├── setup.py └── .gitignore /tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # DeepRS 2 | -------------------------------------------------------------------------------- /tests/models/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /docs/requirements.readthedocs.txt: -------------------------------------------------------------------------------- 1 | tensorflow==1.4.0 -------------------------------------------------------------------------------- /docs/pics/AFM.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wubinzzu/DeepRS/HEAD/docs/pics/AFM.png -------------------------------------------------------------------------------- /docs/pics/CIN.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wubinzzu/DeepRS/HEAD/docs/pics/CIN.png -------------------------------------------------------------------------------- /docs/pics/DCN.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wubinzzu/DeepRS/HEAD/docs/pics/DCN.png -------------------------------------------------------------------------------- /docs/pics/DIN.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/wubinzzu/DeepRS/HEAD/docs/pics/DIN.png -------------------------------------------------------------------------------- /docs/pics/FNN.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wubinzzu/DeepRS/HEAD/docs/pics/FNN.png -------------------------------------------------------------------------------- /docs/pics/MLR.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wubinzzu/DeepRS/HEAD/docs/pics/MLR.png -------------------------------------------------------------------------------- /docs/pics/NFM.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wubinzzu/DeepRS/HEAD/docs/pics/NFM.png -------------------------------------------------------------------------------- /docs/pics/PNN.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wubinzzu/DeepRS/HEAD/docs/pics/PNN.png -------------------------------------------------------------------------------- /docs/pics/WDL.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wubinzzu/DeepRS/HEAD/docs/pics/WDL.png -------------------------------------------------------------------------------- /docs/pics/fms.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wubinzzu/DeepRS/HEAD/docs/pics/fms.png -------------------------------------------------------------------------------- /docs/pics/mlr1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wubinzzu/DeepRS/HEAD/docs/pics/mlr1.png -------------------------------------------------------------------------------- /docs/pics/AutoInt.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wubinzzu/DeepRS/HEAD/docs/pics/AutoInt.png -------------------------------------------------------------------------------- /docs/pics/DeepFM.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wubinzzu/DeepRS/HEAD/docs/pics/DeepFM.png -------------------------------------------------------------------------------- /docs/pics/xDeepFM.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wubinzzu/DeepRS/HEAD/docs/pics/xDeepFM.png -------------------------------------------------------------------------------- /tests/README.md: -------------------------------------------------------------------------------- 1 | # Test Log 2 | 3 | When using Dice, `save model` of `DIN` will cause an error.
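A minimal workaround sketch, assuming the failure is specific to whole-model saving: save weights only (as shown in the FAQ's save/load section) and rebuild the model before reloading. Here `feature_dim_dict` and `behavior_feature_list` stand for the inputs used in `examples/run_din.py`.

```python
# Hedged workaround sketch: skip save_model and persist weights only,
# so the Dice layer never has to be serialized with the architecture.
from deepctr.models import DIN

model = DIN(feature_dim_dict, behavior_feature_list, hist_len_max=4)
model.save_weights('DIN_w.h5')   # weights-only save works

model = DIN(feature_dim_dict, behavior_feature_list, hist_len_max=4)
model.load_weights('DIN_w.h5')   # rebuild the model first, then load the weights
```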
-------------------------------------------------------------------------------- /docs/pics/mlrvsdnn.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wubinzzu/DeepRS/HEAD/docs/pics/mlrvsdnn.png -------------------------------------------------------------------------------- /docs/pics/criteo_sample.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wubinzzu/DeepRS/HEAD/docs/pics/criteo_sample.png -------------------------------------------------------------------------------- /docs/pics/InteractingLayer.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wubinzzu/DeepRS/HEAD/docs/pics/InteractingLayer.png -------------------------------------------------------------------------------- /docs/pics/movielens_sample.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wubinzzu/DeepRS/HEAD/docs/pics/movielens_sample.png -------------------------------------------------------------------------------- /docs/source/modules.rst: -------------------------------------------------------------------------------- 1 | deepctr 2 | ======= 3 | 4 | .. toctree:: 5 | :maxdepth: 4 6 | 7 | deepctr 8 | -------------------------------------------------------------------------------- /tests/utils_test.py: -------------------------------------------------------------------------------- 1 | from deepctr.utils import check_version 2 | 3 | 4 | def test_check_version(): 5 | check_version('0.1.0') 6 | check_version(124214) 7 | -------------------------------------------------------------------------------- /docs/source/deepctr.utils.rst: -------------------------------------------------------------------------------- 1 | deepctr.utils module 2 | ==================== 3 | 4 | .. automodule:: deepctr.utils 5 | :members: 6 | :no-undoc-members: 7 | :no-show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/source/deepctr.layers.rst: -------------------------------------------------------------------------------- 1 | deepctr.layers module 2 | ===================== 3 | 4 | .. automodule:: deepctr.layers 5 | :members: 6 | :no-undoc-members: 7 | :no-show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/source/deepctr.sequence.rst: -------------------------------------------------------------------------------- 1 | deepctr.sequence module 2 | ======================= 3 | 4 | .. automodule:: deepctr.sequence 5 | :members: 6 | :no-undoc-members: 7 | :no-show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/source/deepctr.models.afm.rst: -------------------------------------------------------------------------------- 1 | deepctr.models.afm module 2 | ========================= 3 | 4 | .. automodule:: deepctr.models.afm 5 | :members: 6 | :no-undoc-members: 7 | :no-show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/source/deepctr.models.dcn.rst: -------------------------------------------------------------------------------- 1 | deepctr.models.dcn module 2 | ========================= 3 | 4 | .. 
automodule:: deepctr.models.dcn 5 | :members: 6 | :no-undoc-members: 7 | :no-show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/source/deepctr.models.din.rst: -------------------------------------------------------------------------------- 1 | deepctr.models.din module 2 | ========================= 3 | 4 | .. automodule:: deepctr.models.din 5 | :members: 6 | :no-undoc-members: 7 | :no-show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/source/deepctr.models.fnn.rst: -------------------------------------------------------------------------------- 1 | deepctr.models.fnn module 2 | ========================= 3 | 4 | .. automodule:: deepctr.models.fnn 5 | :members: 6 | :no-undoc-members: 7 | :no-show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/source/deepctr.models.mlr.rst: -------------------------------------------------------------------------------- 1 | deepctr.models.mlr module 2 | ========================= 3 | 4 | .. automodule:: deepctr.models.mlr 5 | :members: 6 | :no-undoc-members: 7 | :no-show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/source/deepctr.models.nfm.rst: -------------------------------------------------------------------------------- 1 | deepctr.models.nfm module 2 | ========================= 3 | 4 | .. automodule:: deepctr.models.nfm 5 | :members: 6 | :no-undoc-members: 7 | :no-show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/source/deepctr.models.pnn.rst: -------------------------------------------------------------------------------- 1 | deepctr.models.pnn module 2 | ========================= 3 | 4 | .. automodule:: deepctr.models.pnn 5 | :members: 6 | :no-undoc-members: 7 | :no-show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/source/deepctr.models.wdl.rst: -------------------------------------------------------------------------------- 1 | deepctr.models.wdl module 2 | ========================= 3 | 4 | .. automodule:: deepctr.models.wdl 5 | :members: 6 | :no-undoc-members: 7 | :no-show-inheritance: 8 | -------------------------------------------------------------------------------- /deepRS/__init__.py: -------------------------------------------------------------------------------- 1 | from . import activations 2 | from . import layers 3 | from . import sequence 4 | from . import models 5 | from .utils import check_version 6 | __version__ = '0.2.1' 7 | check_version(__version__) 8 | -------------------------------------------------------------------------------- /docs/source/deepctr.activations.rst: -------------------------------------------------------------------------------- 1 | deepctr.activations module 2 | ========================== 3 | 4 | .. automodule:: deepctr.activations 5 | :members: 6 | :no-undoc-members: 7 | :no-show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/source/deepctr.models.deepfm.rst: -------------------------------------------------------------------------------- 1 | deepctr.models.deepfm module 2 | ============================ 3 | 4 | ..
automodule:: deepctr.models.deepfm 5 | :members: 6 | :no-undoc-members: 7 | :no-show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/source/deepctr.models.autoint.rst: -------------------------------------------------------------------------------- 1 | deepctr.models.autoint module 2 | ============================= 3 | 4 | .. automodule:: deepctr.models.autoint 5 | :members: 6 | :no-undoc-members: 7 | :no-show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/source/deepctr.models.xdeepfm.rst: -------------------------------------------------------------------------------- 1 | deepctr.models.xdeepfm module 2 | ============================= 3 | 4 | .. automodule:: deepctr.models.xdeepfm 5 | :members: 6 | :no-undoc-members: 7 | :no-show-inheritance: 8 | -------------------------------------------------------------------------------- /tests/activations_test.py: -------------------------------------------------------------------------------- 1 | from deepctr import activations 2 | from tensorflow.python.keras.utils import CustomObjectScope 3 | from .utils import layer_test 4 | 5 | 6 | def test_dice(): 7 | with CustomObjectScope({'Dice': activations.Dice}): 8 | layer_test(activations.Dice, kwargs={}, 9 | input_shape=(2, 3)) 10 | -------------------------------------------------------------------------------- /deepRS/models/__init__.py: -------------------------------------------------------------------------------- 1 | from .afm import AFM 2 | from .dcn import DCN 3 | from .mlr import MLR 4 | from .deepfm import DeepFM 5 | from .nfm import NFM 6 | from .din import DIN 7 | from .fnn import FNN 8 | from .pnn import PNN 9 | from .wdl import WDL 10 | from .xdeepfm import xDeepFM 11 | from .autoint import AutoInt 12 | 13 | __all__ = ["AFM", "DCN", "MLR", "DeepFM", 14 | "NFM", "DIN", "FNN", "PNN", "WDL", "xDeepFM", "AutoInt"] 15 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | # Auto detect text files and perform LF normalization 2 | * text=auto 3 | 4 | # Custom for Visual Studio 5 | *.cs diff=csharp 6 | 7 | # Standard to msysgit 8 | *.doc diff=astextplain 9 | *.DOC diff=astextplain 10 | *.docx diff=astextplain 11 | *.DOCX diff=astextplain 12 | *.dot diff=astextplain 13 | *.DOT diff=astextplain 14 | *.pdf diff=astextplain 15 | *.PDF diff=astextplain 16 | *.rtf diff=astextplain 17 | *.RTF diff=astextplain 18 | -------------------------------------------------------------------------------- /docs/source/Models-API.rst: -------------------------------------------------------------------------------- 1 | DeepCTR Models API 2 | ====================== 3 | 4 | .. toctree:: 5 | 6 | FNN<deepctr.models.fnn> 7 | PNN<deepctr.models.pnn> 8 | WDL<deepctr.models.wdl> 9 | DeepFM<deepctr.models.deepfm> 10 | MLR<deepctr.models.mlr> 11 | NFM<deepctr.models.nfm> 12 | AFM<deepctr.models.afm> 13 | DCN<deepctr.models.dcn> 14 | DIN<deepctr.models.din> 15 | xDeepFM<deepctr.models.xdeepfm> 16 | AutoInt<deepctr.models.autoint> -------------------------------------------------------------------------------- /docs/source/deepctr.rst: -------------------------------------------------------------------------------- 1 | deepctr package 2 | =============== 3 | 4 | Subpackages 5 | ----------- 6 | 7 | .. toctree:: 8 | 9 | deepctr.models 10 | 11 | Submodules 12 | ---------- 13 | 14 | .. toctree:: 15 | 16 | deepctr.activations 17 | deepctr.layers 18 | deepctr.sequence 19 | deepctr.utils 20 | 21 | Module contents 22 | --------------- 23 | 24 | ..
automodule:: deepctr 25 | :members: 26 | :no-undoc-members: 27 | :no-show-inheritance: 28 | -------------------------------------------------------------------------------- /docs/source/deepctr.models.rst: -------------------------------------------------------------------------------- 1 | deepctr.models package 2 | ====================== 3 | 4 | Submodules 5 | ---------- 6 | 7 | .. toctree:: 8 | 9 | deepctr.models.afm 10 | deepctr.models.autoint 11 | deepctr.models.dcn 12 | deepctr.models.deepfm 13 | deepctr.models.din 14 | deepctr.models.fnn 15 | deepctr.models.mlr 16 | deepctr.models.nfm 17 | deepctr.models.pnn 18 | deepctr.models.wdl 19 | deepctr.models.xdeepfm 20 | 21 | Module contents 22 | --------------- 23 | 24 | .. automodule:: deepctr.models 25 | :members: 26 | :no-undoc-members: 27 | :no-show-inheritance: 28 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [metadata] 2 | description-file = README.md 3 | 4 | #[coverage:run] 5 | #branch = True 6 | 7 | [coverage:report] 8 | exclude_lines = 9 | # Have to re-enable the standard pragma 10 | pragma: no cover 11 | # Don't complain about missing debug-only code: 12 | def __repr__ 13 | if self\.debug 14 | 15 | # Don't complain if tests don't hit defensive assertion code: 16 | raise ValueError 17 | raise AssertionError 18 | raise NotImplementedError 19 | 20 | # Don't complain if non-runnable code isn't run: 21 | if 0: 22 | if False: 23 | if __name__ == .__main__.: -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | SPHINXPROJ = DeepCTR 8 | SOURCEDIR = source 9 | BUILDDIR = build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) -------------------------------------------------------------------------------- /docs/source/History.md: -------------------------------------------------------------------------------- 1 | # History 2 | - 12/27/2018 : [v0.2.1](https://github.com/shenweichen/DeepCTR/releases/tag/v0.2.1) released. Add [AutoInt](./Features.html#autoint-automatic-feature-interaction) model. 3 | - 12/22/2018 : [v0.2.0](https://github.com/shenweichen/DeepCTR/releases/tag/v0.2.0) released. Add [xDeepFM](./Features.html#xdeepfm) and an automatic check for new versions. 4 | - 12/19/2018 : [v0.1.6](https://github.com/shenweichen/DeepCTR/releases/tag/v0.1.6) released. Now DeepCTR is compatible with tensorflow from `1.4-1.12` except for `1.7` and `1.8`.
5 | - 11/29/2018 : [v0.1.4](https://github.com/shenweichen/DeepCTR/releases/tag/v0.1.4) released. Add [FAQ](./FAQ.html) to the docs. 6 | - 11/24/2018 : DeepCTR's first version, v0.1.0, is released on [PyPi](https://pypi.org/project/deepctr/) -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=source 11 | set BUILDDIR=build 12 | set SPHINXPROJ=DeepCTR 13 | 14 | if "%1" == "" goto help 15 | 16 | %SPHINXBUILD% >NUL 2>NUL 17 | if errorlevel 9009 ( 18 | echo. 19 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 20 | echo.installed, then set the SPHINXBUILD environment variable to point 21 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 22 | echo.may add the Sphinx directory to PATH. 23 | echo. 24 | echo.If you don't have Sphinx installed, grab it from 25 | echo.http://sphinx-doc.org/ 26 | exit /b 1 27 | ) 28 | 29 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% 30 | goto end 31 | 32 | :help 33 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% 34 | 35 | :end 36 | popd 37 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2018 Weichen Shen 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE.
-------------------------------------------------------------------------------- /examples/run_regression_movielens.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | from sklearn.preprocessing import LabelEncoder 3 | from deepctr.models import DeepFM 4 | 5 | if __name__ == "__main__": 6 | 7 | data = pd.read_csv("./movielens_sample.txt") 8 | sparse_features = ["movie_id", "user_id", 9 | "gender", "age", "occupation", "zip"] 10 | target = ['rating'] 11 | 12 | # 1. Label Encoding for sparse features, and simple Transformation for dense features 13 | for feat in sparse_features: 14 | lbe = LabelEncoder() 15 | data[feat] = lbe.fit_transform(data[feat]) 16 | # 2. Count #unique features for each sparse field 17 | sparse_feature_dim = {feat: data[feat].nunique() 18 | for feat in sparse_features} 19 | # 3. Generate input data for the model 20 | model_input = [data[feat].values for feat in sparse_feature_dim] 21 | # 4. Define model, compile and train 22 | model = DeepFM({"sparse": sparse_feature_dim, "dense": []}, 23 | final_activation='linear') 24 | 25 | model.compile("adam", "mse", metrics=['mse'],) 26 | history = model.fit(model_input, data[target].values, 27 | batch_size=256, epochs=10, verbose=2, validation_split=0.2,) 28 | 29 | print("demo done") 30 | -------------------------------------------------------------------------------- /tests/models/FNN_test.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pytest 3 | from deepctr.models import FNN 4 | 5 | from ..utils import check_model 6 | 7 | 8 | @pytest.mark.parametrize( 9 | 'sparse_feature_num', 10 | [1, 3 11 | ] 12 | ) 13 | def test_FNN(sparse_feature_num): 14 | model_name = "FNN" 15 | 16 | sample_size = 64 17 | feature_dim_dict = {"sparse": {}, 'dense': []} 18 | for name, num in zip(["sparse", "dense"], [sparse_feature_num, sparse_feature_num]): 19 | if name == "sparse": 20 | for i in range(num): 21 | feature_dim_dict[name][name + '_' + 22 | str(i)] = np.random.randint(1, 10) 23 | else: 24 | for i in range(num): 25 | feature_dim_dict[name].append(name + '_' + str(i)) 26 | 27 | sparse_input = [np.random.randint(0, dim, sample_size) 28 | for dim in feature_dim_dict['sparse'].values()] 29 | dense_input = [np.random.random(sample_size) 30 | for name in feature_dim_dict['dense']] 31 | y = np.random.randint(0, 2, sample_size) 32 | x = sparse_input + dense_input 33 | 34 | model = FNN(feature_dim_dict, hidden_size=[32, 32], keep_prob=0.5, ) 35 | check_model(model, model_name, x, y) 36 | 37 | 38 | if __name__ == "__main__": 39 | test_FNN(2) 40 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | #sudo: required 2 | #dist: trusty xenial 3 | language: python 4 | 5 | python: 6 | - "3.4" 7 | - "3.5" 8 | - "3.6" 9 | 10 | env: 11 | - TF_VERSION=1.12.0 12 | - TF_VERSION=1.4.0 13 | #Not Support- TF_VERSION=1.7.0 14 | #Not Support- TF_VERSION=1.7.1 15 | #Not Support- TF_VERSION=1.8.0 16 | - TF_VERSION=1.9.0 17 | - TF_VERSION=1.10.0 #- TF_VERSION=1.10.1 18 | - TF_VERSION=1.11.0 19 | - TF_VERSION=1.5.1 #- TF_VERSION=1.5.0 20 | - TF_VERSION=1.6.0 21 | 22 | matrix: 23 | allow_failures: 24 | - python: "3.4" 25 | - python: "3.5" 26 | - env: TF_VERSION=1.5.0 # local is ok, but the CI sometimes fails 27 | - env: TF_VERSION=1.7.0 28 | - env: TF_VERSION=1.7.1 29 | - env: TF_VERSION=1.8.0 30 | fast_finish: true 31 | 32 | 
33 | cache: pip 34 | # command to install dependencies 35 | install: 36 | - pip install -q pytest-cov==2.4.0 #>=2.4.0,<2.6 37 | - pip install -q python-coveralls 38 | - pip install -q codacy-coverage 39 | - pip install -q tensorflow==$TF_VERSION 40 | - pip install -e . 41 | # command to run tests 42 | script: 43 | - pytest --cov=deepctr 44 | 45 | notifications: 46 | recipients: 47 | - wcshen1994@163.com 48 | 49 | on_success: change 50 | on_failure: always 51 | 52 | after_success: 53 | - coveralls 54 | - coverage xml 55 | - python-codacy-coverage -r coverage.xml -------------------------------------------------------------------------------- /tests/sequence_test.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from tensorflow.python.keras.utils import CustomObjectScope 3 | 4 | from deepctr import sequence 5 | 6 | from .utils import layer_test 7 | 8 | BATCH_SIZE = 4 9 | EMBEDDING_SIZE = 8 10 | SEQ_LENGTH = 10 11 | 12 | 13 | @pytest.mark.parametrize( 14 | 15 | 'weight_normalization', 16 | 17 | [weight_normalization 18 | for 19 | weight_normalization in [True, False, ] 20 | ] 21 | 22 | ) 23 | def test_AttentionSequencePoolingLayer(weight_normalization): 24 | with CustomObjectScope({'AttentionSequencePoolingLayer': sequence.AttentionSequencePoolingLayer}): 25 | layer_test(sequence.AttentionSequencePoolingLayer, kwargs={'weight_normalization': weight_normalization}, 26 | input_shape=[(BATCH_SIZE, 1, EMBEDDING_SIZE), (BATCH_SIZE, SEQ_LENGTH, EMBEDDING_SIZE), (BATCH_SIZE, 1)]) 27 | 28 | 29 | @pytest.mark.parametrize( 30 | 31 | 'seq_len_max,mode', 32 | 33 | [(SEQ_LENGTH, mode) 34 | 35 | for mode in ['sum', 'mean', 'max'] 36 | ] 37 | 38 | ) 39 | def test_SequencePoolingLayer(seq_len_max, mode): 40 | with CustomObjectScope({'SequencePoolingLayer': sequence.SequencePoolingLayer}): 41 | layer_test(sequence.SequencePoolingLayer, kwargs={'seq_len_max': seq_len_max, 'mode': mode}, 42 | input_shape=[(BATCH_SIZE, SEQ_LENGTH, EMBEDDING_SIZE), (BATCH_SIZE, 1)]) 43 | -------------------------------------------------------------------------------- /tests/models/NFM_test.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pytest 3 | from deepctr.models import NFM 4 | from ..utils import check_model 5 | 6 | 7 | @pytest.mark.parametrize( 8 | 'hidden_size,sparse_feature_num', 9 | [((8,), 1), ((8, 8,), 2)] 10 | ) 11 | def test_NFM(hidden_size, sparse_feature_num): 12 | 13 | model_name = "NFM" 14 | 15 | sample_size = 64 16 | feature_dim_dict = {"sparse": {}, 'dense': []} 17 | for name, num in zip(["sparse", "dense"], [sparse_feature_num, sparse_feature_num]): 18 | if name == "sparse": 19 | for i in range(num): 20 | feature_dim_dict[name][name + '_' + 21 | str(i)] = np.random.randint(1, 10) 22 | else: 23 | for i in range(num): 24 | feature_dim_dict[name].append(name + '_' + str(i)) 25 | sparse_input = [np.random.randint(0, dim, sample_size) 26 | for dim in feature_dim_dict['sparse'].values()] 27 | dense_input = [np.random.random(sample_size) 28 | for name in feature_dim_dict['dense']] 29 | y = np.random.randint(0, 2, sample_size) 30 | x = sparse_input + dense_input 31 | 32 | model = NFM(feature_dim_dict, embedding_size=8, 33 | hidden_size=[32, 32], keep_prob=0.5, ) 34 | check_model(model, model_name, x, y) 35 | 36 | 37 | if __name__ == "__main__": 38 | test_NFM((8, 8), 1) 39 | -------------------------------------------------------------------------------- /tests/models/AFM_test.py: 
-------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pytest 3 | from deepctr.models import AFM 4 | from ..utils import check_model 5 | 6 | 7 | @pytest.mark.parametrize( 8 | 'use_attention,sparse_feature_num', 9 | [(True, 1), (False, 3) 10 | ] 11 | ) 12 | def test_AFM(use_attention, sparse_feature_num): 13 | model_name = "AFM" 14 | 15 | sample_size = 64 16 | feature_dim_dict = {"sparse": {}, 'dense': []} 17 | for name, num in zip(["sparse", "dense"], [sparse_feature_num, sparse_feature_num]): 18 | if name == "sparse": 19 | for i in range(num): 20 | feature_dim_dict[name][name + '_' + 21 | str(i)] = np.random.randint(1, 10) 22 | else: 23 | for i in range(num): 24 | feature_dim_dict[name].append(name + '_' + str(i)) 25 | sparse_input = [np.random.randint(0, dim, sample_size) 26 | for dim in feature_dim_dict['sparse'].values()] 27 | dense_input = [np.random.random(sample_size) 28 | for name in feature_dim_dict['dense']] 29 | y = np.random.randint(0, 2, sample_size) 30 | x = sparse_input + dense_input 31 | 32 | model = AFM(feature_dim_dict, use_attention=use_attention, keep_prob=0.5,) 33 | check_model(model, model_name, x, y) 34 | 35 | 36 | if __name__ == "__main__": 37 | test_AFM(use_attention=True, sparse_feature_num=2) 38 | -------------------------------------------------------------------------------- /tests/models/DeepFM_test.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pytest 3 | from deepctr.models import DeepFM 4 | from ..utils import check_model 5 | 6 | 7 | @pytest.mark.parametrize( 8 | 'use_fm,hidden_size,sparse_feature_num', 9 | [(True, (), 1), (False, (), 2), (True, (32,), 3), (False, (32,), 1) 10 | ] 11 | ) 12 | def test_DeepFM(use_fm, hidden_size, sparse_feature_num): 13 | model_name = "DeepFM" 14 | sample_size = 64 15 | feature_dim_dict = {"sparse": {}, 'dense': []} 16 | for name, num in zip(["sparse", "dense"], [sparse_feature_num, sparse_feature_num]): 17 | if name == "sparse": 18 | for i in range(num): 19 | feature_dim_dict[name][name + '_' + 20 | str(i)] = np.random.randint(1, 10) 21 | else: 22 | for i in range(num): 23 | feature_dim_dict[name].append(name + '_' + str(i)) 24 | 25 | sparse_input = [np.random.randint(0, dim, sample_size) 26 | for dim in feature_dim_dict['sparse'].values()] 27 | dense_input = [np.random.random(sample_size) 28 | for name in feature_dim_dict['dense']] 29 | y = np.random.randint(0, 2, sample_size) 30 | x = sparse_input + dense_input 31 | 32 | model = DeepFM(feature_dim_dict, use_fm=use_fm, 33 | hidden_size=hidden_size, keep_prob=0.5, ) 34 | check_model(model, model_name, x, y) 35 | 36 | 37 | if __name__ == "__main__": 38 | test_DeepFM(True, (32, 32), 2) 39 | -------------------------------------------------------------------------------- /tests/models/AutoInt_test.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pytest 3 | from deepctr.models import AutoInt 4 | from ..utils import check_model 5 | 6 | 7 | @pytest.mark.parametrize( 8 | 'att_layer_num,hidden_size,sparse_feature_num', 9 | [(0, (4,), 2), (1, (), 1), (1, (4,), 1), (2, (4, 4,), 2)] 10 | ) 11 | def test_AutoInt(att_layer_num, hidden_size, sparse_feature_num): 12 | model_name = "AutoInt" 13 | sample_size = 64 14 | feature_dim_dict = {"sparse": {}, 'dense': []} 15 | for name, num in zip(["sparse", "dense"], [sparse_feature_num, sparse_feature_num]): 16 | if name == "sparse": 17 | for i in 
range(num): 18 | feature_dim_dict[name][name + '_' + 19 | str(i)] = np.random.randint(1, 10) 20 | else: 21 | for i in range(num): 22 | feature_dim_dict[name].append(name + '_' + str(i)) 23 | 24 | sparse_input = [np.random.randint(0, dim, sample_size) 25 | for dim in feature_dim_dict['sparse'].values()] 26 | dense_input = [np.random.random(sample_size) 27 | for name in feature_dim_dict['dense']] 28 | y = np.random.randint(0, 2, sample_size) 29 | x = sparse_input + dense_input 30 | 31 | model = AutoInt(feature_dim_dict, att_layer_num=att_layer_num, 32 | hidden_size=hidden_size, keep_prob=0.5, ) 33 | check_model(model, model_name, x, y) 34 | 35 | 36 | if __name__ == "__main__": 37 | test_AutoInt(1, (32, 32), 2) 38 | -------------------------------------------------------------------------------- /tests/models/PNN_test.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pytest 3 | 4 | from deepctr.models import PNN 5 | from ..utils import check_model 6 | 7 | 8 | @pytest.mark.parametrize( 9 | 'use_inner, use_outter,sparse_feature_num', 10 | [(True, True, 1), (True, False, 2), (False, True, 3), (False, False, 1) 11 | ] 12 | ) 13 | def test_PNN(use_inner, use_outter, sparse_feature_num): 14 | model_name = "PNN" 15 | sample_size = 64 16 | feature_dim_dict = {"sparse": {}, 'dense': []} 17 | for name, num in zip(["sparse", "dense"], [sparse_feature_num, sparse_feature_num]): 18 | if name == "sparse": 19 | for i in range(num): 20 | feature_dim_dict[name][name + '_' + 21 | str(i)] = np.random.randint(1, 10) 22 | else: 23 | for i in range(num): 24 | feature_dim_dict[name].append(name + '_' + str(i)) 25 | sparse_input = [np.random.randint(0, dim, sample_size) 26 | for dim in feature_dim_dict['sparse'].values()] 27 | dense_input = [np.random.random(sample_size) 28 | for name in feature_dim_dict['dense']] 29 | y = np.random.randint(0, 2, sample_size) 30 | x = sparse_input + dense_input 31 | 32 | model = PNN(feature_dim_dict, embedding_size=8, 33 | hidden_size=[32, 32], keep_prob=0.5, use_inner=use_inner, use_outter=use_outter) 34 | check_model(model, model_name, x, y) 35 | 36 | 37 | if __name__ == "__main__": 38 | test_PNN(use_inner=True, use_outter=False, sparse_feature_num=1) 39 | -------------------------------------------------------------------------------- /examples/run_classification_criteo.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | from sklearn.preprocessing import LabelEncoder, MinMaxScaler 3 | from deepctr.models import DeepFM 4 | 5 | if __name__ == "__main__": 6 | data = pd.read_csv('./criteo_sample.txt') 7 | 8 | sparse_features = ['C' + str(i) for i in range(1, 27)] 9 | dense_features = ['I'+str(i) for i in range(1, 14)] 10 | 11 | data[sparse_features] = data[sparse_features].fillna('-1', ) 12 | data[dense_features] = data[dense_features].fillna(0,) 13 | target = ['label'] 14 | 15 | # 1. Label Encoding for sparse features, and simple Transformation for dense features 16 | for feat in sparse_features: 17 | lbe = LabelEncoder() 18 | data[feat] = lbe.fit_transform(data[feat]) 19 | mms = MinMaxScaler(feature_range=(0, 1)) 20 | data[dense_features] = mms.fit_transform(data[dense_features]) 21 | 22 | # 2. Count #unique features for each sparse field, and record dense feature field names 23 | 24 | sparse_feature_dict = {feat: data[feat].nunique() 25 | for feat in sparse_features} 26 | dense_feature_list = dense_features 27 | 28 | # 3. Generate input data for the model
29 | 30 | model_input = [data[feat].values for feat in sparse_feature_dict] + \ 31 | [data[feat].values for feat in dense_feature_list] # + [data[target[0]].values] 32 | 33 | # 4. Define model, compile and train 34 | model = DeepFM({"sparse": sparse_feature_dict, 35 | "dense": dense_feature_list}, final_activation='sigmoid') 36 | 37 | model.compile("adam", "binary_crossentropy", 38 | metrics=['binary_crossentropy'], ) 39 | 40 | history = model.fit(model_input, data[target].values, 41 | 42 | batch_size=256, epochs=10, verbose=2, validation_split=0.2, ) 43 | print("demo done") 44 | -------------------------------------------------------------------------------- /tests/models/DCN_test.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pytest 3 | from deepctr.models import DCN 4 | from ..utils import check_model 5 | 6 | 7 | @pytest.mark.parametrize( 8 | 'embedding_size,cross_num,hidden_size,sparse_feature_num', 9 | [(8, 0, (32,), 2), (8, 1, (), 1), ('auto', 1, (32,), 3) 10 | ] 11 | ) 12 | def test_DCN(embedding_size, cross_num, hidden_size, sparse_feature_num): 13 | model_name = "DCN" 14 | 15 | sample_size = 64 16 | feature_dim_dict = {"sparse": {}, 'dense': []} 17 | for name, num in zip(["sparse", "dense"], [sparse_feature_num, sparse_feature_num]): 18 | if name == "sparse": 19 | for i in range(num): 20 | feature_dim_dict[name][name + '_' + 21 | str(i)] = np.random.randint(1, 10) 22 | else: 23 | for i in range(num): 24 | feature_dim_dict[name].append(name + '_' + str(i)) 25 | sparse_input = [np.random.randint(0, dim, sample_size) 26 | for dim in feature_dim_dict['sparse'].values()] 27 | dense_input = [np.random.random(sample_size) 28 | for name in feature_dim_dict['dense']] 29 | 30 | y = np.random.randint(0, 2, sample_size) 31 | x = sparse_input + dense_input 32 | 33 | model = DCN(feature_dim_dict, embedding_size=embedding_size, cross_num=cross_num, 34 | hidden_size=hidden_size, keep_prob=0.5, ) 35 | check_model(model, model_name, x, y) 36 | 37 | 38 | def test_DCN_invalid(embedding_size=8, cross_num=0, hidden_size=()): 39 | feature_dim_dict = {'sparse': {'sparse_1': 2, 'sparse_2': 5, 40 | 'sparse_3': 10}, 'dense': ['dense_1', 'dense_2', 'dense_3']} 41 | with pytest.raises(ValueError): 42 | _ = DCN(feature_dim_dict, embedding_size=embedding_size, cross_num=cross_num, 43 | hidden_size=hidden_size, keep_prob=0.5, ) 44 | 45 | 46 | if __name__ == "__main__": 47 | test_DCN(8, 2, [32, 32], 2) 48 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import setuptools 2 | 3 | with open("README.md", "r") as fh: 4 | long_description = fh.read() 5 | 6 | REQUIRED_PACKAGES = [ 7 | 'tensorflow>=1.4.0,!=1.7.*,!=1.8.*', 8 | 'h5py' 9 | ] 10 | 11 | setuptools.setup( 12 | name="deepctr", 13 | version="0.2.1", 14 | author="Weichen Shen", 15 | author_email="wcshen1994@163.com", 16 | description="Easy-to-use, Modular and Extendible package of deep learning based CTR (Click Through Rate) prediction models with tensorflow.", 17 | long_description=long_description, 18 | long_description_content_type="text/markdown", 19 | url="https://github.com/shenweichen/deepctr", 20 | download_url='https://github.com/shenweichen/deepctr/tags', 21 | packages=setuptools.find_packages(exclude=["tests", "tests.models"]), 22 | python_requires='>=3.4', # 3.4.6 23 | install_requires=REQUIRED_PACKAGES, 24 | extras_require={ 25 | "tf": 
['tensorflow>=1.4.0,!=1.7.*,!=1.8.*'], 26 | "tf_gpu": ['tensorflow-gpu>=1.4.0,!=1.7.*,!=1.8.*'], 27 | }, 28 | entry_points={ 29 | }, 30 | classifiers=( 31 | "License :: OSI Approved :: MIT License", 32 | "Operating System :: OS Independent", 33 | 'Intended Audience :: Developers', 34 | 'Intended Audience :: Education', 35 | 'Intended Audience :: Science/Research', 36 | 'Programming Language :: Python :: 3', 37 | 'Programming Language :: Python :: 3.4', 38 | 'Programming Language :: Python :: 3.5', 39 | 'Programming Language :: Python :: 3.6', 40 | 'Topic :: Scientific/Engineering', 41 | 'Topic :: Scientific/Engineering :: Artificial Intelligence', 42 | 'Topic :: Software Development', 43 | 'Topic :: Software Development :: Libraries', 44 | 'Topic :: Software Development :: Libraries :: Python Modules', 45 | ), 46 | license="MIT license", 47 | keywords=['ctr', 'click through rate', 48 | 'deep learning', 'tensorflow', 'tensor', 'keras'], 49 | ) 50 | -------------------------------------------------------------------------------- /examples/run_din.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from deepctr.models import DIN 3 | 4 | 5 | def get_xy_fd(): 6 | 7 | feature_dim_dict = {"sparse": {'user_age': 4, 'user_gender': 2, 8 | 'item_id': 4, 'item_gender': 2}, "dense": []} # raw features: single-value features 9 | 10 | # history behavior features: multi-value features 11 | behavior_feature_list = ["item_id", "item_gender"] 12 | # single value feature input 13 | user_age = np.array([1, 2, 3]) 14 | user_gender = np.array([0, 1, 0]) 15 | item_id = np.array([0, 1, 2]) 16 | item_gender = np.array([0, 1, 0]) 17 | 18 | # multi-value feature input 19 | hist_item_id = np.array([[0, 1, 2, 3], [0, 1, 2, 3], [0, 1, 2, 0]]) 20 | hist_item_gender = np.array([[0, 1, 0, 1], [0, 1, 1, 1], [0, 0, 1, 0]]) 21 | # valid length of the behavior sequence for every sample 22 | hist_length = np.array([4, 4, 3]) 23 | 24 | feature_dict = {'user_age': user_age, 'user_gender': user_gender, 'item_id': item_id, 'item_gender': item_gender, 25 | 'hist_item_id': hist_item_id, 'hist_item_gender': hist_item_gender, } 26 | 27 | x = [feature_dict[feat] for feat in feature_dim_dict["sparse"]] + \ 28 | [feature_dict['hist_'+feat] 29 | for feat in behavior_feature_list] + [hist_length] 30 | # Notice the concatenation order: single features + multi-value features + length 31 | # Since the lengths of the historical sequences of different features in DIN are the same (they are all extended from item_id), only one length vector is enough.
32 | y = [1, 0, 1] 33 | 34 | return x, y, feature_dim_dict, behavior_feature_list 35 | 36 | 37 | if __name__ == "__main__": 38 | x, y, feature_dim_dict, behavior_feature_list = get_xy_fd() 39 | model = DIN(feature_dim_dict, behavior_feature_list, hist_len_max=4,) 40 | model.compile('adam', 'binary_crossentropy', 41 | metrics=['binary_crossentropy']) 42 | history = model.fit(x, y, verbose=1, validation_split=0.5) 43 | -------------------------------------------------------------------------------- /tests/models/WDL_test.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pytest 3 | 4 | from deepctr.models import WDL 5 | from ..utils import check_model 6 | 7 | 8 | @pytest.mark.parametrize( 9 | 'sparse_feature_num,wide_feature_num', 10 | [(1, 0), (1, 2), (2, 0), (2, 1) 11 | ] 12 | ) 13 | def test_WDL(sparse_feature_num, wide_feature_num): 14 | model_name = "WDL" 15 | sample_size = 64 16 | feature_dim_dict = {"sparse": {}, 'dense': []} 17 | wide_feature_dim_dict = {"sparse": {}, 'dense': []} 18 | for name, num in zip(["sparse", "dense"], [sparse_feature_num, sparse_feature_num]): 19 | if name == "sparse": 20 | for i in range(num): 21 | feature_dim_dict[name][name + '_' + 22 | str(i)] = np.random.randint(1, 10) 23 | else: 24 | for i in range(num): 25 | feature_dim_dict[name].append(name + '_' + str(i)) 26 | for name, num in zip(["sparse", "dense"], [wide_feature_num, wide_feature_num]): 27 | if name == "sparse": 28 | for i in range(num): 29 | wide_feature_dim_dict[name][name + 'wide_' + 30 | str(i)] = np.random.randint(1, 10) 31 | else: 32 | for i in range(num): 33 | wide_feature_dim_dict[name].append(name + 'wide_' + str(i)) 34 | 35 | sparse_input = [np.random.randint(0, dim, sample_size) 36 | for dim in feature_dim_dict['sparse'].values()] 37 | dense_input = [np.random.random(sample_size) 38 | for name in feature_dim_dict['dense']] 39 | wide_sparse_input = [np.random.randint(0, dim, sample_size) 40 | for dim in wide_feature_dim_dict['sparse'].values()] 41 | wide_dense_input = [np.random.random(sample_size) 42 | for name in wide_feature_dim_dict['dense']] 43 | y = np.random.randint(0, 2, sample_size) 44 | x = sparse_input + dense_input 45 | x_wide = wide_sparse_input + wide_dense_input 46 | 47 | model = WDL(feature_dim_dict, wide_feature_dim_dict, 48 | hidden_size=[32, 32], keep_prob=0.5) 49 | check_model(model, model_name, x+x_wide, y) 50 | 51 | 52 | if __name__ == "__main__": 53 | test_WDL(1, 1) 54 | -------------------------------------------------------------------------------- /tests/models/xDeepFM_test.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pytest 3 | from deepctr.models import xDeepFM 4 | from ..utils import check_model 5 | 6 | 7 | @pytest.mark.parametrize( 8 | 'hidden_size,cin_layer_size,cin_split_half,cin_activation,sparse_feature_num,dense_feature_dim', 9 | [((), (), True, 'linear', 1, 2), ((16,), (), True, 'linear', 1, 1), ((), (16,), True, 'linear', 2, 2), ((16,), (16,), False, 'relu', 1, 0) 10 | ] 11 | ) 12 | def test_xDeepFM(hidden_size, cin_layer_size, cin_split_half, cin_activation, sparse_feature_num, dense_feature_dim): 13 | model_name = "xDeepFM" 14 | 15 | sample_size = 64 16 | feature_dim_dict = {"sparse": {}, 'dense': []} 17 | for name, num in zip(["sparse", "dense"], [sparse_feature_num, dense_feature_dim]): 18 | if name == "sparse": 19 | for i in range(num): 20 | feature_dim_dict[name][name + '_' + 21 | str(i)] = 
np.random.randint(1, 10) 22 | else: 23 | for i in range(num): 24 | feature_dim_dict[name].append(name + '_' + str(i)) 25 | sparse_input = [np.random.randint(0, dim, sample_size) 26 | for dim in feature_dim_dict['sparse'].values()] 27 | dense_input = [np.random.random(sample_size) 28 | for name in feature_dim_dict['dense']] 29 | 30 | y = np.random.randint(0, 2, sample_size) 31 | x = sparse_input + dense_input 32 | 33 | model = xDeepFM(feature_dim_dict, hidden_size=hidden_size, cin_layer_size=cin_layer_size, 34 | cin_split_half=cin_split_half, cin_activation=cin_activation, keep_prob=0.5, ) 35 | check_model(model, model_name, x, y) 36 | 37 | 38 | @pytest.mark.parametrize( 39 | 'hidden_size,cin_layer_size,', 40 | [((8,), (3, 8)), 41 | ] 42 | ) 43 | def test_xDeepFM_invalid(hidden_size, cin_layer_size): 44 | feature_dim_dict = {'sparse': {'sparse_1': 2, 'sparse_2': 5, 45 | 'sparse_3': 10}, 'dense': ['dense_1', 'dense_2', 'dense_3']} 46 | with pytest.raises(ValueError): 47 | _ = xDeepFM(feature_dim_dict, hidden_size=hidden_size, 48 | cin_layer_size=cin_layer_size,) 49 | 50 | 51 | if __name__ == "__main__": 52 | test_xDeepFM((256,), (128,), False, 'linear', 3, 1) 53 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.h5 2 | *.ipynb 3 | .pytest_cache/ 4 | tests/unused/* 5 | # Byte-compiled / optimized / DLL files 6 | __pycache__/ 7 | *.py[cod] 8 | *$py.class 9 | .idea/ 10 | # C extensions 11 | *.so 12 | 13 | # Distribution / packaging 14 | .Python 15 | env/ 16 | build/ 17 | develop-eggs/ 18 | dist/ 19 | downloads/ 20 | eggs/ 21 | .eggs/ 22 | lib/ 23 | lib64/ 24 | parts/ 25 | sdist/ 26 | var/ 27 | *.egg-info/ 28 | .installed.cfg 29 | *.egg 30 | 31 | # PyInstaller 32 | # Usually these files are written by a python script from a template 33 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
34 | *.manifest 35 | *.spec 36 | 37 | # Installer logs 38 | pip-log.txt 39 | pip-delete-this-directory.txt 40 | 41 | # Unit test / coverage reports 42 | htmlcov/ 43 | .tox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *,cover 50 | .hypothesis/ 51 | 52 | # Translations 53 | *.mo 54 | *.pot 55 | 56 | # Django stuff: 57 | *.log 58 | local_settings.py 59 | 60 | # Flask instance folder 61 | instance/ 62 | 63 | # Scrapy stuff: 64 | .scrapy 65 | 66 | # Sphinx documentation 67 | docs/_build/ 68 | 69 | # PyBuilder 70 | target/ 71 | 72 | # IPython Notebook 73 | .ipynb_checkpoints 74 | 75 | # pyenv 76 | .python-version 77 | 78 | # celery beat schedule file 79 | celerybeat-schedule 80 | 81 | # dotenv 82 | .env 83 | 84 | # virtualenv 85 | venv/ 86 | ENV/ 87 | 88 | # Spyder project settings 89 | .spyderproject 90 | 91 | # Rope project settings 92 | .ropeproject 93 | 94 | # ========================= 95 | # Operating System Files 96 | # ========================= 97 | 98 | # OSX 99 | # ========================= 100 | 101 | .DS_Store 102 | .AppleDouble 103 | .LSOverride 104 | 105 | # Thumbnails 106 | ._* 107 | 108 | # Files that might appear in the root of a volume 109 | .DocumentRevisions-V100 110 | .fseventsd 111 | .Spotlight-V100 112 | .TemporaryItems 113 | .Trashes 114 | .VolumeIcon.icns 115 | 116 | # Directories potentially created on remote AFP share 117 | .AppleDB 118 | .AppleDesktop 119 | Network Trash Folder 120 | Temporary Items 121 | .apdisk 122 | 123 | # Windows 124 | # ========================= 125 | 126 | # Windows image file caches 127 | Thumbs.db 128 | ehthumbs.db 129 | 130 | # Folder config file 131 | Desktop.ini 132 | 133 | # Recycle Bin used on file shares 134 | $RECYCLE.BIN/ 135 | 136 | # Windows Installer files 137 | *.cab 138 | *.msi 139 | *.msm 140 | *.msp 141 | 142 | # Windows shortcuts 143 | *.lnk 144 | -------------------------------------------------------------------------------- /deepRS/activations.py: -------------------------------------------------------------------------------- 1 | from tensorflow.python.keras.layers import Layer 2 | from tensorflow.python.keras.initializers import Zeros 3 | import tensorflow as tf 4 | 5 | 6 | class Dice(Layer): 7 | """The Data Adaptive Activation Function in DIN, which can be viewed as a generalization of PReLU and can adaptively adjust the rectified point according to the distribution of the input data. 8 | 9 | Input shape 10 | - Arbitrary. Use the keyword argument `input_shape` (tuple of integers, does not include the samples axis) when using this layer as the first layer in a model. 11 | 12 | Output shape 13 | - Same shape as the input. 14 | 15 | Arguments 16 | - **axis** : Integer, the axis that should be used to compute data distribution (typically the features axis). 17 | 18 | - **epsilon** : Small float added to variance to avoid dividing by zero. 19 | 20 | References 21 | - [Zhou G, Zhu X, Song C, et al. Deep interest network for click-through rate prediction[C]//Proceedings of the 24th ACM SIGKDD International Conference on Knowledge Discovery & Data Mining.
ACM, 2018: 1059-1068.](https://arxiv.org/pdf/1706.06978.pdf) 22 | """ 23 | 24 | def __init__(self, axis=-1, epsilon=1e-9, **kwargs): 25 | self.axis = axis 26 | self.epsilon = epsilon 27 | super(Dice, self).__init__(**kwargs) 28 | 29 | def build(self, input_shape): 30 | self.alphas = self.add_weight(shape=(input_shape[-1],), initializer=Zeros( 31 | ), dtype=tf.float32, name=self.name+'dice_alpha') # name='alpha_'+self.name 32 | super(Dice, self).build(input_shape) # Be sure to call this somewhere! 33 | 34 | def call(self, inputs, **kwargs): 35 | 36 | inputs_normed = tf.layers.batch_normalization( 37 | inputs, axis=self.axis, epsilon=self.epsilon, center=False, scale=False) 38 | x_p = tf.sigmoid(inputs_normed) 39 | return self.alphas * (1.0 - x_p) * inputs + x_p * inputs 40 | 41 | def get_config(self,): 42 | 43 | config = {'axis': self.axis, 'epsilon': self.epsilon} 44 | base_config = super(Dice, self).get_config() 45 | return dict(list(base_config.items()) + list(config.items())) 46 | 47 | def compute_output_shape(self, input_shape): 48 | return input_shape 49 | 50 | 51 | def activation_fun(activation, fc): 52 | 53 | if isinstance(activation, str): 54 | fc = tf.keras.layers.Activation(activation)(fc) 55 | elif issubclass(activation, Layer): 56 | fc = activation()(fc) 57 | else: 58 | raise ValueError( 59 | "Invalid activation, found %s. You should use a str or an Activation Layer Class." % (activation)) 60 | return fc 61 | -------------------------------------------------------------------------------- /tests/models/MLR_test.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pytest 3 | from deepctr.models import MLR 4 | from ..utils import check_model 5 | 6 | 7 | @pytest.mark.parametrize( 8 | 9 | 'region_sparse,region_dense,base_sparse,base_dense,bias_sparse,bias_dense', 10 | 11 | [(0, 2, 0, 2, 0, 1), (0, 2, 0, 1, 0, 2), (0, 2, 0, 0, 1, 0), 12 | (0, 1, 1, 2, 1, 1,), (0, 1, 1, 1, 1, 2), (0, 1, 1, 0, 2, 0), 13 | (1, 0, 2, 2, 2, 1), (2, 0, 2, 1, 2, 2), (2, 0, 2, 0, 0, 0) 14 | ] 15 | 16 | ) 17 | def test_MLRs(region_sparse, region_dense, base_sparse, base_dense, bias_sparse, bias_dense): 18 | model_name = "MLRs" 19 | region_fd = {"sparse": {}, 'dense': []} 20 | for name, num in zip(["sparse", "dense"], [region_sparse, region_dense]): 21 | if name == "sparse": 22 | for i in range(num): 23 | region_fd[name][name + '_' + str(i)] = np.random.randint(1, 10) 24 | else: 25 | for i in range(num): 26 | region_fd[name].append(name + '_' + str(i)) 27 | 28 | base_fd = {"sparse": {}, 'dense': []} 29 | for name, num in zip(["sparse", "dense"], [base_sparse, base_dense]): 30 | if name == "sparse": 31 | for i in range(num): 32 | base_fd[name][name + '_' + str(i)] = np.random.randint(1, 10) 33 | else: 34 | for i in range(num): 35 | base_fd[name].append(name + '_' + str(i)) 36 | bias_fd = {"sparse": {}, 'dense': []} 37 | for name, num in zip(["sparse", "dense"], [bias_sparse, bias_dense]): 38 | if name == "sparse": 39 | for i in range(num): 40 | bias_fd[name][name + '_' + str(i)] = np.random.randint(1, 10) 41 | else: 42 | for i in range(num): 43 | bias_fd[name].append(name + '_' + str(i)) 44 | 45 | model = MLR(region_fd, base_fd, bias_feature_dim_dict=bias_fd) 46 | model.compile('adam', 'binary_crossentropy', 47 | metrics=['binary_crossentropy']) 48 | print(model_name + " test pass!") 49 | 50 | 51 | def test_MLR(): 52 | model_name = "MLR" 53 | sample_size = 64 54 | feature_dim_dict = {'sparse': {'sparse_1': 2, 'sparse_2': 5, 55 | 'sparse_3': 10},
'dense': ['dense_1', 'dense_2', 'dense_3']} 56 | sparse_input = [np.random.randint(0, dim, sample_size) 57 | for dim in feature_dim_dict['sparse'].values()] 58 | dense_input = [np.random.random(sample_size) 59 | for name in feature_dim_dict['dense']] 60 | y = np.random.randint(0, 2, sample_size) 61 | x = sparse_input + dense_input 62 | 63 | model = MLR(feature_dim_dict) 64 | check_model(model, model_name, x, y) 65 | 66 | 67 | if __name__ == "__main__": 68 | test_MLR() 69 | -------------------------------------------------------------------------------- /docs/source/index.rst: -------------------------------------------------------------------------------- 1 | .. DeepCTR documentation master file, created by 2 | sphinx-quickstart on Fri Nov 23 21:08:54 2018. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | Welcome to DeepCTR's documentation! 7 | =================================== 8 | 9 | |Downloads|_ |Stars|_ |Forks|_ |PyPi|_ |Issues|_ |Activity|_ 10 | 11 | .. |Downloads| image:: https://pepy.tech/badge/deepctr 12 | .. _Downloads: https://pepy.tech/project/deepctr 13 | 14 | .. |Stars| image:: https://img.shields.io/github/stars/shenweichen/deepctr.svg 15 | .. _Stars: https://github.com/shenweichen/DeepCTR 16 | 17 | .. |Forks| image:: https://img.shields.io/github/forks/shenweichen/deepctr.svg 18 | .. _Forks: https://github.com/shenweichen/DeepCTR/fork 19 | 20 | .. |PyPi| image:: https://img.shields.io/pypi/v/deepctr.svg 21 | .. _PyPi: https://pypi.org/project/deepctr/ 22 | 23 | .. |Issues| image:: https://img.shields.io/github/issues/shenweichen/deepctr.svg 24 | .. _Issues: https://github.com/shenweichen/deepctr/issues 25 | 26 | .. |Activity| image:: https://img.shields.io/github/last-commit/shenweichen/deepctr.svg 27 | .. _Activity: https://github.com/shenweichen/DeepCTR 28 | 29 | 30 | DeepCTR is an **Easy-to-use**, **Modular** and **Extendible** package of deep-learning based CTR models, along with lots of core component layers which can be used to build your own custom model easily. You can use any complex model with ``model.fit()`` and ``model.predict()``, and the layers are compatible with tensorflow. 31 | 32 | Install the package through ``pip install deepctr`` and `Get Started! <./Quick-Start.html>`_ 33 | 34 | You can read the latest code at https://github.com/shenweichen/DeepCTR 35 | 36 | News 37 | ----- 38 | 12/27/2018 : Add `AutoInt <./Features.html#autoint-automatic-feature-interaction>`_. `Changelog <https://github.com/shenweichen/DeepCTR/releases/tag/v0.2.1>`_ 39 | 40 | 12/22/2018 : Add `xDeepFM <./Features.html#xdeepfm>`_ and an automatic check for new versions. `Changelog <https://github.com/shenweichen/DeepCTR/releases/tag/v0.2.0>`_ 41 | 42 | 12/19/2018 : DeepCTR is compatible with tensorflow from ``1.4-1.12`` except for ``1.7`` and ``1.8``. `Changelog <https://github.com/shenweichen/DeepCTR/releases/tag/v0.1.6>`_ 43 | 44 | 11/24/2018 : DeepCTR is released! `PyPi <https://pypi.org/project/deepctr/>`_. 45 | 46 | .. toctree:: 47 | :maxdepth: 2 48 | :caption: Home: 49 | 50 | Quick-Start 51 | Features 52 | Demo 53 | FAQ 54 | History 55 | 56 | .. toctree:: 57 | :maxdepth: 3 58 | :caption: API: 59 | 60 | Models API<Models-API> 61 | Layers API<deepctr.layers> 62 | Activations API<deepctr.activations> 63 | Sequence API<deepctr.sequence> 64 | 65 | 66 | 67 | Indices and tables 68 | ================== 69 | 70 | * :ref:`genindex` 71 | * :ref:`modindex` 72 | * :ref:`search` -------------------------------------------------------------------------------- /docs/source/FAQ.rst: -------------------------------------------------------------------------------- 1 | FAQ 2 | ========== 3 | 1. 
3 | 1. Save or load weights/models
4 | ----------------------------------------
5 | To save/load weights, you can write code just like with any other Keras model.
6 | 
7 | .. code-block:: python
8 | 
9 | model = DeepFM()
10 | model.save_weights('DeepFM_w.h5')
11 | model.load_weights('DeepFM_w.h5')
12 | 
13 | 
14 | To save/load models, it is just a little different.
15 | 
16 | .. code-block:: python
17 | 
18 | from tensorflow.python.keras.models import save_model,load_model
19 | model = DeepFM()
20 | save_model(model, 'DeepFM.h5')  # save_model, same as before
21 | 
22 | from deepctr.utils import custom_objects
23 | model = load_model('DeepFM.h5',custom_objects)  # load_model, just add a parameter
24 | 
25 | 2. Set learning rate and use earlystopping
26 | ---------------------------------------------------
27 | You can use any model in DeepCTR like a Keras model object.
28 | Here is an example of how to set the learning rate and use early stopping:
29 | 
30 | .. code-block:: python
31 | 
32 | import deepctr
33 | from tensorflow.python.keras.optimizers import Adam,Adagrad
34 | from tensorflow.python.keras.callbacks import EarlyStopping
35 | 
36 | model = deepctr.models.DeepFM({"sparse": sparse_feature_dict, "dense": dense_feature_list})
37 | model.compile(Adagrad(0.0808),'binary_crossentropy',metrics=['binary_crossentropy'])
38 | 
39 | es = EarlyStopping(monitor='val_binary_crossentropy')
40 | history = model.fit(model_input, data[target].values,batch_size=256, epochs=10, verbose=2, validation_split=0.2,callbacks=[es] )
41 | 
42 | 
43 | 3. Get the attentional weights of feature interactions in AFM
44 | --------------------------------------------------------------------------
45 | First, make sure that you have installed the latest version of deepctr.
46 | 
47 | Then, use the following code; ``attentional_weights[:,i,0]`` is ``feature_interactions[i]``'s attentional weight over all samples.
48 | 
49 | .. code-block:: python
50 | 
51 | import itertools
52 | import deepctr
53 | from tensorflow.python.keras.models import Model
54 | from tensorflow.python.keras.layers import Lambda
55 | 
56 | feature_dim_dict = {"sparse": sparse_feature_dict, "dense": dense_feature_list}
57 | model = deepctr.models.AFM(feature_dim_dict)
58 | model.fit(model_input,target)
59 | 
60 | afmlayer = model.layers[-3]
61 | afm_weight_model = Model(model.input,outputs=Lambda(lambda x:afmlayer.normalized_att_score)(model.input))
62 | attentional_weights = afm_weight_model.predict(model_input,batch_size=4096)
63 | feature_interactions = list(itertools.combinations(list(feature_dim_dict['sparse'].keys()) + feature_dim_dict['dense'] ,2))
64 | 
65 | 
66 | 
67 | 4. Do the models support multi-value input?
68 | ---------------------------------------------------
69 | For now, only the `DIN `_ model supports multi-value input; you can use the layers in `sequence `_ to build your own models, as shown in the sketch below.
70 | And broader support will come in a future release.
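A minimal sketch of such a multi-value input, mirroring ``tests/models/DIN_test.py`` (the zero-padding of the shorter histories and the concrete numbers here are illustrative assumptions):

.. code-block:: python

    import numpy as np
    from deepctr.models import DIN

    feature_dim_dict = {"sparse": {"user": 4, "item": 4}, "dense": []}
    behavior_feature_list = ["item"]

    user = np.array([1, 2, 3])
    item = np.array([0, 1, 2])
    # one behavior sequence per sample, zero-padded to hist_len_max
    hist_item = np.array([[0, 1, 2, 3], [1, 2, 3, 0], [1, 2, 0, 0]])
    hist_length = np.array([4, 3, 2])  # true lengths before padding

    # input order: sparse features, then hist_ features, then the length array
    x = [user, item, hist_item, hist_length]
    y = np.array([1, 0, 1])

    model = DIN(feature_dim_dict, behavior_feature_list, hist_len_max=4,
                embedding_size=8, use_din=True, hidden_size=[4, 4, 4], keep_prob=0.6)
    model.compile("adam", "binary_crossentropy")
    model.fit(x, y, verbose=1)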
--------------------------------------------------------------------------------
/docs/source/Quick-Start.rst:
--------------------------------------------------------------------------------
1 | Quick-Start
2 | ===========
3 | 
4 | Installation Guide
5 | ----------------------
6 | Install the deepctr package through ``pip`` ::
7 | 
8 | pip install deepctr
9 | 
10 | 
11 | Getting started: 4 steps to DeepCTR
12 | -----------------------------------------
13 | 
14 | 
15 | Step 1: Import model
16 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
17 | 
18 | .. code-block:: python
19 | 
20 | import pandas as pd
21 | from sklearn.preprocessing import LabelEncoder,MinMaxScaler
22 | 
23 | from deepctr.models import DeepFM
24 | 
25 | data = pd.read_csv('./criteo_sample.txt')
26 | 
27 | sparse_features = ['C' + str(i) for i in range(1, 27)]
28 | dense_features = ['I'+str(i) for i in range(1,14)]
29 | target = ['label']
30 | 
31 | data[sparse_features] = data[sparse_features].fillna('-1', )
32 | data[dense_features] = data[dense_features].fillna(0,)
33 | 
34 | 
35 | 
36 | 
37 | Step 2: Simple preprocessing
38 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
39 | 
40 | Usually there are two simple ways to encode the sparse categorical features for embedding:
41 | 
42 | - Label Encoding: map the features to integer values from 0 to len(#unique) - 1
43 | - Hash Encoding: map the features to a fixed range, like 0 ~ 9999 (see the sketch after this step)
44 | 
45 | Dense numerical features are usually discretized into buckets; here we use normalization.
46 | 
47 | .. code-block:: python
48 | 
49 | for feat in sparse_features:
50 | lbe = LabelEncoder()  # or use hash encoding, see below
51 | data[feat] = lbe.fit_transform(data[feat])
52 | mms = MinMaxScaler(feature_range=(0,1))
53 | data[dense_features] = mms.fit_transform(data[dense_features])
54 | 
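As a minimal sketch of the hash encoding mentioned above (the bucket count ``10000`` and the choice of hash function are illustrative assumptions, not deepctr API):

.. code-block:: python

    # illustrative only: bucket each raw category value into a fixed range
    HASH_BUCKETS = 10000  # assumed bucket count, tune per feature
    for feat in sparse_features:
        data[feat] = data[feat].astype(str).apply(lambda v: hash(v) % HASH_BUCKETS)

Note that with hashing, the vocabulary size recorded in Step 3 should be ``HASH_BUCKETS`` instead of ``data[feat].nunique()``, and Python's built-in ``hash`` is salted per process, so use a stable hash (e.g. ``zlib.crc32``) when reproducibility matters.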
55 | 
56 | 
57 | Step 3: Generate feature config dict
58 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
59 | 
60 | Here, for sparse features, we transform them into dense vectors by embedding techniques.
61 | For dense numerical features, we add a dummy index like LIBFM.
62 | That is to say, all dense features under the same field share the same embedding vector.
63 | In some implementations, the dense feature is concatenated to the input embedding vectors of the deep network; you can modify the code yourself.
64 | 
65 | 
66 | .. code-block:: python
67 | 
68 | sparse_feature_dict = {feat: data[feat].nunique() for feat in sparse_features}
69 | dense_feature_list = dense_features
70 | 
71 | 
72 | Step 4: Generate the training samples and train the model
73 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
74 | 
75 | There are two rules here that we must follow:
76 | 
77 | - The sparse features are placed in front of the dense features.
78 | - The order of the features we feed into the model must be consistent with the iteration order of the feature dictionary.
79 | 
80 | .. code-block:: python
81 | 
82 | # make sure the order is right
83 | model_input = [data[feat].values for feat in sparse_feature_dict] + [data[feat].values for feat in dense_feature_list]
84 | 
85 | model = DeepFM({"sparse": sparse_feature_dict, "dense": dense_feature_list}, final_activation='sigmoid')
86 | model.compile("adam", "binary_crossentropy", metrics=['binary_crossentropy'], )
87 | history = model.fit(model_input, data[target].values,
88 | batch_size=256, epochs=1, verbose=2, validation_split=0.2,)
89 | 
90 | 
91 | You can check the full code `here <./Demo.html>`_
92 | 
93 | 
94 | 
95 | 
96 | 
97 | 
98 | 
99 | 
100 | 
--------------------------------------------------------------------------------
/tests/models/DIN_test.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import pytest
3 | from deepctr.models import DIN
4 | from deepctr.activations import Dice
5 | from deepctr.utils import custom_objects
6 | from tensorflow.python.keras.models import load_model, save_model
7 | from ..utils import check_model
8 | 
9 | 
10 | def get_xy_fd():
11 | feature_dim_dict = {"sparse": {'user': 4, 'gender': 2,
12 | 'item': 4, 'item_gender': 2}, "dense": []}
13 | behavior_feature_list = ["item"]
14 | uid = np.array([1, 2, 3])
15 | ugender = np.array([0, 1, 0])
16 | iid = np.array([0, 1, 2])
17 | igender = np.array([0, 1, 0])
18 | 
19 | hist_iid = np.array([[0, 1, 2, 3], [0, 1, 2, 3], [0, 1, 2, 3]])
20 | hist_igender = np.array([[0, 1, 0, 1], [0, 1, 1, 1], [0, 0, 1, 0]])
21 | hist_length = np.array([4, 4, 4])
22 | 
23 | feature_dict = {'user': uid, 'gender': ugender, 'item': iid, 'item_gender': igender,
24 | 'hist_item': hist_iid, 'hist_item_gender': hist_igender, }
25 | x = [feature_dict[feat] for feat in feature_dim_dict["sparse"]] \
26 | + [feature_dict['hist_'+feat] for feat in behavior_feature_list]\
27 | + [hist_length]
28 | y = [1, 0, 1]
29 | return x, y, feature_dim_dict, behavior_feature_list
30 | 
31 | 
32 | @pytest.mark.xfail(reason="There is a bug when saving a model that uses Dice")
33 | # @pytest.mark.skip(reason="misunderstood the API")
34 | def test_DIN_model_io():
35 | 
36 | model_name = "DIN_att"
37 | _, _, feature_dim_dict, behavior_feature_list = get_xy_fd()
38 | 
39 | model = DIN(feature_dim_dict, behavior_feature_list, hist_len_max=4, embedding_size=8, att_activation=Dice,
40 | 
41 | use_din=True, hidden_size=[4, 4, 4], keep_prob=0.6,)
42 | 
43 | model.compile('adam', 'binary_crossentropy',
44 | metrics=['binary_crossentropy'])
45 | # model.fit(x, y, verbose=1, validation_split=0.5)
46 | save_model(model, model_name + '.h5')
47 | model = load_model(model_name + '.h5', custom_objects)
48 | print(model_name + " test save load model pass!")
49 | 
50 | 
51 | def test_DIN_att():
52 | model_name = "DIN_att"
53 | 
54 | x, y, feature_dim_dict, behavior_feature_list = get_xy_fd()
55 | 
56 | model = DIN(feature_dim_dict, behavior_feature_list, hist_len_max=4, embedding_size=8,
57 | use_din=True, hidden_size=[4, 4, 4], keep_prob=0.6,)
58 | 
59 | model.compile('adam', 'binary_crossentropy',
60 | metrics=['binary_crossentropy'])
61 | model.fit(x, y, verbose=1, validation_split=0.5)
62 | 
63 | print(model_name+" test train valid pass!")
64 | model.save_weights(model_name + '_weights.h5')
65 | model.load_weights(model_name + '_weights.h5')
66 | print(model_name+" test save load weight pass!")
67 | 
68 | # try:
69 | # save_model(model, name + '.h5')
70 | # model = load_model(name + '.h5', custom_objects)
71 | # print(name + " test save load model pass!")
72 | # except: 73
| # print("【Error】There is a bug when save model use Dice---------------------------------------------------") 74 | 75 | print(model_name + " test pass!") 76 | 77 | 78 | def test_DIN_sum(): 79 | 80 | model_name = "DIN_sum" 81 | x, y, feature_dim_dict, behavior_feature_list = get_xy_fd() 82 | 83 | model = DIN(feature_dim_dict, behavior_feature_list, hist_len_max=4, embedding_size=8, 84 | use_din=False, hidden_size=[4, 4, 4], keep_prob=0.6, activation="sigmoid") 85 | 86 | check_model(model, model_name, x, y) 87 | 88 | 89 | if __name__ == "__main__": 90 | test_DIN_att() 91 | test_DIN_sum() 92 | -------------------------------------------------------------------------------- /deepRS/models/fnn.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | """ 3 | Author: 4 | Weichen Shen,wcshen1994@163.com 5 | 6 | Reference: 7 | [1] Zhang W, Du T, Wang J. Deep learning over multi-field categorical data[C]//European conference on information retrieval. Springer, Cham, 2016: 45-57.(https://arxiv.org/pdf/1601.02376.pdf) 8 | """ 9 | 10 | from tensorflow.python.keras.layers import Dense, Concatenate, Reshape, add 11 | from tensorflow.python.keras.models import Model 12 | from tensorflow.python.keras.regularizers import l2 13 | 14 | from ..layers import PredictionLayer, MLP 15 | from ..utils import get_input, get_share_embeddings 16 | 17 | 18 | def FNN(feature_dim_dict, embedding_size=8, 19 | hidden_size=(128, 128), 20 | l2_reg_embedding=1e-5, l2_reg_linear=1e-5, l2_reg_deep=0, 21 | init_std=0.0001, seed=1024, keep_prob=1, 22 | activation='relu', final_activation='sigmoid', ): 23 | """Instantiates the Factorization-supported Neural Network architecture. 24 | 25 | :param feature_dim_dict: dict,to indicate sparse field and dense field like {'sparse':{'field_1':4,'field_2':3,'field_3':2},'dense':['field_4','field_5']} 26 | :param embedding_size: positive integer,sparse feature embedding_size 27 | :param hidden_size: list,list of positive integer or empty list, the layer number and units in each layer of deep net 28 | :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vector 29 | :param l2_reg_linear: float. L2 regularizer strength applied to linear weight 30 | :param l2_reg_deep: float . L2 regularizer strength applied to deep net 31 | :param init_std: float,to use as the initialize std of embedding vector 32 | :param seed: integer ,to use as random seed. 33 | :param keep_prob: float in (0,1]. keep_prob used in deep net 34 | :param activation: Activation function to use in deep net 35 | :param final_activation: str,output activation,usually ``'sigmoid'`` or ``'linear'`` 36 | :return: A Keras model instance. 
37 | """ 38 | if not isinstance(feature_dim_dict, 39 | dict) or "sparse" not in feature_dim_dict or "dense" not in feature_dim_dict: 40 | raise ValueError( 41 | "feature_dim must be a dict like {'sparse':{'field_1':4,'field_2':3,'field_3':2},'dense':['field_5',]}") 42 | 43 | sparse_input, dense_input = get_input(feature_dim_dict, None) 44 | sparse_embedding, linear_embedding, = get_share_embeddings(feature_dim_dict, embedding_size, init_std, seed, l2_reg_embedding, 45 | l2_reg_linear) 46 | 47 | embed_list = [sparse_embedding[i](sparse_input[i]) 48 | for i in range(len(feature_dim_dict["sparse"]))] 49 | 50 | linear_term = [linear_embedding[i](sparse_input[i]) 51 | for i in range(len(sparse_input))] 52 | if len(linear_term) > 1: 53 | linear_term = add(linear_term) 54 | elif len(linear_term) == 1: 55 | linear_term = linear_term[0] 56 | 57 | if len(dense_input) > 0: 58 | continuous_embedding_list = list( 59 | map(Dense(embedding_size, use_bias=False, kernel_regularizer=l2(l2_reg_embedding), ), 60 | dense_input)) 61 | continuous_embedding_list = list( 62 | map(Reshape((1, embedding_size)), continuous_embedding_list)) 63 | embed_list += continuous_embedding_list 64 | 65 | dense_input_ = dense_input[0] if len( 66 | dense_input) == 1 else Concatenate()(dense_input) 67 | linear_dense_logit = Dense( 68 | 1, activation=None, use_bias=False, kernel_regularizer=l2(l2_reg_linear))(dense_input_) 69 | linear_term = add([linear_dense_logit, linear_term]) 70 | 71 | num_inputs = len(dense_input) + len(sparse_input) 72 | deep_input = Reshape([num_inputs*embedding_size] 73 | )(Concatenate()(embed_list)) 74 | deep_out = MLP(hidden_size, activation, l2_reg_deep, 75 | keep_prob, False, seed)(deep_input) 76 | deep_logit = Dense(1, use_bias=False, activation=None)(deep_out) 77 | final_logit = add([deep_logit, linear_term]) 78 | output = PredictionLayer(final_activation)(final_logit) 79 | model = Model(inputs=sparse_input + dense_input, 80 | outputs=output) 81 | return model 82 | -------------------------------------------------------------------------------- /deepRS/models/wdl.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | """ 3 | Author: 4 | Weichen Shen,wcshen1994@163.com 5 | 6 | Reference: 7 | [1] Cheng H T, Koc L, Harmsen J, et al. Wide & deep learning for recommender systems[C]//Proceedings of the 1st Workshop on Deep Learning for Recommender Systems. ACM, 2016: 7-10.(https://arxiv.org/pdf/1606.07792.pdf) 8 | """ 9 | 10 | from tensorflow.python.keras.layers import Dense, Concatenate, Flatten, add 11 | from tensorflow.python.keras.models import Model 12 | from ..layers import PredictionLayer, MLP 13 | from ..utils import get_input, get_sep_embeddings 14 | 15 | 16 | def WDL(deep_feature_dim_dict, wide_feature_dim_dict, embedding_size=8, hidden_size=(128, 128), l2_reg_linear=1e-5, l2_reg_embedding=1e-5, l2_reg_deep=0, init_std=0.0001, seed=1024, keep_prob=1, activation='relu', final_activation='sigmoid',): 17 | """Instantiates the Wide&Deep Learning architecture. 
18 | 19 | :param deep_feature_dim_dict: dict,to indicate sparse field and dense field in deep part like {'sparse':{'field_1':4,'field_2':3,'field_3':2},'dense':['field_4','field_5']} 20 | :param wide_feature_dim_dict: dict,to indicate sparse field and dense field in wide part like {'sparse':{'field_1':4,'field_2':3,'field_3':2},'dense':['field_4','field_5']} 21 | :param embedding_size: positive integer,sparse feature embedding_size 22 | :param hidden_size: list,list of positive integer or empty list, the layer number and units in each layer of deep net 23 | :param l2_reg_linear: float. L2 regularizer strength applied to wide part 24 | :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vector 25 | :param l2_reg_deep: float. L2 regularizer strength applied to deep net 26 | :param init_std: float,to use as the initialize std of embedding vector 27 | :param seed: integer ,to use as random seed. 28 | :param keep_prob: float in (0,1]. keep_prob used in deep net 29 | :param activation: Activation function to use in deep net 30 | :param final_activation: str,output activation,usually ``'sigmoid'`` or ``'linear'`` 31 | :return: A Keras model instance. 32 | """ 33 | if not isinstance(deep_feature_dim_dict, 34 | dict) or "sparse" not in deep_feature_dim_dict or "dense" not in deep_feature_dim_dict: 35 | raise ValueError( 36 | "feature_dim must be a dict like {'sparse':{'field_1':4,'field_2':3,'field_3':2},'dense':['field_5',]}") 37 | 38 | sparse_input, dense_input, bias_sparse_input, bias_dense_input = get_input( 39 | deep_feature_dim_dict, wide_feature_dim_dict) 40 | sparse_embedding, wide_linear_embedding = get_sep_embeddings( 41 | deep_feature_dim_dict, wide_feature_dim_dict, embedding_size, init_std, seed, l2_reg_embedding, l2_reg_linear) 42 | 43 | embed_list = [sparse_embedding[i](sparse_input[i]) 44 | for i in range(len(sparse_input))] 45 | deep_input = Concatenate()(embed_list) if len( 46 | embed_list) > 1 else embed_list[0] 47 | deep_input = Flatten()(deep_input) 48 | if len(dense_input) > 0: 49 | deep_input = Concatenate()([deep_input]+dense_input) 50 | 51 | deep_out = MLP(hidden_size, activation, l2_reg_deep, keep_prob, 52 | False, seed)(deep_input) 53 | deep_logit = Dense(1, use_bias=False, activation=None)(deep_out) 54 | final_logit = deep_logit 55 | if len(wide_feature_dim_dict['dense']) + len(wide_feature_dim_dict['sparse']) > 0: 56 | if len(wide_feature_dim_dict['sparse']) > 0: 57 | bias_embed_list = [wide_linear_embedding[i]( 58 | bias_sparse_input[i]) for i in range(len(bias_sparse_input))] 59 | linear_term = add(bias_embed_list) if len( 60 | bias_embed_list) > 1 else bias_embed_list[0] 61 | final_logit = add([final_logit, linear_term]) 62 | if len(wide_feature_dim_dict['dense']) > 0: 63 | wide_dense_term = Dense(1, use_bias=False, activation=None)(Concatenate()( 64 | bias_dense_input) if len(bias_dense_input) > 1 else bias_dense_input[0]) 65 | final_logit = add([final_logit, wide_dense_term]) 66 | 67 | output = PredictionLayer(final_activation)(final_logit) 68 | model = Model(inputs=sparse_input + dense_input + 69 | bias_sparse_input + bias_dense_input, outputs=output) 70 | return model 71 | -------------------------------------------------------------------------------- /docs/source/Demo.rst: -------------------------------------------------------------------------------- 1 | Demos 2 | =========== 3 | 4 | Classification: Criteo 5 | ----------------------- 6 | 7 | The Criteo Display Ads dataset is for the purpose of predicting ads 8 | click-through rate. 
It has 13 integer features and
9 | 26 categorical features where each category has a high cardinality.
10 | 
11 | .. image:: ../pics/criteo_sample.png
12 | :align: center
13 | :scale: 70 %
14 | 
15 | In this demo, we simply normalize the integer features between 0 and 1; you
16 | can try other transformation techniques like log normalization or discretization.
17 | 
18 | This example shows how to use *DeepFM* to solve a simple binary classification task. You can get the demo data
19 | `criteo_sample.txt `_ and run the following code.
21 | 
22 | .. code-block:: python
23 | 
24 | import pandas as pd
25 | from sklearn.preprocessing import LabelEncoder,MinMaxScaler
26 | from deepctr.models import DeepFM
27 | 
28 | 
29 | data = pd.read_csv('./criteo_sample.txt')
30 | 
31 | sparse_features = ['C' + str(i) for i in range(1, 27)]
32 | dense_features = ['I'+str(i) for i in range(1,14)]
33 | 
34 | data[sparse_features] = data[sparse_features].fillna('-1', )
35 | data[dense_features] = data[dense_features].fillna(0,)
36 | 
37 | target = ['label']
38 | 
39 | # 1.Label Encoding for sparse features,and do simple Transformation for dense features
40 | for feat in sparse_features:
41 | lbe = LabelEncoder()
42 | data[feat] = lbe.fit_transform(data[feat])
43 | mms = MinMaxScaler(feature_range=(0,1))
44 | data[dense_features] = mms.fit_transform(data[dense_features])
45 | 
46 | # 2.count #unique features for each sparse field,and record dense feature field name
47 | 
48 | sparse_feature_dict = {feat: data[feat].nunique() for feat in sparse_features}
49 | dense_feature_list = dense_features
50 | 
51 | # 3.generate input data for model
52 | 
53 | model_input = [data[feat].values for feat in sparse_feature_dict] + [data[feat].values for feat in dense_feature_list]
54 | 
55 | # 4.Define Model,compile and train
56 | 
57 | 
58 | model = DeepFM({"sparse": sparse_feature_dict, "dense": dense_feature_list}, final_activation='sigmoid')
59 | model.compile("adam", "binary_crossentropy", metrics=['binary_crossentropy'], )
60 | history = model.fit(model_input, data[target].values,
61 | batch_size=256, epochs=1, verbose=2, validation_split=0.2,)
62 | 
63 | 
64 | 
65 | Regression: Movielens
66 | ----------------------
67 | The MovieLens data has been used for personalized tag recommendation; it
68 | contains 668,953 tag applications of users on movies.
69 | Here is a small fraction of the data, including only sparse fields.
70 | 
71 | .. image:: ../pics/movielens_sample.png
72 | :align: center
73 | :scale: 70 %
74 | 
75 | This example shows how to use *DeepFM* to solve a simple regression task. You can get the demo data
76 | `movielens_sample.txt `_ and run the following code.
78 | 
79 | ..
code-block:: python 80 | 81 | import pandas as pd 82 | from sklearn.preprocessing import LabelEncoder,MinMaxScaler 83 | from deepctr.models import DeepFM 84 | 85 | 86 | data = pd.read_csv("./movielens_sample.txt") 87 | sparse_features = [ "movie_id","user_id","gender","age","occupation","zip"] 88 | target = ['rating'] 89 | 90 | # 1.Label Encoding for sparse features,and do simple Transformation for dense features 91 | for feat in sparse_features: 92 | lbe = LabelEncoder() 93 | data[feat] = lbe.fit_transform(data[feat]) 94 | #2.count #unique features for each sparse field 95 | sparse_feature_dim = {feat:data[feat].nunique() for feat in sparse_features} 96 | #3.generate input data for model 97 | model_input = [data[feat].values for feat in sparse_feature_dim] 98 | #4.Define Model,compile and train 99 | model = DeepFM({"sparse":sparse_feature_dim,"dense":[]},final_activation='linear') 100 | 101 | model.compile("adam","mse",metrics=['mse'],) 102 | history = model.fit(model_input,data[target].values, 103 | batch_size=256,epochs=10,verbose=2,validation_split=0.2,) -------------------------------------------------------------------------------- /deepRS/models/nfm.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | """ 3 | Author: 4 | Weichen Shen,wcshen1994@163.com 5 | 6 | Reference: 7 | [1] He X, Chua T S. Neural factorization machines for sparse predictive analytics[C]//Proceedings of the 40th International ACM SIGIR conference on Research and Development in Information Retrieval. ACM, 2017: 355-364. (https://arxiv.org/abs/1708.05027) 8 | """ 9 | 10 | from tensorflow.python.keras.layers import Dense, Concatenate, Reshape, Dropout, add 11 | from tensorflow.python.keras.models import Model 12 | from tensorflow.python.keras.regularizers import l2 13 | from ..layers import PredictionLayer, MLP, BiInteractionPooling 14 | from ..utils import get_input, get_share_embeddings 15 | 16 | 17 | def NFM(feature_dim_dict, embedding_size=8, 18 | hidden_size=(128, 128), l2_reg_embedding=1e-5, l2_reg_linear=1e-5, l2_reg_deep=0, 19 | init_std=0.0001, seed=1024, keep_prob=1, activation='relu', final_activation='sigmoid', 20 | ): 21 | """Instantiates the Neural Factorization Machine architecture. 22 | 23 | :param feature_dim_dict: dict,to indicate sparse field and dense field like {'sparse':{'field_1':4,'field_2':3,'field_3':2},'dense':['field_4','field_5']} 24 | :param embedding_size: positive integer,sparse feature embedding_size 25 | :param hidden_size: list,list of positive integer or empty list, the layer number and units in each layer of deep net 26 | :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vector 27 | :param l2_reg_linear: float. L2 regularizer strength applied to linear part. 28 | :param l2_reg_deep: float . L2 regularizer strength applied to deep net 29 | :param init_std: float,to use as the initialize std of embedding vector 30 | :param seed: integer ,to use as random seed. 31 | :param keep_prob: float in (0,1]. keep_prob used in deep net 32 | :param activation: Activation function to use in deep net 33 | :param final_activation: str,output activation,usually ``'sigmoid'`` or ``'linear'`` 34 | :return: A Keras model instance. 
35 | """ 36 | if not isinstance(feature_dim_dict, 37 | dict) or "sparse" not in feature_dim_dict or "dense" not in feature_dim_dict: 38 | raise ValueError( 39 | "feature_dim must be a dict like {'sparse':{'field_1':4,'field_2':3,'field_3':2},'dense':['field_5',]}") 40 | 41 | sparse_input, dense_input = get_input(feature_dim_dict, None) 42 | sparse_embedding, linear_embedding = get_share_embeddings( 43 | feature_dim_dict, embedding_size, init_std, seed, l2_reg_embedding, l2_reg_linear) 44 | 45 | embed_list = [sparse_embedding[i](sparse_input[i]) 46 | for i in range(len(sparse_input))] 47 | 48 | linear_term = [linear_embedding[i](sparse_input[i]) 49 | for i in range(len(sparse_input))] 50 | if len(linear_term) > 1: 51 | linear_term = add(linear_term) 52 | elif len(linear_term) == 1: 53 | linear_term = linear_term[0] 54 | 55 | if len(dense_input) > 0: 56 | continuous_embedding_list = list( 57 | map(Dense(embedding_size, use_bias=False, kernel_regularizer=l2(l2_reg_embedding), ), 58 | dense_input)) 59 | continuous_embedding_list = list( 60 | map(Reshape((1, embedding_size)), continuous_embedding_list)) 61 | embed_list += continuous_embedding_list 62 | 63 | dense_input_ = dense_input[0] if len( 64 | dense_input) == 1 else Concatenate()(dense_input) 65 | linear_dense_logit = Dense( 66 | 1, activation=None, use_bias=False, kernel_regularizer=l2(l2_reg_linear))(dense_input_) 67 | linear_term = add([linear_dense_logit, linear_term]) 68 | 69 | fm_input = Concatenate(axis=1)(embed_list) 70 | 71 | bi_out = BiInteractionPooling()(fm_input) 72 | bi_out = Dropout(1 - keep_prob)(bi_out) 73 | deep_out = MLP(hidden_size, activation, l2_reg_deep, keep_prob, 74 | False, seed)(bi_out) 75 | deep_logit = Dense(1, use_bias=False, activation=None)(deep_out) 76 | 77 | final_logit = linear_term # TODO add bias term 78 | 79 | if len(hidden_size) > 0: 80 | final_logit = add([final_logit, deep_logit]) 81 | 82 | output = PredictionLayer(final_activation)(final_logit) 83 | print(output) 84 | model = Model(inputs=sparse_input + dense_input, outputs=output) 85 | return model 86 | -------------------------------------------------------------------------------- /deepRS/models/afm.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | """ 3 | 4 | Author: 5 | Weichen Shen,wcshen1994@163.com 6 | 7 | Reference: 8 | [1] Xiao J, Ye H, He X, et al. Attentional factorization machines: Learning the weight of feature interactions via attention networks[J]. arXiv preprint arXiv:1708.04617, 2017. 9 | (https://arxiv.org/abs/1708.04617) 10 | 11 | """ 12 | 13 | from tensorflow.python.keras.layers import Dense, Concatenate, Reshape, add 14 | from tensorflow.python.keras.models import Model 15 | from tensorflow.python.keras.regularizers import l2 16 | 17 | from ..utils import get_input, get_share_embeddings 18 | from ..layers import PredictionLayer, AFMLayer, FM 19 | 20 | 21 | def AFM(feature_dim_dict, embedding_size=8, use_attention=True, attention_factor=8, 22 | l2_reg_linear=1e-5, l2_reg_embedding=1e-5, l2_reg_att=1e-5, keep_prob=1.0, init_std=0.0001, seed=1024, 23 | final_activation='sigmoid',): 24 | """Instantiates the Attentonal Factorization Machine architecture. 
25 | 26 | :param feature_dim_dict: dict,to indicate sparse field and dense field like {'sparse':{'field_1':4,'field_2':3,'field_3':2},'dense':['field_4','field_5']} 27 | :param embedding_size: positive integer,sparse feature embedding_size 28 | :param use_attention: bool,whether use attention or not,if set to ``False``.it is the same as **standard Factorization Machine** 29 | :param attention_factor: positive integer,units in attention net 30 | :param l2_reg_linear: float. L2 regularizer strength applied to linear part 31 | :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vector 32 | :param l2_reg_att: float. L2 regularizer strength applied to attention net 33 | :param keep_prob: float in (0,1]. keep_prob after attention net 34 | :param init_std: float,to use as the initialize std of embedding vector 35 | :param seed: integer ,to use as random seed. 36 | :param final_activation: str,output activation,usually ``'sigmoid'`` or ``'linear'`` 37 | :return: A Keras model instance. 38 | """ 39 | 40 | if not isinstance(feature_dim_dict, 41 | dict) or "sparse" not in feature_dim_dict or "dense" not in feature_dim_dict: 42 | raise ValueError( 43 | "feature_dim_dict must be a dict like {'sparse':{'field_1':4,'field_2':3,'field_3':2},'dense':['field_4','field_5']}") 44 | if not isinstance(feature_dim_dict["sparse"], dict): 45 | raise ValueError("feature_dim_dict['sparse'] must be a dict,cur is", type( 46 | feature_dim_dict['sparse'])) 47 | if not isinstance(feature_dim_dict["dense"], list): 48 | raise ValueError("feature_dim_dict['dense'] must be a list,cur is", type( 49 | feature_dim_dict['dense'])) 50 | 51 | sparse_input, dense_input = get_input(feature_dim_dict, None) 52 | sparse_embedding, linear_embedding, = get_share_embeddings( 53 | feature_dim_dict, embedding_size, init_std, seed, l2_reg_embedding, l2_reg_linear) 54 | 55 | embed_list = [sparse_embedding[i](sparse_input[i]) 56 | for i in range(len(sparse_input))] 57 | linear_term = [linear_embedding[i](sparse_input[i]) 58 | for i in range(len(sparse_input))] 59 | if len(linear_term) > 1: 60 | linear_term = add(linear_term) 61 | elif len(linear_term) == 1: 62 | linear_term = linear_term[0] 63 | 64 | if len(dense_input) > 0: 65 | continuous_embedding_list = list( 66 | map(Dense(embedding_size, use_bias=False, kernel_regularizer=l2(l2_reg_embedding), ), 67 | dense_input)) 68 | continuous_embedding_list = list( 69 | map(Reshape((1, embedding_size)), continuous_embedding_list)) 70 | embed_list += continuous_embedding_list 71 | 72 | dense_input_ = dense_input[0] if len( 73 | dense_input) == 1 else Concatenate()(dense_input) 74 | linear_dense_logit = Dense( 75 | 1, activation=None, use_bias=False, kernel_regularizer=l2(l2_reg_linear))(dense_input_) 76 | linear_term = add([linear_dense_logit, linear_term]) 77 | 78 | fm_input = Concatenate(axis=1)(embed_list) 79 | if use_attention: 80 | fm_out = AFMLayer(attention_factor, l2_reg_att, 81 | keep_prob, seed)(embed_list) 82 | else: 83 | fm_out = FM()(fm_input) 84 | 85 | final_logit = add([linear_term, fm_out]) 86 | output = PredictionLayer(final_activation)(final_logit) 87 | model = Model(inputs=sparse_input + dense_input, outputs=output) 88 | return model 89 | -------------------------------------------------------------------------------- /deepRS/models/pnn.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | """ 3 | Author: 4 | Weichen Shen,wcshen1994@163.com 5 | 6 | Reference: 7 | [1] Qu Y, Cai H, Ren K, et al. 
Product-based neural networks for user response prediction[C]//Data Mining (ICDM), 2016 IEEE 16th International Conference on. IEEE, 2016: 1149-1154.(https://arxiv.org/pdf/1611.00144.pdf) 8 | """ 9 | 10 | from tensorflow.python.keras.layers import Dense, Embedding, Concatenate, Reshape, Flatten 11 | from tensorflow.python.keras.models import Model 12 | from tensorflow.python.keras.initializers import RandomNormal 13 | from tensorflow.python.keras.regularizers import l2 14 | 15 | 16 | from ..layers import PredictionLayer, MLP, InnerProductLayer, OutterProductLayer 17 | from ..utils import get_input 18 | 19 | 20 | def PNN(feature_dim_dict, embedding_size=8, hidden_size=(128, 128), l2_reg_embedding=1e-5, l2_reg_deep=0, 21 | init_std=0.0001, seed=1024, keep_prob=1, activation='relu', 22 | final_activation='sigmoid', use_inner=True, use_outter=False, kernel_type='mat', ): 23 | """Instantiates the Product-based Neural Network architecture. 24 | 25 | :param feature_dim_dict: dict,to indicate sparse field and dense field like {'sparse':{'field_1':4,'field_2':3,'field_3':2},'dense':['field_4','field_5']} 26 | :param embedding_size: positive integer,sparse feature embedding_size 27 | :param hidden_size: list,list of positive integer or empty list, the layer number and units in each layer of deep net 28 | :param l2_reg_embedding: float . L2 regularizer strength applied to embedding vector 29 | :param l2_reg_deep: float. L2 regularizer strength applied to deep net 30 | :param init_std: float,to use as the initialize std of embedding vector 31 | :param seed: integer ,to use as random seed. 32 | :param keep_prob: float in (0,1]. keep_prob used in deep net 33 | :param activation: Activation function to use in deep net 34 | :param final_activation: str,output activation,usually ``'sigmoid'`` or ``'linear'`` 35 | :param use_inner: bool,whether use inner-product or not. 36 | :param use_outter: bool,whether use outter-product or not. 37 | :param kernel_type: str,kernel_type used in outter-product,can be ``'mat'`` , ``'vec'`` or ``'num'`` 38 | :return: A Keras model instance. 
39 | """ 40 | if not isinstance(feature_dim_dict, 41 | dict) or "sparse" not in feature_dim_dict or "dense" not in feature_dim_dict: 42 | raise ValueError( 43 | "feature_dim must be a dict like {'sparse':{'field_1':4,'field_2':3,'field_3':2},'dense':['field_5',]}") 44 | if kernel_type not in ['mat', 'vec', 'num']: 45 | raise ValueError("kernel_type must be mat,vec or num") 46 | sparse_input, dense_input = get_input(feature_dim_dict, None) 47 | sparse_embedding = [Embedding(feature_dim_dict["sparse"][feat], embedding_size, 48 | embeddings_initializer=RandomNormal( 49 | mean=0.0, stddev=init_std, seed=seed), 50 | embeddings_regularizer=l2( 51 | l2_reg_embedding), 52 | name='sparse_emb_' + str(i) + '-' + feat) for i, feat in 53 | enumerate(feature_dim_dict["sparse"])] 54 | 55 | embed_list = [sparse_embedding[i](sparse_input[i]) 56 | for i in range(len(feature_dim_dict["sparse"]))] 57 | 58 | if len(dense_input) > 0: 59 | continuous_embedding_list = list( 60 | map(Dense(embedding_size, use_bias=False, kernel_regularizer=l2(l2_reg_embedding), ), 61 | dense_input)) 62 | continuous_embedding_list = list( 63 | map(Reshape((1, embedding_size)), continuous_embedding_list)) 64 | embed_list += continuous_embedding_list 65 | 66 | inner_product = Flatten()(InnerProductLayer()(embed_list)) 67 | outter_product = OutterProductLayer(kernel_type)(embed_list) 68 | 69 | # ipnn deep input 70 | linear_signal = Reshape( 71 | [len(embed_list)*embedding_size])(Concatenate()(embed_list)) 72 | 73 | if use_inner and use_outter: 74 | deep_input = Concatenate()( 75 | [linear_signal, inner_product, outter_product]) 76 | elif use_inner: 77 | deep_input = Concatenate()([linear_signal, inner_product]) 78 | elif use_outter: 79 | deep_input = Concatenate()([linear_signal, outter_product]) 80 | else: 81 | deep_input = linear_signal 82 | 83 | deep_out = MLP(hidden_size, activation, l2_reg_deep, keep_prob, 84 | False, seed)(deep_input) 85 | deep_logit = Dense(1, use_bias=False, activation=None)(deep_out) 86 | final_logit = deep_logit 87 | output = PredictionLayer(final_activation)(final_logit) 88 | model = Model(inputs=sparse_input + dense_input, 89 | outputs=output) 90 | return model 91 | -------------------------------------------------------------------------------- /deepRS/models/deepfm.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | """ 3 | Author: 4 | Weichen Shen,wcshen1994@163.com 5 | 6 | Reference: 7 | [1] Guo H, Tang R, Ye Y, et al. Deepfm: a factorization-machine based neural network for ctr prediction[J]. arXiv preprint arXiv:1703.04247, 2017.(https://arxiv.org/abs/1703.04247) 8 | 9 | """ 10 | 11 | from tensorflow.python.keras.layers import Dense, Concatenate, Reshape, Flatten, add 12 | from tensorflow.python.keras.models import Model 13 | from tensorflow.python.keras.regularizers import l2 14 | from ..utils import get_input, get_share_embeddings 15 | from ..layers import PredictionLayer, MLP, FM 16 | 17 | 18 | def DeepFM(feature_dim_dict, embedding_size=8, 19 | use_fm=True, hidden_size=(128, 128), l2_reg_linear=0.00001, l2_reg_embedding=0.00001, l2_reg_deep=0, 20 | init_std=0.0001, seed=1024, keep_prob=1, activation='relu', final_activation='sigmoid', use_bn=False): 21 | """Instantiates the DeepFM Network architecture. 
22 | 23 | :param feature_dim_dict: dict,to indicate sparse field and dense field like {'sparse':{'field_1':4,'field_2':3,'field_3':2},'dense':['field_4','field_5']} 24 | :param embedding_size: positive integer,sparse feature embedding_size 25 | :param use_fm: bool,use FM part or not 26 | :param hidden_size: list,list of positive integer or empty list, the layer number and units in each layer of deep net 27 | :param l2_reg_linear: float. L2 regularizer strength applied to linear part 28 | :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vector 29 | :param l2_reg_deep: float. L2 regularizer strength applied to deep net 30 | :param init_std: float,to use as the initialize std of embedding vector 31 | :param seed: integer ,to use as random seed. 32 | :param keep_prob: float in (0,1]. keep_prob used in deep net 33 | :param activation: Activation function to use in deep net 34 | :param final_activation: str,output activation,usually ``'sigmoid'`` or ``'linear'`` 35 | :param use_bn: bool. Whether use BatchNormalization before activation or not.in deep net 36 | :return: A Keras model instance. 37 | """ 38 | if not isinstance(feature_dim_dict, 39 | dict) or "sparse" not in feature_dim_dict or "dense" not in feature_dim_dict: 40 | raise ValueError( 41 | "feature_dim_dict must be a dict like {'sparse':{'field_1':4,'field_2':3,'field_3':2},'dense':['field_5',]}") 42 | if not isinstance(feature_dim_dict["sparse"], dict): 43 | raise ValueError("feature_dim_dict['sparse'] must be a dict,cur is", type( 44 | feature_dim_dict['sparse'])) 45 | if not isinstance(feature_dim_dict["dense"], list): 46 | raise ValueError("feature_dim_dict['dense'] must be a list,cur is", type( 47 | feature_dim_dict['dense'])) 48 | 49 | sparse_input, dense_input = get_input(feature_dim_dict, None) 50 | sparse_embedding, linear_embedding, = get_share_embeddings( 51 | feature_dim_dict, embedding_size, init_std, seed, l2_reg_embedding, l2_reg_linear) 52 | 53 | embed_list = [sparse_embedding[i](sparse_input[i]) 54 | for i in range(len(sparse_input))] 55 | linear_term = [linear_embedding[i](sparse_input[i]) 56 | for i in range(len(sparse_input))] 57 | if len(linear_term) > 1: 58 | linear_term = add(linear_term) 59 | elif len(linear_term) == 1: 60 | linear_term = linear_term[0] 61 | 62 | if len(dense_input) > 0: 63 | continuous_embedding_list = list( 64 | map(Dense(embedding_size, use_bias=False, kernel_regularizer=l2(l2_reg_embedding), ), 65 | dense_input)) 66 | continuous_embedding_list = list( 67 | map(Reshape((1, embedding_size)), continuous_embedding_list)) 68 | embed_list += continuous_embedding_list 69 | 70 | dense_input_ = dense_input[0] if len( 71 | dense_input) == 1 else Concatenate()(dense_input) 72 | linear_dense_logit = Dense( 73 | 1, activation=None, use_bias=False, kernel_regularizer=l2(l2_reg_linear))(dense_input_) 74 | linear_term = add([linear_dense_logit, linear_term]) 75 | 76 | fm_input = Concatenate(axis=1)(embed_list) 77 | deep_input = Flatten()(fm_input) 78 | fm_out = FM()(fm_input) 79 | deep_out = MLP(hidden_size, activation, l2_reg_deep, keep_prob, 80 | use_bn, seed)(deep_input) 81 | deep_logit = Dense(1, use_bias=False, activation=None)(deep_out) 82 | 83 | if len(hidden_size) == 0 and use_fm == False: # only linear 84 | final_logit = linear_term 85 | elif len(hidden_size) == 0 and use_fm == True: # linear + FM 86 | final_logit = add([linear_term, fm_out]) 87 | elif len(hidden_size) > 0 and use_fm == False: # linear + Deep 88 | final_logit = add([linear_term, deep_logit]) 89 | elif 
len(hidden_size) > 0 and use_fm == True: # linear + FM + Deep 90 | final_logit = add([linear_term, fm_out, deep_logit]) 91 | else: 92 | raise NotImplementedError 93 | 94 | output = PredictionLayer(final_activation)(final_logit) 95 | model = Model(inputs=sparse_input + dense_input, outputs=output) 96 | return model 97 | -------------------------------------------------------------------------------- /deepRS/models/xdeepfm.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | """ 3 | Author: 4 | Weichen Shen,wcshen1994@163.com 5 | 6 | Reference: 7 | [1] Lian J, Zhou X, Zhang F, et al. xDeepFM: Combining Explicit and Implicit Feature Interactions for Recommender Systems[J]. arXiv preprint arXiv:1803.05170, 2018.(https://arxiv.org/pdf/1803.05170.pdf) 8 | """ 9 | from tensorflow.python.keras.layers import Dense, Concatenate, Flatten, add, Reshape 10 | from tensorflow.python.keras.models import Model 11 | from tensorflow.python.keras.regularizers import l2 12 | from deepctr.utils import get_input, get_share_embeddings 13 | from deepctr.layers import PredictionLayer, MLP, CIN 14 | 15 | 16 | def xDeepFM(feature_dim_dict, embedding_size=8, hidden_size=(256, 256), cin_layer_size=(128, 128,), cin_split_half=True, cin_activation='relu', l2_reg_linear=0.00001, l2_reg_embedding=0.00001, l2_reg_deep=0, init_std=0.0001, seed=1024, keep_prob=1, activation='relu', final_activation='sigmoid', use_bn=False): 17 | """Instantiates the xDeepFM architecture. 18 | 19 | :param feature_dim_dict: dict,to indicate sparse field and dense field like {'sparse':{'field_1':4,'field_2':3,'field_3':2},'dense':['field_4','field_5']} 20 | :param embedding_size: positive integer,sparse feature embedding_size 21 | :param hidden_size: list,list of positive integer or empty list, the layer number and units in each layer of deep net 22 | :param cin_layer_size: list,list of positive integer or empty list, the feature maps in each hidden layer of Compressed Interaction Network 23 | :param cin_split_half: bool.if set to True, half of the feature maps in each hidden will connect to output unit 24 | :param cin_activation: activation function used on feature maps 25 | :param l2_reg_linear: float. L2 regularizer strength applied to linear part 26 | :param l2_reg_embedding: L2 regularizer strength applied to embedding vector 27 | :param l2_reg_deep: L2 regularizer strength applied to deep net 28 | :param init_std: float,to use as the initialize std of embedding vector 29 | :param seed: integer ,to use as random seed. 30 | :param keep_prob: float in (0,1]. keep_prob used in deep net 31 | :param activation: Activation function to use in deep net 32 | :param final_activation: str,output activation,usually ``'sigmoid'`` or ``'linear'`` 33 | :param use_bn: bool. Whether use BatchNormalization before activation or not.in deep net 34 | :return: A Keras model instance. 
35 | """ 36 | if not isinstance(feature_dim_dict, dict) or "sparse" not in feature_dim_dict or "dense" not in feature_dim_dict: 37 | raise ValueError( 38 | "feature_dim must be a dict like {'sparse':{'field_1':4,'field_2':3,'field_3':2},'dense':['field_5',]}") 39 | sparse_input, dense_input = get_input(feature_dim_dict, None) 40 | sparse_embedding, linear_embedding, = get_share_embeddings(feature_dim_dict, embedding_size, init_std, seed, l2_reg_embedding, 41 | l2_reg_linear) 42 | 43 | embed_list = [sparse_embedding[i](sparse_input[i]) 44 | for i in range(len(sparse_input))] 45 | linear_term = [linear_embedding[i](sparse_input[i]) 46 | for i in range(len(sparse_input))] 47 | if len(linear_term) > 1: 48 | linear_term = add(linear_term) 49 | elif len(linear_term) == 1: 50 | linear_term = linear_term[0] 51 | 52 | if len(dense_input) > 0: 53 | continuous_embedding_list = list( 54 | map(Dense(embedding_size, use_bias=False, kernel_regularizer=l2(l2_reg_embedding), ), 55 | dense_input)) 56 | continuous_embedding_list = list( 57 | map(Reshape((1, embedding_size)), continuous_embedding_list)) 58 | embed_list += continuous_embedding_list 59 | 60 | dense_input_ = dense_input[0] if len( 61 | dense_input) == 1 else Concatenate()(dense_input) 62 | linear_dense_logit = Dense( 63 | 1, activation=None, use_bias=False, kernel_regularizer=l2(l2_reg_linear))(dense_input_) 64 | linear_term = add([linear_dense_logit, linear_term]) 65 | 66 | linear_logit = linear_term 67 | 68 | fm_input = Concatenate(axis=1)(embed_list) if len( 69 | embed_list) > 1 else embed_list[0] 70 | 71 | if len(cin_layer_size) > 0: 72 | exFM_out = CIN(cin_layer_size, cin_activation, 73 | cin_split_half, seed)(fm_input) 74 | exFM_logit = Dense(1, activation=None,)(exFM_out) 75 | 76 | deep_input = Flatten()(fm_input) 77 | deep_out = MLP(hidden_size, activation, l2_reg_deep, keep_prob, 78 | use_bn, seed)(deep_input) 79 | deep_logit = Dense(1, use_bias=False, activation=None)(deep_out) 80 | 81 | if len(hidden_size) == 0 and len(cin_layer_size) == 0: # only linear 82 | final_logit = linear_logit 83 | elif len(hidden_size) == 0 and len(cin_layer_size) > 0: # linear + CIN 84 | final_logit = add([linear_logit, exFM_logit]) 85 | elif len(hidden_size) > 0 and len(cin_layer_size) == 0: # linear + Deep 86 | final_logit = add([linear_logit, deep_logit]) 87 | elif len(hidden_size) > 0 and len(cin_layer_size) > 0: # linear + CIN + Deep 88 | final_logit = add([linear_logit, deep_logit, exFM_logit]) 89 | else: 90 | raise NotImplementedError 91 | 92 | output = PredictionLayer(final_activation)(final_logit) 93 | model = Model(inputs=sparse_input + dense_input, outputs=output) 94 | return model 95 | -------------------------------------------------------------------------------- /deepRS/models/autoint.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | """ 3 | 4 | Author: 5 | Weichen Shen,wcshen1994@163.com 6 | 7 | Reference: 8 | [1] Song W, Shi C, Xiao Z, et al. AutoInt: Automatic Feature Interaction Learning via Self-Attentive Neural Networks[J]. 
arXiv preprint arXiv:1810.11921, 2018.(https://arxiv.org/abs/1810.11921) 9 | 10 | """ 11 | 12 | from tensorflow.python.keras.layers import Dense, Embedding, Concatenate 13 | from tensorflow.python.keras.models import Model 14 | from tensorflow.python.keras.initializers import RandomNormal 15 | from tensorflow.python.keras.regularizers import l2 16 | import tensorflow as tf 17 | 18 | from ..utils import get_input 19 | from ..layers import PredictionLayer, MLP, InteractingLayer 20 | 21 | 22 | def AutoInt(feature_dim_dict, embedding_size=8, att_layer_num=3, att_embedding_size=8, att_head_num=2, att_res=True, hidden_size=(256, 256), activation='relu', 23 | l2_reg_deep=0, l2_reg_embedding=1e-5, use_bn=False, keep_prob=1.0, init_std=0.0001, seed=1024, 24 | final_activation='sigmoid',): 25 | """Instantiates the AutoInt Network architecture. 26 | 27 | :param feature_dim_dict: dict,to indicate sparse field and dense field like {'sparse':{'field_1':4,'field_2':3,'field_3':2},'dense':['field_4','field_5']} 28 | :param embedding_size: positive integer,sparse feature embedding_size 29 | :param att_layer_num: int.The InteractingLayer number to be used. 30 | :param att_embedding_size: int.The embedding size in multi-head self-attention network. 31 | :param att_head_num: int.The head number in multi-head self-attention network. 32 | :param att_res: bool.Whether or not use standard residual connections before output. 33 | :param hidden_size: list,list of positive integer or empty list, the layer number and units in each layer of deep net 34 | :param activation: Activation function to use in deep net 35 | :param l2_reg_deep: float. L2 regularizer strength applied to deep net 36 | :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vector 37 | :param use_bn: bool. Whether use BatchNormalization before activation or not.in deep net 38 | :param keep_prob: float in (0,1]. keep_prob used in deep net 39 | :param init_std: float,to use as the initialize std of embedding vector 40 | :param seed: integer ,to use as random seed. 41 | :param final_activation: output activation,usually ``'sigmoid'`` or ``'linear'`` 42 | :return: A Keras model instance. 
43 | """ 44 | 45 | if len(hidden_size) <= 0 and att_layer_num <= 0: 46 | raise ValueError("Either hidden_layer or att_layer_num must > 0") 47 | if not isinstance(feature_dim_dict, dict) or "sparse" not in feature_dim_dict or "dense" not in feature_dim_dict: 48 | raise ValueError( 49 | "feature_dim must be a dict like {'sparse':{'field_1':4,'field_2':3,'field_3':2},'dense':['field_5',]}") 50 | 51 | sparse_input, dense_input = get_input(feature_dim_dict, None,) 52 | sparse_embedding = get_embeddings( 53 | feature_dim_dict, embedding_size, init_std, seed, l2_reg_embedding) 54 | embed_list = [sparse_embedding[i](sparse_input[i]) 55 | for i in range(len(sparse_input))] 56 | 57 | att_input = Concatenate(axis=1)(embed_list) if len( 58 | embed_list) > 1 else embed_list[0] 59 | 60 | for i in range(att_layer_num): 61 | att_input = InteractingLayer( 62 | att_embedding_size, att_head_num, att_res)(att_input) 63 | att_output = tf.keras.layers.Flatten()(att_input) 64 | 65 | deep_input = tf.keras.layers.Flatten()(Concatenate()(embed_list) 66 | if len(embed_list) > 1 else embed_list[0]) 67 | if len(dense_input) > 0: 68 | if len(dense_input) == 1: 69 | continuous_list = dense_input[0] 70 | else: 71 | continuous_list = Concatenate()(dense_input) 72 | 73 | deep_input = Concatenate()([deep_input, continuous_list]) 74 | 75 | if len(hidden_size) > 0 and att_layer_num > 0: # Deep & Interacting Layer 76 | deep_out = MLP(hidden_size, activation, l2_reg_deep, keep_prob, 77 | use_bn, seed)(deep_input) 78 | stack_out = Concatenate()([att_output, deep_out]) 79 | final_logit = Dense(1, use_bias=False, activation=None)(stack_out) 80 | elif len(hidden_size) > 0: # Only Deep 81 | deep_out = MLP(hidden_size, activation, l2_reg_deep, keep_prob, 82 | use_bn, seed)(deep_input) 83 | final_logit = Dense(1, use_bias=False, activation=None)(deep_out) 84 | elif att_layer_num > 0: # Only Interacting Layer 85 | final_logit = Dense(1, use_bias=False, activation=None)(att_output) 86 | else: # Error 87 | raise NotImplementedError 88 | 89 | output = PredictionLayer(final_activation)(final_logit) 90 | model = Model(inputs=sparse_input + dense_input, outputs=output) 91 | 92 | return model 93 | 94 | 95 | def get_embeddings(feature_dim_dict, embedding_size, init_std, seed, l2_rev_V): 96 | sparse_embedding = [Embedding(feature_dim_dict["sparse"][feat], embedding_size, 97 | embeddings_initializer=RandomNormal( 98 | mean=0.0, stddev=init_std, seed=seed), 99 | embeddings_regularizer=l2(l2_rev_V), 100 | name='sparse_emb_' + str(i) + '-' + feat) for i, feat in 101 | enumerate(feature_dim_dict["sparse"])] 102 | 103 | return sparse_embedding 104 | -------------------------------------------------------------------------------- /deepRS/utils.py: -------------------------------------------------------------------------------- 1 | import json 2 | import logging 3 | from threading import Thread 4 | 5 | import requests 6 | from tensorflow.python.keras.initializers import RandomNormal 7 | from tensorflow.python.keras.layers import Embedding, Input 8 | 9 | from .activations import * 10 | from .layers import * 11 | from .sequence import * 12 | 13 | try: 14 | from packaging.version import parse 15 | except ImportError: 16 | from pip._vendor.packaging.version import parse 17 | 18 | custom_objects = {'InnerProductLayer': InnerProductLayer, 19 | 'OutterProductLayer': OutterProductLayer, 20 | 'MLP': MLP, 21 | 'PredictionLayer': PredictionLayer, 22 | 'FM': FM, 23 | 'AFMLayer': AFMLayer, 24 | 'CrossNet': CrossNet, 25 | 'BiInteractionPooling': 
BiInteractionPooling, 26 | 'LocalActivationUnit': LocalActivationUnit, 27 | 'Dice': Dice, 28 | 'SequencePoolingLayer': SequencePoolingLayer, 29 | 'AttentionSequencePoolingLayer': AttentionSequencePoolingLayer, 30 | 'CIN': CIN, 31 | 'InteractingLayer': InteractingLayer} 32 | 33 | 34 | def get_input(feature_dim_dict, bias_feature_dim_dict=None): 35 | sparse_input = [Input(shape=(1,), name='sparse_' + str(i) + '-' + feat) for i, feat in 36 | enumerate(feature_dim_dict["sparse"])] 37 | dense_input = [Input(shape=(1,), name='dense_' + str(i) + '-' + feat) for i, feat in 38 | enumerate(feature_dim_dict["dense"])] 39 | if bias_feature_dim_dict is None: 40 | return sparse_input, dense_input 41 | else: 42 | bias_sparse_input = [Input(shape=(1,), name='bias_sparse_' + str(i) + '-' + feat) for i, feat in 43 | enumerate(bias_feature_dim_dict["sparse"])] 44 | bias_dense_input = [Input(shape=(1,), name='bias_dense_' + str(i) + '-' + feat) for i, feat in 45 | enumerate(bias_feature_dim_dict["dense"])] 46 | return sparse_input, dense_input, bias_sparse_input, bias_dense_input 47 | 48 | 49 | def get_share_embeddings(feature_dim_dict, embedding_size, init_std, seed, l2_rev_V, l2_reg_w): 50 | sparse_embedding = [Embedding(feature_dim_dict["sparse"][feat], embedding_size, 51 | embeddings_initializer=RandomNormal( 52 | mean=0.0, stddev=init_std, seed=seed), 53 | embeddings_regularizer=l2(l2_rev_V), 54 | name='sparse_emb_' + str(i) + '-' + feat) for i, feat in 55 | enumerate(feature_dim_dict["sparse"])] 56 | linear_embedding = [Embedding(feature_dim_dict["sparse"][feat], 1, 57 | embeddings_initializer=RandomNormal(mean=0.0, stddev=init_std, 58 | seed=seed), embeddings_regularizer=l2(l2_reg_w), 59 | name='linear_emb_' + str(i) + '-' + feat) for 60 | i, feat in enumerate(feature_dim_dict["sparse"])] 61 | 62 | return sparse_embedding, linear_embedding 63 | 64 | 65 | def get_sep_embeddings(deep_feature_dim_dict, wide_feature_dim_dict, embedding_size, init_std, seed, l2_rev_V, l2_reg_w): 66 | sparse_embedding = [Embedding(deep_feature_dim_dict["sparse"][feat], embedding_size, 67 | embeddings_initializer=RandomNormal( 68 | mean=0.0, stddev=init_std, seed=seed), 69 | embeddings_regularizer=l2(l2_rev_V), 70 | name='sparse_emb_' + str(i) + '-' + feat) for i, feat in 71 | enumerate(deep_feature_dim_dict["sparse"])] 72 | linear_embedding = [Embedding(wide_feature_dim_dict["sparse"][feat], 1, 73 | embeddings_initializer=RandomNormal(mean=0.0, stddev=init_std, 74 | seed=seed), embeddings_regularizer=l2(l2_reg_w), 75 | name='linear_emb_' + str(i) + '-' + feat) for 76 | i, feat in enumerate(wide_feature_dim_dict["sparse"])] 77 | 78 | return sparse_embedding, linear_embedding 79 | 80 | 81 | def check_version(version): 82 | """Return version of package on pypi.python.org using json.""" 83 | 84 | def check(version): 85 | try: 86 | url_pattern = 'https://pypi.python.org/pypi/deepctr/json' 87 | req = requests.get(url_pattern) 88 | latest_version = parse('0') 89 | version = parse(version) 90 | if req.status_code == requests.codes.ok: 91 | j = json.loads(req.text.encode('utf-8')) 92 | releases = j.get('releases', []) 93 | for release in releases: 94 | ver = parse(release) 95 | if not ver.is_prerelease: 96 | latest_version = max(latest_version, ver) 97 | if latest_version > version: 98 | logging.warning('\nDeepCTR version {0} detected. 
Your version is {1}.\nUse `pip install -U deepctr` to upgrade.Changelog: https://github.com/shenweichen/DeepCTR/releases/tag/v{0}'.format( 99 | latest_version, version)) 100 | except Exception: 101 | return 102 | Thread(target=check, args=(version,)).start() 103 | -------------------------------------------------------------------------------- /docs/source/conf.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # Configuration file for the Sphinx documentation builder. 4 | # 5 | # This file does only contain a selection of the most common options. For a 6 | # full list see the documentation: 7 | # http://www.sphinx-doc.org/en/master/config 8 | 9 | # -- Path setup -------------------------------------------------------------- 10 | 11 | # If extensions (or modules to document with autodoc) are in another directory, 12 | # add these directories to sys.path here. If the directory is relative to the 13 | # documentation root, use os.path.abspath to make it absolute, like shown here. 14 | # 15 | import os 16 | import sys 17 | sys.path.insert(0, os.path.abspath('../../')) 18 | 19 | 20 | # -- Project information ----------------------------------------------------- 21 | 22 | project = 'DeepCTR' 23 | copyright = '2018, Weichen Shen' 24 | author = 'Weichen Shen' 25 | 26 | # The short X.Y version 27 | version = '' 28 | # The full version, including alpha/beta/rc tags 29 | release = '0.2.1' 30 | 31 | 32 | # -- General configuration --------------------------------------------------- 33 | 34 | # If your documentation needs a minimal Sphinx version, state it here. 35 | # 36 | # needs_sphinx = '1.0' 37 | 38 | # Add any Sphinx extension module names here, as strings. They can be 39 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 40 | # ones. 41 | extensions = [ 42 | 'sphinx.ext.autodoc', 43 | 'sphinx.ext.mathjax', 44 | 'sphinx.ext.ifconfig', 45 | 'sphinx.ext.viewcode', 46 | 'sphinx.ext.githubpages', 47 | ] 48 | 49 | # Add any paths that contain templates here, relative to this directory. 50 | templates_path = ['_templates'] 51 | 52 | # The suffix(es) of source filenames. 53 | # You can specify multiple suffix as a list of string: 54 | # 55 | source_suffix = ['.rst', '.md'] 56 | #source_suffix = '.rst' 57 | 58 | # The master toctree document. 59 | master_doc = 'index' 60 | 61 | # The language for content autogenerated by Sphinx. Refer to documentation 62 | # for a list of supported languages. 63 | # 64 | # This is also used if you do content translation via gettext catalogs. 65 | # Usually you set "language" from the command line for these cases. 66 | language = None 67 | 68 | # List of patterns, relative to source directory, that match files and 69 | # directories to ignore when looking for source files. 70 | # This pattern also affects html_static_path and html_extra_path . 71 | exclude_patterns = [] 72 | 73 | # The name of the Pygments (syntax highlighting) style to use. 74 | pygments_style = 'sphinx' 75 | 76 | 77 | # -- Options for HTML output ------------------------------------------------- 78 | 79 | # The theme to use for HTML and HTML Help pages. See the documentation for 80 | # a list of builtin themes. 81 | # 82 | html_theme = 'alabaster' 83 | 84 | # Theme options are theme-specific and customize the look and feel of a theme 85 | # further. For a list of options available for each theme, see the 86 | # documentation. 
87 | # 88 | # html_theme_options = {} 89 | 90 | # Add any paths that contain custom static files (such as style sheets) here, 91 | # relative to this directory. They are copied after the builtin static files, 92 | # so a file named "default.css" will overwrite the builtin "default.css". 93 | html_static_path = ['_static'] 94 | 95 | # Custom sidebar templates, must be a dictionary that maps document names 96 | # to template names. 97 | # 98 | # The default sidebars (for documents that don't match any pattern) are 99 | # defined by theme itself. Builtin themes are using these templates by 100 | # default: ``['localtoc.html', 'relations.html', 'sourcelink.html', 101 | # 'searchbox.html']``. 102 | # 103 | # html_sidebars = {} 104 | 105 | 106 | # -- Options for HTMLHelp output --------------------------------------------- 107 | 108 | # Output file base name for HTML help builder. 109 | htmlhelp_basename = 'DeepCTRdoc' 110 | 111 | 112 | # -- Options for LaTeX output ------------------------------------------------ 113 | 114 | latex_elements = { 115 | # The paper size ('letterpaper' or 'a4paper'). 116 | # 117 | # 'papersize': 'letterpaper', 118 | 119 | # The font size ('10pt', '11pt' or '12pt'). 120 | # 121 | # 'pointsize': '10pt', 122 | 123 | # Additional stuff for the LaTeX preamble. 124 | # 125 | # 'preamble': '', 126 | 127 | # Latex figure (float) alignment 128 | # 129 | # 'figure_align': 'htbp', 130 | } 131 | 132 | # Grouping the document tree into LaTeX files. List of tuples 133 | # (source start file, target name, title, 134 | # author, documentclass [howto, manual, or own class]). 135 | latex_documents = [ 136 | (master_doc, 'DeepCTR.tex', 'DeepCTR Documentation', 137 | 'Weichen Shen', 'manual'), 138 | ] 139 | 140 | 141 | # -- Options for manual page output ------------------------------------------ 142 | 143 | # One entry per manual page. List of tuples 144 | # (source start file, name, description, authors, manual section). 145 | man_pages = [ 146 | (master_doc, 'deepctr', 'DeepCTR Documentation', 147 | [author], 1) 148 | ] 149 | 150 | 151 | # -- Options for Texinfo output ---------------------------------------------- 152 | 153 | # Grouping the document tree into Texinfo files. List of tuples 154 | # (source start file, target name, title, author, 155 | # dir menu entry, description, category) 156 | texinfo_documents = [ 157 | (master_doc, 'DeepCTR', 'DeepCTR Documentation', 158 | author, 'DeepCTR', 'One line description of project.', 159 | 'Miscellaneous'), 160 | ] 161 | 162 | 163 | # -- Extension configuration ------------------------------------------------- 164 | todo_include_todos = False 165 | html_theme = 'sphinx_rtd_theme' 166 | 167 | source_parsers = { 168 | '.md': 'recommonmark.parser.CommonMarkParser', 169 | } 170 | -------------------------------------------------------------------------------- /deepRS/models/dcn.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | """ 3 | Author: 4 | Weichen Shen,wcshen1994@163.com 5 | 6 | Reference: 7 | [1] Wang R, Fu B, Fu G, et al. Deep & cross network for ad click predictions[C]//Proceedings of the ADKDD'17. ACM, 2017: 12. 
(https://arxiv.org/abs/1708.05123) 8 | """ 9 | from tensorflow.python.keras.layers import Dense, Embedding, Concatenate, Flatten 10 | from tensorflow.python.keras.models import Model 11 | from tensorflow.python.keras.initializers import RandomNormal 12 | from tensorflow.python.keras.regularizers import l2 13 | 14 | from ..utils import get_input 15 | from ..layers import CrossNet, PredictionLayer, MLP 16 | 17 | 18 | def DCN(feature_dim_dict, embedding_size='auto', 19 | cross_num=2, hidden_size=(128, 128, ), l2_reg_embedding=1e-5, l2_reg_cross=1e-5, l2_reg_deep=0, 20 | init_std=0.0001, seed=1024, keep_prob=1, use_bn=False, activation='relu', final_activation='sigmoid', 21 | ): 22 | """Instantiates the Deep&Cross Network architecture. 23 | 24 | :param feature_dim_dict: dict, to indicate sparse field and dense field like {'sparse':{'field_1':4,'field_2':3,'field_3':2},'dense':['field_4','field_5']} 25 | :param embedding_size: positive int or str, sparse feature embedding_size. If set to "auto", it will be 6*pow(cardinality, 0.25) 26 | :param cross_num: positive integer, cross layer number 27 | :param hidden_size: list of positive integers or empty list, the layer number and units in each layer of the deep net 28 | :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vector 29 | :param l2_reg_cross: float. L2 regularizer strength applied to cross net 30 | :param l2_reg_deep: float. L2 regularizer strength applied to deep net 31 | :param init_std: float, to use as the initialization std of embedding vector 32 | :param seed: integer, to use as random seed. 33 | :param keep_prob: float in (0,1]. keep_prob used in deep net 34 | :param use_bn: bool. Whether to use BatchNormalization before activation in the deep net 35 | :param activation: Activation function to use in deep net 36 | :param final_activation: str, output activation, usually ``'sigmoid'`` or ``'linear'`` 37 | :return: A Keras model instance.
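    Usage sketch (illustrative addition, not from the original docs; the field names, cardinalities and random data below are invented)::

        import numpy as np

        model = DCN({'sparse': {'field_1': 4, 'field_2': 3}, 'dense': []},
                    embedding_size=8, cross_num=2, hidden_size=(128, 128))
        model.compile('adam', 'binary_crossentropy')
        x = [np.random.randint(0, 4, (32, 1)), np.random.randint(0, 3, (32, 1))]
        y = np.random.randint(0, 2, (32, 1))
        model.fit(x, y, batch_size=32, epochs=1)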
38 | 39 | """ 40 | if len(hidden_size) == 0 and cross_num == 0: 41 | raise ValueError("Either hidden_size or cross_num must be greater than 0") 42 | if not isinstance(feature_dim_dict, dict) or "sparse" not in feature_dim_dict or "dense" not in feature_dim_dict: 43 | raise ValueError( 44 | "feature_dim must be a dict like {'sparse':{'field_1':4,'field_2':3,'field_3':2},'dense':['field_5',]}") 45 | 46 | sparse_input, dense_input = get_input(feature_dim_dict, None,) 47 | sparse_embedding = get_embeddings( 48 | feature_dim_dict, embedding_size, init_std, seed, l2_reg_embedding) 49 | embed_list = [sparse_embedding[i](sparse_input[i]) 50 | for i in range(len(sparse_input))] 51 | 52 | deep_input = Flatten()(Concatenate()(embed_list) 53 | if len(embed_list) > 1 else embed_list[0]) 54 | if len(dense_input) > 0: 55 | if len(dense_input) == 1: 56 | continuous_list = dense_input[0] 57 | else: 58 | continuous_list = Concatenate()(dense_input) 59 | 60 | deep_input = Concatenate()([deep_input, continuous_list]) 61 | 62 | if len(hidden_size) > 0 and cross_num > 0: # Deep & Cross 63 | deep_out = MLP(hidden_size, activation, l2_reg_deep, keep_prob, 64 | use_bn, seed)(deep_input) 65 | cross_out = CrossNet(cross_num, l2_reg=l2_reg_cross)(deep_input) 66 | stack_out = Concatenate()([cross_out, deep_out]) 67 | final_logit = Dense(1, use_bias=False, activation=None)(stack_out) 68 | elif len(hidden_size) > 0: # Only Deep 69 | deep_out = MLP(hidden_size, activation, l2_reg_deep, keep_prob, 70 | use_bn, seed)(deep_input) 71 | final_logit = Dense(1, use_bias=False, activation=None)(deep_out) 72 | elif cross_num > 0: # Only Cross 73 | cross_out = CrossNet(cross_num, l2_reg=l2_reg_cross)(deep_input) 74 | final_logit = Dense(1, use_bias=False, activation=None)(cross_out) 75 | else: # Error 76 | raise NotImplementedError 77 | 78 | # Activation(self.final_activation)(final_logit) 79 | output = PredictionLayer(final_activation)(final_logit) 80 | model = Model(inputs=sparse_input + dense_input, outputs=output) 81 | 82 | return model 83 | 84 | 85 | def get_embeddings(feature_dim_dict, embedding_size, init_std, seed, l2_rev_V): 86 | if embedding_size == "auto": 87 | sparse_embedding = [Embedding(feature_dim_dict["sparse"][feat], 6*int(pow(feature_dim_dict["sparse"][feat], 0.25)), 88 | embeddings_initializer=RandomNormal( 89 | mean=0.0, stddev=init_std, seed=seed), 90 | embeddings_regularizer=l2(l2_rev_V), name='sparse_emb_' + str(i) + '-'+feat) for i, feat in 91 | enumerate(feature_dim_dict["sparse"])] 92 | 93 | print("Using auto embedding size, the connected vector dimension is", sum( 94 | [6*int(pow(feature_dim_dict["sparse"][k], 0.25)) for k, v in feature_dim_dict["sparse"].items()])) 95 | else: 96 | sparse_embedding = [Embedding(feature_dim_dict["sparse"][feat], embedding_size, 97 | embeddings_initializer=RandomNormal( 98 | mean=0.0, stddev=init_std, seed=seed), 99 | embeddings_regularizer=l2(l2_rev_V), 100 | name='sparse_emb_' + str(i) + '-' + feat) for i, feat in 101 | enumerate(feature_dim_dict["sparse"])] 102 | 103 | return sparse_embedding 104 | -------------------------------------------------------------------------------- /tests/layers_test.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from tensorflow.python.keras.layers import PReLU 3 | from tensorflow.python.keras.utils import CustomObjectScope 4 | 5 | from deepctr import layers 6 | from deepctr.activations import Dice 7 | 8 | from .utils import layer_test 9 | 10 | BATCH_SIZE = 4 11 | FIELD_SIZE = 3 12 |
EMBEDDING_SIZE = 8 13 | SEQ_LENGTH = 10 14 | 15 | 16 | @pytest.mark.parametrize( 17 | 18 | 'layer_num,l2_reg', 19 | 20 | [(layer_num, l2_reg) 21 | 22 | for layer_num in [0, 1, 2, ] 23 | 24 | for l2_reg in [0, 1, ] 25 | ] 26 | 27 | ) 28 | def test_CrossNet(layer_num, l2_reg,): 29 | with CustomObjectScope({'CrossNet': layers.CrossNet}): 30 | layer_test(layers.CrossNet, kwargs={ 31 | 'layer_num': layer_num, 'l2_reg': l2_reg}, input_shape=(2, 3)) 32 | 33 | 34 | def test_CrossNet_invalid(): 35 | with pytest.raises(ValueError): 36 | with CustomObjectScope({'CrossNet': layers.CrossNet}): 37 | layer_test(layers.CrossNet, kwargs={ 38 | 'layer_num': 1, 'l2_reg': 0}, input_shape=(2, 3, 4)) 39 | 40 | 41 | @pytest.mark.parametrize( 42 | 'hidden_size,activation', 43 | [(hidden_size, activation) 44 | for hidden_size in [(), (10,)] 45 | for activation in ['sigmoid', Dice, PReLU] 46 | ] 47 | ) 48 | def test_LocalActivationUnit(hidden_size, activation): 49 | with CustomObjectScope({'LocalActivationUnit': layers.LocalActivationUnit}): 50 | layer_test(layers.LocalActivationUnit, kwargs={'hidden_size': hidden_size, 'activation': activation}, 51 | input_shape=[(BATCH_SIZE, 1, EMBEDDING_SIZE), (BATCH_SIZE, SEQ_LENGTH, EMBEDDING_SIZE)]) 52 | 53 | 54 | @pytest.mark.parametrize( 55 | 'reduce_sum', 56 | [reduce_sum 57 | for reduce_sum in [True, False] 58 | ] 59 | ) 60 | def test_InnerProductLayer(reduce_sum): 61 | with CustomObjectScope({'InnerProductLayer': layers.InnerProductLayer}): 62 | layer_test(layers.InnerProductLayer, kwargs={ 63 | 'reduce_sum': reduce_sum}, input_shape=[(BATCH_SIZE, 1, EMBEDDING_SIZE)]*FIELD_SIZE) 64 | 65 | 66 | @pytest.mark.parametrize( 67 | 'kernel_type', 68 | [kernel_type 69 | for kernel_type in ['mat', 'vec', 'num'] 70 | ] 71 | ) 72 | def test_OutterProductLayer(kernel_type): 73 | with CustomObjectScope({'OutterProductLayer': layers.OutterProductLayer}): 74 | layer_test(layers.OutterProductLayer, kwargs={ 75 | 'kernel_type': kernel_type}, input_shape=[(BATCH_SIZE, 1, EMBEDDING_SIZE)]*FIELD_SIZE) 76 | 77 | 78 | def test_BiInteractionPooling(): 79 | with CustomObjectScope({'BiInteractionPooling': layers.BiInteractionPooling}): 80 | layer_test(layers.BiInteractionPooling, kwargs={}, 81 | input_shape=(BATCH_SIZE, FIELD_SIZE, EMBEDDING_SIZE)) 82 | 83 | 84 | @pytest.mark.parametrize( 85 | 'hidden_size,use_bn', 86 | [(hidden_size, use_bn) 87 | for hidden_size in [(), (10,)] 88 | for use_bn in [True, False] 89 | ] 90 | ) 91 | def test_MLP(hidden_size, use_bn): 92 | with CustomObjectScope({'MLP': layers.MLP}): 93 | layer_test(layers.MLP, kwargs={'hidden_size': hidden_size, 'use_bn': use_bn}, input_shape=( 94 | BATCH_SIZE, EMBEDDING_SIZE)) 95 | 96 | 97 | @pytest.mark.parametrize( 98 | 'activation,use_bias', 99 | [(activation, use_bias) 100 | for activation in ['sigmoid', PReLU] 101 | for use_bias in [True, False] 102 | ] 103 | ) 104 | def test_PredictionLayer(activation, use_bias): 105 | with CustomObjectScope({'PredictionLayer': layers.PredictionLayer}): 106 | layer_test(layers.PredictionLayer, kwargs={'activation': activation, 'use_bias': use_bias 107 | }, input_shape=(BATCH_SIZE, 1)) 108 | 109 | 110 | @pytest.mark.xfail(reason="dim size must be 1 except for the batch size dim") 111 | def test_test_PredictionLayer_invalid(): 112 | # with pytest.raises(ValueError): 113 | with CustomObjectScope({'PredictionLayer': layers.PredictionLayer}): 114 | layer_test(layers.PredictionLayer, kwargs={'use_bias': True 115 | }, input_shape=(BATCH_SIZE, 2, 1)) 116 | 117 | 118 | def test_FM(): 119 | with
CustomObjectScope({'FM': layers.FM}): 120 | layer_test(layers.FM, kwargs={}, input_shape=( 121 | BATCH_SIZE, FIELD_SIZE, EMBEDDING_SIZE)) 122 | 123 | 124 | def test_AFMLayer(): 125 | with CustomObjectScope({'AFMLayer': layers.AFMLayer}): 126 | layer_test(layers.AFMLayer, kwargs={}, input_shape=[( 127 | BATCH_SIZE, 1, EMBEDDING_SIZE)]*FIELD_SIZE) 128 | 129 | 130 | @pytest.mark.parametrize( 131 | 'layer_size,activation,split_half', 132 | [(layer_size, activation, split_half) 133 | for activation in ['linear', PReLU] 134 | for split_half in [True, False] 135 | for layer_size in [(10,), (10, 8)] 136 | ] 137 | ) 138 | def test_CIN(layer_size, activation, split_half): 139 | with CustomObjectScope({'CIN': layers.CIN}): 140 | layer_test(layers.CIN, kwargs={"layer_size": layer_size, "activation": 141 | activation, "split_half": split_half}, input_shape=( 142 | BATCH_SIZE, FIELD_SIZE, EMBEDDING_SIZE)) 143 | 144 | 145 | @pytest.mark.parametrize( 146 | 'layer_size', 147 | [(), (3, 10) 148 | ] 149 | ) 150 | def test_test_CIN_invalid(layer_size): 151 | with pytest.raises(ValueError): 152 | with CustomObjectScope({'CIN': layers.CIN}): 153 | layer_test(layers.CIN, kwargs={"layer_size": layer_size}, input_shape=( 154 | BATCH_SIZE, FIELD_SIZE, EMBEDDING_SIZE)) 155 | 156 | 157 | @pytest.mark.parametrize( 158 | 'head_num,use_res', 159 | [(head_num, use_res,) 160 | for head_num in [1, 2] 161 | for use_res in [True, False] 162 | ] 163 | ) 164 | def test_InteractingLayer(head_num, use_res,): 165 | with CustomObjectScope({'InteractingLayer': layers.InteractingLayer}): 166 | layer_test(layers.InteractingLayer, kwargs={"head_num": head_num, "use_res": 167 | use_res, }, input_shape=( 168 | BATCH_SIZE, FIELD_SIZE, EMBEDDING_SIZE)) 169 | -------------------------------------------------------------------------------- /deepRS/models/din.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | """ 3 | Author: 4 | Weichen Shen,wcshen1994@163.com 5 | 6 | Reference: 7 | [1] Zhou G, Zhu X, Song C, et al. Deep interest network for click-through rate prediction[C]//Proceedings of the 24th ACM SIGKDD International Conference on Knowledge Discovery & Data Mining. ACM, 2018: 1059-1068. 
(https://arxiv.org/pdf/1706.06978.pdf) 8 | """ 9 | 10 | from tensorflow.python.keras.layers import Input, Dense, Embedding, Concatenate, Reshape 11 | from tensorflow.python.keras.models import Model 12 | from tensorflow.python.keras.initializers import RandomNormal 13 | from tensorflow.python.keras.regularizers import l2 14 | 15 | from ..layers import MLP 16 | from ..sequence import SequencePoolingLayer, AttentionSequencePoolingLayer 17 | from ..activations import Dice 18 | 19 | 20 | def get_input(feature_dim_dict, seq_feature_list, seq_max_len): 21 | sparse_input = {feat: Input(shape=(1,), name='sparse_' + str(i) + '-' + feat) for i, feat in 22 | enumerate(feature_dim_dict["sparse"])} 23 | 24 | user_behavior_input = {feat: Input(shape=(seq_max_len,), name='seq_' + str(i) + '-' + feat) for i, feat in 25 | enumerate(seq_feature_list)} 26 | 27 | user_behavior_length = Input(shape=(1,), name='seq_length') 28 | 29 | return sparse_input, user_behavior_input, user_behavior_length 30 | 31 | 32 | def DIN(feature_dim_dict, seq_feature_list, embedding_size=8, hist_len_max=16, 33 | use_din=True, use_bn=False, hidden_size=(200, 80), activation='relu', att_hidden_size=(80, 40), att_activation=Dice, att_weight_normalization=False, 34 | l2_reg_deep=0, l2_reg_embedding=1e-5, final_activation='sigmoid', keep_prob=1, init_std=0.0001, seed=1024, ): 35 | """Instantiates the Deep Interest Network architecture. 36 | 37 | :param feature_dim_dict: dict, to indicate sparse field (**now only sparse features are supported**) like {'sparse':{'field_1':4,'field_2':3,'field_3':2},'dense':[]} 38 | :param seq_feature_list: list, to indicate sequence sparse field (**now only sparse features are supported**), must be a subset of ``feature_dim_dict["sparse"]`` 39 | :param embedding_size: positive integer, sparse feature embedding_size. 40 | :param hist_len_max: positive int, to indicate the max length of seq input 41 | :param use_din: bool, whether to use DIN pooling. If set to ``False``, **sum pooling** is used 42 | :param use_bn: bool. Whether to use BatchNormalization before activation in the deep net 43 | :param hidden_size: list of positive integers or empty list, the layer number and units in each layer of the deep net 44 | :param activation: Activation function to use in deep net 45 | :param att_hidden_size: list of positive integers, the layer number and units in each layer of the attention net 46 | :param att_activation: Activation function to use in attention net 47 | :param att_weight_normalization: bool. Whether to normalize the attention score of the local activation unit. 48 | :param l2_reg_deep: float. L2 regularizer strength applied to deep net 49 | :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vector 50 | :param final_activation: str, output activation, usually ``'sigmoid'`` or ``'linear'`` 51 | :param keep_prob: float in (0,1]. keep_prob used in deep net 52 | :param init_std: float, to use as the initialization std of embedding vector 53 | :param seed: integer, to use as random seed. 54 | :return: A Keras model instance.
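    Usage sketch (illustrative addition, not from the original docs; the field names, cardinalities and random data are invented, and the input ordering assumes Python 3.6+ dict ordering)::

        import numpy as np

        feature_dim_dict = {'sparse': {'user': 3, 'item': 4}, 'dense': []}
        model = DIN(feature_dim_dict, ['item'], hist_len_max=4)
        model.compile('adam', 'binary_crossentropy')
        x = [np.random.randint(0, 3, (8, 1)),  # 'user' id
             np.random.randint(0, 4, (8, 1)),  # candidate 'item' id
             np.random.randint(0, 4, (8, 4)),  # 'item' behavior sequence, padded to hist_len_max
             np.random.randint(1, 5, (8, 1))]  # valid length of each behavior sequence
        y = np.random.randint(0, 2, (8, 1))
        model.fit(x, y, epochs=1)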
55 | 56 | """ 57 | for feature_dim_dict in [feature_dim_dict]: 58 | if not isinstance(feature_dim_dict, 59 | dict) or "sparse" not in feature_dim_dict or "dense" not in feature_dim_dict: 60 | raise ValueError( 61 | "feature_dim must be a dict like {'sparse':{'field_1':4,'field_2':3,'field_3':2},'dense':['field_5',]}") 62 | if len(feature_dim_dict['dense']) > 0: 63 | raise ValueError('DIN only supports sparse input for now') 64 | sparse_input, user_behavior_input, user_behavior_length = get_input( 65 | feature_dim_dict, seq_feature_list, hist_len_max) 66 | sparse_embedding_dict = {feat: Embedding(feature_dim_dict["sparse"][feat], embedding_size, 67 | embeddings_initializer=RandomNormal( 68 | mean=0.0, stddev=init_std, seed=seed), 69 | embeddings_regularizer=l2( 70 | l2_reg_embedding), 71 | name='sparse_emb_' + str(i) + '-' + feat) for i, feat in 72 | enumerate(feature_dim_dict["sparse"])} 73 | query_emb_list = [sparse_embedding_dict[feat]( 74 | sparse_input[feat]) for feat in seq_feature_list] 75 | keys_emb_list = [sparse_embedding_dict[feat]( 76 | user_behavior_input[feat]) for feat in seq_feature_list] 77 | deep_input_emb_list = [sparse_embedding_dict[feat]( 78 | sparse_input[feat]) for feat in feature_dim_dict["sparse"]] 79 | 80 | query_emb = Concatenate()(query_emb_list) if len( 81 | query_emb_list) > 1 else query_emb_list[0] 82 | keys_emb = Concatenate()(keys_emb_list) if len( 83 | keys_emb_list) > 1 else keys_emb_list[0] 84 | deep_input_emb = Concatenate()(deep_input_emb_list) if len( 85 | deep_input_emb_list) > 1 else deep_input_emb_list[0] 86 | 87 | if use_din: 88 | hist = AttentionSequencePoolingLayer(att_hidden_size, att_activation, weight_normalization=att_weight_normalization)([ 89 | query_emb, keys_emb, user_behavior_length]) 90 | else: 91 | hist = SequencePoolingLayer(hist_len_max, 'sum')( 92 | [keys_emb, user_behavior_length]) 93 | 94 | deep_input_emb = Concatenate()([deep_input_emb, hist]) 95 | output = MLP(hidden_size, activation, l2_reg_deep, 96 | keep_prob, use_bn, seed)(deep_input_emb) 97 | output = Dense(1, final_activation)(output) 98 | output = Reshape([1])(output) 99 | model_input_list = list(sparse_input.values( 100 | ))+list(user_behavior_input.values()) + [user_behavior_length] 101 | 102 | model = Model(inputs=model_input_list, outputs=output) 103 | return model 104 | -------------------------------------------------------------------------------- /deepRS/sequence.py: -------------------------------------------------------------------------------- 1 | from tensorflow.python.keras.layers import Layer 2 | from .layers import LocalActivationUnit 3 | import tensorflow as tf 4 | 5 | 6 | class SequencePoolingLayer(Layer): 7 | """The SequencePoolingLayer is used to apply pooling operations (sum, mean or max) on variable-length sequence features/multi-value features. 8 | 9 | Input shape 10 | - A list of two tensors [seq_value, seq_len] 11 | 12 | - seq_value is a 3D tensor with shape: ``(batch_size, T, embedding_size)`` 13 | 14 | - seq_len is a 2D tensor with shape : ``(batch_size, 1)``, indicating the valid length of each sequence. 15 | 16 | Output shape 17 | - 3D tensor with shape: ``(batch_size, 1, embedding_size)``. 18 | 19 | Arguments 20 | - **seq_len_max**: Positive integer indicating the max length of all the sequence features, usually the same as T. 21 | 22 | - **mode**: str. Pooling operation to be used, can be sum, mean or max.
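    Usage sketch (illustrative addition; T=4, embedding_size=8 and the random data are invented, and the layer is assumed to be imported)::

        import numpy as np
        from tensorflow.python.keras.layers import Input
        from tensorflow.python.keras.models import Model

        seq = Input(shape=(4, 8))   # (batch_size, T, embedding_size)
        length = Input(shape=(1,))  # valid length of each sequence
        pooled = SequencePoolingLayer(seq_len_max=4, mode='mean')([seq, length])
        model = Model([seq, length], pooled)
        out = model.predict([np.random.rand(2, 4, 8), np.array([[2.], [4.]])])
        # out.shape == (2, 1, 8); the first sample averages only its first 2 steps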
23 | """ 24 | 25 | def __init__(self, seq_len_max, mode='mean', **kwargs): 26 | 27 | if mode not in ['sum', 'mean', 'max']: 28 | raise ValueError("mode must be sum, mean or max") 29 | self.seq_len_max = seq_len_max 30 | self.mode = mode 31 | super(SequencePoolingLayer, self).__init__(**kwargs) 32 | 33 | def build(self, input_shape): 34 | super(SequencePoolingLayer, self).build( 35 | input_shape) # Be sure to call this somewhere! 36 | 37 | def call(self, seq_value_len_list, **kwargs): 38 | uiseq_embed_list, user_behavior_length = seq_value_len_list 39 | embedding_size = uiseq_embed_list.shape[-1] 40 | mask = tf.sequence_mask(user_behavior_length, 41 | self.seq_len_max, dtype=tf.float32) 42 | 43 | mask = tf.transpose(mask, (0, 2, 1)) 44 | 45 | mask = tf.tile(mask, [1, 1, embedding_size]) 46 | uiseq_embed_list *= mask 47 | hist = uiseq_embed_list 48 | if self.mode == "max": 49 | return tf.reduce_max(hist, 1, keep_dims=True) 50 | 51 | hist = tf.reduce_sum(hist, 1, keep_dims=False) 52 | if self.mode == "mean": 53 | 54 | hist = tf.div(hist, user_behavior_length) 55 | hist = tf.expand_dims(hist, axis=1) 56 | return hist 57 | 58 | def compute_output_shape(self, input_shape): 59 | return (None, 1, input_shape[0][-1]) 60 | 61 | def get_config(self,): 62 | config = {'seq_len_max': self.seq_len_max, 'mode': self.mode} 63 | base_config = super(SequencePoolingLayer, self).get_config() 64 | return dict(list(base_config.items()) + list(config.items())) 65 | 66 | 67 | class AttentionSequencePoolingLayer(Layer): 68 | """The attentional sequence pooling operation used in DIN. 69 | 70 | Input shape 71 | - A list of three tensors: [query, keys, keys_length] 72 | 73 | - query is a 3D tensor with shape: ``(batch_size, 1, embedding_size)`` 74 | 75 | - keys is a 3D tensor with shape: ``(batch_size, T, embedding_size)`` 76 | 77 | - keys_length is a 2D tensor with shape: ``(batch_size, 1)`` 78 | 79 | Output shape 80 | - 3D tensor with shape: ``(batch_size, 1, embedding_size)``. 81 | 82 | Arguments 83 | - **hidden_size**: list of positive integers, the attention net layer number and units in each layer. 84 | 85 | - **activation**: Activation function to use in attention net. 86 | 87 | - **weight_normalization**: bool. Whether to normalize the attention score of the local activation unit. 88 | 89 | References 90 | - [Zhou G, Zhu X, Song C, et al. Deep interest network for click-through rate prediction[C]//Proceedings of the 24th ACM SIGKDD International Conference on Knowledge Discovery & Data Mining.
ACM, 2018: 1059-1068.](https://arxiv.org/pdf/1706.06978.pdf) 91 | """ 92 | 93 | def __init__(self, hidden_size=(80, 40), activation='sigmoid', weight_normalization=False, **kwargs): 94 | 95 | self.hidden_size = hidden_size 96 | self.activation = activation 97 | self.weight_normalization = weight_normalization 98 | 99 | super(AttentionSequencePoolingLayer, self).__init__(**kwargs) 100 | 101 | def build(self, input_shape): 102 | 103 | if not isinstance(input_shape, list) or len(input_shape) != 3: 104 | raise ValueError('An `AttentionSequencePoolingLayer` layer should be called ' 105 | 'on a list of 3 inputs') 106 | 107 | if len(input_shape[0]) != 3 or len(input_shape[1]) != 3 or len(input_shape[2]) != 2: 108 | raise ValueError("Unexpected inputs dimensions, the 3 tensor dimensions are %d, %d and %d, expected to be 3, 3 and 2" % ( 109 | len(input_shape[0]), len(input_shape[1]), len(input_shape[2]))) 110 | 111 | if input_shape[0][-1] != input_shape[1][-1] or input_shape[0][1] != 1 or input_shape[2][1] != 1: 112 | raise ValueError('An `AttentionSequencePoolingLayer` layer requires ' 113 | 'inputs of a list of 3 tensors with shape (None,1,embedding_size),(None,T,embedding_size) and (None,1). ' 114 | 'Got different shapes: %s,%s and %s' % tuple(input_shape)) 115 | super(AttentionSequencePoolingLayer, self).build( 116 | input_shape) # Be sure to call this somewhere! 117 | 118 | def call(self, inputs, **kwargs): 119 | query_key_keylen_list = inputs 120 | queries, keys, keys_length = query_key_keylen_list 121 | hist_len = keys.get_shape()[1] 122 | 123 | attention_score = LocalActivationUnit( 124 | self.hidden_size, self.activation, 0, 1, False, 1024,)([queries, keys]) 125 | 126 | outputs = tf.transpose(attention_score, (0, 2, 1)) 127 | 128 | key_masks = tf.sequence_mask(keys_length, hist_len) 129 | 130 | if self.weight_normalization: 131 | paddings = tf.ones_like(outputs) * (-2 ** 32 + 1) 132 | else: 133 | paddings = tf.zeros_like(outputs) 134 | 135 | outputs = tf.where(key_masks, outputs, paddings) 136 | 137 | if self.weight_normalization: 138 | outputs = tf.nn.softmax(outputs) 139 | 140 | outputs = tf.matmul(outputs, keys) 141 | 142 | return outputs 143 | 144 | def compute_output_shape(self, input_shape): 145 | return (None, 1, input_shape[0][-1]) 146 | 147 | def get_config(self,): 148 | 149 | config = {'hidden_size': self.hidden_size, 'activation': self.activation, 150 | 'weight_normalization': self.weight_normalization} 151 | base_config = super(AttentionSequencePoolingLayer, self).get_config() 152 | return dict(list(base_config.items()) + list(config.items())) 153 | -------------------------------------------------------------------------------- /tests/utils.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, division, print_function 2 | import sys 3 | import inspect 4 | import numpy as np 5 | from numpy.testing import assert_allclose 6 | from tensorflow.python.keras import backend as K 7 | from tensorflow.python.keras.layers import Input 8 | from tensorflow.python.keras.models import Model, save_model, load_model 9 | from deepctr.utils import custom_objects 10 | 11 | 12 | def get_test_data(num_train=1000, num_test=500, input_shape=(10,), 13 | 14 | output_shape=(2,), 15 | 16 | classification=True, num_classes=2): 17 | """Generates test data to train a model on. 18 | 19 | 20 | 21 | classification=True overrides output_shape 22 | 23 | (i.e. output_shape is set to (1,)) and the output 24 | 25 | consists of integers in [0, num_classes-1].
26 | 27 | 28 | 29 | Otherwise: float output with shape output_shape. 30 | 31 | """ 32 | 33 | samples = num_train + num_test 34 | 35 | if classification: 36 | 37 | y = np.random.randint(0, num_classes, size=(samples,)) 38 | 39 | X = np.zeros((samples,) + input_shape, dtype=np.float32) 40 | 41 | for i in range(samples): 42 | 43 | X[i] = np.random.normal(loc=y[i], scale=0.7, size=input_shape) 44 | 45 | else: 46 | 47 | y_loc = np.random.random((samples,)) 48 | 49 | X = np.zeros((samples,) + input_shape, dtype=np.float32) 50 | 51 | y = np.zeros((samples,) + output_shape, dtype=np.float32) 52 | 53 | for i in range(samples): 54 | 55 | X[i] = np.random.normal(loc=y_loc[i], scale=0.7, size=input_shape) 56 | 57 | y[i] = np.random.normal(loc=y_loc[i], scale=0.7, size=output_shape) 58 | 59 | return (X[:num_train], y[:num_train]), (X[num_train:], y[num_train:]) 60 | 61 | 62 | def layer_test(layer_cls, kwargs={}, input_shape=None, input_dtype=None, 63 | 64 | input_data=None, expected_output=None, 65 | 66 | expected_output_dtype=None, fixed_batch_size=False): 67 | """Test routine for a layer with a single input tensor 68 | 69 | and single output tensor. 70 | 71 | """ 72 | 73 | # generate input data 74 | 75 | if input_data is None: 76 | 77 | if not input_shape: 78 | raise AssertionError() 79 | 80 | if not input_dtype: 81 | 82 | input_dtype = K.floatx() 83 | 84 | input_data_shape = list(input_shape) 85 | 86 | for i, e in enumerate(input_data_shape): 87 | 88 | if e is None: 89 | 90 | input_data_shape[i] = np.random.randint(1, 4) 91 | 92 | if all(isinstance(e, tuple) for e in input_data_shape): 93 | input_data = [] 94 | for e in input_data_shape: 95 | input_data.append( 96 | (10 * np.random.random(e)).astype(input_dtype)) 97 | 98 | else: 99 | 100 | input_data = (10 * np.random.random(input_data_shape)) 101 | 102 | input_data = input_data.astype(input_dtype) 103 | 104 | else: 105 | 106 | if input_shape is None: 107 | 108 | input_shape = input_data.shape 109 | 110 | if input_dtype is None: 111 | 112 | input_dtype = input_data.dtype 113 | 114 | if expected_output_dtype is None: 115 | 116 | expected_output_dtype = input_dtype 117 | 118 | # instantiation 119 | 120 | layer = layer_cls(**kwargs) 121 | 122 | # test get_weights , set_weights at layer level 123 | 124 | weights = layer.get_weights() 125 | 126 | layer.set_weights(weights) 127 | 128 | try: 129 | expected_output_shape = layer.compute_output_shape(input_shape) 130 | except Exception: 131 | expected_output_shape = layer._compute_output_shape(input_shape) 132 | 133 | # test in functional API 134 | if isinstance(input_shape, list): 135 | if fixed_batch_size: 136 | 137 | x = [Input(batch_shape=e, dtype=input_dtype) for e in input_shape] 138 | 139 | else: 140 | 141 | x = [Input(shape=e[1:], dtype=input_dtype) for e in input_shape] 142 | else: 143 | if fixed_batch_size: 144 | 145 | x = Input(batch_shape=input_shape, dtype=input_dtype) 146 | 147 | else: 148 | 149 | x = Input(shape=input_shape[1:], dtype=input_dtype) 150 | 151 | y = layer(x) 152 | 153 | if not (K.dtype(y) == expected_output_dtype): 154 | raise AssertionError() 155 | 156 | # check with the functional API 157 | 158 | model = Model(x, y) 159 | 160 | actual_output = model.predict(input_data) 161 | 162 | actual_output_shape = actual_output.shape 163 | 164 | for expected_dim, actual_dim in zip(expected_output_shape, 165 | 166 | actual_output_shape): 167 | 168 | if expected_dim is not None: 169 | 170 | if not (expected_dim == actual_dim): 171 | raise AssertionError() 172 | 173 | if expected_output is not 
None: 174 | 175 | assert_allclose(actual_output, expected_output, rtol=1e-3) 176 | 177 | # test serialization, weight setting at model level 178 | 179 | model_config = model.get_config() 180 | 181 | recovered_model = model.__class__.from_config(model_config) 182 | 183 | if model.weights: 184 | 185 | weights = model.get_weights() 186 | 187 | recovered_model.set_weights(weights) 188 | 189 | _output = recovered_model.predict(input_data) 190 | 191 | assert_allclose(_output, actual_output, rtol=1e-3) 192 | 193 | # test training mode (e.g. useful when the layer has a 194 | 195 | # different behavior at training and testing time). 196 | 197 | if has_arg(layer.call, 'training'): 198 | 199 | model.compile('rmsprop', 'mse') 200 | 201 | model.train_on_batch(input_data, actual_output) 202 | 203 | # test instantiation from layer config 204 | 205 | layer_config = layer.get_config() 206 | 207 | layer_config['batch_input_shape'] = input_shape 208 | 209 | layer = layer.__class__.from_config(layer_config) 210 | 211 | # for further checks in the caller function 212 | 213 | return actual_output 214 | 215 | 216 | def has_arg(fn, name, accept_all=False): 217 | """Checks if a callable accepts a given keyword argument. 218 | 219 | 220 | 221 | For Python 2, checks if there is an argument with the given name. 222 | 223 | 224 | 225 | For Python 3, checks if there is an argument with the given name, and 226 | 227 | also whether this argument can be called with a keyword (i.e. if it is 228 | 229 | not a positional-only argument). 230 | 231 | 232 | 233 | # Arguments 234 | 235 | fn: Callable to inspect. 236 | 237 | name: Check if `fn` can be called with `name` as a keyword argument. 238 | 239 | accept_all: What to return if there is no parameter called `name` 240 | 241 | but the function accepts a `**kwargs` argument. 242 | 243 | 244 | 245 | # Returns 246 | 247 | bool, whether `fn` accepts a `name` keyword argument. 
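    # Example

        A quick illustration (added for clarity; the lambdas are arbitrary):

        >>> has_arg(lambda x, training=False: x, 'training')
        True
        >>> has_arg(lambda x: x, 'training')
        False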
248 | 249 | """ 250 | 251 | if sys.version_info < (3,): 252 | 253 | arg_spec = inspect.getargspec(fn) 254 | 255 | if accept_all and arg_spec.keywords is not None: 256 | 257 | return True 258 | 259 | return (name in arg_spec.args) 260 | 261 | elif sys.version_info < (3, 3): 262 | 263 | arg_spec = inspect.getfullargspec(fn) 264 | 265 | if accept_all and arg_spec.varkw is not None: 266 | 267 | return True 268 | 269 | return (name in arg_spec.args or 270 | 271 | name in arg_spec.kwonlyargs) 272 | 273 | else: 274 | 275 | signature = inspect.signature(fn) 276 | 277 | parameter = signature.parameters.get(name) 278 | 279 | if parameter is None: 280 | 281 | if accept_all: 282 | 283 | for param in signature.parameters.values(): 284 | 285 | if param.kind == inspect.Parameter.VAR_KEYWORD: 286 | 287 | return True 288 | 289 | return False 290 | 291 | return (parameter.kind in (inspect.Parameter.POSITIONAL_OR_KEYWORD, 292 | 293 | inspect.Parameter.KEYWORD_ONLY)) 294 | 295 | 296 | def check_model(model, model_name, x, y): 297 | model.compile('adam', 'binary_crossentropy', 298 | metrics=['binary_crossentropy']) 299 | model.fit(x, y, batch_size=100, epochs=1, validation_split=0.5) 300 | 301 | print(model_name+" test train valid pass!") 302 | model.save_weights(model_name + '_weights.h5') 303 | model.load_weights(model_name + '_weights.h5') 304 | print(model_name+" test save load weight pass!") 305 | save_model(model, model_name + '.h5') 306 | model = load_model(model_name + '.h5', custom_objects) 307 | print(model_name + " test save load model pass!") 308 | 309 | print(model_name + " test pass!") 310 | -------------------------------------------------------------------------------- /docs/source/Features.rst: -------------------------------------------------------------------------------- 1 | Features 2 | ========== 3 | 4 | Overview 5 | ----------- 6 | 7 | With the great success of deep learning, DNN-based techniques have been widely used in the CTR estimation task. 8 | 9 | DNN-based CTR estimation models consist of the following 4 modules: 10 | ``Input,Embedding,Low-order&High-order Feature Extractor,Prediction`` 11 | 12 | Input&Embedding 13 | The data in the CTR estimation task usually include highly sparse, high-cardinality 14 | categorical features and some dense numerical features. 15 | 16 | Since DNNs are good at handling dense numerical features, we usually map the sparse categorical 17 | features to dense numerical vectors through the `embedding technique`. 18 | 19 | For numerical features, we usually apply `discretization` or `normalization` on them. 20 | 21 | Feature Extractor 22 | Low-order Extractor learns feature interactions through products between vectors. 23 | Factorization Machines and their variants are widely used to learn the low-order feature interactions. 24 | 25 | High-order Extractor learns feature combinations through complex neural network functions like MLP, Cross Net, etc. 26 | 27 | Models 28 | -------- 29 | 30 | FNN (Factorization-supported Neural Network) 31 | >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> 32 | 33 | According to the paper, FNN learns embedding vectors of categorical data via a pre-trained FM. 34 | It uses FM's latent vectors to initialize the embedding vectors. During the training stage, it 35 | concatenates the embedding vectors and feeds them into an MLP (MultiLayer Perceptron). 36 | 37 | **FNN api** `link <./deepctr.models.fnn.html>`_ 38 | 39 | 40 | .. image:: ../pics/FNN.png 41 | :align: center 42 | :scale: 50 % 43 | 44 | 45 | 46 | `Zhang W, Du T, Wang J.
Deep learning over multi-field categorical data[C]//European conference on information retrieval. Springer, Cham, 2016: 45-57. `_ 47 | 48 | 49 | PNN (Product-based Neural Network) 50 | >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> 51 | 52 | PNN concatenates sparse feature embeddings and the products between embedding vectors as the input of an MLP. 53 | 54 | **PNN api** `link <./deepctr.models.pnn.html>`_ 55 | 56 | .. image:: ../pics/PNN.png 57 | :align: center 58 | :scale: 70 % 59 | 60 | 61 | `Qu Y, Cai H, Ren K, et al. Product-based neural networks for user response prediction[C]//Data Mining (ICDM), 2016 IEEE 16th International Conference on. IEEE, 2016: 1149-1154. `_ 62 | 63 | 64 | Wide & Deep 65 | >>>>>>>>>>>>> 66 | 67 | WDL's deep part concatenates sparse feature embeddings as the input of an MLP, while the wide part uses handcrafted features as input. 68 | The logits of the deep part and the wide part are added to get the prediction probability. 69 | 70 | **WDL api** `link <./deepctr.models.wdl.html>`_ 71 | 72 | .. image:: ../pics/WDL.png 73 | :align: center 74 | :scale: 50 % 75 | 76 | `Cheng H T, Koc L, Harmsen J, et al. Wide & deep learning for recommender systems[C]//Proceedings of the 1st Workshop on Deep Learning for Recommender Systems. ACM, 2016: 7-10. `_ 77 | 78 | 79 | DeepFM 80 | >>>>>>>> 81 | 82 | DeepFM can be seen as an improvement of WDL and FNN. Compared with WDL, DeepFM uses 83 | FM instead of LR in the wide part and uses the concatenation of embedding vectors as the input of the MLP in the deep part. 84 | Compared with FNN, the embedding vectors of the FM and the input to the MLP are the same. 85 | And they do not need a pre-trained FM vector for initialization; they are learned end-to-end. 86 | 87 | **DeepFM api** `link <./deepctr.models.deepfm.html>`_ 88 | 89 | .. image:: ../pics/DeepFM.png 90 | :align: center 91 | :scale: 50 % 92 | 93 | `Guo H, Tang R, Ye Y, et al. Deepfm: a factorization-machine based neural network for ctr prediction[J]. arXiv preprint arXiv:1703.04247, 2017. `_ 94 | 95 | MLR(Mixed Logistic Regression/Piece-wise Linear Model) 96 | >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> 97 | 98 | MLR can be viewed as a combination of 2*m LR models, where m is the piece (region) number. 99 | m LR models learn the weight that the sample belongs to each region, and another m LR models learn the sample's click probability in the region. 100 | Finally, the sample's CTR is a weighted sum of each region's click probability. Note that the weights are normalized. 101 | 102 | **MLR api** `link <./deepctr.models.mlr.html>`_ 103 | 104 | .. image:: ../pics/MLR.png 105 | :align: center 106 | :scale: 50 % 107 | 108 | `Gai K, Zhu X, Li H, et al. Learning Piece-wise Linear Models from Large Scale Data for Ad Click Prediction[J]. arXiv preprint arXiv:1704.05194, 2017. `_ 109 | 110 | 111 | NFM (Neural Factorization Machine) 112 | >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> 113 | 114 | NFM uses a bi-interaction pooling layer to learn feature interactions between 115 | embedding vectors and compresses the result into a single vector which has the same size as a single embedding vector. 116 | The vector is then fed into an MLP. The output logit of the MLP and the output logit of the linear part are added to get the prediction probability. 117 | 118 | **NFM api** `link <./deepctr.models.nfm.html>`_ 119 | 120 | .. image:: ../pics/NFM.png 121 | :align: center 122 | :scale: 50 % 123 | 124 | `He X, Chua T S. Neural factorization machines for sparse predictive analytics[C]//Proceedings of the 40th International ACM SIGIR conference on Research and Development in Information Retrieval.
ACM, 2017: 355-364. `_ 125 | 126 | 127 | AFM (Attentional Factorization Machine) 128 | >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> 129 | 130 | AFM is a variant of FM; traditional FM sums the inner products of embedding vectors uniformly. 131 | AFM can be seen as a weighted sum of feature interactions. The weights are learned by a small MLP. 132 | 133 | **AFM api** `link <./deepctr.models.afm.html>`_ 134 | 135 | .. image:: ../pics/AFM.png 136 | :align: center 137 | :scale: 70 % 138 | 139 | `Xiao J, Ye H, He X, et al. Attentional factorization machines: Learning the weight of feature interactions via attention networks[J]. arXiv preprint arXiv:1708.04617, 2017. `_ 140 | 141 | 142 | DCN (Deep & Cross Network) 143 | >>>>>>>>>>>>>>>>>>>>>>>>>>>> 144 | 145 | DCN uses a Cross Net to learn both low- and high-order feature interactions explicitly, and uses an MLP to learn feature interactions implicitly. 146 | The outputs of the Cross Net and the MLP are concatenated. The concatenated vector is fed into one fully connected layer to get the prediction probability. 147 | 148 | **DCN api** `link <./deepctr.models.dcn.html>`_ 149 | 150 | .. image:: ../pics/DCN.png 151 | :align: center 152 | :scale: 70 % 153 | 154 | `Wang R, Fu B, Fu G, et al. Deep & cross network for ad click predictions[C]//Proceedings of the ADKDD'17. ACM, 2017: 12. `_ 155 | 156 | 157 | DIN (Deep Interest Network) 158 | >>>>>>>>>>>>>>>>>>>>>>>>>>>>> 159 | 160 | DIN introduces an attention method to learn from sequence (multi-valued) features. 161 | Traditional methods usually use sum/mean pooling on sequence features. 162 | DIN uses a local activation unit to get the activation score between the candidate item and history items. 163 | The user's interest is represented by a weighted sum of user behaviors. 164 | The user's interest vector and the other embedding vectors are concatenated and fed into an MLP to get the prediction. 165 | 166 | **DIN api** `link <./deepctr.models.din.html>`_ **DIN demo** `link `_ 168 | 169 | .. image:: ../pics/DIN.png 170 | :align: center 171 | :scale: 70 % 172 | 173 | `Zhou G, Zhu X, Song C, et al. Deep interest network for click-through rate prediction[C]//Proceedings of the 24th ACM SIGKDD International Conference on Knowledge Discovery & Data Mining. ACM, 2018: 1059-1068. `_ 174 | 175 | xDeepFM 176 | >>>>>>>>>> 177 | 178 | xDeepFM uses a Compressed Interaction Network (CIN) to learn both low- and high-order feature interactions explicitly, and uses an MLP to learn feature interactions implicitly. 179 | In each layer of CIN, it first computes outer products between :math:`x^k` and :math:`x_0` to get a tensor :math:`Z_{k+1}`, then uses a 1D convolution to learn feature maps :math:`H_{k+1}` on this tensor. 180 | Finally, sum pooling is applied on all the feature maps :math:`H_k` to get one vector. The vector is used to compute the logit that CIN contributes. 181 | 182 | 183 | **xDeepFM api** `link <./deepctr.models.xdeepfm.html>`_ 184 | 185 | .. image:: ../pics/CIN.png 186 | :align: center 187 | :scale: 70 % 188 | 189 | .. image:: ../pics/xDeepFM.png 190 | :align: center 191 | :scale: 70 % 192 | 193 | `Lian J, Zhou X, Zhang F, et al. xDeepFM: Combining Explicit and Implicit Feature Interactions for Recommender Systems[J]. arXiv preprint arXiv:1803.05170, 2018. `_ 194 | 195 | AutoInt(Automatic Feature Interaction) 196 | >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> 197 | 198 | AutoInt uses an interacting layer to model the interactions between different features.
199 | Within each interacting layer, each feature is allowed to interact with all the other features and is able to automatically identify relevant features to form meaningful higher-order features via the multi-head attention mechanism. 200 | By stacking multiple interacting layers, AutoInt is able to model different orders of feature interactions. 201 | 202 | **AutoInt api** `link <./deepctr.models.autoint.html>`_ 203 | 204 | .. image:: ../pics/InteractingLayer.png 205 | :align: center 206 | :scale: 70 % 207 | 208 | .. image:: ../pics/AutoInt.png 209 | :align: center 210 | :scale: 70 % 211 | 212 | `Song W, Shi C, Xiao Z, et al. AutoInt: Automatic Feature Interaction Learning via Self-Attentive Neural Networks[J]. arXiv preprint arXiv:1810.11921, 2018. `_ 213 | 214 | Layers 215 | -------- 216 | 217 | The models of deepctr are modular, 218 | so you can use different modules to build your own models. 219 | 220 | Each module is a class that inherits from ``tf.keras.layers.Layer``; it has 221 | the same attributes and methods as Keras layers like ``tf.keras.layers.Dense()``, etc. 222 | 223 | You can see the layers API in `layers <./deepctr.layers.html>`_ 224 | 225 | 226 | Activations 227 | -------------- 228 | 229 | Some custom activation functions. 230 | 231 | You can see the activations API in `activations <./deepctr.activations.html>`_ 232 | 233 | Sequence 234 | ---------- 235 | 236 | The sequence module aims to process variable-length sequence data. 237 | 238 | You can see the sequence API in `sequence <./deepctr.sequence.html>`_ 239 | -------------------------------------------------------------------------------- /deepRS/models/mlr.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | """ 3 | Author: 4 | Weichen Shen,wcshen1994@163.com 5 | 6 | Reference: 7 | [1] Gai K, Zhu X, Li H, et al. Learning Piece-wise Linear Models from Large Scale Data for Ad Click Prediction[J]. arXiv preprint arXiv:1704.05194, 2017.(https://arxiv.org/abs/1704.05194) 8 | """ 9 | from tensorflow.python.keras.layers import Input, Dense, Embedding, Concatenate, Activation, Reshape, add, dot 10 | from tensorflow.python.keras.models import Model 11 | from tensorflow.python.keras.initializers import TruncatedNormal 12 | from tensorflow.python.keras.regularizers import l2 13 | 14 | 15 | def MLR(region_feature_dim_dict, base_feature_dim_dict={"sparse": {}, "dense": []}, region_num=4, 16 | l2_reg_linear=1e-5, 17 | init_std=0.0001, seed=1024, final_activation='sigmoid', 18 | bias_feature_dim_dict={"sparse": {}, "dense": []}): 19 | """Instantiates the Mixed Logistic Regression/Piece-wise Linear Model. 20 | 21 | :param region_feature_dim_dict: dict, to indicate sparse field and dense field like {'sparse':{'field_1':4,'field_2':3,'field_3':2},'dense':['field_4','field_5']} 22 | :param base_feature_dim_dict: dict or None, to indicate sparse field and dense field of the base learner. If None, it is the same as region_feature_dim_dict 23 | :param region_num: integer > 1, indicating the piece number 24 | :param l2_reg_linear: float. L2 regularizer strength applied to weight 25 | :param init_std: float, to use as the initialization std of embedding vector 26 | :param seed: integer, to use as random seed. 27 | :param final_activation: str, output activation, usually ``'sigmoid'`` or ``'linear'`` 28 | :param bias_feature_dim_dict: dict, to indicate sparse field and dense field like {'sparse':{'field_1':4,'field_2':3,'field_3':2},'dense':['field_4','field_5']} 29 | :return: A Keras model instance.
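    Usage sketch (illustrative addition, not from the original docs; the field names, cardinalities and random data are invented)::

        import numpy as np

        model = MLR({'sparse': {'field_1': 4, 'field_2': 3}, 'dense': []}, region_num=4)
        model.compile('adam', 'binary_crossentropy')
        x = [np.random.randint(0, 4, (16, 1)), np.random.randint(0, 3, (16, 1))]
        y = np.random.randint(0, 2, (16, 1))
        model.fit(x, y, epochs=1)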
30 | """ 31 | 32 | if region_num <= 1: 33 | raise ValueError("region_num must be > 1") 34 | if not isinstance(region_feature_dim_dict, 35 | dict) or "sparse" not in region_feature_dim_dict or "dense" not in region_feature_dim_dict: 36 | raise ValueError( 37 | "feature_dim must be a dict like {'sparse':{'field_1':4,'field_2':3,'field_3':2},'dense':['field_5',]}") 38 | 39 | same_flag = False 40 | if base_feature_dim_dict == {"sparse": {}, "dense": []}: 41 | base_feature_dim_dict = region_feature_dim_dict 42 | same_flag = True 43 | 44 | region_sparse_input, region_dense_input, base_sparse_input, base_dense_input, bias_sparse_input, bias_dense_input = get_input( 45 | region_feature_dim_dict, base_feature_dim_dict, bias_feature_dim_dict, same_flag) 46 | region_embeddings, base_embeddings, bias_embedding = get_embedding( 47 | region_num, region_feature_dim_dict, base_feature_dim_dict, bias_feature_dim_dict, init_std, seed, l2_reg_linear) 48 | 49 | if same_flag: 50 | 51 | base_dense_input_ = region_dense_input 52 | 53 | base_sparse_input_ = region_sparse_input 54 | 55 | else: 56 | 57 | base_dense_input_ = base_dense_input 58 | 59 | base_sparse_input_ = base_sparse_input 60 | 61 | region_dense_feature_num = len(region_feature_dim_dict['dense']) 62 | region_sparse_feature_num = len(region_feature_dim_dict['sparse']) 63 | base_dense_feature_num = len(base_feature_dim_dict['dense']) 64 | base_sparse_feature_num = len(base_feature_dim_dict['sparse']) 65 | bias_dense_feature_num = len(bias_feature_dim_dict['dense']) 66 | bias_sparse_feature_num = len(bias_feature_dim_dict['sparse']) 67 | 68 | if region_dense_feature_num > 1: 69 | region_dense_logits_ = [Dense(1, )(Concatenate()(region_dense_input)) for _ in 70 | range(region_num)] 71 | elif region_dense_feature_num == 1: 72 | region_dense_logits_ = [Dense(1, )(region_dense_input[0]) for _ in 73 | range(region_num)] 74 | 75 | if base_dense_feature_num > 1: 76 | base_dense_logits = [Dense(1, )(Concatenate()(base_dense_input_))for _ in 77 | range(region_num)] 78 | elif base_dense_feature_num == 1: 79 | base_dense_logits = [Dense(1, )(base_dense_input_[0])for _ in 80 | range(region_num)] 81 | 82 | if region_dense_feature_num > 0 and region_sparse_feature_num == 0: 83 | region_logits = Concatenate()(region_dense_logits_) 84 | elif region_dense_feature_num == 0 and region_sparse_feature_num > 0: 85 | region_sparse_logits = [ 86 | add([region_embeddings[j][i](region_sparse_input[i]) 87 | for i in range(region_sparse_feature_num)]) 88 | if region_sparse_feature_num > 1 else region_embeddings[j][0](region_sparse_input[0]) 89 | for j in range(region_num)] 90 | region_logits = Concatenate()(region_sparse_logits) 91 | 92 | else: 93 | region_sparse_logits = [ 94 | add([region_embeddings[j][i](region_sparse_input[i]) 95 | for i in range(region_sparse_feature_num)]) 96 | for j in range(region_num)] 97 | region_logits = Concatenate()( 98 | [add([region_sparse_logits[i], region_dense_logits_[i]]) for i in range(region_num)]) 99 | 100 | if base_dense_feature_num > 0 and base_sparse_feature_num == 0: 101 | base_logits = base_dense_logits 102 | elif base_dense_feature_num == 0 and base_sparse_feature_num > 0: 103 | base_sparse_logits = [add( 104 | [base_embeddings[j][i](base_sparse_input_[i]) for i in range(base_sparse_feature_num)]) if base_sparse_feature_num > 1 else base_embeddings[j][0](base_sparse_input_[0]) 105 | for j in range(region_num)] 106 | base_logits = base_sparse_logits 107 | else: 108 | base_sparse_logits = [add( 109 |
[base_embeddings[j][i](base_sparse_input_[i]) for i in range(base_sparse_feature_num)]) if base_sparse_feature_num > 1 else base_embeddings[j][0](base_sparse_input_[0]) 110 | for j in range(region_num)] 111 | base_logits = [add([base_sparse_logits[i], base_dense_logits[i]]) 112 | for i in range(region_num)] 113 | 114 | # Dense(self.region_num, activation='softmax')(final_logit) 115 | region_weights = Activation("softmax")(region_logits) 116 | learner_score = Concatenate()( 117 | [Activation(final_activation, name='learner' + str(i))(base_logits[i]) for i in range(region_num)]) 118 | final_logit = dot([region_weights, learner_score], axes=-1) 119 | 120 | if bias_dense_feature_num + bias_sparse_feature_num > 0: 121 | 122 | if bias_dense_feature_num > 1: 123 | bias_dense_logits = Dense(1,)(Concatenate()(bias_dense_input)) 124 | elif bias_dense_feature_num == 1: 125 | bias_dense_logits = Dense(1,)(bias_dense_input[0]) 126 | else: 127 | pass 128 | 129 | if bias_sparse_feature_num > 1: 130 | bias_cate_logits = add([bias_embedding[i](bias_sparse_input[i]) 131 | for i, feat in enumerate(bias_feature_dim_dict['sparse'])]) 132 | elif bias_sparse_feature_num == 1: 133 | bias_cate_logits = bias_embedding[0](bias_sparse_input[0]) 134 | else: 135 | pass 136 | 137 | if bias_dense_feature_num > 0 and bias_sparse_feature_num > 0: 138 | bias_logits = add([bias_dense_logits, bias_cate_logits]) 139 | elif bias_dense_feature_num > 0: 140 | bias_logits = bias_dense_logits 141 | else: 142 | bias_logits = bias_cate_logits 143 | 144 | bias_prob = Activation('sigmoid')(bias_logits) 145 | final_logit = dot([final_logit, bias_prob], axes=-1) 146 | 147 | output = Reshape([1])(final_logit) 148 | model = Model(inputs=region_sparse_input + region_dense_input+base_sparse_input + 149 | base_dense_input+bias_sparse_input+bias_dense_input, outputs=output) 150 | return model 151 | 152 | 153 | def get_input(region_feature_dim_dict, base_feature_dim_dict, bias_feature_dim_dict, same_flag): 154 | region_sparse_input = [Input(shape=(1,), name='region_sparse_' + str(i)+"-"+feat) 155 | for i, feat in enumerate(region_feature_dim_dict["sparse"])] 156 | region_dense_input = [Input(shape=(1,), name='region_dense_' + str(i)+"-"+feat) 157 | for i, feat in enumerate(region_feature_dim_dict["dense"])] 158 | if same_flag == True: 159 | base_sparse_input = [] 160 | base_dense_input = [] 161 | else: 162 | base_sparse_input = [Input(shape=(1,), name='base_sparse_' + str(i) + "-" + feat) for i, feat in 163 | enumerate(base_feature_dim_dict["sparse"])] 164 | base_dense_input = [Input(shape=(1,), name='base_dense_' + str(i) + "-" + feat) for i, feat in 165 | enumerate(base_feature_dim_dict['dense'])] 166 | 167 | bias_sparse_input = [Input(shape=(1,), name='bias_cate_' + str(i) + "-" + feat) for i, feat in 168 | enumerate(bias_feature_dim_dict['sparse'])] 169 | bias_dense_input = [Input(shape=(1,), name='bias_continuous_' + str(i) + "-" + feat) for i, feat in 170 | enumerate(bias_feature_dim_dict['dense'])] 171 | return region_sparse_input, region_dense_input, base_sparse_input, base_dense_input, bias_sparse_input, bias_dense_input 172 | 173 | 174 | def get_embedding(region_num, region_feature_dim_dict, base_feature_dim_dict, bias_feature_dim_dict, init_std, seed, l2_reg_linear): 175 | 176 | region_embeddings = [[Embedding(region_feature_dim_dict["sparse"][feat], 1, embeddings_initializer=TruncatedNormal(stddev=init_std, seed=seed+j), embeddings_regularizer=l2(l2_reg_linear), 177 | name='region_emb_' + str(j)+'_' + str(i)) for 178 | i, feat in 
enumerate(region_feature_dim_dict['sparse'])] for j in range(region_num)] 179 | base_embeddings = [[Embedding(base_feature_dim_dict['sparse'][feat], 1, 180 | embeddings_initializer=TruncatedNormal(stddev=init_std, seed=seed + j), embeddings_regularizer=l2(l2_reg_linear), 181 | name='base_emb_' + str(j) + '_' + str(i)) for 182 | i, feat in enumerate(base_feature_dim_dict['sparse'])] for j in range(region_num)] 183 | bias_embedding = [Embedding(bias_feature_dim_dict['sparse'][feat], 1, embeddings_initializer=TruncatedNormal(stddev=init_std, seed=seed), embeddings_regularizer=l2(l2_reg_linear), 184 | name='embed_bias' + '_' + str(i)) for 185 | i, feat in enumerate(bias_feature_dim_dict['sparse'])] 186 | 187 | return region_embeddings, base_embeddings, bias_embedding 188 | -------------------------------------------------------------------------------- /examples/movielens_sample.txt: -------------------------------------------------------------------------------- 1 | user_id,movie_id,rating,timestamp,title,genres,gender,age,occupation,zip 2 | 3299,235,4,968035345,Ed Wood (1994),Comedy|Drama,F,25,4,19119 3 | 3630,3256,3,966536874,Patriot Games (1992),Action|Thriller,M,18,4,77005 4 | 517,105,4,976203603,"Bridges of Madison County, The (1995)",Drama|Romance,F,25,14,55408 5 | 785,2115,3,975430389,Indiana Jones and the Temple of Doom (1984),Action|Adventure,M,18,19,29307 6 | 5848,909,5,957782527,"Apartment, The (1960)",Comedy|Drama,M,50,20,20009 7 | 2996,2799,1,972769867,Problem Child 2 (1991),Comedy,M,18,0,63011 8 | 3087,837,5,969738869,Matilda (1996),Children's|Comedy,F,1,1,90802 9 | 872,3092,5,975273310,Chushingura (1962),Drama,M,50,1,20815 10 | 4094,529,5,966223349,Searching for Bobby Fischer (1993),Drama,M,25,17,49017 11 | 1868,3508,3,974694703,"Outlaw Josey Wales, The (1976)",Western,M,50,11,92346 12 | 2913,1387,5,971769808,Jaws (1975),Action|Horror,F,35,20,98119 13 | 380,3481,5,976316283,High Fidelity (2000),Comedy,M,25,2,92024 14 | 2073,1784,5,974759084,As Good As It Gets (1997),Comedy|Drama,F,18,4,13148 15 | 80,2059,3,977788576,"Parent Trap, The (1998)",Children's|Drama,M,56,1,49327 16 | 3679,2557,1,976298130,I Stand Alone (Seul contre tous) (1998),Drama,M,25,4,68108 17 | 2077,788,3,980013556,"Nutty Professor, The (1996)",Comedy|Fantasy|Romance|Sci-Fi,M,18,0,55112 18 | 6036,2085,4,956716684,101 Dalmatians (1961),Animation|Children's,F,25,15,32603 19 | 3675,532,3,966363610,Serial Mom (1994),Comedy|Crime|Horror,M,35,7,06680 20 | 4566,3683,4,964489599,Blood Simple (1984),Drama|Film-Noir,M,35,17,19473 21 | 2996,3763,3,972413564,F/X (1986),Action|Crime|Thriller,M,18,0,63011 22 | 5831,2458,1,957898337,Armed and Dangerous (1986),Comedy|Crime,M,25,1,92120 23 | 1869,1244,2,974695654,Manhattan (1979),Comedy|Drama|Romance,M,45,14,95148 24 | 5389,2657,3,960328279,"Rocky Horror Picture Show, The (1975)",Comedy|Horror|Musical|Sci-Fi,M,45,7,01905 25 | 1391,1535,3,974851275,Love! Valour! Compassion! 
(1997),Drama|Romance,M,35,15,20723 26 | 3123,2407,3,969324381,Cocoon (1985),Comedy|Sci-Fi,M,25,2,90401 27 | 4694,159,3,963602574,Clockers (1995),Drama,M,56,7,40505 28 | 1680,1988,3,974709821,Hello Mary Lou: Prom Night II (1987),Horror,M,25,20,95380 29 | 2002,1945,4,974677761,On the Waterfront (1954),Crime|Drama,F,56,13,02136-1522 30 | 3430,2690,4,979949863,"Ideal Husband, An (1999)",Comedy,F,45,1,15208 31 | 425,471,4,976284972,"Hudsucker Proxy, The (1994)",Comedy|Romance,M,25,12,55303 32 | 1841,2289,2,974699637,"Player, The (1992)",Comedy|Drama,M,18,0,95037 33 | 4964,2348,4,962619587,Sid and Nancy (1986),Drama,M,35,0,94110 34 | 4520,2160,4,964883648,Rosemary's Baby (1968),Horror|Thriller,M,25,4,45810 35 | 1265,2396,4,1011716691,Shakespeare in Love (1998),Comedy|Romance,F,18,20,49321 36 | 2496,1278,5,974435324,Young Frankenstein (1974),Comedy|Horror,M,50,1,37932 37 | 5511,2174,4,959787754,Beetlejuice (1988),Comedy|Fantasy,M,45,1,92407 38 | 621,833,1,975799925,High School High (1996),Comedy,M,18,4,93560 39 | 3045,2762,5,970189524,"Sixth Sense, The (1999)",Thriller,M,45,1,90631 40 | 2050,2546,4,975522689,"Deep End of the Ocean, The (1999)",Drama,F,35,3,99504 41 | 613,32,4,975812238,Twelve Monkeys (1995),Drama|Sci-Fi,M,35,20,10562 42 | 366,1077,5,978471241,Sleeper (1973),Comedy|Sci-Fi,M,50,15,55126 43 | 5108,367,4,962338215,"Mask, The (1994)",Comedy|Crime|Fantasy,F,25,9,93940 44 | 4502,1960,4,965094644,"Last Emperor, The (1987)",Drama|War,M,50,0,01379 45 | 5512,1801,5,959713840,"Man in the Iron Mask, The (1998)",Action|Drama|Romance,F,25,17,01701 46 | 1861,2642,2,974699627,Superman III (1983),Action|Adventure|Sci-Fi,M,50,16,92129 47 | 1667,1240,4,975016698,"Terminator, The (1984)",Action|Sci-Fi|Thriller,M,50,16,98516 48 | 753,434,3,975460449,Cliffhanger (1993),Action|Adventure|Crime,M,1,10,42754 49 | 1836,2736,5,974826228,Brighton Beach Memoirs (1986),Comedy,M,25,0,10016 50 | 5626,474,5,959052158,In the Line of Fire (1993),Action|Thriller,M,56,16,32043 51 | 1601,1396,4,978576948,Sneakers (1992),Crime|Drama|Sci-Fi,M,25,12,83001 52 | 4725,1100,4,963369546,Days of Thunder (1990),Action|Romance,M,35,5,96707-1321 53 | 2837,2396,5,972571456,Shakespeare in Love (1998),Comedy|Romance,M,18,0,49506 54 | 1776,3882,4,1001558470,Bring It On (2000),Comedy,M,25,0,45801 55 | 2820,457,2,972662398,"Fugitive, The (1993)",Action|Thriller,F,35,0,02138 56 | 1834,2288,3,1038179198,"Thing, The (1982)",Action|Horror|Sci-Fi|Thriller,M,35,5,10990 57 | 284,2716,4,976570902,Ghostbusters (1984),Comedy|Horror,M,25,12,91910 58 | 2744,588,1,973215985,Aladdin (1992),Animation|Children's|Comedy|Musical,M,18,17,53818 59 | 881,4,2,975264028,Waiting to Exhale (1995),Comedy|Drama,M,18,14,76401 60 | 2211,916,3,974607067,Roman Holiday (1953),Comedy|Romance,M,45,6,01950 61 | 2271,2671,4,1007158806,Notting Hill (1999),Comedy|Romance,M,50,14,13210 62 | 1010,2953,1,975222613,Home Alone 2: Lost in New York (1992),Children's|Comedy,M,25,0,10310 63 | 1589,2594,4,974735454,Open Your Eyes (Abre los ojos) (1997),Drama|Romance|Sci-Fi,M,25,0,95136 64 | 1724,597,5,976441106,Pretty Woman (1990),Comedy|Romance,M,18,4,00961 65 | 2590,2097,3,973840056,Something Wicked This Way Comes (1983),Children's|Horror,M,18,4,94044 66 | 1717,1352,3,1009256707,Albino Alligator (1996),Crime|Thriller,F,50,6,30307 67 | 1391,3160,2,974850796,Magnolia (1999),Drama,M,35,15,20723 68 | 1941,1263,3,974954220,"Deer Hunter, The (1978)",Drama|War,M,35,17,94550 69 | 3526,2867,4,966906064,Fright Night (1985),Comedy|Horror,M,35,2,62263-3004 70 | 5767,198,3,958192148,Strange 
Days (1995),Action|Crime|Sci-Fi,M,25,2,75287 71 | 5355,590,4,960596927,Dances with Wolves (1990),Adventure|Drama|Western,M,56,0,78232 72 | 5788,156,4,958108785,Blue in the Face (1995),Comedy,M,25,0,92646 73 | 1078,1307,4,974938851,When Harry Met Sally... (1989),Comedy|Romance,F,45,9,95661 74 | 3808,61,2,965973222,Eye for an Eye (1996),Drama|Thriller,M,25,7,60010 75 | 974,3897,4,975106398,Almost Famous (2000),Comedy|Drama,M,35,19,94930 76 | 5153,1290,4,961972292,Some Kind of Wonderful (1987),Drama|Romance,M,25,7,60046 77 | 5732,2115,3,958434069,Indiana Jones and the Temple of Doom (1984),Action|Adventure,F,25,11,02111 78 | 4627,2478,3,964110136,Three Amigos! (1986),Comedy|Western,M,56,1,45224 79 | 1884,1831,2,975648062,Lost in Space (1998),Action|Sci-Fi|Thriller,M,45,20,93108 80 | 4284,517,4,965277546,Rising Sun (1993),Action|Drama|Mystery,M,50,7,40601 81 | 1383,468,2,975979732,"Englishman Who Went Up a Hill, But Came Down a Mountain, The (1995)",Comedy|Romance,F,25,7,19806 82 | 2230,2873,3,974599097,Lulu on the Bridge (1998),Drama|Mystery|Romance,F,45,1,60302 83 | 2533,2266,4,974055724,"Butcher's Wife, The (1991)",Comedy|Romance,F,25,3,49423 84 | 6040,3224,5,956716750,Woman in the Dunes (Suna no onna) (1964),Drama,M,25,6,11106 85 | 4384,2918,5,965171739,Ferris Bueller's Day Off (1986),Comedy,M,25,0,43623 86 | 5156,3688,3,961946487,Porky's (1981),Comedy,M,18,14,10024 87 | 615,296,3,975805801,Pulp Fiction (1994),Crime|Drama,M,50,17,32951 88 | 2753,3045,3,973198964,Peter's Friends (1992),Comedy|Drama,F,50,20,27516 89 | 2438,1125,5,974259943,"Return of the Pink Panther, The (1974)",Comedy,M,35,1,22903 90 | 5746,1242,4,958354460,Glory (1989),Action|Drama|War,M,18,15,94061 91 | 5157,3462,5,961944604,Modern Times (1936),Comedy,M,35,1,74012 92 | 3402,1252,5,967433929,Chinatown (1974),Film-Noir|Mystery|Thriller,M,35,20,30306 93 | 76,593,5,977847255,"Silence of the Lambs, The (1991)",Drama|Thriller,M,35,7,55413 94 | 2067,1019,3,974658834,"20,000 Leagues Under the Sea (1954)",Adventure|Children's|Fantasy|Sci-Fi,M,50,16,06430 95 | 2181,2020,3,979353437,Dangerous Liaisons (1988),Drama|Romance,M,25,0,45245 96 | 3947,593,5,965691680,"Silence of the Lambs, The (1991)",Drama|Thriller,M,25,0,90019 97 | 546,218,4,976069421,Boys on the Side (1995),Comedy|Drama,F,25,0,37211 98 | 1246,3030,5,1032056405,Yojimbo (1961),Comedy|Drama|Western,M,18,4,98225 99 | 4214,3186,5,965319143,"Girl, Interrupted (1999)",Drama,F,25,0,20121 100 | 2841,680,3,982805796,Alphaville (1965),Sci-Fi,M,50,12,98056 101 | 4205,3175,4,965321085,Galaxy Quest (1999),Adventure|Comedy|Sci-Fi,F,25,15,87801 102 | 1120,1097,4,974911354,E.T. the Extra-Terrestrial (1982),Children's|Drama|Fantasy|Sci-Fi,M,18,4,95616 103 | 5371,3194,3,960481000,"Way We Were, The (1973)",Drama,M,25,11,55408 104 | 2695,1278,5,973310827,Young Frankenstein (1974),Comedy|Horror,M,35,11,46033 105 | 3312,520,2,976673070,Robin Hood: Men in Tights (1993),Comedy,F,18,4,90039 106 | 5039,1792,1,962513044,U.S. Marshalls (1998),Action|Thriller,F,35,4,97068 107 | 4655,2146,3,963903103,St. 
Elmo's Fire (1985),Drama|Romance,F,25,1,92037 108 | 3558,1580,5,966802528,Men in Black (1997),Action|Adventure|Comedy|Sci-Fi,M,18,17,66044 109 | 506,3354,1,976208080,Mission to Mars (2000),Sci-Fi,M,25,16,55103-1006 110 | 3568,1230,3,966745594,Annie Hall (1977),Comedy|Romance,M,25,0,98503 111 | 2943,1197,5,971319983,"Princess Bride, The (1987)",Action|Adventure|Comedy|Romance,M,35,12,95864 112 | 716,737,3,982881364,Barb Wire (1996),Action|Sci-Fi,M,18,4,98188 113 | 5964,454,3,956999469,"Firm, The (1993)",Drama|Thriller,M,18,5,97202 114 | 4802,1208,4,996034747,Apocalypse Now (1979),Drama|War,M,56,1,40601 115 | 1106,3624,4,974920622,Shanghai Noon (2000),Action,M,18,4,90241 116 | 3410,2565,3,967419652,"King and I, The (1956)",Musical,M,35,1,20653 117 | 1273,3095,5,974814536,"Grapes of Wrath, The (1940)",Drama,M,35,2,19123 118 | 1706,1916,4,974709448,Buffalo 66 (1998),Action|Comedy|Drama,M,25,20,19134 119 | 4889,590,5,962909224,Dances with Wolves (1990),Adventure|Drama|Western,M,18,4,63108 120 | 4966,2100,3,962609782,Splash (1984),Comedy|Fantasy|Romance,M,50,14,55407 121 | 4238,1884,4,965343416,Fear and Loathing in Las Vegas (1998),Comedy|Drama,M,35,16,44691 122 | 5365,1042,3,960502974,That Thing You Do! (1996),Comedy,M,18,12,90250 123 | 415,1302,3,977501743,Field of Dreams (1989),Drama,F,35,0,55406 124 | 4658,1009,5,963966553,Escape to Witch Mountain (1975),Adventure|Children's|Fantasy,M,25,4,99163 125 | 854,345,3,975357801,"Adventures of Priscilla, Queen of the Desert, The (1994)",Comedy|Drama,F,25,16,44092 126 | 2857,436,4,972509362,Color of Night (1994),Drama|Thriller,M,25,0,10469 127 | 1835,1330,4,974878241,April Fool's Day (1986),Comedy|Horror,M,25,19,11501 128 | 1321,2240,3,974778494,My Bodyguard (1980),Drama,F,25,14,34639 129 | 3274,3698,2,979767184,"Running Man, The (1987)",Action|Adventure|Sci-Fi,M,25,20,02062 130 | 5893,2144,3,957470619,Sixteen Candles (1984),Comedy,M,25,7,02139 131 | 3436,2724,3,967328026,Runaway Bride (1999),Comedy|Romance,M,35,0,98503 132 | 3315,2918,5,967942960,Ferris Bueller's Day Off (1986),Comedy,M,25,12,78731 133 | 5056,2700,5,962488280,"South Park: Bigger, Longer and Uncut (1999)",Animation|Comedy,M,45,1,16673 134 | 5256,208,2,961271616,Waterworld (1995),Action|Adventure,M,25,16,30269 135 | 4290,1193,4,965274348,One Flew Over the Cuckoo's Nest (1975),Drama,M,25,17,98661 136 | 1010,1379,2,975220259,Young Guns II (1990),Action|Comedy|Western,M,25,0,10310 137 | 829,904,4,975368038,Rear Window (1954),Mystery|Thriller,M,1,19,53711 138 | 5953,480,4,957143581,Jurassic Park (1993),Action|Adventure|Sci-Fi,M,1,10,21030 139 | 4732,3016,4,963332896,Creepshow (1982),Horror,M,25,14,24450 140 | 4815,3181,5,972240802,Titus (1999),Drama,F,50,18,04849 141 | 1164,1894,2,1004486985,Six Days Seven Nights (1998),Adventure|Comedy|Romance,F,25,19,90020 142 | 4373,3167,5,965180829,Carnal Knowledge (1971),Drama,M,50,12,32920 143 | 5293,1374,4,961055887,Star Trek: The Wrath of Khan (1982),Action|Adventure|Sci-Fi,M,25,12,95030 144 | 1579,3101,4,981272057,Fatal Attraction (1987),Thriller,M,25,0,60201 145 | 2600,3147,5,973804787,"Green Mile, The (1999)",Drama|Thriller,M,25,14,19312 146 | 1283,480,4,974793389,Jurassic Park (1993),Action|Adventure|Sci-Fi,F,18,1,94607 147 | 3242,3062,5,968341175,"Longest Day, The (1962)",Action|Drama|War,M,50,13,94089 148 | 3618,3374,3,967116272,Daughters of the Dust (1992),Drama,M,56,17,22657 149 | 3762,1337,4,966434517,"Body Snatcher, The (1945)",Horror,M,50,6,11746 150 | 1015,1184,3,975018699,Mediterraneo (1991),Comedy|War,M,35,3,11220 151 | 
4645,2344,5,963976808,Runaway Train (1985),Action|Adventure|Drama|Thriller,F,50,6,48094 152 | 3184,1397,4,968709039,Bastard Out of Carolina (1996),Drama,F,25,18,21214 153 | 1285,1794,4,974833328,Love and Death on Long Island (1997),Comedy|Drama,M,35,4,98125 154 | 5521,3354,2,959833154,Mission to Mars (2000),Sci-Fi,F,25,6,02118 155 | 1472,2278,3,974767792,Ronin (1998),Action|Crime|Thriller,M,25,7,90248 156 | 5630,21,4,980085414,Get Shorty (1995),Action|Comedy|Drama,M,35,17,06854 157 | 3710,3033,5,966272980,Spaceballs (1987),Comedy|Sci-Fi,M,1,10,02818 158 | 192,761,1,977028390,"Phantom, The (1996)",Adventure,M,18,1,10977 159 | 1285,1198,5,974880310,Raiders of the Lost Ark (1981),Action|Adventure,M,35,4,98125 160 | 2174,1046,4,974613044,Beautiful Thing (1996),Drama|Romance,M,50,12,87505 161 | 635,1270,4,975768106,Back to the Future (1985),Comedy|Sci-Fi,M,56,17,33785 162 | 910,412,5,975207742,"Age of Innocence, The (1993)",Drama,F,50,0,98226 163 | 1752,2021,4,975729332,Dune (1984),Fantasy|Sci-Fi,M,25,3,96813 164 | 1408,198,4,974762924,Strange Days (1995),Action|Crime|Sci-Fi,M,25,0,90046 165 | 4738,1242,4,963279051,Glory (1989),Action|Drama|War,M,56,1,23608 166 | 1503,1971,2,974748897,"Nightmare on Elm Street 4: The Dream Master, A (1988)",Horror,M,25,12,92688 167 | 3053,1296,3,970601837,"Room with a View, A (1986)",Drama|Romance,F,25,3,55102 168 | 3471,3614,2,973297828,Honeymoon in Vegas (1992),Comedy|Romance,M,18,4,80302 169 | 678,1972,3,988638700,"Nightmare on Elm Street 5: The Dream Child, A (1989)",Horror,M,25,0,34952 170 | 3483,2561,3,986327282,True Crime (1999),Crime|Thriller,F,45,7,30260 171 | 3910,3108,5,965756244,"Fisher King, The (1991)",Comedy|Drama|Romance,M,25,20,91505 172 | 182,1089,1,977085647,Reservoir Dogs (1992),Crime|Thriller,M,18,4,03052 173 | 1755,1653,3,1036917836,Gattaca (1997),Drama|Sci-Fi|Thriller,F,18,4,77005 174 | 3589,70,2,966658567,From Dusk Till Dawn (1996),Action|Comedy|Crime|Horror|Thriller,F,45,0,80010 175 | 471,3481,4,976222483,High Fidelity (2000),Comedy,M,35,7,08904 176 | 1141,813,2,974878678,Larger Than Life (1996),Comedy,F,25,3,84770 177 | 5227,1196,2,961476022,Star Wars: Episode V - The Empire Strikes Back (1980),Action|Adventure|Drama|Sci-Fi|War,M,18,10,64050 178 | 1303,2344,2,974837844,Runaway Train (1985),Action|Adventure|Drama|Thriller,M,25,19,94111 179 | 5080,3102,5,962412804,Jagged Edge (1985),Thriller,F,50,12,95472 180 | 2023,1012,4,1006290836,Old Yeller (1957),Children's|Drama,M,18,4,56001 181 | 3759,2151,5,966094413,"Gods Must Be Crazy II, The (1989)",Comedy,M,35,6,54751 182 | 1685,2664,2,974709721,Invasion of the Body Snatchers (1956),Horror|Sci-Fi,M,35,12,95833 183 | 4715,1221,4,963508830,"Godfather: Part II, The (1974)",Action|Crime|Drama,M,25,2,97205 184 | 1591,350,5,974742941,"Client, The (1994)",Drama|Mystery|Thriller,M,50,7,26501 185 | 4227,3635,3,965411938,"Spy Who Loved Me, The (1977)",Action,M,25,19,11414-2520 186 | 1908,36,5,974697744,Dead Man Walking (1995),Drama,M,56,13,95129 187 | 5365,1892,4,960503255,"Perfect Murder, A (1998)",Mystery|Thriller,M,18,12,90250 188 | 1579,2420,4,981272235,"Karate Kid, The (1984)",Drama,M,25,0,60201 189 | 1866,3948,5,974753321,Meet the Parents (2000),Comedy,M,25,7,94043 190 | 4238,3543,4,965415533,Diner (1982),Comedy|Drama,M,35,16,44691 191 | 3590,2000,5,966657892,Lethal Weapon (1987),Action|Comedy|Crime|Drama,F,18,15,02115 192 | 3401,3256,5,980115327,Patriot Games (1992),Action|Thriller,M,35,7,76109 193 | 3705,540,2,966287116,Sliver (1993),Thriller,M,45,7,30076 194 | 4973,1246,3,962607149,Dead Poets 
Society (1989),Drama,F,56,2,949702 195 | 4947,380,4,962651180,True Lies (1994),Action|Adventure|Comedy|Romance,M,35,17,90035 196 | 2346,1416,4,974413811,Evita (1996),Drama|Musical,F,1,10,48105 197 | 1427,3596,3,974840560,Screwed (2000),Comedy,M,25,12,21401 198 | 3868,1626,3,965855033,Fire Down Below (1997),Action|Drama|Thriller,M,18,12,73112 199 | 249,2369,3,976730191,Desperately Seeking Susan (1985),Comedy|Romance,F,18,14,48126 200 | 5720,349,4,958503395,Clear and Present Danger (1994),Action|Adventure|Thriller,M,25,0,60610 201 | 877,1485,3,975270899,Liar Liar (1997),Comedy,M,25,0,90631 202 | --------------------------------------------------------------------------------
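For orientation, the sample file above can be turned into the {'sparse': {feature_name: vocabulary_size}} dictionary shape that the model code earlier in this dump indexes (for example region_feature_dim_dict['sparse'][feat] in deepRS/models/mlr.py). What follows is a minimal sketch, assuming pandas and scikit-learn are available; the repository's actual preprocessing lives in examples/run_regression_movielens.py, and the column names below come from the CSV header row.

import pandas as pd
from sklearn.preprocessing import LabelEncoder

# Load the sample shown above; quoted titles containing commas parse correctly.
data = pd.read_csv('examples/movielens_sample.txt')
sparse_features = ['user_id', 'movie_id', 'gender', 'age', 'occupation', 'zip']

# Ordinal-encode each sparse feature so its values can index an Embedding row.
for feat in sparse_features:
    data[feat] = LabelEncoder().fit_transform(data[feat])

# Vocabulary sizes in the {'sparse': {name: dim}} form the models consume.
feature_dim_dict = {'sparse': {feat: data[feat].nunique()
                               for feat in sparse_features}}

# 'rating' is presumably the regression target in run_regression_movielens.py.
target = data['rating'].values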