├── tests ├── __init__.py ├── models │ ├── __init__.py │ ├── FNN_test.py │ ├── NFM_test.py │ ├── AFM_test.py │ ├── DeepFM_test.py │ ├── AutoInt_test.py │ ├── PNN_test.py │ ├── DCN_test.py │ ├── WDL_test.py │ ├── xDeepFM_test.py │ ├── MLR_test.py │ └── DIN_test.py ├── README.md ├── utils_test.py ├── activations_test.py ├── sequence_test.py ├── layers_test.py └── utils.py ├── README.md ├── docs ├── requirements.readthedocs.txt ├── pics │ ├── AFM.png │ ├── CIN.png │ ├── DCN.png │ ├── DIN.png │ ├── FNN.png │ ├── MLR.png │ ├── NFM.png │ ├── PNN.png │ ├── WDL.png │ ├── fms.png │ ├── mlr1.png │ ├── AutoInt.png │ ├── DeepFM.png │ ├── xDeepFM.png │ ├── mlrvsdnn.png │ ├── criteo_sample.png │ ├── InteractingLayer.png │ └── movielens_sample.png ├── source │ ├── modules.rst │ ├── deepctr.utils.rst │ ├── deepctr.layers.rst │ ├── deepctr.sequence.rst │ ├── deepctr.models.afm.rst │ ├── deepctr.models.dcn.rst │ ├── deepctr.models.din.rst │ ├── deepctr.models.fnn.rst │ ├── deepctr.models.mlr.rst │ ├── deepctr.models.nfm.rst │ ├── deepctr.models.pnn.rst │ ├── deepctr.models.wdl.rst │ ├── deepctr.activations.rst │ ├── deepctr.models.deepfm.rst │ ├── deepctr.models.autoint.rst │ ├── deepctr.models.xdeepfm.rst │ ├── Models-API.rst │ ├── deepctr.rst │ ├── deepctr.models.rst │ ├── History.md │ ├── index.rst │ ├── FAQ.rst │ ├── Quick-Start.rst │ ├── Demo.rst │ ├── conf.py │ └── Features.rst ├── Makefile └── make.bat ├── deepRS ├── __init__.py ├── models │ ├── __init__.py │ ├── fnn.py │ ├── wdl.py │ ├── nfm.py │ ├── afm.py │ ├── pnn.py │ ├── deepfm.py │ ├── xdeepfm.py │ ├── autoint.py │ ├── dcn.py │ ├── din.py │ └── mlr.py ├── activations.py ├── utils.py └── sequence.py ├── .gitattributes ├── setup.cfg ├── LICENSE ├── examples ├── run_regression_movielens.py ├── run_classification_criteo.py ├── run_din.py └── movielens_sample.txt ├── .travis.yml ├── setup.py └── .gitignore /tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # DeepRS 2 | -------------------------------------------------------------------------------- /tests/models/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /docs/requirements.readthedocs.txt: -------------------------------------------------------------------------------- 1 | tensorflow==1.4.0 -------------------------------------------------------------------------------- /docs/pics/AFM.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wubinzzu/DeepRS/HEAD/docs/pics/AFM.png -------------------------------------------------------------------------------- /docs/pics/CIN.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wubinzzu/DeepRS/HEAD/docs/pics/CIN.png -------------------------------------------------------------------------------- /docs/pics/DCN.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wubinzzu/DeepRS/HEAD/docs/pics/DCN.png -------------------------------------------------------------------------------- /docs/pics/DIN.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/wubinzzu/DeepRS/HEAD/docs/pics/DIN.png -------------------------------------------------------------------------------- /docs/pics/FNN.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wubinzzu/DeepRS/HEAD/docs/pics/FNN.png -------------------------------------------------------------------------------- /docs/pics/MLR.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wubinzzu/DeepRS/HEAD/docs/pics/MLR.png -------------------------------------------------------------------------------- /docs/pics/NFM.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wubinzzu/DeepRS/HEAD/docs/pics/NFM.png -------------------------------------------------------------------------------- /docs/pics/PNN.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wubinzzu/DeepRS/HEAD/docs/pics/PNN.png -------------------------------------------------------------------------------- /docs/pics/WDL.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wubinzzu/DeepRS/HEAD/docs/pics/WDL.png -------------------------------------------------------------------------------- /docs/pics/fms.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wubinzzu/DeepRS/HEAD/docs/pics/fms.png -------------------------------------------------------------------------------- /docs/pics/mlr1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wubinzzu/DeepRS/HEAD/docs/pics/mlr1.png -------------------------------------------------------------------------------- /docs/pics/AutoInt.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wubinzzu/DeepRS/HEAD/docs/pics/AutoInt.png -------------------------------------------------------------------------------- /docs/pics/DeepFM.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wubinzzu/DeepRS/HEAD/docs/pics/DeepFM.png -------------------------------------------------------------------------------- /docs/pics/xDeepFM.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wubinzzu/DeepRS/HEAD/docs/pics/xDeepFM.png -------------------------------------------------------------------------------- /tests/README.md: -------------------------------------------------------------------------------- 1 | # Test Log 2 | 3 | When using Dice, `save model` of `DIN` will cause an error.
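A minimal workaround sketch, assuming the failure is specific to whole-model saving: save weights only (as shown in the FAQ's save/load section) and rebuild the model before reloading. Here `feature_dim_dict` and `behavior_feature_list` stand for the inputs used in `examples/run_din.py`.

```python
# Hedged workaround sketch: skip save_model and persist weights only,
# so the Dice layer never has to be serialized with the architecture.
from deepctr.models import DIN

model = DIN(feature_dim_dict, behavior_feature_list, hist_len_max=4)
model.save_weights('DIN_w.h5')   # weights-only save works

model = DIN(feature_dim_dict, behavior_feature_list, hist_len_max=4)
model.load_weights('DIN_w.h5')   # rebuild the model first, then load the weights
```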
-------------------------------------------------------------------------------- /docs/pics/mlrvsdnn.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wubinzzu/DeepRS/HEAD/docs/pics/mlrvsdnn.png -------------------------------------------------------------------------------- /docs/pics/criteo_sample.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wubinzzu/DeepRS/HEAD/docs/pics/criteo_sample.png -------------------------------------------------------------------------------- /docs/pics/InteractingLayer.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wubinzzu/DeepRS/HEAD/docs/pics/InteractingLayer.png -------------------------------------------------------------------------------- /docs/pics/movielens_sample.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wubinzzu/DeepRS/HEAD/docs/pics/movielens_sample.png -------------------------------------------------------------------------------- /docs/source/modules.rst: -------------------------------------------------------------------------------- 1 | deepctr 2 | ======= 3 | 4 | .. toctree:: 5 | :maxdepth: 4 6 | 7 | deepctr 8 | -------------------------------------------------------------------------------- /tests/utils_test.py: -------------------------------------------------------------------------------- 1 | from deepctr.utils import check_version 2 | 3 | 4 | def test_check_version(): 5 | check_version('0.1.0') 6 | check_version(124214) 7 | -------------------------------------------------------------------------------- /docs/source/deepctr.utils.rst: -------------------------------------------------------------------------------- 1 | deepctr.utils module 2 | ==================== 3 | 4 | .. automodule:: deepctr.utils 5 | :members: 6 | :no-undoc-members: 7 | :no-show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/source/deepctr.layers.rst: -------------------------------------------------------------------------------- 1 | deepctr.layers module 2 | ===================== 3 | 4 | .. automodule:: deepctr.layers 5 | :members: 6 | :no-undoc-members: 7 | :no-show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/source/deepctr.sequence.rst: -------------------------------------------------------------------------------- 1 | deepctr.sequence module 2 | ======================= 3 | 4 | .. automodule:: deepctr.sequence 5 | :members: 6 | :no-undoc-members: 7 | :no-show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/source/deepctr.models.afm.rst: -------------------------------------------------------------------------------- 1 | deepctr.models.afm module 2 | ========================= 3 | 4 | .. automodule:: deepctr.models.afm 5 | :members: 6 | :no-undoc-members: 7 | :no-show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/source/deepctr.models.dcn.rst: -------------------------------------------------------------------------------- 1 | deepctr.models.dcn module 2 | ========================= 3 | 4 | .. 
automodule:: deepctr.models.dcn 5 | :members: 6 | :no-undoc-members: 7 | :no-show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/source/deepctr.models.din.rst: -------------------------------------------------------------------------------- 1 | deepctr.models.din module 2 | ========================= 3 | 4 | .. automodule:: deepctr.models.din 5 | :members: 6 | :no-undoc-members: 7 | :no-show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/source/deepctr.models.fnn.rst: -------------------------------------------------------------------------------- 1 | deepctr.models.fnn module 2 | ========================= 3 | 4 | .. automodule:: deepctr.models.fnn 5 | :members: 6 | :no-undoc-members: 7 | :no-show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/source/deepctr.models.mlr.rst: -------------------------------------------------------------------------------- 1 | deepctr.models.mlr module 2 | ========================= 3 | 4 | .. automodule:: deepctr.models.mlr 5 | :members: 6 | :no-undoc-members: 7 | :no-show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/source/deepctr.models.nfm.rst: -------------------------------------------------------------------------------- 1 | deepctr.models.nfm module 2 | ========================= 3 | 4 | .. automodule:: deepctr.models.nfm 5 | :members: 6 | :no-undoc-members: 7 | :no-show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/source/deepctr.models.pnn.rst: -------------------------------------------------------------------------------- 1 | deepctr.models.pnn module 2 | ========================= 3 | 4 | .. automodule:: deepctr.models.pnn 5 | :members: 6 | :no-undoc-members: 7 | :no-show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/source/deepctr.models.wdl.rst: -------------------------------------------------------------------------------- 1 | deepctr.models.wdl module 2 | ========================= 3 | 4 | .. automodule:: deepctr.models.wdl 5 | :members: 6 | :no-undoc-members: 7 | :no-show-inheritance: 8 | -------------------------------------------------------------------------------- /deepRS/__init__.py: -------------------------------------------------------------------------------- 1 | from . import activations 2 | from . import layers 3 | from . import sequence 4 | from . import models 5 | from .utils import check_version 6 | __version__ = '0.2.1' 7 | check_version(__version__) 8 | -------------------------------------------------------------------------------- /docs/source/deepctr.activations.rst: -------------------------------------------------------------------------------- 1 | deepctr.activations module 2 | ========================== 3 | 4 | .. automodule:: deepctr.activations 5 | :members: 6 | :no-undoc-members: 7 | :no-show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/source/deepctr.models.deepfm.rst: -------------------------------------------------------------------------------- 1 | deepctr.models.deepfm module 2 | ============================ 3 | 4 | ..
automodule:: deepctr.models.deepfm 5 | :members: 6 | :no-undoc-members: 7 | :no-show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/source/deepctr.models.autoint.rst: -------------------------------------------------------------------------------- 1 | deepctr.models.autoint module 2 | ============================= 3 | 4 | .. automodule:: deepctr.models.autoint 5 | :members: 6 | :no-undoc-members: 7 | :no-show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/source/deepctr.models.xdeepfm.rst: -------------------------------------------------------------------------------- 1 | deepctr.models.xdeepfm module 2 | ============================= 3 | 4 | .. automodule:: deepctr.models.xdeepfm 5 | :members: 6 | :no-undoc-members: 7 | :no-show-inheritance: 8 | -------------------------------------------------------------------------------- /tests/activations_test.py: -------------------------------------------------------------------------------- 1 | from deepctr import activations 2 | from tensorflow.python.keras.utils import CustomObjectScope 3 | from .utils import layer_test 4 | 5 | 6 | def test_dice(): 7 | with CustomObjectScope({'Dice': activations.Dice}): 8 | layer_test(activations.Dice, kwargs={}, 9 | input_shape=(2, 3)) 10 | -------------------------------------------------------------------------------- /deepRS/models/__init__.py: -------------------------------------------------------------------------------- 1 | from .afm import AFM 2 | from .dcn import DCN 3 | from .mlr import MLR 4 | from .deepfm import DeepFM 5 | from .nfm import NFM 6 | from .din import DIN 7 | from .fnn import FNN 8 | from .pnn import PNN 9 | from .wdl import WDL 10 | from .xdeepfm import xDeepFM 11 | from .autoint import AutoInt 12 | 13 | __all__ = ["AFM", "DCN", "MLR", "DeepFM", 14 | "NFM", "DIN", "FNN", "PNN", "WDL", "xDeepFM", "AutoInt"] 15 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | # Auto detect text files and perform LF normalization 2 | * text=auto 3 | 4 | # Custom for Visual Studio 5 | *.cs diff=csharp 6 | 7 | # Standard to msysgit 8 | *.doc diff=astextplain 9 | *.DOC diff=astextplain 10 | *.docx diff=astextplain 11 | *.DOCX diff=astextplain 12 | *.dot diff=astextplain 13 | *.DOT diff=astextplain 14 | *.pdf diff=astextplain 15 | *.PDF diff=astextplain 16 | *.rtf diff=astextplain 17 | *.RTF diff=astextplain 18 | -------------------------------------------------------------------------------- /docs/source/Models-API.rst: -------------------------------------------------------------------------------- 1 | DeepCTR Models API 2 | ====================== 3 | 4 | .. toctree:: 5 | 6 | FNN<deepctr.models.fnn> 7 | PNN<deepctr.models.pnn> 8 | WDL<deepctr.models.wdl> 9 | DeepFM<deepctr.models.deepfm> 10 | MLR<deepctr.models.mlr> 11 | NFM<deepctr.models.nfm> 12 | AFM<deepctr.models.afm> 13 | DCN<deepctr.models.dcn> 14 | DIN<deepctr.models.din> 15 | xDeepFM<deepctr.models.xdeepfm> 16 | AutoInt<deepctr.models.autoint> -------------------------------------------------------------------------------- /docs/source/deepctr.rst: -------------------------------------------------------------------------------- 1 | deepctr package 2 | =============== 3 | 4 | Subpackages 5 | ----------- 6 | 7 | .. toctree:: 8 | 9 | deepctr.models 10 | 11 | Submodules 12 | ---------- 13 | 14 | .. toctree:: 15 | 16 | deepctr.activations 17 | deepctr.layers 18 | deepctr.sequence 19 | deepctr.utils 20 | 21 | Module contents 22 | --------------- 23 | 24 | ..
automodule:: deepctr 25 | :members: 26 | :no-undoc-members: 27 | :no-show-inheritance: 28 | -------------------------------------------------------------------------------- /docs/source/deepctr.models.rst: -------------------------------------------------------------------------------- 1 | deepctr.models package 2 | ====================== 3 | 4 | Submodules 5 | ---------- 6 | 7 | .. toctree:: 8 | 9 | deepctr.models.afm 10 | deepctr.models.autoint 11 | deepctr.models.dcn 12 | deepctr.models.deepfm 13 | deepctr.models.din 14 | deepctr.models.fnn 15 | deepctr.models.mlr 16 | deepctr.models.nfm 17 | deepctr.models.pnn 18 | deepctr.models.wdl 19 | deepctr.models.xdeepfm 20 | 21 | Module contents 22 | --------------- 23 | 24 | .. automodule:: deepctr.models 25 | :members: 26 | :no-undoc-members: 27 | :no-show-inheritance: 28 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [metadata] 2 | description-file = README.md 3 | 4 | #[coverage:run] 5 | #branch = True 6 | 7 | [coverage:report] 8 | exclude_lines = 9 | # Have to re-enable the standard pragma 10 | pragma: no cover 11 | # Don't complain about missing debug-only code: 12 | def __repr__ 13 | if self\.debug 14 | 15 | # Don't complain if tests don't hit defensive assertion code: 16 | raise ValueError 17 | raise AssertionError 18 | raise NotImplementedError 19 | 20 | # Don't complain if non-runnable code isn't run: 21 | if 0: 22 | if False: 23 | if __name__ == .__main__.: -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | SPHINXPROJ = DeepCTR 8 | SOURCEDIR = source 9 | BUILDDIR = build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) -------------------------------------------------------------------------------- /docs/source/History.md: -------------------------------------------------------------------------------- 1 | # History 2 | - 12/27/2018 : [v0.2.1](https://github.com/shenweichen/DeepCTR/releases/tag/v0.2.1) released. Add [AutoInt](./Features.html#autoint-automatic-feature-interaction) model. 3 | - 12/22/2018 : [v0.2.0](https://github.com/shenweichen/DeepCTR/releases/tag/v0.2.0) released. Add [xDeepFM](./Features.html#xdeepfm) and an automatic check for new versions. 4 | - 12/19/2018 : [v0.1.6](https://github.com/shenweichen/DeepCTR/releases/tag/v0.1.6) released. Now DeepCTR is compatible with tensorflow from `1.4-1.12` except for `1.7` and `1.8`.
5 | - 11/29/2018 : [v0.1.4](https://github.com/shenweichen/DeepCTR/releases/tag/v0.1.4) released. Add [FAQ](./FAQ.html) to the docs. 6 | - 11/24/2018 : DeepCTR's first version, v0.1.0, is released on [PyPi](https://pypi.org/project/deepctr/) -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=source 11 | set BUILDDIR=build 12 | set SPHINXPROJ=DeepCTR 13 | 14 | if "%1" == "" goto help 15 | 16 | %SPHINXBUILD% >NUL 2>NUL 17 | if errorlevel 9009 ( 18 | echo. 19 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 20 | echo.installed, then set the SPHINXBUILD environment variable to point 21 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 22 | echo.may add the Sphinx directory to PATH. 23 | echo. 24 | echo.If you don't have Sphinx installed, grab it from 25 | echo.http://sphinx-doc.org/ 26 | exit /b 1 27 | ) 28 | 29 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% 30 | goto end 31 | 32 | :help 33 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% 34 | 35 | :end 36 | popd 37 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2018 Weichen Shen 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE.
-------------------------------------------------------------------------------- /examples/run_regression_movielens.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | from sklearn.preprocessing import LabelEncoder 3 | from deepctr.models import DeepFM 4 | 5 | if __name__ == "__main__": 6 | 7 | data = pd.read_csv("./movielens_sample.txt") 8 | sparse_features = ["movie_id", "user_id", 9 | "gender", "age", "occupation", "zip"] 10 | target = ['rating'] 11 | 12 | # 1. Label Encoding for sparse features, and simple Transformation for dense features 13 | for feat in sparse_features: 14 | lbe = LabelEncoder() 15 | data[feat] = lbe.fit_transform(data[feat]) 16 | # 2. Count #unique features for each sparse field 17 | sparse_feature_dim = {feat: data[feat].nunique() 18 | for feat in sparse_features} 19 | # 3. Generate input data for the model 20 | model_input = [data[feat].values for feat in sparse_feature_dim] 21 | # 4. Define model, compile and train 22 | model = DeepFM({"sparse": sparse_feature_dim, "dense": []}, 23 | final_activation='linear') 24 | 25 | model.compile("adam", "mse", metrics=['mse'],) 26 | history = model.fit(model_input, data[target].values, 27 | batch_size=256, epochs=10, verbose=2, validation_split=0.2,) 28 | 29 | print("demo done") 30 | -------------------------------------------------------------------------------- /tests/models/FNN_test.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pytest 3 | from deepctr.models import FNN 4 | 5 | from ..utils import check_model 6 | 7 | 8 | @pytest.mark.parametrize( 9 | 'sparse_feature_num', 10 | [1, 3 11 | ] 12 | ) 13 | def test_FNN(sparse_feature_num): 14 | model_name = "FNN" 15 | 16 | sample_size = 64 17 | feature_dim_dict = {"sparse": {}, 'dense': []} 18 | for name, num in zip(["sparse", "dense"], [sparse_feature_num, sparse_feature_num]): 19 | if name == "sparse": 20 | for i in range(num): 21 | feature_dim_dict[name][name + '_' + 22 | str(i)] = np.random.randint(1, 10) 23 | else: 24 | for i in range(num): 25 | feature_dim_dict[name].append(name + '_' + str(i)) 26 | 27 | sparse_input = [np.random.randint(0, dim, sample_size) 28 | for dim in feature_dim_dict['sparse'].values()] 29 | dense_input = [np.random.random(sample_size) 30 | for name in feature_dim_dict['dense']] 31 | y = np.random.randint(0, 2, sample_size) 32 | x = sparse_input + dense_input 33 | 34 | model = FNN(feature_dim_dict, hidden_size=[32, 32], keep_prob=0.5, ) 35 | check_model(model, model_name, x, y) 36 | 37 | 38 | if __name__ == "__main__": 39 | test_FNN(2) 40 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | #sudo: required 2 | #dist: trusty xenial 3 | language: python 4 | 5 | python: 6 | - "3.4" 7 | - "3.5" 8 | - "3.6" 9 | 10 | env: 11 | - TF_VERSION=1.12.0 12 | - TF_VERSION=1.4.0 13 | #Not Support- TF_VERSION=1.7.0 14 | #Not Support- TF_VERSION=1.7.1 15 | #Not Support- TF_VERSION=1.8.0 16 | - TF_VERSION=1.9.0 17 | - TF_VERSION=1.10.0 #- TF_VERSION=1.10.1 18 | - TF_VERSION=1.11.0 19 | - TF_VERSION=1.5.1 #- TF_VERSION=1.5.0 20 | - TF_VERSION=1.6.0 21 | 22 | matrix: 23 | allow_failures: 24 | - python: "3.4" 25 | - python: "3.5" 26 | - env: TF_VERSION=1.5.0 # local is ok, but the CI sometimes fails 27 | - env: TF_VERSION=1.7.0 28 | - env: TF_VERSION=1.7.1 29 | - env: TF_VERSION=1.8.0 30 | fast_finish: true 31 | 32 | 
33 | cache: pip 34 | # command to install dependencies 35 | install: 36 | - pip install -q pytest-cov==2.4.0 #>=2.4.0,<2.6 37 | - pip install -q python-coveralls 38 | - pip install -q codacy-coverage 39 | - pip install -q tensorflow==$TF_VERSION 40 | - pip install -e . 41 | # command to run tests 42 | script: 43 | - pytest --cov=deepctr 44 | 45 | notifications: 46 | recipients: 47 | - wcshen1994@163.com 48 | 49 | on_success: change 50 | on_failure: always 51 | 52 | after_success: 53 | - coveralls 54 | - coverage xml 55 | - python-codacy-coverage -r coverage.xml -------------------------------------------------------------------------------- /tests/sequence_test.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from tensorflow.python.keras.utils import CustomObjectScope 3 | 4 | from deepctr import sequence 5 | 6 | from .utils import layer_test 7 | 8 | BATCH_SIZE = 4 9 | EMBEDDING_SIZE = 8 10 | SEQ_LENGTH = 10 11 | 12 | 13 | @pytest.mark.parametrize( 14 | 15 | 'weight_normalization', 16 | 17 | [weight_normalization 18 | for 19 | weight_normalization in [True, False, ] 20 | ] 21 | 22 | ) 23 | def test_AttentionSequencePoolingLayer(weight_normalization): 24 | with CustomObjectScope({'AttentionSequencePoolingLayer': sequence.AttentionSequencePoolingLayer}): 25 | layer_test(sequence.AttentionSequencePoolingLayer, kwargs={'weight_normalization': weight_normalization}, 26 | input_shape=[(BATCH_SIZE, 1, EMBEDDING_SIZE), (BATCH_SIZE, SEQ_LENGTH, EMBEDDING_SIZE), (BATCH_SIZE, 1)]) 27 | 28 | 29 | @pytest.mark.parametrize( 30 | 31 | 'seq_len_max,mode', 32 | 33 | [(SEQ_LENGTH, mode) 34 | 35 | for mode in ['sum', 'mean', 'max'] 36 | ] 37 | 38 | ) 39 | def test_SequencePoolingLayer(seq_len_max, mode): 40 | with CustomObjectScope({'SequencePoolingLayer': sequence.SequencePoolingLayer}): 41 | layer_test(sequence.SequencePoolingLayer, kwargs={'seq_len_max': seq_len_max, 'mode': mode}, 42 | input_shape=[(BATCH_SIZE, SEQ_LENGTH, EMBEDDING_SIZE), (BATCH_SIZE, 1)]) 43 | -------------------------------------------------------------------------------- /tests/models/NFM_test.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pytest 3 | from deepctr.models import NFM 4 | from ..utils import check_model 5 | 6 | 7 | @pytest.mark.parametrize( 8 | 'hidden_size,sparse_feature_num', 9 | [((8,), 1), ((8, 8,), 2)] 10 | ) 11 | def test_NFM(hidden_size, sparse_feature_num): 12 | 13 | model_name = "NFM" 14 | 15 | sample_size = 64 16 | feature_dim_dict = {"sparse": {}, 'dense': []} 17 | for name, num in zip(["sparse", "dense"], [sparse_feature_num, sparse_feature_num]): 18 | if name == "sparse": 19 | for i in range(num): 20 | feature_dim_dict[name][name + '_' + 21 | str(i)] = np.random.randint(1, 10) 22 | else: 23 | for i in range(num): 24 | feature_dim_dict[name].append(name + '_' + str(i)) 25 | sparse_input = [np.random.randint(0, dim, sample_size) 26 | for dim in feature_dim_dict['sparse'].values()] 27 | dense_input = [np.random.random(sample_size) 28 | for name in feature_dim_dict['dense']] 29 | y = np.random.randint(0, 2, sample_size) 30 | x = sparse_input + dense_input 31 | 32 | model = NFM(feature_dim_dict, embedding_size=8, 33 | hidden_size=[32, 32], keep_prob=0.5, ) 34 | check_model(model, model_name, x, y) 35 | 36 | 37 | if __name__ == "__main__": 38 | test_NFM((8, 8), 1) 39 | -------------------------------------------------------------------------------- /tests/models/AFM_test.py: 
-------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pytest 3 | from deepctr.models import AFM 4 | from ..utils import check_model 5 | 6 | 7 | @pytest.mark.parametrize( 8 | 'use_attention,sparse_feature_num', 9 | [(True, 1), (False, 3) 10 | ] 11 | ) 12 | def test_AFM(use_attention, sparse_feature_num): 13 | model_name = "AFM" 14 | 15 | sample_size = 64 16 | feature_dim_dict = {"sparse": {}, 'dense': []} 17 | for name, num in zip(["sparse", "dense"], [sparse_feature_num, sparse_feature_num]): 18 | if name == "sparse": 19 | for i in range(num): 20 | feature_dim_dict[name][name + '_' + 21 | str(i)] = np.random.randint(1, 10) 22 | else: 23 | for i in range(num): 24 | feature_dim_dict[name].append(name + '_' + str(i)) 25 | sparse_input = [np.random.randint(0, dim, sample_size) 26 | for dim in feature_dim_dict['sparse'].values()] 27 | dense_input = [np.random.random(sample_size) 28 | for name in feature_dim_dict['dense']] 29 | y = np.random.randint(0, 2, sample_size) 30 | x = sparse_input + dense_input 31 | 32 | model = AFM(feature_dim_dict, use_attention=use_attention, keep_prob=0.5,) 33 | check_model(model, model_name, x, y) 34 | 35 | 36 | if __name__ == "__main__": 37 | test_AFM(use_attention=True, sparse_feature_num=2) 38 | -------------------------------------------------------------------------------- /tests/models/DeepFM_test.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pytest 3 | from deepctr.models import DeepFM 4 | from ..utils import check_model 5 | 6 | 7 | @pytest.mark.parametrize( 8 | 'use_fm,hidden_size,sparse_feature_num', 9 | [(True, (), 1), (False, (), 2), (True, (32,), 3), (False, (32,), 1) 10 | ] 11 | ) 12 | def test_DeepFM(use_fm, hidden_size, sparse_feature_num): 13 | model_name = "DeepFM" 14 | sample_size = 64 15 | feature_dim_dict = {"sparse": {}, 'dense': []} 16 | for name, num in zip(["sparse", "dense"], [sparse_feature_num, sparse_feature_num]): 17 | if name == "sparse": 18 | for i in range(num): 19 | feature_dim_dict[name][name + '_' + 20 | str(i)] = np.random.randint(1, 10) 21 | else: 22 | for i in range(num): 23 | feature_dim_dict[name].append(name + '_' + str(i)) 24 | 25 | sparse_input = [np.random.randint(0, dim, sample_size) 26 | for dim in feature_dim_dict['sparse'].values()] 27 | dense_input = [np.random.random(sample_size) 28 | for name in feature_dim_dict['dense']] 29 | y = np.random.randint(0, 2, sample_size) 30 | x = sparse_input + dense_input 31 | 32 | model = DeepFM(feature_dim_dict, use_fm=use_fm, 33 | hidden_size=hidden_size, keep_prob=0.5, ) 34 | check_model(model, model_name, x, y) 35 | 36 | 37 | if __name__ == "__main__": 38 | test_DeepFM(True, (32, 32), 2) 39 | -------------------------------------------------------------------------------- /tests/models/AutoInt_test.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pytest 3 | from deepctr.models import AutoInt 4 | from ..utils import check_model 5 | 6 | 7 | @pytest.mark.parametrize( 8 | 'att_layer_num,hidden_size,sparse_feature_num', 9 | [(0, (4,), 2), (1, (), 1), (1, (4,), 1), (2, (4, 4,), 2)] 10 | ) 11 | def test_AutoInt(att_layer_num, hidden_size, sparse_feature_num): 12 | model_name = "AutoInt" 13 | sample_size = 64 14 | feature_dim_dict = {"sparse": {}, 'dense': []} 15 | for name, num in zip(["sparse", "dense"], [sparse_feature_num, sparse_feature_num]): 16 | if name == "sparse": 17 | for i in 
range(num): 18 | feature_dim_dict[name][name + '_' + 19 | str(i)] = np.random.randint(1, 10) 20 | else: 21 | for i in range(num): 22 | feature_dim_dict[name].append(name + '_' + str(i)) 23 | 24 | sparse_input = [np.random.randint(0, dim, sample_size) 25 | for dim in feature_dim_dict['sparse'].values()] 26 | dense_input = [np.random.random(sample_size) 27 | for name in feature_dim_dict['dense']] 28 | y = np.random.randint(0, 2, sample_size) 29 | x = sparse_input + dense_input 30 | 31 | model = AutoInt(feature_dim_dict, att_layer_num=att_layer_num, 32 | hidden_size=hidden_size, keep_prob=0.5, ) 33 | check_model(model, model_name, x, y) 34 | 35 | 36 | if __name__ == "__main__": 37 | test_AutoInt(1, (32, 32), 2) 38 | -------------------------------------------------------------------------------- /tests/models/PNN_test.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pytest 3 | 4 | from deepctr.models import PNN 5 | from ..utils import check_model 6 | 7 | 8 | @pytest.mark.parametrize( 9 | 'use_inner, use_outter,sparse_feature_num', 10 | [(True, True, 1), (True, False, 2), (False, True, 3), (False, False, 1) 11 | ] 12 | ) 13 | def test_PNN(use_inner, use_outter, sparse_feature_num): 14 | model_name = "PNN" 15 | sample_size = 64 16 | feature_dim_dict = {"sparse": {}, 'dense': []} 17 | for name, num in zip(["sparse", "dense"], [sparse_feature_num, sparse_feature_num]): 18 | if name == "sparse": 19 | for i in range(num): 20 | feature_dim_dict[name][name + '_' + 21 | str(i)] = np.random.randint(1, 10) 22 | else: 23 | for i in range(num): 24 | feature_dim_dict[name].append(name + '_' + str(i)) 25 | sparse_input = [np.random.randint(0, dim, sample_size) 26 | for dim in feature_dim_dict['sparse'].values()] 27 | dense_input = [np.random.random(sample_size) 28 | for name in feature_dim_dict['dense']] 29 | y = np.random.randint(0, 2, sample_size) 30 | x = sparse_input + dense_input 31 | 32 | model = PNN(feature_dim_dict, embedding_size=8, 33 | hidden_size=[32, 32], keep_prob=0.5, use_inner=use_inner, use_outter=use_outter) 34 | check_model(model, model_name, x, y) 35 | 36 | 37 | if __name__ == "__main__": 38 | test_PNN(use_inner=True, use_outter=False, sparse_feature_num=1) 39 | -------------------------------------------------------------------------------- /examples/run_classification_criteo.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | from sklearn.preprocessing import LabelEncoder, MinMaxScaler 3 | from deepctr.models import DeepFM 4 | 5 | if __name__ == "__main__": 6 | data = pd.read_csv('./criteo_sample.txt') 7 | 8 | sparse_features = ['C' + str(i) for i in range(1, 27)] 9 | dense_features = ['I'+str(i) for i in range(1, 14)] 10 | 11 | data[sparse_features] = data[sparse_features].fillna('-1', ) 12 | data[dense_features] = data[dense_features].fillna(0,) 13 | target = ['label'] 14 | 15 | # 1. Label Encoding for sparse features, and simple Transformation for dense features 16 | for feat in sparse_features: 17 | lbe = LabelEncoder() 18 | data[feat] = lbe.fit_transform(data[feat]) 19 | mms = MinMaxScaler(feature_range=(0, 1)) 20 | data[dense_features] = mms.fit_transform(data[dense_features]) 21 | 22 | # 2. Count #unique features for each sparse field, and record dense feature field names 23 | 24 | sparse_feature_dict = {feat: data[feat].nunique() 25 | for feat in sparse_features} 26 | dense_feature_list = dense_features 27 | 28 | # 3. Generate input data for the model
29 | 30 | model_input = [data[feat].values for feat in sparse_feature_dict] + \ 31 | [data[feat].values for feat in dense_feature_list] # + [data[target[0]].values] 32 | 33 | # 4. Define model, compile and train 34 | model = DeepFM({"sparse": sparse_feature_dict, 35 | "dense": dense_feature_list}, final_activation='sigmoid') 36 | 37 | model.compile("adam", "binary_crossentropy", 38 | metrics=['binary_crossentropy'], ) 39 | 40 | history = model.fit(model_input, data[target].values, 41 | 42 | batch_size=256, epochs=10, verbose=2, validation_split=0.2, ) 43 | print("demo done") 44 | -------------------------------------------------------------------------------- /tests/models/DCN_test.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pytest 3 | from deepctr.models import DCN 4 | from ..utils import check_model 5 | 6 | 7 | @pytest.mark.parametrize( 8 | 'embedding_size,cross_num,hidden_size,sparse_feature_num', 9 | [(8, 0, (32,), 2), (8, 1, (), 1), ('auto', 1, (32,), 3) 10 | ] 11 | ) 12 | def test_DCN(embedding_size, cross_num, hidden_size, sparse_feature_num): 13 | model_name = "DCN" 14 | 15 | sample_size = 64 16 | feature_dim_dict = {"sparse": {}, 'dense': []} 17 | for name, num in zip(["sparse", "dense"], [sparse_feature_num, sparse_feature_num]): 18 | if name == "sparse": 19 | for i in range(num): 20 | feature_dim_dict[name][name + '_' + 21 | str(i)] = np.random.randint(1, 10) 22 | else: 23 | for i in range(num): 24 | feature_dim_dict[name].append(name + '_' + str(i)) 25 | sparse_input = [np.random.randint(0, dim, sample_size) 26 | for dim in feature_dim_dict['sparse'].values()] 27 | dense_input = [np.random.random(sample_size) 28 | for name in feature_dim_dict['dense']] 29 | 30 | y = np.random.randint(0, 2, sample_size) 31 | x = sparse_input + dense_input 32 | 33 | model = DCN(feature_dim_dict, embedding_size=embedding_size, cross_num=cross_num, 34 | hidden_size=hidden_size, keep_prob=0.5, ) 35 | check_model(model, model_name, x, y) 36 | 37 | 38 | def test_DCN_invalid(embedding_size=8, cross_num=0, hidden_size=()): 39 | feature_dim_dict = {'sparse': {'sparse_1': 2, 'sparse_2': 5, 40 | 'sparse_3': 10}, 'dense': ['dense_1', 'dense_2', 'dense_3']} 41 | with pytest.raises(ValueError): 42 | _ = DCN(feature_dim_dict, embedding_size=embedding_size, cross_num=cross_num, 43 | hidden_size=hidden_size, keep_prob=0.5, ) 44 | 45 | 46 | if __name__ == "__main__": 47 | test_DCN(8, 2, [32, 32], 2) 48 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import setuptools 2 | 3 | with open("README.md", "r") as fh: 4 | long_description = fh.read() 5 | 6 | REQUIRED_PACKAGES = [ 7 | 'tensorflow>=1.4.0,!=1.7.*,!=1.8.*', 8 | 'h5py' 9 | ] 10 | 11 | setuptools.setup( 12 | name="deepctr", 13 | version="0.2.1", 14 | author="Weichen Shen", 15 | author_email="wcshen1994@163.com", 16 | description="Easy-to-use, Modular and Extendible package of deep learning based CTR (Click Through Rate) prediction models with tensorflow.", 17 | long_description=long_description, 18 | long_description_content_type="text/markdown", 19 | url="https://github.com/shenweichen/deepctr", 20 | download_url='https://github.com/shenweichen/deepctr/tags', 21 | packages=setuptools.find_packages(exclude=["tests", "tests.models"]), 22 | python_requires='>=3.4', # 3.4.6 23 | install_requires=REQUIRED_PACKAGES, 24 | extras_require={ 25 | "tf": 
['tensorflow>=1.4.0,!=1.7.*,!=1.8.*'], 26 | "tf_gpu": ['tensorflow-gpu>=1.4.0,!=1.7.*,!=1.8.*'], 27 | }, 28 | entry_points={ 29 | }, 30 | classifiers=( 31 | "License :: OSI Approved :: MIT License", 32 | "Operating System :: OS Independent", 33 | 'Intended Audience :: Developers', 34 | 'Intended Audience :: Education', 35 | 'Intended Audience :: Science/Research', 36 | 'Programming Language :: Python :: 3', 37 | 'Programming Language :: Python :: 3.4', 38 | 'Programming Language :: Python :: 3.5', 39 | 'Programming Language :: Python :: 3.6', 40 | 'Topic :: Scientific/Engineering', 41 | 'Topic :: Scientific/Engineering :: Artificial Intelligence', 42 | 'Topic :: Software Development', 43 | 'Topic :: Software Development :: Libraries', 44 | 'Topic :: Software Development :: Libraries :: Python Modules', 45 | ), 46 | license="MIT license", 47 | keywords=['ctr', 'click through rate', 48 | 'deep learning', 'tensorflow', 'tensor', 'keras'], 49 | ) 50 | -------------------------------------------------------------------------------- /examples/run_din.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from deepctr.models import DIN 3 | 4 | 5 | def get_xy_fd(): 6 | 7 | feature_dim_dict = {"sparse": {'user_age': 4, 'user_gender': 2, 8 | 'item_id': 4, 'item_gender': 2}, "dense": []} # raw features: single-value features 9 | 10 | # history behavior features: multi-value features 11 | behavior_feature_list = ["item_id", "item_gender"] 12 | # single value feature input 13 | user_age = np.array([1, 2, 3]) 14 | user_gender = np.array([0, 1, 0]) 15 | item_id = np.array([0, 1, 2]) 16 | item_gender = np.array([0, 1, 0]) 17 | 18 | # multi-value feature input 19 | hist_item_id = np.array([[0, 1, 2, 3], [0, 1, 2, 3], [0, 1, 2, 0]]) 20 | hist_item_gender = np.array([[0, 1, 0, 1], [0, 1, 1, 1], [0, 0, 1, 0]]) 21 | # valid length of the behavior sequence for every sample 22 | hist_length = np.array([4, 4, 3]) 23 | 24 | feature_dict = {'user_age': user_age, 'user_gender': user_gender, 'item_id': item_id, 'item_gender': item_gender, 25 | 'hist_item_id': hist_item_id, 'hist_item_gender': hist_item_gender, } 26 | 27 | x = [feature_dict[feat] for feat in feature_dim_dict["sparse"]] + \ 28 | [feature_dict['hist_'+feat] 29 | for feat in behavior_feature_list] + [hist_length] 30 | # Notice the concatenation order: single features + multi-value features + length 31 | # Since the lengths of the historical sequences of different features in DIN are the same (they are all extended from item_id), only one length vector is enough.
32 | y = [1, 0, 1] 33 | 34 | return x, y, feature_dim_dict, behavior_feature_list 35 | 36 | 37 | if __name__ == "__main__": 38 | x, y, feature_dim_dict, behavior_feature_list = get_xy_fd() 39 | model = DIN(feature_dim_dict, behavior_feature_list, hist_len_max=4,) 40 | model.compile('adam', 'binary_crossentropy', 41 | metrics=['binary_crossentropy']) 42 | history = model.fit(x, y, verbose=1, validation_split=0.5) 43 | -------------------------------------------------------------------------------- /tests/models/WDL_test.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pytest 3 | 4 | from deepctr.models import WDL 5 | from ..utils import check_model 6 | 7 | 8 | @pytest.mark.parametrize( 9 | 'sparse_feature_num,wide_feature_num', 10 | [(1, 0), (1, 2), (2, 0), (2, 1) 11 | ] 12 | ) 13 | def test_WDL(sparse_feature_num, wide_feature_num): 14 | model_name = "WDL" 15 | sample_size = 64 16 | feature_dim_dict = {"sparse": {}, 'dense': []} 17 | wide_feature_dim_dict = {"sparse": {}, 'dense': []} 18 | for name, num in zip(["sparse", "dense"], [sparse_feature_num, sparse_feature_num]): 19 | if name == "sparse": 20 | for i in range(num): 21 | feature_dim_dict[name][name + '_' + 22 | str(i)] = np.random.randint(1, 10) 23 | else: 24 | for i in range(num): 25 | feature_dim_dict[name].append(name + '_' + str(i)) 26 | for name, num in zip(["sparse", "dense"], [wide_feature_num, wide_feature_num]): 27 | if name == "sparse": 28 | for i in range(num): 29 | wide_feature_dim_dict[name][name + 'wide_' + 30 | str(i)] = np.random.randint(1, 10) 31 | else: 32 | for i in range(num): 33 | wide_feature_dim_dict[name].append(name + 'wide_' + str(i)) 34 | 35 | sparse_input = [np.random.randint(0, dim, sample_size) 36 | for dim in feature_dim_dict['sparse'].values()] 37 | dense_input = [np.random.random(sample_size) 38 | for name in feature_dim_dict['dense']] 39 | wide_sparse_input = [np.random.randint(0, dim, sample_size) 40 | for dim in wide_feature_dim_dict['sparse'].values()] 41 | wide_dense_input = [np.random.random(sample_size) 42 | for name in wide_feature_dim_dict['dense']] 43 | y = np.random.randint(0, 2, sample_size) 44 | x = sparse_input + dense_input 45 | x_wide = wide_sparse_input + wide_dense_input 46 | 47 | model = WDL(feature_dim_dict, wide_feature_dim_dict, 48 | hidden_size=[32, 32], keep_prob=0.5) 49 | check_model(model, model_name, x+x_wide, y) 50 | 51 | 52 | if __name__ == "__main__": 53 | test_WDL(1, 1) 54 | -------------------------------------------------------------------------------- /tests/models/xDeepFM_test.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pytest 3 | from deepctr.models import xDeepFM 4 | from ..utils import check_model 5 | 6 | 7 | @pytest.mark.parametrize( 8 | 'hidden_size,cin_layer_size,cin_split_half,cin_activation,sparse_feature_num,dense_feature_dim', 9 | [((), (), True, 'linear', 1, 2), ((16,), (), True, 'linear', 1, 1), ((), (16,), True, 'linear', 2, 2), ((16,), (16,), False, 'relu', 1, 0) 10 | ] 11 | ) 12 | def test_xDeepFM(hidden_size, cin_layer_size, cin_split_half, cin_activation, sparse_feature_num, dense_feature_dim): 13 | model_name = "xDeepFM" 14 | 15 | sample_size = 64 16 | feature_dim_dict = {"sparse": {}, 'dense': []} 17 | for name, num in zip(["sparse", "dense"], [sparse_feature_num, dense_feature_dim]): 18 | if name == "sparse": 19 | for i in range(num): 20 | feature_dim_dict[name][name + '_' + 21 | str(i)] = 
np.random.randint(1, 10) 22 | else: 23 | for i in range(num): 24 | feature_dim_dict[name].append(name + '_' + str(i)) 25 | sparse_input = [np.random.randint(0, dim, sample_size) 26 | for dim in feature_dim_dict['sparse'].values()] 27 | dense_input = [np.random.random(sample_size) 28 | for name in feature_dim_dict['dense']] 29 | 30 | y = np.random.randint(0, 2, sample_size) 31 | x = sparse_input + dense_input 32 | 33 | model = xDeepFM(feature_dim_dict, hidden_size=hidden_size, cin_layer_size=cin_layer_size, 34 | cin_split_half=cin_split_half, cin_activation=cin_activation, keep_prob=0.5, ) 35 | check_model(model, model_name, x, y) 36 | 37 | 38 | @pytest.mark.parametrize( 39 | 'hidden_size,cin_layer_size,', 40 | [((8,), (3, 8)), 41 | ] 42 | ) 43 | def test_xDeepFM_invalid(hidden_size, cin_layer_size): 44 | feature_dim_dict = {'sparse': {'sparse_1': 2, 'sparse_2': 5, 45 | 'sparse_3': 10}, 'dense': ['dense_1', 'dense_2', 'dense_3']} 46 | with pytest.raises(ValueError): 47 | _ = xDeepFM(feature_dim_dict, hidden_size=hidden_size, 48 | cin_layer_size=cin_layer_size,) 49 | 50 | 51 | if __name__ == "__main__": 52 | test_xDeepFM((256,), (128,), False, 'linear', 3, 1) 53 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.h5 2 | *.ipynb 3 | .pytest_cache/ 4 | tests/unused/* 5 | # Byte-compiled / optimized / DLL files 6 | __pycache__/ 7 | *.py[cod] 8 | *$py.class 9 | .idea/ 10 | # C extensions 11 | *.so 12 | 13 | # Distribution / packaging 14 | .Python 15 | env/ 16 | build/ 17 | develop-eggs/ 18 | dist/ 19 | downloads/ 20 | eggs/ 21 | .eggs/ 22 | lib/ 23 | lib64/ 24 | parts/ 25 | sdist/ 26 | var/ 27 | *.egg-info/ 28 | .installed.cfg 29 | *.egg 30 | 31 | # PyInstaller 32 | # Usually these files are written by a python script from a template 33 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
34 | *.manifest 35 | *.spec 36 | 37 | # Installer logs 38 | pip-log.txt 39 | pip-delete-this-directory.txt 40 | 41 | # Unit test / coverage reports 42 | htmlcov/ 43 | .tox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *,cover 50 | .hypothesis/ 51 | 52 | # Translations 53 | *.mo 54 | *.pot 55 | 56 | # Django stuff: 57 | *.log 58 | local_settings.py 59 | 60 | # Flask instance folder 61 | instance/ 62 | 63 | # Scrapy stuff: 64 | .scrapy 65 | 66 | # Sphinx documentation 67 | docs/_build/ 68 | 69 | # PyBuilder 70 | target/ 71 | 72 | # IPython Notebook 73 | .ipynb_checkpoints 74 | 75 | # pyenv 76 | .python-version 77 | 78 | # celery beat schedule file 79 | celerybeat-schedule 80 | 81 | # dotenv 82 | .env 83 | 84 | # virtualenv 85 | venv/ 86 | ENV/ 87 | 88 | # Spyder project settings 89 | .spyderproject 90 | 91 | # Rope project settings 92 | .ropeproject 93 | 94 | # ========================= 95 | # Operating System Files 96 | # ========================= 97 | 98 | # OSX 99 | # ========================= 100 | 101 | .DS_Store 102 | .AppleDouble 103 | .LSOverride 104 | 105 | # Thumbnails 106 | ._* 107 | 108 | # Files that might appear in the root of a volume 109 | .DocumentRevisions-V100 110 | .fseventsd 111 | .Spotlight-V100 112 | .TemporaryItems 113 | .Trashes 114 | .VolumeIcon.icns 115 | 116 | # Directories potentially created on remote AFP share 117 | .AppleDB 118 | .AppleDesktop 119 | Network Trash Folder 120 | Temporary Items 121 | .apdisk 122 | 123 | # Windows 124 | # ========================= 125 | 126 | # Windows image file caches 127 | Thumbs.db 128 | ehthumbs.db 129 | 130 | # Folder config file 131 | Desktop.ini 132 | 133 | # Recycle Bin used on file shares 134 | $RECYCLE.BIN/ 135 | 136 | # Windows Installer files 137 | *.cab 138 | *.msi 139 | *.msm 140 | *.msp 141 | 142 | # Windows shortcuts 143 | *.lnk 144 | -------------------------------------------------------------------------------- /deepRS/activations.py: -------------------------------------------------------------------------------- 1 | from tensorflow.python.keras.layers import Layer 2 | from tensorflow.python.keras.initializers import Zeros 3 | import tensorflow as tf 4 | 5 | 6 | class Dice(Layer): 7 | """The Data Adaptive Activation Function in DIN, which can be viewed as a generalization of PReLU and can adaptively adjust the rectified point according to the distribution of the input data. 8 | 9 | Input shape 10 | - Arbitrary. Use the keyword argument `input_shape` (tuple of integers, does not include the samples axis) when using this layer as the first layer in a model. 11 | 12 | Output shape 13 | - Same shape as the input. 14 | 15 | Arguments 16 | - **axis** : Integer, the axis that should be used to compute data distribution (typically the features axis). 17 | 18 | - **epsilon** : Small float added to variance to avoid dividing by zero. 19 | 20 | References 21 | - [Zhou G, Zhu X, Song C, et al. Deep interest network for click-through rate prediction[C]//Proceedings of the 24th ACM SIGKDD International Conference on Knowledge Discovery & Data Mining.
ACM, 2018: 1059-1068.](https://arxiv.org/pdf/1706.06978.pdf) 22 | """ 23 | 24 | def __init__(self, axis=-1, epsilon=1e-9, **kwargs): 25 | self.axis = axis 26 | self.epsilon = epsilon 27 | super(Dice, self).__init__(**kwargs) 28 | 29 | def build(self, input_shape): 30 | self.alphas = self.add_weight(shape=(input_shape[-1],), initializer=Zeros( 31 | ), dtype=tf.float32, name=self.name+'dice_alpha') # name='alpha_'+self.name 32 | super(Dice, self).build(input_shape) # Be sure to call this somewhere! 33 | 34 | def call(self, inputs, **kwargs): 35 | 36 | inputs_normed = tf.layers.batch_normalization( 37 | inputs, axis=self.axis, epsilon=self.epsilon, center=False, scale=False) 38 | x_p = tf.sigmoid(inputs_normed) 39 | return self.alphas * (1.0 - x_p) * inputs + x_p * inputs 40 | 41 | def get_config(self,): 42 | 43 | config = {'axis': self.axis, 'epsilon': self.epsilon} 44 | base_config = super(Dice, self).get_config() 45 | return dict(list(base_config.items()) + list(config.items())) 46 | 47 | def compute_output_shape(self, input_shape): 48 | return input_shape 49 | 50 | 51 | def activation_fun(activation, fc): 52 | 53 | if isinstance(activation, str): 54 | fc = tf.keras.layers.Activation(activation)(fc) 55 | elif issubclass(activation, Layer): 56 | fc = activation()(fc) 57 | else: 58 | raise ValueError( 59 | "Invalid activation, found %s. You should use a str or an Activation Layer Class." % (activation)) 60 | return fc 61 | -------------------------------------------------------------------------------- /tests/models/MLR_test.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pytest 3 | from deepctr.models import MLR 4 | from ..utils import check_model 5 | 6 | 7 | @pytest.mark.parametrize( 8 | 9 | 'region_sparse,region_dense,base_sparse,base_dense,bias_sparse,bias_dense', 10 | 11 | [(0, 2, 0, 2, 0, 1), (0, 2, 0, 1, 0, 2), (0, 2, 0, 0, 1, 0), 12 | (0, 1, 1, 2, 1, 1,), (0, 1, 1, 1, 1, 2), (0, 1, 1, 0, 2, 0), 13 | (1, 0, 2, 2, 2, 1), (2, 0, 2, 1, 2, 2), (2, 0, 2, 0, 0, 0) 14 | ] 15 | 16 | ) 17 | def test_MLRs(region_sparse, region_dense, base_sparse, base_dense, bias_sparse, bias_dense): 18 | model_name = "MLRs" 19 | region_fd = {"sparse": {}, 'dense': []} 20 | for name, num in zip(["sparse", "dense"], [region_sparse, region_dense]): 21 | if name == "sparse": 22 | for i in range(num): 23 | region_fd[name][name + '_' + str(i)] = np.random.randint(1, 10) 24 | else: 25 | for i in range(num): 26 | region_fd[name].append(name + '_' + str(i)) 27 | 28 | base_fd = {"sparse": {}, 'dense': []} 29 | for name, num in zip(["sparse", "dense"], [base_sparse, base_dense]): 30 | if name == "sparse": 31 | for i in range(num): 32 | base_fd[name][name + '_' + str(i)] = np.random.randint(1, 10) 33 | else: 34 | for i in range(num): 35 | base_fd[name].append(name + '_' + str(i)) 36 | bias_fd = {"sparse": {}, 'dense': []} 37 | for name, num in zip(["sparse", "dense"], [bias_sparse, bias_dense]): 38 | if name == "sparse": 39 | for i in range(num): 40 | bias_fd[name][name + '_' + str(i)] = np.random.randint(1, 10) 41 | else: 42 | for i in range(num): 43 | bias_fd[name].append(name + '_' + str(i)) 44 | 45 | model = MLR(region_fd, base_fd, bias_feature_dim_dict=bias_fd) 46 | model.compile('adam', 'binary_crossentropy', 47 | metrics=['binary_crossentropy']) 48 | print(model_name + " test pass!") 49 | 50 | 51 | def test_MLR(): 52 | model_name = "MLR" 53 | sample_size = 64 54 | feature_dim_dict = {'sparse': {'sparse_1': 2, 'sparse_2': 5, 55 | 'sparse_3': 10},
'dense': ['dense_1', 'dense_2', 'dense_3']} 56 | sparse_input = [np.random.randint(0, dim, sample_size) 57 | for dim in feature_dim_dict['sparse'].values()] 58 | dense_input = [np.random.random(sample_size) 59 | for name in feature_dim_dict['dense']] 60 | y = np.random.randint(0, 2, sample_size) 61 | x = sparse_input + dense_input 62 | 63 | model = MLR(feature_dim_dict) 64 | check_model(model, model_name, x, y) 65 | 66 | 67 | if __name__ == "__main__": 68 | test_MLR() 69 | -------------------------------------------------------------------------------- /docs/source/index.rst: -------------------------------------------------------------------------------- 1 | .. DeepCTR documentation master file, created by 2 | sphinx-quickstart on Fri Nov 23 21:08:54 2018. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | Welcome to DeepCTR's documentation! 7 | =================================== 8 | 9 | |Downloads|_ |Stars|_ |Forks|_ |PyPi|_ |Issues|_ |Activity|_ 10 | 11 | .. |Downloads| image:: https://pepy.tech/badge/deepctr 12 | .. _Downloads: https://pepy.tech/project/deepctr 13 | 14 | .. |Stars| image:: https://img.shields.io/github/stars/shenweichen/deepctr.svg 15 | .. _Stars: https://github.com/shenweichen/DeepCTR 16 | 17 | .. |Forks| image:: https://img.shields.io/github/forks/shenweichen/deepctr.svg 18 | .. _Forks: https://github.com/shenweichen/DeepCTR/fork 19 | 20 | .. |PyPi| image:: https://img.shields.io/pypi/v/deepctr.svg 21 | .. _PyPi: https://pypi.org/project/deepctr/ 22 | 23 | .. |Issues| image:: https://img.shields.io/github/issues/shenweichen/deepctr.svg 24 | .. _Issues: https://github.com/shenweichen/deepctr/issues 25 | 26 | .. |Activity| image:: https://img.shields.io/github/last-commit/shenweichen/deepctr.svg 27 | .. _Activity: https://github.com/shenweichen/DeepCTR 28 | 29 | 30 | DeepCTR is an **Easy-to-use**, **Modular** and **Extendible** package of deep-learning based CTR models, along with lots of core component layers which can be used to build your own custom model easily. You can use any complex model with ``model.fit()`` and ``model.predict()``, and the layers are compatible with tensorflow. 31 | 32 | Install the package through ``pip install deepctr`` and `Get Started! <./Quick-Start.html>`_ 33 | 34 | You can read the latest code at https://github.com/shenweichen/DeepCTR 35 | 36 | News 37 | ----- 38 | 12/27/2018 : Add `AutoInt <./Features.html#autoint-automatic-feature-interaction>`_. `Changelog <https://github.com/shenweichen/DeepCTR/releases/tag/v0.2.1>`_ 39 | 40 | 12/22/2018 : Add `xDeepFM <./Features.html#xdeepfm>`_ and an automatic check for new versions. `Changelog <https://github.com/shenweichen/DeepCTR/releases/tag/v0.2.0>`_ 41 | 42 | 12/19/2018 : DeepCTR is compatible with tensorflow from ``1.4-1.12`` except for ``1.7`` and ``1.8``. `Changelog <https://github.com/shenweichen/DeepCTR/releases/tag/v0.1.6>`_ 43 | 44 | 11/24/2018 : DeepCTR is released! `PyPi <https://pypi.org/project/deepctr/>`_. 45 | 46 | .. toctree:: 47 | :maxdepth: 2 48 | :caption: Home: 49 | 50 | Quick-Start 51 | Features 52 | Demo 53 | FAQ 54 | History 55 | 56 | .. toctree:: 57 | :maxdepth: 3 58 | :caption: API: 59 | 60 | Models API<Models-API> 61 | Layers API<deepctr.layers> 62 | Activations API<deepctr.activations> 63 | Sequence API<deepctr.sequence> 64 | 65 | 66 | 67 | Indices and tables 68 | ================== 69 | 70 | * :ref:`genindex` 71 | * :ref:`modindex` 72 | * :ref:`search` -------------------------------------------------------------------------------- /docs/source/FAQ.rst: -------------------------------------------------------------------------------- 1 | FAQ 2 | ========== 3 | 1. 
3 | 1. Save or load weights/models
4 | ----------------------------------------
5 | To save/load weights, you can write code just like with any other Keras model.
6 | 
7 | .. code-block:: python
8 | 
9 | model = DeepFM()
10 | model.save_weights('DeepFM_w.h5')
11 | model.load_weights('DeepFM_w.h5')
12 | 
13 | 
14 | To save/load models, it is just a little different.
15 | 
16 | .. code-block:: python
17 | 
18 | from tensorflow.python.keras.models import save_model,load_model
19 | model = DeepFM()
20 | save_model(model, 'DeepFM.h5')  # save_model, same as before
21 | 
22 | from deepctr.utils import custom_objects
23 | model = load_model('DeepFM.h5',custom_objects)  # load_model, just add a parameter
24 | 
25 | 2. Set learning rate and use earlystopping
26 | ---------------------------------------------------
27 | You can use any model in DeepCTR like a Keras model object.
28 | Here is an example of how to set the learning rate and use early stopping:
29 | 
30 | .. code-block:: python
31 | 
32 | import deepctr
33 | from tensorflow.python.keras.optimizers import Adam,Adagrad
34 | from tensorflow.python.keras.callbacks import EarlyStopping
35 | 
36 | model = deepctr.models.DeepFM({"sparse": sparse_feature_dict, "dense": dense_feature_list})
37 | model.compile(Adagrad(0.0808),'binary_crossentropy',metrics=['binary_crossentropy'])
38 | 
39 | es = EarlyStopping(monitor='val_binary_crossentropy')
40 | history = model.fit(model_input, data[target].values,batch_size=256, epochs=10, verbose=2, validation_split=0.2,callbacks=[es] )
41 | 
42 | 
43 | 3. Get the attentional weights of feature interactions in AFM
44 | --------------------------------------------------------------------------
45 | First, make sure that you have installed the latest version of deepctr.
46 | 
47 | Then, use the following code; ``attentional_weights[:,i,0]`` is ``feature_interactions[i]``'s attentional weight over all samples.
48 | 
49 | .. code-block:: python
50 | 
51 | import itertools
52 | import deepctr
53 | from tensorflow.python.keras.models import Model
54 | from tensorflow.python.keras.layers import Lambda
55 | 
56 | feature_dim_dict = {"sparse": sparse_feature_dict, "dense": dense_feature_list}
57 | model = deepctr.models.AFM(feature_dim_dict)
58 | model.fit(model_input,target)
59 | 
60 | afmlayer = model.layers[-3]
61 | afm_weight_model = Model(model.input,outputs=Lambda(lambda x:afmlayer.normalized_att_score)(model.input))
62 | attentional_weights = afm_weight_model.predict(model_input,batch_size=4096)
63 | feature_interactions = list(itertools.combinations(list(feature_dim_dict['sparse'].keys()) + feature_dim_dict['dense'] ,2))
64 | 
65 | 
66 | 
67 | 4. Do the models support multi-value input?
68 | ---------------------------------------------------
69 | For now, only the `DIN `_ model supports multi-value input; you can use the layers in `sequence `_ to build your own models, as shown in the sketch below.
70 | And broader support will come in a future release.
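A minimal sketch of such a multi-value input, mirroring ``tests/models/DIN_test.py`` (the zero-padding of the shorter histories and the concrete numbers here are illustrative assumptions):

.. code-block:: python

    import numpy as np
    from deepctr.models import DIN

    feature_dim_dict = {"sparse": {"user": 4, "item": 4}, "dense": []}
    behavior_feature_list = ["item"]

    user = np.array([1, 2, 3])
    item = np.array([0, 1, 2])
    # one behavior sequence per sample, zero-padded to hist_len_max
    hist_item = np.array([[0, 1, 2, 3], [1, 2, 3, 0], [1, 2, 0, 0]])
    hist_length = np.array([4, 3, 2])  # true lengths before padding

    # input order: sparse features, then hist_ features, then the length array
    x = [user, item, hist_item, hist_length]
    y = np.array([1, 0, 1])

    model = DIN(feature_dim_dict, behavior_feature_list, hist_len_max=4,
                embedding_size=8, use_din=True, hidden_size=[4, 4, 4], keep_prob=0.6)
    model.compile("adam", "binary_crossentropy")
    model.fit(x, y, verbose=1)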
--------------------------------------------------------------------------------
/docs/source/Quick-Start.rst:
--------------------------------------------------------------------------------
1 | Quick-Start
2 | ===========
3 | 
4 | Installation Guide
5 | ----------------------
6 | Install the deepctr package through ``pip`` ::
7 | 
8 | pip install deepctr
9 | 
10 | 
11 | Getting started: 4 steps to DeepCTR
12 | -----------------------------------------
13 | 
14 | 
15 | Step 1: Import model
16 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
17 | 
18 | .. code-block:: python
19 | 
20 | import pandas as pd
21 | from sklearn.preprocessing import LabelEncoder,MinMaxScaler
22 | 
23 | from deepctr.models import DeepFM
24 | 
25 | data = pd.read_csv('./criteo_sample.txt')
26 | 
27 | sparse_features = ['C' + str(i) for i in range(1, 27)]
28 | dense_features = ['I'+str(i) for i in range(1,14)]
29 | target = ['label']
30 | 
31 | data[sparse_features] = data[sparse_features].fillna('-1', )
32 | data[dense_features] = data[dense_features].fillna(0,)
33 | 
34 | 
35 | 
36 | 
37 | Step 2: Simple preprocessing
38 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
39 | 
40 | Usually there are two simple ways to encode the sparse categorical features for embedding:
41 | 
42 | - Label Encoding: map the features to integer values from 0 to len(#unique) - 1
43 | - Hash Encoding: map the features to a fixed range, like 0 ~ 9999 (see the sketch after this step)
44 | 
45 | Dense numerical features are usually discretized into buckets; here we use normalization.
46 | 
47 | .. code-block:: python
48 | 
49 | for feat in sparse_features:
50 | lbe = LabelEncoder()  # or use hash encoding, see below
51 | data[feat] = lbe.fit_transform(data[feat])
52 | mms = MinMaxScaler(feature_range=(0,1))
53 | data[dense_features] = mms.fit_transform(data[dense_features])
54 | 
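As a minimal sketch of the hash encoding mentioned above (the bucket count ``10000`` and the choice of hash function are illustrative assumptions, not deepctr API):

.. code-block:: python

    # illustrative only: bucket each raw category value into a fixed range
    HASH_BUCKETS = 10000  # assumed bucket count, tune per feature
    for feat in sparse_features:
        data[feat] = data[feat].astype(str).apply(lambda v: hash(v) % HASH_BUCKETS)

Note that with hashing, the vocabulary size recorded in Step 3 should be ``HASH_BUCKETS`` instead of ``data[feat].nunique()``, and Python's built-in ``hash`` is salted per process, so use a stable hash (e.g. ``zlib.crc32``) when reproducibility matters.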
55 | 
56 | 
57 | Step 3: Generate feature config dict
58 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
59 | 
60 | Here, for sparse features, we transform them into dense vectors by embedding techniques.
61 | For dense numerical features, we add a dummy index like LIBFM.
62 | That is to say, all dense features under the same field share the same embedding vector.
63 | In some implementations, the dense feature is concatenated to the input embedding vectors of the deep network; you can modify the code yourself.
64 | 
65 | 
66 | .. code-block:: python
67 | 
68 | sparse_feature_dict = {feat: data[feat].nunique() for feat in sparse_features}
69 | dense_feature_list = dense_features
70 | 
71 | 
72 | Step 4: Generate the training samples and train the model
73 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
74 | 
75 | There are two rules here that we must follow:
76 | 
77 | - The sparse features are placed in front of the dense features.
78 | - The order of the features we feed into the model must be consistent with the iteration order of the feature dictionary.
79 | 
80 | .. code-block:: python
81 | 
82 | # make sure the order is right
83 | model_input = [data[feat].values for feat in sparse_feature_dict] + [data[feat].values for feat in dense_feature_list]
84 | 
85 | model = DeepFM({"sparse": sparse_feature_dict, "dense": dense_feature_list}, final_activation='sigmoid')
86 | model.compile("adam", "binary_crossentropy", metrics=['binary_crossentropy'], )
87 | history = model.fit(model_input, data[target].values,
88 | batch_size=256, epochs=1, verbose=2, validation_split=0.2,)
89 | 
90 | 
91 | You can check the full code `here <./Demo.html>`_
92 | 
93 | 
94 | 
95 | 
96 | 
97 | 
98 | 
99 | 
100 | 
--------------------------------------------------------------------------------
/tests/models/DIN_test.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import pytest
3 | from deepctr.models import DIN
4 | from deepctr.activations import Dice
5 | from deepctr.utils import custom_objects
6 | from tensorflow.python.keras.models import load_model, save_model
7 | from ..utils import check_model
8 | 
9 | 
10 | def get_xy_fd():
11 | feature_dim_dict = {"sparse": {'user': 4, 'gender': 2,
12 | 'item': 4, 'item_gender': 2}, "dense": []}
13 | behavior_feature_list = ["item"]
14 | uid = np.array([1, 2, 3])
15 | ugender = np.array([0, 1, 0])
16 | iid = np.array([0, 1, 2])
17 | igender = np.array([0, 1, 0])
18 | 
19 | hist_iid = np.array([[0, 1, 2, 3], [0, 1, 2, 3], [0, 1, 2, 3]])
20 | hist_igender = np.array([[0, 1, 0, 1], [0, 1, 1, 1], [0, 0, 1, 0]])
21 | hist_length = np.array([4, 4, 4])
22 | 
23 | feature_dict = {'user': uid, 'gender': ugender, 'item': iid, 'item_gender': igender,
24 | 'hist_item': hist_iid, 'hist_item_gender': hist_igender, }
25 | x = [feature_dict[feat] for feat in feature_dim_dict["sparse"]] \
26 | + [feature_dict['hist_'+feat] for feat in behavior_feature_list]\
27 | + [hist_length]
28 | y = [1, 0, 1]
29 | return x, y, feature_dim_dict, behavior_feature_list
30 | 
31 | 
32 | @pytest.mark.xfail(reason="There is a bug when saving a model that uses Dice")
33 | # @pytest.mark.skip(reason="misunderstood the API")
34 | def test_DIN_model_io():
35 | 
36 | model_name = "DIN_att"
37 | _, _, feature_dim_dict, behavior_feature_list = get_xy_fd()
38 | 
39 | model = DIN(feature_dim_dict, behavior_feature_list, hist_len_max=4, embedding_size=8, att_activation=Dice,
40 | 
41 | use_din=True, hidden_size=[4, 4, 4], keep_prob=0.6,)
42 | 
43 | model.compile('adam', 'binary_crossentropy',
44 | metrics=['binary_crossentropy'])
45 | # model.fit(x, y, verbose=1, validation_split=0.5)
46 | save_model(model, model_name + '.h5')
47 | model = load_model(model_name + '.h5', custom_objects)
48 | print(model_name + " test save load model pass!")
49 | 
50 | 
51 | def test_DIN_att():
52 | model_name = "DIN_att"
53 | 
54 | x, y, feature_dim_dict, behavior_feature_list = get_xy_fd()
55 | 
56 | model = DIN(feature_dim_dict, behavior_feature_list, hist_len_max=4, embedding_size=8,
57 | use_din=True, hidden_size=[4, 4, 4], keep_prob=0.6,)
58 | 
59 | model.compile('adam', 'binary_crossentropy',
60 | metrics=['binary_crossentropy'])
61 | model.fit(x, y, verbose=1, validation_split=0.5)
62 | 
63 | print(model_name+" test train valid pass!")
64 | model.save_weights(model_name + '_weights.h5')
65 | model.load_weights(model_name + '_weights.h5')
66 | print(model_name+" test save load weight pass!")
67 | 
68 | # try:
69 | # save_model(model, name + '.h5')
70 | # model = load_model(name + '.h5', custom_objects)
71 | # print(name + " test save load model pass!")
72 | # except: 73
| # print("【Error】There is a bug when save model use Dice---------------------------------------------------") 74 | 75 | print(model_name + " test pass!") 76 | 77 | 78 | def test_DIN_sum(): 79 | 80 | model_name = "DIN_sum" 81 | x, y, feature_dim_dict, behavior_feature_list = get_xy_fd() 82 | 83 | model = DIN(feature_dim_dict, behavior_feature_list, hist_len_max=4, embedding_size=8, 84 | use_din=False, hidden_size=[4, 4, 4], keep_prob=0.6, activation="sigmoid") 85 | 86 | check_model(model, model_name, x, y) 87 | 88 | 89 | if __name__ == "__main__": 90 | test_DIN_att() 91 | test_DIN_sum() 92 | -------------------------------------------------------------------------------- /deepRS/models/fnn.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | """ 3 | Author: 4 | Weichen Shen,wcshen1994@163.com 5 | 6 | Reference: 7 | [1] Zhang W, Du T, Wang J. Deep learning over multi-field categorical data[C]//European conference on information retrieval. Springer, Cham, 2016: 45-57.(https://arxiv.org/pdf/1601.02376.pdf) 8 | """ 9 | 10 | from tensorflow.python.keras.layers import Dense, Concatenate, Reshape, add 11 | from tensorflow.python.keras.models import Model 12 | from tensorflow.python.keras.regularizers import l2 13 | 14 | from ..layers import PredictionLayer, MLP 15 | from ..utils import get_input, get_share_embeddings 16 | 17 | 18 | def FNN(feature_dim_dict, embedding_size=8, 19 | hidden_size=(128, 128), 20 | l2_reg_embedding=1e-5, l2_reg_linear=1e-5, l2_reg_deep=0, 21 | init_std=0.0001, seed=1024, keep_prob=1, 22 | activation='relu', final_activation='sigmoid', ): 23 | """Instantiates the Factorization-supported Neural Network architecture. 24 | 25 | :param feature_dim_dict: dict,to indicate sparse field and dense field like {'sparse':{'field_1':4,'field_2':3,'field_3':2},'dense':['field_4','field_5']} 26 | :param embedding_size: positive integer,sparse feature embedding_size 27 | :param hidden_size: list,list of positive integer or empty list, the layer number and units in each layer of deep net 28 | :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vector 29 | :param l2_reg_linear: float. L2 regularizer strength applied to linear weight 30 | :param l2_reg_deep: float . L2 regularizer strength applied to deep net 31 | :param init_std: float,to use as the initialize std of embedding vector 32 | :param seed: integer ,to use as random seed. 33 | :param keep_prob: float in (0,1]. keep_prob used in deep net 34 | :param activation: Activation function to use in deep net 35 | :param final_activation: str,output activation,usually ``'sigmoid'`` or ``'linear'`` 36 | :return: A Keras model instance. 
37 | """ 38 | if not isinstance(feature_dim_dict, 39 | dict) or "sparse" not in feature_dim_dict or "dense" not in feature_dim_dict: 40 | raise ValueError( 41 | "feature_dim must be a dict like {'sparse':{'field_1':4,'field_2':3,'field_3':2},'dense':['field_5',]}") 42 | 43 | sparse_input, dense_input = get_input(feature_dim_dict, None) 44 | sparse_embedding, linear_embedding, = get_share_embeddings(feature_dim_dict, embedding_size, init_std, seed, l2_reg_embedding, 45 | l2_reg_linear) 46 | 47 | embed_list = [sparse_embedding[i](sparse_input[i]) 48 | for i in range(len(feature_dim_dict["sparse"]))] 49 | 50 | linear_term = [linear_embedding[i](sparse_input[i]) 51 | for i in range(len(sparse_input))] 52 | if len(linear_term) > 1: 53 | linear_term = add(linear_term) 54 | elif len(linear_term) == 1: 55 | linear_term = linear_term[0] 56 | 57 | if len(dense_input) > 0: 58 | continuous_embedding_list = list( 59 | map(Dense(embedding_size, use_bias=False, kernel_regularizer=l2(l2_reg_embedding), ), 60 | dense_input)) 61 | continuous_embedding_list = list( 62 | map(Reshape((1, embedding_size)), continuous_embedding_list)) 63 | embed_list += continuous_embedding_list 64 | 65 | dense_input_ = dense_input[0] if len( 66 | dense_input) == 1 else Concatenate()(dense_input) 67 | linear_dense_logit = Dense( 68 | 1, activation=None, use_bias=False, kernel_regularizer=l2(l2_reg_linear))(dense_input_) 69 | linear_term = add([linear_dense_logit, linear_term]) 70 | 71 | num_inputs = len(dense_input) + len(sparse_input) 72 | deep_input = Reshape([num_inputs*embedding_size] 73 | )(Concatenate()(embed_list)) 74 | deep_out = MLP(hidden_size, activation, l2_reg_deep, 75 | keep_prob, False, seed)(deep_input) 76 | deep_logit = Dense(1, use_bias=False, activation=None)(deep_out) 77 | final_logit = add([deep_logit, linear_term]) 78 | output = PredictionLayer(final_activation)(final_logit) 79 | model = Model(inputs=sparse_input + dense_input, 80 | outputs=output) 81 | return model 82 | -------------------------------------------------------------------------------- /deepRS/models/wdl.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | """ 3 | Author: 4 | Weichen Shen,wcshen1994@163.com 5 | 6 | Reference: 7 | [1] Cheng H T, Koc L, Harmsen J, et al. Wide & deep learning for recommender systems[C]//Proceedings of the 1st Workshop on Deep Learning for Recommender Systems. ACM, 2016: 7-10.(https://arxiv.org/pdf/1606.07792.pdf) 8 | """ 9 | 10 | from tensorflow.python.keras.layers import Dense, Concatenate, Flatten, add 11 | from tensorflow.python.keras.models import Model 12 | from ..layers import PredictionLayer, MLP 13 | from ..utils import get_input, get_sep_embeddings 14 | 15 | 16 | def WDL(deep_feature_dim_dict, wide_feature_dim_dict, embedding_size=8, hidden_size=(128, 128), l2_reg_linear=1e-5, l2_reg_embedding=1e-5, l2_reg_deep=0, init_std=0.0001, seed=1024, keep_prob=1, activation='relu', final_activation='sigmoid',): 17 | """Instantiates the Wide&Deep Learning architecture. 
18 | 19 | :param deep_feature_dim_dict: dict,to indicate sparse field and dense field in deep part like {'sparse':{'field_1':4,'field_2':3,'field_3':2},'dense':['field_4','field_5']} 20 | :param wide_feature_dim_dict: dict,to indicate sparse field and dense field in wide part like {'sparse':{'field_1':4,'field_2':3,'field_3':2},'dense':['field_4','field_5']} 21 | :param embedding_size: positive integer,sparse feature embedding_size 22 | :param hidden_size: list,list of positive integer or empty list, the layer number and units in each layer of deep net 23 | :param l2_reg_linear: float. L2 regularizer strength applied to wide part 24 | :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vector 25 | :param l2_reg_deep: float. L2 regularizer strength applied to deep net 26 | :param init_std: float,to use as the initialize std of embedding vector 27 | :param seed: integer ,to use as random seed. 28 | :param keep_prob: float in (0,1]. keep_prob used in deep net 29 | :param activation: Activation function to use in deep net 30 | :param final_activation: str,output activation,usually ``'sigmoid'`` or ``'linear'`` 31 | :return: A Keras model instance. 32 | """ 33 | if not isinstance(deep_feature_dim_dict, 34 | dict) or "sparse" not in deep_feature_dim_dict or "dense" not in deep_feature_dim_dict: 35 | raise ValueError( 36 | "feature_dim must be a dict like {'sparse':{'field_1':4,'field_2':3,'field_3':2},'dense':['field_5',]}") 37 | 38 | sparse_input, dense_input, bias_sparse_input, bias_dense_input = get_input( 39 | deep_feature_dim_dict, wide_feature_dim_dict) 40 | sparse_embedding, wide_linear_embedding = get_sep_embeddings( 41 | deep_feature_dim_dict, wide_feature_dim_dict, embedding_size, init_std, seed, l2_reg_embedding, l2_reg_linear) 42 | 43 | embed_list = [sparse_embedding[i](sparse_input[i]) 44 | for i in range(len(sparse_input))] 45 | deep_input = Concatenate()(embed_list) if len( 46 | embed_list) > 1 else embed_list[0] 47 | deep_input = Flatten()(deep_input) 48 | if len(dense_input) > 0: 49 | deep_input = Concatenate()([deep_input]+dense_input) 50 | 51 | deep_out = MLP(hidden_size, activation, l2_reg_deep, keep_prob, 52 | False, seed)(deep_input) 53 | deep_logit = Dense(1, use_bias=False, activation=None)(deep_out) 54 | final_logit = deep_logit 55 | if len(wide_feature_dim_dict['dense']) + len(wide_feature_dim_dict['sparse']) > 0: 56 | if len(wide_feature_dim_dict['sparse']) > 0: 57 | bias_embed_list = [wide_linear_embedding[i]( 58 | bias_sparse_input[i]) for i in range(len(bias_sparse_input))] 59 | linear_term = add(bias_embed_list) if len( 60 | bias_embed_list) > 1 else bias_embed_list[0] 61 | final_logit = add([final_logit, linear_term]) 62 | if len(wide_feature_dim_dict['dense']) > 0: 63 | wide_dense_term = Dense(1, use_bias=False, activation=None)(Concatenate()( 64 | bias_dense_input) if len(bias_dense_input) > 1 else bias_dense_input[0]) 65 | final_logit = add([final_logit, wide_dense_term]) 66 | 67 | output = PredictionLayer(final_activation)(final_logit) 68 | model = Model(inputs=sparse_input + dense_input + 69 | bias_sparse_input + bias_dense_input, outputs=output) 70 | return model 71 | -------------------------------------------------------------------------------- /docs/source/Demo.rst: -------------------------------------------------------------------------------- 1 | Demos 2 | =========== 3 | 4 | Classification: Criteo 5 | ----------------------- 6 | 7 | The Criteo Display Ads dataset is for the purpose of predicting ads 8 | click-through rate. 
It has 13 integer features and
9 | 26 categorical features where each category has a high cardinality.
10 | 
11 | .. image:: ../pics/criteo_sample.png
12 | :align: center
13 | :scale: 70 %
14 | 
15 | In this demo, we simply normalize the integer features between 0 and 1; you
16 | can try other transformation techniques like log normalization or discretization.
17 | 
18 | This example shows how to use *DeepFM* to solve a simple binary classification task. You can get the demo data
19 | `criteo_sample.txt `_ and run the following code.
21 | 
22 | .. code-block:: python
23 | 
24 | import pandas as pd
25 | from sklearn.preprocessing import LabelEncoder,MinMaxScaler
26 | from deepctr.models import DeepFM
27 | 
28 | 
29 | data = pd.read_csv('./criteo_sample.txt')
30 | 
31 | sparse_features = ['C' + str(i) for i in range(1, 27)]
32 | dense_features = ['I'+str(i) for i in range(1,14)]
33 | 
34 | data[sparse_features] = data[sparse_features].fillna('-1', )
35 | data[dense_features] = data[dense_features].fillna(0,)
36 | 
37 | target = ['label']
38 | 
39 | # 1.Label Encoding for sparse features,and do simple Transformation for dense features
40 | for feat in sparse_features:
41 | lbe = LabelEncoder()
42 | data[feat] = lbe.fit_transform(data[feat])
43 | mms = MinMaxScaler(feature_range=(0,1))
44 | data[dense_features] = mms.fit_transform(data[dense_features])
45 | 
46 | # 2.count #unique features for each sparse field,and record dense feature field name
47 | 
48 | sparse_feature_dict = {feat: data[feat].nunique() for feat in sparse_features}
49 | dense_feature_list = dense_features
50 | 
51 | # 3.generate input data for model
52 | 
53 | model_input = [data[feat].values for feat in sparse_feature_dict] + [data[feat].values for feat in dense_feature_list]
54 | 
55 | # 4.Define Model,compile and train
56 | 
57 | 
58 | model = DeepFM({"sparse": sparse_feature_dict, "dense": dense_feature_list}, final_activation='sigmoid')
59 | model.compile("adam", "binary_crossentropy", metrics=['binary_crossentropy'], )
60 | history = model.fit(model_input, data[target].values,
61 | batch_size=256, epochs=1, verbose=2, validation_split=0.2,)
62 | 
63 | 
64 | 
65 | Regression: Movielens
66 | ----------------------
67 | The MovieLens data has been used for personalized tag recommendation; it
68 | contains 668,953 tag applications of users on movies.
69 | Here is a small fraction of the data, including only sparse fields.
70 | 
71 | .. image:: ../pics/movielens_sample.png
72 | :align: center
73 | :scale: 70 %
74 | 
75 | This example shows how to use *DeepFM* to solve a simple regression task. You can get the demo data
76 | `movielens_sample.txt `_ and run the following code.
78 | 
79 | ..
code-block:: python 80 | 81 | import pandas as pd 82 | from sklearn.preprocessing import LabelEncoder,MinMaxScaler 83 | from deepctr.models import DeepFM 84 | 85 | 86 | data = pd.read_csv("./movielens_sample.txt") 87 | sparse_features = [ "movie_id","user_id","gender","age","occupation","zip"] 88 | target = ['rating'] 89 | 90 | # 1.Label Encoding for sparse features,and do simple Transformation for dense features 91 | for feat in sparse_features: 92 | lbe = LabelEncoder() 93 | data[feat] = lbe.fit_transform(data[feat]) 94 | #2.count #unique features for each sparse field 95 | sparse_feature_dim = {feat:data[feat].nunique() for feat in sparse_features} 96 | #3.generate input data for model 97 | model_input = [data[feat].values for feat in sparse_feature_dim] 98 | #4.Define Model,compile and train 99 | model = DeepFM({"sparse":sparse_feature_dim,"dense":[]},final_activation='linear') 100 | 101 | model.compile("adam","mse",metrics=['mse'],) 102 | history = model.fit(model_input,data[target].values, 103 | batch_size=256,epochs=10,verbose=2,validation_split=0.2,) -------------------------------------------------------------------------------- /deepRS/models/nfm.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | """ 3 | Author: 4 | Weichen Shen,wcshen1994@163.com 5 | 6 | Reference: 7 | [1] He X, Chua T S. Neural factorization machines for sparse predictive analytics[C]//Proceedings of the 40th International ACM SIGIR conference on Research and Development in Information Retrieval. ACM, 2017: 355-364. (https://arxiv.org/abs/1708.05027) 8 | """ 9 | 10 | from tensorflow.python.keras.layers import Dense, Concatenate, Reshape, Dropout, add 11 | from tensorflow.python.keras.models import Model 12 | from tensorflow.python.keras.regularizers import l2 13 | from ..layers import PredictionLayer, MLP, BiInteractionPooling 14 | from ..utils import get_input, get_share_embeddings 15 | 16 | 17 | def NFM(feature_dim_dict, embedding_size=8, 18 | hidden_size=(128, 128), l2_reg_embedding=1e-5, l2_reg_linear=1e-5, l2_reg_deep=0, 19 | init_std=0.0001, seed=1024, keep_prob=1, activation='relu', final_activation='sigmoid', 20 | ): 21 | """Instantiates the Neural Factorization Machine architecture. 22 | 23 | :param feature_dim_dict: dict,to indicate sparse field and dense field like {'sparse':{'field_1':4,'field_2':3,'field_3':2},'dense':['field_4','field_5']} 24 | :param embedding_size: positive integer,sparse feature embedding_size 25 | :param hidden_size: list,list of positive integer or empty list, the layer number and units in each layer of deep net 26 | :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vector 27 | :param l2_reg_linear: float. L2 regularizer strength applied to linear part. 28 | :param l2_reg_deep: float . L2 regularizer strength applied to deep net 29 | :param init_std: float,to use as the initialize std of embedding vector 30 | :param seed: integer ,to use as random seed. 31 | :param keep_prob: float in (0,1]. keep_prob used in deep net 32 | :param activation: Activation function to use in deep net 33 | :param final_activation: str,output activation,usually ``'sigmoid'`` or ``'linear'`` 34 | :return: A Keras model instance. 
35 | """ 36 | if not isinstance(feature_dim_dict, 37 | dict) or "sparse" not in feature_dim_dict or "dense" not in feature_dim_dict: 38 | raise ValueError( 39 | "feature_dim must be a dict like {'sparse':{'field_1':4,'field_2':3,'field_3':2},'dense':['field_5',]}") 40 | 41 | sparse_input, dense_input = get_input(feature_dim_dict, None) 42 | sparse_embedding, linear_embedding = get_share_embeddings( 43 | feature_dim_dict, embedding_size, init_std, seed, l2_reg_embedding, l2_reg_linear) 44 | 45 | embed_list = [sparse_embedding[i](sparse_input[i]) 46 | for i in range(len(sparse_input))] 47 | 48 | linear_term = [linear_embedding[i](sparse_input[i]) 49 | for i in range(len(sparse_input))] 50 | if len(linear_term) > 1: 51 | linear_term = add(linear_term) 52 | elif len(linear_term) == 1: 53 | linear_term = linear_term[0] 54 | 55 | if len(dense_input) > 0: 56 | continuous_embedding_list = list( 57 | map(Dense(embedding_size, use_bias=False, kernel_regularizer=l2(l2_reg_embedding), ), 58 | dense_input)) 59 | continuous_embedding_list = list( 60 | map(Reshape((1, embedding_size)), continuous_embedding_list)) 61 | embed_list += continuous_embedding_list 62 | 63 | dense_input_ = dense_input[0] if len( 64 | dense_input) == 1 else Concatenate()(dense_input) 65 | linear_dense_logit = Dense( 66 | 1, activation=None, use_bias=False, kernel_regularizer=l2(l2_reg_linear))(dense_input_) 67 | linear_term = add([linear_dense_logit, linear_term]) 68 | 69 | fm_input = Concatenate(axis=1)(embed_list) 70 | 71 | bi_out = BiInteractionPooling()(fm_input) 72 | bi_out = Dropout(1 - keep_prob)(bi_out) 73 | deep_out = MLP(hidden_size, activation, l2_reg_deep, keep_prob, 74 | False, seed)(bi_out) 75 | deep_logit = Dense(1, use_bias=False, activation=None)(deep_out) 76 | 77 | final_logit = linear_term # TODO add bias term 78 | 79 | if len(hidden_size) > 0: 80 | final_logit = add([final_logit, deep_logit]) 81 | 82 | output = PredictionLayer(final_activation)(final_logit) 83 | print(output) 84 | model = Model(inputs=sparse_input + dense_input, outputs=output) 85 | return model 86 | -------------------------------------------------------------------------------- /deepRS/models/afm.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | """ 3 | 4 | Author: 5 | Weichen Shen,wcshen1994@163.com 6 | 7 | Reference: 8 | [1] Xiao J, Ye H, He X, et al. Attentional factorization machines: Learning the weight of feature interactions via attention networks[J]. arXiv preprint arXiv:1708.04617, 2017. 9 | (https://arxiv.org/abs/1708.04617) 10 | 11 | """ 12 | 13 | from tensorflow.python.keras.layers import Dense, Concatenate, Reshape, add 14 | from tensorflow.python.keras.models import Model 15 | from tensorflow.python.keras.regularizers import l2 16 | 17 | from ..utils import get_input, get_share_embeddings 18 | from ..layers import PredictionLayer, AFMLayer, FM 19 | 20 | 21 | def AFM(feature_dim_dict, embedding_size=8, use_attention=True, attention_factor=8, 22 | l2_reg_linear=1e-5, l2_reg_embedding=1e-5, l2_reg_att=1e-5, keep_prob=1.0, init_std=0.0001, seed=1024, 23 | final_activation='sigmoid',): 24 | """Instantiates the Attentonal Factorization Machine architecture. 
25 | 26 | :param feature_dim_dict: dict,to indicate sparse field and dense field like {'sparse':{'field_1':4,'field_2':3,'field_3':2},'dense':['field_4','field_5']} 27 | :param embedding_size: positive integer,sparse feature embedding_size 28 | :param use_attention: bool,whether use attention or not,if set to ``False``.it is the same as **standard Factorization Machine** 29 | :param attention_factor: positive integer,units in attention net 30 | :param l2_reg_linear: float. L2 regularizer strength applied to linear part 31 | :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vector 32 | :param l2_reg_att: float. L2 regularizer strength applied to attention net 33 | :param keep_prob: float in (0,1]. keep_prob after attention net 34 | :param init_std: float,to use as the initialize std of embedding vector 35 | :param seed: integer ,to use as random seed. 36 | :param final_activation: str,output activation,usually ``'sigmoid'`` or ``'linear'`` 37 | :return: A Keras model instance. 38 | """ 39 | 40 | if not isinstance(feature_dim_dict, 41 | dict) or "sparse" not in feature_dim_dict or "dense" not in feature_dim_dict: 42 | raise ValueError( 43 | "feature_dim_dict must be a dict like {'sparse':{'field_1':4,'field_2':3,'field_3':2},'dense':['field_4','field_5']}") 44 | if not isinstance(feature_dim_dict["sparse"], dict): 45 | raise ValueError("feature_dim_dict['sparse'] must be a dict,cur is", type( 46 | feature_dim_dict['sparse'])) 47 | if not isinstance(feature_dim_dict["dense"], list): 48 | raise ValueError("feature_dim_dict['dense'] must be a list,cur is", type( 49 | feature_dim_dict['dense'])) 50 | 51 | sparse_input, dense_input = get_input(feature_dim_dict, None) 52 | sparse_embedding, linear_embedding, = get_share_embeddings( 53 | feature_dim_dict, embedding_size, init_std, seed, l2_reg_embedding, l2_reg_linear) 54 | 55 | embed_list = [sparse_embedding[i](sparse_input[i]) 56 | for i in range(len(sparse_input))] 57 | linear_term = [linear_embedding[i](sparse_input[i]) 58 | for i in range(len(sparse_input))] 59 | if len(linear_term) > 1: 60 | linear_term = add(linear_term) 61 | elif len(linear_term) == 1: 62 | linear_term = linear_term[0] 63 | 64 | if len(dense_input) > 0: 65 | continuous_embedding_list = list( 66 | map(Dense(embedding_size, use_bias=False, kernel_regularizer=l2(l2_reg_embedding), ), 67 | dense_input)) 68 | continuous_embedding_list = list( 69 | map(Reshape((1, embedding_size)), continuous_embedding_list)) 70 | embed_list += continuous_embedding_list 71 | 72 | dense_input_ = dense_input[0] if len( 73 | dense_input) == 1 else Concatenate()(dense_input) 74 | linear_dense_logit = Dense( 75 | 1, activation=None, use_bias=False, kernel_regularizer=l2(l2_reg_linear))(dense_input_) 76 | linear_term = add([linear_dense_logit, linear_term]) 77 | 78 | fm_input = Concatenate(axis=1)(embed_list) 79 | if use_attention: 80 | fm_out = AFMLayer(attention_factor, l2_reg_att, 81 | keep_prob, seed)(embed_list) 82 | else: 83 | fm_out = FM()(fm_input) 84 | 85 | final_logit = add([linear_term, fm_out]) 86 | output = PredictionLayer(final_activation)(final_logit) 87 | model = Model(inputs=sparse_input + dense_input, outputs=output) 88 | return model 89 | -------------------------------------------------------------------------------- /deepRS/models/pnn.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | """ 3 | Author: 4 | Weichen Shen,wcshen1994@163.com 5 | 6 | Reference: 7 | [1] Qu Y, Cai H, Ren K, et al. 
Product-based neural networks for user response prediction[C]//Data Mining (ICDM), 2016 IEEE 16th International Conference on. IEEE, 2016: 1149-1154.(https://arxiv.org/pdf/1611.00144.pdf) 8 | """ 9 | 10 | from tensorflow.python.keras.layers import Dense, Embedding, Concatenate, Reshape, Flatten 11 | from tensorflow.python.keras.models import Model 12 | from tensorflow.python.keras.initializers import RandomNormal 13 | from tensorflow.python.keras.regularizers import l2 14 | 15 | 16 | from ..layers import PredictionLayer, MLP, InnerProductLayer, OutterProductLayer 17 | from ..utils import get_input 18 | 19 | 20 | def PNN(feature_dim_dict, embedding_size=8, hidden_size=(128, 128), l2_reg_embedding=1e-5, l2_reg_deep=0, 21 | init_std=0.0001, seed=1024, keep_prob=1, activation='relu', 22 | final_activation='sigmoid', use_inner=True, use_outter=False, kernel_type='mat', ): 23 | """Instantiates the Product-based Neural Network architecture. 24 | 25 | :param feature_dim_dict: dict,to indicate sparse field and dense field like {'sparse':{'field_1':4,'field_2':3,'field_3':2},'dense':['field_4','field_5']} 26 | :param embedding_size: positive integer,sparse feature embedding_size 27 | :param hidden_size: list,list of positive integer or empty list, the layer number and units in each layer of deep net 28 | :param l2_reg_embedding: float . L2 regularizer strength applied to embedding vector 29 | :param l2_reg_deep: float. L2 regularizer strength applied to deep net 30 | :param init_std: float,to use as the initialize std of embedding vector 31 | :param seed: integer ,to use as random seed. 32 | :param keep_prob: float in (0,1]. keep_prob used in deep net 33 | :param activation: Activation function to use in deep net 34 | :param final_activation: str,output activation,usually ``'sigmoid'`` or ``'linear'`` 35 | :param use_inner: bool,whether use inner-product or not. 36 | :param use_outter: bool,whether use outter-product or not. 37 | :param kernel_type: str,kernel_type used in outter-product,can be ``'mat'`` , ``'vec'`` or ``'num'`` 38 | :return: A Keras model instance. 
39 | """ 40 | if not isinstance(feature_dim_dict, 41 | dict) or "sparse" not in feature_dim_dict or "dense" not in feature_dim_dict: 42 | raise ValueError( 43 | "feature_dim must be a dict like {'sparse':{'field_1':4,'field_2':3,'field_3':2},'dense':['field_5',]}") 44 | if kernel_type not in ['mat', 'vec', 'num']: 45 | raise ValueError("kernel_type must be mat,vec or num") 46 | sparse_input, dense_input = get_input(feature_dim_dict, None) 47 | sparse_embedding = [Embedding(feature_dim_dict["sparse"][feat], embedding_size, 48 | embeddings_initializer=RandomNormal( 49 | mean=0.0, stddev=init_std, seed=seed), 50 | embeddings_regularizer=l2( 51 | l2_reg_embedding), 52 | name='sparse_emb_' + str(i) + '-' + feat) for i, feat in 53 | enumerate(feature_dim_dict["sparse"])] 54 | 55 | embed_list = [sparse_embedding[i](sparse_input[i]) 56 | for i in range(len(feature_dim_dict["sparse"]))] 57 | 58 | if len(dense_input) > 0: 59 | continuous_embedding_list = list( 60 | map(Dense(embedding_size, use_bias=False, kernel_regularizer=l2(l2_reg_embedding), ), 61 | dense_input)) 62 | continuous_embedding_list = list( 63 | map(Reshape((1, embedding_size)), continuous_embedding_list)) 64 | embed_list += continuous_embedding_list 65 | 66 | inner_product = Flatten()(InnerProductLayer()(embed_list)) 67 | outter_product = OutterProductLayer(kernel_type)(embed_list) 68 | 69 | # ipnn deep input 70 | linear_signal = Reshape( 71 | [len(embed_list)*embedding_size])(Concatenate()(embed_list)) 72 | 73 | if use_inner and use_outter: 74 | deep_input = Concatenate()( 75 | [linear_signal, inner_product, outter_product]) 76 | elif use_inner: 77 | deep_input = Concatenate()([linear_signal, inner_product]) 78 | elif use_outter: 79 | deep_input = Concatenate()([linear_signal, outter_product]) 80 | else: 81 | deep_input = linear_signal 82 | 83 | deep_out = MLP(hidden_size, activation, l2_reg_deep, keep_prob, 84 | False, seed)(deep_input) 85 | deep_logit = Dense(1, use_bias=False, activation=None)(deep_out) 86 | final_logit = deep_logit 87 | output = PredictionLayer(final_activation)(final_logit) 88 | model = Model(inputs=sparse_input + dense_input, 89 | outputs=output) 90 | return model 91 | -------------------------------------------------------------------------------- /deepRS/models/deepfm.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | """ 3 | Author: 4 | Weichen Shen,wcshen1994@163.com 5 | 6 | Reference: 7 | [1] Guo H, Tang R, Ye Y, et al. Deepfm: a factorization-machine based neural network for ctr prediction[J]. arXiv preprint arXiv:1703.04247, 2017.(https://arxiv.org/abs/1703.04247) 8 | 9 | """ 10 | 11 | from tensorflow.python.keras.layers import Dense, Concatenate, Reshape, Flatten, add 12 | from tensorflow.python.keras.models import Model 13 | from tensorflow.python.keras.regularizers import l2 14 | from ..utils import get_input, get_share_embeddings 15 | from ..layers import PredictionLayer, MLP, FM 16 | 17 | 18 | def DeepFM(feature_dim_dict, embedding_size=8, 19 | use_fm=True, hidden_size=(128, 128), l2_reg_linear=0.00001, l2_reg_embedding=0.00001, l2_reg_deep=0, 20 | init_std=0.0001, seed=1024, keep_prob=1, activation='relu', final_activation='sigmoid', use_bn=False): 21 | """Instantiates the DeepFM Network architecture. 
22 | 23 | :param feature_dim_dict: dict,to indicate sparse field and dense field like {'sparse':{'field_1':4,'field_2':3,'field_3':2},'dense':['field_4','field_5']} 24 | :param embedding_size: positive integer,sparse feature embedding_size 25 | :param use_fm: bool,use FM part or not 26 | :param hidden_size: list,list of positive integer or empty list, the layer number and units in each layer of deep net 27 | :param l2_reg_linear: float. L2 regularizer strength applied to linear part 28 | :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vector 29 | :param l2_reg_deep: float. L2 regularizer strength applied to deep net 30 | :param init_std: float,to use as the initialize std of embedding vector 31 | :param seed: integer ,to use as random seed. 32 | :param keep_prob: float in (0,1]. keep_prob used in deep net 33 | :param activation: Activation function to use in deep net 34 | :param final_activation: str,output activation,usually ``'sigmoid'`` or ``'linear'`` 35 | :param use_bn: bool. Whether use BatchNormalization before activation or not.in deep net 36 | :return: A Keras model instance. 37 | """ 38 | if not isinstance(feature_dim_dict, 39 | dict) or "sparse" not in feature_dim_dict or "dense" not in feature_dim_dict: 40 | raise ValueError( 41 | "feature_dim_dict must be a dict like {'sparse':{'field_1':4,'field_2':3,'field_3':2},'dense':['field_5',]}") 42 | if not isinstance(feature_dim_dict["sparse"], dict): 43 | raise ValueError("feature_dim_dict['sparse'] must be a dict,cur is", type( 44 | feature_dim_dict['sparse'])) 45 | if not isinstance(feature_dim_dict["dense"], list): 46 | raise ValueError("feature_dim_dict['dense'] must be a list,cur is", type( 47 | feature_dim_dict['dense'])) 48 | 49 | sparse_input, dense_input = get_input(feature_dim_dict, None) 50 | sparse_embedding, linear_embedding, = get_share_embeddings( 51 | feature_dim_dict, embedding_size, init_std, seed, l2_reg_embedding, l2_reg_linear) 52 | 53 | embed_list = [sparse_embedding[i](sparse_input[i]) 54 | for i in range(len(sparse_input))] 55 | linear_term = [linear_embedding[i](sparse_input[i]) 56 | for i in range(len(sparse_input))] 57 | if len(linear_term) > 1: 58 | linear_term = add(linear_term) 59 | elif len(linear_term) == 1: 60 | linear_term = linear_term[0] 61 | 62 | if len(dense_input) > 0: 63 | continuous_embedding_list = list( 64 | map(Dense(embedding_size, use_bias=False, kernel_regularizer=l2(l2_reg_embedding), ), 65 | dense_input)) 66 | continuous_embedding_list = list( 67 | map(Reshape((1, embedding_size)), continuous_embedding_list)) 68 | embed_list += continuous_embedding_list 69 | 70 | dense_input_ = dense_input[0] if len( 71 | dense_input) == 1 else Concatenate()(dense_input) 72 | linear_dense_logit = Dense( 73 | 1, activation=None, use_bias=False, kernel_regularizer=l2(l2_reg_linear))(dense_input_) 74 | linear_term = add([linear_dense_logit, linear_term]) 75 | 76 | fm_input = Concatenate(axis=1)(embed_list) 77 | deep_input = Flatten()(fm_input) 78 | fm_out = FM()(fm_input) 79 | deep_out = MLP(hidden_size, activation, l2_reg_deep, keep_prob, 80 | use_bn, seed)(deep_input) 81 | deep_logit = Dense(1, use_bias=False, activation=None)(deep_out) 82 | 83 | if len(hidden_size) == 0 and use_fm == False: # only linear 84 | final_logit = linear_term 85 | elif len(hidden_size) == 0 and use_fm == True: # linear + FM 86 | final_logit = add([linear_term, fm_out]) 87 | elif len(hidden_size) > 0 and use_fm == False: # linear + Deep 88 | final_logit = add([linear_term, deep_logit]) 89 | elif 
len(hidden_size) > 0 and use_fm == True: # linear + FM + Deep 90 | final_logit = add([linear_term, fm_out, deep_logit]) 91 | else: 92 | raise NotImplementedError 93 | 94 | output = PredictionLayer(final_activation)(final_logit) 95 | model = Model(inputs=sparse_input + dense_input, outputs=output) 96 | return model 97 | -------------------------------------------------------------------------------- /deepRS/models/xdeepfm.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | """ 3 | Author: 4 | Weichen Shen,wcshen1994@163.com 5 | 6 | Reference: 7 | [1] Lian J, Zhou X, Zhang F, et al. xDeepFM: Combining Explicit and Implicit Feature Interactions for Recommender Systems[J]. arXiv preprint arXiv:1803.05170, 2018.(https://arxiv.org/pdf/1803.05170.pdf) 8 | """ 9 | from tensorflow.python.keras.layers import Dense, Concatenate, Flatten, add, Reshape 10 | from tensorflow.python.keras.models import Model 11 | from tensorflow.python.keras.regularizers import l2 12 | from deepctr.utils import get_input, get_share_embeddings 13 | from deepctr.layers import PredictionLayer, MLP, CIN 14 | 15 | 16 | def xDeepFM(feature_dim_dict, embedding_size=8, hidden_size=(256, 256), cin_layer_size=(128, 128,), cin_split_half=True, cin_activation='relu', l2_reg_linear=0.00001, l2_reg_embedding=0.00001, l2_reg_deep=0, init_std=0.0001, seed=1024, keep_prob=1, activation='relu', final_activation='sigmoid', use_bn=False): 17 | """Instantiates the xDeepFM architecture. 18 | 19 | :param feature_dim_dict: dict,to indicate sparse field and dense field like {'sparse':{'field_1':4,'field_2':3,'field_3':2},'dense':['field_4','field_5']} 20 | :param embedding_size: positive integer,sparse feature embedding_size 21 | :param hidden_size: list,list of positive integer or empty list, the layer number and units in each layer of deep net 22 | :param cin_layer_size: list,list of positive integer or empty list, the feature maps in each hidden layer of Compressed Interaction Network 23 | :param cin_split_half: bool.if set to True, half of the feature maps in each hidden will connect to output unit 24 | :param cin_activation: activation function used on feature maps 25 | :param l2_reg_linear: float. L2 regularizer strength applied to linear part 26 | :param l2_reg_embedding: L2 regularizer strength applied to embedding vector 27 | :param l2_reg_deep: L2 regularizer strength applied to deep net 28 | :param init_std: float,to use as the initialize std of embedding vector 29 | :param seed: integer ,to use as random seed. 30 | :param keep_prob: float in (0,1]. keep_prob used in deep net 31 | :param activation: Activation function to use in deep net 32 | :param final_activation: str,output activation,usually ``'sigmoid'`` or ``'linear'`` 33 | :param use_bn: bool. Whether use BatchNormalization before activation or not.in deep net 34 | :return: A Keras model instance. 
35 | """ 36 | if not isinstance(feature_dim_dict, dict) or "sparse" not in feature_dim_dict or "dense" not in feature_dim_dict: 37 | raise ValueError( 38 | "feature_dim must be a dict like {'sparse':{'field_1':4,'field_2':3,'field_3':2},'dense':['field_5',]}") 39 | sparse_input, dense_input = get_input(feature_dim_dict, None) 40 | sparse_embedding, linear_embedding, = get_share_embeddings(feature_dim_dict, embedding_size, init_std, seed, l2_reg_embedding, 41 | l2_reg_linear) 42 | 43 | embed_list = [sparse_embedding[i](sparse_input[i]) 44 | for i in range(len(sparse_input))] 45 | linear_term = [linear_embedding[i](sparse_input[i]) 46 | for i in range(len(sparse_input))] 47 | if len(linear_term) > 1: 48 | linear_term = add(linear_term) 49 | elif len(linear_term) == 1: 50 | linear_term = linear_term[0] 51 | 52 | if len(dense_input) > 0: 53 | continuous_embedding_list = list( 54 | map(Dense(embedding_size, use_bias=False, kernel_regularizer=l2(l2_reg_embedding), ), 55 | dense_input)) 56 | continuous_embedding_list = list( 57 | map(Reshape((1, embedding_size)), continuous_embedding_list)) 58 | embed_list += continuous_embedding_list 59 | 60 | dense_input_ = dense_input[0] if len( 61 | dense_input) == 1 else Concatenate()(dense_input) 62 | linear_dense_logit = Dense( 63 | 1, activation=None, use_bias=False, kernel_regularizer=l2(l2_reg_linear))(dense_input_) 64 | linear_term = add([linear_dense_logit, linear_term]) 65 | 66 | linear_logit = linear_term 67 | 68 | fm_input = Concatenate(axis=1)(embed_list) if len( 69 | embed_list) > 1 else embed_list[0] 70 | 71 | if len(cin_layer_size) > 0: 72 | exFM_out = CIN(cin_layer_size, cin_activation, 73 | cin_split_half, seed)(fm_input) 74 | exFM_logit = Dense(1, activation=None,)(exFM_out) 75 | 76 | deep_input = Flatten()(fm_input) 77 | deep_out = MLP(hidden_size, activation, l2_reg_deep, keep_prob, 78 | use_bn, seed)(deep_input) 79 | deep_logit = Dense(1, use_bias=False, activation=None)(deep_out) 80 | 81 | if len(hidden_size) == 0 and len(cin_layer_size) == 0: # only linear 82 | final_logit = linear_logit 83 | elif len(hidden_size) == 0 and len(cin_layer_size) > 0: # linear + CIN 84 | final_logit = add([linear_logit, exFM_logit]) 85 | elif len(hidden_size) > 0 and len(cin_layer_size) == 0: # linear + Deep 86 | final_logit = add([linear_logit, deep_logit]) 87 | elif len(hidden_size) > 0 and len(cin_layer_size) > 0: # linear + CIN + Deep 88 | final_logit = add([linear_logit, deep_logit, exFM_logit]) 89 | else: 90 | raise NotImplementedError 91 | 92 | output = PredictionLayer(final_activation)(final_logit) 93 | model = Model(inputs=sparse_input + dense_input, outputs=output) 94 | return model 95 | -------------------------------------------------------------------------------- /deepRS/models/autoint.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | """ 3 | 4 | Author: 5 | Weichen Shen,wcshen1994@163.com 6 | 7 | Reference: 8 | [1] Song W, Shi C, Xiao Z, et al. AutoInt: Automatic Feature Interaction Learning via Self-Attentive Neural Networks[J]. 
arXiv preprint arXiv:1810.11921, 2018.(https://arxiv.org/abs/1810.11921) 9 | 10 | """ 11 | 12 | from tensorflow.python.keras.layers import Dense, Embedding, Concatenate 13 | from tensorflow.python.keras.models import Model 14 | from tensorflow.python.keras.initializers import RandomNormal 15 | from tensorflow.python.keras.regularizers import l2 16 | import tensorflow as tf 17 | 18 | from ..utils import get_input 19 | from ..layers import PredictionLayer, MLP, InteractingLayer 20 | 21 | 22 | def AutoInt(feature_dim_dict, embedding_size=8, att_layer_num=3, att_embedding_size=8, att_head_num=2, att_res=True, hidden_size=(256, 256), activation='relu', 23 | l2_reg_deep=0, l2_reg_embedding=1e-5, use_bn=False, keep_prob=1.0, init_std=0.0001, seed=1024, 24 | final_activation='sigmoid',): 25 | """Instantiates the AutoInt Network architecture. 26 | 27 | :param feature_dim_dict: dict,to indicate sparse field and dense field like {'sparse':{'field_1':4,'field_2':3,'field_3':2},'dense':['field_4','field_5']} 28 | :param embedding_size: positive integer,sparse feature embedding_size 29 | :param att_layer_num: int.The InteractingLayer number to be used. 30 | :param att_embedding_size: int.The embedding size in multi-head self-attention network. 31 | :param att_head_num: int.The head number in multi-head self-attention network. 32 | :param att_res: bool.Whether or not use standard residual connections before output. 33 | :param hidden_size: list,list of positive integer or empty list, the layer number and units in each layer of deep net 34 | :param activation: Activation function to use in deep net 35 | :param l2_reg_deep: float. L2 regularizer strength applied to deep net 36 | :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vector 37 | :param use_bn: bool. Whether use BatchNormalization before activation or not.in deep net 38 | :param keep_prob: float in (0,1]. keep_prob used in deep net 39 | :param init_std: float,to use as the initialize std of embedding vector 40 | :param seed: integer ,to use as random seed. 41 | :param final_activation: output activation,usually ``'sigmoid'`` or ``'linear'`` 42 | :return: A Keras model instance. 
43 | """ 44 | 45 | if len(hidden_size) <= 0 and att_layer_num <= 0: 46 | raise ValueError("Either hidden_layer or att_layer_num must > 0") 47 | if not isinstance(feature_dim_dict, dict) or "sparse" not in feature_dim_dict or "dense" not in feature_dim_dict: 48 | raise ValueError( 49 | "feature_dim must be a dict like {'sparse':{'field_1':4,'field_2':3,'field_3':2},'dense':['field_5',]}") 50 | 51 | sparse_input, dense_input = get_input(feature_dim_dict, None,) 52 | sparse_embedding = get_embeddings( 53 | feature_dim_dict, embedding_size, init_std, seed, l2_reg_embedding) 54 | embed_list = [sparse_embedding[i](sparse_input[i]) 55 | for i in range(len(sparse_input))] 56 | 57 | att_input = Concatenate(axis=1)(embed_list) if len( 58 | embed_list) > 1 else embed_list[0] 59 | 60 | for i in range(att_layer_num): 61 | att_input = InteractingLayer( 62 | att_embedding_size, att_head_num, att_res)(att_input) 63 | att_output = tf.keras.layers.Flatten()(att_input) 64 | 65 | deep_input = tf.keras.layers.Flatten()(Concatenate()(embed_list) 66 | if len(embed_list) > 1 else embed_list[0]) 67 | if len(dense_input) > 0: 68 | if len(dense_input) == 1: 69 | continuous_list = dense_input[0] 70 | else: 71 | continuous_list = Concatenate()(dense_input) 72 | 73 | deep_input = Concatenate()([deep_input, continuous_list]) 74 | 75 | if len(hidden_size) > 0 and att_layer_num > 0: # Deep & Interacting Layer 76 | deep_out = MLP(hidden_size, activation, l2_reg_deep, keep_prob, 77 | use_bn, seed)(deep_input) 78 | stack_out = Concatenate()([att_output, deep_out]) 79 | final_logit = Dense(1, use_bias=False, activation=None)(stack_out) 80 | elif len(hidden_size) > 0: # Only Deep 81 | deep_out = MLP(hidden_size, activation, l2_reg_deep, keep_prob, 82 | use_bn, seed)(deep_input) 83 | final_logit = Dense(1, use_bias=False, activation=None)(deep_out) 84 | elif att_layer_num > 0: # Only Interacting Layer 85 | final_logit = Dense(1, use_bias=False, activation=None)(att_output) 86 | else: # Error 87 | raise NotImplementedError 88 | 89 | output = PredictionLayer(final_activation)(final_logit) 90 | model = Model(inputs=sparse_input + dense_input, outputs=output) 91 | 92 | return model 93 | 94 | 95 | def get_embeddings(feature_dim_dict, embedding_size, init_std, seed, l2_rev_V): 96 | sparse_embedding = [Embedding(feature_dim_dict["sparse"][feat], embedding_size, 97 | embeddings_initializer=RandomNormal( 98 | mean=0.0, stddev=init_std, seed=seed), 99 | embeddings_regularizer=l2(l2_rev_V), 100 | name='sparse_emb_' + str(i) + '-' + feat) for i, feat in 101 | enumerate(feature_dim_dict["sparse"])] 102 | 103 | return sparse_embedding 104 | -------------------------------------------------------------------------------- /deepRS/utils.py: -------------------------------------------------------------------------------- 1 | import json 2 | import logging 3 | from threading import Thread 4 | 5 | import requests 6 | from tensorflow.python.keras.initializers import RandomNormal 7 | from tensorflow.python.keras.layers import Embedding, Input 8 | 9 | from .activations import * 10 | from .layers import * 11 | from .sequence import * 12 | 13 | try: 14 | from packaging.version import parse 15 | except ImportError: 16 | from pip._vendor.packaging.version import parse 17 | 18 | custom_objects = {'InnerProductLayer': InnerProductLayer, 19 | 'OutterProductLayer': OutterProductLayer, 20 | 'MLP': MLP, 21 | 'PredictionLayer': PredictionLayer, 22 | 'FM': FM, 23 | 'AFMLayer': AFMLayer, 24 | 'CrossNet': CrossNet, 25 | 'BiInteractionPooling': 
BiInteractionPooling, 26 | 'LocalActivationUnit': LocalActivationUnit, 27 | 'Dice': Dice, 28 | 'SequencePoolingLayer': SequencePoolingLayer, 29 | 'AttentionSequencePoolingLayer': AttentionSequencePoolingLayer, 30 | 'CIN': CIN, 31 | 'InteractingLayer': InteractingLayer} 32 | 33 | 34 | def get_input(feature_dim_dict, bias_feature_dim_dict=None): 35 | sparse_input = [Input(shape=(1,), name='sparse_' + str(i) + '-' + feat) for i, feat in 36 | enumerate(feature_dim_dict["sparse"])] 37 | dense_input = [Input(shape=(1,), name='dense_' + str(i) + '-' + feat) for i, feat in 38 | enumerate(feature_dim_dict["dense"])] 39 | if bias_feature_dim_dict is None: 40 | return sparse_input, dense_input 41 | else: 42 | bias_sparse_input = [Input(shape=(1,), name='bias_sparse_' + str(i) + '-' + feat) for i, feat in 43 | enumerate(bias_feature_dim_dict["sparse"])] 44 | bias_dense_input = [Input(shape=(1,), name='bias_dense_' + str(i) + '-' + feat) for i, feat in 45 | enumerate(bias_feature_dim_dict["dense"])] 46 | return sparse_input, dense_input, bias_sparse_input, bias_dense_input 47 | 48 | 49 | def get_share_embeddings(feature_dim_dict, embedding_size, init_std, seed, l2_rev_V, l2_reg_w): 50 | sparse_embedding = [Embedding(feature_dim_dict["sparse"][feat], embedding_size, 51 | embeddings_initializer=RandomNormal( 52 | mean=0.0, stddev=init_std, seed=seed), 53 | embeddings_regularizer=l2(l2_rev_V), 54 | name='sparse_emb_' + str(i) + '-' + feat) for i, feat in 55 | enumerate(feature_dim_dict["sparse"])] 56 | linear_embedding = [Embedding(feature_dim_dict["sparse"][feat], 1, 57 | embeddings_initializer=RandomNormal(mean=0.0, stddev=init_std, 58 | seed=seed), embeddings_regularizer=l2(l2_reg_w), 59 | name='linear_emb_' + str(i) + '-' + feat) for 60 | i, feat in enumerate(feature_dim_dict["sparse"])] 61 | 62 | return sparse_embedding, linear_embedding 63 | 64 | 65 | def get_sep_embeddings(deep_feature_dim_dict, wide_feature_dim_dict, embedding_size, init_std, seed, l2_rev_V, l2_reg_w): 66 | sparse_embedding = [Embedding(deep_feature_dim_dict["sparse"][feat], embedding_size, 67 | embeddings_initializer=RandomNormal( 68 | mean=0.0, stddev=init_std, seed=seed), 69 | embeddings_regularizer=l2(l2_rev_V), 70 | name='sparse_emb_' + str(i) + '-' + feat) for i, feat in 71 | enumerate(deep_feature_dim_dict["sparse"])] 72 | linear_embedding = [Embedding(wide_feature_dim_dict["sparse"][feat], 1, 73 | embeddings_initializer=RandomNormal(mean=0.0, stddev=init_std, 74 | seed=seed), embeddings_regularizer=l2(l2_reg_w), 75 | name='linear_emb_' + str(i) + '-' + feat) for 76 | i, feat in enumerate(wide_feature_dim_dict["sparse"])] 77 | 78 | return sparse_embedding, linear_embedding 79 | 80 | 81 | def check_version(version): 82 | """Return version of package on pypi.python.org using json.""" 83 | 84 | def check(version): 85 | try: 86 | url_pattern = 'https://pypi.python.org/pypi/deepctr/json' 87 | req = requests.get(url_pattern) 88 | latest_version = parse('0') 89 | version = parse(version) 90 | if req.status_code == requests.codes.ok: 91 | j = json.loads(req.text.encode('utf-8')) 92 | releases = j.get('releases', []) 93 | for release in releases: 94 | ver = parse(release) 95 | if not ver.is_prerelease: 96 | latest_version = max(latest_version, ver) 97 | if latest_version > version: 98 | logging.warning('\nDeepCTR version {0} detected. 
Your version is {1}.\nUse `pip install -U deepctr` to upgrade.Changelog: https://github.com/shenweichen/DeepCTR/releases/tag/v{0}'.format( 99 | latest_version, version)) 100 | except Exception: 101 | return 102 | Thread(target=check, args=(version,)).start() 103 | -------------------------------------------------------------------------------- /docs/source/conf.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # Configuration file for the Sphinx documentation builder. 4 | # 5 | # This file does only contain a selection of the most common options. For a 6 | # full list see the documentation: 7 | # http://www.sphinx-doc.org/en/master/config 8 | 9 | # -- Path setup -------------------------------------------------------------- 10 | 11 | # If extensions (or modules to document with autodoc) are in another directory, 12 | # add these directories to sys.path here. If the directory is relative to the 13 | # documentation root, use os.path.abspath to make it absolute, like shown here. 14 | # 15 | import os 16 | import sys 17 | sys.path.insert(0, os.path.abspath('../../')) 18 | 19 | 20 | # -- Project information ----------------------------------------------------- 21 | 22 | project = 'DeepCTR' 23 | copyright = '2018, Weichen Shen' 24 | author = 'Weichen Shen' 25 | 26 | # The short X.Y version 27 | version = '' 28 | # The full version, including alpha/beta/rc tags 29 | release = '0.2.1' 30 | 31 | 32 | # -- General configuration --------------------------------------------------- 33 | 34 | # If your documentation needs a minimal Sphinx version, state it here. 35 | # 36 | # needs_sphinx = '1.0' 37 | 38 | # Add any Sphinx extension module names here, as strings. They can be 39 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 40 | # ones. 41 | extensions = [ 42 | 'sphinx.ext.autodoc', 43 | 'sphinx.ext.mathjax', 44 | 'sphinx.ext.ifconfig', 45 | 'sphinx.ext.viewcode', 46 | 'sphinx.ext.githubpages', 47 | ] 48 | 49 | # Add any paths that contain templates here, relative to this directory. 50 | templates_path = ['_templates'] 51 | 52 | # The suffix(es) of source filenames. 53 | # You can specify multiple suffix as a list of string: 54 | # 55 | source_suffix = ['.rst', '.md'] 56 | #source_suffix = '.rst' 57 | 58 | # The master toctree document. 59 | master_doc = 'index' 60 | 61 | # The language for content autogenerated by Sphinx. Refer to documentation 62 | # for a list of supported languages. 63 | # 64 | # This is also used if you do content translation via gettext catalogs. 65 | # Usually you set "language" from the command line for these cases. 66 | language = None 67 | 68 | # List of patterns, relative to source directory, that match files and 69 | # directories to ignore when looking for source files. 70 | # This pattern also affects html_static_path and html_extra_path . 71 | exclude_patterns = [] 72 | 73 | # The name of the Pygments (syntax highlighting) style to use. 74 | pygments_style = 'sphinx' 75 | 76 | 77 | # -- Options for HTML output ------------------------------------------------- 78 | 79 | # The theme to use for HTML and HTML Help pages. See the documentation for 80 | # a list of builtin themes. 81 | # 82 | html_theme = 'alabaster' 83 | 84 | # Theme options are theme-specific and customize the look and feel of a theme 85 | # further. For a list of options available for each theme, see the 86 | # documentation. 
87 | # 88 | # html_theme_options = {} 89 | 90 | # Add any paths that contain custom static files (such as style sheets) here, 91 | # relative to this directory. They are copied after the builtin static files, 92 | # so a file named "default.css" will overwrite the builtin "default.css". 93 | html_static_path = ['_static'] 94 | 95 | # Custom sidebar templates, must be a dictionary that maps document names 96 | # to template names. 97 | # 98 | # The default sidebars (for documents that don't match any pattern) are 99 | # defined by theme itself. Builtin themes are using these templates by 100 | # default: ``['localtoc.html', 'relations.html', 'sourcelink.html', 101 | # 'searchbox.html']``. 102 | # 103 | # html_sidebars = {} 104 | 105 | 106 | # -- Options for HTMLHelp output --------------------------------------------- 107 | 108 | # Output file base name for HTML help builder. 109 | htmlhelp_basename = 'DeepCTRdoc' 110 | 111 | 112 | # -- Options for LaTeX output ------------------------------------------------ 113 | 114 | latex_elements = { 115 | # The paper size ('letterpaper' or 'a4paper'). 116 | # 117 | # 'papersize': 'letterpaper', 118 | 119 | # The font size ('10pt', '11pt' or '12pt'). 120 | # 121 | # 'pointsize': '10pt', 122 | 123 | # Additional stuff for the LaTeX preamble. 124 | # 125 | # 'preamble': '', 126 | 127 | # Latex figure (float) alignment 128 | # 129 | # 'figure_align': 'htbp', 130 | } 131 | 132 | # Grouping the document tree into LaTeX files. List of tuples 133 | # (source start file, target name, title, 134 | # author, documentclass [howto, manual, or own class]). 135 | latex_documents = [ 136 | (master_doc, 'DeepCTR.tex', 'DeepCTR Documentation', 137 | 'Weichen Shen', 'manual'), 138 | ] 139 | 140 | 141 | # -- Options for manual page output ------------------------------------------ 142 | 143 | # One entry per manual page. List of tuples 144 | # (source start file, name, description, authors, manual section). 145 | man_pages = [ 146 | (master_doc, 'deepctr', 'DeepCTR Documentation', 147 | [author], 1) 148 | ] 149 | 150 | 151 | # -- Options for Texinfo output ---------------------------------------------- 152 | 153 | # Grouping the document tree into Texinfo files. List of tuples 154 | # (source start file, target name, title, author, 155 | # dir menu entry, description, category) 156 | texinfo_documents = [ 157 | (master_doc, 'DeepCTR', 'DeepCTR Documentation', 158 | author, 'DeepCTR', 'One line description of project.', 159 | 'Miscellaneous'), 160 | ] 161 | 162 | 163 | # -- Extension configuration ------------------------------------------------- 164 | todo_include_todos = False 165 | html_theme = 'sphinx_rtd_theme' 166 | 167 | source_parsers = { 168 | '.md': 'recommonmark.parser.CommonMarkParser', 169 | } 170 | -------------------------------------------------------------------------------- /deepRS/models/dcn.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | """ 3 | Author: 4 | Weichen Shen,wcshen1994@163.com 5 | 6 | Reference: 7 | [1] Wang R, Fu B, Fu G, et al. Deep & cross network for ad click predictions[C]//Proceedings of the ADKDD'17. ACM, 2017: 12. 
(https://arxiv.org/abs/1708.05123) 8 | """ 9 | from tensorflow.python.keras.layers import Dense, Embedding, Concatenate, Flatten 10 | from tensorflow.python.keras.models import Model 11 | from tensorflow.python.keras.initializers import RandomNormal 12 | from tensorflow.python.keras.regularizers import l2 13 | 14 | from ..utils import get_input 15 | from ..layers import CrossNet, PredictionLayer, MLP 16 | 17 | 18 | def DCN(feature_dim_dict, embedding_size='auto', 19 | cross_num=2, hidden_size=(128, 128, ), l2_reg_embedding=1e-5, l2_reg_cross=1e-5, l2_reg_deep=0, 20 | init_std=0.0001, seed=1024, keep_prob=1, use_bn=False, activation='relu', final_activation='sigmoid', 21 | ): 22 | """Instantiates the Deep&Cross Network architecture. 23 | 24 | :param feature_dim_dict: dict, to indicate sparse field and dense field like {'sparse':{'field_1':4,'field_2':3,'field_3':2},'dense':['field_4','field_5']} 25 | :param embedding_size: positive int or str, sparse feature embedding_size. If set to "auto", it will be 6*pow(cardinality, 0.25) 26 | :param cross_num: positive integer, cross layer number 27 | :param hidden_size: list of positive integers or empty list, the layer number and units in each layer of the deep net 28 | :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vector 29 | :param l2_reg_cross: float. L2 regularizer strength applied to cross net 30 | :param l2_reg_deep: float. L2 regularizer strength applied to deep net 31 | :param init_std: float, to use as the initialization std of embedding vector 32 | :param seed: integer, to use as random seed. 33 | :param keep_prob: float in (0,1]. keep_prob used in deep net 34 | :param use_bn: bool. Whether to use BatchNormalization before activation in the deep net 35 | :param activation: Activation function to use in deep net 36 | :param final_activation: str, output activation, usually ``'sigmoid'`` or ``'linear'`` 37 | :return: A Keras model instance.
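    Usage sketch (illustrative addition, not from the original docs; the field names, cardinalities and random data below are invented)::

        import numpy as np

        model = DCN({'sparse': {'field_1': 4, 'field_2': 3}, 'dense': []},
                    embedding_size=8, cross_num=2, hidden_size=(128, 128))
        model.compile('adam', 'binary_crossentropy')
        x = [np.random.randint(0, 4, (32, 1)), np.random.randint(0, 3, (32, 1))]
        y = np.random.randint(0, 2, (32, 1))
        model.fit(x, y, batch_size=32, epochs=1)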
38 | 39 | """ 40 | if len(hidden_size) == 0 and cross_num == 0: 41 | raise ValueError("Either hidden_size or cross_num must be greater than 0") 42 | if not isinstance(feature_dim_dict, dict) or "sparse" not in feature_dim_dict or "dense" not in feature_dim_dict: 43 | raise ValueError( 44 | "feature_dim must be a dict like {'sparse':{'field_1':4,'field_2':3,'field_3':2},'dense':['field_5',]}") 45 | 46 | sparse_input, dense_input = get_input(feature_dim_dict, None,) 47 | sparse_embedding = get_embeddings( 48 | feature_dim_dict, embedding_size, init_std, seed, l2_reg_embedding) 49 | embed_list = [sparse_embedding[i](sparse_input[i]) 50 | for i in range(len(sparse_input))] 51 | 52 | deep_input = Flatten()(Concatenate()(embed_list) 53 | if len(embed_list) > 1 else embed_list[0]) 54 | if len(dense_input) > 0: 55 | if len(dense_input) == 1: 56 | continuous_list = dense_input[0] 57 | else: 58 | continuous_list = Concatenate()(dense_input) 59 | 60 | deep_input = Concatenate()([deep_input, continuous_list]) 61 | 62 | if len(hidden_size) > 0 and cross_num > 0: # Deep & Cross 63 | deep_out = MLP(hidden_size, activation, l2_reg_deep, keep_prob, 64 | use_bn, seed)(deep_input) 65 | cross_out = CrossNet(cross_num, l2_reg=l2_reg_cross)(deep_input) 66 | stack_out = Concatenate()([cross_out, deep_out]) 67 | final_logit = Dense(1, use_bias=False, activation=None)(stack_out) 68 | elif len(hidden_size) > 0: # Only Deep 69 | deep_out = MLP(hidden_size, activation, l2_reg_deep, keep_prob, 70 | use_bn, seed)(deep_input) 71 | final_logit = Dense(1, use_bias=False, activation=None)(deep_out) 72 | elif cross_num > 0: # Only Cross 73 | cross_out = CrossNet(cross_num, l2_reg=l2_reg_cross)(deep_input) 74 | final_logit = Dense(1, use_bias=False, activation=None)(cross_out) 75 | else: # Error 76 | raise NotImplementedError 77 | 78 | # Activation(self.final_activation)(final_logit) 79 | output = PredictionLayer(final_activation)(final_logit) 80 | model = Model(inputs=sparse_input + dense_input, outputs=output) 81 | 82 | return model 83 | 84 | 85 | def get_embeddings(feature_dim_dict, embedding_size, init_std, seed, l2_rev_V): 86 | if embedding_size == "auto": 87 | sparse_embedding = [Embedding(feature_dim_dict["sparse"][feat], 6*int(pow(feature_dim_dict["sparse"][feat], 0.25)), 88 | embeddings_initializer=RandomNormal( 89 | mean=0.0, stddev=init_std, seed=seed), 90 | embeddings_regularizer=l2(l2_rev_V), name='sparse_emb_' + str(i) + '-'+feat) for i, feat in 91 | enumerate(feature_dim_dict["sparse"])] 92 | 93 | print("Using auto embedding size, the connected vector dimension is", sum( 94 | [6*int(pow(feature_dim_dict["sparse"][k], 0.25)) for k, v in feature_dim_dict["sparse"].items()])) 95 | else: 96 | sparse_embedding = [Embedding(feature_dim_dict["sparse"][feat], embedding_size, 97 | embeddings_initializer=RandomNormal( 98 | mean=0.0, stddev=init_std, seed=seed), 99 | embeddings_regularizer=l2(l2_rev_V), 100 | name='sparse_emb_' + str(i) + '-' + feat) for i, feat in 101 | enumerate(feature_dim_dict["sparse"])] 102 | 103 | return sparse_embedding 104 | -------------------------------------------------------------------------------- /tests/layers_test.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from tensorflow.python.keras.layers import PReLU 3 | from tensorflow.python.keras.utils import CustomObjectScope 4 | 5 | from deepctr import layers 6 | from deepctr.activations import Dice 7 | 8 | from .utils import layer_test 9 | 10 | BATCH_SIZE = 4 11 | FIELD_SIZE = 3 12 |
EMBEDDING_SIZE = 8 13 | SEQ_LENGTH = 10 14 | 15 | 16 | @pytest.mark.parametrize( 17 | 18 | 'layer_num,l2_reg', 19 | 20 | [(layer_num, l2_reg) 21 | 22 | for layer_num in [0, 1, 2, ] 23 | 24 | for l2_reg in [0, 1, ] 25 | ] 26 | 27 | ) 28 | def test_CrossNet(layer_num, l2_reg,): 29 | with CustomObjectScope({'CrossNet': layers.CrossNet}): 30 | layer_test(layers.CrossNet, kwargs={ 31 | 'layer_num': layer_num, 'l2_reg': l2_reg}, input_shape=(2, 3)) 32 | 33 | 34 | def test_CrossNet_invalid(): 35 | with pytest.raises(ValueError): 36 | with CustomObjectScope({'CrossNet': layers.CrossNet}): 37 | layer_test(layers.CrossNet, kwargs={ 38 | 'layer_num': 1, 'l2_reg': 0}, input_shape=(2, 3, 4)) 39 | 40 | 41 | @pytest.mark.parametrize( 42 | 'hidden_size,activation', 43 | [(hidden_size, activation) 44 | for hidden_size in [(), (10,)] 45 | for activation in ['sigmoid', Dice, PReLU] 46 | ] 47 | ) 48 | def test_LocalActivationUnit(hidden_size, activation): 49 | with CustomObjectScope({'LocalActivationUnit': layers.LocalActivationUnit}): 50 | layer_test(layers.LocalActivationUnit, kwargs={'hidden_size': hidden_size, 'activation': activation}, 51 | input_shape=[(BATCH_SIZE, 1, EMBEDDING_SIZE), (BATCH_SIZE, SEQ_LENGTH, EMBEDDING_SIZE)]) 52 | 53 | 54 | @pytest.mark.parametrize( 55 | 'reduce_sum', 56 | [reduce_sum 57 | for reduce_sum in [True, False] 58 | ] 59 | ) 60 | def test_InnerProductLayer(reduce_sum): 61 | with CustomObjectScope({'InnerProductLayer': layers.InnerProductLayer}): 62 | layer_test(layers.InnerProductLayer, kwargs={ 63 | 'reduce_sum': reduce_sum}, input_shape=[(BATCH_SIZE, 1, EMBEDDING_SIZE)]*FIELD_SIZE) 64 | 65 | 66 | @pytest.mark.parametrize( 67 | 'kernel_type', 68 | [kernel_type 69 | for kernel_type in ['mat', 'vec', 'num'] 70 | ] 71 | ) 72 | def test_OutterProductLayer(kernel_type): 73 | with CustomObjectScope({'OutterProductLayer': layers.OutterProductLayer}): 74 | layer_test(layers.OutterProductLayer, kwargs={ 75 | 'kernel_type': kernel_type}, input_shape=[(BATCH_SIZE, 1, EMBEDDING_SIZE)]*FIELD_SIZE) 76 | 77 | 78 | def test_BiInteractionPooling(): 79 | with CustomObjectScope({'BiInteractionPooling': layers.BiInteractionPooling}): 80 | layer_test(layers.BiInteractionPooling, kwargs={}, 81 | input_shape=(BATCH_SIZE, FIELD_SIZE, EMBEDDING_SIZE)) 82 | 83 | 84 | @pytest.mark.parametrize( 85 | 'hidden_size,use_bn', 86 | [(hidden_size, use_bn) 87 | for hidden_size in [(), (10,)] 88 | for use_bn in [True, False] 89 | ] 90 | ) 91 | def test_MLP(hidden_size, use_bn): 92 | with CustomObjectScope({'MLP': layers.MLP}): 93 | layer_test(layers.MLP, kwargs={'hidden_size': hidden_size, 'use_bn': use_bn}, input_shape=( 94 | BATCH_SIZE, EMBEDDING_SIZE)) 95 | 96 | 97 | @pytest.mark.parametrize( 98 | 'activation,use_bias', 99 | [(activation, use_bias) 100 | for activation in ['sigmoid', PReLU] 101 | for use_bias in [True, False] 102 | ] 103 | ) 104 | def test_PredictionLayer(activation, use_bias): 105 | with CustomObjectScope({'PredictionLayer': layers.PredictionLayer}): 106 | layer_test(layers.PredictionLayer, kwargs={'activation': activation, 'use_bias': use_bias 107 | }, input_shape=(BATCH_SIZE, 1)) 108 | 109 | 110 | @pytest.mark.xfail(reason="dim size must be 1 except for the batch size dim") 111 | def test_test_PredictionLayer_invalid(): 112 | # with pytest.raises(ValueError): 113 | with CustomObjectScope({'PredictionLayer': layers.PredictionLayer}): 114 | layer_test(layers.PredictionLayer, kwargs={'use_bias': True 115 | }, input_shape=(BATCH_SIZE, 2, 1)) 116 | 117 | 118 | def test_FM(): 119 | with
CustomObjectScope({'FM': layers.FM}): 120 | layer_test(layers.FM, kwargs={}, input_shape=( 121 | BATCH_SIZE, FIELD_SIZE, EMBEDDING_SIZE)) 122 | 123 | 124 | def test_AFMLayer(): 125 | with CustomObjectScope({'AFMLayer': layers.AFMLayer}): 126 | layer_test(layers.AFMLayer, kwargs={}, input_shape=[( 127 | BATCH_SIZE, 1, EMBEDDING_SIZE)]*FIELD_SIZE) 128 | 129 | 130 | @pytest.mark.parametrize( 131 | 'layer_size,activation,split_half', 132 | [(layer_size, activation, split_half) 133 | for activation in ['linear', PReLU] 134 | for split_half in [True, False] 135 | for layer_size in [(10,), (10, 8)] 136 | ] 137 | ) 138 | def test_CIN(layer_size, activation, split_half): 139 | with CustomObjectScope({'CIN': layers.CIN}): 140 | layer_test(layers.CIN, kwargs={"layer_size": layer_size, "activation": 141 | activation, "split_half": split_half}, input_shape=( 142 | BATCH_SIZE, FIELD_SIZE, EMBEDDING_SIZE)) 143 | 144 | 145 | @pytest.mark.parametrize( 146 | 'layer_size', 147 | [(), (3, 10) 148 | ] 149 | ) 150 | def test_test_CIN_invalid(layer_size): 151 | with pytest.raises(ValueError): 152 | with CustomObjectScope({'CIN': layers.CIN}): 153 | layer_test(layers.CIN, kwargs={"layer_size": layer_size}, input_shape=( 154 | BATCH_SIZE, FIELD_SIZE, EMBEDDING_SIZE)) 155 | 156 | 157 | @pytest.mark.parametrize( 158 | 'head_num,use_res', 159 | [(head_num, use_res,) 160 | for head_num in [1, 2] 161 | for use_res in [True, False] 162 | ] 163 | ) 164 | def test_InteractingLayer(head_num, use_res,): 165 | with CustomObjectScope({'InteractingLayer': layers.InteractingLayer}): 166 | layer_test(layers.InteractingLayer, kwargs={"head_num": head_num, "use_res": 167 | use_res, }, input_shape=( 168 | BATCH_SIZE, FIELD_SIZE, EMBEDDING_SIZE)) 169 | -------------------------------------------------------------------------------- /deepRS/models/din.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | """ 3 | Author: 4 | Weichen Shen,wcshen1994@163.com 5 | 6 | Reference: 7 | [1] Zhou G, Zhu X, Song C, et al. Deep interest network for click-through rate prediction[C]//Proceedings of the 24th ACM SIGKDD International Conference on Knowledge Discovery & Data Mining. ACM, 2018: 1059-1068. 
(https://arxiv.org/pdf/1706.06978.pdf) 8 | """ 9 | 10 | from tensorflow.python.keras.layers import Input, Dense, Embedding, Concatenate, Reshape 11 | from tensorflow.python.keras.models import Model 12 | from tensorflow.python.keras.initializers import RandomNormal 13 | from tensorflow.python.keras.regularizers import l2 14 | 15 | from ..layers import MLP 16 | from ..sequence import SequencePoolingLayer, AttentionSequencePoolingLayer 17 | from ..activations import Dice 18 | 19 | 20 | def get_input(feature_dim_dict, seq_feature_list, seq_max_len): 21 | sparse_input = {feat: Input(shape=(1,), name='sparse_' + str(i) + '-' + feat) for i, feat in 22 | enumerate(feature_dim_dict["sparse"])} 23 | 24 | user_behavior_input = {feat: Input(shape=(seq_max_len,), name='seq_' + str(i) + '-' + feat) for i, feat in 25 | enumerate(seq_feature_list)} 26 | 27 | user_behavior_length = Input(shape=(1,), name='seq_length') 28 | 29 | return sparse_input, user_behavior_input, user_behavior_length 30 | 31 | 32 | def DIN(feature_dim_dict, seq_feature_list, embedding_size=8, hist_len_max=16, 33 | use_din=True, use_bn=False, hidden_size=(200, 80), activation='relu', att_hidden_size=(80, 40), att_activation=Dice, att_weight_normalization=False, 34 | l2_reg_deep=0, l2_reg_embedding=1e-5, final_activation='sigmoid', keep_prob=1, init_std=0.0001, seed=1024, ): 35 | """Instantiates the Deep Interest Network architecture. 36 | 37 | :param feature_dim_dict: dict, to indicate sparse field (**now only sparse features are supported**) like {'sparse':{'field_1':4,'field_2':3,'field_3':2},'dense':[]} 38 | :param seq_feature_list: list, to indicate sequence sparse field (**now only sparse features are supported**), must be a subset of ``feature_dim_dict["sparse"]`` 39 | :param embedding_size: positive integer, sparse feature embedding_size. 40 | :param hist_len_max: positive int, to indicate the max length of seq input 41 | :param use_din: bool, whether to use DIN pooling. If set to ``False``, **sum pooling** is used 42 | :param use_bn: bool. Whether to use BatchNormalization before activation in the deep net 43 | :param hidden_size: list of positive integers or empty list, the layer number and units in each layer of the deep net 44 | :param activation: Activation function to use in deep net 45 | :param att_hidden_size: list of positive integers, the layer number and units in each layer of the attention net 46 | :param att_activation: Activation function to use in attention net 47 | :param att_weight_normalization: bool. Whether to normalize the attention score of the local activation unit. 48 | :param l2_reg_deep: float. L2 regularizer strength applied to deep net 49 | :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vector 50 | :param final_activation: str, output activation, usually ``'sigmoid'`` or ``'linear'`` 51 | :param keep_prob: float in (0,1]. keep_prob used in deep net 52 | :param init_std: float, to use as the initialization std of embedding vector 53 | :param seed: integer, to use as random seed. 54 | :return: A Keras model instance.
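    Usage sketch (illustrative addition, not from the original docs; the field names, cardinalities and random data are invented, and the input ordering assumes Python 3.6+ dict ordering)::

        import numpy as np

        feature_dim_dict = {'sparse': {'user': 3, 'item': 4}, 'dense': []}
        model = DIN(feature_dim_dict, ['item'], hist_len_max=4)
        model.compile('adam', 'binary_crossentropy')
        x = [np.random.randint(0, 3, (8, 1)),  # 'user' id
             np.random.randint(0, 4, (8, 1)),  # candidate 'item' id
             np.random.randint(0, 4, (8, 4)),  # 'item' behavior sequence, padded to hist_len_max
             np.random.randint(1, 5, (8, 1))]  # valid length of each behavior sequence
        y = np.random.randint(0, 2, (8, 1))
        model.fit(x, y, epochs=1)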
55 | 56 | """ 57 | for feature_dim_dict in [feature_dim_dict]: 58 | if not isinstance(feature_dim_dict, 59 | dict) or "sparse" not in feature_dim_dict or "dense" not in feature_dim_dict: 60 | raise ValueError( 61 | "feature_dim must be a dict like {'sparse':{'field_1':4,'field_2':3,'field_3':2},'dense':['field_5',]}") 62 | if len(feature_dim_dict['dense']) > 0: 63 | raise ValueError('DIN only supports sparse input for now') 64 | sparse_input, user_behavior_input, user_behavior_length = get_input( 65 | feature_dim_dict, seq_feature_list, hist_len_max) 66 | sparse_embedding_dict = {feat: Embedding(feature_dim_dict["sparse"][feat], embedding_size, 67 | embeddings_initializer=RandomNormal( 68 | mean=0.0, stddev=init_std, seed=seed), 69 | embeddings_regularizer=l2( 70 | l2_reg_embedding), 71 | name='sparse_emb_' + str(i) + '-' + feat) for i, feat in 72 | enumerate(feature_dim_dict["sparse"])} 73 | query_emb_list = [sparse_embedding_dict[feat]( 74 | sparse_input[feat]) for feat in seq_feature_list] 75 | keys_emb_list = [sparse_embedding_dict[feat]( 76 | user_behavior_input[feat]) for feat in seq_feature_list] 77 | deep_input_emb_list = [sparse_embedding_dict[feat]( 78 | sparse_input[feat]) for feat in feature_dim_dict["sparse"]] 79 | 80 | query_emb = Concatenate()(query_emb_list) if len( 81 | query_emb_list) > 1 else query_emb_list[0] 82 | keys_emb = Concatenate()(keys_emb_list) if len( 83 | keys_emb_list) > 1 else keys_emb_list[0] 84 | deep_input_emb = Concatenate()(deep_input_emb_list) if len( 85 | deep_input_emb_list) > 1 else deep_input_emb_list[0] 86 | 87 | if use_din: 88 | hist = AttentionSequencePoolingLayer(att_hidden_size, att_activation, weight_normalization=att_weight_normalization)([ 89 | query_emb, keys_emb, user_behavior_length]) 90 | else: 91 | hist = SequencePoolingLayer(hist_len_max, 'sum')( 92 | [keys_emb, user_behavior_length]) 93 | 94 | deep_input_emb = Concatenate()([deep_input_emb, hist]) 95 | output = MLP(hidden_size, activation, l2_reg_deep, 96 | keep_prob, use_bn, seed)(deep_input_emb) 97 | output = Dense(1, final_activation)(output) 98 | output = Reshape([1])(output) 99 | model_input_list = list(sparse_input.values( 100 | ))+list(user_behavior_input.values()) + [user_behavior_length] 101 | 102 | model = Model(inputs=model_input_list, outputs=output) 103 | return model 104 | -------------------------------------------------------------------------------- /deepRS/sequence.py: -------------------------------------------------------------------------------- 1 | from tensorflow.python.keras.layers import Layer 2 | from .layers import LocalActivationUnit 3 | import tensorflow as tf 4 | 5 | 6 | class SequencePoolingLayer(Layer): 7 | """The SequencePoolingLayer is used to apply pooling operations (sum, mean or max) on variable-length sequence features/multi-value features. 8 | 9 | Input shape 10 | - A list of two tensors [seq_value, seq_len] 11 | 12 | - seq_value is a 3D tensor with shape: ``(batch_size, T, embedding_size)`` 13 | 14 | - seq_len is a 2D tensor with shape : ``(batch_size, 1)``, indicating the valid length of each sequence. 15 | 16 | Output shape 17 | - 3D tensor with shape: ``(batch_size, 1, embedding_size)``. 18 | 19 | Arguments 20 | - **seq_len_max**: Positive integer indicating the max length of all the sequence features, usually the same as T. 21 | 22 | - **mode**: str. Pooling operation to be used, can be sum, mean or max.
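    Usage sketch (illustrative addition; T=4, embedding_size=8 and the random data are invented, and the layer is assumed to be imported)::

        import numpy as np
        from tensorflow.python.keras.layers import Input
        from tensorflow.python.keras.models import Model

        seq = Input(shape=(4, 8))   # (batch_size, T, embedding_size)
        length = Input(shape=(1,))  # valid length of each sequence
        pooled = SequencePoolingLayer(seq_len_max=4, mode='mean')([seq, length])
        model = Model([seq, length], pooled)
        out = model.predict([np.random.rand(2, 4, 8), np.array([[2.], [4.]])])
        # out.shape == (2, 1, 8); the first sample averages only its first 2 steps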
23 | """ 24 | 25 | def __init__(self, seq_len_max, mode='mean', **kwargs): 26 | 27 | if mode not in ['sum', 'mean', 'max']: 28 | raise ValueError("mode must be sum, mean or max") 29 | self.seq_len_max = seq_len_max 30 | self.mode = mode 31 | super(SequencePoolingLayer, self).__init__(**kwargs) 32 | 33 | def build(self, input_shape): 34 | super(SequencePoolingLayer, self).build( 35 | input_shape) # Be sure to call this somewhere! 36 | 37 | def call(self, seq_value_len_list, **kwargs): 38 | uiseq_embed_list, user_behavior_length = seq_value_len_list 39 | embedding_size = uiseq_embed_list.shape[-1] 40 | mask = tf.sequence_mask(user_behavior_length, 41 | self.seq_len_max, dtype=tf.float32) 42 | 43 | mask = tf.transpose(mask, (0, 2, 1)) 44 | 45 | mask = tf.tile(mask, [1, 1, embedding_size]) 46 | uiseq_embed_list *= mask 47 | hist = uiseq_embed_list 48 | if self.mode == "max": 49 | return tf.reduce_max(hist, 1, keep_dims=True) 50 | 51 | hist = tf.reduce_sum(hist, 1, keep_dims=False) 52 | if self.mode == "mean": 53 | 54 | hist = tf.div(hist, user_behavior_length) 55 | hist = tf.expand_dims(hist, axis=1) 56 | return hist 57 | 58 | def compute_output_shape(self, input_shape): 59 | return (None, 1, input_shape[0][-1]) 60 | 61 | def get_config(self,): 62 | config = {'seq_len_max': self.seq_len_max, 'mode': self.mode} 63 | base_config = super(SequencePoolingLayer, self).get_config() 64 | return dict(list(base_config.items()) + list(config.items())) 65 | 66 | 67 | class AttentionSequencePoolingLayer(Layer): 68 | """The attentional sequence pooling operation used in DIN. 69 | 70 | Input shape 71 | - A list of three tensors: [query, keys, keys_length] 72 | 73 | - query is a 3D tensor with shape: ``(batch_size, 1, embedding_size)`` 74 | 75 | - keys is a 3D tensor with shape: ``(batch_size, T, embedding_size)`` 76 | 77 | - keys_length is a 2D tensor with shape: ``(batch_size, 1)`` 78 | 79 | Output shape 80 | - 3D tensor with shape: ``(batch_size, 1, embedding_size)``. 81 | 82 | Arguments 83 | - **hidden_size**: list of positive integers, the attention net layer number and units in each layer. 84 | 85 | - **activation**: Activation function to use in attention net. 86 | 87 | - **weight_normalization**: bool. Whether to normalize the attention score of the local activation unit. 88 | 89 | References 90 | - [Zhou G, Zhu X, Song C, et al. Deep interest network for click-through rate prediction[C]//Proceedings of the 24th ACM SIGKDD International Conference on Knowledge Discovery & Data Mining.
ACM, 2018: 1059-1068.](https://arxiv.org/pdf/1706.06978.pdf) 91 | """ 92 | 93 | def __init__(self, hidden_size=(80, 40), activation='sigmoid', weight_normalization=False, **kwargs): 94 | 95 | self.hidden_size = hidden_size 96 | self.activation = activation 97 | self.weight_normalization = weight_normalization 98 | 99 | super(AttentionSequencePoolingLayer, self).__init__(**kwargs) 100 | 101 | def build(self, input_shape): 102 | 103 | if not isinstance(input_shape, list) or len(input_shape) != 3: 104 | raise ValueError('An `AttentionSequencePoolingLayer` layer should be called ' 105 | 'on a list of 3 inputs') 106 | 107 | if len(input_shape[0]) != 3 or len(input_shape[1]) != 3 or len(input_shape[2]) != 2: 108 | raise ValueError("Unexpected inputs dimensions, the 3 tensor dimensions are %d, %d and %d, expected to be 3, 3 and 2" % ( 109 | len(input_shape[0]), len(input_shape[1]), len(input_shape[2]))) 110 | 111 | if input_shape[0][-1] != input_shape[1][-1] or input_shape[0][1] != 1 or input_shape[2][1] != 1: 112 | raise ValueError('An `AttentionSequencePoolingLayer` layer requires ' 113 | 'inputs of a list of 3 tensors with shape (None,1,embedding_size),(None,T,embedding_size) and (None,1). ' 114 | 'Got different shapes: %s,%s and %s' % tuple(input_shape)) 115 | super(AttentionSequencePoolingLayer, self).build( 116 | input_shape) # Be sure to call this somewhere! 117 | 118 | def call(self, inputs, **kwargs): 119 | query_key_keylen_list = inputs 120 | queries, keys, keys_length = query_key_keylen_list 121 | hist_len = keys.get_shape()[1] 122 | 123 | attention_score = LocalActivationUnit( 124 | self.hidden_size, self.activation, 0, 1, False, 1024,)([queries, keys]) 125 | 126 | outputs = tf.transpose(attention_score, (0, 2, 1)) 127 | 128 | key_masks = tf.sequence_mask(keys_length, hist_len) 129 | 130 | if self.weight_normalization: 131 | paddings = tf.ones_like(outputs) * (-2 ** 32 + 1) 132 | else: 133 | paddings = tf.zeros_like(outputs) 134 | 135 | outputs = tf.where(key_masks, outputs, paddings) 136 | 137 | if self.weight_normalization: 138 | outputs = tf.nn.softmax(outputs) 139 | 140 | outputs = tf.matmul(outputs, keys) 141 | 142 | return outputs 143 | 144 | def compute_output_shape(self, input_shape): 145 | return (None, 1, input_shape[0][-1]) 146 | 147 | def get_config(self,): 148 | 149 | config = {'hidden_size': self.hidden_size, 'activation': self.activation, 150 | 'weight_normalization': self.weight_normalization} 151 | base_config = super(AttentionSequencePoolingLayer, self).get_config() 152 | return dict(list(base_config.items()) + list(config.items())) 153 | -------------------------------------------------------------------------------- /tests/utils.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, division, print_function 2 | import sys 3 | import inspect 4 | import numpy as np 5 | from numpy.testing import assert_allclose 6 | from tensorflow.python.keras import backend as K 7 | from tensorflow.python.keras.layers import Input 8 | from tensorflow.python.keras.models import Model, save_model, load_model 9 | from deepctr.utils import custom_objects 10 | 11 | 12 | def get_test_data(num_train=1000, num_test=500, input_shape=(10,), 13 | 14 | output_shape=(2,), 15 | 16 | classification=True, num_classes=2): 17 | """Generates test data to train a model on. 18 | 19 | 20 | 21 | classification=True overrides output_shape 22 | 23 | (i.e. output_shape is set to (1,)) and the output 24 | 25 | consists of integers in [0, num_classes-1].
26 | 27 | 28 | 29 | Otherwise: float output with shape output_shape. 30 | 31 | """ 32 | 33 | samples = num_train + num_test 34 | 35 | if classification: 36 | 37 | y = np.random.randint(0, num_classes, size=(samples,)) 38 | 39 | X = np.zeros((samples,) + input_shape, dtype=np.float32) 40 | 41 | for i in range(samples): 42 | 43 | X[i] = np.random.normal(loc=y[i], scale=0.7, size=input_shape) 44 | 45 | else: 46 | 47 | y_loc = np.random.random((samples,)) 48 | 49 | X = np.zeros((samples,) + input_shape, dtype=np.float32) 50 | 51 | y = np.zeros((samples,) + output_shape, dtype=np.float32) 52 | 53 | for i in range(samples): 54 | 55 | X[i] = np.random.normal(loc=y_loc[i], scale=0.7, size=input_shape) 56 | 57 | y[i] = np.random.normal(loc=y_loc[i], scale=0.7, size=output_shape) 58 | 59 | return (X[:num_train], y[:num_train]), (X[num_train:], y[num_train:]) 60 | 61 | 62 | def layer_test(layer_cls, kwargs={}, input_shape=None, input_dtype=None, 63 | 64 | input_data=None, expected_output=None, 65 | 66 | expected_output_dtype=None, fixed_batch_size=False): 67 | """Test routine for a layer with a single input tensor 68 | 69 | and single output tensor. 70 | 71 | """ 72 | 73 | # generate input data 74 | 75 | if input_data is None: 76 | 77 | if not input_shape: 78 | raise AssertionError() 79 | 80 | if not input_dtype: 81 | 82 | input_dtype = K.floatx() 83 | 84 | input_data_shape = list(input_shape) 85 | 86 | for i, e in enumerate(input_data_shape): 87 | 88 | if e is None: 89 | 90 | input_data_shape[i] = np.random.randint(1, 4) 91 | 92 | if all(isinstance(e, tuple) for e in input_data_shape): 93 | input_data = [] 94 | for e in input_data_shape: 95 | input_data.append( 96 | (10 * np.random.random(e)).astype(input_dtype)) 97 | 98 | else: 99 | 100 | input_data = (10 * np.random.random(input_data_shape)) 101 | 102 | input_data = input_data.astype(input_dtype) 103 | 104 | else: 105 | 106 | if input_shape is None: 107 | 108 | input_shape = input_data.shape 109 | 110 | if input_dtype is None: 111 | 112 | input_dtype = input_data.dtype 113 | 114 | if expected_output_dtype is None: 115 | 116 | expected_output_dtype = input_dtype 117 | 118 | # instantiation 119 | 120 | layer = layer_cls(**kwargs) 121 | 122 | # test get_weights , set_weights at layer level 123 | 124 | weights = layer.get_weights() 125 | 126 | layer.set_weights(weights) 127 | 128 | try: 129 | expected_output_shape = layer.compute_output_shape(input_shape) 130 | except Exception: 131 | expected_output_shape = layer._compute_output_shape(input_shape) 132 | 133 | # test in functional API 134 | if isinstance(input_shape, list): 135 | if fixed_batch_size: 136 | 137 | x = [Input(batch_shape=e, dtype=input_dtype) for e in input_shape] 138 | 139 | else: 140 | 141 | x = [Input(shape=e[1:], dtype=input_dtype) for e in input_shape] 142 | else: 143 | if fixed_batch_size: 144 | 145 | x = Input(batch_shape=input_shape, dtype=input_dtype) 146 | 147 | else: 148 | 149 | x = Input(shape=input_shape[1:], dtype=input_dtype) 150 | 151 | y = layer(x) 152 | 153 | if not (K.dtype(y) == expected_output_dtype): 154 | raise AssertionError() 155 | 156 | # check with the functional API 157 | 158 | model = Model(x, y) 159 | 160 | actual_output = model.predict(input_data) 161 | 162 | actual_output_shape = actual_output.shape 163 | 164 | for expected_dim, actual_dim in zip(expected_output_shape, 165 | 166 | actual_output_shape): 167 | 168 | if expected_dim is not None: 169 | 170 | if not (expected_dim == actual_dim): 171 | raise AssertionError() 172 | 173 | if expected_output is not 
None: 174 | 175 | assert_allclose(actual_output, expected_output, rtol=1e-3) 176 | 177 | # test serialization, weight setting at model level 178 | 179 | model_config = model.get_config() 180 | 181 | recovered_model = model.__class__.from_config(model_config) 182 | 183 | if model.weights: 184 | 185 | weights = model.get_weights() 186 | 187 | recovered_model.set_weights(weights) 188 | 189 | _output = recovered_model.predict(input_data) 190 | 191 | assert_allclose(_output, actual_output, rtol=1e-3) 192 | 193 | # test training mode (e.g. useful when the layer has a 194 | 195 | # different behavior at training and testing time). 196 | 197 | if has_arg(layer.call, 'training'): 198 | 199 | model.compile('rmsprop', 'mse') 200 | 201 | model.train_on_batch(input_data, actual_output) 202 | 203 | # test instantiation from layer config 204 | 205 | layer_config = layer.get_config() 206 | 207 | layer_config['batch_input_shape'] = input_shape 208 | 209 | layer = layer.__class__.from_config(layer_config) 210 | 211 | # for further checks in the caller function 212 | 213 | return actual_output 214 | 215 | 216 | def has_arg(fn, name, accept_all=False): 217 | """Checks if a callable accepts a given keyword argument. 218 | 219 | 220 | 221 | For Python 2, checks if there is an argument with the given name. 222 | 223 | 224 | 225 | For Python 3, checks if there is an argument with the given name, and 226 | 227 | also whether this argument can be called with a keyword (i.e. if it is 228 | 229 | not a positional-only argument). 230 | 231 | 232 | 233 | # Arguments 234 | 235 | fn: Callable to inspect. 236 | 237 | name: Check if `fn` can be called with `name` as a keyword argument. 238 | 239 | accept_all: What to return if there is no parameter called `name` 240 | 241 | but the function accepts a `**kwargs` argument. 242 | 243 | 244 | 245 | # Returns 246 | 247 | bool, whether `fn` accepts a `name` keyword argument. 
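    # Example

        A quick illustration (added for clarity; the lambdas are arbitrary):

        >>> has_arg(lambda x, training=False: x, 'training')
        True
        >>> has_arg(lambda x: x, 'training')
        False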
248 | 249 | """ 250 | 251 | if sys.version_info < (3,): 252 | 253 | arg_spec = inspect.getargspec(fn) 254 | 255 | if accept_all and arg_spec.keywords is not None: 256 | 257 | return True 258 | 259 | return (name in arg_spec.args) 260 | 261 | elif sys.version_info < (3, 3): 262 | 263 | arg_spec = inspect.getfullargspec(fn) 264 | 265 | if accept_all and arg_spec.varkw is not None: 266 | 267 | return True 268 | 269 | return (name in arg_spec.args or 270 | 271 | name in arg_spec.kwonlyargs) 272 | 273 | else: 274 | 275 | signature = inspect.signature(fn) 276 | 277 | parameter = signature.parameters.get(name) 278 | 279 | if parameter is None: 280 | 281 | if accept_all: 282 | 283 | for param in signature.parameters.values(): 284 | 285 | if param.kind == inspect.Parameter.VAR_KEYWORD: 286 | 287 | return True 288 | 289 | return False 290 | 291 | return (parameter.kind in (inspect.Parameter.POSITIONAL_OR_KEYWORD, 292 | 293 | inspect.Parameter.KEYWORD_ONLY)) 294 | 295 | 296 | def check_model(model, model_name, x, y): 297 | model.compile('adam', 'binary_crossentropy', 298 | metrics=['binary_crossentropy']) 299 | model.fit(x, y, batch_size=100, epochs=1, validation_split=0.5) 300 | 301 | print(model_name+" test train valid pass!") 302 | model.save_weights(model_name + '_weights.h5') 303 | model.load_weights(model_name + '_weights.h5') 304 | print(model_name+" test save load weight pass!") 305 | save_model(model, model_name + '.h5') 306 | model = load_model(model_name + '.h5', custom_objects) 307 | print(model_name + " test save load model pass!") 308 | 309 | print(model_name + " test pass!") 310 | -------------------------------------------------------------------------------- /docs/source/Features.rst: -------------------------------------------------------------------------------- 1 | Features 2 | ========== 3 | 4 | Overview 5 | ----------- 6 | 7 | With the great success of deep learning, DNN-based techniques have been widely used in the CTR estimation task. 8 | 9 | DNN-based CTR estimation models consist of the following 4 modules: 10 | ``Input,Embedding,Low-order&High-order Feature Extractor,Prediction`` 11 | 12 | Input&Embedding 13 | The data in the CTR estimation task usually include highly sparse, high-cardinality 14 | categorical features and some dense numerical features. 15 | 16 | Since DNNs are good at handling dense numerical features, we usually map the sparse categorical 17 | features to dense numerical vectors through the `embedding technique`. 18 | 19 | For numerical features, we usually apply `discretization` or `normalization` on them. 20 | 21 | Feature Extractor 22 | Low-order Extractor learns feature interactions through products between vectors. 23 | Factorization Machines and their variants are widely used to learn the low-order feature interactions. 24 | 25 | High-order Extractor learns feature combinations through complex neural network functions like MLP, Cross Net, etc. 26 | 27 | Models 28 | -------- 29 | 30 | FNN (Factorization-supported Neural Network) 31 | >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> 32 | 33 | According to the paper, FNN learns embedding vectors of categorical data via a pre-trained FM. 34 | It uses FM's latent vectors to initialize the embedding vectors. During the training stage, it 35 | concatenates the embedding vectors and feeds them into an MLP (MultiLayer Perceptron). 36 | 37 | **FNN api** `link <./deepctr.models.fnn.html>`_ 38 | 39 | 40 | .. image:: ../pics/FNN.png 41 | :align: center 42 | :scale: 50 % 43 | 44 | 45 | 46 | `Zhang W, Du T, Wang J.
Deep learning over multi-field categorical data[C]//European conference on information retrieval. Springer, Cham, 2016: 45-57. `_ 47 | 48 | 49 | PNN (Product-based Neural Network) 50 | >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> 51 | 52 | PNN concatenates sparse feature embeddings and the products between embedding vectors as the input of an MLP. 53 | 54 | **PNN api** `link <./deepctr.models.pnn.html>`_ 55 | 56 | .. image:: ../pics/PNN.png 57 | :align: center 58 | :scale: 70 % 59 | 60 | 61 | `Qu Y, Cai H, Ren K, et al. Product-based neural networks for user response prediction[C]//Data Mining (ICDM), 2016 IEEE 16th International Conference on. IEEE, 2016: 1149-1154. `_ 62 | 63 | 64 | Wide & Deep 65 | >>>>>>>>>>>>> 66 | 67 | WDL's deep part concatenates sparse feature embeddings as the input of an MLP, while the wide part uses handcrafted features as input. 68 | The logits of the deep part and the wide part are added to get the prediction probability. 69 | 70 | **WDL api** `link <./deepctr.models.wdl.html>`_ 71 | 72 | .. image:: ../pics/WDL.png 73 | :align: center 74 | :scale: 50 % 75 | 76 | `Cheng H T, Koc L, Harmsen J, et al. Wide & deep learning for recommender systems[C]//Proceedings of the 1st Workshop on Deep Learning for Recommender Systems. ACM, 2016: 7-10. `_ 77 | 78 | 79 | DeepFM 80 | >>>>>>>> 81 | 82 | DeepFM can be seen as an improvement of WDL and FNN. Compared with WDL, DeepFM uses 83 | FM instead of LR in the wide part and uses the concatenation of embedding vectors as the input of the MLP in the deep part. 84 | Compared with FNN, the embedding vectors of the FM and the input to the MLP are the same. 85 | And they do not need a pre-trained FM vector for initialization; they are learned end-to-end. 86 | 87 | **DeepFM api** `link <./deepctr.models.deepfm.html>`_ 88 | 89 | .. image:: ../pics/DeepFM.png 90 | :align: center 91 | :scale: 50 % 92 | 93 | `Guo H, Tang R, Ye Y, et al. Deepfm: a factorization-machine based neural network for ctr prediction[J]. arXiv preprint arXiv:1703.04247, 2017. `_ 94 | 95 | MLR(Mixed Logistic Regression/Piece-wise Linear Model) 96 | >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> 97 | 98 | MLR can be viewed as a combination of 2*m LR models, where m is the piece (region) number. 99 | m LR models learn the weight that the sample belongs to each region, and another m LR models learn the sample's click probability in the region. 100 | Finally, the sample's CTR is a weighted sum of each region's click probability. Note that the weights are normalized. 101 | 102 | **MLR api** `link <./deepctr.models.mlr.html>`_ 103 | 104 | .. image:: ../pics/MLR.png 105 | :align: center 106 | :scale: 50 % 107 | 108 | `Gai K, Zhu X, Li H, et al. Learning Piece-wise Linear Models from Large Scale Data for Ad Click Prediction[J]. arXiv preprint arXiv:1704.05194, 2017. `_ 109 | 110 | 111 | NFM (Neural Factorization Machine) 112 | >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> 113 | 114 | NFM uses a bi-interaction pooling layer to learn feature interactions between 115 | embedding vectors and compresses the result into a single vector which has the same size as a single embedding vector. 116 | The vector is then fed into an MLP. The output logit of the MLP and the output logit of the linear part are added to get the prediction probability. 117 | 118 | **NFM api** `link <./deepctr.models.nfm.html>`_ 119 | 120 | .. image:: ../pics/NFM.png 121 | :align: center 122 | :scale: 50 % 123 | 124 | `He X, Chua T S. Neural factorization machines for sparse predictive analytics[C]//Proceedings of the 40th International ACM SIGIR conference on Research and Development in Information Retrieval.
ACM, 2017: 355-364. `_ 125 | 126 | 127 | AFM (Attentional Factorization Machine) 128 | >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> 129 | 130 | AFM is a variant of FM; traditional FM sums the inner products of embedding vectors uniformly. 131 | AFM can be seen as a weighted sum of feature interactions. The weights are learned by a small MLP. 132 | 133 | **AFM api** `link <./deepctr.models.afm.html>`_ 134 | 135 | .. image:: ../pics/AFM.png 136 | :align: center 137 | :scale: 70 % 138 | 139 | `Xiao J, Ye H, He X, et al. Attentional factorization machines: Learning the weight of feature interactions via attention networks[J]. arXiv preprint arXiv:1708.04617, 2017. `_ 140 | 141 | 142 | DCN (Deep & Cross Network) 143 | >>>>>>>>>>>>>>>>>>>>>>>>>>>> 144 | 145 | DCN uses a Cross Net to learn both low- and high-order feature interactions explicitly, and uses an MLP to learn feature interactions implicitly. 146 | The outputs of the Cross Net and the MLP are concatenated. The concatenated vector is fed into one fully connected layer to get the prediction probability. 147 | 148 | **DCN api** `link <./deepctr.models.dcn.html>`_ 149 | 150 | .. image:: ../pics/DCN.png 151 | :align: center 152 | :scale: 70 % 153 | 154 | `Wang R, Fu B, Fu G, et al. Deep & cross network for ad click predictions[C]//Proceedings of the ADKDD'17. ACM, 2017: 12. `_ 155 | 156 | 157 | DIN (Deep Interest Network) 158 | >>>>>>>>>>>>>>>>>>>>>>>>>>>>> 159 | 160 | DIN introduces an attention method to learn from sequence (multi-valued) features. 161 | Traditional methods usually use sum/mean pooling on sequence features. 162 | DIN uses a local activation unit to get the activation score between the candidate item and history items. 163 | The user's interest is represented by a weighted sum of user behaviors. 164 | The user's interest vector and the other embedding vectors are concatenated and fed into an MLP to get the prediction. 165 | 166 | **DIN api** `link <./deepctr.models.din.html>`_ **DIN demo** `link `_ 168 | 169 | .. image:: ../pics/DIN.png 170 | :align: center 171 | :scale: 70 % 172 | 173 | `Zhou G, Zhu X, Song C, et al. Deep interest network for click-through rate prediction[C]//Proceedings of the 24th ACM SIGKDD International Conference on Knowledge Discovery & Data Mining. ACM, 2018: 1059-1068. `_ 174 | 175 | xDeepFM 176 | >>>>>>>>>> 177 | 178 | xDeepFM uses a Compressed Interaction Network (CIN) to learn both low- and high-order feature interactions explicitly, and uses an MLP to learn feature interactions implicitly. 179 | In each layer of CIN, it first computes outer products between :math:`x^k` and :math:`x_0` to get a tensor :math:`Z_{k+1}`, then uses a 1D convolution to learn feature maps :math:`H_{k+1}` on this tensor. 180 | Finally, sum pooling is applied on all the feature maps :math:`H_k` to get one vector. The vector is used to compute the logit that CIN contributes. 181 | 182 | 183 | **xDeepFM api** `link <./deepctr.models.xdeepfm.html>`_ 184 | 185 | .. image:: ../pics/CIN.png 186 | :align: center 187 | :scale: 70 % 188 | 189 | .. image:: ../pics/xDeepFM.png 190 | :align: center 191 | :scale: 70 % 192 | 193 | `Lian J, Zhou X, Zhang F, et al. xDeepFM: Combining Explicit and Implicit Feature Interactions for Recommender Systems[J]. arXiv preprint arXiv:1803.05170, 2018. `_ 194 | 195 | AutoInt(Automatic Feature Interaction) 196 | >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> 197 | 198 | AutoInt uses an interacting layer to model the interactions between different features.
199 | Within each interacting layer, each feature is allowed to interact with all the other features and is able to automatically identify relevant features to form meaningful higher-order features via the multi-head attention mechanism. 200 | By stacking multiple interacting layers, AutoInt is able to model different orders of feature interactions. 201 | 202 | **AutoInt api** `link <./deepctr.models.autoint.html>`_ 203 | 204 | .. image:: ../pics/InteractingLayer.png 205 | :align: center 206 | :scale: 70 % 207 | 208 | .. image:: ../pics/AutoInt.png 209 | :align: center 210 | :scale: 70 % 211 | 212 | `Song W, Shi C, Xiao Z, et al. AutoInt: Automatic Feature Interaction Learning via Self-Attentive Neural Networks[J]. arXiv preprint arXiv:1810.11921, 2018. `_ 213 | 214 | Layers 215 | -------- 216 | 217 | The models of deepctr are modular, 218 | so you can use different modules to build your own models. 219 | 220 | Each module is a class that inherits from ``tf.keras.layers.Layer``; it has 221 | the same attributes and methods as Keras layers like ``tf.keras.layers.Dense()``, etc. 222 | 223 | You can see the layers API in `layers <./deepctr.layers.html>`_ 224 | 225 | 226 | Activations 227 | -------------- 228 | 229 | Some custom activation functions. 230 | 231 | You can see the activations API in `activations <./deepctr.activations.html>`_ 232 | 233 | Sequence 234 | ---------- 235 | 236 | The sequence module aims to process variable-length sequence data. 237 | 238 | You can see the sequence API in `sequence <./deepctr.sequence.html>`_ 239 | -------------------------------------------------------------------------------- /deepRS/models/mlr.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | """ 3 | Author: 4 | Weichen Shen,wcshen1994@163.com 5 | 6 | Reference: 7 | [1] Gai K, Zhu X, Li H, et al. Learning Piece-wise Linear Models from Large Scale Data for Ad Click Prediction[J]. arXiv preprint arXiv:1704.05194, 2017.(https://arxiv.org/abs/1704.05194) 8 | """ 9 | from tensorflow.python.keras.layers import Input, Dense, Embedding, Concatenate, Activation, Reshape, add, dot 10 | from tensorflow.python.keras.models import Model 11 | from tensorflow.python.keras.initializers import TruncatedNormal 12 | from tensorflow.python.keras.regularizers import l2 13 | 14 | 15 | def MLR(region_feature_dim_dict, base_feature_dim_dict={"sparse": {}, "dense": []}, region_num=4, 16 | l2_reg_linear=1e-5, 17 | init_std=0.0001, seed=1024, final_activation='sigmoid', 18 | bias_feature_dim_dict={"sparse": {}, "dense": []}): 19 | """Instantiates the Mixed Logistic Regression/Piece-wise Linear Model. 20 | 21 | :param region_feature_dim_dict: dict, to indicate sparse field and dense field like {'sparse':{'field_1':4,'field_2':3,'field_3':2},'dense':['field_4','field_5']} 22 | :param base_feature_dim_dict: dict or None, to indicate sparse field and dense field of the base learner. If None, it is the same as region_feature_dim_dict 23 | :param region_num: integer > 1, indicating the piece number 24 | :param l2_reg_linear: float. L2 regularizer strength applied to weight 25 | :param init_std: float, to use as the initialization std of embedding vector 26 | :param seed: integer, to use as random seed. 27 | :param final_activation: str, output activation, usually ``'sigmoid'`` or ``'linear'`` 28 | :param bias_feature_dim_dict: dict, to indicate sparse field and dense field like {'sparse':{'field_1':4,'field_2':3,'field_3':2},'dense':['field_4','field_5']} 29 | :return: A Keras model instance.
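    Usage sketch (illustrative addition, not from the original docs; the field names, cardinalities and random data are invented)::

        import numpy as np

        model = MLR({'sparse': {'field_1': 4, 'field_2': 3}, 'dense': []}, region_num=4)
        model.compile('adam', 'binary_crossentropy')
        x = [np.random.randint(0, 4, (16, 1)), np.random.randint(0, 3, (16, 1))]
        y = np.random.randint(0, 2, (16, 1))
        model.fit(x, y, epochs=1)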
30 | """ 31 | 32 | if region_num <= 1: 33 | raise ValueError("region_num must be > 1") 34 | if not isinstance(region_feature_dim_dict, 35 | dict) or "sparse" not in region_feature_dim_dict or "dense" not in region_feature_dim_dict: 36 | raise ValueError( 37 | "feature_dim must be a dict like {'sparse':{'field_1':4,'field_2':3,'field_3':2},'dense':['field_5',]}") 38 | 39 | same_flag = False 40 | if base_feature_dim_dict == {"sparse": {}, "dense": []}: 41 | base_feature_dim_dict = region_feature_dim_dict 42 | same_flag = True 43 | 44 | region_sparse_input, region_dense_input, base_sparse_input, base_dense_input, bias_sparse_input, bias_dense_input = get_input( 45 | region_feature_dim_dict, base_feature_dim_dict, bias_feature_dim_dict, same_flag) 46 | region_embeddings, base_embeddings, bias_embedding = get_embedding( 47 | region_num, region_feature_dim_dict, base_feature_dim_dict, bias_feature_dim_dict, init_std, seed, l2_reg_linear) 48 | 49 | if same_flag: 50 | 51 | base_dense_input_ = region_dense_input 52 | 53 | base_sparse_input_ = region_sparse_input 54 | 55 | else: 56 | 57 | base_dense_input_ = base_dense_input 58 | 59 | base_sparse_input_ = base_sparse_input 60 | 61 | region_dense_feature_num = len(region_feature_dim_dict['dense']) 62 | region_sparse_feature_num = len(region_feature_dim_dict['sparse']) 63 | base_dense_feature_num = len(base_feature_dim_dict['dense']) 64 | base_sparse_feature_num = len(base_feature_dim_dict['sparse']) 65 | bias_dense_feature_num = len(bias_feature_dim_dict['dense']) 66 | bias_sparse_feature_num = len(bias_feature_dim_dict['sparse']) 67 | 68 | if region_dense_feature_num > 1: 69 | region_dense_logits_ = [Dense(1, )(Concatenate()(region_dense_input)) for _ in 70 | range(region_num)] 71 | elif region_dense_feature_num == 1: 72 | region_dense_logits_ = [Dense(1, )(region_dense_input[0]) for _ in 73 | range(region_num)] 74 | 75 | if base_dense_feature_num > 1: 76 | base_dense_logits = [Dense(1, )(Concatenate()(base_dense_input_))for _ in 77 | range(region_num)] 78 | elif base_dense_feature_num == 1: 79 | base_dense_logits = [Dense(1, )(base_dense_input_[0])for _ in 80 | range(region_num)] 81 | 82 | if region_dense_feature_num > 0 and region_sparse_feature_num == 0: 83 | region_logits = Concatenate()(region_dense_logits_) 84 | elif region_dense_feature_num == 0 and region_sparse_feature_num > 0: 85 | region_sparse_logits = [ 86 | add([region_embeddings[j][i](region_sparse_input[i]) 87 | for i in range(region_sparse_feature_num)]) 88 | if region_sparse_feature_num > 1 else region_embeddings[j][0](region_sparse_input[0]) 89 | for j in range(region_num)] 90 | region_logits = Concatenate()(region_sparse_logits) 91 | 92 | else: 93 | region_sparse_logits = [ 94 | add([region_embeddings[j][i](region_sparse_input[i]) 95 | for i in range(region_sparse_feature_num)]) 96 | for j in range(region_num)] 97 | region_logits = Concatenate()( 98 | [add([region_sparse_logits[i], region_dense_logits_[i]]) for i in range(region_num)]) 99 | 100 | if base_dense_feature_num > 0 and base_sparse_feature_num == 0: 101 | base_logits = base_dense_logits 102 | elif base_dense_feature_num == 0 and base_sparse_feature_num > 0: 103 | base_sparse_logits = [add( 104 | [base_embeddings[j][i](base_sparse_input_[i]) for i in range(base_sparse_feature_num)]) if base_sparse_feature_num > 1 else base_embeddings[j][0](base_sparse_input_[0]) 105 | for j in range(region_num)] 106 | base_logits = base_sparse_logits 107 | else: 108 | base_sparse_logits = [add( 109 |
[base_embeddings[j][i](base_sparse_input_[i]) for i in range(base_sparse_feature_num)]) if base_sparse_feature_num > 1 else base_embeddings[j][0](base_sparse_input_[0]) 110 | for j in range(region_num)] 111 | base_logits = [add([base_sparse_logits[i], base_dense_logits[i]]) 112 | for i in range(region_num)] 113 | 114 | # Dense(self.region_num, activation='softmax')(final_logit) 115 | region_weights = Activation("softmax")(region_logits) 116 | learner_score = Concatenate()( 117 | [Activation(final_activation, name='learner' + str(i))(base_logits[i]) for i in range(region_num)]) 118 | final_logit = dot([region_weights, learner_score], axes=-1) 119 | 120 | if bias_dense_feature_num + bias_sparse_feature_num > 0: 121 | 122 | if bias_dense_feature_num > 1: 123 | bias_dense_logits = Dense(1,)(Concatenate()(bias_dense_input)) 124 | elif bias_dense_feature_num == 1: 125 | bias_dense_logits = Dense(1,)(bias_dense_input[0]) 126 | else: 127 | pass 128 | 129 | if bias_sparse_feature_num > 1: 130 | bias_cate_logits = add([bias_embedding[i](bias_sparse_input[i]) 131 | for i, feat in enumerate(bias_feature_dim_dict['sparse'])]) 132 | elif bias_sparse_feature_num == 1: 133 | bias_cate_logits = bias_embedding[0](bias_sparse_input[0]) 134 | else: 135 | pass 136 | 137 | if bias_dense_feature_num > 0 and bias_sparse_feature_num > 0: 138 | bias_logits = add([bias_dense_logits, bias_cate_logits]) 139 | elif bias_dense_feature_num > 0: 140 | bias_logits = bias_dense_logits 141 | else: 142 | bias_logits = bias_cate_logits 143 | 144 | bias_prob = Activation('sigmoid')(bias_logits) 145 | final_logit = dot([final_logit, bias_prob], axes=-1) 146 | 147 | output = Reshape([1])(final_logit) 148 | model = Model(inputs=region_sparse_input + region_dense_input+base_sparse_input + 149 | base_dense_input+bias_sparse_input+bias_dense_input, outputs=output) 150 | return model 151 | 152 | 153 | def get_input(region_feature_dim_dict, base_feature_dim_dict, bias_feature_dim_dict, same_flag): 154 | region_sparse_input = [Input(shape=(1,), name='region_sparse_' + str(i)+"-"+feat) 155 | for i, feat in enumerate(region_feature_dim_dict["sparse"])] 156 | region_dense_input = [Input(shape=(1,), name='region_dense_' + str(i)+"-"+feat) 157 | for i, feat in enumerate(region_feature_dim_dict["dense"])] 158 | if same_flag == True: 159 | base_sparse_input = [] 160 | base_dense_input = [] 161 | else: 162 | base_sparse_input = [Input(shape=(1,), name='base_sparse_' + str(i) + "-" + feat) for i, feat in 163 | enumerate(base_feature_dim_dict["sparse"])] 164 | base_dense_input = [Input(shape=(1,), name='base_dense_' + str(i) + "-" + feat) for i, feat in 165 | enumerate(base_feature_dim_dict['dense'])] 166 | 167 | bias_sparse_input = [Input(shape=(1,), name='bias_cate_' + str(i) + "-" + feat) for i, feat in 168 | enumerate(bias_feature_dim_dict['sparse'])] 169 | bias_dense_input = [Input(shape=(1,), name='bias_continuous_' + str(i) + "-" + feat) for i, feat in 170 | enumerate(bias_feature_dim_dict['dense'])] 171 | return region_sparse_input, region_dense_input, base_sparse_input, base_dense_input, bias_sparse_input, bias_dense_input 172 | 173 | 174 | def get_embedding(region_num, region_feature_dim_dict, base_feature_dim_dict, bias_feature_dim_dict, init_std, seed, l2_reg_linear): 175 | 176 | region_embeddings = [[Embedding(region_feature_dim_dict["sparse"][feat], 1, embeddings_initializer=TruncatedNormal(stddev=init_std, seed=seed+j), embeddings_regularizer=l2(l2_reg_linear), 177 | name='region_emb_' + str(j)+'_' + str(i)) for 178 | i, feat in 
enumerate(region_feature_dim_dict['sparse'])] for j in range(region_num)] 179 | base_embeddings = [[Embedding(base_feature_dim_dict['sparse'][feat], 1, 180 | embeddings_initializer=TruncatedNormal(stddev=init_std, seed=seed + j), embeddings_regularizer=l2(l2_reg_linear), 181 | name='base_emb_' + str(j) + '_' + str(i)) for 182 | i, feat in enumerate(base_feature_dim_dict['sparse'])] for j in range(region_num)] 183 | bias_embedding = [Embedding(bias_feature_dim_dict['sparse'][feat], 1, embeddings_initializer=TruncatedNormal(stddev=init_std, seed=seed), embeddings_regularizer=l2(l2_reg_linear), 184 | name='embed_bias' + '_' + str(i)) for 185 | i, feat in enumerate(bias_feature_dim_dict['sparse'])] 186 | 187 | return region_embeddings, base_embeddings, bias_embedding 188 | -------------------------------------------------------------------------------- /examples/movielens_sample.txt: -------------------------------------------------------------------------------- 1 | user_id,movie_id,rating,timestamp,title,genres,gender,age,occupation,zip 2 | 3299,235,4,968035345,Ed Wood (1994),Comedy|Drama,F,25,4,19119 3 | 3630,3256,3,966536874,Patriot Games (1992),Action|Thriller,M,18,4,77005 4 | 517,105,4,976203603,"Bridges of Madison County, The (1995)",Drama|Romance,F,25,14,55408 5 | 785,2115,3,975430389,Indiana Jones and the Temple of Doom (1984),Action|Adventure,M,18,19,29307 6 | 5848,909,5,957782527,"Apartment, The (1960)",Comedy|Drama,M,50,20,20009 7 | 2996,2799,1,972769867,Problem Child 2 (1991),Comedy,M,18,0,63011 8 | 3087,837,5,969738869,Matilda (1996),Children's|Comedy,F,1,1,90802 9 | 872,3092,5,975273310,Chushingura (1962),Drama,M,50,1,20815 10 | 4094,529,5,966223349,Searching for Bobby Fischer (1993),Drama,M,25,17,49017 11 | 1868,3508,3,974694703,"Outlaw Josey Wales, The (1976)",Western,M,50,11,92346 12 | 2913,1387,5,971769808,Jaws (1975),Action|Horror,F,35,20,98119 13 | 380,3481,5,976316283,High Fidelity (2000),Comedy,M,25,2,92024 14 | 2073,1784,5,974759084,As Good As It Gets (1997),Comedy|Drama,F,18,4,13148 15 | 80,2059,3,977788576,"Parent Trap, The (1998)",Children's|Drama,M,56,1,49327 16 | 3679,2557,1,976298130,I Stand Alone (Seul contre tous) (1998),Drama,M,25,4,68108 17 | 2077,788,3,980013556,"Nutty Professor, The (1996)",Comedy|Fantasy|Romance|Sci-Fi,M,18,0,55112 18 | 6036,2085,4,956716684,101 Dalmatians (1961),Animation|Children's,F,25,15,32603 19 | 3675,532,3,966363610,Serial Mom (1994),Comedy|Crime|Horror,M,35,7,06680 20 | 4566,3683,4,964489599,Blood Simple (1984),Drama|Film-Noir,M,35,17,19473 21 | 2996,3763,3,972413564,F/X (1986),Action|Crime|Thriller,M,18,0,63011 22 | 5831,2458,1,957898337,Armed and Dangerous (1986),Comedy|Crime,M,25,1,92120 23 | 1869,1244,2,974695654,Manhattan (1979),Comedy|Drama|Romance,M,45,14,95148 24 | 5389,2657,3,960328279,"Rocky Horror Picture Show, The (1975)",Comedy|Horror|Musical|Sci-Fi,M,45,7,01905 25 | 1391,1535,3,974851275,Love! Valour! Compassion! 
(1997),Drama|Romance,M,35,15,20723 26 | 3123,2407,3,969324381,Cocoon (1985),Comedy|Sci-Fi,M,25,2,90401 27 | 4694,159,3,963602574,Clockers (1995),Drama,M,56,7,40505 28 | 1680,1988,3,974709821,Hello Mary Lou: Prom Night II (1987),Horror,M,25,20,95380 29 | 2002,1945,4,974677761,On the Waterfront (1954),Crime|Drama,F,56,13,02136-1522 30 | 3430,2690,4,979949863,"Ideal Husband, An (1999)",Comedy,F,45,1,15208 31 | 425,471,4,976284972,"Hudsucker Proxy, The (1994)",Comedy|Romance,M,25,12,55303 32 | 1841,2289,2,974699637,"Player, The (1992)",Comedy|Drama,M,18,0,95037 33 | 4964,2348,4,962619587,Sid and Nancy (1986),Drama,M,35,0,94110 34 | 4520,2160,4,964883648,Rosemary's Baby (1968),Horror|Thriller,M,25,4,45810 35 | 1265,2396,4,1011716691,Shakespeare in Love (1998),Comedy|Romance,F,18,20,49321 36 | 2496,1278,5,974435324,Young Frankenstein (1974),Comedy|Horror,M,50,1,37932 37 | 5511,2174,4,959787754,Beetlejuice (1988),Comedy|Fantasy,M,45,1,92407 38 | 621,833,1,975799925,High School High (1996),Comedy,M,18,4,93560 39 | 3045,2762,5,970189524,"Sixth Sense, The (1999)",Thriller,M,45,1,90631 40 | 2050,2546,4,975522689,"Deep End of the Ocean, The (1999)",Drama,F,35,3,99504 41 | 613,32,4,975812238,Twelve Monkeys (1995),Drama|Sci-Fi,M,35,20,10562 42 | 366,1077,5,978471241,Sleeper (1973),Comedy|Sci-Fi,M,50,15,55126 43 | 5108,367,4,962338215,"Mask, The (1994)",Comedy|Crime|Fantasy,F,25,9,93940 44 | 4502,1960,4,965094644,"Last Emperor, The (1987)",Drama|War,M,50,0,01379 45 | 5512,1801,5,959713840,"Man in the Iron Mask, The (1998)",Action|Drama|Romance,F,25,17,01701 46 | 1861,2642,2,974699627,Superman III (1983),Action|Adventure|Sci-Fi,M,50,16,92129 47 | 1667,1240,4,975016698,"Terminator, The (1984)",Action|Sci-Fi|Thriller,M,50,16,98516 48 | 753,434,3,975460449,Cliffhanger (1993),Action|Adventure|Crime,M,1,10,42754 49 | 1836,2736,5,974826228,Brighton Beach Memoirs (1986),Comedy,M,25,0,10016 50 | 5626,474,5,959052158,In the Line of Fire (1993),Action|Thriller,M,56,16,32043 51 | 1601,1396,4,978576948,Sneakers (1992),Crime|Drama|Sci-Fi,M,25,12,83001 52 | 4725,1100,4,963369546,Days of Thunder (1990),Action|Romance,M,35,5,96707-1321 53 | 2837,2396,5,972571456,Shakespeare in Love (1998),Comedy|Romance,M,18,0,49506 54 | 1776,3882,4,1001558470,Bring It On (2000),Comedy,M,25,0,45801 55 | 2820,457,2,972662398,"Fugitive, The (1993)",Action|Thriller,F,35,0,02138 56 | 1834,2288,3,1038179198,"Thing, The (1982)",Action|Horror|Sci-Fi|Thriller,M,35,5,10990 57 | 284,2716,4,976570902,Ghostbusters (1984),Comedy|Horror,M,25,12,91910 58 | 2744,588,1,973215985,Aladdin (1992),Animation|Children's|Comedy|Musical,M,18,17,53818 59 | 881,4,2,975264028,Waiting to Exhale (1995),Comedy|Drama,M,18,14,76401 60 | 2211,916,3,974607067,Roman Holiday (1953),Comedy|Romance,M,45,6,01950 61 | 2271,2671,4,1007158806,Notting Hill (1999),Comedy|Romance,M,50,14,13210 62 | 1010,2953,1,975222613,Home Alone 2: Lost in New York (1992),Children's|Comedy,M,25,0,10310 63 | 1589,2594,4,974735454,Open Your Eyes (Abre los ojos) (1997),Drama|Romance|Sci-Fi,M,25,0,95136 64 | 1724,597,5,976441106,Pretty Woman (1990),Comedy|Romance,M,18,4,00961 65 | 2590,2097,3,973840056,Something Wicked This Way Comes (1983),Children's|Horror,M,18,4,94044 66 | 1717,1352,3,1009256707,Albino Alligator (1996),Crime|Thriller,F,50,6,30307 67 | 1391,3160,2,974850796,Magnolia (1999),Drama,M,35,15,20723 68 | 1941,1263,3,974954220,"Deer Hunter, The (1978)",Drama|War,M,35,17,94550 69 | 3526,2867,4,966906064,Fright Night (1985),Comedy|Horror,M,35,2,62263-3004 70 | 5767,198,3,958192148,Strange 
Days (1995),Action|Crime|Sci-Fi,M,25,2,75287 71 | 5355,590,4,960596927,Dances with Wolves (1990),Adventure|Drama|Western,M,56,0,78232 72 | 5788,156,4,958108785,Blue in the Face (1995),Comedy,M,25,0,92646 73 | 1078,1307,4,974938851,When Harry Met Sally... (1989),Comedy|Romance,F,45,9,95661 74 | 3808,61,2,965973222,Eye for an Eye (1996),Drama|Thriller,M,25,7,60010 75 | 974,3897,4,975106398,Almost Famous (2000),Comedy|Drama,M,35,19,94930 76 | 5153,1290,4,961972292,Some Kind of Wonderful (1987),Drama|Romance,M,25,7,60046 77 | 5732,2115,3,958434069,Indiana Jones and the Temple of Doom (1984),Action|Adventure,F,25,11,02111 78 | 4627,2478,3,964110136,Three Amigos! (1986),Comedy|Western,M,56,1,45224 79 | 1884,1831,2,975648062,Lost in Space (1998),Action|Sci-Fi|Thriller,M,45,20,93108 80 | 4284,517,4,965277546,Rising Sun (1993),Action|Drama|Mystery,M,50,7,40601 81 | 1383,468,2,975979732,"Englishman Who Went Up a Hill, But Came Down a Mountain, The (1995)",Comedy|Romance,F,25,7,19806 82 | 2230,2873,3,974599097,Lulu on the Bridge (1998),Drama|Mystery|Romance,F,45,1,60302 83 | 2533,2266,4,974055724,"Butcher's Wife, The (1991)",Comedy|Romance,F,25,3,49423 84 | 6040,3224,5,956716750,Woman in the Dunes (Suna no onna) (1964),Drama,M,25,6,11106 85 | 4384,2918,5,965171739,Ferris Bueller's Day Off (1986),Comedy,M,25,0,43623 86 | 5156,3688,3,961946487,Porky's (1981),Comedy,M,18,14,10024 87 | 615,296,3,975805801,Pulp Fiction (1994),Crime|Drama,M,50,17,32951 88 | 2753,3045,3,973198964,Peter's Friends (1992),Comedy|Drama,F,50,20,27516 89 | 2438,1125,5,974259943,"Return of the Pink Panther, The (1974)",Comedy,M,35,1,22903 90 | 5746,1242,4,958354460,Glory (1989),Action|Drama|War,M,18,15,94061 91 | 5157,3462,5,961944604,Modern Times (1936),Comedy,M,35,1,74012 92 | 3402,1252,5,967433929,Chinatown (1974),Film-Noir|Mystery|Thriller,M,35,20,30306 93 | 76,593,5,977847255,"Silence of the Lambs, The (1991)",Drama|Thriller,M,35,7,55413 94 | 2067,1019,3,974658834,"20,000 Leagues Under the Sea (1954)",Adventure|Children's|Fantasy|Sci-Fi,M,50,16,06430 95 | 2181,2020,3,979353437,Dangerous Liaisons (1988),Drama|Romance,M,25,0,45245 96 | 3947,593,5,965691680,"Silence of the Lambs, The (1991)",Drama|Thriller,M,25,0,90019 97 | 546,218,4,976069421,Boys on the Side (1995),Comedy|Drama,F,25,0,37211 98 | 1246,3030,5,1032056405,Yojimbo (1961),Comedy|Drama|Western,M,18,4,98225 99 | 4214,3186,5,965319143,"Girl, Interrupted (1999)",Drama,F,25,0,20121 100 | 2841,680,3,982805796,Alphaville (1965),Sci-Fi,M,50,12,98056 101 | 4205,3175,4,965321085,Galaxy Quest (1999),Adventure|Comedy|Sci-Fi,F,25,15,87801 102 | 1120,1097,4,974911354,E.T. the Extra-Terrestrial (1982),Children's|Drama|Fantasy|Sci-Fi,M,18,4,95616 103 | 5371,3194,3,960481000,"Way We Were, The (1973)",Drama,M,25,11,55408 104 | 2695,1278,5,973310827,Young Frankenstein (1974),Comedy|Horror,M,35,11,46033 105 | 3312,520,2,976673070,Robin Hood: Men in Tights (1993),Comedy,F,18,4,90039 106 | 5039,1792,1,962513044,U.S. Marshalls (1998),Action|Thriller,F,35,4,97068 107 | 4655,2146,3,963903103,St. 
Elmo's Fire (1985),Drama|Romance,F,25,1,92037 108 | 3558,1580,5,966802528,Men in Black (1997),Action|Adventure|Comedy|Sci-Fi,M,18,17,66044 109 | 506,3354,1,976208080,Mission to Mars (2000),Sci-Fi,M,25,16,55103-1006 110 | 3568,1230,3,966745594,Annie Hall (1977),Comedy|Romance,M,25,0,98503 111 | 2943,1197,5,971319983,"Princess Bride, The (1987)",Action|Adventure|Comedy|Romance,M,35,12,95864 112 | 716,737,3,982881364,Barb Wire (1996),Action|Sci-Fi,M,18,4,98188 113 | 5964,454,3,956999469,"Firm, The (1993)",Drama|Thriller,M,18,5,97202 114 | 4802,1208,4,996034747,Apocalypse Now (1979),Drama|War,M,56,1,40601 115 | 1106,3624,4,974920622,Shanghai Noon (2000),Action,M,18,4,90241 116 | 3410,2565,3,967419652,"King and I, The (1956)",Musical,M,35,1,20653 117 | 1273,3095,5,974814536,"Grapes of Wrath, The (1940)",Drama,M,35,2,19123 118 | 1706,1916,4,974709448,Buffalo 66 (1998),Action|Comedy|Drama,M,25,20,19134 119 | 4889,590,5,962909224,Dances with Wolves (1990),Adventure|Drama|Western,M,18,4,63108 120 | 4966,2100,3,962609782,Splash (1984),Comedy|Fantasy|Romance,M,50,14,55407 121 | 4238,1884,4,965343416,Fear and Loathing in Las Vegas (1998),Comedy|Drama,M,35,16,44691 122 | 5365,1042,3,960502974,That Thing You Do! (1996),Comedy,M,18,12,90250 123 | 415,1302,3,977501743,Field of Dreams (1989),Drama,F,35,0,55406 124 | 4658,1009,5,963966553,Escape to Witch Mountain (1975),Adventure|Children's|Fantasy,M,25,4,99163 125 | 854,345,3,975357801,"Adventures of Priscilla, Queen of the Desert, The (1994)",Comedy|Drama,F,25,16,44092 126 | 2857,436,4,972509362,Color of Night (1994),Drama|Thriller,M,25,0,10469 127 | 1835,1330,4,974878241,April Fool's Day (1986),Comedy|Horror,M,25,19,11501 128 | 1321,2240,3,974778494,My Bodyguard (1980),Drama,F,25,14,34639 129 | 3274,3698,2,979767184,"Running Man, The (1987)",Action|Adventure|Sci-Fi,M,25,20,02062 130 | 5893,2144,3,957470619,Sixteen Candles (1984),Comedy,M,25,7,02139 131 | 3436,2724,3,967328026,Runaway Bride (1999),Comedy|Romance,M,35,0,98503 132 | 3315,2918,5,967942960,Ferris Bueller's Day Off (1986),Comedy,M,25,12,78731 133 | 5056,2700,5,962488280,"South Park: Bigger, Longer and Uncut (1999)",Animation|Comedy,M,45,1,16673 134 | 5256,208,2,961271616,Waterworld (1995),Action|Adventure,M,25,16,30269 135 | 4290,1193,4,965274348,One Flew Over the Cuckoo's Nest (1975),Drama,M,25,17,98661 136 | 1010,1379,2,975220259,Young Guns II (1990),Action|Comedy|Western,M,25,0,10310 137 | 829,904,4,975368038,Rear Window (1954),Mystery|Thriller,M,1,19,53711 138 | 5953,480,4,957143581,Jurassic Park (1993),Action|Adventure|Sci-Fi,M,1,10,21030 139 | 4732,3016,4,963332896,Creepshow (1982),Horror,M,25,14,24450 140 | 4815,3181,5,972240802,Titus (1999),Drama,F,50,18,04849 141 | 1164,1894,2,1004486985,Six Days Seven Nights (1998),Adventure|Comedy|Romance,F,25,19,90020 142 | 4373,3167,5,965180829,Carnal Knowledge (1971),Drama,M,50,12,32920 143 | 5293,1374,4,961055887,Star Trek: The Wrath of Khan (1982),Action|Adventure|Sci-Fi,M,25,12,95030 144 | 1579,3101,4,981272057,Fatal Attraction (1987),Thriller,M,25,0,60201 145 | 2600,3147,5,973804787,"Green Mile, The (1999)",Drama|Thriller,M,25,14,19312 146 | 1283,480,4,974793389,Jurassic Park (1993),Action|Adventure|Sci-Fi,F,18,1,94607 147 | 3242,3062,5,968341175,"Longest Day, The (1962)",Action|Drama|War,M,50,13,94089 148 | 3618,3374,3,967116272,Daughters of the Dust (1992),Drama,M,56,17,22657 149 | 3762,1337,4,966434517,"Body Snatcher, The (1945)",Horror,M,50,6,11746 150 | 1015,1184,3,975018699,Mediterraneo (1991),Comedy|War,M,35,3,11220 151 | 
4645,2344,5,963976808,Runaway Train (1985),Action|Adventure|Drama|Thriller,F,50,6,48094 152 | 3184,1397,4,968709039,Bastard Out of Carolina (1996),Drama,F,25,18,21214 153 | 1285,1794,4,974833328,Love and Death on Long Island (1997),Comedy|Drama,M,35,4,98125 154 | 5521,3354,2,959833154,Mission to Mars (2000),Sci-Fi,F,25,6,02118 155 | 1472,2278,3,974767792,Ronin (1998),Action|Crime|Thriller,M,25,7,90248 156 | 5630,21,4,980085414,Get Shorty (1995),Action|Comedy|Drama,M,35,17,06854 157 | 3710,3033,5,966272980,Spaceballs (1987),Comedy|Sci-Fi,M,1,10,02818 158 | 192,761,1,977028390,"Phantom, The (1996)",Adventure,M,18,1,10977 159 | 1285,1198,5,974880310,Raiders of the Lost Ark (1981),Action|Adventure,M,35,4,98125 160 | 2174,1046,4,974613044,Beautiful Thing (1996),Drama|Romance,M,50,12,87505 161 | 635,1270,4,975768106,Back to the Future (1985),Comedy|Sci-Fi,M,56,17,33785 162 | 910,412,5,975207742,"Age of Innocence, The (1993)",Drama,F,50,0,98226 163 | 1752,2021,4,975729332,Dune (1984),Fantasy|Sci-Fi,M,25,3,96813 164 | 1408,198,4,974762924,Strange Days (1995),Action|Crime|Sci-Fi,M,25,0,90046 165 | 4738,1242,4,963279051,Glory (1989),Action|Drama|War,M,56,1,23608 166 | 1503,1971,2,974748897,"Nightmare on Elm Street 4: The Dream Master, A (1988)",Horror,M,25,12,92688 167 | 3053,1296,3,970601837,"Room with a View, A (1986)",Drama|Romance,F,25,3,55102 168 | 3471,3614,2,973297828,Honeymoon in Vegas (1992),Comedy|Romance,M,18,4,80302 169 | 678,1972,3,988638700,"Nightmare on Elm Street 5: The Dream Child, A (1989)",Horror,M,25,0,34952 170 | 3483,2561,3,986327282,True Crime (1999),Crime|Thriller,F,45,7,30260 171 | 3910,3108,5,965756244,"Fisher King, The (1991)",Comedy|Drama|Romance,M,25,20,91505 172 | 182,1089,1,977085647,Reservoir Dogs (1992),Crime|Thriller,M,18,4,03052 173 | 1755,1653,3,1036917836,Gattaca (1997),Drama|Sci-Fi|Thriller,F,18,4,77005 174 | 3589,70,2,966658567,From Dusk Till Dawn (1996),Action|Comedy|Crime|Horror|Thriller,F,45,0,80010 175 | 471,3481,4,976222483,High Fidelity (2000),Comedy,M,35,7,08904 176 | 1141,813,2,974878678,Larger Than Life (1996),Comedy,F,25,3,84770 177 | 5227,1196,2,961476022,Star Wars: Episode V - The Empire Strikes Back (1980),Action|Adventure|Drama|Sci-Fi|War,M,18,10,64050 178 | 1303,2344,2,974837844,Runaway Train (1985),Action|Adventure|Drama|Thriller,M,25,19,94111 179 | 5080,3102,5,962412804,Jagged Edge (1985),Thriller,F,50,12,95472 180 | 2023,1012,4,1006290836,Old Yeller (1957),Children's|Drama,M,18,4,56001 181 | 3759,2151,5,966094413,"Gods Must Be Crazy II, The (1989)",Comedy,M,35,6,54751 182 | 1685,2664,2,974709721,Invasion of the Body Snatchers (1956),Horror|Sci-Fi,M,35,12,95833 183 | 4715,1221,4,963508830,"Godfather: Part II, The (1974)",Action|Crime|Drama,M,25,2,97205 184 | 1591,350,5,974742941,"Client, The (1994)",Drama|Mystery|Thriller,M,50,7,26501 185 | 4227,3635,3,965411938,"Spy Who Loved Me, The (1977)",Action,M,25,19,11414-2520 186 | 1908,36,5,974697744,Dead Man Walking (1995),Drama,M,56,13,95129 187 | 5365,1892,4,960503255,"Perfect Murder, A (1998)",Mystery|Thriller,M,18,12,90250 188 | 1579,2420,4,981272235,"Karate Kid, The (1984)",Drama,M,25,0,60201 189 | 1866,3948,5,974753321,Meet the Parents (2000),Comedy,M,25,7,94043 190 | 4238,3543,4,965415533,Diner (1982),Comedy|Drama,M,35,16,44691 191 | 3590,2000,5,966657892,Lethal Weapon (1987),Action|Comedy|Crime|Drama,F,18,15,02115 192 | 3401,3256,5,980115327,Patriot Games (1992),Action|Thriller,M,35,7,76109 193 | 3705,540,2,966287116,Sliver (1993),Thriller,M,45,7,30076 194 | 4973,1246,3,962607149,Dead Poets 
Society (1989),Drama,F,56,2,949702 195 | 4947,380,4,962651180,True Lies (1994),Action|Adventure|Comedy|Romance,M,35,17,90035 196 | 2346,1416,4,974413811,Evita (1996),Drama|Musical,F,1,10,48105 197 | 1427,3596,3,974840560,Screwed (2000),Comedy,M,25,12,21401 198 | 3868,1626,3,965855033,Fire Down Below (1997),Action|Drama|Thriller,M,18,12,73112 199 | 249,2369,3,976730191,Desperately Seeking Susan (1985),Comedy|Romance,F,18,14,48126 200 | 5720,349,4,958503395,Clear and Present Danger (1994),Action|Adventure|Thriller,M,25,0,60610 201 | 877,1485,3,975270899,Liar Liar (1997),Comedy,M,25,0,90631 202 | --------------------------------------------------------------------------------
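For orientation, the sample file above can be turned into the {'sparse': {feature_name: vocabulary_size}} dictionary shape that the model code earlier in this dump indexes (for example region_feature_dim_dict['sparse'][feat] in deepRS/models/mlr.py). What follows is a minimal sketch, assuming pandas and scikit-learn are available; the repository's actual preprocessing lives in examples/run_regression_movielens.py, and the column names below come from the CSV header row.

import pandas as pd
from sklearn.preprocessing import LabelEncoder

# Load the sample shown above; quoted titles containing commas parse correctly.
data = pd.read_csv('examples/movielens_sample.txt')
sparse_features = ['user_id', 'movie_id', 'gender', 'age', 'occupation', 'zip']

# Ordinal-encode each sparse feature so its values can index an Embedding row.
for feat in sparse_features:
    data[feat] = LabelEncoder().fit_transform(data[feat])

# Vocabulary sizes in the {'sparse': {name: dim}} form the models consume.
feature_dim_dict = {'sparse': {feat: data[feat].nunique()
                               for feat in sparse_features}}

# 'rating' is presumably the regression target in run_regression_movielens.py.
target = data['rating'].values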