├── .gitignore
├── LICENSE
├── README.md
├── docs
│   ├── Makefile
│   ├── conf.py
│   ├── index.rst
│   ├── modules
│   │   ├── module.png
│   │   ├── openrec.legacy.modules.extractions.rst
│   │   ├── openrec.legacy.modules.fusions.rst
│   │   ├── openrec.legacy.modules.interactions.rst
│   │   ├── openrec.legacy.modules.module.rst
│   │   └── openrec.legacy.modules.rst
│   ├── openrec.legacy.implicit_model_trainer.rst
│   ├── recommenders
│   │   ├── openrec.legacy.recommenders.bpr.rst
│   │   ├── openrec.legacy.recommenders.cdl.rst
│   │   ├── openrec.legacy.recommenders.cml.rst
│   │   ├── openrec.legacy.recommenders.concat_visual_bpr.rst
│   │   ├── openrec.legacy.recommenders.pmf.rst
│   │   ├── openrec.legacy.recommenders.recommender.rst
│   │   ├── openrec.legacy.recommenders.user_pmf.rst
│   │   ├── openrec.legacy.recommenders.user_visual_pmf.rst
│   │   ├── openrec.legacy.recommenders.visual_bpr.rst
│   │   ├── openrec.legacy.recommenders.visual_cml.rst
│   │   ├── openrec.legacy.recommenders.visual_gmf.rst
│   │   ├── openrec.legacy.recommenders.visual_pmf.rst
│   │   ├── recommender.png
│   │   └── recommenders.rst
│   └── utils
│       ├── openrec.legacy.utils.dataset.rst
│       ├── openrec.legacy.utils.evaluators.rst
│       ├── openrec.legacy.utils.implicit_dataset.rst
│       ├── openrec.legacy.utils.samplers.rst
│       └── utils.rst
├── docs_requirements.txt
├── openrec
│   ├── __init__.py
│   ├── tf1
│   │   ├── __init__.py
│   │   ├── fast_dot_product_server.py
│   │   ├── legacy
│   │   │   ├── __init__.py
│   │   │   ├── implicit_model_trainer.py
│   │   │   ├── itr_mlp_model_trainer.py
│   │   │   ├── modules
│   │   │   │   ├── __init__.py
│   │   │   │   ├── extractions
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── extraction.py
│   │   │   │   │   ├── identity_mapping.py
│   │   │   │   │   ├── latent_factor.py
│   │   │   │   │   ├── look_up.py
│   │   │   │   │   ├── multi_layer_fc.py
│   │   │   │   │   ├── sdae.py
│   │   │   │   │   └── temporal_latent_factor.py
│   │   │   │   ├── fusions
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── average.py
│   │   │   │   │   ├── concat.py
│   │   │   │   │   └── fusion.py
│   │   │   │   ├── interactions
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── interaction.py
│   │   │   │   │   ├── ns_eu_dist.py
│   │   │   │   │   ├── ns_log.py
│   │   │   │   │   ├── pairwise_eu_dist.py
│   │   │   │   │   ├── pairwise_hinge.py
│   │   │   │   │   ├── pairwise_log.py
│   │   │   │   │   ├── pointwise_ge_ce.py
│   │   │   │   │   ├── pointwise_ge_mlp_ce.py
│   │   │   │   │   ├── pointwise_mlp_ce.py
│   │   │   │   │   └── pointwise_mse.py
│   │   │   │   └── module.py
│   │   │   ├── recommenders
│   │   │   │   ├── __init__.py
│   │   │   │   ├── bpr.py
│   │   │   │   ├── cdl.py
│   │   │   │   ├── cml.py
│   │   │   │   ├── concat_visual_bpr.py
│   │   │   │   ├── gmf.py
│   │   │   │   ├── itr_mlp.py
│   │   │   │   ├── nbpr.py
│   │   │   │   ├── pmf.py
│   │   │   │   ├── recommender.py
│   │   │   │   ├── user_pmf.py
│   │   │   │   ├── user_visual_pmf.py
│   │   │   │   ├── visual_bpr.py
│   │   │   │   ├── visual_cml.py
│   │   │   │   ├── visual_gmf.py
│   │   │   │   ├── visual_pmf.py
│   │   │   │   └── wcml.py
│   │   │   ├── tests
│   │   │   │   └── __init__.py
│   │   │   └── utils
│   │   │       ├── __init__.py
│   │   │       ├── dataset.py
│   │   │       ├── evaluators
│   │   │       │   ├── __init__.py
│   │   │       │   ├── auc.py
│   │   │       │   ├── evaluator.py
│   │   │       │   ├── implicit_eval_manager.py
│   │   │       │   ├── mse.py
│   │   │       │   ├── ndcg.py
│   │   │       │   ├── precision.py
│   │   │       │   └── recall.py
│   │   │       ├── implicit_dataset.py
│   │   │       └── samplers
│   │   │           ├── __init__.py
│   │   │           ├── explicit_sampler.py
│   │   │           ├── n_pairwise_sampler.py
│   │   │           ├── pairwise_sampler.py
│   │   │           ├── pointwise_sampler.py
│   │   │           └── sampler.py
│   │   ├── model_trainer.py
│   │   ├── modules
│   │   │   ├── __init__.py
│   │   │   ├── extractions
│   │   │   │   ├── __init__.py
│   │   │   │   ├── latent_factor.py
│   │   │   │   └── multi_layer_fc.py
│   │   │   ├── fusions
│   │   │   │   └── __init__.py
│   │   │   └── interactions
│   │   │       ├── __init__.py
│   │   │       ├── mlp_softmax.py
│   │   │       ├── pairwise_eu_dist.py
│   │   │       ├── pairwise_log.py
│   │   │       ├── pointwise_mlp_ce.py
│   │   │       ├── pointwise_mse.py
│   │   │       └── rnn_softmax.py
│   │   ├── recommenders
│   │   │   ├── __init__.py
│   │   │   ├── bpr.py
│   │   │   ├── pmf.py
│   │   │   ├── recommender.py
│   │   │   ├── rnn_rec.py
│   │   │   ├── ucml.py
│   │   │   ├── vanilla_youtube_rec.py
│   │   │   ├── vbpr.py
│   │   │   └── youtube_rec.py
│   │   └── utils
│   │       ├── __init__.py
│   │       ├── dataset.py
│   │       ├── evaluators
│   │       │   ├── __init__.py
│   │       │   ├── auc.py
│   │       │   ├── eval_manager.py
│   │       │   ├── evaluator.py
│   │       │   ├── mse.py
│   │       │   ├── ndcg.py
│   │       │   ├── precision.py
│   │       │   └── recall.py
│   │       └── samplers
│   │           ├── __init__.py
│   │           ├── evaluation_sampler.py
│   │           ├── random_pairwise_sampler.py
│   │           ├── random_pointwise_sampler.py
│   │           ├── sampler.py
│   │           ├── stratified_pointwise_sampler.py
│   │           ├── temporal_evaluation_sampler.py
│   │           ├── temporal_sampler.py
│   │           ├── vbpr_evaluation_sampler.py
│   │           ├── vbpr_pairwise_sampler.py
│   │           ├── youtube_evaluation_sampler.py
│   │           └── youtube_sampler.py
│   └── tf2
│       ├── __init__.py
│       ├── data
│       │   ├── __init__.py
│       │   ├── dataset.py
│       │   └── utils.py
│       ├── metrics
│       │   ├── __init__.py
│       │   ├── dict_mean.py
│       │   └── ranking_metrics.py
│       ├── modules
│       │   ├── __init__.py
│       │   ├── latent_factor.py
│       │   ├── multi_layer_perceptron.py
│       │   ├── pairwise_log_loss.py
│       │   ├── pointwise_mse_loss.py
│       │   └── second_order_feature_interaction.py
│       └── recommenders
│           ├── __init__.py
│           ├── bpr.py
│           ├── dlrm.py
│           ├── gmf.py
│           ├── ucml.py
│           └── wrmf.py
├── setup.cfg
├── setup.py
├── tf1_examples
│   ├── bpr_citeulike.py
│   ├── dataloader.py
│   ├── legacy_examples
│   │   ├── config.py
│   │   ├── dataloader.py
│   │   ├── exp_bpr.py
│   │   ├── exp_cml.py
│   │   ├── exp_concat_visual_bpr.py
│   │   ├── exp_itr_mlp.py
│   │   ├── exp_pmf.py
│   │   ├── exp_visual_bpr.py
│   │   ├── exp_visual_cml.py
│   │   ├── exp_visual_gmf.py
│   │   ├── exp_visual_pmf.py
│   │   └── exp_wcml.py
│   ├── notebooks
│   │   └── OpenRec_Tutorial_1.ipynb
│   ├── pmf_citeulike.py
│   ├── rnn_rec_lastfm.py
│   ├── ucml_citeulike.py
│   ├── vanilla_youtube_rec_lastfm.py
│   ├── vbpr_tradesy.py
│   └── youtube_rec_lastfm.py
├── tf1_tutorials
│   ├── Lecture.pdf
│   ├── OpenRec_Basics_Diversity_and_Fairness.ipynb
│   ├── PMF_example.ipynb
│   ├── Temporal_aware_recommendation.ipynb
│   ├── Vanilla_Youtube_Recommender_example.ipynb
│   ├── Youtube_Recommender_example.ipynb
│   └── legacy_tutorials
│       ├── OpenRec Tutorial #1.ipynb
│       └── OpenRec Tutorial #2.ipynb
└── tf2_examples
    ├── bpr_citeulike.py
    ├── dataloader.py
    └── dlrm_criteo.py

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
.*
__pycache__
.idea/
.venv/
.vscode/
build
*.pyc
.DS_Store
dataset/
/*.egg-info
results/
*.bak
# Setuptools distribution folder.
/dist/
*build*
examples/*.index
examples/*.meta
examples/*.data-0*
examples/checkpoint
examples/*.pickle
--------------------------------------------------------------------------------
/docs/Makefile:
--------------------------------------------------------------------------------
# Minimal makefile for Sphinx documentation
#

# You can set these variables from the command line.
SPHINXOPTS    =
SPHINXBUILD   = sphinx-build
SPHINXPROJ    = openrec
SOURCEDIR     = .
BUILDDIR      = _build

# Put it first so that "make" without argument is like "make help".
help:
	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)

.PHONY: help Makefile

# Catch-all target: route all unknown targets to Sphinx using the new
# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
%: Makefile
	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
--------------------------------------------------------------------------------
/docs/index.rst:
--------------------------------------------------------------------------------
.. openrec documentation master file, created by
   sphinx-quickstart on Wed Nov 29 12:06:22 2017.
   You can adapt this file completely to your liking, but it should at least
   contain the root `toctree` directive.

OpenRec documentation
===================================

Contents
========

OpenRec is an open-source and modular library for neural network-inspired recommendation algorithms.

.. toctree::
   :maxdepth: 2

   Modules
   Recommenders
   Utils
   Implicit Model Trainer


Indices and tables
==================

* :ref:`genindex`
* :ref:`modindex`
* :ref:`search`
--------------------------------------------------------------------------------
/docs/modules/module.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ylongqi/openrec/a00de2345844858194ef43ab6845342114a5be93/docs/modules/module.png
--------------------------------------------------------------------------------
/docs/modules/openrec.legacy.modules.extractions.rst:
--------------------------------------------------------------------------------
Extractions
=====================================

.. automodule:: openrec.legacy.modules.extractions
.. currentmodule:: openrec.legacy.modules.extractions


Extraction
-----------

.. autoclass:: Extraction
    :members:
    :undoc-members:
    :private-members:


Look Up
-------

.. autoclass:: LookUp
    :members:
    :undoc-members:
    :private-members:

Identity Mapping
-----------------

.. autoclass:: IdentityMapping
    :members:
    :undoc-members:
    :private-members:

Latent Factor
--------------

.. autoclass:: LatentFactor
    :members:
    :undoc-members:
    :private-members:

Multi Layer FC
--------------

.. autoclass:: MultiLayerFC
    :members:
    :undoc-members:
    :private-members:

SDAE
-------

.. autoclass:: SDAE
    :members:
    :undoc-members:
    :private-members:

TemporalLatentFactor
--------------------

.. autoclass:: TemporalLatentFactor
    :members:
    :undoc-members:
    :private-members:
--------------------------------------------------------------------------------
/docs/modules/openrec.legacy.modules.fusions.rst:
--------------------------------------------------------------------------------
Fusions
=====================================

.. automodule:: openrec.legacy.modules.fusions
.. currentmodule:: openrec.legacy.modules.fusions


Fusion
-----------

.. autoclass:: Fusion
    :members:
    :undoc-members:
    :private-members:


Concat
-----------

.. autoclass:: Concat
    :members:
    :undoc-members:
    :private-members:


Average
-----------
.. autoclass:: Average
    :members:
    :undoc-members:
    :private-members:
--------------------------------------------------------------------------------
/docs/modules/openrec.legacy.modules.interactions.rst:
--------------------------------------------------------------------------------
Interactions
=====================================

.. automodule:: openrec.legacy.modules.interactions
.. currentmodule:: openrec.legacy.modules.interactions


Interaction
------------

.. autoclass:: Interaction
    :members:
    :undoc-members:
    :private-members:


PairwiseLog
---------------

.. autoclass:: PairwiseLog
    :members:
    :undoc-members:
    :private-members:


PairwiseEuDist
----------------

.. autoclass:: PairwiseEuDist
    :members:
    :undoc-members:
    :private-members:


PointwiseGeCE
---------------

.. autoclass:: PointwiseGeCE
    :members:
    :undoc-members:
    :private-members:


PointwiseGeMLPCE
------------------

.. autoclass:: PointwiseGeMLPCE
    :members:
    :undoc-members:
    :private-members:


PointwiseMLPCE
----------------

.. autoclass:: PointwiseMLPCE
    :members:
    :undoc-members:
    :private-members:


PointwiseMSE
------------

.. autoclass:: PointwiseMSE
    :members:
    :undoc-members:
    :private-members:
--------------------------------------------------------------------------------
/docs/modules/openrec.legacy.modules.module.rst:
--------------------------------------------------------------------------------
Module
=================================


.. automodule:: openrec.legacy.modules
.. currentmodule:: openrec.legacy.modules

.. autoclass:: Module
    :members:
    :undoc-members:
    :private-members:
--------------------------------------------------------------------------------
/docs/modules/openrec.legacy.modules.rst:
--------------------------------------------------------------------------------
modules package
========================


.. toctree::
   :maxdepth: 5

   Module
   Extractions
   Fusions
   Interactions
--------------------------------------------------------------------------------
/docs/openrec.legacy.implicit_model_trainer.rst:
--------------------------------------------------------------------------------
implicit\_model\_trainer module
===============================

.. automodule:: openrec.legacy.implicit_model_trainer
    :members:
    :undoc-members:
    :show-inheritance:
--------------------------------------------------------------------------------
/docs/recommenders/openrec.legacy.recommenders.bpr.rst:
--------------------------------------------------------------------------------
BPR
=================================


.. automodule:: openrec.legacy.recommenders
.. currentmodule:: openrec.legacy.recommenders

.. autoclass:: BPR
    :members:
    :undoc-members:
    :private-members:
--------------------------------------------------------------------------------
/docs/recommenders/openrec.legacy.recommenders.cdl.rst:
--------------------------------------------------------------------------------
CDL
=================================


.. automodule:: openrec.legacy.recommenders
.. currentmodule:: openrec.legacy.recommenders

.. autoclass:: CDL
    :members:
    :undoc-members:
    :private-members:
--------------------------------------------------------------------------------
/docs/recommenders/openrec.legacy.recommenders.cml.rst:
--------------------------------------------------------------------------------
CML
=================================


.. automodule:: openrec.legacy.recommenders
.. currentmodule:: openrec.legacy.recommenders

.. autoclass:: CML
    :members:
    :undoc-members:
    :private-members:
--------------------------------------------------------------------------------
/docs/recommenders/openrec.legacy.recommenders.concat_visual_bpr.rst:
--------------------------------------------------------------------------------
ConcatVisualBPR
=================================


.. automodule:: openrec.legacy.recommenders
.. currentmodule:: openrec.legacy.recommenders

.. autoclass:: ConcatVisualBPR
    :members:
    :undoc-members:
    :private-members:
--------------------------------------------------------------------------------
/docs/recommenders/openrec.legacy.recommenders.pmf.rst:
--------------------------------------------------------------------------------
PMF
=================================


.. automodule:: openrec.legacy.recommenders
.. currentmodule:: openrec.legacy.recommenders

.. autoclass:: PMF
    :members:
    :undoc-members:
    :private-members:
--------------------------------------------------------------------------------
/docs/recommenders/openrec.legacy.recommenders.recommender.rst:
--------------------------------------------------------------------------------
Recommender
=================================


.. automodule:: openrec.legacy.recommenders
.. currentmodule:: openrec.legacy.recommenders

.. autoclass:: Recommender
    :members:
    :undoc-members:
    :private-members:
--------------------------------------------------------------------------------
/docs/recommenders/openrec.legacy.recommenders.user_pmf.rst:
--------------------------------------------------------------------------------
User PMF
=================================


.. automodule:: openrec.legacy.recommenders
.. currentmodule:: openrec.legacy.recommenders

.. autoclass:: UserPMF
    :members:
    :undoc-members:
    :private-members:


.. user\_pmf module
.. =======================================
.. automodule openrec.legacy.recommenders.user_pmf
..     :members:
..     :undoc-members:
..     :show-inheritance:
--------------------------------------------------------------------------------
/docs/recommenders/openrec.legacy.recommenders.user_visual_pmf.rst:
--------------------------------------------------------------------------------
User Visual PMF
=================================


.. automodule:: openrec.legacy.recommenders
.. currentmodule:: openrec.legacy.recommenders

.. autoclass:: UserVisualPMF
    :members:
    :undoc-members:
    :private-members:
--------------------------------------------------------------------------------
/docs/recommenders/openrec.legacy.recommenders.visual_bpr.rst:
--------------------------------------------------------------------------------
VisualBPR
=================================

.. automodule:: openrec.legacy.recommenders
.. currentmodule:: openrec.legacy.recommenders

.. autoclass:: VisualBPR
    :members:
    :undoc-members:
    :private-members:
--------------------------------------------------------------------------------
/docs/recommenders/openrec.legacy.recommenders.visual_cml.rst:
--------------------------------------------------------------------------------
Visual CML
=================================


.. automodule:: openrec.legacy.recommenders
.. currentmodule:: openrec.legacy.recommenders

.. autoclass:: VisualCML
    :members:
    :undoc-members:
    :private-members:
--------------------------------------------------------------------------------
/docs/recommenders/openrec.legacy.recommenders.visual_gmf.rst:
--------------------------------------------------------------------------------
VisualGMF
=================================


.. automodule:: openrec.legacy.recommenders
.. currentmodule:: openrec.legacy.recommenders

.. autoclass:: VisualGMF
    :members:
    :undoc-members:
    :private-members:
--------------------------------------------------------------------------------
/docs/recommenders/openrec.legacy.recommenders.visual_pmf.rst:
--------------------------------------------------------------------------------
VisualPMF
=================================


.. automodule:: openrec.legacy.recommenders
.. currentmodule:: openrec.legacy.recommenders

.. autoclass:: VisualPMF
    :members:
    :undoc-members:
    :private-members:
--------------------------------------------------------------------------------
/docs/recommenders/recommender.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ylongqi/openrec/a00de2345844858194ef43ab6845342114a5be93/docs/recommenders/recommender.png
--------------------------------------------------------------------------------
/docs/recommenders/recommenders.rst:
--------------------------------------------------------------------------------
openrec\.legacy\.recommenders package
=====================================


.. toctree::

   Recommender
   BPR
   Visual BPR
   CDL
   CML
   Visual CML
   Concat Visual BPR
   PMF
   User PMF
   Visual PMF
   User Visual PMF
   Visual GMF
--------------------------------------------------------------------------------
/docs/utils/openrec.legacy.utils.dataset.rst:
--------------------------------------------------------------------------------
openrec\.legacy\.utils\.dataset module
=======================================

.. automodule:: openrec.legacy.utils.dataset
    :members:
    :undoc-members:
    :show-inheritance:
--------------------------------------------------------------------------------
/docs/utils/openrec.legacy.utils.evaluators.rst:
--------------------------------------------------------------------------------
Evaluators package
==================================

.. automodule:: openrec.legacy.utils.evaluators
.. currentmodule:: openrec.legacy.utils.evaluators

Evaluator
---------

.. autoclass:: Evaluator
    :members:
    :undoc-members:
    :private-members:


Implicit Eval Manager
---------------------
.. autoclass:: ImplicitEvalManager
    :members:
    :undoc-members:
    :private-members:


AUC
----

.. autoclass:: AUC
    :members:
    :undoc-members:
    :private-members:


Recall
-------

.. autoclass:: Recall
    :members:
    :undoc-members:
    :private-members:

MSE
-------

.. autoclass:: MSE
    :members:
    :undoc-members:
    :private-members:

NDCG
-----

.. autoclass:: NDCG
    :members:
    :undoc-members:
    :private-members:

Precision
----------

.. autoclass:: Precision
    :members:
    :undoc-members:
    :private-members:
--------------------------------------------------------------------------------
/docs/utils/openrec.legacy.utils.implicit_dataset.rst:
--------------------------------------------------------------------------------
openrec\.legacy\.utils\.implicit\_dataset module
=================================================

.. automodule:: openrec.legacy.utils.implicit_dataset
    :members:
    :undoc-members:
    :show-inheritance:
--------------------------------------------------------------------------------
/docs/utils/openrec.legacy.utils.samplers.rst:
--------------------------------------------------------------------------------
Samplers
================================

.. automodule:: openrec.legacy.utils.samplers
.. currentmodule:: openrec.legacy.utils.samplers

Sampler
----------------

.. autoclass:: Sampler
    :members:
    :undoc-members:
    :private-members:

Explicit Sampler
----------------

.. autoclass:: ExplicitSampler
    :members:
    :undoc-members:
    :private-members:

Pairwise Sampler
----------------

.. autoclass:: PairwiseSampler
    :members:
    :undoc-members:
    :private-members:

Pointwise Sampler
-----------------

.. autoclass:: PointwiseSampler
    :members:
    :undoc-members:
    :private-members:
--------------------------------------------------------------------------------
/docs/utils/utils.rst:
--------------------------------------------------------------------------------
utils package
======================

.. toctree::
   :maxdepth: 2

   Evaluators
   Samplers
   Dataset
   ImplicitDataset
--------------------------------------------------------------------------------
/docs_requirements.txt:
--------------------------------------------------------------------------------
numpy
tensorflow==2.0.1
tqdm
termcolor
--------------------------------------------------------------------------------
/openrec/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ylongqi/openrec/a00de2345844858194ef43ab6845342114a5be93/openrec/__init__.py
--------------------------------------------------------------------------------
/openrec/tf1/__init__.py:
--------------------------------------------------------------------------------
from openrec.tf1.model_trainer import ModelTrainer
from openrec.tf1.fast_dot_product_server import FastDotProductServer
--------------------------------------------------------------------------------
/openrec/tf1/legacy/__init__.py:
--------------------------------------------------------------------------------
from openrec.tf1.legacy.implicit_model_trainer import ImplicitModelTrainer
from openrec.tf1.legacy.itr_mlp_model_trainer import ItrMLPModelTrainer
--------------------------------------------------------------------------------
/openrec/tf1/legacy/itr_mlp_model_trainer.py:
--------------------------------------------------------------------------------
from __future__ import print_function
from tqdm import tqdm
import math
from termcolor import colored
import numpy as np
import sys

class ItrMLPModelTrainer(object):

    def __init__(self, batch_size, test_batch_size, train_dataset, model, sampler):

        self._batch_size = batch_size
        self._test_batch_size = test_batch_size

        self._train_dataset = train_dataset
        self._max_item = self._train_dataset.max_item()

        self._model = model
        self._sampler = sampler

    def train(self, num_itr, display_itr, update_itr, eval_datasets=[], evaluators=[]):

        acc_loss = 0

        for itr in range(num_itr):
            batch_data = self._sampler.next_batch()
            loss = self._model.train(batch_data)
            acc_loss += loss
            if itr % display_itr == 0 and itr > 0:
                print(colored('[Itr %d]' % itr, 'red'), 'loss: %f' % (acc_loss/display_itr),
                      'mse: %f' % (acc_loss * 2 / (display_itr * self._batch_size)))
                for dataset in eval_datasets:
                    print(colored('..(dataset: %s) evaluation' % dataset.name, 'green'))
                    sys.stdout.flush()
                    eval_results = self._evaluate(eval_dataset=dataset, evaluators=evaluators)
                    for key, result in eval_results.items():
                        average_result = np.mean(result, axis=0)
                        if type(average_result) is np.ndarray:
                            print(colored('..(dataset: %s)' % dataset.name, 'green'),
                                  key, ' '.join([str(s) for s in average_result]))
                        else:
                            print(colored('..(dataset: %s)' % dataset.name, 'green'),
                                  key, average_result)
                acc_loss = 0

            if itr % update_itr == 0 and itr > 0:
                self._model.update_embeddings()


    def _evaluate(self, eval_dataset, evaluators):

        metric_results = {}
        for evaluator in evaluators:
            metric_results[evaluator.name] = []

        num_entries = len(eval_dataset.data)
        batch_data = {'user_id_input': np.zeros(self._test_batch_size, np.int32),
                      'item_id_input': np.zeros(self._test_batch_size, np.int32)}

        for ind in tqdm(range(int(num_entries / self._test_batch_size))):
            entries = eval_dataset.data[ind*self._test_batch_size:(ind+1)*self._test_batch_size]
            user = entries['user_id']
            item = entries['item_id']
            labels = entries['label']

            batch_data['user_id_input'][:len(user)] = user
            batch_data['item_id_input'][:len(item)] = item

            for evaluator in evaluators:
                results = evaluator.compute(predictions=self._model.serve(batch_data)[:len(user)],
                                            labels=labels)
                metric_results[evaluator.name].append(results[:len(user)])

        for evaluator in evaluators:
            metric_results[evaluator.name] = np.concatenate(metric_results[evaluator.name])

        return metric_results
--------------------------------------------------------------------------------
/openrec/tf1/legacy/modules/__init__.py:
--------------------------------------------------------------------------------
from openrec.tf1.legacy.modules.module import Module
--------------------------------------------------------------------------------
/openrec/tf1/legacy/modules/extractions/__init__.py:
--------------------------------------------------------------------------------
from openrec.tf1.legacy.modules.extractions.extraction import Extraction
from openrec.tf1.legacy.modules.extractions.identity_mapping import IdentityMapping
from openrec.tf1.legacy.modules.extractions.latent_factor import LatentFactor
from openrec.tf1.legacy.modules.extractions.look_up import LookUp
from openrec.tf1.legacy.modules.extractions.multi_layer_fc import MultiLayerFC
from openrec.tf1.legacy.modules.extractions.sdae import SDAE
from openrec.tf1.legacy.modules.extractions.temporal_latent_factor import TemporalLatentFactor
--------------------------------------------------------------------------------
/openrec/tf1/legacy/modules/extractions/extraction.py:
--------------------------------------------------------------------------------
from openrec.tf1.legacy.modules import Module

class Extraction(Module):

    """
    A direct inheritance of the Module.
    """

    def __init__(self, train=True, l2_reg=None, scope=None, reuse=False):

        super(Extraction, self).__init__(train=train, l2_reg=l2_reg, scope=scope, reuse=reuse)
--------------------------------------------------------------------------------
/openrec/tf1/legacy/modules/extractions/identity_mapping.py:
--------------------------------------------------------------------------------
import tensorflow as tf
from openrec.tf1.legacy.modules.extractions import Extraction

class IdentityMapping(Extraction):

    """
    The IdentityMapping module executes an identity function.

    Parameters
    ----------
    value: Tensorflow tensor
        Input tensor
    scope: str, optional
        Scope for module variables.
    reuse: bool, optional
        Whether or not to reuse module variables.
    """

    def __init__(self, value, scope=None, reuse=False):

        assert value is not None, 'value cannot be None'
        self._value = value
        super(IdentityMapping, self).__init__(l2_reg=None, scope=scope, reuse=reuse)

    def _build_shared_graph(self):

        with tf.variable_scope(self._scope, reuse=self._reuse):
            self._outputs.append(self._value)
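# Illustrative usage sketch (assumes a TF1 graph context; `feature_input` is a
# hypothetical placeholder standing in for any precomputed feature tensor):
#
#     feature_input = tf.placeholder(tf.float32, shape=[None, 128])
#     id_map = IdentityMapping(value=feature_input, scope='RawFeature')
#     outputs = id_map.get_outputs()[0]  # identical to `feature_input`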
17 | """ 18 | 19 | def __init__(self, value, scope=None, reuse=False): 20 | 21 | assert value is not None, 'value cannot be None' 22 | self._value = value 23 | super(IdentityMapping, self).__init__(l2_reg=None, scope=scope, reuse=False) 24 | 25 | def _build_shared_graph(self): 26 | 27 | with tf.variable_scope(self._scope, reuse=self._reuse): 28 | self._outputs.append(self._value) 29 | -------------------------------------------------------------------------------- /openrec/tf1/legacy/modules/extractions/latent_factor.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from openrec.tf1.legacy.modules.extractions import Extraction 3 | 4 | class LatentFactor(Extraction): 5 | 6 | """ 7 | The LatentFactor module maps (embeds) input ids into latent representations. The module \ 8 | outputs a tensor with shape **shape(ids) + [embedding dimensionality]**. 9 | 10 | Parameters 11 | ---------- 12 | shape: list 13 | Shape of the embedding matrix, i.e. [number of unique ids, embedding dimensionality]. 14 | init: str, optional 15 | Embedding initialization. *'zero'* or *'normal'* (default). 16 | ids: Tensorflow tensor, optionl 17 | List of ids to retrieve embeddings. If *None*, the whole embedding matrix is returned. 18 | l2_reg: float, optional 19 | Weight for L2 regularization, i.e., weight decay. 20 | scope: str, optional 21 | Scope for module variables. 22 | reuse: bool, optional 23 | Whether or not to reuse module variables. 24 | """ 25 | 26 | def __init__(self, shape, init='normal', ids=None, l2_reg=None, scope=None, reuse=False): 27 | 28 | assert shape is not None, 'shape cannot be None' 29 | 30 | if init == 'normal': 31 | self._initializer = tf.truncated_normal_initializer(mean=0.0, stddev=0.01, dtype=tf.float32) 32 | elif init == 'zero': 33 | self._initializer = tf.constant_initializer(value=0.0, dtype=tf.float32) 34 | self._shape = shape 35 | self._ids = ids 36 | 37 | super(LatentFactor, self).__init__(l2_reg=l2_reg, scope=scope, reuse=reuse) 38 | 39 | def _build_shared_graph(self): 40 | 41 | with tf.variable_scope(self._scope, reuse=self._reuse): 42 | 43 | self._embedding = tf.get_variable('embedding', shape=self._shape, trainable=True, 44 | initializer=self._initializer) 45 | 46 | if self._ids is not None: 47 | self._outputs.append(tf.nn.embedding_lookup(self._embedding, self._ids)) 48 | 49 | if self._l2_reg is not None: 50 | self._loss = self._l2_reg * tf.nn.l2_loss(self._outputs[0]) 51 | 52 | else: 53 | self._outputs.append(self._embedding) 54 | 55 | def censor_l2_norm_op(self, censor_id_list=None, max_norm=1): 56 | 57 | """Limit the norm of embeddings. 58 | 59 | Parameters 60 | ---------- 61 | censor_id_list: list or Tensorflow tensor 62 | list of embeddings to censor (indexed by ids). 63 | max_norm: float, optional 64 | Maximum norm. 65 | 66 | Returns 67 | ------- 68 | Tensorflow operator 69 | An operator for post-training execution. 
70 | """ 71 | 72 | 73 | embedding_gather = tf.gather(self._embedding, indices=censor_id_list) 74 | norm = tf.sqrt(tf.reduce_sum(tf.square(embedding_gather), axis=1, keep_dims=True)) 75 | return tf.scatter_update(self._embedding, indices=censor_id_list, updates=embedding_gather / tf.maximum(norm, max_norm)) 76 | 77 | -------------------------------------------------------------------------------- /openrec/tf1/legacy/modules/extractions/look_up.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import tensorflow as tf 3 | from termcolor import colored 4 | from openrec.tf1.legacy.modules.extractions import Extraction 5 | 6 | class LookUp(Extraction): 7 | 8 | """ 9 | The LookUp module maps (embeds) input ids into *fixed* representations. The representations are \ 10 | not be updated during training. The module outputs a tensor with shape \ 11 | **shape(ids) + [embedding dimensionality]**. 12 | 13 | Parameters 14 | ---------- 15 | embed: numpy array 16 | Fixed embedding matrix. 17 | ids: Tensorflow tensor, optional 18 | List of ids to retrieve embeddings. If *None*, the whole embedding matrix is returned. 19 | scope: str, optional 20 | Scope for module variables. 21 | reuse: bool, optional 22 | Whether or not to reuse module variables. 23 | """ 24 | 25 | def __init__(self, embed, ids=None, scope=None, reuse=False): 26 | 27 | assert embed is not None, 'embed cannot be None' 28 | self._embed = embed 29 | self._ids = ids 30 | super(LookUp, self).__init__(scope=scope, reuse=reuse) 31 | 32 | def _build_shared_graph(self): 33 | 34 | with tf.variable_scope(self._scope, reuse=self._reuse): 35 | 36 | self.embedding = tf.Variable(self._embed, trainable=False, name='embedding',dtype=tf.float32) 37 | if self._ids is not None: 38 | self._outputs.append(tf.nn.embedding_lookup(self.embedding, self._ids)) 39 | else: 40 | self._outputs.append(self.embedding) 41 | -------------------------------------------------------------------------------- /openrec/tf1/legacy/modules/extractions/sdae.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import tensorflow as tf 3 | from termcolor import colored 4 | from openrec.tf1.legacy.modules.extractions import Extraction 5 | from openrec.tf1.legacy.modules.extractions import MultiLayerFC 6 | 7 | class SDAE(Extraction): 8 | 9 | """ 10 | The SDAE module implements Stacked Denoising Autoencoders [bn]_. It outputs SDAE's bottleneck representations \ 11 | (i.e., the encoder outputs). 12 | 13 | Parameters 14 | ---------- 15 | in_tensor: Tensorflow tensor 16 | An input tensor with shape **[*, feature dimensionality]** 17 | dims: list 18 | Specify the *feature size* of each **encoding layer**'s outputs. For example, setting **dims=[512, 258, 128]** to create \ 19 | an three-layer encoder with output shape **[*, 512]**, **[*, 256]**, and **[*, 128]**, and a two-layer decoder with \ 20 | output shape **[*, 256]** and **[*, 512]**. 21 | dropout: float, optional 22 | Dropout rate for the input tensor. If *None*, no dropout is used for the input tensor. 23 | l2_reconst: float, optional 24 | Weight for reconstruction loss. 25 | train: bool, optionl 26 | An indicator for training or servining phase. 27 | l2_reg: float, optional 28 | Weight for L2 regularization, i.e., weight decay. 29 | scope: str, optional 30 | Scope for module variables. 31 | reuse: bool, optional 32 | Whether or not to reuse module variables. 
--------------------------------------------------------------------------------
/openrec/tf1/legacy/modules/extractions/sdae.py:
--------------------------------------------------------------------------------
from __future__ import print_function
import tensorflow as tf
from termcolor import colored
from openrec.tf1.legacy.modules.extractions import Extraction
from openrec.tf1.legacy.modules.extractions import MultiLayerFC

class SDAE(Extraction):

    """
    The SDAE module implements Stacked Denoising Autoencoders [bn]_. It outputs SDAE's bottleneck representations \
    (i.e., the encoder outputs).

    Parameters
    ----------
    in_tensor: Tensorflow tensor
        An input tensor with shape **[*, feature dimensionality]**
    dims: list
        Specify the *feature size* of the input and of each **encoding layer**'s outputs. For example, \
        setting **dims=[512, 256, 128]** creates, for a 512-dimensional input, a two-layer encoder with \
        output shapes **[*, 256]** and **[*, 128]**, and a two-layer decoder with output shapes \
        **[*, 256]** and **[*, 512]**.
    dropout: float, optional
        Dropout rate for the input tensor. If *None*, no dropout is used for the input tensor.
    l2_reconst: float, optional
        Weight for reconstruction loss.
    train: bool, optional
        An indicator for training or serving phase.
    l2_reg: float, optional
        Weight for L2 regularization, i.e., weight decay.
    scope: str, optional
        Scope for module variables.
    reuse: bool, optional
        Whether or not to reuse module variables.

    References
    ----------
    .. [bn] Vincent, P., Larochelle, H., Lajoie, I., Bengio, Y. and Manzagol, P.A., 2010. Stacked denoising autoencoders: \
        Learning useful representations in a deep network with a local denoising criterion. Journal of Machine Learning \
        Research, 11(Dec), pp.3371-3408.
    """

    def __init__(self, in_tensor, dims, dropout=None,
                 l2_reconst=1.0, train=True, l2_reg=None, scope=None, reuse=False):

        assert dims is not None, 'dims cannot be None'
        assert in_tensor is not None, 'in_tensor cannot be None'

        self._in_tensor = in_tensor
        self._dims = dims
        self._dropout = dropout
        self._l2_reconst = l2_reconst

        super(SDAE, self).__init__(train=train, l2_reg=l2_reg, scope=scope, reuse=reuse)

    def _build_shared_graph(self):

        with tf.variable_scope(self._scope, reuse=self._reuse):

            _encoder = MultiLayerFC(l2_reg=self._l2_reg, in_tensor=self._in_tensor, dims=self._dims[1:], scope='encoder',
                                    dropout_in=self._dropout, dropout_mid=self._dropout, reuse=self._reuse)
            _decoder = MultiLayerFC(l2_reg=self._l2_reg, in_tensor=_encoder.get_outputs()[0], dims=self._dims[::-1][1:],
                                    scope='decoder', relu_in=True, dropout_in=self._dropout, relu_mid=True,
                                    dropout_mid=self._dropout, relu_out=True, dropout_out=self._dropout, reuse=self._reuse)

            self._outputs += _encoder.get_outputs()
            self._loss = _encoder.get_loss() + _decoder.get_loss()
            self._loss += self._l2_reconst * tf.nn.l2_loss(_decoder.get_outputs()[0] - self._in_tensor)
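# Illustrative usage sketch (names are hypothetical). dims[0] should match the
# input feature size, since the decoder mirrors the encoder back to dims[0]:
#
#     item_feature = tf.placeholder(tf.float32, shape=[None, 512])
#     sdae = SDAE(in_tensor=item_feature, dims=[512, 256, 128], dropout=0.1,
#                 l2_reconst=1.0, l2_reg=0.001, train=True, scope='AutoEncoder')
#     bottleneck = sdae.get_outputs()[0]  # shape [None, 128]
#     loss = sdae.get_loss()  # reconstruction plus weight-decay terms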
19 | """ 20 | 21 | def __init__(self, module_list, weight=1.0, scope=None, reuse=False): 22 | 23 | self._module_list = module_list 24 | self._weight = weight 25 | 26 | super(Average, self).__init__(scope=scope, reuse=reuse) 27 | 28 | def _build_shared_graph(self): 29 | 30 | with tf.variable_scope(self._scope, reuse=self._reuse): 31 | 32 | outputs = sum([module.get_outputs() for module in self._module_list], []) 33 | self._outputs.append(self._weight * tf.add_n(outputs) / len(self._module_list)) 34 | self._loss = 0.0 35 | -------------------------------------------------------------------------------- /openrec/tf1/legacy/modules/fusions/concat.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from openrec.tf1.legacy.modules.fusions import Fusion 3 | 4 | class Concat(Fusion): 5 | 6 | """ 7 | The Concat module outputs the concatenation of the outputs from multiple modules. 8 | 9 | Parameters 10 | ---------- 11 | module_list: list 12 | The list of modules. 13 | scope: str, optional 14 | Scope for module variables. 15 | reuse: bool, optional 16 | Whether or not to reuse module variables. 17 | """ 18 | 19 | def __init__(self, module_list, axis=1, scope=None, reuse=False): 20 | 21 | self._module_list = module_list 22 | self._axis = axis 23 | 24 | super(Concat, self).__init__(l2_reg=None, scope=scope, reuse=reuse) 25 | 26 | def _build_shared_graph(self): 27 | 28 | with tf.variable_scope(self._scope, reuse=self._reuse): 29 | 30 | outputs = sum([cell.get_outputs() for cell in self._module_list], []) 31 | self._outputs.append(tf.concat(values=outputs, axis=self._axis)) 32 | self._loss = 0.0 33 | -------------------------------------------------------------------------------- /openrec/tf1/legacy/modules/fusions/fusion.py: -------------------------------------------------------------------------------- 1 | from openrec.tf1.legacy.modules import Module 2 | 3 | class Fusion(Module): 4 | 5 | """ 6 | A direct inheritance of the Module. 
7 | """ 8 | 9 | def __init__(self, train=True, l2_reg=None, scope=None, reuse=False): 10 | 11 | super(Fusion, self).__init__(train=train, l2_reg=l2_reg, scope=scope, reuse=reuse) 12 | -------------------------------------------------------------------------------- /openrec/tf1/legacy/modules/interactions/__init__.py: -------------------------------------------------------------------------------- 1 | from openrec.tf1.legacy.modules.interactions.interaction import Interaction 2 | from openrec.tf1.legacy.modules.interactions.pairwise_log import PairwiseLog 3 | from openrec.tf1.legacy.modules.interactions.pairwise_hinge import PairwiseHinge 4 | from openrec.tf1.legacy.modules.interactions.ns_log import NsLog 5 | from openrec.tf1.legacy.modules.interactions.pointwise_ge_ce import PointwiseGeCE 6 | from openrec.tf1.legacy.modules.interactions.pointwise_mlp_ce import PointwiseMLPCE 7 | from openrec.tf1.legacy.modules.interactions.pairwise_eu_dist import PairwiseEuDist 8 | from openrec.tf1.legacy.modules.interactions.ns_eu_dist import NSEuDist 9 | from openrec.tf1.legacy.modules.interactions.pointwise_ge_mlp_ce import PointwiseGeMLPCE 10 | from openrec.tf1.legacy.modules.interactions.pointwise_mse import PointwiseMSE 11 | -------------------------------------------------------------------------------- /openrec/tf1/legacy/modules/interactions/interaction.py: -------------------------------------------------------------------------------- 1 | from openrec.tf1.legacy.modules import Module 2 | 3 | class Interaction(Module): 4 | 5 | """ 6 | A direct inheritance of the Module. 7 | """ 8 | 9 | def __init__(self, train=True, l2_reg=None, scope=None, reuse=False): 10 | 11 | super(Interaction, self).__init__(train=train, l2_reg=l2_reg, scope=scope, reuse=reuse) -------------------------------------------------------------------------------- /openrec/tf1/legacy/modules/interactions/ns_eu_dist.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from openrec.tf1.legacy.modules.interactions import Interaction 3 | 4 | class NSEuDist(Interaction): 5 | 6 | def __init__(self, user, max_item, item=None, item_bias=None, p_item=None, neg_num=5, 7 | p_item_bias=None, n_item=None, n_item_bias=None, margin=1.0, train=None, 8 | scope=None, reuse=False): 9 | 10 | self._max_item = max_item 11 | self._margin = margin 12 | self._neg_num = neg_num 13 | 14 | assert train is not None, 'train cannot be None' 15 | assert user is not None, 'user cannot be None' 16 | self._user = user 17 | 18 | if train: 19 | 20 | assert p_item is not None, 'p_item cannot be None' 21 | assert n_item is not None, 'n_item cannot be None' 22 | assert p_item_bias is not None, 'p_item_bias cannot be None' 23 | assert n_item_bias is not None, 'n_item_bias cannot be None' 24 | 25 | self._p_item = p_item 26 | self._n_item = n_item 27 | self._p_item_bias = p_item_bias 28 | self._n_item_bias = n_item_bias 29 | else: 30 | assert item is not None, 'item cannot be None' 31 | assert item_bias is not None, 'item_bias cannot be None' 32 | 33 | self._item = item 34 | self._item_bias = item_bias 35 | 36 | super(NSEuDist, self).__init__(train=train, scope=scope, reuse=reuse) 37 | 38 | 39 | def _build_training_graph(self): 40 | 41 | with tf.variable_scope(self._scope, reuse=self._reuse): 42 | tmp_user = tf.tile(tf.expand_dims(self._user, 1), [1, self._neg_num, 1]) 43 | 44 | l2_user_pos = tf.tile(tf.reduce_sum(tf.square(tf.subtract(self._user, self._p_item)), 45 | reduction_indices=1, 46 | keep_dims=True, 
name="l2_user_pos"), [1, self._neg_num]) 47 | l2_user_neg = tf.reduce_sum(tf.square(tf.subtract(tmp_user, self._n_item)), 48 | reduction_indices=2, 49 | name="l2_user_neg") 50 | pos_score = (-l2_user_pos) + tf.tile(self._p_item_bias, [1, self._neg_num]) 51 | neg_score = (-l2_user_neg) + tf.reduce_sum(self._n_item_bias, reduction_indices=2) 52 | scores = tf.maximum(self._margin - pos_score + neg_score, 0) 53 | weights = tf.count_nonzero(scores, axis=1) 54 | weights = tf.log(tf.floor(self._max_item * tf.to_float(weights) / self._neg_num) + 1.0) 55 | self._loss = tf.reduce_sum(weights * tf.reduce_max(scores, axis=1)) 56 | # self._loss = tf.reduce_sum(tf.tile(tf.reshape(weights, [-1, 1]), [1, self._neg_num]) * scores) 57 | 58 | def _build_serving_graph(self): 59 | 60 | with tf.variable_scope(self._scope, reuse=self._reuse): 61 | item_norms = tf.reduce_sum(tf.square(self._item), axis=1) 62 | self._outputs.append(2 * tf.matmul(self._user, self._item, transpose_b=True) + \ 63 | tf.reshape(self._item_bias, [-1]) - tf.reshape(item_norms, [-1])) 64 | 65 | -------------------------------------------------------------------------------- /openrec/tf1/legacy/modules/interactions/ns_log.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from openrec.tf1.legacy.modules.interactions import Interaction 3 | 4 | class NsLog(Interaction): 5 | 6 | def __init__(self, user, max_item, item=None, item_bias=None, p_item=None, p_item_bias=None, neg_num=5, 7 | n_item=None, n_item_bias=None, train=None, scope=None, reuse=False): 8 | 9 | assert train is not None, 'train cannot be None' 10 | assert user is not None, 'user cannot be None' 11 | self._user = user 12 | self._neg_num = neg_num 13 | self._max_item = max_item 14 | 15 | if train: 16 | 17 | assert p_item is not None, 'p_item cannot be None' 18 | assert n_item is not None, 'n_item cannot be None' 19 | assert p_item_bias is not None, 'p_item_bias cannot be None' 20 | assert n_item_bias is not None, 'n_item_bias cannot be None' 21 | 22 | self._p_item = p_item 23 | self._n_item = n_item 24 | self._p_item_bias = p_item_bias 25 | self._n_item_bias = n_item_bias 26 | else: 27 | assert item is not None, 'item cannot be None' 28 | assert item_bias is not None, 'item_bias cannot be None' 29 | 30 | self._item = item 31 | self._item_bias = item_bias 32 | 33 | super(NsLog, self).__init__(train=train, scope=scope, reuse=reuse) 34 | 35 | def _build_training_graph(self): 36 | 37 | with tf.variable_scope(self._scope, reuse=self._reuse): 38 | tmp_user = tf.tile(tf.expand_dims(self._user, 1), [1, self._neg_num, 1]) 39 | dot_user_pos = tf.tile(tf.reduce_sum(tf.multiply(self._user, self._p_item), 40 | reduction_indices=1, 41 | keep_dims=True, 42 | name="dot_user_pos"),[1,self._neg_num]) 43 | dot_user_neg = tf.reduce_sum(tf.multiply(tmp_user, self._n_item), 44 | reduction_indices=2, 45 | name="dot_user_neg") 46 | 47 | pos_score = dot_user_pos + tf.tile(self._p_item_bias, [1, self._neg_num]) 48 | neg_score = dot_user_neg + tf.reduce_sum(self._n_item_bias, reduction_indices=2) 49 | diff = pos_score - neg_score 50 | weights = tf.count_nonzero(tf.less(diff, 0.0), axis=1) 51 | weights = tf.log(tf.floor(self._max_item * tf.to_float(weights) / self._neg_num) + 1.0) 52 | self._loss = - tf.reduce_sum(tf.log(tf.sigmoid(tf.maximum(weights * tf.reduce_min(diff, axis = 1), 53 | -30.0)))) 54 | 55 | def _build_serving_graph(self): 56 | 57 | with tf.variable_scope(self._scope, reuse=self._reuse): 58 | 
            self._outputs.append(tf.matmul(self._user, self._item, transpose_b=True) + tf.reshape(self._item_bias, [-1]))
--------------------------------------------------------------------------------
/openrec/tf1/legacy/modules/module.py:
--------------------------------------------------------------------------------
import tensorflow as tf


class Module(object):

    """
    Module is the base abstraction for all OpenRec modules. A module belongs to one of three \
    categories, **extractions**, **fusions**, and **interactions**, depending on its functionality \
    (read [1]_ for details).

    Parameters
    ----------
    train: bool, optional
        An indicator for training or serving phase.
    l2_reg: float, optional
        Weight for L2 regularization, i.e., weight decay.
    scope: str, optional
        Scope for module variables.
    reuse: bool, optional
        Whether or not to reuse module variables.

    Notes
    -----
    The module abstraction is used to construct recommenders. It should be extended by all module \
    implementations. During initialization, the functions :code:`self._build_shared_graph`, \
    :code:`self._build_training_graph`, and :code:`self._build_serving_graph` are called as follows.

    .. image:: module.png
        :scale: 50 %
        :alt: The structure of the module abstraction
        :align: center

    A module implementation should follow the two steps below:

    * **Build computational graphs.** Override the :code:`self._build_shared_graph()`, :code:`self._build_training_graph()`, \
      and/or :code:`self._build_serving_graph()` functions to build training/serving computational graphs.

    * **Define a loss and an output list.** Define a loss (:code:`self._loss`) to be included in training and an output \
      list of Tensorflow tensors (:code:`self._outputs`).

    References
    ----------
    .. [1] Yang, L., Bagdasaryan, E., Gruenstein, J., Hsieh, C., and Estrin, D., 2018, June.
        OpenRec: A Modular Framework for Extensible and Adaptable Recommendation Algorithms.
        In Proceedings of WSDM'18, February 5-9, 2018, Marina Del Rey, CA, USA.
    """

    def __init__(self, train=True, l2_reg=None, scope=None, reuse=False):

        self._scope = self.__class__.__name__ if scope is None else scope
        self._reuse = reuse
        self._l2_reg = l2_reg

        self._loss = 0.0
        self._outputs = []
        self._train = train

        if train:
            self._build_shared_graph()
            self._build_training_graph()
        else:
            self._build_shared_graph()
            self._build_serving_graph()

    def _build_shared_graph(self):

        """Build shared computational graphs across training and serving (may be overridden).
        """
        pass

    def _build_training_graph(self):

        """Build training-specific computational graphs (may be overridden).
        """
        pass

    def _build_serving_graph(self):

        """Build serving-specific computational graphs (may be overridden).
        """
        pass

    def get_outputs(self):

        """Retrieve the output list of Tensorflow tensors.

        Returns
        -------
        list
            An output list of Tensorflow tensors
        """

        return self._outputs

    def get_loss(self):

        """Retrieve the training loss.

        Returns
        -------
        float or Tensor
            Training loss
        """

        return self._loss
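# Illustrative sketch of a custom module, following the two steps in the
# docstring above: build a graph, then set `self._loss` and `self._outputs`.
# `user` and `item` are assumed to be equal-shaped [batch, dim] tensors; note
# that attributes must be set before super().__init__, which runs the builders:
#
#     class DotProduct(Module):
#
#         def __init__(self, user, item, train=True, scope=None, reuse=False):
#             self._user, self._item = user, item
#             super(DotProduct, self).__init__(train=train, scope=scope, reuse=reuse)
#
#         def _build_shared_graph(self):
#             with tf.variable_scope(self._scope, reuse=self._reuse):
#                 self._outputs.append(tf.reduce_sum(self._user * self._item, axis=1))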
--------------------------------------------------------------------------------
/openrec/tf1/legacy/recommenders/__init__.py:
--------------------------------------------------------------------------------
from openrec.tf1.legacy.recommenders.recommender import Recommender
from openrec.tf1.legacy.recommenders.bpr import BPR
from openrec.tf1.legacy.recommenders.pmf import PMF
from openrec.tf1.legacy.recommenders.nbpr import NBPR
from openrec.tf1.legacy.recommenders.concat_visual_bpr import ConcatVisualBPR
from openrec.tf1.legacy.recommenders.visual_pmf import VisualPMF
from openrec.tf1.legacy.recommenders.cdl import CDL
from openrec.tf1.legacy.recommenders.cml import CML
from openrec.tf1.legacy.recommenders.wcml import WCML
from openrec.tf1.legacy.recommenders.visual_bpr import VisualBPR
from openrec.tf1.legacy.recommenders.visual_cml import VisualCML
from openrec.tf1.legacy.recommenders.gmf import GMF
from openrec.tf1.legacy.recommenders.visual_gmf import VisualGMF
from openrec.tf1.legacy.recommenders.user_pmf import UserPMF
from openrec.tf1.legacy.recommenders.user_visual_pmf import UserVisualPMF
from openrec.tf1.legacy.recommenders.itr_mlp import ItrMLP
--------------------------------------------------------------------------------
/openrec/tf1/legacy/recommenders/cdl.py:
--------------------------------------------------------------------------------
from openrec.tf1.legacy.recommenders import PMF
from openrec.tf1.legacy.modules.extractions import SDAE
from openrec.tf1.legacy.modules.fusions import Average

class CDL(PMF):

    def __init__(self, batch_size, max_user, max_item, dim_embed, item_f, dims, dropout=None, test_batch_size=None,
                 item_serving_size=None, l2_reg=None, l2_reg_mlp=None, l2_reconst=None, opt='SGD',
                 sess_config=None):

        self._item_f = item_f
        self._dims = dims
        self._dropout = dropout

        self._l2_reg_mlp = l2_reg_mlp
        self._l2_reconst = l2_reconst

        super(CDL, self).__init__(batch_size=batch_size, max_user=max_user, max_item=max_item, dim_embed=dim_embed,
                                  l2_reg=l2_reg, test_batch_size=test_batch_size, opt=opt, sess_config=sess_config)

    def _build_item_inputs(self, train=True):

        super(CDL, self)._build_item_inputs(train)
        if train:
            self._add_input(name='item_feature', dtype='float32', shape=[self._batch_size, self._item_f.shape[1]])
        else:
            self._add_input(name='item_id', dtype='int32', shape=[None], train=False)
            self._add_input(name='item_feature', dtype='float32', shape=[None, self._item_f.shape[1]], train=False)

    def _input_mappings(self, batch_data, train):

        default_input_map = super(CDL, self)._input_mappings(batch_data=batch_data, train=train)
        if train:
            default_input_map[self._get_input('item_feature')] = self._item_f[batch_data['item_id_input']]
        else:
            default_input_map[self._get_input('item_id', train=False)] = batch_data['item_id_input']
            default_input_map[self._get_input('item_feature', train=False)] = self._item_f[batch_data['item_id_input']]
        return default_input_map

    def _build_item_extractions(self, train=True):

        super(CDL, self)._build_item_extractions(train)
        self._add_module('item_f',
                         SDAE(in_tensor=self._get_input('item_feature', train=train), dims=self._dims,
                              l2_reg=self._l2_reg_mlp, l2_reconst=self._l2_reconst, dropout=self._dropout,
                              scope='AutoEncoder', reuse=False),
                         train=train)

    def _build_default_fusions(self, train=True):

        self._add_module('item_vec',
                         Average(scope='item_average', reuse=not train,
                                 module_list=[self._get_module('item_vec', train=train),
                                              self._get_module('item_f', train=train)], weight=2.0),
                         train=train)
--------------------------------------------------------------------------------
/openrec/tf1/legacy/recommenders/cml.py:
--------------------------------------------------------------------------------
import tensorflow as tf
from openrec.tf1.legacy.recommenders import BPR
from openrec.tf1.legacy.modules.interactions import PairwiseEuDist

class CML(BPR):

    def _build_post_training_ops(self):
        unique_user_id, _ = tf.unique(self._get_input('user_id'))
        unique_item_id, _ = tf.unique(tf.concat([self._get_input('p_item_id'), self._get_input('n_item_id')], axis=0))
        return [self._get_module('user_vec').censor_l2_norm_op(censor_id_list=unique_user_id),
                self._get_module('p_item_vec').censor_l2_norm_op(censor_id_list=unique_item_id)]

    def _build_interactions(self, train=True):

        if train:
            self._add_module('interaction',
                             PairwiseEuDist(user=self._get_module('user_vec').get_outputs()[0],
                                            p_item=self._get_module('p_item_vec').get_outputs()[0],
                                            n_item=self._get_module('n_item_vec').get_outputs()[0],
                                            p_item_bias=self._get_module('p_item_bias').get_outputs()[0],
                                            n_item_bias=self._get_module('n_item_bias').get_outputs()[0],
                                            scope='PairwiseEuDist', reuse=False, train=True),
                             train=True)
        else:
            self._add_module('interaction',
                             PairwiseEuDist(user=self._get_module('user_vec', train=train).get_outputs()[0],
                                            item=self._get_module('item_vec', train=train).get_outputs()[0],
                                            item_bias=self._get_module('item_bias', train=train).get_outputs()[0],
                                            scope='PairwiseEuDist', reuse=True, train=False),
                             train=False)
--------------------------------------------------------------------------------
/openrec/tf1/legacy/recommenders/gmf.py:
--------------------------------------------------------------------------------
from openrec.tf1.legacy.recommenders import PMF
from openrec.tf1.legacy.modules.interactions import PointwiseGeCE

class GMF(PMF):

    def __init__(self, batch_size, dim_embed, max_user, max_item,
                 test_batch_size=None, l2_reg=None, opt='SGD', sess_config=None):

        super(GMF, self).__init__(
            batch_size=batch_size,
            dim_embed=dim_embed,
            max_user=max_user,
            max_item=max_item,
            test_batch_size=test_batch_size,
            l2_reg=l2_reg,
            opt=opt,
            sess_config=sess_config
        )

    def _build_default_interactions(self, train=True):

        self._add_module(
            'interaction',
            PointwiseGeCE(
                user=self._get_module('user_vec', train=train).get_outputs()[0],
                item=self._get_module('item_vec', train=train).get_outputs()[0],
                item_bias=self._get_module('item_bias', train=train).get_outputs()[0],
                l2_reg=self._l2_reg,
                labels=self._get_input('labels'),
                train=train, scope="PointwiseGeCE", reuse=(not train)
            ),
            train=train
        )
--------------------------------------------------------------------------------
/openrec/tf1/legacy/recommenders/nbpr.py:
--------------------------------------------------------------------------------
from
openrec.tf1.legacy.recommenders import BPR 2 | from openrec.tf1.legacy.modules.extractions import LatentFactor 3 | from openrec.tf1.legacy.modules.interactions import NsLog 4 | import numpy as np 5 | 6 | class NBPR(BPR): 7 | 8 | """ 9 | Pure Bayesian Personalized Ranking (BPR) [1]_ based recommender with negative sampling. 10 | 11 | """ 12 | 13 | def __init__(self, batch_size, max_user, max_item, dim_embed, neg_num=5, 14 | test_batch_size=None, l2_reg=None, opt='SGD', lr=None, init_dict=None, sess_config=None): 15 | 16 | self._dim_embed = dim_embed 17 | self._neg_num = neg_num 18 | 19 | super(NBPR, self).__init__(batch_size=batch_size, 20 | test_batch_size=test_batch_size, 21 | max_user=max_user, 22 | max_item=max_item, 23 | dim_embed = dim_embed, 24 | l2_reg=l2_reg, 25 | opt=opt, 26 | lr=lr, 27 | init_dict=init_dict, 28 | sess_config=sess_config) 29 | 30 | 31 | def _input_mappings(self, batch_data, train): 32 | 33 | if train: 34 | return {self._get_input('user_id'): batch_data['user_id_input'], 35 | self._get_input('p_item_id'): batch_data['p_item_id_input'], 36 | self._get_input('n_item_id'): np.array(batch_data['n_item_id_inputs'].tolist())} 37 | else: 38 | return {self._get_input('user_id', train=train): batch_data['user_id_input'], 39 | self._get_input('item_id', train=train): batch_data['item_id_input']} 40 | 41 | def _build_item_inputs(self, train=True): 42 | 43 | if train: 44 | self._add_input(name='p_item_id', dtype='int32', shape=[self._batch_size]) 45 | self._add_input(name='n_item_id', dtype='int32', shape=[self._batch_size, self._neg_num]) 46 | else: 47 | self._add_input(name='item_id', dtype='int32', shape=[None], train=False) 48 | 49 | 50 | def _build_default_interactions(self, train=True): 51 | 52 | if train: 53 | self._add_module('interaction', 54 | NsLog(user=self._get_module('user_vec').get_outputs()[0], 55 | max_item=self._max_item, 56 | p_item=self._get_module('p_item_vec').get_outputs()[0], 57 | n_item=self._get_module('n_item_vec').get_outputs()[0], 58 | p_item_bias=self._get_module('p_item_bias').get_outputs()[0], 59 | n_item_bias=self._get_module('n_item_bias').get_outputs()[0], 60 | scope='pairwise_log', reuse=False, train=True), 61 | train=True) 62 | else: 63 | self._add_module('interaction', 64 | NsLog(user=self._get_module('user_vec', train=train).get_outputs()[0], 65 | max_item=self._max_item, 66 | item=self._get_module('item_vec', train=train).get_outputs()[0], 67 | item_bias=self._get_module('item_bias', train=train).get_outputs()[0], 68 | scope='pairwise_log', reuse=True, train=False), 69 | train=False) 70 | 71 | 72 | -------------------------------------------------------------------------------- /openrec/tf1/legacy/recommenders/pmf.py: -------------------------------------------------------------------------------- 1 | from openrec.tf1.legacy.recommenders import Recommender 2 | from openrec.tf1.legacy.modules.extractions import LatentFactor 3 | from openrec.tf1.legacy.modules.interactions import PointwiseMSE 4 | 5 | class PMF(Recommender): 6 | 7 | def __init__(self, batch_size, dim_embed, max_user, max_item, 8 | test_batch_size=None, l2_reg=None, opt='SGD', sess_config=None): 9 | 10 | self._dim_embed = dim_embed 11 | 12 | super(PMF, self).__init__(batch_size=batch_size, 13 | test_batch_size=test_batch_size, 14 | max_user=max_user, 15 | max_item=max_item, 16 | l2_reg=l2_reg, 17 | opt=opt, sess_config=sess_config) 18 | 19 | def _input_mappings(self, batch_data, train): 20 | 21 | if train: 22 | return {self._get_input('user_id'): batch_data['user_id_input'],
23 | self._get_input('item_id'): batch_data['item_id_input'], 24 | self._get_input('labels'): batch_data['labels']} 25 | else: 26 | return {self._get_input('user_id', train=False): batch_data['user_id_input'], 27 | self._get_input('item_id', train=False): batch_data['item_id_input']} 28 | 29 | def _build_user_inputs(self, train=True): 30 | 31 | if train: 32 | self._add_input(name='user_id', dtype='int32', shape=[self._batch_size]) 33 | else: 34 | self._add_input(name='user_id', dtype='int32', shape=[None], train=False) 35 | 36 | def _build_item_inputs(self, train=True): 37 | 38 | if train: 39 | self._add_input(name='item_id', dtype='int32', shape=[self._batch_size]) 40 | else: 41 | self._add_input(name='item_id', dtype='int32', shape=[None], train=False) 42 | 43 | def _build_extra_inputs(self, train=True): 44 | 45 | if train: 46 | self._add_input(name='labels', dtype='float32', shape=[self._batch_size]) 47 | 48 | def _build_user_extractions(self, train=True): 49 | 50 | self._add_module('user_vec', 51 | LatentFactor(l2_reg=self._l2_reg, init='normal', ids=self._get_input('user_id', train=train), 52 | shape=[self._max_user, self._dim_embed], scope='user', reuse=not train), 53 | train=train) 54 | 55 | def _build_item_extractions(self, train=True): 56 | 57 | self._add_module('item_vec', 58 | LatentFactor(l2_reg=self._l2_reg, init='normal', ids=self._get_input('item_id', train=train), 59 | shape=[self._max_item, self._dim_embed], scope='item', reuse=not train), 60 | train=train) 61 | self._add_module('item_bias', 62 | LatentFactor(l2_reg=self._l2_reg, init='zero', ids=self._get_input('item_id', train=train), 63 | shape=[self._max_item, 1], scope='item_bias', reuse=not train), 64 | train=train) 65 | 66 | def _build_default_interactions(self, train=True): 67 | 68 | self._add_module('interaction', 69 | PointwiseMSE(user=self._get_module('user_vec', train=train).get_outputs()[0], 70 | item=self._get_module('item_vec', train=train).get_outputs()[0], 71 | item_bias=self._get_module('item_bias', train=train).get_outputs()[0], 72 | labels=self._get_input('labels'), a=1.0, b=1.0, sigmoid=True, 73 | train=train, scope='PointwiseMSE', reuse=not train), 74 | train=train) 75 | 76 | def _build_serving_graph(self): 77 | 78 | super(PMF, self)._build_serving_graph() 79 | self._scores = self._get_module('interaction', train=False).get_outputs()[0] 80 | -------------------------------------------------------------------------------- /openrec/tf1/legacy/recommenders/user_pmf.py: -------------------------------------------------------------------------------- 1 | from openrec.tf1.legacy.recommenders import PMF 2 | from openrec.tf1.legacy.modules.extractions import MultiLayerFC 3 | from openrec.tf1.legacy.modules.fusions import Average 4 | 5 | class UserPMF(PMF): 6 | 7 | def __init__(self, batch_size, max_user, max_item, dim_embed, dims, user_f_source, test_batch_size=None, item_serving_size=None, dropout_rate=None, 8 | l2_reg=None, l2_reg_mlp=None, opt='SGD', sess_config=None): 9 | 10 | self._dims = dims 11 | self._dropout_rate = dropout_rate 12 | self._user_f_source = user_f_source 13 | self._item_serving_size = item_serving_size 14 | 15 | self._l2_reg_mlp = l2_reg_mlp 16 | 17 | super(UserPMF, self).__init__(batch_size=batch_size, max_user=max_user, max_item=max_item, dim_embed=dim_embed, 18 | l2_reg=l2_reg, test_batch_size=test_batch_size, opt=opt, sess_config=sess_config) 19 | 20 | def _build_user_inputs(self, train=True): 21 | 22 | super(UserPMF, self)._build_user_inputs(train) 23 | if train: 24 | 
self._add_input(name='user_feature', dtype='float32', shape=[self._batch_size, self._user_f_source.shape[1]]) 25 | else: 26 | self._add_input(name='user_feature', dtype='float32', shape=[None, self._user_f_source.shape[1]], train=False) 27 | 28 | def _input_mappings(self, batch_data, train): 29 | 30 | default_input_map = super(UserPMF, self)._input_mappings(batch_data=batch_data, train=train) 31 | default_input_map[self._get_input('user_feature', train=train)] = self._user_f_source[batch_data['user_id_input']] 32 | 33 | return default_input_map 34 | 35 | def _build_user_extractions(self, train=True): 36 | 37 | super(UserPMF, self)._build_user_extractions(train) 38 | 39 | self._add_module('user_f', 40 | MultiLayerFC(in_tensor=self._get_input('user_feature', train=train), train=train, 41 | dims=self._dims, l2_reg=self._l2_reg_mlp, dropout_mid=self._dropout_rate, 42 | scope='user_MLP', reuse=not train), 43 | train=train) 44 | 45 | def _build_default_fusions(self, train=True): 46 | 47 | self._add_module('user_vec', 48 | Average(scope='user_average', reuse=not train, module_list=[self._get_module('user_vec', train=train), 49 | self._get_module('user_f', train=train)], weight=2.0), 50 | train=train) 51 | -------------------------------------------------------------------------------- /openrec/tf1/legacy/recommenders/user_visual_pmf.py: -------------------------------------------------------------------------------- 1 | from openrec.tf1.legacy.recommenders import VisualPMF 2 | from openrec.tf1.legacy.modules.extractions import MultiLayerFC 3 | from openrec.tf1.legacy.modules.fusions import Average 4 | 5 | class UserVisualPMF(VisualPMF): 6 | 7 | def __init__(self, batch_size, max_user, max_item, dim_embed, dims_user, dims_item, user_f_source, 8 | item_f_source, test_batch_size=None, item_serving_size=None, dropout_rate=None, 9 | l2_reg=None, l2_reg_mlp=None, opt='SGD', sess_config=None): 10 | 11 | self._dims_user = dims_user 12 | self._user_f_source = user_f_source 13 | 14 | super(UserVisualPMF, self).__init__(batch_size=batch_size, max_user=max_user, 15 | max_item=max_item, dim_embed=dim_embed, dims=dims_item, item_f_source=item_f_source, 16 | test_batch_size=test_batch_size, item_serving_size=item_serving_size, dropout_rate=dropout_rate, 17 | l2_reg=l2_reg, l2_reg_mlp=l2_reg_mlp, opt=opt, sess_config=sess_config) 18 | 19 | def _build_user_inputs(self, train=True): 20 | 21 | super(UserVisualPMF, self)._build_user_inputs(train) 22 | if train: 23 | self._add_input(name='user_feature', dtype='float32', shape=[self._batch_size, self._user_f_source.shape[1]]) 24 | else: 25 | self._add_input(name='user_feature', dtype='float32', shape=[None, self._user_f_source.shape[1]], train=False) 26 | 27 | def _input_mappings(self, batch_data, train): 28 | 29 | default_input_map = super(UserVisualPMF, self)._input_mappings(batch_data=batch_data, train=train) 30 | default_input_map[self._get_input('user_feature', train=train)] = self._user_f_source[batch_data['user_id_input']] 31 | return default_input_map 32 | 33 | def _build_user_extractions(self, train=True): 34 | 35 | super(UserVisualPMF, self)._build_user_extractions(train) 36 | 37 | self._add_module('user_f', 38 | MultiLayerFC(in_tensor=self._get_input('user_feature', train=train), train=train, 39 | dims=self._dims_user, l2_reg=self._l2_reg_mlp, dropout_mid=self._dropout_rate, 40 | scope='user_MLP', reuse=not train), 41 | train=train) 42 | 43 | def _build_default_fusions(self, train=True): 44 | 45 | super(UserVisualPMF, self)._build_default_fusions(train) 46 
| self._add_module('user_vec', 47 | Average(scope='user_average', reuse=not train, module_list=[self._get_module('user_vec', train=train), 48 | self._get_module('user_f', train=train)], weight=2.0), 49 | train=train) -------------------------------------------------------------------------------- /openrec/tf1/legacy/recommenders/visual_cml.py: -------------------------------------------------------------------------------- 1 | from openrec.tf1.legacy.modules.interactions import PairwiseEuDist 2 | from openrec.tf1.legacy.recommenders import VisualBPR 3 | 4 | class VisualCML(VisualBPR): 5 | 6 | def _build_default_interactions(self, train=True): 7 | 8 | if train: 9 | self._add_module('interaction', 10 | PairwiseEuDist(user=self._get_module('user_vec').get_outputs()[0], 11 | p_item=self._get_module('p_item_vec').get_outputs()[0], 12 | n_item=self._get_module('n_item_vec').get_outputs()[0], 13 | p_item_bias=self._get_module('p_item_bias').get_outputs()[0], 14 | n_item_bias=self._get_module('n_item_bias').get_outputs()[0], 15 | scope='PairwiseEuDist', reuse=False, train=True), 16 | train=True) 17 | else: 18 | self._add_module('interaction', 19 | PairwiseEuDist(user=self._get_module('user_vec', train=train).get_outputs()[0], 20 | item=self._get_module('item_vec', train=train).get_outputs()[0], 21 | item_bias=self._get_module('item_bias', train=train).get_outputs()[0], 22 | scope='PairwiseEuDist', reuse=True, train=False), 23 | train=False) -------------------------------------------------------------------------------- /openrec/tf1/legacy/recommenders/visual_gmf.py: -------------------------------------------------------------------------------- 1 | from openrec.tf1.legacy.recommenders import VisualPMF 2 | from openrec.tf1.legacy.modules.interactions import PointwiseGeCE 3 | 4 | class VisualGMF(VisualPMF): 5 | 6 | def _build_default_interactions(self, train=True): 7 | 8 | self._add_module('interaction', 9 | PointwiseGeCE(user=self._get_module('user_vec', train=train).get_outputs()[0], 10 | item=self._get_module('item_vec', train=train).get_outputs()[0], 11 | item_bias=self._get_module('item_bias', train=train).get_outputs()[0], 12 | labels=self._get_input('labels'), l2_reg=self._l2_reg, 13 | train=train, scope='PointwiseGeCE', reuse=not train), 14 | train=train) 15 | -------------------------------------------------------------------------------- /openrec/tf1/legacy/recommenders/visual_pmf.py: -------------------------------------------------------------------------------- 1 | from openrec.tf1.legacy.recommenders import PMF 2 | from openrec.tf1.legacy.modules.extractions import MultiLayerFC 3 | from openrec.tf1.legacy.modules.fusions import Average 4 | 5 | class VisualPMF(PMF): 6 | 7 | def __init__(self, batch_size, max_user, max_item, dim_embed, dims, item_f_source, test_batch_size=None, item_serving_size=None, dropout_rate=None, 8 | l2_reg=None, l2_reg_mlp=None, opt='SGD', sess_config=None): 9 | 10 | self._dims = dims 11 | self._dropout_rate = dropout_rate 12 | self._item_f_source = item_f_source 13 | self._item_serving_size = item_serving_size 14 | 15 | self._l2_reg_mlp = l2_reg_mlp 16 | 17 | super(VisualPMF, self).__init__(batch_size=batch_size, max_user=max_user, max_item=max_item, dim_embed=dim_embed, 18 | l2_reg=l2_reg, test_batch_size=test_batch_size, opt=opt, sess_config=sess_config) 19 | 20 | def _build_item_inputs(self, train=True): 21 | 22 | super(VisualPMF, self)._build_item_inputs(train) 23 | if train: 24 | self._add_input(name='item_vfeature', dtype='float32', 
shape=[self._batch_size, self._item_f_source.shape[1]]) 25 | else: 26 | self._add_input(name='item_id', dtype='int32', shape=[None], train=False) 27 | self._add_input(name='item_vfeature', dtype='float32', shape=[None, self._item_f_source.shape[1]], train=False) 28 | 29 | def _input_mappings(self, batch_data, train): 30 | 31 | default_input_map = super(VisualPMF, self)._input_mappings(batch_data=batch_data, train=train) 32 | if train: 33 | default_input_map[self._get_input('item_vfeature')] = self._item_f_source[batch_data['item_id_input']] 34 | else: 35 | default_input_map[self._get_input('item_id', train=False)] = batch_data['item_id_input'] 36 | default_input_map[self._get_input('item_vfeature', train=False)] = self._item_f_source[batch_data['item_id_input']] 37 | return default_input_map 38 | 39 | def _build_item_extractions(self, train=True): 40 | 41 | super(VisualPMF, self)._build_item_extractions(train) 42 | self._add_module('item_vf', 43 | MultiLayerFC(in_tensor=self._get_input('item_vfeature', train=train), dims=self._dims, 44 | l2_reg=self._l2_reg_mlp, dropout_mid=self._dropout_rate, scope='item_MLP', reuse=not train, 45 | train=train), 46 | train=train) 47 | 48 | def _build_default_fusions(self, train=True): 49 | 50 | self._add_module('item_vec', 51 | Average(scope='item_average', reuse=not train, module_list=[self._get_module('item_vec', train=train), 52 | self._get_module('item_vf', train=train)], weight=2.0), 53 | train=train) 54 | -------------------------------------------------------------------------------- /openrec/tf1/legacy/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ylongqi/openrec/a00de2345844858194ef43ab6845342114a5be93/openrec/tf1/legacy/tests/__init__.py -------------------------------------------------------------------------------- /openrec/tf1/legacy/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from openrec.tf1.legacy.utils.dataset import Dataset 2 | from openrec.tf1.legacy.utils.implicit_dataset import ImplicitDataset 3 | -------------------------------------------------------------------------------- /openrec/tf1/legacy/utils/dataset.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | class Dataset(object): 4 | 5 | 6 | """ 7 | The Dataset class stores a sequence of data points for training or evaluation. 8 | 9 | Parameters 10 | ---------- 11 | raw_data: numpy structured array 12 | Input raw data. 13 | max_user: int 14 | Maximum number of users in the recommendation system. 15 | max_item: int 16 | Maximum number of items in the recommendation system. 17 | name: str 18 | Name of the dataset. 19 | 20 | Notes 21 | ----- 22 | The Dataset class expects :code:`raw_data` as a numpy structured array, where each row represents a data 23 | point and contains *at least* two keys: 24 | 25 | * :code:`user_id`: the user involved in the interaction. 26 | * :code:`item_id`: the item involved in the interaction. 27 | 28 | :code:`raw_data` may contain other keys, such as :code:`timestamp` and :code:`location`, 29 | depending on the use case of the recommendation system. A user should be uniquely and numerically indexed 30 | from 0 to :code:`total_number_of_users - 1`. Items should be indexed likewise.
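A minimal, hypothetical construction is sketched below; the :code:`timestamp` field and all values are purely illustrative::

    import numpy as np
    from openrec.tf1.legacy.utils import Dataset

    # two interactions, with an extra (optional) timestamp column
    raw_data = np.array(
        [(0, 5, 1500000000), (1, 2, 1500000060)],
        dtype=[('user_id', np.int32),
               ('item_id', np.int32),
               ('timestamp', np.int64)])
    dataset = Dataset(raw_data, max_user=2, max_item=10, name='toy')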
31 | """ 32 | 33 | def __init__(self, raw_data, max_user, max_item, name='dataset'): 34 | 35 | self.name = name 36 | if type(raw_data) == np.ndarray: 37 | self.raw_data = raw_data 38 | else: 39 | raise TypeError("Unsupported data input schema. Please use structured numpy array.") 40 | 41 | self.data = None 42 | self._max_user = max_user 43 | self._max_item = max_item 44 | 45 | def max_user(self): 46 | """Maximum number of users. 47 | 48 | Returns 49 | ------- 50 | int 51 | Maximum number of users. 52 | """ 53 | return self._max_user 54 | 55 | def max_item(self): 56 | """Maximum number of items. 57 | 58 | Returns 59 | ------- 60 | int 61 | Maximum number of items. 62 | """ 63 | return self._max_item 64 | 65 | def shuffle(self): 66 | """Shuffle the dataset entries. 67 | """ 68 | if self.data is None: 69 | self.data = self.raw_data.copy() 70 | 71 | np.random.shuffle(self.data) -------------------------------------------------------------------------------- /openrec/tf1/legacy/utils/evaluators/__init__.py: -------------------------------------------------------------------------------- 1 | from openrec.tf1.legacy.utils.evaluators.evaluator import Evaluator 2 | from openrec.tf1.legacy.utils.evaluators.auc import AUC 3 | from openrec.tf1.legacy.utils.evaluators.recall import Recall 4 | from openrec.tf1.legacy.utils.evaluators.implicit_eval_manager import ImplicitEvalManager 5 | from openrec.tf1.legacy.utils.evaluators.mse import MSE 6 | from openrec.tf1.legacy.utils.evaluators.ndcg import NDCG 7 | from openrec.tf1.legacy.utils.evaluators.precision import Precision 8 | -------------------------------------------------------------------------------- /openrec/tf1/legacy/utils/evaluators/auc.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from openrec.tf1.legacy.utils.evaluators import Evaluator 3 | 4 | class AUC(Evaluator): 5 | 6 | def __init__(self, name='AUC'): 7 | 8 | super(AUC, self).__init__(etype='rank', name=name) 9 | 10 | def compute(self, rank_above, negative_num): 11 | 12 | return np.mean((negative_num - rank_above) / negative_num) -------------------------------------------------------------------------------- /openrec/tf1/legacy/utils/evaluators/evaluator.py: -------------------------------------------------------------------------------- 1 | 2 | class Evaluator(object): 3 | 4 | def __init__(self, etype, name): 5 | 6 | self.etype = etype 7 | self.name = name 8 | 9 | def compute(self): 10 | 11 | return None 12 | 13 | -------------------------------------------------------------------------------- /openrec/tf1/legacy/utils/evaluators/implicit_eval_manager.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from tqdm import tqdm 3 | 4 | class ImplicitEvalManager(object): 5 | 6 | def __init__(self, evaluators=[]): 7 | 8 | self.evaluators = evaluators 9 | 10 | def _full_rank(self, pos_samples, excl_pos_samples, predictions): 11 | 12 | 13 | pos_samples = np.array(pos_samples, dtype=np.int32) 14 | pos_predictions = predictions[pos_samples] 15 | 16 | excl_pos_samples_set = set(excl_pos_samples) 17 | rank_above = np.zeros(len(pos_samples)) 18 | 19 | pos_samples_len = len(pos_samples) 20 | for ind in range(len(predictions)): 21 | if ind not in excl_pos_samples_set: 22 | for pos_ind in range(pos_samples_len): 23 | if pos_predictions[pos_ind] < predictions[ind]: 24 | rank_above[pos_ind] += 1 25 | 26 | return rank_above, len(predictions) - len(excl_pos_samples) 27 | 28 | def 
_partial_rank(self, pos_scores, neg_scores): 29 | 30 | pos_scores = np.array(pos_scores) 31 | rank_above = np.zeros(len(pos_scores)) 32 | pos_scores_len = len(pos_scores) 33 | 34 | for score in neg_scores: 35 | for pos_ind in range(pos_scores_len): 36 | if pos_scores[pos_ind] < score: 37 | rank_above[pos_ind] += 1 38 | 39 | return rank_above, len(neg_scores) 40 | 41 | def full_eval(self, pos_samples, excl_pos_samples, predictions): 42 | 43 | results = {} 44 | rank_above, negative_num = self._full_rank(pos_samples, excl_pos_samples, predictions) 45 | for evaluator in self.evaluators: 46 | if evaluator.etype == 'rank': 47 | results[evaluator.name] = evaluator.compute(rank_above=rank_above, negative_num=negative_num) 48 | 49 | return results 50 | 51 | def partial_eval(self, pos_scores, neg_scores): 52 | 53 | results = {} 54 | rank_above, negative_num = self._partial_rank(pos_scores, neg_scores) 55 | for evaluator in self.evaluators: 56 | if evaluator.etype == 'rank': 57 | results[evaluator.name] = evaluator.compute(rank_above=rank_above, negative_num=negative_num) 58 | 59 | return results -------------------------------------------------------------------------------- /openrec/tf1/legacy/utils/evaluators/mse.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from openrec.tf1.legacy.utils.evaluators import Evaluator 3 | 4 | class MSE(Evaluator): 5 | 6 | def __init__(self, name='MSE'): 7 | 8 | super(MSE, self).__init__(etype='regression', name=name) 9 | 10 | def compute(self, predictions, labels): 11 | 12 | return np.square(predictions - labels) 13 | -------------------------------------------------------------------------------- /openrec/tf1/legacy/utils/evaluators/ndcg.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from math import log 3 | from openrec.tf1.legacy.utils.evaluators import Evaluator 4 | 5 | class NDCG(Evaluator): 6 | 7 | def __init__(self, ndcg_at, name='NDCG'): 8 | 9 | self._ndcg_at = np.array(ndcg_at) 10 | 11 | super(NDCG, self).__init__(etype='rank', name=name) 12 | 13 | def compute(self, rank_above, negative_num): 14 | 15 | del negative_num 16 | denominator = 0.0 17 | for i in range(len(rank_above)): 18 | denominator += 1.0 / log(i+2, 2) 19 | 20 | results = np.zeros(len(self._ndcg_at)) 21 | for r in rank_above: 22 | tmp = 1.0 / log(r+2, 2) 23 | results[r < self._ndcg_at] += tmp 24 | 25 | return results / denominator 26 | 27 | -------------------------------------------------------------------------------- /openrec/tf1/legacy/utils/evaluators/precision.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from openrec.tf1.legacy.utils.evaluators import Evaluator 3 | 4 | class Precision(Evaluator): 5 | 6 | def __init__(self, precision_at, name='Precision'): 7 | 8 | self._precision_at = np.array(precision_at) 9 | 10 | super(Precision, self).__init__(etype='rank', name=name) 11 | 12 | def compute(self, rank_above, negative_num): 13 | 14 | del negative_num 15 | results = np.zeros(len(self._precision_at)) 16 | for rank in rank_above: 17 | results += (rank <= self._precision_at).astype(np.float32) 18 | 19 | return results / self._precision_at 20 | -------------------------------------------------------------------------------- /openrec/tf1/legacy/utils/evaluators/recall.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from 
openrec.tf1.legacy.utils.evaluators import Evaluator 3 | 4 | class Recall(Evaluator): 5 | 6 | def __init__(self, recall_at, name='Recall'): 7 | 8 | self._recall_at = np.array(recall_at) 9 | 10 | super(Recall, self).__init__(etype='rank', name=name) 11 | 12 | def compute(self, rank_above, negative_num): 13 | 14 | del negative_num 15 | results = np.zeros(len(self._recall_at)) 16 | for rank in rank_above: 17 | results += (rank <= self._recall_at).astype(np.float32) 18 | 19 | return results / len(rank_above) 20 | -------------------------------------------------------------------------------- /openrec/tf1/legacy/utils/samplers/__init__.py: -------------------------------------------------------------------------------- 1 | from openrec.tf1.legacy.utils.samplers.sampler import Sampler 2 | from openrec.tf1.legacy.utils.samplers.pairwise_sampler import PairwiseSampler 3 | from openrec.tf1.legacy.utils.samplers.n_pairwise_sampler import NPairwiseSampler 4 | from openrec.tf1.legacy.utils.samplers.pointwise_sampler import PointwiseSampler 5 | from openrec.tf1.legacy.utils.samplers.explicit_sampler import ExplicitSampler 6 | -------------------------------------------------------------------------------- /openrec/tf1/legacy/utils/samplers/explicit_sampler.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from multiprocessing import Process 3 | from openrec.tf1.legacy.utils.samplers import Sampler 4 | 5 | class _ExplicitSampler(Process): 6 | 7 | def __init__(self, dataset, batch_size, q, shuffle=True, loop=True): 8 | 9 | self._dataset = dataset 10 | self._batch_size = batch_size 11 | 12 | self._q = q 13 | self._shuffle = shuffle 14 | self._loop = loop 15 | self._state = 0 16 | 17 | super(_ExplicitSampler, self).__init__() 18 | 19 | 20 | def run(self): 21 | 22 | if self._shuffle: 23 | self._dataset.shuffle() 24 | 25 | while True: 26 | 27 | input_npy = np.zeros(self._batch_size, dtype=[('user_id_input', np.int32), 28 | ('item_id_input', np.int32), 29 | ('labels', np.float32)]) 30 | 31 | if self._state + self._batch_size >= len(self._dataset.data): 32 | break 33 | 34 | for ind in range(self._batch_size): 35 | entry = self._dataset.data[self._state + ind] 36 | input_npy[ind] = (entry['user_id'], entry['item_id'], entry['label']) 37 | 38 | self._state += self._batch_size 39 | self._q.put(input_npy, block=True) 40 | 41 | class ExplicitSampler(Sampler): 42 | 43 | def __init__(self, dataset, batch_size, num_process=5, chronological=False): 44 | 45 | self._chronological = chronological 46 | 47 | if self._chronological: 48 | num_process = 1 49 | 50 | super(ExplicitSampler, self).__init__(dataset=dataset, batch_size=batch_size, num_process=num_process) 51 | 52 | def _get_runner(self): 53 | 54 | return _ExplicitSampler(dataset=self._dataset, 55 | batch_size=self._batch_size, 56 | q=self._q, 57 | shuffle=not self._chronological, 58 | loop=not self._chronological) 59 | 60 | -------------------------------------------------------------------------------- /openrec/tf1/legacy/utils/samplers/n_pairwise_sampler.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import numpy as np 3 | import random 4 | from multiprocessing import Process 5 | from openrec.tf1.legacy.utils.samplers import Sampler 6 | 7 | class _NPairwiseSampler(Process): 8 | 9 | def __init__(self, dataset, batch_size, q, chronological, negativenum): 10 | self._dataset = dataset 11 | self._batch_size = batch_size 12 | 
self._q = q 13 | self._state = 0 14 | self._chronological = chronological 15 | self._negativenum = negativenum 16 | 17 | if not chronological: 18 | self._dataset.shuffle() 19 | 20 | super(_NPairwiseSampler, self).__init__() 21 | 22 | def run(self): 23 | while True: 24 | 25 | input_npy = np.zeros(self._batch_size, dtype=[('user_id_input', np.int32), 26 | ('p_item_id_input', np.int32), 27 | ('n_item_id_inputs', np.ndarray)]) 28 | 29 | if self._state + self._batch_size >= len(self._dataset.data): 30 | if not self._chronological: 31 | self._state = 0 32 | self._dataset.shuffle() 33 | else: 34 | break 35 | 36 | for sample_itr, entry in enumerate(self._dataset.data[self._state:(self._state + self._batch_size)]): 37 | neg_ids = set() 38 | while len(neg_ids) < self._negativenum: 39 | neg_id = int(random.random() * (self._dataset.max_item() - 1)) 40 | while neg_id in self._dataset.get_interactions_by_user_gb_item(entry['user_id']): 41 | neg_id = int(random.random() * (self._dataset.max_item() - 1)) 42 | neg_ids.add(neg_id) 43 | input_npy[sample_itr] = (entry['user_id'], entry['item_id'], np.array(list(neg_ids))) 44 | 45 | self._state += self._batch_size 46 | self._q.put(input_npy, block=True) 47 | 48 | 49 | class NPairwiseSampler(Sampler): 50 | 51 | def __init__(self, dataset, batch_size, chronological=False, negativenum=10, num_process=5, seed=0): 52 | 53 | self._chronological = chronological 54 | self._negativenum = negativenum 55 | if chronological: 56 | num_process = 1 57 | random.seed(seed) 58 | super(NPairwiseSampler, self).__init__(dataset=dataset, batch_size=batch_size, num_process=num_process) 59 | 60 | def _get_runner(self): 61 | 62 | return _NPairwiseSampler(dataset=self._dataset, 63 | batch_size=self._batch_size, 64 | q=self._q, chronological=self._chronological, negativenum=self._negativenum) 65 | 66 | -------------------------------------------------------------------------------- /openrec/tf1/legacy/utils/samplers/pairwise_sampler.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import numpy as np 3 | import random 4 | from multiprocessing import Process 5 | from openrec.tf1.legacy.utils.samplers import Sampler 6 | 7 | class _PairwiseSampler(Process): 8 | 9 | def __init__(self, dataset, batch_size, q, chronological): 10 | self._dataset = dataset 11 | self._batch_size = batch_size 12 | self._q = q 13 | self._state = 0 14 | self._chronological = chronological 15 | 16 | if not chronological: 17 | self._dataset.shuffle() 18 | 19 | super(_PairwiseSampler, self).__init__() 20 | 21 | def run(self): 22 | while True: 23 | 24 | input_npy = np.zeros(self._batch_size, dtype=[('user_id_input', np.int32), 25 | ('p_item_id_input', np.int32), 26 | ('n_item_id_input', np.int32)]) 27 | 28 | if self._state + self._batch_size >= len(self._dataset.data): 29 | if not self._chronological: 30 | self._state = 0 31 | self._dataset.shuffle() 32 | else: 33 | break 34 | 35 | for sample_itr, entry in enumerate(self._dataset.data[self._state:(self._state + self._batch_size)]): 36 | neg_id = int(random.random() * (self._dataset.max_item() - 1)) 37 | while neg_id in self._dataset.get_interactions_by_user_gb_item(entry['user_id']): 38 | neg_id = int(random.random() * (self._dataset.max_item() - 1)) 39 | input_npy[sample_itr] = (entry['user_id'], entry['item_id'], neg_id) 40 | 41 | self._state += self._batch_size 42 | self._q.put(input_npy, block=True) 43 | 44 | 45 | class PairwiseSampler(Sampler): 46 | 47 | def __init__(self, dataset, 
batch_size, chronological=False, num_process=5, seed=0): 48 | 49 | self._chronological = chronological 50 | if chronological: 51 | num_process = 1 52 | random.seed(seed) 53 | super(PairwiseSampler, self).__init__(dataset=dataset, batch_size=batch_size, num_process=num_process) 54 | 55 | def _get_runner(self): 56 | 57 | return _PairwiseSampler(dataset=self._dataset, 58 | batch_size=self._batch_size, 59 | q=self._q, chronological=self._chronological) 60 | -------------------------------------------------------------------------------- /openrec/tf1/legacy/utils/samplers/pointwise_sampler.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import random 3 | from multiprocessing import Process 4 | from openrec.tf1.legacy.utils.samplers import Sampler 5 | 6 | class _PointwiseSampler(Process): 7 | 8 | def __init__(self, dataset, batch_size, pos_ratio, q, chronological=False): 9 | 10 | self._dataset = dataset 11 | self._batch_size = batch_size 12 | self._num_pos = int(batch_size * pos_ratio) 13 | 14 | self._user_list = self._dataset.get_unique_user_list() 15 | self._q = q 16 | self._state = 0 17 | self._chronological = chronological 18 | 19 | if not chronological: 20 | self._dataset.shuffle() 21 | super(_PointwiseSampler, self).__init__() 22 | 23 | 24 | def run(self): 25 | while True: 26 | 27 | input_npy = np.zeros(self._batch_size, dtype=[('user_id_input', np.int32), 28 | ('item_id_input', np.int32), 29 | ('labels', np.float32)]) 30 | 31 | if self._state + self._num_pos >= len(self._dataset.data): 32 | if not self._chronological: 33 | self._state = 0 34 | self._dataset.shuffle() 35 | else: 36 | break 37 | 38 | for ind in range(self._num_pos): 39 | entry = self._dataset.data[self._state + ind] 40 | input_npy[ind] = (entry['user_id'], entry['item_id'], 1.0) 41 | 42 | for ind in range(self._batch_size - self._num_pos): 43 | user_ind = int(random.random() * (len(self._user_list) - 1)) 44 | user_id = self._user_list[user_ind] 45 | neg_id = int(random.random() * (self._dataset.max_item() - 1)) 46 | 47 | while neg_id in self._dataset.get_interactions_by_user_gb_item(user_id): 48 | neg_id = int(random.random() * (self._dataset.max_item() - 1)) 49 | input_npy[ind + self._num_pos] = (user_id, neg_id, 0.0) 50 | 51 | self._state += self._num_pos 52 | self._q.put(input_npy, block=True) 53 | 54 | 55 | class PointwiseSampler(Sampler): 56 | 57 | def __init__(self, dataset, batch_size, pos_ratio=0.5, num_process=5, chronological=False, seed=0): 58 | 59 | self._pos_ratio = pos_ratio 60 | self._chronological = chronological 61 | 62 | if chronological: 63 | num_process = 1 64 | random.seed(seed) 65 | super(PointwiseSampler, self).__init__(dataset=dataset, batch_size=batch_size, num_process=num_process) 66 | 67 | def _get_runner(self): 68 | 69 | return _PointwiseSampler(dataset=self._dataset, 70 | pos_ratio=self._pos_ratio, 71 | batch_size=self._batch_size, 72 | q=self._q, chronological=self._chronological) 73 | -------------------------------------------------------------------------------- /openrec/tf1/legacy/utils/samplers/sampler.py: -------------------------------------------------------------------------------- 1 | from multiprocessing import Process, Queue 2 | 3 | class Sampler(object): 4 | 5 | def __init__(self, dataset, batch_size, num_process=5): 6 | 7 | self._dataset = dataset 8 | self._batch_size = batch_size 9 | self._q = Queue(maxsize=5) 10 | self._runner_list = [] 11 | 12 | for i in range(num_process): 13 | runner = self._get_runner() 14 | 
runner.daemon = True 15 | runner.start() 16 | self._runner_list.append(runner) 17 | 18 | def _get_runner(self): 19 | 20 | return None 21 | 22 | def next_batch(self): 23 | 24 | return self._q.get(block=True) 25 | -------------------------------------------------------------------------------- /openrec/tf1/modules/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /openrec/tf1/modules/extractions/__init__.py: -------------------------------------------------------------------------------- 1 | from openrec.tf1.modules.extractions.latent_factor import LatentFactor 2 | from openrec.tf1.modules.extractions.multi_layer_fc import MultiLayerFC 3 | -------------------------------------------------------------------------------- /openrec/tf1/modules/extractions/latent_factor.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | def LatentFactor(shape, id_=None, l2_reg=None, init='normal', 4 | subgraph=None, scope=None): 5 | 6 | if init == 'normal': 7 | initializer = tf.truncated_normal_initializer(mean=0.0, stddev=0.01, dtype=tf.float32) 8 | elif init == 'zero': 9 | initializer = tf.constant_initializer(value=0.0, dtype=tf.float32) 10 | else: 11 | initializer = tf.constant_initializer(value=init, dtype=tf.float32) 12 | 13 | with tf.variable_scope(scope, default_name='latentfactor', reuse=tf.AUTO_REUSE): 14 | embedding = tf.get_variable('embedding', shape=shape, trainable=True, 15 | initializer=initializer) 16 | if id_ is None: 17 | output = None 18 | else: 19 | output = tf.nn.embedding_lookup(embedding, id_) 20 | 21 | if l2_reg is not None: 22 | subgraph.register_global_loss(l2_reg * tf.nn.l2_loss(output)) 23 | 24 | return embedding, output -------------------------------------------------------------------------------- /openrec/tf1/modules/fusions/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ylongqi/openrec/a00de2345844858194ef43ab6845342114a5be93/openrec/tf1/modules/fusions/__init__.py -------------------------------------------------------------------------------- /openrec/tf1/modules/interactions/__init__.py: -------------------------------------------------------------------------------- 1 | from openrec.tf1.modules.interactions.pairwise_log import PairwiseLog 2 | from openrec.tf1.modules.interactions.pairwise_eu_dist import PairwiseEuDist 3 | from openrec.tf1.modules.interactions.pointwise_mse import PointwiseMSE 4 | from openrec.tf1.modules.interactions.mlp_softmax import MLPSoftmax 5 | from openrec.tf1.modules.interactions.rnn_softmax import RNNSoftmax 6 | from openrec.tf1.modules.interactions.pointwise_mlp_ce import PointwiseMLPCE 7 | -------------------------------------------------------------------------------- /openrec/tf1/modules/interactions/mlp_softmax.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from openrec.tf1.modules.extractions import MultiLayerFC 3 | 4 | 5 | def MLPSoftmax(user, item, seq_len, max_seq_len, dims, subgraph, item_bias=None, extra=None, 6 | l2_reg=None, labels=None, dropout=None, train=None, scope=None): 7 | 8 | with tf.variable_scope(scope, reuse=tf.AUTO_REUSE): 9 | 10 | # average item vectors user interacted with 11 | seq_mask = tf.sequence_mask(seq_len, max_seq_len, dtype=tf.float32) 12 | item = tf.reduce_mean(item 
* tf.expand_dims(seq_mask, axis=2), axis=1) 13 | 14 | if user is not None: 15 | in_tensor = tf.concat([user, item], axis=1) 16 | else: 17 | in_tensor = tf.concat([item], axis=1) 18 | 19 | if extra is not None: 20 | in_tensor = tf.concat([in_tensor, extra], axis=1) 21 | 22 | if train: 23 | logits = MultiLayerFC(in_tensor=in_tensor, 24 | dims=dims, 25 | subgraph=subgraph, 26 | bias_in=True, 27 | bias_mid=True, 28 | bias_out=False, 29 | dropout_mid=dropout, 30 | l2_reg=l2_reg, 31 | scope='mlp_reg') 32 | else: 33 | logits = MultiLayerFC(in_tensor=in_tensor, 34 | dims=dims, 35 | subgraph=subgraph, 36 | bias_in=True, 37 | bias_mid=True, 38 | bias_out=False, 39 | l2_reg=l2_reg, 40 | scope='mlp_reg') 41 | 42 | if item_bias is not None: 43 | logits += item_bias 44 | 45 | if train: 46 | loss = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=labels, 47 | logits=logits) 48 | subgraph.register_global_loss(tf.reduce_mean(loss)) 49 | else: 50 | subgraph.register_global_output(logits) 51 | -------------------------------------------------------------------------------- /openrec/tf1/modules/interactions/pairwise_eu_dist.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | def PairwiseEuDist(user_vec, subgraph, item_vec=None, item_bias=None, p_item_vec=None, 4 | p_item_bias=None, n_item_vec=None, n_item_bias=None, 5 | train=True, weights=1.0, margin=1.0, scope='PointwiseMSE'): 6 | 7 | if train: 8 | l2_user_pos = tf.reduce_sum(tf.square(tf.subtract(user_vec, p_item_vec)), 9 | reduction_indices=1, 10 | keepdims=True, name="l2_user_pos") 11 | l2_user_neg = tf.reduce_sum(tf.square(tf.subtract(user_vec, n_item_vec)), 12 | reduction_indices=1, 13 | keepdims=True, name="l2_user_neg") 14 | pos_score = (-l2_user_pos) + p_item_bias 15 | neg_score = (-l2_user_neg) + n_item_bias 16 | diff = pos_score - neg_score 17 | loss = tf.reduce_sum(weights * tf.maximum(margin - diff, 0)) 18 | subgraph.register_global_loss(loss) 19 | else: 20 | predictions = -tf.reduce_sum(tf.square(tf.subtract(user_vec, item_vec)), 21 | reduction_indices=1, 22 | keepdims=True, name="l2_user_pos") + item_bias 23 | subgraph.register_global_output(predictions) -------------------------------------------------------------------------------- /openrec/tf1/modules/interactions/pairwise_log.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | def PairwiseLog(user_vec, subgraph, item_vec=None, item_bias=None, p_item_vec=None, 4 | p_item_bias=None, n_item_vec=None, n_item_bias=None, 5 | train=True, scope='PointwiseMSE'): 6 | 7 | if train: 8 | dot_user_pos = tf.reduce_sum(tf.multiply(user_vec, p_item_vec), 9 | reduction_indices=1, 10 | keepdims=True, 11 | name="dot_user_pos") 12 | dot_user_neg = tf.reduce_sum(tf.multiply(user_vec, n_item_vec), 13 | reduction_indices=1, 14 | keepdims=True, 15 | name="dot_user_neg") 16 | loss = - tf.reduce_sum(tf.log(tf.sigmoid(tf.maximum(dot_user_pos + p_item_bias - 17 | dot_user_neg - n_item_bias, 18 | -30.0)))) 19 | subgraph.register_global_loss(loss) 20 | else: 21 | predictions = tf.reduce_sum(tf.multiply(user_vec, item_vec), 22 | reduction_indices=1, 23 | keepdims=False) + tf.reshape(item_bias, [-1]) 24 | subgraph.register_global_output(predictions) -------------------------------------------------------------------------------- /openrec/tf1/modules/interactions/pointwise_mlp_ce.py: -------------------------------------------------------------------------------- 1 | import 
tensorflow as tf 2 | from openrec.tf1.modules.extractions import MultiLayerFC 3 | 4 | 5 | def PointwiseMLPCE(user, item, dims, subgraph, item_bias=None, extra=None, 6 | l2_reg=None, labels=None, dropout=None, train=None, scope=None): 7 | 8 | with tf.variable_scope(scope, reuse=tf.AUTO_REUSE): 9 | 10 | if extra is not None: 11 | in_tensor = tf.concat([user, item, extra], axis=1) 12 | else: 13 | in_tensor = tf.concat([user, item], axis=1) 14 | if train: 15 | reg = MultiLayerFC( 16 | in_tensor=in_tensor, 17 | dims=dims, 18 | subgraph=subgraph, 19 | bias_in=True, 20 | bias_mid=True, 21 | bias_out=False, 22 | dropout_mid=dropout, 23 | l2_reg=l2_reg, 24 | scope='mlp_reg') 25 | else: 26 | reg = MultiLayerFC(in_tensor=in_tensor, 27 | dims=dims, 28 | subgraph=subgraph, 29 | bias_in=True, 30 | bias_mid=True, 31 | bias_out=False, 32 | l2_reg=l2_reg, 33 | scope='mlp_reg') 34 | 35 | logits = reg 36 | if item_bias is not None: 37 | logits += item_bias 38 | 39 | if train: 40 | labels_float = tf.reshape(tf.to_float(labels), (-1, 1)) 41 | subgraph.register_global_loss(tf.reduce_sum(tf.nn.sigmoid_cross_entropy_with_logits(labels=labels_float, logits=logits))) 42 | subgraph.register_global_output(logits) 43 | else: 44 | subgraph.register_global_output(tf.sigmoid(logits)) 45 | -------------------------------------------------------------------------------- /openrec/tf1/modules/interactions/pointwise_mse.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import tensorflow as tf 3 | from termcolor import colored 4 | 5 | def PointwiseMSE(user_vec, item_vec, item_bias, subgraph, 6 | label=None, a=1.0, b=1.0, sigmoid=False, 7 | train=True, scope='PointwiseMSE'): 8 | 9 | dot_user_item = tf.reduce_sum(tf.multiply(user_vec, item_vec), 10 | axis=1, keepdims=False, name="dot_user_item") 11 | 12 | if sigmoid: 13 | prediction = tf.sigmoid(dot_user_item + tf.reshape(item_bias, [-1])) 14 | else: 15 | prediction = dot_user_item + tf.reshape(item_bias, [-1]) 16 | 17 | if train: 18 | label_weight = (a - b) * label + b 19 | subgraph.register_global_loss(tf.reduce_sum(label_weight * tf.square(label - prediction))) 20 | subgraph.register_global_output(prediction) -------------------------------------------------------------------------------- /openrec/tf1/modules/interactions/rnn_softmax.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | def RNNSoftmax(seq_item_vec, total_items, seq_len, num_units, cell_type='gru', softmax_samples=None, 4 | label=None, train=True, subgraph=None, scope=None): 5 | 6 | with tf.variable_scope(scope, default_name='RNNSoftmax', reuse=tf.AUTO_REUSE): 7 | if cell_type == 'gru': 8 | rnn_cell = tf.nn.rnn_cell.GRUCell(num_units) 9 | elif cell_type == 'lstm': 10 | rnn_cell = tf.nn.rnn_cell.LSTMCell(num_units) 11 | else: 12 | assert False, "Invalid RNN cell type."
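# dynamic_rnn unrolls the cell over the embedded item sequence; sequence_length
# masks padded time steps, and the final state (for the GRU cell, a tensor of
# shape [batch_size, num_units]) summarizes the user's history before the
# softmax over total_items below.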
13 | 14 | _, rnn_state = tf.nn.dynamic_rnn(cell=rnn_cell, 15 | inputs=seq_item_vec, 16 | sequence_length=seq_len, 17 | dtype=tf.float32) 18 | weight = tf.get_variable('weights', shape=[total_items, num_units], trainable=True, 19 | initializer=tf.contrib.layers.xavier_initializer()) 20 | bias = tf.get_variable('biases', shape=[total_items], trainable=True, 21 | initializer=tf.zeros_initializer()) 22 | if train: 23 | if softmax_samples is not None: 24 | loss = tf.nn.sampled_softmax_loss(weights=weight, biases=bias, labels=tf.reshape(label, [-1, 1]), 25 | inputs=rnn_state, num_sampled=softmax_samples, num_classes=total_items) 26 | else: 27 | logits = tf.matmul(rnn_state, tf.transpose(weight)) + bias 28 | loss = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=label, logits=logits) 29 | subgraph.register_global_loss(tf.reduce_mean(loss)) 30 | else: 31 | logits = tf.matmul(rnn_state, tf.transpose(weight)) + bias 32 | subgraph.register_global_output(tf.squeeze(logits)) -------------------------------------------------------------------------------- /openrec/tf1/recommenders/__init__.py: -------------------------------------------------------------------------------- 1 | from openrec.tf1.recommenders.recommender import Recommender 2 | from openrec.tf1.recommenders.bpr import BPR 3 | from openrec.tf1.recommenders.pmf import PMF 4 | from openrec.tf1.recommenders.ucml import UCML 5 | from openrec.tf1.recommenders.vbpr import VBPR 6 | from openrec.tf1.recommenders.vanilla_youtube_rec import VanillaYouTubeRec 7 | from openrec.tf1.recommenders.youtube_rec import YouTubeRec 8 | from openrec.tf1.recommenders.rnn_rec import RNNRec 9 | 10 | -------------------------------------------------------------------------------- /openrec/tf1/recommenders/pmf.py: -------------------------------------------------------------------------------- 1 | from openrec.tf1.recommenders import Recommender 2 | from openrec.tf1.modules.extractions import LatentFactor 3 | from openrec.tf1.modules.interactions import PointwiseMSE 4 | import tensorflow as tf 5 | 6 | def PMF(batch_size, dim_user_embed, dim_item_embed, total_users, total_items, a=1.0, b=1.0, l2_reg=None, 7 | init_model_dir=None, save_model_dir='Recommender/', train=True, serve=False): 8 | 9 | rec = Recommender(init_model_dir=init_model_dir, save_model_dir=save_model_dir, 10 | train=train, serve=serve) 11 | 12 | t = rec.traingraph 13 | s = rec.servegraph 14 | 15 | @t.inputgraph(outs=['user_id', 'item_id', 'label']) 16 | def train_input_graph(subgraph): 17 | subgraph['user_id'] = tf.placeholder(tf.int32, shape=[batch_size], name='user_id') 18 | subgraph['item_id'] = tf.placeholder(tf.int32, shape=[batch_size], name='item_id') 19 | subgraph['label'] = tf.placeholder(tf.float32, shape=[batch_size], name='label') 20 | subgraph.register_global_input_mapping({'user_id': subgraph['user_id'], 21 | 'item_id': subgraph['item_id'], 22 | 'label': subgraph['label']}) 23 | 24 | @s.inputgraph(outs=['user_id', 'item_id']) 25 | def serve_input_graph(subgraph): 26 | subgraph['user_id'] = tf.placeholder(tf.int32, shape=[None], name='user_id') 27 | subgraph['item_id'] = tf.placeholder(tf.int32, shape=[None], name='item_id') 28 | subgraph.register_global_input_mapping({'user_id': subgraph['user_id'], 29 | 'item_id': subgraph['item_id']}) 30 | 31 | @t.usergraph(ins=['user_id'], outs=['user_vec']) 32 | @s.usergraph(ins=['user_id'], outs=['user_vec']) 33 | def user_graph(subgraph): 34 | _, subgraph['user_vec'] = LatentFactor(l2_reg=l2_reg, 35 | init='normal', 36 | id_=subgraph['user_id'], 37 |
shape=[total_users, dim_user_embed], 38 | subgraph=subgraph, 39 | scope='user') 40 | 41 | @t.itemgraph(ins=['item_id'], outs=['item_vec', 'item_bias']) 42 | @s.itemgraph(ins=['item_id'], outs=['item_vec', 'item_bias']) 43 | def item_graph(subgraph): 44 | _, subgraph['item_vec'] = LatentFactor(l2_reg=l2_reg, init='normal', id_=subgraph['item_id'], 45 | shape=[total_items, dim_item_embed], subgraph=subgraph, scope='item') 46 | _, subgraph['item_bias'] = LatentFactor(l2_reg=l2_reg, init='zero', id_=subgraph['item_id'], 47 | shape=[total_items, 1], subgraph=subgraph, scope='item_bias') 48 | 49 | @t.interactiongraph(ins=['user_vec', 'item_vec', 'item_bias', 'label']) 50 | def interaction_graph(subgraph): 51 | PointwiseMSE(user_vec=subgraph['user_vec'], 52 | item_vec=subgraph['item_vec'], 53 | item_bias=subgraph['item_bias'], 54 | label=subgraph['label'], 55 | a=a, b=b, sigmoid=False, 56 | train=True, subgraph=subgraph, scope='PointwiseMSE') 57 | 58 | @s.interactiongraph(ins=['user_vec', 'item_vec', 'item_bias']) 59 | def serve_interaction_graph(subgraph): 60 | PointwiseMSE(user_vec=subgraph['user_vec'], 61 | item_vec=subgraph['item_vec'], 62 | item_bias=subgraph['item_bias'], 63 | a=a, b=b, sigmoid=False, 64 | train=False, subgraph=subgraph, scope='PointwiseMSE') 65 | 66 | @t.optimizergraph 67 | def optimizer_graph(subgraph): 68 | losses = tf.add_n(subgraph.get_global_losses()) 69 | optimizer = tf.train.AdamOptimizer(learning_rate=0.001) 70 | subgraph.register_global_operation(optimizer.minimize(losses)) 71 | 72 | @t.connector 73 | @s.connector 74 | def connect(graph): 75 | graph.usergraph['user_id'] = graph.inputgraph['user_id'] 76 | graph.itemgraph['item_id'] = graph.inputgraph['item_id'] 77 | graph.interactiongraph['user_vec'] = graph.usergraph['user_vec'] 78 | graph.interactiongraph['item_vec'] = graph.itemgraph['item_vec'] 79 | graph.interactiongraph['item_bias'] = graph.itemgraph['item_bias'] 80 | 81 | @t.connector.extend 82 | def connect_label(graph): 83 | graph.interactiongraph['label'] = graph.inputgraph['label'] 84 | 85 | return rec -------------------------------------------------------------------------------- /openrec/tf1/recommenders/rnn_rec.py: -------------------------------------------------------------------------------- 1 | from openrec.tf1.recommenders import Recommender 2 | from openrec.tf1.modules.extractions import LatentFactor 3 | from openrec.tf1.modules.interactions import RNNSoftmax 4 | import tensorflow as tf 5 | 6 | def RNNRec(batch_size, dim_item_embed, max_seq_len, total_items, num_units, l2_reg=None, 7 | init_model_dir=None, save_model_dir='Recommender/', train=True, serve=False): 8 | 9 | rec = Recommender(init_model_dir=init_model_dir, save_model_dir=save_model_dir, 10 | train=train, serve=serve) 11 | 12 | @rec.traingraph.inputgraph(outs=['seq_item_id', 'seq_len', 'label']) 13 | def f(subgraph): 14 | subgraph['seq_item_id'] = tf.placeholder(tf.int32, shape=[batch_size, max_seq_len], name='seq_item_id') 15 | subgraph['seq_len'] = tf.placeholder(tf.int32, shape=[batch_size], name='seq_len') 16 | subgraph['label'] = tf.placeholder(tf.int32, shape=[batch_size], name='label') 17 | subgraph.register_global_input_mapping({'seq_item_id': subgraph['seq_item_id'], 18 | 'seq_len': subgraph['seq_len'], 19 | 'label': subgraph['label']}) 20 | 21 | @rec.servegraph.inputgraph(outs=['seq_item_id', 'seq_len']) 22 | def f(subgraph): 23 | subgraph['seq_item_id'] = tf.placeholder(tf.int32, shape=[None, max_seq_len], name='seq_item_id') 24 | subgraph['seq_len'] = 
tf.placeholder(tf.int32, shape=[None], name='seq_len') 25 | subgraph.register_global_input_mapping({'seq_item_id': subgraph['seq_item_id'], 26 | 'seq_len': subgraph['seq_len']}) 27 | 28 | @rec.traingraph.itemgraph(outs=['seq_item_vec'], ins=['seq_item_id']) 29 | @rec.servegraph.itemgraph(outs=['seq_item_vec'], ins=['seq_item_id']) 30 | def f(subgraph): 31 | _, subgraph['seq_item_vec'] = LatentFactor(l2_reg=l2_reg, 32 | init='normal', 33 | id_=subgraph['seq_item_id'], 34 | shape=[total_items, dim_item_embed], 35 | subgraph=subgraph, 36 | scope='item') 37 | 38 | @rec.traingraph.interactiongraph(ins=['seq_item_vec', 'seq_len', 'label']) 39 | def f(subgraph): 40 | RNNSoftmax(seq_item_vec=subgraph['seq_item_vec'], seq_len=subgraph['seq_len'], 41 | num_units=num_units, total_items=total_items, label=subgraph['label'], train=True, 42 | subgraph=subgraph, scope='RNNSoftmax') 43 | 44 | @rec.servegraph.interactiongraph(ins=['seq_item_vec', 'seq_len']) 45 | def f(subgraph): 46 | RNNSoftmax(seq_item_vec=subgraph['seq_item_vec'], seq_len=subgraph['seq_len'], 47 | num_units=num_units, total_items=total_items, train=False, 48 | subgraph=subgraph, scope='RNNSoftmax') 49 | 50 | @rec.traingraph.optimizergraph 51 | def f(subgraph): 52 | losses = tf.add_n(subgraph.get_global_losses()) 53 | optimizer = tf.train.AdamOptimizer(learning_rate=0.001) 54 | subgraph.register_global_operation(optimizer.minimize(losses)) 55 | 56 | @rec.traingraph.connector 57 | @rec.servegraph.connector 58 | def f(graph): 59 | graph.itemgraph['seq_item_id'] = graph.inputgraph['seq_item_id'] 60 | graph.interactiongraph['seq_item_vec'] = graph.itemgraph['seq_item_vec'] 61 | graph.interactiongraph['seq_len'] = graph.inputgraph['seq_len'] 62 | 63 | @rec.traingraph.connector.extend 64 | def f(graph): 65 | graph.interactiongraph['label'] = graph.inputgraph['label'] 66 | 67 | return rec -------------------------------------------------------------------------------- /openrec/tf1/recommenders/ucml.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from openrec.tf1.recommenders import BPR 3 | from openrec.tf1.modules.interactions import PairwiseEuDist 4 | from openrec.tf1.modules.extractions import LatentFactor 5 | 6 | def UCML(batch_size, dim_user_embed, dim_item_embed, total_users, total_items, l2_reg=None, 7 | init_model_dir=None, save_model_dir='Recommender/', train=True, serve=False): 8 | 9 | rec = BPR(batch_size=batch_size, 10 | dim_user_embed=dim_user_embed, 11 | dim_item_embed=dim_item_embed, 12 | total_users=total_users, 13 | total_items=total_items, 14 | l2_reg=l2_reg, 15 | init_model_dir=init_model_dir, 16 | save_model_dir=save_model_dir, 17 | train=train, 18 | serve=serve) 19 | 20 | t = rec.traingraph 21 | s = rec.servegraph 22 | 23 | def censor_vec(embedding, censor_id): 24 | unique_censor_id, _ = tf.unique(censor_id) 25 | embedding_gather = tf.gather(embedding, indices=unique_censor_id) 26 | norm = tf.sqrt(tf.reduce_sum(tf.square(embedding_gather), axis=1, keepdims=True)) 27 | return tf.scatter_update(embedding, indices=unique_censor_id, updates=embedding_gather / tf.maximum(norm, 1.0)) 28 | 29 | @t.usergraph.extend 30 | def censor_user_vec(subgraph): 31 | user_embedding, _ = LatentFactor(l2_reg=None, 32 | init='normal', 33 | id_=None, 34 | shape=[total_users, dim_user_embed], 35 | scope='user') 36 | user_censor_ops = censor_vec(user_embedding, subgraph['user_id']) 37 | subgraph.register_global_operation(user_censor_ops, 'censor_embedding') 38 | 39 | 
@t.itemgraph.extend 40 | def censor_item_vec(subgraph): 41 | item_embedding, _ = LatentFactor(l2_reg=None, 42 | init='normal', 43 | id_=None, 44 | shape=[total_items, dim_item_embed], 45 | subgraph=subgraph, 46 | scope='item') 47 | item_censor_ops = censor_vec(item_embedding, tf.concat([subgraph['p_item_id'], subgraph['n_item_id']], axis=0)) 48 | subgraph.register_global_operation(item_censor_ops, 'censor_embedding') 49 | 50 | @t.interactiongraph(ins=['user_vec', 'p_item_vec', 'n_item_vec', 'p_item_bias', 'n_item_bias']) 51 | def interaction_graph(subgraph): 52 | PairwiseEuDist(user_vec=subgraph['user_vec'], 53 | p_item_vec=subgraph['p_item_vec'], 54 | n_item_vec=subgraph['n_item_vec'], 55 | p_item_bias=subgraph['p_item_bias'], 56 | n_item_bias=subgraph['n_item_bias'], 57 | subgraph=subgraph, 58 | train=True, 59 | scope='PairwiseEuDist') 60 | 61 | @s.interactiongraph(ins=['user_vec', 'item_vec', 'item_bias']) 62 | def serving_interaction_graph(subgraph): 63 | PairwiseEuDist(user_vec=subgraph['user_vec'], 64 | item_vec=subgraph['item_vec'], 65 | item_bias=subgraph['item_bias'], 66 | train=False, 67 | subgraph=subgraph, 68 | scope='PairwiseEuDist') 69 | 70 | return rec 71 | -------------------------------------------------------------------------------- /openrec/tf1/recommenders/vanilla_youtube_rec.py: -------------------------------------------------------------------------------- 1 | from openrec.tf1.recommenders import Recommender 2 | from openrec.tf1.modules.extractions import LatentFactor, MultiLayerFC 3 | from openrec.tf1.modules.interactions import MLPSoftmax 4 | import tensorflow as tf 5 | 6 | def VanillaYouTubeRec(batch_size, dim_item_embed, max_seq_len, total_items, 7 | l2_reg_embed=None, l2_reg_mlp=None, dropout=None, init_model_dir=None, 8 | save_model_dir='Vanilla_YouTubeRec/', train=True, serve=False): 9 | 10 | rec = Recommender(init_model_dir=init_model_dir, save_model_dir=save_model_dir, 11 | train=train, serve=serve) 12 | 13 | 14 | @rec.traingraph.inputgraph(outs=['seq_item_id', 'seq_len', 'label']) 15 | def train_input_graph(subgraph): 16 | subgraph['seq_item_id'] = tf.placeholder(tf.int32, shape=[batch_size, max_seq_len], name='seq_item_id') 17 | subgraph['seq_len'] = tf.placeholder(tf.int32, shape=[batch_size], name='seq_len') 18 | subgraph['label'] = tf.placeholder(tf.int32, shape=[batch_size], name='label') 19 | subgraph.register_global_input_mapping({'seq_item_id': subgraph['seq_item_id'], 20 | 'seq_len': subgraph['seq_len'], 21 | 'label': subgraph['label']}) 22 | 23 | 24 | @rec.servegraph.inputgraph(outs=['seq_item_id', 'seq_len']) 25 | def serve_input_graph(subgraph): 26 | subgraph['seq_item_id'] = tf.placeholder(tf.int32, shape=[None, max_seq_len], name='seq_item_id') 27 | subgraph['seq_len'] = tf.placeholder(tf.int32, shape=[None], name='seq_len') 28 | subgraph.register_global_input_mapping({'seq_item_id': subgraph['seq_item_id'], 29 | 'seq_len': subgraph['seq_len']}) 30 | 31 | 32 | @rec.traingraph.itemgraph(ins=['seq_item_id', 'seq_len'], outs=['seq_item_vec']) 33 | @rec.servegraph.itemgraph(ins=['seq_item_id', 'seq_len'], outs=['seq_item_vec']) 34 | def item_graph(subgraph): 35 | _, subgraph['seq_item_vec']= LatentFactor(l2_reg=l2_reg_embed, 36 | init='normal', 37 | id_=subgraph['seq_item_id'], 38 | shape=[total_items,dim_item_embed], 39 | subgraph=subgraph, 40 | scope='item') 41 | 42 | 43 | @rec.traingraph.interactiongraph(ins=['seq_item_vec', 'seq_len', 'label']) 44 | def train_interaction_graph(subgraph): 45 | MLPSoftmax(user=None, 46 | 
item=subgraph['seq_item_vec'], 47 | seq_len=subgraph['seq_len'], 48 | max_seq_len=max_seq_len, 49 | dims=[dim_item_embed, total_items], 50 | l2_reg=l2_reg_mlp, 51 | labels=subgraph['label'], 52 | dropout=dropout, 53 | train=True, 54 | subgraph=subgraph, 55 | scope='MLPSoftmax') 56 | 57 | 58 | @rec.servegraph.interactiongraph(ins=['seq_item_vec', 'seq_len']) 59 | def serve_interaction_graph(subgraph): 60 | MLPSoftmax(user=None, 61 | item=subgraph['seq_item_vec'], 62 | seq_len=subgraph['seq_len'], 63 | max_seq_len=max_seq_len, 64 | dims=[dim_item_embed, total_items], 65 | l2_reg=l2_reg_mlp, 66 | train=False, 67 | subgraph=subgraph, 68 | scope='MLPSoftmax') 69 | 70 | @rec.traingraph.optimizergraph 71 | def optimizer_graph(subgraph): 72 | losses = tf.add_n(subgraph.get_global_losses()) 73 | optimizer = tf.train.AdamOptimizer(learning_rate=0.001) 74 | subgraph.register_global_operation(optimizer.minimize(losses)) 75 | 76 | 77 | @rec.traingraph.connector 78 | @rec.servegraph.connector 79 | def connect(graph): 80 | graph.itemgraph['seq_item_id'] = graph.inputgraph['seq_item_id'] 81 | graph.itemgraph['seq_len'] = graph.inputgraph['seq_len'] 82 | graph.interactiongraph['seq_len'] = graph.inputgraph['seq_len'] 83 | graph.interactiongraph['seq_item_vec'] = graph.itemgraph['seq_item_vec'] 84 | 85 | 86 | @rec.traingraph.connector.extend 87 | def train_connect(graph): 88 | graph.interactiongraph['label'] = graph.inputgraph['label'] 89 | 90 | 91 | return rec 92 | -------------------------------------------------------------------------------- /openrec/tf1/recommenders/vbpr.py: -------------------------------------------------------------------------------- 1 | from openrec.tf1.recommenders import BPR 2 | from openrec.tf1.modules.extractions import LatentFactor, MultiLayerFC 3 | import tensorflow as tf 4 | 5 | 6 | def VBPR(batch_size, dim_user_embed, dim_item_embed, dim_v, total_users, total_items, l2_reg_embed=None, 7 | l2_reg_mlp=None, init_model_dir=None, save_model_dir='Recommender/', train=True, serve=False): 8 | 9 | rec = BPR(batch_size=batch_size, 10 | dim_user_embed=dim_user_embed, 11 | dim_item_embed=dim_item_embed, 12 | total_users=total_users, 13 | total_items=total_items, 14 | l2_reg=l2_reg_embed, 15 | init_model_dir=init_model_dir, 16 | save_model_dir=save_model_dir, 17 | train=train, serve=serve) 18 | 19 | t = rec.traingraph 20 | s = rec.servegraph 21 | 22 | @t.inputgraph.extend(outs=['p_item_vfeature', 'n_item_vfeature']) 23 | def train_item_visual_features(subgraph): 24 | subgraph['p_item_vfeature'] = tf.placeholder(tf.float32, shape=[batch_size, dim_v], name='p_item_vfeature') 25 | subgraph['n_item_vfeature'] = tf.placeholder(tf.float32, shape=[batch_size, dim_v], name='n_item_vfeature') 26 | subgraph.update_global_input_mapping({'p_item_vfeature': subgraph['p_item_vfeature'], 27 | 'n_item_vfeature': subgraph['n_item_vfeature']}) 28 | 29 | @s.inputgraph.extend(outs=['item_vfeature']) 30 | def serving_item_visual_features(subgraph): 31 | subgraph['item_vfeature'] = tf.placeholder(tf.float32, shape=[None, dim_v], name='item_vfeature') 32 | subgraph.update_global_input_mapping({'item_vfeature': subgraph['item_vfeature']}) 33 | 34 | @t.itemgraph.extend(ins=['p_item_vfeature', 'n_item_vfeature']) 35 | def train_add_item_graph(subgraph): 36 | p_item_vout = MultiLayerFC(in_tensor=subgraph['p_item_vfeature'], l2_reg=l2_reg_mlp, subgraph=subgraph, 37 | dims=[dim_user_embed-dim_item_embed], scope='item_MLP') 38 | n_item_vout = MultiLayerFC(in_tensor=subgraph['n_item_vfeature'], l2_reg=l2_reg_mlp, 
subgraph=subgraph, 39 | dims=[dim_user_embed-dim_item_embed], scope='item_MLP') 40 | subgraph['p_item_vec'] = tf.concat([subgraph['p_item_vec'], p_item_vout], axis=1) 41 | subgraph['n_item_vec'] = tf.concat([subgraph['n_item_vec'], n_item_vout], axis=1) 42 | 43 | @s.itemgraph.extend(ins=['item_vfeature']) 44 | def serving_add_item_graph(subgraph): 45 | item_vout = MultiLayerFC(in_tensor=subgraph['item_vfeature'], l2_reg=l2_reg_mlp, subgraph=subgraph, 46 | dims=[dim_user_embed-dim_item_embed], scope='item_MLP') 47 | subgraph['item_vec'] = tf.concat([subgraph['item_vec'], item_vout], axis=1) 48 | 49 | @t.connector.extend 50 | def train_connect(graph): 51 | graph.itemgraph['p_item_vfeature'] = graph.inputgraph['p_item_vfeature'] 52 | graph.itemgraph['n_item_vfeature'] = graph.inputgraph['n_item_vfeature'] 53 | 54 | @s.connector.extend 55 | def serve_connect(graph): 56 | graph.itemgraph['item_vfeature'] = graph.inputgraph['item_vfeature'] 57 | 58 | return rec -------------------------------------------------------------------------------- /openrec/tf1/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from openrec.tf1.utils.dataset import Dataset 2 | -------------------------------------------------------------------------------- /openrec/tf1/utils/evaluators/__init__.py: -------------------------------------------------------------------------------- 1 | from openrec.tf1.utils.evaluators.evaluator import Evaluator 2 | from openrec.tf1.utils.evaluators.auc import AUC 3 | from openrec.tf1.utils.evaluators.recall import Recall 4 | from openrec.tf1.utils.evaluators.eval_manager import EvalManager 5 | from openrec.tf1.utils.evaluators.mse import MSE 6 | from openrec.tf1.utils.evaluators.ndcg import NDCG 7 | from openrec.tf1.utils.evaluators.precision import Precision 8 | -------------------------------------------------------------------------------- /openrec/tf1/utils/evaluators/auc.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from openrec.tf1.utils.evaluators import Evaluator 3 | 4 | class AUC(Evaluator): 5 | 6 | def __init__(self, name='AUC'): 7 | 8 | super(AUC, self).__init__(etype='rank', name=name) 9 | 10 | def compute(self, rank_above, negative_num): 11 | 12 | return np.mean((negative_num - rank_above) / negative_num) -------------------------------------------------------------------------------- /openrec/tf1/utils/evaluators/eval_manager.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | class EvalManager(object): 4 | 5 | def __init__(self, evaluators=[]): 6 | 7 | self.evaluators = evaluators 8 | 9 | def _full_rank(self, pos_samples, excl_pos_samples, predictions): 10 | 11 | pos_samples_set = set(pos_samples) 12 | pos_samples = np.array(pos_samples, dtype=np.int32) 13 | pos_predictions = predictions[pos_samples] 14 | 15 | excl_pos_samples_set = set(excl_pos_samples) 16 | rank_above = np.zeros(len(pos_samples)) 17 | 18 | pos_samples_len = len(pos_samples) 19 | for ind in range(len(predictions)): 20 | if ind not in excl_pos_samples_set and ind not in pos_samples_set: 21 | for pos_ind in range(pos_samples_len): 22 | if pos_predictions[pos_ind] < predictions[ind]: 23 | rank_above[pos_ind] += 1 24 | 25 | return rank_above, len(predictions) - len(excl_pos_samples) - len(pos_samples) 26 | 27 | def _partial_rank(self, pos_scores, neg_scores): 28 | 29 | pos_scores = np.array(pos_scores) 30 | rank_above = 
np.zeros(len(pos_scores)) 31 | pos_scores_len = len(pos_scores) 32 | 33 | for score in neg_scores: 34 | for pos_ind in range(pos_scores_len): 35 | if pos_scores[pos_ind] < score: 36 | rank_above[pos_ind] += 1 37 | 38 | return rank_above, len(neg_scores) 39 | 40 | def full_eval(self, pos_samples, excl_pos_samples, predictions): 41 | 42 | results = {} 43 | rank_above, negative_num = self._full_rank(pos_samples, excl_pos_samples, predictions) 44 | for evaluator in self.evaluators: 45 | if evaluator.etype == 'rank': 46 | results[evaluator.name] = evaluator.compute(rank_above=rank_above, negative_num=negative_num) 47 | 48 | return results 49 | 50 | def partial_eval(self, pos_scores, neg_scores): 51 | 52 | results = {} 53 | rank_above, negative_num = self._partial_rank(pos_scores, neg_scores) 54 | for evaluator in self.evaluators: 55 | if evaluator.etype == 'rank': 56 | results[evaluator.name] = evaluator.compute(rank_above=rank_above, negative_num=negative_num) 57 | 58 | return results 59 | -------------------------------------------------------------------------------- /openrec/tf1/utils/evaluators/evaluator.py: -------------------------------------------------------------------------------- 1 | 2 | class Evaluator(object): 3 | 4 | def __init__(self, etype, name): 5 | 6 | self.etype = etype 7 | self.name = name 8 | 9 | def compute(self): 10 | 11 | return None 12 | 13 | -------------------------------------------------------------------------------- /openrec/tf1/utils/evaluators/mse.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from openrec.tf1.utils.evaluators import Evaluator 3 | 4 | class MSE(Evaluator): 5 | 6 | def __init__(self, name='MSE'): 7 | 8 | super(MSE, self).__init__(etype='regression', name=name) 9 | 10 | def compute(self, predictions, labels): 11 | 12 | return np.square(predictions - labels) 13 | -------------------------------------------------------------------------------- /openrec/tf1/utils/evaluators/ndcg.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from math import log 3 | from openrec.tf1.utils.evaluators import Evaluator 4 | 5 | class NDCG(Evaluator): 6 | 7 | def __init__(self, ndcg_at, name='NDCG'): 8 | 9 | self._ndcg_at = np.array(ndcg_at) 10 | 11 | super(NDCG, self).__init__(etype='rank', name=name) 12 | 13 | def compute(self, rank_above, negative_num): 14 | 15 | del negative_num 16 | denominator = 0.0 17 | for i in range(len(rank_above)): 18 | denominator += 1.0 / log(i+2, 2) 19 | 20 | results = np.zeros(len(self._ndcg_at)) 21 | for r in rank_above: 22 | tmp = 1.0 / log(r+2, 2) 23 | results[r < self._ndcg_at] += tmp 24 | 25 | return results / denominator 26 | 27 | -------------------------------------------------------------------------------- /openrec/tf1/utils/evaluators/precision.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from openrec.tf1.utils.evaluators import Evaluator 3 | 4 | class Precision(Evaluator): 5 | 6 | def __init__(self, precision_at, name='Precision'): 7 | 8 | self._precision_at = np.array(precision_at) 9 | 10 | super(Precision, self).__init__(etype='rank', name=name) 11 | 12 | def compute(self, rank_above, negative_num): 13 | 14 | del negative_num 15 | results = np.zeros(len(self._precision_at)) 16 | for rank in rank_above: 17 | results += (rank <= self._precision_at).astype(np.float32) 18 | 19 | return results / self._precision_at 20 | 
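# Usage sketch for the rank-based evaluators (hypothetical scores; assumes
# higher-is-better predictions):
#
#   from openrec.tf1.utils.evaluators import EvalManager, AUC, Precision
#
#   manager = EvalManager(evaluators=[AUC(), Precision(precision_at=[5, 10])])
#   results = manager.partial_eval(pos_scores=[0.9, 0.4], neg_scores=[0.5, 0.3, 0.1])
#   # _partial_rank gives rank_above = [0, 1] (one negative outscores the
#   # 0.4 positive) and negative_num = 3, so results['AUC'] is
#   # mean([3/3, 2/3]) ~= 0.83 and results['Precision'] is [2/5, 2/10].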
-------------------------------------------------------------------------------- /openrec/tf1/utils/evaluators/recall.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from openrec.tf1.utils.evaluators import Evaluator 3 | 4 | class Recall(Evaluator): 5 | 6 | def __init__(self, recall_at, name='Recall'): 7 | 8 | self._recall_at = np.array(recall_at) 9 | 10 | super(Recall, self).__init__(etype='rank', name=name) 11 | 12 | def compute(self, rank_above, negative_num): 13 | 14 | del negative_num 15 | results = np.zeros(len(self._recall_at)) 16 | for rank in rank_above: 17 | results += (rank <= self._recall_at).astype(np.float32) 18 | 19 | return results / len(rank_above) 20 | -------------------------------------------------------------------------------- /openrec/tf1/utils/samplers/__init__.py: -------------------------------------------------------------------------------- 1 | from openrec.tf1.utils.samplers.sampler import Sampler 2 | from openrec.tf1.utils.samplers.random_pairwise_sampler import RandomPairwiseSampler 3 | from openrec.tf1.utils.samplers.vbpr_pairwise_sampler import VBPRPairwiseSampler 4 | from openrec.tf1.utils.samplers.random_pointwise_sampler import RandomPointwiseSampler 5 | from openrec.tf1.utils.samplers.stratified_pointwise_sampler import StratifiedPointwiseSampler 6 | from openrec.tf1.utils.samplers.evaluation_sampler import EvaluationSampler 7 | from openrec.tf1.utils.samplers.vbpr_evaluation_sampler import VBPREvaluationSampler 8 | from openrec.tf1.utils.samplers.temporal_sampler import TemporalSampler 9 | from openrec.tf1.utils.samplers.temporal_evaluation_sampler import TemporalEvaluationSampler 10 | from openrec.tf1.utils.samplers.youtube_sampler import YouTubeSampler 11 | from openrec.tf1.utils.samplers.youtube_evaluation_sampler import YouTubeEvaluationSampler 12 | 13 | -------------------------------------------------------------------------------- /openrec/tf1/utils/samplers/evaluation_sampler.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import random 3 | import math 4 | from openrec.tf1.utils.samplers import Sampler 5 | 6 | def EvaluationSampler(batch_size, dataset, seed=100): 7 | 8 | random.seed(seed) 9 | def batch(dataset, batch_size=batch_size): 10 | while True: 11 | for user_id in dataset.warm_users(): 12 | positive_items = dataset.get_positive_items(user_id) 13 | negative_items = dataset.get_negative_items(user_id) 14 | all_items = positive_items + negative_items 15 | 16 | for batch_ind in range(int(math.ceil(float(len(all_items)) / batch_size))): 17 | current_batch_size = min(len(all_items)-batch_ind*batch_size, batch_size) 18 | input_npy = np.zeros(current_batch_size, dtype=[('user_id', np.int32), 19 | ('item_id', np.int32)]) 20 | for inst_ind in range(current_batch_size): 21 | input_npy[inst_ind] = (user_id, all_items[batch_ind*batch_size+inst_ind]) 22 | num_positives = len(positive_items) - batch_ind*batch_size 23 | if num_positives > 0: 24 | yield range(num_positives), input_npy 25 | else: 26 | yield [], input_npy 27 | 28 | yield [], [] 29 | yield None, None 30 | 31 | s = Sampler(dataset=dataset, generate_batch=batch, num_process=1) 32 | return s 33 | -------------------------------------------------------------------------------- /openrec/tf1/utils/samplers/random_pairwise_sampler.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import 
numpy as np 3 | import random 4 | from openrec.tf1.utils.samplers import Sampler 5 | 6 | def RandomPairwiseSampler(dataset, batch_size, num_process=5, seed=100): 7 | 8 | random.seed(seed) 9 | def batch(dataset, batch_size=batch_size, seed=seed): 10 | 11 | while True: 12 | input_npy = np.zeros(batch_size, dtype=[('user_id', np.int32), 13 | ('p_item_id', np.int32), 14 | ('n_item_id', np.int32)]) 15 | 16 | for ind in range(batch_size): 17 | entry = dataset.next_random_record() 18 | user_id = entry['user_id'] 19 | p_item_id = entry['item_id'] 20 | n_item_id = dataset.sample_negative_items(user_id)[0] 21 | input_npy[ind] = (user_id, p_item_id, n_item_id) 22 | yield input_npy 23 | 24 | s = Sampler(dataset=dataset, generate_batch=batch, num_process=num_process) 25 | 26 | return s -------------------------------------------------------------------------------- /openrec/tf1/utils/samplers/random_pointwise_sampler.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import random 3 | from openrec.tf1.utils.samplers import Sampler 4 | 5 | def RandomPointwiseSampler(dataset, batch_size, num_process=5, seed=100): 6 | 7 | random.seed(seed) 8 | def batch(dataset, batch_size=batch_size): 9 | 10 | while True: 11 | input_npy = np.zeros(batch_size, dtype=[('user_id', np.int32), 12 | ('item_id', np.int32), 13 | ('label', np.float32)]) 14 | 15 | for ind in range(batch_size): 16 | user_id = random.randint(0, dataset.total_users()-1) 17 | item_id = random.randint(0, dataset.total_items()-1) 18 | label = 1.0 if dataset.is_positive(user_id, item_id) else 0.0 19 | input_npy[ind] = (user_id, item_id, label) 20 | yield input_npy 21 | 22 | s = Sampler(dataset=dataset, generate_batch=batch, num_process=num_process) 23 | 24 | return s -------------------------------------------------------------------------------- /openrec/tf1/utils/samplers/sampler.py: -------------------------------------------------------------------------------- 1 | from multiprocessing import Process, Queue 2 | 3 | class _Sampler(Process): 4 | 5 | def __init__(self, dataset, q, generate_batch): 6 | 7 | self._q = q 8 | self._generate_batch = generate_batch 9 | self._dataset = dataset 10 | super(_Sampler, self).__init__() 11 | 12 | def run(self): 13 | for input_npy in self._generate_batch(self._dataset): 14 | self._q.put(input_npy, block=True) 15 | 16 | class Sampler(object): 17 | 18 | def __init__(self, dataset=None, generate_batch=None, num_process=5): 19 | 20 | assert generate_batch is not None, "Batch generation function is not specified" 21 | assert dataset is not None, "Dataset is not specified" 22 | self._q = None 23 | self._dataset = dataset 24 | self._runner_list = [] 25 | self._start = False 26 | self._num_process = num_process 27 | self._generate_batch = generate_batch 28 | self.name = self._dataset.name 29 | 30 | def next_batch(self): 31 | 32 | if not self._start: 33 | self.reset() 34 | 35 | return self._q.get(block=True) 36 | 37 | def reset(self): 38 | 39 | while len(self._runner_list) > 0: 40 | runner = self._runner_list.pop() 41 | runner.terminate() 42 | del runner 43 | 44 | if self._q is not None: 45 | del self._q 46 | self._q = Queue(maxsize=self._num_process) 47 | 48 | for i in range(self._num_process): 49 | runner = _Sampler(self._dataset, self._q, self._generate_batch) 50 | runner.daemon = True 51 | runner.start() 52 | self._runner_list.append(runner) 53 | self._start = True -------------------------------------------------------------------------------- 
/openrec/tf1/utils/samplers/stratified_pointwise_sampler.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import random 3 | from openrec.tf1.utils.samplers import Sampler 4 | 5 | 6 | def StratifiedPointwiseSampler(dataset, batch_size, pos_ratio=0.5, num_process=5, seed=100): 7 | 8 | random.seed(seed) 9 | def batch(dataset, batch_size=batch_size, pos_ratio=pos_ratio, seed=seed): 10 | 11 | num_pos = int(batch_size * pos_ratio) 12 | while True: 13 | input_npy = np.zeros(batch_size, dtype=[('user_id', np.int32), 14 | ('item_id', np.int32), 15 | ('label', np.float32)]) 16 | 17 | for ind in range(num_pos): 18 | entry = dataset.next_random_record() 19 | input_npy[ind] = (entry['user_id'], entry['item_id'], 1.0) 20 | 21 | for ind in range(batch_size - num_pos): 22 | user_id = random.randint(0, dataset.total_users()-1) 23 | item_id = random.randint(0, dataset.total_items()-1) 24 | while dataset.is_positive(user_id, item_id): 25 | user_id = random.randint(0, dataset.total_users()-1) 26 | item_id = random.randint(0, dataset.total_items()-1) 27 | input_npy[ind + num_pos] = (user_id, item_id, 0.0) 28 | 29 | yield input_npy 30 | 31 | s = Sampler(dataset=dataset, generate_batch=batch, num_process=num_process) 32 | 33 | return s 34 | -------------------------------------------------------------------------------- /openrec/tf1/utils/samplers/temporal_evaluation_sampler.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import random 3 | from openrec.tf1.utils.samplers import Sampler 4 | 5 | def TemporalEvaluationSampler(dataset, max_seq_len, seed=100): 6 | 7 | random.seed(seed) 8 | def batch(dataset, max_seq_len=max_seq_len): 9 | 10 | while True: 11 | for user_id in dataset.warm_users(): 12 | input_npy = np.zeros(1, dtype=[('seq_item_id', (np.int32, max_seq_len)), 13 | ('seq_len', np.int32)]) 14 | 15 | item_list = dataset.get_positive_items(user_id, sort=True) 16 | if len(item_list) <= 1: 17 | continue 18 | train_items = item_list[-max_seq_len-1:-1] 19 | pad_train_items = np.zeros(max_seq_len, np.int32) 20 | pad_train_items[:len(train_items)] = train_items 21 | input_npy[0] = (pad_train_items, len(train_items)) 22 | yield [train_items[-1]], input_npy 23 | yield [], [] 24 | yield None, None 25 | 26 | s = Sampler(dataset=dataset, generate_batch=batch, num_process=1) 27 | 28 | return s -------------------------------------------------------------------------------- /openrec/tf1/utils/samplers/temporal_sampler.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import random 3 | from openrec.tf1.utils.samplers import Sampler 4 | 5 | def TemporalSampler(dataset, batch_size, max_seq_len, num_process=5, seed=100): 6 | 7 | random.seed(seed) 8 | def batch(dataset, max_seq_len=max_seq_len, batch_size=batch_size): 9 | 10 | while True: 11 | input_npy = np.zeros(batch_size, dtype=[('seq_item_id', (np.int32, max_seq_len)), 12 | ('seq_len', np.int32), 13 | ('label', np.int32)]) 14 | 15 | for ind in range(batch_size): 16 | user_id = random.randint(0, dataset.total_users()-1) 17 | item_list = dataset.get_positive_items(user_id, sort=True) 18 | while len(item_list) <= 1: 19 | user_id = random.randint(0, dataset.total_users()-1) 20 | item_list = dataset.get_positive_items(user_id, sort=True) 21 | predict_pos = random.randint(1, len(item_list) - 1) 22 | train_items = item_list[max(0, predict_pos-max_seq_len):predict_pos] 23 | pad_train_items = 
np.zeros(max_seq_len, np.int32) 24 | pad_train_items[:len(train_items)] = train_items 25 | input_npy[ind] = (pad_train_items, len(train_items), item_list[predict_pos]) 26 | yield input_npy 27 | 28 | s = Sampler(dataset=dataset, generate_batch=batch, num_process=num_process) 29 | 30 | return s -------------------------------------------------------------------------------- /openrec/tf1/utils/samplers/vbpr_evaluation_sampler.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import random 3 | from openrec.tf1.utils.samplers import Sampler 4 | import math 5 | 6 | def VBPREvaluationSampler(batch_size, dataset, item_vfeature, seed=100): 7 | 8 | random.seed(seed) 9 | def batch(dataset, batch_size=batch_size, item_vfeature=item_vfeature): 10 | _, dim_v = item_vfeature.shape 11 | while True: 12 | for user_id in dataset.warm_users(): 13 | positive_items = dataset.get_positive_items(user_id) 14 | negative_items = dataset.get_negative_items(user_id) 15 | all_items = positive_items + negative_items 16 | 17 | for batch_ind in range(int(math.ceil(float(len(all_items)) / batch_size))): 18 | current_batch_size = min(len(all_items)-batch_ind*batch_size, batch_size) 19 | input_npy = np.zeros(current_batch_size, dtype=[('user_id', np.int32), 20 | ('item_id', np.int32), 21 | ('item_vfeature', np.float32, (dim_v))]) 22 | for inst_ind in range(current_batch_size): 23 | item_id = all_items[batch_ind*batch_size+inst_ind] 24 | input_npy[inst_ind] = (user_id, item_id, item_vfeature[item_id]) 25 | num_positives = len(positive_items) - batch_ind*batch_size 26 | if num_positives > 0: 27 | yield range(num_positives), input_npy 28 | else: 29 | yield [], input_npy 30 | 31 | yield [], [] 32 | yield None, None 33 | 34 | s = Sampler(dataset=dataset, generate_batch=batch, num_process=1) 35 | return s 36 | -------------------------------------------------------------------------------- /openrec/tf1/utils/samplers/vbpr_pairwise_sampler.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import numpy as np 3 | import random 4 | from openrec.tf1.utils.samplers import Sampler 5 | 6 | def VBPRPairwiseSampler(dataset, batch_size, item_vfeature, num_process=5, seed=100): 7 | 8 | random.seed(seed) 9 | def batch(dataset, batch_size=batch_size, 10 | item_vfeature=item_vfeature, seed=seed): 11 | 12 | _, dim_v = item_vfeature.shape 13 | while True: 14 | 15 | input_npy = np.zeros(batch_size, dtype=[('user_id', np.int32), 16 | ('p_item_id', np.int32), 17 | ('n_item_id', np.int32), 18 | ('p_item_vfeature', np.float32, (dim_v)), 19 | ('n_item_vfeature', np.float32, (dim_v))]) 20 | 21 | for ind in range(batch_size): 22 | entry = dataset.next_random_record() 23 | user_id = entry['user_id'] 24 | p_item_id = entry['item_id'] 25 | n_item_id = dataset.sample_negative_items(user_id)[0] 26 | input_npy[ind] = (user_id, p_item_id, n_item_id, 27 | item_vfeature[p_item_id], item_vfeature[n_item_id]) 28 | yield input_npy 29 | 30 | s = Sampler(dataset=dataset, generate_batch=batch, num_process=num_process) 31 | 32 | return s -------------------------------------------------------------------------------- /openrec/tf1/utils/samplers/youtube_evaluation_sampler.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import random 3 | from openrec.tf1.utils.samplers import Sampler 4 | 5 | def YouTubeEvaluationSampler(dataset, max_seq_len, user_feature, seed=100, 
sort=True): 6 | 7 | random.seed(seed) 8 | def batch(dataset, user_feature=user_feature, max_seq_len=max_seq_len): 9 | 10 | while True: 11 | for user_id in dataset.warm_users(): 12 | input_npy = np.zeros(1, dtype=[('seq_item_id', (np.int32, max_seq_len)), 13 | ('seq_len', np.int32), 14 | ('user_gender', np.int32), 15 | ('user_geo', np.int32)]) 16 | 17 | item_list = dataset.get_positive_items(user_id, sort=sort) 18 | if len(item_list) <= 1: 19 | continue 20 | train_items = item_list[-max_seq_len-1:-1] 21 | pad_train_items = np.zeros(max_seq_len, np.int32) 22 | pad_train_items[:len(train_items)] = train_items 23 | input_npy[0] = (pad_train_items, 24 | len(train_items), 25 | user_feature[user_id]['user_gender'], 26 | user_feature[user_id]['user_geo']) 27 | yield [train_items[-1]], input_npy 28 | yield [], [] 29 | yield None, None 30 | 31 | s = Sampler(dataset=dataset, generate_batch=batch, num_process=1) 32 | 33 | return s 34 | -------------------------------------------------------------------------------- /openrec/tf1/utils/samplers/youtube_sampler.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import random 3 | from openrec.tf1.utils.samplers import Sampler 4 | 5 | def YouTubeSampler(dataset, batch_size, max_seq_len, user_feature, num_process=5, seed=100, sort=True): 6 | 7 | random.seed(seed) 8 | def batch(dataset, user_feature=user_feature, max_seq_len=max_seq_len, batch_size=batch_size): 9 | 10 | while True: 11 | input_npy = np.zeros(batch_size, dtype=[('seq_item_id', (np.int32, max_seq_len)), 12 | ('seq_len', np.int32), 13 | ('label', np.int32), 14 | ('user_gender', np.int32), 15 | ('user_geo', np.int32)]) 16 | 17 | for ind in range(batch_size): 18 | user_id = random.randint(0, dataset.total_users()-1) 19 | item_list = dataset.get_positive_items(user_id, sort=sort) 20 | while len(item_list) <= 1: 21 | user_id = random.randint(0, dataset.total_users()-1) 22 | item_list = dataset.get_positive_items(user_id, sort=sort) 23 | predict_pos = random.randint(1, len(item_list) - 1) 24 | train_items = item_list[max(0, predict_pos-max_seq_len):predict_pos] 25 | pad_train_items = np.zeros(max_seq_len, np.int32) 26 | pad_train_items[:len(train_items)] = train_items 27 | input_npy[ind] = (pad_train_items, 28 | len(train_items), 29 | item_list[predict_pos], 30 | user_feature[user_id]['user_gender'], 31 | user_feature[user_id]['user_geo']) 32 | yield input_npy 33 | 34 | s = Sampler(dataset=dataset, generate_batch=batch, num_process=num_process) 35 | 36 | return s 37 | -------------------------------------------------------------------------------- /openrec/tf2/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ylongqi/openrec/a00de2345844858194ef43ab6845342114a5be93/openrec/tf2/__init__.py -------------------------------------------------------------------------------- /openrec/tf2/data/__init__.py: -------------------------------------------------------------------------------- 1 | from openrec.tf2.data.utils import _DataStore 2 | from openrec.tf2.data.utils import _ParallelDataset 3 | from openrec.tf2.data.dataset import Dataset 4 | -------------------------------------------------------------------------------- /openrec/tf2/metrics/__init__.py: -------------------------------------------------------------------------------- 1 | from openrec.tf2.metrics.ranking_metrics import * 2 | from openrec.tf2.metrics.dict_mean import DictMean 
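# Usage sketch for the ranking metrics re-exported above (assumed shapes:
# each tensor is [num_users, total_items]; pos_mask flags held-out positives,
# excl_mask flags items to exclude from ranking, e.g. training interactions):
#
#   import tensorflow as tf
#   from openrec.tf2.metrics import AUC
#
#   pos_mask = tf.constant([[False, True, False, False]])
#   excl_mask = tf.constant([[True, False, False, False]])
#   pred = tf.constant([[0.9, 0.8, 0.1, 0.3]])
#   AUC(pos_mask=pos_mask, pred=pred, excl_mask=excl_mask)  # -> [1.0]
#   # Both evaluated negatives (0.1 and 0.3) score below the positive (0.8).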
-------------------------------------------------------------------------------- /openrec/tf2/metrics/dict_mean.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import numpy as np 3 | 4 | class DictMean: 5 | 6 | def __init__(self, state_shape): 7 | 8 | self._states = {} 9 | for key in state_shape: 10 | shape = state_shape[key] 11 | self._states[key] = {'sum': tf.Variable(tf.zeros(shape, dtype=tf.float32)), 12 | 'count': tf.Variable(tf.zeros([], dtype=tf.float32))} 13 | 14 | def reset_states(self): 15 | 16 | for key in self._states: 17 | self._states[key]['sum'].assign(tf.zeros(tf.shape(self._states[key]['sum']), 18 | dtype=tf.float32)) 19 | self._states[key]['count'].assign(0.) 20 | 21 | def update_state(self, state): 22 | 23 | for key in state: 24 | self._states[key]['sum'].assign_add(tf.math.reduce_sum(state[key], axis=0)) 25 | self._states[key]['count'].assign_add(tf.cast(tf.shape(state[key])[0], tf.float32)) 26 | 27 | def result(self): 28 | 29 | result = {} 30 | for key in self._states: 31 | result[key] = self._states[key]['sum'] / self._states[key]['count'] 32 | return result 33 | -------------------------------------------------------------------------------- /openrec/tf2/metrics/ranking_metrics.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import numpy as np 3 | 4 | def _log2(value): 5 | 6 | return tf.math.log(value) / tf.math.log(2.0) 7 | 8 | def AUC(pos_mask, pred, excl_mask): 9 | 10 | def _map_fn(tups): 11 | 12 | user_pos_mask, user_pred, user_excl_mask = tups 13 | 14 | eval_mask = tf.math.logical_not(tf.math.logical_or(user_pos_mask, user_excl_mask)) 15 | eval_pred = user_pred[eval_mask] 16 | pos_pred = user_pred[user_pos_mask] 17 | eval_num = tf.math.count_nonzero(eval_mask, dtype=tf.int32) 18 | user_auc = tf.math.count_nonzero(eval_pred <= tf.reshape(pos_pred, (-1, 1)), dtype=tf.float32) \ 19 | / tf.cast(tf.size(pos_pred) * eval_num, dtype=tf.float32) 20 | 21 | return user_auc 22 | 23 | auc = tf.map_fn(_map_fn, (pos_mask, pred, excl_mask), parallel_iterations=10, dtype=tf.float32) 24 | 25 | return auc 26 | 27 | 28 | def NDCG(pos_mask, pred, excl_mask, at=[100]): 29 | 30 | def _map_fn(tups): 31 | 32 | user_pos_mask, user_pred, user_excl_mask = tups 33 | user_pred = tf.math.exp(user_pred) * tf.cast(tf.math.logical_not(user_excl_mask), tf.float32) 34 | pos_pred = user_pred[user_pos_mask] 35 | rank_above = tf.math.count_nonzero(user_pred > tf.reshape(pos_pred, (-1, 1)), axis=1, dtype=tf.float32) 36 | rank_above = tf.tile(tf.expand_dims(rank_above, 0), [len(at), 1]) 37 | tf_at = tf.reshape(tf.constant(at, dtype=tf.float32), [-1, 1]) 38 | log_recipr = tf.math.reciprocal(_log2(rank_above+2)) 39 | 40 | user_ndcg = tf.reduce_sum(log_recipr * tf.cast(rank_above < tf_at, tf.float32), 41 | axis=1) 42 | 43 | return user_ndcg 44 | 45 | ndcg = tf.map_fn(_map_fn, (pos_mask, pred, excl_mask), parallel_iterations=10, dtype=tf.float32) 46 | 47 | return ndcg 48 | 49 | 50 | def Recall(pos_mask, pred, excl_mask, at=[100]): 51 | 52 | 53 | def _map_fn(tups): 54 | 55 | user_pos_mask, user_pred, user_excl_mask = tups 56 | user_pred = tf.math.exp(user_pred) * tf.cast(tf.math.logical_not(user_excl_mask), tf.float32) 57 | pos_pred = user_pred[user_pos_mask] 58 | rank_above = tf.math.count_nonzero(user_pred > tf.reshape(pos_pred, (-1, 1)), axis=1, dtype=tf.float32) 59 | rank_above = tf.tile(tf.expand_dims(rank_above, 0), [len(at), 1]) 60 | tf_at = 
tf.reshape(tf.constant(at, dtype=tf.float32), [-1, 1]) 61 | 62 | user_recall = tf.math.count_nonzero(rank_above < tf_at, axis=1, dtype=tf.float32) / \ 63 | tf.cast(tf.size(pos_pred), tf.float32) 64 | 65 | return user_recall 66 | 67 | recall = tf.map_fn(_map_fn, (pos_mask, pred, excl_mask), parallel_iterations=10, dtype=tf.float32) 68 | 69 | return recall -------------------------------------------------------------------------------- /openrec/tf2/modules/__init__.py: -------------------------------------------------------------------------------- 1 | from openrec.tf2.modules.latent_factor import LatentFactor 2 | from openrec.tf2.modules.pairwise_log_loss import PairwiseLogLoss 3 | from openrec.tf2.modules.pointwise_mse_loss import PointwiseMSELoss 4 | from openrec.tf2.modules.multi_layer_perceptron import MLP 5 | from openrec.tf2.modules.second_order_feature_interaction import SecondOrderFeatureInteraction -------------------------------------------------------------------------------- /openrec/tf2/modules/latent_factor.py: -------------------------------------------------------------------------------- 1 | from tensorflow.keras.layers import Embedding 2 | import tensorflow as tf 3 | 4 | class LatentFactor(Embedding): 5 | 6 | def __init__(self, num_instances, dim, zero_init=False, name=None): 7 | 8 | if zero_init: 9 | initializer = 'zeros' 10 | else: 11 | initializer = 'uniform' 12 | super(LatentFactor, self).__init__(input_dim=num_instances, 13 | output_dim=dim, 14 | embeddings_initializer=initializer, 15 | name=name) 16 | 17 | def censor(self, censor_id): 18 | 19 | unique_censor_id, _ = tf.unique(censor_id) 20 | embedding_gather = tf.gather(self.variables[0], indices=unique_censor_id) 21 | norm = tf.norm(embedding_gather, axis=1, keepdims=True) 22 | return self.variables[0].scatter_nd_update(indices=tf.expand_dims(unique_censor_id, 1), 23 | updates=embedding_gather / tf.math.maximum(norm, 0.1)) -------------------------------------------------------------------------------- /openrec/tf2/modules/multi_layer_perceptron.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from tensorflow.keras import Sequential 3 | from tensorflow.keras.layers import Dense 4 | 5 | def MLP(units_list, use_bias=True, activation='relu', out_activation=None): 6 | 7 | mlp = Sequential() 8 | 9 | for units in units_list[:-1]: 10 | mlp.add(Dense(units, 11 | activation=activation, 12 | use_bias=use_bias)) 13 | 14 | mlp.add(Dense(units_list[-1], 15 | activation=out_activation, 16 | use_bias=use_bias)) 17 | 18 | return mlp -------------------------------------------------------------------------------- /openrec/tf2/modules/pairwise_log_loss.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from tensorflow.keras.layers import Layer 3 | 4 | class PairwiseLogLoss(Layer): 5 | 6 | def __call__(self, user_vec, p_item_vec, n_item_vec, p_item_bias=None, n_item_bias=None): 7 | 8 | outputs = super(PairwiseLogLoss, self).__call__((user_vec, 9 | p_item_vec, 10 | n_item_vec, 11 | p_item_bias, 12 | n_item_bias)) 13 | return outputs 14 | 15 | def call(self, inputs): 16 | 17 | user_vec, p_item_vec, n_item_vec, p_item_bias, n_item_bias = inputs 18 | 19 | dot_user_pos = tf.math.reduce_sum(user_vec*p_item_vec, 20 | axis=1, 21 | keepdims=True) 22 | dot_user_neg = tf.math.reduce_sum(user_vec*n_item_vec, 23 | axis=1, 24 | keepdims=True) 25 | 26 | if p_item_bias is not None: 27 | dot_user_pos += p_item_bias 28 | 
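# Item biases, when provided, are added to both scores; the loss below is
# then BPR's objective, -mean(log sigmoid(s_pos - s_neg)). Clamping the
# score difference at -30.0 caps the per-pair loss near 30, so a single
# very hard pair cannot dominate the batch gradient.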
29 | if n_item_bias is not None: 30 | dot_user_neg += n_item_bias 31 | 32 | loss = -tf.math.reduce_mean(tf.math.log_sigmoid(tf.math.maximum(dot_user_pos-dot_user_neg, -30.0))) 33 | 34 | return loss -------------------------------------------------------------------------------- /openrec/tf2/modules/pointwise_mse_loss.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from tensorflow.keras.layers import Layer 3 | 4 | class PointwiseMSELoss(Layer): 5 | 6 | def __init__(self, a=1.0, b=1.0, sigmoid=False): 7 | 8 | super(PointwiseMSELoss, self).__init__() 9 | self._a = a 10 | self._b = b 11 | self._sigmoid = sigmoid 12 | 13 | def __call__(self, user_vec, item_vec, item_bias, label): 14 | 15 | outputs = super(PointwiseMSELoss, self).__call__((user_vec, item_vec, item_bias, label)) 16 | return outputs 17 | 18 | def call(self, inputs): 19 | 20 | user_vec, item_vec, item_bias, label = inputs 21 | 22 | dot_user_item = tf.math.reduce_sum(tf.math.multiply(user_vec, item_vec), 23 | axis=1, keepdims=False, name="dot_user_item") 24 | 25 | if self._sigmoid: 26 | prediction = tf.math.sigmoid(dot_user_item + tf.reshape(item_bias, [-1])) 27 | else: 28 | prediction = dot_user_item + tf.reshape(item_bias, [-1]) 29 | 30 | label_weight = (self._a - self._b) * label + self._b 31 | return tf.math.reduce_sum(label_weight * tf.square(label - prediction)) 32 | -------------------------------------------------------------------------------- /openrec/tf2/modules/second_order_feature_interaction.py: -------------------------------------------------------------------------------- 1 | from tensorflow.keras.layers import Layer 2 | import tensorflow as tf 3 | 4 | class SecondOrderFeatureInteraction(Layer): 5 | 6 | def __init__(self, self_interaction=False): 7 | 8 | self._self_interaction = self_interaction 9 | 10 | super(SecondOrderFeatureInteraction, self).__init__() 11 | 12 | def call(self, inputs): 13 | 14 | ''' 15 | inputs: list of features with shape [batch_size, feature_dim] 16 | ''' 17 | 18 | batch_size = tf.shape(inputs[0])[0] 19 | 20 | concat_features = tf.stack(inputs, axis=1) 21 | dot_products = tf.linalg.LinearOperatorLowerTriangular(tf.matmul(concat_features, concat_features, transpose_b=True)).to_dense() 22 | 23 | ones = tf.ones_like(dot_products) 24 | mask = tf.linalg.band_part(ones, 0, -1) 25 | 26 | if not self._self_interaction: 27 | mask = mask - tf.linalg.band_part(ones, 0, 0) 28 | out_dim = int(len(inputs) * (len(inputs)-1) / 2) 29 | else: 30 | out_dim = int(len(inputs) * (len(inputs)+1) / 2) 31 | 32 | flat_interactions = tf.reshape(tf.boolean_mask(dot_products, mask), (batch_size, out_dim)) 33 | 34 | return flat_interactions 35 | -------------------------------------------------------------------------------- /openrec/tf2/recommenders/__init__.py: -------------------------------------------------------------------------------- 1 | from openrec.tf2.recommenders.bpr import BPR 2 | from openrec.tf2.recommenders.wrmf import WRMF 3 | from openrec.tf2.recommenders.dlrm import DLRM 4 | from openrec.tf2.recommenders.gmf import GMF 5 | from openrec.tf2.recommenders.ucml import UCML -------------------------------------------------------------------------------- /openrec/tf2/recommenders/bpr.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from tensorflow.keras import Model 3 | from openrec.tf2.modules import LatentFactor, PairwiseLogLoss 4 | 5 | class BPR(Model): 6 | 7 | def 
__init__(self, dim_user_embed, dim_item_embed, total_users, total_items): 8 | 9 | super(BPR, self).__init__() 10 | self.user_latent_factor = LatentFactor(num_instances=total_users, 11 | dim=dim_user_embed, 12 | name='user_latent_factor') 13 | self.item_latent_factor = LatentFactor(num_instances=total_items, 14 | dim=dim_item_embed, 15 | name='item_latent_factor') 16 | self.item_bias = LatentFactor(num_instances=total_items, 17 | dim=1, 18 | name='item_bias') 19 | self.pairwise_log_loss = PairwiseLogLoss() 20 | 21 | def call(self, user_id, p_item_id, n_item_id): 22 | 23 | user_vec = self.user_latent_factor(user_id) 24 | p_item_vec = self.item_latent_factor(p_item_id) 25 | p_item_bias = self.item_bias(p_item_id) 26 | n_item_vec = self.item_latent_factor(n_item_id) 27 | n_item_bias = self.item_bias(n_item_id) 28 | 29 | loss = self.pairwise_log_loss(user_vec=user_vec, 30 | p_item_vec=p_item_vec, 31 | p_item_bias=p_item_bias, 32 | n_item_vec=n_item_vec, 33 | n_item_bias=n_item_bias) 34 | 35 | l2_loss = tf.nn.l2_loss(user_vec) + tf.nn.l2_loss(p_item_vec) + tf.nn.l2_loss(n_item_vec) 36 | 37 | return loss, l2_loss 38 | 39 | def inference(self, user_id): 40 | 41 | user_vec = self.user_latent_factor(user_id) 42 | return tf.linalg.matmul(user_vec, self.item_latent_factor.variables[0], transpose_b=True) + \ 43 | tf.reshape(self.item_bias.variables[0], [-1]) -------------------------------------------------------------------------------- /openrec/tf2/recommenders/dlrm.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import tensorflow as tf 3 | from tensorflow.keras import Model 4 | from openrec.tf2.modules import LatentFactor, SecondOrderFeatureInteraction, MLP 5 | 6 | class DLRM(Model): 7 | 8 | def __init__( 9 | self, 10 | m_spa, 11 | ln_emb, 12 | ln_bot, 13 | ln_top, 14 | arch_interaction_op='dot', 15 | arch_interaction_itself=False, 16 | sigmoid_bot=False, 17 | sigmoid_top=True, 18 | loss_func='mse', 19 | loss_threshold=0.0): 20 | 21 | ''' 22 | m_spa: the dimensionality of sparse feature embeddings 23 | ln_emb: list of embedding table sizes (num_instances), one per sparse feature 24 | ln_bot: list of layer sizes for the bottom MLP 25 | ln_top: list of layer sizes for the top MLP 26 | ''' 27 | 28 | super(DLRM, self).__init__() 29 | 30 | self._loss_threshold = loss_threshold 31 | self._loss_func = loss_func 32 | self._latent_factors = [LatentFactor(num_instances=num, 33 | dim=m_spa) for num in ln_emb] 34 | self._mlp_bot = MLP(units_list=ln_bot, 35 | out_activation='sigmoid' if sigmoid_bot else 'relu') 36 | self._mlp_top = MLP(units_list=ln_top, 37 | out_activation='sigmoid' if sigmoid_top else 'relu') 38 | 39 | self._dot_interaction = None 40 | if arch_interaction_op == 'dot': 41 | self._dot_interaction = SecondOrderFeatureInteraction( 42 | self_interaction=arch_interaction_itself 43 | ) 44 | 45 | elif arch_interaction_op != 'cat': 46 | sys.exit( 47 | "ERROR: arch_interaction_op=" 48 | + arch_interaction_op 49 | + " is not supported" 50 | ) 51 | 52 | if loss_func == 'mse': 53 | self._loss = tf.keras.losses.MeanSquaredError() 54 | elif loss_func == 'bce': 55 | self._loss = tf.keras.losses.BinaryCrossentropy() 56 | else: 57 | sys.exit( 58 | "ERROR: loss_func=" 59 | + loss_func 60 | + " is not supported" 61 | ) 62 | 63 | def call(self, dense_features, sparse_features, label): 64 | 65 | ''' 66 | dense_features shape: [batch_size, num of dense features] 67 | sparse_features shape: [batch_size, num_of_sparse_features] 68 | label shape: [batch_size] 69 | ''' 70 | 71 | prediction = 
self.inference(dense_features, sparse_features) 72 | loss = self._loss(y_true=label, 73 | y_pred=prediction) 74 | return loss 75 | 76 | def inference(self, dense_features, sparse_features): 77 | 78 | ''' 79 | dense_features shape: [batch_size, num of dense features] 80 | sparse_features shape: [batch_size, num_of_sparse_features] 81 | ''' 82 | 83 | sparse_emb_vecs = list(map(lambda pair: pair[1](pair[0]), 84 | zip(tf.unstack(sparse_features, axis=1), 85 | self._latent_factors))) 86 | 87 | dense_emb_vec = self._mlp_bot(dense_features) 88 | 89 | if self._dot_interaction is not None: 90 | prediction = self._mlp_top(tf.concat([dense_emb_vec, 91 | self._dot_interaction(sparse_emb_vecs + [dense_emb_vec])], 92 | axis=1)) 93 | else: 94 | prediction = self._mlp_top(tf.concat(sparse_emb_vecs + [dense_emb_vec], 95 | axis=1)) 96 | 97 | if 0.0 < self._loss_threshold < 1.0: 98 | prediction = tf.clip_by_value(prediction, self._loss_threshold, 1.0 - self._loss_threshold) 99 | 100 | return tf.reshape(prediction, [-1]) 101 | -------------------------------------------------------------------------------- /openrec/tf2/recommenders/gmf.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from tensorflow.keras import Model 3 | from openrec.tf2.modules import LatentFactor, MLP 4 | 5 | class GMF(Model): 6 | 7 | def __init__(self, dim_user_embed, dim_item_embed, total_users, total_items): 8 | 9 | super(GMF, self).__init__() 10 | self.user_latent_factor = LatentFactor(num_instances=total_users, 11 | dim=dim_user_embed, 12 | name='user_latent_factor') 13 | self.item_latent_factor = LatentFactor(num_instances=total_items, 14 | dim=dim_item_embed, 15 | name='item_latent_factor') 16 | self.item_bias = LatentFactor(num_instances=total_items, 17 | dim=1, 18 | name='item_bias') 19 | self.mlp = MLP(units_list=[1], use_bias=False) 20 | self._bce = tf.keras.losses.BinaryCrossentropy(from_logits=True) 21 | 22 | def call(self, user_id, item_id, label): 23 | 24 | user_vec = self.user_latent_factor(user_id) 25 | item_vec = self.item_latent_factor(item_id) 26 | item_bias = self.item_bias(item_id) 27 | 28 | logit = tf.reshape(self.mlp(user_vec * item_vec) + item_bias, [-1]) 29 | loss = self._bce(y_true=label, y_pred=logit) 30 | 31 | l2_loss = tf.nn.l2_loss(user_vec) + tf.nn.l2_loss(item_vec) \ 32 | + sum([tf.nn.l2_loss(v) for v in self.mlp.trainable_variables]) 33 | 34 | return loss, l2_loss 35 | 36 | def inference(self, user_id): 37 | 38 | user_vec = self.user_latent_factor(user_id) 39 | logit = tf.squeeze(self.mlp(tf.expand_dims(user_vec, 1) * self.item_latent_factor.variables[0]), axis=-1) \ 40 | + tf.reshape(self.item_bias.variables[0], [-1]) 41 | return logit -------------------------------------------------------------------------------- /openrec/tf2/recommenders/ucml.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from tensorflow.keras import Model 3 | from openrec.tf2.modules import LatentFactor 4 | 5 | class UCML(Model): 6 | 7 | def __init__(self, dim_user_embed, dim_item_embed, total_users, total_items, margin=0.5): 8 | 9 | super(UCML, self).__init__() 10 | self.user_latent_factor = LatentFactor(num_instances=total_users, 11 | dim=dim_user_embed, 12 | name='user_latent_factor') 13 | self.item_latent_factor = LatentFactor(num_instances=total_items, 14 | dim=dim_item_embed, 15 | name='item_latent_factor') 16 | self.item_bias = LatentFactor(num_instances=total_items, 
17 | dim=1, 18 | name='item_bias') 19 | self.margin = margin 20 | 21 | def call(self, user_id, p_item_id, n_item_id): 22 | 23 | user_vec = self.user_latent_factor(user_id) 24 | p_item_vec = self.item_latent_factor(p_item_id) 25 | p_item_bias = self.item_bias(p_item_id) 26 | n_item_vec = self.item_latent_factor(n_item_id) 27 | n_item_bias = self.item_bias(n_item_id) 28 | 29 | l2_user_pos = tf.math.reduce_sum(tf.math.square(user_vec - p_item_vec), 30 | axis=-1, 31 | keepdims=True) 32 | l2_user_neg = tf.math.reduce_sum(tf.math.square(user_vec - n_item_vec), 33 | axis=-1, 34 | keepdims=True) 35 | pos_score = (-l2_user_pos) + p_item_bias 36 | neg_score = (-l2_user_neg) + n_item_bias 37 | diff = pos_score - neg_score 38 | 39 | loss = tf.reduce_sum(tf.maximum(self.margin - diff, 0)) 40 | l2_loss = tf.nn.l2_loss(user_vec) + tf.nn.l2_loss(p_item_vec) + tf.nn.l2_loss(n_item_vec) 41 | 42 | return loss, l2_loss 43 | 44 | def censor_vec(self, user_id, p_item_id, n_item_id): 45 | 46 | return self.user_latent_factor.censor(user_id), \ 47 | self.item_latent_factor.censor(p_item_id), \ 48 | self.item_latent_factor.censor(n_item_id) 49 | 50 | def inference(self, user_id): 51 | 52 | user_vec = self.user_latent_factor(user_id) 53 | return -tf.math.reduce_sum(tf.math.square(tf.expand_dims(user_vec, axis=1) - self.item_latent_factor.variables[0]), axis=-1, keepdims=False) + tf.reshape(self.item_bias.variables[0], [-1]) -------------------------------------------------------------------------------- /openrec/tf2/recommenders/wrmf.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from tensorflow.keras import Model 3 | from openrec.tf2.modules import LatentFactor, PointwiseMSELoss 4 | 5 | class WRMF(Model): 6 | 7 | def __init__(self, dim_user_embed, dim_item_embed, total_users, total_items, a=1.0, b=1.0): 8 | 9 | super(WRMF, self).__init__() 10 | self.user_latent_factor = LatentFactor(num_instances=total_users, 11 | dim=dim_user_embed, 12 | name='user_latent_factor') 13 | self.item_latent_factor = LatentFactor(num_instances=total_items, 14 | dim=dim_item_embed, 15 | name='item_latent_factor') 16 | self.item_bias = LatentFactor(num_instances=total_items, 17 | dim=1, 18 | name='item_bias') 19 | self.pointwise_mse_loss = PointwiseMSELoss(a=a, b=b) 20 | 21 | def call(self, user_id, item_id, label): 22 | 23 | user_vec = self.user_latent_factor(user_id) 24 | item_vec = self.item_latent_factor(item_id) 25 | item_bias = self.item_bias(item_id) 26 | 27 | loss = self.pointwise_mse_loss(user_vec=user_vec, 28 | item_vec=item_vec, 29 | item_bias=item_bias, 30 | label=label) 31 | 32 | l2_loss = tf.nn.l2_loss(user_vec) + tf.nn.l2_loss(item_vec) 33 | 34 | return loss, l2_loss 35 | 36 | def inference(self, user_id): 37 | 38 | user_vec = self.user_latent_factor(user_id) 39 | return tf.linalg.matmul(user_vec, self.item_latent_factor.variables[0], transpose_b=True) + \ 40 | tf.reshape(self.item_bias.variables[0], [-1]) -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [metadata] 2 | description-file = README.md 3 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import find_packages, setup 2 | 3 | 4 | setup( 5 | name='openrec', 6 | version='0.2.5', 7 | packages=find_packages(exclude=("tutorials",)), 
8 | description="An open-source and modular library for neural network-inspired recommendation algorithms", 9 | url="http://openrec.ai", 10 | license='Apache 2.0', 11 | author='Longqi Yang', 12 | author_email='ylongqi@cs.cornell.edu', 13 | install_requires=[ 14 | 'tqdm>=4.15.0', 15 | 'numpy>=1.13.0', 16 | 'termcolor>=1.1.0' 17 | ], 18 | classifiers=['Development Status :: 3 - Alpha', 19 | 'License :: OSI Approved :: Apache Software License', 20 | 'Programming Language :: Python :: 2.7', 21 | 'Programming Language :: Python :: 3.5', 22 | 'Programming Language :: Python :: 3.6', 23 | 'Topic :: Scientific/Engineering :: Artificial Intelligence'], 24 | ) 25 | -------------------------------------------------------------------------------- /tf1_examples/bpr_citeulike.py: -------------------------------------------------------------------------------- 1 | from openrec import ModelTrainer 2 | from openrec.tf1.utils import Dataset 3 | from openrec.tf1.recommenders import BPR 4 | from openrec.tf1.utils.evaluators import AUC 5 | from openrec.tf1.utils.samplers import RandomPairwiseSampler 6 | from openrec.tf1.utils.samplers import EvaluationSampler 7 | import dataloader 8 | 9 | raw_data = dataloader.load_citeulike() 10 | dim_embed = 50 11 | total_iter = int(1e5) 12 | batch_size = 1000 13 | eval_iter = 10000 14 | save_iter = eval_iter 15 | 16 | train_dataset = Dataset(raw_data['train_data'], raw_data['total_users'], raw_data['total_items'], name='Train') 17 | val_dataset = Dataset(raw_data['val_data'], raw_data['total_users'], raw_data['total_items'], name='Val', num_negatives=500) 18 | test_dataset = Dataset(raw_data['test_data'], raw_data['total_users'], raw_data['total_items'], name='Test', num_negatives=500) 19 | 20 | train_sampler = RandomPairwiseSampler(batch_size=batch_size, dataset=train_dataset, num_process=5) 21 | val_sampler = EvaluationSampler(batch_size=batch_size, dataset=val_dataset) 22 | test_sampler = EvaluationSampler(batch_size=batch_size, dataset=test_dataset) 23 | 24 | bpr_model = BPR(batch_size=batch_size, total_users=train_dataset.total_users(), total_items=train_dataset.total_items(), 25 | dim_user_embed=dim_embed, dim_item_embed=dim_embed, save_model_dir='bpr_recommender/', train=True, serve=True) 26 | 27 | model_trainer = ModelTrainer(model=bpr_model) 28 | 29 | auc_evaluator = AUC() 30 | model_trainer.train(total_iter=total_iter, eval_iter=eval_iter, save_iter=save_iter, train_sampler=train_sampler, 31 | eval_samplers=[val_sampler, test_sampler], evaluators=[auc_evaluator]) -------------------------------------------------------------------------------- /tf1_examples/dataloader.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | def load_amazon_book(): 4 | 5 | raw_data = dict() 6 | raw_data['total_users'] = 99473 7 | raw_data['total_items'] = 450166 8 | 9 | raw_data['train_data'] = np.load('dataset/amazon/user_data_train.npy') 10 | raw_data['val_data'] = np.load('dataset/amazon/user_data_val.npy') 11 | raw_data['test_data'] = np.load('dataset/amazon/user_data_test.npy') 12 | 13 | raw_data['item_features'] = np.array(np.memmap('dataset/amazon/book_features_update.mem', 14 | dtype=np.float32, mode='r', shape=(raw_data['total_items'], 4096))) 15 | raw_data['user_features'] = np.load('dataset/amazon/user_features_categories.npy') 16 | return raw_data 17 | 18 | def load_citeulike(): 19 | 20 | raw_data = dict() 21 | raw_data['total_users'] = 5551 22 | raw_data['total_items'] = 16980 23 | 24 | raw_data['train_data'] = 
np.load('dataset/citeulike/user_data_train.npy') 25 | raw_data['val_data'] = np.load('dataset/citeulike/user_data_val.npy') 26 | raw_data['test_data'] = np.load('dataset/citeulike/user_data_test.npy') 27 | 28 | return raw_data 29 | 30 | def load_tradesy(): 31 | 32 | raw_data = dict() 33 | raw_data['total_users'] = 19243 34 | raw_data['total_items'] = 165906 35 | 36 | raw_data['train_data'] = np.load('dataset/tradesy/user_data_train.npy') 37 | raw_data['val_data'] = np.load('dataset/tradesy/user_data_val.npy') 38 | raw_data['test_data'] = np.load('dataset/tradesy/user_data_test.npy') 39 | 40 | raw_data['item_features'] = np.load('dataset/tradesy/item_features.npy') / 32.671101 41 | return raw_data 42 | 43 | 44 | -------------------------------------------------------------------------------- /tf1_examples/legacy_examples/config.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | sess_config = tf.ConfigProto() 4 | sess_config.gpu_options.allow_growth = True 5 | sess_config.gpu_options.visible_device_list = '0' 6 | -------------------------------------------------------------------------------- /tf1_examples/legacy_examples/dataloader.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | def load_amazon_book(): 4 | 5 | raw_data = dict() 6 | raw_data['max_user'] = 99473 7 | raw_data['max_item'] = 450166 8 | 9 | raw_data['train_data'] = np.load('dataset/amazon/user_data_train.npy') 10 | raw_data['val_data'] = np.load('dataset/amazon/user_data_val.npy') 11 | raw_data['test_data'] = np.load('dataset/amazon/user_data_test.npy') 12 | 13 | raw_data['item_features'] = np.array(np.memmap('dataset/amazon/book_features_update.mem', 14 | dtype=np.float32, mode='r', shape=(raw_data['max_item'], 4096))) 15 | raw_data['user_features'] = np.load('dataset/amazon/user_features_categories.npy') 16 | return raw_data 17 | 18 | def load_citeulike(): 19 | 20 | raw_data = dict() 21 | raw_data['max_user'] = 5551 22 | raw_data['max_item'] = 16980 23 | 24 | raw_data['train_data'] = np.load('dataset/citeulike/user_data_train.npy') 25 | raw_data['val_data'] = np.load('dataset/citeulike/user_data_val.npy') 26 | raw_data['test_data'] = np.load('dataset/citeulike/user_data_test.npy') 27 | 28 | return raw_data 29 | 30 | def load_tradesy(): 31 | 32 | raw_data = dict() 33 | raw_data['max_user'] = 19243 34 | raw_data['max_item'] = 165906 35 | 36 | raw_data['train_data'] = np.load('dataset/tradesy/user_data_train.npy') 37 | raw_data['val_data'] = np.load('dataset/tradesy/user_data_val.npy') 38 | raw_data['test_data'] = np.load('dataset/tradesy/user_data_test.npy') 39 | 40 | raw_data['item_features'] = np.load('dataset/tradesy/item_features.npy') / 32.671101 41 | return raw_data 42 | 43 | 44 | -------------------------------------------------------------------------------- /tf1_examples/legacy_examples/exp_bpr.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | sys.path.append(os.getcwd()) 4 | 5 | from openrec.tf1.legacy import ImplicitModelTrainer 6 | from openrec.tf1.legacy.utils import ImplicitDataset 7 | from openrec.tf1.legacy.recommenders import BPR 8 | from openrec.tf1.legacy.utils.evaluators import AUC 9 | from openrec.tf1.legacy.utils.samplers import PairwiseSampler 10 | from config import sess_config 11 | import dataloader 12 | 13 | raw_data = dataloader.load_citeulike() 14 | batch_size = 1000 15 | test_batch_size = 100 16 | 
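# display_itr below sets the legacy trainer's reporting cadence: evaluation on
# the eval datasets runs once every display_itr training iterations.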
display_itr = 10000 17 | 18 | train_dataset = ImplicitDataset(raw_data['train_data'], raw_data['max_user'], raw_data['max_item'], name='Train') 19 | val_dataset = ImplicitDataset(raw_data['val_data'], raw_data['max_user'], raw_data['max_item'], name='Val') 20 | test_dataset = ImplicitDataset(raw_data['test_data'], raw_data['max_user'], raw_data['max_item'], name='Test') 21 | 22 | bpr_model = BPR(batch_size=batch_size, max_user=train_dataset.max_user(), max_item=train_dataset.max_item(), 23 | dim_embed=20, opt='Adam', sess_config=sess_config) 24 | sampler = PairwiseSampler(batch_size=batch_size, dataset=train_dataset, num_process=5) 25 | model_trainer = ImplicitModelTrainer(batch_size=batch_size, test_batch_size=test_batch_size, 26 | train_dataset=train_dataset, model=bpr_model, sampler=sampler) 27 | auc_evaluator = AUC() 28 | 29 | model_trainer.train(num_itr=int(1e5), display_itr=display_itr, eval_datasets=[val_dataset, test_dataset], 30 | evaluators=[auc_evaluator]) 31 | -------------------------------------------------------------------------------- /tf1_examples/legacy_examples/exp_cml.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | sys.path.append(os.getcwd()) 4 | 5 | from openrec.tf1.legacy import ImplicitModelTrainer 6 | from openrec.tf1.legacy.utils import ImplicitDataset 7 | from openrec.tf1.legacy.recommenders import CML 8 | from openrec.tf1.legacy.utils.evaluators import AUC 9 | from openrec.tf1.legacy.utils.samplers import PairwiseSampler 10 | from config import sess_config 11 | import dataloader 12 | 13 | raw_data = dataloader.load_citeulike() 14 | batch_size = 1000 15 | test_batch_size = 100 16 | display_itr = 10000 17 | 18 | train_dataset = ImplicitDataset(raw_data['train_data'], raw_data['max_user'], raw_data['max_item'], name='Train') 19 | val_dataset = ImplicitDataset(raw_data['val_data'], raw_data['max_user'], raw_data['max_item'], name='Val') 20 | test_dataset = ImplicitDataset(raw_data['test_data'], raw_data['max_user'], raw_data['max_item'], name='Test') 21 | 22 | cml_model = CML(batch_size=batch_size, max_user=train_dataset.max_user(), max_item=train_dataset.max_item(), 23 | dim_embed=20, opt='Adam', sess_config=sess_config) 24 | sampler = PairwiseSampler(batch_size=batch_size, dataset=train_dataset, num_process=5) 25 | model_trainer = ImplicitModelTrainer(batch_size=batch_size, test_batch_size=test_batch_size, 26 | train_dataset=train_dataset, model=cml_model, sampler=sampler) 27 | auc_evaluator = AUC() 28 | 29 | model_trainer.train(num_itr=int(1e5), display_itr=display_itr, eval_datasets=[val_dataset, test_dataset], 30 | evaluators=[auc_evaluator]) 31 | -------------------------------------------------------------------------------- /tf1_examples/legacy_examples/exp_concat_visual_bpr.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | sys.path.append(os.getcwd()) 4 | 5 | from openrec.tf1.legacy import ImplicitModelTrainer 6 | from openrec.tf1.legacy.utils import ImplicitDataset 7 | from openrec.tf1.legacy.recommenders import ConcatVisualBPR 8 | from openrec.tf1.legacy.utils.evaluators import AUC 9 | from openrec.tf1.legacy.utils.samplers import PairwiseSampler 10 | from config import sess_config 11 | import dataloader 12 | 13 | raw_data = dataloader.load_tradesy() 14 | batch_size = 1000 15 | test_batch_size = 100 16 | item_serving_size = 1000 17 | display_itr = 10000 18 | 19 | train_dataset = ImplicitDataset(raw_data['train_data'], 
raw_data['max_user'], raw_data['max_item'], name='Train') 20 | val_dataset = ImplicitDataset(raw_data['val_data'], raw_data['max_user'], raw_data['max_item'], name='Val') 21 | test_dataset = ImplicitDataset(raw_data['test_data'], raw_data['max_user'], raw_data['max_item'], name='Test') 22 | 23 | model = ConcatVisualBPR(batch_size=batch_size, max_user=raw_data['max_user'], max_item=raw_data['max_item'], item_serving_size=item_serving_size, 24 | dim_embed=20, dim_ve=10, item_f_source=raw_data['item_features'], l2_reg=None, sess_config=sess_config) 25 | sampler = PairwiseSampler(batch_size=batch_size, dataset=train_dataset, num_process=5) 26 | model_trainer = ImplicitModelTrainer(batch_size=batch_size, test_batch_size=test_batch_size, item_serving_size=item_serving_size, 27 | train_dataset=train_dataset, model=model, sampler=sampler) 28 | 29 | auc_evaluator = AUC() 30 | 31 | model_trainer.train(num_itr=int(1e5), display_itr=display_itr, eval_datasets=[val_dataset, test_dataset], 32 | evaluators=[auc_evaluator], num_negatives=1000) 33 | -------------------------------------------------------------------------------- /tf1_examples/legacy_examples/exp_itr_mlp.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from openrec.tf1.legacy import ItrMLPModelTrainer 3 | from openrec.tf1.legacy.utils import Dataset 4 | from openrec.tf1.legacy.recommenders import ItrMLP 5 | from openrec.tf1.legacy.utils.evaluators import MSE 6 | from openrec.tf1.legacy.utils.samplers import ExplicitSampler 7 | 8 | batch_size = 32 9 | test_batch_size = 32 10 | display_itr = 4096 11 | update_itr = 4096 12 | 13 | max_user = 480189 14 | max_item = 17770 15 | 16 | pretrained_user_embeddings = np.load('dataset/netflix/pretrained_user_embeddings.npy') 17 | pretrained_item_embeddings = np.load('dataset/netflix/pretrained_item_embeddings.npy') 18 | netflix_ratings = np.load('dataset/netflix/netflix_ratings_formatted.npy') 19 | 20 | train_dataset = Dataset(netflix_ratings[:-int(1e7)], max_user=max_user, max_item=max_item, name='Train') 21 | val_dataset = Dataset(netflix_ratings[-int(1e7):-int(5e6)], max_user=max_user, max_item=max_item, name='Val') 22 | test_dataset = Dataset(netflix_ratings[-int(5e6):], max_user=max_user, max_item=max_item, name='Test') 23 | 24 | model = ItrMLP(batch_size=batch_size, max_user=max_user, max_item=max_item, dim_embed=20, opt='SGD', 25 | pretrained_user_embeddings=pretrained_user_embeddings, pretrained_item_embeddings=pretrained_item_embeddings, 26 | user_dims=[30, 30, 20], item_dims=[30, 30, 20], test_batch_size=test_batch_size) 27 | 28 | sampler = ExplicitSampler(batch_size=batch_size, dataset=train_dataset, chronological=True) 29 | model_trainer = ItrMLPModelTrainer(batch_size=batch_size, test_batch_size=test_batch_size, 30 | train_dataset=train_dataset, model=model, sampler=sampler) 31 | 32 | mse_evaluator = MSE() 33 | 34 | model_trainer.train(num_itr=int(1e5), display_itr=display_itr, update_itr=update_itr, 35 | eval_datasets=[val_dataset, test_dataset], 36 | evaluators=[mse_evaluator]) 37 | -------------------------------------------------------------------------------- /tf1_examples/legacy_examples/exp_pmf.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | sys.path.append(os.getcwd()) 4 | 5 | from openrec.tf1.legacy import ImplicitModelTrainer 6 | from openrec.tf1.legacy.utils import ImplicitDataset 7 | from openrec.tf1.legacy.recommenders import PMF 8 | from 
openrec.tf1.legacy.utils.evaluators import AUC, Recall 9 | from openrec.tf1.legacy.utils.samplers import PointwiseSampler 10 | from config import sess_config 11 | import dataloader 12 | 13 | raw_data = dataloader.load_citeulike() 14 | batch_size = 1000 15 | test_batch_size = 100 16 | display_itr = 10000 17 | 18 | train_dataset = ImplicitDataset(raw_data['train_data'], raw_data['max_user'], raw_data['max_item'], name='Train') 19 | val_dataset = ImplicitDataset(raw_data['val_data'], raw_data['max_user'], raw_data['max_item'], name='Val') 20 | test_dataset = ImplicitDataset(raw_data['test_data'], raw_data['max_user'], raw_data['max_item'], name='Test') 21 | 22 | model = PMF(batch_size=batch_size, max_user=train_dataset.max_user(), max_item=train_dataset.max_item(), 23 | dim_embed=50, opt='Adam', sess_config=sess_config) 24 | sampler = PointwiseSampler(batch_size=batch_size, dataset=train_dataset, pos_ratio=0.2, num_process=5) 25 | model_trainer = ImplicitModelTrainer(batch_size=batch_size, test_batch_size=test_batch_size, 26 | train_dataset=train_dataset, model=model, sampler=sampler) 27 | 28 | auc_evaluator = AUC() 29 | recall_evaluator = Recall(recall_at=[10, 20, 30, 40, 50, 60, 70, 80, 90, 100]) 30 | 31 | model_trainer.train(num_itr=int(1e5), display_itr=display_itr, eval_datasets=[val_dataset, test_dataset], 32 | evaluators=[auc_evaluator, recall_evaluator]) 33 | -------------------------------------------------------------------------------- /tf1_examples/legacy_examples/exp_visual_bpr.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | sys.path.append(os.getcwd()) 4 | 5 | from openrec.tf1.legacy import ImplicitModelTrainer 6 | from openrec.tf1.legacy.utils import ImplicitDataset 7 | from openrec.tf1.legacy.recommenders import VisualBPR 8 | from openrec.tf1.legacy.utils.evaluators import AUC, Recall 9 | from openrec.tf1.legacy.utils.samplers import PairwiseSampler 10 | from config import sess_config 11 | import dataloader 12 | 13 | raw_data = dataloader.load_amazon_book() 14 | batch_size = 1000 15 | test_batch_size = 100 16 | item_serving_size = 1000 17 | display_itr = 10000 18 | 19 | train_dataset = ImplicitDataset(raw_data['train_data'], raw_data['max_user'], raw_data['max_item'], name='Train') 20 | val_dataset = ImplicitDataset(raw_data['val_data'], raw_data['max_user'], raw_data['max_item'], name='Val') 21 | test_dataset = ImplicitDataset(raw_data['test_data'], raw_data['max_user'], raw_data['max_item'], name='Test') 22 | 23 | model = VisualBPR(batch_size=batch_size, max_user=raw_data['max_user'], max_item=raw_data['max_item'], l2_reg=0.001, l2_reg_mlp=0.001, 24 | dropout_rate=0.5, dim_embed=50, item_f_source=raw_data['item_features'], dims=[1028, 128, 50], sess_config=sess_config, opt='Adam') 25 | sampler = PairwiseSampler(batch_size=batch_size, dataset=train_dataset, num_process=5) 26 | model_trainer = ImplicitModelTrainer(batch_size=batch_size, test_batch_size=test_batch_size, item_serving_size=item_serving_size, 27 | train_dataset=train_dataset, model=model, sampler=sampler) 28 | 29 | auc_evaluator = AUC() 30 | recall_evaluator = Recall(recall_at=[10, 20, 30, 40, 50, 60, 70, 80, 90, 100]) 31 | model_trainer.train(num_itr=int(1e5), display_itr=display_itr, eval_datasets=[val_dataset, test_dataset], 32 | evaluators=[auc_evaluator, recall_evaluator], num_negatives=1000) 33 | -------------------------------------------------------------------------------- /tf1_examples/legacy_examples/exp_visual_cml.py: 
-------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | sys.path.append(os.getcwd()) 4 | 5 | from openrec.tf1.legacy import ImplicitModelTrainer 6 | from openrec.tf1.legacy.utils import ImplicitDataset 7 | from openrec.tf1.legacy.recommenders import VisualCML 8 | from openrec.tf1.legacy.utils.evaluators import AUC, Recall 9 | from openrec.tf1.legacy.utils.samplers import PairwiseSampler 10 | from config import sess_config 11 | import dataloader 12 | 13 | raw_data = dataloader.load_amazon_book() 14 | batch_size = 1000 15 | test_batch_size = 100 16 | item_serving_size = 1000 17 | display_itr = 10000 18 | 19 | train_dataset = ImplicitDataset(raw_data['train_data'], raw_data['max_user'], raw_data['max_item'], name='Train') 20 | val_dataset = ImplicitDataset(raw_data['val_data'], raw_data['max_user'], raw_data['max_item'], name='Val') 21 | test_dataset = ImplicitDataset(raw_data['test_data'], raw_data['max_user'], raw_data['max_item'], name='Test') 22 | 23 | model = VisualCML(batch_size=batch_size, max_user=raw_data['max_user'], max_item=raw_data['max_item'], l2_reg=0.001, l2_reg_mlp=0.001, 24 | dropout_rate=0.5, dim_embed=50, item_f_source=raw_data['item_features'], dims=[1028, 128, 50], sess_config=sess_config, opt='Adam') 25 | sampler = PairwiseSampler(batch_size=batch_size, dataset=train_dataset, num_process=5) 26 | model_trainer = ImplicitModelTrainer(batch_size=batch_size, test_batch_size=test_batch_size, item_serving_size=item_serving_size, 27 | train_dataset=train_dataset, model=model, sampler=sampler) 28 | 29 | auc_evaluator = AUC() 30 | recall_evaluator = Recall(recall_at=[10, 20, 30, 40, 50, 60, 70, 80, 90, 100]) 31 | 32 | model_trainer.train(num_itr=int(1e5), display_itr=display_itr, eval_datasets=[val_dataset, test_dataset], 33 | evaluators=[auc_evaluator, recall_evaluator], num_negatives=1000) 34 | -------------------------------------------------------------------------------- /tf1_examples/legacy_examples/exp_visual_gmf.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | sys.path.append(os.getcwd()) 4 | 5 | from openrec.tf1.legacy import ImplicitModelTrainer 6 | from openrec.tf1.legacy.utils import ImplicitDataset 7 | from openrec.tf1.legacy.recommenders import VisualGMF 8 | from openrec.tf1.legacy.utils.evaluators import AUC, Recall 9 | from openrec.tf1.legacy.utils.samplers import PointwiseSampler 10 | from config import sess_config 11 | import dataloader 12 | 13 | raw_data = dataloader.load_amazon_book() 14 | batch_size = 1000 15 | test_batch_size = 100 16 | item_serving_size = 1000 17 | display_itr = 10000 18 | 19 | train_dataset = ImplicitDataset(raw_data['train_data'], raw_data['max_user'], raw_data['max_item'], name='Train') 20 | val_dataset = ImplicitDataset(raw_data['val_data'], raw_data['max_user'], raw_data['max_item'], name='Val') 21 | test_dataset = ImplicitDataset(raw_data['test_data'], raw_data['max_user'], raw_data['max_item'], name='Test') 22 | 23 | model = VisualGMF(batch_size=batch_size, max_user=raw_data['max_user'], max_item=raw_data['max_item'], l2_reg=0.001, l2_reg_mlp=0.001, 24 | dropout_rate=0.5, dim_embed=50, item_f_source=raw_data['item_features'], dims=[1028, 128, 50], sess_config=sess_config, opt='Adam') 25 | sampler = PointwiseSampler(batch_size=batch_size, dataset=train_dataset, pos_ratio=0.2, num_process=5) 26 | model_trainer = ImplicitModelTrainer(batch_size=batch_size, test_batch_size=test_batch_size, 
item_serving_size=item_serving_size, 27 | train_dataset=train_dataset, model=model, sampler=sampler) 28 | 29 | auc_evaluator = AUC() 30 | recall_evaluator = Recall(recall_at=[10, 20, 30, 40, 50, 60, 70, 80, 90, 100]) 31 | 32 | model_trainer.train(num_itr=int(1e5), display_itr=display_itr, eval_datasets=[val_dataset, test_dataset], 33 | evaluators=[auc_evaluator, recall_evaluator], num_negatives=1000) 34 | -------------------------------------------------------------------------------- /tf1_examples/legacy_examples/exp_visual_pmf.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | sys.path.append(os.getcwd()) 4 | 5 | from openrec.tf1.legacy import ImplicitModelTrainer 6 | from openrec.tf1.legacy.utils import ImplicitDataset 7 | from openrec.tf1.legacy.recommenders import VisualPMF 8 | from openrec.tf1.legacy.utils.evaluators import AUC, Recall 9 | from openrec.tf1.legacy.utils.samplers import PointwiseSampler 10 | from config import sess_config 11 | import dataloader 12 | 13 | raw_data = dataloader.load_amazon_book() 14 | batch_size = 1000 15 | test_batch_size = 100 16 | item_serving_size = 1000 17 | display_itr = 10000 18 | 19 | train_dataset = ImplicitDataset(raw_data['train_data'], raw_data['max_user'], raw_data['max_item'], name='Train') 20 | val_dataset = ImplicitDataset(raw_data['val_data'], raw_data['max_user'], raw_data['max_item'], name='Val') 21 | test_dataset = ImplicitDataset(raw_data['test_data'], raw_data['max_user'], raw_data['max_item'], name='Test') 22 | 23 | model = VisualPMF(batch_size=batch_size, max_user=raw_data['max_user'], max_item=raw_data['max_item'], l2_reg=0.001, l2_reg_mlp=0.001, 24 | dropout_rate=0.5, dim_embed=50, item_f_source=raw_data['item_features'], dims=[1028, 128, 50], sess_config=sess_config, opt='Adam') 25 | sampler = PointwiseSampler(batch_size=batch_size, dataset=train_dataset, pos_ratio=0.2, num_process=5) 26 | model_trainer = ImplicitModelTrainer(batch_size=batch_size, test_batch_size=test_batch_size, item_serving_size=item_serving_size, 27 | train_dataset=train_dataset, model=model, sampler=sampler) 28 | 29 | auc_evaluator = AUC() 30 | recall_evaluator = Recall(recall_at=[10, 20, 30, 40, 50, 60, 70, 80, 90, 100]) 31 | 32 | model_trainer.train(num_itr=int(1e5), display_itr=display_itr, eval_datasets=[val_dataset, test_dataset], 33 | evaluators=[auc_evaluator, recall_evaluator], num_negatives=1000) 34 | -------------------------------------------------------------------------------- /tf1_examples/legacy_examples/exp_wcml.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | sys.path.append(os.getcwd()) 4 | 5 | from openrec.tf1.legacy import ImplicitModelTrainer 6 | from openrec.tf1.legacy.utils import ImplicitDataset 7 | from openrec.tf1.legacy.recommenders import WCML 8 | from openrec.tf1.legacy.utils.evaluators import AUC 9 | from openrec.tf1.legacy.utils.samplers import NPairwiseSampler 10 | import dataloader 11 | 12 | raw_data = dataloader.load_citeulike() 13 | batch_size = 2000 14 | test_batch_size = 100 15 | display_itr = 500 16 | 17 | train_dataset = ImplicitDataset(raw_data['train_data'], raw_data['max_user'], raw_data['max_item'], name='Train') 18 | val_dataset = ImplicitDataset(raw_data['val_data'], raw_data['max_user'], raw_data['max_item'], name='Val') 19 | test_dataset = ImplicitDataset(raw_data['test_data'], raw_data['max_user'], raw_data['max_item'], name='Test') 20 | 21 | model = 
WCML(batch_size=batch_size, max_user=train_dataset.max_user(), max_item=train_dataset.max_item(), 22 | dim_embed=20, neg_num=5, l2_reg=None, opt='Adam', sess_config=None) 23 | sampler = NPairwiseSampler(batch_size=batch_size, dataset=train_dataset, negativenum=5, num_process=5) 24 | model_trainer = ImplicitModelTrainer(batch_size=batch_size, test_batch_size=test_batch_size, 25 | train_dataset=train_dataset, model=model, sampler=sampler) 26 | auc_evaluator = AUC() 27 | 28 | model_trainer.train(num_itr=int(1e5), display_itr=display_itr, eval_datasets=[val_dataset], 29 | evaluators=[auc_evaluator], num_negatives=200) 30 | -------------------------------------------------------------------------------- /tf1_examples/pmf_citeulike.py: -------------------------------------------------------------------------------- 1 | from openrec import ModelTrainer 2 | from openrec.tf1.utils import Dataset 3 | from openrec.tf1.recommenders import PMF 4 | from openrec.tf1.utils.evaluators import AUC, Recall 5 | from openrec.tf1.utils.samplers import StratifiedPointwiseSampler 6 | from openrec.tf1.utils.samplers import EvaluationSampler 7 | import dataloader 8 | 9 | raw_data = dataloader.load_citeulike() 10 | dim_embed = 50 11 | total_iter = int(1e5) 12 | batch_size = 1000 13 | eval_iter = 10000 14 | save_iter = eval_iter 15 | 16 | train_dataset = Dataset(raw_data['train_data'], raw_data['total_users'], raw_data['total_items'], name='Train') 17 | val_dataset = Dataset(raw_data['val_data'], raw_data['total_users'], raw_data['total_items'], name='Val', num_negatives=500) 18 | test_dataset = Dataset(raw_data['test_data'], raw_data['total_users'], raw_data['total_items'], name='Test', num_negatives=500) 19 | 20 | train_sampler = StratifiedPointwiseSampler(pos_ratio=0.2, batch_size=batch_size, dataset=train_dataset, num_process=5) 21 | val_sampler = EvaluationSampler(batch_size=batch_size, dataset=val_dataset) 22 | test_sampler = EvaluationSampler(batch_size=batch_size, dataset=test_dataset) 23 | 24 | model = PMF(batch_size=batch_size, total_users=train_dataset.total_users(), total_items=train_dataset.total_items(), 25 | dim_user_embed=dim_embed, dim_item_embed=dim_embed, save_model_dir='pmf_recommender/', train=True, serve=True) 26 | 27 | auc_evaluator = AUC() 28 | recall_evaluator = Recall(recall_at=[10, 20, 30, 40, 50, 60, 70, 80, 90, 100]) 29 | model_trainer = ModelTrainer(model=model) 30 | 31 | model_trainer.train(total_iter=total_iter, eval_iter=eval_iter, save_iter=save_iter, train_sampler=train_sampler, 32 | eval_samplers=[val_sampler, test_sampler], evaluators=[auc_evaluator, recall_evaluator]) 33 | -------------------------------------------------------------------------------- /tf1_examples/rnn_rec_lastfm.py: -------------------------------------------------------------------------------- 1 | from openrec import ModelTrainer 2 | from openrec.tf1.utils import Dataset 3 | from openrec.tf1.recommenders import RNNRec 4 | from openrec.tf1.utils.evaluators import AUC, Recall 5 | from openrec.tf1.utils.samplers import TemporalSampler 6 | from openrec.tf1.utils.samplers import TemporalEvaluationSampler 7 | import numpy as np 8 | 9 | lastfm_train = np.load('dataset/lastfm/lastfm_train.npy') 10 | lastfm_test = np.load('dataset/lastfm/lastfm_test.npy') 11 | total_users = 992 12 | total_items = 14598 13 | 14 | dim_item_embed = 50 15 | max_seq_len = 100 16 | num_units = 32 17 | batch_size = 256 18 | total_iter = int(1e5) 19 | eval_iter = 100 20 | save_iter = eval_iter 21 | 22 | train_dataset = 
Dataset(raw_data=lastfm_train, total_users=total_users, 23 | total_items=total_items, sortby='ts', name='Train') 24 | test_dataset = Dataset(raw_data=lastfm_test, total_users=total_users, 25 | total_items=total_items, sortby='ts', name='Test') 26 | 27 | train_sampler = TemporalSampler(batch_size=batch_size, max_seq_len=max_seq_len, dataset=train_dataset, num_process=1) 28 | test_sampler = TemporalEvaluationSampler(dataset=test_dataset, max_seq_len=max_seq_len) 29 | 30 | rnn_model = RNNRec(batch_size=batch_size, dim_item_embed=dim_item_embed, max_seq_len=max_seq_len, total_items=train_dataset.total_items(), 31 | num_units=num_units, save_model_dir='rnn_recommender/', train=True, serve=True) 32 | 33 | model_trainer = ModelTrainer(model=rnn_model) 34 | 35 | auc_evaluator = AUC() 36 | recall_evaluator = Recall(recall_at=[100, 500]) 37 | model_trainer.train(total_iter=total_iter, eval_iter=eval_iter, save_iter=save_iter, train_sampler=train_sampler, 38 | eval_samplers=[test_sampler], evaluators=[auc_evaluator, recall_evaluator]) -------------------------------------------------------------------------------- /tf1_examples/ucml_citeulike.py: -------------------------------------------------------------------------------- 1 | from openrec import ModelTrainer 2 | from openrec.tf1.utils import Dataset 3 | from openrec.tf1.recommenders import UCML 4 | from openrec.tf1.utils.evaluators import AUC, Recall 5 | from openrec.tf1.utils.samplers import RandomPairwiseSampler 6 | from openrec.tf1.utils.samplers import EvaluationSampler 7 | import dataloader 8 | 9 | raw_data = dataloader.load_citeulike() 10 | dim_embed = 50 11 | total_iter = int(1e5) 12 | batch_size = 1000 13 | eval_iter = 10000 14 | save_iter = eval_iter 15 | 16 | train_dataset = Dataset(raw_data['train_data'], raw_data['total_users'], raw_data['total_items'], name='Train') 17 | val_dataset = Dataset(raw_data['val_data'], raw_data['total_users'], raw_data['total_items'], name='Val', num_negatives=500) 18 | test_dataset = Dataset(raw_data['test_data'], raw_data['total_users'], raw_data['total_items'], name='Test', num_negatives=500) 19 | 20 | train_sampler = RandomPairwiseSampler(batch_size=batch_size, dataset=train_dataset, num_process=5) 21 | val_sampler = EvaluationSampler(batch_size=batch_size, dataset=val_dataset) 22 | test_sampler = EvaluationSampler(batch_size=batch_size, dataset=test_dataset) 23 | 24 | model = UCML(batch_size=batch_size, total_users=train_dataset.total_users(), total_items=train_dataset.total_items(), 25 | dim_user_embed=dim_embed, dim_item_embed=dim_embed, save_model_dir='ucml_recommender/', train=True, serve=True) 26 | 27 | def train_iter_func(model, batch_data): 28 | loss = model.train(batch_data)['losses'][0] 29 | model.train(batch_data, operations_id='censor_embedding') 30 | return loss 31 | 32 | model_trainer = ModelTrainer(model=model, 33 | train_iter_func=train_iter_func) 34 | 35 | auc_evaluator = AUC() 36 | recall_evaluator = Recall(recall_at=[10, 20, 30, 40, 50, 60, 70, 80, 90, 100]) 37 | model_trainer.train(total_iter=int(1e5), eval_iter=eval_iter, save_iter=save_iter, train_sampler=train_sampler, 38 | eval_samplers=[val_sampler, test_sampler], evaluators=[auc_evaluator, recall_evaluator]) 39 | -------------------------------------------------------------------------------- /tf1_examples/vanilla_youtube_rec_lastfm.py: -------------------------------------------------------------------------------- 1 | from openrec import ModelTrainer 2 | from openrec.tf1.utils import Dataset 3 | from 
openrec.tf1.recommenders import VanillaYouTubeRec 4 | from openrec.tf1.utils.evaluators import AUC, Recall 5 | from openrec.tf1.utils.samplers import TemporalSampler, TemporalEvaluationSampler 6 | import numpy as np 7 | 8 | total_users = 992 9 | total_items = 14598 10 | train_data = np.load('dataset/lastfm/lastfm_train.npy') 11 | test_data = np.load('dataset/lastfm/lastfm_test.npy') 12 | 13 | dim_item_embed = 50 14 | max_seq_len = 20 15 | total_iter = int(1e5) 16 | batch_size = 100 17 | eval_iter = 100 18 | save_iter = eval_iter 19 | 20 | train_dataset = Dataset(train_data, total_users, total_items, sortby='ts', 21 | name='Train') 22 | test_dataset = Dataset(test_data, total_users, total_items, sortby='ts', 23 | name='Test') 24 | 25 | 26 | train_sampler = TemporalSampler(batch_size=batch_size, max_seq_len=max_seq_len, 27 | dataset=train_dataset, num_process=1) 28 | test_sampler = TemporalEvaluationSampler(dataset=test_dataset, 29 | max_seq_len=max_seq_len) 30 | 31 | 32 | model = VanillaYouTubeRec(batch_size=batch_size, 33 | total_items=train_dataset.total_items(), 34 | max_seq_len=max_seq_len, 35 | dim_item_embed=dim_item_embed, 36 | save_model_dir='vanilla_youtube_recommender/', 37 | train=True, serve=True) 38 | 39 | 40 | model_trainer = ModelTrainer(model=model) 41 | 42 | auc_evaluator = AUC() 43 | recall_evaluator = Recall(recall_at=[100, 200, 300, 400, 500]) 44 | 45 | model_trainer.train(total_iter=total_iter, eval_iter=eval_iter, 46 | save_iter=save_iter,train_sampler=train_sampler, 47 | eval_samplers=[test_sampler], evaluators=[auc_evaluator, recall_evaluator]) 48 | -------------------------------------------------------------------------------- /tf1_examples/vbpr_tradesy.py: -------------------------------------------------------------------------------- 1 | from openrec import ModelTrainer 2 | from openrec import FastDotProductServer 3 | from openrec.tf1.utils import Dataset 4 | from openrec.tf1.recommenders import VBPR 5 | from openrec.tf1.utils.evaluators import AUC, Recall 6 | from openrec.tf1.utils.samplers import VBPRPairwiseSampler 7 | from openrec.tf1.utils.samplers import EvaluationSampler 8 | import dataloader 9 | 10 | raw_data = dataloader.load_tradesy() 11 | dim_user_embed = 50 12 | dim_item_embed = 25 13 | total_iter = int(1e5) 14 | batch_size = 10000 15 | eval_iter = 100 16 | save_iter = eval_iter 17 | 18 | train_dataset = Dataset(raw_data['train_data'], raw_data['total_users'], raw_data['total_items'], name='Train') 19 | val_dataset = Dataset(raw_data['val_data'], raw_data['total_users'], raw_data['total_items'], name='Val', num_negatives=1000) 20 | test_dataset = Dataset(raw_data['test_data'], raw_data['total_users'], raw_data['total_items'], name='Test', num_negatives=1000) 21 | 22 | train_sampler = VBPRPairwiseSampler(batch_size=batch_size, dataset=train_dataset, 23 | item_vfeature=raw_data['item_features'], num_process=5) 24 | val_sampler = EvaluationSampler(batch_size=batch_size, dataset=val_dataset) 25 | test_sampler = EvaluationSampler(batch_size=batch_size, dataset=test_dataset) 26 | 27 | _, dim_v = raw_data['item_features'].shape 28 | 29 | model = VBPR(batch_size=batch_size, 30 | dim_v=dim_v, 31 | total_users=train_dataset.total_users(), 32 | total_items=train_dataset.total_items(), 33 | dim_user_embed=dim_user_embed, 34 | dim_item_embed=dim_item_embed, 35 | save_model_dir='vbpr_recommender/', 36 | l2_reg_embed=0.001, 37 | l2_reg_mlp=0.001, 38 | train=True, serve=True) 39 | 40 | 41 | def extract_user_lf_func(model, user_id): 42 | return 
model.serve_inspect_ports({'user_id':user_id}, 43 | ports=[model.servegraph.usergraph['user_vec']])[0] 44 | 45 | def extract_item_lf_func(model, item_id, item_vfeature=raw_data['item_features']): 46 | return model.serve_inspect_ports({'item_id':item_id, 47 | 'item_vfeature': item_vfeature[item_id]}, 48 | ports=[model.servegraph.interactiongraph['item_vec']])[0] 49 | 50 | def extract_item_bias_func(model, item_id): 51 | return model.serve_inspect_ports({'item_id':item_id}, 52 | ports=[model.servegraph.interactiongraph['item_bias']])[0] 53 | 54 | 55 | fastmodel = FastDotProductServer(model=model, batch_size=batch_size, 56 | dim_embed=dim_user_embed, 57 | total_users=train_dataset.total_users(), 58 | total_items=train_dataset.total_items(), 59 | extract_user_lf_func=extract_user_lf_func, 60 | extract_item_lf_func=extract_item_lf_func, 61 | extract_item_bias_func=extract_item_bias_func) 62 | 63 | model_trainer = ModelTrainer(model=fastmodel) 64 | 65 | auc_evaluator = AUC() 66 | recall_evaluator = Recall(recall_at=[10, 20, 30, 40, 50, 60, 70, 80, 90, 100]) 67 | model_trainer.train(total_iter=total_iter, eval_iter=eval_iter, save_iter=save_iter, train_sampler=train_sampler, 68 | eval_samplers=[val_sampler, test_sampler], evaluators=[auc_evaluator, recall_evaluator]) -------------------------------------------------------------------------------- /tf1_examples/youtube_rec_lastfm.py: -------------------------------------------------------------------------------- 1 | from openrec import ModelTrainer 2 | from openrec.tf1.utils import Dataset 3 | from openrec.tf1.recommenders import YouTubeRec 4 | from openrec.tf1.utils.evaluators import AUC, Recall 5 | from openrec.tf1.utils.samplers import YouTubeSampler, YouTubeEvaluationSampler 6 | import numpy as np 7 | 8 | train_data = np.load('dataset/lastfm/lastfm_train.npy') 9 | test_data = np.load('dataset/lastfm/lastfm_test.npy') 10 | user_feature = np.load('dataset/lastfm/user_feature.npy') 11 | 12 | total_users = 992 13 | total_items = 14598 14 | 15 | user_dict = {'gender': 3, 'geo': 67} 16 | item_dict = {'id': total_items} 17 | 18 | dim_item_embed = {'total': 50, 'id': 50} 19 | dim_user_embed = {'total': 30, 'geo': 20, 'gender': 10} 20 | max_seq_len = 20 21 | 22 | total_iter = int(1e5) 23 | batch_size = 100 24 | eval_iter = 100 25 | save_iter = eval_iter 26 | 27 | train_dataset = Dataset(train_data, total_users, total_items, sortby='ts', 28 | name='Train') 29 | test_dataset = Dataset(test_data, total_users, total_items, sortby='ts', 30 | name='Test') 31 | 32 | 33 | train_sampler = YouTubeSampler(user_feature=user_feature, batch_size=batch_size, max_seq_len=max_seq_len, dataset=train_dataset, num_process=1) 34 | test_sampler = YouTubeEvaluationSampler(user_feature=user_feature, dataset=test_dataset, max_seq_len=max_seq_len) 35 | 36 | 37 | model = YouTubeRec(batch_size=batch_size, 38 | user_dict=user_dict, 39 | item_dict=item_dict, 40 | max_seq_len=max_seq_len, 41 | dim_item_embed=dim_item_embed, 42 | dim_user_embed=dim_user_embed, 43 | save_model_dir='youtube_recommender/', 44 | train=True, serve=True) 45 | 46 | 47 | model_trainer = ModelTrainer(model=model) 48 | 49 | auc_evaluator = AUC() 50 | recall_evaluator = Recall(recall_at=[100, 200, 300, 400, 500]) 51 | 52 | model_trainer.train(total_iter=total_iter, eval_iter=eval_iter, 53 | save_iter=save_iter,train_sampler=train_sampler, 54 | eval_samplers=[test_sampler], evaluators=[auc_evaluator, recall_evaluator]) 55 | -------------------------------------------------------------------------------- 
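Note: the Last.fm scripts above (rnn_rec_lastfm.py, vanilla_youtube_rec_lastfm.py, youtube_rec_lastfm.py) load interactions from .npy files and build their Dataset objects with sortby='ts', so those arrays must carry a timestamp field. Below is a minimal sketch of a compatible toy array; the field names 'user_id' and 'item_id' are assumptions inferred from how the samplers are used here, not a dump of the real files (only 'ts' is confirmed by the sortby argument).

import numpy as np

# Hypothetical toy interactions in the structured-array layout the scripts
# above appear to expect: one record per (user, item, timestamp) event.
interaction_dtype = np.dtype([('user_id', np.int32),
                              ('item_id', np.int32),
                              ('ts', np.int64)])
toy_train = np.array([(0, 10, 1546300800),
                      (0, 42, 1546300900),
                      (1, 7, 1546301000)], dtype=interaction_dtype)
np.save('dataset/lastfm/lastfm_train.npy', toy_train)  # same path the scripts load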
/tf1_tutorials/Lecture.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ylongqi/openrec/a00de2345844858194ef43ab6845342114a5be93/tf1_tutorials/Lecture.pdf -------------------------------------------------------------------------------- /tf2_examples/bpr_citeulike.py: -------------------------------------------------------------------------------- 1 | from openrec.tf2.data import Dataset 2 | from openrec.tf2.recommenders import BPR 3 | from openrec.tf2.metrics import AUC, NDCG, Recall, DictMean 4 | from tqdm.auto import tqdm 5 | import tensorflow as tf 6 | import numpy as np 7 | from tensorflow.keras import optimizers 8 | 9 | import dataloader 10 | 11 | raw_data = dataloader.load_citeulike('../dataset/') 12 | dim_embed = 50 13 | total_iter = int(1e5) 14 | batch_size = 1000 15 | eval_interval = 1000 16 | save_interval = eval_interval 17 | 18 | train_dataset = Dataset(raw_data=raw_data['train_data'], 19 | total_users=raw_data['total_users'], 20 | total_items=raw_data['total_items']) 21 | 22 | val_dataset = Dataset(raw_data=raw_data['val_data'], 23 | total_users=raw_data['total_users'], 24 | total_items=raw_data['total_items']) 25 | 26 | bpr_model = BPR(total_users=raw_data['total_users'], 27 | total_items=raw_data['total_items'], 28 | dim_user_embed=dim_embed, 29 | dim_item_embed=dim_embed) 30 | 31 | optimizer = optimizers.Adam() 32 | 33 | @tf.function 34 | def train_step(user_id, p_item_id, n_item_id): 35 | with tf.GradientTape() as tape: 36 | loss_value = bpr_model(user_id, p_item_id, n_item_id) 37 | gradients = tape.gradient(loss_value, bpr_model.trainable_variables) 38 | optimizer.apply_gradients(zip(gradients, bpr_model.trainable_variables)) 39 | return loss_value 40 | 41 | @tf.function 42 | def eval_step(user_id, pos_mask, excl_mask): 43 | pred = bpr_model.inference(user_id) 44 | auc = AUC(pos_mask=pos_mask, pred=pred, excl_mask=excl_mask) 45 | recall = Recall(pos_mask=pos_mask, pred=pred, excl_mask=excl_mask, at=[50, 100]) 46 | return {'AUC': auc, 'Recall':recall} 47 | 48 | average_loss = tf.keras.metrics.Mean() 49 | average_metrics = DictMean({'AUC': [], 'Recall': [2]}) 50 | 51 | for train_iter, batch_data in enumerate(train_dataset.pairwise(batch_size=batch_size, 52 | num_parallel_calls=5)): 53 | loss = train_step(**batch_data) 54 | average_loss.update_state(loss) 55 | print('%d iter training.' 
% train_iter, end='\r') 56 | 57 | if train_iter % eval_interval == 0: 58 | for eval_batch_data in tqdm(val_dataset.evaluation(batch_size=batch_size, 59 | excl_datasets=[train_dataset]), 60 | leave=False, desc='%d iter evaluation' % train_iter): 61 | eval_results = eval_step(**eval_batch_data) 62 | average_metrics.update_state(eval_results) 63 | result = average_metrics.result() 64 | print("Iter: %d, Loss: %.2f, AUC: %.4f, Recall(50, 100): %s" % (train_iter, average_loss.result().numpy(), 65 | result['AUC'].numpy(), result['Recall'].numpy())) 66 | average_loss.reset_states() 67 | average_metrics.reset_states() -------------------------------------------------------------------------------- /tf2_examples/dataloader.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import numpy as np 3 | 4 | def load_amazon_book(dataset_folder='dataset/'): 5 | 6 | raw_data = dict() 7 | raw_data['total_users'] = 99473 8 | raw_data['total_items'] = 450166 9 | 10 | raw_data['train_data'] = np.load(dataset_folder + 'amazon/user_data_train.npy') 11 | raw_data['val_data'] = np.load(dataset_folder + 'amazon/user_data_val.npy') 12 | raw_data['test_data'] = np.load(dataset_folder + 'amazon/user_data_test.npy') 13 | 14 | raw_data['item_features'] = np.array(np.memmap(dataset_folder + 'amazon/book_features_update.mem', 15 | dtype=np.float32, mode='r', shape=(raw_data['total_items'], 4096))) 16 | raw_data['user_features'] = np.load(dataset_folder + 'amazon/user_features_categories.npy') 17 | return raw_data 18 | 19 | def load_citeulike(dataset_folder='dataset/'): 20 | 21 | raw_data = dict() 22 | raw_data['total_users'] = 5551 23 | raw_data['total_items'] = 16980 24 | 25 | raw_data['train_data'] = np.load(dataset_folder + 'citeulike/user_data_train.npy') 26 | raw_data['val_data'] = np.load(dataset_folder + 'citeulike/user_data_val.npy') 27 | raw_data['test_data'] = np.load(dataset_folder + 'citeulike/user_data_test.npy') 28 | 29 | return raw_data 30 | 31 | def load_tradesy(dataset_folder='dataset/'): 32 | 33 | raw_data = dict() 34 | raw_data['total_users'] = 19243 35 | raw_data['total_items'] = 165906 36 | 37 | raw_data['train_data'] = np.load(dataset_folder + 'tradesy/user_data_train.npy') 38 | raw_data['val_data'] = np.load(dataset_folder + 'tradesy/user_data_val.npy') 39 | raw_data['test_data'] = np.load(dataset_folder + 'tradesy/user_data_test.npy') 40 | 41 | raw_data['item_features'] = np.load(dataset_folder + 'tradesy/item_features.npy') / 32.671101 42 | return raw_data 43 | 44 | def load_criteo(dataset_folder='dataset/'): 45 | 46 | # Data processing code adapted from https://github.com/facebookresearch/dlrm 47 | # Follow the steps in https://github.com/ylongqi/dlrm/blob/master/data_utils.py to generate kaggle_processed.npz 48 | # Or use the `./download_dataset.sh criteo` command to download the processed data. 
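# Split sketch (see the code below): the example indices are cut into 7 roughly
# equal chunks and each chunk is shuffled in place; the first 6 chunks are
# concatenated and re-shuffled to form the training set, and the last chunk is
# halved into the validation and test splits.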
49 | 50 | with np.load(dataset_folder + 'criteo/kaggle_processed.npz') as data: 51 | 52 | X_int = data["X_int"] 53 | X_cat = data["X_cat"] 54 | y = data["y"] 55 | counts = data["counts"] 56 | 57 | indices = np.arange(len(y)) 58 | indices = np.array_split(indices, 7) 59 | for i in range(len(indices)): 60 | indices[i] = np.random.permutation(indices[i]) 61 | 62 | train_indices = np.concatenate(indices[:-1]) 63 | test_indices = indices[-1] 64 | val_indices, test_indices = np.array_split(test_indices, 2) 65 | train_indices = np.random.permutation(train_indices) 66 | 67 | raw_data = dict() 68 | 69 | raw_data['counts'] = counts 70 | 71 | raw_data['X_cat_train'] = X_cat[train_indices].astype(np.int32) 72 | raw_data['X_int_train'] = np.log(X_int[train_indices]+1).astype(np.float32) 73 | raw_data['y_train'] = y[train_indices].astype(np.float32) 74 | 75 | raw_data['X_cat_val'] = X_cat[val_indices] 76 | raw_data['X_int_val'] = np.log(X_int[val_indices]+1).astype(np.float32) 77 | raw_data['y_val'] = y[val_indices] 78 | 79 | raw_data['X_cat_test'] = X_cat[test_indices] 80 | raw_data['X_int_test'] = np.log(X_int[test_indices]+1).astype(np.float32) 81 | raw_data['y_test'] = y[test_indices] 82 | 83 | return raw_data 84 | -------------------------------------------------------------------------------- /tf2_examples/dlrm_criteo.py: -------------------------------------------------------------------------------- 1 | from tensorflow.data import Dataset 2 | from openrec.tf2.recommenders import DLRM 3 | from tensorflow.keras import optimizers 4 | from tqdm import tqdm 5 | import tensorflow as tf 6 | import dataloader 7 | 8 | raw_data = dataloader.load_criteo('../dataset/') 9 | dim_embed = 4 10 | bottom_mlp_size = [8, 4] 11 | top_mlp_size = [128, 64, 1] 12 | total_iter = int(1e5) 13 | batch_size = 1024 14 | eval_interval = 100 15 | save_interval = eval_interval 16 | 17 | # Sample 1000 batches for training 18 | train_dataset = Dataset.from_tensor_slices({ 19 | 'dense_features': raw_data['X_int_train'][:batch_size*1000], 20 | 'sparse_features': raw_data['X_cat_train'][:batch_size*1000], 21 | 'label': raw_data['y_train'][:batch_size*1000] 22 | }).batch(batch_size).prefetch(1).shuffle(5*batch_size) 23 | 24 | # Sample 100 batches for validation 25 | val_dataset = Dataset.from_tensor_slices({ 26 | 'dense_features': raw_data['X_int_val'][:batch_size*100], 27 | 'sparse_features': raw_data['X_cat_val'][:batch_size*100], 28 | 'label': raw_data['y_val'][:batch_size*100] 29 | }).batch(batch_size) 30 | 31 | optimizer = optimizers.Adam() 32 | 33 | dlrm_model = DLRM( 34 | m_spa=dim_embed, 35 | ln_emb=raw_data['counts'], 36 | ln_bot=bottom_mlp_size, 37 | ln_top=top_mlp_size 38 | ) 39 | 40 | auc = tf.keras.metrics.AUC() 41 | 42 | @tf.function 43 | def train_step(dense_features, sparse_features, label): 44 | with tf.GradientTape() as tape: 45 | loss_value = dlrm_model(dense_features, sparse_features, label) 46 | gradients = tape.gradient(loss_value, dlrm_model.trainable_variables) 47 | optimizer.apply_gradients(zip(gradients, dlrm_model.trainable_variables)) 48 | return loss_value 49 | 50 | @tf.function 51 | def eval_step(dense_features, sparse_features, label): 52 | pred = dlrm_model.inference(dense_features, sparse_features) 53 | auc.update_state(y_true=label, y_pred=pred) 54 | 55 | average_loss = tf.keras.metrics.Mean() 56 | 57 | for train_iter, batch_data in enumerate(train_dataset): 58 | 59 | loss = train_step(**batch_data) 60 | average_loss.update_state(loss) 61 | print('%d iter training.' 
% train_iter, end='\r') 62 | 63 | if train_iter % eval_interval == 0: 64 | for eval_batch_data in tqdm(val_dataset, 65 | leave=False, 66 | desc='%d iter evaluation' % train_iter): 67 | eval_step(**eval_batch_data) 68 | print("Iter: %d, Loss: %.2f, AUC: %.4f" % (train_iter, 69 | average_loss.result().numpy(), 70 | auc.result().numpy())) 71 | average_loss.reset_states() 72 | auc.reset_states() --------------------------------------------------------------------------------
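As a follow-up to dlrm_criteo.py above, here is a minimal sketch of scoring the held-out test split once training finishes. It reuses only calls already shown in the script (Dataset.from_tensor_slices, dlrm_model.inference, tf.keras.metrics.AUC); the 100-batch sample size mirrors the validation setup and is an arbitrary choice, not part of the original example.

# Hypothetical post-training evaluation on the Criteo test split; assumes the
# training loop above has completed and dlrm_model holds the trained weights.
test_dataset = Dataset.from_tensor_slices({
    'dense_features': raw_data['X_int_test'][:batch_size*100],
    'sparse_features': raw_data['X_cat_test'][:batch_size*100],
    'label': raw_data['y_test'][:batch_size*100]
}).batch(batch_size)

test_auc = tf.keras.metrics.AUC()
for batch_data in test_dataset:
    pred = dlrm_model.inference(batch_data['dense_features'],
                                batch_data['sparse_features'])
    test_auc.update_state(y_true=batch_data['label'], y_pred=pred)
print('Test AUC: %.4f' % test_auc.result().numpy())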