├── .gitignore ├── AutoEncoder.png ├── LICENSE ├── Pipfile ├── Pipfile.lock ├── README.md ├── azkaban ├── AutoRec │ ├── ConstrainedRecoEncoder.job │ ├── ConstrainedRecoEncoderNoLastLayerNl.job │ ├── RecoEncoder.job │ ├── RecoEncoderNoLastLayerNl.job │ ├── done.job │ └── netflix_data_preprocess.job └── AutoRecAllSplits │ ├── RecoEncoder1Y.job │ ├── RecoEncoderN3m.job │ ├── RecoEncoderN6m.job │ ├── RecoEncoderNF.job │ ├── done.job │ └── netflix_data_preprocess.job ├── compute_RMSE.py ├── data_utils ├── movie_lense_data_converter.py └── netflix_data_convert.py ├── infer.py ├── logger.py ├── reco_encoder ├── __init__.py ├── data │ ├── __init__.py │ └── input_layer.py └── model │ ├── __init__.py │ └── model.py ├── run.py └── test ├── __init__.py ├── data_layer_tests.py ├── testData_iRec ├── .part-00199-f683aa3b-8840-4835-b8bc-a8d1eaa11c78.txt.crc ├── _SUCCESS ├── part-00000-f683aa3b-8840-4835-b8bc-a8d1eaa11c78.txt └── part-00003-f683aa3b-8840-4835-b8bc-a8d1eaa11c78.txt ├── testData_uRec ├── ._SUCCESS.crc ├── .part-00000-4a844096-8dd9-425e-9d9d-bd9062cc6940.txt.crc ├── part-00161-4a844096-8dd9-425e-9d9d-bd9062cc6940.txt ├── part-00196-4a844096-8dd9-425e-9d9d-bd9062cc6940.txt └── part-00199-4a844096-8dd9-425e-9d9d-bd9062cc6940.txt └── test_model.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Data files and folders 2 | download/ 3 | training_set/ 4 | nf_prize_dataset.tar.gz 5 | Netflix/ 6 | 7 | # Byte-compiled / optimized / DLL files 8 | __pycache__/ 9 | *.py[cod] 10 | *$py.class 11 | 12 | # C extensions 13 | *.so 14 | 15 | # Distribution / packaging 16 | .Python 17 | env/ 18 | build/ 19 | develop-eggs/ 20 | dist/ 21 | downloads/ 22 | eggs/ 23 | .eggs/ 24 | lib/ 25 | lib64/ 26 | parts/ 27 | sdist/ 28 | var/ 29 | wheels/ 30 | *.egg-info/ 31 | .installed.cfg 32 | *.egg 33 | 34 | # PyInstaller 35 | # Usually these files are written by a python script from a template 36 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
37 | *.manifest 38 | *.spec 39 | 40 | # Installer logs 41 | pip-log.txt 42 | pip-delete-this-directory.txt 43 | 44 | # Unit test / coverage reports 45 | htmlcov/ 46 | .tox/ 47 | .coverage 48 | .coverage.* 49 | .cache 50 | nosetests.xml 51 | coverage.xml 52 | *.cover 53 | .hypothesis/ 54 | 55 | # Translations 56 | *.mo 57 | *.pot 58 | 59 | # Django stuff: 60 | *.log 61 | local_settings.py 62 | 63 | # Flask stuff: 64 | instance/ 65 | .webassets-cache 66 | 67 | # Scrapy stuff: 68 | .scrapy 69 | 70 | # Sphinx documentation 71 | docs/_build/ 72 | 73 | # PyBuilder 74 | target/ 75 | 76 | # Jupyter Notebook 77 | .ipynb_checkpoints 78 | 79 | # pyenv 80 | .python-version 81 | 82 | # celery beat schedule file 83 | celerybeat-schedule 84 | 85 | # SageMath parsed files 86 | *.sage.py 87 | 88 | # dotenv 89 | .env 90 | 91 | # virtualenv 92 | .venv 93 | venv/ 94 | ENV/ 95 | 96 | # Spyder project settings 97 | .spyderproject 98 | .spyproject 99 | 100 | # Rope project settings 101 | .ropeproject 102 | 103 | # mkdocs documentation 104 | /site 105 | 106 | # mypy 107 | .mypy_cache/ 108 | -------------------------------------------------------------------------------- /AutoEncoder.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/DeepRecommender/a32a8a5c23092c551616acf6fac5b32e1155d18b/AutoEncoder.png -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017 NVIDIA Corporation 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /Pipfile: -------------------------------------------------------------------------------- 1 | [[source]] 2 | name = "pypi" 3 | url = "https://pypi.org/simple" 4 | verify_ssl = true 5 | 6 | [dev-packages] 7 | 8 | [packages] 9 | torch = "*" 10 | numpy = "*" 11 | 12 | [requires] 13 | python_version = "3.6" 14 | -------------------------------------------------------------------------------- /Pipfile.lock: -------------------------------------------------------------------------------- 1 | { 2 | "_meta": { 3 | "hash": { 4 | "sha256": "3be011e0d66a2db2fc86c9454891ca8da0bb70018ae828e6a08d97fc6e744a14" 5 | }, 6 | "pipfile-spec": 6, 7 | "requires": { 8 | "python_version": "3.6" 9 | }, 10 | "sources": [ 11 | { 12 | "name": "pypi", 13 | "url": "https://pypi.org/simple", 14 | "verify_ssl": true 15 | } 16 | ] 17 | }, 18 | "default": { 19 | "numpy": { 20 | "hashes": [ 21 | "sha256:0df89ca13c25eaa1621a3f09af4c8ba20da849692dcae184cb55e80952c453fb", 22 | "sha256:154c35f195fd3e1fad2569930ca51907057ae35e03938f89a8aedae91dd1b7c7", 23 | "sha256:18e84323cdb8de3325e741a7a8dd4a82db74fde363dce32b625324c7b32aa6d7", 24 | "sha256:1e8956c37fc138d65ded2d96ab3949bd49038cc6e8a4494b1515b0ba88c91565", 25 | "sha256:23557bdbca3ccbde3abaa12a6e82299bc92d2b9139011f8c16ca1bb8c75d1e95", 26 | "sha256:24fd645a5e5d224aa6e39d93e4a722fafa9160154f296fd5ef9580191c755053", 27 | "sha256:36e36b6868e4440760d4b9b44587ea1dc1f06532858d10abba98e851e154ca70", 28 | "sha256:3d734559db35aa3697dadcea492a423118c5c55d176da2f3be9c98d4803fc2a7", 29 | "sha256:416a2070acf3a2b5d586f9a6507bb97e33574df5bd7508ea970bbf4fc563fa52", 30 | "sha256:4a22dc3f5221a644dfe4a63bf990052cc674ef12a157b1056969079985c92816", 31 | "sha256:4d8d3e5aa6087490912c14a3c10fbdd380b40b421c13920ff468163bc50e016f", 32 | "sha256:4f41fd159fba1245e1958a99d349df49c616b133636e0cf668f169bce2aeac2d", 33 | "sha256:561ef098c50f91fbac2cc9305b68c915e9eb915a74d9038ecf8af274d748f76f", 34 | "sha256:56994e14b386b5c0a9b875a76d22d707b315fa037affc7819cda08b6d0489756", 35 | "sha256:73a1f2a529604c50c262179fcca59c87a05ff4614fe8a15c186934d84d09d9a5", 36 | "sha256:7da99445fd890206bfcc7419f79871ba8e73d9d9e6b82fe09980bc5bb4efc35f", 37 | "sha256:99d59e0bcadac4aa3280616591fb7bcd560e2218f5e31d5223a2e12a1425d495", 38 | "sha256:a4cc09489843c70b22e8373ca3dfa52b3fab778b57cf81462f1203b0852e95e3", 39 | "sha256:a61dc29cfca9831a03442a21d4b5fd77e3067beca4b5f81f1a89a04a71cf93fa", 40 | "sha256:b1853df739b32fa913cc59ad9137caa9cc3d97ff871e2bbd89c2a2a1d4a69451", 41 | "sha256:b1f44c335532c0581b77491b7715a871d0dd72e97487ac0f57337ccf3ab3469b", 42 | "sha256:b261e0cb0d6faa8fd6863af26d30351fd2ffdb15b82e51e81e96b9e9e2e7ba16", 43 | "sha256:c857ae5dba375ea26a6228f98c195fec0898a0fd91bcf0e8a0cae6d9faf3eca7", 44 | "sha256:cf5bb4a7d53a71bb6a0144d31df784a973b36d8687d615ef6a7e9b1809917a9b", 45 | "sha256:db9814ff0457b46f2e1d494c1efa4111ca089e08c8b983635ebffb9c1573361f", 46 | "sha256:df04f4bad8a359daa2ff74f8108ea051670cafbca533bb2636c58b16e962989e", 47 | "sha256:ecf81720934a0e18526177e645cbd6a8a21bb0ddc887ff9738de07a1df5c6b61", 48 | "sha256:edfa6fba9157e0e3be0f40168eb142511012683ac3dc82420bee4a3f3981b30e" 49 | ], 50 | "index": "pypi", 51 | "version": "==1.15.4" 52 | }, 53 | "torch": { 54 | "hashes": [ 55 | "sha256:012a9c7efce86c7a0ce78cd7945fe7c798049537fc3e85af9f14e8789d13c17f", 56 | "sha256:4aadc7124afc431ac6a227a05dc8eff417b6fd8f90fc6c44d514ddfca9a6b474", 57 | "sha256:53e12607830ccb1e5fc4076aafe19bdbbc380799793fbaad696714b72859bde6", 58 | 
"sha256:7e73a141bf817c0a914131dec51ea24a2f1946b96749b003af664230a9b95197", 59 | "sha256:cb92ac65fcc7685fa6c5920b24101182dcb706d841fc6154ada604f749b615e3", 60 | "sha256:cedbc382a0e992a169c73d2c469887c2e5ce0c6fa88b1dabe8f9021e1acb564f", 61 | "sha256:ded9e2e59c086127423c23e902e2bec42b3b443a0e458fae76c013f62a7e0748", 62 | "sha256:df005dff3e3f12911630e48e0e75d3594a424a317d785b49426c23d0810a4682", 63 | "sha256:f4196ce8ba17797f3c2d13c0d53cf461a8e32f6130a08e6e4ce917637afccdc6" 64 | ], 65 | "index": "pypi", 66 | "version": "==1.0.0" 67 | } 68 | }, 69 | "develop": {} 70 | } 71 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Deep AutoEncoders for Collaborative Filtering 2 | This is not an official NVIDIA product. It is a research project described in: "Training Deep AutoEncoders for Collaborative Filtering"(https://arxiv.org/abs/1708.01715) 3 | 4 | ### The model 5 | The model is based on deep AutoEncoders. 6 | 7 | ![AutEncoderPic](./AutoEncoder.png) 8 | 9 | ## Requirements 10 | * Python 3.6 11 | * [Pytorch](http://pytorch.org/): `pipenv install` 12 | * CUDA (recommended version >= 8.0) 13 | 14 | 15 | ## Training using mixed precision with Tensor Cores 16 | * You would need NVIDIA Volta-based GPU 17 | * Checkout [mixed precision branch](https://github.com/NVIDIA/DeepRecommender/tree/mp_branch) 18 | * For theory on mixed precision training see [Mixed Precision Training paper](https://arxiv.org/abs/1710.03740) 19 | 20 | ## Getting Started 21 | 22 | ### Run unittests first 23 | The code is intended to run on GPU. Last test can take a minute or two. 24 | ``` 25 | $ python -m unittest test/data_layer_tests.py 26 | $ python -m unittest test/test_model.py 27 | ``` 28 | 29 | ### Tutorial 30 | Checkout [this tutorial](https://github.com/miguelgfierro/sciblog_support/blob/master/Intro_to_Recommendation_Systems/Intro_Recommender.ipynb) by [miguelgfierro](https://github.com/miguelgfierro). 31 | 32 | ### Get the data 33 | 34 | **Note: Run all these commands within your `DeepRecommender` folder** 35 | 36 | [Netflix prize](http://netflixprize.com/) 37 | 38 | * Download from [here](http://academictorrents.com/details/9b13183dc4d60676b773c9e2cd6de5e5542cee9a) into your ```DeepRecommender``` folder 39 | ``` 40 | $ tar -xvf nf_prize_dataset.tar.gz 41 | $ tar -xf download/training_set.tar 42 | $ python ./data_utils/netflix_data_convert.py training_set Netflix 43 | ``` 44 | 45 | #### Data stats 46 | | Dataset | Netflix 3 months | Netflix 6 months | Netflix 1 year | Netflix full | 47 | | -------- | ---------------- | ---------------- | ----------- | ------------ | 48 | | Ratings train | 13,675,402 | 29,179,009 | 41,451,832 | 98,074,901 | 49 | | Users train | 311,315 |390,795 | 345,855 | 477,412 | 50 | | Items train | 17,736 |17,757 | 16,907 | 17,768 | 51 | | Time range train | 2005-09-01 to 2005-11-31 | 2005-06-01 to 2005-11-31 | 2004-06-01 to 2005-05-31 | 1999-12-01 to 2005-11-31 52 | | -------- | ---------------- | ----------- | ------------ | 53 | | Ratings test | 2,082,559 | 2,175,535 | 3,888,684| 2,250,481 | 54 | | Users test | 160,906 | 169,541 | 197,951| 173,482 | 55 | | Items test | 17,261 | 17,290 | 16,506| 17,305 | 56 | | Time range test | 2005-12-01 to 2005-12-31 | 2005-12-01 to 2005-12-31 | 2005-06-01 to 2005-06-31 | 2005-12-01 to 2005-12-31 57 | 58 | ### Train the model 59 | In this example, the model will be trained for 12 epochs. In paper we train for 102. 
60 | ```
61 | python run.py --gpu_ids 0 \
62 | --path_to_train_data Netflix/NF_TRAIN \
63 | --path_to_eval_data Netflix/NF_VALID \
64 | --hidden_layers 512,512,1024 \
65 | --non_linearity_type selu \
66 | --batch_size 128 \
67 | --logdir model_save \
68 | --drop_prob 0.8 \
69 | --optimizer momentum \
70 | --lr 0.005 \
71 | --weight_decay 0 \
72 | --aug_step 1 \
73 | --noise_prob 0 \
74 | --num_epochs 12 \
75 | --summary_frequency 1000
76 | ```
77 | 
78 | Note that you can run TensorBoard in parallel:
79 | ```
80 | $ tensorboard --logdir=model_save
81 | ```
82 | 
83 | ### Run inference on the Test set
84 | ```
85 | python infer.py \
86 | --path_to_train_data Netflix/NF_TRAIN \
87 | --path_to_eval_data Netflix/NF_TEST \
88 | --hidden_layers 512,512,1024 \
89 | --non_linearity_type selu \
90 | --save_path model_save/model.epoch_11 \
91 | --drop_prob 0.8 \
92 | --predictions_path preds.txt
93 | ```
94 | 
95 | ### Compute Test RMSE
96 | ```
97 | python compute_RMSE.py --path_to_predictions=preds.txt
98 | ```
99 | After 12 epochs you should get an RMSE of around 0.927. Train longer to get below 0.92.
100 | 
101 | # Results
102 | It should be possible to achieve the following results. Iterative output re-feeding should be applied
103 | once during each iteration.
104 | 
105 | (exact numbers will vary due to randomization)
106 | 
107 | | DataSet | RMSE | Model Architecture |
108 | | -------- | ---------------- | ---------------- |
109 | | Netflix 3 months | 0.9373 | n,128,256,256,dp(0.65),256,128,n |
110 | | Netflix 6 months | 0.9207 | n,256,256,512,dp(0.8),256,256,n |
111 | | Netflix 1 year | 0.9225 | n,256,256,512,dp(0.8),256,256,n |
112 | | Netflix full | 0.9099 | n,512,512,1024,dp(0.8),512,512,n |
113 | -------------------------------------------------------------------------------- /azkaban/AutoRec/ConstrainedRecoEncoder.job: -------------------------------------------------------------------------------- 1 | # simple constrained CF auto encoder job 2 | env.CUDA_VISIBLE_DEVICES=${CUDA_VISIBLE_DEVICES} 3 | type=command 4 | dependencies=netflix_data_preprocess 5 | command=python ${source_dir}/run.py --gpu_ids 0 \ 6 | --path_to_train_data ${save_root}/Netflix/NF_TRAIN \ 7 | --path_to_eval_data ${save_root}/Netflix/NF_VALID \ 8 | --hidden_layers ${hidden_layers} \ 9 | --non_linearity_type ${nl_kind} \ 10 | --batch_size ${batch_size} \ 11 | --logdir ${save_root}/c-nfull-aug-${aug_step}-np-${noise_prob}-${hidden_layers}-bs-${batch_size}-${nl_kind}-dp-${drop_prob}-${num_epochs}-${weight_decay}-${lr}-${optimizer} \ 12 | --drop_prob ${drop_prob} \ 13 | --optimizer ${optimizer} \ 14 | --lr ${lr} \ 15 | --constrained \ 16 | --weight_decay ${weight_decay} \ 17 | --aug_step ${aug_step} \ 18 | --noise_prob ${noise_prob} \ 19 | --num_epochs ${num_epochs} \ 20 | --summary_frequency ${summary_frequency} 21 | -------------------------------------------------------------------------------- /azkaban/AutoRec/ConstrainedRecoEncoderNoLastLayerNl.job: -------------------------------------------------------------------------------- 1 | # simple constrained CF auto encoder job 2 | env.CUDA_VISIBLE_DEVICES=${CUDA_VISIBLE_DEVICES} 3 | type=command 4 | dependencies=netflix_data_preprocess 5 | command=python ${source_dir}/run.py --gpu_ids 0 \ 6 | --path_to_train_data ${save_root}/Netflix/NF_TRAIN \ 7 | --path_to_eval_data ${save_root}/Netflix/NF_VALID \ 8 | --hidden_layers ${hidden_layers} \ 9 | --non_linearity_type ${nl_kind} \ 10 | --batch_size ${batch_size} \ 11 | --logdir
${save_root}/c-nfull-aug-${aug_step}-np-${noise_prob}-${hidden_layers}-bs-${batch_size}-${nl_kind}-noll-dp-${drop_prob}-${num_epochs}-${weight_decay}-${lr}-${optimizer} \ 12 | --drop_prob ${drop_prob} \ 13 | --optimizer ${optimizer} \ 14 | --lr ${lr} \ 15 | --constrained \ 16 | --skip_last_layer_nl \ 17 | --weight_decay ${weight_decay} \ 18 | --aug_step ${aug_step} \ 19 | --noise_prob ${noise_prob} \ 20 | --num_epochs ${num_epochs} \ 21 | --summary_frequency ${summary_frequency} 22 | -------------------------------------------------------------------------------- /azkaban/AutoRec/RecoEncoder.job: -------------------------------------------------------------------------------- 1 | # simple CF auto encoder job 2 | env.CUDA_VISIBLE_DEVICES=${CUDA_VISIBLE_DEVICES} 3 | type=command 4 | dependencies=netflix_data_preprocess 5 | command=python ${source_dir}/run.py --gpu_ids 0 \ 6 | --path_to_train_data ${save_root}/Netflix/NF_TRAIN \ 7 | --path_to_eval_data ${save_root}/Netflix/NF_VALID \ 8 | --hidden_layers ${hidden_layers} \ 9 | --non_linearity_type ${nl_kind} \ 10 | --batch_size ${batch_size} \ 11 | --logdir ${save_root}/nfull-aug-${aug_step}-np-${noise_prob}-${hidden_layers}-bs-${batch_size}-${nl_kind}-dp-${drop_prob}-${num_epochs}-${weight_decay}-${lr}-${optimizer} \ 12 | --drop_prob ${drop_prob} \ 13 | --optimizer ${optimizer} \ 14 | --lr ${lr} \ 15 | --weight_decay ${weight_decay} \ 16 | --aug_step ${aug_step} \ 17 | --noise_prob ${noise_prob} \ 18 | --num_epochs ${num_epochs} \ 19 | --summary_frequency ${summary_frequency} -------------------------------------------------------------------------------- /azkaban/AutoRec/RecoEncoderNoLastLayerNl.job: -------------------------------------------------------------------------------- 1 | # simple CF auto encoder job 2 | env.CUDA_VISIBLE_DEVICES=${CUDA_VISIBLE_DEVICES} 3 | type=command 4 | dependencies=netflix_data_preprocess 5 | command=python ${source_dir}/run.py --gpu_ids 0 \ 6 | --path_to_train_data ${save_root}/Netflix/NF_TRAIN \ 7 | --path_to_eval_data ${save_root}/Netflix/NF_VALID \ 8 | --hidden_layers ${hidden_layers} \ 9 | --non_linearity_type ${nl_kind} \ 10 | --batch_size ${batch_size} \ 11 | --logdir ${save_root}/nfull-aug-${aug_step}-np-${noise_prob}-${hidden_layers}-bs-${batch_size}-${nl_kind}-noll-dp-${drop_prob}-${num_epochs}-${weight_decay}-${lr}-${optimizer} \ 12 | --drop_prob ${drop_prob} \ 13 | --optimizer ${optimizer} \ 14 | --lr ${lr} \ 15 | --weight_decay ${weight_decay} \ 16 | --aug_step ${aug_step} \ 17 | --noise_prob ${noise_prob} \ 18 | --num_epochs ${num_epochs} \ 19 | --skip_last_layer_nl \ 20 | --summary_frequency ${summary_frequency} -------------------------------------------------------------------------------- /azkaban/AutoRec/done.job: -------------------------------------------------------------------------------- 1 | type=command 2 | dependencies=ConstrainedRecoEncoder,RecoEncoder,ConstrainedRecoEncoderNoLastLayerNl,RecoEncoderNoLastLayerNl 3 | command=echo 'Done' -------------------------------------------------------------------------------- /azkaban/AutoRec/netflix_data_preprocess.job: -------------------------------------------------------------------------------- 1 | type=command 2 | command=mkdir -p ${save_root}/Netflix/N3M_TRAIN 3 | command.1=mkdir -p ${save_root}/Netflix/N3M_VALID 4 | command.2=mkdir -p ${save_root}/Netflix/N3M_TEST 5 | 6 | command.3=mkdir -p ${save_root}/Netflix/N6M_TRAIN 7 | command.4=mkdir -p ${save_root}/Netflix/N6M_VALID 8 | command.5=mkdir -p ${save_root}/Netflix/N6M_TEST 9 | 10 
| command.6=mkdir -p ${save_root}/Netflix/N1Y_TRAIN 11 | command.7=mkdir -p ${save_root}/Netflix/N1Y_VALID 12 | command.8=mkdir -p ${save_root}/Netflix/N1Y_TEST 13 | 14 | command.9=mkdir -p ${save_root}/Netflix/NF_TRAIN 15 | command.10=mkdir -p ${save_root}/Netflix/NF_VALID 16 | command.11=mkdir -p ${save_root}/Netflix/NF_TEST 17 | 18 | command.12=python ${source_dir}/data_utils/netflix_data_convert.py ${path2raw_netflix_train} ${save_root}/Netflix -------------------------------------------------------------------------------- /azkaban/AutoRecAllSplits/RecoEncoder1Y.job: -------------------------------------------------------------------------------- 1 | # simple CF auto encoder job 2 | env.CUDA_VISIBLE_DEVICES=0 3 | source_dir=/home/okuchaiev/repos/cuRecs/code/RecoEncoder 4 | type=command 5 | dependencies=netflix_data_preprocess 6 | command=python ${source_dir}/run.py --gpu_ids 0 \ 7 | --path_to_train_data ${save_root}/Netflix/N1Y_TRAIN \ 8 | --path_to_eval_data ${save_root}/Netflix/N1Y_VALID \ 9 | --hidden_layers ${hidden_layers} \ 10 | --non_linearity_type ${nl_kind} \ 11 | --batch_size ${batch_size} \ 12 | --logdir ${save_root}/n1y-aug-${aug_step}-np-${noise_prob}-${hidden_layers}-bs-${batch_size}-${nl_kind}-dp-${drop_prob}-${num_epochs}-${weight_decay}-${lr}-${optimizer} \ 13 | --drop_prob ${drop_prob} \ 14 | --optimizer ${optimizer} \ 15 | --lr ${lr} \ 16 | --weight_decay ${weight_decay} \ 17 | --aug_step ${aug_step} \ 18 | --noise_prob ${noise_prob} \ 19 | --num_epochs ${num_epochs} \ 20 | --summary_frequency ${summary_frequency} -------------------------------------------------------------------------------- /azkaban/AutoRecAllSplits/RecoEncoderN3m.job: -------------------------------------------------------------------------------- 1 | # simple CF auto encoder job 2 | env.CUDA_VISIBLE_DEVICES=1 3 | source_dir=/home/okuchaiev/repos/cuRecs/code/RecoEncoder 4 | type=command 5 | dependencies=netflix_data_preprocess 6 | command=python ${source_dir}/run.py --gpu_ids 0 \ 7 | --path_to_train_data ${save_root}/Netflix/N3M_TRAIN \ 8 | --path_to_eval_data ${save_root}/Netflix/N3M_VALID \ 9 | --hidden_layers ${hidden_layers} \ 10 | --non_linearity_type ${nl_kind} \ 11 | --batch_size ${batch_size} \ 12 | --logdir ${save_root}/n3m-aug-${aug_step}-np-${noise_prob}-${hidden_layers}-bs-${batch_size}-${nl_kind}-dp-${drop_prob}-${num_epochs}-${weight_decay}-${lr}-${optimizer} \ 13 | --drop_prob ${drop_prob} \ 14 | --optimizer ${optimizer} \ 15 | --lr ${lr} \ 16 | --weight_decay ${weight_decay} \ 17 | --aug_step ${aug_step} \ 18 | --noise_prob ${noise_prob} \ 19 | --num_epochs ${num_epochs} \ 20 | --summary_frequency ${summary_frequency} -------------------------------------------------------------------------------- /azkaban/AutoRecAllSplits/RecoEncoderN6m.job: -------------------------------------------------------------------------------- 1 | # simple CF auto encoder job 2 | env.CUDA_VISIBLE_DEVICES=2 3 | source_dir=/home/okuchaiev/repos/cuRecs/code/RecoEncoder 4 | type=command 5 | dependencies=netflix_data_preprocess 6 | command=python ${source_dir}/run.py --gpu_ids 0 \ 7 | --path_to_train_data ${save_root}/Netflix/N6M_TRAIN \ 8 | --path_to_eval_data ${save_root}/Netflix/N6M_VALID \ 9 | --hidden_layers ${hidden_layers} \ 10 | --non_linearity_type ${nl_kind} \ 11 | --batch_size ${batch_size} \ 12 | --logdir ${save_root}/n6m-aug-${aug_step}-np-${noise_prob}-${hidden_layers}-bs-${batch_size}-${nl_kind}-dp-${drop_prob}-${num_epochs}-${weight_decay}-${lr}-${optimizer} \ 13 | --drop_prob ${drop_prob} 
\ 14 | --optimizer ${optimizer} \ 15 | --lr ${lr} \ 16 | --weight_decay ${weight_decay} \ 17 | --aug_step ${aug_step} \ 18 | --noise_prob ${noise_prob} \ 19 | --num_epochs ${num_epochs} \ 20 | --summary_frequency ${summary_frequency} -------------------------------------------------------------------------------- /azkaban/AutoRecAllSplits/RecoEncoderNF.job: -------------------------------------------------------------------------------- 1 | # simple CF auto encoder job 2 | env.CUDA_VISIBLE_DEVICES=3 3 | source_dir=/home/okuchaiev/repos/cuRecs/code/RecoEncoder 4 | type=command 5 | dependencies=netflix_data_preprocess 6 | command=python ${source_dir}/run.py --gpu_ids 0 \ 7 | --path_to_train_data ${save_root}/Netflix/NF_TRAIN \ 8 | --path_to_eval_data ${save_root}/Netflix/NF_VALID \ 9 | --hidden_layers ${hidden_layers} \ 10 | --non_linearity_type ${nl_kind} \ 11 | --batch_size ${batch_size} \ 12 | --logdir ${save_root}/nfull-aug-${aug_step}-np-${noise_prob}-${hidden_layers}-bs-${batch_size}-${nl_kind}-dp-${drop_prob}-${num_epochs}-${weight_decay}-${lr}-${optimizer} \ 13 | --drop_prob ${drop_prob} \ 14 | --optimizer ${optimizer} \ 15 | --lr ${lr} \ 16 | --weight_decay ${weight_decay} \ 17 | --aug_step ${aug_step} \ 18 | --noise_prob ${noise_prob} \ 19 | --num_epochs ${num_epochs} \ 20 | --summary_frequency ${summary_frequency} -------------------------------------------------------------------------------- /azkaban/AutoRecAllSplits/done.job: -------------------------------------------------------------------------------- 1 | type=command 2 | dependencies=RecoEncoder1Y,RecoEncoderN3m,RecoEncoderN6m,RecoEncoderNF 3 | command=echo 'Done' -------------------------------------------------------------------------------- /azkaban/AutoRecAllSplits/netflix_data_preprocess.job: -------------------------------------------------------------------------------- 1 | source_dir=/home/okuchaiev/repos/cuRecs/code/RecoEncoder 2 | type=command 3 | command=mkdir -p ${save_root}/Netflix/N3M_TRAIN 4 | command.1=mkdir -p ${save_root}/Netflix/N3M_VALID 5 | command.2=mkdir -p ${save_root}/Netflix/N3M_TEST 6 | 7 | command.3=mkdir -p ${save_root}/Netflix/N6M_TRAIN 8 | command.4=mkdir -p ${save_root}/Netflix/N6M_VALID 9 | command.5=mkdir -p ${save_root}/Netflix/N6M_TEST 10 | 11 | command.6=mkdir -p ${save_root}/Netflix/N1Y_TRAIN 12 | command.7=mkdir -p ${save_root}/Netflix/N1Y_VALID 13 | command.8=mkdir -p ${save_root}/Netflix/N1Y_TEST 14 | 15 | command.9=mkdir -p ${save_root}/Netflix/NF_TRAIN 16 | command.10=mkdir -p ${save_root}/Netflix/NF_VALID 17 | command.11=mkdir -p ${save_root}/Netflix/NF_TEST 18 | 19 | command.12=python ${source_dir}/data_utils/netflix_data_convert.py ${path2raw_netflix_train} ${save_root}/Netflix -------------------------------------------------------------------------------- /compute_RMSE.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017 NVIDIA Corporation 2 | import argparse 3 | from math import sqrt 4 | 5 | parser = argparse.ArgumentParser(description='RMSE_calculator') 6 | 7 | parser.add_argument('--path_to_predictions', type=str, default="", metavar='N', 8 | help='Path file with actual ratings and predictions') 9 | parser.add_argument('--round', action='store_true', 10 | help='round predictions to nearest') 11 | 12 | args = parser.parse_args() 13 | print(args) 14 | 15 | def main(): 16 | with open(args.path_to_predictions, 'r') as inpt: 17 | lines = inpt.readlines() 18 | n = 0 19 | denom = 0.0 20 | for line in lines: 21 | parts = 
line.split('\t') 22 | prediction = float(parts[2]) if not args.round else round(float(parts[2])) 23 | rating = float(parts[3]) 24 | denom += (prediction - rating)*(prediction - rating) 25 | n += 1 26 | print("####################") 27 | print("RMSE: {}".format(sqrt(denom/n))) 28 | print("####################") 29 | 30 | if __name__ == '__main__': 31 | main() -------------------------------------------------------------------------------- /data_utils/movie_lense_data_converter.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017 NVIDIA Corporation 2 | import sys 3 | import datetime 4 | import random 5 | from math import floor 6 | 7 | def print_stats(data): 8 | total_ratings = 0 9 | print("STATS") 10 | for user in data: 11 | total_ratings += len(data[user]) 12 | print("Total Ratings: {}".format(total_ratings)) 13 | print("Total User count: {}".format(len(data.keys()))) 14 | 15 | def save_data_to_file(data, filename): 16 | with open(filename, 'w') as out: 17 | for userId in data: 18 | for record in data[userId]: 19 | out.write("{}\t{}\t{}\n".format(userId, record[0], record[1])) 20 | 21 | def main(args): 22 | inpt = args[1] 23 | out_prefix = args[2] 24 | percent = 0.7 25 | user2id_map = dict() 26 | item2id_map = dict() 27 | userId = 0 28 | itemId = 0 29 | data = dict() 30 | 31 | min_ts = 100000000000 32 | max_ts = 0 33 | total_rating_count = 0 34 | with open(inpt, 'r') as inpt_f: #ratings.csv headers: userId,movieId,rating,timestamp 35 | for line in inpt_f: 36 | if 'userId' in line: 37 | continue 38 | parts = line.split(',') 39 | user = int(parts[0]) 40 | item = int(parts[1]) 41 | rating = float(parts[2]) 42 | ts = int(parts[3]) 43 | if min_ts > ts: 44 | min_ts = ts 45 | if max_ts < ts: 46 | max_ts = ts 47 | if not user in user2id_map: 48 | user2id_map[user] = userId 49 | userId += 1 50 | if not item in item2id_map: 51 | item2id_map[item] = itemId 52 | itemId += 1 53 | total_rating_count += 1 54 | if user2id_map[user] not in data: 55 | data[user2id_map[user]] = [] 56 | data[user2id_map[user]].append((item2id_map[item], rating, ts)) 57 | 58 | print("STATS") 59 | print("Total Ratings: {}".format(total_rating_count)) 60 | print("Total User count: {}".format(len(user2id_map))) 61 | print("Total Item count: {}".format(len(item2id_map))) 62 | print("Minimum ts: {}, which is {}".format(min_ts, datetime.datetime.fromtimestamp(min_ts).strftime('%Y-%m-%d'))) 63 | print("Maximum ts: {}, which is {}".format(max_ts, datetime.datetime.fromtimestamp(max_ts).strftime('%Y-%m-%d'))) 64 | 65 | training_data = dict() 66 | validation_data = dict() 67 | test_data = dict() 68 | train_set_items = set() 69 | 70 | for userId in data.keys(): 71 | if len(data[userId]) < 2: 72 | #print("WARNING, userId {} has less than 2 ratings, skipping user...".format(userId)) 73 | continue 74 | time_sorted_ratings = sorted(data[userId], key=lambda x: x[2]) # sort by timestamp 75 | last_train_ind = floor(percent * len(time_sorted_ratings)) 76 | training_data[userId] = time_sorted_ratings[:last_train_ind] 77 | for rating_item in time_sorted_ratings[:last_train_ind]: 78 | train_set_items.add(rating_item[0]) # keep track of items from training set 79 | p = random.random() 80 | if p <= 0.5: 81 | validation_data[userId] = time_sorted_ratings[last_train_ind:] 82 | else: 83 | test_data[userId] = time_sorted_ratings[last_train_ind:] 84 | 85 | # remove items not not seen in training set 86 | for userId, userRatings in test_data.items(): 87 | test_data[userId] = [rating for rating in 
userRatings if rating[0] in train_set_items] 88 | for userId, userRatings in validation_data.items(): 89 | validation_data[userId] = [rating for rating in userRatings if rating[0] in train_set_items] 90 | 91 | print("Training Data") 92 | print_stats(training_data) 93 | save_data_to_file(training_data, out_prefix+".train") 94 | print("Validation Data") 95 | print_stats(validation_data) 96 | save_data_to_file(validation_data, out_prefix + ".valid") 97 | print("Test Data") 98 | print_stats(test_data) 99 | save_data_to_file(test_data, out_prefix + ".test") 100 | 101 | 102 | 103 | if __name__ == "__main__": 104 | main(sys.argv) 105 | 106 | -------------------------------------------------------------------------------- /data_utils/netflix_data_convert.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017 NVIDIA Corporation 2 | from os import listdir, path, makedirs 3 | import random 4 | import sys 5 | import time 6 | import datetime 7 | 8 | def print_stats(data): 9 | total_ratings = 0 10 | print("STATS") 11 | for user in data: 12 | total_ratings += len(data[user]) 13 | print("Total Ratings: {}".format(total_ratings)) 14 | print("Total User count: {}".format(len(data.keys()))) 15 | 16 | def save_data_to_file(data, filename): 17 | with open(filename, 'w') as out: 18 | for userId in data: 19 | for record in data[userId]: 20 | out.write("{}\t{}\t{}\n".format(userId, record[0], record[1])) 21 | 22 | def create_NETFLIX_data_timesplit(all_data, 23 | train_min, 24 | train_max, 25 | test_min, 26 | test_max): 27 | """ 28 | Creates time-based split of NETFLIX data into train, and (validation, test) 29 | :param all_data: 30 | :param train_min: 31 | :param train_max: 32 | :param test_min: 33 | :param test_max: 34 | :return: 35 | """ 36 | train_min_ts = time.mktime(datetime.datetime.strptime(train_min,"%Y-%m-%d").timetuple()) 37 | train_max_ts = time.mktime(datetime.datetime.strptime(train_max, "%Y-%m-%d").timetuple()) 38 | test_min_ts = time.mktime(datetime.datetime.strptime(test_min, "%Y-%m-%d").timetuple()) 39 | test_max_ts = time.mktime(datetime.datetime.strptime(test_max, "%Y-%m-%d").timetuple()) 40 | 41 | training_data = dict() 42 | validation_data = dict() 43 | test_data = dict() 44 | 45 | train_set_items = set() 46 | 47 | for userId, userRatings in all_data.items(): 48 | time_sorted_ratings = sorted(userRatings, key=lambda x: x[2]) # sort by timestamp 49 | for rating_item in time_sorted_ratings: 50 | if rating_item[2] >= train_min_ts and rating_item[2] <= train_max_ts: 51 | if not userId in training_data: 52 | training_data[userId] = [] 53 | training_data[userId].append(rating_item) 54 | train_set_items.add(rating_item[0]) # keep track of items from training set 55 | elif rating_item[2] >= test_min_ts and rating_item[2] <= test_max_ts: 56 | if not userId in training_data: # only include users seen in the training set 57 | continue 58 | p = random.random() 59 | if p <=0.5: 60 | if not userId in validation_data: 61 | validation_data[userId] = [] 62 | validation_data[userId].append(rating_item) 63 | else: 64 | if not userId in test_data: 65 | test_data[userId] = [] 66 | test_data[userId].append(rating_item) 67 | 68 | # remove items not not seen in training set 69 | for userId, userRatings in test_data.items(): 70 | test_data[userId] = [rating for rating in userRatings if rating[0] in train_set_items] 71 | for userId, userRatings in validation_data.items(): 72 | validation_data[userId] = [rating for rating in userRatings if rating[0] in 
train_set_items] 73 | 74 | return training_data, validation_data, test_data 75 | 76 | 77 | def main(args): 78 | user2id_map = dict() 79 | item2id_map = dict() 80 | userId = 0 81 | itemId = 0 82 | all_data = dict() 83 | 84 | folder = args[1] 85 | out_folder = args[2] 86 | # create necessary folders: 87 | for output_dir in [(out_folder + f) for f in [ 88 | "/N3M_TRAIN", "/N3M_VALID", "/N3M_TEST", "/N6M_TRAIN", 89 | "/N6M_VALID", "/N6M_TEST", "/N1Y_TRAIN", "/N1Y_VALID", 90 | "/N1Y_TEST", "/NF_TRAIN", "/NF_VALID", "/NF_TEST"]]: 91 | makedirs(output_dir, exist_ok=True) 92 | 93 | text_files = [path.join(folder, f) 94 | for f in listdir(folder) 95 | if path.isfile(path.join(folder, f)) and ('.txt' in f)] 96 | 97 | for text_file in text_files: 98 | with open(text_file, 'r') as f: 99 | print("Processing: {}".format(text_file)) 100 | lines = f.readlines() 101 | item = int(lines[0][:-2]) # remove newline and : 102 | if not item in item2id_map: 103 | item2id_map[item] = itemId 104 | itemId += 1 105 | 106 | for rating in lines[1:]: 107 | parts = rating.strip().split(",") 108 | user = int(parts[0]) 109 | if not user in user2id_map: 110 | user2id_map[user] = userId 111 | userId += 1 112 | rating = float(parts[1]) 113 | ts = int(time.mktime(datetime.datetime.strptime(parts[2],"%Y-%m-%d").timetuple())) 114 | if user2id_map[user] not in all_data: 115 | all_data[user2id_map[user]] = [] 116 | all_data[user2id_map[user]].append((item2id_map[item], rating, ts)) 117 | 118 | print("STATS FOR ALL INPUT DATA") 119 | print_stats(all_data) 120 | 121 | # Netflix full 122 | (nf_train, nf_valid, nf_test) = create_NETFLIX_data_timesplit(all_data, 123 | "1999-12-01", 124 | "2005-11-30", 125 | "2005-12-01", 126 | "2005-12-31") 127 | print("Netflix full train") 128 | print_stats(nf_train) 129 | save_data_to_file(nf_train, out_folder + "/NF_TRAIN/nf.train.txt") 130 | print("Netflix full valid") 131 | print_stats(nf_valid) 132 | save_data_to_file(nf_valid, out_folder + "/NF_VALID/nf.valid.txt") 133 | print("Netflix full test") 134 | print_stats(nf_test) 135 | save_data_to_file(nf_test, out_folder + "/NF_TEST/nf.test.txt") 136 | 137 | 138 | (n3m_train, n3m_valid, n3m_test) = create_NETFLIX_data_timesplit(all_data, 139 | "2005-09-01", 140 | "2005-11-30", 141 | "2005-12-01", 142 | "2005-12-31") 143 | print("Netflix 3m train") 144 | print_stats(n3m_train) 145 | save_data_to_file(n3m_train, out_folder+"/N3M_TRAIN/n3m.train.txt") 146 | print("Netflix 3m valid") 147 | print_stats(n3m_valid) 148 | save_data_to_file(n3m_valid, out_folder + "/N3M_VALID/n3m.valid.txt") 149 | print("Netflix 3m test") 150 | print_stats(n3m_test) 151 | save_data_to_file(n3m_test, out_folder + "/N3M_TEST/n3m.test.txt") 152 | 153 | (n6m_train, n6m_valid, n6m_test) = create_NETFLIX_data_timesplit(all_data, 154 | "2005-06-01", 155 | "2005-11-30", 156 | "2005-12-01", 157 | "2005-12-31") 158 | print("Netflix 6m train") 159 | print_stats(n6m_train) 160 | save_data_to_file(n6m_train, out_folder+"/N6M_TRAIN/n6m.train.txt") 161 | print("Netflix 6m valid") 162 | print_stats(n6m_valid) 163 | save_data_to_file(n6m_valid, out_folder + "/N6M_VALID/n6m.valid.txt") 164 | print("Netflix 6m test") 165 | print_stats(n6m_test) 166 | save_data_to_file(n6m_test, out_folder + "/N6M_TEST/n6m.test.txt") 167 | 168 | # Netflix 1 year 169 | (n1y_train, n1y_valid, n1y_test) = create_NETFLIX_data_timesplit(all_data, 170 | "2004-06-01", 171 | "2005-05-31", 172 | "2005-06-01", 173 | "2005-06-30") 174 | print("Netflix 1y train") 175 | print_stats(n1y_train) 176 | 
save_data_to_file(n1y_train, out_folder + "/N1Y_TRAIN/n1y.train.txt") 177 | print("Netflix 1y valid") 178 | print_stats(n1y_valid) 179 | save_data_to_file(n1y_valid, out_folder + "/N1Y_VALID/n1y.valid.txt") 180 | print("Netflix 1y test") 181 | print_stats(n1y_test) 182 | save_data_to_file(n1y_test, out_folder + "/N1Y_TEST/n1y.test.txt") 183 | 184 | if __name__ == "__main__": 185 | main(sys.argv) 186 | 187 | -------------------------------------------------------------------------------- /infer.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017 NVIDIA Corporation 2 | import torch 3 | import argparse 4 | import copy 5 | from reco_encoder.data import input_layer 6 | from reco_encoder.model import model 7 | from torch.autograd import Variable 8 | from pathlib import Path 9 | 10 | parser = argparse.ArgumentParser(description='RecoEncoder') 11 | 12 | parser.add_argument('--drop_prob', type=float, default=0.0, metavar='N', 13 | help='dropout drop probability') 14 | parser.add_argument('--constrained', action='store_true', 15 | help='constrained autoencoder') 16 | parser.add_argument('--skip_last_layer_nl', action='store_true', 17 | help='if present, decoder\'s last layer will not apply non-linearity function') 18 | parser.add_argument('--hidden_layers', type=str, default="1024,512,512,128", metavar='N', 19 | help='hidden layer sizes, comma-separated') 20 | parser.add_argument('--path_to_train_data', type=str, default="", metavar='N', 21 | help='Path to training data') 22 | parser.add_argument('--path_to_eval_data', type=str, default="", metavar='N', 23 | help='Path to evaluation data') 24 | parser.add_argument('--non_linearity_type', type=str, default="selu", metavar='N', 25 | help='type of the non-linearity used in activations') 26 | parser.add_argument('--save_path', type=str, default="autorec.pt", metavar='N', 27 | help='where to save model') 28 | parser.add_argument('--predictions_path', type=str, default="out.txt", metavar='N', 29 | help='where to save predictions') 30 | 31 | args = parser.parse_args() 32 | print(args) 33 | 34 | use_gpu = torch.cuda.is_available() # global flag 35 | if use_gpu: 36 | print('GPU is available.') 37 | else: 38 | print('GPU is not available.') 39 | 40 | def main(): 41 | params = dict() 42 | params['batch_size'] = 1 43 | params['data_dir'] = args.path_to_train_data 44 | params['major'] = 'users' 45 | params['itemIdInd'] = 1 46 | params['userIdInd'] = 0 47 | print("Loading training data") 48 | data_layer = input_layer.UserItemRecDataProvider(params=params) 49 | print("Data loaded") 50 | print("Total items found: {}".format(len(data_layer.data.keys()))) 51 | print("Vector dim: {}".format(data_layer.vector_dim)) 52 | 53 | print("Loading eval data") 54 | eval_params = copy.deepcopy(params) 55 | # must set eval batch size to 1 to make sure no examples are missed 56 | eval_params['batch_size'] = 1 57 | eval_params['data_dir'] = args.path_to_eval_data 58 | eval_data_layer = input_layer.UserItemRecDataProvider(params=eval_params, 59 | user_id_map=data_layer.userIdMap, 60 | item_id_map=data_layer.itemIdMap) 61 | 62 | rencoder = model.AutoEncoder(layer_sizes=[data_layer.vector_dim] + [int(l) for l in args.hidden_layers.split(',')], 63 | nl_type=args.non_linearity_type, 64 | is_constrained=args.constrained, 65 | dp_drop_prob=args.drop_prob, 66 | last_layer_activations=not args.skip_last_layer_nl) 67 | 68 | path_to_model = Path(args.save_path) 69 | if path_to_model.is_file(): 70 | print("Loading model from: 
{}".format(path_to_model)) 71 | rencoder.load_state_dict(torch.load(args.save_path)) 72 | 73 | print('######################################################') 74 | print('######################################################') 75 | print('############# AutoEncoder Model: #####################') 76 | print(rencoder) 77 | print('######################################################') 78 | print('######################################################') 79 | rencoder.eval() 80 | if use_gpu: rencoder = rencoder.cuda() 81 | 82 | inv_userIdMap = {v: k for k, v in data_layer.userIdMap.items()} 83 | inv_itemIdMap = {v: k for k, v in data_layer.itemIdMap.items()} 84 | 85 | eval_data_layer.src_data = data_layer.data 86 | with open(args.predictions_path, 'w') as outf: 87 | for i, ((out, src), majorInd) in enumerate(eval_data_layer.iterate_one_epoch_eval(for_inf=True)): 88 | inputs = Variable(src.cuda().to_dense() if use_gpu else src.to_dense()) 89 | targets_np = out.to_dense().numpy()[0, :] 90 | outputs = rencoder(inputs).cpu().data.numpy()[0, :] 91 | non_zeros = targets_np.nonzero()[0].tolist() 92 | major_key = inv_userIdMap [majorInd] 93 | for ind in non_zeros: 94 | outf.write("{}\t{}\t{}\t{}\n".format(major_key, inv_itemIdMap[ind], outputs[ind], targets_np[ind])) 95 | if i % 10000 == 0: 96 | print("Done: {}".format(i)) 97 | 98 | if __name__ == '__main__': 99 | main() 100 | 101 | 102 | -------------------------------------------------------------------------------- /logger.py: -------------------------------------------------------------------------------- 1 | # THIS FILE IS COPY-PASTED FROM HERE: https://github.com/yunjey/pytorch-tutorial/tree/master/tutorials/04-utils/tensorboard 2 | 3 | # Code referenced from https://gist.github.com/gyglim/1f8dfb1b5c82627ae3efcfbbadb9f514 4 | import tensorflow as tf 5 | import numpy as np 6 | import scipy.misc 7 | 8 | try: 9 | from StringIO import StringIO # Python 2.7 10 | except ImportError: 11 | from io import BytesIO # Python 3.x 12 | 13 | 14 | class Logger(object): 15 | def __init__(self, log_dir): 16 | """Create a summary writer logging to log_dir.""" 17 | self.writer = tf.summary.FileWriter(log_dir) 18 | 19 | def scalar_summary(self, tag, value, step): 20 | """Log a scalar variable.""" 21 | summary = tf.Summary(value=[tf.Summary.Value(tag=tag, simple_value=value)]) 22 | self.writer.add_summary(summary, step) 23 | 24 | def image_summary(self, tag, images, step): 25 | """Log a list of images.""" 26 | 27 | img_summaries = [] 28 | for i, img in enumerate(images): 29 | # Write the image to a string 30 | try: 31 | s = StringIO() 32 | except: 33 | s = BytesIO() 34 | scipy.misc.toimage(img).save(s, format="png") 35 | 36 | # Create an Image object 37 | img_sum = tf.Summary.Image(encoded_image_string=s.getvalue(), 38 | height=img.shape[0], 39 | width=img.shape[1]) 40 | # Create a Summary value 41 | img_summaries.append(tf.Summary.Value(tag='%s/%d' % (tag, i), image=img_sum)) 42 | 43 | # Create and write Summary 44 | summary = tf.Summary(value=img_summaries) 45 | self.writer.add_summary(summary, step) 46 | 47 | def histo_summary(self, tag, values, step, bins=1000): 48 | """Log a histogram of the tensor of values.""" 49 | 50 | # Create a histogram using numpy 51 | counts, bin_edges = np.histogram(values, bins=bins) 52 | 53 | # Fill the fields of the histogram proto 54 | hist = tf.HistogramProto() 55 | hist.min = float(np.min(values)) 56 | hist.max = float(np.max(values)) 57 | hist.num = int(np.prod(values.shape)) 58 | hist.sum = float(np.sum(values)) 59 | 
hist.sum_squares = float(np.sum(values ** 2)) 60 | 61 | # Drop the start of the first bin 62 | bin_edges = bin_edges[1:] 63 | 64 | # Add bin edges and counts 65 | for edge in bin_edges: 66 | hist.bucket_limit.append(edge) 67 | for c in counts: 68 | hist.bucket.append(c) 69 | 70 | # Create and write Summary 71 | summary = tf.Summary(value=[tf.Summary.Value(tag=tag, histo=hist)]) 72 | self.writer.add_summary(summary, step) 73 | self.writer.flush() -------------------------------------------------------------------------------- /reco_encoder/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017 NVIDIA Corporation 2 | -------------------------------------------------------------------------------- /reco_encoder/data/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017 NVIDIA Corporation 2 | -------------------------------------------------------------------------------- /reco_encoder/data/input_layer.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017 NVIDIA Corporation 2 | """Data Layer Classes""" 3 | from os import listdir, path 4 | from random import shuffle 5 | import torch 6 | 7 | class UserItemRecDataProvider: 8 | def __init__(self, params, user_id_map=None, item_id_map=None): 9 | self._params = params 10 | self._data_dir = self.params['data_dir'] 11 | self._extension = ".txt" if 'extension' not in self.params else self.params['extension'] 12 | self._i_id = 0 if 'itemIdInd' not in self.params else self.params['itemIdInd'] 13 | self._u_id = 1 if 'userIdInd' not in self.params else self.params['userIdInd'] 14 | self._r_id = 2 if 'ratingInd' not in self.params else self.params['ratingInd'] 15 | self._major = 'items' if 'major' not in self.params else self.params['major'] 16 | if not (self._major == 'items' or self._major == 'users'): 17 | raise ValueError("Major must be 'users' or 'items', but got {}".format(self._major)) 18 | 19 | self._major_ind = self._i_id if self._major == 'items' else self._u_id 20 | self._minor_ind = self._u_id if self._major == 'items' else self._i_id 21 | self._delimiter = '\t' if 'delimiter' not in self.params else self.params['delimiter'] 22 | 23 | if user_id_map is None or item_id_map is None: 24 | self._build_maps() 25 | else: 26 | self._user_id_map = user_id_map 27 | self._item_id_map = item_id_map 28 | 29 | major_map = self._item_id_map if self._major == 'items' else self._user_id_map 30 | minor_map = self._user_id_map if self._major == 'items' else self._item_id_map 31 | self._vector_dim = len(minor_map) 32 | 33 | src_files = [path.join(self._data_dir, f) 34 | for f in listdir(self._data_dir) 35 | if path.isfile(path.join(self._data_dir, f)) and f.endswith(self._extension)] 36 | 37 | self._batch_size = self.params['batch_size'] 38 | 39 | self.data = dict() 40 | 41 | for source_file in src_files: 42 | with open(source_file, 'r') as src: 43 | for line in src.readlines(): 44 | parts = line.strip().split(self._delimiter) 45 | if len(parts)<3: 46 | raise ValueError('Encountered badly formatted line in {}'.format(source_file)) 47 | key = major_map[int(parts[self._major_ind])] 48 | value = minor_map[int(parts[self._minor_ind])] 49 | rating = float(parts[self._r_id]) 50 | #print("Key: {}, Value: {}, Rating: {}".format(key, value, rating)) 51 | if key not in self.data: 52 | self.data[key] = [] 53 | self.data[key].append((value, rating)) 54 | 55 | def _build_maps(self): 56 | 
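    # Scans every source file once and assigns dense, zero-based ids to users and
    # items in the order they are first encountered; these maps define the index
    # space used to build the sparse rating vectors.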
self._user_id_map = dict() 57 | self._item_id_map = dict() 58 | 59 | src_files = [path.join(self._data_dir, f) 60 | for f in listdir(self._data_dir) 61 | if path.isfile(path.join(self._data_dir, f)) and f.endswith(self._extension)] 62 | 63 | u_id = 0 64 | i_id = 0 65 | for source_file in src_files: 66 | with open(source_file, 'r') as src: 67 | for line in src.readlines(): 68 | parts = line.strip().split(self._delimiter) 69 | if len(parts)<3: 70 | raise ValueError('Encountered badly formatted line in {}'.format(source_file)) 71 | 72 | u_id_orig = int(parts[self._u_id]) 73 | if u_id_orig not in self._user_id_map: 74 | self._user_id_map[u_id_orig] = u_id 75 | u_id += 1 76 | 77 | i_id_orig = int(parts[self._i_id]) 78 | if i_id_orig not in self._item_id_map: 79 | self._item_id_map[i_id_orig] = i_id 80 | i_id += 1 81 | 82 | 83 | def iterate_one_epoch(self): 84 | data = self.data 85 | keys = list(data.keys()) 86 | shuffle(keys) 87 | s_ind = 0 88 | e_ind = self._batch_size 89 | while e_ind < len(keys): 90 | local_ind = 0 91 | inds1 = [] 92 | inds2 = [] 93 | vals = [] 94 | for ind in range(s_ind, e_ind): 95 | inds2 += [v[0] for v in data[keys[ind]]] 96 | inds1 += [local_ind]*len([v[0] for v in data[keys[ind]]]) 97 | vals += [v[1] for v in data[keys[ind]]] 98 | local_ind += 1 99 | 100 | i_torch = torch.LongTensor([inds1, inds2]) 101 | v_torch = torch.FloatTensor(vals) 102 | 103 | mini_batch = torch.sparse.FloatTensor(i_torch, v_torch, torch.Size([self._batch_size, self._vector_dim])) 104 | s_ind += self._batch_size 105 | e_ind += self._batch_size 106 | yield mini_batch 107 | 108 | def iterate_one_epoch_eval(self, for_inf=False): 109 | keys = list(self.data.keys()) 110 | s_ind = 0 111 | while s_ind < len(keys): 112 | inds1 = [0] * len([v[0] for v in self.data[keys[s_ind]]]) 113 | inds2 = [v[0] for v in self.data[keys[s_ind]]] 114 | vals = [v[1] for v in self.data[keys[s_ind]]] 115 | 116 | src_inds1 = [0] * len([v[0] for v in self.src_data[keys[s_ind]]]) 117 | src_inds2 = [v[0] for v in self.src_data[keys[s_ind]]] 118 | src_vals = [v[1] for v in self.src_data[keys[s_ind]]] 119 | 120 | i_torch = torch.LongTensor([inds1, inds2]) 121 | v_torch = torch.FloatTensor(vals) 122 | 123 | src_i_torch = torch.LongTensor([src_inds1, src_inds2]) 124 | src_v_torch = torch.FloatTensor(src_vals) 125 | 126 | mini_batch = (torch.sparse.FloatTensor(i_torch, v_torch, torch.Size([1, self._vector_dim])), 127 | torch.sparse.FloatTensor(src_i_torch, src_v_torch, torch.Size([1, self._vector_dim]))) 128 | s_ind += 1 129 | if not for_inf: 130 | yield mini_batch 131 | else: 132 | yield mini_batch, keys[s_ind - 1] 133 | 134 | @property 135 | def vector_dim(self): 136 | return self._vector_dim 137 | 138 | @property 139 | def userIdMap(self): 140 | return self._user_id_map 141 | 142 | @property 143 | def itemIdMap(self): 144 | return self._item_id_map 145 | 146 | @property 147 | def params(self): 148 | return self._params 149 | -------------------------------------------------------------------------------- /reco_encoder/model/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017 NVIDIA Corporation 2 | -------------------------------------------------------------------------------- /reco_encoder/model/model.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017 NVIDIA Corporation 2 | import torch 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | import torch.nn.init as weight_init 6 | from 
torch.autograd import Variable 7 | 8 | def activation(input, kind): 9 | #print("Activation: {}".format(kind)) 10 | if kind == 'selu': 11 | return F.selu(input) 12 | elif kind == 'relu': 13 | return F.relu(input) 14 | elif kind == 'relu6': 15 | return F.relu6(input) 16 | elif kind == 'sigmoid': 17 | return F.sigmoid(input) 18 | elif kind == 'tanh': 19 | return F.tanh(input) 20 | elif kind == 'elu': 21 | return F.elu(input) 22 | elif kind == 'lrelu': 23 | return F.leaky_relu(input) 24 | elif kind == 'swish': 25 | return input*F.sigmoid(input) 26 | elif kind == 'none': 27 | return input 28 | else: 29 | raise ValueError('Unknown non-linearity type') 30 | 31 | def MSEloss(inputs, targets, size_average=False): 32 | mask = targets != 0 33 | num_ratings = torch.sum(mask.float()) 34 | criterion = nn.MSELoss(reduction='sum' if not size_average else 'mean') 35 | return criterion(inputs * mask.float(), targets), Variable(torch.Tensor([1.0])) if size_average else num_ratings 36 | 37 | class AutoEncoder(nn.Module): 38 | def __init__(self, layer_sizes, nl_type='selu', is_constrained=True, dp_drop_prob=0.0, last_layer_activations=True): 39 | """ 40 | Describes an AutoEncoder model 41 | :param layer_sizes: Encoder network description. Should start with feature size (e.g. dimensionality of x). 42 | For example: [10000, 1024, 512] will result in: 43 | - encoder 2 layers: 10000x1024 and 1024x512. Representation layer (z) will be 512 44 | - decoder 2 layers: 512x1024 and 1024x10000. 45 | :param nl_type: (default 'selu') Type of no-linearity 46 | :param is_constrained: (default: True) Should constrain decoder weights 47 | :param dp_drop_prob: (default: 0.0) Dropout drop probability 48 | :param last_layer_activations: (default: True) Whether to apply activations on last decoder layer 49 | """ 50 | super(AutoEncoder, self).__init__() 51 | self._dp_drop_prob = dp_drop_prob 52 | self._last_layer_activations = last_layer_activations 53 | if dp_drop_prob > 0: 54 | self.drop = nn.Dropout(dp_drop_prob) 55 | self._last = len(layer_sizes) - 2 56 | self._nl_type = nl_type 57 | self.encode_w = nn.ParameterList( 58 | [nn.Parameter(torch.rand(layer_sizes[i + 1], layer_sizes[i])) for i in range(len(layer_sizes) - 1)]) 59 | for ind, w in enumerate(self.encode_w): 60 | weight_init.xavier_uniform_(w) 61 | 62 | self.encode_b = nn.ParameterList( 63 | [nn.Parameter(torch.zeros(layer_sizes[i + 1])) for i in range(len(layer_sizes) - 1)]) 64 | 65 | reversed_enc_layers = list(reversed(layer_sizes)) 66 | 67 | self.is_constrained = is_constrained 68 | if not is_constrained: 69 | self.decode_w = nn.ParameterList( 70 | [nn.Parameter(torch.rand(reversed_enc_layers[i + 1], reversed_enc_layers[i])) for i in range(len(reversed_enc_layers) - 1)]) 71 | for ind, w in enumerate(self.decode_w): 72 | weight_init.xavier_uniform(w) 73 | self.decode_b = nn.ParameterList( 74 | [nn.Parameter(torch.zeros(reversed_enc_layers[i + 1])) for i in range(len(reversed_enc_layers) - 1)]) 75 | 76 | print("******************************") 77 | print("******************************") 78 | print(layer_sizes) 79 | print("Dropout drop probability: {}".format(self._dp_drop_prob)) 80 | print("Encoder pass:") 81 | for ind, w in enumerate(self.encode_w): 82 | print(w.data.size()) 83 | print(self.encode_b[ind].size()) 84 | print("Decoder pass:") 85 | if self.is_constrained: 86 | print('Decoder is constrained') 87 | for ind, w in enumerate(list(reversed(self.encode_w))): 88 | print(w.transpose(0, 1).size()) 89 | print(self.decode_b[ind].size()) 90 | else: 91 | for ind, w in 
enumerate(self.decode_w): 92 | print(w.data.size()) 93 | print(self.decode_b[ind].size()) 94 | print("******************************") 95 | print("******************************") 96 | 97 | 98 | def encode(self, x): 99 | for ind, w in enumerate(self.encode_w): 100 | x = activation(input=F.linear(input=x, weight=w, bias=self.encode_b[ind]), kind=self._nl_type) 101 | if self._dp_drop_prob > 0: # apply dropout only on code layer 102 | x = self.drop(x) 103 | return x 104 | 105 | def decode(self, z): 106 | if self.is_constrained: 107 | for ind, w in enumerate(list(reversed(self.encode_w))): # constrained autoencode re-uses weights from encoder 108 | z = activation(input=F.linear(input=z, weight=w.transpose(0, 1), bias=self.decode_b[ind]), 109 | # last layer or decoder should not apply non linearities 110 | kind=self._nl_type if ind!=self._last or self._last_layer_activations else 'none') 111 | #if self._dp_drop_prob > 0 and ind!=self._last: # and no dp on last layer 112 | # z = self.drop(z) 113 | else: 114 | for ind, w in enumerate(self.decode_w): 115 | z = activation(input=F.linear(input=z, weight=w, bias=self.decode_b[ind]), 116 | # last layer or decoder should not apply non linearities 117 | kind=self._nl_type if ind!=self._last or self._last_layer_activations else 'none') 118 | #if self._dp_drop_prob > 0 and ind!=self._last: # and no dp on last layer 119 | # z = self.drop(z) 120 | return z 121 | 122 | def forward(self, x): 123 | return self.decode(self.encode(x)) 124 | 125 | -------------------------------------------------------------------------------- /run.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017 NVIDIA Corporation 2 | import torch 3 | import argparse 4 | from reco_encoder.data import input_layer 5 | from reco_encoder.model import model 6 | import torch.optim as optim 7 | from torch.optim.lr_scheduler import MultiStepLR 8 | import torch.nn as nn 9 | from torch.autograd import Variable 10 | import copy 11 | import time 12 | from pathlib import Path 13 | from logger import Logger 14 | from math import sqrt 15 | import numpy as np 16 | import os 17 | 18 | parser = argparse.ArgumentParser(description='RecoEncoder') 19 | parser.add_argument('--lr', type=float, default=0.00001, metavar='N', 20 | help='learning rate') 21 | parser.add_argument('--weight_decay', type=float, default=0.0, metavar='N', 22 | help='L2 weight decay') 23 | parser.add_argument('--drop_prob', type=float, default=0.0, metavar='N', 24 | help='dropout drop probability') 25 | parser.add_argument('--noise_prob', type=float, default=0.0, metavar='N', 26 | help='noise probability') 27 | parser.add_argument('--batch_size', type=int, default=64, metavar='N', 28 | help='global batch size') 29 | parser.add_argument('--summary_frequency', type=int, default=100, metavar='N', 30 | help='how often to save summaries') 31 | parser.add_argument('--aug_step', type=int, default=-1, metavar='N', 32 | help='do data augmentation every X step') 33 | parser.add_argument('--constrained', action='store_true', 34 | help='constrained autoencoder') 35 | parser.add_argument('--skip_last_layer_nl', action='store_true', 36 | help='if present, decoder\'s last layer will not apply non-linearity function') 37 | parser.add_argument('--num_epochs', type=int, default=50, metavar='N', 38 | help='maximum number of epochs') 39 | parser.add_argument('--save_every', type=int, default=3, metavar='N', 40 | help='save every N number of epochs') 41 | parser.add_argument('--optimizer', type=str, 
default="momentum", metavar='N', 42 | help='optimizer kind: adam, momentum, adagrad or rmsprop') 43 | parser.add_argument('--hidden_layers', type=str, default="1024,512,512,128", metavar='N', 44 | help='hidden layer sizes, comma-separated') 45 | parser.add_argument('--gpu_ids', type=str, default="0", metavar='N', 46 | help='comma-separated gpu ids to use for data parallel training') 47 | parser.add_argument('--path_to_train_data', type=str, default="", metavar='N', 48 | help='Path to training data') 49 | parser.add_argument('--path_to_eval_data', type=str, default="", metavar='N', 50 | help='Path to evaluation data') 51 | parser.add_argument('--non_linearity_type', type=str, default="selu", metavar='N', 52 | help='type of the non-linearity used in activations') 53 | parser.add_argument('--logdir', type=str, default="logs", metavar='N', 54 | help='where to save model and write logs') 55 | 56 | args = parser.parse_args() 57 | print(args) 58 | 59 | use_gpu = torch.cuda.is_available() # global flag 60 | if use_gpu: 61 | print('GPU is available.') 62 | else: 63 | print('GPU is not available.') 64 | 65 | def do_eval(encoder, evaluation_data_layer): 66 | encoder.eval() 67 | denom = 0.0 68 | total_epoch_loss = 0.0 69 | for i, (eval, src) in enumerate(evaluation_data_layer.iterate_one_epoch_eval()): 70 | inputs = Variable(src.cuda().to_dense() if use_gpu else src.to_dense()) 71 | targets = Variable(eval.cuda().to_dense() if use_gpu else eval.to_dense()) 72 | outputs = encoder(inputs) 73 | loss, num_ratings = model.MSEloss(outputs, targets) 74 | total_epoch_loss += loss.item() 75 | denom += num_ratings.item() 76 | return sqrt(total_epoch_loss / denom) 77 | 78 | def log_var_and_grad_summaries(logger, layers, global_step, prefix, log_histograms=False): 79 | """ 80 | Logs variable and grad stats for layer. 
Transfers data from GPU to CPU automatically 81 | :param logger: TB logger 82 | :param layers: param list 83 | :param global_step: global step for TB 84 | :param prefix: name prefix 85 | :param log_histograms: (default: False) whether or not to log histograms 86 | :return: 87 | """ 88 | for ind, w in enumerate(layers): 89 | # Variables 90 | w_var = w.data.cpu().numpy() 91 | logger.scalar_summary("Variables/FrobNorm/{}_{}".format(prefix, ind), np.linalg.norm(w_var), 92 | global_step) 93 | if log_histograms: 94 | logger.histo_summary(tag="Variables/{}_{}".format(prefix, ind), values=w.data.cpu().numpy(), 95 | step=global_step) 96 | 97 | # Gradients 98 | w_grad = w.grad.data.cpu().numpy() 99 | logger.scalar_summary("Gradients/FrobNorm/{}_{}".format(prefix, ind), np.linalg.norm(w_grad), 100 | global_step) 101 | if log_histograms: 102 | logger.histo_summary(tag="Gradients/{}_{}".format(prefix, ind), values=w.grad.data.cpu().numpy(), 103 | step=global_step) 104 | 105 | def main(): 106 | logger = Logger(args.logdir) 107 | params = dict() 108 | params['batch_size'] = args.batch_size 109 | params['data_dir'] = args.path_to_train_data 110 | params['major'] = 'users' 111 | params['itemIdInd'] = 1 112 | params['userIdInd'] = 0 113 | print("Loading training data") 114 | data_layer = input_layer.UserItemRecDataProvider(params=params) 115 | print("Data loaded") 116 | print("Total items found: {}".format(len(data_layer.data.keys()))) 117 | print("Vector dim: {}".format(data_layer.vector_dim)) 118 | 119 | print("Loading eval data") 120 | eval_params = copy.deepcopy(params) 121 | # evaluation re-uses the training params; only the data directory changes 122 | eval_params['data_dir'] = args.path_to_eval_data 123 | eval_data_layer = input_layer.UserItemRecDataProvider(params=eval_params, 124 | user_id_map=data_layer.userIdMap, # the mappings are provided 125 | item_id_map=data_layer.itemIdMap) 126 | eval_data_layer.src_data = data_layer.data 127 | rencoder = model.AutoEncoder(layer_sizes=[data_layer.vector_dim] + [int(l) for l in args.hidden_layers.split(',')], 128 | nl_type=args.non_linearity_type, 129 | is_constrained=args.constrained, 130 | dp_drop_prob=args.drop_prob, 131 | last_layer_activations=not args.skip_last_layer_nl) 132 | os.makedirs(args.logdir, exist_ok=True) 133 | model_checkpoint = args.logdir + "/model" 134 | path_to_model = Path(model_checkpoint) 135 | if path_to_model.is_file(): 136 | print("Loading model from: {}".format(model_checkpoint)) 137 | rencoder.load_state_dict(torch.load(model_checkpoint)) 138 | 139 | print('######################################################') 140 | print('######################################################') 141 | print('############# AutoEncoder Model: #####################') 142 | print(rencoder) 143 | print('######################################################') 144 | print('######################################################') 145 | 146 | gpu_ids = [int(g) for g in args.gpu_ids.split(',')] 147 | print('Using GPUs: {}'.format(gpu_ids)) 148 | if len(gpu_ids)>1: 149 | rencoder = nn.DataParallel(rencoder, 150 | device_ids=gpu_ids) 151 | 152 | if use_gpu: rencoder = rencoder.cuda() 153 | 154 | if args.optimizer == "adam": 155 | optimizer = optim.Adam(rencoder.parameters(), 156 | lr=args.lr, 157 | weight_decay=args.weight_decay) 158 | elif args.optimizer == "adagrad": 159 | optimizer = optim.Adagrad(rencoder.parameters(), 160 | lr=args.lr, 161 | weight_decay=args.weight_decay) 162 | elif args.optimizer == "momentum": 163 | optimizer = 
optim.SGD(rencoder.parameters(), 164 | lr=args.lr, momentum=0.9, 165 | weight_decay=args.weight_decay) 166 | scheduler = MultiStepLR(optimizer, milestones=[24, 36, 48, 66, 72], gamma=0.5) 167 | elif args.optimizer == "rmsprop": 168 | optimizer = optim.RMSprop(rencoder.parameters(), 169 | lr=args.lr, momentum=0.9, 170 | weight_decay=args.weight_decay) 171 | else: 172 | raise ValueError('Unknown optimizer kind') 173 | 174 | t_loss = 0.0 175 | t_loss_denom = 0.0 176 | global_step = 0 177 | 178 | if args.noise_prob > 0.0: 179 | dp = nn.Dropout(p=args.noise_prob) 180 | 181 | for epoch in range(args.num_epochs): 182 | print('Doing epoch {} of {}'.format(epoch, args.num_epochs)) 183 | e_start_time = time.time() 184 | rencoder.train() 185 | total_epoch_loss = 0.0 186 | denom = 0.0 187 | if args.optimizer == "momentum": 188 | scheduler.step() 189 | for i, mb in enumerate(data_layer.iterate_one_epoch()): 190 | inputs = Variable(mb.cuda().to_dense() if use_gpu else mb.to_dense()) 191 | optimizer.zero_grad() 192 | outputs = rencoder(inputs) 193 | loss, num_ratings = model.MSEloss(outputs, inputs) 194 | loss = loss / num_ratings 195 | loss.backward() 196 | optimizer.step() 197 | global_step += 1 198 | t_loss += loss.item() 199 | t_loss_denom += 1 200 | 201 | if i % args.summary_frequency == 0: 202 | print('[%d, %5d] RMSE: %.7f' % (epoch, i, sqrt(t_loss / t_loss_denom))) 203 | logger.scalar_summary("Training_RMSE", sqrt(t_loss/t_loss_denom), global_step) 204 | t_loss = 0.0 205 | t_loss_denom = 0.0 206 | log_var_and_grad_summaries(logger, rencoder.encode_w, global_step, "Encode_W") 207 | log_var_and_grad_summaries(logger, rencoder.encode_b, global_step, "Encode_b") 208 | if not rencoder.is_constrained: 209 | log_var_and_grad_summaries(logger, rencoder.decode_w, global_step, "Decode_W") 210 | log_var_and_grad_summaries(logger, rencoder.decode_b, global_step, "Decode_b") 211 | 212 | total_epoch_loss += loss.item() 213 | denom += 1 214 | 215 | #if args.aug_step > 0 and i % args.aug_step == 0 and i > 0: 216 | if args.aug_step > 0: 217 | # dense re-feeding: the model's dense output becomes a new training example 218 | for t in range(args.aug_step): 219 | inputs = Variable(outputs.data) 220 | if args.noise_prob > 0.0: 221 | inputs = dp(inputs) 222 | optimizer.zero_grad() 223 | outputs = rencoder(inputs) 224 | loss, num_ratings = model.MSEloss(outputs, inputs) 225 | loss = loss / num_ratings 226 | loss.backward() 227 | optimizer.step() 228 | 229 | e_end_time = time.time() 230 | print('Total epoch {} finished in {} seconds with TRAINING RMSE loss: {}' 231 | .format(epoch, e_end_time - e_start_time, sqrt(total_epoch_loss/denom))) 232 | logger.scalar_summary("Training_RMSE_per_epoch", sqrt(total_epoch_loss/denom), epoch) 233 | logger.scalar_summary("Epoch_time", e_end_time - e_start_time, epoch) 234 | if epoch % args.save_every == 0 or epoch == args.num_epochs - 1: 235 | eval_loss = do_eval(rencoder, eval_data_layer) 236 | print('Epoch {} EVALUATION LOSS: {}'.format(epoch, eval_loss)) 237 | logger.scalar_summary("EVALUATION_RMSE", eval_loss, epoch) 238 | print("Saving model to {}".format(model_checkpoint + ".epoch_"+str(epoch))) 239 | torch.save(rencoder.state_dict(), model_checkpoint + ".epoch_"+str(epoch)) 240 | 241 | print("Saving model to {}".format(model_checkpoint + ".last")) 242 | torch.save(rencoder.state_dict(), model_checkpoint + ".last") 243 | 244 | # save to onnx 245 | dummy_input = Variable(torch.randn(params['batch_size'], data_layer.vector_dim).type(torch.float)) 246 | torch.onnx.export(rencoder.float(), dummy_input.cuda() if 
use_gpu else dummy_input, 247 | model_checkpoint + ".onnx", verbose=True) 248 | print("ONNX model saved to {}!".format(model_checkpoint + ".onnx")) 249 | 250 | if __name__ == '__main__': 251 | main() 252 | -------------------------------------------------------------------------------- /test/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017 NVIDIA Corporation -------------------------------------------------------------------------------- /test/data_layer_tests.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017 NVIDIA Corporation 2 | import unittest 3 | from reco_encoder.data.input_layer import UserItemRecDataProvider 4 | 5 | class UserItemRecDataProviderTest(unittest.TestCase): 6 | def test_1(self): 7 | print("Test 1 started") 8 | params = {} 9 | params['batch_size'] = 64 10 | params['data_dir'] = 'test/testData_iRec' 11 | data_layer = UserItemRecDataProvider(params=params) 12 | print("Total items found: {}".format(len(data_layer.data.keys()))) 13 | self.assertTrue(len(data_layer.data.keys())>0) 14 | 15 | def test_iterations(self): 16 | params = {} 17 | params['batch_size'] = 32 18 | params['data_dir'] = 'test/testData_iRec' 19 | data_layer = UserItemRecDataProvider(params=params) 20 | print("Total items found: {}".format(len(data_layer.data.keys()))) 21 | for i, data in enumerate(data_layer.iterate_one_epoch()): 22 | print(i) 23 | print(data.size()) 24 | 25 | if __name__ == '__main__': 26 | unittest.main() 27 | -------------------------------------------------------------------------------- /test/testData_iRec/.part-00199-f683aa3b-8840-4835-b8bc-a8d1eaa11c78.txt.crc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/DeepRecommender/a32a8a5c23092c551616acf6fac5b32e1155d18b/test/testData_iRec/.part-00199-f683aa3b-8840-4835-b8bc-a8d1eaa11c78.txt.crc -------------------------------------------------------------------------------- /test/testData_iRec/_SUCCESS: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/DeepRecommender/a32a8a5c23092c551616acf6fac5b32e1155d18b/test/testData_iRec/_SUCCESS -------------------------------------------------------------------------------- /test/testData_uRec/._SUCCESS.crc: -------------------------------------------------------------------------------- 1 | crc -------------------------------------------------------------------------------- /test/testData_uRec/.part-00000-4a844096-8dd9-425e-9d9d-bd9062cc6940.txt.crc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/DeepRecommender/a32a8a5c23092c551616acf6fac5b32e1155d18b/test/testData_uRec/.part-00000-4a844096-8dd9-425e-9d9d-bd9062cc6940.txt.crc -------------------------------------------------------------------------------- /test/test_model.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017 NVIDIA Corporation 2 | import unittest 3 | import sys 4 | import torch.optim as optim 5 | from torch.autograd import Variable 6 | from reco_encoder.data.input_layer import UserItemRecDataProvider 7 | from reco_encoder.model.model import AutoEncoder, MSEloss 8 | sys.path.append('data') 9 | sys.path.append('model') 10 | 11 | class iRecAutoEncoderTest(unittest.TestCase): 12 | def test_CPU(self): 13 | print("iRecAutoEncoderTest 
Test on CPU started") 14 | params = {} 15 | params['batch_size'] = 64 16 | params['data_dir'] = 'test/testData_iRec' 17 | data_layer = UserItemRecDataProvider(params=params) 18 | print("Vector dim: {}".format(data_layer.vector_dim)) 19 | print("Total items found: {}".format(len(data_layer.data.keys()))) 20 | self.assertTrue(len(data_layer.data.keys())>0) 21 | encoder = AutoEncoder(layer_sizes=[data_layer.vector_dim, 256, 128], is_constrained=True) 22 | print(encoder) 23 | print(encoder.parameters()) 24 | optimizer = optim.SGD(encoder.parameters(), lr=0.01, momentum=0.9) 25 | for epoch in range(20): 26 | for i, mb in enumerate(data_layer.iterate_one_epoch()): 27 | inputs = Variable(mb.to_dense()) 28 | optimizer.zero_grad() 29 | outputs = encoder(inputs) 30 | loss, num_ratings = MSEloss(outputs, inputs) 31 | loss = loss / num_ratings 32 | loss.backward() 33 | optimizer.step() 34 | print('[%d, %5d] loss: %.7f' % (epoch, i, loss.item())) 35 | 36 | def test_GPU(self): 37 | print("iRecAutoEncoderTest Test on GPU started") 38 | params = {} 39 | params['batch_size'] = 32 40 | params['data_dir'] = 'test/testData_iRec' 41 | data_layer = UserItemRecDataProvider(params=params) 42 | print("Total items found: {}".format(len(data_layer.data.keys()))) 43 | self.assertTrue(len(data_layer.data.keys()) > 0) 44 | encoder = AutoEncoder(layer_sizes=[data_layer.vector_dim, 1024, 512, 512, 512, 512, 128]) 45 | encoder.cuda() 46 | optimizer = optim.Adam(encoder.parameters()) 47 | print(encoder) 48 | for epoch in range(30): 49 | total_epoch_loss = 0.0 50 | denom = 0.0 51 | for i, mb in enumerate(data_layer.iterate_one_epoch()): 52 | inputs = Variable(mb.to_dense().cuda()) 53 | optimizer.zero_grad() 54 | outputs = encoder(inputs) 55 | loss, num_ratings = MSEloss(outputs, inputs) 56 | loss = loss / num_ratings 57 | loss.backward() 58 | optimizer.step() 59 | total_epoch_loss += loss.item() 60 | denom += 1 61 | print("Total epoch {} loss: {}".format(epoch, total_epoch_loss/denom)) 62 | 63 | class uRecAutoEncoderTest(unittest.TestCase): 64 | def test_CPU(self): 65 | print("uRecAutoEncoderTest Test on CPU started") 66 | params = {} 67 | params['batch_size'] = 256 68 | params['data_dir'] = 'test/testData_uRec' 69 | data_layer = UserItemRecDataProvider(params=params) 70 | print("Vector dim: {}".format(data_layer.vector_dim)) 71 | print("Total items found: {}".format(len(data_layer.data.keys()))) 72 | self.assertTrue(len(data_layer.data.keys())>0) 73 | encoder = AutoEncoder(layer_sizes=[data_layer.vector_dim, 128, data_layer.vector_dim]) 74 | optimizer = optim.SGD(encoder.parameters(), lr=0.1, momentum=0.9) 75 | for epoch in range(1): 76 | for i, mb in enumerate(data_layer.iterate_one_epoch()): 77 | inputs = Variable(mb.to_dense()) 78 | optimizer.zero_grad() 79 | outputs = encoder(inputs) 80 | loss, num_ratings = MSEloss(outputs, inputs) 81 | loss = loss / num_ratings 82 | loss.backward() 83 | optimizer.step() 84 | print('[%d, %5d] loss: %.7f' % (epoch, i, loss.item())) 85 | if i == 5: # too much compute for CPU 86 | break 87 | 88 | def test_GPU(self): 89 | print("uRecAutoEncoderTest Test on GPU started") 90 | params = {} 91 | params['batch_size'] = 64 92 | params['data_dir'] = 'test/testData_uRec' 93 | data_layer = UserItemRecDataProvider(params=params) 94 | print("Total items found: {}".format(len(data_layer.data.keys()))) 95 | self.assertTrue(len(data_layer.data.keys()) > 0) 96 | encoder = AutoEncoder(layer_sizes=[data_layer.vector_dim, 1024, 512, 512, 128]) 97 | encoder.cuda() 98 | optimizer = 
optim.Adam(encoder.parameters()) 99 | print(encoder) 100 | for epoch in range(2): 101 | total_epoch_loss = 0.0 102 | denom = 0.0 103 | for i, mb in enumerate(data_layer.iterate_one_epoch()): 104 | inputs = Variable(mb.to_dense().cuda()) 105 | optimizer.zero_grad() 106 | outputs = encoder(inputs) 107 | loss, num_ratings = MSEloss(outputs, inputs) 108 | loss = loss / num_ratings 109 | loss.backward() 110 | optimizer.step() 111 | total_epoch_loss += loss.item() 112 | denom += 1 113 | print("Total epoch {} loss: {}".format(epoch, total_epoch_loss / denom)) 114 | 115 | 116 | if __name__ == '__main__': 117 | unittest.main() 118 | 119 | --------------------------------------------------------------------------------
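To see how AutoEncoder, the masked MSEloss, and the dense re-feeding step from run.py fit together outside the full pipeline, here is a minimal sketch. It is illustrative only: the layer sizes and the random ratings tensor are made up, and real training goes through UserItemRecDataProvider and the run.py flags rather than a toy dense batch.

# Minimal sketch (illustrative only): one toy training loop with the masked loss
# plus a single dense re-feeding pass, mirroring the --aug_step logic in run.py.
import torch
import torch.optim as optim
from reco_encoder.model.model import AutoEncoder, MSEloss

num_items = 1000                                   # stands in for data_layer.vector_dim
encoder = AutoEncoder(layer_sizes=[num_items, 128, 32], nl_type='selu',
                      is_constrained=True, dp_drop_prob=0.0,
                      last_layer_activations=True)
optimizer = optim.SGD(encoder.parameters(), lr=0.01, momentum=0.9)

# Ratings on a 1..5 scale; zeros mark unrated items and are masked out by MSEloss.
ratings = torch.randint(0, 6, (64, num_items)).float()

for step in range(10):
  optimizer.zero_grad()
  outputs = encoder(ratings)
  loss, num_ratings = MSEloss(outputs, ratings)
  loss = loss / num_ratings                        # mean over observed ratings only
  loss.backward()
  optimizer.step()
  print('step {}: masked RMSE {:.4f}'.format(step, loss.item() ** 0.5))

  # Dense re-feeding (cf. --aug_step): the dense reconstruction becomes a new,
  # fully observed training example and is pushed through the network again.
  refeed = outputs.detach()
  optimizer.zero_grad()
  outputs = encoder(refeed)
  loss, num_ratings = MSEloss(outputs, refeed)
  (loss / num_ratings).backward()
  optimizer.step()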
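The --constrained flag (is_constrained=True above) ties each decoder layer to the transposed weight of the matching encoder layer, so only the decoder biases are added on top of the encoder parameters. A quick, hedged way to see the effect, again with made-up layer sizes:

# Compare parameter counts of a weight-tied and an untied model of the same shape.
from reco_encoder.model.model import AutoEncoder

tied = AutoEncoder(layer_sizes=[1000, 128, 32], is_constrained=True)
untied = AutoEncoder(layer_sizes=[1000, 128, 32], is_constrained=False)

def num_params(m):
  return sum(p.numel() for p in m.parameters())

# The tied model registers no decode_w parameters; decode() re-uses encode_w transposed.
assert not hasattr(tied, 'decode_w') and hasattr(untied, 'decode_w')
print(num_params(tied), num_params(untied))   # roughly half the parameters when tied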