├── .gitignore ├── LICENSE ├── README.md ├── clases ├── s1-c2-SlopeOne.pdf ├── s1-c2-UBCF_clustering.pdf ├── s1-c2-nonpers-UBCF.pdf ├── s10_c1_activelearning.pdf ├── s11_c1_deep_learning.pdf ├── s11_c2_secu_deep_learning.pdf ├── s12_c1_deep_learning_s.pdf ├── s12_c2_10bigproblems-recsys-small.pdf ├── s2_c1-IBCF.pdf ├── s2_c2-Factorizacion_matricial.pdf ├── s3_c1-Implicit-feedback.pdf ├── s3_c2-BPR.pdf ├── s4_c1-metricas.pdf ├── s4_c2-tests_estadisticos.pdf ├── s5_c1-content.pdf ├── s5_c1_p2-content.pdf ├── s5_c2-content.pdf ├── s6_c1-contexto.pdf ├── s6_c1-hibridos.pdf ├── s6_c2_p2-FMachines.pdf ├── s6_c2_p3-blending_ensemble.pdf ├── s9_c1_usercentric.pdf └── s9_c2_FATv2.pdf ├── posters ├── A1-andrade_dominguez_pattillo.jpg ├── A3-olguin_lopez_ibarra.png ├── A4_donosoguzman.png ├── A5-codoceo_escudero_torres.pptx.png ├── A6-Contreras_Molina_Stambuk.png ├── A7-ovalle_valdes.png ├── N2-cartegana_huerfano_toscano.png ├── N3-Valencia-González.png ├── N4_castro_casassus.png ├── N5-Labarca_Fuentes.png ├── N6-Aguilera_Everke.png ├── N7-suarez_carreno_alipanah.png ├── N8-Guinez_Ruiz_Sanchez.png ├── V1-salinas.png ├── V2-waugh_hanuch_ricke.jpg ├── V3_duarte_lopez_rodriguez.png ├── V4-diaz_vinay.png ├── V5-alliende.png ├── V6_perez_ramos.png ├── V7-tapia_villagran.png └── V8-biskupovic.png ├── practicos ├── Ayudantia_TIRLol.ipynb ├── Content_Based_imagenes.ipynb ├── Content_Based_texto.ipynb ├── FastFM_factorization_machines.ipynb ├── GRU4Rec.ipynb ├── Implicit_feedback.ipynb ├── MultiVAE_Practico.ipynb ├── README.md ├── Reinforcement_Learning_Recsim.ipynb ├── pyRecLab_FunkSVD.ipynb ├── pyRecLab_MostPopular.ipynb ├── pyRecLab_SlopeOne.ipynb ├── pyRecLab_iKNN.ipynb └── pyRecLab_uKNN.ipynb ├── proyecto ├── Denis-IdeasProyectosFinales-2020.pdf ├── Enunciado_Proyecto_Final_RecSys_2020_2.pdf ├── IIC3633-propuestas-2018_p1.pdf ├── IIC3633-propuestas-2018_p2.pdf ├── Vladimir-RecSysLoL2020.pdf ├── proy_finales_2018 │ ├── Araujo_etal_LoL_2018.pdf │ ├── Barrios_et_al_fakenews_2018.pdf 
│ ├── Cerda_etal_rnn_2018.pdf │ ├── Guzman_etal_steam_2018.pdf │ ├── Munoz_etal_playlist_2018.pdf │ └── Rencoret_etal_selfattention_2018.pdf ├── proy_finales_2019 │ └── README.md ├── proy_finales_2020 │ ├── Alipanah et al.pdf │ ├── Alliende.pdf │ ├── Andrade et al.pdf │ ├── Biskupovic et al.pdf │ ├── Cartagena et al.pdf │ ├── Castro et al.pdf │ ├── Codoceo et al.pdf │ ├── Contreras et al.pdf │ ├── Diaz et al.pdf │ ├── Donoso et al.pdf │ ├── Duarte et al.pdf │ ├── Everke et al.pdf │ ├── Friedl et al.pdf │ ├── Fuentes et al.pdf │ ├── Guinez et al.pdf │ ├── Hanuch et al.pdf │ ├── Olguin et al.pdf │ ├── Ramos et al.pdf │ ├── Salinas et al.pdf │ ├── Tapia et al.pdf │ ├── Valdes et al.pdf │ └── Valencia et al.pdf └── recsys2016_zorich-troncoso.pdf └── tareas └── Tarea_1_RecSys_2020_2.pdf /.gitignore: -------------------------------------------------------------------------------- 1 | # Editors 2 | .vscode/ 3 | .idea/ 4 | 5 | # Vagrant 6 | .vagrant/ 7 | 8 | # Mac/OSX 9 | .DS_Store 10 | 11 | # Windows 12 | Thumbs.db 13 | 14 | # Source for the following rules: https://raw.githubusercontent.com/github/gitignore/master/Python.gitignore 15 | # Byte-compiled / optimized / DLL files 16 | __pycache__/ 17 | *.py[cod] 18 | *$py.class 19 | 20 | # C extensions 21 | *.so 22 | 23 | # Distribution / packaging 24 | .Python 25 | build/ 26 | develop-eggs/ 27 | dist/ 28 | downloads/ 29 | eggs/ 30 | .eggs/ 31 | lib/ 32 | lib64/ 33 | parts/ 34 | sdist/ 35 | var/ 36 | wheels/ 37 | *.egg-info/ 38 | .installed.cfg 39 | *.egg 40 | MANIFEST 41 | 42 | # PyInstaller 43 | # Usually these files are written by a python script from a template 44 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
45 | *.manifest 46 | *.spec 47 | 48 | # Installer logs 49 | pip-log.txt 50 | pip-delete-this-directory.txt 51 | 52 | # Unit test / coverage reports 53 | htmlcov/ 54 | .tox/ 55 | .nox/ 56 | .coverage 57 | .coverage.* 58 | .cache 59 | nosetests.xml 60 | coverage.xml 61 | *.cover 62 | .hypothesis/ 63 | .pytest_cache/ 64 | 65 | # Translations 66 | *.mo 67 | *.pot 68 | 69 | # Django stuff: 70 | *.log 71 | local_settings.py 72 | db.sqlite3 73 | 74 | # Flask stuff: 75 | instance/ 76 | .webassets-cache 77 | 78 | # Scrapy stuff: 79 | .scrapy 80 | 81 | # Sphinx documentation 82 | docs/_build/ 83 | 84 | # PyBuilder 85 | target/ 86 | 87 | # Jupyter Notebook 88 | .ipynb_checkpoints 89 | 90 | # IPython 91 | profile_default/ 92 | ipython_config.py 93 | 94 | # pyenv 95 | .python-version 96 | 97 | # celery beat schedule file 98 | celerybeat-schedule 99 | 100 | # SageMath parsed files 101 | *.sage.py 102 | 103 | # Environments 104 | .env 105 | .venv 106 | env/ 107 | venv/ 108 | ENV/ 109 | env.bak/ 110 | venv.bak/ 111 | 112 | # Spyder project settings 113 | .spyderproject 114 | .spyproject 115 | 116 | # Rope project settings 117 | .ropeproject 118 | 119 | # mkdocs documentation 120 | /site 121 | 122 | # mypy 123 | .mypy_cache/ 124 | .dmypy.json 125 | dmypy.json 126 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 PUC-RecSys-Class 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice 
and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # IIC3633 Sistemas Recomendadores 2 | Agosto-Diciembre 2020 3 | 4 | 5 | ### Equipo Docente e Información Administrativa 6 | **Instructor**: [Denis Parra](http://dparra.sitios.ing.uc.cl), Profesor Asociado PUC Chile, Ph.D. University of Pittsburgh 7 | 8 | **Ayudantes**: 9 | [Vladimir Araujo](https://vgaraujov.github.io/), Alumno de Doctorado en Ciencia de la Computación PUC Chile. 10 | Manuel Cartagena, Alumno de Magister en Ciencia de la Computación PUC Chile. 11 | [Andrés Carvallo](https://scholar.google.com/citations?user=DinpmCUAAAAJ&hl=es), Alumno de Doctorado en Ciencia de la Computación PUC Chile. 12 | [Francisca Cattan](https://www.linkedin.com/in/franciscacattan/), Alumna de Doctorado en Ciencia de la Computación PUC Chile. 13 | [Andrés Villa](https://www.linkedin.com/in/andres-felipe-villa-ojeda-b3132811b), Alumno de Doctorado en Ciencia de la Computación PUC Chile. 14 | 15 | **Institución**: Pontificia Universidad Católica de Chile 16 | 17 | **Horario**: Martes y Jueves, Módulo 3 (11:30 a 12:50). 18 | 19 | Programa IIC 3633, 2do Semestre 2020: [pdf](http://dparra.sitios.ing.uc.cl/classes/recsys-2019-2/IIC3633Sist%20Recomendadores_v3.pdf). 
20 | 21 | ### Descripción del Curso 22 | 23 | El curso de Sistemas Recomendadores cubre las principales tareas de recomendación, algoritmos, fuentes de datos y evaluación de estos sistemas. Al final de este curso serás capaz de decidir qué técnicas y fuentes de datos usar para implementar y evaluar sistemas recomendadores. 24 | 25 | **Software**: [pyRecLab](https://github.com/gasevi/pyreclab/). 26 | 27 | La componente práctica de este curso se enseña a través del uso de pyRecLab desarrollado por Gabriel Sepúlveda (ex-alumno de este curso), biblioteca de software para desarrollo de sistemas recomendadores en Python. 28 | 29 | **Contenido**: 30 | 31 | ## Contenidos por Semana 32 | 33 | 34 | 35 | 36 | | Semana | Tema | link slide(s) | link video | comentario(s) | 37 | |:--------|:-----------------|:-------------:|:----------:|:-------------:| 38 | | 1 | Introducción | x | [video](https://drive.google.com/file/d/1lhHoO0JN5PI-6tc9samHx7ui7iB-ycV-/view?usp=sharing) | | 39 | | 1 | Ranking no personalizado y Filtrado colaborativo (FC) | [slides](https://github.com/PUC-RecSys-Class/RecSysPUC-2020/blob/master/clases/s1-c2-nonpers-UBCF.pdf) | [video](https://drive.google.com/file/d/1PrlxIa_qx5n4s-OylzXyMQeuErqCl_Py/view?usp=sharing) | | 40 | | 1 | User-based FC con clustering | [slides](https://github.com/PUC-RecSys-Class/RecSysPUC-2020/blob/master/clases/s1-c2-UBCF_clustering.pdf) | [video](https://drive.google.com/file/d/15gwdIm7MlVhBltB1vjOhsKW6n7ezqw0f/view?usp=sharing) | | 41 | | 1 | Pendiente Uno | [slides](https://github.com/PUC-RecSys-Class/RecSysPUC-2020/blob/master/clases/s1-c2-SlopeOne.pdf) | [video](https://drive.google.com/file/d/15gwdIm7MlVhBltB1vjOhsKW6n7ezqw0f/view?usp=sharing) | | 42 | | 2 | Item-based FC | [slides](https://github.com/PUC-RecSys-Class/RecSysPUC-2020/blob/master/clases/s2_c1-IBCF.pdf) | [video](https://drive.google.com/file/d/15gwdIm7MlVhBltB1vjOhsKW6n7ezqw0f/view?usp=sharing) | | 43 | | 2 | Factorización Matricial: FunkSVD | 
[slides](https://github.com/PUC-RecSys-Class/RecSysPUC-2020/blob/master/clases/s2_c2-Factorizacion_matricial.pdf) | [video](https://drive.google.com/file/d/1Wi5O1VKrx72Ux0LdhxcAAL0FXSGV7FVQ/view) | | 44 | | 3 | Implicit Feedback CF | [slides](https://github.com/PUC-RecSys-Class/RecSysPUC-2020/blob/master/clases/s3_c1-Implicit-feedback.pdf) | [video1](https://drive.google.com/file/d/1hAxJpXX6VMgEcTQTfUtHNu-7ttDYTG9o/view?usp=sharing) [video2](https://drive.google.com/file/d/1EH59esO_bnd6t3YJm-gw6PhM0mHbqRqd/view?usp=sharing) | | 45 | | 3 | Bayesian Personalized Ranking (BPR) | [slides](https://github.com/PUC-RecSys-Class/RecSysPUC-2020/blob/master/clases/s3_c2-BPR.pdf) | [video](https://drive.google.com/file/d/1VV_s8c7b-ftCcg68q0oRRBh00T4JypPc/view?usp=sharing) | | 46 | | 4 | Evaluación: metricas de error y ranking | [slides](https://github.com/PUC-RecSys-Class/RecSysPUC-2020/blob/master/clases/s4_c1-metricas.pdf) | [video](https://drive.google.com/file/d/1wFQc9h1pdaJH1YbbUgbw8Wb56m1mHvZf/view?usp=sharing) | [slides P Castells LARS 2019](http://ir.ii.uam.es/castells/lars2019.pdf) | 47 | | 4 | Evaluación II: Cobertura, diversidad, novedad | [slides](https://github.com/PUC-RecSys-Class/RecSysPUC-2020/blob/master/clases/s4_c1-metricas.pdf) | [video](https://drive.google.com/file/d/1TPjmn7FWK2C4G3rg7UYY4NGG2bLJyTOy/view?usp=sharing) | | 48 | | 4 | Evaluación III: Tests estadísticos | [slides](https://github.com/PUC-RecSys-Class/RecSysPUC-2020/blob/master/clases/s4_c2-tests_estadisticos.pdf) | [video](https://drive.google.com/file/d/1jis_iIjFFWk7NcggeMn26rLAhrWuRK5h/view?usp=sharing) | | 49 | | 5 | Recomendación basada en contenido 1 | [slides](https://github.com/PUC-RecSys-Class/RecSysPUC-2020/blob/master/clases/s5_c1-content.pdf) | [video](https://drive.google.com/file/d/1HiBLWh0l-aRq4ldVjk3Z0bf56gwQP2wp/view?usp=sharing) | | 50 | | 5 | Recomendación basada en contenido 2 | 
[slides](https://github.com/PUC-RecSys-Class/RecSysPUC-2020/blob/master/clases/s5_c2-content.pdf) | [video](https://drive.google.com/file/d/1cUR-bApLvOlVYRMAdGzEqeatP44jol0N/view?usp=sharing) | | 51 | | 6 | Recomendación híbrida | [slides](https://github.com/PUC-RecSys-Class/RecSysPUC-2020/blob/master/clases/s6_c1-hibridos.pdf) | [video](https://drive.google.com/file/d/1aUNYNli4l4xk_hGRB7v-PaRr0xtJWQau/view?usp=sharing) | | 52 | | 6 | Recomendación por ensambles | [slides](https://github.com/PUC-RecSys-Class/RecSysPUC-2020/blob/master/clases/s6_c2_p3-blending_ensemble.pdf) | [video](https://drive.google.com/file/d/1o5cL5JspHI8QizFeMT8_9HQKqNT2rmqm/view?usp=sharing) | | 53 | | 6 | Recomendación basada en contexto | [slides](https://github.com/PUC-RecSys-Class/RecSysPUC-2020/blob/master/clases/s6_c1-contexto.pdf) | [video1](https://drive.google.com/file/d/1fC3Ypg5aF8Be_8b7ZpPFqwVdtJS2kwzm/view?usp=sharing) [video2](https://drive.google.com/file/d/10r_6DzrKflF8sVzgZ1mU8xb8mJC2BcgO/view?usp=sharing) | | 54 | | 6 | Máquinas de Factorización | [slides](https://github.com/PUC-RecSys-Class/RecSysPUC-2020/blob/master/clases/s6_c2_p2-FMachines.pdf) | [video](https://drive.google.com/file/d/111IK4ZIE-bqiNWmYLyafQoS0kthETLUr/view?usp=sharing) | | 55 | | 7 | Semana Break | Break | Break | | 56 | | 8 | [Ideas de Proyecto](https://github.com/PUC-RecSys-Class/RecSysPUC-2020/tree/master/proyecto) | [slides](https://github.com/PUC-RecSys-Class/RecSysPUC-2020/blob/master/proyecto/Denis-IdeasProyectosFinales-2020.pdf) | [video1](https://drive.google.com/file/d/1o8TFNtax9cYiIrVFdnxGxJ3P3fItIFwA/view?usp=sharing) [video2](https://drive.google.com/file/d/1NjC6iw9LRDmaKGh-rSAHVm_iolcsvMy1/view?usp=sharing) | | 57 | | 9 | Evaluación centrada en usuarios | [slides](https://github.com/PUC-RecSys-Class/RecSysPUC-2020/blob/master/clases/s9_c1_usercentric.pdf) | [video1](https://drive.google.com/file/d/1N6KBXkGN6Gh409mxkkD8KFGLs8Bvntz7/view?usp=sharing) 
[video2](https://drive.google.com/file/d/1NlALFJzBcLv1SZGFSiOgjFoINlKzkdN4/view?usp=sharing) | | 58 | | 9 | Sistemas Justos, Explicables y Transparentes | [slides](https://github.com/PUC-RecSys-Class/RecSysPUC-2020/blob/master/clases/s9_c2_FATv2.pdf) | [video](https://drive.google.com/file/d/1kgJHc3DoKem0VW1gKvm2RaweGJJF3uPv/view?usp=sharing) | | 59 | | 10 | Aprendizaje Activo (Active Learning) | [slides](https://github.com/PUC-RecSys-Class/RecSysPUC-2020/blob/master/clases/s10_c1_activelearning.pdf) | [video](https://drive.google.com/file/d/1tb1qhBWUgO5jHrpQIYhWhvmy3z93Jimc/view?usp=sharing) | | 60 | | 10 | Bandits: Invitada [PhD(c) Andrea Barraza](https://apbarraza.com/) | [slides](https://gitlab.insight-centre.org/andbar/bears/raw/c4f04b377f6ad30a1ab0e5f1c97d05eaacec364e/tutorials/RECSYS2020/slides/%5BRecSys2020%5D%20Introduction%20to%20Bandits%20in%20Recommender%20Systems.pdf) | [video1](https://drive.google.com/file/d/1tTnkIjVOUu8Y_PkNYKZVBuWWscwn2Zni/view?usp=sharing) [video2](https://drive.google.com/file/d/1Q8dYPUQ-EPmg_JEYK86Jw7smtr0ZPrEc/view?usp=sharing) | Tutorial eng. 
RecSys 2020 [video](https://player.vimeo.com/video/460128124) [slides](https://gitlab.insight-centre.org/andbar/bears/raw/c4f04b377f6ad30a1ab0e5f1c97d05eaacec364e/tutorials/RECSYS2020/slides/%5BRecSys2020%5D%20Introduction%20to%20Bandits%20in%20Recommender%20Systems.pdf) | 61 | | 11 | Aprendizaje Profundo para RecSys (Intro y FC) | [slides](https://github.com/PUC-RecSys-Class/RecSysPUC-2020/blob/master/clases/s11_c1_deep_learning.pdf) | [video](https://drive.google.com/file/d/1-T_vxETMbzqk2FNJzUPGaPJtIpfe-R9g/view?usp=sharing) | | 62 | | 11 | Aprendizaje Profundo para RecSys (Secuencias) | [slides](https://github.com/PUC-RecSys-Class/RecSysPUC-2020/blob/master/clases/s11_c2_secu_deep_learning.pdf) | [video](https://drive.google.com/file/d/1krx9jda3TC8xyIn0n5FGuR-xgp-zezev/view?usp=sharing) | | 63 | | 12 | Aprendizaje Profundo para RecSys (Imágenes, Transformer, Grafos) | [slides](https://github.com/PUC-RecSys-Class/RecSysPUC-2020/blob/master/clases/s12_c1_deep_learning_s.pdf) | [video1](https://drive.google.com/file/d/11aUkSvabuHZMevDd4yP0dBTrc3_2eNtV/view?usp=sharing) [video2](https://drive.google.com/file/d/1LJ6jI3gHPqS45Olv-Iu29W5DunO8ZbF6/view?usp=sharing) | | 64 | | 12 | 10 problemas en Sistemas de Recomendación | [slides](https://github.com/PUC-RecSys-Class/RecSysPUC-2020/blob/master/clases/s12_c2_10bigproblems-recsys-small.pdf) | [video](https://drive.google.com/file/d/1DtyzGOfMZY8UE223SMM2ROczH_qHhkNc/view?usp=sharing) | | 65 | 66 | ### Parte II del curso: seminario 67 | 68 | A partir de noviembre el curso toma modalidad seminario, los alumnos hacen presentaciones de los siguientes papers: 69 | 70 | 71 | 72 | 73 | | Semana | Tema | link slide(s) | link video | conferencia | 74 | |:--------|:-----------------|:-------------:|:----------:|:-----------| 75 | | 13 | CTRec: A Long-Short Demands Evolution Model for Continuous-Time Recommendation | [slides](https://drive.google.com/file/d/1Ys-8asyvEYR1Fr_lbY3D_pJkU88f1jHs/view?usp=sharing) | 
[video](https://drive.google.com/file/d/12wMh58M3ogqaWdmfKJpXDsZRyTOba9r1/view?usp=sharing) | [SIGIR 2019](https://dl.acm.org/doi/abs/10.1145/3331184.3331199) | 76 | | 13 | Collaborative Similarity Embedding for Recommender Systems | [slides](https://drive.google.com/file/d/1QoreUxFm_Qu5hHvzywPsj02pslXvefkK/view?usp=sharing) | [video](https://drive.google.com/file/d/15ewTouX8aLn2ib3LEt_2s226zd2VZyi0/view?usp=sharing) | [WWW 2019](https://dl.acm.org/doi/abs/10.1145/3308558.3313493) | 77 | | 13 | Personalized re-ranking for recommendation | [slides](https://drive.google.com/file/d/1Xnt_NpqR_7CHIBvhkB-J5MFOpxwSiAwR/view?usp=sharing) | [video](https://drive.google.com/file/d/155ljkGXASvx-AwWN7STHL4xjWoUEYOMP/view?usp=sharing) | [RecSys 2019](https://dl.acm.org/doi/abs/10.1145/3298689.3347000 ) | 78 | | 13 | Recommending what video to watch next: a multitask ranking system | [slides](https://drive.google.com/file/d/1qzi589_zrzgV0zKDY4d3MPCikjcHgf64/view?usp=sharing) | [video](https://drive.google.com/file/d/1uZGioos0x9xHo01AW3jAlip8jVn9k2Hv/view?usp=sharing) | [RecSys 2019](https://dl.acm.org/doi/abs/10.1145/3298689.3346997) | 79 | | 13 | Interactive Recommender System via Knowledge Graph-enhanced Reinforcement Learning | [slides](https://drive.google.com/file/d/1IdR28OnjbzY2YVmlekyNQippslYJB4cZ/view?usp=sharing) | [video](https://drive.google.com/file/d/1i_ASoIHXTn8LTK4z-xE5NQNSfDnNb-rw/view?usp=sharing) | [SIGIR 2020](https://dl.acm.org/doi/abs/10.1145/3397271.3401174) | 80 | | 13 | The Impact of More Transparent Interfaces on Behavior in Personalized Recommendation | [slides](https://drive.google.com/file/d/14LUW4ZQE9kHaAilSzDMl96tCPHH5UcdE/view?usp=sharing) | [video](https://drive.google.com/file/d/14LUW4ZQE9kHaAilSzDMl96tCPHH5UcdE/view?usp=sharing) | [SIGIR 2020](https://dl.acm.org/doi/abs/10.1145/3397271.3401117) | 81 | | 14 | Complete the Look: Scene-based Complementary Product Recommendation | 
[slides](https://drive.google.com/file/d/1RaB0XwZwfIPtKSOPAIxPEMtOoLE8Ty3V/view?usp=sharing) | [video](https://drive.google.com/file/d/1efJnREr6D1lr5fx4lm1CHRyIf9-fqJW4/view?usp=sharing) | [CVPR 2019](http://openaccess.thecvf.com/content_CVPR_2019/html/Kang_Complete_the_Look_Scene-Based_Complementary_Product_Recommendation_CVPR_2019_paper.html) | 82 | | 14 | Dynamic Online Conversation Recommendation | [slides](https://drive.google.com/file/d/1S4ITTihWbGj5mb-FvsIOWYFJIJt98doK/view?usp=sharing) | [video](https://drive.google.com/file/d/1mz2_L3vBfx8asvRqmvCYnE4lSvInpa0F/view?usp=sharing) | [ACL 2020 ](https://www.aclweb.org/anthology/2020.acl-main.305/) | 83 | | 14 | Temporal-Contextual Recommendation in Real-Time | [slides](https://drive.google.com/file/d/1AM9HgY75MptUkboOZNKktesPLP17Rjln/view?usp=sharing) | [video](https://drive.google.com/file/d/18DkeuikFjL3Rp477T3J1LIEsNJPxoiOV/view?usp=sharing) | [KDD 2020](https://dl.acm.org/doi/abs/10.1145/3394486.3403278) | 84 | | 14 | What does BERT Know about Books, Movies and Music? 
Probing BERT for Conversational Recommendation | [slides](https://drive.google.com/file/d/1TKKuA1_V9X2x_JaIHee-iUl2g1Q6IsbF/view?usp=sharing) | [video](https://drive.google.com/file/d/1sQ9jgyOVe5vwUUTh5mYxGC8ae4PEfO9k/view?usp=sharing) | [RecSys 2020](https://dl.acm.org/doi/abs/10.1145/3383313.3412249) | 85 | | 14 | Fairness-Aware Explainable Recommendation over Knowledge Graphs | [slides](https://drive.google.com/file/d/169p_F5YWoIPrn9QyiuepyqhvNnP6ufOZ/view?usp=sharing) | [video](https://drive.google.com/file/d/19U2Fpq3Pj7qKEuCr47HPkPGRTdwvBUk4/view?usp=sharing) | [SIGIR 2020](https://doi.org/10.1145/3397271.3401051) | 86 | | 15 | Deep generative ranking for personalized recommendation | [slides](https://drive.google.com/file/d/1nDnz6GlFN6xifzBNRTkBzWFD40LIC7Wg/view?usp=sharing) | [video](https://drive.google.com/file/d/1fq4Jx54aOl_PDx3Y-xd6yL7oJ92nXPCO/view?usp=sharing) | [RecSys 2019](https://dl.acm.org/doi/abs/10.1145/3298689.3347012) | 87 | | 15 | Revisiting Adversarially Learned Injection Attacks Against Recommender Systems | [slides](https://drive.google.com/file/d/1SWNGR3xdaJv4JAfs2GQEf1GyR378r03T/view?usp=sharing) | [video](https://drive.google.com/file/d/1NrrtuyT6X4EoZFG-LJ_aBcvKb-dhpZIP/view?usp=sharing) | [RecSys 2020](https://dl.acm.org/doi/abs/10.1145/3383313.3412243) | 88 | | 15 | Neural Interactive Collaborative Filtering | [slides](https://drive.google.com/file/d/1g52DPanTM5a8MLZoQKT8req6Sz_WHfvL/view?usp=sharing) | [video](https://drive.google.com/file/d/1EdZQTASqlLEAFQcnyCdFY4dj9K4d_Wok/view?usp=sharing) | [SIGIR 2020](https://dl.acm.org/doi/abs/10.1145/3397271.3401181) | 89 | | 15 | Explanation Mining: Post Hoc Interpretability of Latent Factor Models for Recommendation Systems | [slides](https://drive.google.com/file/d/1rHFyqUxY08fXWq1rRwwZboy1EWBvaEXU/view?usp=sharing) | [video](https://drive.google.com/file/d/1WxaAlz16gYEuIQU3dgOx4_2hR2BzJq-F/view?usp=sharing) | [KDD 2018](https://dl.acm.org/doi/abs/10.1145/3219819.3220072) | 90 | | 15 | 
Towards Conversational Recommendation over Multi-Type Dialogs | [slides](https://drive.google.com/file/d/1rbY2cPF95GJMOHeRqyuSwRWaLTkcKqrW/view?usp=sharing) | [video](https://drive.google.com/file/d/1jSAPxTDkVan7XGHeZk93KJQp_xiyypSv/view?usp=sharing) | [ACL 2020](https://www.aclweb.org/anthology/2020.acl-main.98.pdf) | 91 | | 15 | Self-Supervised Reinforcement Learning for Recommender Systems | [slides](https://drive.google.com/file/d/1Idtcd7hHRgkvVP5Ccx_5x1R4hiXWOLz8/view?usp=sharing) | [video](https://drive.google.com/file/d/1REin9NnKcHXodV4XQmMuADlOXfUZpxab/view?usp=sharing) | [SIGIR 2020](https://doi.org/10.1145/3397271.3401147) | 92 | | 16 | Controlling Fairness and Bias in Dynamic Learning-to-Rank | [slides](https://drive.google.com/file/d/1jk4i1NPB9kqhRJ9RMsoOJ2cgVrqX3GaQ/view?usp=sharing) | [video](https://drive.google.com/file/d/1m0t6gP18q70J00KUNhZkjalLv5QVvXk7/view?usp=sharing) | [SIGIR 2020](https://doi.org/10.1145/3397271.3401100) | 93 | | 16 | Are we really making much progress? 
A worrying analysis of recent neural recommendation approaches | [slides](https://drive.google.com/file/d/1CUYBMfGddQZoNJlllQrokxi-81tjpLeN/view?usp=sharing) | [video](https://drive.google.com/file/d/1mXz-3tYWFeuyZ_5CSUL90W4VZkgTwHHb/view?usp=sharing) | [RecSys 2019](https://doi.org/10.1145/3298689.3347058) | 94 | | 16 | KRED: Knowledge-aware Document Representation for News Recommendations | [slides](https://drive.google.com/file/d/1ZxCrNNPK500qDYOsovfxHw1IhlIihCjJ/view?usp=sharing) | [video](https://drive.google.com/file/d/1mlqZgWTZ-b-7Ca0r--QuEstc6ylUCzYD/view?usp=sharing) | [RecSys 2020](https://doi.org/10.1145/3383313.3412237) | 95 | | 16 | Improving Relevance Prediction with Transfer Learning in Large-scale Retrieval Systems | [slides](https://drive.google.com/file/d/1_-EwRThH0wqdreZItcoF4Hz25aEJhnEl/view?usp=sharing) | [video](https://drive.google.com/file/d/1ErySt516e6BllC0kqc_3MHPGvpdoswLW/view?usp=sharing) | [Workshop ICML 2019](https://openreview.net/pdf?id=SJxPVcSonN) | 96 | | 16 | BERT4Rec: Sequential Recommendation with Bidirectional Encoder Representations from Transformer | [slides](https://drive.google.com/file/d/1LVFYEIK1ulzQ0pX3oDk0lcfMguR3QPaw/view?usp=sharing) | [video](https://drive.google.com/file/d/1ILaajKL1vPPBXm7XkcfOA-xNUAh2n3Ci/view?usp=sharing) | [CIKM 2019](https://doi.org/10.1145/3357384.3357895) | 97 | 98 | ### Proyectos finales 99 | 100 | L@s estudiantes trabajaron en grupo sobre proyectos finales de curso, produciendo un poster, paper y repositorio con código para cada uno: 101 | 102 | Imagen de la sesión de posters en gather.town del 15 de diciembre de 2020. 103 | ![iic3633-2020-2-posters](https://user-images.githubusercontent.com/208111/102280274-9a3fe180-3f0b-11eb-80ba-a5975227984a.png) 104 | 105 | 106 | 107 | 108 | | Grupo | Proyecto | Poster | Paper | 109 | |:------|:---------------------|:------:|:-----:| 110 | | 1 | Recomendación a conjuntos de usuarios en grupos heterogeneos
Cartagena, Huerfano, Toscano | [poster](https://github.com/PUC-RecSys-Class/RecSysPUC-2020/blob/master/posters/N2-cartegana_huerfano_toscano.png) | [paper](https://github.com/PUC-RecSys-Class/RecSysPUC-2020/blob/master/proyecto/proy_finales_2020/Cartagena%20et%20al.pdf) | 111 | | 2 | Personality bias in music recommendation: Beyond accuracy Objectives
Valencia, Gonzalez | [poster](https://github.com/PUC-RecSys-Class/RecSysPUC-2020/blob/master/posters/N3-Valencia-Gonz%C3%A1lez.png) | [paper](https://github.com/PUC-RecSys-Class/RecSysPUC-2020/blob/master/proyecto/proy_finales_2020/Valencia%20et%20al.pdf) | 112 | | 3 | Attack learning: a method using GANs
Castro, Casassus | [poster](https://github.com/PUC-RecSys-Class/RecSysPUC-2020/blob/master/posters/N4_castro_casassus.png) | [paper](https://github.com/PUC-RecSys-Class/RecSysPUC-2020/blob/master/proyecto/proy_finales_2020/Castro%20et%20al.pdf) | 113 | | 4 | Métrica Beyond Accuracy: Personal
Labarca, Fuentes | [poster](https://github.com/PUC-RecSys-Class/RecSysPUC-2020/blob/master/posters/N5-Labarca_Fuentes.png) | [paper](https://github.com/PUC-RecSys-Class/RecSysPUC-2020/blob/master/proyecto/proy_finales_2020/Fuentes%20et%20al.pdf) | 114 | | 5 | Sequential Recommenders for MeLiDataChallenge 2020
Aguilera, Everke | [poster](https://github.com/PUC-RecSys-Class/RecSysPUC-2020/blob/master/posters/N6-Aguilera_Everke.png) | [paper](https://github.com/PUC-RecSys-Class/RecSysPUC-2020/blob/master/proyecto/proy_finales_2020/Everke%20et%20al.pdf) | 115 | | 6 | Exploración de recomendadores híbridos para música
Suarez, Carreño, Alipanah | [poster](https://github.com/PUC-RecSys-Class/RecSysPUC-2020/blob/master/posters/N7-suarez_carreno_alipanah.png) | [paper](https://github.com/PUC-RecSys-Class/RecSysPUC-2020/blob/master/proyecto/proy_finales_2020/Alipanah%20et%20al.pdf) | 116 | | 7 | Impacto del Sesgo de Popularidad en el tiempo, considerando múltiples stakeholders
Guiñez, Ruiz, Sanchez | [poster](https://github.com/PUC-RecSys-Class/RecSysPUC-2020/blob/master/posters/N8-Guinez_Ruiz_Sanchez.png) | [paper](https://github.com/PUC-RecSys-Class/RecSysPUC-2020/blob/master/proyecto/proy_finales_2020/Guinez%20et%20al.pdf) | 117 | | 8 | Recomendación de items basada en la secuencia de compras: Aplicación DotA
Salinas | [poster](https://github.com/PUC-RecSys-Class/RecSysPUC-2020/blob/master/posters/V1-salinas.png) | [paper](https://github.com/PUC-RecSys-Class/RecSysPUC-2020/blob/master/proyecto/proy_finales_2020/Salinas%20et%20al.pdf) | 118 | | 9 | Delay and preference based flight recommendation
Waugh, Hanuch, Ricke | [poster](https://github.com/PUC-RecSys-Class/RecSysPUC-2020/blob/master/posters/V2-waugh_hanuch_ricke.jpg) | [paper](https://github.com/PUC-RecSys-Class/RecSysPUC-2020/blob/master/proyecto/proy_finales_2020/Hanuch%20et%20al.pdf) | 119 | | 10 | Changing the way we predict game purchases
Duarte, Lopez, Rodriguez | [poster](https://github.com/PUC-RecSys-Class/RecSysPUC-2020/blob/master/posters/V3_duarte_lopez_rodriguez.png) | [paper](https://github.com/PUC-RecSys-Class/RecSysPUC-2020/blob/master/proyecto/proy_finales_2020/Duarte%20et%20al.pdf) | 120 | | 11 | MelAE: A content-based next track recommendation from Mel Spectrograms
Diaz, Vinay | [poster](https://github.com/PUC-RecSys-Class/RecSysPUC-2020/blob/master/posters/V4-diaz_vinay.png) | [paper](https://github.com/PUC-RecSys-Class/RecSysPUC-2020/blob/master/proyecto/proy_finales_2020/Diaz%20et%20al.pdf) | 121 | | 12 | Optimizing Hyper-parameters in RecSys using rolling averages
Alliende | [poster](https://github.com/PUC-RecSys-Class/RecSysPUC-2020/blob/master/posters/V5-alliende.png) | [paper](https://github.com/PUC-RecSys-Class/RecSysPUC-2020/blob/master/proyecto/proy_finales_2020/Alliende.pdf) | 122 | | 13 | Ramos Perez on MeliChallenge 2020
Perez, Ramos | [poster](https://github.com/PUC-RecSys-Class/RecSysPUC-2020/blob/master/posters/V6_perez_ramos.png) | [paper](https://github.com/PUC-RecSys-Class/RecSysPUC-2020/blob/master/proyecto/proy_finales_2020/Ramos%20et%20al.pdf) | 123 | | 14 | RL Algorithms for video game recsys
Tapia, Villagrán | [poster](https://github.com/PUC-RecSys-Class/RecSysPUC-2020/blob/master/posters/V7-tapia_villagran.png) | [paper](https://github.com/PUC-RecSys-Class/RecSysPUC-2020/blob/master/proyecto/proy_finales_2020/Tapia%20et%20al.pdf) | 124 | | 15 | Matrix factorization and content-based recsys for playlist continuation
Biskupovic | [poster](https://github.com/PUC-RecSys-Class/RecSysPUC-2020/blob/master/posters/V8-biskupovic.png) | [paper](https://github.com/PUC-RecSys-Class/RecSysPUC-2020/blob/master/proyecto/proy_finales_2020/Biskupovic%20et%20al.pdf) | 125 | | 16 | Topic Recommendation for Call Centers
Andrade, Dominguez, Patillo | [poster](https://github.com/PUC-RecSys-Class/RecSysPUC-2020/blob/master/posters/A1-andrade_dominguez_pattillo.jpg) | [paper](https://github.com/PUC-RecSys-Class/RecSysPUC-2020/blob/master/proyecto/proy_finales_2020/Andrade%20et%20al.pdf) | 126 | | 17 | Recomendación a grupos: métrica de similaridad y modelos de agrupación
Olguin, Ibarra | [poster](https://github.com/PUC-RecSys-Class/RecSysPUC-2020/blob/master/posters/A3-olguin_lopez_ibarra.png) | [paper](https://github.com/PUC-RecSys-Class/RecSysPUC-2020/blob/master/proyecto/proy_finales_2020/Olguin%20et%20al.pdf) | 127 | | 18 | Recommendation of COVID-19 articles using Deep Knowledge-Aware Network
Donoso-Guzmán | [poster](https://github.com/PUC-RecSys-Class/RecSysPUC-2020/blob/master/posters/A4_donosoguzman.png) | [paper](https://github.com/PUC-RecSys-Class/RecSysPUC-2020/blob/master/proyecto/proy_finales_2020/Donoso%20et%20al.pdf) | 128 | | 19 | RecGAN as anime recommender systems
Codoceo, Escudero, Torres | [poster](https://github.com/PUC-RecSys-Class/RecSysPUC-2020/blob/master/posters/A5-codoceo_escudero_torres.pptx.png) | [paper](https://github.com/PUC-RecSys-Class/RecSysPUC-2020/blob/master/proyecto/proy_finales_2020/Codoceo%20et%20al.pdf) | 129 | | 20 | Sistema Recomendador de Lecciones Aprendidas en Cursos Capstone
Contreras, Molina, Stambuk | [poster](https://github.com/PUC-RecSys-Class/RecSysPUC-2020/blob/master/posters/A6-Contreras_Molina_Stambuk.png) | [paper](https://github.com/PUC-RecSys-Class/RecSysPUC-2020/blob/master/proyecto/proy_finales_2020/Contreras%20et%20al.pdf) | 130 | | 21 | Finding Similar Users with Recommender Systems
Ovalle, Valdes | [poster](https://github.com/PUC-RecSys-Class/RecSysPUC-2020/blob/master/posters/A7-ovalle_valdes.png) | [paper](https://github.com/PUC-RecSys-Class/RecSysPUC-2020/blob/master/proyecto/proy_finales_2020/Valdes%20et%20al.pdf) | 131 | 132 | 133 | **MES 1** En las primeras semanas nos enfocaremos en métodos básicos para hacer recomendación usando y prediciendo ratings (filtrado colaborativo User-based & item-based, slope-one). Luego veremos métodos de factorización matricial para ratings y para feedback implícito. En la 3ra semana veremos formas adicionales de evaluar más allá de las métricas de error de predicción de rating (MAE, MSE, RMSE) e incorporaremos métricas para evaluar listas de ítems (precision, recall, MAP, P@n, nDCG). Veremos métodos basados en contenido y sistemas híbridos. 134 | 135 | **MES 2** Métodos basados en contexto, máquinas de factorización y modelos fundamentales de deep learning para recomendación. Recapitulación de las tareas de recomendación (predecir rating, predecir una lista de items, recomendar una secuencia, recomendación TopN) y de su evaluación considerando diversidad, novedad, coverage, y otras métricas. 136 | 137 | **MES 3** User-centric RecSys, FAT (Fairness, Accountability and Transparency), Aplicaciones de Deep learning para problemas más específicos: recomendación de ropa, multimedia, etc. Modelos profundos generativos para recomendación. Revisaremos problemas de recomendación aún no resueltos en el área. 138 | 139 | **MES 4** Principalmente presentaciones de alumnos. 140 | 141 | ### Código de Honor 142 | 143 | Este curso adscribe al Código de Honor establecido por la Escuela de Ingeniería, el cual es vinculante. Todo trabajo evaluado en este curso debe ser propio. En caso de que exista colaboración permitida con otros estudiantes, el trabajo deberá referenciar y atribuir correctamente dicha contribución a quien corresponda.
Como estudiante es su deber conocer la versión en línea del Código de Honor. 144 | 145 | ### Evaluaciones 146 | 147 | Detalles de las evaluaciones en [esta presentación](https://docs.google.com/presentation/d/1DkBNxdazzUH_UCo2ufBnvwA4nNuhHLKkthd_MJfc1VM/edit?usp=sharing). 148 | 149 | **Tarea 1** 150 | 151 | Al final de las primeras 4 semanas, las(los) estudiantes implementarán mecanismos de recomendación para predecir ratings y para rankear items en un dataset que se entregará durante clases. Usarán la biblioteca pyreclab para los métodos básicos, pero si quieren optar a la nota máxima deben hacer un sistema híbrido o contextual que utilice información de contenido, como texto o imágenes. Para tener una idea de qué se trata la tarea, pueden revisar el [enunciado de la tarea del año 2019](https://github.com/PUC-RecSys-Class/RecSysPUC-2019/blob/master/tarea/Tarea_1_RecSys_2019_2.pdf) 152 | 153 | **Lecturas: Blog y Presentación** 154 | 155 | Fecha de revisión de blogs: El post de la semana x tiene fecha de entrega el lunes a las 12pm de la semana x+1. Ejemplo: Las lecturas de la semana 1 (del 10 al 14 de agosto) se entregan a más tardar el lunes 17 de agosto de 2020 a las 8pm. 156 | 157 | Cada alumno tendrá un repositorio en github (debe indicarlo en [este formulario](https://docs.google.com/forms/d/e/1FAIpQLSe06lSVzL7cUdFrTfmwDHdW6CFAVKQEnIs5CocOyiNpFmD4PA/viewform)) donde escribirá en markdown sus comentarios respecto de los papers indicados como obligatorios. No es necesario hacer un resumen largo del paper, sino indicar un resumen corto, puntos que pueden abrir discusión, mejoras o controversias: Evaluación inadecuada, parámetros importantes no considerados, potenciales mejoras de los algoritmos, fuentes de datos que podrían mejorar los resultados, etc. 158 | 159 | Adicionalmente, cada alumno presentará al menos una vez durante el semestre un paper sobre un tópico, con el objetivo de abrir una discusión sobre el tema durante la clase. 
160 | 161 | **Proyecto Final** 162 | 163 | Durante septiembre, las(los) estudiantes enviarán una idea de proyecto final, la cual desarrollarán durante octubre y noviembre. Enviarán un informe de avance a fines de octubre, para hacer una presentación de su proyecto al final del curso en una sesión de posters. 164 | 165 | ## Planificación general (sujeta a actualización) 166 | 167 | (actualizada el 8 de octubre de 2020) 168 | 169 | ![Planificacion RecSys 2020](https://user-images.githubusercontent.com/208111/95472551-92871e00-0959-11eb-8725-5fb2fe7842f9.png) 170 | 171 | 172 | 173 | ## Lecturas por Semana 174 | 175 | ### Semana 1: 176 | 177 | **Obligatorias** 178 | * Sarwar, B., Karypis, G., Konstan, J., & Riedl, J. (2001). Item-based collaborative filtering recommendation algorithms. In Proceedings of the 10th international conference on World Wide Web (pp. 285-295). 179 | 180 | * [Post original FunkSVD](https://sifter.org/~simon/journal/20061211.html) 181 | 182 | **Sugeridas** 183 | 184 | * Schafer, J. B., Frankowski, D., Herlocker, J., & Sen, S. (2007). Collaborative filtering recommender systems. In The adaptive web (pp. 291-324). Springer Berlin Heidelberg. 185 | * Lemire, D., & Maclachlan, A. (2005). Slope One Predictors for Online Rating-Based Collaborative Filtering. In SDM (Vol. 5, pp. 1-5). 186 | 187 | ### Semana 2: 188 | 189 | **Obligatorias** 190 | * Hu, Y., Koren, Y., & Volinsky, C. (2008). Collaborative filtering for implicit feedback datasets. In Data Mining, 2008. ICDM’08. Eighth IEEE International Conference on (pp. 263-272). IEEE. 191 | * Rendle, S., Freudenthaler, C., Gantner, Z., & Schmidt-Thieme, L. (2009). BPR: Bayesian personalized ranking from implicit feedback. In Proceedings of the Twenty-Fifth Conference on Uncertainty in Artificial Intelligence (pp. 452-461). AUAI Press. 192 | 193 | **Sugeridas** 194 | * Jannach, D., Lerche, L., & Zanker, M. (2018). Recommending based on implicit feedback. In Social Information Access (pp. 510-569). 
Springer, Cham. 195 | * Takács, G., Pilászy, I., Németh, B., & Tikk, D. (2009). Scalable collaborative filtering approaches for large recommender systems. Journal of machine learning research, 10(Mar), 623-656. 196 | * Pan, R., Zhou, Y., Cao, B., Liu, N. N., Lukose, R., Scholz, M., & Yang, Q. (2008). One-class collaborative filtering. In 2008 Eighth IEEE International Conference on Data Mining (pp. 502-511). IEEE. En este artículo aparecen la derivación y reglas de actualización de los parámetros así como las nociones de AMAN y AMAU. 197 | * Srebro, N., & Jaakkola, T. (2003). Weighted low-rank approximations. In Proceedings of the 20th International Conference on Machine Learning (ICML-03) (pp. 720-727). Artículo citado por Pan et al. (2008) indicando detalles de la versión no regularizada que inspira OCCF. 198 | * El siguiente paper es opcional, pero permite entender cómo se deriva el método del paper de Hu et al.: Takács, G., Pilászy, I., & Tikk, D. (2011). Applications of the conjugate gradient method for implicit feedback collaborative filtering. In Proceedings of the fifth ACM conference on Recommender systems (pp. 297-300). ACM. 199 | * Verstrepen, K., Bhaduri, K., Cule, B., & Goethals, B. (2017). Collaborative filtering for binary, positive-only data. ACM Sigkdd Explorations Newsletter, 19(1), 1-21. 200 | 201 | ### Semana 3: 202 | 203 | **Obligatorias** 204 | * Cremonesi, P., Koren, Y., & Turrin, R. (2010). Performance of recommender algorithms on top-n recommendation tasks. In Proceedings of the fourth ACM conference on Recommender systems (pp. 39-46). ACM. 205 | * Shani, G., & Gunawardana, A. (2011) “Evaluating recommendation systems.” In Recommender systems handbook, pp. 257-297. Springer US, 2011. 206 | 207 | **Sugeridas** 208 | * Herlocker, J. L., Konstan, J. A., Terveen, L. G., & Riedl, J. T. (2004). Evaluating collaborative filtering recommender systems. ACM Transactions on Information Systems (TOIS), 22(1), 5-53. 
209 | 210 | ### Semana 4: 211 | 212 | **Obligatorias** 213 | * Pazzani, M. J., & Billsus, D. (2007). Content-based recommendation systems. In The adaptive web (pp. 325-341). Springer Berlin Heidelberg. 214 | * Xu, W., Liu, X., & Gong, Y. (2003). Document clustering based on non-negative matrix factorization. In Proceedings of the 26th annual international ACM SIGIR conference on Research and development in information retrieval (pp. 267-273). ACM. 215 | 216 | **Sugeridas** 217 | * Messina, P., Dominguez, V., Parra, D., Trattner, C., & Soto, A. (2019). Content-based artwork recommendation: integrating painting metadata with neural and manually-engineered visual features. User Modeling and User-Adapted Interaction, 29(2), 251-290. 218 | * Celma, Ò., & Herrera, P. (2008). A new approach to evaluating novel recommendations. In Proceedings of the 2008 ACM conference on Recommender systems (pp. 179-186). 219 | * Van den Oord, A., Dieleman, S., & Schrauwen, B. (2013). Deep content-based music recommendation. In Advances in neural information processing systems (pp. 2643-2651). 220 | 221 | 222 | ### Semana 5: 223 | 224 | **Obligatorias (esta semana se puede elegir una de las dos para entregar*)** 225 | * Adomavicius, G., Mobasher, B., Ricci, F. and Tuzhilin, A. (2011). Context-Aware Recommender Systems. AI Magazine, 32(3), 67-80. 226 | * Jahrer, M., Töscher, A. and Legenstein, R. (2010). Combining predictions for accurate recommender systems. In Proceedings of the 16th ACM SIGKDD International Conference on Knowledge Discovery and Data Mining, 693-702. ACM. 227 | 228 | **Sugeridas** 229 | * Kouki, P., Fakhraei, S., Foulds, J., Eirinaki, M. and Getoor, L. (2015). HyPER: A Flexible and Extensible Probabilistic Framework for Hybrid Recommender Systems. In Proceedings of the 9th ACM Conference on Recommender Systems (RecSys '15), 99–106. ACM. 230 | * Rendle, S. (2010). Factorization machines. In 2010 IEEE International Conference on Data Mining (pp. 995-1000). IEEE. 
231 | 232 | *No olvidar declarar en la crítica el título elegido. 233 | 234 | ### Semanas 6 y 7: 235 | 236 | Libre de lecturas (fiestas patrias) 237 | 238 | ### Semana 8 : 239 | 240 | **Obligatorias con entrega lunes 5 de octubre** 241 | * Pu, P., Chen, L. and Hu, R. (2011). A user-centric evaluation framework for recommender systems. RecSys'11 - Proceedings of the 5th ACM Conference on Recommender Systems. 157-164. 242 | * Parra, D., Brusilovsky, P., and Trattner, C. (2014). See What You Want to See: Visual User-Driven Approach for Hybrid Recommendation. International Conference on Intelligent User Interfaces, Proceedings IUI. 243 | 244 | **Obligatoria con entrega miércoles 7 de octubre** 245 | * Knijnenburg, B., Bostandjiev, S., O'Donovan, J., and Kobsa, A. (2012). Inspectability and control in social recommenders. RecSys'12 - Proceedings of the 6th ACM Conference on Recommender Systems. 246 | 247 | ### Semana 9 : 248 | 249 | **Obligatorias** 250 | * Cañamares, R., Redondo, M., & Castells, P. (2019). Multi-armed recommender system bandit ensembles. In Proceedings of the 13th ACM Conference on Recommender Systems (pp. 432-436). 251 | * Bendada, W., Salha, G., & Bontempelli, T. (2020). Carousel Personalization in Music Streaming Apps with Contextual Bandits. In Fourteenth ACM Conference on Recommender Systems (pp. 420-425). 252 | 253 | **Sugeridas** 254 | * Lacerda, A., Santos, R. L., Veloso, A., & Ziviani, N. (2015). Improving daily deals recommendation using explore-then-exploit strategies. Information Retrieval Journal, 18(2), 95-122. 255 | * Guillou, F., Gaudel, R., & Preux, P. (2016). Scalable explore-exploit collaborative filtering. In Pacific Asia Conference On Information Systems (PACIS). Association For Information System. 256 | * Teo, C. H., Nassif, H., Hill, D., Srinivasan, S., Goodman, M., Mohan, V., & Vishwanathan, S. V. N. (2016). Adaptive, personalized diversity for visual discovery. 
In Proceedings of the 10th ACM conference on recommender systems (pp. 35-38). 257 | 258 | ### Semana 10 : 259 | 260 | **Obligatorias** 261 | * Hasta la sección 3.4 (incluyendo 3.4): Zhang, S., Yao, L., Sun, A., & Tay, Y. (2019). Deep learning based recommender system: A survey and new perspectives. ACM Computing Surveys (CSUR), 52(1), 1-38. 262 | 263 | **Sugeridas** 264 | * Covington, P., Adams, J., & Sargin, E. (2016). Deep neural networks for youtube recommendations. In Proceedings of the 10th ACM conference on recommender systems (pp. 191-198). 265 | * Bansal, T., Belanger, D., & McCallum, A. (2016). Ask the gru: Multi-task learning for deep text recommendations. In Proceedings of the 10th ACM Conference on Recommender Systems (pp. 107-114). 266 | 267 | ### Semana 11 : 268 | 269 | **Obligatorias** 270 | 271 | * Desde la sección 3.5 en adelante: Zhang, S., Yao, L., Sun, A., & Tay, Y. (2019). Deep learning based recommender system: A survey and new perspectives. ACM Computing Surveys (CSUR), 52(1), 1-38. 272 | 273 | **Sugeridas** 274 | * Chen, J., Zhang, H., He, X., Nie, L., Liu, W., & Chua, T. S. (2017). Attentive collaborative filtering: Multimedia recommendation with item-and component-level attention. In Proceedings of the 40th International ACM SIGIR conference on Research and Development in Information Retrieval (pp. 335-344). 275 | * Liang, D., Krishnan, R. G., Hoffman, M. D., & Jebara, T. (2018). Variational autoencoders for collaborative filtering. In Proceedings of the 2018 World Wide Web Conference (pp. 689-698). 
276 | -------------------------------------------------------------------------------- /clases/s1-c2-SlopeOne.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PUC-RecSys-Class/RecSysPUC-2020/3f548241460a7c01e484c63a7a39dcbcd9999f7f/clases/s1-c2-SlopeOne.pdf -------------------------------------------------------------------------------- /clases/s1-c2-UBCF_clustering.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PUC-RecSys-Class/RecSysPUC-2020/3f548241460a7c01e484c63a7a39dcbcd9999f7f/clases/s1-c2-UBCF_clustering.pdf -------------------------------------------------------------------------------- /clases/s1-c2-nonpers-UBCF.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PUC-RecSys-Class/RecSysPUC-2020/3f548241460a7c01e484c63a7a39dcbcd9999f7f/clases/s1-c2-nonpers-UBCF.pdf -------------------------------------------------------------------------------- /clases/s10_c1_activelearning.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PUC-RecSys-Class/RecSysPUC-2020/3f548241460a7c01e484c63a7a39dcbcd9999f7f/clases/s10_c1_activelearning.pdf -------------------------------------------------------------------------------- /clases/s11_c1_deep_learning.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PUC-RecSys-Class/RecSysPUC-2020/3f548241460a7c01e484c63a7a39dcbcd9999f7f/clases/s11_c1_deep_learning.pdf -------------------------------------------------------------------------------- /clases/s11_c2_secu_deep_learning.pdf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/PUC-RecSys-Class/RecSysPUC-2020/3f548241460a7c01e484c63a7a39dcbcd9999f7f/clases/s11_c2_secu_deep_learning.pdf -------------------------------------------------------------------------------- /clases/s12_c1_deep_learning_s.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PUC-RecSys-Class/RecSysPUC-2020/3f548241460a7c01e484c63a7a39dcbcd9999f7f/clases/s12_c1_deep_learning_s.pdf -------------------------------------------------------------------------------- /clases/s12_c2_10bigproblems-recsys-small.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PUC-RecSys-Class/RecSysPUC-2020/3f548241460a7c01e484c63a7a39dcbcd9999f7f/clases/s12_c2_10bigproblems-recsys-small.pdf -------------------------------------------------------------------------------- /clases/s2_c1-IBCF.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PUC-RecSys-Class/RecSysPUC-2020/3f548241460a7c01e484c63a7a39dcbcd9999f7f/clases/s2_c1-IBCF.pdf -------------------------------------------------------------------------------- /clases/s2_c2-Factorizacion_matricial.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PUC-RecSys-Class/RecSysPUC-2020/3f548241460a7c01e484c63a7a39dcbcd9999f7f/clases/s2_c2-Factorizacion_matricial.pdf -------------------------------------------------------------------------------- /clases/s3_c1-Implicit-feedback.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PUC-RecSys-Class/RecSysPUC-2020/3f548241460a7c01e484c63a7a39dcbcd9999f7f/clases/s3_c1-Implicit-feedback.pdf -------------------------------------------------------------------------------- /clases/s3_c2-BPR.pdf: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/PUC-RecSys-Class/RecSysPUC-2020/3f548241460a7c01e484c63a7a39dcbcd9999f7f/clases/s3_c2-BPR.pdf -------------------------------------------------------------------------------- /clases/s4_c1-metricas.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PUC-RecSys-Class/RecSysPUC-2020/3f548241460a7c01e484c63a7a39dcbcd9999f7f/clases/s4_c1-metricas.pdf -------------------------------------------------------------------------------- /clases/s4_c2-tests_estadisticos.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PUC-RecSys-Class/RecSysPUC-2020/3f548241460a7c01e484c63a7a39dcbcd9999f7f/clases/s4_c2-tests_estadisticos.pdf -------------------------------------------------------------------------------- /clases/s5_c1-content.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PUC-RecSys-Class/RecSysPUC-2020/3f548241460a7c01e484c63a7a39dcbcd9999f7f/clases/s5_c1-content.pdf -------------------------------------------------------------------------------- /clases/s5_c1_p2-content.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PUC-RecSys-Class/RecSysPUC-2020/3f548241460a7c01e484c63a7a39dcbcd9999f7f/clases/s5_c1_p2-content.pdf -------------------------------------------------------------------------------- /clases/s5_c2-content.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PUC-RecSys-Class/RecSysPUC-2020/3f548241460a7c01e484c63a7a39dcbcd9999f7f/clases/s5_c2-content.pdf -------------------------------------------------------------------------------- /clases/s6_c1-contexto.pdf: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/PUC-RecSys-Class/RecSysPUC-2020/3f548241460a7c01e484c63a7a39dcbcd9999f7f/clases/s6_c1-contexto.pdf -------------------------------------------------------------------------------- /clases/s6_c1-hibridos.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PUC-RecSys-Class/RecSysPUC-2020/3f548241460a7c01e484c63a7a39dcbcd9999f7f/clases/s6_c1-hibridos.pdf -------------------------------------------------------------------------------- /clases/s6_c2_p2-FMachines.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PUC-RecSys-Class/RecSysPUC-2020/3f548241460a7c01e484c63a7a39dcbcd9999f7f/clases/s6_c2_p2-FMachines.pdf -------------------------------------------------------------------------------- /clases/s6_c2_p3-blending_ensemble.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PUC-RecSys-Class/RecSysPUC-2020/3f548241460a7c01e484c63a7a39dcbcd9999f7f/clases/s6_c2_p3-blending_ensemble.pdf -------------------------------------------------------------------------------- /clases/s9_c1_usercentric.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PUC-RecSys-Class/RecSysPUC-2020/3f548241460a7c01e484c63a7a39dcbcd9999f7f/clases/s9_c1_usercentric.pdf -------------------------------------------------------------------------------- /clases/s9_c2_FATv2.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PUC-RecSys-Class/RecSysPUC-2020/3f548241460a7c01e484c63a7a39dcbcd9999f7f/clases/s9_c2_FATv2.pdf -------------------------------------------------------------------------------- /posters/A1-andrade_dominguez_pattillo.jpg: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/PUC-RecSys-Class/RecSysPUC-2020/3f548241460a7c01e484c63a7a39dcbcd9999f7f/posters/A1-andrade_dominguez_pattillo.jpg -------------------------------------------------------------------------------- /posters/A3-olguin_lopez_ibarra.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PUC-RecSys-Class/RecSysPUC-2020/3f548241460a7c01e484c63a7a39dcbcd9999f7f/posters/A3-olguin_lopez_ibarra.png -------------------------------------------------------------------------------- /posters/A4_donosoguzman.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PUC-RecSys-Class/RecSysPUC-2020/3f548241460a7c01e484c63a7a39dcbcd9999f7f/posters/A4_donosoguzman.png -------------------------------------------------------------------------------- /posters/A5-codoceo_escudero_torres.pptx.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PUC-RecSys-Class/RecSysPUC-2020/3f548241460a7c01e484c63a7a39dcbcd9999f7f/posters/A5-codoceo_escudero_torres.pptx.png -------------------------------------------------------------------------------- /posters/A6-Contreras_Molina_Stambuk.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PUC-RecSys-Class/RecSysPUC-2020/3f548241460a7c01e484c63a7a39dcbcd9999f7f/posters/A6-Contreras_Molina_Stambuk.png -------------------------------------------------------------------------------- /posters/A7-ovalle_valdes.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PUC-RecSys-Class/RecSysPUC-2020/3f548241460a7c01e484c63a7a39dcbcd9999f7f/posters/A7-ovalle_valdes.png 
-------------------------------------------------------------------------------- /posters/N2-cartegana_huerfano_toscano.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PUC-RecSys-Class/RecSysPUC-2020/3f548241460a7c01e484c63a7a39dcbcd9999f7f/posters/N2-cartegana_huerfano_toscano.png -------------------------------------------------------------------------------- /posters/N3-Valencia-González.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PUC-RecSys-Class/RecSysPUC-2020/3f548241460a7c01e484c63a7a39dcbcd9999f7f/posters/N3-Valencia-González.png -------------------------------------------------------------------------------- /posters/N4_castro_casassus.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PUC-RecSys-Class/RecSysPUC-2020/3f548241460a7c01e484c63a7a39dcbcd9999f7f/posters/N4_castro_casassus.png -------------------------------------------------------------------------------- /posters/N5-Labarca_Fuentes.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PUC-RecSys-Class/RecSysPUC-2020/3f548241460a7c01e484c63a7a39dcbcd9999f7f/posters/N5-Labarca_Fuentes.png -------------------------------------------------------------------------------- /posters/N6-Aguilera_Everke.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PUC-RecSys-Class/RecSysPUC-2020/3f548241460a7c01e484c63a7a39dcbcd9999f7f/posters/N6-Aguilera_Everke.png -------------------------------------------------------------------------------- /posters/N7-suarez_carreno_alipanah.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/PUC-RecSys-Class/RecSysPUC-2020/3f548241460a7c01e484c63a7a39dcbcd9999f7f/posters/N7-suarez_carreno_alipanah.png -------------------------------------------------------------------------------- /posters/N8-Guinez_Ruiz_Sanchez.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PUC-RecSys-Class/RecSysPUC-2020/3f548241460a7c01e484c63a7a39dcbcd9999f7f/posters/N8-Guinez_Ruiz_Sanchez.png -------------------------------------------------------------------------------- /posters/V1-salinas.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PUC-RecSys-Class/RecSysPUC-2020/3f548241460a7c01e484c63a7a39dcbcd9999f7f/posters/V1-salinas.png -------------------------------------------------------------------------------- /posters/V2-waugh_hanuch_ricke.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PUC-RecSys-Class/RecSysPUC-2020/3f548241460a7c01e484c63a7a39dcbcd9999f7f/posters/V2-waugh_hanuch_ricke.jpg -------------------------------------------------------------------------------- /posters/V3_duarte_lopez_rodriguez.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PUC-RecSys-Class/RecSysPUC-2020/3f548241460a7c01e484c63a7a39dcbcd9999f7f/posters/V3_duarte_lopez_rodriguez.png -------------------------------------------------------------------------------- /posters/V4-diaz_vinay.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PUC-RecSys-Class/RecSysPUC-2020/3f548241460a7c01e484c63a7a39dcbcd9999f7f/posters/V4-diaz_vinay.png -------------------------------------------------------------------------------- /posters/V5-alliende.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/PUC-RecSys-Class/RecSysPUC-2020/3f548241460a7c01e484c63a7a39dcbcd9999f7f/posters/V5-alliende.png -------------------------------------------------------------------------------- /posters/V6_perez_ramos.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PUC-RecSys-Class/RecSysPUC-2020/3f548241460a7c01e484c63a7a39dcbcd9999f7f/posters/V6_perez_ramos.png -------------------------------------------------------------------------------- /posters/V7-tapia_villagran.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PUC-RecSys-Class/RecSysPUC-2020/3f548241460a7c01e484c63a7a39dcbcd9999f7f/posters/V7-tapia_villagran.png -------------------------------------------------------------------------------- /posters/V8-biskupovic.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PUC-RecSys-Class/RecSysPUC-2020/3f548241460a7c01e484c63a7a39dcbcd9999f7f/posters/V8-biskupovic.png -------------------------------------------------------------------------------- /practicos/FastFM_factorization_machines.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "colab_type": "text", 7 | "id": "view-in-github" 8 | }, 9 | "source": [ 10 | "\"Open\n" 11 | ] 12 | }, 13 | { 14 | "cell_type": "markdown", 15 | "metadata": {}, 16 | "source": [ 17 | "# Práctico librería fastFM - Factorization Machines\n", 18 | "\n", 19 | "Clase: IIC3633 Sistemas Recomendadores, PUC Chile\n", 20 | "\n", 21 | "En este práctico vamos a utilizar la biblioteca de Python [fastFM](https://github.com/ibayer/fastFM) para recomendación utilizando máquinas de factorización. 
\n", 22 | "\n", 23 | "En este caso utilizaremos un dataset de cervezas, donde además de incluir interacciones de usuarios con los items agregaremos feature de tipo de cerveza. (style-id)" 24 | ] 25 | }, 26 | { 27 | "cell_type": "code", 28 | "execution_count": 4, 29 | "metadata": {}, 30 | "outputs": [ 31 | { 32 | "name": "stdout", 33 | "output_type": "stream", 34 | "text": [ 35 | " % Total % Received % Xferd Average Speed Time Time Time Current\n", 36 | " Dload Upload Total Spent Left Speed\n", 37 | "100 388 0 388 0 0 388 0 --:--:-- --:--:-- --:--:-- 388\n", 38 | "100 775k 100 775k 0 0 525k 0 0:00:01 0:00:01 --:--:-- 525k\n" 39 | ] 40 | } 41 | ], 42 | "source": [ 43 | "!curl -L -o \"beer_data.base\" \"https://docs.google.com/uc?export=download&id=1yp9UpqPCESNySlWlDoSEau5aBNKx0nYB\"" 44 | ] 45 | }, 46 | { 47 | "cell_type": "code", 48 | "execution_count": 5, 49 | "metadata": {}, 50 | "outputs": [ 51 | { 52 | "name": "stdout", 53 | "output_type": "stream", 54 | "text": [ 55 | "\u001b[33mThe directory '/Users/andrescarvallo/Library/Caches/pip/http' or its parent directory is not owned by the current user and the cache has been disabled. Please check the permissions and owner of that directory. If executing pip with sudo, you may want sudo's -H flag.\u001b[0m\n", 56 | "\u001b[33mThe directory '/Users/andrescarvallo/Library/Caches/pip' or its parent directory is not owned by the current user and caching wheels has been disabled. check the permissions and owner of that directory. 
If executing pip with sudo, you may want sudo's -H flag.\u001b[0m\n", 57 | "Requirement already satisfied: fastFM in /Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages\n", 58 | "Requirement already satisfied: cython in /Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages (from fastFM)\n", 59 | "Requirement already satisfied: scikit-learn in /Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages (from fastFM)\n", 60 | "Requirement already satisfied: scipy in /Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages (from fastFM)\n", 61 | "Requirement already satisfied: numpy in /Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages (from fastFM)\n", 62 | "\u001b[33mYou are using pip version 9.0.1, however version 20.2.2 is available.\n", 63 | "You should consider upgrading via the 'pip install --upgrade pip' command.\u001b[0m\n" 64 | ] 65 | } 66 | ], 67 | "source": [ 68 | "!pip3 install fastFM" 69 | ] 70 | }, 71 | { 72 | "cell_type": "code", 73 | "execution_count": 6, 74 | "metadata": {}, 75 | "outputs": [], 76 | "source": [ 77 | "import numpy as np\n", 78 | "import pandas as pd\n", 79 | "import fastFM\n", 80 | "from fastFM.datasets import make_user_item_regression\n", 81 | "from sklearn.model_selection import train_test_split\n", 82 | "from fastFM import sgd\n", 83 | "from fastFM import als\n", 84 | "from sklearn.metrics import mean_squared_error\n", 85 | "import matplotlib.pyplot as plt\n", 86 | "from scipy.sparse import csc_matrix\n", 87 | "from fastFM import mcmc\n", 88 | "import functools as fct\n", 89 | "import itertools as itools\n", 90 | "import random, scipy" 91 | ] 92 | }, 93 | { 94 | "cell_type": "markdown", 95 | "metadata": {}, 96 | "source": [ 97 | "# Antes de recomendar hacemos un analisis de los datos " 98 | ] 99 | }, 100 | { 101 | "cell_type": "code", 102 | "execution_count": 7, 103 | "metadata": {}, 104 | "outputs": [ 105 | { 106 | 
"data": { 107 | "text/html": [ 108 | "
\n", 109 | "\n", 122 | "\n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | "
userIDitemIDstyleIDrating
049241175711994.5
14924544111994.5
249241996011995.0
329165590011992.5
4291657110148794.0
\n", 170 | "
" 171 | ], 172 | "text/plain": [ 173 | " userID itemID styleID rating\n", 174 | "0 4924 11757 1199 4.5\n", 175 | "1 4924 5441 1199 4.5\n", 176 | "2 4924 19960 1199 5.0\n", 177 | "3 2916 55900 1199 2.5\n", 178 | "4 2916 57110 14879 4.0" 179 | ] 180 | }, 181 | "execution_count": 7, 182 | "metadata": {}, 183 | "output_type": "execute_result" 184 | } 185 | ], 186 | "source": [ 187 | "df = pd.read_csv('beer_data.base', sep=',',encoding='latin-1')\n", 188 | "df.head()" 189 | ] 190 | }, 191 | { 192 | "cell_type": "code", 193 | "execution_count": 8, 194 | "metadata": {}, 195 | "outputs": [ 196 | { 197 | "name": "stdout", 198 | "output_type": "stream", 199 | "text": [ 200 | "Num. of items: 1836\n", 201 | "Num. of users: 8320\n", 202 | "Num. of ratings: 44379\n" 203 | ] 204 | } 205 | ], 206 | "source": [ 207 | "num_of_items = len(df['itemID'].unique().tolist())\n", 208 | "num_of_users = len(df['userID'].unique().tolist())\n", 209 | "num_of_ratings = len(df['userID'])\n", 210 | "\n", 211 | "print('Num. of items: {}\\nNum. of users: {}\\nNum. 
of ratings: {}'.format(num_of_items, num_of_users, num_of_ratings))" 212 | ] 213 | }, 214 | { 215 | "cell_type": "code", 216 | "execution_count": 11, 217 | "metadata": {}, 218 | "outputs": [ 219 | { 220 | "data": { 221 | "text/plain": [ 222 | "count 44379.000000\n", 223 | "mean 3.865105\n", 224 | "std 0.712633\n", 225 | "min 0.000000\n", 226 | "25% 3.500000\n", 227 | "50% 4.000000\n", 228 | "75% 4.500000\n", 229 | "max 5.000000\n", 230 | "Name: rating, dtype: float64" 231 | ] 232 | }, 233 | "execution_count": 11, 234 | "metadata": {}, 235 | "output_type": "execute_result" 236 | } 237 | ], 238 | "source": [ 239 | "# rating promedio \n", 240 | "df.describe()['rating']" 241 | ] 242 | }, 243 | { 244 | "cell_type": "code", 245 | "execution_count": 17, 246 | "metadata": {}, 247 | "outputs": [ 248 | { 249 | "data": { 250 | "text/plain": [ 251 | "11757 2206\n", 252 | "19960 1681\n", 253 | "16074 1260\n", 254 | "5441 1253\n", 255 | "429 1183\n", 256 | "Name: itemID, dtype: int64" 257 | ] 258 | }, 259 | "execution_count": 17, 260 | "metadata": {}, 261 | "output_type": "execute_result" 262 | } 263 | ], 264 | "source": [ 265 | "# items que han recibido mas ratings\n", 266 | "df.itemID.value_counts().head()" 267 | ] 268 | }, 269 | { 270 | "cell_type": "code", 271 | "execution_count": 18, 272 | "metadata": {}, 273 | "outputs": [ 274 | { 275 | "data": { 276 | "text/plain": [ 277 | "13 181\n", 278 | "24 129\n", 279 | "490 115\n", 280 | "100 111\n", 281 | "695 106\n", 282 | "Name: userID, dtype: int64" 283 | ] 284 | }, 285 | "execution_count": 18, 286 | "metadata": {}, 287 | "output_type": "execute_result" 288 | } 289 | ], 290 | "source": [ 291 | "# usuarios que han dado mas rating \n", 292 | "df.userID.value_counts().head()" 293 | ] 294 | }, 295 | { 296 | "cell_type": "code", 297 | "execution_count": 19, 298 | "metadata": {}, 299 | "outputs": [ 300 | { 301 | "data": { 302 | "text/plain": [ 303 | "1199 17400\n", 304 | "394 3584\n", 305 | "14879 2656\n", 306 | "263 2104\n", 307 | 
"3268 1503\n", 308 | "Name: styleID, dtype: int64" 309 | ] 310 | }, 311 | "execution_count": 19, 312 | "metadata": {}, 313 | "output_type": "execute_result" 314 | } 315 | ], 316 | "source": [ 317 | "# estilos que han recibido más ratings \n", 318 | "df.styleID.value_counts().head()" 319 | ] 320 | }, 321 | { 322 | "cell_type": "markdown", 323 | "metadata": {}, 324 | "source": [ 325 | "# Convertir a formato fastFM" 326 | ] 327 | }, 328 | { 329 | "cell_type": "markdown", 330 | "metadata": {}, 331 | "source": [ 332 | "## funciones " 333 | ] 334 | }, 335 | { 336 | "cell_type": "code", 337 | "execution_count": 23, 338 | "metadata": {}, 339 | "outputs": [], 340 | "source": [ 341 | "def get_single_entries_in_fm_input_format(data, itemlist):\n", 342 | " \n", 343 | " '''Cree el formato de entrada necesario (datos, (fila, columna)) para la matriz csc para\n", 344 | " las entradas individuales en los datos. Cada entrada ocuparía una fila. Esto significa que\n", 345 | " daría como resultado una matriz csc con dimensión (| datos | x | lista de elementos |).\n", 346 | " '''\n", 347 | " \n", 348 | " column = len(itemlist)\n", 349 | " row = len(data)\n", 350 | " shape = (row, column)\n", 351 | "\n", 352 | " row_inds = np.zeros(len(data), dtype=np.int)\n", 353 | " col_inds = np.zeros(len(data), dtype=np.int)\n", 354 | " datalist = np.zeros(len(data), dtype=np.float)\n", 355 | " \n", 356 | " for i in range(len(data)):\n", 357 | " item = data[i]\n", 358 | " val = 1\n", 359 | " datalist[i] = val\n", 360 | " \n", 361 | " # ubica su posición en la lista de elementos, arroja un error si el elemento no es un\n", 362 | " # artículo posible\n", 363 | " col_ind = np.where(itemlist==item)[0]\n", 364 | " \n", 365 | " # no deben ser elementos duplicados en la lista de elementos\n", 366 | " assert len(col_ind) == 1\n", 367 | " col_ind = col_ind[0]\n", 368 | " row_ind = i\n", 369 | "\n", 370 | " col_inds[i] = col_ind\n", 371 | " row_inds[i] = row_ind\n", 372 | "\n", 373 | " return datalist, 
row_inds, col_inds, shape\n", 374 | "\n", 375 | "\n", 376 | "def get_multi_entries_in_fm_input_format(data, itemlist, norm_func=None):\n", 377 | " \n", 378 | " '''Cree el formato de entrada necesario (datos, (fila, columna)) para la matriz csc para\n", 379 | " las entradas múltiples en los datos. Cada conjunto de entradas múltiples ocuparía una fila.\n", 380 | " Esto significa que daría como resultado una matriz csc con dimensión\n", 381 | " (| conjuntos de entradas en datos | x | lista de elementos |).\n", 382 | " '''\n", 383 | " \n", 384 | " column = len(itemlist)\n", 385 | " \n", 386 | " # número de conjuntos de entradas en los datos\n", 387 | " row = len(data)\n", 388 | " shape = (row, column)\n", 389 | "\n", 390 | " # numero de datos \n", 391 | " num_of_data = fct.reduce(lambda x, y: x + len(y), data, 0)\n", 392 | " row_inds = np.zeros(num_of_data, dtype=np.int)\n", 393 | " col_inds = np.zeros(num_of_data, dtype=np.int)\n", 394 | " datalist = np.zeros(num_of_data, dtype=np.float)\n", 395 | " cnt = 0\n", 396 | " for i in range(len(data)):\n", 397 | " multi_entry = data[i]\n", 398 | "\n", 399 | " if norm_func != None:\n", 400 | " # función que recibe el tamaño del multi_entry para decidir cómo normalizarlo\n", 401 | " val = norm_func(len(multi_entry))\n", 402 | " else:\n", 403 | " # asignación de valor binario por defecto\n", 404 | " val = 1 if len(multi_entry) > 0 else 0\n", 405 | "\n", 406 | " # para cada entrada en multi_entry, ubique su posición en la lista de elementos,\n", 407 | " # arroja error si el elemento no es un elemento posible\n", 408 | " # todas las entradas permanecen en la misma fila\n", 409 | " row_ind = i\n", 410 | " for item in multi_entry:\n", 411 | " col_ind = np.where(itemlist==item)[0]\n", 412 | " assert len(col_ind) == 1\n", 413 | " col_ind = col_ind[0]\n", 414 | " \n", 415 | " datalist[cnt] = val\n", 416 | " col_inds[cnt] = col_ind\n", 417 | " row_inds[cnt] = row_ind\n", 418 | " \n", 419 | " # actualiza contador\n", 420 | " cnt += 
1\n", 421 | "\n", 422 | " return datalist, row_inds, col_inds, shape\n" 423 | ] 424 | }, 425 | { 426 | "cell_type": "markdown", 427 | "metadata": {}, 428 | "source": [ 429 | "## conversión de los datos " 430 | ] 431 | }, 432 | { 433 | "cell_type": "code", 434 | "execution_count": 24, 435 | "metadata": {}, 436 | "outputs": [ 437 | { 438 | "name": "stdout", 439 | "output_type": "stream", 440 | "text": [ 441 | "User feature set shape: (44379, 8320)\n", 442 | "Item feature set shape: (44379, 1836)\n", 443 | "Style feature set shape: (44379, 210)\n", 444 | "Dimension of FM input: (44379, 10366)\n" 445 | ] 446 | } 447 | ], 448 | "source": [ 449 | "beerlist = df.sort_values('itemID')['itemID'].unique()\n", 450 | "userlist = df.sort_values('userID')['userID'].unique()\n", 451 | "stylelist = df.sort_values('styleID')['styleID'].unique()\n", 452 | "\n", 453 | "# usuarios que dieron ratings \n", 454 | "user_data = df['userID'].values\n", 455 | "\n", 456 | "# items que recibieron ratings\n", 457 | "beer_data = df['itemID'].values\n", 458 | "\n", 459 | "# data de estilo de cerveza \n", 460 | "styles_data = df['styleID'].values\n", 461 | "\n", 462 | "# target vector: ratings\n", 463 | "rating_data = df['rating'].values\n", 464 | "\n", 465 | "\n", 466 | "# convertir a formato fastFM utilizando funciones de arriba \n", 467 | "user_datalist, user_row_inds, user_col_inds, user_shape = get_single_entries_in_fm_input_format(data=user_data, \n", 468 | " itemlist=userlist)\n", 469 | "\n", 470 | "beer_datalist, beer_row_inds, beer_col_inds, beer_shape = get_single_entries_in_fm_input_format(data=beer_data,\n", 471 | " itemlist=beerlist)\n", 472 | "\n", 473 | "style_datalist, style_row_inds, style_col_inds, style_shape = get_single_entries_in_fm_input_format(data=styles_data,\n", 474 | " itemlist=stylelist)\n", 475 | "\n", 476 | "# Concatena las dos columnas cambiando los índices de las columnas relacionadas con beer.\n", 477 | "# cambiar por el número de columnas en las columnas de 
usuario\n", 478 | "shift_by = len(userlist)\n", 479 | "beer_col_inds += shift_by\n", 480 | "style_col_inds += shift_by + len(beerlist)\n", 481 | "\n", 482 | "# concatena los datos (agregamos item_styles)\n", 483 | "datalist = np.append(user_datalist, [beer_datalist, style_datalist])\n", 484 | "row_inds = np.append(user_row_inds, [beer_row_inds, style_row_inds])\n", 485 | "col_inds = np.append(user_col_inds, [beer_col_inds,style_col_inds])\n", 486 | "\n", 487 | "# asegúrese de que ambos conjuntos de características tengan el mismo número de filas\n", 488 | "print('User feature set shape: {}\\nItem feature set shape: {}\\nStyle feature set shape: {}'.format(user_shape, beer_shape, style_shape))\n", 489 | "\n", 490 | "assert user_shape[0] == beer_shape[0]\n", 491 | "shape = (user_shape[0], user_shape[1] + beer_shape[1] + style_shape[1])\n", 492 | "print('Dimension of FM input: {}'.format(shape))\n", 493 | "\n", 494 | "X = csc_matrix((datalist, (row_inds, col_inds)), shape=shape)\n", 495 | "y = rating_data" 496 | ] 497 | }, 498 | { 499 | "cell_type": "code", 500 | "execution_count": 25, 501 | "metadata": {}, 502 | "outputs": [], 503 | "source": [ 504 | "# split train y test\n", 505 | "X_train, X_test, y_train, y_test = train_test_split(X, y)\n" 506 | ] 507 | }, 508 | { 509 | "cell_type": "code", 510 | "execution_count": 26, 511 | "metadata": {}, 512 | "outputs": [ 513 | { 514 | "name": "stdout", 515 | "output_type": "stream", 516 | "text": [ 517 | "CPU times: user 18.1 s, sys: 117 ms, total: 18.2 s\n", 518 | "Wall time: 18.2 s\n" 519 | ] 520 | } 521 | ], 522 | "source": [ 523 | "# entrenar modelo optimizando con ALS y hacer la prediccion \n", 524 | "fm = als.FMRegression(n_iter=1000, init_stdev=0.1, rank=10, l2_reg_w=0.1, l2_reg_V=0.5)\n", 525 | "fm.fit(X_train, y_train)\n", 526 | "y_pred = fm.predict(X_test)" 527 | ] 528 | }, 529 | { 530 | "cell_type": "code", 531 | "execution_count": 27, 532 | "metadata": {}, 533 | "outputs": [ 534 | { 535 | "name": "stdout", 536 | "output_type": 
"stream", 537 | "text": [ 538 | "Mean squared error under ALS: 0.6832341009172921\n" 539 | ] 540 | } 541 | ], 542 | "source": [ 543 | "error_als = mean_squared_error(y_test, y_pred)\n", 544 | "print('Mean squared error under ALS: {}'.format(error_als))" 545 | ] 546 | }, 547 | { 548 | "cell_type": "code", 549 | "execution_count": 29, 550 | "metadata": {}, 551 | "outputs": [], 552 | "source": [ 553 | "# entrenar modelo optimizando con SGD y hacer la prediccion \n", 554 | "fm_sgd = sgd.FMRegression(n_iter=10000000, init_stdev=0.01, rank=10, random_state=123, \n", 555 | " l2_reg_w=0.1, l2_reg_V=0.5, step_size=0.01)\n", 556 | "fm_sgd.fit(X_train, y_train)\n", 557 | "y_pred_sgd = fm_sgd.predict(X_test)" 558 | ] 559 | }, 560 | { 561 | "cell_type": "code", 562 | "execution_count": 30, 563 | "metadata": {}, 564 | "outputs": [ 565 | { 566 | "name": "stdout", 567 | "output_type": "stream", 568 | "text": [ 569 | "Mean squared error under SGD: 0.45126971767960844\n" 570 | ] 571 | } 572 | ], 573 | "source": [ 574 | "error_sgd = mean_squared_error(y_test, y_pred_sgd)\n", 575 | "print('Mean squared error under SGD: {}'.format(error_sgd))" 576 | ] 577 | }, 578 | { 579 | "cell_type": "code", 580 | "execution_count": null, 581 | "metadata": {}, 582 | "outputs": [], 583 | "source": [] 584 | } 585 | ], 586 | "metadata": { 587 | "kernelspec": { 588 | "display_name": "Python 3", 589 | "language": "python", 590 | "name": "python3" 591 | }, 592 | "language_info": { 593 | "codemirror_mode": { 594 | "name": "ipython", 595 | "version": 3 596 | }, 597 | "file_extension": ".py", 598 | "mimetype": "text/x-python", 599 | "name": "python", 600 | "nbconvert_exporter": "python", 601 | "pygments_lexer": "ipython3", 602 | "version": "3.6.0" 603 | }, 604 | "toc": { 605 | "base_numbering": 1, 606 | "nav_menu": {}, 607 | "number_sections": true, 608 | "sideBar": true, 609 | "skip_h1_title": false, 610 | "title_cell": "Table of Contents", 611 | "title_sidebar": "Contents", 612 | "toc_cell": false, 613 | 
"toc_position": {}, 614 | "toc_section_display": true, 615 | "toc_window_display": false 616 | } 617 | }, 618 | "nbformat": 4, 619 | "nbformat_minor": 2 620 | } 621 | -------------------------------------------------------------------------------- /practicos/Implicit_feedback.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "colab_type": "text", 7 | "id": "view-in-github" 8 | }, 9 | "source": [ 10 | "\"Open\n" 11 | ] 12 | }, 13 | { 14 | "cell_type": "markdown", 15 | "metadata": { 16 | "colab_type": "text", 17 | "id": "-xFADzCKvytx" 18 | }, 19 | "source": [ 20 | "# Práctico librería implicit - ALS y BPR\n", 21 | "\n", 22 | "Clase: IIC3633 Sistemas Recomendadores, PUC Chile\n", 23 | "\n", 24 | "En este práctico vamos a utilizar la biblioteca de Python [implicit](https://implicit.readthedocs.io/en/latest/quickstart.html) para recomendación utilizando ALS y BPR. \n" 25 | ] 26 | }, 27 | { 28 | "cell_type": "code", 29 | "execution_count": 1, 30 | "metadata": { 31 | "colab": { 32 | "base_uri": "https://localhost:8080/", 33 | "height": 212 34 | }, 35 | "colab_type": "code", 36 | "id": "sUlFGZprHneQ", 37 | "outputId": "58c09b34-3d01-45ab-e98c-1d787c7531c4" 38 | }, 39 | "outputs": [ 40 | { 41 | "name": "stdout", 42 | "output_type": "stream", 43 | "text": [ 44 | " % Total % Received % Xferd Average Speed Time Time Time Current\n", 45 | " Dload Upload Total Spent Left Speed\n", 46 | "\r", 47 | " 0 0 0 0 0 0 0 0 --:--:-- --:--:-- --:--:-- 0\r", 48 | " 0 0 0 0 0 0 0 0 --:--:-- --:--:-- --:--:-- 0\r", 49 | "100 388 0 388 0 0 412 0 --:--:-- --:--:-- --:--:-- 412\n", 50 | "100 1546k 100 1546k 0 0 1191k 0 0:00:01 0:00:01 --:--:-- 10.0M\n", 51 | " % Total % Received % Xferd Average Speed Time Time Time Current\n", 52 | " Dload Upload Total Spent Left Speed\n", 53 | "100 388 0 388 0 0 564 0 --:--:-- --:--:-- --:--:-- 564\n", 54 | "100 385k 100 385k 0 0 384k 0 0:00:01 
0:00:01 --:--:-- 3446k\n", 55 | " % Total % Received % Xferd Average Speed Time Time Time Current\n", 56 | " Dload Upload Total Spent Left Speed\n", 57 | "100 388 0 388 0 0 612 0 --:--:-- --:--:-- --:--:-- 611\n", 58 | "100 230k 100 230k 0 0 247k 0 --:--:-- --:--:-- --:--:-- 247k\n" 59 | ] 60 | } 61 | ], 62 | "source": [ 63 | "!curl -L -o \"u2.base\" \"https://drive.google.com/uc?export=download&id=1bGweNw7NbOHoJz11v6ld7ymLR8MLvBsA\"\n", 64 | "!curl -L -o \"u2.test\" \"https://drive.google.com/uc?export=download&id=1f_HwJWC_1HFzgAjKAWKwkuxgjkhkXrVg\"\n", 65 | "!curl -L -o \"u.item\" \"https://drive.google.com/uc?export=download&id=10YLhxkO2-M_flQtyo9OYV4nT9IvSESuz\"" 66 | ] 67 | }, 68 | { 69 | "cell_type": "code", 70 | "execution_count": 2, 71 | "metadata": { 72 | "colab": { 73 | "base_uri": "https://localhost:8080/", 74 | "height": 228 75 | }, 76 | "colab_type": "code", 77 | "id": "rtscg3KuMwRL", 78 | "outputId": "900d11b6-2647-4e96-daca-d14fa4a2c737" 79 | }, 80 | "outputs": [ 81 | { 82 | "name": "stdout", 83 | "output_type": "stream", 84 | "text": [ 85 | "Collecting implicit\n", 86 | "\u001b[?25l Downloading https://files.pythonhosted.org/packages/5a/d8/6b4f1374ffa2647b72ac76960c71b984c6f3238090359fb419d03827d87a/implicit-0.4.2.tar.gz (1.1MB)\n", 87 | "\u001b[K |████████████████████████████████| 1.1MB 2.7MB/s \n", 88 | "\u001b[?25hRequirement already satisfied, skipping upgrade: numpy in /usr/local/lib/python3.6/dist-packages (from implicit) (1.18.5)\n", 89 | "Requirement already satisfied, skipping upgrade: scipy>=0.16 in /usr/local/lib/python3.6/dist-packages (from implicit) (1.4.1)\n", 90 | "Requirement already satisfied, skipping upgrade: tqdm>=4.27 in /usr/local/lib/python3.6/dist-packages (from implicit) (4.41.1)\n", 91 | "Building wheels for collected packages: implicit\n", 92 | " Building wheel for implicit (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n", 93 | " Created wheel for implicit: filename=implicit-0.4.2-cp36-cp36m-linux_x86_64.whl size=3420096 sha256=21fae0d9b72fb94e8b33c6d23d5d8d7f5e7fc302db8222bd16b4489b5e50d6b3\n", 94 | " Stored in directory: /root/.cache/pip/wheels/1b/48/b1/1aebe3acc3afb5589e72d3e7c3ffc3f637dc4721c1a974dff7\n", 95 | "Successfully built implicit\n", 96 | "Installing collected packages: implicit\n", 97 | "Successfully installed implicit-0.4.2\n" 98 | ] 99 | } 100 | ], 101 | "source": [ 102 | "!pip3 install implicit --upgrade" 103 | ] 104 | }, 105 | { 106 | "cell_type": "code", 107 | "execution_count": 3, 108 | "metadata": { 109 | "colab": {}, 110 | "colab_type": "code", 111 | "id": "3Ii2pB-LO0Xy" 112 | }, 113 | "outputs": [], 114 | "source": [ 115 | "import pandas as pd\n", 116 | "import numpy as np\n", 117 | "import implicit\n", 118 | "import scipy.sparse as sparse" 119 | ] 120 | }, 121 | { 122 | "cell_type": "code", 123 | "execution_count": 4, 124 | "metadata": { 125 | "colab": {}, 126 | "colab_type": "code", 127 | "id": "JGxmaexNPv3p" 128 | }, 129 | "outputs": [], 130 | "source": [ 131 | "columns = ['movieid', 'title', 'release_date', 'video_release_date', \\\n", 132 | " 'IMDb_URL', 'unknown', 'Action', 'Adventure', 'Animation', \\\n", 133 | " 'Children', 'Comedy', 'Crime', 'Documentary', 'Drama', 'Fantasy', \\\n", 134 | " 'Film-Noir', 'Horror', 'Musical', 'Mystery', 'Romance', 'Sci-Fi', \\\n", 135 | " 'Thriller', 'War', 'Western']" 136 | ] 137 | }, 138 | { 139 | "cell_type": "code", 140 | "execution_count": 5, 141 | "metadata": { 142 | "colab": {}, 143 | "colab_type": "code", 144 | "id": "zT11_REYOyFO" 145 | }, 146 | "outputs": [], 147 | "source": [ 148 | "# Primero creamos el dataframe con los datos\n", 149 | "df_train = pd.read_csv('u2.base',\n", 150 | " sep='\\t',\n", 151 | " names=['userid', 'itemid', 'rating', 'timestamp'],\n", 152 | " header=None)\n", 153 | "\n", 154 | "# rating >= 3 , relevante (1) y rating menor a 3 es no relevante (0)\n", 155 | 
"df_train.rating = [1 if x >=3 else 0 for x in df_train.rating ]" 156 | ] 157 | }, 158 | { 159 | "cell_type": "code", 160 | "execution_count": 6, 161 | "metadata": { 162 | "colab": { 163 | "base_uri": "https://localhost:8080/", 164 | "height": 191 165 | }, 166 | "colab_type": "code", 167 | "id": "eZUGyYwpIExB", 168 | "outputId": "f472e85b-3a45-4453-ac7c-4e97b508345c" 169 | }, 170 | "outputs": [ 171 | { 172 | "data": { 173 | "text/html": [ 174 | "
\n", 175 | "\n", 188 | "\n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | " \n", 206 | " \n", 207 | " \n", 208 | " \n", 209 | " \n", 210 | " \n", 211 | " \n", 212 | " \n", 213 | " \n", 214 | " \n", 215 | " \n", 216 | " \n", 217 | " \n", 218 | " \n", 219 | " \n", 220 | " \n", 221 | " \n", 222 | " \n", 223 | " \n", 224 | " \n", 225 | " \n", 226 | " \n", 227 | " \n", 228 | " \n", 229 | " \n", 230 | " \n", 231 | " \n", 232 | " \n", 233 | " \n", 234 | " \n", 235 | "
useriditemidratingtimestamp
0131878542960
1141876893119
2151889751712
3161887431973
4171875071561
\n", 236 | "
" 237 | ], 238 | "text/plain": [ 239 | " userid itemid rating timestamp\n", 240 | "0 1 3 1 878542960\n", 241 | "1 1 4 1 876893119\n", 242 | "2 1 5 1 889751712\n", 243 | "3 1 6 1 887431973\n", 244 | "4 1 7 1 875071561" 245 | ] 246 | }, 247 | "execution_count": 6, 248 | "metadata": { 249 | "tags": [] 250 | }, 251 | "output_type": "execute_result" 252 | } 253 | ], 254 | "source": [ 255 | "df_train.head()" 256 | ] 257 | }, 258 | { 259 | "cell_type": "code", 260 | "execution_count": 7, 261 | "metadata": { 262 | "colab": {}, 263 | "colab_type": "code", 264 | "id": "ThhWLfrwPA_5" 265 | }, 266 | "outputs": [], 267 | "source": [ 268 | "# Cargamos el dataset con los items\n", 269 | "df_items = pd.read_csv('u.item',\n", 270 | " sep='|',\n", 271 | " index_col=0,\n", 272 | " names = columns,\n", 273 | " header=None, \n", 274 | " encoding='latin-1')" 275 | ] 276 | }, 277 | { 278 | "cell_type": "code", 279 | "execution_count": 8, 280 | "metadata": { 281 | "colab": { 282 | "base_uri": "https://localhost:8080/", 283 | "height": 370 284 | }, 285 | "colab_type": "code", 286 | "id": "jok78eOXIH5P", 287 | "outputId": "4f18e7bd-49ff-46c4-a271-cf5c01b2d5d4" 288 | }, 289 | "outputs": [ 290 | { 291 | "data": { 292 | "text/html": [ 293 | "
\n", 294 | "\n", 307 | "\n", 308 | " \n", 309 | " \n", 310 | " \n", 311 | " \n", 312 | " \n", 313 | " \n", 314 | " \n", 315 | " \n", 316 | " \n", 317 | " \n", 318 | " \n", 319 | " \n", 320 | " \n", 321 | " \n", 322 | " \n", 323 | " \n", 324 | " \n", 325 | " \n", 326 | " \n", 327 | " \n", 328 | " \n", 329 | " \n", 330 | " \n", 331 | " \n", 332 | " \n", 333 | " \n", 334 | " \n", 335 | " \n", 336 | " \n", 337 | " \n", 338 | " \n", 339 | " \n", 340 | " \n", 341 | " \n", 342 | " \n", 343 | " \n", 344 | " \n", 345 | " \n", 346 | " \n", 347 | " \n", 348 | " \n", 349 | " \n", 350 | " \n", 351 | " \n", 352 | " \n", 353 | " \n", 354 | " \n", 355 | " \n", 356 | " \n", 357 | " \n", 358 | " \n", 359 | " \n", 360 | " \n", 361 | " \n", 362 | " \n", 363 | " \n", 364 | " \n", 365 | " \n", 366 | " \n", 367 | " \n", 368 | " \n", 369 | " \n", 370 | " \n", 371 | " \n", 372 | " \n", 373 | " \n", 374 | " \n", 375 | " \n", 376 | " \n", 377 | " \n", 378 | " \n", 379 | " \n", 380 | " \n", 381 | " \n", 382 | " \n", 383 | " \n", 384 | " \n", 385 | " \n", 386 | " \n", 387 | " \n", 388 | " \n", 389 | " \n", 390 | " \n", 391 | " \n", 392 | " \n", 393 | " \n", 394 | " \n", 395 | " \n", 396 | " \n", 397 | " \n", 398 | " \n", 399 | " \n", 400 | " \n", 401 | " \n", 402 | " \n", 403 | " \n", 404 | " \n", 405 | " \n", 406 | " \n", 407 | " \n", 408 | " \n", 409 | " \n", 410 | " \n", 411 | " \n", 412 | " \n", 413 | " \n", 414 | " \n", 415 | " \n", 416 | " \n", 417 | " \n", 418 | " \n", 419 | " \n", 420 | " \n", 421 | " \n", 422 | " \n", 423 | " \n", 424 | " \n", 425 | " \n", 426 | " \n", 427 | " \n", 428 | " \n", 429 | " \n", 430 | " \n", 431 | " \n", 432 | " \n", 433 | " \n", 434 | " \n", 435 | " \n", 436 | " \n", 437 | " \n", 438 | " \n", 439 | " \n", 440 | " \n", 441 | " \n", 442 | " \n", 443 | " \n", 444 | " \n", 445 | " \n", 446 | " \n", 447 | " \n", 448 | " \n", 449 | " \n", 450 | " \n", 451 | " \n", 452 | " \n", 453 | " \n", 454 | " \n", 455 | " \n", 456 | " \n", 457 | " \n", 458 | " \n", 459 | " 
\n", 460 | " \n", 461 | " \n", 462 | " \n", 463 | " \n", 464 | " \n", 465 | " \n", 466 | " \n", 467 | " \n", 468 | " \n", 469 | " \n", 470 | " \n", 471 | " \n", 472 | " \n", 473 | " \n", 474 | " \n", 475 | " \n", 476 | " \n", 477 | " \n", 478 | " \n", 479 | " \n", 480 | " \n", 481 | " \n", 482 | " \n", 483 | " \n", 484 | " \n", 485 | " \n", 486 | " \n", 487 | " \n", 488 | " \n", 489 | " \n", 490 | " \n", 491 | " \n", 492 | " \n", 493 | " \n", 494 | "
titlerelease_datevideo_release_dateIMDb_URLunknownActionAdventureAnimationChildrenComedyCrimeDocumentaryDramaFantasyFilm-NoirHorrorMusicalMysteryRomanceSci-FiThrillerWarWestern
movieid
1Toy Story (1995)01-Jan-1995NaNhttp://us.imdb.com/M/title-exact?Toy%20Story%2...0001110000000000000
2GoldenEye (1995)01-Jan-1995NaNhttp://us.imdb.com/M/title-exact?GoldenEye%20(...0110000000000000100
3Four Rooms (1995)01-Jan-1995NaNhttp://us.imdb.com/M/title-exact?Four%20Rooms%...0000000000000000100
4Get Shorty (1995)01-Jan-1995NaNhttp://us.imdb.com/M/title-exact?Get%20Shorty%...0100010010000000000
5Copycat (1995)01-Jan-1995NaNhttp://us.imdb.com/M/title-exact?Copycat%20(1995)0000001010000000100
\n", 495 | "
" 496 | ], 497 | "text/plain": [ 498 | " title release_date ... War Western\n", 499 | "movieid ... \n", 500 | "1 Toy Story (1995) 01-Jan-1995 ... 0 0\n", 501 | "2 GoldenEye (1995) 01-Jan-1995 ... 0 0\n", 502 | "3 Four Rooms (1995) 01-Jan-1995 ... 0 0\n", 503 | "4 Get Shorty (1995) 01-Jan-1995 ... 0 0\n", 504 | "5 Copycat (1995) 01-Jan-1995 ... 0 0\n", 505 | "\n", 506 | "[5 rows x 23 columns]" 507 | ] 508 | }, 509 | "execution_count": 8, 510 | "metadata": { 511 | "tags": [] 512 | }, 513 | "output_type": "execute_result" 514 | } 515 | ], 516 | "source": [ 517 | "df_items.head()" 518 | ] 519 | }, 520 | { 521 | "cell_type": "code", 522 | "execution_count": 9, 523 | "metadata": { 524 | "colab": {}, 525 | "colab_type": "code", 526 | "id": "Oua55v2xuNS0" 527 | }, 528 | "outputs": [], 529 | "source": [ 530 | "# Cargamos el dataset de testing\n", 531 | "df_test = pd.read_csv('u2.test',\n", 532 | " sep='\\t',\n", 533 | " names=['userid', 'itemid', 'rating', 'timestamp'],\n", 534 | " header=None)\n", 535 | "\n", 536 | "\n", 537 | "# rating >= 3 es relevante (1) y rating menor a 3 es no relevante (0) \n", 538 | "df_test.rating = [1 if x >=3 else 0 for x in df_test.rating ]\n", 539 | "\n", 540 | "\n", 541 | "user_items_test = {}\n", 542 | "\n", 543 | "for row in df_test.itertuples():\n", 544 | " if row[1] not in user_items_test:\n", 545 | " user_items_test[row[1]] = []\n", 546 | " \n", 547 | " user_items_test[row[1]].append(row[2])" 548 | ] 549 | }, 550 | { 551 | "cell_type": "code", 552 | "execution_count": 10, 553 | "metadata": { 554 | "colab": { 555 | "base_uri": "https://localhost:8080/", 556 | "height": 191 557 | }, 558 | "colab_type": "code", 559 | "id": "wc9gKy7OIPkX", 560 | "outputId": "4f9f8f84-066b-4b51-de0a-dd91853fbd43" 561 | }, 562 | "outputs": [ 563 | { 564 | "data": { 565 | "text/html": [ 566 | "
\n", 567 | "\n", 580 | "\n", 581 | " \n", 582 | " \n", 583 | " \n", 584 | " \n", 585 | " \n", 586 | " \n", 587 | " \n", 588 | " \n", 589 | " \n", 590 | " \n", 591 | " \n", 592 | " \n", 593 | " \n", 594 | " \n", 595 | " \n", 596 | " \n", 597 | " \n", 598 | " \n", 599 | " \n", 600 | " \n", 601 | " \n", 602 | " \n", 603 | " \n", 604 | " \n", 605 | " \n", 606 | " \n", 607 | " \n", 608 | " \n", 609 | " \n", 610 | " \n", 611 | " \n", 612 | " \n", 613 | " \n", 614 | " \n", 615 | " \n", 616 | " \n", 617 | " \n", 618 | " \n", 619 | " \n", 620 | " \n", 621 | " \n", 622 | " \n", 623 | " \n", 624 | " \n", 625 | " \n", 626 | " \n", 627 | "
useriditemidratingtimestamp
0111874965758
1121876893171
2180875072484
3191878543541
41210878542772
\n", 628 | "
" 629 | ], 630 | "text/plain": [ 631 | " userid itemid rating timestamp\n", 632 | "0 1 1 1 874965758\n", 633 | "1 1 2 1 876893171\n", 634 | "2 1 8 0 875072484\n", 635 | "3 1 9 1 878543541\n", 636 | "4 1 21 0 878542772" 637 | ] 638 | }, 639 | "execution_count": 10, 640 | "metadata": { 641 | "tags": [] 642 | }, 643 | "output_type": "execute_result" 644 | } 645 | ], 646 | "source": [ 647 | "df_test.head()" 648 | ] 649 | }, 650 | { 651 | "cell_type": "markdown", 652 | "metadata": { 653 | "colab_type": "text", 654 | "id": "vjqFGDdzOJGc" 655 | }, 656 | "source": [ 657 | "### Métricas" 658 | ] 659 | }, 660 | { 661 | "cell_type": "code", 662 | "execution_count": 11, 663 | "metadata": { 664 | "colab": {}, 665 | "colab_type": "code", 666 | "id": "fESMjaBBA8mx" 667 | }, 668 | "outputs": [], 669 | "source": [ 670 | "# Definicion de métricas (No editar)\n", 671 | "# Obtenido de https://gist.github.com/bwhite/3726239\n", 672 | "\n", 673 | "def precision_at_k(r, k):\n", 674 | " assert k >= 1\n", 675 | " r = np.asarray(r)[:k] != 0\n", 676 | " if r.size != k:\n", 677 | " raise ValueError('Relevance score length < k')\n", 678 | " return np.mean(r)\n", 679 | "\n", 680 | "def average_precision(r):\n", 681 | " r = np.asarray(r) != 0\n", 682 | " out = [precision_at_k(r, k + 1) for k in range(r.size) if r[k]]\n", 683 | " if not out:\n", 684 | " return 0.\n", 685 | " return np.mean(out)\n", 686 | "\n", 687 | "def mean_average_precision(rs):\n", 688 | " return np.mean([average_precision(r) for r in rs])\n", 689 | " \n", 690 | "def dcg_at_k(r, k):\n", 691 | " r = np.asfarray(r)[:k]\n", 692 | " if r.size:\n", 693 | " return np.sum(np.subtract(np.power(2, r), 1) / np.log2(np.arange(2, r.size + 2)))\n", 694 | " return 0.\n", 695 | "\n", 696 | "\n", 697 | "def ndcg_at_k(r, k):\n", 698 | " idcg = dcg_at_k(sorted(r, reverse=True), k)\n", 699 | "\n", 700 | " if not idcg:\n", 701 | " return 0.\n", 702 | " return dcg_at_k(r, k) / idcg" 703 | ] 704 | }, 705 | { 706 | "cell_type": "markdown", 707 | 
"metadata": { 708 | "colab_type": "text", 709 | "id": "S0WSWAhBrc32" 710 | }, 711 | "source": [ 712 | "### Preprocesamiento de los datos a formato sparse" 713 | ] 714 | }, 715 | { 716 | "cell_type": "code", 717 | "execution_count": 12, 718 | "metadata": { 719 | "colab": {}, 720 | "colab_type": "code", 721 | "id": "iP4Cj-eJ7Qzm" 722 | }, 723 | "outputs": [], 724 | "source": [ 725 | "user_items = {}\n", 726 | "itemset = set()\n", 727 | "\n", 728 | "for row in df_train.itertuples():\n", 729 | " if row[1] not in user_items:\n", 730 | " user_items[row[1]] = []\n", 731 | " \n", 732 | " user_items[row[1]].append(row[2])\n", 733 | " itemset.add(row[2])\n", 734 | "\n", 735 | "itemset = np.sort(list(itemset))\n", 736 | "\n", 737 | "sparse_matrix = np.zeros((len(user_items), len(itemset)))\n", 738 | "\n", 739 | "for i, items in enumerate(user_items.values()):\n", 740 | " sparse_matrix[i] = np.isin(itemset, items, assume_unique=True).astype(int)\n", 741 | " \n", 742 | "matrix = sparse.csr_matrix(sparse_matrix.T)\n", 743 | "\n", 744 | "user_ids = {key: i for i, key in enumerate(user_items.keys())}\n", 745 | "user_item_matrix = matrix.T.tocsr()" 746 | ] 747 | }, 748 | { 749 | "cell_type": "code", 750 | "execution_count": 13, 751 | "metadata": { 752 | "colab": {}, 753 | "colab_type": "code", 754 | "id": "VvM6ecOVrp7R" 755 | }, 756 | "outputs": [], 757 | "source": [ 758 | "def evaluate_model(model, n):\n", 759 | " mean_map = 0.\n", 760 | " mean_ndcg = 0.\n", 761 | " for u in user_items_test.keys():\n", 762 | " rec = [t[0] for t in model.recommend(u, user_item_matrix, n)]\n", 763 | " rel_vector = [np.isin(user_items_test[u], rec, assume_unique=True).astype(int)]\n", 764 | " mean_map += mean_average_precision(rel_vector)\n", 765 | " mean_ndcg += ndcg_at_k(rel_vector, n)\n", 766 | "\n", 767 | " mean_map /= len(user_items_test)\n", 768 | " mean_ndcg /= len(user_items_test)\n", 769 | " \n", 770 | " return mean_map, mean_ndcg" 771 | ] 772 | }, 773 | { 774 | "cell_type": "code", 775 | 
"execution_count": 14, 776 | "metadata": { 777 | "colab": {}, 778 | "colab_type": "code", 779 | "id": "LevzqwMhteNA" 780 | }, 781 | "outputs": [], 782 | "source": [ 783 | "def show_recommendations(model, user, n):\n", 784 | " recommendations = [t[0] for t in model.recommend(user, user_item_matrix, n)]\n", 785 | " return df_items.loc[recommendations]['title']" 786 | ] 787 | }, 788 | { 789 | "cell_type": "code", 790 | "execution_count": 15, 791 | "metadata": { 792 | "colab": {}, 793 | "colab_type": "code", 794 | "id": "dwC238H52UuY" 795 | }, 796 | "outputs": [], 797 | "source": [ 798 | "def show_similar_movies(model, item, n=10):\n", 799 | " sim_items = [t[0] for t in model.similar_items(item, n)]\n", 800 | " return df_items.loc[sim_items]['title']" 801 | ] 802 | }, 803 | { 804 | "cell_type": "markdown", 805 | "metadata": { 806 | "colab_type": "text", 807 | "id": "9jBOy1W_ayKt" 808 | }, 809 | "source": [ 810 | "## ALS (Implicit Feedback)" 811 | ] 812 | }, 813 | { 814 | "cell_type": "markdown", 815 | "metadata": { 816 | "colab_type": "text", 817 | "id": "paa71OzHJsal" 818 | }, 819 | "source": [ 820 | "**Pregunta 1:** Explique brevemente cómo funciona el algoritmo ALS.\n", 821 | "\n", 822 | "**Respuesta:**" 823 | ] 824 | }, 825 | { 826 | "cell_type": "code", 827 | "execution_count": 16, 828 | "metadata": { 829 | "colab": { 830 | "base_uri": "https://localhost:8080/", 831 | "height": 81, 832 | "referenced_widgets": [ 833 | "d9d66e46a75d4e16857283d32cee6814", 834 | "6fcaf7490d86437b9cfd5abe976177ac", 835 | "f86af93bbc184ec5a1a5371298586f88", 836 | "a5c5a5b9563f4f2faa361dcb718922e6", 837 | "4be8b62f37ab47c68a5531e18fc1dc7a", 838 | "a27c462034c7470d84f5d012cedc5bbe", 839 | "1607eab87ecb4f78860dc75f835dad18", 840 | "b091ac212c524a5dba947c6b73001f19" 841 | ] 842 | }, 843 | "colab_type": "code", 844 | "id": "SP9Mtz_-7Q2a", 845 | "outputId": "ea8b8ac8-40ea-4ea8-a5d2-ddeff888cb56" 846 | }, 847 | "outputs": [ 848 | { 849 | "name": "stderr", 850 | "output_type": "stream", 851 | 
"text": [ 852 | "WARNING:root:OpenBLAS detected. Its highly recommend to set the environment variable 'export OPENBLAS_NUM_THREADS=1' to disable its internal multithreading\n" 853 | ] 854 | }, 855 | { 856 | "data": { 857 | "application/vnd.jupyter.widget-view+json": { 858 | "model_id": "d9d66e46a75d4e16857283d32cee6814", 859 | "version_major": 2, 860 | "version_minor": 0 861 | }, 862 | "text/plain": [ 863 | "HBox(children=(FloatProgress(value=0.0, max=10.0), HTML(value='')))" 864 | ] 865 | }, 866 | "metadata": { 867 | "tags": [] 868 | }, 869 | "output_type": "display_data" 870 | }, 871 | { 872 | "name": "stdout", 873 | "output_type": "stream", 874 | "text": [ 875 | "\n" 876 | ] 877 | } 878 | ], 879 | "source": [ 880 | "# Definimos y entrenamos el modelo con optimización ALS\n", 881 | "model_als = implicit.als.AlternatingLeastSquares(factors=100, iterations=10, use_gpu=False)\n", 882 | "model_als.fit(matrix)" 883 | ] 884 | }, 885 | { 886 | "cell_type": "markdown", 887 | "metadata": { 888 | "colab_type": "text", 889 | "id": "NjZw2krFJD8g" 890 | }, 891 | "source": [ 892 | "Ejemplo de recomendación y búsqueda de items similares con los factores latentes ya entrenados:" 893 | ] 894 | }, 895 | { 896 | "cell_type": "code", 897 | "execution_count": 17, 898 | "metadata": { 899 | "colab": { 900 | "base_uri": "https://localhost:8080/", 901 | "height": 212 902 | }, 903 | "colab_type": "code", 904 | "id": "ycaOuzuKvOqc", 905 | "outputId": "04a114ef-8e9e-475e-fed9-831582f0eaf0" 906 | }, 907 | "outputs": [ 908 | { 909 | "data": { 910 | "text/plain": [ 911 | "movieid\n", 912 | "256 When the Cats Away (Chacun cherche son chat) (...\n", 913 | "754 Red Corner (1997)\n", 914 | "258 Contact (1997)\n", 915 | "292 Rosewood (1997)\n", 916 | "273 Heat (1995)\n", 917 | "409 Jack (1996)\n", 918 | "1012 Private Parts (1997)\n", 919 | "49 I.Q. 
(1994)\n", 920 | "864 My Fellow Americans (1996)\n", 921 | "248 Grosse Pointe Blank (1997)\n", 922 | "Name: title, dtype: object" 923 | ] 924 | }, 925 | "execution_count": 17, 926 | "metadata": { 927 | "tags": [] 928 | }, 929 | "output_type": "execute_result" 930 | } 931 | ], 932 | "source": [ 933 | "show_recommendations(model_als, user=77, n=10)" 934 | ] 935 | }, 936 | { 937 | "cell_type": "code", 938 | "execution_count": 18, 939 | "metadata": { 940 | "colab": { 941 | "base_uri": "https://localhost:8080/", 942 | "height": 50 943 | }, 944 | "colab_type": "code", 945 | "id": "qmvWfuWir8VY", 946 | "outputId": "8ccdb92a-52dc-42b9-88f9-c9119b0db9bd" 947 | }, 948 | "outputs": [ 949 | { 950 | "name": "stdout", 951 | "output_type": "stream", 952 | "text": [ 953 | "map: 0.05968577258082353\n", 954 | "ndcg: 0.32618683001531396\n" 955 | ] 956 | } 957 | ], 958 | "source": [ 959 | "maprec, ndcg = evaluate_model(model_als, n=10)\n", 960 | "print('map: {}\\nndcg: {}'.format(maprec, ndcg))" 961 | ] 962 | }, 963 | { 964 | "cell_type": "markdown", 965 | "metadata": { 966 | "colab_type": "text", 967 | "id": "QmgRFQE6J_Ns" 968 | }, 969 | "source": [ 970 | "**Pregunta 2:** Pruebe distintos valores para los parámetros de ALS y muestre gráficos de cómo se ven afectadas las métricas recién mostradas." 971 | ] 972 | }, 973 | { 974 | "cell_type": "markdown", 975 | "metadata": { 976 | "colab_type": "text", 977 | "id": "lWWHNXTRqCEy" 978 | }, 979 | "source": [ 980 | "## BPR" 981 | ] 982 | }, 983 | { 984 | "cell_type": "markdown", 985 | "metadata": { 986 | "colab_type": "text", 987 | "id": "e-C7WNrRSqkg" 988 | }, 989 | "source": [ 990 | "**Pregunta 3:** Explique con sus palabras la intuición del framework BPR." 
991 | ] 992 | }, 993 | { 994 | "cell_type": "code", 995 | "execution_count": 19, 996 | "metadata": { 997 | "colab": { 998 | "base_uri": "https://localhost:8080/", 999 | "height": 65, 1000 | "referenced_widgets": [ 1001 | "f3c6e14112e548a1822af3a378809201", 1002 | "6e3c6a76053d44a5872991d8741fb1e4", 1003 | "edc5723524b046f6a3d658bc02e0e2da", 1004 | "c57815b4b98948c2a03ce20cc982b870", 1005 | "db1b375f107249f1a323b72444bf2467", 1006 | "53341e5401b24f5c8095cb92596c9fea", 1007 | "d822ba4957034079bc05e4120d692346", 1008 | "aa547a263c2a41678c434774b090d655" 1009 | ] 1010 | }, 1011 | "colab_type": "code", 1012 | "id": "h_ylm7IRXYiJ", 1013 | "outputId": "2de8f3e9-cd66-416a-abe8-b548437b3be4" 1014 | }, 1015 | "outputs": [ 1016 | { 1017 | "data": { 1018 | "application/vnd.jupyter.widget-view+json": { 1019 | "model_id": "f3c6e14112e548a1822af3a378809201", 1020 | "version_major": 2, 1021 | "version_minor": 0 1022 | }, 1023 | "text/plain": [ 1024 | "HBox(children=(FloatProgress(value=0.0, max=40.0), HTML(value='')))" 1025 | ] 1026 | }, 1027 | "metadata": { 1028 | "tags": [] 1029 | }, 1030 | "output_type": "display_data" 1031 | }, 1032 | { 1033 | "name": "stdout", 1034 | "output_type": "stream", 1035 | "text": [ 1036 | "\n" 1037 | ] 1038 | } 1039 | ], 1040 | "source": [ 1041 | "# Definimos y entrenamos el modelo de implicit feedback utilizando optimizacion BPR\n", 1042 | "model_bpr = implicit.bpr.BayesianPersonalizedRanking(factors=400, iterations=40, use_gpu=False)\n", 1043 | "model_bpr.fit(matrix)" 1044 | ] 1045 | }, 1046 | { 1047 | "cell_type": "markdown", 1048 | "metadata": { 1049 | "colab_type": "text", 1050 | "id": "CsoJLbbgJLz0" 1051 | }, 1052 | "source": [ 1053 | "Ejemplo de recomendación y búsqueda de items similares con los factores latentes ya entrenados:" 1054 | ] 1055 | }, 1056 | { 1057 | "cell_type": "code", 1058 | "execution_count": 20, 1059 | "metadata": { 1060 | "colab": { 1061 | "base_uri": "https://localhost:8080/", 1062 | "height": 212 1063 | }, 1064 | 
"colab_type": "code", 1065 | "id": "lnV72Op4vU9k", 1066 | "outputId": "77ea4a34-d5f9-4a6a-9644-74c8265cabd1" 1067 | }, 1068 | "outputs": [ 1069 | { 1070 | "data": { 1071 | "text/plain": [ 1072 | "movieid\n", 1073 | "285 Secrets & Lies (1996)\n", 1074 | "257 Men in Black (1997)\n", 1075 | "244 Smilla's Sense of Snow (1997)\n", 1076 | "327 Cop Land (1997)\n", 1077 | "746 Real Genius (1985)\n", 1078 | "300 Air Force One (1997)\n", 1079 | "299 Hoodlum (1997)\n", 1080 | "332 Kiss the Girls (1997)\n", 1081 | "267 unknown\n", 1082 | "99 Snow White and the Seven Dwarfs (1937)\n", 1083 | "Name: title, dtype: object" 1084 | ] 1085 | }, 1086 | "execution_count": 20, 1087 | "metadata": { 1088 | "tags": [] 1089 | }, 1090 | "output_type": "execute_result" 1091 | } 1092 | ], 1093 | "source": [ 1094 | "show_recommendations(model_bpr, user=77, n=10)" 1095 | ] 1096 | }, 1097 | { 1098 | "cell_type": "code", 1099 | "execution_count": 21, 1100 | "metadata": { 1101 | "colab": { 1102 | "base_uri": "https://localhost:8080/", 1103 | "height": 212 1104 | }, 1105 | "colab_type": "code", 1106 | "id": "C0ssIyH4rIT7", 1107 | "outputId": "10af02af-960e-48f5-a810-0cc7d630140a" 1108 | }, 1109 | "outputs": [ 1110 | { 1111 | "data": { 1112 | "text/plain": [ 1113 | "movieid\n", 1114 | "171 Delicatessen (1991)\n", 1115 | "209 This Is Spinal Tap (1984)\n", 1116 | "21 Muppet Treasure Island (1996)\n", 1117 | "68 Crow, The (1994)\n", 1118 | "203 Unforgiven (1992)\n", 1119 | "173 Princess Bride, The (1987)\n", 1120 | "172 Empire Strikes Back, The (1980)\n", 1121 | "194 Sting, The (1973)\n", 1122 | "78 Free Willy (1993)\n", 1123 | "199 Bridge on the River Kwai, The (1957)\n", 1124 | "Name: title, dtype: object" 1125 | ] 1126 | }, 1127 | "execution_count": 21, 1128 | "metadata": { 1129 | "tags": [] 1130 | }, 1131 | "output_type": "execute_result" 1132 | } 1133 | ], 1134 | "source": [ 1135 | "show_similar_movies(model_bpr, item=171, n=10)" 1136 | ] 1137 | }, 1138 | { 1139 | "cell_type": "code", 1140 | 
"execution_count": 22, 1141 | "metadata": { 1142 | "colab": { 1143 | "base_uri": "https://localhost:8080/", 1144 | "height": 50 1145 | }, 1146 | "colab_type": "code", 1147 | "id": "e-QMLEOEq_Qb", 1148 | "outputId": "074fe83e-0a61-4d7c-e9d6-9992a6561f26" 1149 | }, 1150 | "outputs": [ 1151 | { 1152 | "name": "stdout", 1153 | "output_type": "stream", 1154 | "text": [ 1155 | "map: 0.05408205018391552\n", 1156 | "ndcg: 0.3614088820826952\n" 1157 | ] 1158 | } 1159 | ], 1160 | "source": [ 1161 | "maprec, ndcg = evaluate_model(model_bpr, n=10)\n", 1162 | "print('map: {}\\nndcg: {}'.format(maprec, ndcg))" 1163 | ] 1164 | }, 1165 | { 1166 | "cell_type": "markdown", 1167 | "metadata": { 1168 | "colab_type": "text", 1169 | "id": "x1q3RCSEKmCP" 1170 | }, 1171 | "source": [ 1172 | "**Pregunta 4:** Pruebe distintos valores para los parámetros de BPR y muestre gráficos de cómo se ven afectadas las métricas de ranking (nDCG@10 y MAP) recién mostradas." 1173 | ] 1174 | }, 1175 | { 1176 | "cell_type": "code", 1177 | "execution_count": null, 1178 | "metadata": { 1179 | "colab": {}, 1180 | "colab_type": "code", 1181 | "id": "W35rKGjxJY5O" 1182 | }, 1183 | "outputs": [], 1184 | "source": [] 1185 | } 1186 | ], 1187 | "metadata": { 1188 | "colab": { 1189 | "collapsed_sections": [], 1190 | "name": "Implicit_implicit_feedback.ipynb", 1191 | "provenance": [] 1192 | }, 1193 | "kernelspec": { 1194 | "display_name": "Python 3", 1195 | "language": "python", 1196 | "name": "python3" 1197 | }, 1198 | "language_info": { 1199 | "codemirror_mode": { 1200 | "name": "ipython", 1201 | "version": 3 1202 | }, 1203 | "file_extension": ".py", 1204 | "mimetype": "text/x-python", 1205 | "name": "python", 1206 | "nbconvert_exporter": "python", 1207 | "pygments_lexer": "ipython3", 1208 | "version": "3.6.0" 1209 | }, 1210 | "toc": { 1211 | "base_numbering": 1, 1212 | "nav_menu": {}, 1213 | "number_sections": true, 1214 | "sideBar": true, 1215 | "skip_h1_title": false, 1216 | "title_cell": "Table of Contents", 
1217 | "title_sidebar": "Contents", 1218 | "toc_cell": false, 1219 | "toc_position": {}, 1220 | "toc_section_display": true, 1221 | "toc_window_display": false 1222 | }, 1223 | "widgets": { 1224 | "application/vnd.jupyter.widget-state+json": { 1225 | "1607eab87ecb4f78860dc75f835dad18": { 1226 | "model_module": "@jupyter-widgets/controls", 1227 | "model_name": "DescriptionStyleModel", 1228 | "state": { 1229 | "_model_module": "@jupyter-widgets/controls", 1230 | "_model_module_version": "1.5.0", 1231 | "_model_name": "DescriptionStyleModel", 1232 | "_view_count": null, 1233 | "_view_module": "@jupyter-widgets/base", 1234 | "_view_module_version": "1.2.0", 1235 | "_view_name": "StyleView", 1236 | "description_width": "" 1237 | } 1238 | }, 1239 | "4be8b62f37ab47c68a5531e18fc1dc7a": { 1240 | "model_module": "@jupyter-widgets/controls", 1241 | "model_name": "ProgressStyleModel", 1242 | "state": { 1243 | "_model_module": "@jupyter-widgets/controls", 1244 | "_model_module_version": "1.5.0", 1245 | "_model_name": "ProgressStyleModel", 1246 | "_view_count": null, 1247 | "_view_module": "@jupyter-widgets/base", 1248 | "_view_module_version": "1.2.0", 1249 | "_view_name": "StyleView", 1250 | "bar_color": null, 1251 | "description_width": "initial" 1252 | } 1253 | }, 1254 | "53341e5401b24f5c8095cb92596c9fea": { 1255 | "model_module": "@jupyter-widgets/base", 1256 | "model_name": "LayoutModel", 1257 | "state": { 1258 | "_model_module": "@jupyter-widgets/base", 1259 | "_model_module_version": "1.2.0", 1260 | "_model_name": "LayoutModel", 1261 | "_view_count": null, 1262 | "_view_module": "@jupyter-widgets/base", 1263 | "_view_module_version": "1.2.0", 1264 | "_view_name": "LayoutView", 1265 | "align_content": null, 1266 | "align_items": null, 1267 | "align_self": null, 1268 | "border": null, 1269 | "bottom": null, 1270 | "display": null, 1271 | "flex": null, 1272 | "flex_flow": null, 1273 | "grid_area": null, 1274 | "grid_auto_columns": null, 1275 | "grid_auto_flow": null, 1276 | 
"grid_auto_rows": null, 1277 | "grid_column": null, 1278 | "grid_gap": null, 1279 | "grid_row": null, 1280 | "grid_template_areas": null, 1281 | "grid_template_columns": null, 1282 | "grid_template_rows": null, 1283 | "height": null, 1284 | "justify_content": null, 1285 | "justify_items": null, 1286 | "left": null, 1287 | "margin": null, 1288 | "max_height": null, 1289 | "max_width": null, 1290 | "min_height": null, 1291 | "min_width": null, 1292 | "object_fit": null, 1293 | "object_position": null, 1294 | "order": null, 1295 | "overflow": null, 1296 | "overflow_x": null, 1297 | "overflow_y": null, 1298 | "padding": null, 1299 | "right": null, 1300 | "top": null, 1301 | "visibility": null, 1302 | "width": null 1303 | } 1304 | }, 1305 | "6e3c6a76053d44a5872991d8741fb1e4": { 1306 | "model_module": "@jupyter-widgets/base", 1307 | "model_name": "LayoutModel", 1308 | "state": { 1309 | "_model_module": "@jupyter-widgets/base", 1310 | "_model_module_version": "1.2.0", 1311 | "_model_name": "LayoutModel", 1312 | "_view_count": null, 1313 | "_view_module": "@jupyter-widgets/base", 1314 | "_view_module_version": "1.2.0", 1315 | "_view_name": "LayoutView", 1316 | "align_content": null, 1317 | "align_items": null, 1318 | "align_self": null, 1319 | "border": null, 1320 | "bottom": null, 1321 | "display": null, 1322 | "flex": null, 1323 | "flex_flow": null, 1324 | "grid_area": null, 1325 | "grid_auto_columns": null, 1326 | "grid_auto_flow": null, 1327 | "grid_auto_rows": null, 1328 | "grid_column": null, 1329 | "grid_gap": null, 1330 | "grid_row": null, 1331 | "grid_template_areas": null, 1332 | "grid_template_columns": null, 1333 | "grid_template_rows": null, 1334 | "height": null, 1335 | "justify_content": null, 1336 | "justify_items": null, 1337 | "left": null, 1338 | "margin": null, 1339 | "max_height": null, 1340 | "max_width": null, 1341 | "min_height": null, 1342 | "min_width": null, 1343 | "object_fit": null, 1344 | "object_position": null, 1345 | "order": null, 1346 | 
"overflow": null, 1347 | "overflow_x": null, 1348 | "overflow_y": null, 1349 | "padding": null, 1350 | "right": null, 1351 | "top": null, 1352 | "visibility": null, 1353 | "width": null 1354 | } 1355 | }, 1356 | "6fcaf7490d86437b9cfd5abe976177ac": { 1357 | "model_module": "@jupyter-widgets/base", 1358 | "model_name": "LayoutModel", 1359 | "state": { 1360 | "_model_module": "@jupyter-widgets/base", 1361 | "_model_module_version": "1.2.0", 1362 | "_model_name": "LayoutModel", 1363 | "_view_count": null, 1364 | "_view_module": "@jupyter-widgets/base", 1365 | "_view_module_version": "1.2.0", 1366 | "_view_name": "LayoutView", 1367 | "align_content": null, 1368 | "align_items": null, 1369 | "align_self": null, 1370 | "border": null, 1371 | "bottom": null, 1372 | "display": null, 1373 | "flex": null, 1374 | "flex_flow": null, 1375 | "grid_area": null, 1376 | "grid_auto_columns": null, 1377 | "grid_auto_flow": null, 1378 | "grid_auto_rows": null, 1379 | "grid_column": null, 1380 | "grid_gap": null, 1381 | "grid_row": null, 1382 | "grid_template_areas": null, 1383 | "grid_template_columns": null, 1384 | "grid_template_rows": null, 1385 | "height": null, 1386 | "justify_content": null, 1387 | "justify_items": null, 1388 | "left": null, 1389 | "margin": null, 1390 | "max_height": null, 1391 | "max_width": null, 1392 | "min_height": null, 1393 | "min_width": null, 1394 | "object_fit": null, 1395 | "object_position": null, 1396 | "order": null, 1397 | "overflow": null, 1398 | "overflow_x": null, 1399 | "overflow_y": null, 1400 | "padding": null, 1401 | "right": null, 1402 | "top": null, 1403 | "visibility": null, 1404 | "width": null 1405 | } 1406 | }, 1407 | "a27c462034c7470d84f5d012cedc5bbe": { 1408 | "model_module": "@jupyter-widgets/base", 1409 | "model_name": "LayoutModel", 1410 | "state": { 1411 | "_model_module": "@jupyter-widgets/base", 1412 | "_model_module_version": "1.2.0", 1413 | "_model_name": "LayoutModel", 1414 | "_view_count": null, 1415 | "_view_module": 
"@jupyter-widgets/base", 1416 | "_view_module_version": "1.2.0", 1417 | "_view_name": "LayoutView", 1418 | "align_content": null, 1419 | "align_items": null, 1420 | "align_self": null, 1421 | "border": null, 1422 | "bottom": null, 1423 | "display": null, 1424 | "flex": null, 1425 | "flex_flow": null, 1426 | "grid_area": null, 1427 | "grid_auto_columns": null, 1428 | "grid_auto_flow": null, 1429 | "grid_auto_rows": null, 1430 | "grid_column": null, 1431 | "grid_gap": null, 1432 | "grid_row": null, 1433 | "grid_template_areas": null, 1434 | "grid_template_columns": null, 1435 | "grid_template_rows": null, 1436 | "height": null, 1437 | "justify_content": null, 1438 | "justify_items": null, 1439 | "left": null, 1440 | "margin": null, 1441 | "max_height": null, 1442 | "max_width": null, 1443 | "min_height": null, 1444 | "min_width": null, 1445 | "object_fit": null, 1446 | "object_position": null, 1447 | "order": null, 1448 | "overflow": null, 1449 | "overflow_x": null, 1450 | "overflow_y": null, 1451 | "padding": null, 1452 | "right": null, 1453 | "top": null, 1454 | "visibility": null, 1455 | "width": null 1456 | } 1457 | }, 1458 | "a5c5a5b9563f4f2faa361dcb718922e6": { 1459 | "model_module": "@jupyter-widgets/controls", 1460 | "model_name": "HTMLModel", 1461 | "state": { 1462 | "_dom_classes": [], 1463 | "_model_module": "@jupyter-widgets/controls", 1464 | "_model_module_version": "1.5.0", 1465 | "_model_name": "HTMLModel", 1466 | "_view_count": null, 1467 | "_view_module": "@jupyter-widgets/controls", 1468 | "_view_module_version": "1.5.0", 1469 | "_view_name": "HTMLView", 1470 | "description": "", 1471 | "description_tooltip": null, 1472 | "layout": "IPY_MODEL_b091ac212c524a5dba947c6b73001f19", 1473 | "placeholder": "​", 1474 | "style": "IPY_MODEL_1607eab87ecb4f78860dc75f835dad18", 1475 | "value": " 10/10 [00:11<00:00, 1.15s/it]" 1476 | } 1477 | }, 1478 | "aa547a263c2a41678c434774b090d655": { 1479 | "model_module": "@jupyter-widgets/base", 1480 | "model_name": 
"LayoutModel", 1481 | "state": { 1482 | "_model_module": "@jupyter-widgets/base", 1483 | "_model_module_version": "1.2.0", 1484 | "_model_name": "LayoutModel", 1485 | "_view_count": null, 1486 | "_view_module": "@jupyter-widgets/base", 1487 | "_view_module_version": "1.2.0", 1488 | "_view_name": "LayoutView", 1489 | "align_content": null, 1490 | "align_items": null, 1491 | "align_self": null, 1492 | "border": null, 1493 | "bottom": null, 1494 | "display": null, 1495 | "flex": null, 1496 | "flex_flow": null, 1497 | "grid_area": null, 1498 | "grid_auto_columns": null, 1499 | "grid_auto_flow": null, 1500 | "grid_auto_rows": null, 1501 | "grid_column": null, 1502 | "grid_gap": null, 1503 | "grid_row": null, 1504 | "grid_template_areas": null, 1505 | "grid_template_columns": null, 1506 | "grid_template_rows": null, 1507 | "height": null, 1508 | "justify_content": null, 1509 | "justify_items": null, 1510 | "left": null, 1511 | "margin": null, 1512 | "max_height": null, 1513 | "max_width": null, 1514 | "min_height": null, 1515 | "min_width": null, 1516 | "object_fit": null, 1517 | "object_position": null, 1518 | "order": null, 1519 | "overflow": null, 1520 | "overflow_x": null, 1521 | "overflow_y": null, 1522 | "padding": null, 1523 | "right": null, 1524 | "top": null, 1525 | "visibility": null, 1526 | "width": null 1527 | } 1528 | }, 1529 | "b091ac212c524a5dba947c6b73001f19": { 1530 | "model_module": "@jupyter-widgets/base", 1531 | "model_name": "LayoutModel", 1532 | "state": { 1533 | "_model_module": "@jupyter-widgets/base", 1534 | "_model_module_version": "1.2.0", 1535 | "_model_name": "LayoutModel", 1536 | "_view_count": null, 1537 | "_view_module": "@jupyter-widgets/base", 1538 | "_view_module_version": "1.2.0", 1539 | "_view_name": "LayoutView", 1540 | "align_content": null, 1541 | "align_items": null, 1542 | "align_self": null, 1543 | "border": null, 1544 | "bottom": null, 1545 | "display": null, 1546 | "flex": null, 1547 | "flex_flow": null, 1548 | "grid_area": 
null, 1549 | "grid_auto_columns": null, 1550 | "grid_auto_flow": null, 1551 | "grid_auto_rows": null, 1552 | "grid_column": null, 1553 | "grid_gap": null, 1554 | "grid_row": null, 1555 | "grid_template_areas": null, 1556 | "grid_template_columns": null, 1557 | "grid_template_rows": null, 1558 | "height": null, 1559 | "justify_content": null, 1560 | "justify_items": null, 1561 | "left": null, 1562 | "margin": null, 1563 | "max_height": null, 1564 | "max_width": null, 1565 | "min_height": null, 1566 | "min_width": null, 1567 | "object_fit": null, 1568 | "object_position": null, 1569 | "order": null, 1570 | "overflow": null, 1571 | "overflow_x": null, 1572 | "overflow_y": null, 1573 | "padding": null, 1574 | "right": null, 1575 | "top": null, 1576 | "visibility": null, 1577 | "width": null 1578 | } 1579 | }, 1580 | "c57815b4b98948c2a03ce20cc982b870": { 1581 | "model_module": "@jupyter-widgets/controls", 1582 | "model_name": "HTMLModel", 1583 | "state": { 1584 | "_dom_classes": [], 1585 | "_model_module": "@jupyter-widgets/controls", 1586 | "_model_module_version": "1.5.0", 1587 | "_model_name": "HTMLModel", 1588 | "_view_count": null, 1589 | "_view_module": "@jupyter-widgets/controls", 1590 | "_view_module_version": "1.5.0", 1591 | "_view_name": "HTMLView", 1592 | "description": "", 1593 | "description_tooltip": null, 1594 | "layout": "IPY_MODEL_aa547a263c2a41678c434774b090d655", 1595 | "placeholder": "​", 1596 | "style": "IPY_MODEL_d822ba4957034079bc05e4120d692346", 1597 | "value": " 40/40 [00:11<00:00, 3.59it/s, correct=75.54%, skipped=24.73%]" 1598 | } 1599 | }, 1600 | "d822ba4957034079bc05e4120d692346": { 1601 | "model_module": "@jupyter-widgets/controls", 1602 | "model_name": "DescriptionStyleModel", 1603 | "state": { 1604 | "_model_module": "@jupyter-widgets/controls", 1605 | "_model_module_version": "1.5.0", 1606 | "_model_name": "DescriptionStyleModel", 1607 | "_view_count": null, 1608 | "_view_module": "@jupyter-widgets/base", 1609 | "_view_module_version": 
"1.2.0", 1610 | "_view_name": "StyleView", 1611 | "description_width": "" 1612 | } 1613 | }, 1614 | "d9d66e46a75d4e16857283d32cee6814": { 1615 | "model_module": "@jupyter-widgets/controls", 1616 | "model_name": "HBoxModel", 1617 | "state": { 1618 | "_dom_classes": [], 1619 | "_model_module": "@jupyter-widgets/controls", 1620 | "_model_module_version": "1.5.0", 1621 | "_model_name": "HBoxModel", 1622 | "_view_count": null, 1623 | "_view_module": "@jupyter-widgets/controls", 1624 | "_view_module_version": "1.5.0", 1625 | "_view_name": "HBoxView", 1626 | "box_style": "", 1627 | "children": [ 1628 | "IPY_MODEL_f86af93bbc184ec5a1a5371298586f88", 1629 | "IPY_MODEL_a5c5a5b9563f4f2faa361dcb718922e6" 1630 | ], 1631 | "layout": "IPY_MODEL_6fcaf7490d86437b9cfd5abe976177ac" 1632 | } 1633 | }, 1634 | "db1b375f107249f1a323b72444bf2467": { 1635 | "model_module": "@jupyter-widgets/controls", 1636 | "model_name": "ProgressStyleModel", 1637 | "state": { 1638 | "_model_module": "@jupyter-widgets/controls", 1639 | "_model_module_version": "1.5.0", 1640 | "_model_name": "ProgressStyleModel", 1641 | "_view_count": null, 1642 | "_view_module": "@jupyter-widgets/base", 1643 | "_view_module_version": "1.2.0", 1644 | "_view_name": "StyleView", 1645 | "bar_color": null, 1646 | "description_width": "initial" 1647 | } 1648 | }, 1649 | "edc5723524b046f6a3d658bc02e0e2da": { 1650 | "model_module": "@jupyter-widgets/controls", 1651 | "model_name": "FloatProgressModel", 1652 | "state": { 1653 | "_dom_classes": [], 1654 | "_model_module": "@jupyter-widgets/controls", 1655 | "_model_module_version": "1.5.0", 1656 | "_model_name": "FloatProgressModel", 1657 | "_view_count": null, 1658 | "_view_module": "@jupyter-widgets/controls", 1659 | "_view_module_version": "1.5.0", 1660 | "_view_name": "ProgressView", 1661 | "bar_style": "success", 1662 | "description": "100%", 1663 | "description_tooltip": null, 1664 | "layout": "IPY_MODEL_53341e5401b24f5c8095cb92596c9fea", 1665 | "max": 40, 1666 | "min": 0, 
1667 | "orientation": "horizontal", 1668 | "style": "IPY_MODEL_db1b375f107249f1a323b72444bf2467", 1669 | "value": 40 1670 | } 1671 | }, 1672 | "f3c6e14112e548a1822af3a378809201": { 1673 | "model_module": "@jupyter-widgets/controls", 1674 | "model_name": "HBoxModel", 1675 | "state": { 1676 | "_dom_classes": [], 1677 | "_model_module": "@jupyter-widgets/controls", 1678 | "_model_module_version": "1.5.0", 1679 | "_model_name": "HBoxModel", 1680 | "_view_count": null, 1681 | "_view_module": "@jupyter-widgets/controls", 1682 | "_view_module_version": "1.5.0", 1683 | "_view_name": "HBoxView", 1684 | "box_style": "", 1685 | "children": [ 1686 | "IPY_MODEL_edc5723524b046f6a3d658bc02e0e2da", 1687 | "IPY_MODEL_c57815b4b98948c2a03ce20cc982b870" 1688 | ], 1689 | "layout": "IPY_MODEL_6e3c6a76053d44a5872991d8741fb1e4" 1690 | } 1691 | }, 1692 | "f86af93bbc184ec5a1a5371298586f88": { 1693 | "model_module": "@jupyter-widgets/controls", 1694 | "model_name": "FloatProgressModel", 1695 | "state": { 1696 | "_dom_classes": [], 1697 | "_model_module": "@jupyter-widgets/controls", 1698 | "_model_module_version": "1.5.0", 1699 | "_model_name": "FloatProgressModel", 1700 | "_view_count": null, 1701 | "_view_module": "@jupyter-widgets/controls", 1702 | "_view_module_version": "1.5.0", 1703 | "_view_name": "ProgressView", 1704 | "bar_style": "success", 1705 | "description": "100%", 1706 | "description_tooltip": null, 1707 | "layout": "IPY_MODEL_a27c462034c7470d84f5d012cedc5bbe", 1708 | "max": 10, 1709 | "min": 0, 1710 | "orientation": "horizontal", 1711 | "style": "IPY_MODEL_4be8b62f37ab47c68a5531e18fc1dc7a", 1712 | "value": 10 1713 | } 1714 | } 1715 | } 1716 | } 1717 | }, 1718 | "nbformat": 4, 1719 | "nbformat_minor": 1 1720 | } 1721 | -------------------------------------------------------------------------------- /practicos/README.md: -------------------------------------------------------------------------------- 1 | ## Prácticos Sistemas Recomendadores IIC3633 2 | 3 | 4 | **Links 
ayudantías grabadas** 5 | 6 | [17 de Agosto](https://drive.google.com/file/d/1TGUKYi-jV7vJ5ns27pEMYyOqagQcfh_1/view?usp=sharing) 7 | 8 | [24 de Agosto](https://drive.google.com/file/d/1JsA1d5ZiS4IVNTMgDO1B2_gwnCIKhbzD/view?usp=sharing) 9 | 10 | [13 de Septiembre](https://drive.google.com/file/d/1x0iGCrcLvLBTch_cAeqnD17K3DeSKUhd/view) 11 | 12 | [21 de Septiembre ](https://drive.google.com/file/d/1LeCCUQPNkRA9RFbw4N4nnaRgr4ZKPUjD/view) 13 | 14 | [27 de Octubre ](https://drive.google.com/file/d/171kVEF-etFr2NN7VaB3pbv_l_kcr2WaK/view?usp=sharing) 15 | 16 | ------------------------------------ 17 | 18 | A continuación encontrarán la lista de los prácticos que deberán realizar durante este semestre. 19 | 20 | | # | Práctico | Semana | Fecha Ayudantía | Encargado | Video | 21 | |----|-------------------------------------------------------------------------------------------------------------------------------------------|--------|-----------------|--------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| 22 | | 1 | [Most Popular y Item Average Rating](https://github.com/PUC-RecSys-Class/RecSysPUC-2020/blob/master/practicos/pyRecLab_MostPopular.ipynb) | 1 | 17 Agosto | Andrés C. | Open Video | 23 | | 2 | [User KNN](https://github.com/PUC-RecSys-Class/RecSysPUC-2020/blob/master/practicos/pyRecLab_uKNN.ipynb) | 1 | 17 Agosto | Manuel C. | Open Video | 24 | | 3 | [Slope One](https://github.com/PUC-RecSys-Class/RecSysPUC-2020/blob/master/practicos/pyRecLab_SlopeOne.ipynb) | 2 | 24 Agosto | Francisca C. | Open Video | 25 | | 4 | [Item KNN](https://github.com/PUC-RecSys-Class/RecSysPUC-2020/blob/master/practicos/pyRecLab_iKNN.ipynb) | 2 | 24 Agosto | Andrés V. | Open Video | 26 | | 5 | [Funk SVD](https://github.com/PUC-RecSys-Class/RecSysPUC-2020/blob/master/practicos/pyRecLab_FunkSVD.ipynb) | 2 | 24 Agosto | Vladimir A. 
| Open Video | 27 | | 6 | [Implicit Feedback y BPR](https://github.com/PUC-RecSys-Class/RecSysPUC-2020/blob/master/practicos/Implicit_feedback.ipynb) | 3 | 31 Agosto | Andrés C. | Open Video | 28 | | 7 | [Content-based (Texto)](https://github.com/PUC-RecSys-Class/RecSysPUC-2020/blob/master/practicos/Content_Based_texto.ipynb) | 4 | 7 Septiembre | Andrés C. | Open Video | 29 | | 8 | [Content-based (Imágenes)](https://github.com/PUC-RecSys-Class/RecSysPUC-2020/blob/master/practicos/Content_Based_imagenes.ipynb) | 4 | 7 Septiembre | Andrés C. | Open Video | 30 | | 9 | [Factorization Machines](https://github.com/PUC-RecSys-Class/RecSysPUC-2020/blob/master/practicos/FastFM_factorization_machines.ipynb) | 5 | 14 Septiembre | Andrés C. | Open Video | 31 | | 10 | [Reinforcement Learning](https://github.com/PUC-RecSys-Class/RecSysPUC-2020/blob/master/practicos/Reinforcement_Learning_Recsim.ipynb) | 10 | N/D | Manuel C. | Open Video | 32 | | 11 | [CF con Deep Learning](https://github.com/PUC-RecSys-Class/RecSysPUC-2020/blob/master/practicos/MultiVAE_Practico.ipynb) | 11 | 19 Octubre | Andrés V. | Open Video | 33 | | 12 | [Recomendación Secuencial](https://github.com/PUC-RecSys-Class/RecSysPUC-2020/blob/master/practicos/GRU4Rec.ipynb) | 12 | 26 Octubre | Vladimir A. | Open Video | 34 | | 13 | [Deep learning Avanzado (no se responde)](https://github.com/PUC-RecSys-Class/RecSysPUC-2020/blob/master/practicos/Ayudantia_TIRLol.ipynb) | 14 | 2 Noviembre | Andrés V. 
| Open Video | 35 | -------------------------------------------------------------------------------- /practicos/Reinforcement_Learning_Recsim.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "name": "Recsim_Tutorial_IIC3633-2_2020.ipynb", 7 | "provenance": [], 8 | "collapsed_sections": [] 9 | }, 10 | "kernelspec": { 11 | "name": "python3", 12 | "display_name": "Python 3" 13 | } 14 | }, 15 | "cells": [ 16 | { 17 | "cell_type": "markdown", 18 | "metadata": { 19 | "id": "n9PTfi4n7oS6" 20 | }, 21 | "source": [ 22 | "# Práctico Recsim\n", 23 | "\n", 24 | "Adaptado de los tutoriales disponibles en: https://github.com/google-research/recsim por Manuel Cartagena." 25 | ] 26 | }, 27 | { 28 | "cell_type": "code", 29 | "metadata": { 30 | "id": "WeT1yUJSOjDh" 31 | }, 32 | "source": [ 33 | "# Install Recsim\n", 34 | "!pip install --upgrade --no-cache-dir recsim" 35 | ], 36 | "execution_count": null, 37 | "outputs": [] 38 | }, 39 | { 40 | "cell_type": "markdown", 41 | "metadata": { 42 | "id": "WVOrpYsfi_kq" 43 | }, 44 | "source": [ 45 | "## Reinforcement Learning\n", 46 | "\n", 47 | "![RL setup](https://github.com/bamine/recsys-summer-school/raw/12e57cc4fd1cb26164d2beebf3ca29ebe2eab960/notebooks/images/rl-setup.png)\n", 48 | "\n", 49 | "\n", 50 | "## Tipos de interacción\n", 51 | "\n", 52 | "![texto alternativo](https://github.com/bamine/recsys-summer-school/raw/12e57cc4fd1cb26164d2beebf3ca29ebe2eab960/notebooks/images/organic-bandit.png)" 53 | ] 54 | }, 55 | { 56 | "cell_type": "markdown", 57 | "metadata": { 58 | "id": "5hhwKK1fOqq1" 59 | }, 60 | "source": [ 61 | "## Importar paquetes necesarios" 62 | ] 63 | }, 64 | { 65 | "cell_type": "code", 66 | "metadata": { 67 | "id": "OpCXt6tkYD_w" 68 | }, 69 | "source": [ 70 | "import functools\n", 71 | "import numpy as np\n", 72 | "from gym import spaces\n", 73 | "import matplotlib.pyplot as plt\n", 74 | "from 
scipy import stats\n", 75 | "\n", 76 | "from recsim import document\n", 77 | "from recsim import user\n", 78 | "from recsim.choice_model import MultinomialLogitChoiceModel\n", 79 | "from recsim.simulator import environment\n", 80 | "from recsim.simulator import recsim_gym\n", 81 | "from recsim.simulator import runner_lib\n", 82 | "\n", 83 | "import tensorflow as tf\n", 84 | "tf.compat.v1.disable_eager_execution()" 85 | ], 86 | "execution_count": null, 87 | "outputs": [] 88 | }, 89 | { 90 | "cell_type": "markdown", 91 | "metadata": { 92 | "id": "94GLvAnijJIm" 93 | }, 94 | "source": [ 95 | "## Recsim\n", 96 | "![RecSim implementation](https://github.com/google-research/recsim/blob/master/recsim/colab/figures/simulator_implemented.png?raw=true)" 97 | ] 98 | }, 99 | { 100 | "cell_type": "markdown", 101 | "metadata": { 102 | "id": "t8cuzWjdj3oz" 103 | }, 104 | "source": [ 105 | "# Resumen\n", 106 | "\n", 107 | "Un paso en la simulación de Recsim consiste en:\n", 108 | "\n", 109 | "\n", 110 | "1. La Base de Datos de Documentos (items) provee un corpus de *D* documentos al recomendador.\n", 111 | "2. El recomendador observa los *D* documentos (y sus features) junto a las respuestas del usuario para la última recomendación. Luego hace una selección ordenada de *k* documentos para presentárselos al usuario.\n", 112 | "3. El usuario examina la lista y escoge a lo más un documento (no escoger uno es una opción). Esto genera una transición del estado del usuario. Finalmente el usuario emite una observación del documento, que en la siguiente iteración el recomendador podrá ver." 113 | ] 114 | }, 115 | { 116 | "cell_type": "markdown", 117 | "metadata": { 118 | "id": "fT2pJkTf3Io7" 119 | }, 120 | "source": [ 121 | "# Escenario de la simulación: Videos de Memes vs Educativos\n", 122 | "\n", 123 | "Los documentos de nuestro corpus corresponderán a items (en este caso videos) que se caracterizan por su grado de contenido educativo o de meme. 
Documentos \"meme\" generan alto compromiso (**engagement**), pero _hipotéticamente_ el consumo a largo plazo de estos documentos lleva a disminuir la satisfacción del usuario. Por otro lado, los documentos educativos generan relativamente bajo engagement, pero su consumo conlleva una mayor satisfacción a largo plazo. Modelaremos esta propiedad de los documentos como una feature continua que puede tomar valores entre [0,1], le llamaremos Educativeness-scale. Un documento con score 1 es totalmente educativo, mientras que un documento con score 0 es totalmente meme.\n", 124 | "\n", 125 | "El estado latente del usuario consiste en una variable de dimensión 1 llamada *satisfacción*. Cada vez que consume un documento \"educativo\", esta variable tiende a incrementar, y opuestamente, un documento meme tiende a disminuir la satisfacción.\n", 126 | "\n", 127 | "Al consumir un documento, el usuario emite una medida estocástica del engagement (tiempo que ve el video) sobre el documento. Este valor es proporcional a la satisfacción del usuario e inversamente proporcional a la educatividad del documento en cuestión.\n", 128 | "\n", 129 | "Por lo tanto, el objetivo es encontrar el mix óptimo de documentos para mantener el engagement del usuario por un período largo de tiempo." 
130 | ] 131 | }, 132 | { 133 | "cell_type": "markdown", 134 | "metadata": { 135 | "id": "gDdSxkJjBmN5" 136 | }, 137 | "source": [ 138 | "## Document" 139 | ] 140 | }, 141 | { 142 | "cell_type": "markdown", 143 | "metadata": { 144 | "id": "Ifnex8kHBrZx" 145 | }, 146 | "source": [ 147 | "### Model" 148 | ] 149 | }, 150 | { 151 | "cell_type": "markdown", 152 | "metadata": { 153 | "id": "wsSuXHgNOyvl" 154 | }, 155 | "source": [ 156 | "Clase que define los documentos, LTS es una abreviación de Long Term Satisfaction" 157 | ] 158 | }, 159 | { 160 | "cell_type": "code", 161 | "metadata": { 162 | "id": "-0zLUsmXBcM-" 163 | }, 164 | "source": [ 165 | "class LTSDocument(document.AbstractDocument):\n", 166 | " def __init__(self, doc_id, educativeness, cluster_id):\n", 167 | " self.educativeness = educativeness\n", 168 | " self.cluster_id = cluster_id\n", 169 | " # doc_id es un ID unico para el documento\n", 170 | " super(LTSDocument, self).__init__(doc_id)\n", 171 | "\n", 172 | " NUM_CLUSTERS = 4\n", 173 | "\n", 174 | " # Una observación son los valores públicos del documento\n", 175 | " def create_observation(self):\n", 176 | " return {'educativeness': np.array(self.educativeness), 'cluster_id': self.cluster_id}\n", 177 | "\n", 178 | " # El espacio de la observación utiliza la el estándar del gym de OpenAI: https://gym.openai.com/docs/#spaces\n", 179 | " @classmethod\n", 180 | " def observation_space(self):\n", 181 | " return spaces.Dict({\n", 182 | " 'educativeness': spaces.Box(shape=(1,), dtype=np.float32, low=0.0, high=1.0),\n", 183 | " 'cluster_id': spaces.Discrete(self.NUM_CLUSTERS)\n", 184 | " })\n", 185 | " \n", 186 | " # Método para definir cómo se imprime un documento\n", 187 | " def __str__(self):\n", 188 | " return \"Document {} from cluster {} with educativeness {}.\".format(self._doc_id, self.cluster_id, self.educativeness)" 189 | ], 190 | "execution_count": null, 191 | "outputs": [] 192 | }, 193 | { 194 | "cell_type": "markdown", 195 | "metadata": { 196 | "id": 
"P7aiBraXBpH2" 197 | }, 198 | "source": [ 199 | "### Sampler" 200 | ] 201 | }, 202 | { 203 | "cell_type": "markdown", 204 | "metadata": { 205 | "id": "SVFC_Z5oPfGL" 206 | }, 207 | "source": [ 208 | "Un Sampler es una clase que creará una instancia del objeto en cuestión, en este caso para los documentos" 209 | ] 210 | }, 211 | { 212 | "cell_type": "code", 213 | "metadata": { 214 | "id": "tHM9XL-1Bc1i" 215 | }, 216 | "source": [ 217 | "class LTSDocumentSampler(document.AbstractDocumentSampler):\n", 218 | " def __init__(self, doc_ctor=LTSDocument, **kwargs):\n", 219 | " super(LTSDocumentSampler, self).__init__(doc_ctor, **kwargs)\n", 220 | " self._doc_count = 0\n", 221 | "\n", 222 | " def sample_document(self):\n", 223 | " doc_features = {}\n", 224 | " doc_features['doc_id'] = self._doc_count\n", 225 | " doc_features['educativeness'] = self._rng.random_sample()\n", 226 | " doc_features['cluster_id'] = self._rng.choice(self._doc_ctor.NUM_CLUSTERS)\n", 227 | " self._doc_count += 1\n", 228 | " return self._doc_ctor(**doc_features)" 229 | ], 230 | "execution_count": null, 231 | "outputs": [] 232 | }, 233 | { 234 | "cell_type": "markdown", 235 | "metadata": { 236 | "id": "S02-Non-PqXD" 237 | }, 238 | "source": [ 239 | "Ejemplo de sampleo de documentos" 240 | ] 241 | }, 242 | { 243 | "cell_type": "code", 244 | "metadata": { 245 | "id": "HZHSsTUPy51Y" 246 | }, 247 | "source": [ 248 | "sampler = LTSDocumentSampler()\n", 249 | "for i in range(5): print(sampler.sample_document())\n", 250 | "d = sampler.sample_document()\n", 251 | "print(\"Documents have observation space:\", d.observation_space(), \"\\n\"\n", 252 | " \"An example realization is: \", d.create_observation())" 253 | ], 254 | "execution_count": null, 255 | "outputs": [] 256 | }, 257 | { 258 | "cell_type": "markdown", 259 | "metadata": { 260 | "id": "qW_jfP8-BxJJ" 261 | }, 262 | "source": [ 263 | "## User" 264 | ] 265 | }, 266 | { 267 | "cell_type": "markdown", 268 | "metadata": { 269 | "id": "7fU1KAmX8GeU" 270 | 
}, 271 | "source": [ 272 | "El modelo de usuario para este tutorial es:\n", 273 | "* Cada usuario tiene una feature llamada net educativeness exposure ($\\text{nee}_t$), y satisfacción ($\\text{sat}_t$). Están relacionadas mediante una función logística para reflejar que la satisfacción no puede crecer sin límite.\n", 274 | "$$\\text{sat}_t = \\sigma(\\tau\\cdot\\text{nee}_t),$$\n", 275 | "donde $\\tau$ es un parámetro de sensitividad específico por usuario.\n", 276 | "* Dado un slate $S$, el usuario escoge un item basado en un modelo de decisión multinomial con la educativeness como feature: $p(\\text{usuario escoja }d_i \\text{ del slate }S) \\sim e^{1-\\mathrm{educativeness}(d_i)}$\n", 277 | "* Una vez el usuario escoge un documento, la net educativeness exposure evoluciona de la manera:\n", 278 | "$$\\text{nee}_{t+1} = \\beta \\cdot \\text{nee}_t + 2(k_d - 1/2) + {\\cal N}(0, \\eta),$$\n", 279 | "donde $\\beta$ es un factor específico por usuario que llamaremos memory discount (factor de olvido), $k_d$ es la educativeness del documento escogido y $\\eta$ es la desviación estándar de un ruido gaussiano que llamaremos innovación (innovation).\n", 280 | "* Finalmente, el usuario interactúa con el contenido escogido por $s_d$ segundos, donde $s_d$ es sacado de alguna distribución\n", 281 | "$$s_d\\sim\\log{\\cal N}(k_d\\mu_k + (1-k_d)\\mu_c, k_d\\sigma_k + (1-k_d)\\sigma_c),$$\n", 282 | "por ejemplo, una distribución log-normal cuyos parámetros se interpolan linealmente entre una respuesta puramente educativa $(\\mu_k, \\sigma_k)$ y una respuesta puramente meme $(\\mu_c, \\sigma_c)$.\n", 283 | "\n", 284 | "De acuerdo a esto, el estado de un usuario está definido por la tupla $(\\text{sat}, \\tau, \\beta, \\eta, \\mu_k, \\sigma_k, \\mu_c, \\sigma_c).$ La satisfacción es la única variable dinámica del estado.\n", 285 | "\n" 286 | ] 287 | }, 288 | { 289 | "cell_type": "markdown", 290 | "metadata": { 291 | "id": "tAqJN4J1BzPH" 292 | }, 293 | "source": [ 294 | "### State" 295 | ] 
296 | }, 297 | { 298 | "cell_type": "markdown", 299 | "metadata": { 300 | "id": "Uzd5qrcdPtgw" 301 | }, 302 | "source": [ 303 | "Esta clase maneja el estado del usuario durante una simulación, tanto las variables públicas como privadas de este durante el tiempo." 304 | ] 305 | }, 306 | { 307 | "cell_type": "code", 308 | "metadata": { 309 | "id": "wPMnZJGyBe3U" 310 | }, 311 | "source": [ 312 | "class LTSUserState(user.AbstractUserState):\n", 313 | " def __init__(self, memory_discount, sensitivity, innovation_stddev,\n", 314 | " meme_mean, meme_stddev, educ_mean, educ_stddev,\n", 315 | " net_educativeness_exposure, time_budget, observation_noise_stddev=0.1\n", 316 | " ):\n", 317 | " ## Transition model parameters\n", 318 | " self.memory_discount = memory_discount\n", 319 | " self.sensitivity = sensitivity\n", 320 | " self.innovation_stddev = innovation_stddev\n", 321 | "\n", 322 | " ## Engagement parameters\n", 323 | " self.meme_mean = meme_mean\n", 324 | " self.meme_stddev = meme_stddev\n", 325 | " self.educ_mean = educ_mean\n", 326 | " self.educ_stddev = educ_stddev\n", 327 | "\n", 328 | " ## State variables\n", 329 | " self.net_educativeness_exposure = net_educativeness_exposure\n", 330 | " self.satisfaction = 1 / (1 + np.exp(-sensitivity * net_educativeness_exposure))\n", 331 | " self.time_budget = time_budget\n", 332 | "\n", 333 | " # Noise\n", 334 | " self._observation_noise = observation_noise_stddev\n", 335 | "\n", 336 | " # Al igual que con los documentos, se retorna la observación del estado del usuario, en este caso lo único público es su satisfacción\n", 337 | " def create_observation(self):\n", 338 | " \"\"\"User's state is not observable.\"\"\"\n", 339 | " clip_low, clip_high = (-1.0 / (1.0 * self._observation_noise),\n", 340 | " 1.0 / (1.0 * self._observation_noise))\n", 341 | " noise = stats.truncnorm(\n", 342 | " clip_low, clip_high, loc=0.0, scale=self._observation_noise).rvs()\n", 343 | " noisy_sat = self.satisfaction + noise\n", 344 | " return 
np.array([noisy_sat,])\n", 345 | "\n", 346 | " # También hay que definir el espacio de las variables que se retornen de una observación\n", 347 | " @staticmethod\n", 348 | " def observation_space():\n", 349 | " return spaces.Box(shape=(1,), dtype=np.float32, low=-2.0, high=2.0)\n", 350 | " \n", 351 | " # Función de score para usar en el modelo de selección del usuario: en este caso el usuario tenderá a elegir más contenido de memes\n", 352 | " def score_document(self, doc_obs):\n", 353 | " return 1 - doc_obs['educativeness']\n" 354 | ], 355 | "execution_count": null, 356 | "outputs": [] 357 | }, 358 | { 359 | "cell_type": "markdown", 360 | "metadata": { 361 | "id": "V21OxBX0B3nH" 362 | }, 363 | "source": [ 364 | "### Sampler" 365 | ] 366 | }, 367 | { 368 | "cell_type": "markdown", 369 | "metadata": { 370 | "id": "WA92tNtyQd_t" 371 | }, 372 | "source": [ 373 | "Clase que sampleará los usuarios para la simulación, en este caso hay muchos parámetros que quedarán hardcodeados, pero se puede hacer dinámico." 
374 | ] 375 | }, 376 | { 377 | "cell_type": "code", 378 | "metadata": { 379 | "id": "E4NYbjnEB3Fu" 380 | }, 381 | "source": [ 382 | "class LTSStaticUserSampler(user.AbstractUserSampler):\n", 383 | " _state_parameters = None\n", 384 | "\n", 385 | " def __init__(self,\n", 386 | " user_ctor=LTSUserState,\n", 387 | " memory_discount=0.9,\n", 388 | " sensitivity=0.01,\n", 389 | " innovation_stddev=0.05,\n", 390 | " meme_mean=5.0,\n", 391 | " meme_stddev=1.0,\n", 392 | " educ_mean=4.0,\n", 393 | " educ_stddev=1.0,\n", 394 | " time_budget=60,\n", 395 | " **kwargs):\n", 396 | " self._state_parameters = {'memory_discount': memory_discount,\n", 397 | " 'sensitivity': sensitivity,\n", 398 | " 'innovation_stddev': innovation_stddev,\n", 399 | " 'meme_mean': meme_mean,\n", 400 | " 'meme_stddev': meme_stddev,\n", 401 | " 'educ_mean': educ_mean,\n", 402 | " 'educ_stddev': educ_stddev,\n", 403 | " 'time_budget': time_budget\n", 404 | " }\n", 405 | " super(LTSStaticUserSampler, self).__init__(user_ctor, **kwargs)\n", 406 | "\n", 407 | " def sample_user(self):\n", 408 | " starting_nee = ((self._rng.random_sample() - .5) *\n", 409 | " (1 / (1.0 - self._state_parameters['memory_discount'])))\n", 410 | " self._state_parameters['net_educativeness_exposure'] = starting_nee\n", 411 | " return self._user_ctor(**self._state_parameters)" 412 | ], 413 | "execution_count": null, 414 | "outputs": [] 415 | }, 416 | { 417 | "cell_type": "markdown", 418 | "metadata": { 419 | "id": "7x9OsvaqB9Pg" 420 | }, 421 | "source": [ 422 | "### Response" 423 | ] 424 | }, 425 | { 426 | "cell_type": "markdown", 427 | "metadata": { 428 | "id": "MWq6dHamQob5" 429 | }, 430 | "source": [ 431 | "Clase que define como es la respuesta de un usuario al interactuar con un documento." 
432 | ] 433 | }, 434 | { 435 | "cell_type": "code", 436 | "metadata": { 437 | "id": "pwvLS9wrB5Pu" 438 | }, 439 | "source": [ 440 | "class LTSResponse(user.AbstractResponse):\n", 441 | " # The maximum degree of engagement.\n", 442 | " MAX_ENGAGEMENT_MAGNITUDE = 100.0\n", 443 | "\n", 444 | " def __init__(self, cluster_id, clicked=False, engagement=0.0):\n", 445 | " self.clicked = clicked\n", 446 | " self.engagement = engagement\n", 447 | " self.cluster_id = cluster_id\n", 448 | "\n", 449 | " # Se crea la observación: si dió o no click, cuanto tiempo vió el item y a que cluster pertenece.\n", 450 | " def create_observation(self):\n", 451 | " return {'click': int(self.clicked),\n", 452 | " 'engagement': np.array(self.engagement),\n", 453 | " 'cluster_id': self.cluster_id}\n", 454 | "\n", 455 | " # Se define el espacio de estas variables\n", 456 | " @classmethod\n", 457 | " def response_space(cls):\n", 458 | " # `engagement` feature range is [0, MAX_ENGAGEMENT_MAGNITUDE]\n", 459 | " return spaces.Dict({\n", 460 | " 'click':\n", 461 | " spaces.Discrete(2),\n", 462 | " 'engagement':\n", 463 | " spaces.Box(\n", 464 | " low=0.0,\n", 465 | " high=cls.MAX_ENGAGEMENT_MAGNITUDE,\n", 466 | " shape=tuple(),\n", 467 | " dtype=np.float32),\n", 468 | " 'cluster_id':\n", 469 | " spaces.Discrete(4)\n", 470 | " })" 471 | ], 472 | "execution_count": null, 473 | "outputs": [] 474 | }, 475 | { 476 | "cell_type": "markdown", 477 | "metadata": { 478 | "id": "t_niBbg0NuId" 479 | }, 480 | "source": [ 481 | "### Model" 482 | ] 483 | }, 484 | { 485 | "cell_type": "markdown", 486 | "metadata": { 487 | "id": "32AW3hr9Q7VG" 488 | }, 489 | "source": [ 490 | "Finalmente se define el modelo del usuario, el cual se compone por las clases definidas anteriormente" 491 | ] 492 | }, 493 | { 494 | "cell_type": "code", 495 | "metadata": { 496 | "id": "VfXKd4nZCDvZ" 497 | }, 498 | "source": [ 499 | "class LTSUserModel(user.AbstractUserModel):\n", 500 | " def __init__(self, slate_size, seed=0):\n", 501 | " 
super(LTSUserModel, self).__init__(LTSResponse, LTSStaticUserSampler(LTSUserState, seed=seed), slate_size)\n", 502 | " self.choice_model = MultinomialLogitChoiceModel({})\n", 503 | " \n", 504 | " def is_terminal(self):\n", 505 | " # Retorna un boolean si la sesión se terminó, ya que el user tiene una variable de tiempo disponible (time_budget)\n", 506 | " return self._user_state.time_budget <= 0\n", 507 | "\n", 508 | " def simulate_response(self, slate_documents):\n", 509 | " # Lista con respuestas vacías a partir del slate\n", 510 | " responses = [self._response_model_ctor(d.cluster_id) for d in slate_documents]\n", 511 | " # Se usa el choice_model del user para saber a qué documento le hace click\n", 512 | " self.choice_model.score_documents(self._user_state,\n", 513 | " [doc.create_observation() for doc in slate_documents])\n", 514 | " scores = self.choice_model.scores\n", 515 | " selected_index = self.choice_model.choose_item()\n", 516 | " # Se genera la respuesta para el item que se clickeó\n", 517 | " self.generate_response(slate_documents[selected_index],\n", 518 | " responses[selected_index])\n", 519 | " return responses\n", 520 | "\n", 521 | " def generate_response(self, doc, response):\n", 522 | " response.clicked = True\n", 523 | " # Se interpola linealmente entre meme y educativo\n", 524 | " engagement_loc = (doc.educativeness * self._user_state.meme_mean + (1 - doc.educativeness) * self._user_state.educ_mean)\n", 525 | " engagement_loc *= self._user_state.satisfaction\n", 526 | " engagement_scale = (doc.educativeness * self._user_state.meme_stddev + ((1 - doc.educativeness) * self._user_state.educ_stddev))\n", 527 | " log_engagement = np.random.normal(loc=engagement_loc,\n", 528 | " scale=engagement_scale)\n", 529 | " response.engagement = np.exp(log_engagement)\n", 530 | "\n", 531 | " # Función que hace update del estado del usuario\n", 532 | " def update_state(self, slate_documents, responses):\n", 533 | " for doc, response in zip(slate_documents, 
responses):\n", 534 | " if response.clicked:\n", 535 | " innovation = np.random.normal(scale=self._user_state.innovation_stddev)\n", 536 | " net_educativeness_exposure = (self._user_state.memory_discount * self._user_state.net_educativeness_exposure - 2.0 * (doc.educativeness - 0.5) + innovation)\n", 537 | " self._user_state.net_educativeness_exposure = net_educativeness_exposure\n", 538 | " satisfaction = 1 / (1.0 + np.exp(-self._user_state.sensitivity * net_educativeness_exposure))\n", 539 | " self._user_state.satisfaction = satisfaction\n", 540 | " self._user_state.time_budget -= 1\n", 541 | " return\n" 542 | ], 543 | "execution_count": null, 544 | "outputs": [] 545 | }, 546 | { 547 | "cell_type": "markdown", 548 | "metadata": { 549 | "id": "FG0h-b0eqt3M" 550 | }, 551 | "source": [ 552 | "## Crear environment: parámetros\n", 553 | "* *slate_size*: Tamaño del set de items a presentar al usuario.\n", 554 | "* *num_candidates*: número de documentos presentes en la base de datos en cualquier momento de la simulación.\n", 555 | "* *resample_documents*: especifica si se vuelven a samplear los documentos desde la base de datos entre episodios de la simulación." 
556 | ] 557 | }, 558 | { 559 | "cell_type": "code", 560 | "metadata": { 561 | "id": "-bUw9z6KB_QL" 562 | }, 563 | "source": [ 564 | "slate_size = 3\n", 565 | "num_candidates = 10\n", 566 | "ltsenv = environment.Environment(\n", 567 | " LTSUserModel(slate_size),\n", 568 | " LTSDocumentSampler(),\n", 569 | " num_candidates,\n", 570 | " slate_size,\n", 571 | " resample_documents=True)\n" 572 | ], 573 | "execution_count": null, 574 | "outputs": [] 575 | }, 576 | { 577 | "cell_type": "markdown", 578 | "metadata": { 579 | "id": "zTzyMHe9rYj2" 580 | }, 581 | "source": [ 582 | "### Parámetro a optimizar: Engagement" 583 | ] 584 | }, 585 | { 586 | "cell_type": "code", 587 | "metadata": { 588 | "id": "ODqAlrjrO2__" 589 | }, 590 | "source": [ 591 | "def clicked_engagement_reward(responses):\n", 592 | " reward = 0.0\n", 593 | " for response in responses:\n", 594 | " if response.clicked:\n", 595 | " reward += response.engagement\n", 596 | " return reward" 597 | ], 598 | "execution_count": null, 599 | "outputs": [] 600 | }, 601 | { 602 | "cell_type": "code", 603 | "metadata": { 604 | "id": "swYsZBh7PAdm" 605 | }, 606 | "source": [ 607 | "# Instanciar environment\n", 608 | "lts_gym_env = recsim_gym.RecSimGymEnv(ltsenv, clicked_engagement_reward)" 609 | ], 610 | "execution_count": null, 611 | "outputs": [] 612 | }, 613 | { 614 | "cell_type": "code", 615 | "metadata": { 616 | "id": "aLrJ4MWSPMyO" 617 | }, 618 | "source": [ 619 | "observation_0 = lts_gym_env.reset()\n", 620 | "print('Observation 0')\n", 621 | "print('Available documents')\n", 622 | "doc_strings = ['doc_id ' + key + \" educativeness \" + str(value) for key, value\n", 623 | " in observation_0['doc'].items()]\n", 624 | "print('\\n'.join(doc_strings))\n", 625 | "print('Noisy user state observation')\n", 626 | "print(observation_0['user'])\n", 627 | "# \"Agente\" recomienda los primeros 3 documentos\n", 628 | "recommendation_slate_0 = [0, 1, 2]\n", 629 | "observation_1, reward, done, _ = 
lts_gym_env.step(recommendation_slate_0)\n", 630 | "print('Observation 1')\n", 631 | "print('Available documents')\n", 632 | "doc_strings = ['doc_id ' + key + \" educativeness \" + str(value) for key, value\n", 633 | " in observation_1['doc'].items()]\n", 634 | "print('\\n'.join(doc_strings))\n", 635 | "rsp_strings = [str(response) for response in observation_1['response']]\n", 636 | "print('User responses to documents in the slate')\n", 637 | "print('\\n'.join(rsp_strings))\n", 638 | "print('Noisy user state observation')\n", 639 | "print(observation_1['user'])" 640 | ], 641 | "execution_count": null, 642 | "outputs": [] 643 | }, 644 | { 645 | "cell_type": "markdown", 646 | "metadata": { 647 | "id": "6EvnGBRFqgLB" 648 | }, 649 | "source": [ 650 | "## Agent" 651 | ] 652 | }, 653 | { 654 | "cell_type": "code", 655 | "metadata": { 656 | "id": "BII0kzpcPOJY" 657 | }, 658 | "source": [ 659 | "from recsim import agent\n", 660 | "from recsim.agents.layers.abstract_click_bandit import AbstractClickBanditLayer\n", 661 | "from recsim.agents.layers.cluster_click_statistics import ClusterClickStatsLayer" 662 | ], 663 | "execution_count": null, 664 | "outputs": [] 665 | }, 666 | { 667 | "cell_type": "markdown", 668 | "metadata": { 669 | "id": "Fmbbm7otSg42" 670 | }, 671 | "source": [ 672 | "Crearemos un agente simple que ordene los documentos de un tópico de acuerdo a su educativeness" 673 | ] 674 | }, 675 | { 676 | "cell_type": "code", 677 | "metadata": { 678 | "id": "k47hd5pdqkjk" 679 | }, 680 | "source": [ 681 | "class GreedyClusterAgent(agent.AbstractEpisodicRecommenderAgent):\n", 682 | " def __init__(self, observation_space, action_space, cluster_id, pro_educ, **kwargs):\n", 683 | " del observation_space\n", 684 | " super(GreedyClusterAgent, self).__init__(action_space)\n", 685 | " self._cluster_id = cluster_id\n", 686 | " self.pro_educ = pro_educ\n", 687 | "\n", 688 | " def step(self, reward, observation):\n", 689 | " del reward\n", 690 | " my_docs = []\n", 691 | " 
my_doc_educativeness = []\n", 692 | " for i, doc in enumerate(observation['doc'].values()):\n", 693 | " if doc['cluster_id'] == self._cluster_id:\n", 694 | " my_docs.append(i)\n", 695 | " my_doc_educativeness.append(doc['educativeness'])\n", 696 | " if not bool(my_docs):\n", 697 | " return []\n", 698 | " # Agregamos esta variable booleana para determinar si ordena los documentos de mayor a menor o al revés (algunos agentes preferirán recomendar los memes primero)\n", 699 | " if self.pro_educ:\n", 700 | " sorted_indices = np.argsort(my_doc_educativeness)[::-1]\n", 701 | " else:\n", 702 | " sorted_indices = np.argsort(my_doc_educativeness)\n", 703 | " return list(np.array(my_docs)[sorted_indices])\n" 704 | ], 705 | "execution_count": null, 706 | "outputs": [] 707 | }, 708 | { 709 | "cell_type": "code", 710 | "metadata": { 711 | "id": "IprHtI5TwEGm" 712 | }, 713 | "source": [ 714 | "# Obtenemos el número de tópicos disponibles\n", 715 | "num_topics = LTSDocument.observation_space()['cluster_id'].n\n", 716 | "# Creamos un agente para cada tópico\n", 717 | "base_agent_ctors = [functools.partial(GreedyClusterAgent, cluster_id=i, pro_educ=np.random.choice([True, False], 1)[0]) for i in range(num_topics)]" 718 | ], 719 | "execution_count": null, 720 | "outputs": [] 721 | }, 722 | { 723 | "cell_type": "code", 724 | "metadata": { 725 | "id": "ZUW3In1zwYaa" 726 | }, 727 | "source": [ 728 | "# Recsim posee clases que se pueden usar como \"capas\" en keras o pytorch, aquí usamos AbstractBanditLayer que recibe un conjunto de agents que trata como arms\n", 729 | "bandit_ctor = functools.partial(AbstractClickBanditLayer, arm_base_agent_ctors=base_agent_ctors)\n", 730 | "# Otra capa que se puede usar es ClusterClickStatsLayer la cual le pasa información del número de clicks que ha hecho el usuario a cada cluster\n", 731 | "cluster_bandit = ClusterClickStatsLayer(bandit_ctor,\n", 732 | " lts_gym_env.observation_space,\n", 733 | " lts_gym_env.action_space)" 734 | ], 735 | 
"execution_count": null, 736 | "outputs": [] 737 | }, 738 | { 739 | "cell_type": "markdown", 740 | "metadata": { 741 | "id": "JaV0-YuFUEH1" 742 | }, 743 | "source": [ 744 | "Ejemplo de recomendación hecho por este cluster de bandits" 745 | ] 746 | }, 747 | { 748 | "cell_type": "code", 749 | "metadata": { 750 | "id": "tWqSBCxjw2IP" 751 | }, 752 | "source": [ 753 | "observation0 = lts_gym_env.reset()\n", 754 | "slate = cluster_bandit.begin_episode(observation0)\n", 755 | "print(\"Cluster bandit slate 0:\")\n", 756 | "doc_list = list(observation0['doc'].values())\n", 757 | "for doc_position in slate:\n", 758 | " print(doc_list[doc_position])" 759 | ], 760 | "execution_count": null, 761 | "outputs": [] 762 | }, 763 | { 764 | "cell_type": "markdown", 765 | "metadata": { 766 | "id": "OUA_EiXFUNPg" 767 | }, 768 | "source": [ 769 | "Agregaremos una función que toma los parámetros de la simulación y crea nuestro agente" 770 | ] 771 | }, 772 | { 773 | "cell_type": "code", 774 | "metadata": { 775 | "id": "QWO22Ldm0qYp" 776 | }, 777 | "source": [ 778 | "def create_agent(sess, environment, eval_mode, summary_writer=None):\n", 779 | " kwargs = {\n", 780 | " 'observation_space': environment.observation_space,\n", 781 | " 'action_space': environment.action_space,\n", 782 | " 'summary_writer': summary_writer,\n", 783 | " 'eval_mode': eval_mode,\n", 784 | " }\n", 785 | " return ClusterClickStatsLayer(bandit_ctor, **kwargs)" 786 | ], 787 | "execution_count": null, 788 | "outputs": [] 789 | }, 790 | { 791 | "cell_type": "markdown", 792 | "metadata": { 793 | "id": "08PkwPjI5cf8" 794 | }, 795 | "source": [ 796 | "### Entrenamiento" 797 | ] 798 | }, 799 | { 800 | "cell_type": "code", 801 | "metadata": { 802 | "id": "pPKOmb_-w4Cu" 803 | }, 804 | "source": [ 805 | "tmp_base_dir = '/tmp/recsim/'\n", 806 | "lts_gym_env.reset()\n", 807 | "runner = runner_lib.TrainRunner(\n", 808 | " base_dir=tmp_base_dir,\n", 809 | " create_agent_fn=create_agent,\n", 810 | " env=lts_gym_env,\n", 811 | " 
episode_log_file=\"\",\n", 812 | " max_training_steps=100,\n", 813 | " num_iterations=20)\n", 814 | "runner.run_experiment()" 815 | ], 816 | "execution_count": null, 817 | "outputs": [] 818 | }, 819 | { 820 | "cell_type": "markdown", 821 | "metadata": { 822 | "id": "oRhd4y1t1OqK" 823 | }, 824 | "source": [ 825 | "## Tensorboard" 826 | ] 827 | }, 828 | { 829 | "cell_type": "code", 830 | "metadata": { 831 | "id": "6WshLZLs1OCI" 832 | }, 833 | "source": [ 834 | "# Load the TensorBoard notebook extension\n", 835 | "%load_ext tensorboard" 836 | ], 837 | "execution_count": null, 838 | "outputs": [] 839 | }, 840 | { 841 | "cell_type": "code", 842 | "metadata": { 843 | "id": "QPVb_LSP002c" 844 | }, 845 | "source": [ 846 | "%tensorboard --logdir=/tmp/recsim/" 847 | ], 848 | "execution_count": null, 849 | "outputs": [] 850 | }, 851 | { 852 | "cell_type": "markdown", 853 | "metadata": { 854 | "id": "BLnC2g6E5ISA" 855 | }, 856 | "source": [ 857 | "# Actividades" 858 | ] 859 | }, 860 | { 861 | "cell_type": "markdown", 862 | "metadata": { 863 | "id": "6hYgL5MN5MSq" 864 | }, 865 | "source": [ 866 | "### Actividad 1:\n", 867 | "\n", 868 | "Entrene por más episodios y describa lo que está ocurriendo con el agente y el usuario." 
869 | ] 870 | }, 871 | { 872 | "cell_type": "code", 873 | "metadata": { 874 | "id": "Enu8Kf565Lfm" 875 | }, 876 | "source": [ 877 | "" 878 | ], 879 | "execution_count": null, 880 | "outputs": [] 881 | }, 882 | { 883 | "cell_type": "markdown", 884 | "metadata": { 885 | "id": "9s0gJp7s5h9a" 886 | }, 887 | "source": [ 888 | "### Actividad 2\n", 889 | "\n", 890 | "Explique con sus palabras cuál es la principal ventaja de utilizar una librería como recsim o recogym para Reinforcement Learning" 891 | ] 892 | }, 893 | { 894 | "cell_type": "code", 895 | "metadata": { 896 | "id": "nFpSI7U-5jl6" 897 | }, 898 | "source": [ 899 | "" 900 | ], 901 | "execution_count": null, 902 | "outputs": [] 903 | }, 904 | { 905 | "cell_type": "markdown", 906 | "metadata": { 907 | "id": "VlWGRYYiUsFR" 908 | }, 909 | "source": [ 910 | "### Actividad 3\n", 911 | "\n", 912 | "¿Cómo se podría mejorar la forma de modelar al usuario?" 913 | ] 914 | }, 915 | { 916 | "cell_type": "code", 917 | "metadata": { 918 | "id": "DwO4JNR8Uu_A" 919 | }, 920 | "source": [ 921 | "" 922 | ], 923 | "execution_count": null, 924 | "outputs": [] 925 | } 926 | ] 927 | } -------------------------------------------------------------------------------- /practicos/pyRecLab_SlopeOne.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "colab_type": "text", 7 | "id": "view-in-github" 8 | }, 9 | "source": [ 10 | "\"Open\n" 11 | ] 12 | }, 13 | { 14 | "cell_type": "markdown", 15 | "metadata": { 16 | "colab_type": "text", 17 | "id": "view-in-youtube" 18 | }, 19 | "source": [ 20 | "\"Open\n" 21 | ] 22 | }, 23 | { 24 | "cell_type": "markdown", 25 | "metadata": { 26 | "colab_type": "text", 27 | "id": "0kY7Xfgi84HC" 28 | }, 29 | "source": [ 30 | "# **Práctico Sistemas Recomendadores: pyreclab - Slope One**\n", 31 | "\n", 32 | "En este práctico seguiremos utilizando [pyreclab](https://github.com/gasevi/pyreclab), con el 
cual estamos aprendiendo distintas técnicas de recomendación. Seguiremos usando la misma base de datos de los prácticos anteriores, para que puedan comparar los métodos y sus implementaciones. Este práctico está acompañado de un [video comentando la actividad](https://youtu.be/A2euuevpYis).\n", 33 | "\n", 34 | "En esta oportunidad exploraremos el recomendador de Pendiente Uno o **Slope One** [1].\n", 35 | "\n", 36 | "**Adaptado y preparado por:** Francisca Cattan 📩 fpcattan@uc.cl\n", 37 | "\n", 38 | "Referencias 📖\n", 39 | "------\n", 40 | "[1] *Lemire, D., & Maclachlan, A. (2005, April). Slope One Predictors for Online Rating-Based Collaborative Filtering. In SDM (Vol. 5, pp. 1-5).*\n" 41 | ] 42 | }, 43 | { 44 | "cell_type": "markdown", 45 | "metadata": { 46 | "colab_type": "text", 47 | "id": "1s6Ac_Kh9qiH" 48 | }, 49 | "source": [ 50 | "**Nombre**: completa tu nombre aquí :D" 51 | ] 52 | }, 53 | { 54 | "cell_type": "markdown", 55 | "metadata": { 56 | "colab_type": "text", 57 | "id": "Z4Jr74pP-4gt" 58 | }, 59 | "source": [ 60 | "## Actividad 1 👓\n", 61 | "\n", 62 | "Antes de empezar con el práctico, responde la siguiente pregunta con lo visto en clases.\n", 63 | "\n", 64 | "**Pregunta:** Explique cómo funciona Slope One (como modelo teórico, no piense en la implementación). En particular explique:\n", 65 | "\n", 66 | "- Repasemos: ¿Por qué este recomendador es un algoritmo de Filtrado Colaborativo?\n", 67 | "- Este Filtrado Colaborativo, ¿está basado en el usuario o en los items? ¿Por qué?\n", 68 | "- ¿Qué datos recibe Slope One y qué hace con ellos? 
(qué tipo de columnas y qué calculo)\n", 69 | "- ¿Qué pasaría si se agrega un nuevo rating a la base de datos?\n", 70 | "- Opcional: ¿Cómo crees que le iría al recomendador con un usuario que acaba de entrar al sistema y ha asignado muy pocos ratings?\n", 71 | "\n", 72 | "💡 *Hint: La bibliografía todo lo puede.*\n", 73 | "\n", 74 | "**Respuesta:**\n", 75 | "\n", 76 | "\n" 77 | ] 78 | }, 79 | { 80 | "cell_type": "markdown", 81 | "metadata": { 82 | "colab_type": "text", 83 | "id": "HaFY1qNzHyCD" 84 | }, 85 | "source": [ 86 | "# **Configuración Inicial**" 87 | ] 88 | }, 89 | { 90 | "cell_type": "markdown", 91 | "metadata": { 92 | "colab_type": "text", 93 | "id": "MvGNsjiIIC6G" 94 | }, 95 | "source": [ 96 | "## Paso 1:\n", 97 | "Descargue directamente a Colab los archivos del dataset ejecutando las siguientes 3 celdas:\n" 98 | ] 99 | }, 100 | { 101 | "cell_type": "code", 102 | "execution_count": 34, 103 | "metadata": { 104 | "colab": { 105 | "base_uri": "https://localhost:8080/", 106 | "height": 85 107 | }, 108 | "colab_type": "code", 109 | "id": "_7M_ehHq8ti2", 110 | "outputId": "407a5c44-7810-4273-c82c-c3f343621e2b" 111 | }, 112 | "outputs": [ 113 | { 114 | "name": "stdout", 115 | "output_type": "stream", 116 | "text": [ 117 | " % Total % Received % Xferd Average Speed Time Time Time Current\n", 118 | " Dload Upload Total Spent Left Speed\n", 119 | "100 388 0 388 0 0 311 0 --:--:-- 0:00:01 --:--:-- 311\n", 120 | "100 1546k 100 1546k 0 0 914k 0 0:00:01 0:00:01 --:--:-- 914k\n" 121 | ] 122 | } 123 | ], 124 | "source": [ 125 | "!curl -L -o \"u1.base\" \"https://drive.google.com/uc?export=download&id=1bGweNw7NbOHoJz11v6ld7ymLR8MLvBsA\"" 126 | ] 127 | }, 128 | { 129 | "cell_type": "code", 130 | "execution_count": 35, 131 | "metadata": { 132 | "colab": { 133 | "base_uri": "https://localhost:8080/", 134 | "height": 85 135 | }, 136 | "colab_type": "code", 137 | "id": "CSdhurW1InVW", 138 | "outputId": "0f573bf7-7a7b-4c6d-f893-25f1d02aa149" 139 | }, 140 | "outputs": [ 141 | { 
142 | "name": "stdout", 143 | "output_type": "stream", 144 | "text": [ 145 | " % Total % Received % Xferd Average Speed Time Time Time Current\n", 146 | " Dload Upload Total Spent Left Speed\n", 147 | "100 388 0 388 0 0 675 0 --:--:-- --:--:-- --:--:-- 675\n", 148 | "100 385k 100 385k 0 0 434k 0 --:--:-- --:--:-- --:--:-- 94.2M\n" 149 | ] 150 | } 151 | ], 152 | "source": [ 153 | "!curl -L -o \"u1.test\" \"https://drive.google.com/uc?export=download&id=1f_HwJWC_1HFzgAjKAWKwkuxgjkhkXrVg\"" 154 | ] 155 | }, 156 | { 157 | "cell_type": "code", 158 | "execution_count": 36, 159 | "metadata": { 160 | "colab": { 161 | "base_uri": "https://localhost:8080/", 162 | "height": 85 163 | }, 164 | "colab_type": "code", 165 | "id": "QoGzmPG2InzM", 166 | "outputId": "e66b0f74-3405-4b98-b09a-94e398f89647" 167 | }, 168 | "outputs": [ 169 | { 170 | "name": "stdout", 171 | "output_type": "stream", 172 | "text": [ 173 | " % Total % Received % Xferd Average Speed Time Time Time Current\n", 174 | " Dload Upload Total Spent Left Speed\n", 175 | "100 388 0 388 0 0 556 0 --:--:-- --:--:-- --:--:-- 555\n", 176 | "100 230k 100 230k 0 0 193k 0 0:00:01 0:00:01 --:--:-- 225M\n" 177 | ] 178 | } 179 | ], 180 | "source": [ 181 | "!curl -L -o \"u.item\" \"https://drive.google.com/uc?export=download&id=10YLhxkO2-M_flQtyo9OYV4nT9IvSESuz\"" 182 | ] 183 | }, 184 | { 185 | "cell_type": "markdown", 186 | "metadata": { 187 | "colab_type": "text", 188 | "id": "kluVF2eJIyjq" 189 | }, 190 | "source": [ 191 | "Los archivos **u1.base** y **u1.test** tienen tuplas {usuario, item, rating, timestamp}, que es la información de preferencias de usuarios sobre películas en una muestra del dataset [movielens](https://grouplens.org/datasets/movielens/)." 192 | ] 193 | }, 194 | { 195 | "cell_type": "markdown", 196 | "metadata": { 197 | "colab_type": "text", 198 | "id": "uopKG59rJWM9" 199 | }, 200 | "source": [ 201 | "## Paso 2:\n", 202 | "\n", 203 | "Instalamos pyreclab utilizando pip." 
204 | ] 205 | }, 206 | { 207 | "cell_type": "code", 208 | "execution_count": 37, 209 | "metadata": { 210 | "colab": { 211 | "base_uri": "https://localhost:8080/", 212 | "height": 34 213 | }, 214 | "colab_type": "code", 215 | "id": "hi9L5FZdJWfI", 216 | "outputId": "e966155f-6bef-47d1-c124-458905663b38" 217 | }, 218 | "outputs": [ 219 | { 220 | "name": "stdout", 221 | "output_type": "stream", 222 | "text": [ 223 | "Requirement already up-to-date: pyreclab in /usr/local/lib/python3.6/dist-packages (0.1.14)\n" 224 | ] 225 | } 226 | ], 227 | "source": [ 228 | "!pip install pyreclab --upgrade" 229 | ] 230 | }, 231 | { 232 | "cell_type": "markdown", 233 | "metadata": { 234 | "colab_type": "text", 235 | "id": "M28DoBHtN5qD" 236 | }, 237 | "source": [ 238 | "## Paso 3:\n", 239 | "\n", 240 | "Hacemos los imports necesarios para este práctico." 241 | ] 242 | }, 243 | { 244 | "cell_type": "code", 245 | "execution_count": 38, 246 | "metadata": { 247 | "colab": {}, 248 | "colab_type": "code", 249 | "id": "KQ0jQ_JcN3el" 250 | }, 251 | "outputs": [], 252 | "source": [ 253 | "import pyreclab\n", 254 | "import numpy as np\n", 255 | "import pandas as pd" 256 | ] 257 | }, 258 | { 259 | "cell_type": "markdown", 260 | "metadata": { 261 | "colab_type": "text", 262 | "id": "bmxNQtXvfJy3" 263 | }, 264 | "source": [ 265 | "# **El dataset**\n", 266 | "\n", 267 | "💡 *En prácticos anteriores, vimos cómo analizar este dataset. Puedes revisarlos en caso de dudas.*" 268 | ] 269 | }, 270 | { 271 | "cell_type": "markdown", 272 | "metadata": { 273 | "colab_type": "text", 274 | "id": "nAdXLT7haQe1" 275 | }, 276 | "source": [ 277 | "## Paso 4:\n", 278 | "\n", 279 | "Ya que queremos crear una lista de recomendación de items para un usuario en específico, necesitamos obtener información adicional de cada película tal como título, fecha de lanzamiento, género, etc. Cargaremos el archivo de items descargado \"u.item\" para poder mapear cada identificador de ítem al conjunto de datos que lo describe."
280 | ] 281 | }, 282 | { 283 | "cell_type": "code", 284 | "execution_count": 39, 285 | "metadata": { 286 | "colab": {}, 287 | "colab_type": "code", 288 | "id": "p0vHiGJheUkB" 289 | }, 290 | "outputs": [], 291 | "source": [ 292 | "# Definimos el orden de las columnas\n", 293 | "info_cols = [ 'movieid', 'title', 'release_date', 'video_release_date', 'IMDb_URL', \\\n", 294 | " 'unknown', 'Action', 'Adventure', 'Animation', 'Children', 'Comedy', \\\n", 295 | " 'Crime', 'Documentary', 'Drama', 'Fantasy', 'Film-Noir', 'Horror', \\\n", 296 | " 'Musical', 'Mystery', 'Romance', 'Sci-Fi', 'Thriller', 'War', 'Western' ]\n", 297 | "\n", 298 | "# Asignamos a una variable la estructura de datos de los items\n", 299 | "info_file = pd.read_csv('u.item', sep='|', index_col = 0, names = info_cols, header=None, encoding='latin-1')" 300 | ] 301 | }, 302 | { 303 | "cell_type": "markdown", 304 | "metadata": { 305 | "colab_type": "text", 306 | "id": "z4pjNDlMOOdo" 307 | }, 308 | "source": [ 309 | "# **Slope One**" 310 | ] 311 | }, 312 | { 313 | "cell_type": "markdown", 314 | "metadata": { 315 | "colab_type": "text", 316 | "id": "H5sx7xkfQE6W" 317 | }, 318 | "source": [ 319 | "## Paso 5:\n", 320 | "\n", 321 | "Seguiremos un camino muy similar a los ejercicios de User KNN e Item KNN. Crearemos una instancia del algoritmo de recomendación y luego pasaremos a la fase de entrenamiento." 
322 | ] 323 | }, 324 | { 325 | "cell_type": "code", 326 | "execution_count": 40, 327 | "metadata": { 328 | "colab": {}, 329 | "colab_type": "code", 330 | "id": "v3D7CAoyPfko" 331 | }, 332 | "outputs": [], 333 | "source": [ 334 | "# Declaramos la instancia SlopeOne\n", 335 | "mySlopeOne = pyreclab.SlopeOne(dataset='u1.base', dlmchar=b'\\t', header=False, usercol=0, itemcol=1, ratingcol=2)" 336 | ] 337 | }, 338 | { 339 | "cell_type": "code", 340 | "execution_count": 41, 341 | "metadata": { 342 | "colab": {}, 343 | "colab_type": "code", 344 | "id": "EhacKomkTs51" 345 | }, 346 | "outputs": [], 347 | "source": [ 348 | "# Y entrenamos\n", 349 | "mySlopeOne.train()" 350 | ] 351 | }, 352 | { 353 | "cell_type": "markdown", 354 | "metadata": { 355 | "colab_type": "text", 356 | "id": "Wnb3DkVVTttZ" 357 | }, 358 | "source": [ 359 | "## Actividad 2 👓\n", 360 | "\n", 361 | "**Pregunta:** Explique qué hace el método `train()` en este caso, dado el modelo teórico. ¿Calcula información?, ¿no hace nada?, ¿ordena los datos? \n", 362 | "\n", 363 | "**Respuesta:**" 364 | ] 365 | }, 366 | { 367 | "cell_type": "markdown", 368 | "metadata": { 369 | "colab_type": "text", 370 | "id": "XqR8bZB2VboJ" 371 | }, 372 | "source": [ 373 | "## Paso 6:\n", 374 | "\n", 375 | "Llegó la hora de predecir el rating."
376 | ] 377 | }, 378 | { 379 | "cell_type": "code", 380 | "execution_count": 42, 381 | "metadata": { 382 | "colab": { 383 | "base_uri": "https://localhost:8080/", 384 | "height": 34 385 | }, 386 | "colab_type": "code", 387 | "id": "H9yigNx9VYZ3", 388 | "outputId": "3c58b8ee-c431-4ec2-ac32-4f1ddb47b92e" 389 | }, 390 | "outputs": [ 391 | { 392 | "data": { 393 | "text/plain": [ 394 | "3.2408759593963623" 395 | ] 396 | }, 397 | "execution_count": 42, 398 | "metadata": { 399 | "tags": [] 400 | }, 401 | "output_type": "execute_result" 402 | } 403 | ], 404 | "source": [ 405 | "# Esta es la predicción de rating que el usuario ID:457 otorgaría al ítem ID:37\n", 406 | "# De esta forma podemos comparar el resultado con los prácticos anteriores\n", 407 | "mySlopeOne.predict(\"457\", \"37\")" 408 | ] 409 | }, 410 | { 411 | "cell_type": "code", 412 | "execution_count": 43, 413 | "metadata": { 414 | "colab": {}, 415 | "colab_type": "code", 416 | "id": "-0ShLi6gYhh_" 417 | }, 418 | "outputs": [], 419 | "source": [ 420 | "# También podemos guardar la predicción en una variable\n", 421 | "prediction = mySlopeOne.predict(\"457\", \"37\")" 422 | ] 423 | }, 424 | { 425 | "cell_type": "code", 426 | "execution_count": 44, 427 | "metadata": { 428 | "colab": { 429 | "base_uri": "https://localhost:8080/", 430 | "height": 419 431 | }, 432 | "colab_type": "code", 433 | "id": "2yFtdiVcUcKW", 434 | "outputId": "257c9bbc-584c-46a0-814d-c7cb048b5596" 435 | }, 436 | "outputs": [ 437 | { 438 | "data": { 439 | "text/html": [ 440 | "
\n", 441 | "\n", 454 | "\n", 455 | " \n", 456 | " \n", 457 | " \n", 458 | " \n", 459 | " \n", 460 | " \n", 461 | " \n", 462 | " \n", 463 | " \n", 464 | " \n", 465 | " \n", 466 | " \n", 467 | " \n", 468 | " \n", 469 | " \n", 470 | " \n", 471 | " \n", 472 | " \n", 473 | " \n", 474 | " \n", 475 | " \n", 476 | " \n", 477 | " \n", 478 | " \n", 479 | " \n", 480 | " \n", 481 | " \n", 482 | " \n", 483 | " \n", 484 | " \n", 485 | " \n", 486 | " \n", 487 | " \n", 488 | " \n", 489 | " \n", 490 | " \n", 491 | " \n", 492 | " \n", 493 | " \n", 494 | " \n", 495 | " \n", 496 | " \n", 497 | " \n", 498 | " \n", 499 | " \n", 500 | " \n", 501 | " \n", 502 | " \n", 503 | " \n", 504 | " \n", 505 | " \n", 506 | " \n", 507 | " \n", 508 | " \n", 509 | " \n", 510 | " \n", 511 | " \n", 512 | " \n", 513 | " \n", 514 | " \n", 515 | " \n", 516 | " \n", 517 | " \n", 518 | " \n", 519 | " \n", 520 | " \n", 521 | " \n", 522 | " \n", 523 | " \n", 524 | " \n", 525 | " \n", 526 | " \n", 527 | " \n", 528 | " \n", 529 | " \n", 530 | " \n", 531 | " \n", 532 | " \n", 533 | " \n", 534 | " \n", 535 | " \n", 536 | " \n", 537 | " \n", 538 | " \n", 539 | " \n", 540 | " \n", 541 | " \n", 542 | " \n", 543 | "
useriditemidratingtimestamp
3726945714882393244
3727045774882393278
3727145795882393485
37272457114882397020
37273457133882393883
...............
3742045710472882395964
3742145711194882398308
3742245711685882548761
3742345712104882549905
3742445712214882549438
\n", 544 | "

156 rows × 4 columns

\n", 545 | "
" 546 | ], 547 | "text/plain": [ 548 | " userid itemid rating timestamp\n", 549 | "37269 457 1 4 882393244\n", 550 | "37270 457 7 4 882393278\n", 551 | "37271 457 9 5 882393485\n", 552 | "37272 457 11 4 882397020\n", 553 | "37273 457 13 3 882393883\n", 554 | "... ... ... ... ...\n", 555 | "37420 457 1047 2 882395964\n", 556 | "37421 457 1119 4 882398308\n", 557 | "37422 457 1168 5 882548761\n", 558 | "37423 457 1210 4 882549905\n", 559 | "37424 457 1221 4 882549438\n", 560 | "\n", 561 | "[156 rows x 4 columns]" 562 | ] 563 | }, 564 | "execution_count": 44, 565 | "metadata": { 566 | "tags": [] 567 | }, 568 | "output_type": "execute_result" 569 | } 570 | ], 571 | "source": [ 572 | "# Podemos comprobar las peliculas rankeadas por el usuario ID:457\n", 573 | "# Que ciertamente ha participado activamente (¡156 items!)\n", 574 | "train_file = pd.read_csv('u1.base', sep='\\t', names = ['userid', 'itemid', 'rating', 'timestamp'], header=None)\n", 575 | "train_file[train_file['userid'] == 457]" 576 | ] 577 | }, 578 | { 579 | "cell_type": "code", 580 | "execution_count": 45, 581 | "metadata": { 582 | "colab": { 583 | "base_uri": "https://localhost:8080/", 584 | "height": 266 585 | }, 586 | "colab_type": "code", 587 | "id": "liVoMHlAYhH_", 588 | "outputId": "fa37e458-633d-4937-d232-4825ae8fc3cc" 589 | }, 590 | "outputs": [ 591 | { 592 | "data": { 593 | "text/html": [ 594 | "
\n", 595 | "\n", 608 | "\n", 609 | " \n", 610 | " \n", 611 | " \n", 612 | " \n", 613 | " \n", 614 | " \n", 615 | " \n", 616 | " \n", 617 | " \n", 618 | " \n", 619 | " \n", 620 | " \n", 621 | " \n", 622 | " \n", 623 | " \n", 624 | " \n", 625 | " \n", 626 | " \n", 627 | " \n", 628 | " \n", 629 | " \n", 630 | " \n", 631 | " \n", 632 | " \n", 633 | " \n", 634 | " \n", 635 | " \n", 636 | " \n", 637 | " \n", 638 | " \n", 639 | " \n", 640 | " \n", 641 | " \n", 642 | " \n", 643 | " \n", 644 | " \n", 645 | " \n", 646 | " \n", 647 | " \n", 648 | " \n", 649 | " \n", 650 | " \n", 651 | " \n", 652 | " \n", 653 | " \n", 654 | " \n", 655 | " \n", 656 | " \n", 657 | " \n", 658 | " \n", 659 | " \n", 660 | " \n", 661 | " \n", 662 | " \n", 663 | " \n", 664 | " \n", 665 | " \n", 666 | " \n", 667 | " \n", 668 | " \n", 669 | "
useriditemidratingtimestamp
130213371882397011
14851201372884114635
19670268373876514002
29489363372891498510
31084385374880013483
32996405371885548384
62777773373888540352
\n", 670 | "
" 671 | ], 672 | "text/plain": [ 673 | " userid itemid rating timestamp\n", 674 | "1302 13 37 1 882397011\n", 675 | "14851 201 37 2 884114635\n", 676 | "19670 268 37 3 876514002\n", 677 | "29489 363 37 2 891498510\n", 678 | "31084 385 37 4 880013483\n", 679 | "32996 405 37 1 885548384\n", 680 | "62777 773 37 3 888540352" 681 | ] 682 | }, 683 | "execution_count": 45, 684 | "metadata": { 685 | "tags": [] 686 | }, 687 | "output_type": "execute_result" 688 | } 689 | ], 690 | "source": [ 691 | "# Y también cuáles usuarios han rankeado la pelicula ID:37\n", 692 | "train_file[train_file['itemid'] == 37]" 693 | ] 694 | }, 695 | { 696 | "cell_type": "markdown", 697 | "metadata": { 698 | "colab_type": "text", 699 | "id": "u2P14hwFXytK" 700 | }, 701 | "source": [ 702 | "## Actividad 3 👓\n", 703 | "\n", 704 | "Haremos un pequeño experimento para entender mejor como funciona Slope One. Gracias al ejercicio anterior, sabemos que el usuario 457 ya ha asignado el mejor rating (5 ⭐) a las dos peliculas ID:9 e ID:1168. Comparemos.\n", 705 | "\n", 706 | "**Pregunta:** ¿Cómo se explican estos resultados? 
\n", 707 | "\n", 708 | "**Respuesta:**" 709 | ] 710 | }, 711 | { 712 | "cell_type": "code", 713 | "execution_count": 46, 714 | "metadata": { 715 | "colab": { 716 | "base_uri": "https://localhost:8080/", 717 | "height": 51 718 | }, 719 | "colab_type": "code", 720 | "id": "gaqUjAQaVseB", 721 | "outputId": "e26f5b40-0e11-4cbe-8bd3-f35fc4e6f68e" 722 | }, 723 | "outputs": [ 724 | { 725 | "name": "stdout", 726 | "output_type": "stream", 727 | "text": [ 728 | "Prediction for ID:9 : 4.530702114105225\n", 729 | "Prediction for ID:1168 : 4.166153907775879\n" 730 | ] 731 | } 732 | ], 733 | "source": [ 734 | "prediction_id9 = mySlopeOne.predict(\"457\", \"9\")\n", 735 | "prediction_id1168 = mySlopeOne.predict(\"457\", \"1168\")\n", 736 | "\n", 737 | "print('Prediction for ID:9 :', prediction_id9)\n", 738 | "print('Prediction for ID:1168 :', prediction_id1168)" 739 | ] 740 | }, 741 | { 742 | "cell_type": "markdown", 743 | "metadata": { 744 | "colab_type": "text", 745 | "id": "N8dIE2n_YYnV" 746 | }, 747 | "source": [ 748 | "## Paso 7:\n", 749 | "\n", 750 | "Generaremos ahora una lista ordenada de las top-N recomendaciones, dado un usuario.\n", 751 | "\n" 752 | ] 753 | }, 754 | { 755 | "cell_type": "code", 756 | "execution_count": 47, 757 | "metadata": { 758 | "colab": { 759 | "base_uri": "https://localhost:8080/", 760 | "height": 34 761 | }, 762 | "colab_type": "code", 763 | "id": "CVcJ1I1nZaK1", 764 | "outputId": "4f3d974b-611c-4293-aceb-6714b75d9238" 765 | }, 766 | "outputs": [ 767 | { 768 | "name": "stdout", 769 | "output_type": "stream", 770 | "text": [ 771 | "Lista de items según ID: ['1592', '1589', '1656', '1431', '1653']\n" 772 | ] 773 | } 774 | ], 775 | "source": [ 776 | "# Mediante el método recommend() genereremos una lista top-5 recomendaciones para el usuario ID:457\n", 777 | "reclist_slopeone = mySlopeOne.recommend(\"457\", 5)\n", 778 | "\n", 779 | "# Y visualizaremos el resultado\n", 780 | "print('Lista de items según ID:', reclist_slopeone)" 781 | ] 782 | }, 783 
| { 784 | "cell_type": "code", 785 | "execution_count": 48, 786 | "metadata": { 787 | "colab": { 788 | "base_uri": "https://localhost:8080/", 789 | "height": 153 790 | }, 791 | "colab_type": "code", 792 | "id": "BFdRZKQcZtHE", 793 | "outputId": "563f54db-1f10-4c67-87fc-8dcffc68e0e5" 794 | }, 795 | "outputs": [ 796 | { 797 | "name": "stdout", 798 | "output_type": "stream", 799 | "text": [ 800 | "Lista de items por nombre:\n" 801 | ] 802 | }, 803 | { 804 | "data": { 805 | "text/plain": [ 806 | "movieid\n", 807 | "1592 Magic Hour, The (1998)\n", 808 | "1589 Schizopolis (1996)\n", 809 | "1656 Little City (1998)\n", 810 | "1431 Legal Deceit (1997)\n", 811 | "1653 Entertaining Angels: The Dorothy Day Story (1996)\n", 812 | "Name: title, dtype: object" 813 | ] 814 | }, 815 | "execution_count": 48, 816 | "metadata": { 817 | "tags": [] 818 | }, 819 | "output_type": "execute_result" 820 | } 821 | ], 822 | "source": [ 823 | "# Lo convertimos a numpy array\n", 824 | "recmovies_slopeone = np.array(reclist_slopeone).astype(int)\n", 825 | "\n", 826 | "# Utilizamos la estructura de datos de los items para encontrar los títulos recomendados\n", 827 | "print('Lista de items por nombre:')\n", 828 | "info_file.loc[recmovies_slopeone]['title']" 829 | ] 830 | }, 831 | { 832 | "cell_type": "markdown", 833 | "metadata": { 834 | "colab_type": "text", 835 | "id": "NlX6FLTxdPRm" 836 | }, 837 | "source": [ 838 | "## Actividad 4 👩🏻‍💻\n", 839 | "\n", 840 | "Genera una nueva recomendación, modificando los hiperparámetros de usuario y topN a tu elección.\n", 841 | "\n", 842 | "**Pregunta:** ¿Ves una diferencia en la recomendación entre el nuevo usuario y el usuario ID:457?\n", 843 | "\n", 844 | "**Respuesta:**" 845 | ] 846 | }, 847 | { 848 | "cell_type": "code", 849 | "execution_count": null, 850 | "metadata": { 851 | "colab": {}, 852 | "colab_type": "code", 853 | "id": "YQM7mfwDdY3j" 854 | }, 855 | "outputs": [], 856 | "source": [ 857 | "# Escribe el nuevo código aquí\n" 858 | ] 859 | }, 860 | { 
861 | "cell_type": "markdown", 862 | "metadata": { 863 | "colab_type": "text", 864 | "id": "iU2NlezaiIur" 865 | }, 866 | "source": [ 867 | "## Actividad 5 👩🏻‍💻\n", 868 | "\n", 869 | "Dado el usuario ID:44, cree dos listas de películas recomendadas; la primera utilizando el algoritmo Most Popular y la segunda utilizando el algoritmo Slope One.\n", 870 | "\n", 871 | "**Pregunta:** Realice un análisis apreciativo de las similitudes y diferencias entre ambas recomendaciones.\n", 872 | "\n", 873 | "**Respuesta:**" 874 | ] 875 | }, 876 | { 877 | "cell_type": "code", 878 | "execution_count": null, 879 | "metadata": { 880 | "colab": {}, 881 | "colab_type": "code", 882 | "id": "Rxk9ySLQiMC4" 883 | }, 884 | "outputs": [], 885 | "source": [ 886 | "# Escribe el nuevo código aquí\n" 887 | ] 888 | } 889 | ], 890 | "metadata": { 891 | "colab": { 892 | "name": "pyRecLab_SlopeOne.ipynb", 893 | "provenance": [], 894 | "toc_visible": true 895 | }, 896 | "kernelspec": { 897 | "display_name": "Python 3", 898 | "language": "python", 899 | "name": "python3" 900 | }, 901 | "language_info": { 902 | "codemirror_mode": { 903 | "name": "ipython", 904 | "version": 3 905 | }, 906 | "file_extension": ".py", 907 | "mimetype": "text/x-python", 908 | "name": "python", 909 | "nbconvert_exporter": "python", 910 | "pygments_lexer": "ipython3", 911 | "version": "3.6.0" 912 | }, 913 | "toc": { 914 | "base_numbering": 1, 915 | "nav_menu": {}, 916 | "number_sections": true, 917 | "sideBar": true, 918 | "skip_h1_title": false, 919 | "title_cell": "Table of Contents", 920 | "title_sidebar": "Contents", 921 | "toc_cell": false, 922 | "toc_position": {}, 923 | "toc_section_display": true, 924 | "toc_window_display": false 925 | } 926 | }, 927 | "nbformat": 4, 928 | "nbformat_minor": 1 929 | } 930 | -------------------------------------------------------------------------------- /proyecto/Denis-IdeasProyectosFinales-2020.pdf: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/PUC-RecSys-Class/RecSysPUC-2020/3f548241460a7c01e484c63a7a39dcbcd9999f7f/proyecto/Denis-IdeasProyectosFinales-2020.pdf -------------------------------------------------------------------------------- /proyecto/Enunciado_Proyecto_Final_RecSys_2020_2.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PUC-RecSys-Class/RecSysPUC-2020/3f548241460a7c01e484c63a7a39dcbcd9999f7f/proyecto/Enunciado_Proyecto_Final_RecSys_2020_2.pdf -------------------------------------------------------------------------------- /proyecto/IIC3633-propuestas-2018_p1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PUC-RecSys-Class/RecSysPUC-2020/3f548241460a7c01e484c63a7a39dcbcd9999f7f/proyecto/IIC3633-propuestas-2018_p1.pdf -------------------------------------------------------------------------------- /proyecto/IIC3633-propuestas-2018_p2.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PUC-RecSys-Class/RecSysPUC-2020/3f548241460a7c01e484c63a7a39dcbcd9999f7f/proyecto/IIC3633-propuestas-2018_p2.pdf -------------------------------------------------------------------------------- /proyecto/Vladimir-RecSysLoL2020.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PUC-RecSys-Class/RecSysPUC-2020/3f548241460a7c01e484c63a7a39dcbcd9999f7f/proyecto/Vladimir-RecSysLoL2020.pdf -------------------------------------------------------------------------------- /proyecto/proy_finales_2018/Araujo_etal_LoL_2018.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PUC-RecSys-Class/RecSysPUC-2020/3f548241460a7c01e484c63a7a39dcbcd9999f7f/proyecto/proy_finales_2018/Araujo_etal_LoL_2018.pdf 
-------------------------------------------------------------------------------- /proyecto/proy_finales_2018/Barrios_et_al_fakenews_2018.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PUC-RecSys-Class/RecSysPUC-2020/3f548241460a7c01e484c63a7a39dcbcd9999f7f/proyecto/proy_finales_2018/Barrios_et_al_fakenews_2018.pdf -------------------------------------------------------------------------------- /proyecto/proy_finales_2018/Cerda_etal_rnn_2018.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PUC-RecSys-Class/RecSysPUC-2020/3f548241460a7c01e484c63a7a39dcbcd9999f7f/proyecto/proy_finales_2018/Cerda_etal_rnn_2018.pdf -------------------------------------------------------------------------------- /proyecto/proy_finales_2018/Guzman_etal_steam_2018.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PUC-RecSys-Class/RecSysPUC-2020/3f548241460a7c01e484c63a7a39dcbcd9999f7f/proyecto/proy_finales_2018/Guzman_etal_steam_2018.pdf -------------------------------------------------------------------------------- /proyecto/proy_finales_2018/Munoz_etal_playlist_2018.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PUC-RecSys-Class/RecSysPUC-2020/3f548241460a7c01e484c63a7a39dcbcd9999f7f/proyecto/proy_finales_2018/Munoz_etal_playlist_2018.pdf -------------------------------------------------------------------------------- /proyecto/proy_finales_2018/Rencoret_etal_selfattention_2018.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PUC-RecSys-Class/RecSysPUC-2020/3f548241460a7c01e484c63a7a39dcbcd9999f7f/proyecto/proy_finales_2018/Rencoret_etal_selfattention_2018.pdf 
-------------------------------------------------------------------------------- /proyecto/proy_finales_2019/README.md: -------------------------------------------------------------------------------- 1 | ### Proyectos finales de 2019 para consultar de ejemplo: 2 | 3 | - Dominguez, Krebs, Lira 4 | [repo](https://github.com/PUC-RecSys-Class/proyecto-final-recsys-2019-2-dominguez-krebs-lira) 5 | 6 | - Gazali, Fischer 7 | [repo](https://github.com/PUC-RecSys-Class/proyecto-final-recsys-2019-2-gazali-fischer) 8 | 9 | - Navon, Andrade 10 | [repo](https://github.com/PUC-RecSys-Class/proyecto-final-recsys-2019-2-recmen) 11 | 12 | - Catan, Villa 13 | [repo](https://github.com/PUC-RecSys-Class/proyecto-final-recsys-2019-2-catttan-villa) 14 | -------------------------------------------------------------------------------- /proyecto/proy_finales_2020/Alipanah et al.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PUC-RecSys-Class/RecSysPUC-2020/3f548241460a7c01e484c63a7a39dcbcd9999f7f/proyecto/proy_finales_2020/Alipanah et al.pdf -------------------------------------------------------------------------------- /proyecto/proy_finales_2020/Alliende.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PUC-RecSys-Class/RecSysPUC-2020/3f548241460a7c01e484c63a7a39dcbcd9999f7f/proyecto/proy_finales_2020/Alliende.pdf -------------------------------------------------------------------------------- /proyecto/proy_finales_2020/Andrade et al.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PUC-RecSys-Class/RecSysPUC-2020/3f548241460a7c01e484c63a7a39dcbcd9999f7f/proyecto/proy_finales_2020/Andrade et al.pdf -------------------------------------------------------------------------------- /proyecto/proy_finales_2020/Biskupovic et al.pdf: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/PUC-RecSys-Class/RecSysPUC-2020/3f548241460a7c01e484c63a7a39dcbcd9999f7f/proyecto/proy_finales_2020/Biskupovic et al.pdf -------------------------------------------------------------------------------- /proyecto/proy_finales_2020/Cartagena et al.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PUC-RecSys-Class/RecSysPUC-2020/3f548241460a7c01e484c63a7a39dcbcd9999f7f/proyecto/proy_finales_2020/Cartagena et al.pdf -------------------------------------------------------------------------------- /proyecto/proy_finales_2020/Castro et al.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PUC-RecSys-Class/RecSysPUC-2020/3f548241460a7c01e484c63a7a39dcbcd9999f7f/proyecto/proy_finales_2020/Castro et al.pdf -------------------------------------------------------------------------------- /proyecto/proy_finales_2020/Codoceo et al.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PUC-RecSys-Class/RecSysPUC-2020/3f548241460a7c01e484c63a7a39dcbcd9999f7f/proyecto/proy_finales_2020/Codoceo et al.pdf -------------------------------------------------------------------------------- /proyecto/proy_finales_2020/Contreras et al.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PUC-RecSys-Class/RecSysPUC-2020/3f548241460a7c01e484c63a7a39dcbcd9999f7f/proyecto/proy_finales_2020/Contreras et al.pdf -------------------------------------------------------------------------------- /proyecto/proy_finales_2020/Diaz et al.pdf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/PUC-RecSys-Class/RecSysPUC-2020/3f548241460a7c01e484c63a7a39dcbcd9999f7f/proyecto/proy_finales_2020/Diaz et al.pdf -------------------------------------------------------------------------------- /proyecto/proy_finales_2020/Donoso et al.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PUC-RecSys-Class/RecSysPUC-2020/3f548241460a7c01e484c63a7a39dcbcd9999f7f/proyecto/proy_finales_2020/Donoso et al.pdf -------------------------------------------------------------------------------- /proyecto/proy_finales_2020/Duarte et al.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PUC-RecSys-Class/RecSysPUC-2020/3f548241460a7c01e484c63a7a39dcbcd9999f7f/proyecto/proy_finales_2020/Duarte et al.pdf -------------------------------------------------------------------------------- /proyecto/proy_finales_2020/Everke et al.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PUC-RecSys-Class/RecSysPUC-2020/3f548241460a7c01e484c63a7a39dcbcd9999f7f/proyecto/proy_finales_2020/Everke et al.pdf -------------------------------------------------------------------------------- /proyecto/proy_finales_2020/Friedl et al.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PUC-RecSys-Class/RecSysPUC-2020/3f548241460a7c01e484c63a7a39dcbcd9999f7f/proyecto/proy_finales_2020/Friedl et al.pdf -------------------------------------------------------------------------------- /proyecto/proy_finales_2020/Fuentes et al.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PUC-RecSys-Class/RecSysPUC-2020/3f548241460a7c01e484c63a7a39dcbcd9999f7f/proyecto/proy_finales_2020/Fuentes et al.pdf 
-------------------------------------------------------------------------------- /proyecto/proy_finales_2020/Guinez et al.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PUC-RecSys-Class/RecSysPUC-2020/3f548241460a7c01e484c63a7a39dcbcd9999f7f/proyecto/proy_finales_2020/Guinez et al.pdf -------------------------------------------------------------------------------- /proyecto/proy_finales_2020/Hanuch et al.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PUC-RecSys-Class/RecSysPUC-2020/3f548241460a7c01e484c63a7a39dcbcd9999f7f/proyecto/proy_finales_2020/Hanuch et al.pdf -------------------------------------------------------------------------------- /proyecto/proy_finales_2020/Olguin et al.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PUC-RecSys-Class/RecSysPUC-2020/3f548241460a7c01e484c63a7a39dcbcd9999f7f/proyecto/proy_finales_2020/Olguin et al.pdf -------------------------------------------------------------------------------- /proyecto/proy_finales_2020/Ramos et al.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PUC-RecSys-Class/RecSysPUC-2020/3f548241460a7c01e484c63a7a39dcbcd9999f7f/proyecto/proy_finales_2020/Ramos et al.pdf -------------------------------------------------------------------------------- /proyecto/proy_finales_2020/Salinas et al.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PUC-RecSys-Class/RecSysPUC-2020/3f548241460a7c01e484c63a7a39dcbcd9999f7f/proyecto/proy_finales_2020/Salinas et al.pdf -------------------------------------------------------------------------------- /proyecto/proy_finales_2020/Tapia et al.pdf: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/PUC-RecSys-Class/RecSysPUC-2020/3f548241460a7c01e484c63a7a39dcbcd9999f7f/proyecto/proy_finales_2020/Tapia et al.pdf -------------------------------------------------------------------------------- /proyecto/proy_finales_2020/Valdes et al.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PUC-RecSys-Class/RecSysPUC-2020/3f548241460a7c01e484c63a7a39dcbcd9999f7f/proyecto/proy_finales_2020/Valdes et al.pdf -------------------------------------------------------------------------------- /proyecto/proy_finales_2020/Valencia et al.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PUC-RecSys-Class/RecSysPUC-2020/3f548241460a7c01e484c63a7a39dcbcd9999f7f/proyecto/proy_finales_2020/Valencia et al.pdf -------------------------------------------------------------------------------- /proyecto/recsys2016_zorich-troncoso.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PUC-RecSys-Class/RecSysPUC-2020/3f548241460a7c01e484c63a7a39dcbcd9999f7f/proyecto/recsys2016_zorich-troncoso.pdf -------------------------------------------------------------------------------- /tareas/Tarea_1_RecSys_2020_2.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PUC-RecSys-Class/RecSysPUC-2020/3f548241460a7c01e484c63a7a39dcbcd9999f7f/tareas/Tarea_1_RecSys_2020_2.pdf --------------------------------------------------------------------------------