├── .gitignore ├── LICENSE ├── README.md ├── Tarea_1_RecSys_2021_2.pdf ├── clases ├── s1-c2-SlopeOne.pdf ├── s1-c2-UBCF_clustering.pdf ├── s1-c2-nonpers-UBCF.pdf ├── s10_c1_activelearning.pdf ├── s10_c1_activelearning_v3.pdf ├── s11_c1_deep_learning.pdf ├── s11_c2_secu_deep_learning.pdf ├── s12_c1_deep_learning_s.pdf ├── s12_c2_10bigproblems-recsys-small.pdf ├── s2_c1-IBCF.pdf ├── s2_c2-Factorizacion_matricial.pdf ├── s3_c1-Implicit-feedback.pdf ├── s3_c2-BPR.pdf ├── s4_c1-metricas.pdf ├── s4_c2-tests_estadisticos.pdf ├── s5_c1-content.pdf ├── s5_c1_p2-content.pdf ├── s5_c2-content.pdf ├── s6_c1-contexto.pdf ├── s6_c1-hibridos.pdf ├── s6_c2_p2-FMachines.pdf ├── s6_c2_p3-blending_ensemble.pdf ├── s7_c1_deep_learning.pdf ├── s7_c2_deep_learning_s.pdf ├── s7_c2_secu_deep_learning.pdf ├── s9_c1_usercentric.pdf └── s9_c2_FATv2.pdf ├── posters ├── A1-andrade_dominguez_pattillo.jpg ├── A3-olguin_lopez_ibarra.png ├── A4_donosoguzman.png ├── A5-codoceo_escudero_torres.pptx.png ├── A6-Contreras_Molina_Stambuk.png ├── A7-ovalle_valdes.png ├── N2-cartegana_huerfano_toscano.png ├── N3-Valencia-González.png ├── N4_castro_casassus.png ├── N5-Labarca_Fuentes.png ├── N6-Aguilera_Everke.png ├── N7-suarez_carreno_alipanah.png ├── N8-Guinez_Ruiz_Sanchez.png ├── V1-salinas.png ├── V2-waugh_hanuch_ricke.jpg ├── V3_duarte_lopez_rodriguez.png ├── V4-diaz_vinay.png ├── V5-alliende.png ├── V6_perez_ramos.png ├── V7-tapia_villagran.png └── V8-biskupovic.png ├── practicos ├── Ayudantia_TIRLol.ipynb ├── Content_Based_imagenes.ipynb ├── Content_Based_texto.ipynb ├── FastFM_factorization_machines.ipynb ├── GRU4Rec.ipynb ├── HT4Rec.ipynb ├── Implicit_feedback.ipynb ├── MultiVAE_Practico.ipynb ├── README.md ├── Reinforcement_Learning_Mabwiser.ipynb ├── Reinforcement_Learning_Recsim.ipynb ├── pyRecLab_FunkSVD.ipynb ├── pyRecLab_MostPopular.ipynb ├── pyRecLab_SlopeOne.ipynb ├── pyRecLab_iKNN.ipynb └── pyRecLab_uKNN.ipynb ├── proyecto ├── Denis-IdeasProyectosFinales-2020.pdf ├── 
Enunciado_Proyecto_Final_RecSys_2020_2.pdf ├── Enunciado_Proyecto_Final_RecSys_2021_2.pdf ├── IIC3633-propuestas-2018_p1.pdf ├── IIC3633-propuestas-2018_p2.pdf ├── Vladimir-RecSysLoL2020.pdf ├── proy_finales_2018 │ ├── Araujo_etal_LoL_2018.pdf │ ├── Barrios_et_al_fakenews_2018.pdf │ ├── Cerda_etal_rnn_2018.pdf │ ├── Guzman_etal_steam_2018.pdf │ ├── Munoz_etal_playlist_2018.pdf │ └── Rencoret_etal_selfattention_2018.pdf ├── proy_finales_2019 │ └── README.md ├── proy_finales_2020 │ ├── Alipanah et al.pdf │ ├── Alliende.pdf │ ├── Andrade et al.pdf │ ├── Biskupovic et al.pdf │ ├── Cartagena et al.pdf │ ├── Castro et al.pdf │ ├── Codoceo et al.pdf │ ├── Contreras et al.pdf │ ├── Diaz et al.pdf │ ├── Donoso et al.pdf │ ├── Duarte et al.pdf │ ├── Everke et al.pdf │ ├── Friedl et al.pdf │ ├── Fuentes et al.pdf │ ├── Guinez et al.pdf │ ├── Hanuch et al.pdf │ ├── Olguin et al.pdf │ ├── Ramos et al.pdf │ ├── Salinas et al.pdf │ ├── Tapia et al.pdf │ ├── Valdes et al.pdf │ └── Valencia et al.pdf └── recsys2016_zorich-troncoso.pdf └── tareas ├── Tarea_1_RecSys_2020_2.pdf └── Tarea_1_RecSys_2021_2.pdf /.gitignore: -------------------------------------------------------------------------------- 1 | # Editors 2 | .vscode/ 3 | .idea/ 4 | 5 | # Vagrant 6 | .vagrant/ 7 | 8 | # Mac/OSX 9 | .DS_Store 10 | 11 | # Windows 12 | Thumbs.db 13 | 14 | # Source for the following rules: https://raw.githubusercontent.com/github/gitignore/master/Python.gitignore 15 | # Byte-compiled / optimized / DLL files 16 | __pycache__/ 17 | *.py[cod] 18 | *$py.class 19 | 20 | # C extensions 21 | *.so 22 | 23 | # Distribution / packaging 24 | .Python 25 | build/ 26 | develop-eggs/ 27 | dist/ 28 | downloads/ 29 | eggs/ 30 | .eggs/ 31 | lib/ 32 | lib64/ 33 | parts/ 34 | sdist/ 35 | var/ 36 | wheels/ 37 | *.egg-info/ 38 | .installed.cfg 39 | *.egg 40 | MANIFEST 41 | 42 | # PyInstaller 43 | # Usually these files are written by a python script from a template 44 | # before PyInstaller builds the exe, so as to 
inject date/other infos into it. 45 | *.manifest 46 | *.spec 47 | 48 | # Installer logs 49 | pip-log.txt 50 | pip-delete-this-directory.txt 51 | 52 | # Unit test / coverage reports 53 | htmlcov/ 54 | .tox/ 55 | .nox/ 56 | .coverage 57 | .coverage.* 58 | .cache 59 | nosetests.xml 60 | coverage.xml 61 | *.cover 62 | .hypothesis/ 63 | .pytest_cache/ 64 | 65 | # Translations 66 | *.mo 67 | *.pot 68 | 69 | # Django stuff: 70 | *.log 71 | local_settings.py 72 | db.sqlite3 73 | 74 | # Flask stuff: 75 | instance/ 76 | .webassets-cache 77 | 78 | # Scrapy stuff: 79 | .scrapy 80 | 81 | # Sphinx documentation 82 | docs/_build/ 83 | 84 | # PyBuilder 85 | target/ 86 | 87 | # Jupyter Notebook 88 | .ipynb_checkpoints 89 | 90 | # IPython 91 | profile_default/ 92 | ipython_config.py 93 | 94 | # pyenv 95 | .python-version 96 | 97 | # celery beat schedule file 98 | celerybeat-schedule 99 | 100 | # SageMath parsed files 101 | *.sage.py 102 | 103 | # Environments 104 | .env 105 | .venv 106 | env/ 107 | venv/ 108 | ENV/ 109 | env.bak/ 110 | venv.bak/ 111 | 112 | # Spyder project settings 113 | .spyderproject 114 | .spyproject 115 | 116 | # Rope project settings 117 | .ropeproject 118 | 119 | # mkdocs documentation 120 | /site 121 | 122 | # mypy 123 | .mypy_cache/ 124 | .dmypy.json 125 | dmypy.json 126 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 PUC-RecSys-Class 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 
| 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # IIC3633 Sistemas Recomendadores 2 | Agosto-Diciembre 2021 3 | 4 | **AVISOS** 5 | El Jueves 26 de Septiembre hay clase en sala A7, es hibrida así es que pueden asistir presencialmente o de forma remota por el link zoom de siempre. 6 | 7 | 8 | ### Equipo Docente e Información Administrativa 9 | **Instructor**: [Denis Parra](http://dparra.sitios.ing.uc.cl), Profesor Asociado PUC Chile, Ph.D. University of Pittsburgh 10 | 11 | **Ayudantes**: 12 | [Vladimir Araujo](https://vgaraujov.github.io/), Estudiante de Doctorado en Ciencia de la Computación PUC Chile. 13 | [Andrés Carvallo](https://scholar.google.com/citations?user=DinpmCUAAAAJ&hl=es), Estudiante de Doctorado en Ciencia de la Computación PUC Chile. 14 | [Francisca Cattan](https://www.linkedin.com/in/franciscacattan/), Estudiante de Doctorado en Ciencia de la Computación PUC Chile. 15 | [Alvaro Labarca](#), Estudiante de Magister, en Ciencia de la Computación PUC Chile. 16 | [Jorge Pérez Facuse](#), Estudiante de Magister, en Ciencia de la Computación PUC Chile. 17 | 18 | 19 | **Institución**: Pontificia Universidad Católica de Chile 20 | 21 | **Horario**: Martes y Jueves, Módulo 3 (11:30 a 12:50). 
22 | 23 | Programa IIC 3633, 2do Semestre 2021: [pdf](https://dparra.sitios.ing.uc.cl/pdfs/IIC3633Sist%20Recomendadores_v5.pdf). 24 | 25 | ### Descripción del Curso 26 | 27 | El curso de Sistemas Recomendadores cubre las principales tareas de recomendación, algoritmos, fuentes de datos y evaluación de estos sistemas. Al final de este curso serás capaz de decidir qué técnicas y fuentes de datos usar para implementar y evaluar sistemas recomendadores. 28 | 29 | **Software**: [pyRecLab](https://github.com/gasevi/pyreclab/). 30 | 31 | La componente práctica de este curso se enseña a través del uso de pyRecLab desarrollado por Gabriel Sepúlveda (ex-alumno de este curso), biblioteca de software para desarrollo de sistemas recomendadores en Python. 32 | 33 | **Contenido**: 34 | 35 | ## Contenidos por Semana 36 | 37 | 38 | 39 | 40 | | Semana | Tema | link slide(s) | link video | comentario(s) | 41 | |:--------|:-----------------|:-------------:|:----------:|:-------------:| 42 | | 1 | Introducción | x | [video](https://drive.google.com/file/d/1LwlP2avrSnU6gId_I9ywF3nFU7nboL09/view?usp=sharing) | | 43 | | 1 | Ranking no personalizado y Filtrado colaborativo (FC) | [slides](https://github.com/PUC-RecSys-Class/RecSysPUC-2021/blob/master/clases/s1-c2-nonpers-UBCF.pdf) | [video](https://drive.google.com/file/d/10rL3XbuNVCCETiSvncFmcIpkKJ9B6Krg/view?usp=sharing) | | 44 | | 1 | User-based FC con clustering | [slides](https://github.com/PUC-RecSys-Class/RecSysPUC-2021/blob/master/clases/s1-c2-UBCF_clustering.pdf) | [video](#) | | 45 | | 2 | Pendiente Uno | [slides](https://github.com/PUC-RecSys-Class/RecSysPUC-2021/blob/master/clases/s1-c2-SlopeOne.pdf) | [video](https://drive.google.com/file/d/1MAAo4hlJKnNvBAyFyHmGimtHju-ytSpi/view?usp=sharing) | | 46 | | 2 | Item-based FC | [slides](https://github.com/PUC-RecSys-Class/RecSysPUC-2021/blob/master/clases/s2_c1-IBCF.pdf) | [video](https://drive.google.com/file/d/1MAAo4hlJKnNvBAyFyHmGimtHju-ytSpi/view?usp=sharing) | | 47 | | 2 | 
Factorización Matricial: FunkSVD | [slides](https://github.com/PUC-RecSys-Class/RecSysPUC-2021/blob/master/clases/s2_c2-Factorizacion_matricial.pdf) | [video](https://drive.google.com/file/d/1GSVTKI6QXDEmhWfRb0YqjMe7HHcUVfbv/view?usp=sharing) | | 48 | | 3 | Implicit Feedback CF | [slides](https://github.com/PUC-RecSys-Class/RecSysPUC-2021/blob/master/clases/s3_c1-Implicit-feedback.pdf) | [video](https://drive.google.com/file/d/1xR3QK2erxukNCMlyQJLB1rA6Qu45gLlG/view?usp=sharing) | | 49 | | 3 | Bayesian Personalized Ranking (BPR) | [slides](https://github.com/PUC-RecSys-Class/RecSysPUC-2021/blob/master/clases/s3_c2-BPR.pdf) | [video](https://drive.google.com/file/d/1cq0_y7q5G6xqMa7HM9nxSrRCOGZHXzBa/view?usp=sharing) | | 50 | | 4 | Evaluación: metricas de error y ranking | [slides](https://github.com/PUC-RecSys-Class/RecSysPUC-2021/blob/master/clases/s4_c1-metricas.pdf) | [video](https://drive.google.com/file/d/1wFQc9h1pdaJH1YbbUgbw8Wb56m1mHvZf/view) | [slides P Castells LARS 2019](http://ir.ii.uam.es/castells/lars2019.pdf) | 51 | | 4 | Evaluación II: Cobertura, diversidad, novedad | [slides](https://github.com/PUC-RecSys-Class/RecSysPUC-2021/blob/master/clases/s4_c1-metricas.pdf) | [video](https://drive.google.com/file/d/1TPjmn7FWK2C4G3rg7UYY4NGG2bLJyTOy/view) | | 52 | | 4 | Evaluación III: Tests estadísticos | [slides](https://github.com/PUC-RecSys-Class/RecSysPUC-2021/blob/master/clases/s4_c2-tests_estadisticos.pdf) | [video](https://drive.google.com/file/d/1jis_iIjFFWk7NcggeMn26rLAhrWuRK5h/view) | | 53 | | 5 | Recomendación basada en contenido 1 | [slides](https://github.com/PUC-RecSys-Class/RecSysPUC-2021/blob/master/clases/s5_c1-content.pdf) | [video](https://drive.google.com/file/d/1SGsph_uYSxJuWBhLfh1YhZCazZXFCTCQ/view?usp=sharing) | | 54 | | 5 | Recomendación basada en contenido 2 | [slides](https://github.com/PUC-RecSys-Class/RecSysPUC-2021/blob/master/clases/s5_c2-content.pdf) | [video](#) | | 55 | | 6 | Recomendación híbrida | 
[slides](https://github.com/PUC-RecSys-Class/RecSysPUC-2021/blob/master/clases/s6_c1-hibridos.pdf) | [video](https://drive.google.com/file/d/1aUNYNli4l4xk_hGRB7v-PaRr0xtJWQau/view?usp=sharing) | | 56 | | 6 | Recomendación por ensambles | [slides](https://github.com/PUC-RecSys-Class/RecSysPUC-2021/blob/master/clases/s6_c2_p3-blending_ensemble.pdf) | [video](https://drive.google.com/file/d/1o5cL5JspHI8QizFeMT8_9HQKqNT2rmqm/view?usp=sharing) | | 57 | | 6 | Recomendación basada en contexto | [slides](https://github.com/PUC-RecSys-Class/RecSysPUC-2021/blob/master/clases/s6_c1-contexto.pdf) | [video1](https://drive.google.com/file/d/1fC3Ypg5aF8Be_8b7ZpPFqwVdtJS2kwzm/view?usp=sharing) [video2](https://drive.google.com/file/d/10r_6DzrKflF8sVzgZ1mU8xb8mJC2BcgO/view?usp=sharing) | | 58 | | 6 | Máquinas de Factorización | [slides](https://github.com/PUC-RecSys-Class/RecSysPUC-2021/blob/master/clases/s6_c2_p2-FMachines.pdf) | [video](https://drive.google.com/file/d/111IK4ZIE-bqiNWmYLyafQoS0kthETLUr/view?usp=sharing) | | 59 | | 7 | Deep Learning I: Intro | [slides](https://github.com/PUC-RecSys-Class/RecSysPUC-2021/blob/master/clases/s7_c1_deep_learning.pdf) | [video](https://drive.google.com/file/d/1KHpLMWd4ISSNOadeXdngK-fQSXpVUols/view?usp=sharing) | | 60 | | 7 | Deep Learning II: Tres proyectos | [slides](https://github.com/PUC-RecSys-Class/RecSysPUC-2021/blob/master/clases/s7_c2_deep_learning_s.pdf) | [video](https://drive.google.com/file/d/1l3fkgyubvrbMjziieKiLglbaJR9USTVA/view?usp=sharing) | | 61 | | 8 | SR Centrados en el Usuario | [slides](https://github.com/PUC-RecSys-Class/RecSysPUC-2021/blob/master/clases/s9_c1_usercentric.pdf) | [video](https://drive.google.com/file/d/1B8-N2ZtyE9p5KBr-EuoT1YQ-Fy0GDUF1/view?usp=sharing) | | 62 | | 8 | Sistemas Justos, Explicables y Transparentes | [slides](https://github.com/PUC-RecSys-Class/RecSysPUC-2021/blob/master/clases/s9_c2_FATv2.pdf) | [video](https://drive.google.com/file/d/1HooxTDVwa9WQM1YXQWjDTRT-i1dmkO8m/view?usp=sharing) 
| | 63 | | 9 | Aprendizaje Activo (Active Learning) | [slides](https://github.com/PUC-RecSys-Class/RecSysPUC-2021/blob/master/clases/s10_c1_activelearning_v3.pdf) | [video](https://drive.google.com/file/d/1_hMeckheVQLauyNrFNheBrcn3qpoCwJO/view?usp=sharing) | | 64 | | 9 | Bandits | [slides](https://drive.google.com/file/d/1SlDt7UCDrtJBIPUxAMjhqzsjz6uelyIn/view?usp=sharing) | [video](https://drive.google.com/file/d/1PyIFL8dDtQ4W0y_t89jrXzJnZcl56TEj/view?usp=sharing) | | 65 | | -- | Semana Break | Break | Break | | 66 | 67 | 68 | 69 | 79 | 80 | ### Parte II del curso: seminario 81 | 82 | A partir de noviembre el curso toma modalidad seminario, los alumnos hacen presentaciones de los siguientes papers: 83 | 84 | 85 | 86 | 87 | | Semana | Paper | link slide(s) | link video | conferencia | 88 | |:--------|:-----------------|:-------------:|:----------:|:-------------:| 89 | |10 |Towards Question-based Recommender Systems|[slides](https://drive.google.com/file/d/10XYI2tyArpuUwU_kUx4XbwMLuS4ayqxa/view?usp=sharing)|[video](https://drive.google.com/file/d/1P10wU06ISrKbzb7ZG7J_bKVRxOcB-MU4/view?usp=sharing)|[SIGIR 2020](https://arxiv.org/abs/2005.14255)| 90 | |10 |AutoDebias: Learning to Debias for Recommendation|[slides](https://drive.google.com/file/d/16dNsfR8YXzFi99tXOdofEbxF6CA59e_u/view?usp=sharing)|[video](https://drive.google.com/file/d/16dNsfR8YXzFi99tXOdofEbxF6CA59e_u/view?usp=sharing)|[SIGIR 2021](https://arxiv.org/pdf/2105.04170.pdf)| 91 | |10 |An Audit of Misinformation Filter Bubbles on YouTube: Bubble Bursting and Recent Behavior Changes|[slides](https://drive.google.com/file/d/1heKsYaps3kXTVRJ4kNvep-uQT5r-dKcm/view?usp=sharing)|[video](https://drive.google.com/file/d/18KdwYhiJjOMgFKOCBe5F7BELSrVU0-ru/view?usp=sharing)|[RECSYS 2021](https://dl.acm.org/doi/pdf/10.1145/3460231.3474241)| 92 | |10 |Counterfactual Explainable 
Recommendation|[slides](https://drive.google.com/file/d/1bbf2wGidclixsyh21SlMyR1LYEKOr7AI/view?usp=sharing)|[video](https://drive.google.com/file/d/10IMr4MFXLgzZQsX5Krn_M0wUBrb8qJhg/view?usp=sharing)|[CIKM 2021](https://arxiv.org/abs/2108.10539)| 93 | |11 |A Neural Influence Diffusion Model for Social Recommendation|[slides](https://drive.google.com/file/d/1rakUMbq7lvMgHT0z-v03peZuutz5ZjPt/view?usp=sharing)|[video](https://drive.google.com/file/d/1GK3DbExUMrG_CdSEoldNIrYpSaG1L96h/view?usp=sharing)|[SIGIR 2019](https://dl.acm.org/citation.cfm?id=3331214)| 94 | |11 |An Efficient Adaptive Transfer Neural Network for Social-aware Recommendation|[slides](https://drive.google.com/file/d/14_armbAS3eraJtdl8pYpeiO6i7QRboxh/view?usp=sharing)|[video](https://drive.google.com/file/d/1CYzYkSuxxYlPYiEqyAa02TM6IMzudTlC/view?usp=sharing)|[SIGIR 2019](https://dl.acm.org/doi/10.1145/3331184.3331192)| 95 | |11 |Sequential Recommendation for Cold-start Users with Meta Transitional Learning|[slides](https://drive.google.com/file/d/1Gtl0Wya5ZzqGodNKIIwEZKt04mAfPIQU/view?usp=sharing)|[video](https://drive.google.com/file/d/1Z64bDh-0a3Z38P4LKIRZceoGKxfcddsI/view?usp=sharing)|[SIGIR 2021](http://people.tamu.edu/~jwang713/pubs/MetaTL-sigir2021)| 96 | |11 |“Serving Each User”: Supporting Different Eating Goals Through a Multi-List Recommender Interface|[slides](https://drive.google.com/file/d/1p-xbuH34xfilAmZ9YwSeLdJO1ar_n62O/view?usp=sharing)|[video](https://drive.google.com/file/d/1sVQ2vm_6CCwZXJCkskkPBDrK8aWht79Q/view?usp=sharing)|[RECSYS 2021](https://dl.acm.org/doi/pdf/10.1145/3460231.3474232)| 97 | |11 |Learning disentangled representations for recommendation|[slides](https://drive.google.com/file/d/1JZGVGwzrJ61sT7KK3v_o64EzoyyWg5yY/view?usp=sharing)|[video](https://drive.google.com/file/d/14Q4oD-1sKjfL99EvA72gkwL0dYmrUjT9/view?usp=sharing)|[Neurips 2019](https://dl.acm.org/doi/abs/10.5555/3454287.3454800)| 98 | |12 |Cold Start Similar Artists Ranking with Gravity-Inspired Graph 
Autoencoders|[slides](https://drive.google.com/file/d/1MoKhaqvM1xFrL786lO1ImabyGZ2i5BpS/view?usp=sharing)|[video](https://drive.google.com/file/d/1HdGrK2g2dgc7gzM6cgclYIFBYD7nKBs-/view?usp=sharing)|[RECSYS 2021](https://arxiv.org/pdf/2108.01053.pdf)| 99 | |12 |A Study of Defensive Methods to Protect Visual Recommendation Against Adversarial Manipulation of Images|[slides](https://drive.google.com/file/d/1z3i3GYMLXzrxDalQMi-QX6tv_GtHZUpF/view?usp=sharing)|[video](https://drive.google.com/file/d/1o6lsiuUyyGb8wu4P9_p8etV0EON7JN93/view?usp=sharing)|[SIGIR 21](https://dl.acm.org/doi/abs/10.1145/3404835.3462848)| 100 | |12 |Reward Constrained Interactive Recommendation with Natural Language Feedback|[slides](https://drive.google.com/file/d/1MMeiiQfnraZXYDBJiONgs5Q0dPWduBo2/view?usp=sharing)|[video](https://drive.google.com/file/d/1kpidqRMutix3GBP49_LRVFLb3_ILL0LF/view?usp=sharing)|[Neurips 2019](https://papers.nips.cc/paper/2019/hash/52130c418d4f02c74f74a5bc1f8020b2-Abstract.html)| 101 | |12 |Neural Attentional Rating Regression with Review-level Explanations|[slides](https://drive.google.com/file/d/1Vf1XvQEet1O7eoC4t2hi_Jha7iWvmnJu/view?usp=sharing)|[video](https://drive.google.com/file/d/1M2pSKx2hQHiPahwEpp3uJ_LZc7H36QAM/view?usp=sharing)|[WWW'18](https://dl.acm.org/doi/10.1145/3178876.3186070)| 102 | |12 |Justifying Recommendations using Distantly-Labeled Reviews and Fine-Grained Aspects|[slides](https://drive.google.com/file/d/1LyMIUACq-_wrXL661BvBArtQ4_pVudsA/view?usp=sharing)|[video](https://drive.google.com/file/d/1jayH4aDRbPnw8_k36agLfgzyisxU5TTd/view?usp=sharing)|[EMNLP-IJCNLP '19](https://cseweb.ucsd.edu/~jmcauley/pdfs/emnlp19a.pdf)| 103 | |12 |Neural Personalized Ranking for Image Recommendation|[slides](https://drive.google.com/file/d/1GZGqHAf7SRxhSVEZ4O3bNAWngGnJnqxJ/view?usp=sharing)|[video](https://drive.google.com/file/d/1dACQJJnTCu2hPzOyqjCuOJY5yiiEe7h6/view?usp=sharing)|[WSDM '18](https://dl.acm.org/doi/10.1145/3159652.3159728)| 104 | |13 |Progressive 
Layered Extraction (PLE): A Novel Multi-Task Learning (MTL) Model for Personalized Recommendations|[slides](https://drive.google.com/file/d/1kQf0uv-7mAiCESALmCJInBo6__DEac_g/view?usp=sharing)|[video](https://drive.google.com/file/d/1EHz5xNHPixO0-Fp2P1DwM85lQdDldu3o/view?usp=sharing)|[RecSys'20](https://dl.acm.org/doi/10.1145/3383313.3412236)| 105 | |13 |Alleviating Cold-Start Problems in Recommendation through Pseudo-Labelling over Knowledge Graph|[slides](https://drive.google.com/file/d/1HiKyHMkWpd0s2bdedb-BoNE7awRdpEpd/view?usp=sharing)|[video](https://drive.google.com/file/d/1KPx84eReljnuy4yjgtnkf8490UoL4PRb/view?usp=sharing)|[WSDM 2021](https://arxiv.org/abs/2011.05061)| 106 | |13 |Towards Deep Conversational Recommenders|[slides](https://drive.google.com/file/d/1ei9CCwlb73oGGh00xEehq8Vk7H06Ybc6/view?usp=sharing)|[video](https://drive.google.com/file/d/1hT5k2urDP5pHY7rjwB95q89VlahGX56q/view?usp=sharing)|[Neurips'18](https://proceedings.neurips.cc/paper/2018/file/800de15c79c8d840f4e78d3af937d4d4-Paper.pdf)| 107 | |13 |Graph Convolutional Network for Recommendation with Low-pass Collaborative Filters|[slides](https://drive.google.com/file/d/1yqY5D-vgxICvIQ0iIsUNhrsttrzyX4M9/view?usp=sharing)|[video](https://drive.google.com/file/d/1qoOqiqB0MFvijqlgX4V4pWMWt5E4EoK9/view?usp=sharing)|[ICML'20](https://proceedings.icml.cc/static/paper_files/icml/2020/530-Paper.pdf)| 108 | |13 |On Sampled Metrics for Item Recommendation|[slides](https://drive.google.com/file/d/1tfIs8xXJLzkD8XiYKOVPB7rbYlYYi_Z7/view?usp=sharing)|[video](https://drive.google.com/file/d/1JgEyER47enp52cXF8_K4U2UOx85XqSqV/view?usp=sharing)|[KDD'20](https://doi.org/10.1145/3394486.3403226)| 109 | |13 |Pessimistic Reward Models for Off-Policy Learning in Recommendation|[slides](https://drive.google.com/file/d/16A32tcnfy6ho8noaIwW2TBr2QyPbwPjY/view?usp=sharing)|[video](https://drive.google.com/file/d/1G5tsMZcFxqcIj97pwmLWb7CYVFN1fSKr/view?usp=sharing)|[RECSYS 
2021](https://dl.acm.org/doi/10.1145/3460231.3474247)| 110 | 111 | ### Proyectos finales 112 | 113 | L@s estudiantes trabajan en grupo sobre proyectos finales de curso, produciendo un poster, paper y repositorio con código para cada uno: 114 | 115 | 118 | 119 | 120 | 121 | 122 | | Grupo | Proyecto | Poster | Paper | 123 | |:------|:---------------------|:------:|:-----:| 124 | | 1 | VAE based model for single-target cross-domain recommendation
De Diego, Hernández, Schuit | [poster](https://github.com/PUC-RecSys-Class/proyecto-serendipity/blob/main/Poster.pdf) | [paper](https://github.com/PUC-RecSys-Class/proyecto-serendipity/blob/main/Informe.pdf) | 125 | | 2 | Spotify Playlist Continuation
Carstens, López, Mendoza | [poster](https://github.com/PUC-RecSys-Class/proyecto-elwin-s-soons/blob/main/Poster/Poster%20v1.pdf) | [paper](https://github.com/PUC-RecSys-Class/proyecto-elwin-s-soons/blob/main/Paper%20v0.pdf) | 126 | | 3 | Hybrid recommender of articles based on Topic Modeling and Collaborative Filtering
Flores | [poster](https://github.com/PUC-RecSys-Class/proyecto-freesolo/blob/master/poster.pdf) | [paper](https://github.com/PUC-RecSys-Class/proyecto-freesolo/blob/master/IIC3633_Paper.pdf) | 127 | | 4 | Efficiency of Shilling Attacks in Modern Recommenders
Brancoli, Gazali, Murtagh | [poster](https://github.com/PUC-RecSys-Class/proyecto-content-aware-inc/blob/master/Poster.pdf) | [paper](https://github.com/PUC-RecSys-Class/proyecto-content-aware-inc/blob/master/Paper.pdf) | 128 | | 5 | Métricas de Sesgo de Posición para Recomendaciones de Artículos Científicos
Brancoli, Gazali, Murtagh | [poster](https://github.com/PUC-RecSys-Class/proyecto-thepapiros/blob/main/Poster/poster.pdf) | [paper](https://github.com/PUC-RecSys-Class/proyecto-thepapiros/blob/main/Proyecto_Final_RecSys_2021_2.pdf) | 129 | | 6 | RoyaleNet: Deck Recommendation for Card-Based Competitive Online Videogames
Farías, Lepe, Romero | [poster](https://github.com/PUC-RecSys-Class/proyecto-recsteam-recsys-team/blob/main/docs/Poster.pdf) | [paper](https://github.com/PUC-RecSys-Class/proyecto-recsteam-recsys-team/blob/main/docs/RoyaleNet%2C%20Deck%20Recommendation%20for%20Card-Based%20Competitive%20Online%20Videogames.pdf) | 130 | | 7 | Evaluation of Web-Scale Transfer Learning on Art Image Recommendations
Tirreau | [poster](https://github.com/PUC-RecSys-Class/proyecto-coloro-s/blob/main/Final_Poster.pdf) | [paper](https://github.com/PUC-RecSys-Class/proyecto-coloro-s/blob/main/Paper.pdf) | 131 | | 8 | Apertron: un recomendador de aperturas de ajedrez basado en estilo de juego
Klemmer | [poster](https://github.com/PUC-RecSys-Class/proyecto-secopalamongas/blob/main/poster/Apertron_Poster.pdf) | [paper](https://github.com/PUC-RecSys-Class/proyecto-secopalamongas/blob/main/Klemmer_InformeProyecto.pdf) | 132 | | 9 | Recomendación Multimodal Adversaria
Ramirez, Mallea | [poster](https://github.com/PUC-RecSys-Class/proyecto-juan-banach/blob/main/Poster/Poster.pdf) | [paper](https://github.com/PUC-RecSys-Class/proyecto-juan-banach/blob/main/paper_.pdf) | 133 | | 10 | Evaluación de diversas estrategias de recomendación grupal para grupos heterogéneos
Cuturrufo, Berríos | [poster](https://github.com/PUC-RecSys-Class/proyecto-melonconvino-1/blob/main/Poster/poster-v1.pdf) | [paper](https://github.com/PUC-RecSys-Class/proyecto-melonconvino-1/blob/main/Paper/BerriosCuturrufo.pdf) | 134 | | 11 | Personalized News Recommendation and Bandits
Alvarado | [poster](https://github.com/PUC-RecSys-Class/proyecto-nfalvara2/blob/main/Poster_RecSys%20(1).pdf) | [paper](https://github.com/PUC-RecSys-Class/proyecto-nfalvara2/blob/main/Personalized_News_Recommendation_and_Bandits%20(1).pdf) | 135 | | 12 | Group Recommender Systems for Board Games
Muñoz, Piña, Vega | [poster](https://github.com/PUC-RecSys-Class/proyecto-recsysters/blob/master/Entrega%20Final/poster.pdf) | [paper](https://github.com/PUC-RecSys-Class/proyecto-recsysters/blob/master/Entrega%20Final/Paper.pdf) | 136 | | 13 | Subtitles Content Based Movie Recommendation System
Tramon, Yon, Jimenez | [poster](https://github.com/PUC-RecSys-Class/proyecto-recomiendo-chile-proyect-team/blob/main/Poster_Entrega_Final.pdf) | [paper](https://github.com/PUC-RecSys-Class/proyecto-recomiendo-chile-proyect-team/blob/main/Paper_Final_RecSys.pdf) | 137 | | 14 | Context-Aware Course Grade Prediction Using Tensor and Coupled Matrix Factorization
Carrasco, García, Velásquez | [poster](https://github.com/PUC-RecSys-Class/proyecto-vilenewts/blob/main/Poster/Poster.pdf) | [paper](https://github.com/PUC-RecSys-Class/proyecto-vilenewts/blob/main/Entrega03/IIC3633___Proyecto_03___Paper.pdf) | 138 | | 15 | EEG Signal Processing for Recommender Systems
Iruretagoyena, Mendeta, Sumonte | [poster](https://github.com/PUC-RecSys-Class/proyecto-cerebritos/blob/main/Entrega%20final/Poster.png) | [paper](https://github.com/PUC-RecSys-Class/proyecto-cerebritos/blob/main/Entrega%20final/EEG%20Signal%20Processing%20for%20Recommender%20Systems.pdf) | 139 | | 16 | Recomendación de Videojuegos en Steam Basada en Imágenes
Guzman, Hernandez, Molina | [poster](https://github.com/PUC-RecSys-Class/proyecto-recsystence/blob/main/Poster_IIC3633.pdf) | [paper](https://github.com/PUC-RecSys-Class/proyecto-recsystence/blob/main/Paper_IIC3633.pdf) | 140 | | 17 | Bias analysis in recommendations from content creators point of view on Twitter
Álvarez, Tapia, Trejo | [poster](https://github.com/PUC-RecSys-Class/proyecto-rectweet/blob/main/PosterFinal.pdf) | [paper](https://github.com/PUC-RecSys-Class/proyecto-rectweet/blob/main/Informe%20final.pdf) | 141 | | 18 | Optimización de subsets para generar datos de entrenamientos y de pruebas para el desarrollo de sistemas recomendadores
Magna, Fuenzalida, Gonzales | [poster](https://github.com/PUC-RecSys-Class/proyecto-nombreoriginal/blob/main/poster.pptx) | [paper](https://github.com/PUC-RecSys-Class/proyecto-nombreoriginal/blob/main/Paper%20RecSys.pdf) | 142 | | 19 | Recomendación Personalizada de Rutinas Deportivas
Prieto, Sepúlveda, Olea | [poster](https://github.com/PUC-RecSys-Class/proyecto-panconcilantro/blob/main/poster.pdf) | [paper](https://github.com/PUC-RecSys-Class/proyecto-panconcilantro/blob/main/paper.pdf) | 143 | | 20 | Recomendación basada en contenido a partir de imágenes de artículos de Home & Kitchen
Guzmán | [poster](https://github.com/PUC-RecSys-Class/proyecto-maguzman1/blob/master/Poster_IIC3633_GuzmanMax.pdf) | [paper](https://github.com/PUC-RecSys-Class/proyecto-maguzman1/blob/master/Paper_IIC3633_GuzmanMax.pdf) | 144 | | 21 | Recomendadores de Noticias: MIND
Dlugoszewski, Sfeir, Burgos | [poster](https://github.com/PUC-RecSys-Class/proyecto-mind/blob/master/Poster/poster.pptx.svg) | [paper](https://github.com/PUC-RecSys-Class/proyecto-mind/blob/master/Paper/mind.pdf) | 145 | | 22 | Recomendar composiciones de imágenes de manera personalizada.
Ramirez | [poster]() | [paper](https://github.com/PUC-RecSys-Class/proyecto-www/blob/main/Paper_Final_SysRec.pdf) | 146 | 147 | 148 | 149 | 169 | 170 | ### Planificación y Evaluaciones 171 | 172 | **MES 1** En las primeras semanas nos enfocaremos en métodos básicos para hacer recomendación usando y prediciendo ratings (filtrado colaborativo User-based & item-based, slope-one). Luego veremos métodos de factorización matricial para ratings y para feedback implícito. En la 3ra semana veremos formas adicionales de evaluar más alla de la métricas de error de predicción de rating (MAE, MSE, RMSE) e incorporaremos métricas para evaluar listas de ítems (precision, recall, MAP, P@n, nDCG). Veremos métodos basados en contenido y sistemas híbridos. 173 | 174 | **MES 2** Métodos basados en contexto, máquinas de factorización y modelos fundamentales de deep learning para recomendación. Recapitulación de las tareas de recomendacion (predecir rating, predecir una lista de items, recomendar una secuencia, recomendación TopN) y de su evaluacion considerando diversidad, novedad, coverage, y otras métricas. 175 | 176 | **MES 3** User-centric RecSys, FAT (Fairness, Accountability and Transparency), Aplicaciones de Deep learning para problemas más específicos: recomendación de ropa, multimedia, etc. Modelos profundos generativos para recomendación. Revisaremos problemas de recomendación aún no resueltos en el área. 177 | 178 | **MES 4** Principalmente presentaciones de alumnos. 179 | 180 | ### Código de Honor 181 | 182 | Este curso adscribe el Código de Honor establecido por la Escuela de Ingeniería el que es vinculante. Todo trabajo evaluado en este curso debe ser propio. En caso de que exista colaboración permitida con otros estudiantes, el trabajo deberá referenciar y atribuir correctamente dicha contribución a quien corresponda. 
Como estudiante es su deber conocer la versión en línea del Código de Honor 183 | 184 | ### Evaluaciones 185 | 186 | Detalles de las evaluaciones en [esta presentacion](https://docs.google.com/presentation/d/1cuLgkwgrYTrJc-fuE8mSq0DWYUX82ockdMxCyLAJn6Y/edit#slide=id.g15879d578a_0_66). 187 | 188 | **Tarea 1** 189 | 190 | Al final de las primeras 4 semanas, las(los) estudiantes implementarán mecanismos de recomendación para predecir ratings y para rankear items en un dataset que se entregará durante clases. Usarán la biblioteca pyreclab para los métodos básicos, pero si quieren optar a la nota máxima debe hacer un sistema híbrido o contextual que utilice información de contenido, como texto o imágenes. Para tener una idea de qué se trata la tarea, pueden revisar el [enunciado de la tarea](https://github.com/PUC-RecSys-Class/RecSysPUC-2021/blob/master/tareas/Tarea_1_RecSys_2021_2.pdf) 191 | 192 | 193 | 194 | **Lecturas: Blog y Presentación** 195 | 196 | Fecha de revisión de blogs: El post de la semana x, tiene fecha de entrega el lunes a las 20:00 de la semana x+1. Ejemplo: Las lecturas de la semana 1 (clases el 17 y 19 de agosto) se entregan a más tardar el lunes 23 de agosto de 2021 a las 20:00. 197 | 198 | Cada alumno tendrá un repositorio en github (debe indicarlo en [este formulario](https://forms.gle/5L9T1TDy8ZsZqgWh7)) donde escribirá en markdown sus comentarios respecto de los papers indicados como obligatorios. No es necesario hacer un resumen largo del paper, sino indicar un resumen corto, puntos que pueden abrir discusión, mejoras o controversias: Evaluación inadecuada, parámetros importantes no considerados, potenciales mejoras de los algoritmos, fuentes de datos que podían mejorar los resultados, etc. 199 | 200 | Adicionalmente, cada alumno presentará al menos una vez durante el semestre un paper sobre un tópico, con el objetivo de abrir una discusión sobre el tema durante la clase. 
201 | 202 | **Proyecto Final** 203 | 204 | Durante septiembre, las(los) estudiantes enviarán una idea de proyecto final, la cual desarrollarán durante octubre y noviembre. Enviarán un informe de avance a fines de octubre, para hacer una presentación de su proyecto al final del curso en una sesión de posters. 205 | 206 | ## Planificación general (sujeta a actualización) 207 | 208 | (actualizada el 17 de agosto de 2021) 209 | 210 | ![Planificacion RecSys 2021](https://user-images.githubusercontent.com/37160854/134259748-be5f1ab5-3e1c-4a42-a5d6-b75c4627945c.png) 211 | 212 | ## Lecturas por Semana 213 | 214 | Para descargar los archivos se sugiere buscarlos en [Scholar](https://scholar.google.cl) o a través de [EZProxy](https://login.pucdechile.idm.oclc.org/) 215 | 216 | ### Semana 1 (entrega el 23 de agosto): 217 | 218 | **Obligatorias** 219 | * Schafer, J. B., Frankowski, D., Herlocker, J., & Sen, S. (2007). Collaborative filtering recommender systems. In The adaptive web (pp. 291-324). Springer Berlin Heidelberg. [pdf](https://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.130.4520&rep=rep1&type=pdf) 220 | 221 | **Sugeridas** 222 | * Sarwar, B., Karypis, G., Konstan, J., & Riedl, J. (2001). Item-based collaborative filtering recommendation algorithms. In Proceedings of the 10th international conference on World Wide Web (pp. 285-295). 223 | * [Post original FunkSVD](https://sifter.org/~simon/journal/20061211.html) 224 | * Lemire, D., & Maclachlan, A. (2005). Slope One Predictors for Online Rating-Based Collaborative Filtering. In SDM (Vol. 5, pp. 1-5). 225 | * Kluver, D., Ekstrand, M. D., & Konstan, J. A. (2018). Rating-based collaborative filtering: algorithms and evaluation. Social Information Access, 344-390. 226 | 227 | ### Semana 2 (entrega el 30 de agosto): 228 | 229 | **Obligatorias** 230 | * Koren, Y., Bell, R., & Volinsky, C. (2009). Matrix factorization techniques for recommender systems. Computer, 42(8), 30-37. 
[pdf](https://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.441.3234&rep=rep1&type=pdf) 231 | 232 | **Sugeridas** 233 | * Hu, Y., Koren, Y., & Volinsky, C. (2008). Collaborative filtering for implicit feedback datasets. In Data Mining, 2008. ICDM’08. Eighth IEEE International Conference on (pp. 263-272). IEEE. 234 | * Takács, G., Pilászy, I., Németh, B., & Tikk, D. (2009). Scalable collaborative filtering approaches for large recommender systems. Journal of machine learning research, 10(Mar), 623-656. 235 | * Rendle, S., Freudenthaler, C., Gantner, Z., & Schmidt-Thieme, L. (2009). BPR: Bayesian personalized ranking from implicit feedback. In Proceedings of the Twenty-Fifth Conference on Uncertainty in Artificial Intelligence (pp. 452-461). AUAI Press. 236 | * Pan, R., Zhou, Y., Cao, B., Liu, N. N., Lukose, R., Scholz, M., & Yang, Q. (2008). One-class collaborative filtering. In 2008 Eighth IEEE International Conference on Data Mining (pp. 502-511). IEEE. En este artículo aparecen la derivación y reglas de actualización de los parámetros así como las nociones de AMAN y AMAU. 237 | * Jannach, D., Lerche, L., & Zanker, M. (2018). Recommending based on implicit feedback. In Social Information Access (pp. 510-569). Springer, Cham. 238 | * Srebro, N., & Jaakkola, T. (2003). Weighted low-rank approximations. In Proceedings of the 20th International Conference on Machine Learning (ICML-03) (pp. 720-727). Artículo citado por Pan et al. (2008) indicando detalles de la versión no regularizada que inspira OCCF. 239 | * El siguiente paper es opcional, pero permite entender cómo se deriva el modelo del paper de Hu et al.: Takács, G., Pilászy, I., & Tikk, D. (2011). Applications of the conjugate gradient method for implicit feedback collaborative filtering. In Proceedings of the fifth ACM conference on Recommender systems (pp. 297-300). ACM. 240 | * Verstrepen, K., Bhaduriy, K., Cule, B., & Goethals, B. (2017). Collaborative filtering for binary, positive-only data. 
ACM Sigkdd Explorations Newsletter, 19(1), 1-21. 241 | 242 | ### Semana 3 (entrega el 6 de septiembre): 243 | 244 | **Obligatorias** 245 | * Shani, G., & Gunawardana, A. (2011) “Evaluating recommendation systems.” In Recommender systems handbook, pp. 257-297. Springer US, 2011. [pdf](https://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.712.4138&rep=rep1&type=pdf) 246 | 247 | **Sugeridas** 248 | * Herlocker, J. L., Konstan, J. A., Terveen, L. G., & Riedl, J. T. (2004). Evaluating collaborative filtering recommender systems. ACM Transactions on Information Systems (TOIS), 22(1), 5-53. 249 | * Cremonesi, P., Koren, Y., & Turrin, R. (2010). Performance of recommender algorithms on top-n recommendation tasks. In Proceedings of the fourth ACM conference on Recommender systems (pp. 39-46). ACM. 250 | 251 | ### Semana 4 (entrega el 13 de septiembre): 252 | 253 | **Obligatorias** 254 | * Pazzani, M. J., & Billsus, D. (2007). Content-based recommendation systems. In The adaptive web (pp. 325-341). Springer Berlin Heidelberg. [pdf](https://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.448.662&rep=rep1&type=pdf) 255 | 256 | **Sugeridas** 257 | * Xu, W., Liu, X., & Gong, Y. (2003). Document clustering based on non-negative matrix factorization. In Proceedings of the 26th annual international ACM SIGIR conference on Research and development in informaion retrieval (pp. 267-273). ACM. 258 | * Messina, P., Dominguez, V., Parra, D., Trattner, C., & Soto, A. (2019). Content-based artwork recommendation: integrating painting metadata with neural and manually-engineered visual features. User Modeling and User-Adapted Interaction, 29(2), 251-290. 259 | * Celma, Ò., & Herrera, P. (2008). A new approach to evaluating novel recommendations. In Proceedings of the 2008 ACM conference on Recommender systems (pp. 179-186). 260 | * Van den Oord, A., Dieleman, S., & Schrauwen, B. (2013). Deep content-based music recommendation. In Advances in neural information processing systems (pp. 
2643-2651). 261 | 262 | ### Semana 5 (entrega el 20 de septiembre): 263 | 264 | **Obligatorias (esta semana se puede elegir una de las dos para entregar*)** 265 | * Adomavicius, G., Mobasher, B., Ricci, F. and Tuzhilin, A. (2011). Context-Aware Recommender Systems. AI Magazine, 32(3), 67-80. 266 | * Jahrer, M., Töscher, A. and Legenstein, R. (2010). Combining predictions for accurate recommender systems. In Proceedings of the 16th ACM SIGKDD International Conference on Knowledge Discovery and Data Mining, 693-702. ACM. 267 | 268 | **Sugeridas** 269 | * Pigi K., Shobeir F., James F., Magdalini E. and Lise G. (2015). HyPER: A Flexible and Extensible Probabilistic Framework for Hybrid Recommender Systems. In Proceedings of the 9th ACM Conference on Recommender Systems (RecSys '15), 99–106. ACM. 270 | * Rendle, S. (2010). Factorization machines. In 2010 IEEE International Conference on Data Mining (pp. 995-1000). IEEE. 271 | 272 | *No olvidar declarar en la crítica el título elegido. 273 | 274 | ### Semanas 6 (entrega el 27 de septiembre): 275 | 276 | **Obligatorias** 277 | * Hasta la sección 3.4 (incluyendo 3.4): Zhang, S., Yao, L., Sun, A., & Tay, Y. (2019). Deep learning based recommender system: A survey and new perspectives. ACM Computing Surveys (CSUR), 52(1), 1-38. 278 | 279 | **Sugeridas** 280 | * Covington, P., Adams, J., & Sargin, E. (2016). Deep neural networks for youtube recommendations. In Proceedings of the 10th ACM conference on recommender systems (pp. 191-198). 281 | * Bansal, T., Belanger, D., & McCallum, A. (2016). Ask the gru: Multi-task learning for deep text recommendations. In Proceedings of the 10th ACM Conference on Recommender Systems (pp. 107-114). 282 | 283 | ### Semana 7 (entrega el 4 de octubre): 284 | 285 | **Obligatorias** 286 | * He, C., Parra, D., & Verbert, K. (2016). Interactive recommender systems: A survey of the state of the art and future research challenges and opportunities. Expert Systems with Applications, 56, 9-27. 
287 | 288 | **Sugeridas** 289 | * Bostandjiev, S., O'Donovan, J., & Höllerer, T. (2012). TasteWeights: a visual interactive hybrid recommender system. In Proceedings of the sixth ACM conference on Recommender systems (pp. 35-42). 290 | * Knijnenburg, B., Bostandjiev, S., O'Donovan, J., and Kobsa, A. (2012). Inspectability and control in social recommenders. RecSys'12 - Proceedings of the 6th ACM Conference on Recommender Systems. 291 | * Pu, P., Chen, L. and Hu, R. (2011). A user-centric evaluation framework for recommender systems. RecSys'11 - Proceedings of the 5th ACM Conference on Recommender Systems. 157-164. 292 | * Parra, D., Brusilovsky, P., and Trattner, C. (2014). See What You Want to See: Visual User-Driven Approach for Hybrid Recommendation. International Conference on Intelligent User Interfaces, Proceedings IUI. 293 | * Andjelkovic, I., Parra, D., & O’Donovan, J. (2019). Moodplay: Interactive music recommendation based on Artists’ mood similarity. International Journal of Human-Computer Studies, 121, 142-159. 
294 | 295 | 296 | 328 | -------------------------------------------------------------------------------- /Tarea_1_RecSys_2021_2.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PUC-RecSys-Class/RecSysPUC-2021/39a9f6d7dba2e869118012dc5fec1140a2e0e007/Tarea_1_RecSys_2021_2.pdf -------------------------------------------------------------------------------- /clases/s1-c2-SlopeOne.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PUC-RecSys-Class/RecSysPUC-2021/39a9f6d7dba2e869118012dc5fec1140a2e0e007/clases/s1-c2-SlopeOne.pdf -------------------------------------------------------------------------------- /clases/s1-c2-UBCF_clustering.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PUC-RecSys-Class/RecSysPUC-2021/39a9f6d7dba2e869118012dc5fec1140a2e0e007/clases/s1-c2-UBCF_clustering.pdf -------------------------------------------------------------------------------- /clases/s1-c2-nonpers-UBCF.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PUC-RecSys-Class/RecSysPUC-2021/39a9f6d7dba2e869118012dc5fec1140a2e0e007/clases/s1-c2-nonpers-UBCF.pdf -------------------------------------------------------------------------------- /clases/s10_c1_activelearning.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PUC-RecSys-Class/RecSysPUC-2021/39a9f6d7dba2e869118012dc5fec1140a2e0e007/clases/s10_c1_activelearning.pdf -------------------------------------------------------------------------------- /clases/s10_c1_activelearning_v3.pdf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/PUC-RecSys-Class/RecSysPUC-2021/39a9f6d7dba2e869118012dc5fec1140a2e0e007/clases/s10_c1_activelearning_v3.pdf -------------------------------------------------------------------------------- /clases/s11_c1_deep_learning.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PUC-RecSys-Class/RecSysPUC-2021/39a9f6d7dba2e869118012dc5fec1140a2e0e007/clases/s11_c1_deep_learning.pdf -------------------------------------------------------------------------------- /clases/s11_c2_secu_deep_learning.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PUC-RecSys-Class/RecSysPUC-2021/39a9f6d7dba2e869118012dc5fec1140a2e0e007/clases/s11_c2_secu_deep_learning.pdf -------------------------------------------------------------------------------- /clases/s12_c1_deep_learning_s.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PUC-RecSys-Class/RecSysPUC-2021/39a9f6d7dba2e869118012dc5fec1140a2e0e007/clases/s12_c1_deep_learning_s.pdf -------------------------------------------------------------------------------- /clases/s12_c2_10bigproblems-recsys-small.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PUC-RecSys-Class/RecSysPUC-2021/39a9f6d7dba2e869118012dc5fec1140a2e0e007/clases/s12_c2_10bigproblems-recsys-small.pdf -------------------------------------------------------------------------------- /clases/s2_c1-IBCF.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PUC-RecSys-Class/RecSysPUC-2021/39a9f6d7dba2e869118012dc5fec1140a2e0e007/clases/s2_c1-IBCF.pdf -------------------------------------------------------------------------------- /clases/s2_c2-Factorizacion_matricial.pdf: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/PUC-RecSys-Class/RecSysPUC-2021/39a9f6d7dba2e869118012dc5fec1140a2e0e007/clases/s2_c2-Factorizacion_matricial.pdf -------------------------------------------------------------------------------- /clases/s3_c1-Implicit-feedback.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PUC-RecSys-Class/RecSysPUC-2021/39a9f6d7dba2e869118012dc5fec1140a2e0e007/clases/s3_c1-Implicit-feedback.pdf -------------------------------------------------------------------------------- /clases/s3_c2-BPR.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PUC-RecSys-Class/RecSysPUC-2021/39a9f6d7dba2e869118012dc5fec1140a2e0e007/clases/s3_c2-BPR.pdf -------------------------------------------------------------------------------- /clases/s4_c1-metricas.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PUC-RecSys-Class/RecSysPUC-2021/39a9f6d7dba2e869118012dc5fec1140a2e0e007/clases/s4_c1-metricas.pdf -------------------------------------------------------------------------------- /clases/s4_c2-tests_estadisticos.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PUC-RecSys-Class/RecSysPUC-2021/39a9f6d7dba2e869118012dc5fec1140a2e0e007/clases/s4_c2-tests_estadisticos.pdf -------------------------------------------------------------------------------- /clases/s5_c1-content.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PUC-RecSys-Class/RecSysPUC-2021/39a9f6d7dba2e869118012dc5fec1140a2e0e007/clases/s5_c1-content.pdf -------------------------------------------------------------------------------- /clases/s5_c1_p2-content.pdf: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/PUC-RecSys-Class/RecSysPUC-2021/39a9f6d7dba2e869118012dc5fec1140a2e0e007/clases/s5_c1_p2-content.pdf -------------------------------------------------------------------------------- /clases/s5_c2-content.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PUC-RecSys-Class/RecSysPUC-2021/39a9f6d7dba2e869118012dc5fec1140a2e0e007/clases/s5_c2-content.pdf -------------------------------------------------------------------------------- /clases/s6_c1-contexto.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PUC-RecSys-Class/RecSysPUC-2021/39a9f6d7dba2e869118012dc5fec1140a2e0e007/clases/s6_c1-contexto.pdf -------------------------------------------------------------------------------- /clases/s6_c1-hibridos.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PUC-RecSys-Class/RecSysPUC-2021/39a9f6d7dba2e869118012dc5fec1140a2e0e007/clases/s6_c1-hibridos.pdf -------------------------------------------------------------------------------- /clases/s6_c2_p2-FMachines.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PUC-RecSys-Class/RecSysPUC-2021/39a9f6d7dba2e869118012dc5fec1140a2e0e007/clases/s6_c2_p2-FMachines.pdf -------------------------------------------------------------------------------- /clases/s6_c2_p3-blending_ensemble.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PUC-RecSys-Class/RecSysPUC-2021/39a9f6d7dba2e869118012dc5fec1140a2e0e007/clases/s6_c2_p3-blending_ensemble.pdf -------------------------------------------------------------------------------- /clases/s7_c1_deep_learning.pdf: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/PUC-RecSys-Class/RecSysPUC-2021/39a9f6d7dba2e869118012dc5fec1140a2e0e007/clases/s7_c1_deep_learning.pdf -------------------------------------------------------------------------------- /clases/s7_c2_deep_learning_s.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PUC-RecSys-Class/RecSysPUC-2021/39a9f6d7dba2e869118012dc5fec1140a2e0e007/clases/s7_c2_deep_learning_s.pdf -------------------------------------------------------------------------------- /clases/s7_c2_secu_deep_learning.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PUC-RecSys-Class/RecSysPUC-2021/39a9f6d7dba2e869118012dc5fec1140a2e0e007/clases/s7_c2_secu_deep_learning.pdf -------------------------------------------------------------------------------- /clases/s9_c1_usercentric.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PUC-RecSys-Class/RecSysPUC-2021/39a9f6d7dba2e869118012dc5fec1140a2e0e007/clases/s9_c1_usercentric.pdf -------------------------------------------------------------------------------- /clases/s9_c2_FATv2.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PUC-RecSys-Class/RecSysPUC-2021/39a9f6d7dba2e869118012dc5fec1140a2e0e007/clases/s9_c2_FATv2.pdf -------------------------------------------------------------------------------- /posters/A1-andrade_dominguez_pattillo.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PUC-RecSys-Class/RecSysPUC-2021/39a9f6d7dba2e869118012dc5fec1140a2e0e007/posters/A1-andrade_dominguez_pattillo.jpg -------------------------------------------------------------------------------- 
/posters/A3-olguin_lopez_ibarra.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PUC-RecSys-Class/RecSysPUC-2021/39a9f6d7dba2e869118012dc5fec1140a2e0e007/posters/A3-olguin_lopez_ibarra.png -------------------------------------------------------------------------------- /posters/A4_donosoguzman.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PUC-RecSys-Class/RecSysPUC-2021/39a9f6d7dba2e869118012dc5fec1140a2e0e007/posters/A4_donosoguzman.png -------------------------------------------------------------------------------- /posters/A5-codoceo_escudero_torres.pptx.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PUC-RecSys-Class/RecSysPUC-2021/39a9f6d7dba2e869118012dc5fec1140a2e0e007/posters/A5-codoceo_escudero_torres.pptx.png -------------------------------------------------------------------------------- /posters/A6-Contreras_Molina_Stambuk.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PUC-RecSys-Class/RecSysPUC-2021/39a9f6d7dba2e869118012dc5fec1140a2e0e007/posters/A6-Contreras_Molina_Stambuk.png -------------------------------------------------------------------------------- /posters/A7-ovalle_valdes.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PUC-RecSys-Class/RecSysPUC-2021/39a9f6d7dba2e869118012dc5fec1140a2e0e007/posters/A7-ovalle_valdes.png -------------------------------------------------------------------------------- /posters/N2-cartegana_huerfano_toscano.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PUC-RecSys-Class/RecSysPUC-2021/39a9f6d7dba2e869118012dc5fec1140a2e0e007/posters/N2-cartegana_huerfano_toscano.png 
-------------------------------------------------------------------------------- /posters/N3-Valencia-González.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PUC-RecSys-Class/RecSysPUC-2021/39a9f6d7dba2e869118012dc5fec1140a2e0e007/posters/N3-Valencia-González.png -------------------------------------------------------------------------------- /posters/N4_castro_casassus.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PUC-RecSys-Class/RecSysPUC-2021/39a9f6d7dba2e869118012dc5fec1140a2e0e007/posters/N4_castro_casassus.png -------------------------------------------------------------------------------- /posters/N5-Labarca_Fuentes.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PUC-RecSys-Class/RecSysPUC-2021/39a9f6d7dba2e869118012dc5fec1140a2e0e007/posters/N5-Labarca_Fuentes.png -------------------------------------------------------------------------------- /posters/N6-Aguilera_Everke.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PUC-RecSys-Class/RecSysPUC-2021/39a9f6d7dba2e869118012dc5fec1140a2e0e007/posters/N6-Aguilera_Everke.png -------------------------------------------------------------------------------- /posters/N7-suarez_carreno_alipanah.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PUC-RecSys-Class/RecSysPUC-2021/39a9f6d7dba2e869118012dc5fec1140a2e0e007/posters/N7-suarez_carreno_alipanah.png -------------------------------------------------------------------------------- /posters/N8-Guinez_Ruiz_Sanchez.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/PUC-RecSys-Class/RecSysPUC-2021/39a9f6d7dba2e869118012dc5fec1140a2e0e007/posters/N8-Guinez_Ruiz_Sanchez.png -------------------------------------------------------------------------------- /posters/V1-salinas.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PUC-RecSys-Class/RecSysPUC-2021/39a9f6d7dba2e869118012dc5fec1140a2e0e007/posters/V1-salinas.png -------------------------------------------------------------------------------- /posters/V2-waugh_hanuch_ricke.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PUC-RecSys-Class/RecSysPUC-2021/39a9f6d7dba2e869118012dc5fec1140a2e0e007/posters/V2-waugh_hanuch_ricke.jpg -------------------------------------------------------------------------------- /posters/V3_duarte_lopez_rodriguez.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PUC-RecSys-Class/RecSysPUC-2021/39a9f6d7dba2e869118012dc5fec1140a2e0e007/posters/V3_duarte_lopez_rodriguez.png -------------------------------------------------------------------------------- /posters/V4-diaz_vinay.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PUC-RecSys-Class/RecSysPUC-2021/39a9f6d7dba2e869118012dc5fec1140a2e0e007/posters/V4-diaz_vinay.png -------------------------------------------------------------------------------- /posters/V5-alliende.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PUC-RecSys-Class/RecSysPUC-2021/39a9f6d7dba2e869118012dc5fec1140a2e0e007/posters/V5-alliende.png -------------------------------------------------------------------------------- /posters/V6_perez_ramos.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/PUC-RecSys-Class/RecSysPUC-2021/39a9f6d7dba2e869118012dc5fec1140a2e0e007/posters/V6_perez_ramos.png -------------------------------------------------------------------------------- /posters/V7-tapia_villagran.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PUC-RecSys-Class/RecSysPUC-2021/39a9f6d7dba2e869118012dc5fec1140a2e0e007/posters/V7-tapia_villagran.png -------------------------------------------------------------------------------- /posters/V8-biskupovic.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PUC-RecSys-Class/RecSysPUC-2021/39a9f6d7dba2e869118012dc5fec1140a2e0e007/posters/V8-biskupovic.png -------------------------------------------------------------------------------- /practicos/FastFM_factorization_machines.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "colab_type": "text", 7 | "id": "view-in-github" 8 | }, 9 | "source": [ 10 | "\"Open\n" 11 | ] 12 | }, 13 | { 14 | "cell_type": "markdown", 15 | "metadata": {}, 16 | "source": [ 17 | "# Práctico librería fastFM - Factorization Machines\n", 18 | "\n", 19 | "Clase: IIC3633 Sistemas Recomendadores, PUC Chile\n", 20 | "\n", 21 | "En este práctico vamos a utilizar la biblioteca de Python [fastFM](https://github.com/ibayer/fastFM) para recomendación utilizando máquinas de factorización. \n", 22 | "\n", 23 | "En este caso utilizaremos un dataset de cervezas, donde además de incluir interacciones de usuarios con los items agregaremos feature de tipo de cerveza. 
(style-id)" 24 | ] 25 | }, 26 | { 27 | "cell_type": "code", 28 | "execution_count": 4, 29 | "metadata": {}, 30 | "outputs": [ 31 | { 32 | "name": "stdout", 33 | "output_type": "stream", 34 | "text": [ 35 | " % Total % Received % Xferd Average Speed Time Time Time Current\n", 36 | " Dload Upload Total Spent Left Speed\n", 37 | "100 388 0 388 0 0 388 0 --:--:-- --:--:-- --:--:-- 388\n", 38 | "100 775k 100 775k 0 0 525k 0 0:00:01 0:00:01 --:--:-- 525k\n" 39 | ] 40 | } 41 | ], 42 | "source": [ 43 | "!curl -L -o \"beer_data.base\" \"https://docs.google.com/uc?export=download&id=1yp9UpqPCESNySlWlDoSEau5aBNKx0nYB\"" 44 | ] 45 | }, 46 | { 47 | "cell_type": "code", 48 | "execution_count": 5, 49 | "metadata": {}, 50 | "outputs": [ 51 | { 52 | "name": "stdout", 53 | "output_type": "stream", 54 | "text": [ 55 | "\u001b[33mThe directory '/Users/andrescarvallo/Library/Caches/pip/http' or its parent directory is not owned by the current user and the cache has been disabled. Please check the permissions and owner of that directory. If executing pip with sudo, you may want sudo's -H flag.\u001b[0m\n", 56 | "\u001b[33mThe directory '/Users/andrescarvallo/Library/Caches/pip' or its parent directory is not owned by the current user and caching wheels has been disabled. check the permissions and owner of that directory. 
If executing pip with sudo, you may want sudo's -H flag.\u001b[0m\n", 57 | "Requirement already satisfied: fastFM in /Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages\n", 58 | "Requirement already satisfied: cython in /Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages (from fastFM)\n", 59 | "Requirement already satisfied: scikit-learn in /Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages (from fastFM)\n", 60 | "Requirement already satisfied: scipy in /Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages (from fastFM)\n", 61 | "Requirement already satisfied: numpy in /Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages (from fastFM)\n", 62 | "\u001b[33mYou are using pip version 9.0.1, however version 20.2.2 is available.\n", 63 | "You should consider upgrading via the 'pip install --upgrade pip' command.\u001b[0m\n" 64 | ] 65 | } 66 | ], 67 | "source": [ 68 | "!pip3 install fastFM" 69 | ] 70 | }, 71 | { 72 | "cell_type": "code", 73 | "execution_count": 6, 74 | "metadata": {}, 75 | "outputs": [], 76 | "source": [ 77 | "import numpy as np\n", 78 | "import pandas as pd\n", 79 | "import fastFM\n", 80 | "from fastFM.datasets import make_user_item_regression\n", 81 | "from sklearn.model_selection import train_test_split\n", 82 | "from fastFM import sgd\n", 83 | "from fastFM import als\n", 84 | "from sklearn.metrics import mean_squared_error\n", 85 | "import matplotlib.pyplot as plt\n", 86 | "from scipy.sparse import csc_matrix\n", 87 | "from fastFM import mcmc\n", 88 | "import functools as fct\n", 89 | "import itertools as itools\n", 90 | "import random, scipy" 91 | ] 92 | }, 93 | { 94 | "cell_type": "markdown", 95 | "metadata": {}, 96 | "source": [ 97 | "# Antes de recomendar hacemos un analisis de los datos " 98 | ] 99 | }, 100 | { 101 | "cell_type": "code", 102 | "execution_count": 7, 103 | "metadata": {}, 104 | "outputs": [ 105 | { 106 | 
"data": { 107 | "text/html": [ 108 | "
\n", 109 | "\n", 122 | "\n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | "
userIDitemIDstyleIDrating
049241175711994.5
14924544111994.5
249241996011995.0
329165590011992.5
4291657110148794.0
\n", 170 | "
" 171 | ], 172 | "text/plain": [ 173 | " userID itemID styleID rating\n", 174 | "0 4924 11757 1199 4.5\n", 175 | "1 4924 5441 1199 4.5\n", 176 | "2 4924 19960 1199 5.0\n", 177 | "3 2916 55900 1199 2.5\n", 178 | "4 2916 57110 14879 4.0" 179 | ] 180 | }, 181 | "execution_count": 7, 182 | "metadata": {}, 183 | "output_type": "execute_result" 184 | } 185 | ], 186 | "source": [ 187 | "df = pd.read_csv('beer_data.base', sep=',',encoding='latin-1')\n", 188 | "df.head()" 189 | ] 190 | }, 191 | { 192 | "cell_type": "code", 193 | "execution_count": 8, 194 | "metadata": {}, 195 | "outputs": [ 196 | { 197 | "name": "stdout", 198 | "output_type": "stream", 199 | "text": [ 200 | "Num. of items: 1836\n", 201 | "Num. of users: 8320\n", 202 | "Num. of ratings: 44379\n" 203 | ] 204 | } 205 | ], 206 | "source": [ 207 | "num_of_items = len(df['itemID'].unique().tolist())\n", 208 | "num_of_users = len(df['userID'].unique().tolist())\n", 209 | "num_of_ratings = len(df['userID'])\n", 210 | "\n", 211 | "print('Num. of items: {}\\nNum. of users: {}\\nNum. 
of ratings: {}'.format(num_of_items, num_of_users, num_of_ratings))" 212 | ] 213 | }, 214 | { 215 | "cell_type": "code", 216 | "execution_count": 11, 217 | "metadata": {}, 218 | "outputs": [ 219 | { 220 | "data": { 221 | "text/plain": [ 222 | "count 44379.000000\n", 223 | "mean 3.865105\n", 224 | "std 0.712633\n", 225 | "min 0.000000\n", 226 | "25% 3.500000\n", 227 | "50% 4.000000\n", 228 | "75% 4.500000\n", 229 | "max 5.000000\n", 230 | "Name: rating, dtype: float64" 231 | ] 232 | }, 233 | "execution_count": 11, 234 | "metadata": {}, 235 | "output_type": "execute_result" 236 | } 237 | ], 238 | "source": [ 239 | "# rating promedio \n", 240 | "df.describe()['rating']" 241 | ] 242 | }, 243 | { 244 | "cell_type": "code", 245 | "execution_count": 17, 246 | "metadata": {}, 247 | "outputs": [ 248 | { 249 | "data": { 250 | "text/plain": [ 251 | "11757 2206\n", 252 | "19960 1681\n", 253 | "16074 1260\n", 254 | "5441 1253\n", 255 | "429 1183\n", 256 | "Name: itemID, dtype: int64" 257 | ] 258 | }, 259 | "execution_count": 17, 260 | "metadata": {}, 261 | "output_type": "execute_result" 262 | } 263 | ], 264 | "source": [ 265 | "# items que han recibido mas ratings\n", 266 | "df.itemID.value_counts().head()" 267 | ] 268 | }, 269 | { 270 | "cell_type": "code", 271 | "execution_count": 18, 272 | "metadata": {}, 273 | "outputs": [ 274 | { 275 | "data": { 276 | "text/plain": [ 277 | "13 181\n", 278 | "24 129\n", 279 | "490 115\n", 280 | "100 111\n", 281 | "695 106\n", 282 | "Name: userID, dtype: int64" 283 | ] 284 | }, 285 | "execution_count": 18, 286 | "metadata": {}, 287 | "output_type": "execute_result" 288 | } 289 | ], 290 | "source": [ 291 | "# usuarios que han dado mas rating \n", 292 | "df.userID.value_counts().head()" 293 | ] 294 | }, 295 | { 296 | "cell_type": "code", 297 | "execution_count": 19, 298 | "metadata": {}, 299 | "outputs": [ 300 | { 301 | "data": { 302 | "text/plain": [ 303 | "1199 17400\n", 304 | "394 3584\n", 305 | "14879 2656\n", 306 | "263 2104\n", 307 | 
"3268 1503\n", 308 | "Name: styleID, dtype: int64" 309 | ] 310 | }, 311 | "execution_count": 19, 312 | "metadata": {}, 313 | "output_type": "execute_result" 314 | } 315 | ], 316 | "source": [ 317 | "# estilos que han recibido más ratings \n", 318 | "df.styleID.value_counts().head()" 319 | ] 320 | }, 321 | { 322 | "cell_type": "markdown", 323 | "metadata": {}, 324 | "source": [ 325 | "# Convertir a formato fastFM" 326 | ] 327 | }, 328 | { 329 | "cell_type": "markdown", 330 | "metadata": {}, 331 | "source": [ 332 | "## funciones " 333 | ] 334 | }, 335 | { 336 | "cell_type": "code", 337 | "execution_count": 23, 338 | "metadata": {}, 339 | "outputs": [], 340 | "source": [ 341 | "def get_single_entries_in_fm_input_format(data, itemlist):\n", 342 | " \n", 343 | " '''Cree el formato de entrada necesario (datos, (fila, columna)) para la matriz csc para\n", 344 | " las entradas individuales en los datos. Cada entrada ocuparía una fila. Esto significa que\n", 345 | " daría como resultado una matriz csc con dimensión (| datos | x | lista de elementos |).\n", 346 | " '''\n", 347 | " \n", 348 | " column = len(itemlist)\n", 349 | " row = len(data)\n", 350 | " shape = (row, column)\n", 351 | "\n", 352 | " row_inds = np.zeros(len(data), dtype=np.int)\n", 353 | " col_inds = np.zeros(len(data), dtype=np.int)\n", 354 | " datalist = np.zeros(len(data), dtype=np.float)\n", 355 | " \n", 356 | " for i in range(len(data)):\n", 357 | " item = data[i]\n", 358 | " val = 1\n", 359 | " datalist[i] = val\n", 360 | " \n", 361 | " # ubica su posición en la lista de elementos, arroja un error si el elemento no es un\n", 362 | " # artículo posible\n", 363 | " col_ind = np.where(itemlist==item)[0]\n", 364 | " \n", 365 | " # no deben ser elementos duplicados en la lista de elementos\n", 366 | " assert len(col_ind) == 1\n", 367 | " col_ind = col_ind[0]\n", 368 | " row_ind = i\n", 369 | "\n", 370 | " col_inds[i] = col_ind\n", 371 | " row_inds[i] = row_ind\n", 372 | "\n", 373 | " return datalist, 
row_inds, col_inds, shape\n", 374 | "\n", 375 | "\n", 376 | "def get_multi_entries_in_fm_input_format(data, itemlist, norm_func=None):\n", 377 | " \n", 378 | " '''Cree el formato de entrada necesario (datos, (fila, columna)) para la matriz csc para\n", 379 | " las entradas múltiples en los datos. Cada conjunto de entradas múltiples ocuparía una fila.\n", 380 | " Esto significa que daría como resultado una matriz csc con dimensión\n", 381 | " (| conjuntos de entradas en datos | x | lista de elementos |).\n", 382 | " '''\n", 383 | " \n", 384 | " column = len(itemlist)\n", 385 | " \n", 386 | " # número de conjuntos de entradas en los datos\n", 387 | " row = len(data)\n", 388 | " shape = (row, column)\n", 389 | "\n", 390 | " # numero de datos \n", 391 | " num_of_data = fct.reduce(lambda x, y: x + len(y), data, 0)\n", 392 | " row_inds = np.zeros(num_of_data, dtype=np.int)\n", 393 | " col_inds = np.zeros(num_of_data, dtype=np.int)\n", 394 | " datalist = np.zeros(num_of_data, dtype=np.float)\n", 395 | " cnt = 0\n", 396 | " for i in range(len(data)):\n", 397 | " multi_entry = data[i]\n", 398 | "\n", 399 | " if norm_func != None:\n", 400 | " # función que recibe el tamaño del multi_entry para decidir cómo normalizarlo\n", 401 | " val = norm_func(len(multi_entry))\n", 402 | " else:\n", 403 | " # asignación de valor binario por defecto\n", 404 | " val = 1 if len(multi_entry) > 0 else 0\n", 405 | "\n", 406 | " # para cada entrada en multi_entry, ubique su posición en la lista de elementos,\n", 407 | " # arroja error si el elemento no es un elemento posible\n", 408 | " # todas las entradas permanecen en la misma fila\n", 409 | " row_ind = i\n", 410 | " for item in multi_entry:\n", 411 | " col_ind = np.where(itemlist==item)[0]\n", 412 | " assert len(col_ind) == 1\n", 413 | " col_ind = col_ind[0]\n", 414 | " \n", 415 | " datalist[cnt] = val\n", 416 | " col_inds[cnt] = col_ind\n", 417 | " row_inds[cnt] = row_ind\n", 418 | " \n", 419 | " # actualiza contador\n", 420 | " cnt += 
1\n", 421 | "\n", 422 | " return datalist, row_inds, col_inds, shape\n" 423 | ] 424 | }, 425 | { 426 | "cell_type": "markdown", 427 | "metadata": {}, 428 | "source": [ 429 | "## conversion de los datos " 430 | ] 431 | }, 432 | { 433 | "cell_type": "code", 434 | "execution_count": 24, 435 | "metadata": {}, 436 | "outputs": [ 437 | { 438 | "name": "stdout", 439 | "output_type": "stream", 440 | "text": [ 441 | "User feature set shape: (44379, 8320)\n", 442 | "Item feature set shape: (44379, 1836)\n", 443 | "Style feature set shape: (44379, 210)\n", 444 | "Dimension of FM input: (44379, 10366)\n" 445 | ] 446 | } 447 | ], 448 | "source": [ 449 | "beerlist = df.sort_values('itemID')['itemID'].unique()\n", 450 | "userlist = df.sort_values('userID')['userID'].unique()\n", 451 | "stylelist = df.sort_values('styleID')['styleID'].unique()\n", 452 | "\n", 453 | "# usuarios que dieron ratings \n", 454 | "user_data = df['userID'].values\n", 455 | "\n", 456 | "# items que recibieron ratings\n", 457 | "beer_data = df['itemID'].values\n", 458 | "\n", 459 | "# data de estilo de cerveza \n", 460 | "styles_data = df['styleID'].values\n", 461 | "\n", 462 | "# target vector: ratings\n", 463 | "rating_data = df['rating'].values\n", 464 | "\n", 465 | "\n", 466 | "# convertir a formato fastFM utilizando funciones de arriba \n", 467 | "user_datalist, user_row_inds, user_col_inds, user_shape = get_single_entries_in_fm_input_format(data=user_data, \n", 468 | " itemlist=userlist)\n", 469 | "\n", 470 | "beer_datalist, beer_row_inds, beer_col_inds, beer_shape = get_single_entries_in_fm_input_format(data=beer_data,\n", 471 | " itemlist=beerlist)\n", 472 | "\n", 473 | "style_datalist, style_row_inds, style_col_inds, style_shape = get_single_entries_in_fm_input_format(data=styles_data,\n", 474 | " itemlist=stylelist)\n", 475 | "\n", 476 | "# Concatena las dos columnas cambiando los índices de las columnas relacionadas con beer.\n", 477 | "# cambiar por el número de columnas en las columnas de 
usuario\n", 478 | "shift_by = len(userlist)\n", 479 | "beer_col_inds += shift_by\n", 480 | "style_col_inds += shift_by + len(beerlist)\n", 481 | "\n", 482 | "# concatena los datos (agregamos item_styles)\n", 483 | "datalist = np.append(user_datalist, [beer_datalist, style_datalist])\n", 484 | "row_inds = np.append(user_row_inds, [beer_row_inds, style_row_inds])\n", 485 | "col_inds = np.append(user_col_inds, [beer_col_inds,style_col_inds])\n", 486 | "\n", 487 | "# asegúrese de que ambos conjuntos de características tengan el mismo número de filas\n", 488 | "print('User feature set shape: {}\\nItem feature set shape: {}\\nStyle feature set shape: {}'.format(user_shape, beer_shape, style_shape))\n", 489 | "\n", 490 | "assert user_shape[0] == beer_shape[0]\n", 491 | "shape = (user_shape[0], user_shape[1] + beer_shape[1] + style_shape[1])\n", 492 | "print('Dimension of FM input: {}'.format(shape))\n", 493 | "\n", 494 | "X = csc_matrix((datalist, (row_inds, col_inds)), shape=shape)\n", 495 | "y = rating_data" 496 | ] 497 | }, 498 | { 499 | "cell_type": "code", 500 | "execution_count": 25, 501 | "metadata": {}, 502 | "outputs": [], 503 | "source": [ 504 | "# split train y test\n", 505 | "X_train, X_test, y_train, y_test = train_test_split(X, y)\n" 506 | ] 507 | }, 508 | { 509 | "cell_type": "code", 510 | "execution_count": 26, 511 | "metadata": {}, 512 | "outputs": [ 513 | { 514 | "name": "stdout", 515 | "output_type": "stream", 516 | "text": [ 517 | "CPU times: user 18.1 s, sys: 117 ms, total: 18.2 s\n", 518 | "Wall time: 18.2 s\n" 519 | ] 520 | } 521 | ], 522 | "source": [ 523 | "# entrenar modelo optimizando con ALS y hacer la prediccion \n", 524 | "fm = als.FMRegression(n_iter=1000, init_stdev=0.1, rank=10, l2_reg_w=0.1, l2_reg_V=0.5)\n", 525 | "fm.fit(X_train, y_train)\n", 526 | "y_pred = fm.predict(X_test)" 527 | ] 528 | }, 529 | { 530 | "cell_type": "code", 531 | "execution_count": 27, 532 | "metadata": {}, 533 | "outputs": [ 534 | { 535 | "name": "stdout", 536 | "output_type": 
"stream", 537 | "text": [ 538 | "Mean squared error under ALS: 0.6832341009172921\n" 539 | ] 540 | } 541 | ], 542 | "source": [ 543 | "error_als = mean_squared_error(y_test, y_pred)\n", 544 | "print('Mean squared error under ALS: {}'.format(error_als))" 545 | ] 546 | }, 547 | { 548 | "cell_type": "code", 549 | "execution_count": 29, 550 | "metadata": {}, 551 | "outputs": [], 552 | "source": [ 553 | "# entrenar modelo optimizando con SGD y hacer la prediccion \n", 554 | "fm_sgd = sgd.FMRegression(n_iter=10000000, init_stdev=0.01, rank=10, random_state=123, \n", 555 | " l2_reg_w=0.1, l2_reg_V=0.5, step_size=0.01)\n", 556 | "fm_sgd.fit(X_train, y_train)\n", 557 | "y_pred_sgd = fm_sgd.predict(X_test)" 558 | ] 559 | }, 560 | { 561 | "cell_type": "code", 562 | "execution_count": 30, 563 | "metadata": {}, 564 | "outputs": [ 565 | { 566 | "name": "stdout", 567 | "output_type": "stream", 568 | "text": [ 569 | "Mean squared error under SGD: 0.45126971767960844\n" 570 | ] 571 | } 572 | ], 573 | "source": [ 574 | "error_sgd = mean_squared_error(y_test, y_pred_sgd)\n", 575 | "print('Mean squared error under SGD: {}'.format(error_sgd))" 576 | ] 577 | }, 578 | { 579 | "cell_type": "code", 580 | "execution_count": null, 581 | "metadata": {}, 582 | "outputs": [], 583 | "source": [] 584 | } 585 | ], 586 | "metadata": { 587 | "kernelspec": { 588 | "display_name": "Python 3", 589 | "language": "python", 590 | "name": "python3" 591 | }, 592 | "language_info": { 593 | "codemirror_mode": { 594 | "name": "ipython", 595 | "version": 3 596 | }, 597 | "file_extension": ".py", 598 | "mimetype": "text/x-python", 599 | "name": "python", 600 | "nbconvert_exporter": "python", 601 | "pygments_lexer": "ipython3", 602 | "version": "3.6.0" 603 | }, 604 | "toc": { 605 | "base_numbering": 1, 606 | "nav_menu": {}, 607 | "number_sections": true, 608 | "sideBar": true, 609 | "skip_h1_title": false, 610 | "title_cell": "Table of Contents", 611 | "title_sidebar": "Contents", 612 | "toc_cell": false, 613 | 
"toc_position": {}, 614 | "toc_section_display": true, 615 | "toc_window_display": false 616 | } 617 | }, 618 | "nbformat": 4, 619 | "nbformat_minor": 2 620 | } 621 | -------------------------------------------------------------------------------- /practicos/HT4Rec.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "name": "HT4Rec.ipynb", 7 | "provenance": [], 8 | "collapsed_sections": [] 9 | }, 10 | "kernelspec": { 11 | "name": "python3", 12 | "display_name": "Python 3" 13 | }, 14 | "language_info": { 15 | "name": "python" 16 | }, 17 | "accelerator": "GPU" 18 | }, 19 | "cells": [ 20 | { 21 | "cell_type": "markdown", 22 | "metadata": { 23 | "colab_type": "text", 24 | "id": "view-in-github" 25 | }, 26 | "source": [ 27 | "\"Open\n" 28 | ] 29 | }, 30 | { 31 | "cell_type": "markdown", 32 | "metadata": { 33 | "id": "ZZujf16p_c2y" 34 | }, 35 | "source": [ 36 | "# H-Transformer for Item Recommendation in MOBA Games\n", 37 | "\n", 38 | "Vladimir Araujo" 39 | ] 40 | }, 41 | { 42 | "cell_type": "markdown", 43 | "metadata": { 44 | "id": "ybR1dWGRAIda" 45 | }, 46 | "source": [ 47 | "## Dependencies" 48 | ] 49 | }, 50 | { 51 | "cell_type": "code", 52 | "metadata": { 53 | "id": "BKIBwjERa-dT" 54 | }, 55 | "source": [ 56 | "!pip install python-box\n", 57 | "!wget https://gist.githubusercontent.com/vgaraujov/47ef44430fdbcc95dcb6c87233c3ef92/raw/97c4608ee2b62c77929784e0d07e05ff27b56ee4/drive_download.py" 58 | ], 59 | "execution_count": null, 60 | "outputs": [] 61 | }, 62 | { 63 | "cell_type": "code", 64 | "metadata": { 65 | "id": "3FBcRa_eWsTO" 66 | }, 67 | "source": [ 68 | "from pydrive.auth import GoogleAuth\n", 69 | "from pydrive.drive import GoogleDrive\n", 70 | "from google.colab import auth\n", 71 | "from oauth2client.client import GoogleCredentials\n", 72 | "# Authenticate and create the PyDrive client.\n", 73 | "# This only needs to be done once per 
notebook.\n", 74 | "auth.authenticate_user()\n", 75 | "gauth = GoogleAuth()\n", 76 | "gauth.credentials = GoogleCredentials.get_application_default()\n", 77 | "drive = GoogleDrive(gauth)" 78 | ], 79 | "execution_count": null, 80 | "outputs": [] 81 | }, 82 | { 83 | "cell_type": "code", 84 | "metadata": { 85 | "id": "PzgP0fnAa-3f" 86 | }, 87 | "source": [ 88 | "import drive_download\n", 89 | "\n", 90 | "idx = '19oln5xzNGI50KwO7kIP3HADQOeIXMW-R'\n", 91 | "drive_download.drive_download(drive, idx)" 92 | ], 93 | "execution_count": null, 94 | "outputs": [] 95 | }, 96 | { 97 | "cell_type": "code", 98 | "metadata": { 99 | "id": "eboJpTf7fSN3" 100 | }, 101 | "source": [ 102 | "!mv drive_download/* ." 103 | ], 104 | "execution_count": null, 105 | "outputs": [] 106 | }, 107 | { 108 | "cell_type": "code", 109 | "metadata": { 110 | "id": "gTPvgHwFQJs_" 111 | }, 112 | "source": [ 113 | "import time\n", 114 | "import os\n", 115 | "import logging\n", 116 | "import yaml\n", 117 | "from timeit import default_timer as timer\n", 118 | "\n", 119 | "## Libraries\n", 120 | "import numpy as np\n", 121 | "from box import box_from_file\n", 122 | "from pathlib import Path\n", 123 | "\n", 124 | "## Torch\n", 125 | "import torch\n", 126 | "import torch.nn as nn\n", 127 | "from torch.utils import data\n", 128 | "import torch.optim as optim\n", 129 | "\n", 130 | "## Custom Imports\n", 131 | "from logger import setup_logs\n", 132 | "from seed import set_seed\n", 133 | "from train import train, snapshot\n", 134 | "from validation import validation\n", 135 | "from dataset import DotaDataset, DataCollatorForDota\n", 136 | "from model_aux import HTransformer\n", 137 | "import losses" 138 | ], 139 | "execution_count": null, 140 | "outputs": [] 141 | }, 142 | { 143 | "cell_type": "markdown", 144 | "metadata": { 145 | "id": "jGJmV8y0BwdZ" 146 | }, 147 | "source": [ 148 | "## Training Model" 149 | ] 150 | }, 151 | { 152 | "cell_type": "code", 153 | "metadata": { 154 | "id": "_1OG_Wy_d7Fr" 155 | }, 156 | 
"source": [ 157 | "############ Control Center and Hyperparameter ###############\n", 158 | "config = box_from_file(Path('config.yaml'), file_type='yaml')\n", 159 | "config.training.logging_dir = '.'\n", 160 | "config.dataset.train_data_path = '/content/training_all.pkl'\n", 161 | "config.dataset.test_data_path = '/content/testing_all.pkl'\n", 162 | "config.dataset.item_path = '/content/item_ids.csv'\n", 163 | "config.dataset.champ_path = '/content/hero_names.csv'" 164 | ], 165 | "execution_count": null, 166 | "outputs": [] 167 | }, 168 | { 169 | "cell_type": "code", 170 | "metadata": { 171 | "id": "5Ze3ArW1k5xW" 172 | }, 173 | "source": [ 174 | "run_name = config.model.model_type + time.strftime(\"-%Y-%m-%d_%H_%M_%S\")\n", 175 | "# setup logger \n", 176 | "global_timer = timer() # global timer\n", 177 | "logger = setup_logs(config.training.logging_dir, run_name) # setup logs\n", 178 | "logger.info('### Experiment {} ###'.format(run_name))\n", 179 | "logger.info('### Hyperparameter summary below ###\\n {}'.format(config))\n", 180 | " \n", 181 | "# define if gpu or cpu\n", 182 | "use_cuda = not config.training.no_cuda and torch.cuda.is_available()\n", 183 | "device = torch.device(\"cuda\" if use_cuda else \"cpu\")\n", 184 | "logger.info('===> use_cuda is {}'.format(use_cuda))\n", 185 | "# set seed for reproducibility\n", 186 | "set_seed(config.training.seed, use_cuda)" 187 | ], 188 | "execution_count": null, 189 | "outputs": [] 190 | }, 191 | { 192 | "cell_type": "code", 193 | "metadata": { 194 | "id": "b5_MPUo3k6m3" 195 | }, 196 | "source": [ 197 | "## Loading the dataset\n", 198 | "logger.info('===> loading train and validation dataset')\n", 199 | "train_dataset = DotaDataset(config, 'train')\n", 200 | "validation_dataset = DotaDataset(config, 'test')\n", 201 | "\n", 202 | "data_collator = DataCollatorForDota(max_length = config.dataset.max_seq_length)\n", 203 | "\n", 204 | "multiplier = torch.cuda.device_count() if not config.training.no_cuda else 1\n", 205 | 
"batch_size = int(config.training.batch_size*multiplier)\n", 206 | "train_loader = data.DataLoader(train_dataset,\n", 207 | " batch_size=batch_size,\n", 208 | " collate_fn=data_collator,\n", 209 | " drop_last=True\n", 210 | " )\n", 211 | "validation_loader = data.DataLoader(validation_dataset, \n", 212 | " batch_size=batch_size, # batch 1 for evaluate variable length\n", 213 | " collate_fn=data_collator,\n", 214 | " drop_last=True\n", 215 | " )\n", 216 | "\n", 217 | "config.dataset.n_items = len(train_dataset.id2item)\n", 218 | "config.dataset.n_champs = len(train_dataset.id2champ)" 219 | ], 220 | "execution_count": null, 221 | "outputs": [] 222 | }, 223 | { 224 | "cell_type": "code", 225 | "metadata": { 226 | "id": "K06uuWT-mbAK" 227 | }, 228 | "source": [ 229 | "model = HTransformer(config=config)\n", 230 | "# move to device\n", 231 | "model.to(device)\n", 232 | "\n", 233 | "# Adam optimizer\n", 234 | "optimizer = optim.Adam(\n", 235 | " filter(lambda p: p.requires_grad, model.parameters()), \n", 236 | " lr=2e-4, betas=(0.9, 0.98), eps=1e-09, weight_decay=1e-4, amsgrad=True)\n", 237 | "if config.training.resume_name:\n", 238 | " optimizer.load_state_dict(checkpoint['optimizer'])\n", 239 | "\n", 240 | "# create loss function\n", 241 | "loss_fn = losses.LossFunction(loss_type=config.model.loss_fn)\n", 242 | " \n", 243 | "model_params = sum(p.numel() for p in model.parameters() if p.requires_grad)\n", 244 | "logger.info('### Model summary below ###\\n {}'.format(str(model)))\n", 245 | "logger.info('===> Model total parameter: {}\\n'.format(model_params))" 246 | ], 247 | "execution_count": null, 248 | "outputs": [] 249 | }, 250 | { 251 | "cell_type": "code", 252 | "metadata": { 253 | "id": "w-RLv8sdmmbo" 254 | }, 255 | "source": [ 256 | "best_acc = 0\n", 257 | "best_loss = np.inf\n", 258 | "best_epoch = -1 \n", 259 | "step = 0\n", 260 | "initial_epoch = 1\n", 261 | " \n", 262 | "logger.info('### Training begins at epoch {} and step {} 
###'.format(initial_epoch,step))\n", 263 | "for epoch in range(initial_epoch, config.training.epochs + 1):\n", 264 | " epoch_timer = timer()\n", 265 | " # Train and validate\n", 266 | " tr_acc, tr_loss, step = train(\n", 267 | " step, \n", 268 | " model, \n", 269 | " train_loader, \n", 270 | " loss_fn, \n", 271 | " device, \n", 272 | " optimizer, \n", 273 | " epoch, \n", 274 | " config.training.log_interval)\n", 275 | " \n", 276 | " if not epoch % 10: \n", 277 | " val_acc, val_loss = validation(\n", 278 | " step, \n", 279 | " model, \n", 280 | " validation_loader, \n", 281 | " loss_fn, \n", 282 | " device)\n", 283 | " # Save\n", 284 | " if val_loss < best_loss: \n", 285 | " best_loss = min(val_loss, best_loss)\n", 286 | " if torch.cuda.device_count() > 1 and not config.training.no_cuda:\n", 287 | " dict_to_save = model.module.state_dict()\n", 288 | " else:\n", 289 | " dict_to_save = model.state_dict()\n", 290 | " snapshot(config.training.logging_dir, run_name, {\n", 291 | " 'epoch': epoch,\n", 292 | " 'step_train': step,\n", 293 | " 'validation_acc': val_acc,\n", 294 | " 'validation_loss': val_loss,\n", 295 | " 'state_dict': dict_to_save,\n", 296 | " 'optimizer': optimizer.state_dict(),\n", 297 | " })\n", 298 | " best_epoch = epoch\n", 299 | "\n", 300 | " end_epoch_timer = timer()\n", 301 | " logger.info(\"#### End epoch {}/{}, elapsed time: {}\".format(epoch, config.training.epochs, end_epoch_timer - epoch_timer))\n", 302 | " \n", 303 | "## end \n", 304 | "end_global_timer = timer()\n", 305 | "logger.info(\"################## Success #########################\")\n", 306 | "logger.info(\"Total elapsed time: %s\" % (end_global_timer - global_timer))" 307 | ], 308 | "execution_count": null, 309 | "outputs": [] 310 | }, 311 | { 312 | "cell_type": "markdown", 313 | "metadata": { 314 | "id": "g6vF5SRAEXGN" 315 | }, 316 | "source": [ 317 | "## Visualization" 318 | ] 319 | }, 320 | { 321 | "cell_type": "code", 322 | "metadata": { 323 | "id": "7njMbj5v_8Nd" 324 | }, 325 | 
"source": [ 326 | "import numpy as np; np.random.seed(0)\n", 327 | "import seaborn as sns\n", 328 | "import matplotlib.pyplot as plt" 329 | ], 330 | "execution_count": null, 331 | "outputs": [] 332 | }, 333 | { 334 | "cell_type": "code", 335 | "metadata": { 336 | "id": "kUlTVc5SB2EW" 337 | }, 338 | "source": [ 339 | "import pandas as pd\n", 340 | "\n", 341 | "def open_champs_info(champs_path):\n", 342 | " champs_df = pd.read_csv(champs_path)\n", 343 | " champs_df.drop([106, 111], inplace = True)\n", 344 | " champs_df.drop(['name'], axis = 1, inplace = True)\n", 345 | " champs_df.reset_index(drop=True, inplace=True)\n", 346 | " names = champs_df['localized_name'].tolist()\n", 347 | " dictionary = {names[i] : v for i, v in enumerate(champs_df['hero_id'].tolist())}\n", 348 | " dictionary[0] = 0\n", 349 | " reversed_dictionary = {value : key for (key, value) in dictionary.items()}\n", 350 | " return dictionary, reversed_dictionary\n", 351 | "\n", 352 | "_, mapping = open_champs_info(config.dataset.champ_path)" 353 | ], 354 | "execution_count": null, 355 | "outputs": [] 356 | }, 357 | { 358 | "cell_type": "code", 359 | "metadata": { 360 | "id": "S9-h5i5eO4op" 361 | }, 362 | "source": [ 363 | "run_name = 'HTransformerV3-2021-10-30_04_30_23'\n", 364 | "logger.info('===> loading a checkpoint')\n", 365 | "checkpoint = torch.load('{}/{}-{}'.format(config.training.logging_dir, run_name, 'model_best.pth'))\n", 366 | "model.load_state_dict(checkpoint['state_dict'])" 367 | ], 368 | "execution_count": null, 369 | "outputs": [] 370 | }, 371 | { 372 | "cell_type": "code", 373 | "metadata": { 374 | "id": "S4sW9jNz_qVy" 375 | }, 376 | "source": [ 377 | "validation_loader = data.DataLoader(validation_dataset, \n", 378 | " batch_size=1, # batch 1 for evaluate variable length\n", 379 | " collate_fn=data_collator,\n", 380 | " drop_last=True,\n", 381 | " shuffle=True\n", 382 | " )" 383 | ], 384 | "execution_count": null, 385 | "outputs": [] 386 | }, 387 | { 388 | "cell_type": "code", 389 
| "metadata": { 390 | "id": "k4nRAiVn_6q5" 391 | }, 392 | "source": [ 393 | "champs, items, target, attn_mask = next(iter(validation_loader))" 394 | ], 395 | "execution_count": null, 396 | "outputs": [] 397 | }, 398 | { 399 | "cell_type": "code", 400 | "metadata": { 401 | "id": "zKaI0GLdAKR8" 402 | }, 403 | "source": [ 404 | "output, attn_1, attn_2 = model(champs.cuda(), items.cuda())" 405 | ], 406 | "execution_count": null, 407 | "outputs": [] 408 | }, 409 | { 410 | "cell_type": "code", 411 | "metadata": { 412 | "id": "yNesJdO-AYVT" 413 | }, 414 | "source": [ 415 | "data_1=attn_1.detach().cpu().numpy()\n", 416 | "data_2=attn_2.squeeze(0).detach().cpu().numpy()" 417 | ], 418 | "execution_count": null, 419 | "outputs": [] 420 | }, 421 | { 422 | "cell_type": "code", 423 | "metadata": { 424 | "id": "Meu8ytkJB48Q" 425 | }, 426 | "source": [ 427 | "heros = champs.detach().cpu().squeeze(0).tolist()\n", 428 | "name_heros = []\n", 429 | "for i in heros:\n", 430 | " name_heros.append(mapping[int(i)])" 431 | ], 432 | "execution_count": null, 433 | "outputs": [] 434 | }, 435 | { 436 | "cell_type": "code", 437 | "metadata": { 438 | "id": "9Oq4kOllC1U1" 439 | }, 440 | "source": [ 441 | "fig,axn = plt.subplots(5, 1, sharex=True, sharey=True, figsize=(10,6))\n", 442 | "\n", 443 | "aux = 2\n", 444 | "for i, ax in enumerate(axn.flat):\n", 445 | " df = pd.DataFrame(data_1[i+aux], index=name_heros, columns=name_heros)\n", 446 | " df.drop(labels=name_heros[1:], axis=0, inplace=True)\n", 447 | "# ax.set_title(\"Step \"+str(i+aux))\n", 448 | " sns.heatmap(df, ax=ax, cmap=\"Blues\", cbar=True)\n", 449 | "plt.xlabel(\"Heros\")" 450 | ], 451 | "execution_count": null, 452 | "outputs": [] 453 | }, 454 | { 455 | "cell_type": "code", 456 | "metadata": { 457 | "id": "Y-9UU6SzAnbV" 458 | }, 459 | "source": [ 460 | "plt.figure(figsize=(8, 6))\n", 461 | "ax = sns.heatmap(data_2, cmap=\"Blues\")\n", 462 | "plt.xlabel(\"Sequence Step\")\n", 463 | "plt.ylabel(\"Sequence Step\")" 464 | ], 465 | 
"execution_count": null, 466 | "outputs": [] 467 | } 468 | ] 469 | } -------------------------------------------------------------------------------- /practicos/README.md: -------------------------------------------------------------------------------- 1 | ## Prácticos Sistemas Recomendadores IIC3633 2 | 3 | ## Material 2021 4 | 5 | A continuación encontrarán la lista de los prácticos que deberán realizar durante este semestre. 6 | 7 | **Links ayudantías grabadas** 8 | 9 | [23 de Agosto](https://drive.google.com/file/d/1lCws-nLPFlFRPL43Cn3K-w0S5bLld9Nc/view?usp=sharing) 10 | [30 de Agosto](https://drive.google.com/file/d/1epgL1GwZWijIv0Gex0IlZ0GWDYgH4O2_/view?usp=sharing) 11 | 12 | | # | Práctico | Semana | Fecha Ayudantía | Encargado | Video | 13 | |----|-------------------------------------------------------------------------------------------------------------------------------------------|--------|-----------------|--------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| 14 | | 1 | [Most Popular y Item Average Rating](https://github.com/PUC-RecSys-Class/RecSysPUC-2021/blob/master/practicos/pyRecLab_MostPopular.ipynb) | 2 | 23 Agosto | Jorge F. | Open Video | 15 | | 2 | [User KNN](https://github.com/PUC-RecSys-Class/RecSysPUC-2021/blob/master/practicos/pyRecLab_uKNN.ipynb) | 2 | 23 Agosto | Jorge F. | Open Video | 16 | | 3 | [Slope One](https://github.com/PUC-RecSys-Class/RecSysPUC-2021/blob/master/practicos/pyRecLab_SlopeOne.ipynb) | 2 | 23 Agosto | Francisca C. | Open Video | 17 | | 4 | [Item KNN](https://github.com/PUC-RecSys-Class/RecSysPUC-2021/blob/master/practicos/pyRecLab_iKNN.ipynb) | 3 | 30 Agosto | Jorge F. | Open Video | 18 | | 5 | [Funk SVD](https://github.com/PUC-RecSys-Class/RecSysPUC-2021/blob/master/practicos/pyRecLab_FunkSVD.ipynb) | 3 | 30 Agosto | Jorge F. 
| Open Video | 19 | | 6 | [Implicit Feedback y BPR](https://github.com/PUC-RecSys-Class/RecSysPUC-2020/blob/master/practicos/Implicit_feedback.ipynb) | 4 | 6 Septiembre | Andrés C. | Open Video | 20 | | 7 | [Content-based (Texto)](https://github.com/PUC-RecSys-Class/RecSysPUC-2020/blob/master/practicos/Content_Based_texto.ipynb) | 5 | 13 Septiembre | Andrés C. | Open Video | 21 | | 8 | [Content-based (Imágenes)](https://github.com/PUC-RecSys-Class/RecSysPUC-2020/blob/master/practicos/Content_Based_imagenes.ipynb) | 5 | 13 Septiembre | Andrés C. | Open Video | 22 | | 9 | [CF con Deep Learning](https://github.com/PUC-RecSys-Class/RecSysPUC-2020/blob/master/practicos/MultiVAE_Practico.ipynb) | 7 | 27 Septiembre | Alvaro L. | Open Video | 23 | | 10 | [Recomendación Secuencial](https://github.com/PUC-RecSys-Class/RecSysPUC-2020/blob/master/practicos/GRU4Rec.ipynb) | 8 | 4 Octubre | Vladimir A. | Open Video | 24 | | 11 | [Reinforcement Learning](https://github.com/PUC-RecSys-Class/RecSysPUC-2021/blob/master/practicos/Reinforcement_Learning_Mabwiser.ipynb) | 9 | 25 Octubre | Alvaro L. | Open Video | 25 | | 12 | [Deep learning Avanzado (no se responde)](https://github.com/PUC-RecSys-Class/RecSysPUC-2021/blob/master/practicos/HT4Rec.ipynb) | 10 | 1 Noviembre | Vladimir A. 
| Open Video | 26 | 27 | ## Material 2020 28 | 29 | **Links ayudantías grabadas** 30 | 31 | [23 de Agosto](https://drive.google.com/file/d/1TGUKYi-jV7vJ5ns27pEMYyOqagQcfh_1/view?usp=sharing) 32 | 33 | [30 de Agosto](https://drive.google.com/file/d/1JsA1d5ZiS4IVNTMgDO1B2_gwnCIKhbzD/view?usp=sharing) 34 | 35 | [6 de Septiembre](https://drive.google.com/file/d/1x0iGCrcLvLBTch_cAeqnD17K3DeSKUhd/view) 36 | 37 | [13 de Septiembre ](https://drive.google.com/file/d/1LeCCUQPNkRA9RFbw4N4nnaRgr4ZKPUjD/view) 38 | 39 | [27 de Octubre ](https://drive.google.com/file/d/171kVEF-etFr2NN7VaB3pbv_l_kcr2WaK/view?usp=sharing) 40 | 41 | ------------------------------------ 42 | 43 | | # | Práctico | Semana | Fecha Ayudantía | Encargado | Video | 44 | |----|-------------------------------------------------------------------------------------------------------------------------------------------|--------|-----------------|--------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| 45 | | 1 | [Most Popular y Item Average Rating](https://github.com/PUC-RecSys-Class/RecSysPUC-2020/blob/master/practicos/pyRecLab_MostPopular.ipynb) | 1 | 17 Agosto | Andrés C. | Open Video | 46 | | 2 | [User KNN](https://github.com/PUC-RecSys-Class/RecSysPUC-2020/blob/master/practicos/pyRecLab_uKNN.ipynb) | 1 | 17 Agosto | Manuel C. | Open Video | 47 | | 3 | [Slope One](https://github.com/PUC-RecSys-Class/RecSysPUC-2020/blob/master/practicos/pyRecLab_SlopeOne.ipynb) | 2 | 24 Agosto | Francisca C. | Open Video | 48 | | 4 | [Item KNN](https://github.com/PUC-RecSys-Class/RecSysPUC-2020/blob/master/practicos/pyRecLab_iKNN.ipynb) | 2 | 24 Agosto | Andrés V. | Open Video | 49 | | 5 | [Funk SVD](https://github.com/PUC-RecSys-Class/RecSysPUC-2020/blob/master/practicos/pyRecLab_FunkSVD.ipynb) | 2 | 24 Agosto | Vladimir A. 
| Open Video | 50 | | 6 | [Implicit Feedback y BPR](https://github.com/PUC-RecSys-Class/RecSysPUC-2020/blob/master/practicos/Implicit_feedback.ipynb) | 3 | 31 Agosto | Andrés C. | Open Video | 51 | | 7 | [Content-based (Texto)](https://github.com/PUC-RecSys-Class/RecSysPUC-2020/blob/master/practicos/Content_Based_texto.ipynb) | 4 | 7 Septiembre | Andrés C. | Open Video | 52 | | 8 | [Content-based (Imágenes)](https://github.com/PUC-RecSys-Class/RecSysPUC-2020/blob/master/practicos/Content_Based_imagenes.ipynb) | 4 | 7 Septiembre | Andrés C. | Open Video | 53 | | 9 | [Factorization Machines](https://github.com/PUC-RecSys-Class/RecSysPUC-2020/blob/master/practicos/FastFM_factorization_machines.ipynb) | 5 | 14 Septiembre | Andrés C. | Open Video | 54 | | 10 | [Reinforcement Learning](https://github.com/PUC-RecSys-Class/RecSysPUC-2020/blob/master/practicos/Reinforcement_Learning_Recsim.ipynb) | 10 | N/D | Manuel C. | Open Video | 55 | | 11 | [CF con Deep Learning](https://github.com/PUC-RecSys-Class/RecSysPUC-2020/blob/master/practicos/MultiVAE_Practico.ipynb) | 11 | 19 Octubre | Andrés V. | Open Video | 56 | | 12 | [Recomendación Secuencial](https://github.com/PUC-RecSys-Class/RecSysPUC-2020/blob/master/practicos/GRU4Rec.ipynb) | 12 | 26 Octubre | Vladimir A. | Open Video | 57 | | 13 | [Deep learning Avanzado (no se responde)](https://github.com/PUC-RecSys-Class/RecSysPUC-2020/blob/master/practicos/Ayudantia_TIRLol.ipynb) | 14 | 2 Noviembre | Andrés V. 
| Open Video | 58 | -------------------------------------------------------------------------------- /practicos/Reinforcement_Learning_Recsim.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "name": "Recsim_Tutorial_IIC3633-2_2020.ipynb", 7 | "provenance": [], 8 | "collapsed_sections": [] 9 | }, 10 | "kernelspec": { 11 | "name": "python3", 12 | "display_name": "Python 3" 13 | } 14 | }, 15 | "cells": [ 16 | { 17 | "cell_type": "markdown", 18 | "metadata": { 19 | "id": "n9PTfi4n7oS6" 20 | }, 21 | "source": [ 22 | "# Práctico Recsim\n", 23 | "\n", 24 | "Adaptado de los tutoriales disponibles en: https://github.com/google-research/recsim por Manuel Cartagena." 25 | ] 26 | }, 27 | { 28 | "cell_type": "code", 29 | "metadata": { 30 | "id": "WeT1yUJSOjDh" 31 | }, 32 | "source": [ 33 | "# Install Recsim\n", 34 | "!pip install --upgrade --no-cache-dir recsim" 35 | ], 36 | "execution_count": null, 37 | "outputs": [] 38 | }, 39 | { 40 | "cell_type": "markdown", 41 | "metadata": { 42 | "id": "WVOrpYsfi_kq" 43 | }, 44 | "source": [ 45 | "## Reinforcement Learning\n", 46 | "\n", 47 | "![RL setup](https://github.com/bamine/recsys-summer-school/raw/12e57cc4fd1cb26164d2beebf3ca29ebe2eab960/notebooks/images/rl-setup.png)\n", 48 | "\n", 49 | "\n", 50 | "## Tipos de interacción\n", 51 | "\n", 52 | "![texto alternativo](https://github.com/bamine/recsys-summer-school/raw/12e57cc4fd1cb26164d2beebf3ca29ebe2eab960/notebooks/images/organic-bandit.png)" 53 | ] 54 | }, 55 | { 56 | "cell_type": "markdown", 57 | "metadata": { 58 | "id": "5hhwKK1fOqq1" 59 | }, 60 | "source": [ 61 | "## Importar paquetes necesarios" 62 | ] 63 | }, 64 | { 65 | "cell_type": "code", 66 | "metadata": { 67 | "id": "OpCXt6tkYD_w" 68 | }, 69 | "source": [ 70 | "import functools\n", 71 | "import numpy as np\n", 72 | "from gym import spaces\n", 73 | "import matplotlib.pyplot as plt\n", 74 | "from 
scipy import stats\n", 75 | "\n", 76 | "from recsim import document\n", 77 | "from recsim import user\n", 78 | "from recsim.choice_model import MultinomialLogitChoiceModel\n", 79 | "from recsim.simulator import environment\n", 80 | "from recsim.simulator import recsim_gym\n", 81 | "from recsim.simulator import runner_lib\n", 82 | "\n", 83 | "import tensorflow as tf\n", 84 | "tf.compat.v1.disable_eager_execution()" 85 | ], 86 | "execution_count": null, 87 | "outputs": [] 88 | }, 89 | { 90 | "cell_type": "markdown", 91 | "metadata": { 92 | "id": "94GLvAnijJIm" 93 | }, 94 | "source": [ 95 | "## Recsim\n", 96 | "![RecSim implementation](https://github.com/google-research/recsim/blob/master/recsim/colab/figures/simulator_implemented.png?raw=true)" 97 | ] 98 | }, 99 | { 100 | "cell_type": "markdown", 101 | "metadata": { 102 | "id": "t8cuzWjdj3oz" 103 | }, 104 | "source": [ 105 | "# Resumen\n", 106 | "\n", 107 | "Un paso en la simulación de Recsim consiste en:\n", 108 | "\n", 109 | "\n", 110 | "1. La Base de Datos de Documentos (items) provee un corpus de *D* documentos al recomendador.\n", 111 | "2. El recomendador observa los *D* documentos (y sus features) junto a las respuestas del usuario para la última recomendación. Luego hace una selección ordenada de *k* documentos para presentárselos al usuario.\n", 112 | "3. El usuario examina la lista y escoge a lo más un documento (no escoger uno es una opción). Esto genera una transición del estado del usuario. Finalmente el usuario emite una observación del documento, que en la siguiente iteración el recomendador podrá ver." 113 | ] 114 | }, 115 | { 116 | "cell_type": "markdown", 117 | "metadata": { 118 | "id": "fT2pJkTf3Io7" 119 | }, 120 | "source": [ 121 | "# Escenario de la simulación: Videos de Memes vs Educativos\n", 122 | "\n", 123 | "Los documentos de nuestro corpus corresponderan a items (en este caso videos) que se caracterizan por su grado de educativo o de meme. 
Documentos \"meme\" generan alto compromiso (**engagement**), pero _hipotéticamente_ el consumo a largo plazo de estos documentos lleva a disminuir la satisfacción del usuario. Por otro lado, documentos educativos generan relativamente bajo engagement, pero su consumo conlleva a una mayor satisfacción a largo plazo. Modelaremos esta propiedad de los documentos como una feature continua que puede tomar valores entre [0,1], le llamaremos Educativeness-scale. Un documento con score 1 es totalmente educativo, mientras que un documento con score 0 es totalmente meme.\n", 124 | "\n", 125 | "El estado latente del usuario consiste en una variable de dimensión 1 llamada *satisfacción*. Cada vez que consume un documento \"educativo\", esta variable tiende a incrementar, y opuestamente, un documento meme tiende a disminuir la satisfacción.\n", 126 | "\n", 127 | "Al consumir un documento, el usuario emite una medida estocástica del engagement (tiempo que ve el video) sobre el documento. Este valor es proporcional a la satisfacción del usuario e inversamente proporcional a la educatividad del documento en cuestión.\n", 128 | "\n", 129 | "Por lo tanto, el objetivo es encontrar el mix óptimo de documentos para mantener el engagement del usuario por un período largo de tiempo."
130 | ] 131 | }, 132 | { 133 | "cell_type": "markdown", 134 | "metadata": { 135 | "id": "gDdSxkJjBmN5" 136 | }, 137 | "source": [ 138 | "## Document" 139 | ] 140 | }, 141 | { 142 | "cell_type": "markdown", 143 | "metadata": { 144 | "id": "Ifnex8kHBrZx" 145 | }, 146 | "source": [ 147 | "### Model" 148 | ] 149 | }, 150 | { 151 | "cell_type": "markdown", 152 | "metadata": { 153 | "id": "wsSuXHgNOyvl" 154 | }, 155 | "source": [ 156 | "Clase que define los documentos, LTS es una abreviación de Long Term Satisfaction" 157 | ] 158 | }, 159 | { 160 | "cell_type": "code", 161 | "metadata": { 162 | "id": "-0zLUsmXBcM-" 163 | }, 164 | "source": [ 165 | "class LTSDocument(document.AbstractDocument):\n", 166 | " def __init__(self, doc_id, educativeness, cluster_id):\n", 167 | " self.educativeness = educativeness\n", 168 | " self.cluster_id = cluster_id\n", 169 | " # doc_id es un ID unico para el documento\n", 170 | " super(LTSDocument, self).__init__(doc_id)\n", 171 | "\n", 172 | " NUM_CLUSTERS = 4\n", 173 | "\n", 174 | " # Una observación son los valores públicos del documento\n", 175 | " def create_observation(self):\n", 176 | " return {'educativeness': np.array(self.educativeness), 'cluster_id': self.cluster_id}\n", 177 | "\n", 178 | " # El espacio de la observación utiliza la el estándar del gym de OpenAI: https://gym.openai.com/docs/#spaces\n", 179 | " @classmethod\n", 180 | " def observation_space(self):\n", 181 | " return spaces.Dict({\n", 182 | " 'educativeness': spaces.Box(shape=(1,), dtype=np.float32, low=0.0, high=1.0),\n", 183 | " 'cluster_id': spaces.Discrete(self.NUM_CLUSTERS)\n", 184 | " })\n", 185 | " \n", 186 | " # Método para definir cómo se imprime un documento\n", 187 | " def __str__(self):\n", 188 | " return \"Document {} from cluster {} with educativeness {}.\".format(self._doc_id, self.cluster_id, self.educativeness)" 189 | ], 190 | "execution_count": null, 191 | "outputs": [] 192 | }, 193 | { 194 | "cell_type": "markdown", 195 | "metadata": { 196 | "id": 
"P7aiBraXBpH2" 197 | }, 198 | "source": [ 199 | "### Sampler" 200 | ] 201 | }, 202 | { 203 | "cell_type": "markdown", 204 | "metadata": { 205 | "id": "SVFC_Z5oPfGL" 206 | }, 207 | "source": [ 208 | "Un Sampler es una clase que creará una instancia del objeto en cuestión, en este caso para los documentos" 209 | ] 210 | }, 211 | { 212 | "cell_type": "code", 213 | "metadata": { 214 | "id": "tHM9XL-1Bc1i" 215 | }, 216 | "source": [ 217 | "class LTSDocumentSampler(document.AbstractDocumentSampler):\n", 218 | " def __init__(self, doc_ctor=LTSDocument, **kwargs):\n", 219 | " super(LTSDocumentSampler, self).__init__(doc_ctor, **kwargs)\n", 220 | " self._doc_count = 0\n", 221 | "\n", 222 | " def sample_document(self):\n", 223 | " doc_features = {}\n", 224 | " doc_features['doc_id'] = self._doc_count\n", 225 | " doc_features['educativeness'] = self._rng.random_sample()\n", 226 | " doc_features['cluster_id'] = self._rng.choice(self._doc_ctor.NUM_CLUSTERS)\n", 227 | " self._doc_count += 1\n", 228 | " return self._doc_ctor(**doc_features)" 229 | ], 230 | "execution_count": null, 231 | "outputs": [] 232 | }, 233 | { 234 | "cell_type": "markdown", 235 | "metadata": { 236 | "id": "S02-Non-PqXD" 237 | }, 238 | "source": [ 239 | "Ejemplo de sampleo de documentos" 240 | ] 241 | }, 242 | { 243 | "cell_type": "code", 244 | "metadata": { 245 | "id": "HZHSsTUPy51Y" 246 | }, 247 | "source": [ 248 | "sampler = LTSDocumentSampler()\n", 249 | "for i in range(5): print(sampler.sample_document())\n", 250 | "d = sampler.sample_document()\n", 251 | "print(\"Documents have observation space:\", d.observation_space(), \"\\n\"\n", 252 | " \"An example realization is: \", d.create_observation())" 253 | ], 254 | "execution_count": null, 255 | "outputs": [] 256 | }, 257 | { 258 | "cell_type": "markdown", 259 | "metadata": { 260 | "id": "qW_jfP8-BxJJ" 261 | }, 262 | "source": [ 263 | "## User" 264 | ] 265 | }, 266 | { 267 | "cell_type": "markdown", 268 | "metadata": { 269 | "id": "7fU1KAmX8GeU" 270 | 
}, 271 | "source": [ 272 | "El modelo de usuario para este tutorial es:\n", 273 | "* Cada usuario tiene una feature llamada net educativeness exposure ($\text{nee}_t$), y satisfacción ($\text{sat}_t$). Están relacionadas mediante una función logística para reflejar que la satisfacción no puede crecer sin límite.\n", 274 | "$$\text{sat}_t = \sigma(\tau\cdot\text{nee}_t),$$\n", 275 | "donde $\tau$ es un parámetro de sensibilidad específico por usuario.\n", 276 | "* Dado un slate $S$, el usuario escoge un item basado en un modelo de decisión multinomial con la educativeness como feature: $p(\text{usuario escoja }d_i \text{ del slate }S) \sim e^{1-\mathrm{educativeness}(d_i)}$\n", 277 | "* Una vez el usuario escoge un documento, la net educativeness exposure evoluciona de la siguiente manera:\n", 278 | "$$\text{nee}_{t+1} = \beta \cdot \text{nee}_t + 2(k_d - 1/2) + {\cal N}(0, \eta),$$\n", 279 | "donde $\beta$ es un factor específico por usuario que llamaremos memory discount (factor de olvido), $k_d$ es la educativeness del documento escogido y $\eta$ es ruido proveniente de una distribución normal que llamaremos innovación (innovation).\n", 280 | "* Finalmente, el usuario interactúa con el contenido escogido por $s_d$ segundos, donde $s_d$ es sacado de alguna distribución\n", 281 | "$$s_d\sim\log{\cal N}(k_d\mu_k + (1-k_d)\mu_c, k_d\sigma_k + (1-k_d)\sigma_c),$$\n", 282 | "por ejemplo, una distribución log-normal interpolando linealmente entre una respuesta puramente educativa $(\mu_k, \sigma_k)$ y una respuesta puramente meme $(\mu_c, \sigma_c)$.\n", 283 | "\n", 284 | "De acuerdo a esto, el estado de un usuario está definido por la tupla $(\text{sat}, \tau, \beta, \eta, \mu_k, \sigma_k, \mu_c, \sigma_c).$ La satisfacción es la única variable dinámica del estado.\n", 285 | "\n" 286 | ] 287 | }, 288 | { 289 | "cell_type": "markdown", 290 | "metadata": { 291 | "id": "tAqJN4J1BzPH" 292 | }, 293 | "source": [ 294 | "### State" 295 | ]
296 | }, 297 | { 298 | "cell_type": "markdown", 299 | "metadata": { 300 | "id": "Uzd5qrcdPtgw" 301 | }, 302 | "source": [ 303 | "Esta clase maneja el estado del usuario durante una simulación, tanto las variables públicas como privadas de este durante el tiempo." 304 | ] 305 | }, 306 | { 307 | "cell_type": "code", 308 | "metadata": { 309 | "id": "wPMnZJGyBe3U" 310 | }, 311 | "source": [ 312 | "class LTSUserState(user.AbstractUserState):\n", 313 | " def __init__(self, memory_discount, sensitivity, innovation_stddev,\n", 314 | " meme_mean, meme_stddev, educ_mean, educ_stddev,\n", 315 | " net_educativeness_exposure, time_budget, observation_noise_stddev=0.1\n", 316 | " ):\n", 317 | " ## Transition model parameters\n", 318 | " self.memory_discount = memory_discount\n", 319 | " self.sensitivity = sensitivity\n", 320 | " self.innovation_stddev = innovation_stddev\n", 321 | "\n", 322 | " ## Engagement parameters\n", 323 | " self.meme_mean = meme_mean\n", 324 | " self.meme_stddev = meme_stddev\n", 325 | " self.educ_mean = educ_mean\n", 326 | " self.educ_stddev = educ_stddev\n", 327 | "\n", 328 | " ## State variables\n", 329 | " self.net_educativeness_exposure = net_educativeness_exposure\n", 330 | " self.satisfaction = 1 / (1 + np.exp(-sensitivity * net_educativeness_exposure))\n", 331 | " self.time_budget = time_budget\n", 332 | "\n", 333 | " # Noise\n", 334 | " self._observation_noise = observation_noise_stddev\n", 335 | "\n", 336 | " # Al igual que con los documentos, se retorna la observación del estado del usuario, en este caso lo único público es su satisfacción\n", 337 | " def create_observation(self):\n", 338 | " \"\"\"User's state is not observable.\"\"\"\n", 339 | " clip_low, clip_high = (-1.0 / (1.0 * self._observation_noise),\n", 340 | " 1.0 / (1.0 * self._observation_noise))\n", 341 | " noise = stats.truncnorm(\n", 342 | " clip_low, clip_high, loc=0.0, scale=self._observation_noise).rvs()\n", 343 | " noisy_sat = self.satisfaction + noise\n", 344 | " return 
np.array([noisy_sat,])\n", 345 | "\n", 346 | " # También hay que definir el espacio de las variables que se retornen de una observación\n", 347 | " @staticmethod\n", 348 | " def observation_space():\n", 349 | " return spaces.Box(shape=(1,), dtype=np.float32, low=-2.0, high=2.0)\n", 350 | " \n", 351 | " # Función de score para usar en el modelo de selección del usuario: en este caso el usuario tenderá a elegir más contenido de memes\n", 352 | " def score_document(self, doc_obs):\n", 353 | " return 1 - doc_obs['educativeness']\n" 354 | ], 355 | "execution_count": null, 356 | "outputs": [] 357 | }, 358 | { 359 | "cell_type": "markdown", 360 | "metadata": { 361 | "id": "V21OxBX0B3nH" 362 | }, 363 | "source": [ 364 | "### Sampler" 365 | ] 366 | }, 367 | { 368 | "cell_type": "markdown", 369 | "metadata": { 370 | "id": "WA92tNtyQd_t" 371 | }, 372 | "source": [ 373 | "Clase que sampleará los usuarios para la simulación, en este caso hay muchos parámetros que quedarán hardcodeados, pero se puede hacer dinámico." 
374 | ] 375 | }, 376 | { 377 | "cell_type": "code", 378 | "metadata": { 379 | "id": "E4NYbjnEB3Fu" 380 | }, 381 | "source": [ 382 | "class LTSStaticUserSampler(user.AbstractUserSampler):\n", 383 | " _state_parameters = None\n", 384 | "\n", 385 | " def __init__(self,\n", 386 | " user_ctor=LTSUserState,\n", 387 | " memory_discount=0.9,\n", 388 | " sensitivity=0.01,\n", 389 | " innovation_stddev=0.05,\n", 390 | " meme_mean=5.0,\n", 391 | " meme_stddev=1.0,\n", 392 | " educ_mean=4.0,\n", 393 | " educ_stddev=1.0,\n", 394 | " time_budget=60,\n", 395 | " **kwargs):\n", 396 | " self._state_parameters = {'memory_discount': memory_discount,\n", 397 | " 'sensitivity': sensitivity,\n", 398 | " 'innovation_stddev': innovation_stddev,\n", 399 | " 'meme_mean': meme_mean,\n", 400 | " 'meme_stddev': meme_stddev,\n", 401 | " 'educ_mean': educ_mean,\n", 402 | " 'educ_stddev': educ_stddev,\n", 403 | " 'time_budget': time_budget\n", 404 | " }\n", 405 | " super(LTSStaticUserSampler, self).__init__(user_ctor, **kwargs)\n", 406 | "\n", 407 | " def sample_user(self):\n", 408 | " starting_nee = ((self._rng.random_sample() - .5) *\n", 409 | " (1 / (1.0 - self._state_parameters['memory_discount'])))\n", 410 | " self._state_parameters['net_educativeness_exposure'] = starting_nee\n", 411 | " return self._user_ctor(**self._state_parameters)" 412 | ], 413 | "execution_count": null, 414 | "outputs": [] 415 | }, 416 | { 417 | "cell_type": "markdown", 418 | "metadata": { 419 | "id": "7x9OsvaqB9Pg" 420 | }, 421 | "source": [ 422 | "### Response" 423 | ] 424 | }, 425 | { 426 | "cell_type": "markdown", 427 | "metadata": { 428 | "id": "MWq6dHamQob5" 429 | }, 430 | "source": [ 431 | "Clase que define como es la respuesta de un usuario al interactuar con un documento." 
432 | ] 433 | }, 434 | { 435 | "cell_type": "code", 436 | "metadata": { 437 | "id": "pwvLS9wrB5Pu" 438 | }, 439 | "source": [ 440 | "class LTSResponse(user.AbstractResponse):\n", 441 | " # The maximum degree of engagement.\n", 442 | " MAX_ENGAGEMENT_MAGNITUDE = 100.0\n", 443 | "\n", 444 | " def __init__(self, cluster_id, clicked=False, engagement=0.0):\n", 445 | " self.clicked = clicked\n", 446 | " self.engagement = engagement\n", 447 | " self.cluster_id = cluster_id\n", 448 | "\n", 449 | " # Se crea la observación: si dió o no click, cuanto tiempo vió el item y a que cluster pertenece.\n", 450 | " def create_observation(self):\n", 451 | " return {'click': int(self.clicked),\n", 452 | " 'engagement': np.array(self.engagement),\n", 453 | " 'cluster_id': self.cluster_id}\n", 454 | "\n", 455 | " # Se define el espacio de estas variables\n", 456 | " @classmethod\n", 457 | " def response_space(cls):\n", 458 | " # `engagement` feature range is [0, MAX_ENGAGEMENT_MAGNITUDE]\n", 459 | " return spaces.Dict({\n", 460 | " 'click':\n", 461 | " spaces.Discrete(2),\n", 462 | " 'engagement':\n", 463 | " spaces.Box(\n", 464 | " low=0.0,\n", 465 | " high=cls.MAX_ENGAGEMENT_MAGNITUDE,\n", 466 | " shape=tuple(),\n", 467 | " dtype=np.float32),\n", 468 | " 'cluster_id':\n", 469 | " spaces.Discrete(4)\n", 470 | " })" 471 | ], 472 | "execution_count": null, 473 | "outputs": [] 474 | }, 475 | { 476 | "cell_type": "markdown", 477 | "metadata": { 478 | "id": "t_niBbg0NuId" 479 | }, 480 | "source": [ 481 | "### Model" 482 | ] 483 | }, 484 | { 485 | "cell_type": "markdown", 486 | "metadata": { 487 | "id": "32AW3hr9Q7VG" 488 | }, 489 | "source": [ 490 | "Finalmente se define el modelo del usuario, el cual se compone por las clases definidas anteriormente" 491 | ] 492 | }, 493 | { 494 | "cell_type": "code", 495 | "metadata": { 496 | "id": "VfXKd4nZCDvZ" 497 | }, 498 | "source": [ 499 | "class LTSUserModel(user.AbstractUserModel):\n", 500 | " def __init__(self, slate_size, seed=0):\n", 501 | " 
super(LTSUserModel, self).__init__(LTSResponse, LTSStaticUserSampler(LTSUserState, seed=seed), slate_size)\n", 502 | " self.choice_model = MultinomialLogitChoiceModel({})\n", 503 | " \n", 504 | " def is_terminal(self):\n", 505 | " # Retorna un boolean si la sesión se terminó, ya que el user tiene una variable de tiempo disponible (time_budget)\n", 506 | " return self._user_state.time_budget <= 0\n", 507 | "\n", 508 | " def simulate_response(self, slate_documents):\n", 509 | " # Lista con respuestas vacías a partir del slate\n", 510 | " responses = [self._response_model_ctor(d.cluster_id) for d in slate_documents]\n", 511 | " # Se usa el choice_model del user para saber a qué documento le hace click\n", 512 | " self.choice_model.score_documents(self._user_state,\n", 513 | " [doc.create_observation() for doc in slate_documents])\n", 514 | " scores = self.choice_model.scores\n", 515 | " selected_index = self.choice_model.choose_item()\n", 516 | " # Se genera la respuesta para el item que se clickeó\n", 517 | " self.generate_response(slate_documents[selected_index],\n", 518 | " responses[selected_index])\n", 519 | " return responses\n", 520 | "\n", 521 | " def generate_response(self, doc, response):\n", 522 | " response.clicked = True\n", 523 | " # Se interpola linealmente entre meme y educativo\n", 524 | " engagement_loc = (doc.educativeness * self._user_state.meme_mean + (1 - doc.educativeness) * self._user_state.educ_mean)\n", 525 | " engagement_loc *= self._user_state.satisfaction\n", 526 | " engagement_scale = (doc.educativeness * self._user_state.meme_stddev + ((1 - doc.educativeness) * self._user_state.educ_stddev))\n", 527 | " log_engagement = np.random.normal(loc=engagement_loc,\n", 528 | " scale=engagement_scale)\n", 529 | " response.engagement = np.exp(log_engagement)\n", 530 | "\n", 531 | " # Función que hace update del estado del usuario\n", 532 | " def update_state(self, slate_documents, responses):\n", 533 | " for doc, response in zip(slate_documents, 
responses):\n", 534 | " if response.clicked:\n", 535 | " innovation = np.random.normal(scale=self._user_state.innovation_stddev)\n", 536 | " net_educativeness_exposure = (self._user_state.memory_discount * self._user_state.net_educativeness_exposure - 2.0 * (doc.educativeness - 0.5) + innovation)\n", 537 | " self._user_state.net_educativeness_exposure = net_educativeness_exposure\n", 538 | " satisfaction = 1 / (1.0 + np.exp(-self._user_state.sensitivity * net_educativeness_exposure))\n", 539 | " self._user_state.satisfaction = satisfaction\n", 540 | " self._user_state.time_budget -= 1\n", 541 | " return\n" 542 | ], 543 | "execution_count": null, 544 | "outputs": [] 545 | }, 546 | { 547 | "cell_type": "markdown", 548 | "metadata": { 549 | "id": "FG0h-b0eqt3M" 550 | }, 551 | "source": [ 552 | "## Crear environment: parámetros\n", 553 | "* *slate_size*: Tamaño del set de items a presentar al usuario.\n", 554 | "* *num_candidates*: número de documentos presentes en la base de datos en cualquier momento de la simulación.\n", 555 | "* *resample_documents*: especifica si se vuelven a samplear los documentos desde la base de datos entre episodios de la simulación." 
556 | ] 557 | }, 558 | { 559 | "cell_type": "code", 560 | "metadata": { 561 | "id": "-bUw9z6KB_QL" 562 | }, 563 | "source": [ 564 | "slate_size = 3\n", 565 | "num_candidates = 10\n", 566 | "ltsenv = environment.Environment(\n", 567 | " LTSUserModel(slate_size),\n", 568 | " LTSDocumentSampler(),\n", 569 | " num_candidates,\n", 570 | " slate_size,\n", 571 | " resample_documents=True)\n" 572 | ], 573 | "execution_count": null, 574 | "outputs": [] 575 | }, 576 | { 577 | "cell_type": "markdown", 578 | "metadata": { 579 | "id": "zTzyMHe9rYj2" 580 | }, 581 | "source": [ 582 | "### Parámetro a optimizar: Engagement" 583 | ] 584 | }, 585 | { 586 | "cell_type": "code", 587 | "metadata": { 588 | "id": "ODqAlrjrO2__" 589 | }, 590 | "source": [ 591 | "def clicked_engagement_reward(responses):\n", 592 | " reward = 0.0\n", 593 | " for response in responses:\n", 594 | " if response.clicked:\n", 595 | " reward += response.engagement\n", 596 | " return reward" 597 | ], 598 | "execution_count": null, 599 | "outputs": [] 600 | }, 601 | { 602 | "cell_type": "code", 603 | "metadata": { 604 | "id": "swYsZBh7PAdm" 605 | }, 606 | "source": [ 607 | "# Instanciar environment\n", 608 | "lts_gym_env = recsim_gym.RecSimGymEnv(ltsenv, clicked_engagement_reward)" 609 | ], 610 | "execution_count": null, 611 | "outputs": [] 612 | }, 613 | { 614 | "cell_type": "code", 615 | "metadata": { 616 | "id": "aLrJ4MWSPMyO" 617 | }, 618 | "source": [ 619 | "observation_0 = lts_gym_env.reset()\n", 620 | "print('Observation 0')\n", 621 | "print('Available documents')\n", 622 | "doc_strings = ['doc_id ' + key + \" educativeness \" + str(value) for key, value\n", 623 | " in observation_0['doc'].items()]\n", 624 | "print('\\n'.join(doc_strings))\n", 625 | "print('Noisy user state observation')\n", 626 | "print(observation_0['user'])\n", 627 | "# \"Agente\" recomienda los primeros 3 documentos\n", 628 | "recommendation_slate_0 = [0, 1, 2]\n", 629 | "observation_1, reward, done, _ = 
lts_gym_env.step(recommendation_slate_0)\n", 630 | "print('Observation 1')\n", 631 | "print('Available documents')\n", 632 | "doc_strings = ['doc_id ' + key + \" educativeness \" + str(value) for key, value\n", 633 | " in observation_1['doc'].items()]\n", 634 | "print('\\n'.join(doc_strings))\n", 635 | "rsp_strings = [str(response) for response in observation_1['response']]\n", 636 | "print('User responses to documents in the slate')\n", 637 | "print('\\n'.join(rsp_strings))\n", 638 | "print('Noisy user state observation')\n", 639 | "print(observation_1['user'])" 640 | ], 641 | "execution_count": null, 642 | "outputs": [] 643 | }, 644 | { 645 | "cell_type": "markdown", 646 | "metadata": { 647 | "id": "6EvnGBRFqgLB" 648 | }, 649 | "source": [ 650 | "## Agent" 651 | ] 652 | }, 653 | { 654 | "cell_type": "code", 655 | "metadata": { 656 | "id": "BII0kzpcPOJY" 657 | }, 658 | "source": [ 659 | "from recsim import agent\n", 660 | "from recsim.agents.layers.abstract_click_bandit import AbstractClickBanditLayer\n", 661 | "from recsim.agents.layers.cluster_click_statistics import ClusterClickStatsLayer" 662 | ], 663 | "execution_count": null, 664 | "outputs": [] 665 | }, 666 | { 667 | "cell_type": "markdown", 668 | "metadata": { 669 | "id": "Fmbbm7otSg42" 670 | }, 671 | "source": [ 672 | "Crearemos un agente simple que ordene los documentos de un tópico de acuerdo a su educativeness" 673 | ] 674 | }, 675 | { 676 | "cell_type": "code", 677 | "metadata": { 678 | "id": "k47hd5pdqkjk" 679 | }, 680 | "source": [ 681 | "class GreedyClusterAgent(agent.AbstractEpisodicRecommenderAgent):\n", 682 | " def __init__(self, observation_space, action_space, cluster_id, pro_educ, **kwargs):\n", 683 | " del observation_space\n", 684 | " super(GreedyClusterAgent, self).__init__(action_space)\n", 685 | " self._cluster_id = cluster_id\n", 686 | " self.pro_educ = pro_educ\n", 687 | "\n", 688 | " def step(self, reward, observation):\n", 689 | " del reward\n", 690 | " my_docs = []\n", 691 | " 
my_doc_educativeness = []\n", 692 | " for i, doc in enumerate(observation['doc'].values()):\n", 693 | " if doc['cluster_id'] == self._cluster_id:\n", 694 | " my_docs.append(i)\n", 695 | " my_doc_educativeness.append(doc['educativeness'])\n", 696 | " if not bool(my_docs):\n", 697 | " return []\n", 698 | " # Agregamos esta variable booleana para determinar si ordena los documentos de mayor a menor o al revés (algunos agentes preferirán recomendar los memes primero)\n", 699 | " if self.pro_educ:\n", 700 | " sorted_indices = np.argsort(my_doc_educativeness)[::-1]\n", 701 | " else:\n", 702 | " sorted_indices = np.argsort(my_doc_educativeness)\n", 703 | " return list(np.array(my_docs)[sorted_indices])\n" 704 | ], 705 | "execution_count": null, 706 | "outputs": [] 707 | }, 708 | { 709 | "cell_type": "code", 710 | "metadata": { 711 | "id": "IprHtI5TwEGm" 712 | }, 713 | "source": [ 714 | "# Obtenemos el número de tópicos disponibles\n", 715 | "num_topics = LTSDocument.observation_space()['cluster_id'].n\n", 716 | "# Creamos un agente para cada tópico\n", 717 | "base_agent_ctors = [functools.partial(GreedyClusterAgent, cluster_id=i, pro_educ=np.random.choice([True, False], 1)[0]) for i in range(num_topics)]" 718 | ], 719 | "execution_count": null, 720 | "outputs": [] 721 | }, 722 | { 723 | "cell_type": "code", 724 | "metadata": { 725 | "id": "ZUW3In1zwYaa" 726 | }, 727 | "source": [ 728 | "# Recsim posee clases que se pueden usar como \"capas\" en keras o pytorch, aquí usamos AbstractBanditLayer que recibe un conjunto de agents que trata como arms\n", 729 | "bandit_ctor = functools.partial(AbstractClickBanditLayer, arm_base_agent_ctors=base_agent_ctors)\n", 730 | "# Otra capa que se puede usar es ClusterClickStatsLayer la cual le pasa información del número de clicks que ha hecho el usuario a cada cluster\n", 731 | "cluster_bandit = ClusterClickStatsLayer(bandit_ctor,\n", 732 | " lts_gym_env.observation_space,\n", 733 | " lts_gym_env.action_space)" 734 | ], 735 | 
"execution_count": null, 736 | "outputs": [] 737 | }, 738 | { 739 | "cell_type": "markdown", 740 | "metadata": { 741 | "id": "JaV0-YuFUEH1" 742 | }, 743 | "source": [ 744 | "Ejemplo de recomendación hecho por este cluster de bandits" 745 | ] 746 | }, 747 | { 748 | "cell_type": "code", 749 | "metadata": { 750 | "id": "tWqSBCxjw2IP" 751 | }, 752 | "source": [ 753 | "observation0 = lts_gym_env.reset()\n", 754 | "slate = cluster_bandit.begin_episode(observation0)\n", 755 | "print(\"Cluster bandit slate 0:\")\n", 756 | "doc_list = list(observation0['doc'].values())\n", 757 | "for doc_position in slate:\n", 758 | " print(doc_list[doc_position])" 759 | ], 760 | "execution_count": null, 761 | "outputs": [] 762 | }, 763 | { 764 | "cell_type": "markdown", 765 | "metadata": { 766 | "id": "OUA_EiXFUNPg" 767 | }, 768 | "source": [ 769 | "Agregaremos una función que toma los parámetros de la simulación y crea nuestro agente" 770 | ] 771 | }, 772 | { 773 | "cell_type": "code", 774 | "metadata": { 775 | "id": "QWO22Ldm0qYp" 776 | }, 777 | "source": [ 778 | "def create_agent(sess, environment, eval_mode, summary_writer=None):\n", 779 | " kwargs = {\n", 780 | " 'observation_space': environment.observation_space,\n", 781 | " 'action_space': environment.action_space,\n", 782 | " 'summary_writer': summary_writer,\n", 783 | " 'eval_mode': eval_mode,\n", 784 | " }\n", 785 | " return ClusterClickStatsLayer(bandit_ctor, **kwargs)" 786 | ], 787 | "execution_count": null, 788 | "outputs": [] 789 | }, 790 | { 791 | "cell_type": "markdown", 792 | "metadata": { 793 | "id": "08PkwPjI5cf8" 794 | }, 795 | "source": [ 796 | "### Entrenamiento" 797 | ] 798 | }, 799 | { 800 | "cell_type": "code", 801 | "metadata": { 802 | "id": "pPKOmb_-w4Cu" 803 | }, 804 | "source": [ 805 | "tmp_base_dir = '/tmp/recsim/'\n", 806 | "lts_gym_env.reset()\n", 807 | "runner = runner_lib.TrainRunner(\n", 808 | " base_dir=tmp_base_dir,\n", 809 | " create_agent_fn=create_agent,\n", 810 | " env=lts_gym_env,\n", 811 | " 
episode_log_file=\"\",\n", 812 | " max_training_steps=100,\n", 813 | " num_iterations=20)\n", 814 | "runner.run_experiment()" 815 | ], 816 | "execution_count": null, 817 | "outputs": [] 818 | }, 819 | { 820 | "cell_type": "markdown", 821 | "metadata": { 822 | "id": "oRhd4y1t1OqK" 823 | }, 824 | "source": [ 825 | "## Tensorboard" 826 | ] 827 | }, 828 | { 829 | "cell_type": "code", 830 | "metadata": { 831 | "id": "6WshLZLs1OCI" 832 | }, 833 | "source": [ 834 | "# Load the TensorBoard notebook extension\n", 835 | "%load_ext tensorboard" 836 | ], 837 | "execution_count": null, 838 | "outputs": [] 839 | }, 840 | { 841 | "cell_type": "code", 842 | "metadata": { 843 | "id": "QPVb_LSP002c" 844 | }, 845 | "source": [ 846 | "%tensorboard --logdir=/tmp/recsim/" 847 | ], 848 | "execution_count": null, 849 | "outputs": [] 850 | }, 851 | { 852 | "cell_type": "markdown", 853 | "metadata": { 854 | "id": "BLnC2g6E5ISA" 855 | }, 856 | "source": [ 857 | "# Actividades" 858 | ] 859 | }, 860 | { 861 | "cell_type": "markdown", 862 | "metadata": { 863 | "id": "6hYgL5MN5MSq" 864 | }, 865 | "source": [ 866 | "### Actividad 1:\n", 867 | "\n", 868 | "Entrene por más episodios y describa lo que está ocurriendo con el agente y el usuario." 
869 | ] 870 | }, 871 | { 872 | "cell_type": "code", 873 | "metadata": { 874 | "id": "Enu8Kf565Lfm" 875 | }, 876 | "source": [ 877 | "" 878 | ], 879 | "execution_count": null, 880 | "outputs": [] 881 | }, 882 | { 883 | "cell_type": "markdown", 884 | "metadata": { 885 | "id": "9s0gJp7s5h9a" 886 | }, 887 | "source": [ 888 | "### Actividad 2\n", 889 | "\n", 890 | "Explique con sus palabras cuál es la principal ventaja de utilizar una librería como recsim o recogym para Reinforcement Learning" 891 | ] 892 | }, 893 | { 894 | "cell_type": "code", 895 | "metadata": { 896 | "id": "nFpSI7U-5jl6" 897 | }, 898 | "source": [ 899 | "" 900 | ], 901 | "execution_count": null, 902 | "outputs": [] 903 | }, 904 | { 905 | "cell_type": "markdown", 906 | "metadata": { 907 | "id": "VlWGRYYiUsFR" 908 | }, 909 | "source": [ 910 | "### Actividad 3\n", 911 | "\n", 912 | "¿Cómo se podría mejorar la forma de modelar al usuario?" 913 | ] 914 | }, 915 | { 916 | "cell_type": "code", 917 | "metadata": { 918 | "id": "DwO4JNR8Uu_A" 919 | }, 920 | "source": [ 921 | "" 922 | ], 923 | "execution_count": null, 924 | "outputs": [] 925 | } 926 | ] 927 | } -------------------------------------------------------------------------------- /practicos/pyRecLab_SlopeOne.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "colab_type": "text", 7 | "id": "view-in-github" 8 | }, 9 | "source": [ 10 | "\"Open\n" 11 | ] 12 | }, 13 | { 14 | "cell_type": "markdown", 15 | "metadata": { 16 | "colab_type": "text", 17 | "id": "view-in-youtube" 18 | }, 19 | "source": [ 20 | "\"Open\n" 21 | ] 22 | }, 23 | { 24 | "cell_type": "markdown", 25 | "metadata": { 26 | "colab_type": "text", 27 | "id": "0kY7Xfgi84HC" 28 | }, 29 | "source": [ 30 | "# **Práctico Sistemas Recomendadores: pyreclab - Slope One**\n", 31 | "\n", 32 | "En este práctico seguiremos utilizando [pyreclab](https://github.com/gasevi/pyreclab), con el 
cual estamos aprendiendo distintas técnicas de recomendación. Seguiremos usando la misma base de datos de los prácticos anteriores, para que puedan comparar los métodos y sus implementaciones. Este práctico está acompañado de un [video comentando la actividad](https://youtu.be/A2euuevpYis).\n", 33 | "\n", 34 | "En esta oportunidad exploraremos el recomendador de Pendiente Uno o **Slope One** [1].\n", 35 | "\n", 36 | "**Adaptado y preparado por:** Francisca Cattan 📩 fpcattan@uc.cl\n", 37 | "\n", 38 | "Referencias 📖\n", 39 | "------\n", 40 | "[1] *Lemire, D., & Maclachlan, A. (2005, April). Slope One Predictors for Online Rating-Based Collaborative Filtering. In SDM (Vol. 5, pp. 1-5).*\n" 41 | ] 42 | }, 43 | { 44 | "cell_type": "markdown", 45 | "metadata": { 46 | "colab_type": "text", 47 | "id": "1s6Ac_Kh9qiH" 48 | }, 49 | "source": [ 50 | "**Nombre**: completa tu nombre aquí :D" 51 | ] 52 | }, 53 | { 54 | "cell_type": "markdown", 55 | "metadata": { 56 | "colab_type": "text", 57 | "id": "Z4Jr74pP-4gt" 58 | }, 59 | "source": [ 60 | "## Actividad 1 👓\n", 61 | "\n", 62 | "Antes de empezar con el práctico, responde la siguiente pregunta con lo visto en clases.\n", 63 | "\n", 64 | "**Pregunta:** Explique cómo funciona Slope One (como modelo teórico, no piense en la implementación). En particular explique:\n", 65 | "\n", 66 | "- Repasemos: ¿Por qué este recomendador es un algoritmo de Filtrado Colaborativo?\n", 67 | "- Este Filtrado Colaborativo, ¿está basado en el usuario o en los items? ¿Por qué?\n", 68 | "- ¿Qué datos recibe Slope One y qué hace con ellos? 
(qué tipo de columnas y qué calculo)\n", 69 | "- ¿Qué pasaría si se agrega un nuevo rating a la base de datos?\n", 70 | "- Opcional: ¿Cómo crees que le iría al recomendador con un usuario que acaba de entrar al sistema y ha asignado muy pocos ratings?\n", 71 | "\n", 72 | "💡 *Hint: La bibliografía todo lo puede.*\n", 73 | "\n", 74 | "**Respuesta:**\n", 75 | "\n", 76 | "\n" 77 | ] 78 | }, 79 | { 80 | "cell_type": "markdown", 81 | "metadata": { 82 | "colab_type": "text", 83 | "id": "HaFY1qNzHyCD" 84 | }, 85 | "source": [ 86 | "# **Configuración Inicial**" 87 | ] 88 | }, 89 | { 90 | "cell_type": "markdown", 91 | "metadata": { 92 | "colab_type": "text", 93 | "id": "MvGNsjiIIC6G" 94 | }, 95 | "source": [ 96 | "## Paso 1:\n", 97 | "Descargue directamente a Colab los archivos del dataset ejecutando las siguientes 3 celdas:\n" 98 | ] 99 | }, 100 | { 101 | "cell_type": "code", 102 | "execution_count": 34, 103 | "metadata": { 104 | "colab": { 105 | "base_uri": "https://localhost:8080/", 106 | "height": 85 107 | }, 108 | "colab_type": "code", 109 | "id": "_7M_ehHq8ti2", 110 | "outputId": "407a5c44-7810-4273-c82c-c3f343621e2b" 111 | }, 112 | "outputs": [ 113 | { 114 | "name": "stdout", 115 | "output_type": "stream", 116 | "text": [ 117 | " % Total % Received % Xferd Average Speed Time Time Time Current\n", 118 | " Dload Upload Total Spent Left Speed\n", 119 | "100 388 0 388 0 0 311 0 --:--:-- 0:00:01 --:--:-- 311\n", 120 | "100 1546k 100 1546k 0 0 914k 0 0:00:01 0:00:01 --:--:-- 914k\n" 121 | ] 122 | } 123 | ], 124 | "source": [ 125 | "!curl -L -o \"u1.base\" \"https://drive.google.com/uc?export=download&id=1bGweNw7NbOHoJz11v6ld7ymLR8MLvBsA\"" 126 | ] 127 | }, 128 | { 129 | "cell_type": "code", 130 | "execution_count": 35, 131 | "metadata": { 132 | "colab": { 133 | "base_uri": "https://localhost:8080/", 134 | "height": 85 135 | }, 136 | "colab_type": "code", 137 | "id": "CSdhurW1InVW", 138 | "outputId": "0f573bf7-7a7b-4c6d-f893-25f1d02aa149" 139 | }, 140 | "outputs": [ 141 | { 
142 | "name": "stdout", 143 | "output_type": "stream", 144 | "text": [ 145 | " % Total % Received % Xferd Average Speed Time Time Time Current\n", 146 | " Dload Upload Total Spent Left Speed\n", 147 | "100 388 0 388 0 0 675 0 --:--:-- --:--:-- --:--:-- 675\n", 148 | "100 385k 100 385k 0 0 434k 0 --:--:-- --:--:-- --:--:-- 94.2M\n" 149 | ] 150 | } 151 | ], 152 | "source": [ 153 | "!curl -L -o \"u1.test\" \"https://drive.google.com/uc?export=download&id=1f_HwJWC_1HFzgAjKAWKwkuxgjkhkXrVg\"" 154 | ] 155 | }, 156 | { 157 | "cell_type": "code", 158 | "execution_count": 36, 159 | "metadata": { 160 | "colab": { 161 | "base_uri": "https://localhost:8080/", 162 | "height": 85 163 | }, 164 | "colab_type": "code", 165 | "id": "QoGzmPG2InzM", 166 | "outputId": "e66b0f74-3405-4b98-b09a-94e398f89647" 167 | }, 168 | "outputs": [ 169 | { 170 | "name": "stdout", 171 | "output_type": "stream", 172 | "text": [ 173 | " % Total % Received % Xferd Average Speed Time Time Time Current\n", 174 | " Dload Upload Total Spent Left Speed\n", 175 | "100 388 0 388 0 0 556 0 --:--:-- --:--:-- --:--:-- 555\n", 176 | "100 230k 100 230k 0 0 193k 0 0:00:01 0:00:01 --:--:-- 225M\n" 177 | ] 178 | } 179 | ], 180 | "source": [ 181 | "!curl -L -o \"u.item\" \"https://drive.google.com/uc?export=download&id=10YLhxkO2-M_flQtyo9OYV4nT9IvSESuz\"" 182 | ] 183 | }, 184 | { 185 | "cell_type": "markdown", 186 | "metadata": { 187 | "colab_type": "text", 188 | "id": "kluVF2eJIyjq" 189 | }, 190 | "source": [ 191 | "Los archivos **u1.base** y **u1.test** tienen tuplas {usuario, item, rating, timestamp}, que es la información de preferencias de usuarios sobre películas en una muestra del dataset [movielens](https://grouplens.org/datasets/movielens/)." 192 | ] 193 | }, 194 | { 195 | "cell_type": "markdown", 196 | "metadata": { 197 | "colab_type": "text", 198 | "id": "uopKG59rJWM9" 199 | }, 200 | "source": [ 201 | "## Paso 2:\n", 202 | "\n", 203 | "Instalamos pyreclab utilizando pip." 
204 | ] 205 | }, 206 | { 207 | "cell_type": "code", 208 | "execution_count": 37, 209 | "metadata": { 210 | "colab": { 211 | "base_uri": "https://localhost:8080/", 212 | "height": 34 213 | }, 214 | "colab_type": "code", 215 | "id": "hi9L5FZdJWfI", 216 | "outputId": "e966155f-6bef-47d1-c124-458905663b38" 217 | }, 218 | "outputs": [ 219 | { 220 | "name": "stdout", 221 | "output_type": "stream", 222 | "text": [ 223 | "Requirement already up-to-date: pyreclab in /usr/local/lib/python3.6/dist-packages (0.1.14)\n" 224 | ] 225 | } 226 | ], 227 | "source": [ 228 | "!pip install pyreclab --upgrade" 229 | ] 230 | }, 231 | { 232 | "cell_type": "markdown", 233 | "metadata": { 234 | "colab_type": "text", 235 | "id": "M28DoBHtN5qD" 236 | }, 237 | "source": [ 238 | "## Paso 3:\n", 239 | "\n", 240 | "Hacemos los imports necesarios para este práctico." 241 | ] 242 | }, 243 | { 244 | "cell_type": "code", 245 | "execution_count": 38, 246 | "metadata": { 247 | "colab": {}, 248 | "colab_type": "code", 249 | "id": "KQ0jQ_JcN3el" 250 | }, 251 | "outputs": [], 252 | "source": [ 253 | "import pyreclab\n", 254 | "import numpy as np\n", 255 | "import pandas as pd" 256 | ] 257 | }, 258 | { 259 | "cell_type": "markdown", 260 | "metadata": { 261 | "colab_type": "text", 262 | "id": "bmxNQtXvfJy3" 263 | }, 264 | "source": [ 265 | "# **El dataset**\n", 266 | "\n", 267 | "💡 *En prácticos anteriores, vimos como analizar este dataset. Puedes revisarlos en caso de dudas.*" 268 | ] 269 | }, 270 | { 271 | "cell_type": "markdown", 272 | "metadata": { 273 | "colab_type": "text", 274 | "id": "nAdXLT7haQe1" 275 | }, 276 | "source": [ 277 | "## Paso 4:\n", 278 | "\n", 279 | "Ya que queremos crear una lista de recomendación de items para un usuario en especifico, necesitamos obtener información adicional de cada película tal como título, fecha de lanzamiento, género, etc. Cargaremos el archivo de items descargado \"u.item\" para poder mapear cada identificador de ítem al conjunto de datos que lo describe." 
280 | ] 281 | }, 282 | { 283 | "cell_type": "code", 284 | "execution_count": 39, 285 | "metadata": { 286 | "colab": {}, 287 | "colab_type": "code", 288 | "id": "p0vHiGJheUkB" 289 | }, 290 | "outputs": [], 291 | "source": [ 292 | "# Definimos el orden de las columnas\n", 293 | "info_cols = [ 'movieid', 'title', 'release_date', 'video_release_date', 'IMDb_URL', \\\n", 294 | " 'unknown', 'Action', 'Adventure', 'Animation', 'Children', 'Comedy', \\\n", 295 | " 'Crime', 'Documentary', 'Drama', 'Fantasy', 'Film-Noir', 'Horror', \\\n", 296 | " 'Musical', 'Mystery', 'Romance', 'Sci-Fi', 'Thriller', 'War', 'Western' ]\n", 297 | "\n", 298 | "# Asignamos a una variable la estructura de datos de los items\n", 299 | "info_file = pd.read_csv('u.item', sep='|', index_col = 0, names = info_cols, header=None, encoding='latin-1')" 300 | ] 301 | }, 302 | { 303 | "cell_type": "markdown", 304 | "metadata": { 305 | "colab_type": "text", 306 | "id": "z4pjNDlMOOdo" 307 | }, 308 | "source": [ 309 | "# **Slope One**" 310 | ] 311 | }, 312 | { 313 | "cell_type": "markdown", 314 | "metadata": { 315 | "colab_type": "text", 316 | "id": "H5sx7xkfQE6W" 317 | }, 318 | "source": [ 319 | "## Paso 5:\n", 320 | "\n", 321 | "Seguiremos un camino muy similar a los ejercicios de User KNN e Item KNN. Crearemos una instancia del algoritmo de recomendación y luego pasaremos a la fase de entrenamiento." 
322 | ] 323 | }, 324 | { 325 | "cell_type": "code", 326 | "execution_count": 40, 327 | "metadata": { 328 | "colab": {}, 329 | "colab_type": "code", 330 | "id": "v3D7CAoyPfko" 331 | }, 332 | "outputs": [], 333 | "source": [ 334 | "# Declaramos la instancia SlopeOne\n", 335 | "mySlopeOne = pyreclab.SlopeOne(dataset='u1.base', dlmchar=b'\\t', header=False, usercol=0, itemcol=1, ratingcol=2)" 336 | ] 337 | }, 338 | { 339 | "cell_type": "code", 340 | "execution_count": 41, 341 | "metadata": { 342 | "colab": {}, 343 | "colab_type": "code", 344 | "id": "EhacKomkTs51" 345 | }, 346 | "outputs": [], 347 | "source": [ 348 | "# Y entrenamos\n", 349 | "mySlopeOne.train()" 350 | ] 351 | }, 352 | { 353 | "cell_type": "markdown", 354 | "metadata": { 355 | "colab_type": "text", 356 | "id": "Wnb3DkVVTttZ" 357 | }, 358 | "source": [ 359 | "## Actividad 2 👓\n", 360 | "\n", 361 | "**Pregunta:** Explique qué hace el método `train()` en este caso, dado el modelo teórico. ¿Calcula información?, ¿no hace nada?, ¿ordena los datos? \n", 362 | "\n", 363 | "**Respuesta:**" 364 | ] 365 | }, 366 | { 367 | "cell_type": "markdown", 368 | "metadata": { 369 | "colab_type": "text", 370 | "id": "XqR8bZB2VboJ" 371 | }, 372 | "source": [ 373 | "## Paso 6:\n", 374 | "\n", 375 | "Llegó la hora de predecir el rating."
376 | ] 377 | }, 378 | { 379 | "cell_type": "code", 380 | "execution_count": 42, 381 | "metadata": { 382 | "colab": { 383 | "base_uri": "https://localhost:8080/", 384 | "height": 34 385 | }, 386 | "colab_type": "code", 387 | "id": "H9yigNx9VYZ3", 388 | "outputId": "3c58b8ee-c431-4ec2-ac32-4f1ddb47b92e" 389 | }, 390 | "outputs": [ 391 | { 392 | "data": { 393 | "text/plain": [ 394 | "3.2408759593963623" 395 | ] 396 | }, 397 | "execution_count": 42, 398 | "metadata": { 399 | "tags": [] 400 | }, 401 | "output_type": "execute_result" 402 | } 403 | ], 404 | "source": [ 405 | "# Esta es la predicción de rating que el usuario ID:457 otorgaría al ítem ID:37\n", 406 | "# De esta forma podemos comparar el resultado con los prácticos anteriores\n", 407 | "mySlopeOne.predict(\"457\", \"37\")" 408 | ] 409 | }, 410 | { 411 | "cell_type": "code", 412 | "execution_count": 43, 413 | "metadata": { 414 | "colab": {}, 415 | "colab_type": "code", 416 | "id": "-0ShLi6gYhh_" 417 | }, 418 | "outputs": [], 419 | "source": [ 420 | "# También podemos guardar la predicción en una variable\n", 421 | "prediction = mySlopeOne.predict(\"457\", \"37\")" 422 | ] 423 | }, 424 | { 425 | "cell_type": "code", 426 | "execution_count": 44, 427 | "metadata": { 428 | "colab": { 429 | "base_uri": "https://localhost:8080/", 430 | "height": 419 431 | }, 432 | "colab_type": "code", 433 | "id": "2yFtdiVcUcKW", 434 | "outputId": "257c9bbc-584c-46a0-814d-c7cb048b5596" 435 | }, 436 | "outputs": [ 437 | { 438 | "data": { 439 | "text/html": [ 440 | "
\n", 441 | "\n", 454 | "\n", 455 | " \n", 456 | " \n", 457 | " \n", 458 | " \n", 459 | " \n", 460 | " \n", 461 | " \n", 462 | " \n", 463 | " \n", 464 | " \n", 465 | " \n", 466 | " \n", 467 | " \n", 468 | " \n", 469 | " \n", 470 | " \n", 471 | " \n", 472 | " \n", 473 | " \n", 474 | " \n", 475 | " \n", 476 | " \n", 477 | " \n", 478 | " \n", 479 | " \n", 480 | " \n", 481 | " \n", 482 | " \n", 483 | " \n", 484 | " \n", 485 | " \n", 486 | " \n", 487 | " \n", 488 | " \n", 489 | " \n", 490 | " \n", 491 | " \n", 492 | " \n", 493 | " \n", 494 | " \n", 495 | " \n", 496 | " \n", 497 | " \n", 498 | " \n", 499 | " \n", 500 | " \n", 501 | " \n", 502 | " \n", 503 | " \n", 504 | " \n", 505 | " \n", 506 | " \n", 507 | " \n", 508 | " \n", 509 | " \n", 510 | " \n", 511 | " \n", 512 | " \n", 513 | " \n", 514 | " \n", 515 | " \n", 516 | " \n", 517 | " \n", 518 | " \n", 519 | " \n", 520 | " \n", 521 | " \n", 522 | " \n", 523 | " \n", 524 | " \n", 525 | " \n", 526 | " \n", 527 | " \n", 528 | " \n", 529 | " \n", 530 | " \n", 531 | " \n", 532 | " \n", 533 | " \n", 534 | " \n", 535 | " \n", 536 | " \n", 537 | " \n", 538 | " \n", 539 | " \n", 540 | " \n", 541 | " \n", 542 | " \n", 543 | "
useriditemidratingtimestamp
3726945714882393244
3727045774882393278
3727145795882393485
37272457114882397020
37273457133882393883
...............
3742045710472882395964
3742145711194882398308
3742245711685882548761
3742345712104882549905
3742445712214882549438
\n", 544 | "

156 rows × 4 columns

\n", 545 | "
" 546 | ], 547 | "text/plain": [ 548 | " userid itemid rating timestamp\n", 549 | "37269 457 1 4 882393244\n", 550 | "37270 457 7 4 882393278\n", 551 | "37271 457 9 5 882393485\n", 552 | "37272 457 11 4 882397020\n", 553 | "37273 457 13 3 882393883\n", 554 | "... ... ... ... ...\n", 555 | "37420 457 1047 2 882395964\n", 556 | "37421 457 1119 4 882398308\n", 557 | "37422 457 1168 5 882548761\n", 558 | "37423 457 1210 4 882549905\n", 559 | "37424 457 1221 4 882549438\n", 560 | "\n", 561 | "[156 rows x 4 columns]" 562 | ] 563 | }, 564 | "execution_count": 44, 565 | "metadata": { 566 | "tags": [] 567 | }, 568 | "output_type": "execute_result" 569 | } 570 | ], 571 | "source": [ 572 | "# Podemos comprobar las peliculas rankeadas por el usuario ID:457\n", 573 | "# Que ciertamente ha participado activamente (¡156 items!)\n", 574 | "train_file = pd.read_csv('u1.base', sep='\\t', names = ['userid', 'itemid', 'rating', 'timestamp'], header=None)\n", 575 | "train_file[train_file['userid'] == 457]" 576 | ] 577 | }, 578 | { 579 | "cell_type": "code", 580 | "execution_count": 45, 581 | "metadata": { 582 | "colab": { 583 | "base_uri": "https://localhost:8080/", 584 | "height": 266 585 | }, 586 | "colab_type": "code", 587 | "id": "liVoMHlAYhH_", 588 | "outputId": "fa37e458-633d-4937-d232-4825ae8fc3cc" 589 | }, 590 | "outputs": [ 591 | { 592 | "data": { 593 | "text/html": [ 594 | "
\n", 595 | "\n", 608 | "\n", 609 | " \n", 610 | " \n", 611 | " \n", 612 | " \n", 613 | " \n", 614 | " \n", 615 | " \n", 616 | " \n", 617 | " \n", 618 | " \n", 619 | " \n", 620 | " \n", 621 | " \n", 622 | " \n", 623 | " \n", 624 | " \n", 625 | " \n", 626 | " \n", 627 | " \n", 628 | " \n", 629 | " \n", 630 | " \n", 631 | " \n", 632 | " \n", 633 | " \n", 634 | " \n", 635 | " \n", 636 | " \n", 637 | " \n", 638 | " \n", 639 | " \n", 640 | " \n", 641 | " \n", 642 | " \n", 643 | " \n", 644 | " \n", 645 | " \n", 646 | " \n", 647 | " \n", 648 | " \n", 649 | " \n", 650 | " \n", 651 | " \n", 652 | " \n", 653 | " \n", 654 | " \n", 655 | " \n", 656 | " \n", 657 | " \n", 658 | " \n", 659 | " \n", 660 | " \n", 661 | " \n", 662 | " \n", 663 | " \n", 664 | " \n", 665 | " \n", 666 | " \n", 667 | " \n", 668 | " \n", 669 | "
useriditemidratingtimestamp
130213371882397011
14851201372884114635
19670268373876514002
29489363372891498510
31084385374880013483
32996405371885548384
62777773373888540352
\n", 670 | "
" 671 | ], 672 | "text/plain": [ 673 | " userid itemid rating timestamp\n", 674 | "1302 13 37 1 882397011\n", 675 | "14851 201 37 2 884114635\n", 676 | "19670 268 37 3 876514002\n", 677 | "29489 363 37 2 891498510\n", 678 | "31084 385 37 4 880013483\n", 679 | "32996 405 37 1 885548384\n", 680 | "62777 773 37 3 888540352" 681 | ] 682 | }, 683 | "execution_count": 45, 684 | "metadata": { 685 | "tags": [] 686 | }, 687 | "output_type": "execute_result" 688 | } 689 | ], 690 | "source": [ 691 | "# Y también cuáles usuarios han rankeado la pelicula ID:37\n", 692 | "train_file[train_file['itemid'] == 37]" 693 | ] 694 | }, 695 | { 696 | "cell_type": "markdown", 697 | "metadata": { 698 | "colab_type": "text", 699 | "id": "u2P14hwFXytK" 700 | }, 701 | "source": [ 702 | "## Actividad 3 👓\n", 703 | "\n", 704 | "Haremos un pequeño experimento para entender mejor como funciona Slope One. Gracias al ejercicio anterior, sabemos que el usuario 457 ya ha asignado el mejor rating (5 ⭐) a las dos peliculas ID:9 e ID:1168. Comparemos.\n", 705 | "\n", 706 | "**Pregunta:** ¿Cómo se explican estos resultados? 
\n", 707 | "\n", 708 | "**Respuesta:**" 709 | ] 710 | }, 711 | { 712 | "cell_type": "code", 713 | "execution_count": 46, 714 | "metadata": { 715 | "colab": { 716 | "base_uri": "https://localhost:8080/", 717 | "height": 51 718 | }, 719 | "colab_type": "code", 720 | "id": "gaqUjAQaVseB", 721 | "outputId": "e26f5b40-0e11-4cbe-8bd3-f35fc4e6f68e" 722 | }, 723 | "outputs": [ 724 | { 725 | "name": "stdout", 726 | "output_type": "stream", 727 | "text": [ 728 | "Prediction for ID:9 : 4.530702114105225\n", 729 | "Prediction for ID:1168 : 4.166153907775879\n" 730 | ] 731 | } 732 | ], 733 | "source": [ 734 | "prediction_id9 = mySlopeOne.predict(\"457\", \"9\")\n", 735 | "prediction_id1168 = mySlopeOne.predict(\"457\", \"1168\")\n", 736 | "\n", 737 | "print('Prediction for ID:9 :', prediction_id9)\n", 738 | "print('Prediction for ID:1168 :', prediction_id1168)" 739 | ] 740 | }, 741 | { 742 | "cell_type": "markdown", 743 | "metadata": { 744 | "colab_type": "text", 745 | "id": "N8dIE2n_YYnV" 746 | }, 747 | "source": [ 748 | "## Paso 7:\n", 749 | "\n", 750 | "Generaremos ahora una lista ordenada de las top-N recomendaciones, dado un usuario.\n", 751 | "\n" 752 | ] 753 | }, 754 | { 755 | "cell_type": "code", 756 | "execution_count": 47, 757 | "metadata": { 758 | "colab": { 759 | "base_uri": "https://localhost:8080/", 760 | "height": 34 761 | }, 762 | "colab_type": "code", 763 | "id": "CVcJ1I1nZaK1", 764 | "outputId": "4f3d974b-611c-4293-aceb-6714b75d9238" 765 | }, 766 | "outputs": [ 767 | { 768 | "name": "stdout", 769 | "output_type": "stream", 770 | "text": [ 771 | "Lista de items según ID: ['1592', '1589', '1656', '1431', '1653']\n" 772 | ] 773 | } 774 | ], 775 | "source": [ 776 | "# Mediante el método recommend() generaremos una lista top-5 de recomendaciones para el usuario ID:457\n", 777 | "reclist_slopeone = mySlopeOne.recommend(\"457\", 5)\n", 778 | "\n", 779 | "# Y visualizaremos el resultado\n", 780 | "print('Lista de items según ID:', reclist_slopeone)" 781 | ] 782 | }, 783
| { 784 | "cell_type": "code", 785 | "execution_count": 48, 786 | "metadata": { 787 | "colab": { 788 | "base_uri": "https://localhost:8080/", 789 | "height": 153 790 | }, 791 | "colab_type": "code", 792 | "id": "BFdRZKQcZtHE", 793 | "outputId": "563f54db-1f10-4c67-87fc-8dcffc68e0e5" 794 | }, 795 | "outputs": [ 796 | { 797 | "name": "stdout", 798 | "output_type": "stream", 799 | "text": [ 800 | "Lista de items por nombre:\n" 801 | ] 802 | }, 803 | { 804 | "data": { 805 | "text/plain": [ 806 | "movieid\n", 807 | "1592 Magic Hour, The (1998)\n", 808 | "1589 Schizopolis (1996)\n", 809 | "1656 Little City (1998)\n", 810 | "1431 Legal Deceit (1997)\n", 811 | "1653 Entertaining Angels: The Dorothy Day Story (1996)\n", 812 | "Name: title, dtype: object" 813 | ] 814 | }, 815 | "execution_count": 48, 816 | "metadata": { 817 | "tags": [] 818 | }, 819 | "output_type": "execute_result" 820 | } 821 | ], 822 | "source": [ 823 | "# Lo convertimos a numpy array\n", 824 | "recmovies_slopeone = np.array(reclist_slopeone).astype(int)\n", 825 | "\n", 826 | "# Utilizamos la estructura de datos de los items para encontrar los títulos recomendados\n", 827 | "print('Lista de items por nombre:')\n", 828 | "info_file.loc[recmovies_slopeone]['title']" 829 | ] 830 | }, 831 | { 832 | "cell_type": "markdown", 833 | "metadata": { 834 | "colab_type": "text", 835 | "id": "NlX6FLTxdPRm" 836 | }, 837 | "source": [ 838 | "## Actividad 4 👩🏻‍💻\n", 839 | "\n", 840 | "Genera una nueva recomendacion, modificando los hiperparametros de usuario y topN a tu elección.\n", 841 | "\n", 842 | "**Pregunta:** ¿Ves una diferencia en la recomendación entre el nuevo usuario y el usuario ID:457?\n", 843 | "\n", 844 | "**Respuesta:**" 845 | ] 846 | }, 847 | { 848 | "cell_type": "code", 849 | "execution_count": null, 850 | "metadata": { 851 | "colab": {}, 852 | "colab_type": "code", 853 | "id": "YQM7mfwDdY3j" 854 | }, 855 | "outputs": [], 856 | "source": [ 857 | "# Escribe el nuevo codigo aqui\n" 858 | ] 859 | }, 860 | { 
861 | "cell_type": "markdown", 862 | "metadata": { 863 | "colab_type": "text", 864 | "id": "iU2NlezaiIur" 865 | }, 866 | "source": [ 867 | "## Actividad 5 👩🏻‍💻\n", 868 | "\n", 869 | "Dado el usuario ID:44, cree dos listas de películas recomendadas; la primera utilizando el algoritmo Most Popular y la segunda utilizando el algoritmo Slope One.\n", 870 | "\n", 871 | "**Pregunta:** Realice un analisis apreciativo de las similitudes y diferencias entre ambas recomendaciones.\n", 872 | "\n", 873 | "**Respuesta:**" 874 | ] 875 | }, 876 | { 877 | "cell_type": "code", 878 | "execution_count": null, 879 | "metadata": { 880 | "colab": {}, 881 | "colab_type": "code", 882 | "id": "Rxk9ySLQiMC4" 883 | }, 884 | "outputs": [], 885 | "source": [ 886 | "# Escribe el nuevo codigo aqui\n" 887 | ] 888 | } 889 | ], 890 | "metadata": { 891 | "colab": { 892 | "name": "pyRecLab_SlopeOne.ipynb", 893 | "provenance": [], 894 | "toc_visible": true 895 | }, 896 | "kernelspec": { 897 | "display_name": "Python 3", 898 | "language": "python", 899 | "name": "python3" 900 | }, 901 | "language_info": { 902 | "codemirror_mode": { 903 | "name": "ipython", 904 | "version": 3 905 | }, 906 | "file_extension": ".py", 907 | "mimetype": "text/x-python", 908 | "name": "python", 909 | "nbconvert_exporter": "python", 910 | "pygments_lexer": "ipython3", 911 | "version": "3.6.0" 912 | }, 913 | "toc": { 914 | "base_numbering": 1, 915 | "nav_menu": {}, 916 | "number_sections": true, 917 | "sideBar": true, 918 | "skip_h1_title": false, 919 | "title_cell": "Table of Contents", 920 | "title_sidebar": "Contents", 921 | "toc_cell": false, 922 | "toc_position": {}, 923 | "toc_section_display": true, 924 | "toc_window_display": false 925 | } 926 | }, 927 | "nbformat": 4, 928 | "nbformat_minor": 1 929 | } 930 | -------------------------------------------------------------------------------- /proyecto/Denis-IdeasProyectosFinales-2020.pdf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/PUC-RecSys-Class/RecSysPUC-2021/39a9f6d7dba2e869118012dc5fec1140a2e0e007/proyecto/Denis-IdeasProyectosFinales-2020.pdf -------------------------------------------------------------------------------- /proyecto/Enunciado_Proyecto_Final_RecSys_2020_2.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PUC-RecSys-Class/RecSysPUC-2021/39a9f6d7dba2e869118012dc5fec1140a2e0e007/proyecto/Enunciado_Proyecto_Final_RecSys_2020_2.pdf -------------------------------------------------------------------------------- /proyecto/Enunciado_Proyecto_Final_RecSys_2021_2.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PUC-RecSys-Class/RecSysPUC-2021/39a9f6d7dba2e869118012dc5fec1140a2e0e007/proyecto/Enunciado_Proyecto_Final_RecSys_2021_2.pdf -------------------------------------------------------------------------------- /proyecto/IIC3633-propuestas-2018_p1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PUC-RecSys-Class/RecSysPUC-2021/39a9f6d7dba2e869118012dc5fec1140a2e0e007/proyecto/IIC3633-propuestas-2018_p1.pdf -------------------------------------------------------------------------------- /proyecto/IIC3633-propuestas-2018_p2.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PUC-RecSys-Class/RecSysPUC-2021/39a9f6d7dba2e869118012dc5fec1140a2e0e007/proyecto/IIC3633-propuestas-2018_p2.pdf -------------------------------------------------------------------------------- /proyecto/Vladimir-RecSysLoL2020.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PUC-RecSys-Class/RecSysPUC-2021/39a9f6d7dba2e869118012dc5fec1140a2e0e007/proyecto/Vladimir-RecSysLoL2020.pdf 
-------------------------------------------------------------------------------- /proyecto/proy_finales_2018/Araujo_etal_LoL_2018.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PUC-RecSys-Class/RecSysPUC-2021/39a9f6d7dba2e869118012dc5fec1140a2e0e007/proyecto/proy_finales_2018/Araujo_etal_LoL_2018.pdf -------------------------------------------------------------------------------- /proyecto/proy_finales_2018/Barrios_et_al_fakenews_2018.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PUC-RecSys-Class/RecSysPUC-2021/39a9f6d7dba2e869118012dc5fec1140a2e0e007/proyecto/proy_finales_2018/Barrios_et_al_fakenews_2018.pdf -------------------------------------------------------------------------------- /proyecto/proy_finales_2018/Cerda_etal_rnn_2018.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PUC-RecSys-Class/RecSysPUC-2021/39a9f6d7dba2e869118012dc5fec1140a2e0e007/proyecto/proy_finales_2018/Cerda_etal_rnn_2018.pdf -------------------------------------------------------------------------------- /proyecto/proy_finales_2018/Guzman_etal_steam_2018.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PUC-RecSys-Class/RecSysPUC-2021/39a9f6d7dba2e869118012dc5fec1140a2e0e007/proyecto/proy_finales_2018/Guzman_etal_steam_2018.pdf -------------------------------------------------------------------------------- /proyecto/proy_finales_2018/Munoz_etal_playlist_2018.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PUC-RecSys-Class/RecSysPUC-2021/39a9f6d7dba2e869118012dc5fec1140a2e0e007/proyecto/proy_finales_2018/Munoz_etal_playlist_2018.pdf -------------------------------------------------------------------------------- 
/proyecto/proy_finales_2018/Rencoret_etal_selfattention_2018.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PUC-RecSys-Class/RecSysPUC-2021/39a9f6d7dba2e869118012dc5fec1140a2e0e007/proyecto/proy_finales_2018/Rencoret_etal_selfattention_2018.pdf -------------------------------------------------------------------------------- /proyecto/proy_finales_2019/README.md: -------------------------------------------------------------------------------- 1 | ### Proyectos finales de 2019 para consultar de ejemplo: 2 | 3 | - Dominguez, Krebs, Lira 4 | [repo](https://github.com/PUC-RecSys-Class/proyecto-final-recsys-2019-2-dominguez-krebs-lira) 5 | 6 | - Gazali, Fischer 7 | [repo](https://github.com/PUC-RecSys-Class/proyecto-final-recsys-2019-2-gazali-fischer) 8 | 9 | - Navon, Andrade 10 | [repo](https://github.com/PUC-RecSys-Class/proyecto-final-recsys-2019-2-recmen) 11 | 12 | - Catan, Villa 13 | [repo](https://github.com/PUC-RecSys-Class/proyecto-final-recsys-2019-2-catttan-villa) 14 | -------------------------------------------------------------------------------- /proyecto/proy_finales_2020/Alipanah et al.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PUC-RecSys-Class/RecSysPUC-2021/39a9f6d7dba2e869118012dc5fec1140a2e0e007/proyecto/proy_finales_2020/Alipanah et al.pdf -------------------------------------------------------------------------------- /proyecto/proy_finales_2020/Alliende.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PUC-RecSys-Class/RecSysPUC-2021/39a9f6d7dba2e869118012dc5fec1140a2e0e007/proyecto/proy_finales_2020/Alliende.pdf -------------------------------------------------------------------------------- /proyecto/proy_finales_2020/Andrade et al.pdf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/PUC-RecSys-Class/RecSysPUC-2021/39a9f6d7dba2e869118012dc5fec1140a2e0e007/proyecto/proy_finales_2020/Andrade et al.pdf -------------------------------------------------------------------------------- /proyecto/proy_finales_2020/Biskupovic et al.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PUC-RecSys-Class/RecSysPUC-2021/39a9f6d7dba2e869118012dc5fec1140a2e0e007/proyecto/proy_finales_2020/Biskupovic et al.pdf -------------------------------------------------------------------------------- /proyecto/proy_finales_2020/Cartagena et al.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PUC-RecSys-Class/RecSysPUC-2021/39a9f6d7dba2e869118012dc5fec1140a2e0e007/proyecto/proy_finales_2020/Cartagena et al.pdf -------------------------------------------------------------------------------- /proyecto/proy_finales_2020/Castro et al.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PUC-RecSys-Class/RecSysPUC-2021/39a9f6d7dba2e869118012dc5fec1140a2e0e007/proyecto/proy_finales_2020/Castro et al.pdf -------------------------------------------------------------------------------- /proyecto/proy_finales_2020/Codoceo et al.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PUC-RecSys-Class/RecSysPUC-2021/39a9f6d7dba2e869118012dc5fec1140a2e0e007/proyecto/proy_finales_2020/Codoceo et al.pdf -------------------------------------------------------------------------------- /proyecto/proy_finales_2020/Contreras et al.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PUC-RecSys-Class/RecSysPUC-2021/39a9f6d7dba2e869118012dc5fec1140a2e0e007/proyecto/proy_finales_2020/Contreras et al.pdf 
-------------------------------------------------------------------------------- /proyecto/proy_finales_2020/Diaz et al.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PUC-RecSys-Class/RecSysPUC-2021/39a9f6d7dba2e869118012dc5fec1140a2e0e007/proyecto/proy_finales_2020/Diaz et al.pdf -------------------------------------------------------------------------------- /proyecto/proy_finales_2020/Donoso et al.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PUC-RecSys-Class/RecSysPUC-2021/39a9f6d7dba2e869118012dc5fec1140a2e0e007/proyecto/proy_finales_2020/Donoso et al.pdf -------------------------------------------------------------------------------- /proyecto/proy_finales_2020/Duarte et al.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PUC-RecSys-Class/RecSysPUC-2021/39a9f6d7dba2e869118012dc5fec1140a2e0e007/proyecto/proy_finales_2020/Duarte et al.pdf -------------------------------------------------------------------------------- /proyecto/proy_finales_2020/Everke et al.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PUC-RecSys-Class/RecSysPUC-2021/39a9f6d7dba2e869118012dc5fec1140a2e0e007/proyecto/proy_finales_2020/Everke et al.pdf -------------------------------------------------------------------------------- /proyecto/proy_finales_2020/Friedl et al.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PUC-RecSys-Class/RecSysPUC-2021/39a9f6d7dba2e869118012dc5fec1140a2e0e007/proyecto/proy_finales_2020/Friedl et al.pdf -------------------------------------------------------------------------------- /proyecto/proy_finales_2020/Fuentes et al.pdf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/PUC-RecSys-Class/RecSysPUC-2021/39a9f6d7dba2e869118012dc5fec1140a2e0e007/proyecto/proy_finales_2020/Fuentes et al.pdf -------------------------------------------------------------------------------- /proyecto/proy_finales_2020/Guinez et al.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PUC-RecSys-Class/RecSysPUC-2021/39a9f6d7dba2e869118012dc5fec1140a2e0e007/proyecto/proy_finales_2020/Guinez et al.pdf -------------------------------------------------------------------------------- /proyecto/proy_finales_2020/Hanuch et al.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PUC-RecSys-Class/RecSysPUC-2021/39a9f6d7dba2e869118012dc5fec1140a2e0e007/proyecto/proy_finales_2020/Hanuch et al.pdf -------------------------------------------------------------------------------- /proyecto/proy_finales_2020/Olguin et al.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PUC-RecSys-Class/RecSysPUC-2021/39a9f6d7dba2e869118012dc5fec1140a2e0e007/proyecto/proy_finales_2020/Olguin et al.pdf -------------------------------------------------------------------------------- /proyecto/proy_finales_2020/Ramos et al.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PUC-RecSys-Class/RecSysPUC-2021/39a9f6d7dba2e869118012dc5fec1140a2e0e007/proyecto/proy_finales_2020/Ramos et al.pdf -------------------------------------------------------------------------------- /proyecto/proy_finales_2020/Salinas et al.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PUC-RecSys-Class/RecSysPUC-2021/39a9f6d7dba2e869118012dc5fec1140a2e0e007/proyecto/proy_finales_2020/Salinas et al.pdf 
-------------------------------------------------------------------------------- /proyecto/proy_finales_2020/Tapia et al.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PUC-RecSys-Class/RecSysPUC-2021/39a9f6d7dba2e869118012dc5fec1140a2e0e007/proyecto/proy_finales_2020/Tapia et al.pdf -------------------------------------------------------------------------------- /proyecto/proy_finales_2020/Valdes et al.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PUC-RecSys-Class/RecSysPUC-2021/39a9f6d7dba2e869118012dc5fec1140a2e0e007/proyecto/proy_finales_2020/Valdes et al.pdf -------------------------------------------------------------------------------- /proyecto/proy_finales_2020/Valencia et al.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PUC-RecSys-Class/RecSysPUC-2021/39a9f6d7dba2e869118012dc5fec1140a2e0e007/proyecto/proy_finales_2020/Valencia et al.pdf -------------------------------------------------------------------------------- /proyecto/recsys2016_zorich-troncoso.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PUC-RecSys-Class/RecSysPUC-2021/39a9f6d7dba2e869118012dc5fec1140a2e0e007/proyecto/recsys2016_zorich-troncoso.pdf -------------------------------------------------------------------------------- /tareas/Tarea_1_RecSys_2020_2.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PUC-RecSys-Class/RecSysPUC-2021/39a9f6d7dba2e869118012dc5fec1140a2e0e007/tareas/Tarea_1_RecSys_2020_2.pdf -------------------------------------------------------------------------------- /tareas/Tarea_1_RecSys_2021_2.pdf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/PUC-RecSys-Class/RecSysPUC-2021/39a9f6d7dba2e869118012dc5fec1140a2e0e007/tareas/Tarea_1_RecSys_2021_2.pdf --------------------------------------------------------------------------------