├── application
│   ├── wsgi.py
│   ├── static
│   │   └── styles
│   │       ├── favicon.ico
│   │       ├── 404.css
│   │       └── style.css
│   ├── requirements.txt
│   ├── templates
│   │   ├── 404.html
│   │   ├── index.html
│   │   └── recomendations.html
│   ├── Dockerfile
│   ├── app.py
│   ├── run_backend.py
│   └── ml_utils.py
├── LICENSE
├── .gitignore
└── README.md
--------------------------------------------------------------------------------
/application/wsgi.py:
--------------------------------------------------------------------------------
from app import app as application
--------------------------------------------------------------------------------
/application/static/styles/favicon.ico:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/arturlunardi/real-estate-recommender-system/HEAD/application/static/styles/favicon.ico
--------------------------------------------------------------------------------
/application/requirements.txt:
--------------------------------------------------------------------------------
gunicorn==20.0.4
Flask==1.1.2
pandas==1.1.5
numpy==1.19.2
requests==2.24.0
nltk==3.5
scikit-learn==0.23.2
scipy==1.5.2
apscheduler==3.7.0
--------------------------------------------------------------------------------
/application/templates/404.html:
--------------------------------------------------------------------------------
<!DOCTYPE html>
<html>
<head>
    <!-- original head tags were lost in extraction; a stylesheet link to styles/404.css is assumed -->
    <link rel="stylesheet" href="{{ url_for('static', filename='styles/404.css') }}">
</head>
<body>
    <div class="noise"></div>
    <div class="overlay"></div>
    <div class="terminal">
        <h1>Error <span class="errorcode">404</span></h1>
        <p class="output">The page you are looking for might have been removed, doesn't exist, had its name changed or is temporarily unavailable.</p>
        <p class="output">Please return to the <a href="/">homepage</a>.</p>
        <p class="output">Good luck.</p>
    </div>
</body>
</html>
-------------------------------------------------------------------------------- /application/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.7-slim 2 | COPY . /app 3 | WORKDIR /app 4 | RUN apt-get update && \ 5 | apt-get install -y --no-install-recommends \ 6 | ca-certificates \ 7 | cmake \ 8 | build-essential \ 9 | gcc \ 10 | g++ 11 | RUN pip install -r requirements.txt 12 | RUN python -m nltk.downloader punkt 13 | RUN python -m nltk.downloader wordnet 14 | # RUN python app.py 15 | #CMD python ./app.py 16 | 17 | # Run the image as a non-root user 18 | #RUN adduser -D myuser 19 | #USER myuser 20 | 21 | # Run the app. CMD is required to run on Heroku 22 | # $PORT is set by Heroku 23 | CMD gunicorn --bind 0.0.0.0:$PORT wsgi 24 | #CMD gunicorn --bind 0.0.0.0:80 wsgi 25 | 26 | 27 | #https://github.com/microsoft/LightGBM/blob/master/docker/dockerfile-python 28 | #https://github.com/heroku/alpinehelloworld 29 | #https://devcenter.heroku.com/articles/container-registry-and-runtime 30 | 31 | #Creating app... done, ⬢ sheltered-reef-65520 32 | #https://sheltered-reef-65520.herokuapp.com/ | https://git.heroku.com/sheltered-reef-65520.git -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 Artur Lunardi Di Fante 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /application/app.py: -------------------------------------------------------------------------------- 1 | import run_backend 2 | from flask import Flask, request, render_template, url_for, redirect 3 | import os 4 | from datetime import datetime 5 | 6 | os.chdir(os.path.abspath(os.path.dirname(__file__))) 7 | 8 | TEMPLATE_DIR = os.path.abspath('./templates') 9 | STATIC_DIR = os.path.abspath('./static') 10 | 11 | app = Flask(__name__, template_folder=TEMPLATE_DIR, static_folder=STATIC_DIR) 12 | 13 | 14 | @app.before_first_request 15 | def run_first_model(): 16 | run_backend.get_api() 17 | return None 18 | 19 | 20 | @app.route('/', methods=['GET', 'POST']) 21 | def main_page(): 22 | if request.method == 'GET': 23 | # Get the first x id's to show on the main page 24 | ids = run_backend.get_ids(20) 25 | return render_template('index.html', ids=ids) 26 | else: 27 | # redirect for /predict - button 28 | return redirect(url_for("predict_api", 29 | imovel_id=request.form.get('predict'))) 30 | 31 | 32 | @app.route('/predict', methods=['GET', 'POST']) 33 | def predict_api(): 34 | if request.method == 'GET': 35 | imovel_id = request.args.get("imovel_id", default=None) 36 | elif request.method == 'POST': 37 | # redirect for /predict - button 38 | return redirect(url_for("predict_api", 39 | imovel_id=request.form.get('predict'))) 40 | if imovel_id is None: 41 | return render_template('404.html') 42 | else: 43 | # run the predictions and return the y similar items. 44 | imovel_data, original_property, date = run_backend.get_predictions( 45 | id_=imovel_id, quantity_similar_items=5) 46 | if imovel_data is None: 47 | return render_template('404.html') 48 | last_update = abs(datetime.now()-date).seconds / 60 49 | return render_template('recomendations.html', 50 | df_dict=imovel_data, 51 | original_property=original_property, 52 | last_update=last_update) 53 | 54 | 55 | if __name__ == '__main__': 56 | app.run(debug=False, host='0.0.0.0') 57 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | .ipynb_checkpoints 6 | .ipynb_checkpoints/ 7 | 8 | # C extensions 9 | *.so 10 | 11 | # Distribution / packaging 12 | .Python 13 | build/ 14 | develop-eggs/ 15 | dist/ 16 | downloads/ 17 | eggs/ 18 | .eggs/ 19 | lib/ 20 | lib64/ 21 | parts/ 22 | sdist/ 23 | var/ 24 | wheels/ 25 | pip-wheel-metadata/ 26 | share/python-wheels/ 27 | *.egg-info/ 28 | .installed.cfg 29 | *.egg 30 | MANIFEST 31 | 32 | # PyInstaller 33 | # Usually these files are written by a python script from a template 34 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
35 | *.manifest 36 | *.spec 37 | 38 | # Installer logs 39 | pip-log.txt 40 | pip-delete-this-directory.txt 41 | 42 | # Unit test / coverage reports 43 | htmlcov/ 44 | .tox/ 45 | .nox/ 46 | .coverage 47 | .coverage.* 48 | .cache 49 | nosetests.xml 50 | coverage.xml 51 | *.cover 52 | *.py,cover 53 | .hypothesis/ 54 | .pytest_cache/ 55 | 56 | # Translations 57 | *.mo 58 | *.pot 59 | 60 | # Django stuff: 61 | *.log 62 | local_settings.py 63 | db.sqlite3 64 | db.sqlite3-journal 65 | 66 | # Flask stuff: 67 | instance/ 68 | .webassets-cache 69 | 70 | # Scrapy stuff: 71 | .scrapy 72 | 73 | # Sphinx documentation 74 | docs/_build/ 75 | 76 | # PyBuilder 77 | target/ 78 | 79 | # Jupyter Notebook 80 | .ipynb_checkpoints 81 | 82 | # IPython 83 | profile_default/ 84 | ipython_config.py 85 | 86 | # pyenv 87 | .python-version 88 | 89 | # pipenv 90 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 91 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 92 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 93 | # install all needed dependencies. 94 | #Pipfile.lock 95 | 96 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 97 | __pypackages__/ 98 | 99 | # Celery stuff 100 | celerybeat-schedule 101 | celerybeat.pid 102 | 103 | # SageMath parsed files 104 | *.sage.py 105 | 106 | # Environments 107 | .env 108 | .venv 109 | env/ 110 | venv/ 111 | ENV/ 112 | env.bak/ 113 | venv.bak/ 114 | 115 | # Spyder project settings 116 | .spyderproject 117 | .spyproject 118 | 119 | # Rope project settings 120 | .ropeproject 121 | 122 | # mkdocs documentation 123 | /site 124 | 125 | # mypy 126 | .mypy_cache/ 127 | .dmypy.json 128 | dmypy.json 129 | 130 | # Pyre type checker 131 | .pyre/ 132 | -------------------------------------------------------------------------------- /application/static/styles/404.css: -------------------------------------------------------------------------------- 1 | /*====================== 2 | 404 page 3 | =======================*/ 4 | 5 | 6 | @import 'https://fonts.googleapis.com/css?family=Inconsolata'; 7 | 8 | html { 9 | min-height: 100%; 10 | } 11 | 12 | body { 13 | box-sizing: border-box; 14 | height: 100%; 15 | background-color: #000000; 16 | background-image: radial-gradient(#11581E, #041607), url("https://media.giphy.com/media/oEI9uBYSzLpBK/giphy.gif"); 17 | background-repeat: no-repeat; 18 | background-size: cover; 19 | font-family: 'Inconsolata', Helvetica, sans-serif; 20 | font-size: 1.5rem; 21 | color: rgba(128, 255, 128, 0.8); 22 | text-shadow: 23 | 0 0 1ex rgba(51, 255, 51, 1), 24 | 0 0 2px rgba(255, 255, 255, 0.8); 25 | } 26 | 27 | .noise { 28 | pointer-events: none; 29 | position: absolute; 30 | width: 100%; 31 | height: 100%; 32 | background-image: url("https://media.giphy.com/media/oEI9uBYSzLpBK/giphy.gif"); 33 | background-repeat: no-repeat; 34 | background-size: cover; 35 | z-index: -1; 36 | opacity: .02; 37 | } 38 | 39 | .overlay { 40 | pointer-events: none; 41 | position: absolute; 42 | width: 100%; 43 | height: 100%; 44 | background: 45 | repeating-linear-gradient( 46 | 180deg, 47 | rgba(0, 0, 0, 0) 0, 48 | rgba(0, 0, 0, 0.3) 50%, 49 | rgba(0, 0, 0, 0) 100%); 50 | background-size: auto 4px; 51 | z-index: 1; 52 | } 53 | 54 | .overlay::before { 55 | content: ""; 56 | pointer-events: none; 57 | position: absolute; 58 | display: block; 59 | top: 0; 60 | left: 0; 61 | right: 0; 62 | bottom: 0; 63 | width: 100%; 64 | height: 100%; 65 | 
background-image: linear-gradient( 66 | 0deg, 67 | transparent 0%, 68 | rgba(32, 128, 32, 0.2) 2%, 69 | rgba(32, 128, 32, 0.8) 3%, 70 | rgba(32, 128, 32, 0.2) 3%, 71 | transparent 100%); 72 | background-repeat: no-repeat; 73 | animation: scan 7.5s linear 0s infinite; 74 | } 75 | 76 | @keyframes scan { 77 | 0% { background-position: 0 -100vh; } 78 | 35%, 100% { background-position: 0 100vh; } 79 | } 80 | 81 | .terminal { 82 | box-sizing: inherit; 83 | position: absolute; 84 | height: 100%; 85 | width: 1000px; 86 | max-width: 100%; 87 | padding: 4rem; 88 | text-transform: uppercase; 89 | } 90 | 91 | .output { 92 | color: rgba(128, 255, 128, 0.8); 93 | text-shadow: 94 | 0 0 1px rgba(51, 255, 51, 0.4), 95 | 0 0 2px rgba(255, 255, 255, 0.8); 96 | } 97 | 98 | .output::before { 99 | content: "> "; 100 | } 101 | 102 | a { 103 | color: #fff; 104 | text-decoration: none; 105 | } 106 | 107 | a::before { 108 | content: "["; 109 | } 110 | 111 | a::after { 112 | content: "]"; 113 | } 114 | 115 | .errorcode { 116 | color: white; 117 | } 118 | 119 | 120 | 121 | 122 | 123 | 124 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Real Estate Recommender System
Access the application [here](http://real-estate-recommender-system.herokuapp.com/).

## About:
This project generates recommendations of real estate rental properties based on their characteristics or attributes. It is an end-to-end project, covering every step from data collection to deployment.

## Steps:
- Definition of the problem
- Data Collection
- Data Wrangling
- Modelling
- Deployment

## Model:
Recommender systems aim to suggest relevant items to the user. A content-based recommender system was chosen for this project.\
Content-based filtering is a type of recommender system that uses item features to recommend other items similar to what the user likes, based on their previous actions or explicit feedback.\
One of the advantages of this approach is that it can generate recommendations without a history of other users' behavior.

## Tools:
- Python as the programming language
- HTML and CSS for the front end
- Docker for containerization
- Heroku for cloud hosting

## Project structure:
1. **Definition of the problem**\
The main purpose of a recommender system is to suggest relevant items to the user. It saves time, improves the user experience and increases a company's profit. Imagine you are on a website with thousands of products: you would not have time to go through every single one to find what you are looking for. To improve your experience and help you find your product, the recommender system suggests products that match your searches.

   Therefore, a simple structure was defined to solve this problem:

   - Problem to be solved: a system that recommends the properties most similar to the one the user has searched for.
   - How to solve it: build a content-based recommender system that returns the n most similar properties based on the features of the item the user selects.

2. **Data Collection**\
The data was collected through an API provided by [Órion](https://orionsm.com.br/), a real estate company.

3. **Data Wrangling**\
In this step the data was cleaned and transformed. Only the columns that feed the model were kept, and missing values were filled in. Categorical features were encoded as numbers so the model could read them, and numerical features with very different scales were standardized. Textual columns were transformed with TfidfVectorizer from scikit-learn.

4. **Modelling**\
There are many ways to build a content-based filter. Cosine similarity was chosen: a metric that measures how close two n-dimensional vectors are by the cosine of the angle between them. The model itself is simple, and it is important to note that nothing is being predicted; we are measuring the similarity between the vectors that represent our items (a minimal sketch of this pipeline appears right after this list).

5. **Deployment**\
The system is served by this [application](http://real-estate-recommender-system.herokuapp.com/), built with Flask. The application fetches the data from the API every n minutes and runs all the steps above to deliver the top n recommendations for the item the user selected.
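A minimal, self-contained sketch of the pipeline described in steps 3 and 4, assuming a toy catalogue with made-up columns (`area`, `rent`, `description`); the project's actual preprocessing and feature set live in `application/ml_utils.py`. The sketch scales the numeric features, vectorizes the text with TF-IDF, stacks both blocks into a single matrix and ranks items by pairwise cosine similarity.

```python
import pandas as pd
from scipy.sparse import csr_matrix, hstack
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler

# Toy catalogue: two numeric features plus one free-text column.
items = pd.DataFrame({
    "id": [101, 102, 103, 104],
    "area": [45.0, 50.0, 200.0, 48.0],
    "rent": [1200.0, 1300.0, 5000.0, 1250.0],
    "description": [
        "apartamento mobiliado centro",
        "apartamento centro sacada",
        "galpao comercial amplo",
        "apartamento mobiliado sacada centro",
    ],
})

# Standardize the numeric columns so area and rent sit on comparable scales.
numeric = StandardScaler().fit_transform(items[["area", "rent"]])

# TF-IDF turns the text column into a sparse term matrix.
text = TfidfVectorizer().fit_transform(items["description"])

# Stack both blocks into one feature matrix, one row per property.
features = hstack([csr_matrix(numeric), text])

# Pairwise cosine similarity between every pair of properties.
similarity = cosine_similarity(features)


def recommend(item_id, top_n=2):
    """Return the ids of the top_n properties most similar to item_id."""
    idx = items.index[items["id"] == item_id][0]
    ranked = similarity[idx].argsort()[::-1]          # most similar first
    ranked = [i for i in ranked if i != idx][:top_n]  # drop the item itself
    return items.loc[ranked, "id"].tolist()


print(recommend(101))  # e.g. [104, 102] -- the other small downtown apartments
```

The real implementation additionally label-encodes the categorical columns and uses two TF-IDF vectorizers (one for the description, one for the amenities list), but the ranking step is the same cosine-similarity lookup.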
## Additional information:
Author - Artur Lunardi Di Fante | Contacts: [LinkedIn](https://www.linkedin.com/in/artur-lunardi-di-fante-393611194/)
--------------------------------------------------------------------------------
/application/templates/index.html:
--------------------------------------------------------------------------------
1 | 2 | 3 | 4 | 5 | Real Estate Recommender 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 |
27 | 28 |
29 |
30 |

Real Estate Recommender

31 |
    32 |
  • Category: Recommender System
  • 33 |
  • Date of the project: February 20, 2021.
  • 34 |
35 |
36 |
37 |

Content-Based Recommender System

38 |
    39 |
  • This recommender system was built to generate recommendations of properties based on their characteristics or attributes.
  • 41 |
  • Content-based filtering is a type of recommender system that uses item features to recommend other items similar to what the user likes, based on their previous actions or explicit feedback.
  • 43 |
  • One of the advantages of this approach is that it can generate recommendations without a history of other users' behavior.
  • 45 |
46 |
47 |
48 |

First {{ids|length}} IDs available

49 |
    50 | {% for id in ids %} 51 |
  • {{ id }} |
  • 52 | {% endfor %} 53 |
54 |
55 |
56 |
57 |
58 | 61 | 62 |
63 |
64 |
65 |
66 | 83 | 84 |
85 | 86 | 87 | 88 | -------------------------------------------------------------------------------- /application/run_backend.py: -------------------------------------------------------------------------------- 1 | import ml_utils 2 | import requests 3 | import pandas as pd 4 | import json 5 | from apscheduler.schedulers.background import BackgroundScheduler 6 | from apscheduler.triggers.interval import IntervalTrigger 7 | import atexit 8 | from datetime import datetime 9 | 10 | 11 | def get_api(): 12 | """Access the api and return the original dataframe.""" 13 | global df_original 14 | global date 15 | headers = { 16 | 'accept': 'application/json' 17 | } 18 | url = 'API-KEY-HERE' 19 | try: 20 | response = requests.get(url, headers=headers) 21 | response.raise_for_status() 22 | except requests.exceptions.RequestException as err: 23 | raise SystemExit(err) 24 | except requests.exceptions.HTTPError as errh: 25 | raise SystemExit(errh) 26 | except requests.exceptions.Timeout as errt: 27 | raise SystemExit(errt) 28 | except requests.exceptions.ConnectionError as errc: 29 | raise SystemExit(errc) 30 | 31 | dataframes = [] 32 | for i in range(1, json.loads(response.content)['paginas'] + 1): 33 | url = f'API-KEY-HERE' 34 | response = requests.get(url, headers=headers) 35 | dataframes.append(json.loads(response.content)) 36 | 37 | datasets = [] 38 | for item in dataframes: 39 | df = pd.DataFrame(item).T.iloc[:-4] 40 | datasets.append(df) 41 | 42 | df_original = pd.concat(item for item in datasets) 43 | df_original = df_original.loc[(df_original['Status'] == 'Aluguel') | ( 44 | df_original['Status'] == 'Venda e Aluguel')] 45 | 46 | df_original['Mobiliado'] = df_original['Caracteristicas'].apply( 47 | lambda x: 2 if x['Mobiliado'] == 'Sim' else 1 if x['Semi Mobiliado'] == 'Sim' else 0) 48 | keys = df_original['Caracteristicas'].iloc[0].keys() 49 | df_original['Caracteristicas'] = df_original['Caracteristicas'].apply( 50 | lambda x: ", ".join([key for key in keys if x[key] == 'Sim'])) 51 | 52 | df_original.loc[df_original['Categoria'] == 53 | 'Salas/Conjuntos', 'Categoria'] = 'Sala' 54 | df_original.loc[df_original['Categoria'] == 55 | 'Prédio Comercial', 'Categoria'] = 'Prédio' 56 | df_original.loc[df_original['Categoria'] == 57 | 'Ponto Comercial', 'Categoria'] = 'Ponto' 58 | df_original.loc[df_original['Categoria'] == 59 | 'Casa Em Condomínio', 'Categoria'] = 'Casa' 60 | 61 | residenciais = ['Apartamento', 'Kitnet', 'Casa', 'Cobertura'] 62 | comerciais = ['Empreendimento', 'Sala', 'Galpão', 'Loja', 63 | 'Prédio', 'Ponto', 'Terreno', 'Box', 'Casa Comercial'] 64 | 65 | df_original.loc[df_original['Categoria'].isin( 66 | residenciais), 'Finalidade'] = 'Residencial' 67 | df_original.loc[df_original['Categoria'].isin( 68 | comerciais), 'Finalidade'] = 'Comercial' 69 | 70 | date = datetime.now() 71 | return df_original, date 72 | 73 | 74 | # create schedule in the background to atualize df and date 75 | scheduler = BackgroundScheduler() 76 | scheduler.start() 77 | scheduler.add_job( 78 | func=get_api, 79 | trigger=IntervalTrigger(minutes=15), 80 | id='df, date from get_api', 81 | name='Return dataframe, date every 15 minutes', 82 | replace_existing=True) 83 | # Shut down the scheduler when exiting the app 84 | atexit.register(lambda: scheduler.shutdown()) 85 | 86 | 87 | def get_predictions(id_, quantity_similar_items): 88 | """Return the df with the predictions with the API Key.""" 89 | predictions, original_property = ml_utils.recommend( 90 | id_, df_original, quantity_similar_items) 91 | 92 | return 
(predictions, original_property, date) 93 | 94 | 95 | def get_ids(quantidade): 96 | return df_original['Codigo'].values[0:quantidade] 97 | -------------------------------------------------------------------------------- /application/templates/recomendations.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Real Estate Recommender 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 |
27 | Home 28 | 29 | 30 |
31 |
32 |

Real Estate Recommender

33 |
The last update from the API was {{last_update | round(3)}} minutes ago.
34 |

Original Property

35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 |
Id | Typology | Bedrooms | Bathrooms | Parking Spaces | Rent Value | Neighborhood
{{ original_property['Codigo'] }} | {{ original_property['Categoria'] }} | {{ original_property['Dormitorios'] | int }} | {{ original_property['BanheiroSocialQtd'] | int }} | {{ original_property['Vagas'] | int }} | {{ "R$ " + original_property['ValorLocacao'] | string }} | {{ original_property['Bairro'] }}

55 |

Recommendations

56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | {% for key, value in df_dict.iterrows() %} 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | {% endfor %} 79 |
Id | Typology | Bedrooms | Bathrooms | Parking Spaces | Rent Value | Neighborhood | Score
{{ value['Codigo'] }} | {{ value['Categoria'] }} | {{ value['Dormitorios'] | int }} | {{ value['BanheiroSocialQtd'] | int }} | {{ value['Vagas'] | int }} | {{ "R$ " + value['ValorLocacao'] | string }} | {{ value['Bairro'] }} | {{ value['Score'] }}
80 |
81 |
82 |
83 |
84 | 87 | 89 |
90 |
91 |
92 |
93 | 110 | 111 |
112 | 113 | 114 | 115 | -------------------------------------------------------------------------------- /application/ml_utils.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import nltk 3 | from sklearn.feature_extraction.text import TfidfVectorizer 4 | from scipy.sparse import hstack 5 | from sklearn.metrics.pairwise import cosine_similarity 6 | from sklearn.preprocessing import LabelEncoder, StandardScaler 7 | import numpy as np 8 | 9 | 10 | def number_columns(data): 11 | df_imoveis = data.copy() 12 | numeric_columns = ["Codigo", "Dormitorios", "Suites", 13 | "BanheiroSocialQtd", "Vagas", "AreaPrivativa", "ValorLocacao"] 14 | for column in numeric_columns: 15 | df_imoveis[column] = df_imoveis[column].replace( 16 | '', np.nan).astype(float) 17 | 18 | return df_imoveis 19 | 20 | 21 | def clean_data(data): 22 | """Return the a copy of the original dataframe with the 23 | columns that are in the model and treated for missing values.""" 24 | df_imoveis = number_columns(data) 25 | 26 | columns = ['Finalidade', 'Categoria', 'Dormitorios', 'Suites', 27 | 'BanheiroSocialQtd', 'Vagas', 'AreaPrivativa', 'ValorLocacao', 28 | 'Bairro', 'Caracteristicas', 'DescricaoWeb'] 29 | df_imoveis = df_imoveis[columns] 30 | 31 | df_imoveis['Dormitorios'] = df_imoveis.groupby( 32 | 'Categoria')['Dormitorios'].apply(lambda x: x.fillna(x.median())) 33 | df_imoveis['Dormitorios'].fillna(0, inplace=True) 34 | df_imoveis['Suites'] = df_imoveis.groupby( 35 | 'Categoria')['Suites'].apply(lambda x: x.fillna(x.median())) 36 | df_imoveis['Suites'].fillna(0, inplace=True) 37 | df_imoveis['BanheiroSocialQtd'].fillna(0, inplace=True) 38 | df_imoveis['Vagas'].fillna(0, inplace=True) 39 | df_imoveis['AreaPrivativa'] = df_imoveis.groupby( 40 | 'Categoria')['AreaPrivativa'].apply(lambda x: x.fillna(x.mean())) 41 | df_imoveis['AreaPrivativa'].fillna(0, inplace=True) 42 | df_imoveis['Caracteristicas'].fillna('', inplace=True) 43 | df_imoveis['DescricaoWeb'].fillna('', inplace=True) 44 | 45 | return df_imoveis 46 | 47 | 48 | def transform_data(data): 49 | """Return the dataframe with categorical columns 50 | encoded with label encoder and numerical columns standardized.""" 51 | df_imoveis = clean_data(data) 52 | 53 | le = LabelEncoder() 54 | categorical_columns = ['Finalidade', 'Categoria', 'Bairro'] 55 | for column in categorical_columns: 56 | df_imoveis[column] = le.fit_transform(df_imoveis[column]) 57 | sc = StandardScaler() 58 | numerical_columns = ['AreaPrivativa', 'Bairro'] 59 | df_imoveis[numerical_columns] = sc.fit_transform( 60 | df_imoveis[numerical_columns]) 61 | 62 | return df_imoveis 63 | 64 | 65 | def stack_data(data): 66 | """Return the matrix that contain the similarity score between items. 67 | It have been used the tfidf vectorizer in order to work with text data. 
68 | The metric that have been chosen it is cosine similarity.""" 69 | df_imoveis = transform_data(data) 70 | 71 | title_metadescription = df_imoveis['DescricaoWeb'] 72 | title_comodidades = df_imoveis['Caracteristicas'] 73 | df_imoveis.drop(['DescricaoWeb', 'Caracteristicas'], 74 | axis=1, inplace=True) 75 | nltk.download('stopwords') 76 | stopwords = nltk.corpus.stopwords.words('portuguese') 77 | title_vec = TfidfVectorizer( 78 | min_df=2, ngram_range=(1, 3), stop_words=stopwords) 79 | title_vec2 = TfidfVectorizer( 80 | min_df=2, ngram_range=(1, 1), stop_words=stopwords) 81 | title_bow_metadescription = title_vec.fit_transform(title_metadescription) 82 | title_bow_comodidades = title_vec2.fit_transform(title_comodidades) 83 | Xtrain_wtitle = hstack( 84 | [df_imoveis, title_bow_metadescription, title_bow_comodidades]) 85 | nearest_neighbor = cosine_similarity(Xtrain_wtitle, Xtrain_wtitle) 86 | 87 | return nearest_neighbor 88 | 89 | 90 | def recommend(id_, conteudo, quantity_similar_items): 91 | """Return the original df with the most similar 92 | items in descending order.""" 93 | nearest_neighbor = stack_data(conteudo) 94 | 95 | columns = ['Codigo', 'Finalidade', 'Categoria', 'Mobiliado', 'Dormitorios', 96 | 'Suites', 'BanheiroSocialQtd', 'Vagas', 'AreaPrivativa', 97 | 'ValorLocacao', 'Bairro', 'Caracteristicas', 98 | 'Score'] 99 | 100 | similar_listing_ids = [] 101 | df_original = conteudo 102 | df_original.reset_index(drop=True, inplace=True) 103 | try: 104 | idx = df_original.loc[df_original['Codigo'] == id_].index[0] 105 | except: 106 | return None, None 107 | # creating a Series with the similarity scores in descending order 108 | score_series = pd.Series( 109 | nearest_neighbor[idx]).sort_values(ascending=False) 110 | df_original['Score'] = score_series 111 | # getting the indexes of the most similar listings 112 | top_indexes = list(score_series.index) 113 | # populate the list with the ids of the top matching listings 114 | # checking if the goal of the rent it's the same 115 | # excluding if the property it is itself 116 | for i in top_indexes: 117 | if df_original['Finalidade'][idx] == df_original['Finalidade'][i] \ 118 | and df_original['Codigo'][idx] != df_original['Codigo'][i]: 119 | similar_listing_ids.append(i) 120 | 121 | # return the top similar properties and the original property 122 | return \ 123 | df_original.iloc[similar_listing_ids][0:quantity_similar_items], \ 124 | df_original.iloc[idx] 125 | -------------------------------------------------------------------------------- /application/static/styles/style.css: -------------------------------------------------------------------------------- 1 | /* -------------------------------------------------------------- 2 | # General 3 | --------------------------------------------------------------*/ 4 | 5 | body { 6 | font-family: "Open Sans", sans-serif; 7 | color: #272829; 8 | margin-left: auto; 9 | margin-right: auto; 10 | } 11 | 12 | h1, h2, h3, h4, h5, h6 { 13 | font-family: "Raleway", sans-serif; 14 | } 15 | 16 | .ids ul li { 17 | display: inline-block; 18 | padding-right: 4px; 19 | } 20 | 21 | /*-------------------------------------------------------------- 22 | # project Details 23 | --------------------------------------------------------------*/ 24 | 25 | .project-details { 26 | padding-top: 40px; 27 | } 28 | 29 | .project-details .project-info { 30 | padding: 30px; 31 | box-shadow: 0px 0 30px rgba(69, 80, 91, 0.08); 32 | } 33 | 34 | .project-details .project-info h2 { 35 | font-size: 22px; 36 | font-weight: 700; 
37 | margin-bottom: 20px; 38 | padding-bottom: 20px; 39 | border-bottom: 1px solid grey; 40 | } 41 | 42 | .project-details .project-info ul { 43 | list-style: none; 44 | padding: 0; 45 | font-size: 15px; 46 | } 47 | 48 | .project-details .project-info ul li+li { 49 | margin-top: 10px; 50 | } 51 | 52 | .project-details .project-description { 53 | padding: 30px; 54 | box-shadow: 0px 0 30px rgba(69, 80, 91, 0.08); 55 | } 56 | 57 | .project-details .project-description h2 { 58 | font-size: 22px; 59 | font-weight: 700; 60 | margin-bottom: 20px; 61 | padding-bottom: 20px; 62 | border-bottom: 1px solid grey; 63 | } 64 | 65 | .project-details .project-description ul { 66 | list-style: none; 67 | padding: 0; 68 | font-size: 15px; 69 | } 70 | 71 | .project-details .project-description ul li+li { 72 | margin-top: 10px; 73 | } 74 | 75 | .project-details .ids { 76 | padding: 30px; 77 | box-shadow: 0px 0 30px rgba(69, 80, 91, 0.08); 78 | } 79 | 80 | .project-details .ids h2 { 81 | font-size: 22px; 82 | font-weight: 700; 83 | margin-bottom: 20px; 84 | padding-bottom: 20px; 85 | border-bottom: 1px solid grey; 86 | } 87 | 88 | .project-details .ids ul { 89 | list-style: none; 90 | padding: 0; 91 | font-size: 15px; 92 | } 93 | 94 | .project-details .ids ul li+li { 95 | margin-top: 10px; 96 | } 97 | 98 | /*-------------------------------------------------------------- 99 | # Button Details 100 | --------------------------------------------------------------*/ 101 | 102 | .button.is-rounded { 103 | border-radius: 9999px; 104 | } 105 | 106 | .button { 107 | background-color: #36b67e; 108 | background-color: rgba(54, 182, 126, var(--bg-opacity)); 109 | --text-opacity: 1; 110 | color: #fff; 111 | color: rgba(255, 255, 255, var(--text-opacity)); 112 | border-radius: .25rem; 113 | cursor: pointer; 114 | box-shadow: 0 1px 3px 0 rgb(0 0 0 / 10%), 0 1px 2px 0 rgb(0 0 0 / 6%); 115 | padding: .75rem 1rem; 116 | line-height: 1; 117 | transition: all .3s ease; 118 | } 119 | 120 | .broker-card-avatar, .button { 121 | display: inline-block; 122 | --bg-opacity: 1; 123 | } 124 | 125 | [type=button], [type=reset], [type=submit], button { 126 | -webkit-appearance: button; 127 | } 128 | 129 | button, input { 130 | overflow: visible; 131 | } 132 | 133 | button, input, optgroup, select, textarea { 134 | font-family: inherit; 135 | font-size: 100%; 136 | line-height: 1.15; 137 | margin: 0; 138 | padding: 0; 139 | line-height: inherit; 140 | color: inherit; 141 | } 142 | 143 | input { 144 | border-radius: 9999px; 145 | -webkit-writing-mode-: horizontal-tb !important; 146 | text-rendering: auto; 147 | color: -internal-light-dark(black, white); 148 | letter-spacing: normal; 149 | word-spacing: normal; 150 | text-transform: none; 151 | text-indent: 0px; 152 | text-shadow: none; 153 | display: inline-block; 154 | text-align: start; 155 | appearance: textfield; 156 | -webkit-rtl-ordering: logical; 157 | cursor: text; 158 | margin: 0em; 159 | font: 400 13.3333px Arial; 160 | padding: 3px 2px; 161 | border-width: 2px; 162 | border-style: auto; 163 | } 164 | 165 | .input { 166 | display: block; 167 | -webkit-appearance: none; 168 | -moz-appearance: none; 169 | appearance: none; 170 | width: 100%; 171 | padding: .75rem; 172 | --text-opacity: 1; 173 | color: #4a5568; 174 | color: rgba(74, 85, 104, var(--text-opacity)); 175 | line-height: 1.25; 176 | border-width: 1px; 177 | --border-opacity: 1; 178 | border-color: #e2e8f0; 179 | border-color: rgba(226, 232, 240, var(--border-opacity)); 180 | border-radius: .25rem; 181 | --bg-opacity: 1; 182 
| background-color: #fff; 183 | background-color: rgba(255, 255, 255, var(--bg-opacity)); 184 | padding: 30px; 185 | box-shadow: 0px 0 30px rgba(69, 80, 91, 0.08); 186 | } 187 | 188 | *, :after, :before { 189 | box-sizing: border-box; 190 | border: 0 solid #e2e8f0; 191 | } 192 | 193 | /*-------------------------------------------------------------- 194 | # Recomendation Table Details 195 | --------------------------------------------------------------*/ 196 | 197 | table { 198 | background: #f5f5f5; 199 | border-collapse: separate; 200 | box-shadow: inset 0 1px 0 #fff; 201 | line-height: 24px; 202 | text-align: left; 203 | width: 1000px; 204 | } 205 | 206 | .recomendation-details { 207 | padding-top: 10px; 208 | padding-left: 5px; 209 | padding-right: 5px; 210 | } 211 | 212 | .recomendation-details .recomendation-info { 213 | padding: 30px; 214 | box-shadow: 0px 0 30px rgba(69, 80, 91, 0.08); 215 | } 216 | 217 | .recomendation-details .recomendation-info h2 { 218 | font-size: 22px; 219 | font-weight: 700; 220 | margin-bottom: 20px; 221 | padding-bottom: 20px; 222 | border-bottom: 1px solid grey; 223 | } 224 | 225 | .recomendation-details .recomendation-info h6 { 226 | font-size: 10px; 227 | font-weight: 700; 228 | margin-bottom: 20px; 229 | padding-bottom: 20px; 230 | border-bottom: 1px solid grey; 231 | } 232 | 233 | /*-------------------------------------------------------------- 234 | # Home Button 235 | --------------------------------------------------------------*/ 236 | 237 | .btn { 238 | background-color: rgba(54, 182, 126, var(--bg-opacity)); 239 | --text-opacity: 1; 240 | color: rgba(255, 255, 255, var(--text-opacity)); 241 | border-radius: 9999px; 242 | cursor: pointer; 243 | box-shadow: 0 1px 3px 0 rgb(0 0 0 / 10%), 0 1px 2px 0 rgb(0 0 0 / 6%); 244 | padding: .55rem 1rem; 245 | line-height: .8; 246 | transition: all .3s ease; 247 | } 248 | 249 | .btn { 250 | display: inline-block; 251 | --bg-opacity: 1; 252 | } 253 | 254 | .btn { 255 | overflow: visible; 256 | } 257 | 258 | a:link { 259 | color: white; 260 | text-decoration: none; 261 | } 262 | 263 | a:hover, a:active { 264 | background-color: #36b67e; 265 | } 266 | 267 | .btn { 268 | padding: .55rem 1rem; 269 | --text-opacity: 1; 270 | color: #4a5568; 271 | color: rgba(74, 85, 104, var(--text-opacity)); 272 | line-height: 1.25; 273 | border-width: 1px; 274 | --border-opacity: 1; 275 | border-color: rgba(226, 232, 240, var(--border-opacity)); 276 | border-radius: .25rem; 277 | --bg-opacity: 1; 278 | background-color: rgba(255, 255, 255, var(--bg-opacity)); 279 | box-shadow: 0px 0 30px rgba(69, 80, 91, 0.08); 280 | } 281 | 282 | /*-------------------------------------------------------------- 283 | # Footer 284 | --------------------------------------------------------------*/ 285 | 286 | #section-footer { 287 | padding: 10px 0px 10px 0px; 288 | background-color: #eee; 289 | text-align: center; 290 | } 291 | 292 | .align-items-center { 293 | -ms-flex-align: center!important; 294 | align-items: center!important; 295 | } 296 | 297 | .fa { 298 | padding: 8px; 299 | font-size: 16px; 300 | color: rgb(59, 59, 59); 301 | text-align: center; 302 | border-radius: 10%; 303 | margin: 0 5px; 304 | text-decoration: none !important; 305 | } 306 | 307 | .fa { 308 | display: inline-block; 309 | font: normal normal normal 14px/1 FontAwesome; 310 | font-size: inherit; 311 | text-rendering: auto; 312 | -webkit-font-smoothing: antialiased; 313 | -moz-osx-font-smoothing: grayscale; 314 | } 315 | 316 | .fa:hover { 317 | opacity: 0.7; 318 | } 319 | 
320 | a { 321 | color: #007bff; 322 | text-decoration: none; 323 | background-color: transparent; 324 | } 325 | 326 | a:-webkit-any-link { 327 | color: -webkit-link; 328 | cursor: pointer; 329 | } --------------------------------------------------------------------------------