├── .gitignore ├── __pycache__ ├── cluster.cpython-37.pyc ├── cluster.cpython-38.pyc ├── content.cpython-37.pyc ├── content.cpython-38.pyc ├── location.cpython-37.pyc ├── location.cpython-38.pyc └── super_score.cpython-38.pyc ├── super_score.py ├── location.py ├── LICENSE ├── cluster.py ├── README.md ├── content.py ├── collaborate.py ├── app.py └── requirements.txt /.gitignore: -------------------------------------------------------------------------------- 1 | *.csv 2 | -------------------------------------------------------------------------------- /__pycache__/cluster.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CateGitau/restaurant-recommendation-system/HEAD/__pycache__/cluster.cpython-37.pyc -------------------------------------------------------------------------------- /__pycache__/cluster.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CateGitau/restaurant-recommendation-system/HEAD/__pycache__/cluster.cpython-38.pyc -------------------------------------------------------------------------------- /__pycache__/content.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CateGitau/restaurant-recommendation-system/HEAD/__pycache__/content.cpython-37.pyc -------------------------------------------------------------------------------- /__pycache__/content.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CateGitau/restaurant-recommendation-system/HEAD/__pycache__/content.cpython-38.pyc -------------------------------------------------------------------------------- /__pycache__/location.cpython-37.pyc: -------------------------------------------------------------------------------- 
# --- super_score.py ---------------------------------------------------------

def score(data):
    """Add a ``super_score`` column to ``data`` and return the same frame.

    ``super_score`` is computed row-wise as ``polarity * compound + stars``
    from the three like-named columns.

    Args:
        data: DataFrame with ``polarity``, ``compound`` and ``stars`` columns.

    Returns:
        ``data`` itself; the new column is written in place.
    """
    data['super_score'] = data['polarity'] * data['compound'] + data['stars']
    return data


# --- location.py ------------------------------------------------------------

def get_location(url, params, timeout=10):
    """Geocode a query and return ``[latitude, longitude]`` of the first hit.

    Args:
        url: Geocoding endpoint to send the GET request to.
        params: Query parameters passed to ``requests.get``.
        timeout: Seconds to wait for the server before giving up, so that a
            stalled geocoder cannot hang the caller indefinitely.

    Returns:
        A two-element list ``[lat, lng]`` read from
        ``items[0]['position']`` of the JSON response.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        KeyError: If the response JSON has no ``items`` / ``position`` entry.
        IndexError: If the geocoder returned an empty ``items`` list.
    """
    # Imported here so this block stays self-contained.
    import requests

    # sending get request and saving the response as response object
    response = requests.get(url, params, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of trying to parse an error payload.
    response.raise_for_status()

    items = response.json()['items']
    if not items:
        # Same exception type the bare items[0] lookup produced, but with a
        # message that says what actually went wrong.
        raise IndexError("geocoder returned no results for the requested location")

    position = items[0]['position']
    return [position['lat'], position['lng']]
-------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 Cathy 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
# Creating Location-Based Recommendation Function
def location_based_recommendation(data, latitude, longitude,
                                  n_clusters=10, top_n=10, random_state=0):
    """Recommend the best-rated restaurants in the cluster containing a point.

    Restaurants are grouped into geographic clusters with KMeans on their
    ``longitude``/``latitude`` columns. The cluster of the supplied point is
    predicted, and the restaurants of that cluster are returned ordered by
    ``stars`` and then ``review_count``, both descending.

    Args:
        data: DataFrame with at least the columns ``name``, ``latitude``,
            ``longitude``, ``categories``, ``stars`` and ``review_count``.
            It is not modified.
        latitude: Latitude of the user's location.
        longitude: Longitude of the user's location.
        n_clusters: Number of KMeans clusters; capped at the number of rows.
        top_n: Maximum number of restaurants to return.
        random_state: Seed for KMeans. It is fixed by default so repeated
            calls (e.g. Streamlit reruns) give the same recommendations for
            the same location.

    Returns:
        DataFrame of at most ``top_n`` rows with the columns ``name``,
        ``latitude``, ``longitude``, ``categories``, ``stars``,
        ``review_count`` and ``cluster``.

    Raises:
        ValueError: If ``data`` has no rows.
    """
    coordinate_columns = ['longitude', 'latitude']

    # Putting the Coordinates of Restaurants together into a dataframe
    coordinates = data[coordinate_columns]
    if coordinates.empty:
        raise ValueError("data contains no restaurants to cluster")

    # KMeans refuses to build more clusters than there are samples.
    kmeans = KMeans(n_clusters=min(n_clusters, len(coordinates)),
                    init='k-means++', random_state=random_state)

    # assign() builds a new frame, so the caller's (possibly cached)
    # DataFrame does not silently gain a 'cluster' column.
    clustered = data.assign(cluster=kmeans.fit_predict(coordinates))

    # Predict the cluster for the longitude and latitude provided. The query
    # is a frame with the training column names, so feature order is explicit.
    user_point = pd.DataFrame([[longitude, latitude]], columns=coordinate_columns)
    user_cluster = kmeans.predict(user_point)[0]

    # Get the best restaurants in this cluster along with the relevant
    # information for a user to make a decision.
    in_cluster = clustered[clustered['cluster'] == user_cluster]
    ranked = in_cluster.sort_values(by=['stars', 'review_count'], ascending=False)
    return ranked.head(top_n)[['name', 'latitude', 'longitude', 'categories',
                               'stars', 'review_count', 'cluster']]
The aim of the project is to create a restaurant recommender system that gives users restaurant suggestions based on restaurant reviews and on the restaurants each user has previously visited. 4 | 5 | There are three product features/models for the system: 6 | - Location based recommender system 7 | - Content based recommender system 8 | - Collaborative filtering recommender system 9 | 10 | 11 | 12 | ## Summary 13 | 14 | - [Getting Started](#getting-started) 15 | - [Deployment](#deployment) 16 | - [Challenges](#challenges) 17 | - [Authors](#authors) 18 | - [License](#license) 19 | - [Acknowledgments](#acknowledgments) 20 | 21 | 22 | ### Getting Started 23 | To get this project up and running on your machine, follow the steps below: 24 | 25 | - Clone this repository to your local machine by opening your terminal and typing: 26 | ``` 27 | git clone https://github.com/CateGitau/restaurant-recommendation-system 28 | ``` 29 | 30 | - Install the required packages: 31 | ``` 32 | pip3 install -r requirements.txt 33 | ``` 34 | 35 | - Run the app.py file to get the project running on your local machine using Streamlit: 36 | ``` 37 | streamlit run app.py 38 | ``` 39 | 40 | ### Deployment 41 | We used [streamlit sharing](https://www.streamlit.io/sharing) to deploy the application. All you have to do is request an invite, then follow the instructions given to start sharing the app. 42 | 43 | ### Challenges 44 | The operations performed when `app.py` is run take up a lot of RAM, so the app could not accommodate both the content-based and collaborative filtering models. If you find a fix for this, please feel free to send in a PR. 45 | 46 | ### Authors 47 | - [Catherine Gitau](https://github.com/CateGitau) 48 | 49 | ### License 50 | [MIT](https://mit-license.org/) 51 | 52 | ### Acknowledgments 53 | I'd like to thank [Evandar Nyoni](https://github.com/Evandernyoni), who was my tutor for the duration of this project. 
"""Content-based restaurant recommender built from per-restaurant keywords.

Importing this module loads the Toronto review data and builds a
bag-of-words count matrix plus the pairwise cosine similarity matrix.
``content_based_recommendations`` then answers queries from those.
"""
import os

import numpy as np
import pandas as pd
import sklearn
import streamlit as st

from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity, linear_kernel

# Path to data, resolved relative to this file rather than one developer's
# home directory, so the module works wherever the repository is cloned.
toronto_URL = os.path.join(
    os.path.dirname(os.path.abspath(__file__)), "data", "new_toronto_data.csv"
)


# function to load in the data
@st.cache(persist=True, allow_output_mutation=True)
def load_data(url):
    """Read the CSV at ``url`` into a DataFrame (cached by Streamlit)."""
    data = pd.read_csv(url)
    return data


def _unique_keywords(keyword_strings):
    """Return the distinct ``", "``-separated keywords across all strings."""
    keywords = set()
    for text in keyword_strings:
        keywords.update(part for part in text.split(", ") if part)
    return keywords


toronto_data = load_data(toronto_URL)

# Formatting the All_Keywords column: everything as lower-case text
toronto_data['All_Keywords'] = toronto_data['All_Keywords'].astype(str).str.lower()

# Grouping rows by restaurant name and building one bag of words per
# restaurant from its unique keywords. Each row is split on its own first;
# concatenating the raw strings would fuse the last keyword of one row with
# the first keyword of the next. The keywords are sorted so the text does not
# depend on set iteration order.
toronto_final = (
    toronto_data.groupby('name')['All_Keywords']
    .agg(lambda rows: ' '.join(sorted(_unique_keywords(rows))))
    .to_frame(name='bag_of_words')
)

# Remove quotation marks
toronto_final['bag_of_words'] = toronto_final['bag_of_words'].str.replace("'", "", regex=False)

# instantiating and generating the count matrix
count = CountVectorizer()
count_matrix = count.fit_transform(toronto_final['bag_of_words'])

# Series mapping a positional index (row of the similarity matrix) to the
# restaurant name at that position.
indices = pd.Series(toronto_final.index)

# generating the cosine similarity matrix
cosine_sim = cosine_similarity(count_matrix, count_matrix)


def content_based_recommendations(name, cosine_sim=cosine_sim):
    """Return up to 10 restaurant names most similar to ``name``.

    Args:
        name: Restaurant name, as it appears in the ``name`` column.
        cosine_sim: Square similarity matrix whose rows/columns follow the
            order of ``indices``.

    Returns:
        List of restaurant names ordered from most to least similar. The
        queried restaurant is never part of the result.

    Raises:
        IndexError: If ``name`` is not a known restaurant.
    """
    # getting the position of the restaurant that matches the name
    matches = indices[indices == name].index
    if len(matches) == 0:
        raise IndexError(f"no restaurant named {name!r} in the content model")
    idx = matches[0]

    # Similarity scores in descending order. The queried restaurant is
    # dropped explicitly: relying on it sorting first breaks when another
    # restaurant has an identical bag of words (similarity 1.0 as well).
    # mergesort keeps the order of tied scores deterministic.
    score_series = (
        pd.Series(cosine_sim[idx])
        .drop(idx)
        .sort_values(ascending=False, kind="mergesort")
    )

    # names of the 10 most similar restaurants
    top_10_indexes = score_series.index[:10]
    return indices.iloc[top_10_indexes].tolist()
"""Collaborative-filtering restaurant recommender (SVD + item-item cosine).

Importing this module loads the Toronto review data, factorises the
user x restaurant rating matrix and builds the item-item similarity matrix
that ``cf_recommender`` reads from.
"""
import os

import numpy as np
import pandas as pd
import sklearn
import streamlit as st
from sklearn.metrics.pairwise import cosine_similarity, linear_kernel

# Importing scipy Packages
from scipy.sparse.linalg import svds

SC = __import__("super_score")

# Path to data, resolved relative to this file so that it matches the
# location used by the sibling modules and works on any checkout.
toronto_URL = os.path.join(
    os.path.dirname(os.path.abspath(__file__)), "data", "new_toronto_data.csv"
)

# The number of factors to factor the user-item matrix.
NUMBER_OF_FACTORS_MF = 15


# function to load in the data
@st.cache(persist=True)
def load_data(url):
    """Read the CSV at ``url`` into a DataFrame (cached by Streamlit)."""
    data = pd.read_csv(url)
    return data


@st.cache(persist=True)
def mean_center_rows(df):
    """Subtract each row's mean from that row and return the result."""
    return df.sub(df.mean(axis=1), axis=0)


@st.cache(persist=True)
def cos_matrix(data):
    """Return SVD-reconstructed ratings as a restaurant x user DataFrame.

    Args:
        data: DataFrame with ``user_id``, ``name`` and ``super_score``
            columns.

    Returns:
        DataFrame indexed by restaurant name with one column per user id,
        holding the rank-limited reconstruction of the mean-centred ratings.

    Raises:
        ValueError: If the rating matrix is too small to factorise.
    """
    # Creating the Matrix by using the Pivot Table Function
    toronto_restaurant_rating = data.pivot_table(
        index='user_id', columns='name', values='super_score'
    )

    # Normalizing the Rating Scores, then filling all Null Values with 0.0
    toronto_restaurant_rating = mean_center_rows(toronto_restaurant_rating).fillna(0)

    user_ids = list(toronto_restaurant_rating.index)

    # Converting the Matrix DataFrame into a NumPy array
    toronto_matrix = toronto_restaurant_rating.to_numpy()

    # svds requires 1 <= k < min(matrix.shape), so cap the factor count for
    # small datasets instead of letting the factorisation fail.
    k = min(NUMBER_OF_FACTORS_MF, min(toronto_matrix.shape) - 1)
    if k < 1:
        raise ValueError("rating matrix is too small for matrix factorization")

    # Performs matrix factorization of the original user item matrix
    U, sigma, Vt = svds(toronto_matrix, k=k)

    # Overview of user ratings across all Restaurants in Toronto.
    # Scaling U's columns by sigma is the same product as U @ diag(sigma).
    all_user_predicted_ratings = (U * sigma) @ Vt

    # Converting the reconstructed matrix back to a Pandas dataframe
    # NOTE(review): this dense restaurant x user frame is likely the main
    # memory cost mentioned in the README; the item similarities could
    # presumably be derived from the k-dimensional item factors instead.
    # Confirm before changing the shape of cf_preds_df.
    cf_preds_df = pd.DataFrame(
        all_user_predicted_ratings,
        columns=toronto_restaurant_rating.columns,
        index=user_ids,
    ).transpose()

    return cf_preds_df


@st.cache(persist=True)
def item_matrix(preds=None):
    """Return the item-item cosine similarity matrix as a DataFrame.

    Args:
        preds: Restaurant x user DataFrame. Defaults to the module-level
            ``cf_preds_df`` so that ``item_matrix()`` keeps working.

    Returns:
        Square DataFrame indexed and labelled by restaurant name.
    """
    if preds is None:
        preds = cf_preds_df

    # Creating Item-Item Matrix based on Cosine Similarity
    similarity = cosine_similarity(preds)
    return pd.DataFrame(similarity, columns=preds.index, index=preds.index)


toronto_data = load_data(toronto_URL)
cf_preds_df = cos_matrix(toronto_data)
item_item_matrix = item_matrix(cf_preds_df)


# Creating Collaborative Filtering Function for Restaurant-Restaurant Recommendation System
def cf_recommender(restaurant):
    """Return the 10 restaurants most similar to ``restaurant``.

    Args:
        restaurant: Restaurant name; must be a column of ``item_item_matrix``.

    Returns:
        Series of similarity scores indexed by restaurant name, sorted in
        descending order. It contains only positive similarities and never
        the queried restaurant itself.

    Raises:
        KeyError: If ``restaurant`` is unknown.
    """
    # Retrieving the similarity scores from the Item-Item Matrix
    ratings_sim = item_item_matrix[restaurant]

    # Exclude the restaurant itself by label. Slicing off the first sorted
    # row is wrong when its own similarity is not positive, or when it is
    # tied with another restaurant.
    ratings_sim = ratings_sim.drop(labels=restaurant, errors='ignore')

    # Filtering for positively correlated restaurants
    ratings_sim = ratings_sim[ratings_sim > 0]

    # Generate Top 10 Recommended Restaurants
    return ratings_sim.sort_values(ascending=False).head(10)
"""Streamlit front end for the Toronto restaurant recommendation platform."""
import streamlit as st
import csv
import os
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import seaborn as sns
pd.set_option('display.max_columns', 50)

# Importing Plotly Packages

import plotly
import plotly.offline as py
import plotly.graph_objs as go
import plotly_express as px

from bokeh.io import output_file, show
from bokeh.models import ColumnDataSource, GMapOptions
from bokeh.plotting import gmap


from plotly.subplots import make_subplots
import plotly.graph_objects as go
from wordcloud import WordCloud, STOPWORDS

# importing python scripts
import cluster as KM
import location as LOC
import content as CT
# The collaborative model (collaborate.py) stays disabled: per the README the
# app cannot hold both the content and collaborative models in memory.
# CF = __import__("collaborate")


st.title("Toronto Restaurant Recommendation platform")
st.markdown("This application is for recommending restaurants to visit for users in Toronto 🍔🍕🍹🍺")


st.sidebar.title("Toronto Restaurant Recommendation platform")
st.sidebar.subheader("Africa DSI Capstone Project")
st.sidebar.markdown("By: Catherine Gitau")
st.sidebar.markdown("This application is for recommending restaurants to visit for users in Toronto 🍔🍕🍹🍺")

# Data files are looked up next to this script instead of in one developer's
# home directory, so the app runs from any checkout of the repository.
DATA_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "data")
business_URL = os.path.join(DATA_DIR, "business_final.csv")
final_URL = os.path.join(DATA_DIR, "final_reviews.csv")
toronto_URL = os.path.join(DATA_DIR, "new_toronto_data.csv")

# NOTE(review): both credentials below are committed to the repository. They
# should be rotated and supplied through the environment; the literals remain
# only as fallbacks so existing deployments keep working.
MAPBOX_TOKEN = os.environ.get(
    "MAPBOX_TOKEN",
    "pk.eyJ1Ijoic2hha2Fzb20iLCJhIjoiY2plMWg1NGFpMXZ5NjJxbjhlM2ttN3AwbiJ9.RtGYHmreKiyBfHuElgYq_w",
)
HERE_API_KEY = os.environ.get(
    "HERE_API_KEY",
    "ODfYgIX45wrL41qboC3F_z2hg8e5_ABJYi71Pu6o948",  # Acquire from developer.here.com
)
HERE_GEOCODE_URL = "https://geocode.search.hereapi.com/v1/geocode"


@st.cache(persist=True, allow_output_mutation=True)
def load_data(url):
    """Read the CSV at ``url`` into a DataFrame (cached by Streamlit)."""
    data = pd.read_csv(url)
    return data


def clean(data):
    """Return the business columns used by the app.

    Drops the ``Unnamed: 0`` column from ``data`` in place and copies the
    column named ``'business_id '`` (with a trailing space) to
    ``'business_id'``, then returns a frame restricted to the business
    columns. Note that ``data`` itself is modified.
    """
    data.drop(['Unnamed: 0'], axis=1, inplace=True)
    data['business_id'] = data['business_id ']
    data = data[['business_id', 'name', 'categories', 'stars', 'review_count',
                 'latitude', 'longitude', 'postal_code']]
    return data


business_data = load_data(business_URL)
toronto_data = load_data(toronto_URL)
final_reviews = load_data(final_URL)

# Bucket the rounded super score into a sentiment label.
toronto_data['super_score'] = toronto_data['super_score'].round(0)
conditions = [
    (toronto_data['super_score'] <= 2),
    (toronto_data['super_score'] == 3),
    (toronto_data['super_score'] >= 4)
]

# the label assigned for each condition, in the same order
values = ['negative', 'neutral', 'positive']

# Rows matching no condition (e.g. a missing score) get an explicit string
# label. The implicit default is the integer 0, which recent NumPy releases
# refuse to combine with string choices.
toronto_data['sentiment'] = np.select(conditions, values, default='unknown')


@st.cache(persist=True)
def plot_sentiment(restaurant):
    """Return the review count per sentiment label for ``restaurant``.

    The result has a ``Sentiment`` column holding the labels and a ``text``
    column holding how many reviews carry each label.
    """
    df = toronto_data[toronto_data['name'] == restaurant]
    count = df['sentiment'].value_counts()
    count = pd.DataFrame({'Sentiment': count.index, 'text': count.values.flatten()})
    return count


def _show_overview():
    """Render the landing page: usage notes and a map of all restaurants."""
    st.markdown("## How to get the most out of this platform")
    st.markdown('This platform contains 3 recommendation system models to recommend to you restaurants based on Yelp reviews in Toronto city')
    st.markdown("- If you're a new user of this platform or in this city and you have never tried any restaurant around toronto, please select the **location based** recommender on the sidebar to get recommended top restaurants around where you are.")
    st.markdown("- If you want recommendations of restaurants similar to one you have previously visited and liked, please select **content-based** on the sidebar.")
    st.markdown("- If this isn't your first time using this platform and would like to get recommendations based on previous restaurants you have visited and rated please select the **collaborative filtering** option on the sidebar.")

    st.subheader("Graphical Overview of Restaurants in Toronto City")
    px.set_mapbox_access_token(MAPBOX_TOKEN)
    fig = px.scatter_mapbox(business_data, lat="latitude", lon="longitude", color="stars", size='review_count',
                            size_max=15, zoom=10, width=1000, height=700)
    st.plotly_chart(fig)


def _show_location_based():
    """Geocode the entered location and map the restaurants recommended for it."""
    st.subheader('Location Based Recommendation System')

    st.markdown("please enter your location")
    location = st.text_area('Input your location here')
    if not location:
        return

    params = {'apikey': HERE_API_KEY, 'q': location}
    try:
        latitude, longitude = LOC.get_location(HERE_GEOCODE_URL, params)
    except (LookupError, OSError, ValueError):
        # LookupError: no or malformed result. OSError: network/HTTP failure
        # (requests' exceptions derive from it). ValueError: non-JSON body.
        st.error("Could not find that location. Please check it and try again.")
        return

    df = KM.location_based_recommendation(business_data, latitude, longitude)

    if st.sidebar.checkbox("Show data", False):
        st.write(df)

    st.markdown("## Geographical Plot of Nearby Recommended Restaurants from " + location)
    px.set_mapbox_access_token(MAPBOX_TOKEN)
    fig = px.scatter_mapbox(df, lat="latitude", lon="longitude",
                            zoom=10, width=1000, height=700,
                            hover_data=['name', 'latitude', 'longitude', 'categories', 'stars', 'review_count'])
    # Extra marker for the user's own position.
    fig.add_scattermapbox(lat=[latitude], lon=[longitude]).update_traces(dict(mode='markers', marker=dict(size=15)))
    fig.update_layout(mapbox_style="dark")
    st.plotly_chart(fig)


def _show_content_based():
    """Show the top five restaurants similar to the selected one."""
    st.subheader('Content based recommendation system')
    st.markdown("please select a restaurant similar to the one you'd like to visit")
    restaurant = st.selectbox('select restaurant', toronto_data['name'].unique())
    if not restaurant:
        return

    recommendations = CT.content_based_recommendations(restaurant)

    # One summary row (mean super score) per recommended restaurant. Slicing
    # keeps this safe when fewer than five recommendations come back.
    summary_columns = ['name', 'categories', 'super_score']
    summaries = [
        toronto_data[toronto_data['name'] == recommended][summary_columns]
        .groupby(['name', 'categories'], as_index=False)
        .mean()
        .head(1)
        for recommended in recommendations[:5]
    ]
    if not summaries:
        st.info("No similar restaurants were found.")
        return

    st.write(pd.concat(summaries))


def _show_sentiment_comparison():
    """Plot per-restaurant sentiment bars for the restaurants picked in the sidebar."""
    if not st.sidebar.checkbox("Compare restaurants by sentiments", False):
        return

    choice = st.sidebar.multiselect('Pick restaurants', toronto_data['name'].unique())
    if not choice:
        return

    st.subheader("Breakdown restaurant by sentiment")
    fig_3 = make_subplots(rows=1, cols=len(choice), subplot_titles=choice)
    for column, restaurant in enumerate(choice, start=1):
        counts = plot_sentiment(restaurant)
        fig_3.add_trace(
            go.Bar(x=counts['Sentiment'], y=counts['text'], showlegend=False),
            row=1, col=column
        )
    fig_3.update_layout(height=600, width=800)
    st.plotly_chart(fig_3)


def main():
    """Render the page for the recommendation type chosen in the sidebar."""
    st.sidebar.markdown("### Recommendation type")
    section = st.sidebar.selectbox(
        'choose recommendation type',
        ['Pick a Value', 'Location based', 'Content based', 'Collaborative Filtering'],
        key=1,
    )

    if section == "Pick a Value":
        _show_overview()
    elif section == "Location based":
        _show_location_based()
    elif section == 'Content based':
        _show_content_based()
    elif section == 'Collaborative Filtering':
        # The option is offered in the sidebar but the model is not loaded;
        # say so instead of rendering an empty page.
        st.info("The collaborative filtering recommender is currently disabled.")

    if section != 'Pick a Value':
        _show_sentiment_comparison()


if __name__ == "__main__":
    main()
-------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | absl-py==0.10.0 2 | alabaster==0.7.12 3 | altair==4.0.1 4 | anaconda-client==1.7.2 5 | anaconda-navigator==1.9.12 6 | anaconda-project==0.8.3 7 | aniso8601==8.1.0 8 | argh==0.26.2 9 | argon2-cffi @ file:///tmp/build/80754af9/argon2-cffi_1596828452693/work 10 | arrow==0.17.0 11 | asn1crypto @ file:///tmp/build/80754af9/asn1crypto_1596577642040/work 12 | astor==0.8.1 13 | astroid @ file:///tmp/build/80754af9/astroid_1592495881661/work 14 | astropy==4.0.1.post1 15 | async-generator==1.10 16 | atomicwrites==1.4.0 17 | attrs @ file:///tmp/build/80754af9/attrs_1600298409949/work 18 | autopep8 @ file:///tmp/build/80754af9/autopep8_1596578164842/work 19 | Babel==2.8.0 20 | backcall==0.2.0 21 | backports.functools-lru-cache==1.6.1 22 | backports.shutil-get-terminal-size==1.0.0 23 | backports.tempfile==1.0 24 | backports.weakref==1.0.post1 25 | base58==2.0.0 26 | beautifulsoup4 @ file:///tmp/build/80754af9/beautifulsoup4_1601924105527/work 27 | billiard==3.6.1.0 28 | binaryornot==0.4.4 29 | bitarray @ file:///tmp/build/80754af9/bitarray_1598884989496/work 30 | bkcharts==0.2 31 | bleach @ file:///tmp/build/80754af9/bleach_1600439572647/work 32 | blinker==1.4 33 | blis==0.4.1 34 | bokeh @ file:///tmp/build/80754af9/bokeh_1598903502831/work 35 | boto==2.49.0 36 | boto3==1.11.3 37 | botocore==1.14.3 38 | Bottleneck==1.3.2 39 | bpemb==0.3.0 40 | brotlipy==0.7.0 41 | bs4==0.0.1 42 | cachetools==4.1.1 43 | catalogue==1.0.0 44 | catboost==0.24.1 45 | certifi==2020.6.20 46 | cffi @ file:///tmp/build/80754af9/cffi_1600699180754/work 47 | chardet==3.0.4 48 | click==7.1.2 49 | cloudpickle @ file:///tmp/build/80754af9/cloudpickle_1598884132938/work 50 | clyent==1.2.2 51 | coala-utils==0.5.1 52 | colorama==0.4.3 53 | conda==4.8.5 54 | conda-build==3.20.3 55 | conda-package-handling==1.7.0 56 | 
conda-verify==3.4.2 57 | confuse==1.3.0 58 | contextlib2==0.6.0.post1 59 | cookiecutter==1.7.2 60 | cryptography @ file:///tmp/build/80754af9/cryptography_1601046817403/work 61 | cvxopt==1.2.0 62 | cycler==0.10.0 63 | cymem==2.0.3 64 | Cython @ file:///tmp/build/80754af9/cython_1594831564311/work 65 | cytoolz==0.11.0 66 | dartsclone==0.9.0 67 | dask @ file:///tmp/build/80754af9/dask-core_1602083700509/work 68 | de-core-news-sm @ https://github.com/explosion/spacy-models/releases/download/de_core_news_sm-2.3.0/de_core_news_sm-2.3.0.tar.gz 69 | decorator==4.4.2 70 | defusedxml==0.6.0 71 | Deprecated==1.2.7 72 | diff-match-patch @ file:///tmp/build/80754af9/diff-match-patch_1594828741838/work 73 | distributed @ file:///tmp/build/80754af9/distributed_1602083907176/work 74 | docutils==0.15.2 75 | en-core-web-sm @ https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-2.2.0/en_core_web_sm-2.2.0.tar.gz 76 | entrypoints==0.3 77 | enum-compat==0.0.3 78 | et-xmlfile==1.0.1 79 | fastai==1.0.60 80 | fastcache==1.1.0 81 | fastprogress==0.2.3 82 | fasttext==0.9.2 83 | filelock==3.0.12 84 | flair==0.4.4 85 | flake8 @ file:///tmp/build/80754af9/flake8_1601911421857/work 86 | Flask==1.0.3 87 | Flask-RESTful==0.3.7 88 | fsspec @ file:///tmp/build/80754af9/fsspec_1597944003862/work 89 | future==0.18.2 90 | gast==0.4.0 91 | gdown==3.12.2 92 | gensim==3.8.1 93 | gevent @ file:///tmp/build/80754af9/gevent_1601397565838/work 94 | glob2==0.7 95 | gmpy2==2.0.8 96 | google-api-core==1.21.0 97 | google-api-python-client==1.9.3 98 | google-auth==1.18.0 99 | google-auth-httplib2==0.0.3 100 | google-auth-oauthlib==0.4.2 101 | google-pasta==0.2.0 102 | googleapis-common-protos==1.52.0 103 | googletrans==3.0.0 104 | GPy==1.9.9 105 | graphviz==0.14.1 106 | greenlet @ file:///tmp/build/80754af9/greenlet_1600873995270/work 107 | grpcio==1.32.0rc1 108 | h11==0.9.0 109 | h2==3.2.0 110 | h5py==2.10.0 111 | HeapDict==1.0.1 112 | hpack==3.0.0 113 | hstspreload==2020.8.18 114 | 
html5lib @ file:///tmp/build/80754af9/html5lib_1593446221756/work 115 | htmlmin==0.1.12 116 | httpcore==0.9.1 117 | httpie==1.0.3 118 | httplib2==0.18.1 119 | httpx==0.13.3 120 | hyperframe==5.2.0 121 | hyperopt==0.2.3 122 | icu==0.0.1 123 | idna @ file:///tmp/build/80754af9/idna_1593446292537/work 124 | ImageHash==4.1.0 125 | imageio @ file:///tmp/build/80754af9/imageio_1594161405741/work 126 | imagesize==1.2.0 127 | importlib-metadata @ file:///tmp/build/80754af9/importlib-metadata_1593446408836/work 128 | imutils==0.5.3 129 | iniconfig @ file:///tmp/build/80754af9/iniconfig_1596827328212/work 130 | intervaltree @ file:///tmp/build/80754af9/intervaltree_1598376443606/work 131 | ipdb==0.13.4 132 | ipykernel @ file:///tmp/build/80754af9/ipykernel_1596206598566/work/dist/ipykernel-5.3.4-py3-none-any.whl 133 | ipython @ file:///tmp/build/80754af9/ipython_1598883837425/work 134 | ipython-genutils==0.2.0 135 | ipywidgets @ file:///tmp/build/80754af9/ipywidgets_1601490159889/work 136 | isort @ file:///tmp/build/80754af9/isort_1601490204941/work 137 | itsdangerous==1.1.0 138 | Janome==0.3.10 139 | jdcal==1.4.1 140 | jedi @ file:///tmp/build/80754af9/jedi_1592841891421/work 141 | jeepney==0.4.3 142 | Jinja2==2.11.2 143 | jinja2-time==0.2.0 144 | jmespath==0.9.4 145 | joblib @ file:///tmp/build/80754af9/joblib_1601912903842/work 146 | joeynmt==0.0.1 147 | json5==0.9.5 148 | jsonlines==1.2.0 149 | jsonschema @ file:///tmp/build/80754af9/jsonschema_1594363551272/work 150 | jupyter==1.0.0 151 | jupyter-client @ file:///tmp/build/80754af9/jupyter_client_1601311786391/work 152 | jupyter-console @ file:///tmp/build/80754af9/jupyter_console_1598884538475/work 153 | jupyter-core==4.6.3 154 | jupyterlab==2.2.6 155 | jupyterlab-pygments @ file:///tmp/build/80754af9/jupyterlab_pygments_1601490720602/work 156 | jupyterlab-server @ file:///tmp/build/80754af9/jupyterlab_server_1594164409481/work 157 | kaggle==1.5.6 158 | kenlm==0.0.0 159 | Keras==2.3.0 160 | Keras-Applications==1.0.8 
161 | Keras-Preprocessing==1.1.2 162 | keyring @ file:///tmp/build/80754af9/keyring_1601490840626/work 163 | kiwisolver==1.2.0 164 | kytea==0.1.5 165 | langdetect==1.0.7 166 | lazy-object-proxy==1.4.3 167 | libarchive-c==2.9 168 | llvmlite==0.34.0 169 | locket==0.2.0 170 | lxml @ file:///tmp/build/80754af9/lxml_1594824402764/work 171 | Markdown==3.2.2 172 | MarkupSafe @ file:///tmp/build/80754af9/markupsafe_1594371495811/work 173 | matplotlib==3.3.3 174 | mccabe==0.6.1 175 | missingno==0.4.2 176 | mistune @ file:///tmp/build/80754af9/mistune_1594373098390/work 177 | mkl-fft==1.2.0 178 | mkl-random==1.1.1 179 | mkl-service==2.3.0 180 | mock==4.0.2 181 | more-itertools @ file:///tmp/build/80754af9/more-itertools_1598884071673/work 182 | Morfessor==2.0.6 183 | mpld3==0.3 184 | mpmath==1.1.0 185 | msgpack==1.0.0 186 | multipledispatch==0.6.0 187 | munch==2.5.0 188 | murmurhash==1.0.2 189 | natto-py==0.9.2 190 | navigator-updater==0.2.1 191 | nbclient @ file:///tmp/build/80754af9/nbclient_1601059699549/work 192 | nbconvert @ file:///tmp/build/80754af9/nbconvert_1601914821128/work 193 | nbformat==5.0.7 194 | nest-asyncio @ file:///tmp/build/80754af9/nest-asyncio_1601499549014/work 195 | networkx @ file:///tmp/build/80754af9/networkx_1598376031484/work 196 | nlpaug==0.0.20 197 | nltk @ file:///tmp/build/80754af9/nltk_1592496090529/work 198 | nose @ file:///tmp/build/80754af9/nose_1594377616924/work 199 | notebook @ file:///tmp/build/80754af9/notebook_1601501580008/work 200 | numba @ file:///tmp/build/80754af9/numba_1600102479638/work 201 | numexpr==2.7.1 202 | numpy==1.18.5 203 | numpydoc @ file:///tmp/build/80754af9/numpydoc_1594166760263/work 204 | nvidia-ml-py3==7.352.0 205 | oauth2client==4.1.3 206 | oauthlib==3.1.0 207 | olefile==0.46 208 | omegaconf==2.0.2 209 | openpyxl @ file:///tmp/build/80754af9/openpyxl_1598113097404/work 210 | opustools-pkg==0.0.52 211 | packaging==20.4 212 | pandas @ file:///tmp/build/80754af9/pandas_1602088128026/work 213 | 
pandas-profiling==2.9.0 214 | pandocfilters==1.4.2 215 | paramz==0.9.5 216 | parso==0.7.0 217 | partd==1.1.0 218 | path @ file:///tmp/build/80754af9/path_1596907209691/work 219 | pathlib2 @ file:///tmp/build/80754af9/pathlib2_1594380969706/work 220 | pathtools==0.1.2 221 | patsy==0.5.1 222 | pep8==1.7.1 223 | pexpect @ file:///tmp/build/80754af9/pexpect_1594383317248/work 224 | phik==0.10.0 225 | pickleshare @ file:///tmp/build/80754af9/pickleshare_1594384075987/work 226 | Pillow @ file:///tmp/build/80754af9/pillow_1594307325547/work 227 | pkginfo==1.5.0.1 228 | plac==1.1.3 229 | plotly==4.10.0 230 | plotly-express==0.4.1 231 | pluggy==0.13.1 232 | ply==3.11 233 | polyglot==16.7.4 234 | portalocker==2.0.0 235 | poyo==0.5.0 236 | preshed==3.0.2 237 | pretrainedmodels==0.7.4 238 | prometheus-client==0.8.0 239 | prompt-toolkit @ file:///tmp/build/80754af9/prompt-toolkit_1598885458782/work 240 | protobuf==3.12.2 241 | psutil @ file:///tmp/build/80754af9/psutil_1598370249042/work 242 | ptyprocess==0.6.0 243 | py @ file:///tmp/build/80754af9/py_1593446248552/work 244 | pyasn1==0.4.8 245 | pyasn1-modules==0.2.8 246 | pybind11==2.5.0 247 | pycld2==0.41 248 | pycodestyle==2.6.0 249 | pycosat==0.6.3 250 | pycparser @ file:///tmp/build/80754af9/pycparser_1594388511720/work 251 | pycrypto==2.6.1 252 | pycurl==7.43.0.5 253 | pydeck==0.2.0 254 | pydocstyle @ file:///tmp/build/80754af9/pydocstyle_1598885001695/work 255 | PyDrive==1.3.1 256 | pyflakes==2.2.0 257 | Pygments @ file:///tmp/build/80754af9/pygments_1600458456400/work 258 | pylint @ file:///tmp/build/80754af9/pylint_1598624038450/work 259 | pymongo==3.10.1 260 | pyodbc===4.0.0-unsupported 261 | pyOpenSSL @ file:///tmp/build/80754af9/pyopenssl_1594392929924/work 262 | pyparsing==2.4.7 263 | pyrsistent @ file:///tmp/build/80754af9/pyrsistent_1600141707582/work 264 | PySocks @ file:///tmp/build/80754af9/pysocks_1594394576006/work 265 | pyter3==0.3 266 | pytest==0.0.0 267 | python-dateutil==2.8.0 268 | python-jsonrpc-server 
@ file:///tmp/build/80754af9/python-jsonrpc-server_1600278539111/work 269 | python-language-server @ file:///tmp/build/80754af9/python-language-server_1600454544709/work 270 | python-slugify==4.0.0 271 | python-utils==2.4.0 272 | pytz==2020.1 273 | PyWavelets @ file:///tmp/build/80754af9/pywavelets_1601658308664/work 274 | pyxdg==0.26 275 | PyYAML==5.3.1 276 | pyzmq==19.0.2 277 | QDarkStyle==2.8.1 278 | QtAwesome==0.7.2 279 | qtconsole @ file:///tmp/build/80754af9/qtconsole_1600870028330/work 280 | QtPy==1.9.0 281 | regex @ file:///tmp/build/80754af9/regex_1596829692676/work 282 | requests @ file:///tmp/build/80754af9/requests_1592841827918/work 283 | requests-oauthlib==1.3.0 284 | retrying==1.3.3 285 | rfc3986==1.4.0 286 | rope==0.17.0 287 | rsa==4.6 288 | Rtree==0.9.4 289 | ruamel-yaml==0.15.87 290 | s3transfer==0.3.0 291 | sacrebleu==1.4.13 292 | sacremoses==0.0.38 293 | scikit-image==0.16.2 294 | scikit-learn==0.24.0 295 | scipy==1.5.4 296 | seaborn @ file:///tmp/build/80754af9/seaborn_1600553570093/work 297 | SecretStorage @ file:///tmp/build/80754af9/secretstorage_1594419219833/work 298 | segtok==1.5.7 299 | selenium==3.141.0 300 | Send2Trash==1.5.0 301 | sentencepiece==0.1.85 302 | simplegeneric==0.8.1 303 | singledispatch==3.4.0.3 304 | six==1.15.0 305 | sklearn==0.0 306 | smart-open==1.9.0 307 | sniffio==1.1.0 308 | snowballstemmer==2.0.0 309 | sortedcollections==1.2.1 310 | sortedcontainers==2.2.2 311 | soupsieve==2.0.1 312 | spacy==2.3.0 313 | Sphinx @ file:///tmp/build/80754af9/sphinx_1597428793432/work 314 | sphinxcontrib-applehelp==1.0.2 315 | sphinxcontrib-devhelp==1.0.2 316 | sphinxcontrib-htmlhelp==1.0.3 317 | sphinxcontrib-jsmath==1.0.1 318 | sphinxcontrib-qthelp==1.0.3 319 | sphinxcontrib-serializinghtml==1.1.4 320 | sphinxcontrib-websupport @ file:///tmp/build/80754af9/sphinxcontrib-websupport_1597081412696/work 321 | spyder @ file:///tmp/build/80754af9/spyder_1599056984254/work 322 | spyder-kernels @ 
file:///tmp/build/80754af9/spyder-kernels_1599056754119/work 323 | SQLAlchemy @ file:///tmp/build/80754af9/sqlalchemy_1598374732873/work 324 | sqlitedict==1.6.0 325 | srsly==1.0.2 326 | statsmodels==0.11.1 327 | stopwordsiso==0.5.1 328 | streamlit==0.53.0 329 | subword-nmt==0.3.7 330 | SudachiDict-core @ https://object-storage.tyo2.conoha.io/v1/nc_2520839e1f9641b08211a5c85243124a/sudachi/SudachiDict_core-20190927.tar.gz 331 | SudachiPy==0.4.4 332 | sympy @ file:///tmp/build/80754af9/sympy_1597083144364/work 333 | tables==3.6.1 334 | tabulate==0.8.6 335 | tangled-up-in-unicode==0.0.6 336 | tb-nightly==1.14.0a20190603 337 | tblib @ file:///tmp/build/80754af9/tblib_1597928476713/work 338 | tensorboard==2.3.0 339 | tensorboard-plugin-wit==1.7.0 340 | tensorflow==2.0.0b1 341 | termcolor==1.1.0 342 | terminado==0.8.3 343 | testpath==0.4.4 344 | text-unidecode==1.3 345 | textaugment @ git+https://github.com/CateGitau/textaugment.git@189a81066cf7b5be46c4f7ae4faae5a681e6b151 346 | textblob==0.15.3 347 | tf-estimator-nightly==1.14.0.dev2019060501 348 | thinc==7.4.1 349 | threadpoolctl @ file:///tmp/tmp9twdgx9k/threadpoolctl-2.1.0-py3-none-any.whl 350 | tiny-tokenizer==3.1.0 351 | toml @ file:///tmp/build/80754af9/toml_1592853716807/work 352 | toolz @ file:///tmp/build/80754af9/toolz_1601054250827/work 353 | torch==1.6.0+cpu 354 | torchtext==0.6.0 355 | torchvision==0.7.0+cpu 356 | tornado==5.1.1 357 | tqdm @ file:///tmp/build/80754af9/tqdm_1602185206534/work 358 | traitlets @ file:///tmp/build/80754af9/traitlets_1600712679583/work 359 | transformers==2.3.0 360 | tweepy==3.8.0 361 | twitterscraper==1.4.0 362 | typed-ast==1.4.1 363 | typing-extensions @ file:///tmp/build/80754af9/typing_extensions_1598376058250/work 364 | tzlocal==2.0.0 365 | ujson @ file:///tmp/build/80754af9/ujson_1602185184149/work 366 | unicodecsv==0.14.1 367 | uritemplate==3.0.1 368 | urllib3 @ file:///tmp/build/80754af9/urllib3_1597086586889/work 369 | validators==0.14.1 370 | virtualenv==16.7.9 371 | 
visions==0.5.0 372 | wasabi==0.6.0 373 | watchdog @ file:///tmp/build/80754af9/watchdog_1593447306189/work 374 | wcwidth @ file:///tmp/build/80754af9/wcwidth_1593447189090/work 375 | webencodings==0.5.1 376 | Werkzeug==1.0.1 377 | wget==3.2 378 | widgetsnbextension==3.5.1 379 | word2word==1.0.0 380 | wordcloud==1.8.1 381 | wrapt==1.11.1 382 | wurlitzer @ file:///tmp/build/80754af9/wurlitzer_1594751868473/work 383 | xlrd==1.2.0 384 | XlsxWriter @ file:///tmp/build/80754af9/xlsxwriter_1602001750554/work 385 | xlwt==1.3.0 386 | xmltodict==0.12.0 387 | yapf @ file:///tmp/build/80754af9/yapf_1593528177422/work 388 | zict==2.0.0 389 | zipp @ file:///tmp/build/80754af9/zipp_1602002379921/work 390 | zope.event==4.4 391 | zope.interface @ file:///tmp/build/80754af9/zope.interface_1602002440259/work 392 | --------------------------------------------------------------------------------