├── .gitignore
├── __pycache__
    ├── cluster.cpython-37.pyc
    ├── cluster.cpython-38.pyc
    ├── content.cpython-37.pyc
    ├── content.cpython-38.pyc
    ├── location.cpython-37.pyc
    ├── location.cpython-38.pyc
    └── super_score.cpython-38.pyc
├── super_score.py
├── location.py
├── LICENSE
├── cluster.py
├── README.md
├── content.py
├── collaborate.py
├── app.py
└── requirements.txt


/.gitignore:
--------------------------------------------------------------------------------
1 | *.csv
2 | 


--------------------------------------------------------------------------------
/__pycache__/cluster.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CateGitau/restaurant-recommendation-system/HEAD/__pycache__/cluster.cpython-37.pyc


--------------------------------------------------------------------------------
/__pycache__/cluster.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CateGitau/restaurant-recommendation-system/HEAD/__pycache__/cluster.cpython-38.pyc


--------------------------------------------------------------------------------
/__pycache__/content.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CateGitau/restaurant-recommendation-system/HEAD/__pycache__/content.cpython-37.pyc


--------------------------------------------------------------------------------
/__pycache__/content.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CateGitau/restaurant-recommendation-system/HEAD/__pycache__/content.cpython-38.pyc


--------------------------------------------------------------------------------
/__pycache__/location.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CateGitau/restaurant-recommendation-system/HEAD/__pycache__/location.cpython-37.pyc


--------------------------------------------------------------------------------
/__pycache__/location.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CateGitau/restaurant-recommendation-system/HEAD/__pycache__/location.cpython-38.pyc


--------------------------------------------------------------------------------
/__pycache__/super_score.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CateGitau/restaurant-recommendation-system/HEAD/__pycache__/super_score.cpython-38.pyc


--------------------------------------------------------------------------------
/super_score.py:
--------------------------------------------------------------------------------
1 | def score(data):
2 |     # Computing Super-Score Rating for Reviews
3 |     data['super_score'] = data['polarity'] *  data['compound']
4 |     data['super_score'] = data['super_score'] + data['stars']
5 | 
6 |     return data


--------------------------------------------------------------------------------
/location.py:
--------------------------------------------------------------------------------
 1 | import requests
 2 | 
 3 | # URL = "https://geocode.search.hereapi.com/v1/geocode"
 4 | # #location = input("Enter the location here: ") #taking user input
 5 | # api_key = 'ODfYgIX45wrL41qboC3F_z2hg8e5_ABJYi71Pu6o948' # Acquire from developer.here.com
 6 | # PARAMS = {'apikey':api_key,'q':location} 
 7 | 
 8 | def get_location(url, params):
 9 |     # sending get request and saving the response as response object 
10 |     r = requests.get(url, params) 
11 |     data = r.json()
12 | 
13 |     latitude = data['items'][0]['position']['lat']
14 |     longitude = data['items'][0]['position']['lng']
15 | 
16 |     loc = [latitude, longitude]
17 | 
18 |     return loc
19 | 
20 | 
21 | 
22 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2020 Cathy
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/cluster.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import pandas as pd 
 3 | import sklearn
 4 | from sklearn.cluster import KMeans
 5 | 
 6 | 
 7 | # business_URL= "/home/cate/Cate/recommender_system/business_final.csv"
 8 | # def load_data(url):
 9 | #     data = pd.read_csv(url)
10 | #     return data
11 | 
12 | 
13 | # Creating Location-Based Recommendation Function
14 | def location_based_recommendation(data, latitude, longitude):
15 | 
16 |     # Putting the Coordinates of Restaurants together into a dataframe
17 |     coordinates = data[['longitude','latitude']]
18 | 
19 |     kmeans = KMeans(n_clusters = 10, init = 'k-means++')
20 |     kmeans.fit(coordinates)
21 |     y = kmeans.labels_
22 | 
23 |     data['cluster'] = kmeans.predict(data[['longitude','latitude']])
24 |     top_restaurants_toronto = data.sort_values(by=['stars', 'review_count'], ascending=False)
25 | 
26 |     
27 |     """Predict the cluster for longitude and latitude provided"""
28 |     cluster = kmeans.predict(np.array([longitude,latitude]).reshape(1,-1))[0]
29 |     
30 |    
31 |     """Get the best restaurant in this cluster along with the relevant information for a user to make a decision"""
32 |     return top_restaurants_toronto[top_restaurants_toronto['cluster']==cluster].iloc[0:10][['name', 'latitude','longitude','categories','stars', 'review_count','cluster']]
33 | 
34 | 
35 | #location_based_recommendation(top_restaurants_toronto, 43.6677, -79.3948)
36 | 
37 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # restaurant recommendation system
 2 | 
 3 | This project is my Capstone project for the DSI 2020 training. The aim of the project is to create a restaurant recommender system that suggests restaurants to users based on restaurant reviews and on the restaurants they have previously visited.
 4 | 
 5 | There are three product features/ models for the system:
 6 | - Location based recommender system
 7 | - Content based recommender system
 8 | - Collaborative filtering recommender system
 9 | 
10 | 
11 | 
12 | ## Summary
13 | 
14 |  - [Getting Started](#getting-started)
15 |  - [Deployment](#deployment)
16 |  - [Challenges](#challenges)
17 |  - [Authors](#authors)
18 |  - [License](#license)
19 |  - [Acknowledgments](#acknowledgments)
20 | 
21 | 
22 | ### Getting Started
23 |  To get this project up and running in your machine, follow the steps below:
24 | 
25 |  - Clone this repository to your local machine by opening your terminal and typing:
26 |  ```
27 |  git clone https://github.com/CateGitau/restaurant-recommendation-system
28 |  ```
29 | 
30 |  - Install the required packages:
31 |  ```
32 |  pip3 install -r requirements.txt
33 |  ```
34 | 
35 |  - Run the app.py file to get the project running in your local machine using Streamlit
36 |  ```
37 |  streamlit run app.py
38 |  ```
39 | 
40 |  ### Deployment
41 |  We used [streamlit sharing](https://www.streamlit.io/sharing) to deploy the application. All you have to do is send a request to get an invite so that you start sharing the app then follow the instructions given.
42 | 
43 |  ### Challenges
44 |  The operations performed when `app.py` runs take up a lot of RAM, so the app could not accommodate both the content and collaborative models. If you find a fix for this, please feel free to send in a PR.
45 | 
46 |  ### Authors
47 |  - [Catherine Gitau](https://github.com/CateGitau)
48 | 
49 |  ### License
50 |  [MIT](https://mit-license.org/)
51 | 
52 |  ### Acknowledgments
53 |   I'd like to thank [Evandar Nyoni](https://github.com/Evandernyoni), who was my tutor for the duration of this project.


--------------------------------------------------------------------------------
/content.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import pandas as pd 
 3 | import sklearn
 4 | import streamlit as st
 5 | 
 6 | from sklearn.feature_extraction.text import CountVectorizer,TfidfVectorizer
 7 | from sklearn.metrics.pairwise import cosine_similarity, linear_kernel
 8 | 
 9 | #path to data
10 | toronto_URL= "/home/cate/Cate/restaurant-recommendation-system/data/new_toronto_data.csv"
11 | 
12 | #function to load in the data
13 | @st.cache(persist=True, allow_output_mutation=True)
14 | def load_data(url):
15 |     data = pd.read_csv(url)
16 |     return data
17 | 
18 | toronto_data = load_data(toronto_URL)
19 | 
20 | # Combining the text in Keywords and categories columns
21 | #toronto_data['All_Keywords'] = toronto_data['categories'].str.cat(toronto_data['Keywords'],sep=", ")
22 | 
23 | # Formating the All_Keywords Column
24 | toronto_data['All_Keywords'] = toronto_data['All_Keywords'].map(lambda x: str(x))
25 | toronto_data['All_Keywords'] = toronto_data['All_Keywords'].map(lambda x: x.lower())
26 | 
27 | # Adding and Grouping Rows together by Restaurant Name
28 | toronto_final = toronto_data.groupby('name')['All_Keywords'].sum()
29 | toronto_final = toronto_final.to_frame(name = 'sum').reset_index()
30 | 
31 | # Getting a list of Unique Keywords per Restaurant
32 | 
33 | toronto_final['sum'] = toronto_final['sum'].map(lambda x: x.replace(", ","', '"))
34 | toronto_final['sum'] = toronto_final['sum'].map(lambda x: str("'") + x + str("'"))
35 | f = lambda x: x["sum"].split(", ")
36 | toronto_final['sum'] = toronto_final.apply(f, axis=1)
37 | toronto_final['sum'] = toronto_final['sum'].map(lambda x: set(x))
38 | toronto_final.set_index('name', inplace = True)
39 | 
40 | # Creating Bag of Words
41 | toronto_final['bag_of_words'] = ''
42 | columns = toronto_final.columns
43 | for index, row in toronto_final.iterrows():
44 |     words = ''
45 |     for col in columns:
46 |             words = words + ' '.join(row[col])+ ' '
47 |     row['bag_of_words'] = words
48 |     
49 | toronto_final.drop(columns = [col for col in toronto_final.columns if col!= 'bag_of_words'], inplace = True)
50 | 
51 | # Remove quotation marks
52 | toronto_final['bag_of_words'] = toronto_final['bag_of_words'].map(lambda x: x.replace("'", ""))
53 | 
54 | # instantiating and generating the count matrix
55 | count = CountVectorizer()
56 | count_matrix = count.fit_transform(toronto_final['bag_of_words'])
57 | 
58 | # creating a Series for the restaurant names so they are associated to an ordered numerical
59 | # list I will use later to match the indexes
60 | indices = pd.Series(toronto_final.index)
61 | 
62 | # generating the cosine similarity matrix
63 | cosine_sim = cosine_similarity(count_matrix, count_matrix)
64 | 
65 | # function that takes in restaurant name as input and returns the top 10 recommended restaurants
66 | def content_based_recommendations(name, cosine_sim = cosine_sim):
67 |     
68 |     recommended_restaurants = []
69 |     
70 |     # gettin the index of the movie that matches the title
71 |     idx = indices[indices == name].index[0]
72 | 
73 |     # creating a Series with the similarity scores in descending order
74 |     score_series = pd.Series(cosine_sim[idx]).sort_values(ascending = False)
75 | 
76 |     # getting the indexes of the 10 most similar movies
77 |     top_10_indexes = list(score_series.iloc[1:11].index)
78 |     
79 |     # populating the list with the titles of the best 10 matching movies
80 |     for i in top_10_indexes:
81 |         recommended_restaurants.append(list(toronto_final.index)[i])
82 |         
83 |     return recommended_restaurants


--------------------------------------------------------------------------------
/collaborate.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import pandas as pd 
 3 | import sklearn
 4 | import streamlit as st
 5 | from sklearn.metrics.pairwise import cosine_similarity, linear_kernel
 6 | 
 7 | # Importing scipy Packages
 8 | from scipy.sparse.linalg import svds
 9 | 
10 | SC = __import__("super_score")
11 | 
12 | #path to data
13 | toronto_URL= "/home/cate/Cate/recommender_system/data/new_toronto_data.csv"
14 | 
15 | #function to load in the data
16 | @st.cache(persist=True)
17 | def load_data(url):
18 |     data = pd.read_csv(url)
19 |     return data
20 | 
21 | @st.cache(persist=True)
22 | def mean_center_rows(df):
23 |     return (df.T - df.mean(axis = 1)).T
24 | 
25 | @st.cache(persist=True)
26 | def cos_matrix(data):
27 |     # Combining the text in Keywords and categories columns
28 |     # data['All_Keywords'] = data['categories'].str.cat(data['Keywords'],sep=", ")
29 | 
30 |     # Creating the Matrix by using the Pivot Table Function
31 |     toronto_restaurant_rating = data.pivot_table(index = 'user_id', columns = 'name', values = 'super_score')
32 | 
33 |     # Normalizing the Rating Scores
34 |     toronto_restaurant_rating = mean_center_rows(toronto_restaurant_rating)
35 | 
36 |     # Filling all Null Values with 0.0
37 |     toronto_restaurant_rating = toronto_restaurant_rating.fillna(0)
38 | 
39 |     #cosine similarity
40 | 
41 |     # List of first 10 Yelp Customer User_ids in the Matrix
42 |     user_ids = list(toronto_restaurant_rating.index)
43 | 
44 |     # Converting the Matrix DataFrame into a NumPy array
45 |     toronto_matrix = toronto_restaurant_rating.to_numpy()
46 | 
47 |     # Applying Singular Value Decomposition (SVD)
48 |     #The number of factors to factor the user-item matrix.
49 |     NUMBER_OF_FACTORS_MF = 15
50 | 
51 |     #Performs matrix factorization of the original user item matrix
52 |     U, sigma, Vt = svds(toronto_matrix, k = NUMBER_OF_FACTORS_MF)
53 | 
54 |     sigma = np.diag(sigma)
55 | 
56 |     # Overview of user ratings across all Restaurants in Toronto
57 |     all_user_predicted_ratings = np.dot(np.dot(U, sigma), Vt) 
58 | 
59 |     # Converting the reconstructed matrix back to a Pandas dataframe
60 |     cf_preds_df = pd.DataFrame(all_user_predicted_ratings, columns = toronto_restaurant_rating.columns, index=user_ids).transpose()
61 | 
62 |     return cf_preds_df
63 | 
64 | @st.cache(persist=True)
65 | def item_matrix()):
66 |     # Creating Item-Item Matrix based on Cosine Similarity
67 |     item_item_matrix = cosine_similarity(cf_preds_df)
68 |     item_item_matrix= pd.DataFrame(item_item_matrix, columns=cf_preds_df.index, index = cf_preds_df.index)
69 | 
70 |     return item_item_matrix
71 | 
72 | toronto_data = load_data(toronto_URL)  # data read from the CSV at toronto_URL
73 | cf_preds_df = cos_matrix(toronto_data)  # predicted scores: restaurants as rows, users as columns
74 | item_item_matrix = item_matrix()  # reads the module-level cf_preds_df assigned on the previous line
75 | 
76 | 
77 | # Creating Collaborative Filtering Function for Restaurant-Restaurant Recommendation System
78 | def cf_recommender(restaurant):
79 |     
80 |     """Getting the correlation of a specific restaurant with other Toronto Restaurants"""
81 |     restaurant_ratings = cf_preds_df.T[restaurant]
82 |     similar_restaurant_ratings = cf_preds_df.T.corrwith(restaurant_ratings)
83 |     corr_ratings = pd.DataFrame(similar_restaurant_ratings, columns=['Correlation'])
84 |     corr_ratings.dropna(inplace=True)
85 |     
86 |     """Retrieving the Ratings Scores from the Item-Item Matrix"""
87 |     ratings_sim = item_item_matrix[restaurant]
88 |     
89 |     """Filtering for positively correlated restaurants"""
90 |     ratings_sim = ratings_sim[ratings_sim>0]
91 |     
92 |     """Generate Top 10 Recommended Restaurants"""
93 |     """Exclude top row as that will be the same restaurant"""
94 |     return ratings_sim.sort_values(ascending= False).head(11)[1:]


--------------------------------------------------------------------------------
/app.py:
--------------------------------------------------------------------------------
  1 | import streamlit as st
  2 | import csv
  3 | import matplotlib.pyplot as plt
  4 | import pandas as pd
  5 | import numpy as np
  6 | import seaborn as sns
  7 | pd.set_option('display.max_columns', 50)
  8 | 
  9 | # Importing Plotly Packages
 10 | 
 11 | import plotly 
 12 | import plotly.offline as py
 13 | import plotly.graph_objs as go
 14 | import plotly_express as px
 15 | 
 16 | from bokeh.io import output_file, show
 17 | from bokeh.models import ColumnDataSource, GMapOptions
 18 | from bokeh.plotting import gmap
 19 | 
 20 | 
 21 | from plotly.subplots import make_subplots
 22 | import plotly.graph_objects as go
 23 | from wordcloud import WordCloud, STOPWORDS
 24 | import matplotlib.pyplot as plt
 25 | 
 26 | #importing python scripts
 27 | KM = __import__("cluster")
 28 | LOC= __import__("location")
 29 | CT = __import__("content")
 30 | #CF = __import__("collaborate")
 31 | 
 32 | 
 33 | st.title("Toronto Restaurant Recommendation platform")
 34 | st.markdown("This application is for recommending restaurants to visit for users in Toronto  🍔🍕🍹🍺")
 35 | 
 36 | 
 37 | st.sidebar.title("Toronto Restaurant Recommendation platform")
 38 | st.sidebar.subheader("Africa DSI Capstone Project")
 39 | st.sidebar.markdown("By: Catherine Gitau")
 40 | st.sidebar.markdown("This application is for recommending restaurants to visit for users in Toronto  🍔🍕🍹🍺")
 41 | 
 42 | business_URL= "/home/cate/Cate/restaurant-recommendation-system/data/business_final.csv"
 43 | final_URL="/home/cate/Cate/restaurant-recommendation-system/data/final_reviews.csv"
 44 | toronto_URL= "/home/cate/Cate/restaurant-recommendation-system/data/new_toronto_data.csv"
 45 | 
 46 | 
 47 | @st.cache(persist=True, allow_output_mutation=True)
 48 | def load_data(url):
 49 |     data = pd.read_csv(url)
 50 |     return data
 51 | 
 52 | def clean(data):
 53 |     data.drop(['Unnamed: 0'], axis=1, inplace = True)
 54 |     data['business_id'] = data['business_id ']
 55 |     data = data[['business_id', 'name', 'categories','stars','review_count','latitude','longitude','postal_code']]
 56 |     return data
 57 | 
 58 | business_data = load_data(business_URL)
 59 | toronto_data = load_data(toronto_URL)
 60 | final_reviews = load_data(final_URL)
 61 | 
 62 | # st.write(all_data.head())
 63 | 
 64 | # create a list of our conditions
 65 | toronto_data['super_score'] = toronto_data['super_score'].round(0)
 66 | conditions = [
 67 |     (toronto_data['super_score'] <=2),
 68 |     (toronto_data['super_score'] == 3),
 69 |     (toronto_data['super_score'] >= 4)
 70 |     ]
 71 | 
 72 | # create a list of the values we want to assign for each condition
 73 | values = ['negative', 'neutral', 'positive']
 74 | 
 75 | # create a new column and use np.select to assign values to it using our lists as arguments
 76 | toronto_data['sentiment'] = np.select(conditions, values)
 77 | 
 78 | @st.cache(persist=True)
 79 | def plot_sentiment(restaurant):
 80 |     df = toronto_data[toronto_data['name']==restaurant]
 81 |     count = df['sentiment'].value_counts()
 82 |     count = pd.DataFrame({'Sentiment':count.index, 'text':count.values.flatten()})
 83 |     return count
 84 | 
 85 | def main():
 86 |     st.sidebar.markdown("### Recommendation type")
 87 |     section = st.sidebar.selectbox('choose recommendation type', ['Pick a Value', 'Location based', 'Content based', 'Collaborative Filtering'], key= 1)
 88 | 
 89 |     #fig.update_layout(mapbox_style="dark")
 90 |     #fig.show()
 91 |     # 
 92 |         
 93 | 
 94 |     if section == "Pick a Value":
 95 |         st.markdown("## How to get the most out of this platform")
 96 |         st.markdown('This platform contains 3 recommendation system models to recommend to you restaurants based on Yelp reviews in Toronto city')
 97 |         st.markdown("- If you're a new user of this platform or in this city and you have never tried any restaurant around toronto, please select the **location based** recommender on the sidebar to get recommended top restaurants around where you are.")
 98 |         st.markdown("- If you want recommendations of restaurants similar to one you have previously visited and liked, please select **content-based** on the sidebar.")
 99 |         st.markdown("- If this isn't your first time using this platform and would like to get recommendations based on previous restaurants you have visited and rated please select the **collaborative filtering** option on the sidebar.")
100 |         #st.markdown("- If you just want to compare the ratings of different restaurants you have in mind, please select **Restaurant Analytics** on the sidebar.")
101 | 
102 | 
103 |         st.subheader("Graphical Overview of Restaurants in Toronto City")
104 |         px.set_mapbox_access_token("pk.eyJ1Ijoic2hha2Fzb20iLCJhIjoiY2plMWg1NGFpMXZ5NjJxbjhlM2ttN3AwbiJ9.RtGYHmreKiyBfHuElgYq_w")
105 |         fig = px.scatter_mapbox(business_data, lat="latitude", lon="longitude", color="stars", size='review_count',
106 |                         size_max=15, zoom=10, width=1000, height=700)
107 |         st.plotly_chart(fig)
108 | 
109 |     if section == "Location based":
110 | 
111 |         st.subheader('Location Based Recommendation System')
112 | 
113 |         st.markdown("please enter your location")
114 |         location = st.text_area('Input your location here')
115 | 
116 |         if location:
117 |             URL = "https://geocode.search.hereapi.com/v1/geocode"
118 |             api_key = 'ODfYgIX45wrL41qboC3F_z2hg8e5_ABJYi71Pu6o948' # Acquire from developer.here.com
119 |             PARAMS = {'apikey':api_key,'q':location}
120 | 
121 |             lat_long = LOC.get_location(URL, PARAMS)
122 |             latitude = lat_long[0]
123 |             longitude = lat_long[1]
124 |             df = KM.location_based_recommendation(business_data, latitude, longitude)
125 | 
126 |             if st.sidebar.checkbox("Show data", False):
127 |                 st.write(df)
128 | 
129 |             st.markdown("## Geographical Plot of Nearby Recommended Restaurants from "+ location)
130 |             px.set_mapbox_access_token("pk.eyJ1Ijoic2hha2Fzb20iLCJhIjoiY2plMWg1NGFpMXZ5NjJxbjhlM2ttN3AwbiJ9.RtGYHmreKiyBfHuElgYq_w")
131 |             fig = px.scatter_mapbox(df, lat="latitude", lon="longitude",  
132 |                             zoom=10, width=1000, height=700, hover_data= ['name', 'latitude', 'longitude', 'categories', 'stars', 'review_count'])
133 |             fig.add_scattermapbox(lat=[latitude], lon=[longitude]).update_traces(dict(mode='markers', marker = dict(size = 15)))
134 |             fig.update_layout(mapbox_style="dark")
135 |             st.plotly_chart(fig)
136 |     
137 |     if section == 'Content based':
138 |         st.subheader('Content based recommendation system')
139 |         st.markdown("please select a restaurant similar to the one you'd like to visit")
140 |         restaurant = st.selectbox('select restaurant',toronto_data['name'].unique())
141 | 
142 |         if restaurant:
143 |             restaurant_recommendations = CT.content_based_recommendations(restaurant)
144 |             restaurant1 = toronto_data[toronto_data['name'] == restaurant_recommendations[0]][['name','categories','super_score']].groupby(['name', 'categories'], as_index=False).mean()
145 |             restaurant2 = toronto_data[toronto_data['name'] == restaurant_recommendations[1]][['name','categories','super_score']].groupby(['name', 'categories'], as_index=False).mean()
146 |             restaurant3 = toronto_data[toronto_data['name'] == restaurant_recommendations[2]][['name','categories','super_score']].groupby(['name', 'categories'], as_index=False).mean()
147 |             restaurant4 = toronto_data[toronto_data['name'] == restaurant_recommendations[3]][['name','categories','super_score']].groupby(['name', 'categories'], as_index=False).mean()
148 |             restaurant5 = toronto_data[toronto_data['name'] == restaurant_recommendations[4]][['name','categories','super_score']].groupby(['name', 'categories'], as_index=False).mean()
149 | 
150 | 
151 |             rest_merged = pd.concat([restaurant1.head(1), restaurant2.head(1), restaurant3.head(1), restaurant4.head(1), restaurant5.head(1)])
152 |             st.write(rest_merged)
153 | 
154 |         # st.subheader('Collaborative Filtering recommendation system')
155 | 
156 |         # if restaurant:
157 |         #     collab_recommendations = content_based_recommendations(restaurant)
158 |         #     collab_recommendations = pd.DataFrame(data = restaurant_recommendations)
159 | 
160 |         #     st.write(restaurant_recommendations)
161 | 
162 | 
163 |         if section != 'Pick a Value':
164 |             if st.sidebar.checkbox("Compare restaurants by sentiments", False):
165 |                 choice = st.sidebar.multiselect('Pick restaurants', toronto_data['name'].unique())
166 |                 if len(choice) > 0:
167 |                     st.subheader("Breakdown restaurant by sentiment")
168 |                     fig_3 = make_subplots(rows=1, cols=len(choice), subplot_titles=choice)
169 |                     for i in range(1):
170 |                         for j in range(len(choice)):
171 |                             fig_3.add_trace(
172 |                                 go.Bar(x=plot_sentiment(choice[j]).Sentiment, y=plot_sentiment(choice[j]).text, showlegend=False),
173 |                                 row=i+1, col=j+1
174 |                             )
175 |                     fig_3.update_layout(height=600, width=800)
176 |                     st.plotly_chart(fig_3)
177 | 
178 |                 # st.write(toronot_data.head())
179 |                 # st.sidebar.header("Word Cloud")
180 |                 # word_sentiment = st.sidebar.radio('Display word cloud for what sentiment?', ('positive', 'neutral', 'negative'))
181 |                 # if not st.sidebar.checkbox("Close", True, key='3'):
182 |                 #     st.subheader('Word cloud for %s sentiment' % (word_sentiment))
183 |                 #     df = toronto_data[toronto_data['sentiment']==word_sentiment]
184 |                 #     words = ' '.join(df['text'])
185 |                 #     processed_words = ' '.join([word for word in words.split() if 'http' not in word and not word.startswith('@') and word != 'RT'])
186 |                 #     wordcloud = WordCloud(stopwords=STOPWORDS, background_color='white', width=800, height=640).generate(processed_words)
187 |                 #     plt.imshow(wordcloud)
188 |                 #     plt.xticks([])
189 |                 #     plt.yticks([])
190 |                 #     st.pyplot()
191 |            
192 | 
193 |     # if section == 'Collaborative Filtering':
194 |     #     st.subheader("Collaborative Filtering Recommendation System")
195 | 
196 |     #     st.markdown("please select a restaurant you've visited before")
197 |     #     restaurant = st.selectbox('select restaurant', ['Pai Northern Thai Kitchen', 'Sabor Del Pacifico'])
198 | 
199 | 
200 |         
201 |    
202 | 
203 | if __name__ == "__main__":
204 |     main()
205 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
  1 | absl-py==0.10.0
  2 | alabaster==0.7.12
  3 | altair==4.0.1
  4 | anaconda-client==1.7.2
  5 | anaconda-navigator==1.9.12
  6 | anaconda-project==0.8.3
  7 | aniso8601==8.1.0
  8 | argh==0.26.2
  9 | argon2-cffi @ file:///tmp/build/80754af9/argon2-cffi_1596828452693/work
 10 | arrow==0.17.0
 11 | asn1crypto @ file:///tmp/build/80754af9/asn1crypto_1596577642040/work
 12 | astor==0.8.1
 13 | astroid @ file:///tmp/build/80754af9/astroid_1592495881661/work
 14 | astropy==4.0.1.post1
 15 | async-generator==1.10
 16 | atomicwrites==1.4.0
 17 | attrs @ file:///tmp/build/80754af9/attrs_1600298409949/work
 18 | autopep8 @ file:///tmp/build/80754af9/autopep8_1596578164842/work
 19 | Babel==2.8.0
 20 | backcall==0.2.0
 21 | backports.functools-lru-cache==1.6.1
 22 | backports.shutil-get-terminal-size==1.0.0
 23 | backports.tempfile==1.0
 24 | backports.weakref==1.0.post1
 25 | base58==2.0.0
 26 | beautifulsoup4 @ file:///tmp/build/80754af9/beautifulsoup4_1601924105527/work
 27 | billiard==3.6.1.0
 28 | binaryornot==0.4.4
 29 | bitarray @ file:///tmp/build/80754af9/bitarray_1598884989496/work
 30 | bkcharts==0.2
 31 | bleach @ file:///tmp/build/80754af9/bleach_1600439572647/work
 32 | blinker==1.4
 33 | blis==0.4.1
 34 | bokeh @ file:///tmp/build/80754af9/bokeh_1598903502831/work
 35 | boto==2.49.0
 36 | boto3==1.11.3
 37 | botocore==1.14.3
 38 | Bottleneck==1.3.2
 39 | bpemb==0.3.0
 40 | brotlipy==0.7.0
 41 | bs4==0.0.1
 42 | cachetools==4.1.1
 43 | catalogue==1.0.0
 44 | catboost==0.24.1
 45 | certifi==2020.6.20
 46 | cffi @ file:///tmp/build/80754af9/cffi_1600699180754/work
 47 | chardet==3.0.4
 48 | click==7.1.2
 49 | cloudpickle @ file:///tmp/build/80754af9/cloudpickle_1598884132938/work
 50 | clyent==1.2.2
 51 | coala-utils==0.5.1
 52 | colorama==0.4.3
 53 | conda==4.8.5
 54 | conda-build==3.20.3
 55 | conda-package-handling==1.7.0
 56 | conda-verify==3.4.2
 57 | confuse==1.3.0
 58 | contextlib2==0.6.0.post1
 59 | cookiecutter==1.7.2
 60 | cryptography @ file:///tmp/build/80754af9/cryptography_1601046817403/work
 61 | cvxopt==1.2.0
 62 | cycler==0.10.0
 63 | cymem==2.0.3
 64 | Cython @ file:///tmp/build/80754af9/cython_1594831564311/work
 65 | cytoolz==0.11.0
 66 | dartsclone==0.9.0
 67 | dask @ file:///tmp/build/80754af9/dask-core_1602083700509/work
 68 | de-core-news-sm @ https://github.com/explosion/spacy-models/releases/download/de_core_news_sm-2.3.0/de_core_news_sm-2.3.0.tar.gz
 69 | decorator==4.4.2
 70 | defusedxml==0.6.0
 71 | Deprecated==1.2.7
 72 | diff-match-patch @ file:///tmp/build/80754af9/diff-match-patch_1594828741838/work
 73 | distributed @ file:///tmp/build/80754af9/distributed_1602083907176/work
 74 | docutils==0.15.2
 75 | en-core-web-sm @ https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-2.2.0/en_core_web_sm-2.2.0.tar.gz
 76 | entrypoints==0.3
 77 | enum-compat==0.0.3
 78 | et-xmlfile==1.0.1
 79 | fastai==1.0.60
 80 | fastcache==1.1.0
 81 | fastprogress==0.2.3
 82 | fasttext==0.9.2
 83 | filelock==3.0.12
 84 | flair==0.4.4
 85 | flake8 @ file:///tmp/build/80754af9/flake8_1601911421857/work
 86 | Flask==1.0.3
 87 | Flask-RESTful==0.3.7
 88 | fsspec @ file:///tmp/build/80754af9/fsspec_1597944003862/work
 89 | future==0.18.2
 90 | gast==0.4.0
 91 | gdown==3.12.2
 92 | gensim==3.8.1
 93 | gevent @ file:///tmp/build/80754af9/gevent_1601397565838/work
 94 | glob2==0.7
 95 | gmpy2==2.0.8
 96 | google-api-core==1.21.0
 97 | google-api-python-client==1.9.3
 98 | google-auth==1.18.0
 99 | google-auth-httplib2==0.0.3
100 | google-auth-oauthlib==0.4.2
101 | google-pasta==0.2.0
102 | googleapis-common-protos==1.52.0
103 | googletrans==3.0.0
104 | GPy==1.9.9
105 | graphviz==0.14.1
106 | greenlet @ file:///tmp/build/80754af9/greenlet_1600873995270/work
107 | grpcio==1.32.0rc1
108 | h11==0.9.0
109 | h2==3.2.0
110 | h5py==2.10.0
111 | HeapDict==1.0.1
112 | hpack==3.0.0
113 | hstspreload==2020.8.18
114 | html5lib @ file:///tmp/build/80754af9/html5lib_1593446221756/work
115 | htmlmin==0.1.12
116 | httpcore==0.9.1
117 | httpie==1.0.3
118 | httplib2==0.18.1
119 | httpx==0.13.3
120 | hyperframe==5.2.0
121 | hyperopt==0.2.3
122 | icu==0.0.1
123 | idna @ file:///tmp/build/80754af9/idna_1593446292537/work
124 | ImageHash==4.1.0
125 | imageio @ file:///tmp/build/80754af9/imageio_1594161405741/work
126 | imagesize==1.2.0
127 | importlib-metadata @ file:///tmp/build/80754af9/importlib-metadata_1593446408836/work
128 | imutils==0.5.3
129 | iniconfig @ file:///tmp/build/80754af9/iniconfig_1596827328212/work
130 | intervaltree @ file:///tmp/build/80754af9/intervaltree_1598376443606/work
131 | ipdb==0.13.4
132 | ipykernel @ file:///tmp/build/80754af9/ipykernel_1596206598566/work/dist/ipykernel-5.3.4-py3-none-any.whl
133 | ipython @ file:///tmp/build/80754af9/ipython_1598883837425/work
134 | ipython-genutils==0.2.0
135 | ipywidgets @ file:///tmp/build/80754af9/ipywidgets_1601490159889/work
136 | isort @ file:///tmp/build/80754af9/isort_1601490204941/work
137 | itsdangerous==1.1.0
138 | Janome==0.3.10
139 | jdcal==1.4.1
140 | jedi @ file:///tmp/build/80754af9/jedi_1592841891421/work
141 | jeepney==0.4.3
142 | Jinja2==2.11.2
143 | jinja2-time==0.2.0
144 | jmespath==0.9.4
145 | joblib @ file:///tmp/build/80754af9/joblib_1601912903842/work
146 | joeynmt==0.0.1
147 | json5==0.9.5
148 | jsonlines==1.2.0
149 | jsonschema @ file:///tmp/build/80754af9/jsonschema_1594363551272/work
150 | jupyter==1.0.0
151 | jupyter-client @ file:///tmp/build/80754af9/jupyter_client_1601311786391/work
152 | jupyter-console @ file:///tmp/build/80754af9/jupyter_console_1598884538475/work
153 | jupyter-core==4.6.3
154 | jupyterlab==2.2.6
155 | jupyterlab-pygments @ file:///tmp/build/80754af9/jupyterlab_pygments_1601490720602/work
156 | jupyterlab-server @ file:///tmp/build/80754af9/jupyterlab_server_1594164409481/work
157 | kaggle==1.5.6
158 | kenlm==0.0.0
159 | Keras==2.3.0
160 | Keras-Applications==1.0.8
161 | Keras-Preprocessing==1.1.2
162 | keyring @ file:///tmp/build/80754af9/keyring_1601490840626/work
163 | kiwisolver==1.2.0
164 | kytea==0.1.5
165 | langdetect==1.0.7
166 | lazy-object-proxy==1.4.3
167 | libarchive-c==2.9
168 | llvmlite==0.34.0
169 | locket==0.2.0
170 | lxml @ file:///tmp/build/80754af9/lxml_1594824402764/work
171 | Markdown==3.2.2
172 | MarkupSafe @ file:///tmp/build/80754af9/markupsafe_1594371495811/work
173 | matplotlib==3.3.3
174 | mccabe==0.6.1
175 | missingno==0.4.2
176 | mistune @ file:///tmp/build/80754af9/mistune_1594373098390/work
177 | mkl-fft==1.2.0
178 | mkl-random==1.1.1
179 | mkl-service==2.3.0
180 | mock==4.0.2
181 | more-itertools @ file:///tmp/build/80754af9/more-itertools_1598884071673/work
182 | Morfessor==2.0.6
183 | mpld3==0.3
184 | mpmath==1.1.0
185 | msgpack==1.0.0
186 | multipledispatch==0.6.0
187 | munch==2.5.0
188 | murmurhash==1.0.2
189 | natto-py==0.9.2
190 | navigator-updater==0.2.1
191 | nbclient @ file:///tmp/build/80754af9/nbclient_1601059699549/work
192 | nbconvert @ file:///tmp/build/80754af9/nbconvert_1601914821128/work
193 | nbformat==5.0.7
194 | nest-asyncio @ file:///tmp/build/80754af9/nest-asyncio_1601499549014/work
195 | networkx @ file:///tmp/build/80754af9/networkx_1598376031484/work
196 | nlpaug==0.0.20
197 | nltk @ file:///tmp/build/80754af9/nltk_1592496090529/work
198 | nose @ file:///tmp/build/80754af9/nose_1594377616924/work
199 | notebook @ file:///tmp/build/80754af9/notebook_1601501580008/work
200 | numba @ file:///tmp/build/80754af9/numba_1600102479638/work
201 | numexpr==2.7.1
202 | numpy==1.18.5
203 | numpydoc @ file:///tmp/build/80754af9/numpydoc_1594166760263/work
204 | nvidia-ml-py3==7.352.0
205 | oauth2client==4.1.3
206 | oauthlib==3.1.0
207 | olefile==0.46
208 | omegaconf==2.0.2
209 | openpyxl @ file:///tmp/build/80754af9/openpyxl_1598113097404/work
210 | opustools-pkg==0.0.52
211 | packaging==20.4
212 | pandas @ file:///tmp/build/80754af9/pandas_1602088128026/work
213 | pandas-profiling==2.9.0
214 | pandocfilters==1.4.2
215 | paramz==0.9.5
216 | parso==0.7.0
217 | partd==1.1.0
218 | path @ file:///tmp/build/80754af9/path_1596907209691/work
219 | pathlib2 @ file:///tmp/build/80754af9/pathlib2_1594380969706/work
220 | pathtools==0.1.2
221 | patsy==0.5.1
222 | pep8==1.7.1
223 | pexpect @ file:///tmp/build/80754af9/pexpect_1594383317248/work
224 | phik==0.10.0
225 | pickleshare @ file:///tmp/build/80754af9/pickleshare_1594384075987/work
226 | Pillow @ file:///tmp/build/80754af9/pillow_1594307325547/work
227 | pkginfo==1.5.0.1
228 | plac==1.1.3
229 | plotly==4.10.0
230 | plotly-express==0.4.1
231 | pluggy==0.13.1
232 | ply==3.11
233 | polyglot==16.7.4
234 | portalocker==2.0.0
235 | poyo==0.5.0
236 | preshed==3.0.2
237 | pretrainedmodels==0.7.4
238 | prometheus-client==0.8.0
239 | prompt-toolkit @ file:///tmp/build/80754af9/prompt-toolkit_1598885458782/work
240 | protobuf==3.12.2
241 | psutil @ file:///tmp/build/80754af9/psutil_1598370249042/work
242 | ptyprocess==0.6.0
243 | py @ file:///tmp/build/80754af9/py_1593446248552/work
244 | pyasn1==0.4.8
245 | pyasn1-modules==0.2.8
246 | pybind11==2.5.0
247 | pycld2==0.41
248 | pycodestyle==2.6.0
249 | pycosat==0.6.3
250 | pycparser @ file:///tmp/build/80754af9/pycparser_1594388511720/work
251 | pycrypto==2.6.1
252 | pycurl==7.43.0.5
253 | pydeck==0.2.0
254 | pydocstyle @ file:///tmp/build/80754af9/pydocstyle_1598885001695/work
255 | PyDrive==1.3.1
256 | pyflakes==2.2.0
257 | Pygments @ file:///tmp/build/80754af9/pygments_1600458456400/work
258 | pylint @ file:///tmp/build/80754af9/pylint_1598624038450/work
259 | pymongo==3.10.1
260 | pyodbc===4.0.0-unsupported
261 | pyOpenSSL @ file:///tmp/build/80754af9/pyopenssl_1594392929924/work
262 | pyparsing==2.4.7
263 | pyrsistent @ file:///tmp/build/80754af9/pyrsistent_1600141707582/work
264 | PySocks @ file:///tmp/build/80754af9/pysocks_1594394576006/work
265 | pyter3==0.3
266 | pytest==0.0.0
267 | python-dateutil==2.8.0
268 | python-jsonrpc-server @ file:///tmp/build/80754af9/python-jsonrpc-server_1600278539111/work
269 | python-language-server @ file:///tmp/build/80754af9/python-language-server_1600454544709/work
270 | python-slugify==4.0.0
271 | python-utils==2.4.0
272 | pytz==2020.1
273 | PyWavelets @ file:///tmp/build/80754af9/pywavelets_1601658308664/work
274 | pyxdg==0.26
275 | PyYAML==5.3.1
276 | pyzmq==19.0.2
277 | QDarkStyle==2.8.1
278 | QtAwesome==0.7.2
279 | qtconsole @ file:///tmp/build/80754af9/qtconsole_1600870028330/work
280 | QtPy==1.9.0
281 | regex @ file:///tmp/build/80754af9/regex_1596829692676/work
282 | requests @ file:///tmp/build/80754af9/requests_1592841827918/work
283 | requests-oauthlib==1.3.0
284 | retrying==1.3.3
285 | rfc3986==1.4.0
286 | rope==0.17.0
287 | rsa==4.6
288 | Rtree==0.9.4
289 | ruamel-yaml==0.15.87
290 | s3transfer==0.3.0
291 | sacrebleu==1.4.13
292 | sacremoses==0.0.38
293 | scikit-image==0.16.2
294 | scikit-learn==0.24.0
295 | scipy==1.5.4
296 | seaborn @ file:///tmp/build/80754af9/seaborn_1600553570093/work
297 | SecretStorage @ file:///tmp/build/80754af9/secretstorage_1594419219833/work
298 | segtok==1.5.7
299 | selenium==3.141.0
300 | Send2Trash==1.5.0
301 | sentencepiece==0.1.85
302 | simplegeneric==0.8.1
303 | singledispatch==3.4.0.3
304 | six==1.15.0
305 | sklearn==0.0
306 | smart-open==1.9.0
307 | sniffio==1.1.0
308 | snowballstemmer==2.0.0
309 | sortedcollections==1.2.1
310 | sortedcontainers==2.2.2
311 | soupsieve==2.0.1
312 | spacy==2.3.0
313 | Sphinx @ file:///tmp/build/80754af9/sphinx_1597428793432/work
314 | sphinxcontrib-applehelp==1.0.2
315 | sphinxcontrib-devhelp==1.0.2
316 | sphinxcontrib-htmlhelp==1.0.3
317 | sphinxcontrib-jsmath==1.0.1
318 | sphinxcontrib-qthelp==1.0.3
319 | sphinxcontrib-serializinghtml==1.1.4
320 | sphinxcontrib-websupport @ file:///tmp/build/80754af9/sphinxcontrib-websupport_1597081412696/work
321 | spyder @ file:///tmp/build/80754af9/spyder_1599056984254/work
322 | spyder-kernels @ file:///tmp/build/80754af9/spyder-kernels_1599056754119/work
323 | SQLAlchemy @ file:///tmp/build/80754af9/sqlalchemy_1598374732873/work
324 | sqlitedict==1.6.0
325 | srsly==1.0.2
326 | statsmodels==0.11.1
327 | stopwordsiso==0.5.1
328 | streamlit==0.53.0
329 | subword-nmt==0.3.7
330 | SudachiDict-core @ https://object-storage.tyo2.conoha.io/v1/nc_2520839e1f9641b08211a5c85243124a/sudachi/SudachiDict_core-20190927.tar.gz
331 | SudachiPy==0.4.4
332 | sympy @ file:///tmp/build/80754af9/sympy_1597083144364/work
333 | tables==3.6.1
334 | tabulate==0.8.6
335 | tangled-up-in-unicode==0.0.6
336 | tb-nightly==1.14.0a20190603
337 | tblib @ file:///tmp/build/80754af9/tblib_1597928476713/work
338 | tensorboard==2.3.0
339 | tensorboard-plugin-wit==1.7.0
340 | tensorflow==2.0.0b1
341 | termcolor==1.1.0
342 | terminado==0.8.3
343 | testpath==0.4.4
344 | text-unidecode==1.3
345 | textaugment @ git+https://github.com/CateGitau/textaugment.git@189a81066cf7b5be46c4f7ae4faae5a681e6b151
346 | textblob==0.15.3
347 | tf-estimator-nightly==1.14.0.dev2019060501
348 | thinc==7.4.1
349 | threadpoolctl @ file:///tmp/tmp9twdgx9k/threadpoolctl-2.1.0-py3-none-any.whl
350 | tiny-tokenizer==3.1.0
351 | toml @ file:///tmp/build/80754af9/toml_1592853716807/work
352 | toolz @ file:///tmp/build/80754af9/toolz_1601054250827/work
353 | torch==1.6.0+cpu
354 | torchtext==0.6.0
355 | torchvision==0.7.0+cpu
356 | tornado==5.1.1
357 | tqdm @ file:///tmp/build/80754af9/tqdm_1602185206534/work
358 | traitlets @ file:///tmp/build/80754af9/traitlets_1600712679583/work
359 | transformers==2.3.0
360 | tweepy==3.8.0
361 | twitterscraper==1.4.0
362 | typed-ast==1.4.1
363 | typing-extensions @ file:///tmp/build/80754af9/typing_extensions_1598376058250/work
364 | tzlocal==2.0.0
365 | ujson @ file:///tmp/build/80754af9/ujson_1602185184149/work
366 | unicodecsv==0.14.1
367 | uritemplate==3.0.1
368 | urllib3 @ file:///tmp/build/80754af9/urllib3_1597086586889/work
369 | validators==0.14.1
370 | virtualenv==16.7.9
371 | visions==0.5.0
372 | wasabi==0.6.0
373 | watchdog @ file:///tmp/build/80754af9/watchdog_1593447306189/work
374 | wcwidth @ file:///tmp/build/80754af9/wcwidth_1593447189090/work
375 | webencodings==0.5.1
376 | Werkzeug==1.0.1
377 | wget==3.2
378 | widgetsnbextension==3.5.1
379 | word2word==1.0.0
380 | wordcloud==1.8.1
381 | wrapt==1.11.1
382 | wurlitzer @ file:///tmp/build/80754af9/wurlitzer_1594751868473/work
383 | xlrd==1.2.0
384 | XlsxWriter @ file:///tmp/build/80754af9/xlsxwriter_1602001750554/work
385 | xlwt==1.3.0
386 | xmltodict==0.12.0
387 | yapf @ file:///tmp/build/80754af9/yapf_1593528177422/work
388 | zict==2.0.0
389 | zipp @ file:///tmp/build/80754af9/zipp_1602002379921/work
390 | zope.event==4.4
391 | zope.interface @ file:///tmp/build/80754af9/zope.interface_1602002440259/work
392 | 


--------------------------------------------------------------------------------