├── Procfile ├── Dockerfile ├── server ├── index.py ├── welcome.py ├── __init__.py ├── recommend.py └── update.py ├── wsgi.py ├── database ├── orgs_clicking.csv ├── events_clicking.csv ├── events_category.csv └── orgs_category.csv ├── requirements.txt ├── rec_server.md ├── README.md ├── LICENSE └── collaborative_filtering ├── orgs.py └── events.py /Procfile: -------------------------------------------------------------------------------- 1 | web: gunicorn wsgi:app -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.9 2 | WORKDIR /app 3 | COPY . . 4 | RUN pip install -r requirements.txt 5 | ENTRYPOINT ["python"] 6 | CMD ["wsgi.py"] 7 | -------------------------------------------------------------------------------- /server/index.py: -------------------------------------------------------------------------------- 1 | from server import app 2 | from server import cross_origin 3 | @app.route('/') 4 | @cross_origin() 5 | def index(): 6 | return 'hello' -------------------------------------------------------------------------------- /wsgi.py: -------------------------------------------------------------------------------- 1 | from server import app 2 | from server.update import update, set_interval 3 | 4 | if __name__=="__main__": 5 | set_interval(update,5) 6 | app.run(debug=True, host="0.0.0.0") 7 | 8 | -------------------------------------------------------------------------------- /database/orgs_clicking.csv: -------------------------------------------------------------------------------- 1 | user_id,item_id,Clicking 2 | e663db9d-2298-4b99-bec3-19ea195d6a20,dba4c3ef-6048-4367-89a2-4f56b072d829,2 3 | e663db9d-2298-4b99-bec3-19ea195d6a20,0616ca63-b056-43db-a905-023cee908ec9,1 4 | a858dcba-b613-42fe-b859-b1e99745db8e,651f73ed-5afa-458b-8e01-34af95b79006,1 5 | a858dcba-b613-42fe-b859-b1e99745db8e,80c1a7fa-01af-418c-b950-168733457e59,1 6 | -------------------------------------------------------------------------------- /database/events_clicking.csv: -------------------------------------------------------------------------------- 1 | user_id,item_id,Clicking 2 | e663db9d-2298-4b99-bec3-19ea195d6a20,f6fc4e73-8b46-4cbe-9d7f-34a13e92e811,2 3 | e663db9d-2298-4b99-bec3-19ea195d6a20,49681079-10bf-4401-85a6-1797b439e68a,1 4 | a858dcba-b613-42fe-b859-b1e99745db8e,0e7fef56-5316-4000-9ffc-dcc03ba51c7e,2 5 | a858dcba-b613-42fe-b859-b1e99745db8e,f43075ac-cff0-43af-a7a9-aa2913e4e4a7,1 6 | anonymous,041a3fdd-5915-4012-9a31-017671ff4595,2 7 | -------------------------------------------------------------------------------- /server/welcome.py: -------------------------------------------------------------------------------- 1 | from server import app, events_rds, orgs_rds 2 | from server import cross_origin 3 | @app.route('/welcome/') 4 | @cross_origin() 5 | def welcome(user_id): 6 | return {**events_rds.welcome_recommend(user_id),**orgs_rds.welcome_recommend(user_id)} 7 | 8 | @app.route('/welcome') 9 | @cross_origin() 10 | def guest_welcome(): 11 | return {**events_rds.welcome(),**orgs_rds.welcome()} 12 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | certifi==2020.12.5 2 | chardet==4.0.0 3 | click==8.0.0 4 | Flask==2.0.0 5 | idna==2.10 6 | itsdangerous==2.0.0 7 | Jinja2==3.0.0 8 | joblib==1.0.1 9 | MarkupSafe==2.0.0 10 | numpy==1.20.3 11 | pandas==1.2.4 12 | python-dateutil==2.8.1 13 | pytz==2021.1 14 | requests==2.25.1 15 | scikit-learn==0.24.2 16 | scipy==1.6.3 17 | six==1.16.0 18 | sklearn==0.0 19 | threadpoolctl==2.1.0 20 | urllib3==1.26.4 21 | Werkzeug==2.0.0 22 | gunicorn== 20.1.0 23 | flask-cors 24 | 25 | -------------------------------------------------------------------------------- /rec_server.md: -------------------------------------------------------------------------------- 1 | # Guide To Use Rec Sever 2 | 3 | ## 1. Recommend for guest 4 | - When recommend orgs, call /orgs/(orgs_id) 5 | - When recommend events, call /events/(events_id) 6 | 7 | ## 2. Recommend for registered user 8 | 9 | - When recommend orgs, call /orgs/(user_id)/(orgs_id) 10 | - When recommend events, call /events/(user_id)/(events_id) 11 | 12 | 13 | ## 3. Recommend home page for guest 14 | - call /welcome 15 | 16 | ## 4. Recommend home page for registered user 17 | - call /welcome/(user_id) 18 | -------------------------------------------------------------------------------- /database/events_category.csv: -------------------------------------------------------------------------------- 1 | item_id,Category_1,Category_2,Category_3 2 | f6fc4e73-8b46-4cbe-9d7f-34a13e92e811,Khoa học,Công nghệ, 3 | e3c88311-9ef5-4a48-8d85-af82f78ca1a8,Công nghệ,Khoa học, 4 | 49681079-10bf-4401-85a6-1797b439e68a,Chiêm tinh,, 5 | 359bd37a-bda9-47a7-8698-c58ee34166a5,Đời sống,Nhân quyền,Từ thiện 6 | d5e6409c-2739-4bf8-9f16-d185fae91462,Đời sống,Truyền thống,Văn hóa 7 | 0e7fef56-5316-4000-9ffc-dcc03ba51c7e,Khoa học,Toán học,Giáo dục 8 | 1d7515c1-faa5-4994-91bb-1c378a8f4e41,Khoa học,Công nghệ,Kỹ thuật 9 | f43075ac-cff0-43af-a7a9-aa2913e4e4a7,Công nghệ,Giáo dục, 10 | 86c13e8a-805c-4589-82a2-7f58791b4b0c,Công nghệ,Khoa học,Từ thiện 11 | fce1a276-1f1a-48f3-b69d-801870f4c798,Khoa học,Công nghệ,Từ thiện 12 | -------------------------------------------------------------------------------- /database/orgs_category.csv: -------------------------------------------------------------------------------- 1 | item_id,Category_1,Category_2,Category_3 2 | 7d2120c5-6272-40f8-b890-9d420317823d,Triết học,Giáo dục,Văn học 3 | 30135076-d59a-43b3-858e-a34f40a6430d,Khoa học,Công nghệ,Kinh tế 4 | 8aab5d0a-da07-47e3-bb4d-8f8d079b4d25,Khoa học,Truyền thống,Âm nhạc 5 | ed8e3b31-7b3b-47ce-af89-5afa65de01a0,Môi trường,Lịch sử,Đời sống 6 | 67427c1f-94a9-4e4e-8754-df8de12047e8,Môi trường,Văn hóa,Đời sống 7 | 3fef48d3-455b-4936-925b-7b104cd642ca,Môi trường,Văn hóa,Đời sống 8 | bc6ec031-63a1-4bbd-afea-72dc6ffe1ce2,Công nghệ,Giáo dục,Đời sống 9 | 0616ca63-b056-43db-a905-023cee908ec9,Nhiếp ảnh,Kỹ năng mềm, 10 | dd50715f-7d37-4c15-945f-5e7204d2cb44,Từ thiện,Văn hóa,Đời sống 11 | e25e7184-a236-4dc2-8497-b68ec5ac464a,cate3,cate13,Technology3 12 | -------------------------------------------------------------------------------- /server/__init__.py: -------------------------------------------------------------------------------- 1 | from flask import Flask 2 | from flask_cors import CORS, cross_origin 3 | from collaborative_filtering.events import CF_events 4 | from collaborative_filtering.orgs import CF_orgs 5 | import pandas as pd 6 | import os 7 | app=Flask(__name__) 8 | cors=CORS(app) 9 | app.config['CORS_HEADERS'] = 'Content-Type' 10 | link=os.environ.get("GRAPHQL") 11 | events_clicking=pd.read_csv("database/events_clicking.csv", encoding="latin-1") 12 | events_category=pd.read_csv("database/events_category.csv", encoding="latin-1") 13 | events_rds=CF_events(events_clicking,events_category,3,10) 14 | events_rds.preprocessing_data() 15 | orgs_clicking=pd.read_csv("database/orgs_clicking.csv", encoding="latin-1") 16 | orgs_category=pd.read_csv("database/orgs_category.csv", encoding="latin-1") 17 | orgs_rds=CF_orgs(orgs_clicking,orgs_category,3,10) 18 | orgs_rds.preprocessing_data() 19 | from server import index 20 | from server import welcome 21 | from server import recommend 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Projectube Recommendation System 2 | I design a recommendation system server using Flask for [Projectube](https://www.projectube.org/) 3 | ## Normal use 4 | 1. Clone 5 | - Go to terminal 6 | ``` 7 | https://github.com/hoangcaobao/Projectube_Recommendation_System.git 8 | ``` 9 | 2. Change directory: 10 | ``` 11 | cd Projectube_Recommendation_System 12 | ``` 13 | 3. GraphQL DATABASE link: 14 | ``` 15 | export GRAPHQL=link 16 | ``` 17 | - REMEMBER NO SPACE BETWEEN GRAPHQL=yourlink 18 | 19 | 4. Install packages: 20 | ``` 21 | pip install -r requirements.txt 22 | ``` 23 | 24 | 5. Run: 25 | ``` 26 | python3 wsgi.py 27 | ``` 28 | --- 29 | ## Docker use 30 | 1. Clone 31 | - Go to terminal 32 | ``` 33 | https://github.com/hoangcaobao/Projectube_Recommendation_System.git 34 | ``` 35 | 2. Change directory: 36 | ``` 37 | cd Projectube_Recommendation_System 38 | ``` 39 | 3. Docker build: 40 | ``` 41 | docker image build -t app . 42 | ``` 43 | 4. Docker run: 44 | ``` 45 | docker run -e GRAPHQL=link -p 5000:5000 app 46 | ``` 47 | --- 48 | ## Bao Hoang 49 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 Hoang Cao Bao 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /server/recommend.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | from server import cross_origin 3 | from server import app, events_rds, orgs_rds 4 | @app.route("/events//") 5 | @cross_origin() 6 | def events_recommend(user_id,item_id): 7 | df=pd.read_csv("database/events_clicking.csv") 8 | a=df[df["user_id"]==user_id] 9 | a=a[a["item_id"]==item_id] 10 | if a.index.size==0: 11 | #If user first time click to item then create a new row 12 | df.loc[len(df.index)]=[user_id,item_id,1] 13 | else: 14 | #If user already clicked then increase the amount of click by 1 15 | df.at[a.index[0], 'Clicking'] =a["Clicking"]+1 16 | #Save data to clicking.csv 17 | df.to_csv("database/events_clicking.csv",index=False) 18 | 19 | clicking=pd.read_csv("database/events_clicking.csv", encoding="latin-1") 20 | category=pd.read_csv("database/events_category.csv", encoding="latin-1") 21 | #refresh lại data 22 | events_rds.refresh_data(clicking, category) 23 | dict={ 24 | 'list_of_recommend':events_rds.recommend(user_id,item_id) 25 | } 26 | 27 | return dict 28 | 29 | @app.route("/events/") 30 | @cross_origin() 31 | def guest_events_recommend(item_id): 32 | df=pd.read_csv("database/events_clicking.csv") 33 | a=df[df["user_id"]=="anonymous"] 34 | a=a[a["item_id"]==item_id] 35 | if a.index.size==0: 36 | #If user first time click to item then create a new row 37 | df.loc[len(df.index)]=["anonymous",item_id,1] 38 | else: 39 | #If user already clicked then increase the amount of click by 1 40 | df.at[a.index[0], 'Clicking'] =a["Clicking"]+1 41 | #Save data to clicking.csv 42 | df.to_csv("database/events_clicking.csv",index=False) 43 | dict={ 44 | "list_of_recommend": events_rds.hottest(item_id) 45 | } 46 | return dict 47 | 48 | 49 | 50 | @app.route("/orgs//") 51 | @cross_origin() 52 | def orgs_recommend(user_id,item_id): 53 | df=pd.read_csv("database/orgs_clicking.csv") 54 | a=df[df["user_id"]==user_id] 55 | a=a[a["item_id"]==item_id] 56 | if a.index.size==0: 57 | #If user first time click to item then create a new row 58 | df.loc[len(df.index)]=[user_id,item_id,1] 59 | else: 60 | #If user already clicked then increase the amount of click by 1 61 | df.at[a.index[0], 'Clicking'] =a["Clicking"]+1 62 | #Save data to clicking.csv 63 | df.to_csv("database/orgs_clicking.csv",index=False) 64 | 65 | clicking=pd.read_csv("database/orgs_clicking.csv", encoding="latin-1") 66 | category=pd.read_csv("database/orgs_category.csv", encoding="latin-1") 67 | #refresh lại data 68 | orgs_rds.refresh_data(clicking, category) 69 | dict={ 70 | 'list_of_recommend':orgs_rds.recommend(user_id,item_id) 71 | } 72 | 73 | return dict 74 | 75 | 76 | @app.route("/orgs/") 77 | @cross_origin() 78 | def guest_orgs_recommend(item_id): 79 | df=pd.read_csv("database/orgs_clicking.csv") 80 | a=df[df["user_id"]=="anonymous"] 81 | a=a[a["item_id"]==item_id] 82 | if a.index.size==0: 83 | #If user first time click to item then create a new row 84 | df.loc[len(df.index)]=["anonymous",item_id,1] 85 | else: 86 | #If user already clicked then increase the amount of click by 1 87 | df.at[a.index[0], 'Clicking'] =a["Clicking"]+1 88 | #Save data to clicking.csv 89 | df.to_csv("database/orgs_clicking.csv",index=False) 90 | dict={ 91 | "list_of_recommend": orgs_rds.hottest(item_id) 92 | } 93 | return dict 94 | -------------------------------------------------------------------------------- /server/update.py: -------------------------------------------------------------------------------- 1 | import requests 2 | from server import link 3 | import pandas as pd 4 | import threading 5 | from server import events_rds, orgs_rds 6 | from collaborative_filtering.events import CF_events 7 | from collaborative_filtering.orgs import CF_orgs 8 | import time 9 | #function to get graphql 10 | 11 | def run_query(query): 12 | global link 13 | request = requests.post(link, json={'query': query}) 14 | if request.status_code == 200: 15 | return request.json() 16 | else: 17 | raise Exception("Query failed to run by returning code of {}. {}".format(request.status_code, query)) 18 | 19 | #setInterval 20 | def set_interval(func, sec): 21 | def func_wrapper(): 22 | time.sleep(5) 23 | set_interval(func, sec) 24 | func() 25 | t = threading.Timer(sec, func_wrapper) 26 | t.start() 27 | 28 | 29 | def update(): 30 | #query sql api 31 | events_query = """ 32 | { 33 | events(limit: 0){ 34 | id 35 | name 36 | categories 37 | } 38 | } 39 | """ 40 | events = run_query(events_query) 41 | events=events['data']['events'] 42 | orgs_query = """ 43 | { 44 | orgs(limit:0){ 45 | id 46 | name 47 | categories 48 | } 49 | } 50 | """ 51 | orgs = run_query(orgs_query) 52 | orgs=orgs['data']['orgs'] 53 | 54 | #create a dic contain id and category 55 | dic={'item_id':[], 56 | 'Category_1':[], 57 | 'Category_2':[], 58 | 'Category_3':[], 59 | } 60 | for event in events: 61 | categories=event['categories'] 62 | dic['item_id'].append(event['id']) 63 | if len(event['categories'])==1: 64 | dic['Category_1'].append(categories[0]) 65 | dic['Category_2'].append(None) 66 | dic['Category_3'].append(None) 67 | elif len(event['categories'])==2: 68 | dic['Category_1'].append(categories[0]) 69 | dic['Category_2'].append(categories[1]) 70 | dic['Category_3'].append(None) 71 | else: 72 | dic['Category_1'].append(categories[0]) 73 | dic['Category_2'].append(categories[1]) 74 | dic['Category_3'].append(categories[2]) 75 | df=pd.DataFrame(data=dic) 76 | df.to_csv('database/events_category.csv',index=False) 77 | dic={'item_id':[], 78 | 'Category_1':[], 79 | 'Category_2':[], 80 | 'Category_3':[], 81 | } 82 | for org in orgs: 83 | categories=org['categories'] 84 | dic['item_id'].append(org['id']) 85 | 86 | if len(org['categories'])==1: 87 | dic['Category_1'].append(categories[0]) 88 | dic['Category_2'].append(None) 89 | dic['Category_3'].append(None) 90 | elif len(org['categories'])==2: 91 | dic['Category_1'].append(categories[0]) 92 | dic['Category_2'].append(categories[1]) 93 | dic['Category_3'].append(None) 94 | else: 95 | dic['Category_1'].append(categories[0]) 96 | dic['Category_2'].append(categories[1]) 97 | dic['Category_3'].append(categories[2]) 98 | 99 | 100 | #create pandas framework from dic then save it to category 101 | df=pd.DataFrame(data=dic) 102 | df.to_csv('database/orgs_category.csv',index=False) 103 | 104 | #reset 105 | events_clicking=pd.read_csv("database/events_clicking.csv", encoding="latin-1") 106 | events_category=pd.read_csv("database/events_category.csv", encoding="latin-1") 107 | events_rds=CF_events(events_clicking,events_category,3,10) 108 | events_rds.preprocessing_data() 109 | orgs_clicking=pd.read_csv("database/orgs_clicking.csv", encoding="latin-1") 110 | orgs_category=pd.read_csv("database/orgs_category.csv", encoding="latin-1") 111 | orgs_rds=CF_orgs(orgs_clicking,orgs_category,3,10) 112 | orgs_rds.preprocessing_data() 113 | 114 | -------------------------------------------------------------------------------- /collaborative_filtering/orgs.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | from sklearn.metrics.pairwise import cosine_similarity 4 | from scipy.sparse import csr_matrix 5 | import random 6 | 7 | class CF_orgs(): 8 | #k is the number of users using to recommend for current user 9 | #clicking is a table contain the amount of clicking of user_id for item_id 10 | #category is the table cotain category of item_id 11 | #number_of_recommend is a number of item recommend to user 12 | def __init__(self,orgs_clicking,orgs_category,k,number_of_recommend): 13 | self.k=k 14 | self.clicking=orgs_clicking 15 | 16 | self.recommended_items_final=[] 17 | self.category=orgs_category 18 | self.number_of_recommend=number_of_recommend 19 | 20 | 21 | #clean_data has index of each row (item) 22 | #clean_data1 is numpy type of clean_data 23 | #clean_data2 do not have index of each row(item) 24 | #clean_data3 is the total amount of clicking of each item 25 | # sim is the table compare the cosine similarity each user 26 | #csr_data use to decrease sparse data 27 | def preprocessing_data(self): 28 | try: 29 | self.clean_data=self.clicking.pivot(index="item_id",columns="user_id",values="Clicking") 30 | self.clean_data3=self.clean_data.sum(axis=1) 31 | #standarization data 32 | 33 | self.clean_data=(self.clean_data-self.clean_data.mean()+(1e-8))/(self.clean_data.std()+(1e-8)) 34 | 35 | self.clean_data.fillna(0,inplace=True) 36 | self.clean_data1=np.array(self.clean_data) 37 | self.clean_data2=self.clean_data.copy() 38 | csr_data=csr_matrix(self.clean_data) 39 | self.clean_data.reset_index(inplace=True) 40 | 41 | self.sim=cosine_similarity(csr_data.T,csr_data.T) 42 | self.delete() 43 | except: 44 | pass 45 | 46 | 47 | def pred(self,user_id,item_id): 48 | #get index of other user clicked to item id, current user, and item 49 | users_clicked_item_id=self.clicking[self.clicking["item_id"]==item_id]["user_id"] 50 | users_clicked_item_index=[self.clean_data2.columns.get_loc(i) for i in users_clicked_item_id] 51 | user_index=self.clean_data2.columns.get_loc(user_id) 52 | item_index=self.clean_data[self.clean_data["item_id"]==item_id].index[0] 53 | 54 | #check whether user click item or not, if already click do not recommend 55 | if self.clean_data1[item_index,user_index]!=0: 56 | return False 57 | 58 | #get sim of current user and other users clicked to item 59 | #get k users has highest sim to current user 60 | sim_user=self.sim[user_index,users_clicked_item_index] 61 | most_similar_index=np.argsort(sim_user)[-self.k:] 62 | most_similar_user_index=[] 63 | for i in most_similar_index: 64 | most_similar_user_index.append(users_clicked_item_index[i]) 65 | 66 | #the function to predict how much user will click to item 67 | largest_sim=sim_user[most_similar_index] 68 | click_value=self.clean_data1[item_index,most_similar_user_index] 69 | pred=(click_value*largest_sim).sum()/(np.abs(largest_sim).sum()+1e-8) 70 | return pred 71 | 72 | def recommend(self,user_id,item_id): 73 | recommended_items_final=[] 74 | recommended_items_CF=[] 75 | 76 | #Sort item with pred score 77 | for item in self.clean_data2.index: 78 | #Check whether user clicked item or not 79 | if self.pred(user_id,item)==False: 80 | continue 81 | recommended_items_CF.append((item,self.pred(user_id,item))) 82 | recommended_items_CF.sort(key=lambda x: x[1]) 83 | 84 | #Choose half item has highest pred score 85 | recommended_items_CF=recommended_items_CF[-int(self.number_of_recommend/2):] 86 | print(recommended_items_CF) 87 | for item in recommended_items_CF: 88 | recommended_items_final.append(item[0]) 89 | 90 | #Sort items with number of category that are the same with current item and total amount of click 91 | if len(self.category[self.category["item_id"]==item_id])==0: 92 | item_category=[] 93 | else: 94 | item_category=self.category[self.category["item_id"]==item_id].to_numpy()[0][1:] 95 | 96 | #Remove nan category 97 | nan_position=[] 98 | for i in range(len(item_category)-1,-1,-1): 99 | if item_category[i]!= item_category[i]: 100 | nan_position.append(i) 101 | for i in nan_position: 102 | item_category=np.delete(item_category,i) 103 | items_score=[] 104 | for item in self.category['item_id']: 105 | #check whether item has already in recommend list 106 | if item==item_id or item in recommended_items_final: 107 | continue 108 | category_score=0 109 | if len(self.category[self.category['item_id']==item])==0: 110 | continue 111 | for i in self.category[self.category["item_id"]==item].to_numpy()[0][1:]: 112 | #plus 1 to score if have the same category 113 | if i in item_category: 114 | category_score+=1 115 | try: 116 | clicking_score=self.clean_data3.loc[item] 117 | except: 118 | clicking_score=0 119 | items_score.append((item, category_score, clicking_score)) 120 | 121 | items_score.sort(key=lambda x: (x[1], x[2]),reverse=True) 122 | 123 | 124 | #choose items until recommended_items_list full 125 | for i in range(len(items_score)): 126 | if len(recommended_items_final)==self.number_of_recommend-2: 127 | items_score=items_score[i:] 128 | random.shuffle(items_score) 129 | break 130 | recommended_items_final.append(items_score[i][0]) 131 | for i in range(len(items_score)): 132 | if len(recommended_items_final)==self.number_of_recommend: 133 | break 134 | recommended_items_final.append(items_score[i][0]) 135 | return recommended_items_final 136 | 137 | #update data 138 | def refresh_data(self,orgs_clicking,orgs_category): 139 | self.clicking=orgs_clicking 140 | self.category=orgs_category 141 | self.preprocessing_data() 142 | 143 | def hottest(self,item_id): 144 | if len(self.category[self.category["item_id"]==item_id])==0: 145 | item_category=[] 146 | else: 147 | item_category=self.category[self.category["item_id"]==item_id].to_numpy()[0][1:] 148 | 149 | #Remove nan category 150 | nan_position=[] 151 | for i in range(len(item_category)): 152 | if item_category[i]!=item_category[i]: 153 | nan_position.append(i) 154 | for i in nan_position: 155 | item_category=np.delete(item_category,i) 156 | items_score=[] 157 | for item in self.category['item_id']: 158 | #check whether item has already in recommend list 159 | if item==item_id : 160 | continue 161 | category_score=0 162 | if len(self.category[self.category['item_id']==item])==0: 163 | continue 164 | for i in self.category[self.category["item_id"]==item].to_numpy()[0][1:]: 165 | #plus 1 to score if have the same category 166 | if i in item_category: 167 | category_score+=1 168 | try: 169 | clicking_score=self.clean_data3.loc[item] 170 | except: 171 | clicking_score=0 172 | items_score.append((item, category_score, clicking_score)) 173 | 174 | items_score.sort(key=lambda x: (x[1], x[2]),reverse=True) 175 | recommend_list=[] 176 | for i in range(len(items_score)): 177 | recommend_list.append(items_score[i][0]) 178 | 179 | return recommend_list 180 | 181 | def welcome_recommend(self,user_id): 182 | dic={ 183 | 'hottest_orgs':[], 184 | 'care_orgs':[], 185 | } 186 | try: 187 | recommended_items_CF=[] 188 | #Sort item with pred score 189 | for item in self.clean_data2.index: 190 | #Check whether user clicked item or not 191 | if self.pred(user_id,item)==False: 192 | continue 193 | recommended_items_CF.append((item,self.pred(user_id,item))) 194 | recommended_items_CF.sort(key=lambda x: x[1]) 195 | 196 | for i in range(len(recommended_items_CF)): 197 | recommended_items_CF[i]=recommended_items_CF[i][0] 198 | 199 | #Choose 10 item with highest pred score 200 | if len(recommended_items_CF)>10: 201 | recommended_items_CF=recommended_items_CF[-10:] 202 | dic['care_orgs']=recommended_items_CF 203 | except: 204 | pass 205 | #Sort item with highest amount of click 206 | self.clean_data3=self.clean_data3.sort_values(ascending=False) 207 | recommended_items_hottest=[] 208 | count=0 209 | #Choose 10 hottest item 210 | items_score=[] 211 | for item in self.category['item_id']: 212 | try: 213 | clicking_score=self.clean_data3.loc[item] 214 | except: 215 | clicking_score=0 216 | items_score.append((item, clicking_score)) 217 | 218 | items_score.sort(key=lambda x: (x[1]),reverse=True) 219 | for i in items_score: 220 | if count==100: 221 | break 222 | recommended_items_hottest.append(i[0]) 223 | dic['hottest_orgs']=recommended_items_hottest 224 | return dic 225 | def welcome(self): 226 | items_score=[] 227 | self.clean_data3=self.clean_data3.sort_values(ascending=False) 228 | recommended_items_hottest=[] 229 | count=0 230 | dic={ 231 | 'hottest_orgs':[], 232 | } 233 | for item in self.category['item_id']: 234 | try: 235 | clicking_score=self.clean_data3.loc[item] 236 | except: 237 | clicking_score=0 238 | items_score.append((item, clicking_score)) 239 | 240 | items_score.sort(key=lambda x: (x[1]),reverse=True) 241 | for i in items_score: 242 | if count==100: 243 | break 244 | recommended_items_hottest.append(i[0]) 245 | count+=1 246 | dic['hottest_orgs']=recommended_items_hottest 247 | return dic 248 | 249 | def delete(self): 250 | #delete an org in clicking due to the removal 251 | total_item= self.category["item_id"] 252 | clicking_item= self.clicking["item_id"] 253 | contain=[] 254 | for i in clicking_item: 255 | check=False 256 | for j in total_item: 257 | if(i==j): 258 | check=True 259 | break 260 | if(check==False): 261 | contain.append(i) 262 | for i in contain: 263 | self.clicking=self.clicking[self.clicking["item_id"]!=i] 264 | self.clicking.to_csv('database/orgs_clicking.csv',index=False) 265 | -------------------------------------------------------------------------------- /collaborative_filtering/events.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | from sklearn.metrics.pairwise import cosine_similarity 4 | from scipy.sparse import csr_matrix 5 | import random 6 | 7 | class CF_events(): 8 | #k is the number of users using to recommend for current user 9 | #clicking is a table contain the amount of clicking of user_id for item_id 10 | #category is the table cotain category of item_id 11 | #number_of_recommend is a number of item recommend to user 12 | def __init__(self,events_clicking,events_category,k,number_of_recommend): 13 | self.k=k 14 | self.clicking=events_clicking 15 | self.recommended_items_final=[] 16 | self.category=events_category 17 | self.number_of_recommend=number_of_recommend 18 | 19 | 20 | #clean_data has index of each row (item) 21 | #clean_data1 is numpy type of clean_data 22 | #clean_data2 do not have index of each row(item) 23 | #clean_data3 is the total amount of clicking of each item 24 | # sim is the table compare the cosine similarity each user 25 | #csr_data use to decrease sparse data 26 | def preprocessing_data(self): 27 | try: 28 | self.clean_data=self.clicking.pivot(index="item_id",columns="user_id",values="Clicking") 29 | self.clean_data3=self.clean_data.sum(axis=1) 30 | #standarization data 31 | 32 | self.clean_data=(self.clean_data-self.clean_data.mean()+(1e-8))/(self.clean_data.std()+(1e-8)) 33 | 34 | self.clean_data.fillna(0,inplace=True) 35 | self.clean_data1=np.array(self.clean_data) 36 | self.clean_data2=self.clean_data.copy() 37 | csr_data=csr_matrix(self.clean_data) 38 | self.clean_data.reset_index(inplace=True) 39 | self.sim=cosine_similarity(csr_data.T,csr_data.T) 40 | 41 | self.delete() 42 | except: 43 | pass 44 | 45 | 46 | def pred(self,user_id,item_id): 47 | #get index of other user clicked to item id, current user, and item 48 | users_clicked_item_id=self.clicking[self.clicking["item_id"]==item_id]["user_id"] 49 | users_clicked_item_index=[self.clean_data2.columns.get_loc(i) for i in users_clicked_item_id] 50 | user_index=self.clean_data2.columns.get_loc(user_id) 51 | item_index=self.clean_data[self.clean_data["item_id"]==item_id].index[0] 52 | 53 | #check whether user click item or not, if already click do not recommend 54 | if self.clean_data1[item_index,user_index]!=0: 55 | return False 56 | 57 | #get sim of current user and other users clicked to item 58 | #get k users has highest sim to current user 59 | sim_user=self.sim[user_index,users_clicked_item_index] 60 | most_similar_index=np.argsort(sim_user)[-self.k:] 61 | most_similar_user_index=[] 62 | for i in most_similar_index: 63 | most_similar_user_index.append(users_clicked_item_index[i]) 64 | 65 | #the function to predict how much user will click to item 66 | largest_sim=sim_user[most_similar_index] 67 | click_value=self.clean_data1[item_index,most_similar_user_index] 68 | pred=(click_value*largest_sim).sum()/(np.abs(largest_sim).sum()+1e-8) 69 | return pred 70 | 71 | def recommend(self,user_id,item_id): 72 | recommended_items_final=[] 73 | recommended_items_CF=[] 74 | 75 | #Sort item with pred score 76 | for item in self.clean_data2.index: 77 | #Check whether user clicked item or not 78 | if self.pred(user_id,item)==False: 79 | continue 80 | recommended_items_CF.append((item,self.pred(user_id,item))) 81 | recommended_items_CF.sort(key=lambda x: x[1]) 82 | 83 | #Choose half item has highest pred score 84 | recommended_items_CF=recommended_items_CF[-int(self.number_of_recommend/2):] 85 | 86 | for item in recommended_items_CF: 87 | recommended_items_final.append(item[0]) 88 | 89 | #Sort items with number of category that are the same with current item and total amount of click 90 | if len(self.category[self.category["item_id"]==item_id])==0: 91 | item_category=[] 92 | else: 93 | item_category=self.category[self.category["item_id"]==item_id].to_numpy()[0][1:] 94 | 95 | #Remove nan category 96 | nan_position=[] 97 | for i in range(len(item_category)-1,-1,-1): 98 | if item_category[i]!=item_category[i]: 99 | nan_position.append(i) 100 | for i in nan_position: 101 | item_category=np.delete(item_category,i) 102 | items_score=[] 103 | for item in self.category['item_id']: 104 | #check whether item has already in recommend list 105 | if item==item_id or item in recommended_items_final: 106 | continue 107 | category_score=0 108 | if len(self.category[self.category['item_id']==item])==0: 109 | continue 110 | for i in self.category[self.category["item_id"]==item].to_numpy()[0][1:]: 111 | #plus 1 to score if have the same category 112 | if i in item_category: 113 | category_score+=1 114 | try: 115 | clicking_score=self.clean_data3.loc[item] 116 | except: 117 | clicking_score=0 118 | items_score.append((item, category_score, clicking_score)) 119 | 120 | items_score.sort(key=lambda x: (x[1], x[2]),reverse=True) 121 | 122 | 123 | #choose items until recommended_items_list full 124 | for i in range(len(items_score)): 125 | if len(recommended_items_final)==self.number_of_recommend-2: 126 | items_score=items_score[i:] 127 | random.shuffle(items_score) 128 | break 129 | recommended_items_final.append(items_score[i][0]) 130 | for i in range(len(items_score)): 131 | if len(recommended_items_final)==self.number_of_recommend: 132 | break 133 | recommended_items_final.append(items_score[i][0]) 134 | return recommended_items_final 135 | 136 | #update data 137 | def refresh_data(self,events_clicking,events_category): 138 | self.clicking=events_clicking 139 | self.category=events_category 140 | self.preprocessing_data() 141 | 142 | def hottest(self,item_id): 143 | if len(self.category[self.category["item_id"]==item_id])==0: 144 | item_category=[] 145 | else: 146 | item_category=self.category[self.category["item_id"]==item_id].to_numpy()[0][1:] 147 | 148 | #Remove nan category 149 | nan_position=[] 150 | for i in range(len(item_category)): 151 | if item_category[i]!=item_category[i]: 152 | nan_position.append(i) 153 | for i in nan_position: 154 | item_category=np.delete(item_category,i) 155 | items_score=[] 156 | for item in self.category['item_id']: 157 | #check whether item has already in recommend list 158 | if item==item_id : 159 | continue 160 | category_score=0 161 | if len(self.category[self.category['item_id']==item])==0: 162 | continue 163 | for i in self.category[self.category["item_id"]==item].to_numpy()[0][1:]: 164 | #plus 1 to score if have the same category 165 | if i in item_category: 166 | category_score+=1 167 | try: 168 | clicking_score=self.clean_data3.loc[item] 169 | except: 170 | clicking_score=0 171 | items_score.append((item, category_score, clicking_score)) 172 | 173 | items_score.sort(key=lambda x: (x[1], x[2]),reverse=True) 174 | recommend_list=[] 175 | for i in range(len(items_score)): 176 | recommend_list.append(items_score[i][0]) 177 | 178 | return recommend_list 179 | 180 | def welcome_recommend(self,user_id): 181 | dic={ 182 | 'hottest_events':[], 183 | 'care_events':[], 184 | } 185 | try: 186 | recommended_items_CF=[] 187 | #Sort item with pred score 188 | for item in self.clean_data2.index: 189 | #Check whether user clicked item or not 190 | if self.pred(user_id,item)==False: 191 | continue 192 | recommended_items_CF.append((item,self.pred(user_id,item))) 193 | recommended_items_CF.sort(key=lambda x: x[1]) 194 | 195 | for i in range(len(recommended_items_CF)): 196 | recommended_items_CF[i]=recommended_items_CF[i][0] 197 | 198 | #Choose 10 item with highest pred score 199 | if len(recommended_items_CF)>10: 200 | recommended_items_CF=recommended_items_CF[-10:] 201 | dic['care_events']=recommended_items_CF 202 | except: 203 | pass 204 | #Sort item with highest amount of click 205 | self.clean_data3=self.clean_data3.sort_values(ascending=False) 206 | recommended_items_hottest=[] 207 | count=0 208 | #Choose 10 hottest item 209 | items_score=[] 210 | for item in self.category['item_id']: 211 | try: 212 | clicking_score=self.clean_data3.loc[item] 213 | except: 214 | clicking_score=0 215 | items_score.append((item, clicking_score)) 216 | 217 | items_score.sort(key=lambda x: (x[1]),reverse=True) 218 | for i in items_score: 219 | if count==100: 220 | break 221 | recommended_items_hottest.append(i[0]) 222 | count+=1 223 | dic['hottest_events']=recommended_items_hottest 224 | return dic 225 | def welcome(self): 226 | items_score=[] 227 | self.clean_data3=self.clean_data3.sort_values(ascending=False) 228 | recommended_items_hottest=[] 229 | count=0 230 | dic={ 231 | 'hottest_events':[], 232 | } 233 | for item in self.category['item_id']: 234 | try: 235 | clicking_score=self.clean_data3.loc[item] 236 | except: 237 | clicking_score=0 238 | items_score.append((item, clicking_score)) 239 | 240 | items_score.sort(key=lambda x: (x[1]),reverse=True) 241 | for i in items_score: 242 | if count==100: 243 | break 244 | recommended_items_hottest.append(i[0]) 245 | count+=1 246 | dic['hottest_events']=recommended_items_hottest 247 | return dic 248 | 249 | def delete(self): 250 | #delete an event in clicking due to the removal 251 | total_item= self.category["item_id"] 252 | clicking_item= self.clicking["item_id"] 253 | contain=[] 254 | for i in clicking_item: 255 | check=False 256 | for j in total_item: 257 | if(i==j): 258 | check=True 259 | break 260 | if(check==False): 261 | contain.append(i) 262 | for i in contain: 263 | self.clicking=self.clicking[self.clicking["item_id"]!=i] 264 | self.clicking.to_csv('database/events_clicking.csv',index=False) 265 | --------------------------------------------------------------------------------