├── .gitignore ├── Item_CF.py ├── User_CF.py └── 新建文本文档.csv /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. 
import math
import random
from operator import itemgetter

# Item-based collaborative filtering (Item_CF.py)
# 1. Read the rating data.
# 2. Build the train set (and the held-out test set).
# 3. Build the movie-popularity table: how many training users rated each movie.
# 4. Build the movie co-occurrence matrix: how many users rated both movies.
# 5. Turn every co-occurrence count into a similarity score.
# 6. Rank the candidate movies and recommend the best ones to the user.


class item():
    """Item-based collaborative-filtering movie recommender.

    Typical use: ``set_movie()`` -> ``get_user_data(path)`` ->
    ``set_movie_sim()`` -> ``rec_movie(user)``.
    """

    def __init__(self):
        # Create the containers silently so an instance is usable even when
        # the caller forgets set_movie(); set_movie() remains available and
        # keeps printing its confirmation message.
        self._reset()

    def _reset(self):
        """Create empty containers for every table the recommender fills."""
        self.train_set = {}         # user -> {movie: grade (string from the CSV)}
        self.test_set = {}          # same layout, held-out ratings
        self.movie_sim_matrix = {}  # movie -> {other movie: similarity}
        self.movie_popular = {}     # movie -> number of training users who rated it
        self.movie_count = 0        # number of rating rows read so far

    def set_movie(self):
        """Reset all tables to their empty state and report completion."""
        self._reset()
        print("初始化完成!")

    def get_user_data(self, filename):
        """Read ``user,movie,grade`` rows and split them into train/test sets.

        Each row goes to the train set with probability 0.75, otherwise to
        the test set. Grades are stored as the raw strings read from the file.

        Args:
            filename: path of the CSV file; its first line is a header.

        Raises:
            ValueError: if a non-blank row does not have exactly three
                comma-separated fields.
        """
        for line in self.load_file(filename):
            if not line.strip():
                # A trailing or stray blank line is not a rating row.
                continue
            fields = line.split(',')
            if len(fields) != 3:
                raise ValueError(
                    "expected 'user,movie,grade' but got %r" % line)
            user, movie, grade = fields
            if random.random() < 0.75:
                target = self.train_set
            else:
                target = self.test_set
            target.setdefault(user, {})[movie] = grade
            self.movie_count += 1
        print("训练集和测试集构建完成!")

    def set_movie_sim(self):
        """Build the popularity table and the movie-movie similarity matrix.

        similarity(m1, m2) = co_occurrence(m1, m2)
                             / sqrt(popularity(m1) * popularity(m2))

        Both tables are rebuilt from scratch from ``train_set`` so that
        calling this method more than once yields the same result.
        """
        self.movie_popular = {}
        self.movie_sim_matrix = {}

        for movies in self.train_set.values():
            for movie in movies:
                self.movie_popular[movie] = self.movie_popular.get(movie, 0) + 1
        print("流行电影表构建完成!")

        for movies in self.train_set.values():
            for m1 in movies:
                for m2 in movies:
                    if m1 == m2:
                        continue
                    row = self.movie_sim_matrix.setdefault(m1, {})
                    row[m2] = row.get(m2, 0) + 1
        print("电影相似度邻接矩阵构建完成!")

        for m1, related in self.movie_sim_matrix.items():
            for m2, count in related.items():
                # Only existing keys are reassigned, so iterating is safe.
                related[m2] = count / math.sqrt(
                    self.movie_popular[m1] * self.movie_popular[m2])
        print("电影相似度邻接矩阵构建完成!")

    def rec_movie(self, user, k=10, n=10):
        """Score unseen movies for ``user`` and return the best ones.

        For every movie the user rated in the train set, the ``k`` most
        similar movies are looked up; each unseen one receives
        ``grade * similarity``, summed over all watched movies that vote
        for it.

        Args:
            user: user identifier as it appears in the rating file.
            k: number of most-similar movies considered per watched movie.
            n: maximum number of recommendations returned.

        Returns:
            A list of ``(movie, score)`` tuples sorted by descending score,
            at most ``n`` long. Empty when the user has no training ratings
            or no candidate movie exists.
        """
        # The random split may leave a user with no training ratings at all.
        watch_movie = self.train_set.get(user, {})
        re_movies = {}
        for movie, grade in watch_movie.items():
            weight = float(grade)
            # A movie nobody co-rated has no row in the similarity matrix.
            neighbours = sorted(
                self.movie_sim_matrix.get(movie, {}).items(),
                key=itemgetter(1),
                reverse=True,
            )[:k]
            for relate_movie, sim in neighbours:
                if relate_movie in watch_movie:
                    continue
                # Accumulate: several watched movies may vote for the same
                # candidate, and every vote must count.
                re_movies[relate_movie] = (
                    re_movies.get(relate_movie, 0) + weight * sim)
        print("推荐电影表构建完成!")
        print(re_movies)
        return sorted(re_movies.items(), key=itemgetter(1), reverse=True)[:n]

    def load_file(self, filename):
        """Yield the data lines of ``filename`` without their line endings.

        The first line (the header) is skipped. The file is opened with the
        platform default text encoding.
        """
        with open(filename, 'r') as f:
            for i, line in enumerate(f):
                if i == 0:
                    continue
                yield line.strip('\r\n')
        print("文件录入成功!")


if __name__ == "__main__":
    rating_file = 'C:\\Users\\lenovo\\Desktop\\新建文本文档.csv'
    i = item()
    i.set_movie()
    i.get_user_data(rating_file)
    i.set_movie_sim()
    i.rec_movie("A")
import math
import random
from operator import itemgetter

# User-based collaborative filtering (User_CF.py)
# 1. Read the rating data.
# 2. Build the train set (and the held-out test set).
# 3. Build the movie -> users table.
# 4. Build the user-user co-occurrence table.
# 5. Turn every co-occurrence count into a similarity and rank the neighbours.
# 6. Pick the movies to recommend to the user.


class user():
    """User-based collaborative-filtering movie recommender.

    Typical use: ``set()`` -> ``get_dataset(path)`` -> ``recommend(user)``.
    """

    def __init__(self):
        # Create the containers silently so an instance is usable even when
        # the caller forgets set(); set() remains available and keeps
        # printing its confirmation message.
        self._reset()

    def _reset(self):
        """Create empty containers for every table the recommender fills."""
        self.trainset = {}    # user -> {movie: grade (string from the CSV)}
        self.testset = {}     # same layout, held-out ratings
        self.sim_user = {}    # user -> {other user: similarity}
        self.movie_user = {}  # movie -> set of training users who rated it

    def set(self):
        """Reset all tables to their empty state and report completion."""
        self._reset()
        print("初始化成功!")

    def file_load(self, filename):
        """Yield the data lines of ``filename`` without their line endings.

        The first line (the header) is skipped. The file is opened with the
        platform default text encoding.
        """
        with open(filename, 'r') as f:
            for i, line in enumerate(f):
                if i == 0:
                    continue
                yield line.strip('\r\n')
        print("文件读取成功!")

    def get_dataset(self, filename):
        """Load ratings, split them, and build the user similarity table.

        Each ``user,movie,grade`` row goes to the train set with probability
        0.75, otherwise to the test set. The movie->users index and the
        similarity table are then rebuilt from the whole train set, so
        loading several files one after another stays consistent.

        Args:
            filename: path of the CSV file; its first line is a header.

        Raises:
            ValueError: if a non-blank row does not have exactly three
                comma-separated fields.
        """
        for line in self.file_load(filename):
            if not line.strip():
                # A trailing or stray blank line is not a rating row.
                continue
            fields = line.split(',')
            if len(fields) != 3:
                raise ValueError(
                    "expected 'user,movie,grade' but got %r" % line)
            uid, movie, grade = fields
            if random.random() < 0.75:
                target = self.trainset
            else:
                target = self.testset
            target.setdefault(uid, {})[movie] = grade
        print("训练集和数据集构建完成!")

        self._build_movie_index()
        print("用户-电影表构建完成!")

        self._build_similarity()
        print("相似性用户表构建完成!")

    def _build_movie_index(self):
        """Rebuild ``movie_user`` (movie -> set of users) from the train set."""
        self.movie_user = {}
        for uid, movies in self.trainset.items():
            for movie in movies:
                # ``set`` here is the builtin: method names defined on the
                # class are not visible as bare names inside a method body.
                self.movie_user.setdefault(movie, set()).add(uid)

    def _build_similarity(self):
        """Rebuild ``sim_user`` from ``movie_user``.

        similarity(u1, u2) = shared_movies(u1, u2)
                             / sqrt(len(ratings(u1)) * len(ratings(u2)))
        """
        # Start from empty counts; adding counts onto already-normalised
        # similarities would corrupt the table on a second call.
        self.sim_user = {}
        for users in self.movie_user.values():
            for u1 in users:
                for u2 in users:
                    if u1 == u2:
                        continue
                    row = self.sim_user.setdefault(u1, {})
                    row[u2] = row.get(u2, 0) + 1
        for u1, neighbours in self.sim_user.items():
            for u2, shared in neighbours.items():
                # Only existing keys are reassigned, so iterating is safe.
                neighbours[u2] = shared / math.sqrt(
                    len(self.trainset[u1]) * len(self.trainset[u2]))

    def recommend(self, user, k=20, n=10):
        """Score unseen movies for ``user`` and return the best ones.

        The ``k`` most similar users each vote for the movies they rated
        with ``grade * similarity``; votes for the same movie are summed.

        Args:
            user: user identifier as it appears in the rating file.
            k: number of most-similar users consulted.
            n: maximum number of recommendations returned.

        Returns:
            A list of ``(movie, score)`` tuples sorted by descending score,
            at most ``n`` long. Empty when the user is unknown or has no
            similar users.
        """
        # The random split may leave a user with no training ratings at all.
        seen = self.trainset.get(user, {})
        neighbours = sorted(
            self.sim_user.get(user, {}).items(),
            key=itemgetter(1),
            reverse=True,
        )[0:k]
        scores = {}
        for sim_u, sim in neighbours:
            for rec_m, grade in self.trainset[sim_u].items():
                if rec_m in seen:
                    continue
                # Accumulate: several neighbours may vote for the same
                # movie, and every vote must count.
                scores[rec_m] = scores.get(rec_m, 0) + float(grade) * sim
        rec_movies = sorted(
            scores.items(), key=itemgetter(1), reverse=True)[0:n]
        print("推荐电影列表:")
        print(rec_movies)
        return rec_movies


if __name__ == "__main__":
    u1 = user()
    u1.set()
    u1.get_dataset('C:\\Users\\lenovo\\Desktop\\新建文本文档.csv')
    u1.recommend('A')