├── Project ├── __init__.py ├── __pycache__ │ ├── __init__.cpython-310.pyc │ ├── settings.cpython-310.pyc │ ├── urls.cpython-310.pyc │ └── wsgi.cpython-310.pyc ├── app01 │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-310.pyc │ │ ├── admin.cpython-310.pyc │ │ ├── apps.cpython-310.pyc │ │ ├── models.cpython-310.pyc │ │ └── views.cpython-310.pyc │ ├── admin.py │ ├── apps.py │ ├── models.py │ ├── tests.py │ └── views.py ├── asgi.py ├── migrations │ ├── __init__.py │ └── __pycache__ │ │ └── __init__.cpython-310.pyc ├── settings.py ├── templates │ ├── content_detail.html │ ├── home.html │ ├── login.html │ ├── search.html │ ├── temp.html │ └── topic.html ├── urls.py └── wsgi.py ├── README.md ├── __pycache__ ├── manage.cpython-310.pyc └── temp.cpython-310.pyc ├── db.sqlite3 ├── manage.py ├── temp.py └── zhihu_result.csv /Project/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anifalak-Lobelia/NLP/6145e6bef445628b879c88fedbe6104d96e3c522/Project/__init__.py -------------------------------------------------------------------------------- /Project/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anifalak-Lobelia/NLP/6145e6bef445628b879c88fedbe6104d96e3c522/Project/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /Project/__pycache__/settings.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anifalak-Lobelia/NLP/6145e6bef445628b879c88fedbe6104d96e3c522/Project/__pycache__/settings.cpython-310.pyc -------------------------------------------------------------------------------- /Project/__pycache__/urls.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anifalak-Lobelia/NLP/6145e6bef445628b879c88fedbe6104d96e3c522/Project/__pycache__/urls.cpython-310.pyc -------------------------------------------------------------------------------- /Project/__pycache__/wsgi.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anifalak-Lobelia/NLP/6145e6bef445628b879c88fedbe6104d96e3c522/Project/__pycache__/wsgi.cpython-310.pyc -------------------------------------------------------------------------------- /Project/app01/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anifalak-Lobelia/NLP/6145e6bef445628b879c88fedbe6104d96e3c522/Project/app01/__init__.py -------------------------------------------------------------------------------- /Project/app01/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anifalak-Lobelia/NLP/6145e6bef445628b879c88fedbe6104d96e3c522/Project/app01/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /Project/app01/__pycache__/admin.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anifalak-Lobelia/NLP/6145e6bef445628b879c88fedbe6104d96e3c522/Project/app01/__pycache__/admin.cpython-310.pyc -------------------------------------------------------------------------------- /Project/app01/__pycache__/apps.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anifalak-Lobelia/NLP/6145e6bef445628b879c88fedbe6104d96e3c522/Project/app01/__pycache__/apps.cpython-310.pyc -------------------------------------------------------------------------------- /Project/app01/__pycache__/models.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anifalak-Lobelia/NLP/6145e6bef445628b879c88fedbe6104d96e3c522/Project/app01/__pycache__/models.cpython-310.pyc -------------------------------------------------------------------------------- /Project/app01/__pycache__/views.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anifalak-Lobelia/NLP/6145e6bef445628b879c88fedbe6104d96e3c522/Project/app01/__pycache__/views.cpython-310.pyc -------------------------------------------------------------------------------- /Project/app01/admin.py: -------------------------------------------------------------------------------- 1 | from django.contrib import admin 2 | 3 | # Register your models here. 4 | -------------------------------------------------------------------------------- /Project/app01/apps.py: -------------------------------------------------------------------------------- 1 | from django.apps import AppConfig 2 | 3 | 4 | class App01Config(AppConfig): 5 | default_auto_field = "django.db.models.BigAutoField" 6 | name = "app01" 7 | -------------------------------------------------------------------------------- /Project/app01/models.py: -------------------------------------------------------------------------------- 1 | from django.db import models 2 | 3 | # Create your models here. 4 | -------------------------------------------------------------------------------- /Project/app01/tests.py: -------------------------------------------------------------------------------- 1 | from django.test import TestCase 2 | 3 | # Create your tests here. 4 | -------------------------------------------------------------------------------- /Project/app01/views.py: -------------------------------------------------------------------------------- 1 | # 21122924 刘育杰 智能科学与技术 2 | from django.views.decorators.csrf import csrf_exempt 3 | import time 4 | from django.shortcuts import render,HttpResponse,redirect 5 | import pandas as pd 6 | from datetime import datetime 7 | from app01.templatetags.recommand import recommend_question 8 | import jieba 9 | import jieba.analyse 10 | from wordcloud import WordCloud 11 | import numpy as np 12 | from PIL import Image 13 | # 函数 Create your views here. 14 | 15 | @csrf_exempt 16 | def login(request): 17 | if request.method == "GET": 18 | return render(request,"login.html") 19 | else: 20 | username = request.POST.get("user") 21 | password = request.POST.get("pwd") 22 | if username == "root" and password == "12345": 23 | time.sleep(1.5) 24 | return redirect('/home/?date=2023-05-10') 25 | else: 26 | return render(request,"login.html") 27 | 28 | 29 | 30 | 31 | from django.http import HttpResponseBadRequest 32 | 33 | 34 | def topic(request): 35 | # 获取查询参数question_id或question_title 36 | question_id = request.GET.get('question_id', None) 37 | question_title = request.GET.get('question_title', None) 38 | # 如果question_id和question_title都为空,则返回错误 39 | if not question_id and not question_title: 40 | return HttpResponseBadRequest("Invalid question_id or question_title") 41 | 42 | # 如果没有传递question_id,则通过question_title查找对应的question_id 43 | if not question_id and question_title: 44 | # 读取CSV文件,找到对应question_title的数据 45 | df = pd.read_csv('E:/NLP/Project/zhihu_result.csv', encoding='utf-8') 46 | for i, row in df.iterrows(): 47 | if str(row['question_title']) == question_title: 48 | question_id = str(row['question_id']) 49 | # 重定向到带有question_id的URL 50 | return redirect('/topic/?question_id=' + question_id) 51 | 52 | # 读取CSV文件,找到对应question_id或question_title的数据 53 | content_list = [] 54 | df = pd.read_csv('E:/NLP/Project/zhihu_result.csv', encoding='utf-8') 55 | # 如果question_id存在,以question_id为依据搜索 56 | if question_id: 57 | for i, row in df.iterrows(): 58 | if str(row['question_id']) == question_id: 59 | content_list.append({'id': row['id'], 'content': str(row['content'])}) 60 | if question_title is None: 61 | question_title = row['question_title'] 62 | # 如果question_title存在,以question_title为依据搜索 63 | elif question_title: 64 | for i, row in df.iterrows(): 65 | if str(row['question_title']) == question_title: 66 | content_list.append({'id': row['id'], 'content': str(row['content'])}) 67 | question_id = row['question_id'] 68 | if question_title is None or question_id is None: 69 | return HttpResponseBadRequest("Question not found") 70 | # 获取相似的问题 71 | similar_questions = recommend_question(question_title) 72 | # 去除相似问题中的重复问题 73 | similar_questions = list(set(similar_questions)) 74 | # 读取CSV文件,找到对应question_id或question_title的数据 75 | content_list = [] 76 | content_text = '' 77 | question_title = None 78 | df = pd.read_csv('E:/NLP/Project/zhihu_result.csv', encoding='utf-8') 79 | for i, row in df.iterrows(): 80 | if str(row['question_id']) == question_id: 81 | content_list.append({'id': row['id'], 'content': str(row['content'])}) 82 | content_text += str(row['content']) + ' ' 83 | if question_title is None: 84 | question_title = row['question_title'] 85 | # 生成关键词 86 | keyword_list = jieba.analyse.extract_tags(content_text, topK=100, withWeight=True) 87 | keywords = {item[0]: item[1] for item in keyword_list} 88 | # 生成词云图 89 | # 读取自定义形状图片并转换为数组 90 | custom_shape_path = 'E:/NLP/Project/app01/static/image/example.jpg' 91 | custom_shape = np.array(Image.open(custom_shape_path).convert('L')) 92 | wordcloud = WordCloud(font_path='simhei.ttf', background_color="white", max_words=100, mask=custom_shape) 93 | wordcloud.generate_from_frequencies(keywords) 94 | wordcloud_image_path = 'E:/NLP/Project/app01/static/image/wordcloud.png' 95 | wordcloud.to_file(wordcloud_image_path) 96 | # 将数据传递给模板 97 | return render(request, 'topic.html', {'content_list': content_list, 'question_title': question_title, 98 | 'similar_questions': similar_questions, 99 | 'wordcloud_img_url': 'E:/NLP/Project/app01/static/image/wordcloud.png'}) 100 | 101 | 102 | def content_detail(request): 103 | # 获取查询参数id 104 | content_id = request.GET.get('id', None) 105 | # 读取CSV文件,找到对应id的数据 106 | df = pd.read_csv('E:/NLP/Project/zhihu_result.csv', encoding='utf-8') 107 | for i, row in df.iterrows(): 108 | if str(row['id']) == content_id: 109 | created_time = datetime.fromisoformat(row['created_time'].replace("Z", "+00:00")) 110 | formatted_time = created_time.strftime("%Y-%m-%d, %H:%M:%S") 111 | # 预处理hot值 112 | hot = row['hot'] 113 | if hot > 1: 114 | hot = 1 115 | content_data = { 116 | 'id': row['id'], 117 | 'content': str(row['content']), 118 | 'question_title': row['question_title'], 119 | 'author_name': row['author_name'], 120 | 'created_time': formatted_time, 121 | 'comment_count': row['comment_count'], 122 | 'fans_count': row['fans_count'], 123 | 'voteup_count': row['voteup_count'], 124 | 'emotion_label': row['emotion_label'], 125 | 'emotion_score': row['emotion_score'], 126 | 'key_words': row['key_words'], 127 | 'key_sentence': row['key_sentence'], 128 | 'fake': row['fake'], 129 | 'hot': hot, # 添加hot值到context中 130 | } 131 | break 132 | # 将数据传递给模板 133 | return render(request, 'content_detail.html', content_data) 134 | 135 | def temp(request): 136 | return render(request, 'temp.html') 137 | 138 | from django.shortcuts import render 139 | from .templatetags.search.sim_search import search_similar_questions 140 | from django.http import JsonResponse 141 | 142 | def get_question_url(question_id): 143 | base_url = "http://127.0.0.1:8000/topic/?question_id=" 144 | return base_url + str(question_id) 145 | 146 | def home(request): 147 | # 获取查询参数date 148 | date_str = request.GET.get('date', None) 149 | # 如果没有提供date参数,或者date参数格式错误,选择重定向到其他页面 150 | if date_str is None: 151 | return redirect('/home') 152 | try: 153 | date = datetime.strptime(date_str, '%Y-%m-%d') 154 | except ValueError: 155 | return HttpResponse('Invalid date format') 156 | 157 | # 读取CSV文件,找到对应日期的数据 158 | data_list = [] 159 | df = pd.read_csv('E:/NLP/Project/zhihu_result.csv', encoding='utf-8') 160 | for i, row in df.iterrows(): 161 | if i % 20 == 0 and str(row[16]) == date_str: 162 | data_list.append({'id': str(row[18]), 'question_title': str(row[0])}) 163 | # 统计 'class' 列的频数 164 | class_data = [{'name': '时政', 'value': 565}, {'name': '娱乐', 'value': 1815}, {'name': '科技', 'value': 880}, {'name': '游戏', 'value': 220}, {'name': '财经', 'value': 45}, {'name': '家居', 'value': 425}, {'name': '社会', 'value': 365}, {'name': '股票', 'value': 280}, {'name': '教育', 'value': 380}, {'name': '房产', 'value': 60}, {'name': '时尚', 'value': 120}] 165 | # 将数据传递给模板 166 | return render(request, 'home.html', {'data_list': data_list, 'class_data': class_data}) 167 | 168 | from django.http import JsonResponse 169 | 170 | @csrf_exempt 171 | def search(request): 172 | search_text = request.POST.get('search_text', '') 173 | results = search_similar_questions(search_text) 174 | 175 | results = [{'question_title': result['question_title'], 'score': result['score'], 176 | 'url': get_question_url(result['question_id'])} for result in results] 177 | print(results) 178 | return render(request, 'search.html', {'results': results}) 179 | 180 | 181 | -------------------------------------------------------------------------------- /Project/asgi.py: -------------------------------------------------------------------------------- 1 | """ 2 | ASGI config for Project project. 3 | 4 | It exposes the ASGI callable as a module-level variable named ``application``. 5 | 6 | For more information on this file, see 7 | https://docs.djangoproject.com/en/4.2/howto/deployment/asgi/ 8 | """ 9 | 10 | import os 11 | 12 | from django.core.asgi import get_asgi_application 13 | 14 | os.environ.setdefault("DJANGO_SETTINGS_MODULE", "Project.settings") 15 | 16 | application = get_asgi_application() 17 | -------------------------------------------------------------------------------- /Project/migrations/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anifalak-Lobelia/NLP/6145e6bef445628b879c88fedbe6104d96e3c522/Project/migrations/__init__.py -------------------------------------------------------------------------------- /Project/migrations/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anifalak-Lobelia/NLP/6145e6bef445628b879c88fedbe6104d96e3c522/Project/migrations/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /Project/settings.py: -------------------------------------------------------------------------------- 1 | """ 2 | Django settings for Project project. 3 | 4 | Generated by 'django-admin startproject' using Django 4.2.1. 5 | 6 | For more information on this file, see 7 | https://docs.djangoproject.com/en/4.2/topics/settings/ 8 | 9 | For the full list of settings and their values, see 10 | https://docs.djangoproject.com/en/4.2/ref/settings/ 11 | """ 12 | 13 | 14 | 15 | from pathlib import Path 16 | 17 | # Build paths inside the project like this: BASE_DIR / 'subdir'. 18 | BASE_DIR = Path(__file__).resolve().parent.parent 19 | 20 | 21 | # Quick-start development settings - unsuitable for production 22 | # See https://docs.djangoproject.com/en/4.2/howto/deployment/checklist/ 23 | 24 | # SECURITY WARNING: keep the secret key used in production secret! 25 | SECRET_KEY = "django-insecure-z7oz9i&*fn1ui!n)&v=u=48xn$7dz55&733aozwzhw)gbzsb84" 26 | 27 | # SECURITY WARNING: don't run with debug turned on in production! 28 | DEBUG = True 29 | 30 | ALLOWED_HOSTS = [] 31 | 32 | 33 | # Application definition 34 | 35 | INSTALLED_APPS = [ 36 | "django.contrib.admin", 37 | "django.contrib.auth", 38 | "django.contrib.contenttypes", 39 | "django.contrib.sessions", 40 | "django.contrib.messages", 41 | "django.contrib.staticfiles", 42 | "app01.apps.App01Config", 43 | ] 44 | 45 | MIDDLEWARE = [ 46 | "django.middleware.security.SecurityMiddleware", 47 | "django.contrib.sessions.middleware.SessionMiddleware", 48 | "django.middleware.common.CommonMiddleware", 49 | "django.middleware.csrf.CsrfViewMiddleware", 50 | "django.contrib.auth.middleware.AuthenticationMiddleware", 51 | "django.contrib.messages.middleware.MessageMiddleware", 52 | "django.middleware.clickjacking.XFrameOptionsMiddleware", 53 | ] 54 | 55 | ROOT_URLCONF = "Project.urls" 56 | 57 | TEMPLATES = [ 58 | { 59 | "BACKEND": "django.template.backends.django.DjangoTemplates", 60 | "DIRS": [] 61 | , 62 | "APP_DIRS": True, 63 | "OPTIONS": { 64 | "context_processors": [ 65 | "django.template.context_processors.debug", 66 | "django.template.context_processors.request", 67 | "django.contrib.auth.context_processors.auth", 68 | "django.contrib.messages.context_processors.messages", 69 | ], 70 | }, 71 | }, 72 | ] 73 | 74 | WSGI_APPLICATION = "Project.wsgi.application" 75 | 76 | 77 | # Database 78 | # https://docs.djangoproject.com/en/4.2/ref/settings/#databases 79 | 80 | DATABASES = { 81 | "default": { 82 | "ENGINE": "django.db.backends.sqlite3", 83 | "NAME": BASE_DIR / "db.sqlite3", 84 | } 85 | } 86 | 87 | 88 | # Password validation 89 | # https://docs.djangoproject.com/en/4.2/ref/settings/#auth-password-validators 90 | 91 | AUTH_PASSWORD_VALIDATORS = [ 92 | { 93 | "NAME": "django.contrib.auth.password_validation.UserAttributeSimilarityValidator", 94 | }, 95 | { 96 | "NAME": "django.contrib.auth.password_validation.MinimumLengthValidator", 97 | }, 98 | { 99 | "NAME": "django.contrib.auth.password_validation.CommonPasswordValidator", 100 | }, 101 | { 102 | "NAME": "django.contrib.auth.password_validation.NumericPasswordValidator", 103 | }, 104 | ] 105 | 106 | 107 | # Internationalization 108 | # https://docs.djangoproject.com/en/4.2/topics/i18n/ 109 | 110 | LANGUAGE_CODE = "en-us" 111 | 112 | TIME_ZONE = "UTC" 113 | 114 | USE_I18N = True 115 | 116 | USE_TZ = True 117 | 118 | 119 | # Static files (CSS, JavaScript, Images) 120 | # https://docs.djangoproject.com/en/4.2/howto/static-files/ 121 | STATIC_URL = "/static/" 122 | 123 | # Default primary key field type 124 | # https://docs.djangoproject.com/en/4.2/ref/settings/#default-auto-field 125 | 126 | DEFAULT_AUTO_FIELD = "django.db.models.BigAutoField" 127 | 128 | 129 | -------------------------------------------------------------------------------- /Project/templates/content_detail.html: -------------------------------------------------------------------------------- 1 | {% load static %} 2 | 3 | 4 | 5 | 详细内容 6 | 7 | 8 | 9 | 59 | 60 | 61 |
62 |
63 | {{ question_title }} 64 |
65 |
66 |
67 | {% if fake %} 68 | fake info 69 | 可能包含虚假信息,请辨别其真实性 70 | {% else %} 71 | true info 72 | 该文章通过了谣言检测 73 | {% endif %} 74 |
75 | 76 |
77 |
78 | 作者:{{ author_name }} | 创作时间:{{ created_time }} 79 |
80 |
81 |
82 |
83 |
84 |
85 |
86 |
87 |
88 |
89 |
90 | {{ content }} 91 |
92 |
93 |
关键词
94 |
{{ key_words }}
95 |
96 |
97 |
金句
98 |
{{ key_sentence }}
99 |
100 |
101 |
102 |
103 | 104 | {# 柱状图 #} 105 | 144 | {# 情感分析图 #} 145 | 177 | {# 热度预测 #} 178 | 255 | 256 | 257 | 258 | 259 | 260 | 261 | 262 | 263 | 264 | -------------------------------------------------------------------------------- /Project/templates/home.html: -------------------------------------------------------------------------------- 1 | 2 | {% load static %} 3 | {% load math_tags %} 4 | 5 | 6 | 主页 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 70 | 71 | 72 | 73 | 83 | 84 | 85 |
86 |

智慧舆云系统

87 |
88 |
89 |
90 | 91 |
92 |
93 |
94 |
95 |
96 |
97 | 98 | 102 | 103 | 104 |
105 | 106 |
107 |
108 |
109 |
110 | 111 | {# 圆环图 #} 112 | 168 | 183 | 184 | 185 | 186 | 187 | 192 | 193 | 194 | 195 | 196 | -------------------------------------------------------------------------------- /Project/templates/login.html: -------------------------------------------------------------------------------- 1 | {% load static %} 2 | {#21122924 刘育杰 智能科学与技术#} 3 | 4 | 5 | 6 | 7 | 8 | 9 | 登录 10 | 98 | 99 | 100 | 101 |
102 |

LOGIN

103 |
104 | {% csrf_token %} 105 |
106 | 107 |
108 |
109 | 110 |
111 | 112 |
113 |
114 | 130 | 131 | 132 | 133 | -------------------------------------------------------------------------------- /Project/templates/search.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 搜索 5 | 6 | 7 | 8 | 9 | 10 | 50 | 51 | 52 |
53 |

语义搜索

54 |
55 | {% csrf_token %} 56 | 57 | 58 |
59 | 60 |

搜索结果

61 |
62 | 63 | 67 | 68 | 69 |
70 |
71 | 72 | 73 | 74 | 93 | 94 | 95 | -------------------------------------------------------------------------------- /Project/templates/temp.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Element UI 表格测试 5 | 6 | 7 | 8 | 9 |
10 |

Element UI 表格测试

11 | 12 | 13 | 14 | 15 | 18 | 19 | 20 |
21 | 22 | 23 | 24 | 25 | 44 | 45 | 46 | -------------------------------------------------------------------------------- /Project/templates/topic.html: -------------------------------------------------------------------------------- 1 | {% load static %} 2 | 3 | 4 | 5 | 话题内容 6 | 7 | 8 | 9 | 53 | 68 | 69 | 70 |
{{ question_title }}
71 |
72 |
73 |
74 |

推荐的问题

75 |
    76 | {% for question in similar_questions %} 77 |
  • 78 | {{ question }} 79 |
  • 80 | {% endfor %} 81 |
82 | 词云图 83 |
84 |
85 | {% for item in content_list %} 86 |
87 |
88 | {{ item.content }} 89 |
90 |
91 | 92 | 93 |
94 |
95 | {% endfor %} 96 |
97 |
98 |
99 | 100 | 101 | -------------------------------------------------------------------------------- /Project/urls.py: -------------------------------------------------------------------------------- 1 | """ 2 | URL configuration for Project project. 3 | 4 | The `urlpatterns` list routes URLs to views. For more information please see: 5 | https://docs.djangoproject.com/en/4.2/topics/http/urls/ 6 | Examples: 7 | Function views 8 | 1. Add an import: from my_app import views 9 | 2. Add a URL to urlpatterns: path('', views.home, name='home') 10 | Class-based views 11 | 1. Add an import: from other_app.views import Home 12 | 2. Add a URL to urlpatterns: path('', Home.as_view(), name='home') 13 | Including another URLconf 14 | 1. Import the include() function: from django.urls import include, path 15 | 2. Add a URL to urlpatterns: path('blog/', include('blog.urls')) 16 | """ 17 | from django.contrib import admin 18 | from django.urls import path 19 | from app01 import views 20 | 21 | 22 | urlpatterns = [ 23 | 24 | path('temp', views.temp), 25 | path('home/', views.home), 26 | path("topic/", views.topic), 27 | path("content_detail/", views.content_detail), 28 | path('search/', views.search, name='search'), 29 | path("", views.login), 30 | ] 31 | -------------------------------------------------------------------------------- /Project/wsgi.py: -------------------------------------------------------------------------------- 1 | """ 2 | WSGI config for Project project. 3 | 4 | It exposes the WSGI callable as a module-level variable named ``application``. 5 | 6 | For more information on this file, see 7 | https://docs.djangoproject.com/en/4.2/howto/deployment/wsgi/ 8 | """ 9 | 10 | import os 11 | 12 | from django.core.wsgi import get_wsgi_application 13 | 14 | os.environ.setdefault("DJANGO_SETTINGS_MODULE", "Project.settings") 15 | 16 | application = get_wsgi_application() 17 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # 舆情的监控分析与预测 2 | 本项目旨在设计并实现一个舆情监控系统,具体基于对知乎热榜话题的数据抓取、分析与可视化。系统利用爬虫技术收集近一年以来知乎热榜的前10个话题以及每个话题下的前20个回答,然后借助于自然语言处理(NLP)的技术对这些数据进行深入的分析和处理。在本项目中,我选择采用知乎热榜话题下的回答作为研究对象。这样选择的原因主要有两点。首先,知乎是一个中文长文本的问答平台,与新浪微博、豆瓣等平台相比,知乎的数据具有更大的分析潜力。其次,尽管目前存在少量针对新浪微博、新闻门户网站等平台的舆情分析系统,但由于知乎平台具有较强的反爬虫机制和多变的网页设计,迄今为止尚未出现针对性的基于知乎平台数据的舆情分析系统。因此,本项目基于知乎平台的数据进行爬取和分析,有助于更全面地了解中文互联网的舆情趋势和发展方向。 3 | 4 | 本研究的意义不仅在于填补了基于知乎平台数据的舆情分析系统的空白,还在于拓展了舆情研究的广度和深度。通过对知乎热榜话题下的回答进行分析,我们可以深入挖掘其中所蕴含的信息和观点,揭示中文互联网用户的关注焦点和态度倾向。这对于政府、企业和社会公众具有重要意义,通过本项目,可以了解各种话题的讨论和观点分歧,从而更全面地了解社会动态和舆论风向。 5 | 本项目后端采用Django框架,前端主要由以下四个主要部分组成:主页(home.html),搜索页面(search.html),话题页面(topic.html),以及内容详情页面(content_detail.html)。每个部分均使用了NLP技术以及数据可视化手段,为用户提供直观、全面的信息和体验。 6 | 7 | 1. 主页(home.html):主页提供了日历组件,供用户选择特定日期查看当天的知乎热榜内容,并使用echarts饼图对所有话题进行分类并展示其占比情况。 8 | 2. 搜索页面(search.html):实现语义搜索功能,通过NLP技术,用户输入词语或句子后,系统会找到与输入最相似的知乎热榜标题。 9 | 3. 话题页面(topic.html):该页面展示某一话题下的回答内容,并通过NLP技术,推荐相似度高的其他话题,同时展示该话题下的词云图,以直观地展现关键词和热度。 10 | 4. 内容详情页面(content_detail.html):在此页面上,系统会对选定的回答进行深入分析,包括使用echarts柱状图展示评论数、粉丝数和赞同数,提取并展示关键词和金句,对回答进行情感分析、热度预测,以及谣言检测等功能。 11 | -------------------------------------------------------------------------------- /__pycache__/manage.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anifalak-Lobelia/NLP/6145e6bef445628b879c88fedbe6104d96e3c522/__pycache__/manage.cpython-310.pyc -------------------------------------------------------------------------------- /__pycache__/temp.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anifalak-Lobelia/NLP/6145e6bef445628b879c88fedbe6104d96e3c522/__pycache__/temp.cpython-310.pyc -------------------------------------------------------------------------------- /db.sqlite3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Anifalak-Lobelia/NLP/6145e6bef445628b879c88fedbe6104d96e3c522/db.sqlite3 -------------------------------------------------------------------------------- /manage.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """Django's command-line utility for administrative tasks.""" 3 | import os 4 | import sys 5 | 6 | 7 | def main(): 8 | """Run administrative tasks.""" 9 | os.environ.setdefault("DJANGO_SETTINGS_MODULE", "Project.settings") 10 | try: 11 | from django.core.management import execute_from_command_line 12 | except ImportError as exc: 13 | raise ImportError( 14 | "Couldn't import Django. Are you sure it's installed and " 15 | "available on your PYTHONPATH environment variable? Did you " 16 | "forget to activate a virtual environment?" 17 | ) from exc 18 | execute_from_command_line(sys.argv) 19 | 20 | 21 | if __name__ == "__main__": 22 | main() 23 | -------------------------------------------------------------------------------- /temp.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | 3 | # 读取csv文件 4 | df = pd.read_csv('E:\\NLP\\data\\zhihu_result.csv') 5 | 6 | # 创建id属性 7 | df['id'] = range(1, len(df) + 1) 8 | 9 | # 创建一个映射,将不同的question_title映射到一个唯一的id上 10 | question_title_to_id = {title: idx for idx, title in enumerate(df['question_title'].unique(), start=1)} 11 | 12 | # 创建question_id属性 13 | df['question_id'] = df['question_title'].apply(lambda x: question_title_to_id[x]) 14 | 15 | # 将处理过的DataFrame写回csv 16 | df.to_csv('E:\\NLP\\data\\zhihu_result.csv', index=False) 17 | --------------------------------------------------------------------------------