├── .idea ├── cnki.iml ├── misc.xml ├── modules.xml ├── vcs.xml └── workspace.xml ├── README.md ├── backend ├── __init__.py ├── __pycache__ │ ├── __init__.cpython-36.pyc │ ├── admin.cpython-36.pyc │ ├── models.cpython-36.pyc │ ├── urls.cpython-36.pyc │ └── views.cpython-36.pyc ├── admin.py ├── apps.py ├── migrations │ ├── __init__.py │ └── __pycache__ │ │ └── __init__.cpython-36.pyc ├── models.py ├── tests.py ├── urls.py └── views.py ├── cnki ├── __init__.py ├── __pycache__ │ ├── __init__.cpython-36.pyc │ ├── celery.cpython-36.pyc │ ├── consumer.cpython-36.pyc │ ├── routing.cpython-36.pyc │ ├── settings.cpython-36.pyc │ ├── tasks.cpython-36.pyc │ ├── urls.cpython-36.pyc │ └── wsgi.cpython-36.pyc ├── celery.py ├── consumer.py ├── routing.py ├── settings.py ├── urls.py └── wsgi.py ├── db.sqlite3 ├── frontend ├── .babelrc ├── .editorconfig ├── .eslintignore ├── .eslintrc.js ├── .gitignore ├── .postcssrc.js ├── README.md ├── build │ ├── build.js │ ├── check-versions.js │ ├── logo.png │ ├── utils.js │ ├── vue-loader.conf.js │ ├── webpack.base.conf.js │ ├── webpack.dev.conf.js │ └── webpack.prod.conf.js ├── config │ ├── dev.env.js │ ├── index.js │ ├── prod.env.js │ └── test.env.js ├── index.html ├── package.json ├── src │ ├── App.vue │ ├── assets │ │ ├── css │ │ │ └── animate.css │ │ └── img │ │ │ ├── analyse.png │ │ │ ├── btos.png │ │ │ ├── chart.png │ │ │ ├── check.png │ │ │ ├── cnki.png │ │ │ ├── footer.png │ │ │ ├── hot │ │ │ ├── icon-item001.png │ │ │ ├── icon-item002.png │ │ │ ├── icon-item003.png │ │ │ ├── icon-item004.png │ │ │ ├── icon-item005.png │ │ │ └── icon-item006.png │ │ │ ├── hotAnalyBg.jpg │ │ │ ├── icon1.png │ │ │ ├── icon2.png │ │ │ ├── icon3.png │ │ │ ├── logo.png │ │ │ ├── menu.png │ │ │ ├── online │ │ │ ├── left.png │ │ │ └── right.png │ │ │ ├── sliderBg.png │ │ │ └── under.png │ ├── components │ │ ├── cnki-footer.vue │ │ ├── cnki-header.vue │ │ └── index │ │ │ ├── index-data.vue │ │ │ ├── index-feature.vue │ │ │ ├── index-service.vue │ │ │ └── index-slider.vue │ ├── layouts │ │ ├── chartAnalyse.vue │ │ ├── hotAnalyse.vue │ │ ├── index.vue │ │ ├── onlineSpider.vue │ │ └── paperDetail.vue │ ├── main.js │ └── router │ │ └── index.js ├── static │ └── .gitkeep └── test │ ├── e2e │ ├── custom-assertions │ │ └── elementCount.js │ ├── nightwatch.conf.js │ ├── runner.js │ └── specs │ │ └── test.js │ └── unit │ ├── .eslintrc │ ├── jest.conf.js │ ├── setup.js │ └── specs │ └── HelloWorld.spec.js ├── manage.py └── spider ├── __init__.py ├── __pycache__ ├── __init__.cpython-36.pyc ├── config.cpython-36.pyc ├── db_handle.cpython-36.pyc ├── get_authors.cpython-36.pyc ├── get_cookies.cpython-36.pyc ├── get_fund.cpython-36.pyc ├── get_keyWordID.cpython-36.pyc ├── get_school.cpython-36.pyc ├── get_year.cpython-36.pyc ├── mian.cpython-36.pyc ├── paper_detail.cpython-36.pyc ├── paper_spider.cpython-36.pyc └── paper_spider_by_app.cpython-36.pyc ├── config.py ├── db_handle.py ├── get_authors.py ├── get_cookies.py ├── get_fund.py ├── get_keyWordID.py ├── get_school.py ├── get_year.py ├── mian.py ├── paper_detail.py ├── paper_spider.py └── paper_spider_by_app.py /.idea/cnki.iml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 14 | 15 | -------------------------------------------------------------------------------- /.idea/misc.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 6 | 7 | -------------------------------------------------------------------------------- /.idea/modules.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /.idea/vcs.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | -------------------------------------------------------------------------------- /.idea/workspace.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 20 | 21 | 26 | 27 | 28 | 30 | 31 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 65 | 66 | 67 | 68 | 69 | 89 | 90 | 91 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | 1571150444735 125 | 137 | 138 | 139 | 140 | 142 | 143 | 154 | 155 | 156 | 157 | 158 | 159 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | #### 本仓库为毕业设计的重构版,之前采取Django的模板引擎制作的页面,本版本将采用vue+jango进行重构D,并加入新的功能,同时按照进度争取完成对应的博文进行讲解 2 | ### 2019/10/23 -开始使用vue重构页面 3 | 1. 添加响应式导航栏 4 | 5 | ### 2019/10/25 6 | 1. 首页slider 7 | 8 | ### 2019/10/25 9 | 1. 完成首页 10 | 11 | ### 2019/10/30 12 | 1. 热门分析页 13 | 14 | ### 2019/10/31 15 | 1. 在线爬虫页 -------------------------------------------------------------------------------- /backend/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coder-syl/cnki-spider-and-analyse/85118a20028403b6f6e21d1cd0f933bf96f0d4dd/backend/__init__.py -------------------------------------------------------------------------------- /backend/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coder-syl/cnki-spider-and-analyse/85118a20028403b6f6e21d1cd0f933bf96f0d4dd/backend/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /backend/__pycache__/admin.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coder-syl/cnki-spider-and-analyse/85118a20028403b6f6e21d1cd0f933bf96f0d4dd/backend/__pycache__/admin.cpython-36.pyc -------------------------------------------------------------------------------- /backend/__pycache__/models.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coder-syl/cnki-spider-and-analyse/85118a20028403b6f6e21d1cd0f933bf96f0d4dd/backend/__pycache__/models.cpython-36.pyc -------------------------------------------------------------------------------- /backend/__pycache__/urls.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coder-syl/cnki-spider-and-analyse/85118a20028403b6f6e21d1cd0f933bf96f0d4dd/backend/__pycache__/urls.cpython-36.pyc -------------------------------------------------------------------------------- /backend/__pycache__/views.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coder-syl/cnki-spider-and-analyse/85118a20028403b6f6e21d1cd0f933bf96f0d4dd/backend/__pycache__/views.cpython-36.pyc -------------------------------------------------------------------------------- /backend/admin.py: -------------------------------------------------------------------------------- 1 | from django.contrib import admin 2 | 3 | # Register your models here. 4 | -------------------------------------------------------------------------------- /backend/apps.py: -------------------------------------------------------------------------------- 1 | from django.apps import AppConfig 2 | 3 | 4 | class BackendConfig(AppConfig): 5 | name = 'backend' 6 | -------------------------------------------------------------------------------- /backend/migrations/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coder-syl/cnki-spider-and-analyse/85118a20028403b6f6e21d1cd0f933bf96f0d4dd/backend/migrations/__init__.py -------------------------------------------------------------------------------- /backend/migrations/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coder-syl/cnki-spider-and-analyse/85118a20028403b6f6e21d1cd0f933bf96f0d4dd/backend/migrations/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /backend/models.py: -------------------------------------------------------------------------------- 1 | from django.db import models 2 | 3 | # Create your models here. 4 | -------------------------------------------------------------------------------- /backend/tests.py: -------------------------------------------------------------------------------- 1 | from django.test import TestCase 2 | 3 | # Create your tests here. 4 | -------------------------------------------------------------------------------- /backend/urls.py: -------------------------------------------------------------------------------- 1 | from django.urls import path 2 | from django.conf.urls.static import static 3 | from django.conf import settings 4 | 5 | from backend import views 6 | 7 | urlpatterns = [ 8 | 9 | # 爬虫 10 | path('testSpider', views.spider, name="testSpider"), 11 | 12 | ] + static(settings.STATIC_URL, document_root=settings.STATIC_ROOT) 13 | -------------------------------------------------------------------------------- /backend/views.py: -------------------------------------------------------------------------------- 1 | from django.shortcuts import render 2 | 3 | import redis 4 | import json 5 | # Create your views here. 6 | from django.template import loader 7 | from django.http import HttpResponse 8 | from django.http import JsonResponse 9 | from django.core import serializers 10 | from spider.mian import start_spider 11 | 12 | import smtplib 13 | from email.mime.text import MIMEText 14 | 15 | 16 | 17 | 18 | 19 | 20 | def spider(request): 21 | 22 | start_spider.delay(10) 23 | 24 | return HttpResponse("pachong") 25 | 26 | -------------------------------------------------------------------------------- /cnki/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | 3 | from .celery import app as celery_app # noqa 4 | 5 | 6 | 7 | import pymysql 8 | pymysql.install_as_MySQLdb() 9 | 10 | -------------------------------------------------------------------------------- /cnki/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coder-syl/cnki-spider-and-analyse/85118a20028403b6f6e21d1cd0f933bf96f0d4dd/cnki/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /cnki/__pycache__/celery.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coder-syl/cnki-spider-and-analyse/85118a20028403b6f6e21d1cd0f933bf96f0d4dd/cnki/__pycache__/celery.cpython-36.pyc -------------------------------------------------------------------------------- /cnki/__pycache__/consumer.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coder-syl/cnki-spider-and-analyse/85118a20028403b6f6e21d1cd0f933bf96f0d4dd/cnki/__pycache__/consumer.cpython-36.pyc -------------------------------------------------------------------------------- /cnki/__pycache__/routing.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coder-syl/cnki-spider-and-analyse/85118a20028403b6f6e21d1cd0f933bf96f0d4dd/cnki/__pycache__/routing.cpython-36.pyc -------------------------------------------------------------------------------- /cnki/__pycache__/settings.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coder-syl/cnki-spider-and-analyse/85118a20028403b6f6e21d1cd0f933bf96f0d4dd/cnki/__pycache__/settings.cpython-36.pyc -------------------------------------------------------------------------------- /cnki/__pycache__/tasks.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coder-syl/cnki-spider-and-analyse/85118a20028403b6f6e21d1cd0f933bf96f0d4dd/cnki/__pycache__/tasks.cpython-36.pyc -------------------------------------------------------------------------------- /cnki/__pycache__/urls.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coder-syl/cnki-spider-and-analyse/85118a20028403b6f6e21d1cd0f933bf96f0d4dd/cnki/__pycache__/urls.cpython-36.pyc -------------------------------------------------------------------------------- /cnki/__pycache__/wsgi.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coder-syl/cnki-spider-and-analyse/85118a20028403b6f6e21d1cd0f933bf96f0d4dd/cnki/__pycache__/wsgi.cpython-36.pyc -------------------------------------------------------------------------------- /cnki/celery.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, unicode_literals # 必须在最上面 2 | 3 | import os 4 | from celery import Celery 5 | from django.conf import settings 6 | 7 | # 配置环境变量 8 | project_name = os.path.split(os.path.abspath('.'))[-1] 9 | project_settings = '%s.settings' % project_name 10 | os.environ.setdefault('DJANGO_SETTINGS_MODULE', project_settings) 11 | 12 | # 实例化 Celery 13 | app = Celery(project_name, 14 | broker='redis://localhost:6379', 15 | backend='redis://localhost:6379' 16 | ) 17 | # 使用 Django 的 settings 文件配置 Celery 18 | app.config_from_object('django.conf:settings') 19 | 20 | # Celery 加载所有注册的应用 21 | app.autodiscover_tasks(lambda: settings.INSTALLED_APPS) 22 | -------------------------------------------------------------------------------- /cnki/consumer.py: -------------------------------------------------------------------------------- 1 | from channels.generic.websocket import WebsocketConsumer 2 | import json 3 | from spider.paper_spider_by_app import start_spider_by_app 4 | 5 | class ChatConsumer(WebsocketConsumer): 6 | 7 | 8 | def connect(self): 9 | # 连接时触发 10 | print("开始连接") 11 | self.accept() 12 | print('self.channel_name',self.channel_name) 13 | 14 | # self.send(text_data=json.dumps({"message": "message"})) 15 | def disconnect(self, code): 16 | # 关闭连接时触发 17 | # print('关闭连接') 18 | # 19 | # try: 20 | # self.browser.quit(); 21 | # except Exception as e: 22 | # print("关闭出错啦=====================\n", e) 23 | # self.browser.quit() 24 | self.result.revoke(terminate=True) 25 | self.close() 26 | print('关闭连接') 27 | 28 | def receive(self, text_data=None, bytes_data=None): 29 | print("收到消息") 30 | print("==========",text_data) 31 | print(json.loads(text_data)['message']) 32 | print('self.channel_name',self.channel_name) 33 | self.keyWords=json.loads(text_data)['message'] 34 | self.result = start_spider_by_app.delay(self.keyWords, self.channel_name) 35 | 36 | # start_spider_by_app.delay(self) 37 | 38 | def send_message(self, event): 39 | print(event) 40 | print('self.result',self.result) 41 | self.send(json.dumps({ 42 | "paperInfo": event["message"] 43 | })) -------------------------------------------------------------------------------- /cnki/routing.py: -------------------------------------------------------------------------------- 1 | from channels.routing import ProtocolTypeRouter,URLRouter 2 | from channels.auth import AuthMiddlewareStack 3 | from django.urls import path,re_path 4 | from cnki.consumer import * 5 | 6 | 7 | 8 | # routing.py路由文件跟django的url.py功能类似,语法也一样,意思就是访问websocket接口 9 | 10 | 11 | websocket_urlpatterns = [ 12 | path(r"ws/chat/", ChatConsumer), 13 | 14 | ] 15 | 16 | # 这里规定了去哪里找websocket的接口 17 | application = ProtocolTypeRouter({ 18 | 'websocket':AuthMiddlewareStack( 19 | URLRouter( 20 | websocket_urlpatterns 21 | ) 22 | ) 23 | }) -------------------------------------------------------------------------------- /cnki/settings.py: -------------------------------------------------------------------------------- 1 | """ 2 | Django settings for cnki project. 3 | 4 | Generated by 'django-admin startproject' using Django 2.0.5. 5 | 6 | For more information on this file, see 7 | https://docs.djangoproject.com/en/2.0/topics/settings/ 8 | 9 | For the full list of settings and their values, see 10 | https://docs.djangoproject.com/en/2.0/ref/settings/ 11 | """ 12 | 13 | import os 14 | import djcelery 15 | # Build paths inside the project like this: os.path.join(BASE_DIR, ...) 16 | BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) 17 | 18 | 19 | # Quick-start development settings - unsuitable for production 20 | # See https://docs.djangoproject.com/en/2.0/howto/deployment/checklist/ 21 | 22 | # SECURITY WARNING: keep the secret key used in production secret! 23 | SECRET_KEY = 'n&5=u+_cef#4r!9uueivbo)%n2k-6zgursc@tf3+uw)!ymx$&h' 24 | 25 | # SECURITY WARNING: don't run with debug turned on in production! 26 | DEBUG = True 27 | 28 | ALLOWED_HOSTS = [] 29 | 30 | 31 | # Application definition 32 | 33 | INSTALLED_APPS = [ 34 | 'django.contrib.admin', 35 | 'django.contrib.auth', 36 | 'django.contrib.contenttypes', 37 | 'django.contrib.sessions', 38 | 'django.contrib.messages', 39 | 'django.contrib.staticfiles', 40 | 'djcelery', 41 | 'corsheaders', 42 | 'channels', 43 | 'backend', 44 | 'spider', 45 | 46 | ] 47 | 48 | MIDDLEWARE = [ 49 | 'django.middleware.security.SecurityMiddleware', 50 | 'django.contrib.sessions.middleware.SessionMiddleware', 51 | 'corsheaders.middleware.CorsMiddleware', # 默认 52 | 'django.middleware.common.CommonMiddleware', 53 | 'django.middleware.csrf.CsrfViewMiddleware', 54 | 'django.contrib.auth.middleware.AuthenticationMiddleware', 55 | 'django.contrib.messages.middleware.MessageMiddleware', 56 | 'django.middleware.clickjacking.XFrameOptionsMiddleware', 57 | ] 58 | 59 | ROOT_URLCONF = 'cnki.urls' 60 | 61 | TEMPLATES = [ 62 | { 63 | 'BACKEND': 'django.template.backends.django.DjangoTemplates', 64 | # 'DIRS':['frontend/dist'], 65 | 'DIRS':[''], 66 | 'APP_DIRS': True, 67 | 'OPTIONS': { 68 | 'context_processors': [ 69 | 'django.template.context_processors.debug', 70 | 'django.template.context_processors.request', 71 | 'django.contrib.auth.context_processors.auth', 72 | 'django.contrib.messages.context_processors.messages', 73 | ], 74 | }, 75 | }, 76 | ] 77 | 78 | WSGI_APPLICATION = 'cnki.wsgi.application' 79 | 80 | ASGI_APPLICATION = 'cnki.routing.application' 81 | 82 | # Database 83 | # https://docs.djangoproject.com/en/2.0/ref/settings/#databases 84 | 85 | DATABASES = { 86 | 'default': { 87 | 'ENGINE': 'django.db.backends.sqlite3', 88 | 'NAME': os.path.join(BASE_DIR, 'db.sqlite3'), 89 | } 90 | } 91 | 92 | 93 | # Password validation 94 | # https://docs.djangoproject.com/en/2.0/ref/settings/#auth-password-validators 95 | 96 | AUTH_PASSWORD_VALIDATORS = [ 97 | { 98 | 'NAME': 'django.contrib.auth.password_validation.UserAttributeSimilarityValidator', 99 | }, 100 | { 101 | 'NAME': 'django.contrib.auth.password_validation.MinimumLengthValidator', 102 | }, 103 | { 104 | 'NAME': 'django.contrib.auth.password_validation.CommonPasswordValidator', 105 | }, 106 | { 107 | 'NAME': 'django.contrib.auth.password_validation.NumericPasswordValidator', 108 | }, 109 | ] 110 | 111 | 112 | # Internationalization 113 | # https://docs.djangoproject.com/en/2.0/topics/i18n/ 114 | 115 | LANGUAGE_CODE = 'zh-Hans' 116 | 117 | TIME_ZONE = 'UTC' 118 | 119 | USE_I18N = True 120 | 121 | USE_L10N = True 122 | 123 | USE_TZ = True 124 | 125 | 126 | # Static files (CSS, JavaScript, Images) 127 | # https://docs.djangoproject.com/en/2.0/howto/static-files/ 128 | 129 | # STATIC_URL = '/static/' 130 | STATIC_URL = '/static/' 131 | STATIC_ROOT = os.path.join(BASE_DIR, 'backend/static') 132 | 133 | # # Add for vue.js 134 | # STATICFILES_DIRS = [ 135 | # os.path.join(BASE_DIR, "frontend/dist/static"), 136 | # ] 137 | 138 | # 跨域增加忽略 139 | CORS_ALLOW_CREDENTIALS = True 140 | CORS_ORIGIN_ALLOW_ALL = True 141 | # CORS_ORIGIN_WHITELIST = ( 142 | # 'https://*' 143 | # ) 144 | CORS_ALLOW_METHODS = ( 145 | 'DELETE', 146 | 'GET', 147 | 'OPTIONS', 148 | 'PATCH', 149 | 'POST', 150 | 'PUT', 151 | 'VIEW', 152 | ) 153 | CORS_ALLOW_HEADERS = ( 154 | 'XMLHttpRequest', 155 | 'X_FILENAME', 156 | 'accept-encoding', 157 | 'authorization', 158 | 'content-type', 159 | 'dnt', 160 | 'origin', 161 | 'user-agent', 162 | 'x-csrftoken', 163 | 'x-requested-with', 164 | ) 165 | 166 | djcelery.setup_loader() 167 | BROKER_URL = 'redis://127.0.0.1:6379/1' 168 | CELERY_IMPORTS = ('spider.paper_spider_by_app') 169 | CHANNEL_LAYERS = { 170 | 'default': { 171 | 'BACKEND': 'channels_redis.core.RedisChannelLayer', 172 | 'CONFIG': { 173 | "hosts": [('127.0.0.1', 6379)], 174 | }, 175 | }, 176 | } -------------------------------------------------------------------------------- /cnki/urls.py: -------------------------------------------------------------------------------- 1 | """cnki URL Configuration 2 | 3 | The `urlpatterns` list routes URLs to views. For more information please see: 4 | https://docs.djangoproject.com/en/2.0/topics/http/urls/ 5 | Examples: 6 | Function views 7 | 1. Add an import: from my_app import views 8 | 2. Add a URL to urlpatterns: path('', views.home, name='home') 9 | Class-based views 10 | 1. Add an import: from other_app.views import Home 11 | 2. Add a URL to urlpatterns: path('', Home.as_view(), name='home') 12 | Including another URLconf 13 | 1. Import the include() function: from django.urls import include, path 14 | 2. Add a URL to urlpatterns: path('blog/', include('blog.urls')) 15 | """ 16 | from django.contrib import admin 17 | from django.urls import path, include 18 | from django.views.generic.base import TemplateView # 使用通用模板 19 | 20 | 21 | urlpatterns = [ 22 | path('admin/', admin.site.urls), 23 | # path(r'', TemplateView.as_view(template_name="index.html")), 24 | path('cnki/', include('backend.urls')), 25 | 26 | ] 27 | -------------------------------------------------------------------------------- /cnki/wsgi.py: -------------------------------------------------------------------------------- 1 | """ 2 | WSGI config for cnki project. 3 | 4 | It exposes the WSGI callable as a module-level variable named ``application``. 5 | 6 | For more information on this file, see 7 | https://docs.djangoproject.com/en/2.0/howto/deployment/wsgi/ 8 | """ 9 | 10 | import os 11 | 12 | from django.core.wsgi import get_wsgi_application 13 | 14 | os.environ.setdefault("DJANGO_SETTINGS_MODULE", "cnki.settings") 15 | 16 | application = get_wsgi_application() 17 | -------------------------------------------------------------------------------- /db.sqlite3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coder-syl/cnki-spider-and-analyse/85118a20028403b6f6e21d1cd0f933bf96f0d4dd/db.sqlite3 -------------------------------------------------------------------------------- /frontend/.babelrc: -------------------------------------------------------------------------------- 1 | { 2 | "presets": [ 3 | ["env", { 4 | "modules": false, 5 | "targets": { 6 | "browsers": ["> 1%", "last 2 versions", "not ie <= 8"] 7 | } 8 | }], 9 | "stage-2" 10 | ], 11 | "plugins": ["transform-vue-jsx", "transform-runtime"], 12 | "env": { 13 | "test": { 14 | "presets": ["env", "stage-2"], 15 | "plugins": ["transform-vue-jsx", "transform-es2015-modules-commonjs", "dynamic-import-node"] 16 | } 17 | } 18 | } 19 | -------------------------------------------------------------------------------- /frontend/.editorconfig: -------------------------------------------------------------------------------- 1 | root = true 2 | 3 | [*] 4 | charset = utf-8 5 | indent_style = space 6 | indent_size = 2 7 | end_of_line = lf 8 | insert_final_newline = true 9 | trim_trailing_whitespace = true 10 | -------------------------------------------------------------------------------- /frontend/.eslintignore: -------------------------------------------------------------------------------- 1 | /build/ 2 | /config/ 3 | /dist/ 4 | /*.js 5 | /test/unit/coverage/ 6 | -------------------------------------------------------------------------------- /frontend/.eslintrc.js: -------------------------------------------------------------------------------- 1 | // https://eslint.org/docs/user-guide/configuring 2 | 3 | module.exports = { 4 | root: true, 5 | parserOptions: { 6 | parser: 'babel-eslint' 7 | }, 8 | env: { 9 | browser: true, 10 | }, 11 | extends: [ 12 | // https://github.com/vuejs/eslint-plugin-vue#priority-a-essential-error-prevention 13 | // consider switching to `plugin:vue/strongly-recommended` or `plugin:vue/recommended` for stricter rules. 14 | 'plugin:vue/essential', 15 | // https://github.com/standard/standard/blob/master/docs/RULES-en.md 16 | 'standard' 17 | ], 18 | // required to lint *.vue files 19 | plugins: [ 20 | 'vue' 21 | ], 22 | // add your custom rules here 23 | rules: { 24 | // allow async-await 25 | 'generator-star-spacing': 'off', 26 | // allow debugger during development 27 | 'no-debugger': process.env.NODE_ENV === 'production' ? 'error' : 'off' 28 | } 29 | } 30 | -------------------------------------------------------------------------------- /frontend/.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | node_modules/ 3 | /dist/ 4 | npm-debug.log* 5 | yarn-debug.log* 6 | yarn-error.log* 7 | /test/unit/coverage/ 8 | /test/e2e/reports/ 9 | selenium-debug.log 10 | 11 | # Editor directories and files 12 | .idea 13 | .vscode 14 | *.suo 15 | *.ntvs* 16 | *.njsproj 17 | *.sln 18 | -------------------------------------------------------------------------------- /frontend/.postcssrc.js: -------------------------------------------------------------------------------- 1 | // https://github.com/michael-ciniawsky/postcss-load-config 2 | 3 | module.exports = { 4 | "plugins": { 5 | "postcss-import": {}, 6 | "postcss-url": {}, 7 | // to edit target browsers: use "browserslist" field in package.json 8 | "autoprefixer": {} 9 | } 10 | } 11 | -------------------------------------------------------------------------------- /frontend/README.md: -------------------------------------------------------------------------------- 1 | # frontend 2 | 3 | > cnki spider 4 | 5 | ## Build Setup 6 | 7 | ``` bash 8 | # install dependencies 9 | npm install 10 | 11 | # serve with hot reload at localhost:8080 12 | npm run dev 13 | 14 | # build for production with minification 15 | npm run build 16 | 17 | # build for production and view the bundle analyzer report 18 | npm run build --report 19 | 20 | # run unit tests 21 | npm run unit 22 | 23 | # run e2e tests 24 | npm run e2e 25 | 26 | # run all tests 27 | npm test 28 | ``` 29 | 30 | For a detailed explanation on how things work, check out the [guide](http://vuejs-templates.github.io/webpack/) and [docs for vue-loader](http://vuejs.github.io/vue-loader). 31 | -------------------------------------------------------------------------------- /frontend/build/build.js: -------------------------------------------------------------------------------- 1 | 'use strict' 2 | require('./check-versions')() 3 | 4 | process.env.NODE_ENV = 'production' 5 | 6 | const ora = require('ora') 7 | const rm = require('rimraf') 8 | const path = require('path') 9 | const chalk = require('chalk') 10 | const webpack = require('webpack') 11 | const config = require('../config') 12 | const webpackConfig = require('./webpack.prod.conf') 13 | 14 | const spinner = ora('building for production...') 15 | spinner.start() 16 | 17 | rm(path.join(config.build.assetsRoot, config.build.assetsSubDirectory), err => { 18 | if (err) throw err 19 | webpack(webpackConfig, (err, stats) => { 20 | spinner.stop() 21 | if (err) throw err 22 | process.stdout.write(stats.toString({ 23 | colors: true, 24 | modules: false, 25 | children: false, // If you are using ts-loader, setting this to true will make TypeScript errors show up during build. 26 | chunks: false, 27 | chunkModules: false 28 | }) + '\n\n') 29 | 30 | if (stats.hasErrors()) { 31 | console.log(chalk.red(' Build failed with errors.\n')) 32 | process.exit(1) 33 | } 34 | 35 | console.log(chalk.cyan(' Build complete.\n')) 36 | console.log(chalk.yellow( 37 | ' Tip: built files are meant to be served over an HTTP server.\n' + 38 | ' Opening index.html over file:// won\'t work.\n' 39 | )) 40 | }) 41 | }) 42 | -------------------------------------------------------------------------------- /frontend/build/check-versions.js: -------------------------------------------------------------------------------- 1 | 'use strict' 2 | const chalk = require('chalk') 3 | const semver = require('semver') 4 | const packageConfig = require('../package.json') 5 | const shell = require('shelljs') 6 | 7 | function exec (cmd) { 8 | return require('child_process').execSync(cmd).toString().trim() 9 | } 10 | 11 | const versionRequirements = [ 12 | { 13 | name: 'node', 14 | currentVersion: semver.clean(process.version), 15 | versionRequirement: packageConfig.engines.node 16 | } 17 | ] 18 | 19 | if (shell.which('npm')) { 20 | versionRequirements.push({ 21 | name: 'npm', 22 | currentVersion: exec('npm --version'), 23 | versionRequirement: packageConfig.engines.npm 24 | }) 25 | } 26 | 27 | module.exports = function () { 28 | const warnings = [] 29 | 30 | for (let i = 0; i < versionRequirements.length; i++) { 31 | const mod = versionRequirements[i] 32 | 33 | if (!semver.satisfies(mod.currentVersion, mod.versionRequirement)) { 34 | warnings.push(mod.name + ': ' + 35 | chalk.red(mod.currentVersion) + ' should be ' + 36 | chalk.green(mod.versionRequirement) 37 | ) 38 | } 39 | } 40 | 41 | if (warnings.length) { 42 | console.log('') 43 | console.log(chalk.yellow('To use this template, you must update following to modules:')) 44 | console.log() 45 | 46 | for (let i = 0; i < warnings.length; i++) { 47 | const warning = warnings[i] 48 | console.log(' ' + warning) 49 | } 50 | 51 | console.log() 52 | process.exit(1) 53 | } 54 | } 55 | -------------------------------------------------------------------------------- /frontend/build/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coder-syl/cnki-spider-and-analyse/85118a20028403b6f6e21d1cd0f933bf96f0d4dd/frontend/build/logo.png -------------------------------------------------------------------------------- /frontend/build/utils.js: -------------------------------------------------------------------------------- 1 | 'use strict' 2 | const path = require('path') 3 | const config = require('../config') 4 | const ExtractTextPlugin = require('extract-text-webpack-plugin') 5 | const packageConfig = require('../package.json') 6 | 7 | exports.assetsPath = function (_path) { 8 | const assetsSubDirectory = process.env.NODE_ENV === 'production' 9 | ? config.build.assetsSubDirectory 10 | : config.dev.assetsSubDirectory 11 | 12 | return path.posix.join(assetsSubDirectory, _path) 13 | } 14 | 15 | exports.cssLoaders = function (options) { 16 | options = options || {} 17 | 18 | const cssLoader = { 19 | loader: 'css-loader', 20 | options: { 21 | sourceMap: options.sourceMap 22 | } 23 | } 24 | 25 | const postcssLoader = { 26 | loader: 'postcss-loader', 27 | options: { 28 | sourceMap: options.sourceMap 29 | } 30 | } 31 | 32 | // generate loader string to be used with extract text plugin 33 | function generateLoaders (loader, loaderOptions) { 34 | const loaders = options.usePostCSS ? [cssLoader, postcssLoader] : [cssLoader] 35 | 36 | if (loader) { 37 | loaders.push({ 38 | loader: loader + '-loader', 39 | options: Object.assign({}, loaderOptions, { 40 | sourceMap: options.sourceMap 41 | }) 42 | }) 43 | } 44 | 45 | // Extract CSS when that option is specified 46 | // (which is the case during production build) 47 | if (options.extract) { 48 | return ExtractTextPlugin.extract({ 49 | use: loaders, 50 | fallback: 'vue-style-loader' 51 | }) 52 | } else { 53 | return ['vue-style-loader'].concat(loaders) 54 | } 55 | } 56 | 57 | // https://vue-loader.vuejs.org/en/configurations/extract-css.html 58 | return { 59 | css: generateLoaders(), 60 | postcss: generateLoaders(), 61 | less: generateLoaders('less'), 62 | sass: generateLoaders('sass', { indentedSyntax: true }), 63 | scss: generateLoaders('sass'), 64 | stylus: generateLoaders('stylus'), 65 | styl: generateLoaders('stylus') 66 | } 67 | } 68 | 69 | // Generate loaders for standalone style files (outside of .vue) 70 | exports.styleLoaders = function (options) { 71 | const output = [] 72 | const loaders = exports.cssLoaders(options) 73 | 74 | for (const extension in loaders) { 75 | const loader = loaders[extension] 76 | output.push({ 77 | test: new RegExp('\\.' + extension + '$'), 78 | use: loader 79 | }) 80 | } 81 | 82 | return output 83 | } 84 | 85 | exports.createNotifierCallback = () => { 86 | const notifier = require('node-notifier') 87 | 88 | return (severity, errors) => { 89 | if (severity !== 'error') return 90 | 91 | const error = errors[0] 92 | const filename = error.file && error.file.split('!').pop() 93 | 94 | notifier.notify({ 95 | title: packageConfig.name, 96 | message: severity + ': ' + error.name, 97 | subtitle: filename || '', 98 | icon: path.join(__dirname, 'logo.png') 99 | }) 100 | } 101 | } 102 | -------------------------------------------------------------------------------- /frontend/build/vue-loader.conf.js: -------------------------------------------------------------------------------- 1 | 'use strict' 2 | const utils = require('./utils') 3 | const config = require('../config') 4 | const isProduction = process.env.NODE_ENV === 'production' 5 | const sourceMapEnabled = isProduction 6 | ? config.build.productionSourceMap 7 | : config.dev.cssSourceMap 8 | 9 | module.exports = { 10 | loaders: utils.cssLoaders({ 11 | sourceMap: sourceMapEnabled, 12 | extract: isProduction 13 | }), 14 | cssSourceMap: sourceMapEnabled, 15 | cacheBusting: config.dev.cacheBusting, 16 | transformToRequire: { 17 | video: ['src', 'poster'], 18 | source: 'src', 19 | img: 'src', 20 | image: 'xlink:href' 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /frontend/build/webpack.base.conf.js: -------------------------------------------------------------------------------- 1 | 'use strict' 2 | const path = require('path') 3 | const utils = require('./utils') 4 | const config = require('../config') 5 | const vueLoaderConfig = require('./vue-loader.conf') 6 | 7 | function resolve (dir) { 8 | return path.join(__dirname, '..', dir) 9 | } 10 | 11 | const createLintingRule = () => ({ 12 | test: /\.(js|vue)$/, 13 | loader: 'eslint-loader', 14 | enforce: 'pre', 15 | include: [resolve('src'), resolve('test')], 16 | options: { 17 | formatter: require('eslint-friendly-formatter'), 18 | emitWarning: !config.dev.showEslintErrorsInOverlay 19 | } 20 | }) 21 | 22 | module.exports = { 23 | context: path.resolve(__dirname, '../'), 24 | entry: { 25 | app: './src/main.js' 26 | }, 27 | output: { 28 | path: config.build.assetsRoot, 29 | filename: '[name].js', 30 | publicPath: process.env.NODE_ENV === 'production' 31 | ? config.build.assetsPublicPath 32 | : config.dev.assetsPublicPath 33 | }, 34 | resolve: { 35 | extensions: ['.js', '.vue', '.json'], 36 | alias: { 37 | 'vue$': 'vue/dist/vue.esm.js', 38 | '@': resolve('src'), 39 | } 40 | }, 41 | module: { 42 | rules: [ 43 | ...(config.dev.useEslint ? [createLintingRule()] : []), 44 | { 45 | test: /\.vue$/, 46 | loader: 'vue-loader', 47 | options: vueLoaderConfig 48 | }, 49 | { 50 | test: /\.js$/, 51 | loader: 'babel-loader', 52 | include: [resolve('src'), resolve('test'), resolve('node_modules/webpack-dev-server/client')] 53 | }, 54 | { 55 | test: /\.(png|jpe?g|gif|svg)(\?.*)?$/, 56 | loader: 'url-loader', 57 | options: { 58 | limit: 10000, 59 | name: utils.assetsPath('img/[name].[hash:7].[ext]') 60 | } 61 | }, 62 | { 63 | test: /\.(mp4|webm|ogg|mp3|wav|flac|aac)(\?.*)?$/, 64 | loader: 'url-loader', 65 | options: { 66 | limit: 10000, 67 | name: utils.assetsPath('media/[name].[hash:7].[ext]') 68 | } 69 | }, 70 | { 71 | test: /\.(woff2?|eot|ttf|otf)(\?.*)?$/, 72 | loader: 'url-loader', 73 | options: { 74 | limit: 10000, 75 | name: utils.assetsPath('fonts/[name].[hash:7].[ext]') 76 | } 77 | } 78 | ] 79 | }, 80 | node: { 81 | // prevent webpack from injecting useless setImmediate polyfill because Vue 82 | // source contains it (although only uses it if it's native). 83 | setImmediate: false, 84 | // prevent webpack from injecting mocks to Node native modules 85 | // that does not make sense for the client 86 | dgram: 'empty', 87 | fs: 'empty', 88 | net: 'empty', 89 | tls: 'empty', 90 | child_process: 'empty' 91 | } 92 | } 93 | -------------------------------------------------------------------------------- /frontend/build/webpack.dev.conf.js: -------------------------------------------------------------------------------- 1 | 'use strict' 2 | const utils = require('./utils') 3 | const webpack = require('webpack') 4 | const config = require('../config') 5 | const merge = require('webpack-merge') 6 | const path = require('path') 7 | const baseWebpackConfig = require('./webpack.base.conf') 8 | const CopyWebpackPlugin = require('copy-webpack-plugin') 9 | const HtmlWebpackPlugin = require('html-webpack-plugin') 10 | const FriendlyErrorsPlugin = require('friendly-errors-webpack-plugin') 11 | const portfinder = require('portfinder') 12 | 13 | const HOST = process.env.HOST 14 | const PORT = process.env.PORT && Number(process.env.PORT) 15 | 16 | const devWebpackConfig = merge(baseWebpackConfig, { 17 | module: { 18 | rules: utils.styleLoaders({ sourceMap: config.dev.cssSourceMap, usePostCSS: true }) 19 | }, 20 | // cheap-module-eval-source-map is faster for development 21 | devtool: config.dev.devtool, 22 | 23 | // these devServer options should be customized in /config/index.js 24 | devServer: { 25 | clientLogLevel: 'warning', 26 | historyApiFallback: { 27 | rewrites: [ 28 | { from: /.*/, to: path.posix.join(config.dev.assetsPublicPath, 'index.html') }, 29 | ], 30 | }, 31 | hot: true, 32 | contentBase: false, // since we use CopyWebpackPlugin. 33 | compress: true, 34 | host: HOST || config.dev.host, 35 | port: PORT || config.dev.port, 36 | open: config.dev.autoOpenBrowser, 37 | overlay: config.dev.errorOverlay 38 | ? { warnings: false, errors: true } 39 | : false, 40 | publicPath: config.dev.assetsPublicPath, 41 | proxy: config.dev.proxyTable, 42 | quiet: true, // necessary for FriendlyErrorsPlugin 43 | watchOptions: { 44 | poll: config.dev.poll, 45 | } 46 | }, 47 | plugins: [ 48 | new webpack.DefinePlugin({ 49 | 'process.env': require('../config/dev.env') 50 | }), 51 | new webpack.HotModuleReplacementPlugin(), 52 | new webpack.NamedModulesPlugin(), // HMR shows correct file names in console on update. 53 | new webpack.NoEmitOnErrorsPlugin(), 54 | // https://github.com/ampedandwired/html-webpack-plugin 55 | new HtmlWebpackPlugin({ 56 | filename: 'index.html', 57 | template: 'index.html', 58 | inject: true 59 | }), 60 | // copy custom static assets 61 | new CopyWebpackPlugin([ 62 | { 63 | from: path.resolve(__dirname, '../static'), 64 | to: config.dev.assetsSubDirectory, 65 | ignore: ['.*'] 66 | } 67 | ]) 68 | ] 69 | }) 70 | 71 | module.exports = new Promise((resolve, reject) => { 72 | portfinder.basePort = process.env.PORT || config.dev.port 73 | portfinder.getPort((err, port) => { 74 | if (err) { 75 | reject(err) 76 | } else { 77 | // publish the new Port, necessary for e2e tests 78 | process.env.PORT = port 79 | // add port to devServer config 80 | devWebpackConfig.devServer.port = port 81 | 82 | // Add FriendlyErrorsPlugin 83 | devWebpackConfig.plugins.push(new FriendlyErrorsPlugin({ 84 | compilationSuccessInfo: { 85 | messages: [`Your application is running here: http://${devWebpackConfig.devServer.host}:${port}`], 86 | }, 87 | onErrors: config.dev.notifyOnErrors 88 | ? utils.createNotifierCallback() 89 | : undefined 90 | })) 91 | 92 | resolve(devWebpackConfig) 93 | } 94 | }) 95 | }) 96 | -------------------------------------------------------------------------------- /frontend/build/webpack.prod.conf.js: -------------------------------------------------------------------------------- 1 | 'use strict' 2 | const path = require('path') 3 | const utils = require('./utils') 4 | const webpack = require('webpack') 5 | const config = require('../config') 6 | const merge = require('webpack-merge') 7 | const baseWebpackConfig = require('./webpack.base.conf') 8 | const CopyWebpackPlugin = require('copy-webpack-plugin') 9 | const HtmlWebpackPlugin = require('html-webpack-plugin') 10 | const ExtractTextPlugin = require('extract-text-webpack-plugin') 11 | const OptimizeCSSPlugin = require('optimize-css-assets-webpack-plugin') 12 | const UglifyJsPlugin = require('uglifyjs-webpack-plugin') 13 | 14 | const env = process.env.NODE_ENV === 'testing' 15 | ? require('../config/test.env') 16 | : require('../config/prod.env') 17 | 18 | const webpackConfig = merge(baseWebpackConfig, { 19 | module: { 20 | rules: utils.styleLoaders({ 21 | sourceMap: config.build.productionSourceMap, 22 | extract: true, 23 | usePostCSS: true 24 | }) 25 | }, 26 | devtool: config.build.productionSourceMap ? config.build.devtool : false, 27 | output: { 28 | path: config.build.assetsRoot, 29 | filename: utils.assetsPath('js/[name].[chunkhash].js'), 30 | chunkFilename: utils.assetsPath('js/[id].[chunkhash].js') 31 | }, 32 | plugins: [ 33 | // http://vuejs.github.io/vue-loader/en/workflow/production.html 34 | new webpack.DefinePlugin({ 35 | 'process.env': env 36 | }), 37 | new UglifyJsPlugin({ 38 | uglifyOptions: { 39 | compress: { 40 | warnings: false 41 | } 42 | }, 43 | sourceMap: config.build.productionSourceMap, 44 | parallel: true 45 | }), 46 | // extract css into its own file 47 | new ExtractTextPlugin({ 48 | filename: utils.assetsPath('css/[name].[contenthash].css'), 49 | // Setting the following option to `false` will not extract CSS from codesplit chunks. 50 | // Their CSS will instead be inserted dynamically with style-loader when the codesplit chunk has been loaded by webpack. 51 | // It's currently set to `true` because we are seeing that sourcemaps are included in the codesplit bundle as well when it's `false`, 52 | // increasing file size: https://github.com/vuejs-templates/webpack/issues/1110 53 | allChunks: true, 54 | }), 55 | // Compress extracted CSS. We are using this plugin so that possible 56 | // duplicated CSS from different components can be deduped. 57 | new OptimizeCSSPlugin({ 58 | cssProcessorOptions: config.build.productionSourceMap 59 | ? { safe: true, map: { inline: false } } 60 | : { safe: true } 61 | }), 62 | // generate dist index.html with correct asset hash for caching. 63 | // you can customize output by editing /index.html 64 | // see https://github.com/ampedandwired/html-webpack-plugin 65 | new HtmlWebpackPlugin({ 66 | filename: process.env.NODE_ENV === 'testing' 67 | ? 'index.html' 68 | : config.build.index, 69 | template: 'index.html', 70 | inject: true, 71 | minify: { 72 | removeComments: true, 73 | collapseWhitespace: true, 74 | removeAttributeQuotes: true 75 | // more options: 76 | // https://github.com/kangax/html-minifier#options-quick-reference 77 | }, 78 | // necessary to consistently work with multiple chunks via CommonsChunkPlugin 79 | chunksSortMode: 'dependency' 80 | }), 81 | // keep module.id stable when vendor modules does not change 82 | new webpack.HashedModuleIdsPlugin(), 83 | // enable scope hoisting 84 | new webpack.optimize.ModuleConcatenationPlugin(), 85 | // split vendor js into its own file 86 | new webpack.optimize.CommonsChunkPlugin({ 87 | name: 'vendor', 88 | minChunks (module) { 89 | // any required modules inside node_modules are extracted to vendor 90 | return ( 91 | module.resource && 92 | /\.js$/.test(module.resource) && 93 | module.resource.indexOf( 94 | path.join(__dirname, '../node_modules') 95 | ) === 0 96 | ) 97 | } 98 | }), 99 | // extract webpack runtime and module manifest to its own file in order to 100 | // prevent vendor hash from being updated whenever app bundle is updated 101 | new webpack.optimize.CommonsChunkPlugin({ 102 | name: 'manifest', 103 | minChunks: Infinity 104 | }), 105 | // This instance extracts shared chunks from code splitted chunks and bundles them 106 | // in a separate chunk, similar to the vendor chunk 107 | // see: https://webpack.js.org/plugins/commons-chunk-plugin/#extra-async-commons-chunk 108 | new webpack.optimize.CommonsChunkPlugin({ 109 | name: 'app', 110 | async: 'vendor-async', 111 | children: true, 112 | minChunks: 3 113 | }), 114 | 115 | // copy custom static assets 116 | new CopyWebpackPlugin([ 117 | { 118 | from: path.resolve(__dirname, '../static'), 119 | to: config.build.assetsSubDirectory, 120 | ignore: ['.*'] 121 | } 122 | ]) 123 | ] 124 | }) 125 | 126 | if (config.build.productionGzip) { 127 | const CompressionWebpackPlugin = require('compression-webpack-plugin') 128 | 129 | webpackConfig.plugins.push( 130 | new CompressionWebpackPlugin({ 131 | asset: '[path].gz[query]', 132 | algorithm: 'gzip', 133 | test: new RegExp( 134 | '\\.(' + 135 | config.build.productionGzipExtensions.join('|') + 136 | ')$' 137 | ), 138 | threshold: 10240, 139 | minRatio: 0.8 140 | }) 141 | ) 142 | } 143 | 144 | if (config.build.bundleAnalyzerReport) { 145 | const BundleAnalyzerPlugin = require('webpack-bundle-analyzer').BundleAnalyzerPlugin 146 | webpackConfig.plugins.push(new BundleAnalyzerPlugin()) 147 | } 148 | 149 | module.exports = webpackConfig 150 | -------------------------------------------------------------------------------- /frontend/config/dev.env.js: -------------------------------------------------------------------------------- 1 | 'use strict' 2 | const merge = require('webpack-merge') 3 | const prodEnv = require('./prod.env') 4 | 5 | module.exports = merge(prodEnv, { 6 | NODE_ENV: '"development"' 7 | }) 8 | -------------------------------------------------------------------------------- /frontend/config/index.js: -------------------------------------------------------------------------------- 1 | 'use strict' 2 | // Template version: 1.3.1 3 | // see http://vuejs-templates.github.io/webpack for documentation. 4 | 5 | const path = require('path') 6 | 7 | module.exports = { 8 | dev: { 9 | 10 | // Paths 11 | assetsSubDirectory: 'static', 12 | assetsPublicPath: '/', 13 | proxyTable: {}, 14 | 15 | // Various Dev Server settings 16 | host: 'localhost', // can be overwritten by process.env.HOST 17 | port: 8080, // can be overwritten by process.env.PORT, if port is in use, a free one will be determined 18 | autoOpenBrowser: false, 19 | errorOverlay: true, 20 | notifyOnErrors: true, 21 | poll: false, // https://webpack.js.org/configuration/dev-server/#devserver-watchoptions- 22 | 23 | // Use Eslint Loader? 24 | // If true, your code will be linted during bundling and 25 | // linting errors and warnings will be shown in the console. 26 | // useEslint: true, 27 | useEslint: false, 28 | // If true, eslint errors and warnings will also be shown in the error overlay 29 | // in the browser. 30 | showEslintErrorsInOverlay: false, 31 | 32 | /** 33 | * Source Maps 34 | */ 35 | 36 | // https://webpack.js.org/configuration/devtool/#development 37 | devtool: 'cheap-module-eval-source-map', 38 | 39 | // If you have problems debugging vue-files in devtools, 40 | // set this to false - it *may* help 41 | // https://vue-loader.vuejs.org/en/options.html#cachebusting 42 | cacheBusting: true, 43 | 44 | cssSourceMap: true 45 | }, 46 | 47 | build: { 48 | // Template for index.html 49 | index: path.resolve(__dirname, '../dist/index.html'), 50 | 51 | // Paths 52 | assetsRoot: path.resolve(__dirname, '../dist'), 53 | assetsSubDirectory: 'static', 54 | assetsPublicPath: '/', 55 | 56 | /** 57 | * Source Maps 58 | */ 59 | 60 | productionSourceMap: true, 61 | // https://webpack.js.org/configuration/devtool/#production 62 | devtool: '#source-map', 63 | 64 | // Gzip off by default as many popular static hosts such as 65 | // Surge or Netlify already gzip all static assets for you. 66 | // Before setting to `true`, make sure to: 67 | // npm install --save-dev compression-webpack-plugin 68 | productionGzip: false, 69 | productionGzipExtensions: ['js', 'css'], 70 | 71 | // Run the build command with an extra argument to 72 | // View the bundle analyzer report after build finishes: 73 | // `npm run build --report` 74 | // Set to `true` or `false` to always turn it on or off 75 | bundleAnalyzerReport: process.env.npm_config_report 76 | } 77 | } 78 | -------------------------------------------------------------------------------- /frontend/config/prod.env.js: -------------------------------------------------------------------------------- 1 | 'use strict' 2 | module.exports = { 3 | NODE_ENV: '"production"' 4 | } 5 | -------------------------------------------------------------------------------- /frontend/config/test.env.js: -------------------------------------------------------------------------------- 1 | 'use strict' 2 | const merge = require('webpack-merge') 3 | const devEnv = require('./dev.env') 4 | 5 | module.exports = merge(devEnv, { 6 | NODE_ENV: '"testing"' 7 | }) 8 | -------------------------------------------------------------------------------- /frontend/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | frontend 7 | 8 | 9 |
10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /frontend/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "frontend", 3 | "version": "1.0.0", 4 | "description": "cnki spider", 5 | "author": "coder-syl", 6 | "private": true, 7 | "scripts": { 8 | "dev": "webpack-dev-server --inline --progress --config build/webpack.dev.conf.js", 9 | "start": "npm run dev", 10 | "unit": "jest --config test/unit/jest.conf.js --coverage", 11 | "e2e": "node test/e2e/runner.js", 12 | "test": "npm run unit && npm run e2e", 13 | "lint": "eslint --ext .js,.vue src test/unit test/e2e/specs", 14 | "build": "node build/build.js" 15 | }, 16 | "dependencies": { 17 | "element-ui": "^2.13.0", 18 | "vue": "^2.5.2", 19 | "vue-router": "^3.0.1" 20 | }, 21 | "devDependencies": { 22 | "autoprefixer": "^7.1.2", 23 | "babel-core": "^6.22.1", 24 | "babel-eslint": "^8.2.1", 25 | "babel-helper-vue-jsx-merge-props": "^2.0.3", 26 | "babel-jest": "^21.0.2", 27 | "babel-loader": "^7.1.1", 28 | "babel-plugin-dynamic-import-node": "^1.2.0", 29 | "babel-plugin-syntax-jsx": "^6.18.0", 30 | "babel-plugin-transform-es2015-modules-commonjs": "^6.26.0", 31 | "babel-plugin-transform-runtime": "^6.22.0", 32 | "babel-plugin-transform-vue-jsx": "^3.5.0", 33 | "babel-preset-env": "^1.3.2", 34 | "babel-preset-stage-2": "^6.22.0", 35 | "babel-register": "^6.22.0", 36 | "chalk": "^2.0.1", 37 | "chromedriver": "^2.27.2", 38 | "copy-webpack-plugin": "^4.0.1", 39 | "cross-spawn": "^5.0.1", 40 | "css-loader": "^0.28.0", 41 | "eslint": "^4.15.0", 42 | "eslint-config-standard": "^10.2.1", 43 | "eslint-friendly-formatter": "^3.0.0", 44 | "eslint-loader": "^1.7.1", 45 | "eslint-plugin-import": "^2.7.0", 46 | "eslint-plugin-node": "^5.2.0", 47 | "eslint-plugin-promise": "^3.4.0", 48 | "eslint-plugin-standard": "^3.0.1", 49 | "eslint-plugin-vue": "^4.0.0", 50 | "extract-text-webpack-plugin": "^3.0.0", 51 | "file-loader": "^1.1.4", 52 | "friendly-errors-webpack-plugin": "^1.6.1", 53 | "html-webpack-plugin": "^2.30.1", 54 | "jest": "^22.0.4", 55 | "jest-serializer-vue": "^0.3.0", 56 | "nightwatch": "^0.9.12", 57 | "node-notifier": "^5.1.2", 58 | "optimize-css-assets-webpack-plugin": "^3.2.0", 59 | "ora": "^1.2.0", 60 | "portfinder": "^1.0.13", 61 | "postcss-import": "^11.0.0", 62 | "postcss-loader": "^2.0.8", 63 | "postcss-url": "^7.2.1", 64 | "rimraf": "^2.6.0", 65 | "selenium-server": "^3.0.1", 66 | "semver": "^5.3.0", 67 | "shelljs": "^0.7.6", 68 | "uglifyjs-webpack-plugin": "^1.1.1", 69 | "url-loader": "^0.5.8", 70 | "vue-jest": "^1.0.2", 71 | "vue-loader": "^13.3.0", 72 | "vue-style-loader": "^3.0.1", 73 | "vue-template-compiler": "^2.5.2", 74 | "webpack": "^3.6.0", 75 | "webpack-bundle-analyzer": "^3.5.2", 76 | "webpack-dev-server": "^2.9.1", 77 | "webpack-merge": "^4.1.0" 78 | }, 79 | "engines": { 80 | "node": ">= 6.0.0", 81 | "npm": ">= 3.0.0" 82 | }, 83 | "browserslist": [ 84 | "> 1%", 85 | "last 2 versions", 86 | "not ie <= 8" 87 | ] 88 | } 89 | -------------------------------------------------------------------------------- /frontend/src/App.vue: -------------------------------------------------------------------------------- 1 | 14 | 25 | 75 | -------------------------------------------------------------------------------- /frontend/src/assets/img/analyse.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coder-syl/cnki-spider-and-analyse/85118a20028403b6f6e21d1cd0f933bf96f0d4dd/frontend/src/assets/img/analyse.png -------------------------------------------------------------------------------- /frontend/src/assets/img/btos.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coder-syl/cnki-spider-and-analyse/85118a20028403b6f6e21d1cd0f933bf96f0d4dd/frontend/src/assets/img/btos.png -------------------------------------------------------------------------------- /frontend/src/assets/img/chart.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coder-syl/cnki-spider-and-analyse/85118a20028403b6f6e21d1cd0f933bf96f0d4dd/frontend/src/assets/img/chart.png -------------------------------------------------------------------------------- /frontend/src/assets/img/check.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coder-syl/cnki-spider-and-analyse/85118a20028403b6f6e21d1cd0f933bf96f0d4dd/frontend/src/assets/img/check.png -------------------------------------------------------------------------------- /frontend/src/assets/img/cnki.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coder-syl/cnki-spider-and-analyse/85118a20028403b6f6e21d1cd0f933bf96f0d4dd/frontend/src/assets/img/cnki.png -------------------------------------------------------------------------------- /frontend/src/assets/img/footer.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coder-syl/cnki-spider-and-analyse/85118a20028403b6f6e21d1cd0f933bf96f0d4dd/frontend/src/assets/img/footer.png -------------------------------------------------------------------------------- /frontend/src/assets/img/hot/icon-item001.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coder-syl/cnki-spider-and-analyse/85118a20028403b6f6e21d1cd0f933bf96f0d4dd/frontend/src/assets/img/hot/icon-item001.png -------------------------------------------------------------------------------- /frontend/src/assets/img/hot/icon-item002.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coder-syl/cnki-spider-and-analyse/85118a20028403b6f6e21d1cd0f933bf96f0d4dd/frontend/src/assets/img/hot/icon-item002.png -------------------------------------------------------------------------------- /frontend/src/assets/img/hot/icon-item003.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coder-syl/cnki-spider-and-analyse/85118a20028403b6f6e21d1cd0f933bf96f0d4dd/frontend/src/assets/img/hot/icon-item003.png -------------------------------------------------------------------------------- /frontend/src/assets/img/hot/icon-item004.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coder-syl/cnki-spider-and-analyse/85118a20028403b6f6e21d1cd0f933bf96f0d4dd/frontend/src/assets/img/hot/icon-item004.png -------------------------------------------------------------------------------- /frontend/src/assets/img/hot/icon-item005.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coder-syl/cnki-spider-and-analyse/85118a20028403b6f6e21d1cd0f933bf96f0d4dd/frontend/src/assets/img/hot/icon-item005.png -------------------------------------------------------------------------------- /frontend/src/assets/img/hot/icon-item006.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coder-syl/cnki-spider-and-analyse/85118a20028403b6f6e21d1cd0f933bf96f0d4dd/frontend/src/assets/img/hot/icon-item006.png -------------------------------------------------------------------------------- /frontend/src/assets/img/hotAnalyBg.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coder-syl/cnki-spider-and-analyse/85118a20028403b6f6e21d1cd0f933bf96f0d4dd/frontend/src/assets/img/hotAnalyBg.jpg -------------------------------------------------------------------------------- /frontend/src/assets/img/icon1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coder-syl/cnki-spider-and-analyse/85118a20028403b6f6e21d1cd0f933bf96f0d4dd/frontend/src/assets/img/icon1.png -------------------------------------------------------------------------------- /frontend/src/assets/img/icon2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coder-syl/cnki-spider-and-analyse/85118a20028403b6f6e21d1cd0f933bf96f0d4dd/frontend/src/assets/img/icon2.png -------------------------------------------------------------------------------- /frontend/src/assets/img/icon3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coder-syl/cnki-spider-and-analyse/85118a20028403b6f6e21d1cd0f933bf96f0d4dd/frontend/src/assets/img/icon3.png -------------------------------------------------------------------------------- /frontend/src/assets/img/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coder-syl/cnki-spider-and-analyse/85118a20028403b6f6e21d1cd0f933bf96f0d4dd/frontend/src/assets/img/logo.png -------------------------------------------------------------------------------- /frontend/src/assets/img/menu.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coder-syl/cnki-spider-and-analyse/85118a20028403b6f6e21d1cd0f933bf96f0d4dd/frontend/src/assets/img/menu.png -------------------------------------------------------------------------------- /frontend/src/assets/img/online/left.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coder-syl/cnki-spider-and-analyse/85118a20028403b6f6e21d1cd0f933bf96f0d4dd/frontend/src/assets/img/online/left.png -------------------------------------------------------------------------------- /frontend/src/assets/img/online/right.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coder-syl/cnki-spider-and-analyse/85118a20028403b6f6e21d1cd0f933bf96f0d4dd/frontend/src/assets/img/online/right.png -------------------------------------------------------------------------------- /frontend/src/assets/img/sliderBg.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coder-syl/cnki-spider-and-analyse/85118a20028403b6f6e21d1cd0f933bf96f0d4dd/frontend/src/assets/img/sliderBg.png -------------------------------------------------------------------------------- /frontend/src/assets/img/under.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coder-syl/cnki-spider-and-analyse/85118a20028403b6f6e21d1cd0f933bf96f0d4dd/frontend/src/assets/img/under.png -------------------------------------------------------------------------------- /frontend/src/components/cnki-footer.vue: -------------------------------------------------------------------------------- 1 | 51 | 52 | 62 | 63 | 64 | 183 | -------------------------------------------------------------------------------- /frontend/src/components/cnki-header.vue: -------------------------------------------------------------------------------- 1 | 28 | 29 | 70 | 71 | 72 | 188 | -------------------------------------------------------------------------------- /frontend/src/components/index/index-data.vue: -------------------------------------------------------------------------------- 1 | 15 | > 42 | 82 | -------------------------------------------------------------------------------- /frontend/src/components/index/index-feature.vue: -------------------------------------------------------------------------------- 1 | 44 | > 49 | 104 | -------------------------------------------------------------------------------- /frontend/src/components/index/index-service.vue: -------------------------------------------------------------------------------- 1 | 18 | > 45 | 77 | -------------------------------------------------------------------------------- /frontend/src/components/index/index-slider.vue: -------------------------------------------------------------------------------- 1 | 21 | > 26 | 101 | -------------------------------------------------------------------------------- /frontend/src/layouts/chartAnalyse.vue: -------------------------------------------------------------------------------- 1 | 7 | 8 | 18 | 19 | 20 | 37 | -------------------------------------------------------------------------------- /frontend/src/layouts/hotAnalyse.vue: -------------------------------------------------------------------------------- 1 | 20 | 21 | 60 | 61 | 124 | -------------------------------------------------------------------------------- /frontend/src/layouts/index.vue: -------------------------------------------------------------------------------- 1 | 9 | 10 | 30 | 31 | 32 | 34 | -------------------------------------------------------------------------------- /frontend/src/layouts/onlineSpider.vue: -------------------------------------------------------------------------------- 1 | 66 | 67 | 147 | 148 | 149 | 288 | -------------------------------------------------------------------------------- /frontend/src/layouts/paperDetail.vue: -------------------------------------------------------------------------------- 1 | 7 | 8 | 18 | 19 | 20 | 37 | -------------------------------------------------------------------------------- /frontend/src/main.js: -------------------------------------------------------------------------------- 1 | // The Vue build version to load with the `import` command 2 | // (runtime-only or standalone) has been set in webpack.base.conf with an alias. 3 | import Vue from 'vue' 4 | import App from './App' 5 | import router from './router' 6 | import ElementUI from 'element-ui' 7 | import 'element-ui/lib/theme-chalk/index.css' 8 | 9 | Vue.use(ElementUI) 10 | Vue.config.productionTip = false 11 | 12 | /* eslint-disable no-new */ 13 | new Vue({ 14 | el: '#app', 15 | router, 16 | components: { App }, 17 | template: '' 18 | }) 19 | -------------------------------------------------------------------------------- /frontend/src/router/index.js: -------------------------------------------------------------------------------- 1 | import Vue from 'vue' 2 | import Router from 'vue-router' 3 | import index from '@/layouts/index' 4 | import hotAnalyse from '@/layouts/hotAnalyse' 5 | import onlineSpider from '@/layouts/onlineSpider' 6 | import chartAnalyse from '@/layouts/chartAnalyse' 7 | import paperDetail from '@/layouts/paperDetail' 8 | 9 | Vue.use(Router) 10 | 11 | export default new Router({ 12 | routes: [ 13 | { 14 | path: '/', 15 | name: 'index', 16 | component: index 17 | }, 18 | { 19 | path: '/hotAnalyse', 20 | name: 'hotAnalyse', 21 | component: hotAnalyse 22 | }, 23 | { 24 | path: '/onlineSpider', 25 | name: 'onlineSpider', 26 | component: onlineSpider 27 | }, 28 | { 29 | path: '/chartAnalyse', 30 | name: 'chartAnalyse', 31 | component: chartAnalyse 32 | }, 33 | { 34 | path: '/paperDetail', 35 | name: 'paperDetail', 36 | component: paperDetail 37 | } 38 | ] 39 | }) 40 | -------------------------------------------------------------------------------- /frontend/static/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coder-syl/cnki-spider-and-analyse/85118a20028403b6f6e21d1cd0f933bf96f0d4dd/frontend/static/.gitkeep -------------------------------------------------------------------------------- /frontend/test/e2e/custom-assertions/elementCount.js: -------------------------------------------------------------------------------- 1 | // A custom Nightwatch assertion. 2 | // The assertion name is the filename. 3 | // Example usage: 4 | // 5 | // browser.assert.elementCount(selector, count) 6 | // 7 | // For more information on custom assertions see: 8 | // http://nightwatchjs.org/guide#writing-custom-assertions 9 | 10 | exports.assertion = function (selector, count) { 11 | this.message = 'Testing if element <' + selector + '> has count: ' + count 12 | this.expected = count 13 | this.pass = function (val) { 14 | return val === this.expected 15 | } 16 | this.value = function (res) { 17 | return res.value 18 | } 19 | this.command = function (cb) { 20 | var self = this 21 | return this.api.execute(function (selector) { 22 | return document.querySelectorAll(selector).length 23 | }, [selector], function (res) { 24 | cb.call(self, res) 25 | }) 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /frontend/test/e2e/nightwatch.conf.js: -------------------------------------------------------------------------------- 1 | require('babel-register') 2 | var config = require('../../config') 3 | 4 | // http://nightwatchjs.org/gettingstarted#settings-file 5 | module.exports = { 6 | src_folders: ['test/e2e/specs'], 7 | output_folder: 'test/e2e/reports', 8 | custom_assertions_path: ['test/e2e/custom-assertions'], 9 | 10 | selenium: { 11 | start_process: true, 12 | server_path: require('selenium-server').path, 13 | host: '127.0.0.1', 14 | port: 4444, 15 | cli_args: { 16 | 'webdriver.chrome.driver': require('chromedriver').path 17 | } 18 | }, 19 | 20 | test_settings: { 21 | default: { 22 | selenium_port: 4444, 23 | selenium_host: 'localhost', 24 | silent: true, 25 | globals: { 26 | devServerURL: 'http://localhost:' + (process.env.PORT || config.dev.port) 27 | } 28 | }, 29 | 30 | chrome: { 31 | desiredCapabilities: { 32 | browserName: 'chrome', 33 | javascriptEnabled: true, 34 | acceptSslCerts: true 35 | } 36 | }, 37 | 38 | firefox: { 39 | desiredCapabilities: { 40 | browserName: 'firefox', 41 | javascriptEnabled: true, 42 | acceptSslCerts: true 43 | } 44 | } 45 | } 46 | } 47 | -------------------------------------------------------------------------------- /frontend/test/e2e/runner.js: -------------------------------------------------------------------------------- 1 | // 1. start the dev server using production config 2 | process.env.NODE_ENV = 'testing' 3 | 4 | const webpack = require('webpack') 5 | const DevServer = require('webpack-dev-server') 6 | 7 | const webpackConfig = require('../../build/webpack.prod.conf') 8 | const devConfigPromise = require('../../build/webpack.dev.conf') 9 | 10 | let server 11 | 12 | devConfigPromise.then(devConfig => { 13 | const devServerOptions = devConfig.devServer 14 | const compiler = webpack(webpackConfig) 15 | server = new DevServer(compiler, devServerOptions) 16 | const port = devServerOptions.port 17 | const host = devServerOptions.host 18 | return server.listen(port, host) 19 | }) 20 | .then(() => { 21 | // 2. run the nightwatch test suite against it 22 | // to run in additional browsers: 23 | // 1. add an entry in test/e2e/nightwatch.conf.js under "test_settings" 24 | // 2. add it to the --env flag below 25 | // or override the environment flag, for example: `npm run e2e -- --env chrome,firefox` 26 | // For more information on Nightwatch's config file, see 27 | // http://nightwatchjs.org/guide#settings-file 28 | let opts = process.argv.slice(2) 29 | if (opts.indexOf('--config') === -1) { 30 | opts = opts.concat(['--config', 'test/e2e/nightwatch.conf.js']) 31 | } 32 | if (opts.indexOf('--env') === -1) { 33 | opts = opts.concat(['--env', 'chrome']) 34 | } 35 | 36 | const spawn = require('cross-spawn') 37 | const runner = spawn('./node_modules/.bin/nightwatch', opts, { stdio: 'inherit' }) 38 | 39 | runner.on('exit', function (code) { 40 | server.close() 41 | process.exit(code) 42 | }) 43 | 44 | runner.on('error', function (err) { 45 | server.close() 46 | throw err 47 | }) 48 | }) 49 | -------------------------------------------------------------------------------- /frontend/test/e2e/specs/test.js: -------------------------------------------------------------------------------- 1 | // For authoring Nightwatch tests, see 2 | // http://nightwatchjs.org/guide#usage 3 | 4 | module.exports = { 5 | 'default e2e tests': function (browser) { 6 | // automatically uses dev Server port from /config.index.js 7 | // default: http://localhost:8080 8 | // see nightwatch.conf.js 9 | const devServer = browser.globals.devServerURL 10 | 11 | browser 12 | .url(devServer) 13 | .waitForElementVisible('#app', 5000) 14 | .assert.elementPresent('.hello') 15 | .assert.containsText('h1', 'Welcome to Your Vue.js App') 16 | .assert.elementCount('img', 1) 17 | .end() 18 | } 19 | } 20 | -------------------------------------------------------------------------------- /frontend/test/unit/.eslintrc: -------------------------------------------------------------------------------- 1 | { 2 | "env": { 3 | "jest": true 4 | }, 5 | "globals": { 6 | } 7 | } 8 | -------------------------------------------------------------------------------- /frontend/test/unit/jest.conf.js: -------------------------------------------------------------------------------- 1 | const path = require('path') 2 | 3 | module.exports = { 4 | rootDir: path.resolve(__dirname, '../../'), 5 | moduleFileExtensions: [ 6 | 'js', 7 | 'json', 8 | 'vue' 9 | ], 10 | moduleNameMapper: { 11 | '^@/(.*)$': '/src/$1' 12 | }, 13 | transform: { 14 | '^.+\\.js$': '/node_modules/babel-jest', 15 | '.*\\.(vue)$': '/node_modules/vue-jest' 16 | }, 17 | testPathIgnorePatterns: [ 18 | '/test/e2e' 19 | ], 20 | snapshotSerializers: ['/node_modules/jest-serializer-vue'], 21 | setupFiles: ['/test/unit/setup'], 22 | mapCoverage: true, 23 | coverageDirectory: '/test/unit/coverage', 24 | collectCoverageFrom: [ 25 | 'src/**/*.{js,vue}', 26 | '!src/main.js', 27 | '!src/router/index.js', 28 | '!**/node_modules/**' 29 | ] 30 | } 31 | -------------------------------------------------------------------------------- /frontend/test/unit/setup.js: -------------------------------------------------------------------------------- 1 | import Vue from 'vue' 2 | 3 | Vue.config.productionTip = false 4 | -------------------------------------------------------------------------------- /frontend/test/unit/specs/HelloWorld.spec.js: -------------------------------------------------------------------------------- 1 | import Vue from 'vue' 2 | import HelloWorld from '@/components/HelloWorld' 3 | 4 | describe('HelloWorld.vue', () => { 5 | it('should render correct contents', () => { 6 | const Constructor = Vue.extend(HelloWorld) 7 | const vm = new Constructor().$mount() 8 | expect(vm.$el.querySelector('.hello h1').textContent) 9 | .toEqual('Welcome to Your Vue.js App') 10 | }) 11 | }) 12 | -------------------------------------------------------------------------------- /manage.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import os 3 | import sys 4 | 5 | if __name__ == "__main__": 6 | os.environ.setdefault("DJANGO_SETTINGS_MODULE", "cnki.settings") 7 | try: 8 | from django.core.management import execute_from_command_line 9 | except ImportError as exc: 10 | raise ImportError( 11 | "Couldn't import Django. Are you sure it's installed and " 12 | "available on your PYTHONPATH environment variable? Did you " 13 | "forget to activate a virtual environment?" 14 | ) from exc 15 | execute_from_command_line(sys.argv) 16 | -------------------------------------------------------------------------------- /spider/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coder-syl/cnki-spider-and-analyse/85118a20028403b6f6e21d1cd0f933bf96f0d4dd/spider/__init__.py -------------------------------------------------------------------------------- /spider/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coder-syl/cnki-spider-and-analyse/85118a20028403b6f6e21d1cd0f933bf96f0d4dd/spider/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /spider/__pycache__/config.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coder-syl/cnki-spider-and-analyse/85118a20028403b6f6e21d1cd0f933bf96f0d4dd/spider/__pycache__/config.cpython-36.pyc -------------------------------------------------------------------------------- /spider/__pycache__/db_handle.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coder-syl/cnki-spider-and-analyse/85118a20028403b6f6e21d1cd0f933bf96f0d4dd/spider/__pycache__/db_handle.cpython-36.pyc -------------------------------------------------------------------------------- /spider/__pycache__/get_authors.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coder-syl/cnki-spider-and-analyse/85118a20028403b6f6e21d1cd0f933bf96f0d4dd/spider/__pycache__/get_authors.cpython-36.pyc -------------------------------------------------------------------------------- /spider/__pycache__/get_cookies.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coder-syl/cnki-spider-and-analyse/85118a20028403b6f6e21d1cd0f933bf96f0d4dd/spider/__pycache__/get_cookies.cpython-36.pyc -------------------------------------------------------------------------------- /spider/__pycache__/get_fund.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coder-syl/cnki-spider-and-analyse/85118a20028403b6f6e21d1cd0f933bf96f0d4dd/spider/__pycache__/get_fund.cpython-36.pyc -------------------------------------------------------------------------------- /spider/__pycache__/get_keyWordID.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coder-syl/cnki-spider-and-analyse/85118a20028403b6f6e21d1cd0f933bf96f0d4dd/spider/__pycache__/get_keyWordID.cpython-36.pyc -------------------------------------------------------------------------------- /spider/__pycache__/get_school.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coder-syl/cnki-spider-and-analyse/85118a20028403b6f6e21d1cd0f933bf96f0d4dd/spider/__pycache__/get_school.cpython-36.pyc -------------------------------------------------------------------------------- /spider/__pycache__/get_year.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coder-syl/cnki-spider-and-analyse/85118a20028403b6f6e21d1cd0f933bf96f0d4dd/spider/__pycache__/get_year.cpython-36.pyc -------------------------------------------------------------------------------- /spider/__pycache__/mian.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coder-syl/cnki-spider-and-analyse/85118a20028403b6f6e21d1cd0f933bf96f0d4dd/spider/__pycache__/mian.cpython-36.pyc -------------------------------------------------------------------------------- /spider/__pycache__/paper_detail.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coder-syl/cnki-spider-and-analyse/85118a20028403b6f6e21d1cd0f933bf96f0d4dd/spider/__pycache__/paper_detail.cpython-36.pyc -------------------------------------------------------------------------------- /spider/__pycache__/paper_spider.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coder-syl/cnki-spider-and-analyse/85118a20028403b6f6e21d1cd0f933bf96f0d4dd/spider/__pycache__/paper_spider.cpython-36.pyc -------------------------------------------------------------------------------- /spider/__pycache__/paper_spider_by_app.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coder-syl/cnki-spider-and-analyse/85118a20028403b6f6e21d1cd0f933bf96f0d4dd/spider/__pycache__/paper_spider_by_app.cpython-36.pyc -------------------------------------------------------------------------------- /spider/config.py: -------------------------------------------------------------------------------- 1 | db_config={ 2 | 'host':'127.0.0.1', 3 | # 'host': '/Applications/MAMP/tmp/mysql/mysql.sock', # 主机 4 | 'port':3306, 5 | 'user':'root', 6 | 'password':'root', 7 | 'database':'cnki', 8 | # 'charset':'UTF8', 9 | } 10 | # paper_title=str('123') 11 | # url="http://localhost:8000/cnki/paperDetail?title=" 12 | # local_url = url+paper_title 13 | # print(local_url) -------------------------------------------------------------------------------- /spider/db_handle.py: -------------------------------------------------------------------------------- 1 | # coding:utf-8 2 | 3 | import pymysql 4 | from .config import db_config as df 5 | # from config import db_config as df 6 | 7 | class dbHandle(): 8 | 9 | def __init__(self,): 10 | try: 11 | self.conn = pymysql.connect(host=df['host'], user=df['user'], password=df['password'],database=df['database'],unix_socket="/Applications/MAMP/tmp/mysql/mysql.sock",charset='utf8') 12 | except: 13 | print("连接数据库失败") 14 | self.cur = self.conn.cursor() 15 | 16 | def dbClose(self): 17 | if self.conn and self.cur: 18 | self.cur.close() 19 | self.conn.close() 20 | 21 | def dbQuery(self,sql): 22 | self.cur.execute(sql) 23 | data = self.cur.fetchall() 24 | return data 25 | def dbInsert(self, sql): 26 | print('开始插入'); 27 | try: 28 | self.cur.execute(sql) 29 | print("插入成功!!!") 30 | self.conn.commit() 31 | 32 | except Exception as e: 33 | print(e) 34 | print('插入失败!!!') 35 | 36 | def dbUpdate(self, sql): 37 | try: 38 | self.cur.execute(sql) 39 | print("更新状态成功!!!") 40 | self.conn.commit() 41 | 42 | except Exception as e: 43 | print(e) 44 | print('更新状态失败!!!') -------------------------------------------------------------------------------- /spider/get_authors.py: -------------------------------------------------------------------------------- 1 | from selenium import webdriver 2 | import time 3 | 4 | 5 | from .db_handle import dbHandle 6 | # from db_handle import dbHandle 7 | def getAuthors(driver, keywordID): 8 | print('点击作者链接') 9 | print(driver.find_element_by_link_text('作者').text) 10 | driver.find_element_by_link_text('作者').click() 11 | time.sleep(5) 12 | li_div = driver.find_element_by_class_name('hide') 13 | ul = li_div.find_element_by_tag_name('ul') 14 | lis = ul.find_elements_by_tag_name('li') 15 | print(lis) 16 | for li in lis: 17 | print('作者', str(li.text).replace('\n', '')) 18 | author = str(li.text).split('(')[0].replace('\n', '') 19 | number = str(li.text).split('(')[1].replace('\n', '').replace(')', '') 20 | print(author) 21 | print(number) 22 | dbhandle = dbHandle() 23 | # 插入年的信息 24 | in_author_sql = "INSERT INTO analyse_author ( author ) values('%s')" % (author) 25 | dbhandle.dbInsert(in_author_sql) 26 | 27 | query_authorID_sql = "select id from analyse_author where author='%s' " % (author) 28 | print(query_authorID_sql) 29 | author_id = dbhandle.dbQuery(query_authorID_sql)[0][0] 30 | print(author, 'author_id', author_id) 31 | 32 | in_author_to_keyword = "INSERT INTO analyse_authortokeyword(author_id_id,keyword_id_id,counts)" \ 33 | "values('%d','%d','%d')" \ 34 | % (author_id, keywordID, int(number)) 35 | dbhandle.dbInsert(in_author_to_keyword) 36 | time.sleep(5) 37 | -------------------------------------------------------------------------------- /spider/get_cookies.py: -------------------------------------------------------------------------------- 1 | from selenium import webdriver 2 | import time 3 | from .get_year import getYear 4 | from .get_authors import getAuthors 5 | from .get_school import getSchools 6 | from .get_fund import getFunds 7 | from .get_keyWordID import getKeywordID,insertKeywordID 8 | # 9 | # from get_year import getYear 10 | # from get_authors import getAuthors 11 | # from get_school import getSchools 12 | # from get_fund import getFunds 13 | # from get_keyWordID import getKeywordID,insertKeywordID 14 | 15 | 16 | def getCookies(keyWord): 17 | driver = webdriver.Chrome('/Applications/chromedriver') 18 | print('正在打开知网') 19 | driver.get('http://www.cnki.net/') 20 | # print(driver.title) 21 | print('正在获取cookies') 22 | search_text = driver.find_element_by_id('txt_SearchText') 23 | search_text.send_keys(keyWord) 24 | driver.find_element_by_class_name('input-box').find_element_by_class_name('search-btn').click() 25 | driver.refresh() 26 | time.sleep(5) 27 | insertKeywordID(keyWord) 28 | keyWord_id = getKeywordID(keyWord) 29 | getYear(driver, keyWord_id) 30 | 31 | # 因为只有上一个点击了下一个标签才会被加载 32 | print('点击研究层次') 33 | print(driver.find_element_by_link_text('研究层次').text) 34 | driver.find_element_by_link_text('研究层次').click() 35 | time.sleep(5) 36 | 37 | getAuthors(driver, keyWord_id) 38 | getSchools(driver, keyWord_id) 39 | getFunds(driver, keyWord_id) 40 | 41 | cookies = {} 42 | for cookie in driver.get_cookies(): 43 | cookies[cookie['name']] = cookie['value'] 44 | driver.switch_to.frame('iframeResult') 45 | total_num = int( 46 | str(driver.find_element_by_class_name('pagerTitleCell').text).replace(' 找到 ', '').replace(' 条结果', '').replace( 47 | ',', '')) 48 | # driver.find_element_by_link_text('50').click() 49 | print(cookies) 50 | time.sleep(5) 51 | return cookies, total_num 52 | 53 | 54 | # getCookies('大数据') 55 | -------------------------------------------------------------------------------- /spider/get_fund.py: -------------------------------------------------------------------------------- 1 | from selenium import webdriver 2 | import time 3 | 4 | from .db_handle import dbHandle 5 | # from db_handle import dbHandle 6 | 7 | def getFunds(driver, keywordID): 8 | print('点击基金链接') 9 | print(driver.find_element_by_link_text('基金').text) 10 | driver.find_element_by_link_text('基金').click() 11 | time.sleep(5) 12 | li_div = driver.find_element_by_class_name('hide') 13 | ul = li_div.find_element_by_tag_name('ul') 14 | lis = ul.find_elements_by_tag_name('li') 15 | print(lis) 16 | for li in lis: 17 | print('基金', str(li.text).replace('\n', '')) 18 | fund = str(li.text).split('(')[0].replace('\n', '') 19 | if(fund=='国家高技术研究发展计划'): 20 | fund=fund+'(863计划)' 21 | if(fund=='国家重点基础研究发展计划'): 22 | fund = fund + '(973计划)' 23 | if(fund=='江苏省教育厅人文社会科学研究基...'): 24 | fund='江苏省教育厅人文社会科学研究基金' 25 | number = str(li.text).replace('(97...', '').replace('(863...', '') 26 | number = number.split('(')[1].replace('\n', '').replace(')', '') 27 | 28 | print(fund) 29 | print(number) 30 | 31 | dbhandle = dbHandle() 32 | # 插入年的信息 33 | in_fund_sql = "INSERT INTO analyse_fund ( fund ) values('%s')" % (fund) 34 | dbhandle.dbInsert(in_fund_sql) 35 | 36 | query_fundID_sql = "select id from analyse_fund where fund='%s' " % (fund) 37 | print(query_fundID_sql) 38 | fund_id = dbhandle.dbQuery(query_fundID_sql)[0][0] 39 | print(fund, 'fund_id', fund_id) 40 | 41 | in_fund_to_keyword = "INSERT INTO analyse_fundtokeyword(fund_id_id,keyword_id_id,counts)" \ 42 | "values('%d','%d','%d')" \ 43 | % (fund_id, keywordID, int(number)) 44 | dbhandle.dbInsert(in_fund_to_keyword) 45 | time.sleep(5) 46 | -------------------------------------------------------------------------------- /spider/get_keyWordID.py: -------------------------------------------------------------------------------- 1 | from selenium import webdriver 2 | import time 3 | 4 | from .db_handle import dbHandle 5 | # from db_handle import dbHandle 6 | 7 | def insertKeywordID(keyword): 8 | dbhandle = dbHandle() 9 | # 插入关键词的信息 10 | in_keyword_sql = "INSERT INTO analyse_keyword ( keyword ) values('%s')" % (keyword) 11 | dbhandle.dbInsert(in_keyword_sql) 12 | 13 | query_KeywordCount_sql = "select counts from analyse_keyword where keyword='%s' " % (keyword) 14 | print(query_KeywordCount_sql) 15 | KeywordCount = dbhandle.dbQuery(query_KeywordCount_sql)[0][0] 16 | 17 | in_keyword_sql = "UPDATE analyse_keyword SET counts='%d' where keyword='%s'" % (int(KeywordCount),keyword) 18 | dbhandle.dbInsert(in_keyword_sql) 19 | 20 | def getKeywordID(keyword): 21 | dbhandle = dbHandle() 22 | query_KeywordID_sql = "select id from analyse_keyword where keyword='%s' " % (keyword) 23 | print(query_KeywordID_sql) 24 | KeywordID = dbhandle.dbQuery(query_KeywordID_sql)[0][0] 25 | print(KeywordID, 'KeywordID', KeywordID) 26 | return KeywordID 27 | -------------------------------------------------------------------------------- /spider/get_school.py: -------------------------------------------------------------------------------- 1 | from selenium import webdriver 2 | import time 3 | 4 | from .db_handle import dbHandle 5 | # from db_handle import dbHandle 6 | def getSchools(driver, keywordID): 7 | print('点击机构链接') 8 | print(driver.find_element_by_link_text('机构').text) 9 | driver.find_element_by_link_text('机构').click() 10 | time.sleep(5) 11 | li_div = driver.find_element_by_class_name('hide') 12 | ul = li_div.find_element_by_tag_name('ul') 13 | lis = ul.find_elements_by_tag_name('li') 14 | print(lis) 15 | for li in lis: 16 | print('机构', str(li.text).replace('\n', '')) 17 | school = str(li.text).split('(')[0].replace('\n', '') 18 | number = str(li.text).split('(')[1].replace('\n', '').replace(')', '') 19 | print(school) 20 | print(number) 21 | dbhandle = dbHandle() 22 | # 插入年的信息 23 | in_school_sql = "INSERT INTO analyse_school ( school ) values('%s')" % (school) 24 | dbhandle.dbInsert(in_school_sql) 25 | 26 | query_schoolID_sql = "select id from analyse_school where school='%s' " % (school) 27 | print(query_schoolID_sql) 28 | school_id = dbhandle.dbQuery(query_schoolID_sql)[0][0] 29 | print(school, 'school_id', school_id) 30 | 31 | in_school_to_keyword = "INSERT INTO analyse_schooltokeyword(school_id_id,keyword_id_id,counts)" \ 32 | "values('%d','%d','%d')" \ 33 | % (school_id, keywordID, int(number)) 34 | dbhandle.dbInsert(in_school_to_keyword) 35 | time.sleep(5) -------------------------------------------------------------------------------- /spider/get_year.py: -------------------------------------------------------------------------------- 1 | from selenium import webdriver 2 | import time 3 | 4 | from .db_handle import dbHandle 5 | 6 | 7 | # from db_handle import dbHandle 8 | 9 | 10 | def getYear(driver, keywordID): 11 | print('点击发表年度链接') 12 | print(driver.find_element_by_link_text('发表年度').text) 13 | driver.find_element_by_link_text('发表年度').click() 14 | time.sleep(5) 15 | li_div = driver.find_element_by_class_name('hide') 16 | ul = li_div.find_element_by_tag_name('ul') 17 | lis = ul.find_elements_by_tag_name('li') 18 | print(lis) 19 | for li in lis: 20 | print('nian', str(li.text).replace('\n', '')) 21 | year = str(li.text).replace('\n', '')[0:4] 22 | number = str(li.text).replace('\n', '')[5:].replace(')', '') 23 | print(year) 24 | print(number) 25 | 26 | dbhandle = dbHandle() 27 | # 插入年的信息 28 | in_year_sql = "INSERT INTO analyse_year ( year ) values('%s')" % (year) 29 | dbhandle.dbInsert(in_year_sql) 30 | 31 | query_yearID_sql = "select id from analyse_year where year='%s' " % (year) 32 | print(query_yearID_sql) 33 | year_id = dbhandle.dbQuery(query_yearID_sql)[0][0] 34 | print(year, 'year_id', year_id) 35 | #queryCount_sql = "select count(*) from analyse_yeartokeyword where keyword_id_id={}".format(keywordID) 36 | #print(queryCount_sql) 37 | # haveCount =0 #dbhandle.dbQuery(queryCount_sql)[0][0] 38 | # if (haveCount == 0): 39 | # break; 40 | in_year_to_keyword = "INSERT INTO analyse_yeartokeyword(year_id_id,keyword_id_id,counts)" \ 41 | "values('%d','%d','%d')" \ 42 | % (year_id, keywordID, int(number)) 43 | # else: 44 | # print('数据库中已经存在该数据') 45 | dbhandle.dbInsert(in_year_to_keyword) 46 | time.sleep(5) 47 | # return 1; 48 | -------------------------------------------------------------------------------- /spider/mian.py: -------------------------------------------------------------------------------- 1 | from selenium import webdriver 2 | from selenium.webdriver.support.ui import WebDriverWait 3 | from selenium.webdriver.support import expected_conditions as EC 4 | from selenium.webdriver.common.by import By 5 | import time 6 | import json 7 | import csv 8 | from celery import task 9 | 10 | 11 | 12 | 13 | @task 14 | def start_spider(page): 15 | print("爬虫启动") 16 | # 设置谷歌驱动器的环境 17 | options = webdriver.ChromeOptions() 18 | # 设置chrome不加载图片,提高速度 19 | options.add_experimental_option("prefs", {"profile.managed_default_content_settings.images": 2}) 20 | # 创建一个谷歌驱动器 21 | browser = webdriver.Chrome('/Applications/chromedriver', chrome_options=options) 22 | url = 'http://wap.cnki.net/touch/web/guide' 23 | 24 | # 声明一个全局列表,用来存储字典 25 | data_list = [] 26 | # 请求url 27 | browser.get(url) 28 | # 显示等待输入框是否加载完成 29 | WebDriverWait(browser, 1000).until( 30 | EC.presence_of_all_elements_located( 31 | (By.ID, 'keyword') 32 | ) 33 | ) 34 | # 找到输入框的id,并输入python关键字 35 | browser.find_element_by_id('keyword').click() 36 | browser.find_element_by_id('keyword_ordinary').send_keys('python') 37 | # 输入关键字之后点击搜索 38 | browser.find_element_by_class_name('btn-search ').click() 39 | # print(browser.page_source) 40 | # 显示等待文献是否加载完成 41 | WebDriverWait(browser, 1000).until( 42 | EC.presence_of_all_elements_located( 43 | (By.CLASS_NAME, 'g-search-body') 44 | ) 45 | ) 46 | 47 | # 声明一个标记,用来标记翻页几页 48 | count = 1 49 | while True: 50 | # 显示等待加载更多按钮加载完成 51 | WebDriverWait(browser, 1000).until( 52 | EC.presence_of_all_elements_located( 53 | (By.CLASS_NAME, 'c-company__body-item-more') 54 | ) 55 | ) 56 | # 获取加载更多按钮 57 | Btn = browser.find_element_by_class_name('c-company__body-item-more') 58 | # 显示等待该信息加载完成 59 | WebDriverWait(browser, 1000).until( 60 | EC.presence_of_all_elements_located( 61 | (By.XPATH, '//div[@id="searchlist_div"]/div[{}]/div[@class="c-company__body-item"]'.format(2*count-1)) 62 | ) 63 | ) 64 | # 获取在div标签的信息,其中format(2*count-1)是因为加载的时候有显示多少条 65 | # 简单的说就是这些div的信息都是奇数 66 | divs = browser.find_elements_by_xpath('//div[@id="searchlist_div"]/div[{}]/div[@class="c-company__body-item"]'.format(2*count-1)) 67 | # 遍历循环 68 | for div in divs: 69 | # 获取文献的题目 70 | name = div.find_element_by_class_name('c-company__body-title').text 71 | # 获取文献的作者 72 | author = div.find_element_by_class_name('c-company__body-author').text 73 | # 获取文献的摘要 74 | content = div.find_element_by_class_name('c-company__body-content').text 75 | # 获取文献的来源和日期、文献类型等 76 | text = div.find_element_by_class_name('c-company__body-name').text.split() 77 | if (len(text) == 3 and text[-1] == '优先') or len(text) == 2: 78 | # 来源 79 | source = text[0] 80 | # 日期 81 | datetime = text[1] 82 | # 文献类型 83 | literature_type = None 84 | else: 85 | source = text[0] 86 | datetime = text[2] 87 | literature_type = text[1] 88 | # 获取下载数和被引数 89 | temp = div.find_element_by_class_name('c-company__body-info').text.split() 90 | # 下载数 91 | download = temp[0].split(':')[-1] 92 | # 被引数 93 | cite = temp[1].split(':')[-1] 94 | 95 | # 声明一个字典存储数据 96 | data_dict = {} 97 | data_dict['name'] = name 98 | data_dict['author'] = author 99 | data_dict['content'] = content 100 | data_dict['source'] = source 101 | data_dict['datetime'] = datetime 102 | data_dict['literature_type'] = literature_type 103 | data_dict['download'] = download 104 | data_dict['cite'] = cite 105 | 106 | data_list.append(data_dict) 107 | print(data_dict) 108 | # 如果Btn按钮(就是加载更多这个按钮)没有找到(就是已经到底了),就退出 109 | if not Btn: 110 | break 111 | else: 112 | Btn.click() 113 | # 如果到了爬取的页数就退出 114 | if count == page: 115 | break 116 | 117 | count += 1 118 | 119 | # 延迟两秒,我们不是在攻击服务器 120 | time.sleep(2) 121 | 122 | 123 | # def main(): 124 | # 125 | # start_spider(eval(input('请输入要爬取的页数(如果需要全部爬取请输入0):'))) 126 | # 127 | # # 将数据写入json文件中 128 | # # with open('data_json.json', 'a+', encoding='utf-8') as f: 129 | # # json.dump(data_list, f, ensure_ascii=False, indent=4) 130 | # # print('json文件写入完成') 131 | # # 132 | # # # 将数据写入csv文件 133 | # # with open('data_csv.csv', 'w', encoding='utf-8', newline='') as f: 134 | # # # 表头 135 | # # title = data_list[0].keys() 136 | # # # 声明writer对象 137 | # # writer = csv.DictWriter(f, title) 138 | # # # 写入表头 139 | # # writer.writeheader() 140 | # # # 批量写入数据 141 | # # writer.writerows(data_list) 142 | # # print('csv文件写入完成') 143 | # 144 | # 145 | # if __name__ == '__main__': 146 | # 147 | # main() 148 | 149 | -------------------------------------------------------------------------------- /spider/paper_detail.py: -------------------------------------------------------------------------------- 1 | # coding:utf8 2 | # from selenium import webdriver 3 | import time 4 | from bs4 import BeautifulSoup 5 | import requests 6 | import time 7 | # from celery import task 8 | # import redis 9 | # import pymysql 10 | # 11 | from .get_cookies import getCookies 12 | from .db_handle import dbHandle 13 | 14 | # from get_cookies import getCookies 15 | # from db_handle import dbHandle 16 | # this is a function about need many time 17 | 18 | # @task 19 | # paper_url='http://kns.cnki.net/KCMS/detail/11.2103.TN.20190422.1403.030.html?uid=WEEvREdxOWJmbC9oM1NjYkZCbDdrdW1QRWVHWlNKY2JNUkFVTThpbHZRbFU=$R1yZ0H6jyaa0en3RxVUd8df-oHi7XMMDo7mtKT6mSmEvTuk11l2gFA!!&v=MDA3NTN1WnJGQ3JsVUwzTUlWWT1JVFhBZHJHNEg5ak1xNDFIWk90Mll3OU16bVJuNmo1N1QzZmxxV00wQ0xMN1I3cWVi' 20 | def paperDetail(cookies,paper_url): 21 | print('开始爬取详情页') 22 | # DB = dbHandle(); 23 | # 接收用户输入的关键词 24 | paper_url=paper_url 25 | cookies = dict(cookies) 26 | print(paper_url,cookies) 27 | r = requests.get(paper_url, cookies=cookies) 28 | # print(r.text) 29 | paper_description='' 30 | paper_fund='' 31 | paper_keyword='' 32 | try: 33 | soup = BeautifulSoup(r.text, 'lxml') 34 | results = soup.select('.wxBaseinfo') 35 | paper_description=results[0].select('p')[0] 36 | paper_description=str(paper_description.text) 37 | paper_fund = results[0].select('p')[1] 38 | paper_fund=str(paper_fund.text).replace(' ','').replace('\r\n','') 39 | paper_keyword = results[0].select('p')[2] 40 | paper_keyword=str(paper_keyword.text).replace(' ','').replace('\r\n','') 41 | time.sleep(3) 42 | except Exception as e: 43 | print('数据为空',e) 44 | 45 | return paper_description, paper_fund, paper_keyword 46 | # 47 | # cookies= {'c_m_expire': '2019-04-23 12:51:27', '_pk_ses': '*', '_pk_ref': '%5B%22%22%2C%22%22%2C1555993936%2C%22http%3A%2F%2Fwww.cnki.net%2F%22%5D', 'c_m_LinID': 'LinID=WEEvREcwSlJHSldRa1FhdkJkVG1BVmpTQUozQ2hhR21URFB6cDkxZU9YWT0=$9A4hF_YAuvQ5obgVAqNKPCYcEjKensW4IQMovwHtwkF4VYPoHbKxJw!!&ot=04/23/2019 12:51:27', 'SID_krsnew': '125131', 'Ecp_ClientId': '4190423123001492160', 'Ecp_LoginStuts': '%7B%22IsAutoLogin%22%3Afalse%2C%22UserName%22%3A%22NJ0051%22%2C%22ShowName%22%3A%22%25E6%25B1%259F%25E8%258B%258F%25E5%25B8%2588%25E8%258C%2583%25E5%25A4%25A7%25E5%25AD%25A6%25E4%25B8%2580%25E5%25B8%25A6%25E4%25B8%2580%25E8%25B7%25AF%25E7%25A0%2594%25E7%25A9%25B6%25E9%2599%25A2%22%2C%22UserType%22%3A%22bk%22%2C%22r%22%3A%22IB43OG%22%7D', 'SID_crrs': '125133', 'LID': 'WEEvREcwSlJHSldRa1FhdkJkVG1BVmpTQUozQ2hhR21URFB6cDkxZU9YWT0=$9A4hF_YAuvQ5obgVAqNKPCYcEjKensW4IQMovwHtwkF4VYPoHbKxJw!!', 'Ecp_session': '1', 'SID_klogin': '125143', 'KNS_SortType': '', 'cnkiUserKey': 'b18eb70d-b09a-71d5-28a1-bb3d7d9acb00', 'SID_kns': '123113', 'ASP.NET_SessionId': 'lllaehphb55hcugftjtnb2dv', 'RsPerPage': '20', 'Ecp_notFirstLogin': 'IB43OG'} 48 | # paperDetail(cookies,paper_url) -------------------------------------------------------------------------------- /spider/paper_spider.py: -------------------------------------------------------------------------------- 1 | # coding:utf8 2 | from selenium import webdriver 3 | import time 4 | from bs4 import BeautifulSoup 5 | import requests 6 | import time 7 | from celery import task 8 | import redis 9 | import pymysql 10 | # 11 | from .get_cookies import getCookies 12 | from .db_handle import dbHandle 13 | from .paper_detail import paperDetail 14 | 15 | 16 | # 17 | # from get_cookies import getCookies 18 | # from db_handle import dbHandle 19 | 20 | 21 | # this is a function about need many time 22 | 23 | @task 24 | def paperSpider(keyWord): 25 | print('爬虫启动') 26 | # conn = redis.Redis(host='127.0.0.1', port=6379,db=1,charset='utf-8',) 27 | # print('reids 连接成功') 28 | DB = dbHandle(); 29 | # 接收用户输入的关键词 30 | key_word = keyWord 31 | # 获取到cookies,以便登录,同时获取到该关键词下有多少篇文章 32 | cookies, total_num = getCookies(key_word) # 获取到 33 | # cookies字典化 34 | cookies = dict(cookies) 35 | urls = [ 36 | 'http://kns.cnki.net/kns/brief/brief.aspx?curpage={0}&RecordsPerPage=50&QueryID=0&ID=&turnpage=1&tpagemode=L&dbPrefix=SCDB&Fields=&DisplayMode=listmode&PageName=ASP.brief_default_result_aspx#J_ORDER&'.format( 37 | page) for page in range(1, int(total_num / 20))] 38 | # k是用来记录爬取了多少页 39 | k = 1 40 | for url in urls: 41 | num = 0 42 | r = requests.get(url, cookies=cookies) 43 | soup = BeautifulSoup(r.text, 'lxml') 44 | results = soup.select('.GridTableContent tr') 45 | print("正在爬取第%d页*************************************************************" % k); 46 | k = k + 1 47 | # print('requesting ' + url) 48 | if not results: 49 | driver = webdriver.Chrome('/Applications/chromedriver') 50 | for key in cookies: 51 | print(key, cookies[key]) 52 | driver.add_cookie({key: cookies[key]}) 53 | driver.get(url) 54 | break 55 | else: 56 | num = 0 57 | for r in results: 58 | if r.has_attr('bgcolor'): 59 | # 解析出标题 60 | record = r.select('td')[1].find('a') 61 | paper_title = str(record.text).replace('\n', '') 62 | # 文章详情链接页 63 | paper_url = 'http://kns.cnki.net' + str(record.attrs['href']).replace('kns', 'KCMS') 64 | # 作者 65 | paper_author = str(r.select('td')[2].text).replace('\n', '') 66 | authors = paper_author.split(';') 67 | # 文章发表的期刊 68 | paper_magazine = str(r.select('td')[3].text).replace('\n', '') 69 | # 年份 70 | paper_year = str(r.select('td')[4].text) 71 | # .replace(' ', '')[0:5] 72 | # 文章的类型 73 | paper_source_type = str(r.select('td')[5].text).replace(' ', '').replace('\n', '') 74 | title = str(record.text).replace('\n', '') 75 | local_url = "http://localhost:8000/cnki/paperDetail?title=" 76 | local_url = str(local_url + title) 77 | print(local_url) 78 | print('论文发表年限------------', paper_year) 79 | 80 | paper_description, paper_funds, paper_keywords=paperDetail(cookies, paper_url) 81 | print(paper_description,paper_funds,paper_keywords) 82 | # 存储数据库的信息 83 | dbhandle = dbHandle() 84 | # 插入文章的信息 85 | in_paper_sql = "INSERT INTO analyse_paper(title,url,sch_key,description,paper_funds,paper_keywords,public_year,local_url,source_type)" \ 86 | "values('%s','%s','%s','%s','%s','%s','%s','%s','%s')" \ 87 | % (paper_title, paper_url, key_word, paper_description,paper_funds,paper_keywords,paper_year, local_url, paper_magazine) 88 | print(in_paper_sql) 89 | dbhandle.dbInsert(in_paper_sql) 90 | query_paperID_sql = "select id from analyse_paper where title='%s' " % (paper_title) 91 | print(query_paperID_sql) 92 | paper_id = dbhandle.dbQuery(query_paperID_sql)[0][0] 93 | print('paper id', paper_id) 94 | # 插入作者的信息 95 | for author in authors: 96 | author = author.replace(' ', '') 97 | print(author) 98 | in_author_sql = "INSERT INTO analyse_author(author)" \ 99 | "values('%s')" \ 100 | % (author) 101 | dbhandle.dbInsert(in_author_sql) 102 | query_authorID_sql = "select id from analyse_author where author='%s'" % (author) 103 | 104 | print(query_authorID_sql) 105 | author_id = dbhandle.dbQuery(query_authorID_sql)[0][0] 106 | print('author id ', author_id) 107 | # 插入作者与文章的信息的信息 108 | in_author_to_paper = "INSERT INTO analyse_papertoauthor(author_id_id,paper_id_id)" \ 109 | "values('%d','%d')" \ 110 | % (author_id, paper_id) 111 | dbhandle.dbInsert(in_author_to_paper) 112 | 113 | num = num + 1 114 | 115 | print('第{}页抓取了{}条数据'.format(k, num)) 116 | 117 | time.sleep(3) 118 | 119 | # paperSpider('大数据') 120 | -------------------------------------------------------------------------------- /spider/paper_spider_by_app.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | 3 | from selenium import webdriver 4 | from selenium.webdriver.support.ui import WebDriverWait 5 | from selenium.webdriver.support import expected_conditions as EC 6 | from selenium.webdriver.common.by import By 7 | import time 8 | import json 9 | import csv 10 | from celery import shared_task 11 | 12 | from celery import task, app 13 | from channels.layers import get_channel_layer 14 | from asgiref.sync import async_to_sync 15 | from channels.generic.websocket import WebsocketConsumer 16 | 17 | 18 | 19 | @shared_task 20 | def start_spider_by_app(keyWords,channel_name): 21 | # 收到消息后触发 22 | # 前端页面使用send()发送数据给websocket,由该函数处理 23 | # 真个ChatConsumer类会将所有接收到的消息加上一个"聊天"的前缀发送给客户端 24 | # 设置谷歌驱动器的环境 25 | # _build_model() 26 | print("启动爬虫=======================") 27 | channel_layer = get_channel_layer() 28 | 29 | i=0 30 | options = webdriver.ChromeOptions() 31 | # 设置chrome不加载图片,提高速度 32 | options.add_experimental_option("prefs", {"profile.managed_default_content_settings.images": 2}) 33 | # 创建一个谷歌驱动器 34 | browser = webdriver.Chrome('/Applications/chromedriver', chrome_options=options) 35 | # print("==========",text_data) 36 | # if(text_data=='close'): 37 | # print('关闭连接1') 38 | # close() 39 | # browser.quit() 40 | 41 | print("收到消息===============") 42 | try: 43 | i += 1 44 | print("次数==============================" +str(i)) 45 | 46 | 47 | url = 'http://wap.cnki.net/touch/web/guide' 48 | 49 | # 声明一个全局列表,用来存储字典 50 | data_list = [] 51 | print("发送消息") 52 | # 请求url 53 | browser.get(url) 54 | # 显示等待输入框是否加载完成 55 | WebDriverWait(browser, 1000).until( 56 | EC.presence_of_all_elements_located( 57 | (By.ID, 'keyword') 58 | ) 59 | ) 60 | # 找到输入框的id,并输入python关键字 61 | browser.find_element_by_id('keyword').click() 62 | browser.find_element_by_id('keyword_ordinary').send_keys('python') 63 | # 输入关键字之后点击搜索 64 | browser.find_element_by_class_name('btn-search ').click() 65 | # print('quit') 66 | # browser.quit() 67 | 68 | # print(browser.page_source) 69 | # 显示等待文献是否加载完成 70 | WebDriverWait(browser, 1000).until( 71 | EC.presence_of_all_elements_located( 72 | (By.CLASS_NAME, 'g-search-body') 73 | ) 74 | ) 75 | # spiderSocket.send("202") 76 | async_to_sync(channel_layer.send)( 77 | channel_name, 78 | { 79 | "type": "send.message", 80 | "message": "202" 81 | } 82 | ) 83 | 84 | 85 | 86 | # 声明一个标记,用来标记翻页几页 87 | count = 1 88 | while True: 89 | # 显示等待加载更多按钮加载完成 90 | WebDriverWait(browser, 1000).until( 91 | EC.presence_of_all_elements_located( 92 | (By.CLASS_NAME, 'c-company__body-item-more') 93 | ) 94 | ) 95 | # 获取加载更多按钮 96 | Btn = browser.find_element_by_class_name('c-company__body-item-more') 97 | # 显示等待该信息加载完成 98 | WebDriverWait(browser, 1000).until( 99 | EC.presence_of_all_elements_located( 100 | (By.XPATH, 101 | '//div[@id="searchlist_div"]/div[{}]/div[@class="c-company__body-item"]'.format(2 * count - 1)) 102 | ) 103 | ) 104 | # 获取在div标签的信息,其中format(2*count-1)是因为加载的时候有显示多少条 105 | # 简单的说就是这些div的信息都是奇数 106 | divs = browser.find_elements_by_xpath( 107 | '//div[@id="searchlist_div"]/div[{}]/div[@class="c-company__body-item"]'.format(2 * count - 1)) 108 | # 遍历循环 109 | for div in divs: 110 | # 获取文献的题目 111 | name = div.find_element_by_class_name('c-company__body-title').text 112 | # 获取文献的作者 113 | author = div.find_element_by_class_name('c-company__body-author').text 114 | # 获取文献的摘要 115 | content = div.find_element_by_class_name('c-company__body-content').text 116 | # 获取文献的来源和日期、文献类型等 117 | text = div.find_element_by_class_name('c-company__body-name').text.split() 118 | if (len(text) == 3 and text[-1] == '优先') or len(text) == 2: 119 | # 来源 120 | source = text[0] 121 | # 日期 122 | datetime = text[1] 123 | # 文献类型 124 | literature_type = None 125 | else: 126 | source = text[0] 127 | datetime = text[2] 128 | literature_type = text[1] 129 | # 获取下载数和被引数 130 | temp = div.find_element_by_class_name('c-company__body-info').text.split() 131 | # 下载数 132 | download = temp[0].split(':')[-1] 133 | # 被引数 134 | cite = temp[1].split(':')[-1] 135 | 136 | # 声明一个字典存储数据 137 | data_dict = {} 138 | data_dict['name'] = name 139 | data_dict['author'] = author 140 | data_dict['content'] = content 141 | data_dict['source'] = source 142 | data_dict['datetime'] = datetime 143 | data_dict['literature_type'] = literature_type 144 | data_dict['download'] = download 145 | data_dict['cite'] = cite 146 | 147 | # spiderSocket.send(json.dumps({"paperInfo":[data_dict]}, ensure_ascii=False)) 148 | async_to_sync(channel_layer.send)( 149 | channel_name, 150 | { 151 | "type": "send.message", 152 | "message": [data_dict] 153 | } 154 | ) 155 | data_list.append(data_dict) 156 | print(data_dict) 157 | # 如果Btn按钮(就是加载更多这个按钮)没有找到(就是已经到底了),就退出 158 | if not Btn: 159 | break 160 | else: 161 | Btn.click() 162 | # 如果到了爬取的页数就退出 163 | if count == 0: 164 | break 165 | 166 | count += 1 167 | 168 | # 延迟两秒,我们不是在攻击服务器 169 | time.sleep(2) 170 | except Exception as e: 171 | print("出错啦=====================\n",e) 172 | browser.quit() --------------------------------------------------------------------------------