├── .idea
├── cnki.iml
├── misc.xml
├── modules.xml
├── vcs.xml
└── workspace.xml
├── README.md
├── backend
├── __init__.py
├── __pycache__
│ ├── __init__.cpython-36.pyc
│ ├── admin.cpython-36.pyc
│ ├── models.cpython-36.pyc
│ ├── urls.cpython-36.pyc
│ └── views.cpython-36.pyc
├── admin.py
├── apps.py
├── migrations
│ ├── __init__.py
│ └── __pycache__
│ │ └── __init__.cpython-36.pyc
├── models.py
├── tests.py
├── urls.py
└── views.py
├── cnki
├── __init__.py
├── __pycache__
│ ├── __init__.cpython-36.pyc
│ ├── celery.cpython-36.pyc
│ ├── consumer.cpython-36.pyc
│ ├── routing.cpython-36.pyc
│ ├── settings.cpython-36.pyc
│ ├── tasks.cpython-36.pyc
│ ├── urls.cpython-36.pyc
│ └── wsgi.cpython-36.pyc
├── celery.py
├── consumer.py
├── routing.py
├── settings.py
├── urls.py
└── wsgi.py
├── db.sqlite3
├── frontend
├── .babelrc
├── .editorconfig
├── .eslintignore
├── .eslintrc.js
├── .gitignore
├── .postcssrc.js
├── README.md
├── build
│ ├── build.js
│ ├── check-versions.js
│ ├── logo.png
│ ├── utils.js
│ ├── vue-loader.conf.js
│ ├── webpack.base.conf.js
│ ├── webpack.dev.conf.js
│ └── webpack.prod.conf.js
├── config
│ ├── dev.env.js
│ ├── index.js
│ ├── prod.env.js
│ └── test.env.js
├── index.html
├── package.json
├── src
│ ├── App.vue
│ ├── assets
│ │ ├── css
│ │ │ └── animate.css
│ │ └── img
│ │ │ ├── analyse.png
│ │ │ ├── btos.png
│ │ │ ├── chart.png
│ │ │ ├── check.png
│ │ │ ├── cnki.png
│ │ │ ├── footer.png
│ │ │ ├── hot
│ │ │ ├── icon-item001.png
│ │ │ ├── icon-item002.png
│ │ │ ├── icon-item003.png
│ │ │ ├── icon-item004.png
│ │ │ ├── icon-item005.png
│ │ │ └── icon-item006.png
│ │ │ ├── hotAnalyBg.jpg
│ │ │ ├── icon1.png
│ │ │ ├── icon2.png
│ │ │ ├── icon3.png
│ │ │ ├── logo.png
│ │ │ ├── menu.png
│ │ │ ├── online
│ │ │ ├── left.png
│ │ │ └── right.png
│ │ │ ├── sliderBg.png
│ │ │ └── under.png
│ ├── components
│ │ ├── cnki-footer.vue
│ │ ├── cnki-header.vue
│ │ └── index
│ │ │ ├── index-data.vue
│ │ │ ├── index-feature.vue
│ │ │ ├── index-service.vue
│ │ │ └── index-slider.vue
│ ├── layouts
│ │ ├── chartAnalyse.vue
│ │ ├── hotAnalyse.vue
│ │ ├── index.vue
│ │ ├── onlineSpider.vue
│ │ └── paperDetail.vue
│ ├── main.js
│ └── router
│ │ └── index.js
├── static
│ └── .gitkeep
└── test
│ ├── e2e
│ ├── custom-assertions
│ │ └── elementCount.js
│ ├── nightwatch.conf.js
│ ├── runner.js
│ └── specs
│ │ └── test.js
│ └── unit
│ ├── .eslintrc
│ ├── jest.conf.js
│ ├── setup.js
│ └── specs
│ └── HelloWorld.spec.js
├── manage.py
└── spider
├── __init__.py
├── __pycache__
├── __init__.cpython-36.pyc
├── config.cpython-36.pyc
├── db_handle.cpython-36.pyc
├── get_authors.cpython-36.pyc
├── get_cookies.cpython-36.pyc
├── get_fund.cpython-36.pyc
├── get_keyWordID.cpython-36.pyc
├── get_school.cpython-36.pyc
├── get_year.cpython-36.pyc
├── mian.cpython-36.pyc
├── paper_detail.cpython-36.pyc
├── paper_spider.cpython-36.pyc
└── paper_spider_by_app.cpython-36.pyc
├── config.py
├── db_handle.py
├── get_authors.py
├── get_cookies.py
├── get_fund.py
├── get_keyWordID.py
├── get_school.py
├── get_year.py
├── mian.py
├── paper_detail.py
├── paper_spider.py
└── paper_spider_by_app.py
/.idea/cnki.iml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
14 |
15 |
--------------------------------------------------------------------------------
/.idea/misc.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
--------------------------------------------------------------------------------
/.idea/modules.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
--------------------------------------------------------------------------------
/.idea/vcs.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
--------------------------------------------------------------------------------
/.idea/workspace.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 |
42 |
43 |
44 |
45 |
46 |
47 |
48 |
49 |
50 |
51 |
52 |
53 |
54 |
55 |
56 |
57 |
58 |
59 |
60 |
61 |
62 |
63 |
64 |
65 |
66 |
67 |
68 |
69 |
70 |
71 |
72 |
73 |
74 |
75 |
76 |
77 |
78 |
79 |
80 |
81 |
82 |
83 |
84 |
85 |
86 |
87 |
88 |
89 |
90 |
91 |
92 |
93 |
94 |
95 |
96 |
97 |
98 |
99 |
100 |
101 |
102 |
103 |
104 |
105 |
106 |
107 |
108 |
109 |
110 |
111 |
112 |
113 |
114 |
115 |
116 |
117 |
118 |
119 |
120 |
121 |
122 |
123 |
124 | 1571150444735
125 |
126 |
127 | 1571150444735
128 |
129 |
130 |
131 |
132 |
133 |
134 |
135 |
136 |
137 |
138 |
139 |
140 |
141 |
142 |
143 |
144 |
153 |
154 |
155 |
156 |
157 |
158 |
159 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | #### 本仓库为毕业设计的重构版,之前采取Django的模板引擎制作的页面,本版本将采用vue+Django进行重构,并加入新的功能,同时按照进度争取完成对应的博文进行讲解
2 | ### 2019/10/23 -开始使用vue重构页面
3 | 1. 添加响应式导航栏
4 |
5 | ### 2019/10/25
6 | 1. 首页slider
7 |
8 | ### 2019/10/25
9 | 1. 完成首页
10 |
11 | ### 2019/10/30
12 | 1. 热门分析页
13 |
14 | ### 2019/10/31
15 | 1. 在线爬虫页
--------------------------------------------------------------------------------
/backend/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/coder-syl/cnki-spider-and-analyse/85118a20028403b6f6e21d1cd0f933bf96f0d4dd/backend/__init__.py
--------------------------------------------------------------------------------
/backend/__pycache__/__init__.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/coder-syl/cnki-spider-and-analyse/85118a20028403b6f6e21d1cd0f933bf96f0d4dd/backend/__pycache__/__init__.cpython-36.pyc
--------------------------------------------------------------------------------
/backend/__pycache__/admin.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/coder-syl/cnki-spider-and-analyse/85118a20028403b6f6e21d1cd0f933bf96f0d4dd/backend/__pycache__/admin.cpython-36.pyc
--------------------------------------------------------------------------------
/backend/__pycache__/models.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/coder-syl/cnki-spider-and-analyse/85118a20028403b6f6e21d1cd0f933bf96f0d4dd/backend/__pycache__/models.cpython-36.pyc
--------------------------------------------------------------------------------
/backend/__pycache__/urls.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/coder-syl/cnki-spider-and-analyse/85118a20028403b6f6e21d1cd0f933bf96f0d4dd/backend/__pycache__/urls.cpython-36.pyc
--------------------------------------------------------------------------------
/backend/__pycache__/views.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/coder-syl/cnki-spider-and-analyse/85118a20028403b6f6e21d1cd0f933bf96f0d4dd/backend/__pycache__/views.cpython-36.pyc
--------------------------------------------------------------------------------
/backend/admin.py:
--------------------------------------------------------------------------------
1 | from django.contrib import admin
2 |
3 | # Register your models here.
4 |
--------------------------------------------------------------------------------
/backend/apps.py:
--------------------------------------------------------------------------------
1 | from django.apps import AppConfig
2 |
3 |
class BackendConfig(AppConfig):
    """Application configuration for the ``backend`` app."""

    name = 'backend'
6 |
--------------------------------------------------------------------------------
/backend/migrations/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/coder-syl/cnki-spider-and-analyse/85118a20028403b6f6e21d1cd0f933bf96f0d4dd/backend/migrations/__init__.py
--------------------------------------------------------------------------------
/backend/migrations/__pycache__/__init__.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/coder-syl/cnki-spider-and-analyse/85118a20028403b6f6e21d1cd0f933bf96f0d4dd/backend/migrations/__pycache__/__init__.cpython-36.pyc
--------------------------------------------------------------------------------
/backend/models.py:
--------------------------------------------------------------------------------
1 | from django.db import models
2 |
3 | # Create your models here.
4 |
--------------------------------------------------------------------------------
/backend/tests.py:
--------------------------------------------------------------------------------
1 | from django.test import TestCase
2 |
3 | # Create your tests here.
4 |
--------------------------------------------------------------------------------
/backend/urls.py:
--------------------------------------------------------------------------------
1 | from django.urls import path
2 | from django.conf.urls.static import static
3 | from django.conf import settings
4 |
5 | from backend import views
6 |
# Routes served by the backend app.
urlpatterns = [
    # Crawler
    path('testSpider', views.spider, name="testSpider"),
]
# Append whatever static() returns for STATIC_URL / STATIC_ROOT.
urlpatterns += static(settings.STATIC_URL, document_root=settings.STATIC_ROOT)
13 |
--------------------------------------------------------------------------------
/backend/views.py:
--------------------------------------------------------------------------------
1 | from django.shortcuts import render
2 |
3 | import redis
4 | import json
5 | # Create your views here.
6 | from django.template import loader
7 | from django.http import HttpResponse
8 | from django.http import JsonResponse
9 | from django.core import serializers
10 | from spider.mian import start_spider
11 |
12 | import smtplib
13 | from email.mime.text import MIMEText
14 |
15 |
16 |
17 |
18 |
19 |
def spider(request):
    """Queue the crawler in the background and acknowledge the request.

    ``start_spider`` is dispatched through ``.delay`` (presumably a Celery
    task -- confirm in ``spider.mian``); the meaning of the argument ``10``
    is not visible from this module.

    Returns:
        An ``HttpResponse`` with the fixed body ``"pachong"``.
    """
    start_spider.delay(10)
    response = HttpResponse("pachong")
    return response
25 |
26 |
--------------------------------------------------------------------------------
/cnki/__init__.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 |
3 | from .celery import app as celery_app # noqa
4 |
5 |
6 |
7 | import pymysql
8 | pymysql.install_as_MySQLdb()
9 |
10 |
--------------------------------------------------------------------------------
/cnki/__pycache__/__init__.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/coder-syl/cnki-spider-and-analyse/85118a20028403b6f6e21d1cd0f933bf96f0d4dd/cnki/__pycache__/__init__.cpython-36.pyc
--------------------------------------------------------------------------------
/cnki/__pycache__/celery.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/coder-syl/cnki-spider-and-analyse/85118a20028403b6f6e21d1cd0f933bf96f0d4dd/cnki/__pycache__/celery.cpython-36.pyc
--------------------------------------------------------------------------------
/cnki/__pycache__/consumer.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/coder-syl/cnki-spider-and-analyse/85118a20028403b6f6e21d1cd0f933bf96f0d4dd/cnki/__pycache__/consumer.cpython-36.pyc
--------------------------------------------------------------------------------
/cnki/__pycache__/routing.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/coder-syl/cnki-spider-and-analyse/85118a20028403b6f6e21d1cd0f933bf96f0d4dd/cnki/__pycache__/routing.cpython-36.pyc
--------------------------------------------------------------------------------
/cnki/__pycache__/settings.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/coder-syl/cnki-spider-and-analyse/85118a20028403b6f6e21d1cd0f933bf96f0d4dd/cnki/__pycache__/settings.cpython-36.pyc
--------------------------------------------------------------------------------
/cnki/__pycache__/tasks.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/coder-syl/cnki-spider-and-analyse/85118a20028403b6f6e21d1cd0f933bf96f0d4dd/cnki/__pycache__/tasks.cpython-36.pyc
--------------------------------------------------------------------------------
/cnki/__pycache__/urls.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/coder-syl/cnki-spider-and-analyse/85118a20028403b6f6e21d1cd0f933bf96f0d4dd/cnki/__pycache__/urls.cpython-36.pyc
--------------------------------------------------------------------------------
/cnki/__pycache__/wsgi.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/coder-syl/cnki-spider-and-analyse/85118a20028403b6f6e21d1cd0f933bf96f0d4dd/cnki/__pycache__/wsgi.cpython-36.pyc
--------------------------------------------------------------------------------
/cnki/celery.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import, unicode_literals # 必须在最上面
2 |
3 | import os
4 | from celery import Celery
5 | from django.conf import settings
6 |
# Configure the environment.
# The project package name is derived from this file's own location instead of
# the current working directory, so the settings module resolves correctly no
# matter which directory the worker (or manage.py) is started from.
project_name = os.path.basename(os.path.dirname(os.path.abspath(__file__)))
project_settings = '%s.settings' % project_name
os.environ.setdefault('DJANGO_SETTINGS_MODULE', project_settings)

# Instantiate Celery.
app = Celery(project_name,
             broker='redis://localhost:6379',
             backend='redis://localhost:6379'
             )
# Configure Celery from the Django settings module.
app.config_from_object('django.conf:settings')

# Let Celery look for tasks in every installed app.
app.autodiscover_tasks(lambda: settings.INSTALLED_APPS)
22 |
--------------------------------------------------------------------------------
/cnki/consumer.py:
--------------------------------------------------------------------------------
1 | from channels.generic.websocket import WebsocketConsumer
2 | import json
3 | from spider.paper_spider_by_app import start_spider_by_app
4 |
class ChatConsumer(WebsocketConsumer):
    """WebSocket consumer that starts a crawl for each received message.

    Every incoming text frame is expected to be JSON with a ``message`` key;
    its value is handed to ``start_spider_by_app`` together with this
    consumer's channel name. ``send_message`` forwards an event's
    ``message`` field to the client as ``{"paperInfo": ...}``.
    """

    # Handle of the most recently dispatched crawl. The class-level default
    # keeps ``disconnect`` and ``send_message`` safe when the client never
    # sent a message, so ``receive`` never ran.
    result = None

    def connect(self):
        """Accept the incoming connection."""
        print("开始连接")
        self.accept()
        print('self.channel_name', self.channel_name)

    def disconnect(self, code):
        """Revoke the running crawl, if any, and close the socket."""
        if self.result is not None:
            self.result.revoke(terminate=True)
        self.close()
        print('关闭连接')

    def receive(self, text_data=None, bytes_data=None):
        """Start a crawl for the keywords carried in the ``message`` field."""
        print("收到消息")
        print("==========", text_data)
        if text_data is None:
            # No text payload (e.g. a binary frame): there is nothing to parse.
            return
        # Parse once and reuse the value instead of decoding the JSON twice.
        message = json.loads(text_data)['message']
        print(message)
        print('self.channel_name', self.channel_name)
        self.keyWords = message
        self.result = start_spider_by_app.delay(self.keyWords, self.channel_name)

    def send_message(self, event):
        """Forward ``event["message"]`` to the client as ``paperInfo``."""
        print(event)
        print('self.result', self.result)
        self.send(json.dumps({
            "paperInfo": event["message"]
        }))
--------------------------------------------------------------------------------
/cnki/routing.py:
--------------------------------------------------------------------------------
1 | from channels.routing import ProtocolTypeRouter,URLRouter
2 | from channels.auth import AuthMiddlewareStack
3 | from django.urls import path,re_path
4 | from cnki.consumer import *
5 |
6 |
7 |
8 | # routing.py路由文件跟django的url.py功能类似,语法也一样,意思就是访问websocket接口
9 |
10 |
# URL patterns for WebSocket connections.
websocket_urlpatterns = [path(r"ws/chat/", ChatConsumer)]

# WebSocket traffic is routed through the auth middleware stack to the
# patterns above.
_websocket_app = AuthMiddlewareStack(URLRouter(websocket_urlpatterns))

# Entry point that dispatches by protocol type; only 'websocket' is mapped here.
application = ProtocolTypeRouter({'websocket': _websocket_app})
--------------------------------------------------------------------------------
/cnki/settings.py:
--------------------------------------------------------------------------------
1 | """
2 | Django settings for cnki project.
3 |
4 | Generated by 'django-admin startproject' using Django 2.0.5.
5 |
6 | For more information on this file, see
7 | https://docs.djangoproject.com/en/2.0/topics/settings/
8 |
9 | For the full list of settings and their values, see
10 | https://docs.djangoproject.com/en/2.0/ref/settings/
11 | """
12 |
13 | import os
14 | import djcelery
15 | # Build paths inside the project like this: os.path.join(BASE_DIR, ...)
16 | BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
17 |
18 |
19 | # Quick-start development settings - unsuitable for production
20 | # See https://docs.djangoproject.com/en/2.0/howto/deployment/checklist/
21 |
# SECURITY WARNING: keep the secret key used in production secret!
# The DJANGO_SECRET_KEY environment variable takes precedence; the literal
# below is only a development fallback and must not be relied on in production.
SECRET_KEY = os.environ.get(
    'DJANGO_SECRET_KEY',
    'n&5=u+_cef#4r!9uueivbo)%n2k-6zgursc@tf3+uw)!ymx$&h',
)

# SECURITY WARNING: don't run with debug turned on in production!
DEBUG = True

ALLOWED_HOSTS = []
29 |
30 |
31 | # Application definition
32 |
33 | INSTALLED_APPS = [
34 | 'django.contrib.admin',
35 | 'django.contrib.auth',
36 | 'django.contrib.contenttypes',
37 | 'django.contrib.sessions',
38 | 'django.contrib.messages',
39 | 'django.contrib.staticfiles',
40 | 'djcelery',
41 | 'corsheaders',
42 | 'channels',
43 | 'backend',
44 | 'spider',
45 |
46 | ]
47 |
48 | MIDDLEWARE = [
49 | 'django.middleware.security.SecurityMiddleware',
50 | 'django.contrib.sessions.middleware.SessionMiddleware',
51 | 'corsheaders.middleware.CorsMiddleware', # 默认
52 | 'django.middleware.common.CommonMiddleware',
53 | 'django.middleware.csrf.CsrfViewMiddleware',
54 | 'django.contrib.auth.middleware.AuthenticationMiddleware',
55 | 'django.contrib.messages.middleware.MessageMiddleware',
56 | 'django.middleware.clickjacking.XFrameOptionsMiddleware',
57 | ]
58 |
59 | ROOT_URLCONF = 'cnki.urls'
60 |
61 | TEMPLATES = [
62 | {
63 | 'BACKEND': 'django.template.backends.django.DjangoTemplates',
64 | # 'DIRS':['frontend/dist'],
65 | 'DIRS':[''],
66 | 'APP_DIRS': True,
67 | 'OPTIONS': {
68 | 'context_processors': [
69 | 'django.template.context_processors.debug',
70 | 'django.template.context_processors.request',
71 | 'django.contrib.auth.context_processors.auth',
72 | 'django.contrib.messages.context_processors.messages',
73 | ],
74 | },
75 | },
76 | ]
77 |
78 | WSGI_APPLICATION = 'cnki.wsgi.application'
79 |
80 | ASGI_APPLICATION = 'cnki.routing.application'
81 |
82 | # Database
83 | # https://docs.djangoproject.com/en/2.0/ref/settings/#databases
84 |
85 | DATABASES = {
86 | 'default': {
87 | 'ENGINE': 'django.db.backends.sqlite3',
88 | 'NAME': os.path.join(BASE_DIR, 'db.sqlite3'),
89 | }
90 | }
91 |
92 |
93 | # Password validation
94 | # https://docs.djangoproject.com/en/2.0/ref/settings/#auth-password-validators
95 |
96 | AUTH_PASSWORD_VALIDATORS = [
97 | {
98 | 'NAME': 'django.contrib.auth.password_validation.UserAttributeSimilarityValidator',
99 | },
100 | {
101 | 'NAME': 'django.contrib.auth.password_validation.MinimumLengthValidator',
102 | },
103 | {
104 | 'NAME': 'django.contrib.auth.password_validation.CommonPasswordValidator',
105 | },
106 | {
107 | 'NAME': 'django.contrib.auth.password_validation.NumericPasswordValidator',
108 | },
109 | ]
110 |
111 |
112 | # Internationalization
113 | # https://docs.djangoproject.com/en/2.0/topics/i18n/
114 |
115 | LANGUAGE_CODE = 'zh-Hans'
116 |
117 | TIME_ZONE = 'UTC'
118 |
119 | USE_I18N = True
120 |
121 | USE_L10N = True
122 |
123 | USE_TZ = True
124 |
125 |
126 | # Static files (CSS, JavaScript, Images)
127 | # https://docs.djangoproject.com/en/2.0/howto/static-files/
128 |
129 | # STATIC_URL = '/static/'
130 | STATIC_URL = '/static/'
131 | STATIC_ROOT = os.path.join(BASE_DIR, 'backend/static')
132 |
133 | # # Add for vue.js
134 | # STATICFILES_DIRS = [
135 | # os.path.join(BASE_DIR, "frontend/dist/static"),
136 | # ]
137 |
138 | # 跨域增加忽略
139 | CORS_ALLOW_CREDENTIALS = True
140 | CORS_ORIGIN_ALLOW_ALL = True
141 | # CORS_ORIGIN_WHITELIST = (
142 | # 'https://*'
143 | # )
144 | CORS_ALLOW_METHODS = (
145 | 'DELETE',
146 | 'GET',
147 | 'OPTIONS',
148 | 'PATCH',
149 | 'POST',
150 | 'PUT',
151 | 'VIEW',
152 | )
153 | CORS_ALLOW_HEADERS = (
154 | 'XMLHttpRequest',
155 | 'X_FILENAME',
156 | 'accept-encoding',
157 | 'authorization',
158 | 'content-type',
159 | 'dnt',
160 | 'origin',
161 | 'user-agent',
162 | 'x-csrftoken',
163 | 'x-requested-with',
164 | )
165 |
# Celery (django-celery) configuration.
djcelery.setup_loader()
BROKER_URL = 'redis://127.0.0.1:6379/1'
# The trailing comma makes this a real one-element tuple; without it the
# parentheses are only grouping and the value is a plain string.
CELERY_IMPORTS = ('spider.paper_spider_by_app',)
# Channel layer backed by Redis at 127.0.0.1:6379.
CHANNEL_LAYERS = {
    'default': {
        'BACKEND': 'channels_redis.core.RedisChannelLayer',
        'CONFIG': {
            "hosts": [('127.0.0.1', 6379)],
        },
    },
}
--------------------------------------------------------------------------------
/cnki/urls.py:
--------------------------------------------------------------------------------
1 | """cnki URL Configuration
2 |
3 | The `urlpatterns` list routes URLs to views. For more information please see:
4 | https://docs.djangoproject.com/en/2.0/topics/http/urls/
5 | Examples:
6 | Function views
7 | 1. Add an import: from my_app import views
8 | 2. Add a URL to urlpatterns: path('', views.home, name='home')
9 | Class-based views
10 | 1. Add an import: from other_app.views import Home
11 | 2. Add a URL to urlpatterns: path('', Home.as_view(), name='home')
12 | Including another URLconf
13 | 1. Import the include() function: from django.urls import include, path
14 | 2. Add a URL to urlpatterns: path('blog/', include('blog.urls'))
15 | """
16 | from django.contrib import admin
17 | from django.urls import path, include
18 | from django.views.generic.base import TemplateView # 使用通用模板
19 |
20 |
21 | urlpatterns = [
22 | path('admin/', admin.site.urls),
23 | # path(r'', TemplateView.as_view(template_name="index.html")),
24 | path('cnki/', include('backend.urls')),
25 |
26 | ]
27 |
--------------------------------------------------------------------------------
/cnki/wsgi.py:
--------------------------------------------------------------------------------
1 | """
2 | WSGI config for cnki project.
3 |
4 | It exposes the WSGI callable as a module-level variable named ``application``.
5 |
6 | For more information on this file, see
7 | https://docs.djangoproject.com/en/2.0/howto/deployment/wsgi/
8 | """
9 |
10 | import os
11 |
12 | from django.core.wsgi import get_wsgi_application
13 |
14 | os.environ.setdefault("DJANGO_SETTINGS_MODULE", "cnki.settings")
15 |
16 | application = get_wsgi_application()
17 |
--------------------------------------------------------------------------------
/db.sqlite3:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/coder-syl/cnki-spider-and-analyse/85118a20028403b6f6e21d1cd0f933bf96f0d4dd/db.sqlite3
--------------------------------------------------------------------------------
/frontend/.babelrc:
--------------------------------------------------------------------------------
1 | {
2 | "presets": [
3 | ["env", {
4 | "modules": false,
5 | "targets": {
6 | "browsers": ["> 1%", "last 2 versions", "not ie <= 8"]
7 | }
8 | }],
9 | "stage-2"
10 | ],
11 | "plugins": ["transform-vue-jsx", "transform-runtime"],
12 | "env": {
13 | "test": {
14 | "presets": ["env", "stage-2"],
15 | "plugins": ["transform-vue-jsx", "transform-es2015-modules-commonjs", "dynamic-import-node"]
16 | }
17 | }
18 | }
19 |
--------------------------------------------------------------------------------
/frontend/.editorconfig:
--------------------------------------------------------------------------------
1 | root = true
2 |
3 | [*]
4 | charset = utf-8
5 | indent_style = space
6 | indent_size = 2
7 | end_of_line = lf
8 | insert_final_newline = true
9 | trim_trailing_whitespace = true
10 |
--------------------------------------------------------------------------------
/frontend/.eslintignore:
--------------------------------------------------------------------------------
1 | /build/
2 | /config/
3 | /dist/
4 | /*.js
5 | /test/unit/coverage/
6 |
--------------------------------------------------------------------------------
/frontend/.eslintrc.js:
--------------------------------------------------------------------------------
1 | // https://eslint.org/docs/user-guide/configuring
2 |
3 | module.exports = {
4 | root: true,
5 | parserOptions: {
6 | parser: 'babel-eslint'
7 | },
8 | env: {
9 | browser: true,
10 | },
11 | extends: [
12 | // https://github.com/vuejs/eslint-plugin-vue#priority-a-essential-error-prevention
13 | // consider switching to `plugin:vue/strongly-recommended` or `plugin:vue/recommended` for stricter rules.
14 | 'plugin:vue/essential',
15 | // https://github.com/standard/standard/blob/master/docs/RULES-en.md
16 | 'standard'
17 | ],
18 | // required to lint *.vue files
19 | plugins: [
20 | 'vue'
21 | ],
22 | // add your custom rules here
23 | rules: {
24 | // allow async-await
25 | 'generator-star-spacing': 'off',
26 | // allow debugger during development
27 | 'no-debugger': process.env.NODE_ENV === 'production' ? 'error' : 'off'
28 | }
29 | }
30 |
--------------------------------------------------------------------------------
/frontend/.gitignore:
--------------------------------------------------------------------------------
1 | .DS_Store
2 | node_modules/
3 | /dist/
4 | npm-debug.log*
5 | yarn-debug.log*
6 | yarn-error.log*
7 | /test/unit/coverage/
8 | /test/e2e/reports/
9 | selenium-debug.log
10 |
11 | # Editor directories and files
12 | .idea
13 | .vscode
14 | *.suo
15 | *.ntvs*
16 | *.njsproj
17 | *.sln
18 |
--------------------------------------------------------------------------------
/frontend/.postcssrc.js:
--------------------------------------------------------------------------------
1 | // https://github.com/michael-ciniawsky/postcss-load-config
2 |
3 | module.exports = {
4 | "plugins": {
5 | "postcss-import": {},
6 | "postcss-url": {},
7 | // to edit target browsers: use "browserslist" field in package.json
8 | "autoprefixer": {}
9 | }
10 | }
11 |
--------------------------------------------------------------------------------
/frontend/README.md:
--------------------------------------------------------------------------------
1 | # frontend
2 |
3 | > cnki spider
4 |
5 | ## Build Setup
6 |
7 | ``` bash
8 | # install dependencies
9 | npm install
10 |
11 | # serve with hot reload at localhost:8080
12 | npm run dev
13 |
14 | # build for production with minification
15 | npm run build
16 |
17 | # build for production and view the bundle analyzer report
18 | npm run build --report
19 |
20 | # run unit tests
21 | npm run unit
22 |
23 | # run e2e tests
24 | npm run e2e
25 |
26 | # run all tests
27 | npm test
28 | ```
29 |
30 | For a detailed explanation on how things work, check out the [guide](http://vuejs-templates.github.io/webpack/) and [docs for vue-loader](http://vuejs.github.io/vue-loader).
31 |
--------------------------------------------------------------------------------
/frontend/build/build.js:
--------------------------------------------------------------------------------
1 | 'use strict'
2 | require('./check-versions')()
3 |
4 | process.env.NODE_ENV = 'production'
5 |
6 | const ora = require('ora')
7 | const rm = require('rimraf')
8 | const path = require('path')
9 | const chalk = require('chalk')
10 | const webpack = require('webpack')
11 | const config = require('../config')
12 | const webpackConfig = require('./webpack.prod.conf')
13 |
14 | const spinner = ora('building for production...')
15 | spinner.start()
16 |
// Print the webpack stats and finish the build: exit with status 1 when the
// compilation reported errors, otherwise print the completion notice.
function reportBuild (err, stats) {
  spinner.stop()
  if (err) throw err

  const summary = stats.toString({
    colors: true,
    modules: false,
    children: false, // If you are using ts-loader, setting this to true will make TypeScript errors show up during build.
    chunks: false,
    chunkModules: false
  })
  process.stdout.write(summary + '\n\n')

  if (stats.hasErrors()) {
    console.log(chalk.red(' Build failed with errors.\n'))
    process.exit(1)
  }

  console.log(chalk.cyan(' Build complete.\n'))
  console.log(chalk.yellow(
    ' Tip: built files are meant to be served over an HTTP server.\n' +
    ' Opening index.html over file:// won\'t work.\n'
  ))
}

// Remove the previous build's assets directory, then run the production build.
const staleAssets = path.join(config.build.assetsRoot, config.build.assetsSubDirectory)
rm(staleAssets, err => {
  if (err) throw err
  webpack(webpackConfig, reportBuild)
})
42 |
--------------------------------------------------------------------------------
/frontend/build/check-versions.js:
--------------------------------------------------------------------------------
1 | 'use strict'
2 | const chalk = require('chalk')
3 | const semver = require('semver')
4 | const packageConfig = require('../package.json')
5 | const shell = require('shelljs')
6 |
// Run a command synchronously and return its output as a trimmed string.
function exec (cmd) {
  const { execSync } = require('child_process')
  const output = execSync(cmd)
  return output.toString().trim()
}
10 |
11 | const versionRequirements = [
12 | {
13 | name: 'node',
14 | currentVersion: semver.clean(process.version),
15 | versionRequirement: packageConfig.engines.node
16 | }
17 | ]
18 |
19 | if (shell.which('npm')) {
20 | versionRequirements.push({
21 | name: 'npm',
22 | currentVersion: exec('npm --version'),
23 | versionRequirement: packageConfig.engines.npm
24 | })
25 | }
26 |
27 | module.exports = function () {
28 | const warnings = []
29 |
30 | for (let i = 0; i < versionRequirements.length; i++) {
31 | const mod = versionRequirements[i]
32 |
33 | if (!semver.satisfies(mod.currentVersion, mod.versionRequirement)) {
34 | warnings.push(mod.name + ': ' +
35 | chalk.red(mod.currentVersion) + ' should be ' +
36 | chalk.green(mod.versionRequirement)
37 | )
38 | }
39 | }
40 |
41 | if (warnings.length) {
42 | console.log('')
43 | console.log(chalk.yellow('To use this template, you must update following to modules:'))
44 | console.log()
45 |
46 | for (let i = 0; i < warnings.length; i++) {
47 | const warning = warnings[i]
48 | console.log(' ' + warning)
49 | }
50 |
51 | console.log()
52 | process.exit(1)
53 | }
54 | }
55 |
--------------------------------------------------------------------------------
/frontend/build/logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/coder-syl/cnki-spider-and-analyse/85118a20028403b6f6e21d1cd0f933bf96f0d4dd/frontend/build/logo.png
--------------------------------------------------------------------------------
/frontend/build/utils.js:
--------------------------------------------------------------------------------
1 | 'use strict'
2 | const path = require('path')
3 | const config = require('../config')
4 | const ExtractTextPlugin = require('extract-text-webpack-plugin')
5 | const packageConfig = require('../package.json')
6 |
7 | exports.assetsPath = function (_path) {
8 | const assetsSubDirectory = process.env.NODE_ENV === 'production'
9 | ? config.build.assetsSubDirectory
10 | : config.dev.assetsSubDirectory
11 |
12 | return path.posix.join(assetsSubDirectory, _path)
13 | }
14 |
15 | exports.cssLoaders = function (options) {
16 | options = options || {}
17 |
18 | const cssLoader = {
19 | loader: 'css-loader',
20 | options: {
21 | sourceMap: options.sourceMap
22 | }
23 | }
24 |
25 | const postcssLoader = {
26 | loader: 'postcss-loader',
27 | options: {
28 | sourceMap: options.sourceMap
29 | }
30 | }
31 |
32 | // generate loader string to be used with extract text plugin
33 | function generateLoaders (loader, loaderOptions) {
34 | const loaders = options.usePostCSS ? [cssLoader, postcssLoader] : [cssLoader]
35 |
36 | if (loader) {
37 | loaders.push({
38 | loader: loader + '-loader',
39 | options: Object.assign({}, loaderOptions, {
40 | sourceMap: options.sourceMap
41 | })
42 | })
43 | }
44 |
45 | // Extract CSS when that option is specified
46 | // (which is the case during production build)
47 | if (options.extract) {
48 | return ExtractTextPlugin.extract({
49 | use: loaders,
50 | fallback: 'vue-style-loader'
51 | })
52 | } else {
53 | return ['vue-style-loader'].concat(loaders)
54 | }
55 | }
56 |
57 | // https://vue-loader.vuejs.org/en/configurations/extract-css.html
58 | return {
59 | css: generateLoaders(),
60 | postcss: generateLoaders(),
61 | less: generateLoaders('less'),
62 | sass: generateLoaders('sass', { indentedSyntax: true }),
63 | scss: generateLoaders('sass'),
64 | stylus: generateLoaders('stylus'),
65 | styl: generateLoaders('stylus')
66 | }
67 | }
68 |
69 | // Generate loaders for standalone style files (outside of .vue)
70 | exports.styleLoaders = function (options) {
71 | const output = []
72 | const loaders = exports.cssLoaders(options)
73 |
74 | for (const extension in loaders) {
75 | const loader = loaders[extension]
76 | output.push({
77 | test: new RegExp('\\.' + extension + '$'),
78 | use: loader
79 | })
80 | }
81 |
82 | return output
83 | }
84 |
85 | exports.createNotifierCallback = () => {
86 | const notifier = require('node-notifier')
87 |
88 | return (severity, errors) => {
89 | if (severity !== 'error') return
90 |
91 | const error = errors[0]
92 | const filename = error.file && error.file.split('!').pop()
93 |
94 | notifier.notify({
95 | title: packageConfig.name,
96 | message: severity + ': ' + error.name,
97 | subtitle: filename || '',
98 | icon: path.join(__dirname, 'logo.png')
99 | })
100 | }
101 | }
102 |
--------------------------------------------------------------------------------
/frontend/build/vue-loader.conf.js:
--------------------------------------------------------------------------------
1 | 'use strict'
2 | const utils = require('./utils')
3 | const config = require('../config')
4 | const isProduction = process.env.NODE_ENV === 'production'
5 | const sourceMapEnabled = isProduction
6 | ? config.build.productionSourceMap
7 | : config.dev.cssSourceMap
8 |
9 | module.exports = {
10 | loaders: utils.cssLoaders({
11 | sourceMap: sourceMapEnabled,
12 | extract: isProduction
13 | }),
14 | cssSourceMap: sourceMapEnabled,
15 | cacheBusting: config.dev.cacheBusting,
16 | transformToRequire: {
17 | video: ['src', 'poster'],
18 | source: 'src',
19 | img: 'src',
20 | image: 'xlink:href'
21 | }
22 | }
23 |
--------------------------------------------------------------------------------
/frontend/build/webpack.base.conf.js:
--------------------------------------------------------------------------------
1 | 'use strict'
2 | const path = require('path')
3 | const utils = require('./utils')
4 | const config = require('../config')
5 | const vueLoaderConfig = require('./vue-loader.conf')
6 |
7 | function resolve (dir) {
8 | return path.join(__dirname, '..', dir)
9 | }
10 |
/**
 * Build the eslint-loader rule applied (as a 'pre' loader) to .js and .vue
 * files under the `src` and `test` directories.
 */
const createLintingRule = () => {
  const lintedDirs = ['src', 'test'].map(dir => resolve(dir))

  return {
    test: /\.(js|vue)$/,
    loader: 'eslint-loader',
    enforce: 'pre',
    include: lintedDirs,
    options: {
      formatter: require('eslint-friendly-formatter'),
      // Emit warnings unless ESLint problems are meant for the browser overlay.
      emitWarning: !config.dev.showEslintErrorsInOverlay
    }
  }
}
21 |
22 | module.exports = {
23 | context: path.resolve(__dirname, '../'),
24 | entry: {
25 | app: './src/main.js'
26 | },
27 | output: {
28 | path: config.build.assetsRoot,
29 | filename: '[name].js',
30 | publicPath: process.env.NODE_ENV === 'production'
31 | ? config.build.assetsPublicPath
32 | : config.dev.assetsPublicPath
33 | },
34 | resolve: {
35 | extensions: ['.js', '.vue', '.json'],
36 | alias: {
37 | 'vue$': 'vue/dist/vue.esm.js',
38 | '@': resolve('src'),
39 | }
40 | },
41 | module: {
42 | rules: [
43 | ...(config.dev.useEslint ? [createLintingRule()] : []),
44 | {
45 | test: /\.vue$/,
46 | loader: 'vue-loader',
47 | options: vueLoaderConfig
48 | },
49 | {
50 | test: /\.js$/,
51 | loader: 'babel-loader',
52 | include: [resolve('src'), resolve('test'), resolve('node_modules/webpack-dev-server/client')]
53 | },
54 | {
55 | test: /\.(png|jpe?g|gif|svg)(\?.*)?$/,
56 | loader: 'url-loader',
57 | options: {
58 | limit: 10000,
59 | name: utils.assetsPath('img/[name].[hash:7].[ext]')
60 | }
61 | },
62 | {
63 | test: /\.(mp4|webm|ogg|mp3|wav|flac|aac)(\?.*)?$/,
64 | loader: 'url-loader',
65 | options: {
66 | limit: 10000,
67 | name: utils.assetsPath('media/[name].[hash:7].[ext]')
68 | }
69 | },
70 | {
71 | test: /\.(woff2?|eot|ttf|otf)(\?.*)?$/,
72 | loader: 'url-loader',
73 | options: {
74 | limit: 10000,
75 | name: utils.assetsPath('fonts/[name].[hash:7].[ext]')
76 | }
77 | }
78 | ]
79 | },
80 | node: {
81 | // prevent webpack from injecting useless setImmediate polyfill because Vue
82 | // source contains it (although only uses it if it's native).
83 | setImmediate: false,
84 | // prevent webpack from injecting mocks to Node native modules
85 | // that does not make sense for the client
86 | dgram: 'empty',
87 | fs: 'empty',
88 | net: 'empty',
89 | tls: 'empty',
90 | child_process: 'empty'
91 | }
92 | }
93 |
--------------------------------------------------------------------------------
/frontend/build/webpack.dev.conf.js:
--------------------------------------------------------------------------------
1 | 'use strict'
2 | const utils = require('./utils')
3 | const webpack = require('webpack')
4 | const config = require('../config')
5 | const merge = require('webpack-merge')
6 | const path = require('path')
7 | const baseWebpackConfig = require('./webpack.base.conf')
8 | const CopyWebpackPlugin = require('copy-webpack-plugin')
9 | const HtmlWebpackPlugin = require('html-webpack-plugin')
10 | const FriendlyErrorsPlugin = require('friendly-errors-webpack-plugin')
11 | const portfinder = require('portfinder')
12 |
13 | const HOST = process.env.HOST
14 | const PORT = process.env.PORT && Number(process.env.PORT)
15 |
// Rules for standalone style files, with PostCSS enabled.
const devStyleRules = utils.styleLoaders({
  sourceMap: config.dev.cssSourceMap,
  usePostCSS: true
})

// webpack-dev-server options; the values read here are customised in /config/index.js.
const devServerOptions = {
  clientLogLevel: 'warning',
  historyApiFallback: {
    // Every request path is rewritten to the app's index.html.
    rewrites: [
      { from: /.*/, to: path.posix.join(config.dev.assetsPublicPath, 'index.html') }
    ]
  },
  hot: true,
  contentBase: false, // since we use CopyWebpackPlugin.
  compress: true,
  host: HOST || config.dev.host,
  port: PORT || config.dev.port,
  open: config.dev.autoOpenBrowser,
  overlay: config.dev.errorOverlay ? { warnings: false, errors: true } : false,
  publicPath: config.dev.assetsPublicPath,
  proxy: config.dev.proxyTable,
  quiet: true, // necessary for FriendlyErrorsPlugin
  watchOptions: {
    poll: config.dev.poll
  }
}

const devPlugins = [
  new webpack.DefinePlugin({
    'process.env': require('../config/dev.env')
  }),
  new webpack.HotModuleReplacementPlugin(),
  // HMR shows correct file names in console on update.
  new webpack.NamedModulesPlugin(),
  new webpack.NoEmitOnErrorsPlugin(),
  // https://github.com/ampedandwired/html-webpack-plugin
  new HtmlWebpackPlugin({
    filename: 'index.html',
    template: 'index.html',
    inject: true
  }),
  // Copy custom static assets, skipping dot-files.
  new CopyWebpackPlugin([
    {
      from: path.resolve(__dirname, '../static'),
      to: config.dev.assetsSubDirectory,
      ignore: ['.*']
    }
  ])
]

// Development configuration layered on top of the shared base configuration.
const devWebpackConfig = merge(baseWebpackConfig, {
  module: {
    rules: devStyleRules
  },
  // cheap-module-eval-source-map is faster for development
  devtool: config.dev.devtool,
  devServer: devServerOptions,
  plugins: devPlugins
})
70 |
71 | module.exports = new Promise((resolve, reject) => {
72 | portfinder.basePort = process.env.PORT || config.dev.port
73 | portfinder.getPort((err, port) => {
74 | if (err) {
75 | reject(err)
76 | } else {
77 | // publish the new Port, necessary for e2e tests
78 | process.env.PORT = port
79 | // add port to devServer config
80 | devWebpackConfig.devServer.port = port
81 |
82 | // Add FriendlyErrorsPlugin
83 | devWebpackConfig.plugins.push(new FriendlyErrorsPlugin({
84 | compilationSuccessInfo: {
85 | messages: [`Your application is running here: http://${devWebpackConfig.devServer.host}:${port}`],
86 | },
87 | onErrors: config.dev.notifyOnErrors
88 | ? utils.createNotifierCallback()
89 | : undefined
90 | }))
91 |
92 | resolve(devWebpackConfig)
93 | }
94 | })
95 | })
96 |
--------------------------------------------------------------------------------
/frontend/build/webpack.prod.conf.js:
--------------------------------------------------------------------------------
1 | 'use strict'
2 | const path = require('path')
3 | const utils = require('./utils')
4 | const webpack = require('webpack')
5 | const config = require('../config')
6 | const merge = require('webpack-merge')
7 | const baseWebpackConfig = require('./webpack.base.conf')
8 | const CopyWebpackPlugin = require('copy-webpack-plugin')
9 | const HtmlWebpackPlugin = require('html-webpack-plugin')
10 | const ExtractTextPlugin = require('extract-text-webpack-plugin')
11 | const OptimizeCSSPlugin = require('optimize-css-assets-webpack-plugin')
12 | const UglifyJsPlugin = require('uglifyjs-webpack-plugin')
13 |
const isTesting = process.env.NODE_ENV === 'testing'

// Constants substituted for `process.env` in the bundle: the test settings
// when NODE_ENV is 'testing', the production settings otherwise.
const env = isTesting
  ? require('../config/test.env')
  : require('../config/prod.env')

const useSourceMap = config.build.productionSourceMap
const nodeModulesDir = path.join(__dirname, '../node_modules')

// Truthy for .js modules whose resource path starts with the project's
// node_modules directory; such modules are moved into the vendor chunk.
function isVendorModule (module) {
  return (
    module.resource &&
    /\.js$/.test(module.resource) &&
    module.resource.indexOf(nodeModulesDir) === 0
  )
}

// Rules for standalone style files: extracted to CSS files, PostCSS enabled.
const prodStyleRules = utils.styleLoaders({
  sourceMap: useSourceMap,
  extract: true,
  usePostCSS: true
})

// NOTE: the order of the plugins below is kept exactly as-is.
const prodPlugins = [
  // http://vuejs.github.io/vue-loader/en/workflow/production.html
  new webpack.DefinePlugin({
    'process.env': env
  }),
  new UglifyJsPlugin({
    uglifyOptions: {
      compress: {
        warnings: false
      }
    },
    sourceMap: useSourceMap,
    parallel: true
  }),
  // extract css into its own file
  new ExtractTextPlugin({
    filename: utils.assetsPath('css/[name].[contenthash].css'),
    // With `false`, CSS of code-split chunks is not extracted and is instead
    // inserted dynamically by style-loader once the chunk has loaded.
    // It is `true` because source maps otherwise end up inside the code-split
    // bundle too, increasing file size:
    // https://github.com/vuejs-templates/webpack/issues/1110
    allChunks: true
  }),
  // Compress extracted CSS; this also dedupes CSS duplicated across components.
  new OptimizeCSSPlugin({
    cssProcessorOptions: useSourceMap
      ? { safe: true, map: { inline: false } }
      : { safe: true }
  }),
  // Generate the dist index.html with correct asset hashes for caching.
  // The output can be customised by editing /index.html
  // see https://github.com/ampedandwired/html-webpack-plugin
  new HtmlWebpackPlugin({
    filename: isTesting ? 'index.html' : config.build.index,
    template: 'index.html',
    inject: true,
    minify: {
      removeComments: true,
      collapseWhitespace: true,
      removeAttributeQuotes: true
      // more options:
      // https://github.com/kangax/html-minifier#options-quick-reference
    },
    // necessary to consistently work with multiple chunks via CommonsChunkPlugin
    chunksSortMode: 'dependency'
  }),
  // keep module.id stable when vendor modules do not change
  new webpack.HashedModuleIdsPlugin(),
  // enable scope hoisting
  new webpack.optimize.ModuleConcatenationPlugin(),
  // split vendor js into its own file
  new webpack.optimize.CommonsChunkPlugin({
    name: 'vendor',
    minChunks: isVendorModule
  }),
  // Extract the webpack runtime and module manifest into their own file so
  // the vendor hash does not change whenever the app bundle is updated.
  new webpack.optimize.CommonsChunkPlugin({
    name: 'manifest',
    minChunks: Infinity
  }),
  // Extract chunks shared between code-split chunks into a separate async
  // chunk, similar to the vendor chunk.
  // see: https://webpack.js.org/plugins/commons-chunk-plugin/#extra-async-commons-chunk
  new webpack.optimize.CommonsChunkPlugin({
    name: 'app',
    async: 'vendor-async',
    children: true,
    minChunks: 3
  }),
  // Copy custom static assets, skipping dot-files.
  new CopyWebpackPlugin([
    {
      from: path.resolve(__dirname, '../static'),
      to: config.build.assetsSubDirectory,
      ignore: ['.*']
    }
  ])
]

// Production configuration layered on top of the shared base configuration.
const webpackConfig = merge(baseWebpackConfig, {
  module: {
    rules: prodStyleRules
  },
  devtool: useSourceMap ? config.build.devtool : false,
  output: {
    path: config.build.assetsRoot,
    filename: utils.assetsPath('js/[name].[chunkhash].js'),
    chunkFilename: utils.assetsPath('js/[id].[chunkhash].js')
  },
  plugins: prodPlugins
})
125 |
// Optional gzip compression of emitted assets, enabled from the build config.
if (config.build.productionGzip) {
  const CompressionWebpackPlugin = require('compression-webpack-plugin')

  // Matches files ending in any of the configured extensions.
  const gzipPattern = new RegExp(
    '\\.(' + config.build.productionGzipExtensions.join('|') + ')$'
  )
  const gzipPlugin = new CompressionWebpackPlugin({
    asset: '[path].gz[query]',
    algorithm: 'gzip',
    test: gzipPattern,
    threshold: 10240,
    minRatio: 0.8
  })

  webpackConfig.plugins.push(gzipPlugin)
}

// Optional bundle analyzer report, enabled from the build config.
if (config.build.bundleAnalyzerReport) {
  const { BundleAnalyzerPlugin } = require('webpack-bundle-analyzer')
  webpackConfig.plugins.push(new BundleAnalyzerPlugin())
}
148 |
149 | module.exports = webpackConfig
150 |
--------------------------------------------------------------------------------
/frontend/config/dev.env.js:
--------------------------------------------------------------------------------
1 | 'use strict'
2 | const merge = require('webpack-merge')
3 | const prodEnv = require('./prod.env')
4 |
// Development environment: the production values with NODE_ENV overridden.
// The value keeps its inner double quotes.
const devOverrides = {
  NODE_ENV: '"development"'
}

module.exports = merge(prodEnv, devOverrides)
8 |
--------------------------------------------------------------------------------
/frontend/config/index.js:
--------------------------------------------------------------------------------
1 | 'use strict'
2 | // Template version: 1.3.1
3 | // see http://vuejs-templates.github.io/webpack for documentation.
4 |
5 | const path = require('path')
6 |
7 | module.exports = {
8 | dev: {
9 |
10 | // Paths
11 | assetsSubDirectory: 'static',
12 | assetsPublicPath: '/',
13 | proxyTable: {},
14 |
15 | // Various Dev Server settings
16 | host: 'localhost', // can be overwritten by process.env.HOST
17 | port: 8080, // can be overwritten by process.env.PORT, if port is in use, a free one will be determined
18 | autoOpenBrowser: false,
19 | errorOverlay: true,
20 | notifyOnErrors: true,
21 | poll: false, // https://webpack.js.org/configuration/dev-server/#devserver-watchoptions-
22 |
23 | // Use Eslint Loader?
24 | // If true, your code will be linted during bundling and
25 | // linting errors and warnings will be shown in the console.
26 | // useEslint: true,
27 | useEslint: false,
28 | // If true, eslint errors and warnings will also be shown in the error overlay
29 | // in the browser.
30 | showEslintErrorsInOverlay: false,
31 |
32 | /**
33 | * Source Maps
34 | */
35 |
36 | // https://webpack.js.org/configuration/devtool/#development
37 | devtool: 'cheap-module-eval-source-map',
38 |
39 | // If you have problems debugging vue-files in devtools,
40 | // set this to false - it *may* help
41 | // https://vue-loader.vuejs.org/en/options.html#cachebusting
42 | cacheBusting: true,
43 |
44 | cssSourceMap: true
45 | },
46 |
47 | build: {
48 | // Template for index.html
49 | index: path.resolve(__dirname, '../dist/index.html'),
50 |
51 | // Paths
52 | assetsRoot: path.resolve(__dirname, '../dist'),
53 | assetsSubDirectory: 'static',
54 | assetsPublicPath: '/',
55 |
56 | /**
57 | * Source Maps
58 | */
59 |
60 | productionSourceMap: true,
61 | // https://webpack.js.org/configuration/devtool/#production
62 | devtool: '#source-map',
63 |
64 | // Gzip off by default as many popular static hosts such as
65 | // Surge or Netlify already gzip all static assets for you.
66 | // Before setting to `true`, make sure to:
67 | // npm install --save-dev compression-webpack-plugin
68 | productionGzip: false,
69 | productionGzipExtensions: ['js', 'css'],
70 |
71 | // Run the build command with an extra argument to
72 | // View the bundle analyzer report after build finishes:
73 | // `npm run build --report`
74 | // Set to `true` or `false` to always turn it on or off
75 | bundleAnalyzerReport: process.env.npm_config_report
76 | }
77 | }
78 |
--------------------------------------------------------------------------------
/frontend/config/prod.env.js:
--------------------------------------------------------------------------------
1 | 'use strict'
2 | module.exports = {
3 | NODE_ENV: '"production"'
4 | }
5 |
--------------------------------------------------------------------------------
/frontend/config/test.env.js:
--------------------------------------------------------------------------------
1 | 'use strict'
2 | const merge = require('webpack-merge')
3 | const devEnv = require('./dev.env')
4 |
// Testing environment: the development values with NODE_ENV overridden.
// The value keeps its inner double quotes.
const testOverrides = {
  NODE_ENV: '"testing"'
}

module.exports = merge(devEnv, testOverrides)
8 |
--------------------------------------------------------------------------------
/frontend/index.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 | frontend
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/frontend/package.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "frontend",
3 | "version": "1.0.0",
4 | "description": "cnki spider",
5 | "author": "coder-syl",
6 | "private": true,
7 | "scripts": {
8 | "dev": "webpack-dev-server --inline --progress --config build/webpack.dev.conf.js",
9 | "start": "npm run dev",
10 | "unit": "jest --config test/unit/jest.conf.js --coverage",
11 | "e2e": "node test/e2e/runner.js",
12 | "test": "npm run unit && npm run e2e",
13 | "lint": "eslint --ext .js,.vue src test/unit test/e2e/specs",
14 | "build": "node build/build.js"
15 | },
16 | "dependencies": {
17 | "element-ui": "^2.13.0",
18 | "vue": "^2.5.2",
19 | "vue-router": "^3.0.1"
20 | },
21 | "devDependencies": {
22 | "autoprefixer": "^7.1.2",
23 | "babel-core": "^6.22.1",
24 | "babel-eslint": "^8.2.1",
25 | "babel-helper-vue-jsx-merge-props": "^2.0.3",
26 | "babel-jest": "^21.0.2",
27 | "babel-loader": "^7.1.1",
28 | "babel-plugin-dynamic-import-node": "^1.2.0",
29 | "babel-plugin-syntax-jsx": "^6.18.0",
30 | "babel-plugin-transform-es2015-modules-commonjs": "^6.26.0",
31 | "babel-plugin-transform-runtime": "^6.22.0",
32 | "babel-plugin-transform-vue-jsx": "^3.5.0",
33 | "babel-preset-env": "^1.3.2",
34 | "babel-preset-stage-2": "^6.22.0",
35 | "babel-register": "^6.22.0",
36 | "chalk": "^2.0.1",
37 | "chromedriver": "^2.27.2",
38 | "copy-webpack-plugin": "^4.0.1",
39 | "cross-spawn": "^5.0.1",
40 | "css-loader": "^0.28.0",
41 | "eslint": "^4.15.0",
42 | "eslint-config-standard": "^10.2.1",
43 | "eslint-friendly-formatter": "^3.0.0",
44 | "eslint-loader": "^1.7.1",
45 | "eslint-plugin-import": "^2.7.0",
46 | "eslint-plugin-node": "^5.2.0",
47 | "eslint-plugin-promise": "^3.4.0",
48 | "eslint-plugin-standard": "^3.0.1",
49 | "eslint-plugin-vue": "^4.0.0",
50 | "extract-text-webpack-plugin": "^3.0.0",
51 | "file-loader": "^1.1.4",
52 | "friendly-errors-webpack-plugin": "^1.6.1",
53 | "html-webpack-plugin": "^2.30.1",
54 | "jest": "^22.0.4",
55 | "jest-serializer-vue": "^0.3.0",
56 | "nightwatch": "^0.9.12",
57 | "node-notifier": "^5.1.2",
58 | "optimize-css-assets-webpack-plugin": "^3.2.0",
59 | "ora": "^1.2.0",
60 | "portfinder": "^1.0.13",
61 | "postcss-import": "^11.0.0",
62 | "postcss-loader": "^2.0.8",
63 | "postcss-url": "^7.2.1",
64 | "rimraf": "^2.6.0",
65 | "selenium-server": "^3.0.1",
66 | "semver": "^5.3.0",
67 | "shelljs": "^0.7.6",
68 | "uglifyjs-webpack-plugin": "^1.1.1",
69 | "url-loader": "^0.5.8",
70 | "vue-jest": "^1.0.2",
71 | "vue-loader": "^13.3.0",
72 | "vue-style-loader": "^3.0.1",
73 | "vue-template-compiler": "^2.5.2",
74 | "webpack": "^3.6.0",
75 | "webpack-bundle-analyzer": "^3.5.2",
76 | "webpack-dev-server": "^2.9.1",
77 | "webpack-merge": "^4.1.0"
78 | },
79 | "engines": {
80 | "node": ">= 6.0.0",
81 | "npm": ">= 3.0.0"
82 | },
83 | "browserslist": [
84 | "> 1%",
85 | "last 2 versions",
86 | "not ie <= 8"
87 | ]
88 | }
89 |
--------------------------------------------------------------------------------
/frontend/src/App.vue:
--------------------------------------------------------------------------------
1 |
2 |
13 |
14 |
25 |
75 |
--------------------------------------------------------------------------------
/frontend/src/assets/img/analyse.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/coder-syl/cnki-spider-and-analyse/85118a20028403b6f6e21d1cd0f933bf96f0d4dd/frontend/src/assets/img/analyse.png
--------------------------------------------------------------------------------
/frontend/src/assets/img/btos.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/coder-syl/cnki-spider-and-analyse/85118a20028403b6f6e21d1cd0f933bf96f0d4dd/frontend/src/assets/img/btos.png
--------------------------------------------------------------------------------
/frontend/src/assets/img/chart.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/coder-syl/cnki-spider-and-analyse/85118a20028403b6f6e21d1cd0f933bf96f0d4dd/frontend/src/assets/img/chart.png
--------------------------------------------------------------------------------
/frontend/src/assets/img/check.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/coder-syl/cnki-spider-and-analyse/85118a20028403b6f6e21d1cd0f933bf96f0d4dd/frontend/src/assets/img/check.png
--------------------------------------------------------------------------------
/frontend/src/assets/img/cnki.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/coder-syl/cnki-spider-and-analyse/85118a20028403b6f6e21d1cd0f933bf96f0d4dd/frontend/src/assets/img/cnki.png
--------------------------------------------------------------------------------
/frontend/src/assets/img/footer.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/coder-syl/cnki-spider-and-analyse/85118a20028403b6f6e21d1cd0f933bf96f0d4dd/frontend/src/assets/img/footer.png
--------------------------------------------------------------------------------
/frontend/src/assets/img/hot/icon-item001.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/coder-syl/cnki-spider-and-analyse/85118a20028403b6f6e21d1cd0f933bf96f0d4dd/frontend/src/assets/img/hot/icon-item001.png
--------------------------------------------------------------------------------
/frontend/src/assets/img/hot/icon-item002.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/coder-syl/cnki-spider-and-analyse/85118a20028403b6f6e21d1cd0f933bf96f0d4dd/frontend/src/assets/img/hot/icon-item002.png
--------------------------------------------------------------------------------
/frontend/src/assets/img/hot/icon-item003.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/coder-syl/cnki-spider-and-analyse/85118a20028403b6f6e21d1cd0f933bf96f0d4dd/frontend/src/assets/img/hot/icon-item003.png
--------------------------------------------------------------------------------
/frontend/src/assets/img/hot/icon-item004.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/coder-syl/cnki-spider-and-analyse/85118a20028403b6f6e21d1cd0f933bf96f0d4dd/frontend/src/assets/img/hot/icon-item004.png
--------------------------------------------------------------------------------
/frontend/src/assets/img/hot/icon-item005.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/coder-syl/cnki-spider-and-analyse/85118a20028403b6f6e21d1cd0f933bf96f0d4dd/frontend/src/assets/img/hot/icon-item005.png
--------------------------------------------------------------------------------
/frontend/src/assets/img/hot/icon-item006.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/coder-syl/cnki-spider-and-analyse/85118a20028403b6f6e21d1cd0f933bf96f0d4dd/frontend/src/assets/img/hot/icon-item006.png
--------------------------------------------------------------------------------
/frontend/src/assets/img/hotAnalyBg.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/coder-syl/cnki-spider-and-analyse/85118a20028403b6f6e21d1cd0f933bf96f0d4dd/frontend/src/assets/img/hotAnalyBg.jpg
--------------------------------------------------------------------------------
/frontend/src/assets/img/icon1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/coder-syl/cnki-spider-and-analyse/85118a20028403b6f6e21d1cd0f933bf96f0d4dd/frontend/src/assets/img/icon1.png
--------------------------------------------------------------------------------
/frontend/src/assets/img/icon2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/coder-syl/cnki-spider-and-analyse/85118a20028403b6f6e21d1cd0f933bf96f0d4dd/frontend/src/assets/img/icon2.png
--------------------------------------------------------------------------------
/frontend/src/assets/img/icon3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/coder-syl/cnki-spider-and-analyse/85118a20028403b6f6e21d1cd0f933bf96f0d4dd/frontend/src/assets/img/icon3.png
--------------------------------------------------------------------------------
/frontend/src/assets/img/logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/coder-syl/cnki-spider-and-analyse/85118a20028403b6f6e21d1cd0f933bf96f0d4dd/frontend/src/assets/img/logo.png
--------------------------------------------------------------------------------
/frontend/src/assets/img/menu.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/coder-syl/cnki-spider-and-analyse/85118a20028403b6f6e21d1cd0f933bf96f0d4dd/frontend/src/assets/img/menu.png
--------------------------------------------------------------------------------
/frontend/src/assets/img/online/left.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/coder-syl/cnki-spider-and-analyse/85118a20028403b6f6e21d1cd0f933bf96f0d4dd/frontend/src/assets/img/online/left.png
--------------------------------------------------------------------------------
/frontend/src/assets/img/online/right.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/coder-syl/cnki-spider-and-analyse/85118a20028403b6f6e21d1cd0f933bf96f0d4dd/frontend/src/assets/img/online/right.png
--------------------------------------------------------------------------------
/frontend/src/assets/img/sliderBg.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/coder-syl/cnki-spider-and-analyse/85118a20028403b6f6e21d1cd0f933bf96f0d4dd/frontend/src/assets/img/sliderBg.png
--------------------------------------------------------------------------------
/frontend/src/assets/img/under.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/coder-syl/cnki-spider-and-analyse/85118a20028403b6f6e21d1cd0f933bf96f0d4dd/frontend/src/assets/img/under.png
--------------------------------------------------------------------------------
/frontend/src/components/cnki-footer.vue:
--------------------------------------------------------------------------------
1 |
2 |
50 |
51 |
52 |
62 |
63 |
64 |
183 |
--------------------------------------------------------------------------------
/frontend/src/components/cnki-header.vue:
--------------------------------------------------------------------------------
1 |
2 |
27 |
28 |
29 |
70 |
71 |
72 |
188 |
--------------------------------------------------------------------------------
/frontend/src/components/index/index-data.vue:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
{{item.title}}
7 |
8 |
9 |
{{ item.dec}}
10 |
11 |
12 |
13 |
14 |
15 | >
42 |
82 |
--------------------------------------------------------------------------------
/frontend/src/components/index/index-feature.vue:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
知网大数据
7 |
8 |
9 |
通过智能的在线爬虫,实时抓取特定的论文信息,实时针对数据在线分析,快速了解某个领域。同时系统支持在线查找,可查找具体的作者, 学校,论文。实现到点的分析。
10 |
11 |
12 |
13 |

14 |
15 |
16 |
17 |
18 |

19 |
20 |
21 |
22 |
可视化图表
23 |
24 |
25 |
精准的可视化数据图表,让数据一览无余,找出数据背后的价值.提供多样式的表格数据展示,满足多种数据阅读需要, 支持灵活地筛选需要观察的数据
26 |
27 |
28 |
29 |
30 |
31 |
32 |
性能稳定的分析平台
33 |
34 |
35 |
将数据分析和爬虫相结合,使用户可以从复杂的代码编写中解放出来,快速的抓取和分析知网数据
36 |
37 |
38 |
39 |

40 |
41 |
42 |
43 |
44 | >
49 |
104 |
--------------------------------------------------------------------------------
/frontend/src/components/index/index-service.vue:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
![]()
7 |
8 |
9 |
{{item.title}}
10 |
11 |
14 |
15 |
16 |
17 |
18 | >
45 |
77 |
--------------------------------------------------------------------------------
/frontend/src/components/index/index-slider.vue:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
知网大数据
7 |
8 |
9 |
本系统采用Django+Celery实现在线知网爬虫,并针对爬取的数据进行实时的数据分析
10 |
11 |
12 |
13 |
14 |
15 |
16 |

17 |
18 |
19 |
20 |
21 | >
26 |
101 |
--------------------------------------------------------------------------------
/frontend/src/layouts/chartAnalyse.vue:
--------------------------------------------------------------------------------
1 |
2 |
3 |
{{ msg }}
4 | Essential Links
5 |
6 |
7 |
8 |
18 |
19 |
20 |
37 |
--------------------------------------------------------------------------------
/frontend/src/layouts/hotAnalyse.vue:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
热门查询
5 |
6 |
7 |
{{index+1}}
8 |
9 |
10 |
![]()
11 |
12 |
{{item.num}}
13 |
{{item.title}}
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
60 |
61 |
124 |
--------------------------------------------------------------------------------
/frontend/src/layouts/index.vue:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
30 |
31 |
32 |
34 |
--------------------------------------------------------------------------------
/frontend/src/layouts/onlineSpider.vue:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 | 启动爬虫
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
输入关键词
18 |
在输入框内输入你要抓取的的相关的关键字,例如大数据,然后回车或者点击开启爬虫按钮。
19 |
20 |
21 |
22 |
23 |
等待数据爬取
24 |
提示爬虫已经启动,然后等待10秒钟,等待系统连接到知网,此时可以实现查看爬取的进度和爬取的相关论文。
25 |
26 |
27 |
28 |
29 |
30 |
31 |
输入关键词
32 |
在输入框内输入你要抓取的的相关的关键字,例如大数据,然后回车或者点击开启爬虫按钮。
33 |
34 |
35 |
36 |
37 |
等待数据爬取
38 |
提示爬虫已经启动,然后等待10秒钟,等待系统连接到知网,此时可以实现查看爬取的进度和爬取的相关论文。
39 |
40 |
41 |
42 |
43 |
44 |
45 |
46 |
47 |
48 |
49 |
50 |
51 |
52 |
53 |
54 | 移除
59 |
60 |
61 |
62 |
63 |
64 |
65 |
66 |
67 |
147 |
148 |
149 |
288 |
--------------------------------------------------------------------------------
/frontend/src/layouts/paperDetail.vue:
--------------------------------------------------------------------------------
1 |
2 |
3 |
{{ msg }}
4 | Essential Links
5 |
6 |
7 |
8 |
18 |
19 |
20 |
37 |
--------------------------------------------------------------------------------
/frontend/src/main.js:
--------------------------------------------------------------------------------
1 | // The Vue build version to load with the `import` command
2 | // (runtime-only or standalone) has been set in webpack.base.conf with an alias.
3 | import Vue from 'vue'
4 | import App from './App'
5 | import router from './router'
6 | import ElementUI from 'element-ui'
7 | import 'element-ui/lib/theme-chalk/index.css'
8 |
// Register Element UI globally so its components can be used in any template.
Vue.use(ElementUI)
// Turn off the tip Vue prints to the console on startup.
Vue.config.productionTip = false

// Mount the root instance on #app. The root template has to render the
// locally registered App component; with an empty template string Vue falls
// back to the mount element's own markup and App is never rendered.
/* eslint-disable no-new */
new Vue({
  el: '#app',
  router,
  components: { App },
  template: '<App/>'
})
19 |
--------------------------------------------------------------------------------
/frontend/src/router/index.js:
--------------------------------------------------------------------------------
1 | import Vue from 'vue'
2 | import Router from 'vue-router'
3 | import index from '@/layouts/index'
4 | import hotAnalyse from '@/layouts/hotAnalyse'
5 | import onlineSpider from '@/layouts/onlineSpider'
6 | import chartAnalyse from '@/layouts/chartAnalyse'
7 | import paperDetail from '@/layouts/paperDetail'
8 |
// Install the router plugin before the Router instance is created.
Vue.use(Router)

// One route per top-level page; each route name matches its layout component.
const routes = [
  { path: '/', name: 'index', component: index },
  { path: '/hotAnalyse', name: 'hotAnalyse', component: hotAnalyse },
  { path: '/onlineSpider', name: 'onlineSpider', component: onlineSpider },
  { path: '/chartAnalyse', name: 'chartAnalyse', component: chartAnalyse },
  { path: '/paperDetail', name: 'paperDetail', component: paperDetail }
]

export default new Router({ routes })
40 |
--------------------------------------------------------------------------------
/frontend/static/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/coder-syl/cnki-spider-and-analyse/85118a20028403b6f6e21d1cd0f933bf96f0d4dd/frontend/static/.gitkeep
--------------------------------------------------------------------------------
/frontend/test/e2e/custom-assertions/elementCount.js:
--------------------------------------------------------------------------------
1 | // A custom Nightwatch assertion.
2 | // The assertion name is the filename.
3 | // Example usage:
4 | //
5 | // browser.assert.elementCount(selector, count)
6 | //
7 | // For more information on custom assertions see:
8 | // http://nightwatchjs.org/guide#writing-custom-assertions
9 |
10 | exports.assertion = function (selector, count) {
11 | this.message = 'Testing if element <' + selector + '> has count: ' + count
12 | this.expected = count
13 | this.pass = function (val) {
14 | return val === this.expected
15 | }
16 | this.value = function (res) {
17 | return res.value
18 | }
19 | this.command = function (cb) {
20 | var self = this
21 | return this.api.execute(function (selector) {
22 | return document.querySelectorAll(selector).length
23 | }, [selector], function (res) {
24 | cb.call(self, res)
25 | })
26 | }
27 | }
28 |
--------------------------------------------------------------------------------
/frontend/test/e2e/nightwatch.conf.js:
--------------------------------------------------------------------------------
1 | require('babel-register')
2 | var config = require('../../config')
3 |
4 | // http://nightwatchjs.org/gettingstarted#settings-file
5 | module.exports = {
6 | src_folders: ['test/e2e/specs'],
7 | output_folder: 'test/e2e/reports',
8 | custom_assertions_path: ['test/e2e/custom-assertions'],
9 |
10 | selenium: {
11 | start_process: true,
12 | server_path: require('selenium-server').path,
13 | host: '127.0.0.1',
14 | port: 4444,
15 | cli_args: {
16 | 'webdriver.chrome.driver': require('chromedriver').path
17 | }
18 | },
19 |
20 | test_settings: {
21 | default: {
22 | selenium_port: 4444,
23 | selenium_host: 'localhost',
24 | silent: true,
25 | globals: {
26 | devServerURL: 'http://localhost:' + (process.env.PORT || config.dev.port)
27 | }
28 | },
29 |
30 | chrome: {
31 | desiredCapabilities: {
32 | browserName: 'chrome',
33 | javascriptEnabled: true,
34 | acceptSslCerts: true
35 | }
36 | },
37 |
38 | firefox: {
39 | desiredCapabilities: {
40 | browserName: 'firefox',
41 | javascriptEnabled: true,
42 | acceptSslCerts: true
43 | }
44 | }
45 | }
46 | }
47 |
--------------------------------------------------------------------------------
/frontend/test/e2e/runner.js:
--------------------------------------------------------------------------------
1 | // 1. start the dev server using production config
2 | process.env.NODE_ENV = 'testing'
3 |
4 | const webpack = require('webpack')
5 | const DevServer = require('webpack-dev-server')
6 |
7 | const webpackConfig = require('../../build/webpack.prod.conf')
8 | const devConfigPromise = require('../../build/webpack.dev.conf')
9 |
let server

// Build the nightwatch CLI arguments from the process arguments, filling in
// the default config file and browser environment when the caller gave none.
function nightwatchArgs (argv) {
  let opts = argv.slice(2)
  if (!opts.includes('--config')) {
    opts = opts.concat(['--config', 'test/e2e/nightwatch.conf.js'])
  }
  if (!opts.includes('--env')) {
    opts = opts.concat(['--env', 'chrome'])
  }
  return opts
}

devConfigPromise
  .then(devConfig => {
    // Serve the production bundle with the dev server's options.
    const devServerOptions = devConfig.devServer
    server = new DevServer(webpack(webpackConfig), devServerOptions)
    return server.listen(devServerOptions.port, devServerOptions.host)
  })
  .then(() => {
    // 2. run the nightwatch test suite against it
    // to run in additional browsers:
    //    1. add an entry in test/e2e/nightwatch.conf.js under "test_settings"
    //    2. add it to the --env flag, or override the environment flag,
    //       for example: `npm run e2e -- --env chrome,firefox`
    // For more information on Nightwatch's config file, see
    // http://nightwatchjs.org/guide#settings-file
    const spawn = require('cross-spawn')
    const runner = spawn(
      './node_modules/.bin/nightwatch',
      nightwatchArgs(process.argv),
      { stdio: 'inherit' }
    )

    // Always stop the dev server, then propagate nightwatch's outcome.
    runner.on('exit', code => {
      server.close()
      process.exit(code)
    })

    runner.on('error', err => {
      server.close()
      throw err
    })
  })
49 |
--------------------------------------------------------------------------------
/frontend/test/e2e/specs/test.js:
--------------------------------------------------------------------------------
1 | // For authoring Nightwatch tests, see
2 | // http://nightwatchjs.org/guide#usage
3 |
4 | module.exports = {
5 | 'default e2e tests': function (browser) {
6 | // automatically uses dev Server port from /config.index.js
7 | // default: http://localhost:8080
8 | // see nightwatch.conf.js
9 | const devServer = browser.globals.devServerURL
10 |
11 | browser
12 | .url(devServer)
13 | .waitForElementVisible('#app', 5000)
14 | .assert.elementPresent('.hello')
15 | .assert.containsText('h1', 'Welcome to Your Vue.js App')
16 | .assert.elementCount('img', 1)
17 | .end()
18 | }
19 | }
20 |
--------------------------------------------------------------------------------
/frontend/test/unit/.eslintrc:
--------------------------------------------------------------------------------
1 | {
2 | "env": {
3 | "jest": true
4 | },
5 | "globals": {
6 | }
7 | }
8 |
--------------------------------------------------------------------------------
/frontend/test/unit/jest.conf.js:
--------------------------------------------------------------------------------
1 | const path = require('path')
2 |
3 | module.exports = {
4 | rootDir: path.resolve(__dirname, '../../'),
5 | moduleFileExtensions: [
6 | 'js',
7 | 'json',
8 | 'vue'
9 | ],
10 | moduleNameMapper: {
11 | '^@/(.*)$': '/src/$1'
12 | },
13 | transform: {
14 | '^.+\\.js$': '/node_modules/babel-jest',
15 | '.*\\.(vue)$': '/node_modules/vue-jest'
16 | },
17 | testPathIgnorePatterns: [
18 | '/test/e2e'
19 | ],
20 | snapshotSerializers: ['/node_modules/jest-serializer-vue'],
21 | setupFiles: ['/test/unit/setup'],
22 | mapCoverage: true,
23 | coverageDirectory: '/test/unit/coverage',
24 | collectCoverageFrom: [
25 | 'src/**/*.{js,vue}',
26 | '!src/main.js',
27 | '!src/router/index.js',
28 | '!**/node_modules/**'
29 | ]
30 | }
31 |
--------------------------------------------------------------------------------
/frontend/test/unit/setup.js:
--------------------------------------------------------------------------------
1 | import Vue from 'vue'
2 |
// Turn off Vue's startup production tip so it does not clutter test output.
Vue.config.productionTip = false
4 |
--------------------------------------------------------------------------------
/frontend/test/unit/specs/HelloWorld.spec.js:
--------------------------------------------------------------------------------
1 | import Vue from 'vue'
2 | import HelloWorld from '@/components/HelloWorld'
3 |
describe('HelloWorld.vue', () => {
  it('should render correct contents', () => {
    // Mount the component off-document and inspect the rendered heading.
    const HelloWorldCtor = Vue.extend(HelloWorld)
    const mounted = new HelloWorldCtor().$mount()
    const heading = mounted.$el.querySelector('.hello h1')
    expect(heading.textContent).toEqual('Welcome to Your Vue.js App')
  })
})
12 |
--------------------------------------------------------------------------------
/manage.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | import os
3 | import sys
4 |
def main():
    """Point Django at the project settings and run the requested command."""
    os.environ.setdefault("DJANGO_SETTINGS_MODULE", "cnki.settings")
    try:
        from django.core.management import execute_from_command_line
    except ImportError as exc:
        # Re-raise with a hint, keeping the original error as the cause.
        raise ImportError(
            "Couldn't import Django. Are you sure it's installed and "
            "available on your PYTHONPATH environment variable? Did you "
            "forget to activate a virtual environment?"
        ) from exc
    execute_from_command_line(sys.argv)


if __name__ == "__main__":
    main()
16 |
--------------------------------------------------------------------------------
/spider/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/coder-syl/cnki-spider-and-analyse/85118a20028403b6f6e21d1cd0f933bf96f0d4dd/spider/__init__.py
--------------------------------------------------------------------------------
/spider/__pycache__/__init__.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/coder-syl/cnki-spider-and-analyse/85118a20028403b6f6e21d1cd0f933bf96f0d4dd/spider/__pycache__/__init__.cpython-36.pyc
--------------------------------------------------------------------------------
/spider/__pycache__/config.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/coder-syl/cnki-spider-and-analyse/85118a20028403b6f6e21d1cd0f933bf96f0d4dd/spider/__pycache__/config.cpython-36.pyc
--------------------------------------------------------------------------------
/spider/__pycache__/db_handle.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/coder-syl/cnki-spider-and-analyse/85118a20028403b6f6e21d1cd0f933bf96f0d4dd/spider/__pycache__/db_handle.cpython-36.pyc
--------------------------------------------------------------------------------
/spider/__pycache__/get_authors.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/coder-syl/cnki-spider-and-analyse/85118a20028403b6f6e21d1cd0f933bf96f0d4dd/spider/__pycache__/get_authors.cpython-36.pyc
--------------------------------------------------------------------------------
/spider/__pycache__/get_cookies.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/coder-syl/cnki-spider-and-analyse/85118a20028403b6f6e21d1cd0f933bf96f0d4dd/spider/__pycache__/get_cookies.cpython-36.pyc
--------------------------------------------------------------------------------
/spider/__pycache__/get_fund.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/coder-syl/cnki-spider-and-analyse/85118a20028403b6f6e21d1cd0f933bf96f0d4dd/spider/__pycache__/get_fund.cpython-36.pyc
--------------------------------------------------------------------------------
/spider/__pycache__/get_keyWordID.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/coder-syl/cnki-spider-and-analyse/85118a20028403b6f6e21d1cd0f933bf96f0d4dd/spider/__pycache__/get_keyWordID.cpython-36.pyc
--------------------------------------------------------------------------------
/spider/__pycache__/get_school.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/coder-syl/cnki-spider-and-analyse/85118a20028403b6f6e21d1cd0f933bf96f0d4dd/spider/__pycache__/get_school.cpython-36.pyc
--------------------------------------------------------------------------------
/spider/__pycache__/get_year.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/coder-syl/cnki-spider-and-analyse/85118a20028403b6f6e21d1cd0f933bf96f0d4dd/spider/__pycache__/get_year.cpython-36.pyc
--------------------------------------------------------------------------------
/spider/__pycache__/mian.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/coder-syl/cnki-spider-and-analyse/85118a20028403b6f6e21d1cd0f933bf96f0d4dd/spider/__pycache__/mian.cpython-36.pyc
--------------------------------------------------------------------------------
/spider/__pycache__/paper_detail.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/coder-syl/cnki-spider-and-analyse/85118a20028403b6f6e21d1cd0f933bf96f0d4dd/spider/__pycache__/paper_detail.cpython-36.pyc
--------------------------------------------------------------------------------
/spider/__pycache__/paper_spider.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/coder-syl/cnki-spider-and-analyse/85118a20028403b6f6e21d1cd0f933bf96f0d4dd/spider/__pycache__/paper_spider.cpython-36.pyc
--------------------------------------------------------------------------------
/spider/__pycache__/paper_spider_by_app.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/coder-syl/cnki-spider-and-analyse/85118a20028403b6f6e21d1cd0f933bf96f0d4dd/spider/__pycache__/paper_spider_by_app.cpython-36.pyc
--------------------------------------------------------------------------------
/spider/config.py:
--------------------------------------------------------------------------------
# Connection settings for the MySQL database used by the spider.
db_config = dict(
    host='127.0.0.1',
    # host='/Applications/MAMP/tmp/mysql/mysql.sock',  # alternative: socket path as host
    port=3306,
    user='root',
    password='root',
    database='cnki',
    # charset='UTF8',
)
10 | # paper_title=str('123')
11 | # url="http://localhost:8000/cnki/paperDetail?title="
12 | # local_url = url+paper_title
13 | # print(local_url)
--------------------------------------------------------------------------------
/spider/db_handle.py:
--------------------------------------------------------------------------------
1 | # coding:utf-8
2 |
3 | import pymysql
4 | from .config import db_config as df
5 | # from config import db_config as df
6 |
class dbHandle():
    """Small wrapper around one pymysql connection and its cursor.

    Every method accepts the SQL text as before. An optional ``params``
    sequence can additionally be passed so values are bound by the driver
    instead of being formatted into the SQL string.
    """

    def __init__(self,):
        # Define both attributes first so dbClose() is safe even when the
        # connection attempt fails.
        self.conn = None
        self.cur = None
        try:
            self.conn = pymysql.connect(
                host=df['host'],
                user=df['user'],
                password=df['password'],
                database=df['database'],
                # Socket path may be overridden from the config; the default
                # is the path that used to be hard-coded here.
                unix_socket=df.get(
                    'unix_socket', "/Applications/MAMP/tmp/mysql/mysql.sock"),
                charset='utf8',
            )
        except pymysql.MySQLError:
            # Report and re-raise the real cause instead of failing later
            # with an unrelated AttributeError on self.conn.
            print("连接数据库失败")
            raise
        self.cur = self.conn.cursor()

    def dbClose(self):
        """Close the cursor and the connection; safe to call repeatedly."""
        if self.cur is not None:
            self.cur.close()
            self.cur = None
        if self.conn is not None:
            self.conn.close()
            self.conn = None

    def _execute(self, sql, params=None):
        """Run ``sql`` on the cursor, binding ``params`` when given."""
        if params is None:
            # Keep the exact legacy call for callers that pass only SQL text.
            self.cur.execute(sql)
        else:
            self.cur.execute(sql, params)

    def _write(self, sql, params, ok_message, fail_message):
        """Execute and commit a write statement.

        Failures are reported on stdout and rolled back instead of being
        raised, matching the best-effort behaviour callers rely on.
        Returns True on success and False on failure.
        """
        try:
            self._execute(sql, params)
            self.conn.commit()
        except Exception as e:
            print(e)
            print(fail_message)
            try:
                self.conn.rollback()
            except Exception as rollback_error:
                print(rollback_error)
            return False
        # Only report success once the commit has gone through.
        print(ok_message)
        return True

    def dbQuery(self, sql, params=None):
        """Run a SELECT and return every row from ``fetchall()``."""
        self._execute(sql, params)
        data = self.cur.fetchall()
        return data

    def dbInsert(self, sql, params=None):
        """Run and commit an INSERT; returns True on success, else False."""
        print('开始插入')
        return self._write(sql, params, "插入成功!!!", '插入失败!!!')

    def dbUpdate(self, sql, params=None):
        """Run and commit an UPDATE; returns True on success, else False."""
        return self._write(sql, params, "更新状态成功!!!", '更新状态失败!!!')
--------------------------------------------------------------------------------
/spider/get_authors.py:
--------------------------------------------------------------------------------
1 | from selenium import webdriver
2 | import time
3 |
4 |
5 | from .db_handle import dbHandle
6 | # from db_handle import dbHandle
def getAuthors(driver, keywordID):
    """Scrape the "作者" (author) facet and store the per-author counts.

    Clicks the author facet link on the page ``driver`` is showing, reads
    each ``name(count)`` list entry, inserts the name into
    ``analyse_author`` and links it to ``keywordID`` in
    ``analyse_authortokeyword``.

    Args:
        driver: Selenium WebDriver positioned on the search result page.
        keywordID: Integer id of the keyword row the counts belong to.
    """
    print('点击作者链接')
    facet_link = driver.find_element_by_link_text('作者')
    print(facet_link.text)
    facet_link.click()
    time.sleep(5)
    li_div = driver.find_element_by_class_name('hide')
    ul = li_div.find_element_by_tag_name('ul')
    lis = ul.find_elements_by_tag_name('li')
    print(lis)

    # One connection for the whole list, always released afterwards.
    dbhandle = dbHandle()
    try:
        for li in lis:
            text = str(li.text).replace('\n', '')
            print('作者', text)
            author = text.split('(')[0]
            # Take the count from the last parenthesised group so a name
            # that itself contains "(" does not break the parse.
            number = text.rpartition('(')[2].replace(')', '')
            print(author)
            print(number)
            if '(' not in text or not number.isdigit():
                print('跳过无法解析的条目', text)
                continue

            # The name comes from a web page: escape it before it goes
            # into the SQL text.
            safe_author = dbhandle.conn.escape_string(author)
            in_author_sql = "INSERT INTO analyse_author ( author ) values('%s')" % (safe_author)
            dbhandle.dbInsert(in_author_sql)

            query_authorID_sql = "select id from analyse_author where author='%s' " % (safe_author)
            print(query_authorID_sql)
            rows = dbhandle.dbQuery(query_authorID_sql)
            if not rows:
                print('未找到记录', author)
                continue
            author_id = rows[0][0]
            print(author, 'author_id', author_id)

            in_author_to_keyword = "INSERT INTO analyse_authortokeyword(author_id_id,keyword_id_id,counts)" \
                                   "values('%d','%d','%d')" \
                                   % (author_id, keywordID, int(number))
            dbhandle.dbInsert(in_author_to_keyword)
    finally:
        dbhandle.dbClose()
    time.sleep(5)
37 |
--------------------------------------------------------------------------------
/spider/get_cookies.py:
--------------------------------------------------------------------------------
1 | from selenium import webdriver
2 | import time
3 | from .get_year import getYear
4 | from .get_authors import getAuthors
5 | from .get_school import getSchools
6 | from .get_fund import getFunds
7 | from .get_keyWordID import getKeywordID,insertKeywordID
8 | #
9 | # from get_year import getYear
10 | # from get_authors import getAuthors
11 | # from get_school import getSchools
12 | # from get_fund import getFunds
13 | # from get_keyWordID import getKeywordID,insertKeywordID
14 |
15 |
def getCookies(keyWord):
    """Search CNKI for ``keyWord`` in Chrome and harvest session data.

    Opens the CNKI home page, submits the search, records the keyword and
    the year / author / institution / fund facet counts through the helper
    functions, then reads the browser cookies and the total hit count.

    Args:
        keyWord: Search term typed into the CNKI search box.

    Returns:
        Tuple ``(cookies, total_num)``: a dict mapping cookie names to
        values, and the number of results reported by the result frame.
    """
    driver = webdriver.Chrome('/Applications/chromedriver')
    try:
        print('正在打开知网')
        driver.get('http://www.cnki.net/')
        print('正在获取cookies')
        search_text = driver.find_element_by_id('txt_SearchText')
        search_text.send_keys(keyWord)
        driver.find_element_by_class_name('input-box').find_element_by_class_name('search-btn').click()
        driver.refresh()
        time.sleep(5)
        insertKeywordID(keyWord)
        keyWord_id = getKeywordID(keyWord)
        getYear(driver, keyWord_id)

        # Each facet is only loaded once the previous one has been clicked.
        print('点击研究层次')
        level_link = driver.find_element_by_link_text('研究层次')
        print(level_link.text)
        level_link.click()
        time.sleep(5)

        getAuthors(driver, keyWord_id)
        getSchools(driver, keyWord_id)
        getFunds(driver, keyWord_id)

        cookies = {cookie['name']: cookie['value'] for cookie in driver.get_cookies()}
        driver.switch_to.frame('iframeResult')
        # Strip the text around the number and its thousands separators.
        total_num = int(
            str(driver.find_element_by_class_name('pagerTitleCell').text).replace(' 找到 ', '').replace(' 条结果', '').replace(
                ',', ''))
        print(cookies)
        time.sleep(5)
        return cookies, total_num
    finally:
        # Always shut the browser down, including when a step above raises.
        driver.quit()
52 |
53 |
54 | # getCookies('大数据')
55 |
--------------------------------------------------------------------------------
/spider/get_fund.py:
--------------------------------------------------------------------------------
1 | from selenium import webdriver
2 | import time
3 |
4 | from .db_handle import dbHandle
5 | # from db_handle import dbHandle
6 |
def getFunds(driver, keywordID):
    """Scrape the "基金" (fund) facet and store the per-fund counts.

    Clicks the fund facet link on the page ``driver`` is showing, reads
    each ``name(count)`` list entry, inserts the fund into ``analyse_fund``
    and links it to ``keywordID`` in ``analyse_fundtokeyword``.

    Args:
        driver: Selenium WebDriver positioned on the search result page.
        keywordID: Integer id of the keyword row the counts belong to.
    """
    # Names the page shows shortened, mapped to the full name to store.
    full_names = {
        '国家高技术研究发展计划': '国家高技术研究发展计划(863计划)',
        '国家重点基础研究发展计划': '国家重点基础研究发展计划(973计划)',
        '江苏省教育厅人文社会科学研究基...': '江苏省教育厅人文社会科学研究基金',
    }

    print('点击基金链接')
    facet_link = driver.find_element_by_link_text('基金')
    print(facet_link.text)
    facet_link.click()
    time.sleep(5)
    li_div = driver.find_element_by_class_name('hide')
    ul = li_div.find_element_by_tag_name('ul')
    lis = ul.find_elements_by_tag_name('li')
    print(lis)

    # One connection for the whole list, always released afterwards.
    dbhandle = dbHandle()
    try:
        for li in lis:
            text = str(li.text).replace('\n', '')
            print('基金', text)
            fund = text.split('(')[0]
            fund = full_names.get(fund, fund)
            # The count is the last parenthesised group; this also skips a
            # shortened "(97..." / "(863..." group inside the name.
            number = text.rpartition('(')[2].replace(')', '')

            print(fund)
            print(number)
            if '(' not in text or not number.isdigit():
                print('跳过无法解析的条目', text)
                continue

            # The name comes from a web page: escape it before it goes
            # into the SQL text.
            safe_fund = dbhandle.conn.escape_string(fund)
            in_fund_sql = "INSERT INTO analyse_fund ( fund ) values('%s')" % (safe_fund)
            dbhandle.dbInsert(in_fund_sql)

            query_fundID_sql = "select id from analyse_fund where fund='%s' " % (safe_fund)
            print(query_fundID_sql)
            rows = dbhandle.dbQuery(query_fundID_sql)
            if not rows:
                print('未找到记录', fund)
                continue
            fund_id = rows[0][0]
            print(fund, 'fund_id', fund_id)

            in_fund_to_keyword = "INSERT INTO analyse_fundtokeyword(fund_id_id,keyword_id_id,counts)" \
                                 "values('%d','%d','%d')" \
                                 % (fund_id, keywordID, int(number))
            dbhandle.dbInsert(in_fund_to_keyword)
    finally:
        dbhandle.dbClose()
    time.sleep(5)
46 |
--------------------------------------------------------------------------------
/spider/get_keyWordID.py:
--------------------------------------------------------------------------------
1 | from selenium import webdriver
2 | import time
3 |
4 | from .db_handle import dbHandle
5 | # from db_handle import dbHandle
6 |
def insertKeywordID(keyword):
    """Insert ``keyword`` into ``analyse_keyword`` and rewrite its counts.

    Args:
        keyword: Search term to record.
    """
    dbhandle = dbHandle()
    try:
        # The keyword is user input: escape it before it goes into SQL text.
        safe_keyword = dbhandle.conn.escape_string(str(keyword))

        in_keyword_sql = "INSERT INTO analyse_keyword ( keyword ) values('%s')" % (safe_keyword)
        dbhandle.dbInsert(in_keyword_sql)

        query_KeywordCount_sql = "select counts from analyse_keyword where keyword='%s' " % (safe_keyword)
        print(query_KeywordCount_sql)
        KeywordCount = dbhandle.dbQuery(query_KeywordCount_sql)[0][0]

        # NOTE(review): this writes back the value that was just read, so
        # counts never changes; an increment may have been intended -- confirm.
        in_keyword_sql = "UPDATE analyse_keyword SET counts='%d' where keyword='%s'" % (int(KeywordCount), safe_keyword)
        dbhandle.dbInsert(in_keyword_sql)
    finally:
        # Release the connection on every path.
        dbhandle.dbClose()
19 |
def getKeywordID(keyword):
    """Return the ``analyse_keyword`` id of ``keyword``.

    Args:
        keyword: Search term to look up.

    Returns:
        The id column of the first matching row.

    Raises:
        IndexError: If no row exists for ``keyword``.
    """
    dbhandle = dbHandle()
    try:
        # The keyword is user input: escape it before it goes into SQL text.
        safe_keyword = dbhandle.conn.escape_string(str(keyword))
        query_KeywordID_sql = "select id from analyse_keyword where keyword='%s' " % (safe_keyword)
        print(query_KeywordID_sql)
        KeywordID = dbhandle.dbQuery(query_KeywordID_sql)[0][0]
        print(KeywordID, 'KeywordID', KeywordID)
        return KeywordID
    finally:
        # Release the connection on every path.
        dbhandle.dbClose()
27 |
--------------------------------------------------------------------------------
/spider/get_school.py:
--------------------------------------------------------------------------------
1 | from selenium import webdriver
2 | import time
3 |
4 | from .db_handle import dbHandle
5 | # from db_handle import dbHandle
def getSchools(driver, keywordID):
    """Scrape the "机构" (institution) facet and store the counts.

    Clicks the institution facet link on the page ``driver`` is showing,
    reads each ``name(count)`` list entry, inserts the name into
    ``analyse_school`` and links it to ``keywordID`` in
    ``analyse_schooltokeyword``.

    Args:
        driver: Selenium WebDriver positioned on the search result page.
        keywordID: Integer id of the keyword row the counts belong to.
    """
    print('点击机构链接')
    facet_link = driver.find_element_by_link_text('机构')
    print(facet_link.text)
    facet_link.click()
    time.sleep(5)
    li_div = driver.find_element_by_class_name('hide')
    ul = li_div.find_element_by_tag_name('ul')
    lis = ul.find_elements_by_tag_name('li')
    print(lis)

    # One connection for the whole list, always released afterwards.
    dbhandle = dbHandle()
    try:
        for li in lis:
            text = str(li.text).replace('\n', '')
            print('机构', text)
            school = text.split('(')[0]
            # Take the count from the last parenthesised group so a name
            # that itself contains "(" does not break the parse.
            number = text.rpartition('(')[2].replace(')', '')
            print(school)
            print(number)
            if '(' not in text or not number.isdigit():
                print('跳过无法解析的条目', text)
                continue

            # The name comes from a web page: escape it before it goes
            # into the SQL text.
            safe_school = dbhandle.conn.escape_string(school)
            in_school_sql = "INSERT INTO analyse_school ( school ) values('%s')" % (safe_school)
            dbhandle.dbInsert(in_school_sql)

            query_schoolID_sql = "select id from analyse_school where school='%s' " % (safe_school)
            print(query_schoolID_sql)
            rows = dbhandle.dbQuery(query_schoolID_sql)
            if not rows:
                print('未找到记录', school)
                continue
            school_id = rows[0][0]
            print(school, 'school_id', school_id)

            in_school_to_keyword = "INSERT INTO analyse_schooltokeyword(school_id_id,keyword_id_id,counts)" \
                                   "values('%d','%d','%d')" \
                                   % (school_id, keywordID, int(number))
            dbhandle.dbInsert(in_school_to_keyword)
    finally:
        dbhandle.dbClose()
    time.sleep(5)
--------------------------------------------------------------------------------
/spider/get_year.py:
--------------------------------------------------------------------------------
1 | from selenium import webdriver
2 | import time
3 |
4 | from .db_handle import dbHandle
5 |
6 |
7 | # from db_handle import dbHandle
8 |
9 |
def getYear(driver, keywordID):
    """Scrape the "发表年度" (publication year) facet and store the counts.

    Clicks the publication-year facet link on the page ``driver`` is
    showing, reads each ``year(count)`` list entry, inserts the year into
    ``analyse_year`` and links it to ``keywordID`` in
    ``analyse_yeartokeyword``.

    Args:
        driver: Selenium WebDriver positioned on the search result page.
        keywordID: Integer id of the keyword row the counts belong to.
    """
    print('点击发表年度链接')
    facet_link = driver.find_element_by_link_text('发表年度')
    print(facet_link.text)
    facet_link.click()
    time.sleep(5)
    li_div = driver.find_element_by_class_name('hide')
    ul = li_div.find_element_by_tag_name('ul')
    lis = ul.find_elements_by_tag_name('li')
    print(lis)

    # One connection for the whole list, always released afterwards.
    dbhandle = dbHandle()
    try:
        for li in lis:
            text = str(li.text).replace('\n', '')
            print('nian', text)
            # The entry starts with the four-character year; the count is
            # whatever sits inside the last parenthesised group.
            year = text[0:4]
            number = text.rpartition('(')[2].replace(')', '')
            print(year)
            print(number)
            if '(' not in text or not number.isdigit():
                print('跳过无法解析的条目', text)
                continue

            # The text comes from a web page: escape it before it goes
            # into the SQL text.
            safe_year = dbhandle.conn.escape_string(year)
            in_year_sql = "INSERT INTO analyse_year ( year ) values('%s')" % (safe_year)
            dbhandle.dbInsert(in_year_sql)

            query_yearID_sql = "select id from analyse_year where year='%s' " % (safe_year)
            print(query_yearID_sql)
            rows = dbhandle.dbQuery(query_yearID_sql)
            if not rows:
                print('未找到记录', year)
                continue
            year_id = rows[0][0]
            print(year, 'year_id', year_id)

            in_year_to_keyword = "INSERT INTO analyse_yeartokeyword(year_id_id,keyword_id_id,counts)" \
                                 "values('%d','%d','%d')" \
                                 % (year_id, keywordID, int(number))
            dbhandle.dbInsert(in_year_to_keyword)
    finally:
        dbhandle.dbClose()
    time.sleep(5)
47 | # return 1;
48 |
--------------------------------------------------------------------------------
/spider/mian.py:
--------------------------------------------------------------------------------
1 | from selenium import webdriver
2 | from selenium.webdriver.support.ui import WebDriverWait
3 | from selenium.webdriver.support import expected_conditions as EC
4 | from selenium.webdriver.common.by import By
5 | import time
6 | import json
7 | import csv
8 | from celery import task
9 |
10 |
11 |
12 |
@task
def start_spider(page):
    """Crawl CNKI mobile search results for the keyword "python".

    Opens the mobile search page in Chrome (images disabled), searches for
    "python" and walks the result list batch by batch, clicking the
    "load more" button between batches.

    Args:
        page: Number of result batches to read. Reading stops when the
            batch counter equals this value; a value the counter never
            reaches (such as 0) keeps loading until a wait or lookup fails.

    Returns:
        A list with one dict per paper, using the keys ``name``,
        ``author``, ``content``, ``source``, ``datetime``,
        ``literature_type``, ``download`` and ``cite``.
    """
    print("爬虫启动")
    options = webdriver.ChromeOptions()
    # Do not load images, which speeds up page loads.
    options.add_experimental_option("prefs", {"profile.managed_default_content_settings.images": 2})
    browser = webdriver.Chrome('/Applications/chromedriver', chrome_options=options)
    url = 'http://wap.cnki.net/touch/web/guide'

    # Result batches sit in the odd-numbered child divs of the list
    # container (the even ones hold the "showing N items" rows), hence the
    # 2 * count - 1 index used with this template.
    item_xpath = '//div[@id="searchlist_div"]/div[{}]/div[@class="c-company__body-item"]'

    data_list = []
    try:
        browser.get(url)
        # Wait for the search box to be present.
        WebDriverWait(browser, 1000).until(
            EC.presence_of_all_elements_located(
                (By.ID, 'keyword')
            )
        )
        # Focus the search box, type the keyword and submit the search.
        browser.find_element_by_id('keyword').click()
        browser.find_element_by_id('keyword_ordinary').send_keys('python')
        browser.find_element_by_class_name('btn-search ').click()
        # Wait for the result list to be present.
        WebDriverWait(browser, 1000).until(
            EC.presence_of_all_elements_located(
                (By.CLASS_NAME, 'g-search-body')
            )
        )

        # Number of the batch currently being read.
        count = 1
        while True:
            # Wait for the "load more" button, then keep a handle on it.
            WebDriverWait(browser, 1000).until(
                EC.presence_of_all_elements_located(
                    (By.CLASS_NAME, 'c-company__body-item-more')
                )
            )
            Btn = browser.find_element_by_class_name('c-company__body-item-more')

            # Wait for the items of the current batch, then collect them.
            batch_xpath = item_xpath.format(2 * count - 1)
            WebDriverWait(browser, 1000).until(
                EC.presence_of_all_elements_located(
                    (By.XPATH, batch_xpath)
                )
            )
            divs = browser.find_elements_by_xpath(batch_xpath)
            for div in divs:
                name = div.find_element_by_class_name('c-company__body-title').text
                author = div.find_element_by_class_name('c-company__body-author').text
                content = div.find_element_by_class_name('c-company__body-content').text
                # Source, optional literature type and date share one line.
                text = div.find_element_by_class_name('c-company__body-name').text.split()
                if (len(text) == 3 and text[-1] == '优先') or len(text) == 2:
                    source = text[0]
                    published = text[1]
                    literature_type = None
                else:
                    source = text[0]
                    published = text[2]
                    literature_type = text[1]
                # Download and citation counts follow their labels.
                temp = div.find_element_by_class_name('c-company__body-info').text.split()
                download = temp[0].split(':')[-1]
                cite = temp[1].split(':')[-1]

                data_dict = {
                    'name': name,
                    'author': author,
                    'content': content,
                    'source': source,
                    'datetime': published,
                    'literature_type': literature_type,
                    'download': download,
                    'cite': cite,
                }

                data_list.append(data_dict)
                print(data_dict)

            # find_element raises when the button is missing and never
            # returns a falsy value, so the button can be clicked directly.
            Btn.click()
            # Stop once the requested number of batches has been read.
            if count == page:
                break

            count += 1

            # Pause between batches to go easy on the server.
            time.sleep(2)
    finally:
        # Always shut the browser down, including when a wait or lookup fails.
        browser.quit()
    return data_list
121 |
122 |
123 | # def main():
124 | #
125 | # start_spider(eval(input('请输入要爬取的页数(如果需要全部爬取请输入0):')))
126 | #
127 | # # 将数据写入json文件中
128 | # # with open('data_json.json', 'a+', encoding='utf-8') as f:
129 | # # json.dump(data_list, f, ensure_ascii=False, indent=4)
130 | # # print('json文件写入完成')
131 | # #
132 | # # # 将数据写入csv文件
133 | # # with open('data_csv.csv', 'w', encoding='utf-8', newline='') as f:
134 | # # # 表头
135 | # # title = data_list[0].keys()
136 | # # # 声明writer对象
137 | # # writer = csv.DictWriter(f, title)
138 | # # # 写入表头
139 | # # writer.writeheader()
140 | # # # 批量写入数据
141 | # # writer.writerows(data_list)
142 | # # print('csv文件写入完成')
143 | #
144 | #
145 | # if __name__ == '__main__':
146 | #
147 | # main()
148 |
149 |
--------------------------------------------------------------------------------
/spider/paper_detail.py:
--------------------------------------------------------------------------------
1 | # coding:utf8
2 | # from selenium import webdriver
3 | import time
4 | from bs4 import BeautifulSoup
5 | import requests
6 | import time
7 | # from celery import task
8 | # import redis
9 | # import pymysql
10 | #
11 | from .get_cookies import getCookies
12 | from .db_handle import dbHandle
13 |
14 | # from get_cookies import getCookies
15 | # from db_handle import dbHandle
16 | # NOTE: the function below is time-consuming (long-running).
17 |
18 | # @task
19 | # paper_url='http://kns.cnki.net/KCMS/detail/11.2103.TN.20190422.1403.030.html?uid=WEEvREdxOWJmbC9oM1NjYkZCbDdrdW1QRWVHWlNKY2JNUkFVTThpbHZRbFU=$R1yZ0H6jyaa0en3RxVUd8df-oHi7XMMDo7mtKT6mSmEvTuk11l2gFA!!&v=MDA3NTN1WnJGQ3JsVUwzTUlWWT1JVFhBZHJHNEg5ak1xNDFIWk90Mll3OU16bVJuNmo1N1QzZmxxV00wQ0xMN1I3cWVi'
def paperDetail(cookies, paper_url):
    """Download one paper detail page and read three text fields from it.

    The page is fetched with ``requests`` using the supplied cookies and
    parsed with BeautifulSoup (``lxml`` parser).  The first three ``<p>``
    tags inside the first ``.wxBaseinfo`` element are read, in order.

    Args:
        cookies: Anything ``dict()`` accepts; sent as the request cookies.
        paper_url: URL of the detail page.

    Returns:
        A ``(paper_description, paper_fund, paper_keyword)`` tuple of
        strings.  Extraction stops at the first paragraph that is missing,
        and every field not reached stays ``''``.

    Note:
        Parsing errors are printed and swallowed; errors raised by the HTTP
        request itself are not caught here.
    """
    print('开始爬取详情页')
    cookie_map = dict(cookies)
    print(paper_url, cookie_map)
    response = requests.get(paper_url, cookies=cookie_map)

    # Slots: 0 = description, 1 = fund, 2 = keyword.
    # NOTE(review): the meaning of each paragraph position is taken from the
    # original variable names only - confirm against the real page layout.
    fields = ['', '', '']
    try:
        base_info = BeautifulSoup(response.text, 'lxml').select('.wxBaseinfo')[0]
        # The description keeps its whitespace untouched.
        fields[0] = str(base_info.select('p')[0].text)
        # Fund and keyword paragraphs have spaces and CRLF pairs removed.
        for slot in (1, 2):
            raw_text = str(base_info.select('p')[slot].text)
            fields[slot] = raw_text.replace(' ', '').replace('\r\n', '')
        # Pause only after a fully successful parse, before returning.
        time.sleep(3)
    except Exception as e:
        print('数据为空', e)

    return tuple(fields)
46 | #
47 | # cookies= {'c_m_expire': '2019-04-23 12:51:27', '_pk_ses': '*', '_pk_ref': '%5B%22%22%2C%22%22%2C1555993936%2C%22http%3A%2F%2Fwww.cnki.net%2F%22%5D', 'c_m_LinID': 'LinID=WEEvREcwSlJHSldRa1FhdkJkVG1BVmpTQUozQ2hhR21URFB6cDkxZU9YWT0=$9A4hF_YAuvQ5obgVAqNKPCYcEjKensW4IQMovwHtwkF4VYPoHbKxJw!!&ot=04/23/2019 12:51:27', 'SID_krsnew': '125131', 'Ecp_ClientId': '4190423123001492160', 'Ecp_LoginStuts': '%7B%22IsAutoLogin%22%3Afalse%2C%22UserName%22%3A%22NJ0051%22%2C%22ShowName%22%3A%22%25E6%25B1%259F%25E8%258B%258F%25E5%25B8%2588%25E8%258C%2583%25E5%25A4%25A7%25E5%25AD%25A6%25E4%25B8%2580%25E5%25B8%25A6%25E4%25B8%2580%25E8%25B7%25AF%25E7%25A0%2594%25E7%25A9%25B6%25E9%2599%25A2%22%2C%22UserType%22%3A%22bk%22%2C%22r%22%3A%22IB43OG%22%7D', 'SID_crrs': '125133', 'LID': 'WEEvREcwSlJHSldRa1FhdkJkVG1BVmpTQUozQ2hhR21URFB6cDkxZU9YWT0=$9A4hF_YAuvQ5obgVAqNKPCYcEjKensW4IQMovwHtwkF4VYPoHbKxJw!!', 'Ecp_session': '1', 'SID_klogin': '125143', 'KNS_SortType': '', 'cnkiUserKey': 'b18eb70d-b09a-71d5-28a1-bb3d7d9acb00', 'SID_kns': '123113', 'ASP.NET_SessionId': 'lllaehphb55hcugftjtnb2dv', 'RsPerPage': '20', 'Ecp_notFirstLogin': 'IB43OG'}
48 | # paperDetail(cookies,paper_url)
--------------------------------------------------------------------------------
/spider/paper_spider.py:
--------------------------------------------------------------------------------
1 | # coding:utf8
2 | from selenium import webdriver
3 | import time
4 | from bs4 import BeautifulSoup
5 | import requests
6 | import time
7 | from celery import task
8 | import redis
9 | import pymysql
10 | #
11 | from .get_cookies import getCookies
12 | from .db_handle import dbHandle
13 | from .paper_detail import paperDetail
14 |
15 |
16 | #
17 | # from get_cookies import getCookies
18 | # from db_handle import dbHandle
19 |
20 |
21 | # NOTE: the function below is time-consuming (long-running).
22 |
def _open_page_in_chrome(url, cookies):
    """Open ``url`` in a Chrome window with the session cookies applied.

    Selenium only accepts cookies for the domain that is currently loaded and
    expects each cookie as a dict with ``name`` and ``value`` keys, so the
    page is loaded once, the cookies are added, and the page is loaded again.

    Returns the driver.  It is deliberately not closed here: the original
    code left the window open as well.
    NOTE(review): presumably so a person can inspect the page - confirm.
    """
    driver = webdriver.Chrome('/Applications/chromedriver')
    driver.get(url)
    for name, value in cookies.items():
        print(name, value)
        driver.add_cookie({'name': name, 'value': value})
    driver.get(url)
    return driver


@task
def paperSpider(keyWord):
    """Crawl the CNKI result list for ``keyWord`` and store what is found.

    For every result page the table rows carrying a ``bgcolor`` attribute are
    parsed, the detail page of each paper is fetched through ``paperDetail``,
    and the paper, its authors and the paper/author links are inserted
    through ``dbHandle``.

    Args:
        keyWord: Search keyword; passed to ``getCookies`` and stored in the
            ``sch_key`` column of every inserted paper.

    Returns:
        None.  Results are written to the database.

    Behaviour notes:
        * Crawling stops at the first page that yields no table rows; that
          page is then opened in Chrome via ``_open_page_in_chrome``.
        * Scraped text is escaped before it is placed into SQL strings.
    """
    # pymysql is already imported by this module; the local import keeps the
    # escaping helper next to its only user.
    # NOTE(review): assumes dbHandle talks to MySQL - confirm.
    from pymysql.converters import escape_string

    def quoted(value):
        """Escape ``value`` for use inside a single-quoted SQL literal."""
        return escape_string(str(value))

    print('爬虫启动')
    # One handle for the whole crawl instead of a new one per table row.
    dbhandle = dbHandle()
    key_word = keyWord
    # Cookies for the logged-in session plus the number of hits for the keyword.
    cookies, total_num = getCookies(key_word)
    cookies = dict(cookies)

    # Round up so the final, partially filled page is requested too.
    # NOTE(review): the URL asks for 50 records per page while the page count
    # is derived from 20 per page, as in the original - confirm which is right.
    page_size = 20
    page_count = (int(total_num) + page_size - 1) // page_size

    url_template = (
        'http://kns.cnki.net/kns/brief/brief.aspx?curpage={0}&RecordsPerPage=50&QueryID=0&ID=&turnpage=1'
        '&tpagemode=L&dbPrefix=SCDB&Fields=&DisplayMode=listmode&PageName=ASP.brief_default_result_aspx#J_ORDER&'
    )

    for page_no in range(1, page_count + 1):
        url = url_template.format(page_no)
        response = requests.get(url, cookies=cookies)
        rows = BeautifulSoup(response.text, 'lxml').select('.GridTableContent tr')
        print("正在爬取第%d页*************************************************************" % page_no)

        if not rows:
            # No result table on this page: open it in a browser and stop.
            _open_page_in_chrome(url, cookies)
            break

        saved = 0
        for row in rows:
            # Only rows with a bgcolor attribute are treated as result rows.
            if not row.has_attr('bgcolor'):
                continue

            cells = row.select('td')
            link = cells[1].find('a')
            paper_title = str(link.text).replace('\n', '')
            # Detail page URL: every 'kns' in the href is replaced by 'KCMS'.
            paper_url = 'http://kns.cnki.net' + str(link.attrs['href']).replace('kns', 'KCMS')
            authors = str(cells[2].text).replace('\n', '').split(';')
            paper_magazine = str(cells[3].text).replace('\n', '')
            paper_year = str(cells[4].text)
            # NOTE(review): the original also read cells[5] as the source type
            # but never used it; the source_type column receives the text of
            # cells[3] (kept as is) - confirm which column is intended.
            local_url = "http://localhost:8000/cnki/paperDetail?title=" + paper_title
            print(local_url)
            print('论文发表年限------------', paper_year)

            paper_description, paper_funds, paper_keywords = paperDetail(cookies, paper_url)
            print(paper_description, paper_funds, paper_keywords)

            # Insert the paper itself.
            paper_values = (paper_title, paper_url, key_word, paper_description, paper_funds,
                            paper_keywords, paper_year, local_url, paper_magazine)
            in_paper_sql = (
                "INSERT INTO analyse_paper(title,url,sch_key,description,paper_funds,paper_keywords,public_year,local_url,source_type)"
                "values('%s','%s','%s','%s','%s','%s','%s','%s','%s')"
                % tuple(quoted(value) for value in paper_values)
            )
            print(in_paper_sql)
            dbhandle.dbInsert(in_paper_sql)

            # Read back the id of the paper by its title.
            query_paperID_sql = "select id from analyse_paper where title='%s' " % quoted(paper_title)
            print(query_paperID_sql)
            paper_id = dbhandle.dbQuery(query_paperID_sql)[0][0]
            print('paper id', paper_id)

            # Insert every author and link it to the paper.
            for author in authors:
                author = author.replace(' ', '')
                if not author:
                    # A trailing ';' in the author cell yields an empty name.
                    continue
                print(author)
                in_author_sql = "INSERT INTO analyse_author(author)values('%s')" % quoted(author)
                dbhandle.dbInsert(in_author_sql)

                query_authorID_sql = "select id from analyse_author where author='%s'" % quoted(author)
                print(query_authorID_sql)
                author_id = dbhandle.dbQuery(query_authorID_sql)[0][0]
                print('author id ', author_id)

                in_author_to_paper = (
                    "INSERT INTO analyse_papertoauthor(author_id_id,paper_id_id)"
                    "values('%d','%d')" % (author_id, paper_id)
                )
                dbhandle.dbInsert(in_author_to_paper)

            saved += 1

        print('第{}页抓取了{}条数据'.format(page_no, saved))

        # Be polite to the server between result pages.
        time.sleep(3)
118 |
119 | # paperSpider('大数据')
120 |
--------------------------------------------------------------------------------
/spider/paper_spider_by_app.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 |
3 | from selenium import webdriver
4 | from selenium.webdriver.support.ui import WebDriverWait
5 | from selenium.webdriver.support import expected_conditions as EC
6 | from selenium.webdriver.common.by import By
7 | import time
8 | import json
9 | import csv
10 | from celery import shared_task
11 |
12 | from celery import task, app
13 | from channels.layers import get_channel_layer
14 | from asgiref.sync import async_to_sync
15 | from channels.generic.websocket import WebsocketConsumer
16 |
17 |
18 |
def _push_to_channel(channel_layer, channel_name, message):
    """Send ``message`` to ``channel_name`` as a ``send.message`` event."""
    async_to_sync(channel_layer.send)(
        channel_name,
        {
            "type": "send.message",
            "message": message
        }
    )


def _read_result_item(div):
    """Build the record dict for one search-result element."""
    name = div.find_element_by_class_name('c-company__body-title').text
    author = div.find_element_by_class_name('c-company__body-author').text
    content = div.find_element_by_class_name('c-company__body-content').text

    # Source line: either "source date" (optionally followed by '优先')
    # or "source type date".
    parts = div.find_element_by_class_name('c-company__body-name').text.split()
    if (len(parts) == 3 and parts[-1] == '优先') or len(parts) == 2:
        source, published, literature_type = parts[0], parts[1], None
    else:
        source, literature_type, published = parts[0], parts[1], parts[2]

    # Info line: two "label:value" tokens, read as download count then cite count.
    info = div.find_element_by_class_name('c-company__body-info').text.split()
    download = info[0].split(':')[-1]
    cite = info[1].split(':')[-1]

    return {
        'name': name,
        'author': author,
        'content': content,
        'source': source,
        'datetime': published,
        'literature_type': literature_type,
        'download': download,
        'cite': cite,
    }


@shared_task
def start_spider_by_app(keyWords, channel_name, page=0):
    """Search the CNKI mobile site and stream each result to a channel.

    A Chrome browser (images disabled) opens the guide page, types
    ``keyWords`` into the search box and submits it.  Once the result list
    is present the message ``"202"`` is sent to ``channel_name``; afterwards
    every parsed result is sent as a one-element list containing its record
    dict, and the "load more" button is clicked to fetch the next batch.

    Args:
        keyWords: Text typed into the search box.
            NOTE(review): assumed to be a single string - confirm with the caller.
        channel_name: Channel that receives the ``send.message`` events.
        page: Number of result batches to read; ``0`` (default) means no
            limit, in which case the loop ends when an explicit wait times
            out or any other error occurs.

    Returns:
        None.

    Note:
        Any exception is printed rather than raised, and the browser is
        always closed before the task returns.
    """
    print("启动爬虫=======================")
    channel_layer = get_channel_layer()

    options = webdriver.ChromeOptions()
    # Do not load images, to speed up page loads.
    options.add_experimental_option("prefs", {"profile.managed_default_content_settings.images": 2})
    browser = webdriver.Chrome('/Applications/chromedriver', chrome_options=options)

    # Upper bound, in seconds, for every explicit wait below.
    wait_seconds = 1000
    item_xpath_template = '//div[@id="searchlist_div"]/div[{}]/div[@class="c-company__body-item"]'

    print("收到消息===============")
    try:
        print("次数==============================1")

        url = 'http://wap.cnki.net/touch/web/guide'
        print("发送消息")
        browser.get(url)

        # Wait for the search box, then type the requested keyword and submit.
        WebDriverWait(browser, wait_seconds).until(
            EC.presence_of_all_elements_located((By.ID, 'keyword'))
        )
        browser.find_element_by_id('keyword').click()
        browser.find_element_by_id('keyword_ordinary').send_keys(keyWords)
        browser.find_element_by_class_name('btn-search ').click()

        # Wait for the result list, then tell the client the search started.
        WebDriverWait(browser, wait_seconds).until(
            EC.presence_of_all_elements_located((By.CLASS_NAME, 'g-search-body'))
        )
        _push_to_channel(channel_layer, channel_name, "202")

        # Number of the batch currently being read (1-based).
        count = 1
        while True:
            # Wait for the "load more" button of the current batch.
            WebDriverWait(browser, wait_seconds).until(
                EC.presence_of_all_elements_located((By.CLASS_NAME, 'c-company__body-item-more'))
            )
            more_button = browser.find_element_by_class_name('c-company__body-item-more')

            # Batches of results sit in the odd-numbered child divs of the
            # list container, hence 2 * count - 1.
            item_xpath = item_xpath_template.format(2 * count - 1)
            WebDriverWait(browser, wait_seconds).until(
                EC.presence_of_all_elements_located((By.XPATH, item_xpath))
            )

            for div in browser.find_elements_by_xpath(item_xpath):
                data_dict = _read_result_item(div)
                _push_to_channel(channel_layer, channel_name, [data_dict])
                print(data_dict)

            # Stop once the requested number of batches has been read.
            if count == page:
                break

            more_button.click()
            count += 1

            # Pause between batches so the server is not hammered.
            time.sleep(2)
    except Exception as e:
        print("出错啦=====================\n", e)
    finally:
        # Close the browser on every exit path.
        browser.quit()
--------------------------------------------------------------------------------