├── .gitignore ├── .travis.yml ├── LICENSE ├── README.md ├── analysis ├── bili-video-data-hsql-database.ipynb ├── bili-video-data-hsql-database.py ├── export │ ├── batch │ │ └── .githold │ └── bubble │ │ ├── animated_bubble.gif │ │ ├── duration_step01.png │ │ ├── duration_step02.png │ │ ├── duration_step03.png │ │ ├── duration_step04.png │ │ ├── duration_step05.png │ │ ├── duration_step06.png │ │ ├── duration_step07.png │ │ ├── duration_step08.png │ │ ├── duration_step09.png │ │ ├── duration_step10.png │ │ ├── duration_step11.png │ │ ├── duration_step12.png │ │ ├── duration_step13.png │ │ ├── duration_step14.png │ │ ├── duration_step15.png │ │ ├── duration_step16.png │ │ ├── duration_step17.png │ │ ├── duration_step18.png │ │ ├── duration_step19.png │ │ └── duration_step20.png ├── history-sect-slicing.ipynb └── vis.ipynb ├── app ├── LICENSE.md ├── README.md ├── about.html ├── assets │ ├── css │ │ ├── bulma.css │ │ ├── bulma.css.map │ │ └── style.css │ ├── mongo.js │ └── vue │ │ ├── README.md │ │ ├── vue.common.js │ │ ├── vue.esm.browser.js │ │ ├── vue.esm.js │ │ ├── vue.js │ │ ├── vue.min.js │ │ ├── vue.runtime.common.js │ │ ├── vue.runtime.esm.js │ │ ├── vue.runtime.js │ │ └── vue.runtime.min.js ├── bilicrawler-0.1.1.js ├── bilicrawler.js ├── index.html ├── main.js ├── package.json └── renderer.js ├── biliSpider-logo.jpg ├── connect_to_remote_mongodb_demo.ipynb ├── iodata ├── iodata.sln ├── iodata │ ├── dllmain.c │ ├── iodata.cpp │ ├── iodata.vcxproj │ ├── iodata.vcxproj.filters │ ├── iodata.vcxproj.user │ ├── stdafx.cpp │ ├── stdafx.h │ ├── targetver.h │ └── x64 │ │ └── Release │ │ ├── dllmain.obj │ │ ├── iodata.log │ │ ├── iodata.tlog │ │ ├── CL.command.1.tlog │ │ ├── CL.read.1.tlog │ │ ├── CL.write.1.tlog │ │ ├── iodata.lastbuildstate │ │ ├── iodata.write.1u.tlog │ │ ├── link.command.1.tlog │ │ ├── link.read.1.tlog │ │ └── link.write.1.tlog │ │ └── vc141.pdb ├── readme.md ├── x64 │ └── Release │ │ ├── iodata.dll │ │ ├── iodata.exp │ │ ├── iodata.iobj │ │ ├── 
iodata.ipdb │ │ ├── iodata.lib │ │ └── iodata.pdb └── 例子.nb ├── node-spider-dist ├── .editorconfig ├── .eslintrc.yml ├── client.js ├── client │ ├── constants.js │ ├── nest.js │ ├── process.js │ ├── proxy │ │ ├── 89ip.js │ │ ├── cn-proxy.js │ │ ├── kuaidaili.js │ │ ├── mayidaili.js │ │ ├── mogudaili.js │ │ ├── xdaili.js │ │ ├── xicidaili.js │ │ └── yundaili.js │ ├── spider.js │ └── utils.js ├── data-transfer.js ├── index.js ├── package.json ├── server.js └── test │ ├── index.js │ └── user-info.json ├── spider.py ├── 专栏跟踪爬虫-氘化氢.nb ├── 极速视频爬虫2.2.nb ├── 视频跟踪爬虫-LePtC.nb └── 视频跟踪爬虫的可视化.nb /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | 49 | # Translations 50 | *.mo 51 | *.pot 52 | 53 | # Django stuff: 54 | *.log 55 | local_settings.py 56 | 57 | # Flask stuff: 58 | instance/ 59 | .webassets-cache 60 | 61 | # Scrapy stuff: 62 | .scrapy 63 | 64 | # Sphinx documentation 65 | docs/_build/ 66 | 67 | # PyBuilder 68 | target/ 69 | 70 | # Jupyter Notebook 71 | .ipynb_checkpoints 72 | 73 | # pyenv 74 | .python-version 75 | 76 | # celery beat schedule file 77 | celerybeat-schedule 78 | 79 | # SageMath parsed files 80 | *.sage.py 81 | 82 | # dotenv 83 | .env 84 | 85 | # virtualenv 86 | .venv 87 | venv/ 88 | ENV/ 89 | 90 | # Spyder project settings 91 | .spyderproject 92 | .spyproject 93 | 94 | # Rope project settings 95 | .ropeproject 96 | 97 | # mkdocs documentation 98 | /site 99 | 100 | # mypy 101 | .mypy_cache/ 102 | 103 | # node and electron 104 | 105 | node_modules 106 | app/dist/ 107 | package-lock.json 108 | yarn.lock -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: node_js 2 | 3 | node_js: 4 | - "lts/*" 5 | 6 | install: 7 | - "cd node-spider-dist" 8 | - "npm install" 9 | 10 | script: 11 | - "npm run lint" 12 | - "npm run test" -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # BiliSpider 2 | 3 | 【本项目孵化自 [idea #4](https://github.com/orgs/uupers/teams/daily-idea/discussions/4)】 4 | 5 | # 简介 6 | 7 | 这里是UUPs爬虫主项目组,任何新技术均会首先应用在B站爬虫上。我们的目标是为每位up主的创作之路助力,主要体现在: 8 | 9 | 1. 数据获取与管理:利用爬虫技术获取全B站的视频与用户数据,并用维护数据库的方法管理它们,为创作者提供检索服务 10 | 2. 
class Cleaner:
    """Collect raw text lines from files and parse selected ones into int rows.

    Typical use is ``hay(path)`` one or more times to load lines, followed by
    ``dval()`` to turn a strided selection of those lines into lists of
    integers stored on ``self.data``.
    """

    def __init__(self):
        # Every line read by ``hay``, in read order, line endings included.
        self.raw = []
        # Rows produced by ``dval``; each row is a list of ints.
        self.data = []

    def hay(self, path):
        """Append every line of the text file at *path* to ``self.raw``.

        Lines are stored exactly as read (trailing newline kept). Calling
        this repeatedly accumulates lines from several files.

        Args:
            path: Location of the text file to read.

        Raises:
            OSError: If the file cannot be opened or read.
        """
        with open(path) as f:
            # A text file iterates line by line, so extend() keeps one entry
            # per line just like an explicit append loop.
            self.raw.extend(f)

    def dval(self, bgp=None, edp=None, seg=None):
        """Parse a slice of ``self.raw`` into integer rows on ``self.data``.

        The lines ``self.raw[bgp:edp:seg]`` are selected. From each selected
        line the first 38 characters and the last 2 characters are dropped,
        the remainder is split on ``","`` and every field is converted with
        ``int``. The resulting rows are appended to ``self.data``.

        Args:
            bgp: Slice start index into ``self.raw``. Defaults to 7.
            edp: Slice stop index (exclusive). Defaults to -1, i.e. the final
                line is skipped.
            seg: Slice step. Defaults to 2 (every other line).

        Raises:
            ValueError: If a field is not a valid integer literal (this
                includes a selected line whose remaining text is empty).
        """
        if bgp is None:
            bgp = 7
        if edp is None:
            edp = -1
        if seg is None:
            seg = 2

        for line in self.raw[bgp:edp:seg]:
            # NOTE(review): the fixed 38/-2 offsets presumably strip a
            # constant-width statement prefix and a closing ")" + newline of
            # the dump being cleaned -- confirm against the input format.
            payload = line[38:-2]
            # The original looped over an undefined name (``data``) here and
            # raised NameError; the extracted payload is what must be split.
            self.data.append([int(field) for field in payload.split(",")])
https://raw.githubusercontent.com/uupers/BiliSpider/95c07da2f7abf4f318dc41dff8e6dd891045b799/analysis/export/bubble/duration_step01.png -------------------------------------------------------------------------------- /analysis/export/bubble/duration_step02.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/uupers/BiliSpider/95c07da2f7abf4f318dc41dff8e6dd891045b799/analysis/export/bubble/duration_step02.png -------------------------------------------------------------------------------- /analysis/export/bubble/duration_step03.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/uupers/BiliSpider/95c07da2f7abf4f318dc41dff8e6dd891045b799/analysis/export/bubble/duration_step03.png -------------------------------------------------------------------------------- /analysis/export/bubble/duration_step04.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/uupers/BiliSpider/95c07da2f7abf4f318dc41dff8e6dd891045b799/analysis/export/bubble/duration_step04.png -------------------------------------------------------------------------------- /analysis/export/bubble/duration_step05.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/uupers/BiliSpider/95c07da2f7abf4f318dc41dff8e6dd891045b799/analysis/export/bubble/duration_step05.png -------------------------------------------------------------------------------- /analysis/export/bubble/duration_step06.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/uupers/BiliSpider/95c07da2f7abf4f318dc41dff8e6dd891045b799/analysis/export/bubble/duration_step06.png -------------------------------------------------------------------------------- /analysis/export/bubble/duration_step07.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/uupers/BiliSpider/95c07da2f7abf4f318dc41dff8e6dd891045b799/analysis/export/bubble/duration_step07.png -------------------------------------------------------------------------------- /analysis/export/bubble/duration_step08.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/uupers/BiliSpider/95c07da2f7abf4f318dc41dff8e6dd891045b799/analysis/export/bubble/duration_step08.png -------------------------------------------------------------------------------- /analysis/export/bubble/duration_step09.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/uupers/BiliSpider/95c07da2f7abf4f318dc41dff8e6dd891045b799/analysis/export/bubble/duration_step09.png -------------------------------------------------------------------------------- /analysis/export/bubble/duration_step10.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/uupers/BiliSpider/95c07da2f7abf4f318dc41dff8e6dd891045b799/analysis/export/bubble/duration_step10.png -------------------------------------------------------------------------------- /analysis/export/bubble/duration_step11.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/uupers/BiliSpider/95c07da2f7abf4f318dc41dff8e6dd891045b799/analysis/export/bubble/duration_step11.png -------------------------------------------------------------------------------- /analysis/export/bubble/duration_step12.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/uupers/BiliSpider/95c07da2f7abf4f318dc41dff8e6dd891045b799/analysis/export/bubble/duration_step12.png 
-------------------------------------------------------------------------------- /analysis/export/bubble/duration_step13.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/uupers/BiliSpider/95c07da2f7abf4f318dc41dff8e6dd891045b799/analysis/export/bubble/duration_step13.png -------------------------------------------------------------------------------- /analysis/export/bubble/duration_step14.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/uupers/BiliSpider/95c07da2f7abf4f318dc41dff8e6dd891045b799/analysis/export/bubble/duration_step14.png -------------------------------------------------------------------------------- /analysis/export/bubble/duration_step15.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/uupers/BiliSpider/95c07da2f7abf4f318dc41dff8e6dd891045b799/analysis/export/bubble/duration_step15.png -------------------------------------------------------------------------------- /analysis/export/bubble/duration_step16.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/uupers/BiliSpider/95c07da2f7abf4f318dc41dff8e6dd891045b799/analysis/export/bubble/duration_step16.png -------------------------------------------------------------------------------- /analysis/export/bubble/duration_step17.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/uupers/BiliSpider/95c07da2f7abf4f318dc41dff8e6dd891045b799/analysis/export/bubble/duration_step17.png -------------------------------------------------------------------------------- /analysis/export/bubble/duration_step18.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/uupers/BiliSpider/95c07da2f7abf4f318dc41dff8e6dd891045b799/analysis/export/bubble/duration_step18.png -------------------------------------------------------------------------------- /analysis/export/bubble/duration_step19.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/uupers/BiliSpider/95c07da2f7abf4f318dc41dff8e6dd891045b799/analysis/export/bubble/duration_step19.png -------------------------------------------------------------------------------- /analysis/export/bubble/duration_step20.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/uupers/BiliSpider/95c07da2f7abf4f318dc41dff8e6dd891045b799/analysis/export/bubble/duration_step20.png -------------------------------------------------------------------------------- /analysis/history-sect-slicing.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## History data" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 197, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "import numpy as np\n", 17 | "import pandas as pd\n", 18 | "import matplotlib.pyplot as plt\n", 19 | "from pandas.plotting import scatter_matrix\n", 20 | "\n", 21 | "import seaborn as sns\n", 22 | "sns.set_style(\"white\")\n", 23 | "fig_w = 2530\n", 24 | "fig_h = 1900\n", 25 | "my_dpi=200\n", 26 | "\n", 27 | "import dateutil" 28 | ] 29 | }, 30 | { 31 | "cell_type": "markdown", 32 | "metadata": {}, 33 | "source": [ 34 | "Config plot styles" 35 | ] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "execution_count": 198, 40 | "metadata": {}, 41 | "outputs": [], 42 | "source": [ 43 | "plt.style.use('ggplot')\n", 44 | "\n", 45 | "from matplotlib import rcParams\n", 46 | "# plt.rcParams['font.sans-serif'] = ['SimHei'] # 
用来正常显示中文标签\n", 47 | "plt.rcParams['axes.unicode_minus'] = False # 用来正常显示负号\n", 48 | "\n", 49 | "plt.rc('font', family='BabelStone Han', size=13) # 选择你自己电脑上的字体" 50 | ] 51 | }, 52 | { 53 | "cell_type": "markdown", 54 | "metadata": {}, 55 | "source": [ 56 | "Load data" 57 | ] 58 | }, 59 | { 60 | "cell_type": "code", 61 | "execution_count": 199, 62 | "metadata": {}, 63 | "outputs": [], 64 | "source": [ 65 | "# 如果还没有数据,来群里索要数据文件就行\n", 66 | "\n", 67 | "df = pd.read_csv('data/full-data.csv') " 68 | ] 69 | }, 70 | { 71 | "cell_type": "markdown", 72 | "metadata": {}, 73 | "source": [ 74 | "Validate all the possible columns of data frame" 75 | ] 76 | }, 77 | { 78 | "cell_type": "code", 79 | "execution_count": 200, 80 | "metadata": {}, 81 | "outputs": [ 82 | { 83 | "data": { 84 | "text/plain": [ 85 | "['Unnamed: 0',\n", 86 | " 'aid',\n", 87 | " 'uid',\n", 88 | " 'time',\n", 89 | " 'duration',\n", 90 | " 'state',\n", 91 | " 'view',\n", 92 | " 'danmaku',\n", 93 | " 'coin',\n", 94 | " 'favorite',\n", 95 | " 'share',\n", 96 | " 'reply',\n", 97 | " 'like',\n", 98 | " 'sectid',\n", 99 | " 'sectname']" 100 | ] 101 | }, 102 | "execution_count": 200, 103 | "metadata": {}, 104 | "output_type": "execute_result" 105 | } 106 | ], 107 | "source": [ 108 | "list(df.columns.values)" 109 | ] 110 | }, 111 | { 112 | "cell_type": "code", 113 | "execution_count": 201, 114 | "metadata": {}, 115 | "outputs": [], 116 | "source": [ 117 | "df['time'] = pd.to_datetime(df['time'],unit='s')#.dt.date" 118 | ] 119 | }, 120 | { 121 | "cell_type": "code", 122 | "execution_count": 195, 123 | "metadata": {}, 124 | "outputs": [], 125 | "source": [ 126 | "# df;" 127 | ] 128 | }, 129 | { 130 | "cell_type": "markdown", 131 | "metadata": {}, 132 | "source": [ 133 | "## Group by sects" 134 | ] 135 | }, 136 | { 137 | "cell_type": "markdown", 138 | "metadata": {}, 139 | "source": [ 140 | "Get all the sects by sectname" 141 | ] 142 | }, 143 | { 144 | "cell_type": "code", 145 | "execution_count": 202, 146 | "metadata": 
{}, 147 | "outputs": [ 148 | { 149 | "data": { 150 | "text/plain": [ 151 | "'连载剧集'" 152 | ] 153 | }, 154 | "execution_count": 202, 155 | "metadata": {}, 156 | "output_type": "execute_result" 157 | } 158 | ], 159 | "source": [ 160 | "sects = df.sectname.unique()\n", 161 | "sects[0]" 162 | ] 163 | }, 164 | { 165 | "cell_type": "markdown", 166 | "metadata": {}, 167 | "source": [ 168 | "Create a dataframe to store the average data" 169 | ] 170 | }, 171 | { 172 | "cell_type": "code", 173 | "execution_count": 213, 174 | "metadata": {}, 175 | "outputs": [ 176 | { 177 | "name": "stdout", 178 | "output_type": "stream", 179 | "text": [ 180 | "Dealing with: 连载剧集\n", 181 | "Start date:2017-08-30 13:14:52\n", 182 | "End date:2017-10-24 09:16:47\n", 183 | "连载剧集 has 5rows\n", 184 | "Data saved\n", 185 | "Dealing with: flash游戏\n", 186 | "Start date:2010-10-04 23:00:49\n", 187 | "End date:2012-03-30 07:24:04\n", 188 | "flash游戏 has 54rows\n", 189 | "Data saved\n", 190 | "Dealing with: 单机联机\n", 191 | "Start date:2009-07-03 11:10:16\n", 192 | "End date:2018-02-28 07:36:52\n", 193 | "单机联机 has 316rows\n", 194 | "Data saved\n", 195 | "Dealing with: Mugen\n", 196 | "Start date:2009-07-14 14:59:24\n", 197 | "End date:2018-02-28 07:10:03\n", 198 | "Mugen has 315rows\n", 199 | "Data saved\n", 200 | "Dealing with: 宅舞\n", 201 | "Start date:2009-07-15 12:34:50\n", 202 | "End date:2018-02-28 07:39:39\n", 203 | "宅舞 has 314rows\n", 204 | "Data saved\n", 205 | "Dealing with: 日常\n", 206 | "Start date:2009-06-26 07:15:45\n", 207 | "End date:2018-02-28 08:06:32\n", 208 | "日常 has 316rows\n", 209 | "Data saved\n", 210 | "Dealing with: 鬼畜调教\n", 211 | "Start date:2009-07-14 01:23:55\n", 212 | "End date:2018-02-28 07:09:05\n", 213 | "鬼畜调教 has 315rows\n", 214 | "Data saved\n", 215 | "Dealing with: MAD-AMV\n", 216 | "Start date:2009-07-15 07:12:48\n", 217 | "End date:2018-02-28 08:16:54\n", 218 | "MAD-AMV has 315rows\n", 219 | "Data saved\n", 220 | "Dealing with: MMD-3D\n", 221 | "Start date:2009-09-16 
05:11:11\n", 222 | "End date:2018-02-28 08:24:46\n", 223 | "MMD-3D has 308rows\n", 224 | "Data saved\n", 225 | "Dealing with: 音MAD\n", 226 | "Start date:2009-07-09 07:55:49\n", 227 | "End date:2018-02-28 08:01:25\n", 228 | "音MAD has 315rows\n", 229 | "Data saved\n", 230 | "Dealing with: 综合\n", 231 | "Start date:2009-07-13 11:36:05\n", 232 | "End date:2018-02-28 08:37:09\n", 233 | "综合 has 315rows\n", 234 | "Data saved\n", 235 | "Dealing with: 原创音乐\n", 236 | "Start date:2010-03-18 16:03:38\n", 237 | "End date:2018-02-28 08:01:19\n", 238 | "原创音乐 has 290rows\n", 239 | "Data saved\n", 240 | "Dealing with: 三次元音乐\n", 241 | "Start date:2009-07-14 10:46:02\n", 242 | "End date:2018-02-28 08:49:01\n", 243 | "三次元音乐 has 315rows\n", 244 | "Data saved\n", 245 | "Dealing with: VOCALOID-UTAU\n", 246 | "Start date:2009-07-12 15:15:53\n", 247 | "End date:2018-02-28 08:20:34\n", 248 | "VOCALOID-UTAU has 315rows\n", 249 | "Data saved\n", 250 | "Dealing with: 翻唱\n", 251 | "Start date:2009-07-13 13:17:13\n", 252 | "End date:2018-02-28 09:01:31\n", 253 | "翻唱 has 315rows\n", 254 | "Data saved\n", 255 | "Dealing with: 完结动画\n", 256 | "Start date:2009-08-26 06:57:38\n", 257 | "End date:2018-02-28 03:25:16\n", 258 | "完结动画 has 310rows\n", 259 | "Data saved\n", 260 | "Dealing with: 连载动画\n", 261 | "Start date:2009-10-06 03:29:23\n", 262 | "End date:2018-02-28 02:12:46\n", 263 | "连载动画 has 306rows\n", 264 | "Data saved\n", 265 | "Dealing with: 人文历史\n", 266 | "Start date:2010-10-11 05:21:01\n", 267 | "End date:2018-02-28 07:18:50\n", 268 | "人文历史 has 269rows\n", 269 | "Data saved\n", 270 | "Dealing with: 演讲-公开课\n", 271 | "Start date:2010-06-17 00:21:36\n", 272 | "End date:2018-02-28 09:16:24\n", 273 | "演讲-公开课 has 281rows\n", 274 | "Data saved\n", 275 | "Dealing with: 未知\n", 276 | "Start date:2015-05-11 08:43:27\n", 277 | "End date:2015-05-11 08:43:27\n", 278 | "未知 has 0rows\n", 279 | "Dealing with: MMD.3D\n", 280 | "Start date:2011-09-01 02:19:42\n", 281 | "End date:2014-05-09 03:06:24\n", 282 | 
"MMD.3D has 98rows\n", 283 | "Data saved\n", 284 | "Dealing with: 短片-手书-配音\n", 285 | "Start date:2009-07-14 01:34:49\n", 286 | "End date:2018-02-28 09:21:30\n", 287 | "短片-手书-配音 has 315rows\n", 288 | "Data saved\n", 289 | "Dealing with: 资讯\n", 290 | "Start date:2010-08-30 13:33:38\n", 291 | "End date:2018-02-28 13:18:07\n", 292 | "资讯 has 273rows\n", 293 | "Data saved\n", 294 | "Dealing with: OP-ED-OST\n", 295 | "Start date:2009-09-20 09:22:24\n", 296 | "End date:2018-02-28 09:09:17\n", 297 | "OP-ED-OST has 308rows\n", 298 | "Data saved\n", 299 | "Dealing with: 演奏\n", 300 | "Start date:2009-07-21 03:20:03\n", 301 | "End date:2018-02-28 09:28:44\n", 302 | "演奏 has 314rows\n", 303 | "Data saved\n", 304 | "Dealing with: 网络游戏\n", 305 | "Start date:2010-04-28 12:27:00\n", 306 | "End date:2018-02-28 10:18:47\n", 307 | "网络游戏 has 286rows\n", 308 | "Data saved\n", 309 | "Dealing with: 单机游戏\n", 310 | "Start date:2012-06-25 14:18:48\n", 311 | "End date:2014-08-14 04:34:01\n", 312 | "单机游戏 has 77rows\n", 313 | "Data saved\n", 314 | "Dealing with: 综艺\n", 315 | "Start date:2010-10-01 05:01:59\n", 316 | "End date:2018-02-28 10:33:32\n", 317 | "综艺 has 270rows\n", 318 | "Data saved\n", 319 | "Dealing with: 动物圈\n", 320 | "Start date:2010-02-23 06:23:13\n", 321 | "End date:2018-02-28 10:44:47\n", 322 | "动物圈 has 292rows\n", 323 | "Data saved\n", 324 | "Dealing with: 美食圈\n", 325 | "Start date:2010-03-25 04:19:33\n", 326 | "End date:2018-02-28 10:55:24\n", 327 | "美食圈 has 289rows\n", 328 | "Data saved\n", 329 | "Dealing with: 喵星人\n", 330 | "Start date:2014-04-22 11:31:46\n", 331 | "End date:2014-04-22 11:31:46\n", 332 | "喵星人 has 0rows\n", 333 | "Dealing with: 美食视频\n", 334 | "Start date:2014-04-18 14:33:01\n", 335 | "End date:2014-08-21 05:14:23\n", 336 | "美食视频 has 12rows\n", 337 | "Data saved\n", 338 | "Dealing with: 电影相关\n", 339 | "Start date:2013-02-07 02:20:21\n", 340 | "End date:2017-10-31 15:01:36\n", 341 | "电影相关 has 172rows\n", 342 | "Data saved\n", 343 | "Dealing with: 其他国家\n", 344 | 
"Start date:2016-02-22 12:41:35\n", 345 | "End date:2017-12-20 10:59:06\n", 346 | "其他国家 has 66rows\n", 347 | "Data saved\n", 348 | "Dealing with: 短片\n", 349 | "Start date:2010-03-24 18:05:57\n", 350 | "End date:2018-02-28 11:01:11\n", 351 | "短片 has 289rows\n", 352 | "Data saved\n", 353 | "Dealing with: 特摄\n", 354 | "Start date:2010-06-21 20:00:07\n", 355 | "End date:2018-02-28 10:43:29\n", 356 | "特摄 has 280rows\n", 357 | "Data saved\n", 358 | "Dealing with: 剧场版\n", 359 | "Start date:2014-08-01 11:16:26\n", 360 | "End date:2015-03-17 14:01:30\n", 361 | "剧场版 has 22rows\n", 362 | "Data saved\n", 363 | "Dealing with: 数码\n", 364 | "Start date:2010-07-22 15:58:01\n", 365 | "End date:2018-02-28 11:16:49\n", 366 | "数码 has 277rows\n", 367 | "Data saved\n", 368 | "Dealing with: 星海\n", 369 | "Start date:2011-03-22 17:55:22\n", 370 | "End date:2018-02-28 11:21:27\n", 371 | "星海 has 253rows\n", 372 | "Data saved\n", 373 | "Dealing with: 机械\n", 374 | "Start date:2010-02-14 19:57:33\n", 375 | "End date:2018-02-28 11:25:32\n", 376 | "机械 has 293rows\n", 377 | "Data saved\n", 378 | "Dealing with: GMV\n", 379 | "Start date:2010-07-26 14:57:28\n", 380 | "End date:2018-02-28 11:26:16\n", 381 | "GMV has 277rows\n", 382 | "Data saved\n", 383 | "Dealing with: 野生技术协会\n", 384 | "Start date:2009-09-09 01:09:09\n", 385 | "End date:2018-02-28 11:38:47\n", 386 | "野生技术协会 has 309rows\n", 387 | "Data saved\n", 388 | "Dealing with: 趣味科普人文\n", 389 | "Start date:2009-10-14 04:38:56\n", 390 | "End date:2018-02-28 11:49:46\n", 391 | "趣味科普人文 has 305rows\n", 392 | "Data saved\n", 393 | "Dealing with: 其他\n", 394 | "Start date:2009-09-13 07:01:43\n", 395 | "End date:2018-02-28 14:17:25\n", 396 | "其他 has 309rows\n", 397 | "Data saved\n", 398 | "Dealing with: 人力VOCALOID\n", 399 | "Start date:2009-11-20 03:59:58\n", 400 | "End date:2018-02-28 11:53:18\n", 401 | "人力VOCALOID has 302rows\n", 402 | "Data saved\n", 403 | "Dealing with: 教程演示\n", 404 | "Start date:2010-03-02 03:33:45\n", 405 | "End date:2018-02-27 
15:43:02\n", 406 | "教程演示 has 291rows\n", 407 | "Data saved\n", 408 | "Dealing with: 电视剧相关\n", 409 | "Start date:2012-10-14 10:14:57\n", 410 | "End date:2017-10-31 22:39:57\n", 411 | "电视剧相关 has 184rows\n", 412 | "Data saved\n", 413 | "Dealing with: 音乐选集\n", 414 | "Start date:2009-07-09 06:07:51\n", 415 | "End date:2018-02-28 12:15:05\n", 416 | "音乐选集 has 315rows\n", 417 | "Data saved\n", 418 | "Dealing with: Korea相关\n", 419 | "Start date:2011-08-11 04:16:29\n", 420 | "End date:2018-02-28 12:28:16\n", 421 | "Korea相关 has 239rows\n", 422 | "Data saved\n", 423 | "Dealing with: 音游\n", 424 | "Start date:2009-07-21 14:14:31\n", 425 | "End date:2018-02-28 12:21:43\n", 426 | "音游 has 314rows\n", 427 | "Data saved\n", 428 | "Dealing with: 明星\n", 429 | "Start date:2012-06-10 10:56:41\n", 430 | "End date:2018-02-28 12:45:54\n", 431 | "明星 has 208rows\n", 432 | "Data saved\n", 433 | "Dealing with: 搞笑\n", 434 | "Start date:2010-02-09 09:37:26\n", 435 | "End date:2018-02-28 12:50:05\n", 436 | "搞笑 has 294rows\n", 437 | "Data saved\n", 438 | "Dealing with: 实况解说\n", 439 | "Start date:2015-04-08 17:53:37\n", 440 | "End date:2015-04-09 01:19:13\n", 441 | "实况解说 has 0rows\n", 442 | "Dealing with: 游戏集锦\n", 443 | "Start date:2015-04-08 23:17:14\n", 444 | "End date:2015-04-08 23:42:10\n", 445 | "游戏集锦 has 0rows\n", 446 | "Dealing with: 欧美电影\n", 447 | "Start date:2016-02-22 11:45:41\n", 448 | "End date:2018-02-12 15:55:51\n", 449 | "欧美电影 has 72rows\n", 450 | "Data saved\n", 451 | "Dealing with: 日本电影\n", 452 | "Start date:2016-05-11 06:11:29\n", 453 | "End date:2018-01-26 08:49:37\n", 454 | "日本电影 has 62rows\n", 455 | "Data saved\n", 456 | "Dealing with: 国产电影\n", 457 | "Start date:2012-01-03 08:49:05\n", 458 | "End date:2018-02-28 02:34:13\n", 459 | "国产电影 has 224rows\n", 460 | "Data saved\n", 461 | "Dealing with: 官方延伸\n", 462 | "Start date:2010-01-07 03:46:26\n", 463 | "End date:2018-02-28 11:37:35\n", 464 | "官方延伸 has 297rows\n", 465 | "Data saved\n", 466 | "Dealing with: 国产动画\n", 467 | "Start 
date:2010-01-23 22:47:56\n", 468 | "End date:2018-02-28 03:30:00\n", 469 | "国产动画 has 295rows\n", 470 | "Data saved\n", 471 | "Dealing with: 三次元舞蹈\n", 472 | "Start date:2010-09-08 21:56:54\n", 473 | "End date:2018-02-28 12:40:54\n", 474 | "三次元舞蹈 has 272rows\n", 475 | "Data saved\n", 476 | "Dealing with: 舞蹈教程\n", 477 | "Start date:2011-04-02 11:17:02\n", 478 | "End date:2018-02-28 04:07:41\n", 479 | "舞蹈教程 has 252rows\n", 480 | "Data saved\n", 481 | "Dealing with: 美妆\n", 482 | "Start date:2014-04-21 18:15:57\n", 483 | "End date:2018-02-28 13:06:05\n", 484 | "美妆 has 140rows\n", 485 | "Data saved\n", 486 | "Dealing with: 服饰\n", 487 | "Start date:2013-11-24 12:11:47\n", 488 | "End date:2018-02-28 13:00:58\n", 489 | "服饰 has 155rows\n", 490 | "Data saved\n", 491 | "Dealing with: 手工\n", 492 | "Start date:2010-09-29 17:44:14\n", 493 | "End date:2018-02-28 13:11:51\n", 494 | "手工 has 270rows\n", 495 | "Data saved\n", 496 | "Dealing with: 绘画\n", 497 | "Start date:2010-03-17 00:33:51\n", 498 | "End date:2018-02-28 12:54:55\n", 499 | "绘画 has 290rows\n", 500 | "Data saved\n", 501 | "Dealing with: 运动\n", 502 | "Start date:2010-07-24 08:31:15\n", 503 | "End date:2018-02-28 13:19:40\n", 504 | "运动 has 277rows\n", 505 | "Data saved\n", 506 | "Dealing with: 健身\n", 507 | "Start date:2014-07-28 13:04:28\n", 508 | "End date:2018-02-28 13:00:58\n", 509 | "健身 has 131rows\n", 510 | "Data saved\n", 511 | "Dealing with: 广告\n", 512 | "Start date:2009-07-13 18:34:45\n", 513 | "End date:2018-02-28 13:20:06\n", 514 | "广告 has 315rows\n", 515 | "Data saved\n", 516 | "Dealing with: 国产原创相关\n", 517 | "Start date:2009-09-30 12:42:21\n", 518 | "End date:2018-02-28 13:19:43\n", 519 | "国产原创相关 has 307rows\n", 520 | "Data saved\n", 521 | "Dealing with: 布袋戏\n", 522 | "Start date:2010-02-14 11:05:42\n", 523 | "End date:2018-02-28 09:27:27\n", 524 | "布袋戏 has 293rows\n", 525 | "Data saved\n", 526 | "Dealing with: 电子竞技\n", 527 | "Start date:2009-07-09 19:15:16\n", 528 | "End date:2018-02-28 13:49:44\n", 529 | 
"电子竞技 has 315rows\n", 530 | "Data saved\n", 531 | "Dealing with: 手机游戏\n", 532 | "Start date:2010-02-06 19:12:21\n", 533 | "End date:2018-02-28 14:08:13\n", 534 | "手机游戏 has 294rows\n", 535 | "Data saved\n", 536 | "Dealing with: 桌游棋牌\n", 537 | "Start date:2010-03-27 14:17:19\n", 538 | "End date:2018-02-28 14:07:32\n", 539 | "桌游棋牌 has 289rows\n", 540 | "Data saved\n", 541 | "Dealing with: ASMR\n", 542 | "Start date:2010-05-29 03:11:51\n", 543 | "End date:2018-02-28 14:22:05\n", 544 | "ASMR has 283rows\n", 545 | "Data saved\n", 546 | "Dealing with: 汽车\n", 547 | "Start date:2011-01-12 13:41:51\n", 548 | "End date:2018-02-28 14:19:35\n", 549 | "汽车 has 260rows\n", 550 | "Data saved\n", 551 | "Dealing with: 科学探索\n", 552 | "Start date:2010-06-03 09:55:15\n", 553 | "End date:2018-02-28 10:01:16\n", 554 | "科学探索 has 282rows\n", 555 | "Data saved\n", 556 | "Dealing with: 热血军事\n", 557 | "Start date:2010-12-23 06:50:28\n", 558 | "End date:2018-02-28 06:08:24\n", 559 | "热血军事 has 262rows\n", 560 | "Data saved\n", 561 | "Dealing with: 舌尖上的旅行\n", 562 | "Start date:2011-06-08 23:18:17\n", 563 | "End date:2018-02-28 10:58:27\n", 564 | "舌尖上的旅行 has 245rows\n", 565 | "Data saved\n", 566 | "Dealing with: 影视杂谈\n", 567 | "Start date:2011-07-08 04:36:59\n", 568 | "End date:2018-02-28 14:28:54\n", 569 | "影视杂谈 has 242rows\n", 570 | "Data saved\n", 571 | "Dealing with: 影视剪辑\n", 572 | "Start date:2010-02-03 14:12:18\n", 573 | "End date:2018-02-28 15:00:27\n", 574 | "影视剪辑 has 294rows\n", 575 | "Data saved\n", 576 | "Dealing with: 预告 资讯\n", 577 | "Start date:2009-06-26 07:11:36\n", 578 | "End date:2018-02-28 14:45:07\n", 579 | "预告 资讯 has 316rows\n", 580 | "Data saved\n", 581 | "Dealing with: 国产剧\n", 582 | "Start date:2011-02-04 11:04:49\n", 583 | "End date:2018-02-28 08:04:43\n", 584 | "国产剧 has 258rows\n", 585 | "Data saved\n", 586 | "Dealing with: 海外剧\n", 587 | "Start date:2014-08-09 05:55:52\n", 588 | "End date:2018-02-25 02:00:00\n", 589 | "海外剧 has 129rows\n", 590 | "Data saved\n" 591 | ] 592 | 
} 593 | ], 594 | "source": [ 595 | "index_store = ['date','tduration', 'tview', 'tcoin', 'tfavorite', 'tlike','totalv','mostaid']\n", 596 | "\n", 597 | "# idx = 3\n", 598 | "for idx in np.arange(len(sects) ):\n", 599 | " df_sect = df[df['sectname']==sects[idx]] \n", 600 | "\n", 601 | " max_date = df_sect.time.max()\n", 602 | " min_date = df_sect.time.min()\n", 603 | " sect_id_str = str(df_sect.sectid.unique()[0])\n", 604 | " sect_name_str = str(sects[idx])\n", 605 | " print('Dealing with: '+sect_name_str)\n", 606 | " print('Start date:' + str(min_date))\n", 607 | " print('End date:' + str(max_date))\n", 608 | "\n", 609 | " dateseries = pd.date_range(start=min_date, end=max_date, freq='10D')\n", 610 | "\n", 611 | "\n", 612 | " df_store = pd.DataFrame(columns=index_store)\n", 613 | "\n", 614 | "\n", 615 | " ########\n", 616 | " # Loop Through Dates\n", 617 | " ########\n", 618 | "\n", 619 | " # d_idx = 0\n", 620 | "\n", 621 | " for d_idx in np.arange(len(dateseries) -1 ):\n", 622 | " df_sect_temp = df_sect[df_sect['time'].between(dateseries[d_idx],dateseries[d_idx+1])]\n", 623 | "\n", 624 | " if not df_sect_temp.empty:\n", 625 | " mostviewed_temp = df_sect_temp.loc[df_sect_temp['view'].idxmax()].aid\n", 626 | " totalrows_temp = len(df_sect_temp.index)\n", 627 | " sumviews_temp = np.sum(df_sect_temp['view'].values)\n", 628 | " sumduration_temp = np.sum(df_sect_temp['duration'].values)\n", 629 | " sumcoin_temp = np.sum(df_sect_temp['coin'].values)\n", 630 | " sumfav_temp = np.sum(df_sect_temp['favorite'].values)\n", 631 | " sumlike_temp = np.sum(df_sect_temp['like'].values)\n", 632 | "\n", 633 | " df_sect_temp2 = df_sect_temp.mean().round(2)\n", 634 | "\n", 635 | " values_temp = [dateseries[d_idx] , sumduration_temp, sumviews_temp, sumcoin_temp, sumfav_temp, sumlike_temp, totalrows_temp ,mostviewed_temp]\n", 636 | "\n", 637 | " df_store_temp = pd.DataFrame([values_temp],columns=['date','tduration', 'tview', 'tcoin', 'tfavorite', 'tlike','totalv','mostaid'])\n", 638 | 
"\n", 639 | "\n", 640 | " df_store = (pd.concat([df_store, df_store_temp]) ).reset_index(drop=True)\n", 641 | " else:\n", 642 | " values_temp = [dateseries[d_idx] , 0, 0, 0, 0, 0, 0 ,0]\n", 643 | "\n", 644 | " df_store_temp = pd.DataFrame([values_temp],columns=['date','tduration', 'tview', 'tcoin', 'tfavorite', 'tlike','totalv','mostaid'])\n", 645 | "\n", 646 | " df_store = (pd.concat([df_store, df_store_temp]) ).reset_index(drop=True)\n", 647 | "\n", 648 | " \n", 649 | " print(sect_name_str + ' has ' + str( len(df_store.index) ) + 'rows' )\n", 650 | " \n", 651 | " if not df_store.empty:\n", 652 | " df_store['date'] = df_store['date'].dt.date\n", 653 | "\n", 654 | " df_store.to_csv('export/batch/'+sect_id_str+'-'+sect_name_str+'.csv', sep=',', encoding='utf-8')\n", 655 | "\n", 656 | " print('Data saved') " 657 | ] 658 | }, 659 | { 660 | "cell_type": "code", 661 | "execution_count": null, 662 | "metadata": {}, 663 | "outputs": [], 664 | "source": [] 665 | } 666 | ], 667 | "metadata": { 668 | "kernelspec": { 669 | "display_name": "Python 3", 670 | "language": "python", 671 | "name": "python3" 672 | }, 673 | "language_info": { 674 | "codemirror_mode": { 675 | "name": "ipython", 676 | "version": 3 677 | }, 678 | "file_extension": ".py", 679 | "mimetype": "text/x-python", 680 | "name": "python", 681 | "nbconvert_exporter": "python", 682 | "pygments_lexer": "ipython3", 683 | "version": "3.6.4" 684 | } 685 | }, 686 | "nbformat": 4, 687 | "nbformat_minor": 2 688 | } 689 | -------------------------------------------------------------------------------- /app/LICENSE.md: -------------------------------------------------------------------------------- 1 | CC0 1.0 Universal 2 | ================== 3 | 4 | Statement of Purpose 5 | --------------------- 6 | 7 | The laws of most jurisdictions throughout the world automatically confer exclusive Copyright and Related Rights (defined below) upon the creator and subsequent owner(s) (each and all, an "owner") of an original work of 
authorship and/or a database (each, a "Work"). 8 | 9 | Certain owners wish to permanently relinquish those rights to a Work for the purpose of contributing to a commons of creative, cultural and scientific works ("Commons") that the public can reliably and without fear of later claims of infringement build upon, modify, incorporate in other works, reuse and redistribute as freely as possible in any form whatsoever and for any purposes, including without limitation commercial purposes. These owners may contribute to the Commons to promote the ideal of a free culture and the further production of creative, cultural and scientific works, or to gain reputation or greater distribution for their Work in part through the use and efforts of others. 10 | 11 | For these and/or other purposes and motivations, and without any expectation of additional consideration or compensation, the person associating CC0 with a Work (the "Affirmer"), to the extent that he or she is an owner of Copyright and Related Rights in the Work, voluntarily elects to apply CC0 to the Work and publicly distribute the Work under its terms, with knowledge of his or her Copyright and Related Rights in the Work and the meaning and intended legal effect of CC0 on those rights. 12 | 13 | 1. Copyright and Related Rights. 14 | -------------------------------- 15 | A Work made available under CC0 may be protected by copyright and related or neighboring rights ("Copyright and Related Rights"). Copyright and Related Rights include, but are not limited to, the following: 16 | 17 | i. the right to reproduce, adapt, distribute, perform, display, communicate, and translate a Work; 18 | ii. moral rights retained by the original author(s) and/or performer(s); 19 | iii. publicity and privacy rights pertaining to a person's image or likeness depicted in a Work; 20 | iv. rights protecting against unfair competition in regards to a Work, subject to the limitations in paragraph 4(a), below; 21 | v. 
rights protecting the extraction, dissemination, use and reuse of data in a Work; 22 | vi. database rights (such as those arising under Directive 96/9/EC of the European Parliament and of the Council of 11 March 1996 on the legal protection of databases, and under any national implementation thereof, including any amended or successor version of such directive); and 23 | vii. other similar, equivalent or corresponding rights throughout the world based on applicable law or treaty, and any national implementations thereof. 24 | 25 | 2. Waiver. 26 | ----------- 27 | To the greatest extent permitted by, but not in contravention of, applicable law, Affirmer hereby overtly, fully, permanently, irrevocably and unconditionally waives, abandons, and surrenders all of Affirmer's Copyright and Related Rights and associated claims and causes of action, whether now known or unknown (including existing as well as future claims and causes of action), in the Work (i) in all territories worldwide, (ii) for the maximum duration provided by applicable law or treaty (including future time extensions), (iii) in any current or future medium and for any number of copies, and (iv) for any purpose whatsoever, including without limitation commercial, advertising or promotional purposes (the "Waiver"). Affirmer makes the Waiver for the benefit of each member of the public at large and to the detriment of Affirmer's heirs and successors, fully intending that such Waiver shall not be subject to revocation, rescission, cancellation, termination, or any other legal or equitable action to disrupt the quiet enjoyment of the Work by the public as contemplated by Affirmer's express Statement of Purpose. 28 | 29 | 3. Public License Fallback. 
30 | ---------------------------- 31 | Should any part of the Waiver for any reason be judged legally invalid or ineffective under applicable law, then the Waiver shall be preserved to the maximum extent permitted taking into account Affirmer's express Statement of Purpose. In addition, to the extent the Waiver is so judged Affirmer hereby grants to each affected person a royalty-free, non transferable, non sublicensable, non exclusive, irrevocable and unconditional license to exercise Affirmer's Copyright and Related Rights in the Work (i) in all territories worldwide, (ii) for the maximum duration provided by applicable law or treaty (including future time extensions), (iii) in any current or future medium and for any number of copies, and (iv) for any purpose whatsoever, including without limitation commercial, advertising or promotional purposes (the "License"). The License shall be deemed effective as of the date CC0 was applied by Affirmer to the Work. Should any part of the License for any reason be judged legally invalid or ineffective under applicable law, such partial invalidity or ineffectiveness shall not invalidate the remainder of the License, and in such case Affirmer hereby affirms that he or she will not (i) exercise any of his or her remaining Copyright and Related Rights in the Work or (ii) assert any associated claims and causes of action with respect to the Work, in either case contrary to Affirmer's express Statement of Purpose. 32 | 33 | 4. Limitations and Disclaimers. 34 | -------------------------------- 35 | 36 | a. No trademark or patent rights held by Affirmer are waived, abandoned, surrendered, licensed or otherwise affected by this document. 37 | b. 
Affirmer offers the Work as-is and makes no representations or warranties of any kind concerning the Work, express, implied, statutory or otherwise, including without limitation warranties of title, merchantability, fitness for a particular purpose, non infringement, or the absence of latent or other defects, accuracy, or the present or absence of errors, whether or not discoverable, all to the greatest extent permissible under applicable law. 38 | c. Affirmer disclaims responsibility for clearing rights of other persons that may apply to the Work or any use thereof, including without limitation any person's Copyright and Related Rights in the Work. Further, Affirmer disclaims responsibility for obtaining any necessary consents, permissions or other rights required for any use of the Work. 39 | d. Affirmer understands and acknowledges that Creative Commons is not a party to this document and has no duty or obligation with respect to this CC0 or use of the Work. 40 | -------------------------------------------------------------------------------- /app/README.md: -------------------------------------------------------------------------------- 1 | # BiliSpider App 2 | 3 | 4 | Install `npm` 5 | 6 | Install dependencies 7 | 8 | ``` 9 | npm install 10 | ``` 11 | 12 | Run test 13 | 14 | ``` 15 | npm start 16 | ``` 17 | 18 | Build your own version of the app 19 | 20 | ``` 21 | npm run dist 22 | ``` -------------------------------------------------------------------------------- /app/about.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | BiliSpider 7 | 8 | 9 | 17 | 18 | 19 | 20 | 21 |
22 |
23 |
24 |

25 | BiliSpider 26 |

27 | 28 |
29 |

30 | 一个用来获取 Bilibili 用户数据的爬虫 31 |

32 |
https://github.com/uupers/uupers.github.io
33 | 34 | 35 | 36 |
37 | 38 | 39 | 40 | 41 | 42 |
43 | 44 |
45 | 46 |
47 | 48 |
49 |
50 |
51 |
52 | 53 |
54 |
55 |
56 |       LOG: 
57 |
58 |
59 |
60 | 61 | 62 | 63 | 64 | 68 | 69 | 70 | -------------------------------------------------------------------------------- /app/assets/css/style.css: -------------------------------------------------------------------------------- 1 | .title { 2 | text-align: center; 3 | } 4 | .text-centered { 5 | text-align: center; 6 | } 7 | 8 | #log-pre { 9 | height: 200px; 10 | max-height: 200px; 11 | min-height: 200px; 12 | overflow: auto 13 | } 14 | 15 | 16 | #data-html { 17 | height: 200px; 18 | max-height: 200px; 19 | min-height: 200px; 20 | overflow: auto 21 | } 22 | .db-span { 23 | display: block; 24 | } -------------------------------------------------------------------------------- /app/assets/mongo.js: -------------------------------------------------------------------------------- 1 | const dbhtml = document.getElementById('db-info') 2 | 3 | // Connection URL 4 | var url = 'mongodb://spiderrd:spiderrd@45.32.68.44:37017/bilibili_spider'; 5 | 6 | const dbName = 'bilibili_spider'; 7 | var mongojs = require('mongojs'); 8 | var db = mongojs(url); 9 | var db_member = db.collection('member_card'); 10 | 11 | var db_size = 0; 12 | var db_objects = 0 ; 13 | var db_storageSize = 0; 14 | 15 | 16 | setInterval( 17 | db.stats(function () { 18 | console.log(arguments); 19 | db_size = arguments[1].dataSize/1024/1024/1024; 20 | dbit("数据库大小:" + db_size.toString().substring(0,5) + "G;"); 21 | db_storageSize = arguments[1].storageSize/1024/1024/1024; 22 | dbit("存储空间:" + db_storageSize.toString().substring(0,5) + "G;"); 23 | db_objects = arguments[1].objects; 24 | dbit("用户数:" + db_objects+";"); 25 | }), 26 | 600000); 27 | 28 | 29 | 30 | function dbit(elem) { 31 | dbhtml.innerHTML += ""+elem+""; 32 | } -------------------------------------------------------------------------------- /app/assets/vue/README.md: -------------------------------------------------------------------------------- 1 | ## Explanation of Build Files 2 | 3 | | | UMD | CommonJS | ES Module | 4 | | --- | --- | --- | --- | 
5 | | **Full** | vue.js | vue.common.js | vue.esm.js | 6 | | **Runtime-only** | vue.runtime.js | vue.runtime.common.js | vue.runtime.esm.js | 7 | | **Full (production)** | vue.min.js | | | 8 | | **Runtime-only (production)** | vue.runtime.min.js | | | 9 | 10 | ### Terms 11 | 12 | - **Full**: builds that contains both the compiler and the runtime. 13 | 14 | - **Compiler**: code that is responsible for compiling template strings into JavaScript render functions. 15 | 16 | - **Runtime**: code that is responsible for creating Vue instances, rendering and patching virtual DOM, etc. Basically everything minus the compiler. 17 | 18 | - **[UMD](https://github.com/umdjs/umd)**: UMD builds can be used directly in the browser via a ` 61 | 62 | 66 | 67 | 68 | -------------------------------------------------------------------------------- /app/main.js: -------------------------------------------------------------------------------- 1 | const electron = require('electron') 2 | // Module to control application life. 3 | const app = electron.app 4 | // Module to create native browser window. 5 | const BrowserWindow = electron.BrowserWindow 6 | 7 | const path = require('path') 8 | const url = require('url') 9 | 10 | // Keep a global reference of the window object, if you don't, the window will 11 | // be closed automatically when the JavaScript object is garbage collected. 12 | let mainWindow 13 | 14 | function createWindow () { 15 | // Create the browser window. 16 | mainWindow = new BrowserWindow({width: 930, height: 650}) 17 | 18 | // and load the index.html of the app. 19 | mainWindow.loadURL(url.format({ 20 | pathname: path.join(__dirname, 'index.html'), 21 | protocol: 'file:', 22 | slashes: true 23 | })) 24 | 25 | // Open the DevTools. 26 | // mainWindow.webContents.openDevTools() 27 | 28 | // Emitted when the window is closed. 
29 | mainWindow.on('closed', function () { 30 | // Dereference the window object, usually you would store windows 31 | // in an array if your app supports multi windows, this is the time 32 | // when you should delete the corresponding element. 33 | mainWindow = null 34 | }) 35 | } 36 | 37 | // This method will be called when Electron has finished 38 | // initialization and is ready to create browser windows. 39 | // Some APIs can only be used after this event occurs. 40 | app.on('ready', createWindow) 41 | 42 | // Quit when all windows are closed. 43 | app.on('window-all-closed', function () { 44 | // On OS X it is common for applications and their menu bar 45 | // to stay active until the user quits explicitly with Cmd + Q 46 | if (process.platform !== 'darwin') { 47 | app.quit() 48 | } 49 | }) 50 | 51 | app.on('activate', function () { 52 | // On OS X it's common to re-create a window in the app when the 53 | // dock icon is clicked and there are no other windows open. 54 | if (mainWindow === null) { 55 | createWindow() 56 | } 57 | }) 58 | 59 | // In this file you can include the rest of your app's specific main process 60 | // code. You can also put them in separate files and require them here. 
61 | 62 | 63 | -------------------------------------------------------------------------------- /app/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "bilispider", 3 | "version": "0.1.8", 4 | "description": "Bilibili spider to crawl data", 5 | "main": "main.js", 6 | "scripts": { 7 | "start": "electron .", 8 | "pack": "electron-builder --dir", 9 | "dist": "electron-builder" 10 | }, 11 | "repository": "https://github.com/uupers/BiliSpider", 12 | "keywords": [ 13 | "Bilibili", 14 | "Spider" 15 | ], 16 | "author": "uupers", 17 | "license": "CC0-1.0", 18 | "devDependencies": { 19 | "electron": "~1.8.2", 20 | "electron-builder": "^20.0.8", 21 | "electron-packager": "^11.0.1" 22 | }, 23 | "dependencies": { 24 | "bulma": "*", 25 | "mongojs": "^2.5.0", 26 | "spider-dist": "file:../node-spider-dist" 27 | }, 28 | "build": { 29 | "appId": "com.uupers.app", 30 | "mac": { 31 | "category": "crawler" 32 | }, 33 | "win": { 34 | "target": "portable" 35 | } 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /app/renderer.js: -------------------------------------------------------------------------------- 1 | // This file is required by the index.html file and will 2 | // be executed in the renderer process for that window. 3 | // All of the Node.js APIs are available in this process. 
4 | 5 | -------------------------------------------------------------------------------- /biliSpider-logo.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/uupers/BiliSpider/95c07da2f7abf4f318dc41dff8e6dd891045b799/biliSpider-logo.jpg -------------------------------------------------------------------------------- /connect_to_remote_mongodb_demo.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 20, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "# http://api.mongodb.com/python/current/tutorial.html\n", 12 | "import pymongo\n", 13 | "import pprint" 14 | ] 15 | }, 16 | { 17 | "cell_type": "code", 18 | "execution_count": 22, 19 | "metadata": { 20 | "collapsed": false 21 | }, 22 | "outputs": [], 23 | "source": [ 24 | "client = pymongo.MongoClient(\"mongodb://spiderrd:spiderrd@45.32.68.44:37017/bilibili_spider\")\n", 25 | "db = client.bilibili_spider\n", 26 | "member_card = db.member_card" 27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": 23, 32 | "metadata": { 33 | "collapsed": false 34 | }, 35 | "outputs": [ 36 | { 37 | "data": { 38 | "text/plain": [ 39 | "{'DisplayRank': '0',\n", 40 | " '_id': ObjectId('5a8ed520b09d9a3c4000d53b'),\n", 41 | " 'approve': False,\n", 42 | " 'archive_count': 1,\n", 43 | " 'article': 0,\n", 44 | " 'attention': 0,\n", 45 | " 'attentions': [],\n", 46 | " 'birthday': '',\n", 47 | " 'ctime': '2018-02-22 22:32:50',\n", 48 | " 'description': '',\n", 49 | " 'face': 'http://i2.hdslb.com/bfs/face/de17baa65d6478ee7fe55a5c0528854247334f2d.gif',\n", 50 | " 'fans': 0,\n", 51 | " 'friend': 0,\n", 52 | " 'level_info': {'current_exp': 3920,\n", 53 | " 'current_level': 3,\n", 54 | " 'current_min': 1500,\n", 55 | " 'next_exp': 4500},\n", 56 | " 'mid': 32000,\n", 57 | " 'name': '无枫',\n", 58 | " 'nameplate': 
{'condition': '',\n", 59 | " 'image': '',\n", 60 | " 'image_small': '',\n", 61 | " 'level': '',\n", 62 | " 'name': '',\n", 63 | " 'nid': 0},\n", 64 | " 'official_verify': {'desc': '', 'type': -1},\n", 65 | " 'pendant': {'expire': 0, 'image': '', 'name': '', 'pid': 0},\n", 66 | " 'place': '',\n", 67 | " 'rank': '10000',\n", 68 | " 'regtime': 0,\n", 69 | " 'sex': '男',\n", 70 | " 'sign': '',\n", 71 | " 'spacesta': 0,\n", 72 | " 'vip': {'accessStatus': 1,\n", 73 | " 'dueRemark': '',\n", 74 | " 'vipDueDate': 0,\n", 75 | " 'vipStatus': 0,\n", 76 | " 'vipStatusWarn': '',\n", 77 | " 'vipType': 0}}" 78 | ] 79 | }, 80 | "execution_count": 23, 81 | "metadata": {}, 82 | "output_type": "execute_result" 83 | } 84 | ], 85 | "source": [ 86 | "member_card.find_one()" 87 | ] 88 | }, 89 | { 90 | "cell_type": "code", 91 | "execution_count": 24, 92 | "metadata": { 93 | "collapsed": false 94 | }, 95 | "outputs": [ 96 | { 97 | "name": "stdout", 98 | "output_type": "stream", 99 | "text": [ 100 | "{'DisplayRank': '0',\n", 101 | " '_id': ObjectId('5a91fe0f979a0f5c461ff290'),\n", 102 | " 'approve': False,\n", 103 | " 'archive_count': 17,\n", 104 | " 'article': 0,\n", 105 | " 'attention': 43,\n", 106 | " 'attentions': [31782714,\n", 107 | " 26728556,\n", 108 | " 510196,\n", 109 | " 4495971,\n", 110 | " 10010775,\n", 111 | " 3557916,\n", 112 | " 949952,\n", 113 | " 261669819,\n", 114 | " 8629138,\n", 115 | " 5760446,\n", 116 | " 2317414,\n", 117 | " 1278545,\n", 118 | " 19190861,\n", 119 | " 279353518,\n", 120 | " 13089665,\n", 121 | " 89225756,\n", 122 | " 2881659,\n", 123 | " 8226817,\n", 124 | " 75304607,\n", 125 | " 62426742,\n", 126 | " 94742590,\n", 127 | " 94177278,\n", 128 | " 317866,\n", 129 | " 6355324,\n", 130 | " 205631797,\n", 131 | " 28588984,\n", 132 | " 43413569,\n", 133 | " 8511003,\n", 134 | " 88461692,\n", 135 | " 228267254,\n", 136 | " 8578857,\n", 137 | " 3623851,\n", 138 | " 1446291,\n", 139 | " 1664494,\n", 140 | " 20503549,\n", 141 | " 12735613,\n", 142 | " 6290510,\n", 
143 | " 5581898,\n", 144 | " 6711533,\n", 145 | " 4486494,\n", 146 | " 2019740,\n", 147 | " 3766866,\n", 148 | " 488744],\n", 149 | " 'birthday': '',\n", 150 | " 'ctime': '2018-02-25 08:04:36',\n", 151 | " 'description': '',\n", 152 | " 'face': 'http://i0.hdslb.com/bfs/face/3a2799018636c9c43774dd7bf6685387bb219011.jpg',\n", 153 | " 'fans': 507,\n", 154 | " 'friend': 43,\n", 155 | " 'level_info': {'current_exp': 14372,\n", 156 | " 'current_level': 5,\n", 157 | " 'current_min': 10800,\n", 158 | " 'next_exp': 28800},\n", 159 | " 'mid': 2654670,\n", 160 | " 'name': 'LePtC',\n", 161 | " 'nameplate': {'condition': '',\n", 162 | " 'image': '',\n", 163 | " 'image_small': '',\n", 164 | " 'level': '',\n", 165 | " 'name': '',\n", 166 | " 'nid': 0},\n", 167 | " 'official_verify': {'desc': '', 'type': -1},\n", 168 | " 'pendant': {'expire': 0, 'image': '', 'name': '', 'pid': 0},\n", 169 | " 'place': '',\n", 170 | " 'rank': '10000',\n", 171 | " 'regtime': 0,\n", 172 | " 'sex': '保密',\n", 173 | " 'sign': '学物理的都好萌~',\n", 174 | " 'spacesta': 0,\n", 175 | " 'vip': {'accessStatus': 1,\n", 176 | " 'dueRemark': '',\n", 177 | " 'vipDueDate': 1491235200000.0,\n", 178 | " 'vipStatus': 0,\n", 179 | " 'vipStatusWarn': '',\n", 180 | " 'vipType': 1}}\n" 181 | ] 182 | } 183 | ], 184 | "source": [ 185 | "pprint.pprint(member_card.find_one({\"name\": 'LePtC'}))" 186 | ] 187 | }, 188 | { 189 | "cell_type": "code", 190 | "execution_count": 25, 191 | "metadata": { 192 | "collapsed": false 193 | }, 194 | "outputs": [ 195 | { 196 | "name": "stdout", 197 | "output_type": "stream", 198 | "text": [ 199 | "2571000\n" 200 | ] 201 | } 202 | ], 203 | "source": [ 204 | "pprint.pprint(member_card.count())" 205 | ] 206 | }, 207 | { 208 | "cell_type": "code", 209 | "execution_count": null, 210 | "metadata": { 211 | "collapsed": true 212 | }, 213 | "outputs": [], 214 | "source": [] 215 | } 216 | ], 217 | "metadata": { 218 | "anaconda-cloud": {}, 219 | "kernelspec": { 220 | "display_name": "Python [conda root]", 
221 | "language": "python", 222 | "name": "conda-root-py" 223 | }, 224 | "language_info": { 225 | "codemirror_mode": { 226 | "name": "ipython", 227 | "version": 3 228 | }, 229 | "file_extension": ".py", 230 | "mimetype": "text/x-python", 231 | "name": "python", 232 | "nbconvert_exporter": "python", 233 | "pygments_lexer": "ipython3", 234 | "version": "3.5.2" 235 | } 236 | }, 237 | "nbformat": 4, 238 | "nbformat_minor": 1 239 | } 240 | -------------------------------------------------------------------------------- /iodata/iodata.sln: -------------------------------------------------------------------------------- 1 |  2 | Microsoft Visual Studio Solution File, Format Version 12.00 3 | # Visual Studio 15 4 | VisualStudioVersion = 15.0.27428.2015 5 | MinimumVisualStudioVersion = 10.0.40219.1 6 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "iodata", "iodata\iodata.vcxproj", "{DF288116-8BB3-4611-A993-14C20C152CCD}" 7 | EndProject 8 | Global 9 | GlobalSection(SolutionConfigurationPlatforms) = preSolution 10 | Debug|x64 = Debug|x64 11 | Debug|x86 = Debug|x86 12 | Release|x64 = Release|x64 13 | Release|x86 = Release|x86 14 | EndGlobalSection 15 | GlobalSection(ProjectConfigurationPlatforms) = postSolution 16 | {DF288116-8BB3-4611-A993-14C20C152CCD}.Debug|x64.ActiveCfg = Debug|x64 17 | {DF288116-8BB3-4611-A993-14C20C152CCD}.Debug|x64.Build.0 = Debug|x64 18 | {DF288116-8BB3-4611-A993-14C20C152CCD}.Debug|x86.ActiveCfg = Debug|Win32 19 | {DF288116-8BB3-4611-A993-14C20C152CCD}.Debug|x86.Build.0 = Debug|Win32 20 | {DF288116-8BB3-4611-A993-14C20C152CCD}.Release|x64.ActiveCfg = Release|x64 21 | {DF288116-8BB3-4611-A993-14C20C152CCD}.Release|x64.Build.0 = Release|x64 22 | {DF288116-8BB3-4611-A993-14C20C152CCD}.Release|x86.ActiveCfg = Release|Win32 23 | {DF288116-8BB3-4611-A993-14C20C152CCD}.Release|x86.Build.0 = Release|Win32 24 | EndGlobalSection 25 | GlobalSection(SolutionProperties) = preSolution 26 | HideSolutionNode = FALSE 27 | EndGlobalSection 28 | 
GlobalSection(ExtensibilityGlobals) = postSolution 29 | SolutionGuid = {DD2172BB-8F84-49D1-98EE-89E5EC4B3E85} 30 | EndGlobalSection 31 | EndGlobal 32 | -------------------------------------------------------------------------------- /iodata/iodata/dllmain.c: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/uupers/BiliSpider/95c07da2f7abf4f318dc41dff8e6dd891045b799/iodata/iodata/dllmain.c -------------------------------------------------------------------------------- /iodata/iodata/iodata.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/uupers/BiliSpider/95c07da2f7abf4f318dc41dff8e6dd891045b799/iodata/iodata/iodata.cpp -------------------------------------------------------------------------------- /iodata/iodata/iodata.vcxproj: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Debug 6 | Win32 7 | 8 | 9 | Release 10 | Win32 11 | 12 | 13 | Debug 14 | x64 15 | 16 | 17 | Release 18 | x64 19 | 20 | 21 | 22 | 15.0 23 | {DF288116-8BB3-4611-A993-14C20C152CCD} 24 | Win32Proj 25 | iodata 26 | 10.0.16299.0 27 | iodata 28 | 29 | 30 | 31 | DynamicLibrary 32 | true 33 | v141 34 | Unicode 35 | 36 | 37 | DynamicLibrary 38 | false 39 | v141 40 | true 41 | Unicode 42 | 43 | 44 | DynamicLibrary 45 | true 46 | v141 47 | Unicode 48 | 49 | 50 | DynamicLibrary 51 | false 52 | v141 53 | true 54 | Unicode 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | true 76 | 77 | 78 | true 79 | 80 | 81 | false 82 | 83 | 84 | false 85 | 86 | 87 | 88 | Use 89 | Level3 90 | Disabled 91 | true 92 | WIN32;_DEBUG;IODATA_EXPORTS;_WINDOWS;_USRDLL;%(PreprocessorDefinitions) 93 | true 94 | 95 | 96 | Windows 97 | true 98 | 99 | 100 | 101 | 102 | Use 103 | Level3 104 | Disabled 105 | true 106 | _DEBUG;IODATA_EXPORTS;_WINDOWS;_USRDLL;%(PreprocessorDefinitions) 107 | true 
108 | 109 | 110 | Windows 111 | true 112 | 113 | 114 | 115 | 116 | Use 117 | Level3 118 | MaxSpeed 119 | true 120 | true 121 | true 122 | WIN32;NDEBUG;IODATA_EXPORTS;_WINDOWS;_USRDLL;%(PreprocessorDefinitions) 123 | true 124 | 125 | 126 | Windows 127 | true 128 | true 129 | true 130 | 131 | 132 | 133 | 134 | NotUsing 135 | Level3 136 | MaxSpeed 137 | true 138 | true 139 | false 140 | NDEBUG;IODATA_EXPORTS;_WINDOWS;_USRDLL;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions) 141 | true 142 | Fast 143 | 144 | 145 | Windows 146 | true 147 | true 148 | true 149 | 150 | 151 | 152 | 153 | 154 | 155 | 156 | 157 | -------------------------------------------------------------------------------- /iodata/iodata/iodata.vcxproj.filters: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | {4FC737F1-C7A5-4376-A066-2A32D752A2FF} 6 | cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx 7 | 8 | 9 | {93995380-89BD-4b04-88EB-625FBE52EBFB} 10 | h;hh;hpp;hxx;hm;inl;inc;ipp;xsd 11 | 12 | 13 | {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} 14 | rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms 15 | 16 | 17 | 18 | 19 | 源文件 20 | 21 | 22 | -------------------------------------------------------------------------------- /iodata/iodata/iodata.vcxproj.user: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | -------------------------------------------------------------------------------- /iodata/iodata/stdafx.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/uupers/BiliSpider/95c07da2f7abf4f318dc41dff8e6dd891045b799/iodata/iodata/stdafx.cpp -------------------------------------------------------------------------------- /iodata/iodata/stdafx.h: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/uupers/BiliSpider/95c07da2f7abf4f318dc41dff8e6dd891045b799/iodata/iodata/stdafx.h -------------------------------------------------------------------------------- /iodata/iodata/targetver.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/uupers/BiliSpider/95c07da2f7abf4f318dc41dff8e6dd891045b799/iodata/iodata/targetver.h -------------------------------------------------------------------------------- /iodata/iodata/x64/Release/dllmain.obj: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/uupers/BiliSpider/95c07da2f7abf4f318dc41dff8e6dd891045b799/iodata/iodata/x64/Release/dllmain.obj -------------------------------------------------------------------------------- /iodata/iodata/x64/Release/iodata.log: -------------------------------------------------------------------------------- 1 |  dllmain.c 2 | c:\users\surface\desktop\spidertest\iodata\iodata\dllmain.c(42): warning C4267: “初始化”: 从“size_t”转换到“int”,可能丢失数据 3 | c:\users\surface\desktop\spidertest\iodata\iodata\dllmain.c(72): warning C4267: “初始化”: 从“size_t”转换到“int”,可能丢失数据 4 | 正在创建库 C:\Users\surface\Desktop\spidertest\iodata\x64\Release\iodata.lib 和对象 C:\Users\surface\Desktop\spidertest\iodata\x64\Release\iodata.exp 5 | 正在生成代码 6 | 1 of 6 functions (16.7%) were compiled, the rest were copied from previous compilation. 
7 | 0 functions were new in current compilation 8 | 0 functions had inline decision re-evaluated but remain unchanged 9 | 已完成代码的生成 10 | iodata.vcxproj -> C:\Users\surface\Desktop\spidertest\iodata\x64\Release\iodata.dll 11 | -------------------------------------------------------------------------------- /iodata/iodata/x64/Release/iodata.tlog/CL.command.1.tlog: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/uupers/BiliSpider/95c07da2f7abf4f318dc41dff8e6dd891045b799/iodata/iodata/x64/Release/iodata.tlog/CL.command.1.tlog -------------------------------------------------------------------------------- /iodata/iodata/x64/Release/iodata.tlog/CL.read.1.tlog: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/uupers/BiliSpider/95c07da2f7abf4f318dc41dff8e6dd891045b799/iodata/iodata/x64/Release/iodata.tlog/CL.read.1.tlog -------------------------------------------------------------------------------- /iodata/iodata/x64/Release/iodata.tlog/CL.write.1.tlog: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/uupers/BiliSpider/95c07da2f7abf4f318dc41dff8e6dd891045b799/iodata/iodata/x64/Release/iodata.tlog/CL.write.1.tlog -------------------------------------------------------------------------------- /iodata/iodata/x64/Release/iodata.tlog/iodata.lastbuildstate: -------------------------------------------------------------------------------- 1 | #TargetFrameworkVersion=v4.0:PlatformToolSet=v141:EnableManagedIncrementalBuild=false:VCToolArchitecture=Native32Bit:WindowsTargetPlatformVersion=10.0.16299.0 2 | Release|x64|C:\Users\surface\Desktop\spidertest\iodata\| 3 | -------------------------------------------------------------------------------- /iodata/iodata/x64/Release/iodata.tlog/iodata.write.1u.tlog: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/uupers/BiliSpider/95c07da2f7abf4f318dc41dff8e6dd891045b799/iodata/iodata/x64/Release/iodata.tlog/iodata.write.1u.tlog -------------------------------------------------------------------------------- /iodata/iodata/x64/Release/iodata.tlog/link.command.1.tlog: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/uupers/BiliSpider/95c07da2f7abf4f318dc41dff8e6dd891045b799/iodata/iodata/x64/Release/iodata.tlog/link.command.1.tlog -------------------------------------------------------------------------------- /iodata/iodata/x64/Release/iodata.tlog/link.read.1.tlog: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/uupers/BiliSpider/95c07da2f7abf4f318dc41dff8e6dd891045b799/iodata/iodata/x64/Release/iodata.tlog/link.read.1.tlog -------------------------------------------------------------------------------- /iodata/iodata/x64/Release/iodata.tlog/link.write.1.tlog: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/uupers/BiliSpider/95c07da2f7abf4f318dc41dff8e6dd891045b799/iodata/iodata/x64/Release/iodata.tlog/link.write.1.tlog -------------------------------------------------------------------------------- /iodata/iodata/x64/Release/vc141.pdb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/uupers/BiliSpider/95c07da2f7abf4f318dc41dff8e6dd891045b799/iodata/iodata/x64/Release/vc141.pdb -------------------------------------------------------------------------------- /iodata/readme.md: -------------------------------------------------------------------------------- 1 | # 编译文件 2 | 3 | 这是 VS2017 的完整工程,因此如果安装 VS,打开 sln 工程,将 Mathematica\11.3\SystemFiles\IncludeFiles\C 里的全部 H 文件拷贝至 MSVC 编译器里的 4 | include 目录下,将 Mathematica\11.3\SystemFiles\Libraries\Windows-x86-64 里的全部 dll 文件拷贝至 MSVC 编译器里的 
x64\lib 目录下,然后编译生成即可 5 | 6 | 如果没有安装,直接可以用编译好的dll, 即 x64\Release\iodata.dll 这个文件,需要保证自己是win10系统 7 | 8 | # 接口的使用 9 | 参考"例子.nb" 10 | -------------------------------------------------------------------------------- /iodata/x64/Release/iodata.dll: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/uupers/BiliSpider/95c07da2f7abf4f318dc41dff8e6dd891045b799/iodata/x64/Release/iodata.dll -------------------------------------------------------------------------------- /iodata/x64/Release/iodata.exp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/uupers/BiliSpider/95c07da2f7abf4f318dc41dff8e6dd891045b799/iodata/x64/Release/iodata.exp -------------------------------------------------------------------------------- /iodata/x64/Release/iodata.iobj: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/uupers/BiliSpider/95c07da2f7abf4f318dc41dff8e6dd891045b799/iodata/x64/Release/iodata.iobj -------------------------------------------------------------------------------- /iodata/x64/Release/iodata.ipdb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/uupers/BiliSpider/95c07da2f7abf4f318dc41dff8e6dd891045b799/iodata/x64/Release/iodata.ipdb -------------------------------------------------------------------------------- /iodata/x64/Release/iodata.lib: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/uupers/BiliSpider/95c07da2f7abf4f318dc41dff8e6dd891045b799/iodata/x64/Release/iodata.lib -------------------------------------------------------------------------------- /iodata/x64/Release/iodata.pdb: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/uupers/BiliSpider/95c07da2f7abf4f318dc41dff8e6dd891045b799/iodata/x64/Release/iodata.pdb -------------------------------------------------------------------------------- /iodata/例子.nb: -------------------------------------------------------------------------------- 1 | (* Content-type: application/vnd.wolfram.mathematica *) 2 | 3 | (*** Wolfram Notebook File ***) 4 | (* http://www.wolfram.com/nb *) 5 | 6 | (* CreatedBy='Mathematica 11.3' *) 7 | 8 | (*CacheID: 234*) 9 | (* Internal cache information: 10 | NotebookFileLineBreakTest 11 | NotebookFileLineBreakTest 12 | NotebookDataPosition[ 158, 7] 13 | NotebookDataLength[ 12079, 286] 14 | NotebookOptionsPosition[ 11487, 269] 15 | NotebookOutlinePosition[ 11844, 285] 16 | CellTagsIndexPosition[ 11801, 282] 17 | WindowFrame->Normal*) 18 | 19 | (* Beginning of Notebook Content *) 20 | Notebook[{ 21 | Cell[BoxData[{ 22 | RowBox[{ 23 | RowBox[{ 24 | RowBox[{"dlladdress", "=", 25 | RowBox[{ 26 | RowBox[{"NotebookDirectory", "[", "]"}], "<>", 27 | "\"\\""}]}], ";"}], "\[IndentingNewLine]", 28 | RowBox[{ 29 | "(*", " ", "dll\:5e93\:7684\:5730\:5740", " ", 30 | "*)"}]}], "\[IndentingNewLine]", 31 | RowBox[{ 32 | RowBox[{"(*", 33 | RowBox[{ 34 | "\:6ce8\:610f", "\:ff0c", 35 | "\:5730\:5740\:5185\:4e0d\:80fd\:5305\:542b\:4e2d\:6587", ",", 36 | "\:82e5\:6587\:4ef6\:5939\:4ee5\:4e2d\:6587\:547d\:540d\:4f1a\:51fa\:9519\ 37 | \:8bef"}], "*)"}]}]}], "Input", 38 | CellChangeTimes->{{3.73216712972129*^9, 3.7321672270784993`*^9}, 39 | 3.7321673132394714`*^9, 3.7348302083462377`*^9, {3.7393501758631268`*^9, 40 | 3.7393501764153805`*^9}, {3.739350215442043*^9, 41 | 3.739350234967987*^9}},ExpressionUUID->"fc713bd0-bdd1-47af-a095-\ 42 | 42482e05f762"], 43 | 44 | Cell[BoxData[ 45 | RowBox[{"\[IndentingNewLine]", 46 | RowBox[{ 47 | RowBox[{ 48 | RowBox[{"VideoDataRead", "=", 49 | RowBox[{"LibraryFunctionLoad", "[", 50 | RowBox[{"dlladdress", ",", "\"\\"", ",", 51 | RowBox[{"{", 52 | RowBox[{"\"\\"", ",", 53 | 
RowBox[{"{", 54 | RowBox[{"Integer", ",", "1"}], "}"}], ",", "Integer"}], "}"}], ",", 55 | RowBox[{"{", 56 | RowBox[{"Integer", ",", "_"}], "}"}]}], "]"}]}], ";"}], 57 | "\[IndentingNewLine]", 58 | RowBox[{"(*", " ", "\:51fd\:6570\:52a0\:8f7d", " ", "*)"}]}]}]], "Input", 59 | CellChangeTimes->{{3.7321672573508244`*^9, 3.7321673204006643`*^9}, 60 | 3.7348300232041855`*^9}, 61 | CellLabel->"In[8]:=",ExpressionUUID->"e0af29f6-1a87-4bee-bc01-2fabe0f5f650"], 62 | 63 | Cell[BoxData[{ 64 | RowBox[{"VideoDataRead", "[", 65 | RowBox[{ 66 | RowBox[{ 67 | RowBox[{"NotebookDirectory", "[", "]"}], "<>", "\"\<2018-6-30\>\""}], ",", 68 | RowBox[{"{", 69 | RowBox[{"3", ",", "6"}], "}"}], ",", "20000000"}], 70 | "]"}], "\[IndentingNewLine]", 71 | RowBox[{ 72 | RowBox[{"(*", 73 | RowBox[{ 74 | "\:6ce8\:610f", "\:ff0c", 75 | "\:5730\:5740\:5185\:4e0d\:80fd\:5305\:542b\:4e2d\:6587", ",", 76 | "\:82e5\:6587\:4ef6\:5939\:4ee5\:4e2d\:6587\:547d\:540d\:4f1a\:51fa\:9519\ 77 | \:8bef"}], "*)"}]}]}], "Input", 78 | CellChangeTimes->{{3.7321673233529215`*^9, 3.7321677471989913`*^9}, { 79 | 3.732167821696864*^9, 3.7321678582866273`*^9}, {3.7321678888017893`*^9, 80 | 3.7321679154352865`*^9}, {3.7321679554973545`*^9, 3.732168336868788*^9}, { 81 | 3.7321694839658813`*^9, 3.732169489343811*^9}, {3.7348300499369802`*^9, 82 | 3.734830084998059*^9}, {3.734830219141446*^9, 3.734830234465931*^9}, { 83 | 3.739350115548448*^9, 3.7393501495434303`*^9}, 84 | 3.739350240554093*^9},ExpressionUUID->"2a63592d-2fce-4e3b-b842-\ 85 | 7f556151e89c"], 86 | 87 | Cell[BoxData[ 88 | RowBox[{"(*", " ", "\[IndentingNewLine]", " ", 89 | RowBox[{ 90 | RowBox[{ 91 | "VideoDataRead", " ", "\:51fd\:6570", "\[IndentingNewLine]", 92 | "\[IndentingNewLine]", 93 | RowBox[{"\:529f\:80fd", ":", " ", 94 | RowBox[{ 95 | "\:8bfb\:53d6\:4e8c\:8fdb\:5236\:6587\:4ef6\:4e2d\:7684\:6570\:636e", 96 | "\[IndentingNewLine]", "\[IndentingNewLine]", "\:8f93\:5165"}], ":", 97 | " ", "\:7b2c\:4e00\:4e2a\:53c2\:6570\:4e3a\:5b57\:7b26\:4e32"}]}], 
98 | "\:ff0c", 99 | RowBox[{ 100 | "\:8868\:793a\:5f85\:8bfb\:53d6\:4e8c\:8fdb\:5236\:6587\:4ef6\:7684\:5b8c\ 101 | \:6574\:5730\:5740", "\[IndentingNewLine]", " ", 102 | "\:7b2c\:4e8c\:4e2a\:53c2\:6570\:4e3a\:6574\:6570\:5217\:8868"}], 103 | "\:ff0c", 104 | "\:8868\:793a\:8bfb\:53d6\:89c6\:9891\:4e2d\:9700\:8981\:5305\:542b\:7684\ 105 | \:6570\:636e\:9879", "\:ff0c", 106 | RowBox[{ 107 | "\:89c1\:4e0b\:9762\:7684\:89e3\:91ca", "\[IndentingNewLine]", " ", 108 | "\:7b2c\:4e09\:4e2a\:53c2\:6570\:540c\:4e3a\:6700\:5927\:5206\:914d\:5185\ 109 | \:5b58\:53c2\:6570"}], "\:ff0c", 110 | "\:4f46\:662f\:4e0e\:4e0a\:9762\:7684\:542b\:4e49\:4e0d\:540c", "\:ff0c", 111 | RowBox[{ 112 | "\:53ea\:9700\:6bd4\:603b\:73b0\:5b58\:7a3f\:4ef6\:6570\:5927\:5c31\:884c\ 113 | \:4e86", "\[IndentingNewLine]", "\[IndentingNewLine]", 114 | "\:5173\:4e8e\:7b2c\:4e8c\:4e2a\:53c2\:6570\:7684\:89e3\:91ca"}], 115 | "\:ff1a", "\[IndentingNewLine]", 116 | RowBox[{ 117 | RowBox[{ 118 | "\:6ee1\:8db3\:4ee5\:4e0b\:5bf9\:5e94\:89c4\:5219", "\[IndentingNewLine]", 119 | "1"}], " ", "->", " ", 120 | RowBox[{ 121 | RowBox[{"\:89c6\:9891\:7f16\:53f7", "\[IndentingNewLine]", "2"}], " ", "->", 122 | " ", 123 | RowBox[{ 124 | RowBox[{"\:4e0a\:4f20\:8005\:7f16\:53f7", "\[IndentingNewLine]", "3"}], 125 | " ", "->", " ", 126 | RowBox[{ 127 | RowBox[{"\:6295\:7a3f\:65f6\:95f4", "\[IndentingNewLine]", "4"}], " ", 128 | "->", " ", 129 | RowBox[{ 130 | RowBox[{"\:89c6\:9891\:65f6\:957f", "\[IndentingNewLine]", "51"}], 131 | " ", "->", " ", 132 | RowBox[{ 133 | RowBox[{"\:5206P\:6570", "\[IndentingNewLine]", "52"}], " ", "->", 134 | " ", 135 | RowBox[{ 136 | RowBox[{"\:5206\:533a\:7f16\:53f7", "\[IndentingNewLine]", "53"}], 137 | " ", "->", " ", 138 | RowBox[{ 139 | RowBox[{ 140 | "\:539f\:521b\:4e3a0\:642c\:8fd0\:4e3a1", "\[IndentingNewLine]", 141 | "54"}], " ", "->", " ", 142 | RowBox[{ 143 | RowBox[{"\:672a\:77e5", "\[IndentingNewLine]", "6"}], " ", "->", 144 | " ", 145 | RowBox[{ 146 | RowBox[{"\:64ad\:653e\:6570", 
"\[IndentingNewLine]", "7"}], " ", 147 | "->", " ", 148 | RowBox[{ 149 | RowBox[{"\:5f39\:5e55\:6570", "\[IndentingNewLine]", "8"}], " ", 150 | "->", " ", 151 | RowBox[{ 152 | RowBox[{"\:786c\:5e01\:6570", "\[IndentingNewLine]", "9"}], 153 | " ", "->", " ", 154 | RowBox[{ 155 | RowBox[{"\:6536\:85cf\:6570", "\[IndentingNewLine]", "10"}], 156 | " ", "->", " ", 157 | RowBox[{ 158 | RowBox[{"\:5206\:4eab\:6570", "\[IndentingNewLine]", "11"}], 159 | " ", "->", " ", 160 | RowBox[{ 161 | RowBox[{"\:8bc4\:8bba\:6570", "\[IndentingNewLine]", "12"}], 162 | " ", "->", " ", 163 | RowBox[{ 164 | RowBox[{ 165 | "\:70b9\:8d5e\:6570", "\[IndentingNewLine]", "13"}], " ", "->", 166 | " ", 167 | RowBox[{ 168 | "\:5dee\:8bc4\:6570", "\[IndentingNewLine]", 169 | "\[IndentingNewLine]", 170 | "\:8f93\:51fa"}]}]}]}]}]}]}]}]}]}]}]}]}]}]}]}]}], 171 | "\:ff1a", 172 | RowBox[{ 173 | "\:4e8c\:9636\:6574\:6570\:5217\:8868", "\[IndentingNewLine]", 174 | "\[IndentingNewLine]", " ", 175 | "\:6211\:4eec\:8bfb\:7684\:662f\:521a\:624dGetData\:751f\:6210\:7684\:4e8c\ 176 | \:8fdb\:5236\:6587\:4ef6"}], "\:ff0c", 177 | RowBox[{"\:5982\:679c20000000", 178 | RowBox[{"\:ff08", "\:603b\:73b0\:5b58\:7a3f\:4ef6\:6570", "\:ff09"}], 179 | "\:4e0d\:591f\:5219\:8c03\:5927", "\[IndentingNewLine]", 180 | "\[IndentingNewLine]", "\:5bf9\:4e8e\:4e0a\:9762\:7684\:4f8b\:5b50"}], 181 | "\:ff0c", "\:7531\:4e8e3\:5bf9\:5e94\:6295\:7a3f\:65f6\:95f4", "\:ff0c", 182 | RowBox[{"6", "\:5bf9\:5e94\:64ad\:653e\:6570"}], "\:ff0c", 183 | RowBox[{ 184 | "\:90a3\:4e48\:6700\:7ec8\:7684\:8f93\:51fa\:4e3a", "\[IndentingNewLine]", 185 | RowBox[{"\:ff5b", "\[IndentingNewLine]", 186 | RowBox[{ 187 | RowBox[{"\:ff5b", 188 | RowBox[{ 189 | "\:7b2c1\:4e2a\:89c6\:9891\:7684\:6295\:7a3f\:65f6\:95f4", "\:ff0c", 190 | "\:7b2c1\:4e2a\:89c6\:9891\:7684\:64ad\:653e\:6570"}], "\:ff5d"}], 191 | ",", "\[IndentingNewLine]", 192 | RowBox[{"\:ff5b", 193 | RowBox[{ 194 | "\:7b2c2\:4e2a\:89c6\:9891\:7684\:6295\:7a3f\:65f6\:95f4", "\:ff0c", 195 | 
"\:7b2c2\:4e2a\:89c6\:9891\:7684\:64ad\:653e\:6570"}], "\:ff5d"}], 196 | ",", "\[IndentingNewLine]", 197 | RowBox[{"\:ff5b", 198 | RowBox[{ 199 | "\:7b2c3\:4e2a\:89c6\:9891\:7684\:6295\:7a3f\:65f6\:95f4", "\:ff0c", 200 | "\:7b2c3\:4e2a\:89c6\:9891\:7684\:64ad\:653e\:6570"}], "\:ff5d"}], 201 | ",", "\[IndentingNewLine]", 202 | RowBox[{"...", "\[IndentingNewLine]", 203 | RowBox[{"\:ff5b", 204 | RowBox[{ 205 | "\:6700\:540e\:4e00\:4e2a\:89c6\:9891\:7684\:6295\:7a3f\:65f6\:95f4", 206 | "\:ff0c", 207 | "\:6700\:540e\:4e00\:4e2a\:89c6\:9891\:7684\:64ad\:653e\:6570"}], 208 | "\:ff5d"}]}]}], "\[IndentingNewLine]", "\:ff5d"}], 209 | "\[IndentingNewLine]", "\[IndentingNewLine]", 210 | "\:4e4b\:6240\:4ee5\:8fd9\:4e48\:8bbe\:8ba1\:662f\:4e3a\:4e86\:8282\:7701\ 211 | \:5185\:5b58"}], "\:ff0c", 212 | "\:5982\:679c\:4f60\:8981\:8bfb\:53d6\:5168\:90e8\:6570\:636e", "\:ff0c", 213 | RowBox[{"\:5c31\:662f", "\[IndentingNewLine]", 214 | RowBox[{"VideoDataRead", "[", 215 | RowBox[{ 216 | RowBox[{ 217 | RowBox[{"NotebookDirectory", "[", "]"}], "<>", "\"\<2018-6-30\>\""}], 218 | ",", 219 | RowBox[{"{", 220 | RowBox[{ 221 | "1", ",", "2", ",", "3", ",", "4", ",", "51", ",", "52", ",", "53", 222 | ",", "54", ",", "6", ",", "7", ",", "8", ",", "9", ",", "10", ",", 223 | "11", ",", "12"}], "}"}], ",", "20000000"}], "]"}], 224 | "\[IndentingNewLine]", "\[IndentingNewLine]", 225 | "\:5982\:679c\:53ea\:5bf9\:4e00\:9879\:6570\:636e\:611f\:5174\:8da3"}], 226 | "\:ff0c", 227 | RowBox[{ 228 | "\:6bd4\:5982\:64ad\:653e\:6570", "\[IndentingNewLine]", 229 | "\[IndentingNewLine]", 230 | RowBox[{"VideoDataRead", "[", 231 | RowBox[{ 232 | RowBox[{ 233 | RowBox[{"NotebookDirectory", "[", "]"}], "<>", "\"\<2018-6-30\>\""}], 234 | ",", 235 | RowBox[{"{", "6", "}"}], ",", "20000000"}], "]"}], 236 | "\[IndentingNewLine]", "\[IndentingNewLine]", 237 | "\:6700\:540e\:9700\:8981\:505aFlatten\:64cd\:4f5c"}], "\:ff0c", 238 | RowBox[{"\:56e0\:4e3a\:8fd4\:56de\:7684\:662f", "\[IndentingNewLine]", 239 | 
RowBox[{"\:ff5b", "\[IndentingNewLine]", 240 | RowBox[{ 241 | RowBox[{ 242 | "\:ff5b", "\:7b2c1\:4e2a\:89c6\:9891\:7684\:64ad\:653e\:6570", 243 | "\:ff5d"}], ",", "\[IndentingNewLine]", 244 | RowBox[{ 245 | "\:ff5b", "\:7b2c2\:4e2a\:89c6\:9891\:7684\:64ad\:653e\:6570", 246 | "\:ff5d"}], ",", "\[IndentingNewLine]", 247 | RowBox[{ 248 | "\:ff5b", "\:7b2c3\:4e2a\:89c6\:9891\:7684\:64ad\:653e\:6570", 249 | "\:ff5d"}], ",", "\[IndentingNewLine]", 250 | RowBox[{"...", "\[IndentingNewLine]", 251 | RowBox[{ 252 | "\:ff5b", 253 | "\:6700\:540e\:4e00\:4e2a\:89c6\:9891\:7684\:64ad\:653e\:6570", 254 | "\:ff5d"}]}]}], "\[IndentingNewLine]", "\:ff5d"}], 255 | "\[IndentingNewLine]", "\:591a\:4e86\:4e00\:91cd\:62ec\:53f7"}]}], 256 | "\[IndentingNewLine]", "\[IndentingNewLine]", "*)"}]], "Input", 257 | CellChangeTimes->{{3.732168366807485*^9, 3.7321683879654946`*^9}, { 258 | 3.732168460436823*^9, 3.7321685042017503`*^9}, {3.7321685418605547`*^9, 259 | 3.732168681827449*^9}, {3.732168733365339*^9, 3.732168789597686*^9}, { 260 | 3.7321688565648155`*^9, 3.7321688946178856`*^9}, {3.732168971923645*^9, 261 | 3.7321689749752784`*^9}, {3.7321690551681805`*^9, 3.732169266521139*^9}, { 262 | 3.7321693074723463`*^9, 3.732169467157052*^9}, {3.7321695087635098`*^9, 263 | 3.732169872299734*^9}, {3.7321702106523886`*^9, 3.7321702116834574`*^9}, { 264 | 3.7348300942972703`*^9, 3.73483011163785*^9}, {3.7348327630242662`*^9, 265 | 3.7348327657731028`*^9}, {3.738677149879278*^9, 3.738677156091444*^9}, { 266 | 3.7393502514872866`*^9, 267 | 3.7393502969013157`*^9}},ExpressionUUID->"e4d72a2d-fb4a-4b02-b1df-\ 268 | d764ca5ceb9a"] 269 | }, 270 | WindowSize->{1280, 637}, 271 | WindowMargins->{{-8, Automatic}, {Automatic, -8}}, 272 | FrontEndVersion->"11.3 for Microsoft Windows (64-bit) (2018\:5e743\:670828\ 273 | \:65e5)", 274 | StyleDefinitions->"Default.nb" 275 | ] 276 | (* End of Notebook Content *) 277 | 278 | (* Internal cache information *) 279 | (*CellTagsOutline 280 | CellTagsIndex->{} 281 | *) 
282 | (*CellTagsIndex 283 | CellTagsIndex->{} 284 | *) 285 | (*NotebookFileOutline 286 | Notebook[{ 287 | Cell[558, 20, 832, 21, 85, "Input",ExpressionUUID->"fc713bd0-bdd1-47af-a095-42482e05f762"], 288 | Cell[1393, 43, 743, 17, 85, "Input",ExpressionUUID->"e0af29f6-1a87-4bee-bc01-2fabe0f5f650"], 289 | Cell[2139, 62, 1012, 22, 66, "Input",ExpressionUUID->"2a63592d-2fce-4e3b-b842-7f556151e89c"], 290 | Cell[3154, 86, 8329, 181, 1131, "Input",ExpressionUUID->"e4d72a2d-fb4a-4b02-b1df-d764ca5ceb9a"] 291 | } 292 | ] 293 | *) 294 | 295 | -------------------------------------------------------------------------------- /node-spider-dist/.editorconfig: -------------------------------------------------------------------------------- 1 | root = true 2 | 3 | [*] 4 | charset = utf-8 5 | end_of_line = lf 6 | insert_final_newline = true 7 | trim_trailing_whitespace = true 8 | indent_style = space 9 | indent_size = 4 10 | 11 | [*.{md,json,yml,yaml}] 12 | trim_trailing_whitespace = false 13 | indent_style = space 14 | indent_size = 2 15 | -------------------------------------------------------------------------------- /node-spider-dist/.eslintrc.yml: -------------------------------------------------------------------------------- 1 | extends: "standard" 2 | plugins: 3 | - "standard" 4 | - "promise" 5 | - "node" 6 | rules: 7 | indent: 8 | - "warn" 9 | - 4 10 | semi: 11 | - "warn" 12 | - "always" 13 | semi-style: 14 | - "warn" 15 | - "last" 16 | semi-spacing: 17 | - "error" 18 | - 19 | before: false 20 | after: true 21 | -------------------------------------------------------------------------------- /node-spider-dist/client.js: -------------------------------------------------------------------------------- 1 | const { client } = require('.'); 2 | const minimist = require('minimist'); 3 | const ProgressBar = require('progress'); 4 | const ora = require('ora'); 5 | 6 | const xdaili = require('./client/proxy/xdaili'); 7 | const xicidaili = require('./client/proxy/xicidaili'); 8 | const 
// Command-line options (parsed with minimist):
//   -p / --proxy <url>   proxy to crawl through; may be given several times
//   -q / --quiet         only call client.setOutput(), no progress UI
//   --old                skip the progress-bar UI below
//   --np / --netproxy    start the public proxy-list fetchers
//   --dev                exposed through config.args for other modules
const args = minimist(process.argv.slice(2), {
    alias: { 'p': 'proxy', 'q': 'quiet', 'np': 'netproxy' },
    string: 'proxy',
    boolean: ['quiet', 'old', 'netproxy', 'dev'],
    default: { proxy: [ ], quiet: false, old: false }
});

// Share the parsed arguments with the other modules through y-config.
const config = require('y-config');
config.args = args;

// minimist yields a string for a single --proxy and an array for several.
const proxyList =
    typeof args.proxy === 'string' ? [ args.proxy ] : args.proxy;

// pid -> progress bar (while crawling) or ora spinner (while verifying/uploading)
const barMap = { };

/**
 * Build the custom tokens for the progress-bar format string.
 * The "[<active>A,<ban>B/<total>IPs]" suffix is only rendered when more
 * than one spider exists; the ban part only when something is banned.
 *
 * @param {*} pid package id shown as `:pkg`
 * @param {{active: number, ban: number, total: number}} info spider counters
 * @returns {object} token map for `ProgressBar#tick`
 */
const barTokens = (pid, info) => {
    const single = info.total <= 1;
    const banned = info.ban !== 0;
    const active = `${info.active}${banned ? 'A' : ''}`;
    return {
        'pkg': pid,
        'active': single ? '' : `[${active}`,
        'ban': banned ? `,${info.ban}B` : '',
        'ips': single ? '' : `/${info.total}IPs]`
    };
};

/**
 * Run `client.loop` until it resolves, restarting it whenever it rejects.
 * Written as a loop so repeated failures do not grow a promise chain.
 */
const startLoop = async () => {
    for (;;) {
        try {
            return await client.loop(proxyList);
        } catch (error) {
            // restart the loop
        }
    }
};

// start code
(async () => {
    require('cfonts').say('UUPERS', { align: 'left', font: 'block' });
    if (args.quiet) {
        client.setOutput();
    } else if (!args.old) {
        // Last counters received through HEART. Starts with neutral values so
        // a CATCH arriving before the first HEART cannot dereference undefined.
        let spiderInfo = { active: 0, ban: 0, total: 0 };
        client.setOutput();
        client.on(client.event.START, (pid, mids) => {
            const format =
                'Package :pkg [:bar] :percent :rate/urs:active:ban:ips :elapseds';
            barMap[pid] = new ProgressBar(format, {
                width: 25,
                total: mids.length
            });
            barMap[pid].tick(0, { 'pkg': pid });
        });
        client.on(client.event.HEART, (pid, info) => {
            const bar = barMap[pid];
            if (!bar) {
                return;
            }
            spiderInfo = info;
            // tick(0, ...) redraws the tokens without advancing the bar
            bar.tick(0, barTokens(pid, spiderInfo));
        });
        client.on(client.event.CATCH, (pid) => {
            const bar = barMap[pid];
            if (!bar) {
                return;
            }
            // one card stored -> advance the bar by one
            bar.tick(barTokens(pid, spiderInfo));
        });
        client.on(client.event.END, (pid) => {
            delete barMap[pid];
        });
        client.on(client.event.TIMEOUT, (pid) => {
            delete barMap[pid];
            ora(`获取超时 Package ${pid}`).fail();
        });
        client.on(client.event.VING, (pid) => {
            // from here on barMap[pid] holds an ora spinner
            barMap[pid] = ora(`正在校检 Package ${pid}`).start();
        });
        client.on(client.event.VFAIL, (pid) => {
            barMap[pid].fail(`数据有误 Package ${pid}`);
            delete barMap[pid];
        });
        client.on(client.event.SENDING, (pid) => {
            barMap[pid].start(`正在上传 Package ${pid}`);
        });
        client.on(client.event.SENDED, (pid) => {
            barMap[pid].succeed(`成功上传 Package ${pid}`);
            delete barMap[pid];
        });
        client.on(client.event.SENDFAIL, (pid) => {
            barMap[pid].fail(`上传失败 Package ${pid}`);
            delete barMap[pid];
        });
    }
    if (args.np) {
        // Start every proxy-list fetcher; they keep running in the background.
        const fetchers = [
            xdaili, xicidaili, kuaidaili, cnProxy,
            ip89, yundaili, mogudaili, mayidaili
        ];
        for (const fetcher of fetchers) {
            fetcher.process();
        }
    }
    await startLoop();
})();
/**
 * Events emitted on `SpiderNest#event`. The first listener argument is
 * always the id (pid) of the package being processed.
 */
const NestEvent = {
    'HEART': Symbol('HEART'),
    'START': Symbol('START'),
    'END': Symbol('END'),
    'SENDING': Symbol('SENDING'),
    'SENDED': Symbol('SENDED'),
    'SENDFAIL': Symbol('SENDFAIL'),
    'VING': Symbol('VING'),
    'VSUCCESS': Symbol('VSUCCESS'),
    'VFAIL': Symbol('VFAIL'),
    'TIMEOUT': Symbol('TIMEOUT'),
    'CATCH': Symbol('CATCH')
};

// Give up on a package when no card has been stored for this long (1h);
// `startedAt` is refreshed on every CATCH event.
const TIMEOUT = 1000 * 60 * 60;

// Upload attempts after the first one.
const MAX_UPLOAD_RETRY = 10;

/**
 * A pool of spiders (one per proxy url, '' meaning no proxy) that fetches
 * one package of member ids, validates the result and uploads it.
 */
class SpiderNest {
    /**
     * @param {string[]} list spider names / proxy urls to start with
     */
    constructor (list = [ '' ]) {
        this.names = [ ]; // names[i] is the name of nest[i]
        this.nest = [ ];
        this.event = new EventEmitter();
        this.startedAt = 0;
        this.keyv = new Keyv();

        this.appendSpiders(list);
        this.event.on(NestEvent.START, (pid, mids) => {
            OT.info(`[${nowStr()}] Get package ${pid}, fetch mids [${mids[0]}, ${mids[mids.length - 1]}]`);
        });
        this.event.on(NestEvent.SENDING, (pid) => {
            OT.log(`[${nowStr()}] Sending package ${pid}`);
        });
        this.event.on(NestEvent.SENDED, (pid) => {
            OT.log(`[${nowStr()}] Sended package ${pid}`);
        });
        this.event.on(NestEvent.CATCH, () => {
            // progress was made: restart the idle timeout
            this.startedAt = Date.now();
        });
    }

    /**
     * Create one spider for every name that is not in the nest yet.
     *
     * @param {Iterable<string>} names candidate spider names / proxy urls
     * @returns {boolean} always true
     */
    appendSpiders (names) {
        const fresh = [...new Set(names)]
            .filter((name) => this.names.indexOf(name) === -1);
        const spiders = fresh.map((name) => {
            const spider = new Spider(name);
            const event = spider.event;
            event.on(SpiderEvent.ERROR, (s, mid, msg) => {
                OT.error(`[${nowStr()}][${s.url}] mid=${mid} ${msg}`);
                if (s.errors >= 5) {
                    // indexOf is -1 when the spider was removed already;
                    // cleanDeadSpider ignores that case.
                    this.cleanDeadSpider(this.names.indexOf(s.url));
                }
            });
            event.on(SpiderEvent.BAN, (s) => {
                OT.warn(`[${nowStr()}][${s.url}] oops,你的IP进小黑屋了,爬虫程序会在10min后继续`);
            });
            if ((config.args || { }).dev) {
                event.on(SpiderEvent.START, (s, mid) => {
                    OT.log(`[${nowStr()}][${s.url}] mid=${mid} Start`);
                });
                event.on(SpiderEvent.END, (s, mid) => {
                    OT.log(`[${nowStr()}][${s.url}] mid=${mid} Get`);
                });
            }
            return spider;
        });
        this.names.push(...fresh);
        this.nest.push(...spiders);
        return true;
    }

    /**
     * Fetch one package, crawl all of its mids and upload the result.
     *
     * @returns {Promise<number|undefined>} -1 when the server reports pid -1,
     *     otherwise undefined once the package is uploaded or timed out
     */
    async march () {
        this.startedAt = Date.now();
        const data = await getPackageAsync();
        const pid = JSON.parse(data).pid;
        if (pid === -1) return pid;
        this.store = new NestStore(pid);
        this.store.on(StoreEvent.PUSH, (store, mid) => {
            this.event.emit(
                NestEvent.CATCH, store.getPid(), mid, store.getList());
        });
        this.event.emit(NestEvent.START, pid, this.store.getMids());
        for (;;) {
            await sleep(50);
            if (this.startedAt + TIMEOUT <= Date.now()) {
                this.event.emit(NestEvent.TIMEOUT, pid);
                break;
            }
            this.event.emit(NestEvent.HEART, pid, await this.getSpidersInfo());
            if (this.store.getCount() === ID_RANGE_NUM) {
                this.event.emit(NestEvent.END, pid, this.store.getList());
                // No argument: the parameter of upload() is the retry
                // counter, passing the pid would disable the retries.
                await this.upload();
                break;
            }
            this._dispatch();
        }
    }

    /**
     * Hand a missing mid to every free spider. Spiders and mids are both
     * shuffled; when there are more free spiders than missing mids the list
     * of mids is reused from the start, so a mid is never undefined.
     */
    _dispatch () {
        const free = this.getFreeSpiders();
        const spiders = lodash.sampleSize(free, free.length);
        const missing = this.store.getLoseCards();
        const cards = lodash.sampleSize(missing, missing.length);
        if (spiders.length === 0 || cards.length === 0) {
            return;
        }
        spiders.forEach((spider, i) => {
            spider.crawl(this.store, cards[i % cards.length]);
        });
    }

    /**
     * Check that the stored cards match the package's mids one to one.
     * Emits VING first, then VSUCCESS or VFAIL.
     * The stored list is sorted by mid in place.
     * NOTE(review): assumes getMids() is in ascending order - confirm
     * against packageArray.
     *
     * @returns {boolean} whether the package is complete
     */
    validtion () {
        const pid = this.store.getPid();
        this.event.emit(NestEvent.VING, pid);
        const expected = this.store.getMids();
        const actual = this.store.getList()
            .sort((a, b) => a.mid - b.mid)
            .map((card) => +card.mid);
        const ok = expected.every((mid, i) => mid === actual[i]);
        this.event.emit(ok ? NestEvent.VSUCCESS : NestEvent.VFAIL, pid);
        return ok;
    }

    /**
     * Validate and upload the current package, retrying failed uploads.
     * Emits SENDING before each attempt, then SENDED on success or SENDFAIL
     * once the retries are used up.
     *
     * @param {number} index number of attempts already made
     * @returns {Promise<undefined>}
     */
    async upload (index = 0) {
        const pid = this.store.getPid();
        if (!this.validtion()) {
            return;
        }
        this.event.emit(NestEvent.SENDING, pid, index);
        try {
            await uploadPackageAsync(pid, this.store.getList());
        } catch (error) {
            if (index < MAX_UPLOAD_RETRY) {
                return this.upload(index + 1);
            }
            this.event.emit(NestEvent.SENDFAIL, pid);
            return;
        }
        // Emitted outside the try block so that a throwing listener is not
        // mistaken for a failed upload.
        this.event.emit(NestEvent.SENDED, pid);
    }

    /** @returns {boolean} whether at least one spider is FREE */
    isHasFree () {
        return this.nest.some((s) => s.status === SpiderStatus.FREE);
    }

    /** @returns {boolean} whether at least one spider is BUSY */
    isHasBusy () {
        return this.nest.some((s) => s.status === SpiderStatus.BUSY);
    }

    /** @returns {Spider[]} all spiders whose status is FREE */
    getFreeSpiders () {
        return this.nest.filter((s) => s.status === SpiderStatus.FREE);
    }

    /**
     * Count the spiders by state. The result is cached for one second.
     *
     * @returns {Promise<{active: number, ban: number, total: number}>}
     */
    async getSpidersInfo () {
        const KEY = 'info';
        const cached = await this.keyv.get(KEY);
        if (cached) {
            return cached;
        }
        const total = this.nest.length;
        const ban =
            this.nest.filter((s) => s.status === SpiderStatus.BAN).length;
        const content = { active: total - ban, ban, total };
        await this.keyv.set(KEY, content, 1000); // cache 1s
        return content;
    }

    /** @returns {Spider|null} a random FREE spider, null when none is free */
    randomSpider () {
        return lodash.sample(this.getFreeSpiders()) || null;
    }

    /**
     * Remove the spider at position `point` from the nest.
     * Positions outside the nest are ignored; in particular -1, which
     * `Array#splice` would otherwise treat as "the last element".
     *
     * @param {number} point index into `names` / `nest`
     */
    cleanDeadSpider (point) {
        if (!Number.isInteger(point) ||
            point < 0 || point >= this.nest.length) {
            return;
        }
        this.names.splice(point, 1);
        this.nest.splice(point, 1);
    }
}

// Per-mid state inside a NestStore.
const StoreStatus = {
    'NONE': Symbol('NONE'),
    'EXIST': Symbol('EXIST'),
    'PANDING': Symbol('PANDING')
};

const StoreEvent = {
    'PUSH': Symbol('PUSH')
};

/**
 * Collects the cards of one package. Emits StoreEvent.PUSH (store, mid)
 * whenever a card is stored.
 */
class NestStore extends EventEmitter {
    /**
     * @param {*} pid package id; its mids come from packageArray(pid)
     */
    constructor (pid) {
        super();
        this.pid = pid;
        this.mids = packageArray(pid);
        this.cardList = [ ];
        this.processing = { }; // mid -> StoreStatus
        for (const mid of this.mids) {
            this.processing[mid] = StoreStatus.NONE;
        }
    }

    getList () { return this.cardList; }

    getPid () { return this.pid; }

    getMids () { return this.mids; }

    /**
     * @returns {string[]} the mids without a stored card, as the string
     *     keys of the status map
     */
    getLoseCards () {
        return Object.keys(this.processing)
            .filter((mid) => this.processing[mid] === StoreStatus.NONE);
    }

    /** @returns {number} number of stored cards */
    getCount () {
        return this.cardList.length;
    }

    /**
     * Store the card of `mid`. Ignored when the mid does not belong to the
     * package or already has a card.
     */
    addCard (mid, card) {
        if (this.processing[mid] !== StoreStatus.NONE) {
            return;
        }
        this.processing[mid] = StoreStatus.PANDING;
        this.cardList.push(card);
        this.processing[mid] = StoreStatus.EXIST;
        this.emit(StoreEvent.PUSH, this, mid);
    }
}

module.exports = { SpiderNest, NestEvent };
// Plain-text export pages of 89ip.cn, one per network.
const URLS = [
    'http://www.89ip.cn/tiqv.php?sxb=&tqsl=1000&ports=&ktip=&xl=on&submit=%CC%E1++%C8%A1', // nationwide
    'http://www.89ip.cn/tiqv.php?sxb=&tqsl=1000&ports=&ktip=&xl=%B5%E7%D0%C5&submit=%CC%E1++%C8%A1', // China Telecom
    'http://www.89ip.cn/tiqv.php?sxb=&tqsl=1000&ports=&ktip=&xl=%C1%AA%CD%A8&submit=%CC%E1++%C8%A1', // China Unicom
    'http://www.89ip.cn/tiqv.php?sxb=&tqsl=1000&ports=&ktip=&xl=%D2%C6%B6%AF&submit=%CC%E1++%C8%A1' // China Mobile
];

// Matches "a.b.c.d:port".
// NOTE(review): the original literal carries a trailing `(?!:...)` lookahead
// that is broken across a line break in this copy of the file. It is omitted
// here, which gives the same pattern as the sibling mayidaili module -
// confirm against the repository.
const PROXY_PATTERN = /(\d+\.){3}\d+:\d+/gi;

/**
 * Download one export page and extract its proxies.
 * A failed request (or a page without the `.mass` element) is retried up to
 * ten times; a page that simply lists no proxy yields an empty list at once.
 *
 * @param {string} url page to download
 * @param {number} index number of attempts already made
 * @returns {Promise<string[]>} proxy urls of the form "http://ip:port",
 *     empty when every attempt failed
 */
const getListAsync = async (url = URLS[0], index = 0) => {
    try {
        const $ = await rp({
            method: 'GET',
            uri: url,
            timeout: 60 * 1000,
            transform: function (body) {
                return cheerio.load(body);
            }
        });
        const html = $('.mass').html().replace(/\s+/g, '');
        // String#match returns null, not [], when nothing matches
        const found = html.match(PROXY_PATTERN) || [ ];
        return found.map((item) => `http://${item}`);
    } catch (error) {
        if (index < 10) {
            return getListAsync(url, index + 1);
        }
        return [ ];
    }
};

/**
 * Add the proxies of every page to the nest that is currently running.
 * Does nothing when no nest exists after the initial 3s wait.
 */
const appendList = async () => {
    await sleep(3000);
    if (!client.getCurrent()) {
        return;
    }
    for (const url of URLS) {
        const list = await getListAsync(url);
        if (!Array.isArray(list)) {
            continue;
        }
        // looked up again on purpose: the current nest may have changed
        // while the page was downloading
        client.getCurrent().appendSpiders(list);
        await sleep(500);
    }
};

/**
 * Fetch the proxy lists now and again every 5 minutes.
 */
module.exports.process = async () => {
    // get per 5 min
    schedule.scheduleJob('*/5 * * * *', () => {
        appendList();
    });
    await appendList();
};
/**
 * Download one kuaidaili listing page and build a proxy URL per table row.
 *
 * Each row of `.con-body #list tbody tr` contributes
 * `<column 3, lower-cased>://<column 0>:<column 1>`.
 * Any failure (request or parsing) yields an empty array.
 */
const getListAsync = async (url = URLS[0]) => {
    try {
        const $ = await rp({
            uri: url,
            transform: (body) => cheerio.load(body)
        });
        return $('.con-body #list tbody tr').toArray().map((row) => {
            const cells = cheerio('td', row);
            const [scheme, host, port] =
                [3, 0, 1].map((column) => cells.eq(column).text());
            return `${scheme.toLowerCase()}://${host}:${port}`;
        });
    } catch (error) {
        return [ ];
    }
};
/**
 * Download one mayidaili share page and extract every `ip:port` pair found
 * in the text of `.container p`, returned as `http://ip:port` strings.
 *
 * Any failure, or a page without matches, yields an empty array.
 */
const getListAsync = async (url) => {
    try {
        const $ = await rp({
            uri: url,
            transform: (body) => cheerio.load(body)
        });
        const text = $('.container p').text();
        // `match` returns null when nothing matches; the result is the same
        // empty array the catch branch below produces.
        const endpoints = text.match(/(\d+\.){3}\d+:\d+/gi) || [ ];
        return endpoints.map((endpoint) => `http://${endpoint}`);
    } catch (error) {
        return [ ];
    }
};
/**
 * Start the mayidaili proxy source: schedule a periodic refresh and run one
 * refresh immediately.
 */
module.exports.process = async () => {
    // Refresh once every 8 hours (minute 0 of hours 0, 8 and 16).
    // The previous pattern '* */8 * * *' left the minute field as a wildcard,
    // so the job fired every minute throughout those hours.
    schedule.scheduleJob('0 */8 * * *', () => {
        appendList();
    });
    await appendList();
};
/**
 * Fetch the xdaili free-proxy pages and hand the proxies found to the
 * current nest.
 *
 * Waits 3 s before starting and 500 ms between pages. Does nothing when
 * `client.getCurrent()` is falsy. A page that fails to download or parse is
 * skipped instead of aborting the whole refresh.
 */
const appendList = async () => {
    // Number of pages to request, as implied by the original `Array(2)`.
    const PAGE_COUNT = 2;

    await sleep(3000);
    if (!client.getCurrent()) {
        return;
    }
    // The original `for (const i in Array(2))` never ran: `for...in` skips the
    // holes of a sparse array, so no page was ever requested.
    // NOTE(review): pages are assumed to be 1-based because getListAsync
    // defaults to `page = 1` - confirm against the xdaili API.
    for (let page = 1; page <= PAGE_COUNT; page++) {
        let list;
        try {
            list = await getListAsync(page);
        } catch (error) {
            // Network or JSON error: skip this page, like the other proxy
            // sources which fall back to an empty list.
            continue;
        }
        const current = client.getCurrent();
        if (!current) {
            return;
        }
        current.appendSpiders(list);
        await sleep(500);
    }
};
/**
 * Download one xicidaili listing page and build a proxy URL for every row
 * matched by `tr:has(td.country)` whose column 5 contains "http"
 * (case-insensitive).
 *
 * URL shape: `<column 5, lower-cased>://<column 1>:<column 2>`.
 * Any failure (request or parsing) yields an empty array.
 */
const getListAsync = async (url = URLS[0]) => {
    try {
        const $ = await rp({
            uri: url,
            transform: (body) => cheerio.load(body)
        });
        return $('tr:has(td.country)').toArray().reduce((found, row) => {
            const cells = cheerio('td', row);
            const scheme = cells.eq(5).text();
            if (/http/i.test(scheme)) {
                const host = cells.eq(1).text();
                const port = cells.eq(2).text();
                found.push(`${scheme.toLowerCase()}://${host}:${port}`);
            }
            return found;
        }, [ ]);
    } catch (error) {
        return [ ];
    }
};
/**
 * Fetch every ip3366 source URL in turn and hand the proxies found to the
 * current nest.
 *
 * Waits 3 s before starting and 500 ms between sources. Does nothing when
 * `client.getCurrent()` is falsy.
 */
const appendList = async () => {
    await sleep(3000);
    if (!client.getCurrent()) {
        return;
    }
    for (const url of URLS) {
        const list = await getListAsync(url);
        if (!Array.isArray(list)) {
            continue;
        }
        // Look the nest up again: it may have changed during the await.
        const current = client.getCurrent();
        if (!current) {
            return;
        }
        // The original `list.map(toString)` result was discarded (a no-op);
        // pass the string-normalised list on instead.
        current.appendSpiders(list.map((item) => String(item)));
        await sleep(500);
    }
};
// Event identifiers emitted on a Spider's `event` emitter by `crawl`.
// Each value is a unique Symbol; the comment on each key lists the arguments
// passed to listeners.
const SpiderEvent = {
    /**
     * Emitted when the fetch returns an empty response or when an exception
     * other than a "Forbidden" one is caught.
     * Listener arguments: (spider, mid, msg)
     */
    'ERROR': Symbol('ERROR'),
    /**
     * Emitted when the caught error message contains "Forbidden".
     * Listener arguments: (spider, mid, msg)
     */
    'BAN': Symbol('BAN'),
    /**
     * Emitted right before the user info request is made.
     * Listener arguments: (spider, mid)
     */
    'START': Symbol('START'),
    /**
     * Emitted after the fetched card has been added to the store.
     * Listener arguments: (spider, mid)
     */
    'END': Symbol('END')
};
/**
 * Build the inclusive integer range [start, end].
 *
 * rangeArray(0, 4) => [0, 1, 2, 3, 4]
 *
 * @param {Number} start first value of the range
 * @param {Number} end last value of the range (inclusive)
 * @returns {Number[]} the range, or an empty array when `end < start`
 *   (the previous implementation threw a RangeError for `end < start - 1`
 *   because it asked for a negative array length)
 */
const rangeArray = (start, end) => {
    const length = Math.max(0, end - start + 1);
    return Array.from({ length }, (unused, offset) => start + offset);
};
/**
 * Trim a member card document before it is stored locally.
 *
 * The destructuring below removes the listed fields from the document; the
 * remaining fields are copied unchanged into the result, which then gets
 * four derived fields:
 *   - sex:        '男' -> 0, '女' -> 1, '保密' -> 2 (anything else: undefined)
 *   - face:       the part of the avatar URL after 'face/', or ''
 *   - attentions: the followed ids joined with ',' ('' when missing)
 *   - exp:        level_info.current_exp (when level_info is present)
 *
 * @param {Object} doc member card document
 * @returns {Object} the reduced document
 */
const reduce = (doc) => {
    let {
        _id, // not needed
        approve, // empty value
        sex, // male-0, female-1, secret-2
        face, // prefix is stripped
        DisplayRank, // not needed
        rank, // not needed
        article, // empty value
        regtime, // empty value
        spacesta, // empty value
        birthday, // empty value
        place, // empty value
        description, // empty value
        attentions, // stored as a string
        attention, // not needed
        level_info, // only the experience value is kept => exp
        pendant, // not needed
        nameplate, // not needed
        official_verify, // dropped
        vip, // dropped
        ctime, // dropped
        ...rest
    } = doc;
    const sexMap = {
        '男': 0,
        '女': 1,
        '保密': 2
    }
    rest.sex = sexMap[sex];

    // Keep the file name only when the URL contains exactly one 'face/'.
    // A missing or non-string face yields '' instead of throwing.
    const faceParts = typeof face === 'string' ? face.split('face/') : []
    rest.face = (faceParts.length === 2 && faceParts[1]) || ''

    // `undefined + ''` used to store the literal text 'undefined' (and
    // 'null' for null); a missing list is now stored as ''.
    rest.attentions = attentions == null ? '' : String(attentions)

    rest.exp = level_info && level_info.current_exp
    return rest
}
/**
 * Move one batch of documents from the remote collection to the local one:
 * fetch up to `size` documents with mid >= startMid, save their reduced form
 * locally, then remove the fetched mid range from the remote collection.
 *
 * On any error the whole batch is retried (without delay) up to `retry`
 * more times.
 *
 * @param {Number} startMid smallest mid to fetch
 * @param {Number} size maximum number of documents in the batch
 * @param {Number} loopId caller-side id of this batch, used in log lines
 * @param {Number} retry remaining retry attempts
 * @returns {Promise<Number>} `loopId` when the batch was processed or was
 *   empty, `loopId * -1` when every attempt failed
 */
const fetchAndSaveAndRemove = async (startMid, size, loopId, retry = 5) => {
    // Record each completed step so the error log shows where it failed
    const states = [`retry=${retry}`]
    try {
        const docs = await fetchDocsAsync(startMid, size)
        states.push(`fetch`)
        // Count first: the original read docs.length before checking that
        // docs exists, so a null result surfaced as a TypeError.
        const count = docs ? docs.length : 0
        console.log(`${nowstr()} #${loopId} docs length=${count}`)
        // Nothing to move; still report the loop id so the caller's
        // "finished" log line does not print "#undefined".
        if (count === 0) return loopId

        const shrinkDocs = docs.map(v => reduce(v))
        await saveToLocal(shrinkDocs)
        states.push(`save`)

        const mids = shrinkDocs.map(v => v.mid)
        await removeFromRemote(mids)
        states.push(`remove`)
        return loopId
    } catch (err) {
        console.error(`${nowstr()} #${loopId} [${states}] ${err.stack}`)
        if (retry > 0) {
            // retry the task
            return fetchAndSaveAndRemove(startMid, size, loopId, retry - 1)
        }
    }
    return loopId * -1
}
// Public client facade of the package: re-exports selected members of the
// internal modules under one object.
// Note: `process` here is the local './client/process' module required at
// the top of this file, not Node's global `process` object.
const client = {
    // process / loop / on / getCurrent are taken from './client/process'
    process: process.process,
    loop: process.loop,
    on: process.on,
    // NestEvent as exported by './client/nest'
    event: NestEvent,
    getCurrent: process.getCurrent,
    // Output configuration helper from './client/utils'
    setOutput: utils.setOutput
};
45 | "eslint": "^4.19.1", 46 | "eslint-config-standard": "^11.0.0", 47 | "eslint-plugin-import": "^2.10.0", 48 | "eslint-plugin-node": "^6.0.1", 49 | "eslint-plugin-promise": "^3.7.0", 50 | "eslint-plugin-standard": "^3.0.1", 51 | "superagent-mocker": "^0.5.2" 52 | } 53 | } 54 | -------------------------------------------------------------------------------- /node-spider-dist/server.js: -------------------------------------------------------------------------------- 1 | const moment = require('moment'); 2 | moment.locale('zh-cn'); 3 | const nowstr = () => moment().format('YYYY-MM-DD HH:mm:ss') 4 | const express = require('express'); 5 | const app = express(); 6 | const bodyParser = require('body-parser') 7 | app.use(bodyParser.json({limit: '50mb'})); 8 | app.use(bodyParser.urlencoded({limit: '50mb', extended: true})); 9 | 10 | //=======mongodb======= 11 | const MongoClient = require('mongodb').MongoClient; 12 | const mongoUrl = "mongodb://localhost/bilibili_spider"; 13 | // 链接mongodb 14 | const connectMongoDBAsync = () => { 15 | return new Promise((resolve, reject) => MongoClient.connect(mongoUrl, function (err, db) { 16 | if (err) throw err; 17 | console.log(`${nowstr()} 数据库已连接! 
/**
 * Read the value stored for a package id in the in-progress hash.
 *
 * The original issued `hset` without a value, which writes nothing and
 * cannot return the stored entry; `hget` is the read command.
 *
 * @param pid package id (hash field)
 * @returns {Promise} resolves with the stored value (null when the field
 *   does not exist); rejects when Redis reports an error
 */
const getProcessHashAsync = (pid) => {
    return new Promise((resolve, reject) => {
        redisClient.hget(redisProcessHash, pid, (err, res) => err ? reject(err) : resolve(res))
    })
}
// POST /uploadPackage
// Body fields: `pid` (package id) and `package` (JSON text of the card list).
// Replies immediately; the cards are inserted asynchronously. After the
// insert the pid is removed from the in-progress hash and moved to the done
// set on success, or pushed back to the wait list on failure.
app.post('/uploadPackage', function (req, res) {
    const pid = req.body['pid']
    // `package` is a reserved word in strict mode, so use another local name
    const pkg = req.body['package']
    if (pkg == null || pkg == '') {
        res.send({
            pid: pid,
            success: false,
            message: 'empty package'
        });
        console.log(`${nowstr()} empty package, pid=${pid}`)
        return
    }

    // A malformed body used to throw out of the handler; answer with a
    // failure payload instead, like the empty-package case above.
    let cardList
    try {
        cardList = JSON.parse(pkg)
    } catch (err) {
        res.send({
            pid: pid,
            success: false,
            message: 'invalid package'
        });
        console.log(`${nowstr()} invalid package, pid=${pid}`)
        return
    }

    insertListToMongoAsync(cardList)
        // Treat a rejected insert like an empty result so the pid is
        // re-queued below instead of leaving an unhandled rejection.
        .catch((err) => {
            console.error(`${nowstr()} insert error, pid=${pid}`, err)
            return null
        })
        .then((inserted) => {
            checkPidInHash(pid, (err, exist) => {
                if (exist) { // only act when the pid is still marked as in progress
                    removeProcessHash(pid)
                    if (inserted) {
                        pushDoneSet(pid)
                    } else {
                        pushWaitList(pid)
                    }
                } else {
                    console.log(`${nowstr()} the pid not in hash, pid=${pid}`)
                }
            })

            const str = inserted ? 'Seccess' : 'Failed'
            console.log(`${nowstr()} ${str} to insert a package, pid=${pid}`)
        })
    const ret = {
        pid: pid,
        success: true,
        message: 'ok'
    }
    res.send(ret);
})
// Proxies with the same name are now filtered out, so this test is no longer
// valid (it is skipped).
// It runs client.process with five empty-string proxy entries and checks the
// uploaded package (pid 1234, 1000 cards) and that the elapsed time falls
// between the two SLEEP_NORMAL_LOCAL-based bounds below.
test.skip('Multi Proxy', async (t) => {
    const proxyList = Array(5).fill('');
    let startTime;
    mock.post(URL_UPLOAD_PACKAGE, (req) => {
        const body = {
            pid: req.body.pid,
            package: JSON.parse(req.body.package)
        };
        t.is(body.pid, 1234);
        t.is(body.package.length, 1000);
        // Upper bound on the elapsed time
        t.true((Date.now() - startTime) < SLEEP_NORMAL_LOCAL * (1000 / proxyList.length + 2));
        // Lower bound on the elapsed time
        t.true((Date.now() - startTime) >= SLEEP_NORMAL_LOCAL * (1000 / (proxyList.length + 1)));
        return { };
    });
    startTime = Date.now();
    await client.process(proxyList);
});
def deamon():
    """Worker loop: take queued HTTP tasks, fetch them and call their callbacks.

    Runs forever, so it is meant to be the target of a background thread.
    Each task is the dict enqueued by ``get`` with the keys ``name``,
    ``url``, ``hdlr`` (text pre-processor) and ``cb`` (result callback).

    For every task the response text is passed through ``hdlr`` and parsed as
    JSON. The callback receives the parsed object when it carries
    ``code == 0`` or, if there is no ``code``, ``status is True``; otherwise
    it receives ``None``. A task whose request times out is put back on the
    queue; a task whose request, handler or JSON parsing fails is dropped
    without calling the callback.
    """
    # Seconds to wait for a response. Without a timeout `requests` never
    # raises Timeout, so the retry branch below could not trigger.
    request_timeout = 10

    print(' 运行中 ...')

    while True:

        # Pause between iterations.
        sleep(0.1)

        print(' 循环中 ...')

        # Blocking get(): waits for the next task and never raises
        # queue.Empty, so no handler for it is needed.
        task = tasks.get()
        # The task is a dict; tuple-unpacking it would yield its keys
        # ('name', 'url', ...) instead of the values.
        name = task['name']
        url = task['url']
        hdlr = task['hdlr']
        cb = task['cb']

        print(' 处理消息', name)

        try:
            req = requests.get(url, timeout=request_timeout)

        except requests.exceptions.Timeout:
            # Re-queue the whole task (not just the URL) so it can be
            # unpacked again on the next attempt.
            tasks.put_nowait(task)
            print(' 请求超时 !')
            continue

        except requests.exceptions.RequestException as err:
            # Any other network error: drop the task but keep the worker alive.
            print(' 请求失败 !', err)
            continue

        try:
            txt = hdlr(req.text)

        except Exception:
            # TODO logging
            print(' 失败!未能成功处理消息文本!')
            continue

        try:
            res = json.loads(txt)

        except (TypeError, ValueError):
            # TypeError: handler returned a non-string (e.g. None);
            # ValueError: the text is not valid JSON.
            # TODO logging
            print(' 失败!未能成功解析 JSON!')
            continue

        if not isinstance(res, dict) or ('code' not in res and 'status' not in res):
            cb(None)
            # TODO logging
            print(' 找不到 `code` 或 `status`!')

        elif 'code' in res:
            code = res['code']

            if code == 0:
                cb(res)

            else:
                cb(None)
                # TODO logging
                print(' 非法 `code` [{}] !'.format(code))

        else:
            status = res['status']

            if status is True:
                cb(res)

            else:
                cb(None)
                # TODO logging
                print(' 非法 `status` [{}] !'.format(status))
[{}]!'.format(len(text))) 132 | 133 | prefix, suffix = text[:6], text[-1] 134 | if prefix == '__jp5(' and suffix == ')': 135 | return text[6:-1] 136 | 137 | # ******************************************************* 138 | # 处理用户关注关系数据 139 | # ******************************************************* 140 | def handle_relation_data(data): 141 | buf = [] 142 | 143 | for user in data['list']: 144 | entry = { 145 | 'mid': user['mid'], 146 | #'attribute': user['attribute'], 147 | 'mtime': user['mtime'], 148 | #'tag': user['tag'], 149 | #'special': user['special'], 150 | #'uname': user['uname'], 151 | #'face': user['face'], 152 | #'sign': user['sign'], 153 | #'official_verify': { 154 | # 'type': user['official_verify']['type'], 155 | # 'desc': user['official_verify']['desc'], 156 | #}, 157 | #'vip': { 158 | # 'vipType': user['vip']['vipType'], 159 | # 'vipDueDate': user['vip']['vipDueDate'], 160 | # 'dueRemark': user['vip']['dueRemark'], 161 | # 'accessStatus': user['vip']['accessStatus'], 162 | # 'vipStatus': user['vip']['vipStatus'], 163 | # 'vipStatusWarn': user['vip']['vipStatusWarn'], 164 | #}, 165 | } 166 | buf.append(entry) 167 | 168 | return buf 169 | 170 | # ******************************************************* 171 | # 获取该用户粉丝列表 172 | # ******************************************************* 173 | def get_followers(user_id, count_followers): 174 | step = 50 175 | followers = [] 176 | 177 | for page in range(1, min(5, 1 + math.ceil(count_followers / step))): 178 | url = 'https://api.bilibili.com/x/relation/followers?vmid={}&pn={}&ps={}&order=desc&jsonp=jsonp&callback=__jp5'.format(user_id, page, step) 179 | get(url, name='get_followers', handler=handle_jp5, callback=lambda res: followers.extend(handle_relation_data(res['data']))) 180 | 181 | return followers 182 | 183 | # ******************************************************* 184 | # 获取该用户关注列表 185 | # ******************************************************* 186 | def get_followings(user_id, count_followings): 187 | 
step = 50 188 | followings = [] 189 | 190 | for page in range(1, min(5, 1 + math.ceil(count_followings / step))): 191 | url = 'https://api.bilibili.com/x/relation/followings?vmid={}&pn={}&ps={}&order=desc&jsonp=jsonp&callback=__jp5'.format(user_id, page, step) 192 | get(url, name='get_followings', handler=handle_jp5, callback=lambda res: followings.extend(handle_relation_data(res['data']))) 193 | 194 | return followings 195 | 196 | # ******************************************************* 197 | # 获取该用户基础信息 198 | # ******************************************************* 199 | def get_user_info(user_id): 200 | #url01 = 'https://api.bilibili.com/x/relation/stat?vmid={}'.format(user_id) 201 | url02 = 'https://api.bilibili.com/x/space/navnum?mid={}'.format(user_id) 202 | step = 50 203 | following = None 204 | follower = None 205 | list_followings = None 206 | list_followers = None 207 | video = None 208 | videos = [] 209 | 210 | print('正在扫描用户 [{}] ...'.format(user_id)) 211 | 212 | def handle_relation(res01): 213 | nonlocal following 214 | nonlocal follower 215 | nonlocal list_followings 216 | nonlocal list_followers 217 | 218 | data = res01['data'] 219 | # 该用户关注的人 - 数量 220 | following = int(data['following']) 221 | # 该用户的悄悄话 - 数量 222 | #whisper = int(data['whisper']) 223 | # 该用户的黑名单 - 数量 224 | #black = int(data['black']) 225 | # 关注该用户的人 - 数量 226 | follower = int(data['follower']) 227 | 228 | # 该用户的关注列表 229 | list_followings = get_followings(user_id, following) 230 | # 该用户的粉丝列表 231 | list_followers = get_followers(user_id, follower) 232 | 233 | def handle_information(res02): 234 | nonlocal video 235 | 236 | data = res02['data'] 237 | # 该用户上传的视频 - 数量 238 | video = int(data['video']) 239 | print('视频数量:', video) 240 | # 该用户订阅的番剧 - 数量 241 | #bangumi = int(data['bangumi']) 242 | # 该用户创建的频道 - 数量 243 | #channel = {'master': int(data['channel']['master']), 'guest': int(data['channel']['guest'])} 244 | # 该用户创建的收藏夹 - 数量 245 | #favourite = {'master': 
int(data['favourite']['master']), 'guest': int(data['favourite']['guest'])} 246 | # 该用户订阅的标签 - 数量 247 | #tag = int(data['tag']) 248 | # 该用户撰写的文章 - 数量 249 | #article = int(data['article']) 250 | #playlist = data['playlist'] 251 | #album = data['album'] 252 | 253 | def handle_video_list(res): 254 | nonlocal videos 255 | 256 | data = res['data'] 257 | vlist = data['vlist'] 258 | 259 | for video_info in vlist: 260 | videos.append({ 261 | # 评论数量 262 | 'comment': int(video_info['comment']), 263 | # 视频分类 264 | 'typeid': video_info['typeid'], 265 | # 播放量 266 | 'play': int(video_info['play']), 267 | # 视频封面图片 268 | 'pic': video_info['pic'], 269 | # 子标题? 270 | #'subtitle': video_info['subtitle'], 271 | # 视频简介 272 | #'description': video_info['description'], 273 | # 版权 274 | #'copyright': video_info['copyright'], 275 | # 视频标题 276 | 'title': video_info['title'], 277 | #'review': video_info['review'], 278 | # 作者昵称 279 | #'author': video_info['author'], 280 | # 作者ID 281 | 'mid': int(video_info['mid']), 282 | # 发布时间 283 | 'created': int(video_info['created']), 284 | # 时间长度 285 | 'length': video_info['length'], 286 | #'video_review': video_info['video_review'], 287 | # 视频av号 288 | 'aid': int(video_info['aid']), 289 | #'hide_click': video_info['hide_click'], 290 | }) 291 | 292 | ##################################################### 293 | # 关系网 294 | ##################################################### 295 | #get(url01, name='get_user_info', callback=handle_relation) 296 | 297 | ##################################################### 298 | # 稿件信息 299 | ##################################################### 300 | get(url02, name='get_user_info', callback=handle_information) 301 | print('视频数量:', video) 302 | 303 | ##################################################### 304 | # 遍历视频 305 | # 306 | # 参考:https://space.bilibili.com/ajax/member/getSubmitVideos?mid=6290510&pagesize=50&page=1 307 | ##################################################### 308 | for page in range(1, min(5, 1 + 
math.ceil(video / step))): 309 | url = 'http://space.bilibili.com/ajax/member/getSubmitVideos?mid={}&pagesize={}&page={}'.format(user_id, step, page) 310 | get(url, name='get_user_info', callback=handle_video_list) 311 | 312 | ##################################################### 313 | # 存储数据 314 | ##################################################### 315 | info = { 316 | #'followings': { 317 | # 'count': following, 318 | # 'users': list_followings, 319 | #}, 320 | #'followers': { 321 | # 'count': follower, 322 | # 'users': list_followers, 323 | #}, 324 | 'videos': { 325 | 'count': video, 326 | 'videos': videos, 327 | }, 328 | } 329 | 330 | return info 331 | 332 | ''' 333 | def get_video_info(video_id): 334 | url = 'https://api.bilibili.com/x/web-interface/archive/stat?aid={}'.format(video_id) 335 | res = get(url, name='get_video_info', callback=None) 336 | 337 | def get_comments(video_id): 338 | url = 'https://api.bilibili.cn/feedback?aid={}'.format(video_id) 339 | res = get(url, name="get_comments", callback=None) 340 | ''' 341 | 342 | def main(): 343 | # 遍历用户 ID 344 | for user_id in range(2, MAX_USER_ID): 345 | info = get_user_info(user_id) 346 | 347 | pd.DataFrame([[video['comment'], video['typeid'], video['play'], video['title'], video['created'], video['length'], video['aid'], video['mid']] for video in info['videos']['videos']], 348 | columns=['comment', 'typeid', 'play', 'title', 'created', 'length', 'aid', 'mid']).to_csv('datasets/info/video_info.csv') 349 | 350 | class DaemonThread(Thread): 351 | 352 | def __init__(self): 353 | Thread.__init__(self) 354 | self.daemon = True 355 | self.name = 'Bilibili 爬虫 后台线程' 356 | 357 | def run(self): 358 | print('线程 `{}` 正在运行 ...'.format(self.name)) 359 | deamon() 360 | 361 | class MainThread(Thread): 362 | 363 | def __init__(self): 364 | Thread.__init__(self) 365 | self.name = 'Bilibili 爬虫 主线程' 366 | 367 | def run(self): 368 | print('线程 `{}` 正在运行 ...'.format(self.name)) 369 | main() 370 | 371 | DaemonThread().start() 
372 | MainThread().start() 373 | -------------------------------------------------------------------------------- /专栏跟踪爬虫-氘化氢.nb: -------------------------------------------------------------------------------- 1 | (* Content-type: application/vnd.wolfram.mathematica *) 2 | 3 | (*** Wolfram Notebook File ***) 4 | (* http://www.wolfram.com/nb *) 5 | 6 | (* CreatedBy='Mathematica 11.2' *) 7 | 8 | (*CacheID: 234*) 9 | (* Internal cache information: 10 | NotebookFileLineBreakTest 11 | NotebookFileLineBreakTest 12 | NotebookDataPosition[ 158, 7] 13 | NotebookDataLength[ 14563, 284] 14 | NotebookOptionsPosition[ 13843, 265] 15 | NotebookOutlinePosition[ 14229, 282] 16 | CellTagsIndexPosition[ 14186, 279] 17 | WindowFrame->Normal*) 18 | 19 | (* Beginning of Notebook Content *) 20 | Notebook[{ 21 | Cell[BoxData[ 22 | RowBox[{ 23 | RowBox[{"(*", "\:76d1\:6d4b\:4e13\:680f\:7684cv\:53f7", "*)"}], 24 | RowBox[{ 25 | RowBox[{"cv", "=", "404336"}], ";", 26 | RowBox[{"(*", 27 | RowBox[{"\:76d1\:6d4b\:5468\:671f", 28 | RowBox[{"\:ff08", "\:79d2", "\:ff09"}]}], "*)"}], 29 | RowBox[{"fuse", "=", "300"}], ";", 30 | RowBox[{"(*", 31 | RowBox[{"\:65e0\:9650\:8fd0\:884c", "\:ff0c", 32 | RowBox[{"\:4f7f\:7528Alt", "+", 33 | RowBox[{".", "\:505c\:6b62"}]}]}], "*)"}]}]}]], "Input", 34 | CellChangeTimes->{{3.7333447812333612`*^9, 3.7333447942201037`*^9}, { 35 | 3.733344869459407*^9, 3.7333448929627514`*^9}, {3.7333449487321453`*^9, 36 | 3.7333450000522175`*^9}, {3.7333450731253195`*^9, 37 | 3.7333450761853237`*^9}, {3.733345165536449*^9, 3.7333451842164755`*^9}, { 38 | 3.7333452268365345`*^9, 3.733345270596596*^9}, {3.7333458356391163`*^9, 39 | 3.733345845119129*^9}, {3.7333458864491873`*^9, 3.733345901099208*^9}, { 40 | 3.733345976329313*^9, 3.733346050159416*^9}, {3.733346156419565*^9, 41 | 3.73334622421966*^9}, {3.7333464362909565`*^9, 3.733346444741969*^9}, 42 | 3.7333472353470755`*^9, {3.733347992954562*^9, 3.7333479954845657`*^9}, { 43 | 3.7333480546646485`*^9, 3.733348063264661*^9}, 
{3.7333481047847185`*^9, 44 | 3.73334810570472*^9}, {3.733350750202597*^9, 3.733350751212599*^9}, { 45 | 3.7333508783337765`*^9, 3.7333509985839453`*^9}, 3.7333513451544304`*^9, { 46 | 3.7333513759144735`*^9, 3.7333514004445076`*^9}, {3.7333514505845776`*^9, 47 | 3.733351475224612*^9}, {3.733351565514739*^9, 3.7333515667447405`*^9}, { 48 | 3.7333533730652714`*^9, 3.7333533975953054`*^9}, 3.7333560115433426`*^9, 49 | 3.73404181594972*^9, 3.73404189422983*^9, {3.734042046090042*^9, 50 | 3.7340420469500437`*^9}, 3.734042091500106*^9, 3.734042329690439*^9, 51 | 3.734042391150525*^9, {3.734042602210821*^9, 3.734042635340867*^9}, { 52 | 3.7340426948909507`*^9, 3.7340426986309557`*^9}, 3.7340428537331743`*^9, { 53 | 3.7340434503840094`*^9, 3.734043464294029*^9}, {3.734048284159063*^9, 54 | 3.7340482851790643`*^9}, 3.7340493758925915`*^9, {3.7340534306432686`*^9, 55 | 3.73405343154327*^9}, {3.734053503693371*^9, 3.734053532863412*^9}, { 56 | 3.734089773212278*^9, 3.7340897850422945`*^9}, {3.734090518553906*^9, 57 | 3.7340905546239567`*^9}, {3.7340911201947484`*^9, 58 | 3.7340911355647697`*^9}, {3.7340913145750203`*^9, 3.734091327535039*^9}, { 59 | 3.7340920581220617`*^9, 60 | 3.7340921545021973`*^9}},ExpressionUUID->"1dc22295-016a-49f3-af38-\ 61 | c221317654d3"], 62 | 63 | Cell[BoxData[{ 64 | RowBox[{ 65 | RowBox[{"address", "=", 66 | RowBox[{ 67 | RowBox[{"NotebookDirectory", "[", "]"}], "<>", "\"\\"", "<>", 68 | RowBox[{"ToString", "[", "cv", "]"}], "<>", "\"\<.csv\>\""}]}], 69 | ";"}], "\[IndentingNewLine]", 70 | RowBox[{ 71 | RowBox[{"replacelist", "=", 72 | RowBox[{"{", 73 | RowBox[{ 74 | "\"\\"", ",", "\"\\"", ",", "\"\\"", ",", 75 | "\"\\"", ",", "\"\\"", ",", "\"\\""}], "}"}]}], 76 | ";"}]}], "Input", 77 | CellChangeTimes->{{3.7333447812333612`*^9, 3.7333447942201037`*^9}, { 78 | 3.733344869459407*^9, 3.7333448929627514`*^9}, {3.7333449487321453`*^9, 79 | 3.7333450000522175`*^9}, {3.7333450731253195`*^9, 80 | 3.7333450761853237`*^9}, {3.733345165536449*^9, 
3.7333451842164755`*^9}, { 81 | 3.7333452268365345`*^9, 3.733345270596596*^9}, {3.7333458356391163`*^9, 82 | 3.733345845119129*^9}, {3.7333458864491873`*^9, 3.733345901099208*^9}, { 83 | 3.733345976329313*^9, 3.733346050159416*^9}, {3.733346156419565*^9, 84 | 3.73334622421966*^9}, {3.7333464362909565`*^9, 3.733346444741969*^9}, 85 | 3.7333472353470755`*^9, {3.733347992954562*^9, 3.7333479954845657`*^9}, { 86 | 3.7333480546646485`*^9, 3.733348063264661*^9}, {3.7333481047847185`*^9, 87 | 3.73334810570472*^9}, {3.733350750202597*^9, 3.733350751212599*^9}, { 88 | 3.7333508783337765`*^9, 3.7333509985839453`*^9}, 3.7333513451544304`*^9, { 89 | 3.7333513759144735`*^9, 3.7333514004445076`*^9}, {3.7333514505845776`*^9, 90 | 3.733351475224612*^9}, {3.733351565514739*^9, 3.7333515667447405`*^9}, { 91 | 3.7333533730652714`*^9, 3.7333533975953054`*^9}, 3.7333560115433426`*^9, 92 | 3.73404181594972*^9, 3.73404189422983*^9, {3.734042046090042*^9, 93 | 3.7340420469500437`*^9}, 3.734042091500106*^9, 3.734042329690439*^9, 94 | 3.734042391150525*^9, {3.734042602210821*^9, 3.734042635340867*^9}, { 95 | 3.7340426948909507`*^9, 3.7340426986309557`*^9}, 3.7340428537331743`*^9, { 96 | 3.7340434503840094`*^9, 3.734043464294029*^9}, {3.734048284159063*^9, 97 | 3.7340482851790643`*^9}, 3.7340493758925915`*^9, {3.7340534306432686`*^9, 98 | 3.73405343154327*^9}, {3.734053503693371*^9, 3.734053532863412*^9}, { 99 | 3.734089773212278*^9, 3.7340897850422945`*^9}, {3.734090518553906*^9, 100 | 3.7340905546239567`*^9}, {3.7340911201947484`*^9, 101 | 3.7340911385147743`*^9}, {3.7340912171248837`*^9, 102 | 3.7340912440149217`*^9}, {3.7340912797949715`*^9, 3.73409130717501*^9}, { 103 | 3.734091768938657*^9, 3.7340917709586596`*^9}, 104 | 3.734092521370573*^9},ExpressionUUID->"86793f00-7c10-4df9-b694-\ 105 | a13e1869ee75"], 106 | 107 | Cell[BoxData[ 108 | RowBox[{"If", "[", 109 | RowBox[{ 110 | RowBox[{"!", 111 | RowBox[{"FileExistsQ", "[", "address", "]"}]}], ",", 112 | "\[IndentingNewLine]", 113 | 
RowBox[{ 114 | RowBox[{"OpenAppend", "[", 115 | RowBox[{"address", ",", 116 | RowBox[{"PageWidth", "\[Rule]", "Infinity"}]}], "]"}], ";", 117 | "\[IndentingNewLine]", 118 | RowBox[{"WriteString", "[", 119 | RowBox[{ 120 | RowBox[{"Streams", "[", "address", "]"}], ",", 121 | "\"\\""}], "]"}], ";", "\[IndentingNewLine]", 123 | RowBox[{ 124 | RowBox[{ 125 | RowBox[{"Close", "[", "#", "]"}], "&"}], "/@", "stream"}], ";"}]}], 126 | "]"}]], "Input", 127 | CellChangeTimes->{{3.7333447812333612`*^9, 3.7333447942201037`*^9}, { 128 | 3.733344869459407*^9, 3.7333448929627514`*^9}, {3.7333449487321453`*^9, 129 | 3.7333450000522175`*^9}, {3.7333450731253195`*^9, 130 | 3.7333450761853237`*^9}, {3.733345165536449*^9, 3.7333451842164755`*^9}, { 131 | 3.7333452268365345`*^9, 3.733345270596596*^9}, {3.7333458356391163`*^9, 132 | 3.733345845119129*^9}, {3.7333458864491873`*^9, 3.733345901099208*^9}, { 133 | 3.733345976329313*^9, 3.733346050159416*^9}, {3.733346156419565*^9, 134 | 3.73334622421966*^9}, {3.7333464362909565`*^9, 3.733346444741969*^9}, 135 | 3.7333472353470755`*^9, {3.733347992954562*^9, 3.7333479954845657`*^9}, { 136 | 3.7333480546646485`*^9, 3.733348063264661*^9}, {3.7333481047847185`*^9, 137 | 3.73334810570472*^9}, {3.733350750202597*^9, 3.733350751212599*^9}, { 138 | 3.7333508783337765`*^9, 3.7333509985839453`*^9}, 3.7333513451544304`*^9, { 139 | 3.7333513759144735`*^9, 3.7333514004445076`*^9}, {3.7333514505845776`*^9, 140 | 3.733351475224612*^9}, {3.733351565514739*^9, 3.7333515667447405`*^9}, { 141 | 3.7333533730652714`*^9, 3.7333533975953054`*^9}, 3.7333560115433426`*^9, 142 | 3.73404181594972*^9, 3.73404189422983*^9, {3.734042046090042*^9, 143 | 3.7340420469500437`*^9}, 3.734042091500106*^9, 3.734042329690439*^9, 144 | 3.734042391150525*^9, {3.734042602210821*^9, 3.734042635340867*^9}, { 145 | 3.7340426948909507`*^9, 3.7340426986309557`*^9}, 3.7340428537331743`*^9, { 146 | 3.7340434503840094`*^9, 3.734043464294029*^9}, {3.734048284159063*^9, 147 | 
3.7340482851790643`*^9}, 3.7340493758925915`*^9, {3.7340534306432686`*^9, 148 | 3.73405343154327*^9}, {3.734053503693371*^9, 3.734053532863412*^9}, { 149 | 3.734089773212278*^9, 3.7340897850422945`*^9}, {3.734090518553906*^9, 150 | 3.7340905546239567`*^9}, {3.7340911201947484`*^9, 151 | 3.7340911385147743`*^9}, {3.7340912171248837`*^9, 152 | 3.7340912440149217`*^9}, {3.7340912797949715`*^9, 3.73409130717501*^9}, 153 | 3.7340913382050533`*^9, {3.7340915242453136`*^9, 3.7340915333053265`*^9}, 154 | 3.7340921204821496`*^9, {3.734092179932233*^9, 155 | 3.734092181072234*^9}},ExpressionUUID->"5e6e99e2-aba7-42e2-be7c-\ 156 | f505364870c5"], 157 | 158 | Cell[BoxData[ 159 | RowBox[{ 160 | RowBox[{"getdata", "[", "cv_", "]"}], ":=", 161 | RowBox[{"(", "\[IndentingNewLine]", 162 | RowBox[{ 163 | RowBox[{"OpenAppend", "[", 164 | RowBox[{"address", ",", 165 | RowBox[{"PageWidth", "\[Rule]", "Infinity"}]}], "]"}], ";", 166 | "\[IndentingNewLine]", 167 | RowBox[{"time", "=", 168 | RowBox[{"StringRiffle", "[", 169 | RowBox[{ 170 | RowBox[{"ToString", "/@", 171 | RowBox[{"Floor", "@", 172 | RowBox[{"Now", "[", 173 | RowBox[{"[", 174 | RowBox[{"1", ",", 175 | RowBox[{";;", "6"}]}], "]"}], "]"}]}]}], ",", "\"\<,\>\""}], 176 | "]"}]}], ";", "\[IndentingNewLine]", 177 | RowBox[{"data1", "=", 178 | RowBox[{"Import", "[", 179 | RowBox[{ 180 | RowBox[{"\"\\"", "<>", 181 | RowBox[{"ToString", "[", "cv", "]"}]}], ",", "\"\\""}], 182 | "]"}]}], ";", "\[IndentingNewLine]", 183 | RowBox[{"replace", "=", 184 | RowBox[{"\"\\"", "/.", "data1"}]}], ";", 185 | RowBox[{"rule", "=", 186 | RowBox[{"\"\\"", "/.", "replace"}]}], ";", 187 | RowBox[{"stat", "=", 188 | RowBox[{"ToString", "[", 189 | RowBox[{"replacelist", "/.", "rule"}], "]"}]}], ";", 190 | "\[IndentingNewLine]", 191 | RowBox[{"output", "=", 192 | RowBox[{"time", "<>", "\"\<,\>\"", "<>", 193 | RowBox[{"StringDrop", "[", 194 | RowBox[{ 195 | RowBox[{"StringDrop", "[", 196 | RowBox[{"stat", ",", 197 | RowBox[{"-", "1"}]}], "]"}], ",", 
"1"}], "]"}], "<>", 198 | "\"\<\\n\>\""}]}], ";", "\[IndentingNewLine]", 199 | RowBox[{"WriteString", "[", 200 | RowBox[{ 201 | RowBox[{"Streams", "[", "address", "]"}], ",", "output"}], "]"}], ";", 202 | "\[IndentingNewLine]", 203 | RowBox[{ 204 | RowBox[{ 205 | RowBox[{"Close", "[", "#", "]"}], "&"}], "/@", "stream"}], ";"}], 206 | ")"}]}]], "Input", 207 | CellChangeTimes->{{3.7333447812333612`*^9, 3.7333447942201037`*^9}, { 208 | 3.733344869459407*^9, 3.7333448929627514`*^9}, {3.7333449487321453`*^9, 209 | 3.7333450000522175`*^9}, {3.7333450731253195`*^9, 210 | 3.7333450761853237`*^9}, {3.733345165536449*^9, 3.7333451842164755`*^9}, { 211 | 3.7333452268365345`*^9, 3.733345270596596*^9}, {3.7333458356391163`*^9, 212 | 3.733345845119129*^9}, {3.7333458864491873`*^9, 3.733345901099208*^9}, { 213 | 3.733345976329313*^9, 3.733346050159416*^9}, {3.733346156419565*^9, 214 | 3.73334622421966*^9}, {3.7333464362909565`*^9, 3.733346444741969*^9}, 215 | 3.7333472353470755`*^9, {3.733347992954562*^9, 3.7333479954845657`*^9}, { 216 | 3.7333480546646485`*^9, 3.733348063264661*^9}, {3.7333481047847185`*^9, 217 | 3.73334810570472*^9}, {3.733350750202597*^9, 3.733350751212599*^9}, { 218 | 3.7333508783337765`*^9, 3.7333509985839453`*^9}, 3.7333513451544304`*^9, { 219 | 3.7333513759144735`*^9, 3.7333514004445076`*^9}, {3.7333514505845776`*^9, 220 | 3.733351475224612*^9}, {3.733351565514739*^9, 3.7333515667447405`*^9}, { 221 | 3.7333533730652714`*^9, 3.7333533975953054`*^9}, 3.7333560115433426`*^9, 222 | 3.73404181594972*^9, 3.73404189422983*^9, {3.734042046090042*^9, 223 | 3.7340420469500437`*^9}, 3.734042091500106*^9, 3.734042329690439*^9, 224 | 3.734042391150525*^9, {3.734042602210821*^9, 3.734042635340867*^9}, { 225 | 3.7340426948909507`*^9, 3.7340426986309557`*^9}, 3.7340428537331743`*^9, { 226 | 3.7340434503840094`*^9, 3.7340434571240187`*^9}, 3.734043654034295*^9, { 227 | 3.7340438286268225`*^9, 3.734043829066823*^9}, {3.734048791612773*^9, 228 | 3.734048827862824*^9}, 
{3.7340489952730584`*^9, 3.734049040703122*^9}, 229 | 3.7340493708025846`*^9, {3.7340494144626455`*^9, 3.734049445402689*^9}, { 230 | 3.734049482922742*^9, 3.7340495896378913`*^9}, {3.7340496735180087`*^9, 231 | 3.734049675768012*^9}, 3.7340497685181417`*^9, {3.7340498445482483`*^9, 232 | 3.734049875878292*^9}, 3.734050417909051*^9, {3.7340895716309958`*^9, 233 | 3.734089649091104*^9}, {3.7340901972334557`*^9, 3.7340903043636055`*^9}, 234 | 3.7340903912377276`*^9, {3.7340905792339907`*^9, 3.734090586444001*^9}, { 235 | 3.734090837824353*^9, 3.7340908433143606`*^9}, {3.7340909025144434`*^9, 236 | 3.734090904254446*^9}, {3.734091037424632*^9, 3.734091097584717*^9}, { 237 | 3.7340914444452024`*^9, 3.734091582835396*^9}, {3.7340917123785777`*^9, 238 | 3.7340917408686175`*^9}, {3.73409217036222*^9, 239 | 3.734092224902296*^9}},ExpressionUUID->"574d8bbc-9a2c-4e1b-98cd-\ 240 | 2515a17b62a1"], 241 | 242 | Cell[BoxData[ 243 | RowBox[{"While", "[", 244 | RowBox[{"True", ",", 245 | RowBox[{ 246 | RowBox[{"getdata", "[", "cv", "]"}], ";", 247 | RowBox[{"Pause", "[", "fuse", "]"}]}]}], "]"}]], "Input", 248 | CellChangeTimes->{{3.7333447812333612`*^9, 3.7333447942201037`*^9}, { 249 | 3.733344869459407*^9, 3.7333448929627514`*^9}, {3.7333449487321453`*^9, 250 | 3.7333450000522175`*^9}, {3.7333450731253195`*^9, 251 | 3.7333450761853237`*^9}, {3.733345165536449*^9, 3.7333451842164755`*^9}, { 252 | 3.7333452268365345`*^9, 3.733345270596596*^9}, {3.7333458356391163`*^9, 253 | 3.733345845119129*^9}, {3.7333458864491873`*^9, 3.733345901099208*^9}, { 254 | 3.733345976329313*^9, 3.733346050159416*^9}, {3.733346156419565*^9, 255 | 3.73334622421966*^9}, {3.7333464362909565`*^9, 3.733346444741969*^9}, 256 | 3.7333472353470755`*^9, {3.733347992954562*^9, 3.7333479954845657`*^9}, { 257 | 3.7333480546646485`*^9, 3.733348063264661*^9}, {3.7333481047847185`*^9, 258 | 3.73334810570472*^9}, {3.733350750202597*^9, 3.733350751212599*^9}, { 259 | 3.7333508783337765`*^9, 3.7333509650738983`*^9}, 
{3.734042674490922*^9, 260 | 3.734042674950923*^9}, {3.7340428004930997`*^9, 3.7340429097132525`*^9}, { 261 | 3.73404939586262*^9, 3.73404939639262*^9}, {3.734090572793982*^9, 262 | 3.7340905732139826`*^9}, {3.734091382475116*^9, 263 | 3.734091383365117*^9}},ExpressionUUID->"d44d6345-60fa-4486-8fd5-\ 264 | 75e6885b32f7"] 265 | }, 266 | WindowSize->{1350, 686}, 267 | WindowMargins->{{-8, Automatic}, {Automatic, 0}}, 268 | Magnification:>1.3 Inherited, 269 | FrontEndVersion->"11.2 for Microsoft Windows (64-bit) (2017\:5e7410\:67082\ 270 | \:65e5)", 271 | StyleDefinitions->"Default.nb" 272 | ] 273 | (* End of Notebook Content *) 274 | 275 | (* Internal cache information *) 276 | (*CellTagsOutline 277 | CellTagsIndex->{} 278 | *) 279 | (*CellTagsIndex 280 | CellTagsIndex->{} 281 | *) 282 | (*NotebookFileOutline 283 | Notebook[{ 284 | Cell[558, 20, 2415, 40, 37, "Input",ExpressionUUID->"1dc22295-016a-49f3-af38-c221317654d3"], 285 | Cell[2976, 62, 2505, 42, 85, "Input",ExpressionUUID->"86793f00-7c10-4df9-b694-a13e1869ee75"], 286 | Cell[5484, 106, 2770, 49, 198, "Input",ExpressionUUID->"5e6e99e2-aba7-42e2-be7c-f505364870c5"], 287 | Cell[8257, 157, 4234, 82, 364, "Input",ExpressionUUID->"574d8bbc-9a2c-4e1b-98cd-2515a17b62a1"], 288 | Cell[12494, 241, 1345, 22, 57, "Input",ExpressionUUID->"d44d6345-60fa-4486-8fd5-75e6885b32f7"] 289 | } 290 | ] 291 | *) 292 | 293 | -------------------------------------------------------------------------------- /视频跟踪爬虫-LePtC.nb: -------------------------------------------------------------------------------- 1 | (* Content-type: application/vnd.wolfram.mathematica *) 2 | 3 | (*** Wolfram Notebook File ***) 4 | (* http://www.wolfram.com/nb *) 5 | 6 | (* CreatedBy='Mathematica 11.2' *) 7 | 8 | (*CacheID: 234*) 9 | (* Internal cache information: 10 | NotebookFileLineBreakTest 11 | NotebookFileLineBreakTest 12 | NotebookDataPosition[ 158, 7] 13 | NotebookDataLength[ 8940, 235] 14 | NotebookOptionsPosition[ 7852, 211] 15 | NotebookOutlinePosition[ 
8208, 227] 16 | CellTagsIndexPosition[ 8165, 224] 17 | WindowFrame->Normal*) 18 | 19 | (* Beginning of Notebook Content *) 20 | Notebook[{ 21 | Cell["\:8f93\:5165\:8981\:76d1\:89c6\:7684\:89c6\:9891\:ff0c\:7b2c\:4e00\:884c\ 22 | \:6587\:4ef6\:540d\:ff0c\:7b2c\:4e8c\:884cav\:53f7\:ff0c\:591a\:4e2a\:89c6\ 23 | \:9891\:7528\:9017\:53f7\:5206\:5272", "Text", 24 | CellChangeTimes->{{3.7312278218075223`*^9, 25 | 3.7312278645750875`*^9}},ExpressionUUID->"bceab26c-cfe2-4e9c-bd49-\ 26 | 41fe4c8d3da1"], 27 | 28 | Cell[BoxData[ 29 | RowBox[{ 30 | RowBox[{ 31 | RowBox[{"watchlist", "=", 32 | RowBox[{"(", GridBox[{ 33 | {"\"\\"", "\"\\"", "\"\\""}, 34 | {"19574757", "19070980", "18623766"} 35 | }], ")"}]}], ";"}], " ", 36 | RowBox[{"(*", " ", 37 | RowBox[{ 38 | RowBox[{ 39 | "\:64ad\:653e\:91cf\:4fdd\:5bc6\:7684\:89c6\:9891\:4f1a\:51fa", " ", 40 | "bug"}], "\:ff0c", 41 | RowBox[{"\:4f8b\:5982", " ", "av2"}]}], " ", "*)"}]}]], "Input", 42 | CellChangeTimes->{{3.7312278026523094`*^9, 3.731227811640462*^9}, { 43 | 3.7312279045443277`*^9, 3.7312279569390125`*^9}, 3.7312284342809663`*^9, { 44 | 3.731228481786167*^9, 3.7312285204462605`*^9}, {3.7312286212255936`*^9, 45 | 3.7312286313254447`*^9}, {3.731229907037536*^9, 3.7312299414522295`*^9}, { 46 | 3.7312300111584935`*^9, 47 | 3.7312300202691593`*^9}},ExpressionUUID->"94f6d68a-1ed7-4e3f-9b00-\ 48 | bab15e00cfeb"], 49 | 50 | Cell["\:7136\:540e\:9009\:62e9\:8ba1\:7b97-\:8ba1\:7b97\:7b14\:8bb0\:672c\ 51 | \:5373\:53ef", "Text", 52 | CellChangeTimes->{{3.7312279962207775`*^9, 53 | 3.731228014550985*^9}},ExpressionUUID->"7add0a1f-a90c-473f-a0c1-\ 54 | e96efbbb2abd"], 55 | 56 | Cell[BoxData[{ 57 | RowBox[{ 58 | RowBox[{"getdata", "[", "n_", "]"}], ":=", 59 | RowBox[{"\"\\"", "/.", 60 | RowBox[{"Import", "[", 61 | RowBox[{ 62 | RowBox[{ 63 | "\"\\"", "<>", 64 | RowBox[{"ToString", "[", "n", "]"}]}], ",", "\"\\""}], 65 | "]"}]}]}], "\n", 66 | RowBox[{ 67 | RowBox[{"li", "=", 68 | RowBox[{"{", 69 | RowBox[{ 70 | "\"\\"", ",", "\"\\"", ",", 
"\"\\"", ",", 71 | "\"\\"", ",", "\"\\"", ",", "\"\\"", ",", 72 | "\"\\"", ",", "\"\\""}], "}"}]}], ";"}]}], "Input", 73 | CellChangeTimes->{{3.728717511469638*^9, 3.728717558483469*^9}, { 74 | 3.7287178428973875`*^9, 3.7287178545916924`*^9}, {3.7290691895205107`*^9, 75 | 3.7290692054765153`*^9}, 76 | 3.7312295971935267`*^9},ExpressionUUID->"05d0891f-2e21-49c0-b441-\ 77 | fe2c39aed65f"], 78 | 79 | Cell[BoxData[{ 80 | RowBox[{ 81 | RowBox[{"SetDirectory", "[", 82 | RowBox[{"NotebookDirectory", "[", "]"}], "]"}], ";"}], "\n", 83 | RowBox[{ 84 | RowBox[{"no", "=", 85 | RowBox[{"Length", "@", 86 | RowBox[{ 87 | "watchlist", "\[LeftDoubleBracket]", "2", "\[RightDoubleBracket]"}]}]}], 88 | ";"}], "\n", 89 | RowBox[{ 90 | RowBox[{"stream", "=", 91 | RowBox[{ 92 | RowBox[{ 93 | RowBox[{"OpenAppend", "[", 94 | RowBox[{ 95 | RowBox[{"#", "<>", "\"\<.csv\>\""}], ",", 96 | RowBox[{"PageWidth", "\[Rule]", "Infinity"}]}], "]"}], "&"}], "/@", 97 | RowBox[{ 98 | "watchlist", "\[LeftDoubleBracket]", "1", "\[RightDoubleBracket]"}]}]}], 99 | ";"}], "\[IndentingNewLine]", 100 | RowBox[{ 101 | RowBox[{"lastplay", "=", 102 | RowBox[{"ConstantArray", "[", 103 | RowBox[{"0", ",", "no"}], "]"}]}], ";"}]}], "Input", 104 | CellChangeTimes->{{3.7312307109607377`*^9, 105 | 3.7312307129128942`*^9}},ExpressionUUID->"df9cead0-7f97-4ee5-94b8-\ 106 | 884844db2515"], 107 | 108 | Cell[CellGroupData[{ 109 | 110 | Cell[BoxData[ 111 | RowBox[{"While", "[", 112 | RowBox[{"True", ",", 113 | RowBox[{ 114 | RowBox[{"d", "=", 115 | RowBox[{ 116 | RowBox[{ 117 | RowBox[{"getdata", "[", "#", "]"}], "&"}], "/@", 118 | RowBox[{ 119 | "watchlist", "\[LeftDoubleBracket]", "2", "\[RightDoubleBracket]"}]}]}], 120 | ";", "\[IndentingNewLine]", 121 | RowBox[{"dd", "=", 122 | RowBox[{"Table", "[", 123 | RowBox[{ 124 | RowBox[{"li", "/.", 125 | RowBox[{"d", "\[LeftDoubleBracket]", "i", "\[RightDoubleBracket]"}]}], 126 | ",", 127 | RowBox[{"{", 128 | RowBox[{"i", ",", "no"}], "}"}]}], "]"}]}], ";", 129 | 
"\[IndentingNewLine]", 130 | RowBox[{"da", "=", 131 | RowBox[{"DateString", "[", 132 | RowBox[{"{", 133 | RowBox[{ 134 | "\"\\"", ",", "\"\\"", ",", "\"\\"", ",", 135 | "\"\\"", ",", "\"\\"", ",", "\"\< \>\"", ",", 136 | "\"\\"", ",", "\"\<:\>\"", ",", "\"\\""}], "}"}], 137 | "]"}]}], ";", "\n", 138 | RowBox[{ 139 | RowBox[{"Table", "[", "\[IndentingNewLine]", 140 | RowBox[{ 141 | RowBox[{ 142 | RowBox[{"If", "[", 143 | RowBox[{ 144 | RowBox[{ 145 | RowBox[{"dd", "\[LeftDoubleBracket]", 146 | RowBox[{"i", ",", "2"}], "\[RightDoubleBracket]"}], ">", 147 | RowBox[{ 148 | "lastplay", "\[LeftDoubleBracket]", "i", 149 | "\[RightDoubleBracket]"}]}], ",", "\n", 150 | RowBox[{"Write", "[", 151 | RowBox[{ 152 | RowBox[{ 153 | "stream", "\[LeftDoubleBracket]", "i", "\[RightDoubleBracket]"}], 154 | ",", 155 | RowBox[{"OutputForm", "[", 156 | RowBox[{"StringJoin", "[", 157 | RowBox[{"Riffle", "[", 158 | RowBox[{ 159 | RowBox[{"Prepend", "[", 160 | RowBox[{ 161 | RowBox[{"ToString", "/@", 162 | RowBox[{ 163 | "dd", "\[LeftDoubleBracket]", "i", 164 | "\[RightDoubleBracket]"}]}], ",", "da"}], "]"}], ",", 165 | "\"\<,\>\""}], "]"}], "]"}], "]"}]}], "]"}]}], 166 | "\[IndentingNewLine]", "]"}], ";", "\[IndentingNewLine]", 167 | RowBox[{ 168 | RowBox[{ 169 | "lastplay", "\[LeftDoubleBracket]", "i", "\[RightDoubleBracket]"}], 170 | "=", 171 | RowBox[{"dd", "\[LeftDoubleBracket]", 172 | RowBox[{"i", ",", "2"}], "\[RightDoubleBracket]"}]}]}], ",", "\n", 173 | RowBox[{"{", 174 | RowBox[{"i", ",", "no"}], "}"}]}], "]"}], "\n", 175 | RowBox[{"Pause", "[", "60", "]"}]}]}]}], "\n", "]"}]], "Input", 176 | CellChangeTimes->{{3.728717713627672*^9, 3.7287177502522163`*^9}, 177 | 3.7290694878045874`*^9, {3.7312280902827373`*^9, 3.7312281477154827`*^9}, { 178 | 3.7312281822306223`*^9, 3.7312282063568425`*^9}, {3.731228244607533*^9, 179 | 3.7312282474101434`*^9}, {3.731228388043501*^9, 3.731228390245373*^9}, 180 | 3.7312287181829004`*^9, {3.731228762799139*^9, 3.7312288356838646`*^9}, { 181 | 
3.731228901202846*^9, 3.73122896790339*^9}, {3.7312290357547245`*^9, 182 | 3.7312290757951803`*^9}, {3.731229134008765*^9, 3.731229172688515*^9}, { 183 | 3.7312292840985594`*^9, 3.7312292880919495`*^9}, {3.731229324219702*^9, 184 | 3.7312293278732777`*^9}, {3.731229485212408*^9, 3.7312294870105467`*^9}, { 185 | 3.7312306799892464`*^9, 186 | 3.7312306850906363`*^9}},ExpressionUUID->"80406992-5e0e-4d9f-a712-\ 187 | 81e524035a59"], 188 | 189 | Cell[BoxData["$Aborted"], "Output", 190 | CellChangeTimes->{ 191 | 3.7312309001711707`*^9},ExpressionUUID->"765b2aa3-3c65-4155-9431-\ 192 | 223e849aa203"] 193 | }, Open ]], 194 | 195 | Cell["\:7a0b\:5e8f\:5c06\:65e0\:9650\:8fd0\:884c\:ff0c\:9700\:8981\:65f6\:624b\ 196 | \:52a8\:7528 Alt+, \:7ec8\:6b62\:8fd0\:884c", "Text", 197 | CellChangeTimes->{{3.7312283017215266`*^9, 198 | 3.7312283557590513`*^9}},ExpressionUUID->"be1d6cf2-5d72-4895-9cd8-\ 199 | 8d8ea6cbed57"], 200 | 201 | Cell[BoxData[ 202 | RowBox[{ 203 | RowBox[{ 204 | RowBox[{ 205 | RowBox[{"Close", "[", "#", "]"}], "&"}], "/@", "stream"}], ";"}]], "Input", 206 | CellChangeTimes->{{3.7287352053111906`*^9, 3.7287352121411915`*^9}, 207 | 3.729069348919593*^9, {3.731228276752705*^9, 3.731228293822445*^9}, { 208 | 3.73122953654716*^9, 209 | 3.731229537903629*^9}},ExpressionUUID->"118e0893-6f38-4f49-9916-\ 210 | 179689114705"] 211 | }, 212 | WindowSize->{1920, 961}, 213 | WindowMargins->{{-5, Automatic}, {Automatic, 0}}, 214 | FrontEndVersion->"11.2 for Microsoft Windows (64-bit) (2017\:5e7410\:67082\ 215 | \:65e5)", 216 | StyleDefinitions->"Default.nb" 217 | ] 218 | (* End of Notebook Content *) 219 | 220 | (* Internal cache information *) 221 | (*CellTagsOutline 222 | CellTagsIndex->{} 223 | *) 224 | (*CellTagsIndex 225 | CellTagsIndex->{} 226 | *) 227 | (*NotebookFileOutline 228 | Notebook[{ 229 | Cell[558, 20, 332, 5, 102, "Text",ExpressionUUID->"bceab26c-cfe2-4e9c-bd49-41fe4c8d3da1"], 230 | Cell[893, 27, 863, 20, 190, 
"Input",ExpressionUUID->"94f6d68a-1ed7-4e3f-9b00-bab15e00cfeb"], 231 | Cell[1759, 49, 225, 4, 102, "Text",ExpressionUUID->"7add0a1f-a90c-473f-a0c1-e96efbbb2abd"], 232 | Cell[1987, 55, 876, 21, 376, "Input",ExpressionUUID->"05d0891f-2e21-49c0-b441-fe2c39aed65f"], 233 | Cell[2866, 78, 905, 27, 260, "Input",ExpressionUUID->"df9cead0-7f97-4ee5-94b8-884844db2515"], 234 | Cell[CellGroupData[{ 235 | Cell[3796, 109, 3251, 77, 782, "Input",ExpressionUUID->"80406992-5e0e-4d9f-a712-81e524035a59"], 236 | Cell[7050, 188, 139, 3, 97, "Output",ExpressionUUID->"765b2aa3-3c65-4155-9431-223e849aa203"] 237 | }, Open ]], 238 | Cell[7204, 194, 263, 4, 102, "Text",ExpressionUUID->"be1d6cf2-5d72-4895-9cd8-8d8ea6cbed57"], 239 | Cell[7470, 200, 378, 9, 85, "Input",ExpressionUUID->"118e0893-6f38-4f49-9916-179689114705"] 240 | } 241 | ] 242 | *) 243 | 244 | --------------------------------------------------------------------------------