├── .gitignore
├── README.md
├── ext
├── background.js
├── catgate.logo.png
├── catgate.logo.small.png
├── dist
│ ├── b02bdc1b846fd65473922f5f62832108.ttf
│ ├── build.js
│ ├── build.js.map
│ ├── catgate.logo.png
│ ├── catgate.logo.small.png
│ └── d2f69a92faa6fe990d2e613c358be705.woff
├── index.html
├── logo.png
└── manifest.json
├── intro.png
├── npm-debug.log
├── php
├── list.php
└── save.php
└── src
├── .babelrc
├── .gitignore
├── README.md
├── catgate.logo.png
├── index.html
├── package.json
├── src
├── App.vue
├── assets
│ └── catgate.logo.png
└── main.js
└── webpack.config.js
/.gitignore:
--------------------------------------------------------------------------------
1 | php/data
2 | src/node_modules
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # CatGate
2 |
3 |
4 | CatGate 是一个基于浏览器插件的数据抓取工具。做成浏览器插件后无需模拟登录,能最真实地模仿用户的行为和特征。
5 |
6 | ## 演示视频
7 |
8 | http://weibo.com/1088413295/F9P1izhY2
9 |
10 | ## 功能
11 |
12 | ![CatGate 功能示意图](intro.png)
13 |
14 | - ① CatGate 会访问这个URL,这个URL应该返回一个Json格式的数组,每一个元素一个字符串(即网址)。点 ② 按钮即可载入数据,并可按 ③ 进行查看,会自动排重。
15 | - ⑤ CatGate 抓取到数据后,会直接 POST 给这个URL,key 为 content , 通过 $_REQUEST['content'] 即可获取抓取内容。
16 |
17 | - ⑥ 默认一秒抓取一次,如果抓取失败太多,可以增加秒数。
18 |
19 | - ④ 这是一个高级用法,就是 ① 这个 URL 可以不停的新增 URL,CatGate 会自动每隔一秒(可设置)去抓取并合并进来。
20 |
21 |
22 | ## Demo
23 |
24 | 进入 `php` 目录,运行 `php -S localhost:8000` 即可测试。
25 |
26 | - list.php 这个页面生成了要抓取的页面列表,默认是微博收藏的页面。
27 | - save.php 这个页面定义了如何处置抓到的数据,默认是保存为文件。
28 |
29 |
30 | ## 安装插件
31 |
32 | 商店地址:https://chrome.google.com/webstore/detail/catgate/nncgefdjnpnipajdfnindaiockdadpab
33 |
34 | 如果你不想或不能在Chrome商店安装插件,可手工安装。
35 |
36 | chrome > extensions > load unpacked extension > ./ext
37 |
38 |
39 | # License
40 |
41 | CC,保留署名、非商用。
42 |
43 |
44 |
--------------------------------------------------------------------------------
/ext/background.js:
--------------------------------------------------------------------------------
/**
 * Handler for clicks on the extension's toolbar button.
 *
 * Opens the extension page index.html in a new tab and passes the URL of the
 * tab that was active when the button was clicked as the `url` query parameter.
 *
 * @param {object} activeTab - Tab object supplied by
 *     chrome.browserAction.onClicked; only its `url` property is read.
 */
function openCatGateForTab(activeTab) {
    // NOTE(review): the tab URL is appended as-is (not URL-encoded) — confirm
    // the page that reads the `url` parameter expects the raw value.
    var pageUrl = 'index.html?url=' + activeTab.url;
    chrome.tabs.create({ url: pageUrl });
}

/**
 * Handler for chrome.runtime.onInstalled.
 *
 * Opens index.html in a new tab, without any `url` parameter.
 */
function openCatGateOnInstalled() {
    chrome.tabs.create({ url: 'index.html' });
}

chrome.browserAction.onClicked.addListener(openCatGateForTab);
chrome.runtime.onInstalled.addListener(openCatGateOnInstalled);
--------------------------------------------------------------------------------
/ext/catgate.logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/easychen/catgate/c2f31af80fe50653b77c251582f8645b6245c79f/ext/catgate.logo.png
--------------------------------------------------------------------------------
/ext/catgate.logo.small.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/easychen/catgate/c2f31af80fe50653b77c251582f8645b6245c79f/ext/catgate.logo.small.png
--------------------------------------------------------------------------------
/ext/dist/b02bdc1b846fd65473922f5f62832108.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/easychen/catgate/c2f31af80fe50653b77c251582f8645b6245c79f/ext/dist/b02bdc1b846fd65473922f5f62832108.ttf
--------------------------------------------------------------------------------
/ext/dist/catgate.logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/easychen/catgate/c2f31af80fe50653b77c251582f8645b6245c79f/ext/dist/catgate.logo.png
--------------------------------------------------------------------------------
/ext/dist/catgate.logo.small.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/easychen/catgate/c2f31af80fe50653b77c251582f8645b6245c79f/ext/dist/catgate.logo.small.png
--------------------------------------------------------------------------------
/ext/dist/d2f69a92faa6fe990d2e613c358be705.woff:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/easychen/catgate/c2f31af80fe50653b77c251582f8645b6245c79f/ext/dist/d2f69a92faa6fe990d2e613c358be705.woff
--------------------------------------------------------------------------------
/ext/index.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | CatGate
6 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
--------------------------------------------------------------------------------
/ext/logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/easychen/catgate/c2f31af80fe50653b77c251582f8645b6245c79f/ext/logo.png
--------------------------------------------------------------------------------
/ext/manifest.json:
--------------------------------------------------------------------------------
1 | {
2 | "manifest_version": 2,
3 |
4 | "name": "CatGate",
5 | "description": "基于浏览器插件的数据抓取小工具",
6 | "version": "0.91",
7 |
8 | "permissions": [
9 | "http://*/*","https://*/*",
10 | "tabs",
11 | "notifications",
12 | "alarms"
13 | ],
14 | "browser_action": {
15 | "default_icon": "catgate.logo.small.png"
16 | }
17 | ,
18 | "background":
19 | {
20 | "scripts": ["background.js"]
21 | },
22 | "web_accessible_resources": [
23 | "catgate.logo.small.png"
24 | ]
25 |
26 | }
--------------------------------------------------------------------------------
/intro.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/easychen/catgate/c2f31af80fe50653b77c251582f8645b6245c79f/intro.png
--------------------------------------------------------------------------------
/npm-debug.log:
--------------------------------------------------------------------------------
1 | 0 info it worked if it ends with ok
2 | 1 verbose cli [ '/usr/local/bin/node', '/usr/local/bin/npm', 'run', 'build' ]
3 | 2 info using npm@2.11.3
4 | 3 info using node@v0.12.7
5 | 4 verbose config Skipping project config: /Users/Easy/.npmrc. (matches userconfig)
6 | 5 verbose stack Error: ENOENT, open '/Users/Easy/package.json'
7 | 5 verbose stack at Error (native)
8 | 6 verbose cwd /Users/Easy/Code/gitcode/catgate
9 | 7 error Darwin 16.6.0
10 | 8 error argv "/usr/local/bin/node" "/usr/local/bin/npm" "run" "build"
11 | 9 error node v0.12.7
12 | 10 error npm v2.11.3
13 | 11 error path /Users/Easy/package.json
14 | 12 error code ENOENT
15 | 13 error errno -2
16 | 14 error enoent ENOENT, open '/Users/Easy/package.json'
17 | 14 error enoent This is most likely not a problem with npm itself
18 | 14 error enoent and is related to npm not being able to find a file.
19 | 15 verbose exit [ -2, true ]
20 |
--------------------------------------------------------------------------------
/php/list.php:
--------------------------------------------------------------------------------
1 | 0 )
14 | {
15 | $fname = 'data/' . microtime_float() . '.txt';
16 | file_put_contents( $fname , $content );
17 |
18 | die('save to ' . $fname . ' url = ' . $_REQUEST['url'] );
19 | }
20 | else
21 | {
22 | die('empty');
23 | }
24 |
25 |
26 |
/**
 * Returns the current Unix timestamp as a float, including the fractional
 * (sub-second) part.
 *
 * @return float Seconds since the Unix epoch.
 */
function microtime_float()
{
    // microtime(true) already yields the timestamp as a float, so there is no
    // need to split the "usec sec" string form and add the two halves by hand.
    return microtime(true);
}
32 |
--------------------------------------------------------------------------------
/src/.babelrc:
--------------------------------------------------------------------------------
1 | {
2 | "presets": [
3 | ["latest", {
4 | "es2015": { "modules": false }
5 | }]
6 | ]
7 | }
8 |
--------------------------------------------------------------------------------
/src/.gitignore:
--------------------------------------------------------------------------------
1 | .DS_Store
2 | node_modules/
3 | dist/
4 | npm-debug.log
5 | yarn-error.log
6 |
--------------------------------------------------------------------------------
/src/README.md:
--------------------------------------------------------------------------------
1 | # Fangtang-Chrome-Starter-Kit
2 |
3 | ## Build Setup
4 |
5 | ``` bash
6 | # install dependencies
7 | npm install
8 |
9 | # serve with hot reload at localhost:8080
10 | npm run dev
11 |
12 | # build for production with minification
13 | npm run build
14 | ```
15 |
16 | For detailed explanation on how things work, consult the [docs for vue-loader](http://vuejs.github.io/vue-loader).
17 |
--------------------------------------------------------------------------------
/src/catgate.logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/easychen/catgate/c2f31af80fe50653b77c251582f8645b6245c79f/src/catgate.logo.png
--------------------------------------------------------------------------------
/src/index.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | wecat
6 |
7 |
8 |
9 |
10 |
11 |
12 |
--------------------------------------------------------------------------------
/src/package.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "wecat",
3 | "description": "weibo analysis tools",
4 | "version": "1.0.0",
5 | "author": "EasyChen ",
6 | "private": true,
7 | "scripts": {
8 | "dev": "cross-env NODE_ENV=development webpack-dev-server --open --hot",
9 | "build": "cross-env NODE_ENV=production webpack --progress --hide-modules"
10 | },
11 | "dependencies": {
12 | "element-ui": "^1.2.7",
13 | "jquery": "^3.2.1",
14 | "vue": "^2.2.1"
15 | },
16 | "devDependencies": {
17 | "babel-core": "^6.0.0",
18 | "babel-loader": "^6.0.0",
19 | "babel-preset-latest": "^6.0.0",
20 | "cross-env": "^3.0.0",
21 | "css-loader": "^0.25.0",
22 | "style-loader": "^0.13.1",
23 | "file-loader": "^0.9.0",
24 | "vue-loader": "^11.1.4",
25 | "vue-template-compiler": "^2.2.1",
26 | "webpack": "^2.2.0",
27 | "webpack-dev-server": "^2.2.0"
28 | }
29 | }
30 |
--------------------------------------------------------------------------------
/src/src/App.vue:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |

5 |
6 |
7 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 | 载入列表数据 查看列表数据 [{{url_num}}] 自动多次载入列表
27 |
28 |
29 |
30 |
33 |
34 |
35 |
36 |
37 | {{auto_text}}
38 |
39 |
40 |
41 |
42 |
43 |
44 |
45 |
46 |
47 |
48 |
49 |
50 |
51 |
52 |
53 |
54 | {{continue_text}}
55 |
56 | 重新抓取{{bad_urls.length}}条失败的URL
57 |
58 |
59 |
60 |
61 |
62 |
63 | {{flog}}
64 |
65 |
66 |
67 |
68 |
69 |
70 |
71 |
72 |
73 |
74 |
75 |
76 |
77 |
78 |
79 |
80 |
210 |
211 |
283 |
--------------------------------------------------------------------------------
/src/src/assets/catgate.logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/easychen/catgate/c2f31af80fe50653b77c251582f8645b6245c79f/src/src/assets/catgate.logo.png
--------------------------------------------------------------------------------
/src/src/main.js:
--------------------------------------------------------------------------------
1 | import Vue from 'vue'
2 | import App from './App.vue'
3 | import ElementUI from 'element-ui'
4 | import 'element-ui/lib/theme-default/index.css'
5 |
// Install the Element UI plugin on Vue before the root instance is constructed.
Vue.use(ElementUI);

// Root Vue instance: uses '#app' as its `el` option and renders only the
// App component.
new Vue({
  el: '#app',
  render(createElement) {
    return createElement(App);
  },
});
12 |
--------------------------------------------------------------------------------
/src/webpack.config.js:
--------------------------------------------------------------------------------
1 | var path = require('path')
2 | var webpack = require('webpack')
3 |
4 | module.exports = {
5 | entry: './src/main.js',
6 | output: {
7 | path: path.resolve(__dirname, '../ext/dist'),
8 | publicPath: '/dist/',
9 | filename: 'build.js'
10 | },
11 | module: {
12 | rules: [
13 | {
14 | test: /\.vue$/,
15 | loader: 'vue-loader',
16 | options: {
17 | loaders: {
18 | }
19 | // other vue-loader options go here
20 | }
21 | },
22 | {
23 | test: /\.js$/,
24 | loader: 'babel-loader',
25 | exclude: /node_modules/
26 | },
27 | {
28 | test: /\.css$/,
29 | loader: 'style-loader!css-loader'
30 | },
31 | {
32 | test: /\.(eot|svg|ttf|woff|woff2)(\?\S*)?$/,
33 | loader: 'file-loader'
34 | },
35 | {
36 | test: /\.(png|jpe?g|gif|svg)(\?\S*)?$/,
37 | loader: 'file-loader',
38 | query: {
39 | name: '[name].[ext]?[hash]'
40 | }
41 | }
42 | ]
43 | },
44 | resolve: {
45 | alias: {
46 | 'vue$': 'vue/dist/vue.esm.js'
47 | }
48 | },
49 | devServer: {
50 | historyApiFallback: true,
51 | noInfo: true
52 | },
53 | performance: {
54 | hints: false
55 | },
56 | devtool: '#eval-source-map'
57 | }
58 |
59 | if (process.env.NODE_ENV === 'production') {
60 | module.exports.devtool = '#source-map'
61 | // http://vue-loader.vuejs.org/en/workflow/production.html
62 | module.exports.plugins = (module.exports.plugins || []).concat([
63 | new webpack.DefinePlugin({
64 | 'process.env': {
65 | NODE_ENV: '"production"'
66 | }
67 | }),
68 | new webpack.optimize.UglifyJsPlugin({
69 | sourceMap: true,
70 | compress: {
71 | warnings: false
72 | }
73 | }),
74 | new webpack.LoaderOptionsPlugin({
75 | minimize: true
76 | })
77 | ])
78 | }
79 |
--------------------------------------------------------------------------------