├── .babelrc ├── .eslintrc ├── .gitignore ├── .prettierrc ├── .vscode └── settings.json ├── LICENSE ├── README.md ├── _locales ├── en │ └── messages.json └── zh_CN │ └── messages.json ├── manifest.json ├── package.json ├── public ├── GitHub-Mark-Light-64px.png ├── icon.png └── logo.png ├── screenshot ├── 1.jpg ├── 2.jpg ├── 3.jpg ├── 4.jpg └── 5.jpg ├── scripts ├── build.js ├── dev.js └── dist.js ├── src ├── code │ ├── nodejs.ts │ └── python.ts ├── components │ ├── CodeTab.tsx │ ├── DemoAndInstruction.tsx │ ├── Editor.tsx │ ├── PreviewTab.tsx │ └── SelectOverview.tsx ├── containers │ ├── DetailPanelContainer.tsx │ ├── OverviewPanelContainer.tsx │ ├── PanelContainer.tsx │ └── PopupContainer.tsx ├── entry │ ├── background.ts │ ├── content.ts │ ├── panel.tsx │ └── popup.tsx ├── models │ ├── app.ts │ ├── constant.ts │ └── types.ts ├── search │ ├── engine.ts │ ├── search.ts │ └── simpleSearch.ts ├── styles │ ├── editor.css │ ├── global.css │ └── panel.css └── utils │ ├── codeHeader.ts │ ├── connect.ts │ ├── message.ts │ ├── updateState.ts │ └── urls.ts ├── template └── index.html ├── test ├── index.html └── test.js ├── tsconfig.json ├── updates.json ├── webpack.config.js └── yarn.lock /.babelrc: -------------------------------------------------------------------------------- 1 | { 2 | "presets": [ 3 | // "@babel/preset-env", 4 | // "@babel/preset-react" 5 | "react-app" 6 | ], 7 | "plugins": [ 8 | // "@babel/plugin-proposal-class-properties", 9 | "react-hot-loader/babel" 10 | ] 11 | } 12 | -------------------------------------------------------------------------------- /.eslintrc: -------------------------------------------------------------------------------- 1 | { 2 | "extends": "react-app", 3 | "globals": { 4 | "chrome": "readonly" 5 | }, 6 | "ignorePatterns": "**/*.js" 7 | } 8 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | 
node_modules/ 2 | build/ 3 | dist/ 4 | -------------------------------------------------------------------------------- /.prettierrc: -------------------------------------------------------------------------------- 1 | { 2 | "singleQuote": true, 3 | "trailingComma": "es5", 4 | "requirePragma": false, 5 | "arrowParens": "always" 6 | } 7 | -------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "editor.formatOnSave": true, 3 | "eslint.enable": false, 4 | "python.pythonPath": "C:\\dev\\python3\\python.exe" 5 | } 6 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2016 Weicheng Huang 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ### Version:0.4 Alpha 2 | 3 | Link to [Chrome Store](https://chrome.google.com/webstore/detail/%E6%95%B0%E6%8D%AE%E6%8A%93%E5%8F%96%E5%88%86%E6%9E%90%E5%B7%A5%E5%85%B7/kmghfpaenbmakjffjhjncacmhagadgbg?hl=zh-CN) 4 | 5 | # 数据抓取分析助手 (Chrome 插件版) 6 | 7 | 根据选择元素,自动生成最简 HTML 选择命令,用于爬虫网页内容分析 8 | 9 | ## 功能: 10 | 11 | - 自动生成最简 HTML 选择命令,用于爬虫网页内容分析 12 | - 查看选择元素属性 13 | - 生成参考代码 14 | 15 | # Scraping Helper 16 | 17 | Automatically generate CSS selector for web structure analysis 18 | 19 | ## Feature 20 | 21 | - Automatically generate CSS selector for web structure analysis 22 | - Inspect element attributes 23 | - Generate reference code 24 | 25 | ![1](screenshot/1.jpg) 26 | ![2](screenshot/2.jpg) 27 | ![3](screenshot/3.jpg) 28 | ![4](screenshot/4.jpg) 29 | ![5](screenshot/5.jpg) 30 | 31 | MIT LICENSE 32 | Contributor LordBread 33 | -------------------------------------------------------------------------------- /_locales/en/messages.json: -------------------------------------------------------------------------------- 1 | { 2 | "extName": { 3 | "message": "Scraping Helper" 4 | }, 5 | "extDescription": { 6 | "message": "Automatically generate CSS selector for web structure analysis" 7 | }, 8 | "feature": { 9 | "message": "Feature" 10 | }, 11 | "featureList": { 12 | "message": "Automatically generate CSS selector for web structure analysis|Inspect element attributes|Generate reference code" 13 | }, 14 | "update": { 15 | "message": "Updates" 16 | }, 17 | "note": { 18 | "message": "Note" 19 | }, 20 | "noteList": { 21 | 
"message": "This extension may interfere with website JS functionality|Data loaded via AJAX cannot be downloaded directly|Complex page structures may make the extension unresponsive|Refresh the web page if strange things happen" 22 | }, 23 | "launch": { 24 | "message": "Launch" 25 | }, 26 | "cantLaunch": { 27 | "message": "This extension only works on web pages" 28 | }, 29 | "version": { 30 | "message": "Version" 31 | }, 32 | "codeHeader": { 33 | "message": "This sample code is provided by the Scraping Helper Chrome Extension|Please respect website rules and local laws|The author of this extension is not responsible for any lawsuit" 34 | }, 35 | "demoPrepare": { 36 | "message": "Prepare" 37 | }, 38 | "demoPredict": { 39 | "message": "Predict" 40 | }, 41 | "demoLegend": { 42 | "message": "Indicator" 43 | }, 44 | "reset": { 45 | "message": "Reset" 46 | }, 47 | "selectInstruction": { 48 | "message": "Please click on a web page element" 49 | }, 50 | "selector": { 51 | "message": "Selector" 52 | }, 53 | "selectCount": { 54 | "message": "Select count" 55 | }, 56 | "preview": { 57 | "message": "Preview" 58 | }, 59 | "sampleCode": { 60 | "message": "Sample code" 61 | } 62 | } 63 | -------------------------------------------------------------------------------- /_locales/zh_CN/messages.json: -------------------------------------------------------------------------------- 1 | { 2 | "extName": { 3 | "message": "数据抓取分析工具" 4 | }, 5 | "extDescription": { 6 | "message": "自动生成最简HTML选择命令,用于爬虫网页内容分析" 7 | }, 8 | "feature": { 9 | "message": "功能" 10 | }, 11 | "featureList": { 12 | "message": "自动生成最简HTML选择命令,用于爬虫网页内容分析|查看选择元素属性|生成参考代码" 13 | }, 14 | "update": { 15 | "message": "更新" 16 | }, 17 | "note": { 18 | "message": "说明" 19 | }, 20 | "noteList": { 21 | "message": "本插件使用期间会使部分网页JS失效|由JS载入的数据无法被直接下载|页面复杂可能导致无响应|遇到奇怪的行为请刷新页面" 22 | }, 23 | "launch": { 24 | "message": "启动" 25 | }, 26 | "cantLaunch": { 27 | "message": "无法在非网页页面下启动" 28 | }, 29 | "version": { 30 | "message": "版本" 31 | }, 32 | "codeHeader": { 33 | 
"message": "此代码由数据抓取分析助手提供,仅供参考|爬取数据时,请遵守网站爬虫规则与当地法律法规|因使用本插件而违反法律,作者概不负责" 34 | }, 35 | "demoPrepare": { 36 | "message": "准备选择" 37 | }, 38 | "demoPredict": { 39 | "message": "预测选择" 40 | }, 41 | "demoLegend": { 42 | "message": "示例" 43 | }, 44 | "reset": { 45 | "message": "重新选择" 46 | }, 47 | "selectInstruction": { 48 | "message": "请在页面上选择元素" 49 | }, 50 | "selector": { 51 | "message": "选择器" 52 | }, 53 | "selectCount": { 54 | "message": "选择数量" 55 | }, 56 | "preview": { 57 | "message": "预览" 58 | }, 59 | "sampleCode": { 60 | "message": "示例代码" 61 | } 62 | } 63 | -------------------------------------------------------------------------------- /manifest.json: -------------------------------------------------------------------------------- 1 | { 2 | "manifest_version": 2, 3 | 4 | "name": "__MSG_extName__", 5 | "description": "__MSG_extDescription__", 6 | "version": "0.3", 7 | "icons": { "128": "icon.png" }, 8 | "browser_action": { 9 | "default_icon": "icon.png", 10 | "default_popup": "popup.html" 11 | }, 12 | "permissions": ["activeTab", "tabs"], 13 | "web_accessible_resources": ["icon.png", "panel.html"], 14 | "content_scripts": [ 15 | { 16 | "matches": ["http://*/*", "https://*/*"], 17 | "css": ["css/global.css"], 18 | "js": ["js/content.bundle.js"] 19 | } 20 | ], 21 | "default_locale": "en" 22 | } 23 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "scraping-helper-chrome-extension", 3 | "version": "0.4", 4 | "description": "自动生成最简HTML选择命令,用于爬虫网页内容分析", 5 | "scripts": { 6 | "test": "jest", 7 | "build": "node scripts/build.js", 8 | "dist": "node scripts/dist.js", 9 | "dev": "node scripts/dev.js" 10 | }, 11 | "repository": { 12 | "type": "git", 13 | "url": "git+https://github.com/huangwc94/scraping-helper-chrome-extension.git" 14 | }, 15 | "author": "", 16 | "license": "ISC", 17 | "bugs": { 18 | "url": 
"https://github.com/huangwc94/scraping-helper-chrome-extension/issues" 19 | }, 20 | "homepage": "https://github.com/huangwc94/scraping-helper-chrome-extension#readme", 21 | "devDependencies": { 22 | "@babel/core": "^7.12.17", 23 | "@babel/plugin-proposal-class-properties": "^7.12.13", 24 | "@babel/preset-env": "^7.12.17", 25 | "@babel/preset-react": "^7.12.13", 26 | "@types/lodash": "^4.14.170", 27 | "@types/prismjs": "^1.16.5", 28 | "archiver": "^5.3.0", 29 | "babel-eslint": "^10.1.0", 30 | "babel-loader": "^8.2.2", 31 | "babel-preset-react-app": "^10.0.0", 32 | "clean-webpack-plugin": "^3.0.0", 33 | "copy-webpack-plugin": "^7.0.0", 34 | "css-loader": "^5.0.2", 35 | "eslint": "^7.20.0", 36 | "eslint-config-react-app": "^6.0.0", 37 | "eslint-plugin-flowtype": "^5.2.2", 38 | "eslint-plugin-import": "^2.22.1", 39 | "eslint-plugin-jsx-a11y": "^6.4.1", 40 | "eslint-plugin-react": "^7.22.0", 41 | "eslint-plugin-react-hooks": "^4.2.0", 42 | "file-loader": "^6.2.0", 43 | "fs-extra": "^9.1.0", 44 | "html-loader": "^2.1.0", 45 | "html-webpack-plugin": "^5.2.0", 46 | "jest": "^27.0.1", 47 | "node-sass": "^4.14.1", 48 | "prettier": "^2.3.0", 49 | "sass-loader": "^11.0.1", 50 | "source-map-loader": "^2.0.1", 51 | "style-loader": "^2.0.0", 52 | "terser-webpack-plugin": "^5.1.1", 53 | "ts-loader": "^8.0.17", 54 | "typescript": "^4.3.2", 55 | "webpack": "^5.23.0", 56 | "webpack-cli": "^4.5.0", 57 | "webpack-dev-server": "^3.11.2" 58 | }, 59 | "dependencies": { 60 | "@hot-loader/react-dom": "^17.0.1", 61 | "@types/chrome": "0.0.132", 62 | "@types/react": "^17.0.2", 63 | "@types/react-dom": "^17.0.1", 64 | "antd": "^4.16.0", 65 | "cash-dom": "^8.1.0", 66 | "dva": "^2.4.1", 67 | "lodash": "^4.17.21", 68 | "prismjs": "^1.23.0", 69 | "react": "^17.0.1", 70 | "react-dom": "^17.0.1", 71 | "react-hot-loader": "^4.13.0", 72 | "react-simple-code-editor": "^0.11.0" 73 | } 74 | } 75 | -------------------------------------------------------------------------------- 
/public/GitHub-Mark-Light-64px.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huangwc94/scraping-helper-chrome-extension/858ebc3d44690cccb320ca577c02b80813afd78d/public/GitHub-Mark-Light-64px.png -------------------------------------------------------------------------------- /public/icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huangwc94/scraping-helper-chrome-extension/858ebc3d44690cccb320ca577c02b80813afd78d/public/icon.png -------------------------------------------------------------------------------- /public/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huangwc94/scraping-helper-chrome-extension/858ebc3d44690cccb320ca577c02b80813afd78d/public/logo.png -------------------------------------------------------------------------------- /screenshot/1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huangwc94/scraping-helper-chrome-extension/858ebc3d44690cccb320ca577c02b80813afd78d/screenshot/1.jpg -------------------------------------------------------------------------------- /screenshot/2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huangwc94/scraping-helper-chrome-extension/858ebc3d44690cccb320ca577c02b80813afd78d/screenshot/2.jpg -------------------------------------------------------------------------------- /screenshot/3.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huangwc94/scraping-helper-chrome-extension/858ebc3d44690cccb320ca577c02b80813afd78d/screenshot/3.jpg -------------------------------------------------------------------------------- /screenshot/4.jpg: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/huangwc94/scraping-helper-chrome-extension/858ebc3d44690cccb320ca577c02b80813afd78d/screenshot/4.jpg -------------------------------------------------------------------------------- /screenshot/5.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huangwc94/scraping-helper-chrome-extension/858ebc3d44690cccb320ca577c02b80813afd78d/screenshot/5.jpg -------------------------------------------------------------------------------- /scripts/build.js: -------------------------------------------------------------------------------- 1 | // Do this as the first thing so that any code reading it knows the right env. 2 | process.env.BABEL_ENV = "production"; 3 | process.env.NODE_ENV = "production"; 4 | process.env.ASSET_PATH = "/"; 5 | 6 | var webpack = require("webpack"), 7 | config = require("../webpack.config"); 8 | 9 | delete config.chromeExtensionBoilerplate; 10 | 11 | config.mode = "production"; 12 | 13 | webpack(config, function(err) { 14 | if (err) throw err; 15 | }); 16 | -------------------------------------------------------------------------------- /scripts/dev.js: -------------------------------------------------------------------------------- 1 | // Do this as the first thing so that any code reading it knows the right env. 
2 | process.env.BABEL_ENV = "development"; 3 | process.env.NODE_ENV = "development"; 4 | process.env.ASSET_PATH = "/"; 5 | 6 | var WebpackDevServer = require("webpack-dev-server"), 7 | webpack = require("webpack"), 8 | config = require("../webpack.config"), 9 | path = require("path"); 10 | 11 | var options = config.chromeExtensionBoilerplate || {}; 12 | var excludeEntriesToHotReload = options.notHotReload || []; 13 | 14 | for (var entryName in config.entry) { 15 | if (excludeEntriesToHotReload.indexOf(entryName) === -1) { 16 | config.entry[entryName] = [ 17 | "webpack-dev-server/client?http://localhost:3000", 18 | "webpack/hot/dev-server" 19 | ].concat(config.entry[entryName]); 20 | } 21 | } 22 | 23 | config.plugins = [new webpack.HotModuleReplacementPlugin()].concat( 24 | config.plugins || [] 25 | ); 26 | 27 | delete config.chromeExtensionBoilerplate; 28 | 29 | var compiler = webpack(config); 30 | 31 | var server = new WebpackDevServer(compiler, { 32 | https: false, 33 | hot: true, 34 | injectClient: false, 35 | writeToDisk: true, 36 | port: 3000, 37 | contentBase: path.join(__dirname, "../build"), 38 | publicPath: `http://localhost:3000`, 39 | headers: { 40 | "Access-Control-Allow-Origin": "*" 41 | }, 42 | disableHostCheck: true 43 | }); 44 | 45 | if (process.env.NODE_ENV === "development" && module.hot) { 46 | module.hot.accept(); 47 | } 48 | 49 | server.listen(3000); 50 | -------------------------------------------------------------------------------- /scripts/dist.js: -------------------------------------------------------------------------------- 1 | const fs = require('fs'); 2 | const { join } = require('path'); 3 | const archiver = require('archiver'); 4 | 5 | const distPath = join(__dirname, '..', 'dist'); 6 | const srcPath = join(__dirname, '..', 'build'); 7 | 8 | if (!fs.existsSync(distPath)) { 9 | fs.mkdirSync(distPath); 10 | } 11 | 12 | const output = fs.createWriteStream(join(distPath, 'build.zip')); 13 | 14 | const archive = archiver('zip', { 15 | 
zlib: { level: 9 }, 16 | }); 17 | 18 | // listen for all archive data to be written 19 | // 'close' event is fired only when a file descriptor is involved 20 | output.on('close', function () { 21 | console.log(archive.pointer() + ' total bytes'); 22 | console.log( 23 | 'archiver has been finalized and the output file descriptor has closed.' 24 | ); 25 | }); 26 | 27 | // This event is fired when the data source is drained no matter what was the data source. 28 | // It is not part of this library but rather from the NodeJS Stream API. 29 | // @see: https://nodejs.org/api/stream.html#stream_event_end 30 | output.on('end', function () { 31 | console.log('Data has been drained'); 32 | }); 33 | 34 | // good practice to catch warnings (ie stat failures and other non-blocking errors) 35 | archive.on('warning', function (err) { 36 | if (err.code === 'ENOENT') { 37 | // log warning 38 | } else { 39 | // throw error 40 | throw err; 41 | } 42 | }); 43 | 44 | // good practice to catch this error explicitly 45 | archive.on('error', function (err) { 46 | throw err; 47 | }); 48 | 49 | // pipe archive data to the file 50 | archive.pipe(output); 51 | archive.directory(srcPath, false); 52 | // finalize the archive (ie we are done appending files but streams have to finish yet) 53 | // 'close', 'end' or 'finish' may be fired right after calling this method so register to them beforehand 54 | archive.finalize(); 55 | -------------------------------------------------------------------------------- /src/code/nodejs.ts: -------------------------------------------------------------------------------- 1 | import { createCodeHeader } from '../utils/codeHeader'; 2 | 3 | export function generateNodejsCode(selector: string, url: string): string { 4 | return `${createCodeHeader('//')} 5 | 6 | // Install: 7 | // npm install axios 8 | // npm install node-html-parser 9 | 10 | const axios = require('axios'); 11 | const { parse } = require('node-html-parser'); 12 | 13 | async function 
getData(selector, url) { 14 | const response = await axios.get(url); 15 | const root = parse(response.data); 16 | const elements = root.querySelectorAll(selector); 17 | elements.forEach((element) => { 18 | console.log('text', element.text); 19 | console.log('src', element.getAttribute('src')); 20 | console.log('href', element.getAttribute('href')); 21 | }); 22 | } 23 | 24 | getData('${selector}', '${url}'); 25 | 26 | `; 27 | } 28 | -------------------------------------------------------------------------------- /src/code/python.ts: -------------------------------------------------------------------------------- 1 | import { createCodeHeader } from '../utils/codeHeader'; 2 | 3 | export function generatePythonCode(selector: string, url: string): string { 4 | return `${createCodeHeader('#')} 5 | 6 | # Install: 7 | # pip install pyquery 8 | 9 | from pyquery import PyQuery as pq 10 | 11 | 12 | def get_data(selector, url): 13 | d = pq(url=url) 14 | elements = d(selector) 15 | for element in elements: 16 | ele = pq(element) 17 | print('text:', ele.text()) 18 | print('href:', ele.attr('href')) 19 | print('src:', ele.attr('src')) 20 | 21 | 22 | if __name__ == "__main__": 23 | get_data('${selector}', 24 | '${url}') 25 | `; 26 | } 27 | -------------------------------------------------------------------------------- /src/components/CodeTab.tsx: -------------------------------------------------------------------------------- 1 | import React from 'react'; 2 | import { Tabs } from 'antd'; 3 | import { generatePythonCode } from '../code/python'; 4 | import { generateNodejsCode } from '../code/nodejs'; 5 | 6 | import Editor from './Editor'; 7 | import { SelectData } from '../models/types'; 8 | import { connectApp } from '../utils/connect'; 9 | import { currentUrl } from '../utils/urls'; 10 | 11 | const { TabPane } = Tabs; 12 | 13 | const availableCode: { 14 | [id: string]: (selector: string, url: string) => string; 15 | } = { 16 | python: generatePythonCode, 17 | js: 
generateNodejsCode, 18 | }; 19 | 20 | const CodeTab = ({ app }: { app: SelectData }) => { 21 | return ( 22 | 23 | {Object.keys(availableCode).map((tp) => { 24 | return ( 25 | 26 | 30 | 31 | ); 32 | })} 33 | 34 | ); 35 | }; 36 | 37 | export default connectApp(CodeTab); 38 | -------------------------------------------------------------------------------- /src/components/DemoAndInstruction.tsx: -------------------------------------------------------------------------------- 1 | import React from 'react'; 2 | 3 | export default () => { 4 | return ( 5 |
13 |

14 | {chrome.i18n.getMessage('note')} 15 |

16 | 26 |

27 | {' '} 28 | {chrome.i18n.getMessage('demoLegend')} 29 |

30 |
40 | 44 | {chrome.i18n.getMessage('demoPrepare')} 45 | 46 | 50 | {chrome.i18n.getMessage('demoPredict')} 51 | 52 |
53 |
54 | ); 55 | }; 56 | -------------------------------------------------------------------------------- /src/components/Editor.tsx: -------------------------------------------------------------------------------- 1 | import React from 'react'; 2 | import Editor from 'react-simple-code-editor'; 3 | //@ts-ignore 4 | import { highlight, languages } from 'prismjs/components/prism-core'; 5 | import 'prismjs/components/prism-clike'; 6 | import 'prismjs/components/prism-javascript'; 7 | import 'prismjs/components/prism-python'; 8 | import 'prismjs/components/prism-java'; 9 | import '../styles/editor.css'; 10 | 11 | export default ({ type, code }: { type: string; code: string }) => { 12 | return ( 13 |
14 | {}} 18 | highlight={(value) => highlight(value, languages[type])} 19 | padding={10} 20 | style={{ 21 | fontFamily: 22 | 'SFMono-Regular, Consolas, Liberation Mono, Menlo, "Fira code", "Fira Mono", monospace', 23 | fontSize: 18, 24 | }} 25 | /> 26 |
27 | ); 28 | }; 29 | -------------------------------------------------------------------------------- /src/components/PreviewTab.tsx: -------------------------------------------------------------------------------- 1 | import React, { useState } from 'react'; 2 | import $ from 'cash-dom'; 3 | import { connectApp } from '../utils/connect'; 4 | import { SelectData } from '../models/types'; 5 | import { Radio, Divider } from 'antd'; 6 | 7 | const options = [ 8 | { label: 'HTML', value: 'HTML' }, 9 | { label: 'src', value: 'src' }, 10 | { label: 'href', value: 'href' }, 11 | { label: 'text', value: 'text' }, 12 | ]; 13 | 14 | const Preview = ({ app }: { app: SelectData }) => { 15 | const [displayOption, setDisplayOption] = useState(options[0].value); 16 | 17 | const renderHtml = () => { 18 | return app.htmls.map((html, idx) => { 19 | return ( 20 |
25 | ); 26 | }); 27 | }; 28 | 29 | const renderAttr = (attr: string) => { 30 | return app.htmls.map((html, idx) => { 31 | const element = $(html)[0]; 32 | const href = element && element.getAttribute(attr); 33 | return ( 34 |
35 | {attr}: {href} 36 |
37 | ); 38 | }); 39 | }; 40 | 41 | const renderText = () => { 42 | return app.htmls.map((html, idx) => { 43 | return ( 44 |
45 | {$(html).text()} 46 |
47 | ); 48 | }); 49 | }; 50 | 51 | const renderDisplay = () => { 52 | switch (displayOption) { 53 | case 'src': 54 | return renderAttr('src'); 55 | case 'href': 56 | return renderAttr('href'); 57 | case 'text': 58 | return renderText(); 59 | default: 60 | return renderHtml(); 61 | } 62 | }; 63 | 64 | return ( 65 |
66 |
67 | setDisplayOption(e.target.value)} 70 | value={displayOption} 71 | optionType="button" 72 | buttonStyle="solid" 73 | /> 74 |
75 | 76 |
84 | {renderDisplay()} 85 |
86 |
87 | ); 88 | }; 89 | 90 | export default connectApp(Preview); 91 | -------------------------------------------------------------------------------- /src/components/SelectOverview.tsx: -------------------------------------------------------------------------------- 1 | import React from 'react'; 2 | import { SelectData } from '../models/types'; 3 | import { Button, Descriptions, Tag, Alert, Divider } from 'antd'; 4 | import { connectApp } from '../utils/connect'; 5 | import { sendUpstreamMessage } from '../utils/message'; 6 | import { InternalMessageType } from '../models/constant'; 7 | 8 | function SelectOverview({ app }: { app: SelectData }) { 9 | const onReset = () => { 10 | sendUpstreamMessage(InternalMessageType.CLEAR); 11 | }; 12 | 13 | return ( 14 |
15 | 16 | 17 | 18 | {app.selectedCount} 19 | 20 | 21 | 22 | 29 | {app.suggestSelector} 30 | 31 | 32 | 33 | 34 | {app.selected ? ( 35 | 38 | ) : ( 39 | 44 | )} 45 |
46 | ); 47 | } 48 | 49 | export default connectApp(SelectOverview); 50 | -------------------------------------------------------------------------------- /src/containers/DetailPanelContainer.tsx: -------------------------------------------------------------------------------- 1 | import React from 'react'; 2 | import { Row, Col, Divider, Tabs } from 'antd'; 3 | 4 | import DemoAndInstruction from '../components/DemoAndInstruction'; 5 | import SelectOverview from '../components/SelectOverview'; 6 | import PreviewTab from '../components/PreviewTab'; 7 | import CodeTab from '../components/CodeTab'; 8 | 9 | const { TabPane } = Tabs; 10 | 11 | export default () => { 12 | return ( 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | ); 31 | }; 32 | -------------------------------------------------------------------------------- /src/containers/OverviewPanelContainer.tsx: -------------------------------------------------------------------------------- 1 | import React from 'react'; 2 | import { SelectData } from '../models/types'; 3 | import DemoAndInstruction from '../components/DemoAndInstruction'; 4 | import SelectOverview from '../components/SelectOverview'; 5 | import { Divider } from 'antd'; 6 | 7 | export default () => { 8 | return ( 9 |
10 | 11 | 12 | 13 |
14 | ); 15 | }; 16 | -------------------------------------------------------------------------------- /src/containers/PanelContainer.tsx: -------------------------------------------------------------------------------- 1 | import React, { useState } from 'react'; 2 | import 'antd/dist/antd.css'; 3 | import 'antd/dist/antd.dark.css'; 4 | import '../styles/panel.css'; 5 | 6 | import { Card, Button } from 'antd'; 7 | import { InternalMessageType } from '../models/constant'; 8 | import { sendUpstreamMessage } from '../utils/message'; 9 | import { LeftOutlined, RightOutlined, CloseOutlined } from '@ant-design/icons'; 10 | import DetailPanelContainer from './DetailPanelContainer'; 11 | 12 | import OverviewPanelContainer from './OverviewPanelContainer'; 13 | 14 | export default () => { 15 | const [expanded, setExpanded] = useState(false); 16 | 17 | const toggleExpand = () => { 18 | setExpanded(!expanded); 19 | sendUpstreamMessage(InternalMessageType.EXPAND, !expanded); 20 | }; 21 | 22 | const close = () => { 23 | sendUpstreamMessage(InternalMessageType.STOP); 24 | }; 25 | 26 | return ( 27 | 38 |
46 | } 47 | > 48 | {expanded ? : } 49 | 50 | ); 51 | }; 52 | -------------------------------------------------------------------------------- /src/containers/PopupContainer.tsx: -------------------------------------------------------------------------------- 1 | import React, { useState, useEffect } from 'react'; 2 | import { MessageType } from '../models/constant'; 3 | import 'antd/dist/antd.css'; 4 | import 'antd/dist/antd.dark.css'; 5 | 6 | import { Button, Layout, Card } from 'antd'; 7 | import updates from '../../updates.json'; 8 | import { sendMessage } from '../utils/message'; 9 | import { getCurrentUrl } from '../utils/urls'; 10 | 11 | const { Header, Content } = Layout; 12 | 13 | export default () => { 14 | const onStartClicked = () => { 15 | sendMessage(MessageType.START); 16 | window.close(); 17 | }; 18 | 19 | const [canStart, setCanStart] = useState(false); 20 | 21 | useEffect(() => { 22 | getCurrentUrl().then((url) => { 23 | setCanStart(url.startsWith('http') || url.startsWith('https')); 24 | }); 25 | }); 26 | 27 | return ( 28 |
29 | 30 |
39 | 40 |
41 | 48 | {chrome.i18n.getMessage('extName')} 49 | 50 | v{process.env.npm_package_version} 51 | 52 | 53 |
54 | 65 | 70 | 71 |
72 | 73 | 74 | 75 |

{chrome.i18n.getMessage('feature')}

76 |
    77 | {chrome.i18n 78 | .getMessage('featureList') 79 | .split('|') 80 | .map((feature) => ( 81 |
  • 82 |
    {feature}
    83 |
  • 84 | ))} 85 |
86 |

{chrome.i18n.getMessage('note')}

87 |
    88 | {chrome.i18n 89 | .getMessage('noteList') 90 | .split('|') 91 | .map((feature) => ( 92 |
  • 93 |
    {feature}
    94 |
  • 95 | ))} 96 |
97 |

{chrome.i18n.getMessage('update')}

98 |
    99 | {updates.map((feature) => ( 100 |
  • 101 |
    {feature}
    102 |
  • 103 | ))} 104 |
105 |
106 | 118 |
119 |
120 |
121 | ); 122 | }; 123 | -------------------------------------------------------------------------------- /src/entry/background.ts: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huangwc94/scraping-helper-chrome-extension/858ebc3d44690cccb320ca577c02b80813afd78d/src/entry/background.ts -------------------------------------------------------------------------------- /src/entry/content.ts: -------------------------------------------------------------------------------- 1 | import { MessageType, InternalMessageType } from '../models/constant'; 2 | import { listenMessage, listenInternalMessage } from '../utils/message'; 3 | import { SHEngine } from '../search/engine'; 4 | let started = false; 5 | 6 | let engine = new SHEngine(); 7 | 8 | listenMessage((event: MessageType, payload: any) => { 9 | switch (event) { 10 | case MessageType.START: 11 | initialize(); 12 | break; 13 | } 14 | }); 15 | 16 | listenInternalMessage((event: InternalMessageType, payload: any) => { 17 | switch (event) { 18 | case InternalMessageType.EXPAND: 19 | expand(payload); 20 | break; 21 | case InternalMessageType.STOP: 22 | stop(); 23 | break; 24 | case InternalMessageType.CLEAR: 25 | engine.reset(); 26 | break; 27 | } 28 | }); 29 | 30 | function stop() { 31 | if (!started) { 32 | return; 33 | } 34 | started = false; 35 | const frame = document.getElementById('sh-iframe'); 36 | if (!!frame) { 37 | frame.remove(); 38 | } 39 | engine.unbind(); 40 | } 41 | 42 | function initialize() { 43 | if (started) { 44 | return; 45 | } 46 | started = true; 47 | const frame = document.createElement('iframe'); 48 | frame.src = chrome.extension.getURL('panel.html'); 49 | frame.id = 'sh-iframe'; 50 | frame.scrolling = 'no'; 51 | frame.className = 'sh-frame-no-expand'; 52 | frame.draggable = true; 53 | document.body.appendChild(frame); 54 | engine.bind(); 55 | } 56 | 57 | function expand(expand: boolean) { 58 | const frame = 
document.getElementById('sh-iframe'); 59 | if (!!frame) { 60 | if (expand) { 61 | frame.className = 'sh-frame-expand'; 62 | } else { 63 | frame.className = 'sh-frame-no-expand'; 64 | } 65 | } 66 | } 67 | -------------------------------------------------------------------------------- /src/entry/panel.tsx: -------------------------------------------------------------------------------- 1 | import React from 'react'; 2 | import '../styles/global.css'; 3 | import PanelContainer from '../containers/PanelContainer'; 4 | import dva from 'dva'; 5 | import { AppModel } from '../models/app'; 6 | import { listenInternalMessage } from '../utils/message'; 7 | import { InternalMessageType } from '../models/constant'; 8 | 9 | const app = dva(); 10 | 11 | app.model(AppModel); 12 | 13 | listenInternalMessage((event: InternalMessageType, payload: any) => { 14 | if (event === InternalMessageType.UPDATE) { 15 | //@ts-ignore 16 | app._store.dispatch({ type: 'app/update', payload }); 17 | } 18 | }); 19 | 20 | app.router(() => { 21 | return ; 22 | }); 23 | 24 | app.start('#app-container'); 25 | 26 | //@ts-ignore 27 | if (module.hot) module.hot.accept(); 28 | -------------------------------------------------------------------------------- /src/entry/popup.tsx: -------------------------------------------------------------------------------- 1 | import React from 'react'; 2 | import { render } from 'react-dom'; 3 | import '../styles/global.css'; 4 | import PopupContainer from '../containers/PopupContainer'; 5 | 6 | render(, window.document.querySelector('#app-container')); 7 | 8 | //@ts-ignore 9 | if (module.hot) module.hot.accept(); 10 | -------------------------------------------------------------------------------- /src/models/app.ts: -------------------------------------------------------------------------------- 1 | import { Model } from 'dva'; 2 | import { SelectData, GetDefaultData } from './types'; 3 | import { UpdateState } from '../utils/updateState'; 4 | 5 | export interface 
AppModelType extends Model { 6 | state: SelectData; 7 | } 8 | 9 | export const AppModel: AppModelType = { 10 | namespace: 'app', 11 | state: GetDefaultData(), 12 | reducers: { 13 | update: UpdateState(), 14 | }, 15 | }; 16 | -------------------------------------------------------------------------------- /src/models/constant.ts: -------------------------------------------------------------------------------- 1 | export enum MessageType { 2 | START = 'START', 3 | } 4 | 5 | export enum InternalMessageType { 6 | STOP = 'STOP', 7 | EXPAND = 'EXPAND', 8 | UPDATE = 'UPDATE', 9 | CLEAR = 'CLEAR', 10 | } 11 | 12 | export const SH_HOVER_CLASS = 'sh-hover'; 13 | export const SH_SELECT_CLASS = 'sh-select'; 14 | export const SH_PREDICT_CLASS = 'sh-predict'; 15 | 16 | export const LANGUAGE_STORAGE_KEY = 'sh-language'; 17 | -------------------------------------------------------------------------------- /src/models/types.ts: -------------------------------------------------------------------------------- 1 | export interface SelectData { 2 | selectedCount: number; 3 | selected: boolean; 4 | suggestSelector: string; 5 | htmls: string[]; 6 | success: boolean; 7 | } 8 | 9 | export type DataElement = HTMLElement & Element & Node; 10 | 11 | export interface AppState { 12 | app: SelectData; 13 | } 14 | 15 | export function mapAppStateToProps({ app }: { app: SelectData }) { 16 | return { app }; 17 | } 18 | 19 | export function GetDefaultData() { 20 | return { 21 | selected: false, 22 | success: false, 23 | selectedCount: 0, 24 | suggestSelector: '', 25 | htmls: [], 26 | }; 27 | } 28 | -------------------------------------------------------------------------------- /src/search/engine.ts: -------------------------------------------------------------------------------- 1 | import $ from 'cash-dom'; 2 | import { GetDefaultData } from '../models/types'; 3 | import { SearchBase } from './search'; 4 | import { updateSelection } from '../utils/message'; 5 | import { SimpleSearch } from 
'./simpleSearch'; 6 | import { SH_HOVER_CLASS, SH_PREDICT_CLASS } from '../models/constant'; 7 | 8 | enum EngineState { 9 | NOT_START = 'NOT_START', 10 | SELECT = 'SELECT', 11 | ANALYZE = 'ANALYZE', 12 | } 13 | 14 | const SH_SEARCHABLE_ELEMENT = ['*']; 15 | // const SH_SEARCHABLE_ELEMENT = [ 16 | // 'a', 17 | // 'h1', 18 | // 'h2', 19 | // 'h3', 20 | // 'h4', 21 | // 'h5', 22 | // 'h6', 23 | // 'p', 24 | // 'span', 25 | // 'li', 26 | // 'td', 27 | // 'img', 28 | // ]; 29 | 30 | export class SHEngine { 31 | private state: EngineState; 32 | 33 | private search: SearchBase; 34 | 35 | constructor() { 36 | this.state = EngineState.NOT_START; 37 | this.search = new SimpleSearch(); 38 | } 39 | 40 | bind() { 41 | SH_SEARCHABLE_ELEMENT.forEach((tag) => { 42 | $(tag).on('mouseover', (e: Event) => { 43 | e.stopPropagation(); 44 | e.preventDefault(); 45 | if (this.state !== EngineState.SELECT) { 46 | return; 47 | } 48 | e.target && this.onElementHover(e.target); 49 | }); 50 | 51 | $(tag).on('click', (e: Event) => { 52 | e.stopPropagation(); 53 | e.preventDefault(); 54 | if (this.state !== EngineState.SELECT) { 55 | return; 56 | } 57 | e.target && this.onElementClick(e.target); 58 | }); 59 | }); 60 | 61 | this.state = EngineState.SELECT; 62 | } 63 | 64 | unbind() { 65 | SH_SEARCHABLE_ELEMENT.forEach((tag) => { 66 | $(tag).off(); 67 | }); 68 | 69 | this.reset(); 70 | this.state = EngineState.NOT_START; 71 | } 72 | 73 | reset() { 74 | this.clearClass(SH_HOVER_CLASS); 75 | this.clearClass(SH_PREDICT_CLASS); 76 | this.state = EngineState.SELECT; 77 | updateSelection(GetDefaultData()); 78 | } 79 | 80 | clearClass(className: string) { 81 | $(`.${className}`).removeClass(className); 82 | } 83 | 84 | onElementClick(element: EventTarget) { 85 | // remove old select & predict class 86 | 87 | this.clearClass(SH_PREDICT_CLASS); 88 | 89 | // set internal state 90 | this.state = EngineState.ANALYZE; 91 | 92 | // replace sh-hover with sh-select 93 | const ele = $(element as HTMLElement); 94 | 
const selectElements = this.search.select(ele); 95 | 96 | this.clearClass(SH_HOVER_CLASS); 97 | 98 | // calculate predicts 99 | const result = this.search.predict(ele, selectElements); 100 | result.elements.addClass(SH_PREDICT_CLASS); 101 | 102 | const copyHtml = result.elements.clone(); 103 | 104 | copyHtml.removeClass(SH_PREDICT_CLASS); 105 | copyHtml.removeClass(SH_HOVER_CLASS); 106 | const htmls: string[] = []; 107 | copyHtml.each((_, ele) => { 108 | htmls.push(ele.outerHTML); 109 | }); 110 | 111 | updateSelection({ 112 | selected: true, 113 | selectedCount: result.elements.length, 114 | suggestSelector: result.suggestSelector, 115 | htmls, 116 | success: result.success, 117 | }); 118 | } 119 | 120 | onElementHover(element: EventTarget) { 121 | this.clearClass(SH_HOVER_CLASS); 122 | const a = $(element as HTMLElement); 123 | const sameElements = this.search.select(a); 124 | 125 | sameElements.addClass(SH_HOVER_CLASS); 126 | 127 | updateSelection({ 128 | selected: false, 129 | selectedCount: sameElements.length, 130 | suggestSelector: '', 131 | htmls: [], 132 | success: false, 133 | }); 134 | } 135 | } 136 | -------------------------------------------------------------------------------- /src/search/search.ts: -------------------------------------------------------------------------------- 1 | import $, { Cash } from 'cash-dom'; 2 | 3 | export interface PredictResult { 4 | elements: Cash; 5 | suggestSelector: string; 6 | 7 | success: boolean; 8 | } 9 | 10 | export class SearchBase { 11 | getFullPath(e: Cash): string { 12 | const parents = e.parentsUntil('body'); 13 | const tagNames: string[] = !!e[0] ? 
[e[0].tagName.toLocaleLowerCase()] : []; 14 | parents.each((i, ele) => { 15 | tagNames.push(ele.tagName.toLocaleLowerCase()); 16 | }); 17 | return tagNames.reverse().join(' '); 18 | } 19 | 20 | // getChildren(e: Cash): string[] {} 21 | 22 | predict(e: Cash, referenceElements: Cash): PredictResult { 23 | return { 24 | elements: $([]), 25 | suggestSelector: '', 26 | success: true, 27 | }; 28 | } 29 | 30 | select(e: Cash): Cash { 31 | const pathName = this.getFullPath(e); 32 | if (e.length > 0) { 33 | const element = e[0]; 34 | const sameElements: Node[] = []; 35 | element && 36 | $(element.tagName.toLocaleLowerCase()).each((i, ele) => { 37 | const comparePathName = this.getFullPath($(ele)); 38 | if (pathName === comparePathName) { 39 | sameElements.push(ele); 40 | } 41 | }); 42 | return $(sameElements); 43 | } else { 44 | return $(); 45 | } 46 | } 47 | } 48 | -------------------------------------------------------------------------------- /src/search/simpleSearch.ts: -------------------------------------------------------------------------------- 1 | import { SearchBase, PredictResult } from './search'; 2 | import $, { Cash } from 'cash-dom'; 3 | import { 4 | SH_SELECT_CLASS, 5 | SH_PREDICT_CLASS, 6 | SH_HOVER_CLASS, 7 | } from '../models/constant'; 8 | 9 | export class SimpleSearch extends SearchBase { 10 | private _currentBest: string = ''; 11 | private _currentDiff: number = Number.MAX_VALUE; 12 | private _referenceElements: Cash = $([]); 13 | constructor() { 14 | super(); 15 | } 16 | private resetSearch() { 17 | this._currentBest = ''; 18 | this._currentDiff = Number.MAX_VALUE; 19 | this._referenceElements = $([]); 20 | } 21 | 22 | predict(e: Cash, referenceElements: Cash): PredictResult { 23 | let targetElement = e[0]; 24 | 25 | if (!targetElement) { 26 | return { 27 | elements: $([]), 28 | suggestSelector: '', 29 | success: true, 30 | }; 31 | } 32 | 33 | this.resetSearch(); 34 | 35 | this._referenceElements = referenceElements; 36 | let previousPath: string[] = 
[]; 37 | let searchNode = targetElement; 38 | let solutionFound = false; 39 | 40 | while ( 41 | !solutionFound && 42 | searchNode.tagName.toLocaleLowerCase() !== 'body' 43 | ) { 44 | const candidates = this.getElementCombination(searchNode, previousPath); 45 | 46 | for (let i = 0; i < candidates.length; i++) { 47 | solutionFound = this.checkResult(candidates[i]); 48 | if (solutionFound) { 49 | return { 50 | elements: $(this._currentBest), 51 | suggestSelector: this._currentBest, 52 | success: solutionFound, 53 | }; 54 | } 55 | } 56 | 57 | previousPath.push(searchNode.tagName.toLocaleLowerCase()); 58 | 59 | const p = $(searchNode).parent()[0]; 60 | if (!!p) { 61 | searchNode = p; 62 | } else { 63 | break; 64 | } 65 | } 66 | 67 | return { 68 | elements: $(this._currentBest), 69 | suggestSelector: this._currentBest, 70 | success: solutionFound, 71 | }; 72 | } 73 | 74 | getElementCombination(e: Element, previous: string[]): string[] { 75 | const tagName = e.tagName.toLocaleLowerCase(); 76 | const res: string[] = []; 77 | 78 | if (previous.length === 0) { 79 | res.push(tagName); 80 | e.classList.forEach((cls) => { 81 | if ( 82 | ![SH_SELECT_CLASS, SH_PREDICT_CLASS, SH_HOVER_CLASS].includes(cls) 83 | ) { 84 | res.push(`${tagName}.${cls}`); 85 | } 86 | }); 87 | } else { 88 | previous.forEach((previousSelector) => { 89 | res.push(`${previousSelector} ${tagName}`); 90 | e.classList.forEach((cls) => { 91 | if ( 92 | ![SH_SELECT_CLASS, SH_PREDICT_CLASS, SH_HOVER_CLASS].includes(cls) 93 | ) { 94 | res.push(`${tagName}.${cls} ${previousSelector}`); 95 | } 96 | }); 97 | }); 98 | } 99 | return res; 100 | } 101 | 102 | checkResult(currentSelector: string): boolean { 103 | const currentElements = $(currentSelector); 104 | 105 | const diff = Math.abs( 106 | currentElements.length - this._referenceElements.length 107 | ); 108 | 109 | if (this._currentDiff > diff) { 110 | this._currentBest = currentSelector; 111 | this._currentDiff = diff; 112 | } 113 | 114 | if (diff === 0) { 115 | 
return true; 116 | } 117 | 118 | return false; 119 | } 120 | } 121 | -------------------------------------------------------------------------------- /src/styles/editor.css: -------------------------------------------------------------------------------- 1 | pre[class*='language-'], 2 | code[class*='language-'] { 3 | color: #d4d4d4; 4 | font-size: 13px; 5 | text-shadow: none; 6 | font-family: Menlo, Monaco, Consolas, 'Andale Mono', 'Ubuntu Mono', 7 | 'Courier New', monospace; 8 | direction: ltr; 9 | text-align: left; 10 | white-space: pre; 11 | word-spacing: normal; 12 | word-break: normal; 13 | line-height: 1.5; 14 | -moz-tab-size: 4; 15 | -o-tab-size: 4; 16 | tab-size: 4; 17 | -webkit-hyphens: none; 18 | -moz-hyphens: none; 19 | -ms-hyphens: none; 20 | hyphens: none; 21 | } 22 | 23 | pre[class*='language-']::selection, 24 | code[class*='language-']::selection, 25 | pre[class*='language-'] *::selection, 26 | code[class*='language-'] *::selection { 27 | text-shadow: none; 28 | background: #75a7ca; 29 | } 30 | 31 | @media print { 32 | pre[class*='language-'], 33 | code[class*='language-'] { 34 | text-shadow: none; 35 | } 36 | } 37 | 38 | pre[class*='language-'] { 39 | padding: 1em; 40 | margin: 0.5em 0; 41 | overflow: auto; 42 | background: #1e1e1e; 43 | } 44 | 45 | :not(pre) > code[class*='language-'] { 46 | padding: 0.1em 0.3em; 47 | border-radius: 0.3em; 48 | color: #db4c69; 49 | background: #f9f2f4; 50 | } 51 | /********************************************************* 52 | * Tokens 53 | */ 54 | .namespace { 55 | opacity: 0.7; 56 | } 57 | 58 | .token.doctype .token.doctype-tag { 59 | color: #569cd6; 60 | } 61 | 62 | .token.doctype .token.name { 63 | color: #9cdcfe; 64 | } 65 | 66 | .token.comment, 67 | .token.prolog { 68 | color: #6a9955; 69 | } 70 | 71 | .token.punctuation, 72 | .language-html .language-css .token.punctuation, 73 | .language-html .language-javascript .token.punctuation { 74 | color: #d4d4d4; 75 | } 76 | 77 | .token.property, 78 | .token.tag, 79 
| .token.boolean, 80 | .token.number, 81 | .token.constant, 82 | .token.symbol, 83 | .token.inserted, 84 | .token.unit { 85 | color: #b5cea8; 86 | } 87 | 88 | .token.selector, 89 | .token.attr-name, 90 | .token.string, 91 | .token.char, 92 | .token.builtin, 93 | .token.deleted { 94 | color: #ce9178; 95 | } 96 | 97 | .language-css .token.string.url { 98 | text-decoration: underline; 99 | } 100 | 101 | .token.operator, 102 | .token.entity { 103 | color: #d4d4d4; 104 | } 105 | 106 | .token.operator.arrow { 107 | color: #569cd6; 108 | } 109 | 110 | .token.atrule { 111 | color: #ce9178; 112 | } 113 | 114 | .token.atrule .token.rule { 115 | color: #c586c0; 116 | } 117 | 118 | .token.atrule .token.url { 119 | color: #9cdcfe; 120 | } 121 | 122 | .token.atrule .token.url .token.function { 123 | color: #dcdcaa; 124 | } 125 | 126 | .token.atrule .token.url .token.punctuation { 127 | color: #d4d4d4; 128 | } 129 | 130 | .token.keyword { 131 | color: #569cd6; 132 | } 133 | 134 | .token.keyword.module, 135 | .token.keyword.control-flow { 136 | color: #c586c0; 137 | } 138 | 139 | .token.function, 140 | .token.function .token.maybe-class-name { 141 | color: #dcdcaa; 142 | } 143 | 144 | .token.regex { 145 | color: #d16969; 146 | } 147 | 148 | .token.important { 149 | color: #569cd6; 150 | } 151 | 152 | .token.italic { 153 | font-style: italic; 154 | } 155 | 156 | .token.constant { 157 | color: #9cdcfe; 158 | } 159 | 160 | .token.class-name, 161 | .token.maybe-class-name { 162 | color: #4ec9b0; 163 | } 164 | 165 | .token.console { 166 | color: #9cdcfe; 167 | } 168 | 169 | .token.parameter { 170 | color: #9cdcfe; 171 | } 172 | 173 | .token.interpolation { 174 | color: #9cdcfe; 175 | } 176 | 177 | .token.punctuation.interpolation-punctuation { 178 | color: #569cd6; 179 | } 180 | 181 | .token.boolean { 182 | color: #569cd6; 183 | } 184 | 185 | .token.property, 186 | .token.variable, 187 | .token.imports .token.maybe-class-name, 188 | .token.exports .token.maybe-class-name { 189 | color: 
#9cdcfe; 190 | } 191 | 192 | .token.selector { 193 | color: #d7ba7d; 194 | } 195 | 196 | .token.escape { 197 | color: #d7ba7d; 198 | } 199 | 200 | .token.tag { 201 | color: #569cd6; 202 | } 203 | 204 | .token.tag .token.punctuation { 205 | color: #808080; 206 | } 207 | 208 | .token.cdata { 209 | color: #808080; 210 | } 211 | 212 | .token.attr-name { 213 | color: #9cdcfe; 214 | } 215 | 216 | .token.attr-value, 217 | .token.attr-value .token.punctuation { 218 | color: #ce9178; 219 | } 220 | 221 | .token.attr-value .token.punctuation.attr-equals { 222 | color: #d4d4d4; 223 | } 224 | 225 | .token.entity { 226 | color: #569cd6; 227 | } 228 | 229 | .token.namespace { 230 | color: #4ec9b0; 231 | } 232 | /********************************************************* 233 | * Language Specific 234 | */ 235 | 236 | pre[class*='language-javascript'], 237 | code[class*='language-javascript'], 238 | pre[class*='language-jsx'], 239 | code[class*='language-jsx'], 240 | pre[class*='language-typescript'], 241 | code[class*='language-typescript'], 242 | pre[class*='language-tsx'], 243 | code[class*='language-tsx'] { 244 | color: #9cdcfe; 245 | } 246 | 247 | pre[class*='language-css'], 248 | code[class*='language-css'] { 249 | color: #ce9178; 250 | } 251 | 252 | pre[class*='language-html'], 253 | code[class*='language-html'] { 254 | color: #d4d4d4; 255 | } 256 | 257 | .language-regex .token.anchor { 258 | color: #dcdcaa; 259 | } 260 | 261 | .language-html .token.punctuation { 262 | color: #808080; 263 | } 264 | /********************************************************* 265 | * Line highlighting 266 | */ 267 | pre[data-line] { 268 | position: relative; 269 | } 270 | 271 | pre[class*='language-'] > code[class*='language-'] { 272 | position: relative; 273 | z-index: 1; 274 | } 275 | 276 | .line-highlight { 277 | position: absolute; 278 | left: 0; 279 | right: 0; 280 | padding: inherit 0; 281 | margin-top: 1em; 282 | background: #f7ebc6; 283 | box-shadow: inset 5px 0 0 #f7d87c; 284 | z-index: 
0; 285 | pointer-events: none; 286 | line-height: inherit; 287 | white-space: pre; 288 | } 289 | -------------------------------------------------------------------------------- /src/styles/global.css: -------------------------------------------------------------------------------- 1 | #sh-iframe { 2 | position: fixed; 3 | right: 20px; 4 | top: 20px; 5 | z-index: 2147483647; 6 | overflow: hidden !important; 7 | border: 0px none rgb(0, 0, 0); 8 | /*transition: -webkit-transform 300ms ease, opacity 300ms ease;*/ 9 | transition: width 0.5s, height 0.5s; 10 | } 11 | 12 | .sh-frame-no-expand { 13 | width: 300px; 14 | height: 650px; 15 | } 16 | 17 | .sh-frame-expand { 18 | width: 90vw; 19 | height: 90vh; 20 | } 21 | 22 | .sh-predict { 23 | border: 2px solid red; 24 | background-color: rgba(0, 153, 51, 0.8) !important; 25 | color: #fff !important; 26 | text-shadow: none !important; 27 | } 28 | 29 | .sh-hover { 30 | background-color: rgba(0, 204, 255, 0.5) !important; 31 | color: #fff !important; 32 | text-shadow: none !important; 33 | } 34 | 35 | .sh-with-shadow { 36 | transition: box-shadow 0.3s; 37 | box-shadow: 0px 0px #888888; 38 | } 39 | 40 | .sh-with-shadow:hover { 41 | box-shadow: 3px 3px 20px white; 42 | } 43 | -------------------------------------------------------------------------------- /src/styles/panel.css: -------------------------------------------------------------------------------- 1 | .sh-panel .ant-card-head-title { 2 | font-size: 16px; 3 | } 4 | 5 | .sh-preview-cell { 6 | border-color: goldenrod; 7 | border-width: 2px; 8 | border-style: solid; 9 | padding: 5px; 10 | margin: 10px; 11 | } 12 | -------------------------------------------------------------------------------- /src/utils/codeHeader.ts: -------------------------------------------------------------------------------- 1 | export function createCodeHeader(commentSymbol: string): string { 2 | const notes = chrome.i18n.getMessage('codeHeader').split('|'); 3 | const header = notes.map((n) => 
commentSymbol + ' ' + n); 4 | 5 | return header.join('\n'); 6 | } 7 | -------------------------------------------------------------------------------- /src/utils/connect.ts: -------------------------------------------------------------------------------- 1 | import { ReactComponentLike } from 'prop-types'; 2 | import { connect } from 'dva'; 3 | import { mapAppStateToProps } from '../models/types'; 4 | 5 | export function connectApp(container: ReactComponentLike) { 6 | return connect(mapAppStateToProps)(container); 7 | } 8 | -------------------------------------------------------------------------------- /src/utils/message.ts: -------------------------------------------------------------------------------- 1 | import { MessageType, InternalMessageType } from '../models/constant'; 2 | import { SelectData, GetDefaultData } from '../models/types'; 3 | 4 | export function sendMessage(event: MessageType, payload: any = null) { 5 | chrome.tabs.query({ active: true, currentWindow: true }, function (tabs) { 6 | const tab = tabs[0]; 7 | if (!!tab && tab.id !== undefined) { 8 | chrome.tabs.sendMessage(tab.id, { 9 | SH_EVENT: true, 10 | event, 11 | payload, 12 | }); 13 | } 14 | }); 15 | } 16 | 17 | export function listenMessage(cb: (event: MessageType, payload: any) => void) { 18 | chrome.runtime.onMessage.addListener((message) => { 19 | if (message.SH_EVENT) { 20 | cb(message.event, message.payload); 21 | } 22 | }); 23 | } 24 | 25 | export function sendUpstreamMessage( 26 | event: InternalMessageType, 27 | payload: any = null 28 | ) { 29 | parent.postMessage( 30 | { 31 | SH_EVENT: true, 32 | event, 33 | payload, 34 | }, 35 | '*' 36 | ); 37 | } 38 | 39 | export function sendDownstreamMessage( 40 | event: InternalMessageType, 41 | payload: any = null 42 | ) { 43 | const frame = document.getElementById('sh-iframe'); 44 | if (!!frame) { 45 | //@ts-ignore 46 | frame.contentWindow.postMessage( 47 | { 48 | SH_EVENT: true, 49 | event, 50 | payload, 51 | }, 52 | '*' 53 | ); 54 | } 55 
| } 56 | 57 | export function listenInternalMessage( 58 | cb: (event: InternalMessageType, payload: any) => void 59 | ) { 60 | window.addEventListener('message', (e) => { 61 | if (e.data.SH_EVENT) { 62 | cb(e.data.event, e.data.payload); 63 | } 64 | }); 65 | } 66 | 67 | export function updateSelection(select: SelectData) { 68 | sendDownstreamMessage(InternalMessageType.UPDATE, select); 69 | } 70 | -------------------------------------------------------------------------------- /src/utils/updateState.ts: -------------------------------------------------------------------------------- 1 | import _ from 'lodash'; 2 | import { AnyAction, Reducer } from 'redux'; 3 | 4 | export function UpdateState(): Reducer { 5 | return UpdateStateG; 6 | } 7 | 8 | export function UpdateStateG<T>( 9 | oldState: T, 10 | { payload: newState }: AnyAction 11 | ): T { 12 | return { ...oldState, ...newState }; 13 | } 14 | -------------------------------------------------------------------------------- /src/utils/urls.ts: -------------------------------------------------------------------------------- 1 | let _currentUrl = ''; 2 | 3 | getCurrentUrl(); 4 | 5 | export function currentUrl(): string { 6 | return _currentUrl; 7 | } 8 | 9 | export function getCurrentUrl(): Promise<string> { 10 | return new Promise((resolve) => { 11 | chrome.tabs.query({ active: true, currentWindow: true }, function (tabs) { 12 | const tab = tabs[0]; 13 | if (!!tab && tab.id !== undefined) { 14 | _currentUrl = tab.url || ''; 15 | resolve(tab.url || ''); 16 | } else { 17 | resolve(''); 18 | } 19 | }); 20 | }); 21 | } 22 | -------------------------------------------------------------------------------- /template/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Webpack App 6 | 7 | 8 |
9 | 10 | 11 | -------------------------------------------------------------------------------- /test/index.html: -------------------------------------------------------------------------------- 1 | 4 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | Scraping Helper Extension TestSet 21 | 22 | 23 | 24 |
25 | 28 | 31 | 32 |
33 |
34 |
35 | Default Doom 36 |
37 | Vestibulum at eros 38 |
39 |
40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 |
# Test Name Test Status Note
1 Test Name Test Status Note
2 Test Name Test Status Note
49 |
50 |
51 | 52 | fjoaijf 53 |
    54 |
  • fjoaijf 55 |
      56 |
    • asjbritjb
    • 57 |
    58 |
  • 59 |
60 | 61 | 62 | -------------------------------------------------------------------------------- /test/test.js: -------------------------------------------------------------------------------- 1 | $(function(){ 2 | 3 | }); -------------------------------------------------------------------------------- /tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "target": "es5", 4 | "lib": ["dom", "dom.iterable", "esnext"], 5 | "allowJs": false, 6 | "skipLibCheck": true, 7 | "esModuleInterop": true, 8 | "allowSyntheticDefaultImports": true, 9 | "strict": true, 10 | "forceConsistentCasingInFileNames": true, 11 | "noFallthroughCasesInSwitch": true, 12 | "module": "esnext", 13 | "moduleResolution": "node", 14 | "resolveJsonModule": true, 15 | "noEmit": false, 16 | "jsx": "react" 17 | }, 18 | "include": ["src"], 19 | "exclude": ["build", "node_modules"] 20 | } 21 | -------------------------------------------------------------------------------- /updates.json: -------------------------------------------------------------------------------- 1 | ["使用React+Antd重写", "增加Python, JS示例代码", "Add english translation!"] 2 | -------------------------------------------------------------------------------- /webpack.config.js: -------------------------------------------------------------------------------- 1 | var webpack = require('webpack'), 2 | path = require('path'), 3 | { CleanWebpackPlugin } = require('clean-webpack-plugin'), 4 | CopyWebpackPlugin = require('copy-webpack-plugin'), 5 | HtmlWebpackPlugin = require('html-webpack-plugin'), 6 | TerserPlugin = require('terser-webpack-plugin'); 7 | 8 | var alias = { 9 | 'react-dom': '@hot-loader/react-dom', 10 | }; 11 | 12 | var fileExtensions = [ 13 | 'jpg', 14 | 'jpeg', 15 | 'png', 16 | 'gif', 17 | 'eot', 18 | 'otf', 19 | 'svg', 20 | 'ttf', 21 | 'woff', 22 | 'woff2', 23 | ]; 24 | 25 | var options = { 26 | mode: process.env.NODE_ENV || 'development', 27 | entry: { 
28 | popup: path.join(__dirname, 'src', 'entry', 'popup.tsx'), 29 | content: path.join(__dirname, 'src', 'entry', 'content.ts'), 30 | panel: path.join(__dirname, 'src', 'entry', 'panel.tsx'), 31 | background: path.join(__dirname, 'src', 'entry', 'background.ts'), 32 | }, 33 | chromeExtensionBoilerplate: { 34 | notHotReload: ['content'], 35 | }, 36 | output: { 37 | path: path.resolve(__dirname, 'build/js'), 38 | filename: '[name].bundle.js', 39 | publicPath: 'js/', 40 | }, 41 | module: { 42 | rules: [ 43 | { 44 | // look for .css or .scss files 45 | test: /\.(css|scss)$/, 46 | // in the `src` directory 47 | use: [ 48 | { 49 | loader: 'style-loader', 50 | }, 51 | { 52 | loader: 'css-loader', 53 | }, 54 | { 55 | loader: 'sass-loader', 56 | options: { 57 | sourceMap: true, 58 | }, 59 | }, 60 | ], 61 | }, 62 | { 63 | test: new RegExp('.(' + fileExtensions.join('|') + ')$'), 64 | loader: 'file-loader', 65 | options: { 66 | name: '[name].[ext]', 67 | }, 68 | exclude: /node_modules/, 69 | }, 70 | { 71 | test: /\.html$/, 72 | loader: 'html-loader', 73 | exclude: /node_modules/, 74 | }, 75 | { test: /\.(ts|tsx)$/, loader: 'ts-loader', exclude: /node_modules/ }, 76 | { 77 | test: /\.(js|jsx)$/, 78 | use: [ 79 | { 80 | loader: 'source-map-loader', 81 | }, 82 | { 83 | loader: 'babel-loader', 84 | }, 85 | ], 86 | exclude: /node_modules/, 87 | }, 88 | ], 89 | }, 90 | resolve: { 91 | alias: alias, 92 | extensions: fileExtensions 93 | .map((extension) => '.' 
+ extension) 94 | .concat(['.js', '.jsx', '.ts', '.tsx', '.css']), 95 | }, 96 | plugins: [ 97 | new webpack.ProgressPlugin(), 98 | // clean the build folder 99 | new CleanWebpackPlugin({ 100 | verbose: true, 101 | cleanStaleWebpackAssets: true, 102 | }), 103 | // expose and write the allowed env vars on the compiled bundle 104 | new webpack.EnvironmentPlugin(['NODE_ENV', 'npm_package_version']), 105 | new CopyWebpackPlugin({ 106 | patterns: [ 107 | { 108 | from: 'manifest.json', 109 | to: path.join(__dirname, 'build'), 110 | force: true, 111 | transform: function (content) { 112 | return Buffer.from( 113 | JSON.stringify( 114 | { 115 | ...JSON.parse(content.toString()), 116 | version: process.env.npm_package_version, 117 | }, 118 | null, 119 | 2 120 | ) 121 | ); 122 | }, 123 | }, 124 | ], 125 | }), 126 | new CopyWebpackPlugin({ 127 | patterns: [ 128 | { 129 | from: 'src/styles/*.css', 130 | to: path.join(__dirname, 'build/css/[name].[ext]'), 131 | force: true, 132 | }, 133 | ], 134 | }), 135 | new CopyWebpackPlugin({ 136 | patterns: [ 137 | { 138 | from: 'public/*.png', 139 | to: path.join(__dirname, 'build/[name].[ext]'), 140 | force: true, 141 | }, 142 | ], 143 | }), 144 | new CopyWebpackPlugin({ 145 | patterns: [ 146 | { 147 | from: '_locales', 148 | to: path.join(__dirname, 'build/_locales'), 149 | force: true, 150 | }, 151 | ], 152 | }), 153 | new HtmlWebpackPlugin({ 154 | template: path.join(__dirname, 'template', 'index.html'), 155 | filename: path.join(__dirname, 'build', 'popup.html'), 156 | chunks: ['popup'], 157 | cache: false, 158 | }), 159 | new HtmlWebpackPlugin({ 160 | template: path.join(__dirname, 'template', 'index.html'), 161 | filename: path.join(__dirname, 'build', 'panel.html'), 162 | chunks: ['panel'], 163 | cache: false, 164 | }), 165 | ], 166 | infrastructureLogging: { 167 | level: 'info', 168 | }, 169 | }; 170 | 171 | if (process.env.NODE_ENV === 'development') { 172 | options.devtool = 'cheap-module-source-map'; 173 | } else { 174 | 
options.optimization = { 175 | minimize: true, 176 | minimizer: [ 177 | new TerserPlugin({ 178 | extractComments: false, 179 | }), 180 | ], 181 | }; 182 | } 183 | 184 | module.exports = options; 185 | --------------------------------------------------------------------------------