├── .gitignore
├── collaborators.md
├── contributing.md
├── examples
└── hoyt.js
├── index.js
├── package.json
├── readme.md
├── renderer.js
├── test
├── cats.html
├── cool.html
├── index.html
└── test.js
├── webview.js
└── window.html
/.gitignore:
--------------------------------------------------------------------------------
1 | node_modules
2 | socket-client-bundle.js
--------------------------------------------------------------------------------
/collaborators.md:
--------------------------------------------------------------------------------
1 | ## Collaborators
2 |
3 | electron-microscope is only possible due to the excellent work of the following collaborators:
4 |
5 |
8 | Error: Could not addkarissa on npm
9 |
--------------------------------------------------------------------------------
/contributing.md:
--------------------------------------------------------------------------------
1 | ## tests
2 |
3 | Electron doesn't run on travis (to my knowledge), so please run `npm test` before making a pull request
--------------------------------------------------------------------------------
/examples/hoyt.js:
--------------------------------------------------------------------------------
1 | var createMicroscope = require('../')
2 | var electron = require('electron')
3 |
// Pass the 'disable-http-cache' switch to Chromium before the app is ready.
electron.app.commandLine.appendSwitch('disable-http-cache', true)

// Start scraping once Electron reports it is ready.
electron.app.on('ready', startScrape)

/**
 * Create a microscope, clear its session storage, open the taxa list page
 * and hand the scope over to `loop`. Any error along the way is thrown.
 */
function startScrape () {
  createMicroscope(function onScope (err, scope) {
    if (err) throw err
    var session = scope.window.webContents.session
    // clears localstorage state
    session.clearStorageData(function onCleared (err) {
      if (err) throw err
      var homeUrl = 'http://hoytarboretum.gardenexplorer.org/taxalist.aspx'
      scope.loadURL(homeUrl, function onHome (err) {
        if (err) throw err
        console.log('loaded home page')
        loop(scope)
      })
    })
  })
}
20 |
/**
 * Scrape one letter page, then recurse.
 *
 * Runs `clickNextLetter` on the page, waits for the resulting load, prints
 * every species reported by `getSpecies`, navigates the webview back and,
 * once that load finishes, starts over. The scope is destroyed when the
 * click reports an error or when a load fails.
 *
 * Each success/failure pair of `once` handlers removes its sibling when it
 * fires, so no stale listener accumulates across iterations or runs after
 * the scope has been destroyed.
 *
 * @param {Object} scope - microscope instance (emits load events, has run/destroy/window)
 */
function loop (scope) {
  var clicking = scope.run(clickNextLetter)
  clicking.on('error', function (e) {
    console.error('Error:', e)
    // the click did not happen, so no load is coming: drop the pending handlers
    scope.removeListener('did-fail-load', onLetterFailed)
    scope.removeListener('did-finish-load', onLetterLoaded)
    scope.destroy()
  })
  scope.once('did-fail-load', onLetterFailed)
  scope.once('did-finish-load', onLetterLoaded)

  function onLetterFailed (error) {
    scope.removeListener('did-finish-load', onLetterLoaded)
    console.error('Failed to load', error)
    scope.destroy()
  }

  function onLetterLoaded () {
    scope.removeListener('did-fail-load', onLetterFailed)
    var species = scope.run(getSpecies)
    species.on('data', function (d) {
      console.log('Species', d.toString() ? d.toString() : d)
    })
    species.on('finish', function () {
      console.log('go back')
      scope.window.webContents.executeJavaScript("document.querySelector('webview').goBack()")
      scope.once('did-fail-load', onBackFailed)
      scope.once('did-finish-load', onBackLoaded)
    })
  }

  function onBackFailed (error) {
    scope.removeListener('did-finish-load', onBackLoaded)
    console.error('Failed to go back', error)
    scope.destroy()
  }

  function onBackLoaded () {
    scope.removeListener('did-fail-load', onBackFailed)
    loop(scope)
  }
}
49 |
50 | // these two functions are executed on the page, .toString() is called on them!
/**
 * Runs inside the page: sends the text of every bold node inside a link
 * within `.taxalist`, then signals completion.
 *
 * @param {Function} send - called once per matched node with its innerText
 * @param {Function} done - called with no arguments after the last send
 */
function getSpecies (send, done) {
  var boldNames = document.querySelectorAll('.taxalist a b')
  Array.prototype.forEach.call(boldNames, function (node) {
    send(node.innerText)
  })
  done()
}
56 |
/**
 * Runs inside the page: clicks the next not-yet-clicked letter button.
 *
 * The index of the next button is kept in localStorage under 'last-clicked'
 * so it survives the page navigations the clicks cause.
 *
 * @param {Function} send - unused; present to match the scraper signature
 * @param {Function} done - called with an Error once every button has been
 *   clicked, otherwise with no arguments after the click
 */
function clickNextLetter (send, done) {
  var links = document.querySelectorAll('.content input[type="button"]')
  // getItem returns null (never undefined) when nothing has been stored yet
  var stored = window.localStorage.getItem('last-clicked')
  var index = stored === null ? 0 : parseInt(stored, 10)
  // a corrupt stored value restarts from the first button instead of yielding NaN
  if (isNaN(index) || index < 0) index = 0
  var link = links[index]
  if (!link) return done(new Error('clicked all links'))
  window.localStorage.setItem('last-clicked', String(index + 1))
  link.click()
  done()
}
68 |
--------------------------------------------------------------------------------
/index.js:
--------------------------------------------------------------------------------
1 | var crypto = require('crypto')
2 | var path = require('path')
3 | var electron = require('electron')
4 | var through = require('through2')
5 | var events = require('events')
6 | var inherits = require('inherits')
7 | var debug = require('debug')('electron-microscope')
8 | var BrowserWindow = electron.BrowserWindow
9 |
10 | module.exports = Microscope
11 |
/**
 * Create a microscope: a BrowserWindow that loads window.html and relays
 * webview events from that window as events on this emitter.
 *
 * Can be called with or without `new`.
 *
 * @param {Object} [opts] - options, stored on `this.opts`; may be omitted
 * @param {Function} [ready] - called exactly once with `(null, scope)` when
 *   window.html has loaded, or with `(error)` when it failed to load
 */
function Microscope (opts, ready) {
  if (!(this instanceof Microscope)) return new Microscope(opts, ready)
  events.EventEmitter.call(this)
  var self = this
  if (typeof opts === 'function') {
    ready = opts
    opts = {}
  }
  this.opts = opts || {}

  // A load can report a failure and still finish afterwards; only the first
  // outcome is reported, and a missing callback is tolerated.
  var settled = false
  function settle (err) {
    if (settled) return
    settled = true
    if (typeof ready !== 'function') return
    if (err) ready(err)
    else ready(null, self)
  }

  this.window = new BrowserWindow({
    width: 800,
    height: 600,
    show: true
  })
  var contents = this.window.webContents

  contents.once('did-finish-load', function () {
    debug('did-finish-load window.html')
    settle(null)
  })
  // the first argument of this event is the event object, not an error
  contents.once('did-fail-load', function (event, errorCode, errorDescription) {
    debug('did-fail-load window.html', errorCode, errorDescription)
    var err = new Error(errorDescription || 'failed to load window.html')
    err.code = errorCode
    settle(err)
  })

  // ipcMain is shared by every window: only relay messages sent by our own
  // window, and stop listening once that window is gone.
  function onWebviewEvent (event, channel, data) {
    if (event.sender !== contents) return
    debug('webview-event', channel, data)
    self.emit(channel, data)
  }
  electron.ipcMain.on('webview-event', onWebviewEvent)
  this.window.once('closed', function () {
    electron.ipcMain.removeListener('webview-event', onWebviewEvent)
  })

  // path.join would collapse the '//' of the scheme, so prepend it afterwards
  this.window.loadURL('file://' + path.join(__dirname, 'window.html'))
}
40 |
41 | inherits(Microscope, events.EventEmitter)
42 |
/**
 * Ask the window to load `url` in its webview.
 *
 * @param {string} url - address to load
 * @param {Function} [cb] - called with `(error)` when this scope's window
 *   reports 'webview-did-finish-load'; `error` is whatever the window sent
 *   (nothing on success)
 */
Microscope.prototype.loadURL = function (url, cb) {
  debug('start loadURL', url)
  var contents = this.window.webContents
  if (cb) {
    // ipcMain is shared by all windows, so wait for the message coming from
    // our own window rather than the first one on the channel
    electron.ipcMain.on('webview-did-finish-load', function onLoaded (event, error) {
      if (event.sender !== contents) return
      electron.ipcMain.removeListener('webview-did-finish-load', onLoaded)
      debug('finish loadURL', url, error || '')
      cb(error)
    })
  }
  // send through webContents; BrowserWindow#send is only a deprecated alias
  contents.send('load-url', url)
}
53 |
/**
 * Run `code` on the currently loaded page and stream back what it sends.
 *
 * Each call gets a random id; the window reports data on '<id>-send-data'
 * and completion on '<id>-done-running'.
 *
 * @param {string|Function} code - scraper source; functions are stringified
 * @returns {Stream} stream that receives every value the code sends, ends
 *   when the code calls done, and is destroyed with the reported error if
 *   done was called with one
 */
Microscope.prototype.run = function (code) {
  if (typeof code === 'function') code = code.toString()
  var outStream = through()
  var id = crypto.randomBytes(16).toString('hex')
  var dataChannel = id + '-send-data'

  function onData (event, data) {
    outStream.push(data)
  }

  electron.ipcMain.on(dataChannel, onData)
  electron.ipcMain.once(id + '-done-running', function (event, err) {
    // the id is never reused, so drop the data listener instead of leaking one per run
    electron.ipcMain.removeListener(dataChannel, onData)
    if (err) outStream.destroy(err)
    else outStream.end()
  })
  // send through webContents; BrowserWindow#send is only a deprecated alias
  this.window.webContents.send('run', id, code)
  return outStream
}
68 |
/**
 * Close the window this scope uses. Safe to call more than once: a window
 * that has already been destroyed is left alone instead of throwing.
 */
Microscope.prototype.destroy = function () {
  if (this.window && !this.window.isDestroyed()) this.window.close()
}
72 |
--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "electron-microscope",
3 | "description": "Use electron to inspect websites and extract data. useful for automation, testing, web scraping, etc",
4 | "version": "2.0.0",
5 | "main": "index.js",
6 | "scripts": {
7 | "test": "standard && electron test/test.js"
8 | },
9 | "repository": {
10 | "type": "git",
11 | "url": "git+https://github.com/maxogden/electron-microscope.git"
12 | },
13 | "author": "",
14 | "license": "BSD-2-Clause",
15 | "bugs": {
16 | "url": "https://github.com/maxogden/electron-microscope/issues"
17 | },
18 | "homepage": "https://github.com/maxogden/electron-microscope#readme",
19 | "dependencies": {
20 | "debug": "^2.2.0",
21 | "domify": "^1.4.0",
22 | "inherits": "^2.0.1",
23 | "through2": "^2.0.1"
24 | },
25 | "standard": {
26 | "ignore": [
27 | "test/scrapers"
28 | ]
29 | },
30 | "devDependencies": {
31 | "concat-stream": "^1.5.1",
32 | "electron-prebuilt": "^0.36.7",
33 | "pump": "^1.0.1",
34 | "standard": "^6.0.4",
35 | "tape": "^4.4.0"
36 | }
37 | }
38 |
--------------------------------------------------------------------------------
/readme.md:
--------------------------------------------------------------------------------
1 | # electron-microscope
2 |
3 | Use [electron](http://electron.atom.io/) to load websites and extract data. Intended for automation, testing, web scraping, etc.
4 |
5 | Loads URLs inside an electron [webview tag](https://github.com/atom/electron/blob/master/docs/api/web-view-tag.md), allows you to execute code on them and stream data from the pages back to your main process.
6 |
7 | Run this headlessly on Linux using `xvfb-run`.
8 |
9 | Please note this is intended to be a fairly low level library that tries to not add much on top of what Electron is doing under the hood, so things that you might think are simple to do can turn out to be relatively complex due to the way web browser events end up working.
10 |
11 | ## usage
12 |
13 | Use this in an electron app:
14 |
15 | ```js
16 | var electron = require('electron')
17 | var createMicroscope = require('electron-microscope')
18 |
19 | electron.app.on('ready', function () {
20 | createMicroscope(function (err, scope) {
21 | if (err) throw err
22 | // use your new microscope
23 | })
24 | })
25 | ```
26 |
27 | Run it with electron:
28 |
29 | ```sh
30 | $ npm install electron-prebuilt -g
31 | $ electron my-code.js
32 | ```
33 |
34 | ## examples
35 |
36 | See the `test/` and `examples/` folders
37 |
38 | ## API
39 |
40 | ### `require('electron-microscope')(options, ready)`
41 |
42 | Requiring the module returns a constructor function that you use to create a new instance. Pass it an `options` object and a `ready` callback that will be called with `(error, scope)`. `scope` is your new instance all ready to go.
43 |
44 | ### scope.window
45 |
46 | The electron [BrowserWindow](https://github.com/atom/electron/blob/master/docs/api/browser-window.md) instance, AKA the renderer, which contains the `<webview>` that pages are loaded in.
47 |
48 | Currently because there are three node processes at play (main, renderer, webview), to access `webview` APIs you have to go through the `window`, e.g.:
49 |
50 | ```js
51 | scope.window.webContents.executeJavaScript("document.querySelector('webview').goBack()")
52 | ```
53 |
54 | ### `scope.loadURL(url, cb)`
55 |
56 | Load a `url`, and call `cb` with `(err)` when loading is done. If there was a problem loading the page `err` will be the error, otherwise it means it loaded successfully
57 |
58 | ### `var outputStream = scope.run(code)`
59 |
60 | Run `code` on the currently loaded page. Run this after calling `loadURL`. Code must be a string, if it is a `function` then `.toString()` will be called on it. `scope.run` returns a readable stream that emits data generated by your code.
61 |
62 | Uses the [webview.executeJavascript](https://github.com/atom/electron/blob/master/docs/api/web-view-tag.md#webviewexecutejavascriptcode-usergesture) electron API, which doesn't provide an error handling mechanism. Electron microscope wraps your code in a `try/catch` and if an error occurs it will be emitted on the stream. However, if you have a syntax error it will likely not be caught, so it may appear that nothing is happening.
63 |
64 | Your code must be a function that has this template:
65 |
66 | ```js
67 | function (send, done) {
68 | // put your custom code here
69 | // call 'send(data)' to write data to the stream
70 | // call 'done()' to end the stream
71 | // calling send is optional, but you must eventually call done to end the stream
72 | }
73 | ```
74 |
75 | For example:
76 |
77 | ```js
78 | var code = `function (send, done) {
79 | for (var i = 0; i < 5; i++) send(i)
80 | done()
81 | }`
82 |
83 | var output = scope.run(code)
84 |
85 | output.on('data', function (data) {
86 | // will get called for every time send is called above
87 | // data will be the value passed to send
88 | // in this case 5 times: 0, 1, 2, 3, 4
89 | })
90 |
91 | output.on('error', function (error) {
92 | // will get called if your code throws an exception
93 | // error will be an object with .message and .stack from the thrown error object
94 | })
95 | ```
96 |
97 | ### scope.on('will-navigate', cb)
98 |
99 | Emitted when the page wants to start navigation. It can happen when the window.location object is changed or a link is clicked in the page.
100 |
101 | Calls `cb` with the navigation event object (the destination is available as `.url`), forwarded from [this event](https://github.com/atom/electron/blob/master/docs/api/web-view-tag.md#event-will-navigate).
102 |
103 | ### scope.on('did-finish-load', cb)
104 |
105 | Fired when the navigation is done, i.e. the spinner of the tab has stopped spinning and the `onload` event was dispatched.
106 |
107 | Calls `cb` with no arguments, forwarded from [this event](https://github.com/atom/electron/blob/master/docs/api/web-view-tag.md#event-did-finish-load).
108 |
109 | ### scope.on('did-fail-load', cb)
110 |
111 | This event is like `did-finish-load`, but fired when the load failed or was cancelled.
112 |
113 | Calls `cb` with `(error)`, forwarded from [this event](https://github.com/atom/electron/blob/master/docs/api/web-view-tag.md#event-did-fail-load).
114 |
115 | ### scope.on('did-start-loading', cb)
116 |
117 | Corresponds to the points in time when the spinner of the tab starts spinning.
118 |
119 | Calls `cb` with no arguments, forwarded from [this event](https://github.com/atom/electron/blob/master/docs/api/web-view-tag.md#event-did-start-loading).
120 |
121 | ### scope.on('did-stop-loading', cb)
122 |
123 | Corresponds to the points in time when the spinner of the tab stops spinning.
124 |
125 | Calls `cb` with no arguments, forwarded from [this event](https://github.com/atom/electron/blob/master/docs/api/web-view-tag.md#event-did-stop-loading).
126 |
127 | ### scope.destroy()
128 |
129 | Call when you don't want to use the scope anymore. Causes the `browser-window` that electron-microscope uses internally to close, which may cause your electron app to exit if you do not have any other active windows.
130 |
--------------------------------------------------------------------------------
/renderer.js:
--------------------------------------------------------------------------------
1 | var electron = require('electron')
2 | var domify = require('domify')
3 |
// Registers the renderer-side handlers for the 'load-url' and 'run' messages
// that index.js sends to the window.
module.exports = function () {
  // 'load-url': replace the body's content with a newly created element and
  // relay that element's navigation/load events to the main process.
  electron.ipcRenderer.on('load-url', function (event, url) {
    // NOTE(review): the markup passed to domify is an empty string here, and
    // `url` is not used in this handler as written. The 'run' handler below
    // looks up a <webview> element, so the original markup presumably created
    // one pointing at `url` — confirm against the upstream file.
    var webview = domify('')
    document.body.innerHTML = ''
    document.body.appendChild(webview)
    // Every relayed event goes out on the shared 'webview-event' channel with
    // the event name as first argument.
    webview.addEventListener('will-navigate', function (newUrl) {
      electron.ipcRenderer.send('webview-event', 'will-navigate', newUrl)
    })
    // Both load outcomes also send 'webview-did-finish-load'; the failure case
    // passes the event along as the error argument.
    webview.addEventListener('did-finish-load', function () {
      electron.ipcRenderer.send('webview-event', 'did-finish-load')
      electron.ipcRenderer.send('webview-did-finish-load')
    })
    webview.addEventListener('did-fail-load', function (error) {
      electron.ipcRenderer.send('webview-event', 'did-fail-load', error)
      electron.ipcRenderer.send('webview-did-finish-load', error)
    })
    webview.addEventListener('did-start-loading', function () {
      electron.ipcRenderer.send('webview-event', 'did-start-loading')
    })
    webview.addEventListener('did-stop-loading', function () {
      electron.ipcRenderer.send('webview-event', 'did-stop-loading')
    })
  })

  // 'run': execute `code` inside the current webview and forward the messages
  // it sends to the host on to the main process, tagged with this run's `id`.
  electron.ipcRenderer.on('run', function (event, id, code) {
    var webview = document.querySelector('webview')
    webview.addEventListener('ipc-message', onIPC)

    // Re-send each webview message on the channel '<id>-<channel>' with its
    // original arguments; stop listening once 'done-running' has arrived.
    function onIPC (event) {
      electron.ipcRenderer.send.apply(null, [id + '-' + event.channel].concat(event.args))
      if (event.channel === 'done-running') {
        webview.removeEventListener('ipc-message', onIPC)
      }
    }

    webview.executeJavaScript(ipcWrap(code))
  })
}
42 |
/**
 * Build the script that is executed in the page for one run.
 *
 * The script calls the scraper source with the page-level send/done helpers
 * and reports any exception it throws synchronously through the done helper.
 *
 * @param {string} code - source text of a `function (send, done) { ... }`
 * @returns {string} self-invoking script text
 */
function ipcWrap (code) {
  var invocation = '('.concat(code, ')(ELECTRON_MICROSCOPE_SEND, ELECTRON_MICROSCOPE_DONE)')
  var scriptLines = [
    ';(function () {',
    'try {',
    invocation,
    '} catch (err) {',
    'ELECTRON_MICROSCOPE_DONE(err)',
    '}',
    '})();',
    ''
  ]
  return scriptLines.join('\n')
}
53 |
--------------------------------------------------------------------------------
/test/cats.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | electron-microscope cats page
6 |
7 |
8 | cats page
9 | go to cool page
10 |
11 |
--------------------------------------------------------------------------------
/test/cool.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | electron-microscope cool test page
6 |
7 |
8 | cool
9 | go to home page
10 |
11 |
--------------------------------------------------------------------------------
/test/index.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | electron-microscope test page
6 |
7 |
8 | bar
9 |
10 |
--------------------------------------------------------------------------------
/test/test.js:
--------------------------------------------------------------------------------
1 | var test = require('tape')
2 | var concat = require('concat-stream')
3 | var pump = require('pump')
4 | var createMicroscope = require('../')
5 | var electron = require('electron')
6 | var execspawn = require('npm-execspawn')
7 |
8 | electron.app.commandLine.appendSwitch('disable-http-cache', true)
9 |
10 | var server, scope
11 |
// The tests below run in order and share `server` and `scope`.

// Ends once the app is ready. Also arranges shutdown: when the last window
// closes, the test server is killed and the app quits after it has closed.
test('wait for electron', function (t) {
  electron.app.on('window-all-closed', function () {
    server.kill()
    server.on('close', function () {
      electron.app.quit()
    })
  })
  electron.app.on('ready', function () {
    t.ok(true, 'electron ready')
    t.end()
  })
})

// Spawns http-server on port 54321 with the test directory as cwd; passes
// when the first chunk written to stdout contains 'Starting up'.
test('start test server', function (t) {
  server = execspawn('http-server ./ -p 54321', {cwd: __dirname})
  server.stdout.once('data', function (ch) {
    if (ch.toString().indexOf('Starting up') > -1) t.ok(true, 'server started')
    else t.ok(false, ch)
    t.end()
  })
})

// Creates the microscope reused by the later tests, loads the server root and
// expects the scraper to stream back the text of the `.foo` element ('bar').
test('retrieve the innerText of a div', function (t) {
  createMicroscope(function (err, newScope) {
    scope = newScope
    if (err) t.ifError(err)
    scope.loadURL('http://localhost:54321', function (err) {
      if (err) t.ifError(err)
      var scraper = `function (send, done) {
send(document.querySelector('.foo').innerText)
done()
}`
      var output = scope.run(scraper)
      output.pipe(concat(function (out) {
        t.equal(out.toString(), 'bar', 'output matched')
        t.end()
      }))
    })
  })
})

// Code that calls an undefined function must surface as a stream error that
// carries both .message and .stack; the concat callback must not be reached.
test('invalid code causes stream error', function (t) {
  scope.loadURL('http://localhost:54321/cool.html', function (err) {
    if (err) t.ifError(err)
    var code = 'function () { donkeys() }'
    var output = scope.run(code)
    var concatter = concat(function (out) {
      t.ok(false, 'should not get here')
    })
    pump(output, concatter, function (err) {
      t.equal(err.message, 'donkeys is not defined', 'got error message')
      t.ok(!!err.stack, 'error has .stack')
      t.end()
    })
  })
})

// Clicks the link on cats.html and expects four assertions in total: empty
// scraper output, a 'will-navigate' to cool.html, a 'did-finish-load', and
// 'cool' scraped from the new page. The scope is destroyed at the end.
test('load a new page', function (t) {
  t.plan(4)
  scope.loadURL('http://localhost:54321/cats.html', function (err) {
    if (err) t.ifError(err)
    var scraper = `function (send, done) {
document.querySelector('a.cool-button').click()
done()
}`
    var output = scope.run(scraper)
    output.pipe(concat(function (out) {
      t.equal(out.toString(), '', 'no output')
    }))
    scope.on('will-navigate', function (newUrl) {
      t.equal(newUrl.url, 'http://localhost:54321/cool.html', 'navigating to cool.html')
    })
    scope.on('did-finish-load', function () {
      t.ok(true, 'stopped loading')
      var coolScraper = `function (send, done) {
send(document.querySelector('.foo').innerText)
done()
}`
      var coolOutput = scope.run(coolScraper)
      coolOutput.pipe(concat(function (out) {
        t.equal(out.toString(), 'cool', 'got cool')
        scope.destroy()
      }))
    })
  })
})
98 |
--------------------------------------------------------------------------------
/webview.js:
--------------------------------------------------------------------------------
1 | window.ELECTRON_MICROSCOPE_IPC = require('ipc')
2 |
3 | window.ELECTRON_MICROSCOPE_SEND = function send (obj) {
4 | window.ELECTRON_MICROSCOPE_IPC.sendToHost('send-data', obj)
5 | }
6 |
7 | window.ELECTRON_MICROSCOPE_DONE = function done (error) {
8 | if (error && error.stack && error.message) {
9 | error = {message: error.message, stack: error.stack}
10 | }
11 | window.ELECTRON_MICROSCOPE_IPC.sendToHost('done-running', error)
12 | }
13 |
--------------------------------------------------------------------------------
/window.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
7 |
10 |
11 |
12 |
13 |
14 |
--------------------------------------------------------------------------------