├── .dockerignore ├── lib ├── tools │ ├── any.js │ └── wait.js ├── actions │ ├── ActionBack.js │ ├── ActionHasRedirect.js │ ├── ActionOpen.js │ ├── ActionSnapshot.js │ ├── ActionParse.js │ ├── ActionPause.js │ ├── ActionMouseClick.js │ ├── ActionMouseUp.js │ ├── ActionUrl.js │ ├── ActionMouseDown.js │ ├── ActionProvideRules.js │ ├── ActionWaitForPage.js │ ├── ActionNot.js │ ├── ActionWaitForQuery.js │ ├── ActionOr.js │ ├── ActionWaitForElement.js │ ├── ActionExist.js │ ├── ActionClick.js │ ├── ActionFocus.js │ ├── ActionBlur.js │ ├── ActionWait.js │ ├── ActionWaitForVisible.js │ ├── ActionWaitForPattern.js │ ├── ActionCondition.js │ ├── ActionChangeElement.js │ ├── ActionType.js │ ├── Action.js │ ├── ActionWaitForCases.js │ └── actionsFactory.js ├── transforms │ ├── TransformDecodeUri.js │ ├── TransformEncodeUri.js │ ├── TransformEqual.js │ ├── TransformBase64Decode.js │ ├── TransformTrim.js │ ├── TransformDecodeHtml.js │ ├── TransformPick.js │ ├── TransformPluck.js │ ├── TransformCompare.js │ ├── TransformGet.js │ ├── TransformJoin.js │ ├── TransformDate.js │ ├── TransformReplace.js │ ├── TransformCombine.js │ ├── TransformSplit.js │ ├── TransformMatch.js │ ├── Transform.js │ └── transformsFactory.js ├── Storage.js ├── Scope.js ├── Transforms.js ├── Actions.js └── Parser.js ├── docker ├── entrypoint.sh ├── publish.sh ├── build.js └── index.js ├── Dockerfile ├── webpack.config.js ├── docker-compose.yml ├── browser.js ├── .gitignore ├── test ├── unit │ ├── Storage.test.js │ ├── Scope.test.js │ ├── transforms │ │ ├── transformsFactory.test.js │ │ ├── Transform.test.js │ │ └── transforms.test.js │ └── Transforms.test.js ├── tools.js └── integration │ ├── parser.test.js │ └── actions │ └── actions.test.js ├── LICENSE ├── package.json ├── .circleci └── config.yml ├── README.md └── jest.config.js /.dockerignore: -------------------------------------------------------------------------------- 1 | * 2 | !Dockerfile 3 | !package.json 4 | !yarn.lock 5 | !docker/index.js 6 | 
!docker/build.js 7 | !docker/entrypoint.sh 8 | -------------------------------------------------------------------------------- /lib/tools/any.js: -------------------------------------------------------------------------------- 1 | function reverse(promise) { 2 | return new Promise((resolve, reject) => Promise.resolve(promise).then(reject, resolve)); 3 | } 4 | 5 | module.exports = function promiseAny(iterable) { 6 | return reverse(Promise.all([...iterable].map(reverse))); 7 | }; 8 | -------------------------------------------------------------------------------- /lib/actions/ActionBack.js: -------------------------------------------------------------------------------- 1 | /** 2 | * @fileOverview 3 | * 4 | * Navigates to previous page 5 | */ 6 | 7 | const Action = require('./Action'); 8 | 9 | class ActionBack extends Action { 10 | async perform () { 11 | return this._env.back(); 12 | } 13 | } 14 | 15 | module.exports = ActionBack; 16 | 17 | -------------------------------------------------------------------------------- /lib/actions/ActionHasRedirect.js: -------------------------------------------------------------------------------- 1 | /** 2 | * @fileOverview 3 | * 4 | */ 5 | 6 | const Action = require('./Action'); 7 | 8 | class ActionHasRedirect extends Action { 9 | async perform() { 10 | return this._env.hasRedirect(this._options.url); 11 | } 12 | } 13 | 14 | module.exports = ActionHasRedirect; 15 | 16 | -------------------------------------------------------------------------------- /lib/actions/ActionOpen.js: -------------------------------------------------------------------------------- 1 | /** 2 | * @fileOverview 3 | * 4 | * Opens a page 5 | */ 6 | 7 | const Action = require('./Action'); 8 | 9 | class ActionOpen extends Action { 10 | perform() { 11 | const { url } = this._options; 12 | return this._env.goto(url); 13 | } 14 | } 15 | 16 | module.exports = ActionOpen; 17 | 18 | -------------------------------------------------------------------------------- 
/lib/actions/ActionSnapshot.js: -------------------------------------------------------------------------------- 1 | /** 2 | * @fileOverview 3 | * 4 | * Performs snapshot via action 5 | */ 6 | 7 | const Action = require('./Action'); 8 | 9 | class ActionSnapshot extends Action { 10 | async perform() { 11 | return this._env.snapshot(this._options.name); 12 | } 13 | } 14 | 15 | module.exports = ActionSnapshot; 16 | 17 | -------------------------------------------------------------------------------- /lib/transforms/TransformDecodeUri.js: -------------------------------------------------------------------------------- 1 | /** 2 | * @fileOverview 3 | * 4 | * This transform decodes URI 5 | */ 6 | 7 | const Transform = require('./Transform'); 8 | 9 | class TransformDecodeUri extends Transform { 10 | doTransform() { 11 | return decodeURI(this._value); 12 | } 13 | } 14 | 15 | module.exports = TransformDecodeUri; 16 | -------------------------------------------------------------------------------- /lib/transforms/TransformEncodeUri.js: -------------------------------------------------------------------------------- 1 | /** 2 | * @fileOverview 3 | * 4 | * This transform encodes URI 5 | */ 6 | 7 | const Transform = require('./Transform'); 8 | 9 | class TransformEncodeUri extends Transform { 10 | doTransform() { 11 | return encodeURI(this._value); 12 | } 13 | } 14 | 15 | module.exports = TransformEncodeUri; 16 | -------------------------------------------------------------------------------- /lib/actions/ActionParse.js: -------------------------------------------------------------------------------- 1 | /** 2 | * @fileOverview 3 | * 4 | * Allows to parse passed rules by action 5 | */ 6 | 7 | const Action = require('./Action'); 8 | 9 | class ActionParse extends Action { 10 | async perform() { 11 | return this._parser.processRule(this._options.rules || {}); 12 | } 13 | } 14 | 15 | module.exports = ActionParse; 16 | 17 | 
-------------------------------------------------------------------------------- /lib/transforms/TransformEqual.js: -------------------------------------------------------------------------------- 1 | /** 2 | * @fileOverview 3 | * 4 | * This transform compares value with options.value 5 | */ 6 | 7 | const Transform = require('./Transform'); 8 | 9 | class TransformEqual extends Transform { 10 | doTransform() { 11 | return this._value === this._options.value; 12 | } 13 | } 14 | 15 | module.exports = TransformEqual; 16 | -------------------------------------------------------------------------------- /docker/entrypoint.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | pipe=/tmp/goose-pipe 4 | if [ ! -p ${pipe} ]; then 5 | mkfifo ${pipe} 6 | fi 7 | 8 | # goose-parser uses stdout to deliver parsing result in cli mode 9 | # to do so, it forwards stdout -> /dev/null and pipe -> stdout 10 | # all debug/logs information is provided to stderr 11 | node index.js "$@" > /dev/null & 12 | cat ${pipe} 13 | 14 | -------------------------------------------------------------------------------- /lib/actions/ActionPause.js: -------------------------------------------------------------------------------- 1 | /** 2 | * @fileOverview Pause execution for a certain number of milliseconds (options.timeout) 3 | */ 4 | 5 | const Action = require('./Action'); 6 | 7 | class ActionPause extends Action { 8 | perform () { 9 | return new Promise(resolve => setTimeout(() => resolve(), this._options.timeout)); 10 | } 11 | } 12 | 13 | module.exports = ActionPause; 14 | 15 | -------------------------------------------------------------------------------- /lib/actions/ActionMouseClick.js: -------------------------------------------------------------------------------- 1 | /** 2 | * @fileOverview 3 | * 4 | * Perform MouseClick on the element matched by selector 5 | * 6 | */ 7 | 8 | const Action = require('./Action'); 9 | 10 | class ActionMouseClick extends Action { 11 | 
async perform() { 12 | return this._env.mouseClick(this._selector); 13 | } 14 | } 15 | 16 | module.exports = ActionMouseClick; 17 | -------------------------------------------------------------------------------- /lib/transforms/TransformBase64Decode.js: -------------------------------------------------------------------------------- 1 | /** 2 | * @fileOverview 3 | * 4 | * This transform decodes base64 string 5 | */ 6 | 7 | const Transform = require('./Transform'); 8 | 9 | class TransformBase64Decode extends Transform { 10 | doTransform() { 11 | return new Buffer(this._value, 'base64').toString('ascii'); 12 | } 13 | } 14 | 15 | module.exports = TransformBase64Decode; 16 | -------------------------------------------------------------------------------- /lib/transforms/TransformTrim.js: -------------------------------------------------------------------------------- 1 | /** 2 | * @fileOverview 3 | * 4 | * This transform applies trim function to result 5 | */ 6 | 7 | const Transform = require('./Transform'); 8 | 9 | class TransformTrim extends Transform { 10 | doTransform() { 11 | return typeof this._value === 'string' ? 
this._value.trim() : this._value; 12 | } 13 | } 14 | 15 | module.exports = TransformTrim; 16 | -------------------------------------------------------------------------------- /lib/actions/ActionMouseUp.js: -------------------------------------------------------------------------------- 1 | /** 2 | * @fileOverview 3 | * 4 | * Perform MouseUp on the element matched by selector 5 | */ 6 | 7 | const Action = require('./Action'); 8 | 9 | class ActionMouseUp extends Action { 10 | async perform() { 11 | this.log('mouseUp on %s', this._selector); 12 | return this._env.mouseUp(this._selector); 13 | } 14 | } 15 | 16 | module.exports = ActionMouseUp; 17 | 18 | -------------------------------------------------------------------------------- /lib/actions/ActionUrl.js: -------------------------------------------------------------------------------- 1 | /** 2 | * @fileOverview 3 | * 4 | * Extracts current page url 5 | */ 6 | 7 | const Action = require('./Action'); 8 | 9 | class ActionUrl extends Action { 10 | async perform() { 11 | return this._env.evaluateJs(/* istanbul ignore next */ function () { 12 | return window.location.toString(); 13 | }); 14 | } 15 | } 16 | 17 | module.exports = ActionUrl; 18 | 19 | -------------------------------------------------------------------------------- /lib/transforms/TransformDecodeHtml.js: -------------------------------------------------------------------------------- 1 | /** 2 | * @fileOverview 3 | * 4 | * This transform decodes HTML entity 5 | */ 6 | 7 | const Transform = require('./Transform'); 8 | const entities = require('html-entities').Html5Entities; 9 | 10 | class TransformDecodeHtml extends Transform { 11 | doTransform() { 12 | return entities.decode(this._value); 13 | } 14 | } 15 | 16 | module.exports = TransformDecodeHtml; 17 | -------------------------------------------------------------------------------- /lib/actions/ActionMouseDown.js: -------------------------------------------------------------------------------- 1 | /** 2 
| * @fileOverview 3 | * 4 | * Perform MouseDown on the element matched by selector 5 | */ 6 | 7 | const Action = require('./Action'); 8 | 9 | class ActionMouseDown extends Action { 10 | async perform() { 11 | this.log('mouseDown on %s', this._selector); 12 | return this._env.mouseDown(this._selector); 13 | } 14 | } 15 | 16 | module.exports = ActionMouseDown; 17 | 18 | -------------------------------------------------------------------------------- /lib/transforms/TransformPick.js: -------------------------------------------------------------------------------- 1 | /** 2 | * @fileOverview 3 | * 4 | * This transform retrieves value from result array by options.prop 5 | */ 6 | 7 | const Transform = require('./Transform'); 8 | const pick = require('lodash.pick'); 9 | 10 | class TransformPick extends Transform { 11 | doTransform() { 12 | return pick(this._value, this._options.prop); 13 | } 14 | } 15 | 16 | module.exports = TransformPick; 17 | -------------------------------------------------------------------------------- /lib/transforms/TransformPluck.js: -------------------------------------------------------------------------------- 1 | /** 2 | * @fileOverview 3 | * 4 | * This transform does a map by result array with field options.path 5 | */ 6 | 7 | const Transform = require('./Transform'); 8 | const pluck = require('lodash.pluck'); 9 | 10 | class TransformPluck extends Transform { 11 | doTransform() { 12 | return pluck(this._value, this._options.path); 13 | } 14 | } 15 | 16 | module.exports = TransformPluck; 17 | -------------------------------------------------------------------------------- /lib/transforms/TransformCompare.js: -------------------------------------------------------------------------------- 1 | /** 2 | * @fileOverview 3 | * 4 | * This transform compare value with item, which is retrieved from storage by options.field name 5 | */ 6 | 7 | const Transform = require('./Transform'); 8 | 9 | class TransformCompare extends Transform { 10 | doTransform() 
{ 11 | return this._value === this._storage.get(this._options.field); 12 | } 13 | } 14 | 15 | module.exports = TransformCompare; 16 | -------------------------------------------------------------------------------- /lib/actions/ActionProvideRules.js: -------------------------------------------------------------------------------- 1 | /** 2 | * @fileOverview Action for providing dynamic parsing rules as action result 3 | */ 4 | 5 | const Action = require('./Action'); 6 | 7 | class ActionProvideRules extends Action { 8 | async perform() { 9 | const rules = this._options.rules || {}; 10 | this.log('Providing rules %o', rules); 11 | return Promise.resolve(rules); 12 | } 13 | } 14 | 15 | module.exports = ActionProvideRules; 16 | 17 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | ARG ENVIRONMENT 2 | ARG BASE="redcode/${ENVIRONMENT}" 3 | ARG ENVIRONMENT_VERSION=latest 4 | 5 | FROM ${BASE}:${ENVIRONMENT_VERSION} 6 | 7 | MAINTAINER Andrew Reddikh 8 | 9 | COPY docker/index.js /usr/src/app 10 | COPY docker/build.js /usr/src/app 11 | COPY docker/entrypoint.sh /usr/src/app 12 | RUN node ./build.js && rm ./build.js 13 | RUN yarn install --production --no-progress 14 | 15 | ENTRYPOINT ["/usr/src/app/entrypoint.sh"] 16 | -------------------------------------------------------------------------------- /lib/transforms/TransformGet.js: -------------------------------------------------------------------------------- 1 | /** 2 | * @fileOverview 3 | * 4 | * This transform retrieves value from result by options.path 5 | */ 6 | 7 | const Transform = require('./Transform'); 8 | const get = require('lodash.get'); 9 | 10 | class TransformGet extends Transform { 11 | doTransform() { 12 | const defaultValue = this._options.default || ''; 13 | return get(this._value, this._options.path, defaultValue); 14 | } 15 | } 16 | 17 | module.exports = TransformGet; 18 | 
-------------------------------------------------------------------------------- /lib/transforms/TransformJoin.js: -------------------------------------------------------------------------------- 1 | /** 2 | * @fileOverview 3 | * 4 | * This transform joins result array to string using options.glue 5 | */ 6 | 7 | const Transform = require('./Transform'); 8 | 9 | class TransformJoin extends Transform { 10 | doTransform() { 11 | const glue = this._options.glue !== undefined ? this._options.glue : ' '; 12 | return Array.isArray(this._value) ? this._value.join(glue) : this._value; 13 | } 14 | } 15 | 16 | module.exports = TransformJoin; 17 | -------------------------------------------------------------------------------- /lib/actions/ActionWaitForPage.js: -------------------------------------------------------------------------------- 1 | /** 2 | * @fileOverview 3 | * 4 | * Wait for new page load 5 | */ 6 | 7 | const Action = require('./Action'); 8 | const { waitForEvent } = require('../tools/wait'); 9 | 10 | class ActionWaitForPage extends Action { 11 | async perform() { 12 | return waitForEvent( 13 | this._env, 14 | { type: 'navigation'}, 15 | this._options.breaker, 16 | this._options.timeout 17 | ); 18 | } 19 | } 20 | 21 | module.exports = ActionWaitForPage; 22 | 23 | -------------------------------------------------------------------------------- /lib/actions/ActionNot.js: -------------------------------------------------------------------------------- 1 | /** 2 | * @fileOverview 3 | * 4 | * Inverts a result of the passed actions 5 | * 6 | */ 7 | 8 | const Action = require('./Action'); 9 | 10 | class ActionNot extends Action { 11 | async perform() { 12 | const actions = this._options.actions || []; 13 | const result = await this._actions.performActions(actions, this._selector); 14 | this.log('Inverting original result %s to %s', result, !result); 15 | return !result; 16 | } 17 | } 18 | 19 | module.exports = ActionNot; 20 | 21 | 
-------------------------------------------------------------------------------- /lib/transforms/TransformDate.js: -------------------------------------------------------------------------------- 1 | /** 2 | * @fileOverview 3 | * 4 | * This transform transforms formatted date from one format to another using {@link http://momentjs.com momentjs} 5 | */ 6 | 7 | const Transform = require('./Transform'); 8 | const moment = require('moment'); 9 | 10 | class TransformDate extends Transform { 11 | doTransform() { 12 | return moment(this._value, this._options.from, this._options.locale || 'en') 13 | .format(this._options.to); 14 | } 15 | } 16 | 17 | module.exports = TransformDate; 18 | -------------------------------------------------------------------------------- /webpack.config.js: -------------------------------------------------------------------------------- 1 | const webpack = require('webpack'); 2 | 3 | module.exports = { 4 | entry: './browser.js', 5 | output: { 6 | path: __dirname, 7 | filename: "build/browser.bundle.js" 8 | }, 9 | module: { 10 | loaders: [ 11 | { 12 | test: /\.js$/, 13 | exclude: /(node_modules|bower_components)/, 14 | loader: 'babel' 15 | } 16 | ] 17 | }, 18 | plugins: [ 19 | new webpack.ContextReplacementPlugin(/moment[\/\\]locale$/, /en|ru/) 20 | ] 21 | }; 22 | -------------------------------------------------------------------------------- /lib/actions/ActionWaitForQuery.js: -------------------------------------------------------------------------------- 1 | /** 2 | * @fileOverview 3 | * 4 | * Wait until a request matching the URI pattern given in options.uri happens 5 | */ 6 | 7 | const Action = require('./Action'); 8 | const { waitForEvent } = require('../tools/wait'); 9 | 10 | class ActionWaitForQuery extends Action { 11 | async perform() { 12 | return waitForEvent( 13 | this._env, 14 | { type: 'request', urlPattern: this._options.uri }, 15 | this._options.breaker, 16 | this._options.timeout 17 | ); 18 | } 19 | } 20 | 21 | module.exports = ActionWaitForQuery; 22 | 23 
| -------------------------------------------------------------------------------- /lib/actions/ActionOr.js: -------------------------------------------------------------------------------- 1 | /** 2 | * @fileOverview 3 | * 4 | * Allows to apply Or condition to passed actions 5 | * 6 | */ 7 | 8 | const Action = require('./Action'); 9 | 10 | class ActionOr extends Action { 11 | async perform() { 12 | const actions = this._options.actions || []; 13 | 14 | return actions.reduce(async (promise, action) => { 15 | const result = await promise; 16 | if (result) { 17 | return result; 18 | } 19 | return this._actions.performAction(action); 20 | }, Promise.resolve(false)); 21 | } 22 | } 23 | 24 | module.exports = ActionOr; 25 | 26 | -------------------------------------------------------------------------------- /lib/actions/ActionWaitForElement.js: -------------------------------------------------------------------------------- 1 | /** 2 | * @fileOverview 3 | * Waits for an element to exist on the page 4 | */ 5 | 6 | const ActionWait = require('./ActionWait'); 7 | 8 | class ActionWaitForElement extends ActionWait { 9 | async perform() { 10 | this.log('._waitElement() ' + this._selector); 11 | return this.wait(/* istanbul ignore next */ function (selector) { 12 | return Sizzle(selector).length; 13 | }, function (foundElementsCount) { 14 | return !!foundElementsCount; 15 | }, this._options.breaker, [this._selector]); 16 | } 17 | } 18 | 19 | module.exports = ActionWaitForElement; 20 | 21 | -------------------------------------------------------------------------------- /lib/actions/ActionExist.js: -------------------------------------------------------------------------------- 1 | /** 2 | * @fileOverview 3 | * 4 | * Check if element exists on the page 5 | */ 6 | 7 | const Action = require('./Action'); 8 | 9 | class ActionExist extends Action { 10 | async perform() { 11 | const child = (this._options.child !== undefined) ? 
this._options.child : null; 12 | return this._env.evaluateJs(this._selector, child, /* istanbul ignore next */ function (selector, child) { 13 | const selected = Sizzle(selector); 14 | return selected.length > 0 && (child === null || (selected[0].childNodes[child] !== undefined)); 15 | }); 16 | } 17 | } 18 | 19 | module.exports = ActionExist; 20 | 21 | -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '2' 2 | services: 3 | goose-parser: 4 | build: 5 | context: . 6 | args: 7 | ENVIRONMENT: goose-chrome-environment 8 | ENVIRONMENT_VERSION: 1.0.17 9 | command: https://www.google.com/search?q=goose-parser '{"actions":[{"type":"wait","scope":".gsdfdsf"}],"rules":{"scope":".g","collection":[[{"scope":".r>a h3","name":"name"},{"scope":".r>a:eq(0)","name":"link","attr":"href"}]]}}' 10 | # environment: 11 | # - DEBUG=*,-puppeteer:* 12 | volumes: 13 | - ${PWD}/docker/entrypoint.sh:/usr/src/app/entrypoint.sh 14 | - ${PWD}/docker/index.js:/usr/src/app/index.js 15 | - ${PWD}/lib:/usr/src/app/node_modules/goose-parser/lib 16 | -------------------------------------------------------------------------------- /browser.js: -------------------------------------------------------------------------------- 1 | window.__gooseParse = function(rule, offset, scopes) { 2 | window.__gooseResults = null; 3 | window.__gooseError = null; 4 | const BrowserEnvironment = require('!babel!./lib/BrowserEnvironment'); 5 | const Parser = require('!babel!./lib/Parser'); 6 | const env = new BrowserEnvironment; 7 | const parser = new Parser({environment: env}); 8 | 9 | delete rule.inject; 10 | console.log('Injection..'); 11 | parser._scopes = scopes; 12 | parser.clearDom = true; 13 | parser 14 | ._parseScope(rule, offset) 15 | .then( 16 | results => window.__gooseResults = results, 17 | error => window.__gooseError = error 18 | ); 19 | }; 20 | 
-------------------------------------------------------------------------------- /lib/transforms/TransformReplace.js: -------------------------------------------------------------------------------- 1 | /** 2 | * @fileOverview 3 | * 4 | * This transform applies replace function to result using the regex built from options.re and replaces matches with options.to 5 | */ 6 | 7 | const Transform = require('./Transform'); 8 | 9 | class TransformReplace extends Transform { 10 | doTransform() { 11 | const re = this._options.re; 12 | if (!Array.isArray(re)) { 13 | throw new Error('You must pass an array as `re` to `replace` transform'); 14 | } 15 | const value = typeof this._value === 'string' ? this._value : ''; 16 | 17 | return value.replace(RegExp.apply(null, re), this._options.to); 18 | } 19 | } 20 | 21 | module.exports = TransformReplace; 22 | -------------------------------------------------------------------------------- /lib/actions/ActionClick.js: -------------------------------------------------------------------------------- 1 | /** 2 | * @fileOverview 3 | * 4 | * Perform click to specified selector on the page 5 | */ 6 | 7 | const Action = require('./Action'); 8 | 9 | class ActionClick extends Action { 10 | async perform() { 11 | this.log('click by %s', this._selector); 12 | const clickedCount = await this._env.evaluateJs(this._selector, /* istanbul ignore next */ function (selector) { 13 | const nodes = Sizzle(selector); 14 | for (var i = 0, l = nodes.length; i < l; i++) { 15 | nodes[i].click(); 16 | } 17 | 18 | return nodes.length; 19 | }); 20 | this.log('clicked %s nodes', clickedCount); 21 | } 22 | } 23 | 24 | module.exports = ActionClick; 25 | -------------------------------------------------------------------------------- /lib/actions/ActionFocus.js: -------------------------------------------------------------------------------- 1 | /** 2 | * @fileOverview 3 | * 4 | * Perform focus on the elements matched by the specified selector on the page 5 | */ 6 | 7 | const Action = require('./Action'); 8 | 9 | 
class ActionFocus extends Action { 10 | async perform() { 11 | this.log('focus on %s', this._selector); 12 | const focusedCount = await this._env.evaluateJs(this._selector, /* istanbul ignore next */ function (selector) { 13 | const nodes = Sizzle(selector); 14 | for (var i = 0, l = nodes.length; i < l; i++) { 15 | nodes[i].focus(); 16 | } 17 | 18 | return nodes.length; 19 | }); 20 | this.log('focused consequently %s nodes', focusedCount); 21 | } 22 | } 23 | 24 | module.exports = ActionFocus; 25 | -------------------------------------------------------------------------------- /lib/actions/ActionBlur.js: -------------------------------------------------------------------------------- 1 | /** 2 | * @fileOverview 3 | * 4 | * Perform blur on the elements matched by the specified selector on the page 5 | */ 6 | 7 | const Action = require('./Action'); 8 | 9 | class ActionBlur extends Action { 10 | async perform() { 11 | this.log('blur on %s', this._selector); 12 | const blurredCount = await this._env.evaluateJs(this._selector, /* istanbul ignore next */ function (selector) { 13 | const nodes = Sizzle(selector); 14 | for (var i = 0, l = nodes.length; i < l; i++) { 15 | nodes[i].blur(); 16 | } 17 | 18 | return nodes.length; 19 | }); 20 | this.log('blurred consequently %s nodes', blurredCount); 21 | return blurredCount; 22 | } 23 | } 24 | 25 | module.exports = ActionBlur; 26 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Logs 2 | logs 3 | *.log 4 | 5 | # Runtime data 6 | pids 7 | *.pid 8 | *.seed 9 | 10 | # Directory for instrumented libs generated by jscoverage/JSCover 11 | lib-cov 12 | 13 | # Coverage info 14 | coverage/ 15 | 16 | # Grunt intermediate storage (http://gruntjs.com/creating-plugins#storing-task-files) 17 | .grunt 18 | 19 | # node-waf configuration 20 | .lock-wscript 21 | 22 | # Compiled binary addons (http://nodejs.org/api/addons.html) 23 | 
build/Release 24 | 25 | # Dependency directory 26 | # https://www.npmjs.org/doc/misc/npm-faq.html#should-i-check-my-node_modules-folder-into-git 27 | node_modules 28 | 29 | .idea 30 | tests/browser_parser_test.bundle.js 31 | parser.bundle.js 32 | 33 | # Default snapshot directory 34 | snapshots 35 | 36 | build 37 | 38 | docker/environment.js 39 | -------------------------------------------------------------------------------- /lib/Storage.js: -------------------------------------------------------------------------------- 1 | const debug = require('debug')('Storage'); 2 | 3 | class Storage { 4 | /** 5 | * @param {?Object} data 6 | */ 7 | constructor(data) { 8 | data = data || {}; 9 | this._store = data; 10 | } 11 | 12 | /** 13 | * @param {string} name 14 | * @returns {*} 15 | */ 16 | get(name) { 17 | debug('.get %s', name); 18 | return this._store[name]; 19 | } 20 | 21 | /** 22 | * @param {string} name 23 | * @param {*} value 24 | */ 25 | set(name, value) { 26 | debug('.set %s %o', name, value); 27 | this._store[name] = value; 28 | } 29 | 30 | /** 31 | * @param {string} name 32 | */ 33 | unset(name) { 34 | debug('.unset %s', name); 35 | delete this._store[name]; 36 | } 37 | } 38 | 39 | module.exports = Storage; 40 | -------------------------------------------------------------------------------- /test/unit/Storage.test.js: -------------------------------------------------------------------------------- 1 | /* eslint-env jest */ 2 | 3 | const Storage = require('../../lib/Storage'); 4 | 5 | describe('Storage Api', () => { 6 | let storage; 7 | 8 | test('return value after creating storage', async () => { 9 | storage = new Storage({ 10 | one: 1, 11 | }); 12 | 13 | const result = storage.get('one'); 14 | expect(result).toEqual(1); 15 | }); 16 | 17 | test('return value after creating storage', async () => { 18 | storage = new Storage(); 19 | 20 | storage.set('one', 1); 21 | 22 | const result = storage.get('one'); 23 | expect(result).toEqual(1); 24 | }); 25 | 26 | 
test('after unset value shouldn\'t be in the storage', async () => { 27 | storage = new Storage(); 28 | 29 | storage.set('one', 1); 30 | storage.unset('one'); 31 | 32 | const result = storage.get('one'); 33 | expect(result).toEqual(undefined); 34 | }); 35 | }); 36 | -------------------------------------------------------------------------------- /lib/actions/ActionWait.js: -------------------------------------------------------------------------------- 1 | /** 2 | * @fileOverview 3 | * Abstract wait action 4 | */ 5 | 6 | const Action = require('./Action'); 7 | const { waitForEvaluate } = require('../tools/wait'); 8 | 9 | class ActionWait extends Action { 10 | /** 11 | * Wait until function evalFunction expected in checkerFunction result 12 | * @param {Function} evalFunction 13 | * @param {Function} [checkerFunction] 14 | * @param {Function} [breakerFunction] 15 | * @param {Array} [args] 16 | * @returns {Promise} 17 | */ 18 | async wait(evalFunction, checkerFunction, breakerFunction, args) { 19 | return waitForEvaluate( 20 | this._env, 21 | evalFunction, 22 | checkerFunction, 23 | breakerFunction, 24 | args, 25 | this._options.timeout, 26 | this._options.interval); 27 | } 28 | } 29 | 30 | module.exports = ActionWait; 31 | 32 | -------------------------------------------------------------------------------- /lib/transforms/TransformCombine.js: -------------------------------------------------------------------------------- 1 | /** 2 | * @fileOverview 3 | * 4 | * This transform retrieves from storage by "field" from options.fields and combines into array 5 | */ 6 | 7 | const Transform = require('./Transform'); 8 | 9 | class TransformCombine extends Transform { 10 | doTransform() { 11 | const fields = this._options.fields || []; 12 | const type = this._options.dataType || 'string'; 13 | return fields.map(field => { 14 | const value = this._storage.get(field); 15 | switch (type) { 16 | case 'int': 17 | case 'integer': 18 | return parseInt(value); 19 | case 'number': 20 
| case 'float': 21 | case 'double': 22 | return parseFloat(value); 23 | } 24 | return value; 25 | }); 26 | } 27 | } 28 | 29 | module.exports = TransformCombine; 30 | -------------------------------------------------------------------------------- /lib/actions/ActionWaitForVisible.js: -------------------------------------------------------------------------------- 1 | /** 2 | * @fileOverview 3 | * 4 | * Wait for an element on the page to become visible or invisible 5 | */ 6 | 7 | const ActionWait = require('./ActionWait'); 8 | 9 | class ActionWaitForVisible extends ActionWait { 10 | async perform() { 11 | const visibility = this._options.visibility === undefined ? true : this._options.visibility; 12 | return this.wait(/* istanbul ignore next */ function (selector, visibility) { 13 | const nodes = Array.prototype.slice.call(Sizzle(selector), 0); 14 | const result = nodes.some(function (node) { 15 | return node.offsetWidth !== 0 && node.offsetHeight !== 0; 16 | }); 17 | 18 | return visibility ? 
result : !result; 19 | }, (result) => { 20 | return result; 21 | }, this._options.breaker, [this._selector, visibility]); 22 | } 23 | } 24 | 25 | module.exports = ActionWaitForVisible; 26 | 27 | -------------------------------------------------------------------------------- /lib/actions/ActionWaitForPattern.js: -------------------------------------------------------------------------------- 1 | /** 2 | * @fileOverview 3 | * 4 | * Wait until an element's content (text, or the attribute named by options.attr) matches the pattern 5 | */ 6 | 7 | const ActionWait = require('./ActionWait'); 8 | 9 | class ActionWaitForPattern extends ActionWait { 10 | async perform() { 11 | const { pattern, attr } = this._options; 12 | const selector = this._selector; 13 | this.log('%s on selector %s', pattern, selector); 14 | return this.wait(/* istanbul ignore next */ function (selector, attr) { 15 | const nodes = Sizzle(selector); 16 | if (nodes.length === 0) { 17 | return ''; 18 | } 19 | if (attr) { 20 | return nodes[0].getAttribute(attr); 21 | } 22 | return nodes[0].textContent; 23 | }, (text) => { 24 | return text.match(pattern) !== null; 25 | }, this._options.breaker, [selector, attr]); 26 | } 27 | } 28 | 29 | module.exports = ActionWaitForPattern; 30 | 31 | -------------------------------------------------------------------------------- /docker/publish.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | if [ -z "$1" ] 4 | then 5 | echo "First arg should be environment name, for example goose-phantom-environment" 6 | exit 7 | fi 8 | 9 | environmentName=`echo "$1" | sed -n 's/goose-\(.*\)-environment/\1/p'` 10 | environmentVersion=`npm show goose-${environmentName}-environment version`; 11 | gooseVersion=`npm show goose-parser version`; 12 | 13 | echo "Building image for goose-parser based on $1"; 14 | IMAGE_NAME="redcode/goose-parser"; 15 | TAG_NAME_VERSIONED="${IMAGE_NAME}:${environmentName}-${environmentVersion}-parser-${gooseVersion}"; 16 | 
/**
 * Splits a string value by a separator and returns either every trimmed
 * piece or the single piece at `options.index`.
 */
class TransformSplit extends Transform {
  /**
   * Options read from `this._options`:
   *  - `separator`: string, or an array of RegExp constructor arguments
   *    (defaults to ',')
   *  - `dataType`: 'array' returns all pieces, anything else a single piece
   *  - `index`: position of the piece to return (defaults to 0)
   *
   * @returns {Array<string>|string|null} the pieces, one piece, or null when
   *   there is no piece at the requested index
   */
  doTransform() {
    const options = this._options;
    // Non-string values are handled as an empty string.
    const source = typeof this._value === 'string' ? this._value : '';

    let delimiter = ',';
    if (options.separator !== undefined) {
      delimiter = options.separator;
    }
    if (Array.isArray(delimiter)) {
      // An array holds the arguments for RegExp, e.g. ['\\s*;\\s*', 'i'].
      delimiter = RegExp(...delimiter);
    }

    const pieces = source.split(delimiter).map((piece) => piece.trim());
    if ((options.dataType || 'string') === 'array') {
      return pieces;
    }

    const picked = pieces[options.index || 0];
    return picked === undefined ? null : picked;
  }
}
/**
 * Stack of nested parsing scopes used to build the current selector.
 */
class Scope {
  /**
   * @param {Array.<{scope: string, parentScope: string}>} [data] initial
   *   stack; the array is stored as is, not copied
   */
  constructor(data = []) {
    this._scopes = data;
  }

  /**
   * Put a scope on top of the stack.
   * @param {string} scope
   * @param {string?} parentScope
   */
  push(scope, parentScope = '') {
    const entry = { scope, parentScope };
    this._scopes.push(entry);
  }

  /**
   * Remove the top scope from the stack.
   * @returns {Object} the removed entry (undefined when the stack is empty)
   */
  pop() {
    return this._scopes.pop();
  }

  /**
   * Build the current parsing selector.
   *
   * Walks the stack from the top down, collecting scopes until an entry with
   * a parentScope is met; that parentScope becomes the leading part.
   * @returns {string}
   */
  getSelector() {
    const collected = [];
    let position = this._scopes.length;
    while (position-- > 0) {
      const { scope, parentScope } = this._scopes[position];
      collected.push(scope);
      if (parentScope) {
        collected.push(parentScope);
        break;
      }
    }

    // Parts were gathered innermost-first, the selector reads outermost-first.
    return collected.reverse().join(' ');
  }

  /**
   * @returns {boolean} true when no scopes are stored
   */
  isEmpty() {
    return this._scopes.length === 0;
  }
}
Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /lib/actions/ActionChangeElement.js: -------------------------------------------------------------------------------- 1 | /** 2 | * @fileOverview 3 | * 4 | * Performs change of the element styles 5 | * 6 | */ 7 | 8 | const Action = require('./Action'); 9 | 10 | class ActionChangeElement extends Action { 11 | async perform () { 12 | const changeStyleOptions = this._options.change || this._options.style || {}; 13 | const changeAttrOptions = this._options.attr || {}; 14 | return this._env.evaluateJs(this._selector, changeStyleOptions, changeAttrOptions, 15 | /* istanbul ignore next */ 16 | function (selector, changeStyleOptions, changeAttrOptions) { 17 | const element = Sizzle(selector)[0]; 18 | if (!element) { 19 | return; 20 | } 21 | Object.keys(changeStyleOptions).forEach(function (key) { 22 | element.style[key] = changeStyleOptions[key]; 23 | }); 24 | Object.keys(changeAttrOptions).forEach(function (key) { 25 | element.setAttribute(key, changeAttrOptions[key]); 26 | }); 
/**
 * Applies String.prototype.match to the value using `options.re` and picks
 * the result according to `options.index`.
 */
class TransformMatch extends Transform {
  /**
   * Options read from `this._options`:
   *  - `re`: array of RegExp constructor arguments
   *  - `index`: 'any' (boolean: was there a match), 'all' (raw match result),
   *    a single index, or an array of indexes tried in order (defaults to 0)
   *
   * @returns {boolean|Array|string|null}
   */
  doTransform() {
    const index = this._options.index || 0;
    // Non-string values are handled as an empty string.
    const subject = typeof this._value === 'string' ? this._value : '';
    const found = subject.match(RegExp.apply(null, this._options.re));

    switch (index) {
      case 'any':
        return found !== null && found.length > 0;
      case 'all':
        return found;
      default:
        break;
    }

    if (!found) {
      return null;
    }

    // The first listed index that holds a defined match wins.
    const candidates = Array.isArray(index) ? index : [index];
    const hit = candidates.find((candidate) => found[candidate] !== undefined);
    return hit === undefined ? null : found[hit];
  }
}
this._prevResult : this._options.text; 17 | this.log('typing %s on %s', text, this._selector); 18 | 19 | const typedCount = await this._env.evaluateJs(this._selector, text, /* istanbul ignore next */ function (selector, text) { 20 | const nodes = Sizzle(selector); 21 | for (var i = 0, l = nodes.length; i < l; i++) { 22 | const node = nodes[i]; 23 | node.focus(); 24 | node.value = text; 25 | node.dispatchEvent(new Event('keydown')); 26 | node.dispatchEvent(new Event('keyup')); 27 | node.blur(); 28 | } 29 | 30 | return nodes.length; 31 | }); 32 | this.log('text types in %s nodes', typedCount); 33 | } 34 | } 35 | 36 | module.exports = ActionType; 37 | 38 | -------------------------------------------------------------------------------- /test/unit/Scope.test.js: -------------------------------------------------------------------------------- 1 | /* eslint-env jest */ 2 | 3 | const Scope = require('../../lib/Scope'); 4 | 5 | describe('Scope Api', () => { 6 | let scope; 7 | 8 | test('newly created scope should be empty', async () => { 9 | scope = new Scope([]); 10 | expect(scope.isEmpty()).toEqual(true); 11 | }); 12 | 13 | test('methods push,pop,getSelector', async () => { 14 | scope = new Scope(); 15 | scope.push('scope', 'parent'); 16 | scope.push('scope2'); 17 | scope.push('scope3'); 18 | scope.push('scope4', 'parent4'); 19 | expect(scope.getSelector()).toEqual('parent4 scope4'); 20 | let data = scope.pop(); 21 | expect(data).toEqual({ scope: 'scope4', parentScope: 'parent4' }); 22 | expect(scope.getSelector()).toEqual('parent scope scope2 scope3'); 23 | data = scope.pop(); 24 | expect(data).toEqual({ scope: 'scope3', parentScope: '' }); 25 | expect(scope.getSelector()).toEqual('parent scope scope2'); 26 | data = scope.pop(); 27 | expect(data).toEqual({ scope: 'scope2', parentScope: '' }); 28 | expect(scope.getSelector()).toEqual('parent scope'); 29 | data = scope.pop(); 30 | expect(data).toEqual({ scope: 'scope', parentScope: 'parent' }); 31 | 
/**
 * Base class for all transforms: keeps the options, the incoming value and
 * the storage, and wraps doTransform() with debug logging.
 */
class Transform {
  /**
   * @param {object} options
   * @param {object} options.options Particular transform options
   * @param {*} options.value value before transformation
   * @param {Storage} options.storage
   */
  constructor(options) {
    const { options: transformOptions, value, storage } = options;
    this._options = transformOptions || {};
    this._value = value;
    this._storage = storage;
  }

  /**
   * Actual transformation; has to be provided by a subclass.
   * @abstract
   * @protected
   */
  doTransform() {
    throw new Error('You must redefine this method in the child class');
  }

  /**
   * Run the transformation, logging its input and its output.
   * @return {*} the value produced by doTransform()
   */
  transform() {
    this.log('applied with options %o on value %o', this._options, this._value);
    const transformed = this.doTransform();
    this.log('transformed result', transformed);
    return transformed;
  }

  /**
   * Pass a message to the debug tool, prefixed with the transform type;
   * any extra arguments are forwarded untouched.
   * @param msg
   */
  log(msg, ...details) {
    const prefixed = `[${this._options.type}] ${msg}`;
    debug(prefixed, ...details);
  }
}
/**
 * Base class for all actions: stores the execution context and provides
 * prefixed debug logging.
 */
class Action {
  /**
   * @param {object} options
   * @param {string} options.selector
   * @param {ActionOptions} options.actionOptions
   * @param {string} options.parentSelector
   * @param {AbstractEnvironment} options.env
   * @param {Actions} options.actions
   * @param {Parser} options.parser
   * @param {*} options.prevResult
   */
  constructor(options) {
    const {
      selector,
      parentSelector,
      actionOptions,
      env,
      actions,
      parser,
      prevResult,
    } = options;

    this._selector = selector;
    this._parentSelector = parentSelector;
    this._options = actionOptions;
    // The default breaker is written onto the passed options object itself
    // and always answers false.
    if (!actionOptions.breaker) {
      actionOptions.breaker = () => false;
    }
    this._env = env;
    this._actions = actions;
    this._parser = parser;
    this._prevResult = prevResult;
  }

  /**
   * Execute the action; has to be provided by a subclass.
   * @returns {Promise<*>}
   */
  async perform() {
    throw new Error('You must redefine this method in the real action');
  }

  /**
   * @returns {Array.} The list of the methods of other components on which it depends on
   * (e.g. Environment.evaluateJs, Parser.parse, ...)
   */
  dependsOn() {
    return [];
  }

  /**
   * Pass a message to the debug tool, prefixed with the action type;
   * any extra arguments are forwarded untouched.
   * @param msg
   */
  log(msg, ...details) {
    debug(`[${this._options.type}] ${msg}`, ...details);
  }
}
/**
 * Runs several chains of actions ("cases") concurrently and returns the
 * promise of the chain that is recorded as the winner.
 *
 * `options.cases` is an array of cases; each case is an array of action
 * descriptors that are performed one after another.
 */
class ActionWaitForCases extends Action {
  /**
   * @returns {Promise<*>} the promise of the winning case's chain
   */
  async perform() {
    const { cases } = this._options;
    this.log('Handle several cases in parallel %o', cases);
    // Index of the winning case; shared by every chain started below.
    let wonCase = null;
    const promises = cases.map(async (actions, caseNumber) => {
      try {
        // The first action of each case receives a breaker that turns true
        // as soon as any case has been recorded as the winner.
        const beginningAction = {
          ...actions[0],
          breaker: () => wonCase !== null,
        };
        const beginningPromise = this._actions.performAction(
          beginningAction,
          this._parentSelector,
          this._prevResult
        );
        // The remaining actions run sequentially: each step waits for the
        // previous promise before performing its own action.
        const result = await actions.slice(1)
          .reduce(async (promise, action, i, array) => {
            await promise;

            // An action flagged with trueCase marks this case as the winner
            // once the preceding actions have succeeded, before it runs.
            if (action.trueCase) {
              wonCase = caseNumber;
              this.log('Won case with actions %o', cases[wonCase]);
            }

            return this._actions.performAction(action, this._parentSelector, this._prevResult);
          }, beginningPromise);
        // The chain finished and nobody has won yet: this case wins.
        if (wonCase === null) {
          wonCase = caseNumber;
          this.log('Won case with actions %o', cases[wonCase]);
        }

        return result;
      } catch (reason) {
        this.log('Chain %o was rejected with reason %s', actions, reason);
        throw reason;
      }
    });

    // NOTE(review): `any` presumably settles once one of the chains
    // fulfills — confirm against ../tools/any.
    await any(promises);
    return promises[wonCase];
  }
}
-------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "private": true, 3 | "name": "goose-parser", 4 | "version": "0.6.1", 5 | "main": "lib/Parser.js", 6 | "description": "Multi environment web page parser", 7 | "author": { 8 | "name": "Andrew Reddikh", 9 | "email": "andrew@reddikh.com" 10 | }, 11 | "keywords": [ 12 | "phantomjs", 13 | "chrome", 14 | "chromium", 15 | "jsdom", 16 | "browser", 17 | "parser", 18 | "crawler", 19 | "crawling", 20 | "spider", 21 | "scraper", 22 | "scraping", 23 | "site", 24 | "page", 25 | "dom", 26 | "javascript" 27 | ], 28 | "homepage": "http://redcode.im", 29 | "license": "MIT", 30 | "contributors": [ 31 | { 32 | "name": "Andrew Reddikh", 33 | "email": "andrew@reddikh.com" 34 | }, 35 | { 36 | "name": "Andrew Balakirev", 37 | "email": "balakirev.andrey@gmail.com" 38 | } 39 | ], 40 | "repository": { 41 | "type": "git", 42 | "url": "git+https://github.com/redco/goose-parser.git" 43 | }, 44 | "bugs": { 45 | "url": "https://github.com/redco/goose-parser/issues" 46 | }, 47 | "scripts": { 48 | "test": "jest test --no-cache --runInBand --detectOpenHandles --maxConcurrency=1", 49 | "build": "node build.js", 50 | "coverage": "jest --no-cache --runInBand --maxConcurrency=1 --coverage && rm -rf ./coverage", 51 | "codecov": "jest --no-cache --runInBand --maxConcurrency=1 --coverage && codecov && rm -rf ./coverage" 52 | }, 53 | "dependencies": { 54 | "debug": "^3.1.0", 55 | "html-entities": "^1.2.0", 56 | "lodash.clone": "^4.5.0", 57 | "lodash.get": "^4.4.2", 58 | "lodash.merge": "^4.6.0", 59 | "lodash.pick": "^4.4.0", 60 | "lodash.pluck": "^3.1.2", 61 | "moment": "^2.10.6" 62 | }, 63 | "devDependencies": { 64 | "codecov": "^3.1.0", 65 | "goose-abstract-environment": "^1.1.0", 66 | "goose-chrome-environment": "^1.1.4", 67 | "goose-paginator": "^1.0.2", 68 | "jest": "^24.1.0", 69 | "rimraf": "^2.6.2", 70 | "webpack": 
"^1.12.2" 71 | }, 72 | "directories": {} 73 | } 74 | -------------------------------------------------------------------------------- /test/unit/transforms/transformsFactory.test.js: -------------------------------------------------------------------------------- 1 | /* eslint-env jest */ 2 | 3 | const TransformDate = require('../../../lib/transforms/TransformDate'); 4 | const Transform = require('../../../lib/transforms/Transform'); 5 | const transformsFactory = require('../../../lib/transforms/transformsFactory'); 6 | 7 | describe('transformsFactory', () => { 8 | test('createTransform returns instance for correct options.type', async () => { 9 | const transform = transformsFactory.createTransform({ 10 | options: { 11 | type: 'date', 12 | }, 13 | }); 14 | 15 | expect(transform).toBeInstanceOf(TransformDate); 16 | }); 17 | 18 | test('createTransform returns null for incorrect options.type', async () => { 19 | const transform = transformsFactory.createTransform({ 20 | options: { 21 | type: 'wrongType', 22 | }, 23 | }); 24 | 25 | expect(transform).toEqual(null); 26 | }); 27 | 28 | test('addTransform should throw a error if first param is not string', async () => { 29 | const fn = () => { 30 | return transformsFactory.addTransform([], () => { 31 | }); 32 | }; 33 | 34 | expect(fn).toThrowError(/^addTransform accept "type" as string and "transform" as function which does transformation$/); 35 | }); 36 | 37 | test('addTransform should throw a error if second param is not function', async () => { 38 | const fn = () => { 39 | return transformsFactory.addTransform('newType', []); 40 | }; 41 | 42 | expect(fn).toThrowError(/^addTransform accept "type" as string and "transform" as function which does transformation$/); 43 | }); 44 | 45 | test('createTransform should return CustomTransform', async () => { 46 | transformsFactory.addTransform('newType', (value) => { 47 | return (value || '').toUpperCase(); 48 | }); 49 | const transform = transformsFactory.createTransform({ 50 | 
options: { 51 | type: 'newType', 52 | }, 53 | value: 'string', 54 | }); 55 | 56 | expect(transform).toBeInstanceOf(Transform); 57 | expect(transform.doTransform()).toEqual('STRING'); 58 | }); 59 | 60 | }); 61 | -------------------------------------------------------------------------------- /test/unit/transforms/Transform.test.js: -------------------------------------------------------------------------------- 1 | /* eslint-env jest */ 2 | 3 | const debug = require('debug')('Transform'); 4 | const Storage = require('../../../lib/Storage'); 5 | const Transform = require('../../../lib/transforms/Transform'); 6 | 7 | jest.mock('../../../lib/Storage'); 8 | jest.mock('debug', () => { 9 | return jest.fn(() => { 10 | if (!this.__fn) { 11 | this.__fn = jest.fn((...args) => { 12 | }); 13 | } 14 | return this.__fn; 15 | }); 16 | }); 17 | 18 | describe('Transform', () => { 19 | let transform; 20 | let storage; 21 | 22 | beforeAll(async () => { 23 | storage = new Storage({}); 24 | }); 25 | 26 | describe('Transform', () => { 27 | test('doTransform should throw a error', async () => { 28 | transform = new Transform({ 29 | value: 'value', 30 | storage, 31 | }); 32 | 33 | const fn = () => { 34 | return transform.doTransform(); 35 | }; 36 | 37 | expect(fn).toThrowError(/^You must redefine this method in the child class$/); 38 | }); 39 | 40 | test('log', async () => { 41 | const transformType = 'TestTransform'; 42 | const message = 'Test Message'; 43 | transform = new Transform({ 44 | options: { 45 | type: transformType, 46 | }, 47 | value: 'value', 48 | storage, 49 | }); 50 | 51 | transform.log(message); 52 | expect(debug).toHaveBeenCalledTimes(1); 53 | expect(debug).toHaveBeenCalledWith(`[${transformType}] ${message}`); 54 | }); 55 | 56 | test('transform', async () => { 57 | const transformType = 'TestTransform'; 58 | const message = 'Test Message'; 59 | transform = new Transform({ 60 | options: { 61 | type: transformType, 62 | }, 63 | value: 'value', 64 | storage, 65 | }); 66 | 
transform.doTransform = jest.fn(() => { 67 | return 'transformedValue'; 68 | }); 69 | transform.log = jest.fn(() => { 70 | }); 71 | 72 | const transformedValue = transform.transform(); 73 | expect(transformedValue).toEqual('transformedValue'); 74 | expect(transform.doTransform).toHaveBeenCalledTimes(1); 75 | expect(transform.log).toHaveBeenCalledTimes(2); 76 | expect(transform.log).toHaveBeenCalledWith( 77 | 'applied with options %o on value %o', 78 | { 79 | type: transformType, 80 | }, 81 | 'value', 82 | ); 83 | expect(transform.log).toHaveBeenCalledWith( 84 | 'transformed result', 85 | 'transformedValue', 86 | ); 87 | }); 88 | }); 89 | }); 90 | -------------------------------------------------------------------------------- /lib/actions/actionsFactory.js: -------------------------------------------------------------------------------- 1 | const Action = require('./Action'); 2 | 3 | const actionsMap = { 4 | click: require('./ActionClick'), 5 | mouseClick: require('./ActionMouseClick'), 6 | mousedown: require('./ActionMouseDown'), 7 | mouseDown: require('./ActionMouseDown'), 8 | mouseup: require('./ActionMouseUp'), 9 | mouseUp: require('./ActionMouseUp'), 10 | changeElement: require('./ActionChangeElement'), 11 | wait: require('./ActionWaitForElement'), 12 | waitForElement: require('./ActionWaitForElement'), 13 | waitForVisible: require('./ActionWaitForVisible'), 14 | waitForPattern: require('./ActionWaitForPattern'), 15 | waitForPage: require('./ActionWaitForPage'), 16 | waitForQuery: require('./ActionWaitForQuery'), 17 | waitForCases: require('./ActionWaitForCases'), 18 | pause: require('./ActionPause'), 19 | parse: require('./ActionParse'), 20 | type: require('./ActionType'), 21 | exist: require('./ActionExist'), 22 | exists: require('./ActionExist'), 23 | hasRedirect: require('./ActionHasRedirect'), 24 | back: require('./ActionBack'), 25 | provideRules: require('./ActionProvideRules'), 26 | snapshot: require('./ActionSnapshot'), 27 | open: 
/**
 * Factory which instantiates actions by type and allows registration of
 * custom action types.
 */
const actionsFactory = {
  /**
   * Creates the action registered for `options.actionOptions.type`.
   *
   * @param {object} options Options passed to the action constructor
   * @param {ActionOptions} options.actionOptions Must carry the `type`
   * @return {Action|null} null when no action is registered for the type
   */
  createAction(options) {
    const ActionConstructor = actionsMap[options.actionOptions.type];
    if (!ActionConstructor) {
      return null;
    }

    return new ActionConstructor(options);
  },

  /**
   * Adds custom action
   *
   * The function is invoked with the action instance as `this` and with the
   * action options as its only argument.
   *
   * @param {string} type
   * @param {Function} action
   * @throws {Error} when the arguments have wrong types or the type is
   *   already registered
   */
  addAction(type, action) {
    if (typeof type !== 'string' || typeof action !== 'function') {
      throw new Error('addAction accept type as string and action if function which must return a promise');
    }

    if (actionsMap[type]) {
      throw new Error(`Action with type ${type} already registered`);
    }

    class CustomAction extends Action {
      perform() {
        // Hand the promise (and its result) back to the caller; without the
        // return the caller can neither await the action nor read its result.
        return action.call(this, this._options);
      }
    }

    actionsMap[type] = CustomAction;
  }
};
/**
 * Checks whether a file system entry exists at the given path.
 *
 * @param {string} path
 * @return {Promise<boolean>} resolves with true when the path is accessible,
 *   false when fs.access reports any error; never rejects
 */
function fileExists(path) {
  return new Promise((resolve) => {
    // fs.constants.F_OK replaces the deprecated top-level fs.F_OK alias.
    fs.access(path, fs.constants.F_OK, (err) => {
      resolve(!err);
    });
  });
}
const BYTES_PER_MEGABYTE = 1024 * 1024;

/**
 * Resident set size of the current process in megabytes.
 * @return {number}
 */
function getRssInMegabytes() {
  return process.memoryUsage().rss / BYTES_PER_MEGABYTE;
}

/**
 * Takes the initial measurements: start time in milliseconds and the
 * resident memory in megabytes. The fields only known at the end
 * (finishedAt, execution, used) are null.
 *
 * @return {{timing: {startedAt: number, finishedAt: null, execution: null},
 *   memory: {total: number, used: null}}}
 */
function getStats() {
  return {
    timing: {
      startedAt: (new Date).getTime(),
      finishedAt: null,
      execution: null,
    },
    memory: {
      // Previously divided by 1024 / 1014, which skewed the value by ~1%.
      total: getRssInMegabytes(),
      used: null,
    },
  };
}

/**
 * Builds the final stats from the initial ones returned by getStats().
 *
 * @param {object} stats result of getStats()
 * @return {{timing: {startedAt: number, finishedAt: number, execution: number},
 *   memory: {total: number, used: number}}} `execution` is the elapsed time
 *   in milliseconds, `memory.total` the resident memory at finish and
 *   `memory.used` its growth since getStats(), both in megabytes
 */
function calcFinishStats(stats) {
  const finishTime = (new Date).getTime();
  const finishMemory = getRssInMegabytes();
  return {
    timing: {
      ...stats.timing,
      finishedAt: finishTime,
      execution: finishTime - stats.timing.startedAt,
    },
    memory: {
      total: finishMemory,
      used: finishMemory - stats.memory.total,
    },
  };
}
19 | - restore_cache: 20 | key: dependency-cache-{{ checksum "yarn.lock" }} 21 | - run: 22 | name: Install dependencies 23 | command: yarn install && yarn build 24 | - save_cache: 25 | key: dependency-cache-{{ checksum "yarn.lock" }} 26 | paths: 27 | - ./node_modules 28 | - persist_to_workspace: 29 | root: . 30 | paths: 31 | - . 32 | 33 | test: 34 | docker: 35 | - image: circleci/node:8.12.0-browsers 36 | steps: 37 | - attach_workspace: 38 | at: . 39 | - run: 40 | name: Run tests 41 | command: yarn test 42 | - run: 43 | name: Run test coverage 44 | command: yarn codecov 45 | 46 | publish: 47 | docker: 48 | - image: circleci/node:8.12.0 49 | steps: 50 | - attach_workspace: 51 | at: . 52 | - setup_remote_docker: 53 | docker_layer_caching: true 54 | - run: 55 | name: Publish release notes when build from a tag 56 | command: | 57 | if [[ $CIRCLE_TAG ]]; then 58 | yarn global add github-release-notes 59 | $(yarn global bin)/gren release 60 | $(yarn global bin)/gren changelog 61 | fi 62 | - run: 63 | name: Release to npm 64 | command: | 65 | TAG_VERSION="$(echo $CIRCLE_TAG | cut -d 'v' -f 2)" 66 | NPM_VERSION="$(npm show goose-parser version)" 67 | echo "$TAG_VERSION == $NPM_VERSION" 68 | if [ $TAG_VERSION == $NPM_VERSION ]; then 69 | exit 0 70 | fi 71 | echo "//registry.npmjs.org/:_authToken=$NPM_TOKEN" > ./.npmrc 72 | npm publish ./build 73 | - run: 74 | name: Waiting for npm version to be ready 75 | command: | 76 | sleep 3 77 | for i in `seq 1 60`; 78 | do 79 | TAG_VERSION="$(echo $CIRCLE_TAG | cut -d 'v' -f 2)" 80 | NPM_VERSION="$(npm show goose-parser version)" 81 | if [ $TAG_VERSION == $NPM_VERSION ]; then 82 | exit 0 83 | fi 84 | echo -n . 
/**
 * Poll `evalFunction` through `env.evaluateJs` until `checkerFunction` accepts the result.
 *
 * The returned promise
 *  - resolves (with no value) as soon as `checkerFunction(result)` is truthy;
 *  - rejects when the environment fires its `error` callback;
 *  - rejects with "Function was terminated by breaker" when `breakerFunction()` is truthy
 *    after an unsuccessful check;
 *  - rejects with a timeout error after `timeout` ms. If evaluations were failing, the
 *    last failure is attached to that error as `cause`.
 *
 * A rejected evaluation (or a throwing checker) does not stop polling: the next tick
 * simply tries again, so transient failures are tolerated without leaving the promise
 * returned by `evaluateJs` unhandled.
 *
 * @param {AbstractEnvironment} env
 * @param {Function} evalFunction Passed to `env.evaluateJs` after `args`
 * @param {Function} [checkerFunction] Defaults to a truthiness check of the result
 * @param {Function} [breakerFunction] Optional; when omitted the wait is never broken
 * @param {Array} [args] Arguments passed to `env.evaluateJs` before `evalFunction`
 * @param {number} [timeout] Milliseconds, defaults to 5000
 * @param {number} [interval] Polling period in milliseconds, defaults to 10
 * @returns {Promise}
 */
async function waitForEvaluate(env, evalFunction, checkerFunction, breakerFunction, args, timeout, interval) {
  const callArgs = args || [];
  const isExpected = checkerFunction || (result => !!result);
  // The breaker is documented as optional, so never call it blindly.
  const shouldBreak = typeof breakerFunction === 'function' ? breakerFunction : () => false;
  const timeoutMs = timeout || 5000;
  const intervalMs = interval || 10;

  let timeoutId;
  let intervalId;
  let lastEvaluationError = null;

  return new Promise((resolve, reject) => {
    const stopTimers = () => {
      clearTimeout(timeoutId);
      clearInterval(intervalId);
    };

    const errorCallback = {
      fn: ({ error }) => {
        // NOTE(review): the callback is not unregistered here, as in the original;
        // confirm whether the environment drops fired callbacks by itself.
        stopTimers();
        reject(new Error(`Error during wait with args ${callArgs.toString()}, ${error}`));
      },
    };

    timeoutId = setTimeout(() => {
      env.removeCallback('error', errorCallback);
      clearInterval(intervalId);
      const timeoutError = new Error(`Timeout for wait with args ${callArgs.toString()}`);
      if (lastEvaluationError) {
        // Surface why the condition could never be evaluated.
        timeoutError.cause = lastEvaluationError;
      }
      reject(timeoutError);
    }, timeoutMs);

    env.addCallback('error', errorCallback);

    const evalArgs = [...callArgs, evalFunction];
    intervalId = setInterval(() => {
      env.evaluateJs(...evalArgs)
        .then((result) => {
          if (isExpected(result)) {
            stopTimers();
            env.removeCallback('error', errorCallback);
            resolve();
            return;
          }
          if (shouldBreak()) {
            stopTimers();
            env.removeCallback('error', errorCallback);
            reject(new Error('Function was terminated by breaker'));
          }
        })
        .catch((error) => {
          // Keep polling; remember the failure so the timeout can report it.
          lastEvaluationError = error;
        });
    }, intervalMs);
  });
}

/**
 * Wait until the environment reports an event of the given type.
 *
 * Registers a callback (with the event's `urlPattern`) for `event.type` and settles when
 * it fires: rejects with the reported `error` when one is present, resolves otherwise.
 * Rejects with "Page navigation timeout" after `timeout` ms, or with
 * "Function was terminated by breaker" when `breakerFunction()` becomes truthy.
 * After a successful `navigation` event the vendor files are injected again via
 * `env._injectFiles(env._getVendors())`.
 *
 * @param {ChromeEnvironment} env
 * @param {Object} event
 * @param {string} event.type Callback type to subscribe to
 * @param {*} [event.urlPattern] Forwarded to the environment together with the callback
 * @param {Function} [breakerFunction] Optional; when omitted no breaker polling is started
 * @param {number} [timeout] Milliseconds
 * @param {number} [interval] Breaker polling period in milliseconds
 * @returns {Promise}
 */
async function waitForEvent(env, event, breakerFunction, timeout = 5000, interval = 10) {
  const { type, urlPattern } = event;
  const hasBreaker = typeof breakerFunction === 'function';
  let intervalId;
  let timeoutId;

  await new Promise((resolve, reject) => {
    const callback = {
      fn: ({ error }) => {
        clearTimeout(timeoutId);
        clearInterval(intervalId);
        if (error) {
          reject(error);
        } else {
          resolve();
        }
      },
      urlPattern,
    };

    timeoutId = setTimeout(() => {
      env.removeCallback(type, callback);
      clearInterval(intervalId);
      reject(new Error('Page navigation timeout'));
    }, timeout);

    if (hasBreaker) {
      intervalId = setInterval(() => {
        if (breakerFunction()) {
          clearTimeout(timeoutId);
          clearInterval(intervalId);
          env.removeCallback(type, callback);
          reject(new Error('Function was terminated by breaker'));
        }
      }, interval);
    }

    env.addCallback(type, callback);
  });

  if (type === 'navigation') {
    await env._injectFiles(env._getVendors());
  }
}
43 |
UserName
44 |
UserSurname
45 |
+12345678901
46 |
47 | `, 48 | }); 49 | const parser = new Parser({ 50 | environment: new ChromeEnvironment({ url }), 51 | }); 52 | const result = await parser.parse({ 53 | rules: { 54 | scope: '.profile', 55 | collection: [ 56 | { 57 | name: 'name', 58 | scope: '.name', 59 | }, 60 | { 61 | name: 'surname', 62 | scope: '.surname', 63 | }, 64 | { 65 | name: 'phone', 66 | scope: '.phone', 67 | }, 68 | ], 69 | }, 70 | }); 71 | 72 | expect(result).toEqual({ name: 'UserName', surname: 'UserSurname', phone: '+12345678901' }); 73 | }); 74 | }); 75 | 76 | describe('Grid', () => { 77 | test('perform', async () => { 78 | setServerResponse({ 79 | html: ` 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 |
UserName1UserSurname1+12345678901
UserName2UserSurname2+12345678902
UserName3UserSurname3+12345678903
UserName4UserSurname4+12345678904
102 | `, 103 | }); 104 | const parser = new Parser({ 105 | environment: new ChromeEnvironment({ url }), 106 | }); 107 | const result = await parser.parse({ 108 | rules: { 109 | scope: '.profile', 110 | collection: [[ 111 | { 112 | name: 'name', 113 | scope: '.name', 114 | }, 115 | { 116 | name: 'surname', 117 | scope: '.surname', 118 | }, 119 | { 120 | name: 'phone', 121 | scope: '.phone', 122 | }, 123 | ]], 124 | }, 125 | }); 126 | 127 | expect(result).toEqual([ 128 | { name: 'UserName1', surname: 'UserSurname1', phone: '+12345678901' }, 129 | { name: 'UserName2', surname: 'UserSurname2', phone: '+12345678902' }, 130 | { name: 'UserName3', surname: 'UserSurname3', phone: '+12345678903' }, 131 | { name: 'UserName4', surname: 'UserSurname4', phone: '+12345678904' }, 132 | ]); 133 | }); 134 | }); 135 | }); 136 | -------------------------------------------------------------------------------- /test/unit/Transforms.test.js: -------------------------------------------------------------------------------- 1 | /* eslint-env jest */ 2 | 3 | const Storage = require('../../lib/Storage'); 4 | const Transforms = require('../../lib/Transforms'); 5 | const Transform = require('../../lib/transforms/Transform'); 6 | const TransformTrim = require('../../lib/transforms/TransformTrim'); 7 | const transformsFactory = require('../../lib/transforms/transformsFactory'); 8 | 9 | jest.mock('../../lib/transforms/transformsFactory'); 10 | 11 | describe('Transforms', () => { 12 | let transforms; 13 | let storage; 14 | 15 | beforeAll(async () => { 16 | storage = new Storage({}); 17 | }); 18 | 19 | beforeEach(async () => { 20 | transformsFactory.createTransform.mockReset(); 21 | }); 22 | 23 | test('create Transforms with default params', async () => { 24 | transforms = new Transforms({}); 25 | 26 | expect(transforms._storage).toBeInstanceOf(Storage); 27 | }); 28 | 29 | test('create Transforms with predefined Storage', async () => { 30 | transforms = new Transforms({ 31 | storage, 32 | }); 33 | 
34 | expect(transforms._storage).toBeInstanceOf(Storage); 35 | expect(transforms._storage).toEqual(storage); 36 | }); 37 | 38 | test('addTransform', async () => { 39 | const type = 'newTransformType'; 40 | const transformFn = () => { 41 | }; 42 | 43 | transforms = new Transforms({}); 44 | transforms.addTransform(type, transformFn); 45 | expect(transformsFactory.addTransform).toHaveBeenCalledTimes(1); 46 | expect(transformsFactory.addTransform).toHaveBeenCalledWith(type, transformFn); 47 | }); 48 | 49 | test('produce with no transforms', async () => { 50 | transforms = new Transforms({}); 51 | const value = transforms.produce(); 52 | expect(value).toEqual(''); 53 | }); 54 | 55 | test('produce with no value', async () => { 56 | transforms = new Transforms({}); 57 | const value = transforms.produce([]); 58 | expect(value).toEqual(''); 59 | }); 60 | 61 | test('produce with transforms and no value', async () => { 62 | transformsFactory.createTransform.mockImplementation((options) => { 63 | return new TransformTrim({ 64 | options: {}, 65 | value: '', 66 | }); 67 | }); 68 | 69 | transforms = new Transforms({}); 70 | const options = { 71 | options: { 72 | }, 73 | type: 'trim', 74 | }; 75 | const value = transforms.produce([options]); 76 | expect(transformsFactory.createTransform).toHaveBeenCalledTimes(1); 77 | expect(transformsFactory.createTransform).toHaveBeenCalledWith({ 78 | options, 79 | value: '', 80 | storage: new Storage(), 81 | }); 82 | expect(value).toEqual(''); 83 | }); 84 | 85 | test('produce with transforms and value', async () => { 86 | const value = ' value '; 87 | transformsFactory.createTransform.mockImplementation((options) => { 88 | return new TransformTrim({ 89 | options: {}, 90 | value, 91 | }); 92 | }); 93 | 94 | transforms = new Transforms({}); 95 | const options = { 96 | options: { 97 | }, 98 | type: 'trim', 99 | }; 100 | const result = transforms.produce([options], value); 101 | expect(transformsFactory.createTransform).toHaveBeenCalledTimes(1); 
102 | expect(transformsFactory.createTransform).toHaveBeenCalledWith({ 103 | options, 104 | value, 105 | storage: new Storage(), 106 | }); 107 | expect(result).toEqual('value'); 108 | }); 109 | 110 | test('produce with transforms, value and transform returns nothing', async () => { 111 | const value = ' value '; 112 | transformsFactory.createTransform.mockImplementation((options) => { 113 | return new (class extends Transform { 114 | doTransform() { 115 | return; 116 | } 117 | })({}); 118 | }); 119 | 120 | transforms = new Transforms({}); 121 | const options = { 122 | options: { 123 | }, 124 | type: 'custom', 125 | }; 126 | const result = transforms.produce([options, options], value); 127 | expect(transformsFactory.createTransform).toHaveBeenCalledTimes(2); 128 | expect(transformsFactory.createTransform).toHaveBeenCalledWith({ 129 | options, 130 | value, 131 | storage: new Storage(), 132 | }); 133 | expect(result).toEqual(undefined); 134 | }); 135 | 136 | test('produce with wrong transform type should throw a error', async () => { 137 | transforms = new Transforms({}); 138 | const fn = () => { 139 | return transforms.produce([{ 140 | options: { 141 | }, 142 | type: 'wrongTransformType', 143 | }], ''); 144 | }; 145 | expect(fn).toThrowError(/^Unsupported transform type: wrongTransformType$/); 146 | }); 147 | }); 148 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![mr.Goose](https://i.imgur.com/e0CPF7C.png)](http://goose.show) 2 | 3 | # goose-parser 4 | 5 | [![CircleCI (all branches)](https://img.shields.io/circleci/project/github/redco/goose-parser.svg)](https://circleci.com/gh/redco/goose-parser) 6 | [![Codecov](https://img.shields.io/codecov/c/github/redco/goose-parser.svg)](https://codecov.io/gh/redco/goose-parser) 7 | [![Latest Stable 
Version](https://img.shields.io/npm/v/goose-parser.svg?style=flat)](https://www.npmjs.com/package/goose-parser) 8 | [![Total Downloads](https://img.shields.io/npm/dt/goose-parser.svg?style=flat)](https://www.npmjs.com/package/goose-parser) 9 | [![NPM downloads](https://badgen.net/npm/dm/goose-parser)](https://npmjs.com/package/goose-parser) 10 | 11 | This tool takes the routine crawling process to a new level. 12 | Now it's possible to parse a web page in a moment. 13 | All you need to do is specify parsing rules based on CSS selectors. It's so simple that even a Goose can do it. 14 | This library allows you to parse data types such as Grid, Collections, and Simple objects. 15 | The parser supports pagination via the [goose-paginator](https://github.com/redco/goose-paginator) extension. 16 | It also offers the following features: *actions* to interact with the page and *transforms* to convert parsed data to a friendly format. 17 | 18 | ## Goose Starter Kit 19 | It's easy to get started with Goose: just try the [goose-starter-kit](https://github.com/redco/goose-starter-kit). 20 | 21 | ## Key features 22 | * Declarative approach to defining parsing rules, actions and transformations. 23 | * Multiple environments to run the parser in: the browser, PhantomJS, Chrome, JsDOM and more. 24 | * Clear code with the latest features of ES6. 25 | * Clear and consistent API with promises all the way. 26 | * Improved [Sizzle](https://sizzlejs.com) format of selectors. 27 | * Ajax and multi-pages parsing modes. 28 | * Docker Support. 29 | * Easily extensible. 
30 | 31 | ## Installation 32 | 33 | ```bash 34 | yarn add goose-parser goose-chrome-environment 35 | ``` 36 | 37 | ## Usage 38 | 39 | ```JS 40 | const Parser = require('goose-parser'); 41 | const ChromeEnvironment = require('goose-chrome-environment'); 42 | 43 | const env = new ChromeEnvironment({ 44 | url: 'https://www.google.com/search?q=goose-parser', 45 | }); 46 | 47 | const parser = new Parser({ environment: env }); 48 | 49 | (async function () { 50 | try { 51 | const results = await parser.parse({ 52 | actions: [ 53 | { 54 | type: 'wait', 55 | timeout: 10 * 1000, 56 | scope: '.srg>.g', 57 | parentScope: 'body' 58 | } 59 | ], 60 | rules: { 61 | scope: '.srg>.g', 62 | collection: [[ 63 | { 64 | name: 'url', 65 | scope: 'h3.r>a', 66 | attr: 'href', 67 | }, 68 | { 69 | name: 'text', 70 | scope: 'h3.r>a', 71 | } 72 | ]] 73 | } 74 | }); 75 | console.log(results); 76 | } catch (e) { 77 | console.log('Error occurred:'); 78 | console.log(e.stack); 79 | } 80 | })(); 81 | ``` 82 | 83 | ## Environment 84 | An environment is the special context in which the Parser is executed. The main purpose of an environment is to provide a method for evaluating JS on the page. 85 | Goose supports the following environments: 86 | * [PhantomJS](https://github.com/redco/goose-phantom-environment) (executes in NodeJS) 87 | * [Chrome](https://github.com/redco/goose-chrome-environment) (executes in NodeJS) 88 | * [JSDom](https://github.com/redco/goose-jsdom-environment) (executes in NodeJS) 89 | * FireFox (coming soon) 90 | * [Browser](https://github.com/redco/goose-browser-environment) (executes in Browser) 91 | 92 | ## Docker usage 93 | 94 | goose-parser can also be run as a Docker service. 95 | 96 | **Params:** 97 | 98 | * *url* - the first param is the URL to parse 99 | * *Parsing rules* [optional] - Rules to parse with. Optional if *--rules-file* is specified. 100 | 101 | **Options:** 102 | 103 | * -e "DEBUG=*" - to enable debug mode and see everything that happens inside the goose-parser. 
Read more about debug [here](https://www.npmjs.com/package/debug). 104 | * *--rules-file* - to specify a rules file. Be aware that you need to mount a folder with rules as a volume to the docker container. 105 | 106 | There are two options to run it: 107 | 108 | ### Process parsing from the user input 109 | 110 | ```bash 111 | docker run -it --rm -e "DEBUG=*,-puppeteer:*" redcode/goose-parser:chrome-1.1.3-parser-0.6.0\ 112 | https://www.google.com/search?q=goose-parser\ 113 | '{ 114 | "actions": [ 115 | { 116 | "type": "wait", 117 | "scope": ".g" 118 | } 119 | ], 120 | "rules": { 121 | "scope": ".g", 122 | "collection": [ 123 | [ 124 | { 125 | "scope": ".r>a h3", 126 | "name": "name" 127 | }, 128 | { 129 | "scope": ".r>a:eq(0)", 130 | "name": "link", 131 | "attr": "href" 132 | } 133 | ] 134 | ] 135 | } 136 | }' 137 | ``` 138 | 139 | ### Process parsing from the mounted file with parsing rules 140 | 141 | Create a file `rules/rules.json` which contains parser rules and run the following command: 142 | 143 | ```bash 144 | docker run -it --rm --volume="`pwd`/rules:/app/rules:ro" -e "DEBUG=*,-puppeteer:*" redcode/goose-parser:chrome-1.1.3-parser-0.6.0 --rules-file="/app/rules/rules.json" 'https://www.google.com/search?q=goose-parser' 145 | ``` 146 | 147 | ## Documentation 148 | Based on the code you can find detailed documentation about [actions](https://github.com/redco/goose-parser/tree/master/lib/actions) and [transformations](https://github.com/redco/goose-parser/tree/master/lib/transforms) 149 | 150 | API reference - coming soon 151 | -------------------------------------------------------------------------------- /jest.config.js: -------------------------------------------------------------------------------- 1 | /* eslint-disable */ 2 | // For a detailed explanation regarding each configuration property, visit: 3 | // https://jestjs.io/docs/en/configuration.html 4 | 5 | module.exports = { 6 | // All imported modules in your tests should be mocked automatically 7 | // 
automock: false, 8 | 9 | // Stop running tests after the first failure 10 | // bail: false, 11 | 12 | // Respect "browser" field in package.json when resolving modules 13 | // browser: false, 14 | 15 | // The directory where Jest should store its cached dependency information 16 | // cacheDirectory: "/var/folders/lh/zbzlty6553zd84f0nm6k7ywh0000gn/T/jest_dx", 17 | 18 | // Automatically clear mock calls and instances between every test 19 | // clearMocks: false, 20 | 21 | // Indicates whether the coverage information should be collected while executing the test 22 | // collectCoverage: false, 23 | 24 | // An array of glob patterns indicating a set of files for which coverage information should be collected 25 | collectCoverageFrom: [ 26 | 'lib/**', 27 | ], 28 | 29 | // The directory where Jest should output its coverage files 30 | // coverageDirectory: null, 31 | 32 | // An array of regexp pattern strings used to skip coverage collection 33 | // coveragePathIgnorePatterns: [ 34 | // "/node_modules/" 35 | // ], 36 | 37 | // A list of reporter names that Jest uses when writing coverage reports 38 | // coverageReporters: [ 39 | // "json", 40 | // "text", 41 | // "lcov", 42 | // "clover" 43 | // ], 44 | 45 | // An object that configures minimum threshold enforcement for coverage results 46 | // coverageThreshold: null, 47 | 48 | // Make calling deprecated APIs throw helpful error messages 49 | // errorOnDeprecated: false, 50 | 51 | // Force coverage collection from ignored files usin a array of glob patterns 52 | // forceCoverageMatch: [], 53 | 54 | // A path to a module which exports an async function that is triggered once before all test suites 55 | // globalSetup: null, 56 | 57 | // A path to a module which exports an async function that is triggered once after all test suites 58 | // globalTeardown: null, 59 | 60 | // A set of global variables that need to be available in all test environments 61 | // globals: {}, 62 | 63 | // An array of directory names to be 
searched recursively up from the requiring module's location 64 | // moduleDirectories: [ 65 | // "node_modules" 66 | // ], 67 | 68 | // An array of file extensions your modules use 69 | // moduleFileExtensions: [ 70 | // "js", 71 | // "json", 72 | // "jsx", 73 | // "node" 74 | // ], 75 | 76 | // A map from regular expressions to module names that allow to stub out resources with a single module 77 | // moduleNameMapper: {}, 78 | 79 | // An array of regexp pattern strings, matched against all module paths before considered 'visible' to the module loader 80 | // modulePathIgnorePatterns: [], 81 | 82 | // Activates notifications for test results 83 | // notify: false, 84 | 85 | // An enum that specifies notification mode. Requires { notify: true } 86 | // notifyMode: "always", 87 | 88 | // A preset that is used as a base for Jest's configuration 89 | // preset: null, 90 | 91 | // Run tests from one or more projects 92 | // projects: null, 93 | 94 | // Use this configuration option to add custom reporters to Jest 95 | // reporters: undefined, 96 | 97 | // Automatically reset mock state between every test 98 | // resetMocks: false, 99 | 100 | // Reset the module registry before running each individual test 101 | // resetModules: false, 102 | 103 | // A path to a custom resolver 104 | // resolver: null, 105 | 106 | // Automatically restore mock state between every test 107 | // restoreMocks: false, 108 | 109 | // The root directory that Jest should scan for tests and modules within 110 | // rootDir: null, 111 | 112 | // A list of paths to directories that Jest should use to search for files in 113 | // roots: [ 114 | // "" 115 | // ], 116 | 117 | // Allows you to use a custom runner instead of Jest's default test runner 118 | // runner: "jest-runner", 119 | 120 | // The paths to modules that run some code to configure or set up the testing environment before each test 121 | // setupFiles: [], 122 | 123 | // The path to a module that runs some code to configure or set up 
the testing framework before each test 124 | // setupTestFrameworkScriptFile: './test/beforeAll.js', 125 | 126 | // A list of paths to snapshot serializer modules Jest should use for snapshot testing 127 | // snapshotSerializers: [], 128 | 129 | // The test environment that will be used for testing 130 | testEnvironment: 'node', 131 | 132 | // Options that will be passed to the testEnvironment 133 | // testEnvironmentOptions: {}, 134 | 135 | // Adds a location field to test results 136 | // testLocationInResults: false, 137 | 138 | // The glob patterns Jest uses to detect test files 139 | // testMatch: [ 140 | // "**/__tests__/**/*.js?(x)", 141 | // "**/?(*.)+(spec|test).js?(x)" 142 | // ], 143 | 144 | // An array of regexp pattern strings that are matched against all test paths, matched tests are skipped 145 | testPathIgnorePatterns: [ 146 | '/node_modules/', 147 | '/config/', 148 | ], 149 | 150 | // The regexp pattern Jest uses to detect test files 151 | // testRegex: "", 152 | 153 | // This option allows the use of a custom results processor 154 | // testResultsProcessor: null, 155 | 156 | // This option allows use of a custom test runner 157 | // testRunner: "jasmine2", 158 | 159 | // This option sets the URL for the jsdom environment. 
It is reflected in properties such as location.href 160 | // testURL: "about:blank", 161 | 162 | // Setting this value to "fake" allows the use of fake timers for functions such as "setTimeout" 163 | // timers: "real", 164 | 165 | // A map from regular expressions to paths to transformers 166 | // transform: null, 167 | 168 | // An array of regexp pattern strings that are matched against all source file paths, matched files will skip transformation 169 | // transformIgnorePatterns: [ 170 | // "/node_modules/" 171 | // ], 172 | 173 | // An array of regexp pattern strings that are matched against all modules before the module loader will automatically return a mock for them 174 | // unmockedModulePathPatterns: undefined, 175 | 176 | // Indicates whether each individual test should be reported during the run 177 | // verbose: null, 178 | 179 | // An array of regexp patterns that are matched against all source file paths before re-running tests in watch mode 180 | // watchPathIgnorePatterns: [], 181 | 182 | // Whether to use watchman for file crawling 183 | // watchman: true, 184 | }; 185 | -------------------------------------------------------------------------------- /lib/Actions.js: -------------------------------------------------------------------------------- 1 | const debug = require('debug')('Actions'); 2 | const merge = require('lodash.merge'); 3 | const Storage = require('./Storage'); 4 | const actionsFactory = require('./actions/actionsFactory'); 5 | const wait = require('./tools/wait'); 6 | 7 | class Actions { 8 | /** 9 | * @param {Object} options 10 | * @param {AbstractEnvironment} options.environment 11 | * @param {Parser} options.parser 12 | * @param {Storage} options.storage 13 | */ 14 | constructor(options) { 15 | this._env = options.environment; 16 | this._parser = options.parser; 17 | this._storage = options.storage || new Storage(); 18 | } 19 | 20 | /** 21 | * Perform parsing rule 22 | * @param {Rule} rule 23 | * @param {string?} parentSelector 24 
| * @returns {Promise} 25 | */ 26 | async performForRule(rule, parentSelector) { 27 | const actions = rule.actions; 28 | const possibleErrors = rule.catchError || {}; 29 | 30 | if (!actions) { 31 | return Promise.resolve(); 32 | } 33 | 34 | try { 35 | return this.performActions(actions, parentSelector); 36 | } catch (e) { 37 | debug('Catching possible errors %o', possibleErrors); 38 | if (!(e instanceof Error) || !possibleErrors[e.name]) { 39 | debug('Handler for %o not found', e); 40 | throw e; 41 | } 42 | 43 | return this._handleError(possibleErrors[e.name], args); 44 | } 45 | } 46 | 47 | /** 48 | * Handle action error 49 | * @param {object} handlerOptions 50 | * @param {number} handlerOptions.handler Handler name 51 | * @param {number} handlerOptions.attempts Number of attempts before cancel with error 52 | * @param {number} handlerOptions.__attempt Current attempt number 53 | * @param actionArgs 54 | * @return {Promise} 55 | * @private 56 | */ 57 | async _handleError(handlerOptions, actionArgs) { 58 | debug('Handle error with rules %o', handlerOptions); 59 | switch (handlerOptions.handler) { 60 | case 'repeat': 61 | handlerOptions.__attempt = handlerOptions.__attempt || 0; 62 | if (++handlerOptions.__attempt > handlerOptions.attempts) { 63 | throw new Error('Max attempts limit exceeded'); 64 | } 65 | const result = await this.performForRule(...actionArgs); 66 | delete handlerOptions.__attempt; 67 | return result; 68 | 69 | default: 70 | throw new Error('Unknown handler ' + handlerOptions.handler); 71 | } 72 | } 73 | 74 | /** 75 | * Perform parsing rule 76 | * @param {Rule} rule 77 | * @param {string} parentSelector 78 | * @returns {Promise} 79 | */ 80 | async performPostActionsForRule(rule, parentSelector) { 81 | const actions = rule.postActions; 82 | 83 | if (!actions) { 84 | return Promise.resolve(); 85 | } 86 | 87 | return this.performActions(actions, parentSelector); 88 | } 89 | 90 | /** 91 | * Perform array of actions 92 | * @param {Array} actions 93 | * 
@param {string} [parentSelector] 94 | * @returns {Promise} 95 | */ 96 | async performActions(actions, parentSelector) { 97 | if (!Array.isArray(actions)) { 98 | throw new Error('actions must be an Array'); 99 | } 100 | 101 | debug('Perform actions %o', actions); 102 | 103 | if (!parentSelector) { 104 | parentSelector = 'body'; 105 | debug('Parent scope switched to %s', parentSelector); 106 | } 107 | 108 | return actions.reduce(async (promise, action) => { 109 | if (action.once && action.__done) { 110 | return promise; 111 | } 112 | 113 | const prevResult = await promise; 114 | const result = await this.performAction(action, parentSelector, prevResult); 115 | action.__done = true; 116 | return result; 117 | }, Promise.resolve()); 118 | } 119 | 120 | /** 121 | * @param {ActionOptions} action 122 | * @param {string} parentSelector 123 | * @param {?*} prevResult 124 | * @returns {Promise} 125 | */ 126 | async performAction(action, parentSelector, prevResult) { 127 | const selector = (action.parentScope || parentSelector || '') + ' ' + (action.scope || ''); 128 | debug('Perform action %o for generated selector %s', action, selector); 129 | 130 | let waitForPromise = Promise.resolve(); 131 | if (action.waitForPage || action.type === 'back') { 132 | waitForPromise = this.performAction({ 133 | type: 'waitForPage', 134 | timeout: action.waitForPageTimeout 135 | }, parentSelector, prevResult); 136 | } 137 | 138 | if (action.waitForQuery) { 139 | const waitAction = merge({}, action.waitForQuery, { 140 | type: this.TYPES.WAIT_FOR_QUERY 141 | }); 142 | waitForPromise = this.performAction(waitAction, parentSelector, prevResult); 143 | } 144 | 145 | if (action.waitFor) { 146 | let waitFor = typeof action.waitFor === 'string' ? 
147 | { type: action.waitFor } : action.waitFor; 148 | waitFor = merge({}, waitFor, { 149 | type: `waitFor${waitFor.type.charAt(0).toUpperCase() + waitFor.type.slice(1)}`, 150 | }); 151 | waitForPromise = this.performAction(waitFor, parentSelector, prevResult); 152 | } 153 | 154 | if (action.cases && action.type !== 'cases') { 155 | waitForPromise = this.performAction({ 156 | type: 'cases', 157 | cases: action.cases, 158 | }, parentSelector, prevResult); 159 | } 160 | 161 | // mutation for if-then-else action 162 | if (action.conditions) { 163 | action.type = 'condition'; 164 | } 165 | 166 | const actionInstance = this._createInstance(action, selector, parentSelector, prevResult); 167 | 168 | if (!actionInstance) { 169 | Promise.reject(new Error('Unknown action type: ' + action.type)); 170 | return; 171 | } 172 | 173 | let result = await actionInstance.perform(); 174 | const actionResult = await waitForPromise; 175 | 176 | // mutation for transform action 177 | if (action.transform) { 178 | result = this._parser.transform(result, action.transform); 179 | } 180 | 181 | // mutation for set action 182 | if (action.set) { 183 | this._storage.set(action.set, result); 184 | } 185 | 186 | return actionResult || result; 187 | } 188 | 189 | /** 190 | * @param action 191 | * @param selector 192 | * @param parentSelector 193 | * @param prevResult 194 | * @return {Action} 195 | * @private 196 | */ 197 | _createInstance(action, selector, parentSelector, prevResult) { 198 | return actionsFactory.createAction({ 199 | selector, 200 | actionOptions: action, 201 | parentSelector, 202 | prevResult, 203 | env: this._env, 204 | parser: this._parser, 205 | actions: this 206 | }); 207 | } 208 | 209 | /** 210 | * Add custom action 211 | * @param {string} type 212 | * @param {Function} action 213 | */ 214 | addAction(type, action) { 215 | actionsFactory.addAction(type, action); 216 | } 217 | 218 | async click(selector) { 219 | return this.performAction({ 220 | type: 'click', 221 | scope: 
selector 222 | }, ''); 223 | } 224 | 225 | /** 226 | * Perform page scroll-down 227 | * @param {number} interval 228 | * @returns {Promise} 229 | */ 230 | async scroll(interval) { 231 | debug('scroll %s px', interval); 232 | return this._env.evaluateJs(interval, /* istanbul ignore next */ function (interval) { 233 | document.body.scrollTop += interval; 234 | }); 235 | } 236 | } 237 | 238 | module.exports = Actions; 239 | -------------------------------------------------------------------------------- /test/unit/transforms/transforms.test.js: -------------------------------------------------------------------------------- 1 | /* eslint-env jest */ 2 | 3 | const Storage = require('../../../lib/Storage'); 4 | const TransformTrim = require('../../../lib/transforms/TransformTrim'); 5 | const TransformBase64Decode = require('../../../lib/transforms/TransformBase64Decode'); 6 | const TransformSplit = require('../../../lib/transforms/TransformSplit'); 7 | const TransformCombine = require('../../../lib/transforms/TransformCombine'); 8 | const TransformCompare = require('../../../lib/transforms/TransformCompare'); 9 | const TransformDecodeHtml = require('../../../lib/transforms/TransformDecodeHtml'); 10 | const TransformDecodeUri = require('../../../lib/transforms/TransformDecodeUri'); 11 | const TransformEncodeUri = require('../../../lib/transforms/TransformEncodeUri'); 12 | const TransformEqual = require('../../../lib/transforms/TransformEqual'); 13 | const TransformGet = require('../../../lib/transforms/TransformGet'); 14 | const TransformJoin = require('../../../lib/transforms/TransformJoin'); 15 | const TransformMatch = require('../../../lib/transforms/TransformMatch'); 16 | const TransformPick = require('../../../lib/transforms/TransformPick'); 17 | const TransformPluck = require('../../../lib/transforms/TransformPluck'); 18 | const TransformReplace = require('../../../lib/transforms/TransformReplace'); 19 | const TransformDate = 
require('../../../lib/transforms/TransformDate'); 20 | 21 | jest.mock('../../../lib/Storage'); 22 | 23 | describe('Transforms', () => { 24 | let transform; 25 | let storage; 26 | 27 | beforeAll(async () => { 28 | storage = new Storage({}); 29 | }); 30 | 31 | describe('TransformTrim', () => { 32 | test('perform with string', async () => { 33 | transform = new TransformTrim({ 34 | value: ' test ', 35 | }); 36 | 37 | expect(transform.doTransform()).toEqual('test'); 38 | }); 39 | 40 | test('perform with non-string', async () => { 41 | transform = new TransformTrim({ 42 | value: 12345, 43 | }); 44 | 45 | expect(transform.doTransform()).toEqual(12345); 46 | }); 47 | }); 48 | 49 | describe('TransformBase64Decode', () => { 50 | test('perform', async () => { 51 | transform = new TransformBase64Decode({ 52 | value: 'dGVzdA==', 53 | }); 54 | 55 | expect(transform.doTransform()).toEqual('test'); 56 | }); 57 | }); 58 | 59 | describe('TransformSplit', () => { 60 | test('perform with all default values', async () => { 61 | transform = new TransformSplit({ 62 | value: '123,345,678', 63 | }); 64 | 65 | expect(transform.doTransform()).toEqual('123'); 66 | }); 67 | 68 | test('perform with specified index', async () => { 69 | transform = new TransformSplit({ 70 | value: '123,345,678', 71 | options: { 72 | index: 1, 73 | }, 74 | }); 75 | 76 | expect(transform.doTransform()).toEqual('345'); 77 | }); 78 | 79 | test('perform with wrong index', async () => { 80 | transform = new TransformSplit({ 81 | value: '123,345,678', 82 | options: { 83 | index: 5, 84 | }, 85 | }); 86 | 87 | expect(transform.doTransform()).toEqual(null); 88 | }); 89 | 90 | test('perform with specified separator', async () => { 91 | transform = new TransformSplit({ 92 | value: '123:345:678', 93 | options: { 94 | separator: ':', 95 | }, 96 | }); 97 | 98 | expect(transform.doTransform()).toEqual('123'); 99 | }); 100 | 101 | test('perform with specified separator as regex', async () => { 102 | transform = new 
TransformSplit({ 103 | value: '123:345:678', 104 | options: { 105 | separator: [':', 'ui'], 106 | }, 107 | }); 108 | 109 | expect(transform.doTransform()).toEqual('123'); 110 | }); 111 | 112 | test('perform with incorrect value', async () => { 113 | transform = new TransformSplit({ 114 | value: ['123'], 115 | }); 116 | 117 | expect(transform.doTransform()).toEqual(''); 118 | }); 119 | 120 | test('perform with specified dataType', async () => { 121 | transform = new TransformSplit({ 122 | value: '123,345,678', 123 | options: { 124 | dataType: 'array', 125 | }, 126 | }); 127 | 128 | expect(transform.doTransform()).toEqual([ 129 | '123', 130 | '345', 131 | '678', 132 | ]); 133 | }); 134 | }); 135 | 136 | describe('TransformCombine', () => { 137 | test('perform with default values', async () => { 138 | transform = new TransformCombine({}); 139 | 140 | expect(transform.doTransform()).toEqual([]); 141 | }); 142 | 143 | test('perform with data', async () => { 144 | const data = { 145 | 'one': '1', 146 | 'two': '2', 147 | }; 148 | storage.get.mockClear(); 149 | storage.get.mockImplementation((key) => data[key]); 150 | transform = new TransformCombine({ 151 | options: { 152 | fields: [ 153 | 'one', 154 | 'two', 155 | ], 156 | }, 157 | storage, 158 | }); 159 | 160 | expect(transform.doTransform()).toEqual(['1', '2']); 161 | 162 | expect(storage.get).toHaveBeenCalledTimes(2); 163 | expect(storage.get).toHaveBeenCalledWith('one'); 164 | expect(storage.get).toHaveBeenCalledWith('two'); 165 | }); 166 | 167 | const checkCombineWithDataType = (dataType, result) => { 168 | const data = { 169 | 'one': '1', 170 | 'two': '2', 171 | }; 172 | storage.get.mockClear(); 173 | storage.get.mockImplementation((key) => data[key]); 174 | transform = new TransformCombine({ 175 | options: { 176 | fields: [ 177 | 'one', 178 | 'two', 179 | ], 180 | dataType, 181 | }, 182 | storage, 183 | }); 184 | 185 | expect(transform.doTransform()).toEqual(result); 186 | 187 | 
expect(storage.get).toHaveBeenCalledTimes(2); 188 | expect(storage.get).toHaveBeenCalledWith('one'); 189 | expect(storage.get).toHaveBeenCalledWith('two'); 190 | }; 191 | 192 | test('perform with data and data dataType=int.integer', async () => { 193 | checkCombineWithDataType('int', [1, 2]); 194 | checkCombineWithDataType('integer', [1, 2]); 195 | }); 196 | 197 | test('perform with data and data dataType=float,number,double', async () => { 198 | checkCombineWithDataType('float', [1.0, 2.0]); 199 | checkCombineWithDataType('double', [1.0, 2.0]); 200 | checkCombineWithDataType('number', [1.0, 2.0]); 201 | }); 202 | }); 203 | 204 | describe('TransformCompare', () => { 205 | test('perform with data returning true value', async () => { 206 | const data = { 207 | 'one': '1', 208 | }; 209 | storage.get.mockClear(); 210 | storage.get.mockImplementation((key) => data[key]); 211 | transform = new TransformCompare({ 212 | options: { 213 | field: 'one', 214 | }, 215 | value: '1', 216 | storage, 217 | }); 218 | 219 | expect(transform.doTransform()).toEqual(true); 220 | 221 | expect(storage.get).toHaveBeenCalledTimes(1); 222 | expect(storage.get).toHaveBeenCalledWith('one'); 223 | }); 224 | 225 | test('perform with data returning false value', async () => { 226 | const data = { 227 | 'one': '1', 228 | }; 229 | storage.get.mockClear(); 230 | storage.get.mockImplementation((key) => data[key]); 231 | transform = new TransformCompare({ 232 | options: { 233 | field: 'two', 234 | }, 235 | value: '1', 236 | storage, 237 | }); 238 | 239 | expect(transform.doTransform()).toEqual(false); 240 | 241 | expect(storage.get).toHaveBeenCalledTimes(1); 242 | expect(storage.get).toHaveBeenCalledWith('two'); 243 | }); 244 | }); 245 | 246 | describe('TransformDecodeHtml', () => { 247 | test('perform', async () => { 248 | transform = new TransformDecodeHtml({ 249 | value: '<>"&©®', 250 | }); 251 | 252 | expect(transform.doTransform()).toEqual('<>"&©®'); 253 | }); 254 | }); 255 | 256 | 
describe('TransformDecodeUri', () => { 257 | test('perform', async () => { 258 | transform = new TransformDecodeUri({ 259 | value: 'https://www.google.com/?q=goose-parser%20is%20a%20library%20for%20parsing', 260 | }); 261 | 262 | expect(transform.doTransform()).toEqual('https://www.google.com/?q=goose-parser is a library for parsing'); 263 | }); 264 | }); 265 | 266 | describe('TransformEncodeUri', () => { 267 | test('perform', async () => { 268 | transform = new TransformEncodeUri({ 269 | value: 'https://www.google.com/?q=goose-parser is a library for parsing', 270 | }); 271 | 272 | expect(transform.doTransform()).toEqual('https://www.google.com/?q=goose-parser%20is%20a%20library%20for%20parsing'); 273 | }); 274 | }); 275 | 276 | describe('TransformEqual', () => { 277 | test('perform with returning true', async () => { 278 | transform = new TransformEqual({ 279 | value: 'one', 280 | options: { 281 | value: 'one', 282 | }, 283 | }); 284 | 285 | expect(transform.doTransform()).toEqual(true); 286 | }); 287 | 288 | test('perform returning false', async () => { 289 | transform = new TransformEqual({ 290 | value: 'one', 291 | options: { 292 | value: 'two', 293 | }, 294 | }); 295 | 296 | expect(transform.doTransform()).toEqual(false); 297 | }); 298 | }); 299 | 300 | describe('TransformGet', () => { 301 | test('perform', async () => { 302 | transform = new TransformGet({ 303 | value: { 304 | one: { 305 | two: 'three', 306 | }, 307 | }, 308 | options: { 309 | path: 'one.two', 310 | }, 311 | }); 312 | 313 | expect(transform.doTransform()).toEqual('three'); 314 | }); 315 | 316 | test('perform wrong path without default', async () => { 317 | transform = new TransformGet({ 318 | value: { 319 | one: { 320 | two: 'three', 321 | }, 322 | }, 323 | options: { 324 | path: 'one.three', 325 | }, 326 | }); 327 | 328 | expect(transform.doTransform()).toEqual(''); 329 | }); 330 | 331 | test('perform wrong path with default', async () => { 332 | transform = new TransformGet({ 333 | value: 
{ 334 | one: { 335 | two: 'three', 336 | }, 337 | }, 338 | options: { 339 | path: 'one.three', 340 | default: 'four', 341 | }, 342 | }); 343 | 344 | expect(transform.doTransform()).toEqual('four'); 345 | }); 346 | }); 347 | 348 | describe('TransformJoin', () => { 349 | test('perform without glue', async () => { 350 | transform = new TransformJoin({ 351 | value: ['one', 'two', 'three'], 352 | }); 353 | 354 | expect(transform.doTransform()).toEqual('one two three'); 355 | }); 356 | 357 | test('perform with glue', async () => { 358 | transform = new TransformJoin({ 359 | value: ['one', 'two', 'three'], 360 | options: { 361 | glue: ', ', 362 | }, 363 | }); 364 | 365 | expect(transform.doTransform()).toEqual('one, two, three'); 366 | }); 367 | 368 | test('perform with wrong value', async () => { 369 | transform = new TransformJoin({ 370 | value: 'one two', 371 | }); 372 | 373 | expect(transform.doTransform()).toEqual('one two'); 374 | }); 375 | }); 376 | 377 | describe('TransformMatch', () => { 378 | test('perform with default value', async () => { 379 | transform = new TransformMatch({ 380 | value: 'one/two/three', 381 | options: { 382 | re: [ 383 | '^([^/]+)/([^/]+)/([^/]+)$', 384 | ], 385 | }, 386 | }); 387 | 388 | expect(transform.doTransform()).toEqual('one/two/three'); 389 | }); 390 | 391 | test('perform with specified index', async () => { 392 | transform = new TransformMatch({ 393 | value: 'one/two/three', 394 | options: { 395 | re: [ 396 | '^([^/]+)/([^/]+)/([^/]+)$', 397 | ], 398 | index: 1, 399 | }, 400 | }); 401 | 402 | expect(transform.doTransform()).toEqual('one'); 403 | }); 404 | 405 | test('perform with index=any returning true', async () => { 406 | transform = new TransformMatch({ 407 | value: 'one/two/three', 408 | options: { 409 | re: [ 410 | '^([^/]+)/([^/]+)/([^/]+)$', 411 | ], 412 | index: 'any', 413 | }, 414 | }); 415 | 416 | expect(transform.doTransform()).toEqual(true); 417 | }); 418 | 419 | test('perform with index=any returning false', async 
() => { 420 | transform = new TransformMatch({ 421 | value: 'one/two/three', 422 | options: { 423 | re: [ 424 | '^([^/]+)\\.([^/]+)\\.([^/]+)$', 425 | ], 426 | index: 'any', 427 | }, 428 | }); 429 | 430 | expect(transform.doTransform()).toEqual(false); 431 | }); 432 | 433 | test('perform with index=all', async () => { 434 | transform = new TransformMatch({ 435 | value: 'one', 436 | options: { 437 | re: [ 438 | '(.+)', 439 | 'g', 440 | ], 441 | index: 'all', 442 | }, 443 | }); 444 | expect(transform.doTransform()).toEqual(['one']); 445 | }); 446 | 447 | test('perform returning no matches', async () => { 448 | transform = new TransformMatch({ 449 | value: 'one', 450 | options: { 451 | re: [ 452 | 'two', 453 | ], 454 | }, 455 | }); 456 | expect(transform.doTransform()).toEqual(null); 457 | }); 458 | 459 | test('perform with wrong value', async () => { 460 | transform = new TransformMatch({ 461 | value: ['one'], 462 | options: { 463 | re: [ 464 | 'one', 465 | ], 466 | }, 467 | }); 468 | expect(transform.doTransform()).toEqual(null); 469 | }); 470 | 471 | test('perform with wrong index', async () => { 472 | transform = new TransformMatch({ 473 | value: 'one', 474 | options: { 475 | re: [ 476 | 'one', 477 | ], 478 | index: 7, 479 | }, 480 | }); 481 | expect(transform.doTransform()).toEqual(null); 482 | }); 483 | 484 | test('perform with index as array', async () => { 485 | transform = new TransformMatch({ 486 | value: 'one', 487 | options: { 488 | re: [ 489 | 'one', 490 | ], 491 | index: [0, 7], 492 | }, 493 | }); 494 | expect(transform.doTransform()).toEqual('one'); 495 | }); 496 | }); 497 | 498 | describe('TransformPick', () => { 499 | test('perform', async () => { 500 | transform = new TransformPick({ 501 | value: { 502 | one: '1', 503 | two: '2', 504 | three: '3', 505 | }, 506 | options: { 507 | prop: ['one', 'three'], 508 | }, 509 | }); 510 | 511 | expect(transform.doTransform()).toEqual({ 512 | one: '1', 513 | three: '3', 514 | }); 515 | }); 516 | }); 517 | 518 | 
describe('TransformPluck', () => { 519 | test('perform', async () => { 520 | transform = new TransformPluck({ 521 | value: [ 522 | { 'user': 'barney', 'age': 36 }, 523 | { 'user': 'fred', 'age': 40 }, 524 | ], 525 | options: { 526 | path: 'user', 527 | }, 528 | }); 529 | 530 | expect(transform.doTransform()).toEqual(['barney', 'fred']); 531 | }); 532 | }); 533 | 534 | describe('TransformReplace', () => { 535 | test('perform with correct values', async () => { 536 | transform = new TransformReplace({ 537 | value: 'one/two/three', 538 | options: { 539 | re: ['^([^/]+)/([^/]+)/([^/]+)$'], 540 | to: '$1.four.$3', 541 | }, 542 | }); 543 | 544 | expect(transform.doTransform()).toEqual('one.four.three'); 545 | }); 546 | 547 | test('perform with value as non string', async () => { 548 | transform = new TransformReplace({ 549 | value: ['one/two/three'], 550 | options: { 551 | re: ['^([^/]+)/([^/]+)/([^/]+)$'], 552 | to: '$1.four.$3', 553 | }, 554 | }); 555 | 556 | expect(transform.doTransform()).toEqual(''); 557 | }); 558 | 559 | test('perform with incorrect `re`', async () => { 560 | transform = new TransformReplace({ 561 | value: 'one/two/three', 562 | options: { 563 | re: '^([^/]+)/([^/]+)/([^/]+)$', 564 | to: '$1.four.$3', 565 | }, 566 | }); 567 | 568 | const fn = () => { 569 | return transform.doTransform(); 570 | }; 571 | 572 | expect(fn).toThrowError(/^You must pass an array as `re` to `replace` transform$/); 573 | }); 574 | }); 575 | 576 | describe('TransformDate', () => { 577 | test('perform with default locale', async () => { 578 | transform = new TransformDate({ 579 | value: '01.12.2018', 580 | options: { 581 | from: 'DD.MM.YYYY', 582 | to: 'MM-DD-YYYY', 583 | }, 584 | }); 585 | 586 | expect(transform.doTransform()).toEqual('12-01-2018'); 587 | }); 588 | 589 | test('perform with locale=en', async () => { 590 | transform = new TransformDate({ 591 | value: '01.12.2018', 592 | options: { 593 | from: 'DD.MM.YYYY', 594 | to: 'MM-DD-YYYY', 595 | locale: 'en', 596 | }, 
597 | }); 598 | 599 | expect(transform.doTransform()).toEqual('12-01-2018'); 600 | }); 601 | }); 602 | }); 603 | -------------------------------------------------------------------------------- /lib/Parser.js: -------------------------------------------------------------------------------- 1 | const debugLib = require('debug'); 2 | const debug = debugLib('Parser'); 3 | const clone = require('lodash.clone'); 4 | const Actions = require('./Actions'); 5 | const Transforms = require('./Transforms'); 6 | const Storage = require('./Storage'); 7 | const Scope = require('./Scope'); 8 | const { waitForEvaluate } = require('./tools/wait'); 9 | const MAX_MILESTONE_ATTEMPTS = 2; 10 | 11 | /** 12 | * @typedef {object} Rule 13 | * @property {?string} scope 14 | * @property {?string} parentScope 15 | * @property {?string} jsScope 16 | * @property {?string} jsParentScope 17 | * @property {string} name 18 | * @property {?Array.} actions 19 | * @property {?Array.} postActions 20 | * @property {?(Grid|Collection)} collection 21 | * @property {?Array.} transform 22 | * @property {?boolean} rulesFromActions 23 | * @property {?string} separator 24 | * @property {?string} type 25 | * @property {?string} attr 26 | * @property {?string} prop 27 | * @property {?number} child 28 | * @property {?boolean|Function} id 29 | * @property {?boolean} inject 30 | * @property {?number} injectionTimeout 31 | * @property {?object} catchError 32 | * @property {string} get 33 | * @property {string} set 34 | * @property {string} add 35 | * @property {string} unset 36 | * @property {*} value 37 | * 38 | */ 39 | 40 | /** 41 | * @typedef {object} ActionOptions 42 | * @property {string} type 43 | * @property {?string} scope 44 | * @property {?string} parentScope 45 | * @property {?string} jsScope 46 | * @property {?string} jsParentScope 47 | * @property {?object} waitFor 48 | * @property {string} waitFor.type 49 | * @property {?object} waitForQuery 50 | * @property {string} waitForQuery.uri pattern of uri 
which will be awaiting 51 | * @property {string} waitForQuery.timeout 52 | * @property {?boolean} waitForPage 53 | * @property {?number} waitForPageTimeout 54 | * @property {?boolean} once 55 | * @property {?boolean} __done - set after action was performed first time 56 | * @property {?Array.} cases 57 | * @property {?Array.} conditions 58 | * @property {?Array.} transform 59 | * @property {?string} set 60 | * @property {?object} change 61 | * @property {?boolean} useActionsResult 62 | */ 63 | 64 | /** 65 | * @typedef {ActionOptions} WaitAction 66 | * @property {?number} timeout 67 | */ 68 | 69 | /** 70 | * @typedef {Array.} Collection 71 | */ 72 | 73 | /** 74 | * @typedef {Array.>} Grid 75 | */ 76 | 77 | /** 78 | * @typedef {object} TransformOptions 79 | * @property {string} type 80 | */ 81 | 82 | /** 83 | * type=date 84 | * @typedef {TransformOptions} DateTransform 85 | * @property {?string} locale 86 | * @property {string} from - date format for parsing 87 | * @property {string} to - desired date format 88 | */ 89 | 90 | /** 91 | * type=replace 92 | * @typedef {TransformOptions} ReplaceTransform 93 | * @property {?string} locale 94 | * @property {Array.} re - args for RegExp 95 | * @property {string} to - string to replace to 96 | */ 97 | 98 | const RULE_TYPE = { 99 | SIMPLE: 'simple', 100 | COLLECTION: 'collection', 101 | GRID: 'grid', 102 | ACTIONS_RESULT: 'actionsResult', 103 | GET: 'get', 104 | VALUE: 'value', 105 | INJECTION: 'injection' 106 | }; 107 | 108 | const PARSING_MODE = { 109 | SINGLE: 'single', 110 | MULTIPLE: 'multiple', 111 | }; 112 | 113 | class Parser { 114 | /** 115 | * @param {object} options 116 | * @param {AbstractEnvironment} options.environment 117 | * @param {?Paginator} options.paginator 118 | * @param {?boolean} options.clearDom 119 | */ 120 | constructor(options) { 121 | if (!options.environment) { 122 | throw new Error('\'environment\' should be specified'); 123 | } 124 | 125 | this._env = options.environment; 126 | this._paginator 
= options.paginator; 127 | this.clearDom = options.clearDom || false; 128 | this.mode = options.mode || 'single'; 129 | this._domScope = new Scope(); 130 | this._jsScope = new Scope(); 131 | 132 | /** 133 | * @type {?Rule} 134 | * @private 135 | */ 136 | this._rules = null; 137 | 138 | /** 139 | * @type {Array} 140 | * @private 141 | */ 142 | this._preActions = null; 143 | 144 | this._storage = new Storage({ 145 | 'environment:options': this._env.getOptions() 146 | }); 147 | 148 | this._actions = new Actions({ 149 | environment: this._env, 150 | parser: this, 151 | storage: this._storage 152 | }); 153 | 154 | if (this._paginator) { 155 | this._paginator 156 | .setEnvironment(this._env) 157 | .setActions(this._actions); 158 | } 159 | 160 | this._transforms = new Transforms({storage: this._storage}); 161 | } 162 | 163 | /** 164 | * @param {object} options 165 | * @param {Rule} options.rules 166 | * @param {String} options.url 167 | * @param {Array.} options.actions 168 | * @param {Array.} options.transform 169 | * @returns {Promise} 170 | */ 171 | async parse(options = {}) { 172 | debug('.parse() has called'); 173 | this._rules = options.rules || {}; 174 | this._preActions = options.actions || null; 175 | 176 | let results; 177 | 178 | try { 179 | await this._env.prepare(); 180 | if (this.mode === PARSING_MODE.MULTIPLE && options.url) { 181 | await this._env.goto(options.url); 182 | } 183 | 184 | if (this._paginator) { 185 | this._paginator.reset(); 186 | } 187 | 188 | if (this._preActions) { 189 | await this._actions.performActions(this._preActions); 190 | } 191 | 192 | results = await this._parseRootRule(); 193 | 194 | if (options.transform) { 195 | results = this._transforms.produce(options.transform, results); 196 | } 197 | 198 | if (this._paginator) { 199 | results = await this._paginate(results); 200 | } 201 | if (this.mode === PARSING_MODE.SINGLE) { 202 | await this.finish(); 203 | } 204 | } catch (e) { 205 | await this.finish(); 206 | throw e; 207 | } 208 | 
209 | return results; 210 | } 211 | 212 | async finish() { 213 | try { 214 | await this._env.tearDown(); 215 | } catch (e) { 216 | try { 217 | await this._env.snapshot('error'); 218 | await this._env.tearDown(); 219 | } catch (snapshotError) { 220 | await this._env.tearDown(); 221 | } 222 | throw e; 223 | } 224 | } 225 | 226 | async moveYourFeet(stages) { 227 | debug('Hit the road!'); 228 | const milestones = stages.milestones; 229 | const edgeCases = stages.edgeCases; 230 | 231 | try { 232 | await this._env.prepare(); 233 | await milestones.reduce(async (promise, milestone) => { 234 | await promise; 235 | return this.passMilestone(milestone, edgeCases); 236 | }, Promise.resolve()); 237 | 238 | await this._env.tearDown(); 239 | } catch (e) { 240 | await this._env.snapshot('error'); 241 | this._env.tearDown(); 242 | throw e; 243 | } 244 | } 245 | 246 | async passMilestone(milestone, edgeCases, attemptNumber) { 247 | attemptNumber = attemptNumber || 0; 248 | debug('Passing %o milestone, attempt #%s', milestone, attemptNumber); 249 | 250 | try { 251 | const result = await this._actions.performActions(milestone.condition); 252 | if (!result) { 253 | debug('Milestone condition failed'); 254 | return this.catchFailedMilestone(milestone, edgeCases, attemptNumber, 'Milestone condition failed'); 255 | } 256 | 257 | debug('Milestone condition passed, passing milestone'); 258 | return this.processRule(milestone.rules, 0); 259 | } catch (e) { 260 | debug('Caught milestone error %o', e.stack || e); 261 | return this.catchFailedMilestone(milestone, edgeCases, attemptNumber, e); 262 | } 263 | } 264 | 265 | async catchFailedMilestone(milestone, edgeCases, attemptNumber, originalError) { 266 | debug('Catching failing milestone'); 267 | if (attemptNumber > MAX_MILESTONE_ATTEMPTS) { 268 | throw new Error(`Milestone failed more than ${MAX_MILESTONE_ATTEMPTS} times, original error: ${originalError.stack || originalError}`); 269 | } 270 | 271 | const edgeCasesHandled = await 
this.handleEdgeCases(edgeCases); 272 | if (!edgeCasesHandled) { 273 | debug('Catching edge cases failed'); 274 | return this.catchFailedMilestone(milestone, edgeCases, attemptNumber + 1, originalError); 275 | } 276 | 277 | debug('Edge case handled, another try to pass milestone'); 278 | return this.passMilestone(milestone, edgeCases, attemptNumber + 1); 279 | } 280 | 281 | async handleEdgeCases(edgeCases) { 282 | return edgeCases.reduce(async (promise, edgeCase) => { 283 | const result = await promise; 284 | if (result) { 285 | return Promise.resolve(result); 286 | } 287 | 288 | return this.handleEdgeCase(edgeCase); 289 | }, Promise.resolve(false)); 290 | } 291 | 292 | async handleEdgeCase(edgeCase) { 293 | debug('Handling edge case %o', edgeCase); 294 | 295 | try { 296 | const result = await this._actions.performActions(edgeCase.condition); 297 | if (!result) { 298 | debug('Edge case condition failed'); 299 | return false; 300 | } 301 | 302 | debug('Edge case condition is true, trying to handle the case'); 303 | return this.processRule(edgeCase.rules, 0); 304 | } catch (e) { 305 | debug('Caught edge case error %o', e.stack || e); 306 | return false; 307 | } 308 | } 309 | 310 | /** 311 | * @see {@link Actions#addAction} 312 | */ 313 | addAction(type, action) { 314 | return this._actions.addAction(type, action); 315 | } 316 | 317 | /** 318 | * @see {@link Transforms#addTransform} 319 | */ 320 | addTransform(type, transform) { 321 | return this._transforms.addTransform(type, transform) 322 | } 323 | 324 | /** 325 | * @param {number} [offset] 326 | * @returns {Promise} 327 | * @private 328 | */ 329 | async _parseRootRule(offset) { 330 | offset = offset || 0; 331 | debug('Parsing root rule with offset: %s', offset); 332 | return this.processRule(this._rules, offset); 333 | } 334 | 335 | /** 336 | * @param {Rule} rule 337 | * @param {number} [offset] 338 | * @returns {Promise} 339 | */ 340 | async processRule(rule, offset) { 341 | debug('Process rule %o', rule); 342 | 
let scopePushed = false; 343 | let jsScopePushed = false; 344 | if (rule.jsScope) { 345 | this._jsScope.push(rule.jsScope, rule.jsParentScope); 346 | jsScopePushed = true; 347 | } 348 | if (rule.scope) { 349 | this._domScope.push(rule.scope, rule.parentScope); 350 | scopePushed = true; 351 | } 352 | const domSelector = this._domScope.getSelector(); 353 | 354 | const actionsResult = await this._actions.performForRule(rule, domSelector); 355 | let actionsScopePushed = false; 356 | let actionsJsScopePushed = false; 357 | if (rule.rulesFromActions) { 358 | if (!actionsResult) { 359 | throw new Error('Rule node marked with "rulesFromActions" flag should return rules from action. Got nothing.'); 360 | } 361 | debug('Rules extracted from action %o', rule); 362 | // use child transform or parent transform or nothing 363 | actionsResult.transform = actionsResult.transform || rule.transform || false; 364 | if (!('inject' in actionsResult)) { 365 | actionsResult.inject = rule.inject; 366 | } 367 | if ('scope' in actionsResult) { 368 | this._domScope.push(actionsResult.scope, actionsResult.parentScope); 369 | actionsScopePushed = true; 370 | } 371 | if ('jsScope' in actionsResult) { 372 | this._jsScope.push(actionsResult.jsScope, actionsResult.jsParentScope); 373 | actionsJsScopePushed = true; 374 | } 375 | 376 | rule = actionsResult; 377 | 378 | if ('actions' in rule) { 379 | await this._actions.performForRule(rule, domSelector); 380 | } 381 | } 382 | 383 | const results = await this._parseScope(rule, offset, actionsResult); 384 | 385 | if (actionsScopePushed) { 386 | this._domScope.pop(); 387 | } 388 | if (actionsJsScopePushed) { 389 | this._jsScope.pop(); 390 | } 391 | 392 | await this._actions.performPostActionsForRule.bind(this._actions, rule, domSelector); 393 | 394 | if (scopePushed) { 395 | this._domScope.pop(); 396 | } 397 | if (jsScopePushed) { 398 | this._jsScope.pop(); 399 | } 400 | 401 | return results; 402 | } 403 | 404 | /** 405 | * Parse a scope 406 | * @param 
{Rule} rule parsing rule 407 | * @param {number} [offset] offset for GridRule 408 | * @param {*} [actionsResults] 409 | * @returns {Promise} 410 | * @private 411 | */ 412 | async _parseScope(rule, offset, actionsResults) { 413 | let results; 414 | const ruleType = this._getRuleType(rule); 415 | debug('Parse %s rule', ruleType); 416 | switch (ruleType) { 417 | case RULE_TYPE.ACTIONS_RESULT: 418 | results = actionsResults; 419 | break; 420 | 421 | case RULE_TYPE.GET: 422 | results = this._storage.get(rule.get); 423 | break; 424 | 425 | case RULE_TYPE.VALUE: 426 | results = rule.value; 427 | break; 428 | 429 | case RULE_TYPE.GRID: 430 | results = await this._parseGridRule(rule, offset); 431 | break; 432 | 433 | case RULE_TYPE.COLLECTION: 434 | results = await this._parseCollectionRule(rule); 435 | break; 436 | 437 | case RULE_TYPE.SIMPLE: 438 | results = await this._parseSimpleRule(rule); 439 | break; 440 | 441 | case RULE_TYPE.INJECTION: 442 | results = await this._injectBrowserRule(rule, offset, actionsResults); 443 | break; 444 | } 445 | 446 | const extract = (results, ruleType, dataType) => { 447 | if ( 448 | ruleType === RULE_TYPE.SIMPLE && 449 | dataType === 'array' && 450 | Array.isArray(results) && 451 | results.length === 1 && 452 | Array.isArray(results[0]) 453 | ) { 454 | debug('Extracted %o', results[0]); 455 | return results[0]; 456 | } 457 | 458 | return results; 459 | }; 460 | 461 | const format = results => { 462 | if ([RULE_TYPE.SIMPLE, RULE_TYPE.GET, RULE_TYPE.VALUE, RULE_TYPE.COLLECTION].includes(ruleType)) { 463 | if (Array.isArray(results) && rule.type !== 'array') { 464 | return results.length === 1 ? 
results[0] : results.join(rule.separator || ' '); 465 | } 466 | if (!Array.isArray(results) && rule.type === 'array') { 467 | return [results]; 468 | } 469 | } 470 | 471 | return results; 472 | }; 473 | 474 | const updateResultsInStore = results => { 475 | if (rule.set) { 476 | this._storage.set(rule.set, results); 477 | } 478 | if (rule.add) { 479 | const current = this._storage.get(rule.add) || []; 480 | current.push(results); 481 | this._storage.set(rule.add, current); 482 | } 483 | if (rule.unset) { 484 | this._storage.unset(rule.unset); 485 | } 486 | return results; 487 | }; 488 | 489 | if (!rule.transform) { 490 | results = format(results); 491 | return updateResultsInStore(results); 492 | } 493 | 494 | results = format(this.transform(results, rule.transform)); 495 | results = extract(results, ruleType, rule.type); 496 | return updateResultsInStore(results); 497 | } 498 | 499 | /** 500 | * Perform transformation on results 501 | * @param results 502 | * @param transform 503 | * @returns {*} 504 | */ 505 | transform(results, transform) { 506 | if (Array.isArray(results)) { 507 | results = results.map((result) => { 508 | if (typeof result === 'string') { 509 | result = result.trim(); 510 | } 511 | return this._transforms.produce(transform, result); 512 | }, this); 513 | } else { 514 | results = this._transforms.produce(transform, results); 515 | } 516 | 517 | return results; 518 | } 519 | 520 | /** 521 | * Get rule type 522 | * @param {Object} rule 523 | * @returns {string} 524 | */ 525 | _getRuleType(rule) { 526 | if (rule.inject) { 527 | return RULE_TYPE.INJECTION; 528 | } 529 | 530 | if (rule.useActionsResult) { 531 | return RULE_TYPE.ACTIONS_RESULT; 532 | } 533 | 534 | if (rule.get) { 535 | return RULE_TYPE.GET; 536 | } 537 | 538 | if (typeof rule.value !== 'undefined') { 539 | return RULE_TYPE.VALUE; 540 | } 541 | 542 | const isCollection = Array.isArray(rule.collection); 543 | if (isCollection) { 544 | if (Array.isArray(rule.collection[0])) { 545 | return 
RULE_TYPE.GRID; 546 | } 547 | 548 | return RULE_TYPE.COLLECTION; 549 | } 550 | 551 | return RULE_TYPE.SIMPLE; 552 | } 553 | 554 | /** 555 | * Parse Grid rule 556 | * @param {Rule} rule 557 | * @param {number} [offset] 558 | * @returns {Promise} 559 | * @private 560 | */ 561 | async _parseGridRule(rule, offset) { 562 | debug('._parseGridRule() has called'); 563 | offset = offset || 0; 564 | const maxItems = rule.maxItems || null; 565 | const collection = rule.collection[0]; 566 | let nodesCount = await this._env.evaluateJs( 567 | this._domScope.getSelector(), 568 | this._jsScope.getSelector(), 569 | /* istanbul ignore next */ function(domSelector, jsSelector) { 570 | var domResult = domSelector && Sizzle(domSelector).length; 571 | if (domSelector) { 572 | return domResult; 573 | } 574 | var jsObject = jsSelector && eval(jsSelector); 575 | if (jsObject && Array.isArray(jsObject)) { 576 | return jsObject.length; 577 | } 578 | }); 579 | if (!nodesCount) { 580 | return []; 581 | } 582 | if (maxItems && nodesCount > maxItems) { 583 | nodesCount = maxItems; 584 | } 585 | debug('parsing %s nodes', nodesCount); 586 | 587 | const scope = this._domScope.pop(); 588 | const jsScope = this._jsScope.pop(); 589 | const results = await this._parseRow({ 590 | collection: collection, 591 | nodesCount: nodesCount - 1 - offset, 592 | offset: offset, 593 | scope: scope, 594 | jsScope: jsScope, 595 | results: [] 596 | }); 597 | if (scope) { 598 | this._domScope.push(scope.scope, scope.parentScope); 599 | } 600 | if (jsScope) { 601 | this._jsScope.push(jsScope.scope, jsScope.parentScope); 602 | } 603 | 604 | debug('._parseGridRule() results %o', results); 605 | return results; 606 | } 607 | 608 | /** 609 | * Parse row of Grid rule 610 | * @param {object} options 611 | * @returns {Promise} 612 | * @private 613 | */ 614 | async _parseRow(options) { 615 | const {scope, jsScope} = options; 616 | const domSelector = scope ? 
scope.scope + ':eq(' + options.offset + ')' : null; 617 | const jsSelector = jsScope ? jsScope.scope + '[' + options.offset + ']' : null; 618 | debug('._parseRow() has called for %s | %s', domSelector, jsSelector); 619 | if (domSelector) { 620 | this._domScope.push(domSelector, scope.parentScope); 621 | } 622 | if (jsSelector) { 623 | this._jsScope.push(jsSelector, jsSelector.parentScope); 624 | } 625 | 626 | const row = await this._parseCollectionRule({ 627 | collection: options.collection 628 | }); 629 | options.results.push(row); 630 | if (domSelector) { 631 | this._domScope.pop(); 632 | } 633 | if (jsSelector) { 634 | this._jsScope.pop(); 635 | } 636 | 637 | options.nodesCount--; 638 | if (options.nodesCount >= 0) { 639 | options.offset++; 640 | return this._parseRow(options); 641 | } 642 | 643 | const results = options.results; 644 | if (this.clearDom) { 645 | debug('clear parsed dom for %s', domSelector); 646 | await this._env.evaluateJs(domSelector, /* istanbul ignore next */ function(domSelector) { 647 | const parsedElement = Sizzle(domSelector)[0]; 648 | if (!parsedElement) { 649 | return; 650 | } 651 | const boundingRect = parsedElement.getBoundingClientRect(); 652 | parsedElement.innerHTML = ''; 653 | parsedElement.style.height = boundingRect.height + 'px'; 654 | parsedElement.style.width = boundingRect.width + 'px'; 655 | }); 656 | } 657 | return results; 658 | } 659 | 660 | async _injectBrowserRule(rule, offset, actionsResults) { 661 | debug('._injectBrowserRule()'); 662 | let internalGooseResults, internalGooseError; 663 | await this._env.injectBrowserEnv(); 664 | await this._env.evaluateJs(rule, offset, this._domScope, function(rule, offset, scopes) { 665 | __gooseParse(rule, offset, scopes); 666 | }); 667 | await waitForEvaluate(this._env, 668 | () => { 669 | return [__gooseResults, __gooseError]; 670 | }, 671 | resultsToCheck => { 672 | internalGooseResults = resultsToCheck[0]; 673 | internalGooseError = resultsToCheck[1]; 674 | return 
internalGooseResults || internalGooseError; 675 | }, 676 | () => false, 677 | null, 678 | rule.injectionTimeout 679 | ); 680 | 681 | if (internalGooseError) { 682 | throw internalGooseError; 683 | } 684 | 685 | return internalGooseResults; 686 | } 687 | 688 | /** 689 | * Parse Collection rule 690 | * @param {Rule} rule 691 | * @returns {Promise} 692 | * @private 693 | */ 694 | async _parseCollectionRule(rule) { 695 | debug('._parseCollectionRule() has called for rule %o', rule); 696 | 697 | const collection = rule.collection; 698 | const results = await collection.reduce(async (accumulator, rule) => { 699 | accumulator = await accumulator; 700 | let result = await this.processRule(rule); 701 | let name; 702 | switch (typeof rule.id) { 703 | case 'boolean': 704 | name = '_id'; 705 | break; 706 | case 'function': 707 | name = '_id'; 708 | result = rule.id.call(this, rule, result); 709 | break; 710 | default: 711 | name = rule.name; 712 | } 713 | if (!rule.virtual) { 714 | accumulator[name] = result; 715 | } 716 | return accumulator; 717 | }, {}); 718 | debug('._parseCollectionRule() result %o', results); 719 | return results; 720 | } 721 | 722 | /** 723 | * @param {Rule} rule 724 | * @returns {{type: string, value: string|number}} 725 | * @private 726 | */ 727 | _getSimpleRuleFilter(rule) { 728 | const filter = { 729 | type: 'text' 730 | }; 731 | if (typeof rule.child !== 'undefined') { 732 | filter.type = 'child'; 733 | filter.value = rule.child; 734 | } else if (rule.attr) { 735 | filter.type = 'attr'; 736 | filter.value = rule.attr; 737 | } else if (rule.prop) { 738 | filter.type = 'prop'; 739 | filter.value = rule.prop; 740 | } 741 | 742 | return filter; 743 | } 744 | 745 | /** 746 | * Parse simple rule 747 | * @param {Rule} rule 748 | * @returns {Promise} 749 | * @private 750 | */ 751 | async _parseSimpleRule(rule) { 752 | const selector = this._domScope.getSelector(); 753 | const jsSelector = rule.jsScope ? 
this._jsScope.getSelector() : ''; 754 | const filter = this._getSimpleRuleFilter(rule); 755 | debug('._parseSimpleRule() has called for selector %s with filter %o', selector, filter); 756 | const results = await this._env.evaluateJs(selector, jsSelector, filter, /* istanbul ignore next */ function(selector, jsSelector, filter) { 757 | if (jsSelector) { 758 | const value = eval(jsSelector); 759 | return Array.isArray(value) ? value : [value]; 760 | } 761 | 762 | const nodes = Sizzle(selector); 763 | return nodes.map(function(node) { 764 | switch (filter.type) { 765 | case 'child': 766 | const childNode = node.childNodes[filter.value]; 767 | return childNode ? childNode.textContent : ''; 768 | case 'attr': 769 | if (typeof filter.value === 'object' && Array.isArray(filter.value.or)) { 770 | const res = filter.value.or.map(function(value) { 771 | return node.getAttribute(value); 772 | }).filter(Boolean); 773 | return res.pop(); 774 | } 775 | return node.getAttribute(filter.value); 776 | case 'prop': 777 | return node[filter.value]; 778 | default: 779 | return node.textContent; 780 | } 781 | }); 782 | }); 783 | if (!results) { 784 | throw new Error('Error during querying selector: ' + (selector || jsSelector)); 785 | } 786 | debug('._parseSimpleRule() result %o', results); 787 | return results; 788 | } 789 | 790 | /** 791 | * @param results 792 | * @returns {Promise.<*>} 793 | * @private 794 | */ 795 | async _paginate(results) { 796 | debug('Pagination...'); 797 | const pagination = await this._paginator.paginate(); 798 | if (pagination.done) { 799 | return results; 800 | } 801 | 802 | const offset = this._paginator.resetCollectionOffsetOnNewPage() ? 
0 : results.length; 803 | 804 | const pageResults = await this._parseRootRule(offset); 805 | debug('Pagination results %o', pageResults); 806 | results = results.concat(pageResults); 807 | const maxResults = this._paginator.getMaxResultsCount() - 1; 808 | if (results.length > maxResults) { 809 | results = results.slice(0, maxResults); 810 | return results; 811 | } 812 | return this._paginate(results); 813 | } 814 | } 815 | 816 | module.exports = Parser; 817 | -------------------------------------------------------------------------------- /test/integration/actions/actions.test.js: -------------------------------------------------------------------------------- 1 | /* eslint-env jest */ 2 | 3 | const ChromeEnvironment = require('goose-chrome-environment'); 4 | const { fileExists, removeFile, createTestServer, setServerResponse, url } = require('../../tools'); 5 | const Parser = require('../../../lib/Parser'); 6 | 7 | jest.setTimeout(30000); 8 | describe('Actions', () => { 9 | let testServer; 10 | 11 | beforeAll(async () => { 12 | testServer = await createTestServer(); 13 | }); 14 | 15 | afterAll(async () => { 16 | await testServer.close(); 17 | }); 18 | 19 | describe('ActionBlur', () => { 20 | test('perform', async () => { 21 | setServerResponse({ 22 | html: ``, 23 | fn: () => { 24 | document.querySelector('[type="text"]').addEventListener('blur', ({ target }) => { 25 | const value = '1'; 26 | target.value = value; 27 | target.setAttribute('value', value); 28 | }); 29 | }, 30 | }); 31 | const parser = new Parser({ 32 | environment: new ChromeEnvironment({ url }), 33 | }); 34 | const result = await parser.parse({ 35 | actions: [ 36 | { 37 | type: 'focus', 38 | scope: '[type="text"]', 39 | }, 40 | { 41 | type: 'blur', 42 | scope: '[type="text"]', 43 | }, 44 | { 45 | type: 'pause', 46 | timeout: 50, 47 | }, 48 | ], 49 | rules: { 50 | scope: '[type="text"]', 51 | attr: 'value', 52 | }, 53 | }); 54 | 55 | expect(result).toEqual('1'); 56 | }); 57 | }); 58 | 59 | 
describe('ActionClick', () => { 60 | test('perform', async () => { 61 | setServerResponse({ 62 | html: ``, 63 | fn: () => { 64 | document.querySelector('[type="text"]').addEventListener('click', ({ target }) => { 65 | const value = '1'; 66 | target.value = value; 67 | target.setAttribute('value', value); 68 | }); 69 | }, 70 | }); 71 | const parser = new Parser({ 72 | environment: new ChromeEnvironment({ url }), 73 | }); 74 | const result = await parser.parse({ 75 | actions: [ 76 | { 77 | type: 'click', 78 | scope: '[type="text"]', 79 | }, 80 | { 81 | type: 'pause', 82 | timeout: 50, 83 | }, 84 | ], 85 | rules: { 86 | scope: '[type="text"]', 87 | attr: 'value', 88 | }, 89 | }); 90 | 91 | expect(result).toEqual('1'); 92 | }); 93 | }); 94 | 95 | describe('ActionMouseClick', () => { 96 | test('perform', async () => { 97 | setServerResponse({ 98 | html: ``, 99 | fn: () => { 100 | document.querySelector('[type="text"]').addEventListener('click', ({ target }) => { 101 | const value = '1'; 102 | target.value = value; 103 | target.setAttribute('value', value); 104 | }); 105 | }, 106 | }); 107 | const parser = new Parser({ 108 | environment: new ChromeEnvironment({ url }), 109 | }); 110 | const result = await parser.parse({ 111 | actions: [ 112 | { 113 | type: 'mouseClick', 114 | scope: '[type="text"]', 115 | }, 116 | { 117 | type: 'pause', 118 | timeout: 50, 119 | }, 120 | ], 121 | rules: { 122 | scope: '[type="text"]', 123 | attr: 'value', 124 | }, 125 | }); 126 | 127 | expect(result).toEqual('1'); 128 | }); 129 | }); 130 | 131 | describe('ActionMouseDown', () => { 132 | test('perform', async () => { 133 | setServerResponse({ 134 | html: ``, 135 | fn: () => { 136 | window.addEventListener('mousedown', () => { 137 | const value = '1'; 138 | const target = document.querySelector('[type="text"]'); 139 | target.value = value; 140 | target.setAttribute('value', value); 141 | }); 142 | }, 143 | }); 144 | const parser = new Parser({ 145 | environment: new ChromeEnvironment({ url 
}), 146 | }); 147 | const result = await parser.parse({ 148 | actions: [ 149 | { 150 | type: 'mouseDown', 151 | scope: '[type="text"]', 152 | }, 153 | { 154 | type: 'pause', 155 | timeout: 500, 156 | }, 157 | ], 158 | rules: { 159 | scope: '[type="text"]', 160 | attr: 'value', 161 | }, 162 | }); 163 | 164 | expect(result).toEqual('1'); 165 | }); 166 | }); 167 | 168 | describe('ActionMouseUp', () => { 169 | test('perform', async () => { 170 | setServerResponse({ 171 | html: ``, 172 | fn: () => { 173 | window.addEventListener('mousedown', () => { 174 | const value = '1'; 175 | const target = document.querySelector('[type="text"]'); 176 | target.value = value; 177 | target.setAttribute('value', value); 178 | }); 179 | }, 180 | }); 181 | const parser = new Parser({ 182 | environment: new ChromeEnvironment({ url }), 183 | }); 184 | const result = await parser.parse({ 185 | actions: [ 186 | { 187 | type: 'mouseDown', 188 | scope: '[type="text"]', 189 | }, 190 | { 191 | type: 'mouseUp', 192 | scope: '[type="text"]', 193 | }, 194 | { 195 | type: 'pause', 196 | timeout: 50, 197 | }, 198 | ], 199 | rules: { 200 | scope: '[type="text"]', 201 | attr: 'value', 202 | }, 203 | }); 204 | 205 | expect(result).toEqual('1'); 206 | }); 207 | }); 208 | 209 | describe('ActionFocus', () => { 210 | test('perform', async () => { 211 | setServerResponse({ 212 | html: ``, 213 | fn: () => { 214 | document.querySelector('[type="text"]').addEventListener('focus', ({ target }) => { 215 | const value = '1'; 216 | target.value = value; 217 | target.setAttribute('value', value); 218 | }); 219 | }, 220 | }); 221 | const parser = new Parser({ 222 | environment: new ChromeEnvironment({ url }), 223 | }); 224 | const result = await parser.parse({ 225 | actions: [ 226 | { 227 | type: 'focus', 228 | scope: '[type="text"]', 229 | }, 230 | { 231 | type: 'pause', 232 | timeout: 50, 233 | }, 234 | ], 235 | rules: { 236 | scope: '[type="text"]', 237 | attr: 'value', 238 | }, 239 | }); 240 | 241 | 
expect(result).toEqual('1'); 242 | }); 243 | }); 244 | 245 | describe('ActionClickWithWaitForPage', () => { 246 | test('perform', async () => { 247 | setServerResponse([ 248 | { 249 | route: '/', 250 | html: `test` 251 | }, 252 | { 253 | route: '/test', 254 | html: `1` 255 | } 256 | ]); 257 | const parser = new Parser({ 258 | environment: new ChromeEnvironment({ url }), 259 | }); 260 | const result = await parser.parse({ 261 | actions: [ 262 | { 263 | type: 'click', 264 | scope: 'a', 265 | waitFor: 'page', 266 | }, 267 | ], 268 | rules: { 269 | scope: 'a', 270 | }, 271 | }); 272 | 273 | expect(result).toEqual('1'); 274 | }); 275 | }); 276 | 277 | describe('ActionClickWithWaitForPattern', () => { 278 | test('perform', async () => { 279 | setServerResponse({ 280 | html: `test`, 281 | fn: () => { 282 | // the phone number appears after some time in the link 283 | document.querySelector('a').addEventListener('click', ({ target }) => { 284 | setTimeout(function () { 285 | target.setAttribute('href', 'tel:+123456890102'); 286 | }, 500); 287 | }); 288 | } 289 | }); 290 | const parser = new Parser({ 291 | environment: new ChromeEnvironment({ url }), 292 | }); 293 | const result = await parser.parse({ 294 | actions: [ 295 | { 296 | type: 'click', 297 | scope: 'a', 298 | waitFor: { 299 | type: 'pattern', 300 | pattern: '^tel:', 301 | scope: 'a', 302 | attr: 'href', 303 | }, 304 | }, 305 | ], 306 | rules: { 307 | scope: 'a', 308 | attr: 'href', 309 | }, 310 | }); 311 | 312 | expect(result).toEqual('tel:+123456890102'); 313 | }); 314 | }); 315 | 316 | describe('ActionClickWithWaitForVisible', () => { 317 | test('wait visible', async () => { 318 | setServerResponse({ 319 | html: `test`, 320 | fn: () => { 321 | document.querySelector('a').addEventListener('click', ({ target }) => { 322 | setTimeout(function () { 323 | document.body.insertAdjacentHTML('beforeend', '
12345
'); 324 | }, 500); 325 | }); 326 | } 327 | }); 328 | const parser = new Parser({ 329 | environment: new ChromeEnvironment({ url }), 330 | }); 331 | const result = await parser.parse({ 332 | actions: [ 333 | { 334 | type: 'click', 335 | scope: 'a', 336 | waitFor: { 337 | type: 'visible', 338 | scope: 'div', 339 | }, 340 | }, 341 | ], 342 | rules: { 343 | scope: 'div', 344 | }, 345 | }); 346 | 347 | expect(result).toEqual('12345'); 348 | }); 349 | 350 | test('wait invisible', async () => { 351 | setServerResponse({ 352 | html: `test
12345
`, 353 | fn: () => { 354 | document.querySelector('a').addEventListener('click', ({ target }) => { 355 | setTimeout(function () { 356 | document.querySelector('div').remove(); 357 | }, 500); 358 | }); 359 | } 360 | }); 361 | const parser = new Parser({ 362 | environment: new ChromeEnvironment({ url }), 363 | }); 364 | const result = await parser.parse({ 365 | actions: [ 366 | { 367 | type: 'click', 368 | scope: 'a', 369 | waitFor: { 370 | type: 'visible', 371 | scope: 'div', 372 | visibility: false, 373 | }, 374 | }, 375 | ], 376 | rules: { 377 | scope: 'div', 378 | }, 379 | }); 380 | 381 | expect(result).toEqual(''); 382 | }); 383 | }); 384 | 385 | describe('ActionWaitForElement', () => { 386 | test('perform', async () => { 387 | setServerResponse({ 388 | html: `test`, 389 | fn: () => { 390 | document.querySelector('a').addEventListener('click', ({ target }) => { 391 | setTimeout(function () { 392 | document.body.insertAdjacentHTML('beforeend', '
12345
'); 393 | }, 500); 394 | }); 395 | } 396 | }); 397 | const parser = new Parser({ 398 | environment: new ChromeEnvironment({ url }), 399 | }); 400 | const result = await parser.parse({ 401 | actions: [ 402 | { 403 | type: 'click', 404 | scope: 'a', 405 | waitFor: { 406 | type: 'element', 407 | scope: 'div', 408 | }, 409 | }, 410 | ], 411 | rules: { 412 | scope: 'div', 413 | }, 414 | }); 415 | 416 | expect(result).toEqual('12345'); 417 | }); 418 | }); 419 | 420 | describe('ActionClickWithWaitForQuery', () => { 421 | test('perform', async () => { 422 | setServerResponse([ 423 | { 424 | route: '/', 425 | html: `test`, 426 | fn: () => { 427 | // the phone number appears after some time in the link 428 | document.querySelector('a').addEventListener('click', ({ target }) => { 429 | document.body.insertAdjacentHTML('beforeend', ''); 430 | }); 431 | } 432 | }, 433 | { 434 | route: '/12345', 435 | html: '', 436 | }, 437 | ]); 438 | const parser = new Parser({ 439 | environment: new ChromeEnvironment({ url }), 440 | }); 441 | const result = await parser.parse({ 442 | actions: [ 443 | { 444 | type: 'click', 445 | scope: 'a', 446 | waitFor: { 447 | type: 'query', 448 | uri: '12345', 449 | }, 450 | }, 451 | ], 452 | rules: { 453 | scope: 'img', 454 | attr: 'src' 455 | }, 456 | }); 457 | 458 | expect(result).toEqual('12345'); 459 | }); 460 | }); 461 | 462 | describe('ActionChangeElement', () => { 463 | test('change style, attr', async () => { 464 | setServerResponse({ 465 | html: ``, 466 | }); 467 | const parser = new Parser({ 468 | environment: new ChromeEnvironment({ url }), 469 | }); 470 | const result = await parser.parse({ 471 | actions: [ 472 | { 473 | type: 'changeElement', 474 | scope: 'img', 475 | style: { 476 | display: 'none', 477 | }, 478 | attr: { 479 | alt: 'test', 480 | }, 481 | }, 482 | ], 483 | rules: { 484 | collection: [ 485 | { 486 | name: 'alt', 487 | scope: 'img', 488 | attr: 'alt', 489 | }, 490 | { 491 | name: 'style', 492 | scope: 'img', 493 | attr: 
'style', 494 | }, 495 | ], 496 | }, 497 | }); 498 | 499 | expect(result).toEqual({ alt: 'test', style: 'display: none;' }); 500 | }); 501 | 502 | test('change nothing', async () => { 503 | setServerResponse({ 504 | html: ``, 505 | }); 506 | const parser = new Parser({ 507 | environment: new ChromeEnvironment({ url }), 508 | }); 509 | const result = await parser.parse({ 510 | actions: [ 511 | { 512 | type: 'changeElement', 513 | scope: 'img', 514 | }, 515 | ], 516 | rules: { 517 | collection: [ 518 | { 519 | name: 'alt', 520 | scope: 'img', 521 | attr: 'alt', 522 | }, 523 | { 524 | name: 'style', 525 | scope: 'img', 526 | attr: 'style', 527 | }, 528 | ], 529 | }, 530 | }); 531 | 532 | expect(result).toEqual({ alt: null, style: null }); 533 | }); 534 | }); 535 | 536 | describe('ActionType', () => { 537 | test('typing provided text', async () => { 538 | setServerResponse({ 539 | html: ``, 540 | }); 541 | const parser = new Parser({ 542 | environment: new ChromeEnvironment({ url }), 543 | }); 544 | const result = await parser.parse({ 545 | actions: [ 546 | { 547 | type: 'type', 548 | scope: 'input', 549 | text: 'test', 550 | }, 551 | ], 552 | rules: { 553 | scope: 'input', 554 | prop: 'value' 555 | }, 556 | }); 557 | 558 | expect(result).toEqual('test'); 559 | }); 560 | 561 | test('typing value from prev action', async () => { 562 | setServerResponse({ 563 | html: `test`, 564 | }); 565 | const parser = new Parser({ 566 | environment: new ChromeEnvironment({ url }), 567 | }); 568 | const result = await parser.parse({ 569 | actions: [ 570 | { 571 | type: 'parse', 572 | rules: { 573 | scope: 'span', 574 | }, 575 | }, 576 | { 577 | type: 'type', 578 | scope: 'input', 579 | useActionsResult: true, 580 | }, 581 | ], 582 | rules: { 583 | scope: 'input', 584 | prop: 'value' 585 | }, 586 | }); 587 | 588 | expect(result).toEqual('test'); 589 | }); 590 | }); 591 | 592 | describe('ActionParse', () => { 593 | test('parse nothing', async () => { 594 | setServerResponse({ 595 | html: 
`test`, 596 | }); 597 | const parser = new Parser({ 598 | environment: new ChromeEnvironment({ url }), 599 | }); 600 | const result = await parser.parse({ 601 | actions: [ 602 | { 603 | type: 'parse', 604 | }, 605 | { 606 | type: 'type', 607 | scope: 'input', 608 | useActionsResult: true, 609 | }, 610 | ], 611 | rules: { 612 | scope: 'input', 613 | prop: 'value' 614 | }, 615 | }); 616 | 617 | expect(result).toEqual(''); 618 | }); 619 | }); 620 | 621 | describe('ActionUrl', () => { 622 | test('fetching page url', async () => { 623 | setServerResponse({ 624 | html: `test`, 625 | }); 626 | const parser = new Parser({ 627 | environment: new ChromeEnvironment({ url }), 628 | }); 629 | const result = await parser.parse({ 630 | actions: [ 631 | { 632 | type: 'url', 633 | }, 634 | { 635 | type: 'type', 636 | scope: 'input', 637 | useActionsResult: true, 638 | }, 639 | ], 640 | rules: { 641 | scope: 'input', 642 | prop: 'value' 643 | }, 644 | }); 645 | 646 | expect(result).toEqual(url); 647 | }); 648 | }); 649 | 650 | describe('ActionProvideRules', () => { 651 | test('perform', async () => { 652 | setServerResponse({ 653 | html: `test`, 654 | }); 655 | const parser = new Parser({ 656 | environment: new ChromeEnvironment({ url }), 657 | }); 658 | const result = await parser.parse({ 659 | rules: { 660 | actions: [ 661 | { 662 | type: 'provideRules', 663 | rules: { 664 | scope: 'span', 665 | } 666 | }, 667 | ], 668 | rulesFromActions: true, 669 | }, 670 | }); 671 | 672 | expect(result).toEqual('test'); 673 | }); 674 | 675 | test('perform with no rules', async () => { 676 | setServerResponse({ 677 | html: `test`, 678 | }); 679 | const parser = new Parser({ 680 | environment: new ChromeEnvironment({ url }), 681 | }); 682 | const result = await parser.parse({ 683 | rules: { 684 | actions: [ 685 | { 686 | type: 'provideRules', 687 | }, 688 | ], 689 | rulesFromActions: true, 690 | }, 691 | }); 692 | 693 | expect(result).toEqual(''); 694 | }); 695 | }); 696 | 697 | 
/*
 * Specs for the condition-style actions (exists, condition, or, not,
 * hasRedirect), the navigation actions (back, open) and click cases.
 * Every spec has the same shape: setServerResponse() registers the canned
 * page or pages, a Parser is built over a ChromeEnvironment created with url,
 * and the value resolved by parser.parse() is compared with the expected text.
 *
 * NOTE(review): the numeric markers followed by a pipe, the tag-less html
 * fixtures and the string literal split across physical lines near marker 1450
 * look like extraction residue rather than real source. Confirm against the
 * original test file before trusting the literal fixture values.
 *
 * ActionExist: an exists check is the if-branch of a condition action; the
 * then-branch supplies the extraction rules through provideRules, and the
 * rules object sets rulesFromActions: true.
 */
describe('ActionExist', () => { 698 | test('perform', async () => { 699 | setServerResponse({ 700 | html: `test` 701 | }); 702 | const parser = new Parser({ 703 | environment: new ChromeEnvironment({ url }), 704 | }); 705 | const result = await parser.parse({ 706 | rules: { 707 | actions: [ 708 | { 709 | type: 'condition', 710 | if: [ 711 | { 712 | type: 'exists', 713 | scope: 'a', 714 | } 715 | ], 716 | then: [ 717 | { 718 | type: 'provideRules', 719 | rules: { 720 | scope: 'a', 721 | }, 722 | } 723 | ], 724 | }, 725 | ], 726 | rulesFromActions: true, 727 | }, 728 | }); 729 | 730 | expect(result).toEqual('test'); 731 | }); 732 | 733 | /* Same flow, with child: 1 set on both the exists check and the provided rules. */ test('perform exist child', async () => { 734 | setServerResponse({ 735 | html: `globaltest` 736 | }); 737 | const parser = new Parser({ 738 | environment: new ChromeEnvironment({ url }), 739 | }); 740 | const result = await parser.parse({ 741 | rules: { 742 | actions: [ 743 | { 744 | type: 'condition', 745 | if: [ 746 | { 747 | type: 'exists', 748 | scope: 'span', 749 | child: 1, 750 | } 751 | ], 752 | then: [ 753 | { 754 | type: 'provideRules', 755 | rules: { 756 | scope: 'span', 757 | child: 1, 758 | }, 759 | } 760 | ], 761 | }, 762 | ], 763 | rulesFromActions: true, 764 | }, 765 | }); 766 | 767 | expect(result).toEqual('test'); 768 | }); 769 | }); 770 | 771 | /* ActionCondition: the first spec supplies both a then and an else rule set; the second spec passes a bare condition action (no if, then or else) directly under actions and expects an empty string. NOTE(review): both specs share the title perform. */ describe('ActionCondition', () => { 772 | test('perform', async () => { 773 | setServerResponse({ 774 | html: `test` 775 | }); 776 | const parser = new Parser({ 777 | environment: new ChromeEnvironment({ url }), 778 | }); 779 | const result = await parser.parse({ 780 | rules: { 781 | actions: [ 782 | { 783 | type: 'condition', 784 | if: [ 785 | { 786 | type: 'exists', 787 | scope: 'span', 788 | } 789 | ], 790 | then: [ 791 | { 792 | type: 'provideRules', 793 | rules: { 794 | scope: 'span', 795 | }, 796 | } 797 | ], 798 | else: [ 799 | { 800 | type: 'provideRules', 801 | rules: { 802 | scope: 'a', 803 | }, 804 | } 805 | ], 806 | }, 807 | ], 808 | rulesFromActions: true, 809 | 
}, 810 | }); 811 | 812 | expect(result).toEqual('test'); 813 | }); 814 | 815 | test('perform', async () => { 816 | setServerResponse({ 817 | html: `test` 818 | }); 819 | const parser = new Parser({ 820 | environment: new ChromeEnvironment({ url }), 821 | }); 822 | const result = await parser.parse({ 823 | actions: [ 824 | { 825 | type: 'condition', 826 | }, 827 | ], 828 | }); 829 | 830 | expect(result).toEqual(''); 831 | }); 832 | }); 833 | 834 | /* ActionOr: the or action wraps two exists checks (span and a), once in each order; the last spec omits the nested actions and provides span rules in then and a rules in else. */ describe('ActionOr', () => { 835 | test('perform', async () => { 836 | setServerResponse({ 837 | html: `test` 838 | }); 839 | const parser = new Parser({ 840 | environment: new ChromeEnvironment({ url }), 841 | }); 842 | const result = await parser.parse({ 843 | rules: { 844 | actions: [ 845 | { 846 | type: 'condition', 847 | if: [ 848 | { 849 | type: 'or', 850 | actions: [ 851 | { 852 | type: 'exists', 853 | scope: 'span', 854 | }, 855 | { 856 | type: 'exists', 857 | scope: 'a', 858 | } 859 | ] 860 | } 861 | ], 862 | then: [ 863 | { 864 | type: 'provideRules', 865 | rules: { 866 | scope: 'a', 867 | }, 868 | } 869 | ], 870 | }, 871 | ], 872 | rulesFromActions: true, 873 | }, 874 | }); 875 | 876 | expect(result).toEqual('test'); 877 | }); 878 | 879 | test('perform', async () => { 880 | setServerResponse({ 881 | html: `test` 882 | }); 883 | const parser = new Parser({ 884 | environment: new ChromeEnvironment({ url }), 885 | }); 886 | const result = await parser.parse({ 887 | rules: { 888 | actions: [ 889 | { 890 | type: 'condition', 891 | if: [ 892 | { 893 | type: 'or', 894 | actions: [ 895 | { 896 | type: 'exists', 897 | scope: 'a', 898 | }, 899 | { 900 | type: 'exists', 901 | scope: 'span', 902 | }, 903 | ] 904 | } 905 | ], 906 | then: [ 907 | { 908 | type: 'provideRules', 909 | rules: { 910 | scope: 'a', 911 | }, 912 | } 913 | ], 914 | }, 915 | ], 916 | rulesFromActions: true, 917 | }, 918 | }); 919 | 920 | expect(result).toEqual('test'); 921 | }); 922 | 923 | test('perform without or values', async () => { 924 | 
setServerResponse({ 925 | html: `test` 926 | }); 927 | const parser = new Parser({ 928 | environment: new ChromeEnvironment({ url }), 929 | }); 930 | const result = await parser.parse({ 931 | rules: { 932 | actions: [ 933 | { 934 | type: 'condition', 935 | if: [ 936 | { 937 | type: 'or', 938 | } 939 | ], 940 | then: [ 941 | { 942 | type: 'provideRules', 943 | rules: { 944 | scope: 'span', 945 | }, 946 | } 947 | ], 948 | else: [ 949 | { 950 | type: 'provideRules', 951 | rules: { 952 | scope: 'a', 953 | }, 954 | } 955 | ], 956 | }, 957 | ], 958 | rulesFromActions: true, 959 | }, 960 | }); 961 | 962 | expect(result).toEqual('test'); 963 | }); 964 | }); 965 | 966 | /* ActionNot: the not action wraps an exists check for span; the then-branch provides rules for a. */ describe('ActionNot', () => { 967 | test('perform', async () => { 968 | setServerResponse({ 969 | html: `test` 970 | }); 971 | const parser = new Parser({ 972 | environment: new ChromeEnvironment({ url }), 973 | }); 974 | const result = await parser.parse({ 975 | rules: { 976 | actions: [ 977 | { 978 | type: 'condition', 979 | if: [ 980 | { 981 | type: 'not', 982 | actions: [ 983 | { 984 | type: 'exists', 985 | scope: 'span', 986 | }, 987 | ] 988 | } 989 | ], 990 | then: [ 991 | { 992 | type: 'provideRules', 993 | rules: { 994 | scope: 'a', 995 | }, 996 | } 997 | ], 998 | }, 999 | ], 1000 | rulesFromActions: true, 1001 | }, 1002 | }); 1003 | 1004 | expect(result).toEqual('test'); 1005 | }); 1006 | 1007 | /* NOTE(review): this spec uses a not action with no nested actions, yet its title still says or values; it looks copied from the ActionOr suite, so confirm the intended title. */ test('perform without or values', async () => { 1008 | setServerResponse({ 1009 | html: `test` 1010 | }); 1011 | const parser = new Parser({ 1012 | environment: new ChromeEnvironment({ url }), 1013 | }); 1014 | const result = await parser.parse({ 1015 | rules: { 1016 | actions: [ 1017 | { 1018 | type: 'condition', 1019 | if: [ 1020 | { 1021 | type: 'not', 1022 | } 1023 | ], 1024 | then: [ 1025 | { 1026 | type: 'provideRules', 1027 | rules: { 1028 | scope: 'a', 1029 | }, 1030 | } 1031 | ], 1032 | }, 1033 | ], 1034 | rulesFromActions: true, 1035 | }, 1036 | }); 1037 | 1038 | expect(result).toEqual('test'); 
1039 | }); 1040 | }); 1041 | 1042 | /* ActionHasRedirect: route / replies with code 302 and a Location header built from url followed by test; the condition uses hasRedirect and its then-branch provides rules for span. */ describe('ActionHasRedirect', () => { 1043 | test('perform', async () => { 1044 | setServerResponse([ 1045 | { 1046 | html: ``, 1047 | headers: [ 1048 | { 1049 | name: "Location", 1050 | value: `${url}test`, 1051 | } 1052 | ], 1053 | code: 302, 1054 | route: '/' 1055 | }, 1056 | { 1057 | route: '/test', 1058 | html: `test`, 1059 | } 1060 | ]); 1061 | const parser = new Parser({ 1062 | environment: new ChromeEnvironment({ url }), 1063 | }); 1064 | const result = await parser.parse({ 1065 | rules: { 1066 | actions: [ 1067 | { 1068 | type: 'condition', 1069 | if: [ 1070 | { 1071 | type: "hasRedirect" 1072 | } 1073 | ], 1074 | then: [ 1075 | { 1076 | type: 'provideRules', 1077 | rules: { 1078 | scope: 'span', 1079 | }, 1080 | } 1081 | ], 1082 | }, 1083 | ], 1084 | rulesFromActions: true, 1085 | }, 1086 | }); 1087 | 1088 | expect(result).toEqual('test'); 1089 | }); 1090 | }); 1091 | 1092 | /* ActionBack: click a with waitFor set to page, then run back; the spec expects the text served on route /. */ describe('ActionBack', () => { 1093 | test('perform', async () => { 1094 | setServerResponse([ 1095 | { 1096 | route: '/', 1097 | html: `test` 1098 | }, 1099 | { 1100 | route: '/test', 1101 | html: `nothing`, 1102 | } 1103 | ]); 1104 | const parser = new Parser({ 1105 | environment: new ChromeEnvironment({ url }), 1106 | }); 1107 | const result = await parser.parse({ 1108 | actions: [ 1109 | { 1110 | type: 'click', 1111 | scope: 'a', 1112 | waitFor: 'page', 1113 | }, 1114 | { 1115 | type: 'back', 1116 | }, 1117 | ], 1118 | rules: { 1119 | scope: 'a' 1120 | }, 1121 | }); 1122 | 1123 | expect(result).toEqual('test'); 1124 | }); 1125 | }); 1126 | 1127 | /* ActionOpen: open the address built from url followed by test; the spec expects the text served on route /test. */ describe('ActionOpen', () => { 1128 | test('perform', async () => { 1129 | setServerResponse([ 1130 | { 1131 | route: '/', 1132 | html: `test` 1133 | }, 1134 | { 1135 | route: '/test', 1136 | html: `nothing`, 1137 | } 1138 | ]); 1139 | const parser = new Parser({ 1140 | environment: new ChromeEnvironment({ url }), 1141 | }); 1142 | const result = await parser.parse({ 1143 | actions: [ 1144 | { 1145 | 
type: 'open', 1146 | url: `${url}test`, 1147 | }, 1148 | ], 1149 | rules: { 1150 | scope: 'a' 1151 | }, 1152 | }); 1153 | 1154 | expect(result).toEqual('nothing'); 1155 | }); 1156 | }); 1157 | 1158 | /* ActionCases: a click action carries cases, an array of action lists, each starting with a wait action and optionally followed by provideRules. In the first spec the page script sets window.location to /test 2000 ms after the click. */ describe('ActionCases', () => { 1159 | test('perform', async () => { 1160 | setServerResponse([ 1161 | { 1162 | route: '/', 1163 | html: `test`, 1164 | fn: () => { 1165 | document.querySelector('a').addEventListener('click', () => { 1166 | setTimeout(() => { 1167 | window.location = '/test'; 1168 | }, 2000); 1169 | }); 1170 | }, 1171 | }, 1172 | { 1173 | route: '/test', 1174 | html: `nothing`, 1175 | } 1176 | ]); 1177 | const parser = new Parser({ 1178 | environment: new ChromeEnvironment({ url }), 1179 | }); 1180 | const result = await parser.parse({ 1181 | rules: { 1182 | rulesFromActions: true, 1183 | actions: [ 1184 | { 1185 | type: 'click', 1186 | scope: 'a', 1187 | cases: [ 1188 | [ 1189 | { 1190 | type: 'waitForQuery', 1191 | timeout: 5000, 1192 | uri: 'tel:', 1193 | }, 1194 | { 1195 | type: 'provideRules', 1196 | trueCase: true, 1197 | rules: { 1198 | scope: 'a' 1199 | } 1200 | } 1201 | ], 1202 | [ 1203 | { 1204 | type: 'waitForPage', 1205 | timeout: 5000, 1206 | }, 1207 | { 1208 | type: 'provideRules', 1209 | trueCase: true, 1210 | rules: { 1211 | scope: 'a' 1212 | } 1213 | } 1214 | ], 1215 | ], 1216 | } 1217 | ], 1218 | }, 1219 | }); 1220 | 1221 | expect(result).toEqual('nothing'); 1222 | }); 1223 | 1224 | test('perform', async () => { 1225 | setServerResponse([ 1226 | { 1227 | route: '/', 1228 | html: `test` 1229 | }, 1230 | { 1231 | route: '/test', 1232 | html: `nothing`, 1233 | } 1234 | ]); 1235 | const parser = new Parser({ 1236 | environment: new ChromeEnvironment({ url }), 1237 | }); 1238 | const result = await parser.parse({ 1239 | rules: { 1240 | rulesFromActions: true, 1241 | actions: [ 1242 | { 1243 | type: 'click', 1244 | scope: 'a', 1245 | cases: [ 1246 | [ 1247 | { 1248 | type: 'waitForPage', 1249 | timeout: 5000, 1250 | }, 1251 | { 1252 | type: 
'provideRules', 1253 | trueCase: true, 1254 | rules: { 1255 | scope: 'a' 1256 | } 1257 | } 1258 | ], 1259 | [ 1260 | { 1261 | type: 'waitForPattern', 1262 | timeout: 5000, 1263 | pattern: '^tel:', 1264 | scope: 'a', 1265 | attr: 'href', 1266 | }, 1267 | { 1268 | type: 'provideRules', 1269 | trueCase: true, 1270 | rules: { 1271 | scope: 'a' 1272 | } 1273 | } 1274 | ], 1275 | ], 1276 | } 1277 | ], 1278 | }, 1279 | }); 1280 | 1281 | expect(result).toEqual('test'); 1282 | }); 1283 | 1284 | test('perform', async () => { 1285 | setServerResponse([ 1286 | { 1287 | route: '/', 1288 | html: `test` 1289 | }, 1290 | { 1291 | route: '/test', 1292 | html: `nothing`, 1293 | } 1294 | ]); 1295 | const parser = new Parser({ 1296 | environment: new ChromeEnvironment({ url }), 1297 | }); 1298 | const result = await parser.parse({ 1299 | rules: { 1300 | actions: [ 1301 | { 1302 | type: 'click', 1303 | scope: 'a', 1304 | cases: [ 1305 | [ 1306 | { 1307 | type: 'waitForPage', 1308 | timeout: 5000, 1309 | }, 1310 | ], 1311 | [ 1312 | { 1313 | type: 'waitForPattern', 1314 | timeout: 5000, 1315 | pattern: '^tel:', 1316 | scope: 'a', 1317 | attr: 'href', 1318 | }, 1319 | ], 1320 | ], 1321 | } 1322 | ], 1323 | }, 1324 | }); 1325 | 1326 | expect(result).toEqual(''); 1327 | }); 1328 | 1329 | /* NOTE(review): the assertions live only inside the catch block; if parser.parse() resolves instead of rejecting, this spec passes without checking anything. Consider expect.assertions or a rejects matcher. The later spec with the same title has the same shape. */ test('perform with error', async () => { 1330 | setServerResponse([ 1331 | { 1332 | route: '/', 1333 | html: `test` 1334 | }, 1335 | { 1336 | route: '/test', 1337 | html: `nothing`, 1338 | } 1339 | ]); 1340 | const parser = new Parser({ 1341 | environment: new ChromeEnvironment({ url }), 1342 | }); 1343 | try { 1344 | await parser.parse({ 1345 | rules: { 1346 | actions: [ 1347 | { 1348 | type: 'click', 1349 | scope: 'a', 1350 | cases: [ 1351 | [ 1352 | { 1353 | type: 'waitForPage', 1354 | timeout: 500, 1355 | }, 1356 | ], 1357 | [ 1358 | { 1359 | type: 'waitForPattern', 1360 | timeout: 500, 1361 | pattern: '^http:', 1362 | scope: 'a', 1363 | attr: 'href', 1364 | }, 1365 | ], 1366 | ], 1367 | } 1368 | ], 
1369 | }, 1370 | }); 1371 | } catch (err) { 1372 | expect(Array.isArray(err)).toEqual(true); 1373 | expect(err.length).toEqual(2); 1374 | err.forEach(errItem => { 1375 | expect(errItem).toBeInstanceOf(Error); 1376 | }); 1377 | } 1378 | }); 1379 | 1380 | test('perform', async () => { 1381 | setServerResponse([ 1382 | { 1383 | route: '/', 1384 | html: `test`, 1385 | fn: () => { 1386 | document.querySelector('a').addEventListener('click', () => { 1387 | setTimeout(() => { 1388 | window.location = '/test'; 1389 | }, 1000); 1390 | }); 1391 | }, 1392 | }, 1393 | { 1394 | route: '/test', 1395 | html: `nothing`, 1396 | } 1397 | ]); 1398 | const parser = new Parser({ 1399 | environment: new ChromeEnvironment({ url }), 1400 | }); 1401 | const result = await parser.parse({ 1402 | rules: { 1403 | rulesFromActions: true, 1404 | actions: [ 1405 | { 1406 | type: 'click', 1407 | scope: 'a', 1408 | cases: [ 1409 | [ 1410 | { 1411 | type: 'waitForQuery', 1412 | timeout: 2000, 1413 | uri: 'tel:', 1414 | }, 1415 | { 1416 | type: 'provideRules', 1417 | rules: { 1418 | scope: 'a' 1419 | } 1420 | } 1421 | ], 1422 | [ 1423 | { 1424 | type: 'waitForPage', 1425 | timeout: 2000, 1426 | }, 1427 | { 1428 | type: 'provideRules', 1429 | rules: { 1430 | scope: 'a' 1431 | } 1432 | } 1433 | ], 1434 | ], 1435 | } 1436 | ], 1437 | }, 1438 | }); 1439 | 1440 | expect(result).toEqual('nothing'); 1441 | }); 1442 | 1443 | test('perform with error', async () => { 1444 | setServerResponse([ 1445 | { 1446 | route: '/', 1447 | html: `test`, 1448 | fn: () => { 1449 | setTimeout(function () { 1450 | document.body.insertAdjacentHTML('beforeend', '
12345
'); 1451 | }, 500); 1452 | }, 1453 | }, 1454 | { 1455 | route: '/test', 1456 | html: `nothing`, 1457 | }, 1458 | ]); 1459 | const parser = new Parser({ 1460 | environment: new ChromeEnvironment({ url }), 1461 | }); 1462 | try { 1463 | await parser.parse({ 1464 | rules: { 1465 | actions: [ 1466 | { 1467 | type: 'click', 1468 | scope: 'a', 1469 | cases: [ 1470 | [ 1471 | { 1472 | type: 'waitForVisible', 1473 | scope: 'div', 1474 | timeout: 2000, 1475 | }, 1476 | ], 1477 | [ 1478 | { 1479 | type: 'waitForPattern', 1480 | timeout: 2000, 1481 | pattern: '^http:', 1482 | scope: 'a', 1483 | attr: 'href', 1484 | }, 1485 | ], 1486 | ], 1487 | }, 1488 | ], 1489 | }, 1490 | }); 1491 | } catch (err) { 1492 | expect(Array.isArray(err)).toEqual(true); 1493 | expect(err.length).toEqual(2); 1494 | err.forEach(errItem => { 1495 | expect(errItem).toBeInstanceOf(Error); 1496 | }); 1497 | } 1498 | }); 1499 | }); 1500 | 1501 | // describe('ActionSnapshot', () => { 1502 | // test('making page snapshot', async () => { 1503 | // setServerResponse({ 1504 | // html: `snapshot`, 1505 | // }); 1506 | // const parser = new Parser({ 1507 | // environment: new ChromeEnvironment({ url, snapshot: true, snapshotDir: '/tmp' }), 1508 | // }); 1509 | // await parser.parse({ 1510 | // actions: [ 1511 | // { 1512 | // type: 'snapshot', 1513 | // name: 'test', 1514 | // }, 1515 | // ], 1516 | // }); 1517 | // 1518 | // const filePath = '/tmp/localhost/test.png'; 1519 | // const snapshotExists = await fileExists(filePath); 1520 | // expect(snapshotExists).toEqual(true); 1521 | // await removeFile(filePath); 1522 | // }); 1523 | // }); 1524 | }); 1525 | --------------------------------------------------------------------------------