├── .github └── workflows │ └── npm.yml ├── .gitignore ├── .npmignore ├── .prettierrc ├── CHANGELOG.md ├── LICENSE.md ├── README.md ├── jest.config.js ├── logo.png ├── package-lock.json ├── package.json ├── src ├── index.ts ├── scraping-actions │ ├── AddCookies.ts │ ├── Click.ts │ ├── Press.ts │ ├── Remove.ts │ ├── Scroll.ts │ ├── Select.ts │ ├── Type.ts │ ├── Wait.ts │ └── index.ts ├── scraping-strategies │ ├── ScreenshotToMap.ts │ ├── TextContentScraping.ts │ └── index.ts ├── types.ts └── utils │ ├── browser-config.ts │ └── utils.ts ├── tests ├── scraper.test.ts ├── screenshotToMap.spec.ts └── templates │ ├── actions.html │ ├── pricing.css │ └── pricing.html └── tsconfig.json /.github/workflows/npm.yml: -------------------------------------------------------------------------------- 1 | name: Publish to NPM 2 | on: 3 | push: 4 | branches: [ main, master ] 5 | 6 | jobs: 7 | test: 8 | runs-on: ubuntu-latest 9 | steps: 10 | - uses: actions/checkout@v3 11 | - uses: actions/setup-node@v3 12 | with: 13 | node-version: 16 14 | - run: npm ci 15 | - run: npm test 16 | build: 17 | needs: test 18 | runs-on: ubuntu-latest 19 | steps: 20 | - uses: actions/checkout@v3 21 | - uses: actions/setup-node@v3 22 | with: 23 | node-version: 16 24 | - run: npm ci 25 | - run: npm run build 26 | - uses: JS-DevTools/npm-publish@v1 27 | with: 28 | token: ${{ secrets.NPM_TOKEN }} -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Logs 2 | logs 3 | *.log 4 | npm-debug.log* 5 | yarn-debug.log* 6 | yarn-error.log* 7 | lerna-debug.log* 8 | .pnpm-debug.log* 9 | 10 | # Diagnostic reports (https://nodejs.org/api/report.html) 11 | report.[0-9]*.[0-9]*.[0-9]*.[0-9]*.json 12 | 13 | # Runtime data 14 | pids 15 | *.pid 16 | *.seed 17 | *.pid.lock 18 | 19 | # Directory for instrumented libs generated by jscoverage/JSCover 20 | lib-cov 21 | 22 | # Coverage directory used by tools like istanbul 23 | coverage 24 | *.lcov 25 | 26 | # nyc test coverage 27 | .nyc_output 28 | 29 | # Grunt intermediate storage (https://gruntjs.com/creating-plugins#storing-task-files) 30 | .grunt 31 | 32 | # Bower dependency directory (https://bower.io/) 33 | bower_components 34 | 35 | # node-waf configuration 36 | .lock-wscript 37 | 38 | # Compiled binary addons (https://nodejs.org/api/addons.html) 39 | build/Release 40 | 41 | # Dependency directories 42 | node_modules/ 43 | jspm_packages/ 44 | 45 | # Snowpack dependency directory (https://snowpack.dev/) 46 | web_modules/ 47 | 48 | # TypeScript cache 49 | *.tsbuildinfo 50 | 51 | # Optional npm cache directory 52 | .npm 53 | 54 | # Optional eslint cache 55 | .eslintcache 56 | 57 | # Optional stylelint cache 58 | .stylelintcache 59 | 60 | # Microbundle cache 61 | .rpt2_cache/ 62 | .rts2_cache_cjs/ 63 | .rts2_cache_es/ 64 | .rts2_cache_umd/ 65 | 66 | # Optional REPL history 67 | .node_repl_history 68 | 69 | # Output of 'npm pack' 70 | *.tgz 71 | 72 | # Yarn Integrity file 73 | .yarn-integrity 74 | 75 | # dotenv environment variable files 76 | .env 77 | .env.development.local 78 | .env.test.local 79 | .env.production.local 80 | .env.local 81 | 82 | # parcel-bundler cache (https://parceljs.org/) 83 | .cache 84 | .parcel-cache 85 | 86 | # Next.js build output 87 | .next 88 | out 89 | 90 | # Nuxt.js build / generate output 91 | .nuxt 92 | dist 93 | 94 | # Gatsby files 95 | .cache/ 96 | # Comment in the public line in if your project uses Gatsby and not Next.js 97 | # https://nextjs.org/blog/next-9-1#public-directory-support 98 | # public 99 | 100 | # vuepress build output 101 | .vuepress/dist 102 | 103 | # vuepress v2.x temp and cache directory 104 | .temp 105 | .cache 106 | 107 | # Docusaurus cache and generated files 108 | .docusaurus 109 | 110 | # Serverless directories 111 | .serverless/ 112 | 113 | # FuseBox cache 114 | .fusebox/ 115 | 116 | # DynamoDB Local files 117 | .dynamodb/ 118 | 119 | # TernJS port file 120 | .tern-port 121 | 122 | # Stores VSCode versions used for testing VSCode extensions 123 | .vscode-test 124 | 125 | # yarn v2 126 | .yarn/cache 127 | .yarn/unplugged 128 | .yarn/build-state.yml 129 | .yarn/install-state.gz 130 | .pnp.* 131 | -------------------------------------------------------------------------------- /.npmignore: -------------------------------------------------------------------------------- 1 | .vscode 2 | .github 3 | tests -------------------------------------------------------------------------------- /.prettierrc: -------------------------------------------------------------------------------- 1 | { 2 | "printWidth": 100, 3 | "useTabs": true 4 | } -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | All notable changes to this project will be documented in this file. 4 | 5 | ## [0.2.0] — 2022-06-13 6 | 7 | ### Added 8 | 9 | - Remove action: it removes an element from the DOM. It can be useful when dealing with consent pop-ups. 10 | 11 | ## [0.1.0] — 2022-06-11 12 | 13 | ### Added 14 | 15 | - ScreenshotToMap strategy (retrieve screenshot and maps with selectors for the specified elements). 16 | 17 | ### Changed 18 | 19 | - You can now run the scraper without a strategy. It will only run the actions and return the playwright instances of Browser, BrowserContext, and Page. 20 | 21 | ## [0.0.1] — 2022-06-10 22 | 23 | ### Added 24 | 25 | - First release 26 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 Alex Ferrari 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |  2 | 3 |