├── .editorconfig ├── .env.example ├── .eslintrc.cjs ├── .github ├── CODEOWNERS ├── actions │ ├── create-env-file │ │ └── action.yml │ ├── install-dependencies │ │ └── action.yml │ └── setup-node │ │ └── action.yml ├── pull_request_template.md └── workflows │ ├── deploy.yml │ ├── pull-request.yml │ └── release.yml ├── .gitignore ├── .husky └── prepare-commit-msg ├── .releaserc ├── CHANGELOG.md ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── LICENSE ├── README.md ├── app_icons ├── 128x128.png ├── 16x16.png └── 48x48.png ├── config └── jest-preset.cjs ├── e2e ├── __snapshots__ │ └── scrappers.test.ts.snap ├── amazon │ ├── index.html │ └── test.yml ├── ebay │ ├── index.html │ └── test.yml ├── exhibitors-ces │ ├── index.html │ └── test.yml ├── idealista │ ├── index.html │ └── test.yml ├── immobiliare │ ├── index.html │ └── test.yml ├── imovirtual │ ├── index.html │ └── test.yml ├── linkedin-connections │ ├── index.html │ └── test.yml ├── linkedin-jobs │ ├── index.html │ └── test.yml ├── linkedin-search-people │ ├── index.html │ └── test.yml ├── netflix │ ├── index.html │ └── test.yml ├── product-hunt │ ├── index.html │ └── test.yml ├── sample.index.html ├── scrappers.test.ts ├── tik-tok-user │ ├── index.html │ └── test.yml ├── tik-tok-videos │ ├── index.html │ └── test.yml ├── trulia │ ├── index.html │ └── test.yml ├── twitter │ ├── index.html │ └── test.yml ├── wikipedia │ ├── index.html │ └── test.yml ├── yahoo-finance │ ├── index.html │ └── test.yml └── youtube │ ├── index.html │ └── test.yml ├── index.html ├── jest-puppeteer.config.cjs ├── jest.config.cjs ├── manifest.json ├── package-lock.json ├── package.json ├── public ├── empty.svg ├── fonts │ ├── Output Sans Regular.woff2 │ └── Output Sans.woff2 ├── icons │ ├── close.svg │ └── copy.svg └── logo.svg ├── src ├── App.tsx ├── background.ts ├── components │ ├── button.css │ ├── button.tsx │ ├── feedback-form.tsx │ ├── header.css │ ├── header.tsx │ ├── loading-skeleton.tsx │ ├── no-results.css │ ├── no-results.tsx │ 
├── preview.css │ └── preview.tsx ├── error-codes.ts ├── index.css ├── main.tsx ├── scrappers │ ├── airbnb.yml │ ├── ajio.yml │ ├── amazon.yml │ ├── annuairevert-magasins.yml │ ├── annuairevert-produits.yml │ ├── apartments.yml │ ├── apple-newsroom.yml │ ├── autotrader-uk.yml │ ├── autotrader.yml │ ├── babymarket-list-products.yml │ ├── babymarket-product-detail-page.yml │ ├── babyone.yml │ ├── babypark-de-list-products.yml │ ├── babypark-de-product-detail-page.yml │ ├── babypark-nl-list-products.yml │ ├── babypark-nl-product-detail-page.yml │ ├── bcf.yml │ ├── bebe9.yml │ ├── bol.yml │ ├── booking.yml │ ├── bpi.yml │ ├── byggdinframtid.yml │ ├── cabral-moncada.yml │ ├── capterra-de.yml │ ├── capterra.yml │ ├── cdiscount.yml │ ├── chaperone-review-list.yml │ ├── clutch.yml │ ├── craigslist.yml │ ├── deliveroo.yml │ ├── dreambaby.yml │ ├── ebay.yml │ ├── exhibitors-ces.yml │ ├── exhibitors-expandnorthstar.yml │ ├── f6s_companies.yml │ ├── facebook.yml │ ├── g2-reviews.yml │ ├── g2-search.yml │ ├── gads-keyword-planner.yml │ ├── github.yml │ ├── google.yml │ ├── gumroad.yml │ ├── homes.yml │ ├── idealista.yml │ ├── ikea.yml │ ├── immobiliare.yml │ ├── immobilienscout24.yml │ ├── imovirtual.yml │ ├── index.ts │ ├── instagram.yml │ ├── johnlewis.yml │ ├── joinef-portfolio.yml │ ├── joinef-posts.yml │ ├── jollyroom.yml │ ├── kuanto-kusta-product.yml │ ├── kuanto-kusta.yml │ ├── linkedIn-company-profile-likes.yml │ ├── linkedin-groups.yml │ ├── linkedin-jobs.yml │ ├── linkedin-my-network.yml │ ├── linkedin-post.yml │ ├── linkedin-sales-leads-search.yml │ ├── linkedin-sales-search.yml │ ├── linkedin-saved-posts.yml │ ├── linkedin.yml │ ├── merrill.yml │ ├── milanuncios.yml │ ├── netflix.yml │ ├── notion.yml │ ├── ocean.io.yml │ ├── oddsportal-12.yml │ ├── oddsportal-1x2.yml │ ├── openviewpartners.yml │ ├── oportunity-leiloes-list.yml │ ├── oportunity-leiloes.yml │ ├── pinkorblue-list-products-it.yml │ ├── pinkorblue-list-products-nl.yml │ ├── pinkorblue-list-products.yml 
│ ├── pinkorblue-product-detail-page.yml │ ├── pitchbook.yml │ ├── prenatal-com.yml │ ├── prenatal-nl.yml │ ├── product-hunt.yml │ ├── qubika.yml │ ├── racius.yml │ ├── raymondjames.yml │ ├── realstate.yml │ ├── realtor.yml │ ├── recheio.yml │ ├── redfin.yml │ ├── rightmove.yml │ ├── rosaoazul-es-list-products.yml │ ├── rosaoazul-es-product-detail-page.yml │ ├── roseoubleu-fr-list-products.yml │ ├── roseoubleu-fr-product-detail-page.yml │ ├── shopify.yml │ ├── standvirtual.yml │ ├── suchen-mobile-park.yml │ ├── suchen-mobile.yml │ ├── supercoach.yml │ ├── suumo.yml │ ├── tik-tok-accounts.yml │ ├── tik-tok-video.yml │ ├── tilbudsportalen.yml │ ├── trulia.yml │ ├── twitter.yml │ ├── voeazul.yml │ ├── vrbo.yml │ ├── yahoo-finance.yml │ ├── ycombinator.yml │ ├── yellow-pages.yml │ ├── yelp.yml │ ├── youtube.yml │ ├── zillow.yml │ └── zoopla.yml ├── types.ts └── utils │ ├── chrome.ts │ ├── copy.ts │ ├── rows-api │ ├── fetch.ts │ └── report.ts │ ├── scrapperUtils.ts │ ├── scrappers │ ├── custom.ts │ ├── div-tables.ts │ ├── html-tables.ts │ └── types.ts │ └── urlUtils.ts ├── tsconfig.json ├── tsconfig.node.json └── vite.config.ts /.editorconfig: -------------------------------------------------------------------------------- 1 | # EditorConfig is awesome: http://EditorConfig.org 2 | 3 | # top-most EditorConfig file 4 | root = true 5 | 6 | # Unix-style newlines with a newline ending every file 7 | [*] 8 | indent_style = space 9 | indent_size = 2 10 | charset = utf-8 11 | trim_trailing_whitespace = true 12 | insert_final_newline = true 13 | end_of_line = lf 14 | 15 | # EditorConfig tools are unable to ignore long strings or URLs 16 | max_line_length = null 17 | 18 | [*.md] 19 | trim_trailing_whitespace = false 20 | -------------------------------------------------------------------------------- /.env.example: -------------------------------------------------------------------------------- 1 | VITE_ROWS_API_KEY= 2 | VITE_SPREADSHEET_ID= 3 | VITE_TABLE_ID= 4 | 
VITE_TABLE_ID_USAGE= -------------------------------------------------------------------------------- /.eslintrc.cjs: -------------------------------------------------------------------------------- 1 | module.exports = { 2 | parser: "@typescript-eslint/parser", 3 | extends: [ 4 | "eslint:recommended", 5 | "plugin:@typescript-eslint/recommended", 6 | ], 7 | parserOptions: { 8 | ecmaVersion: 2018, 9 | sourceType: "module", 10 | ecmaFeatures: { 11 | jsx: true, 12 | }, 13 | }, 14 | }; -------------------------------------------------------------------------------- /.github/CODEOWNERS: -------------------------------------------------------------------------------- 1 | * @rows/fe 2 | -------------------------------------------------------------------------------- /.github/actions/create-env-file/action.yml: -------------------------------------------------------------------------------- 1 | name: Creating the .env file 2 | description: "Prepare env file in order to propagate credentials" 3 | runs: 4 | using: "composite" 5 | steps: 6 | - name: Make envfile 7 | uses: SpicyPizza/create-envfile@v2.0 8 | with: 9 | envkey_VITE_ROWS_API_KEY: ${{ inputs.rows-api-key }} 10 | envkey_VITE_SPREADSHEET_ID: ${{ inputs.spreadsheet-id }} 11 | envkey_VITE_TABLE_ID: ${{ inputs.table-id }} 12 | envkey_VITE_TABLE_ID_USAGE: ${{ inputs.usage-table-id }} 13 | file_name: .env 14 | fail_on_empty: true 15 | -------------------------------------------------------------------------------- /.github/actions/install-dependencies/action.yml: -------------------------------------------------------------------------------- 1 | name: Install dependencies 2 | 3 | description: "Install project dependencies and cache node_modules" 4 | 5 | inputs: 6 | node-auth-token: 7 | description: "NODE_AUTH_TOKEN env variable needed for dependency installations" 8 | required: true 9 | github-token: 10 | description: "GITHUB_TOKEN env variable needed for dependency installations" 11 | required: true 12 | 13 | runs: 14 | 
using: "composite" 15 | steps: 16 | - name: Cache dependencies 17 | id: cache-deps 18 | uses: actions/cache@v3 19 | with: 20 | path: "**/node_modules" 21 | key: ${{ runner.os }}-node-${{ hashFiles('**/package-lock.json') }} 22 | 23 | - name: Install dependencies 24 | if: steps.cache-deps.outputs.cache-hit != 'true' 25 | shell: bash 26 | run: npm ci --no-audit 27 | env: 28 | NODE_AUTH_TOKEN: ${{ inputs.node-auth-token }} 29 | GITHUB_TOKEN: ${{ inputs.github-token }} 30 | -------------------------------------------------------------------------------- /.github/actions/setup-node/action.yml: -------------------------------------------------------------------------------- 1 | name: Setup node 2 | description: "Prepare node env and SSH key (needs to be run after checkout code)" 3 | inputs: 4 | node-version: 5 | description: "Node version" 6 | required: false 7 | default: "18" 8 | 9 | runs: 10 | using: "composite" 11 | steps: 12 | - name: Setup Node.js environment 13 | uses: actions/setup-node@v4 14 | with: 15 | cache: "npm" 16 | node-version: ${{ inputs.node-version }} 17 | registry-url: https://npm.pkg.github.com/ 18 | scope: "@rows" 19 | cache-dependency-path: "**/package-lock.json" 20 | -------------------------------------------------------------------------------- /.github/pull_request_template.md: -------------------------------------------------------------------------------- 1 | ### Related to 2 | 3 | 4 | 5 | ### Context 6 | 7 | 8 | 9 | ### Approach 10 | 11 | 12 | -------------------------------------------------------------------------------- /.github/workflows/deploy.yml: -------------------------------------------------------------------------------- 1 | name: Deploy to Google Store 2 | 3 | on: workflow_dispatch 4 | 5 | jobs: 6 | test: 7 | name: E2E Tests 8 | runs-on: ubuntu-latest 9 | timeout-minutes: 20 10 | steps: 11 | - name: Checkout code 12 | uses: actions/checkout@v4 13 | with: 14 | fetch-depth: 0 15 | 16 | - uses: ./.github/actions/setup-node 17 | - 
uses: ./.github/actions/install-dependencies 18 | with: 19 | node-auth-token: ${{ secrets.NODE_AUTH_TOKEN }} 20 | github-token: ${{ secrets.GITHUB_TOKEN }} 21 | 22 | - name: Run E2E tests 23 | run: node node_modules/puppeteer/install.mjs && npm run test:e2e 24 | 25 | build-and-deploy-extension: 26 | name: Build & deploy extension 27 | needs: 28 | - test 29 | runs-on: ubuntu-latest 30 | timeout-minutes: 20 31 | steps: 32 | - uses: actions/checkout@v1 33 | - uses: ./.github/actions/setup-node 34 | - uses: ./.github/actions/create-env-file 35 | with: 36 | rows-api-key: ${{ secrets.ROWS_API_KEY }} 37 | spreadsheet-id: ${{ secrets.SPREADSHEET_ID }} 38 | table-id: ${{ secrets.TABLE_ID }} 39 | usage-table-id: ${{ secrets.USAGE_TABLE_ID }} 40 | - uses: ./.github/actions/install-dependencies 41 | with: 42 | node-auth-token: ${{ secrets.NODE_AUTH_TOKEN }} 43 | github-token: ${{ secrets.GITHUB_TOKEN }} 44 | - name: Build chrome extension dist files 45 | run: | 46 | npm run build 47 | cd dist 48 | zip -r ../chrome-extension-${{ github.sha }}.zip * 49 | - name: Archive extension build 50 | uses: actions/upload-artifact@v4 51 | with: 52 | name: extension-dist-content 53 | path: dist 54 | - name: Upload & release 55 | uses: mnao305/chrome-extension-upload@v5.0.0 56 | with: 57 | file-path: ./chrome-extension-${{ github.sha }}.zip 58 | extension-id: ${{ secrets.EXTENSION_ID }} 59 | client-id: ${{ secrets.CLIENT_ID }} 60 | client-secret: ${{ secrets.CLIENT_SECRET }} 61 | refresh-token: ${{ secrets.REFRESH_TOKEN }} 62 | publish: true 63 | -------------------------------------------------------------------------------- /.github/workflows/pull-request.yml: -------------------------------------------------------------------------------- 1 | name: Pull Request 2 | 3 | on: 4 | pull_request: 5 | types: [opened, reopened, synchronize, ready_for_review] 6 | 7 | jobs: 8 | audit: 9 | name: Audit dependencies 10 | runs-on: ubuntu-latest 11 | steps: 12 | - name: Checkout code 13 | uses: 
actions/checkout@v4 14 | 15 | - uses: ./.github/actions/setup-node 16 | 17 | - name: Audit dependencies 18 | run: npm audit --production --audit-level=high 19 | 20 | static-analysis: 21 | name: Static analysis 22 | runs-on: ubuntu-latest 23 | timeout-minutes: 20 24 | steps: 25 | - name: Checkout code 26 | uses: actions/checkout@v4 27 | with: 28 | fetch-depth: 0 29 | 30 | - uses: ./.github/actions/setup-node 31 | 32 | - uses: ./.github/actions/install-dependencies 33 | with: 34 | node-auth-token: ${{ secrets.NODE_AUTH_TOKEN }} 35 | github-token: ${{ secrets.GITHUB_TOKEN }} 36 | 37 | - name: Linting JS/TS 38 | run: npm run lint 39 | 40 | - name: Type checking 41 | run: npm run check-types 42 | 43 | test: 44 | name: E2E Tests 45 | runs-on: ubuntu-latest 46 | timeout-minutes: 20 47 | steps: 48 | - name: Checkout code 49 | uses: actions/checkout@v4 50 | with: 51 | fetch-depth: 0 52 | 53 | - uses: ./.github/actions/setup-node 54 | 55 | - uses: ./.github/actions/install-dependencies 56 | with: 57 | node-auth-token: ${{ secrets.NODE_AUTH_TOKEN }} 58 | github-token: ${{ secrets.GITHUB_TOKEN }} 59 | 60 | - name: Run E2E tests 61 | run: node node_modules/puppeteer/install.mjs && npm run test:e2e 62 | -------------------------------------------------------------------------------- /.github/workflows/release.yml: -------------------------------------------------------------------------------- 1 | name: Release 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | 8 | jobs: 9 | audit: 10 | name: Audit dependencies 11 | runs-on: ubuntu-latest 12 | steps: 13 | - name: Checkout code 14 | uses: actions/checkout@v4 15 | 16 | - uses: ./.github/actions/setup-node 17 | 18 | - name: Audit dependencies 19 | run: npm audit --production --audit-level=high 20 | 21 | static-analysis: 22 | name: Static analysis 23 | runs-on: ubuntu-latest 24 | steps: 25 | - name: Checkout code 26 | uses: actions/checkout@v4 27 | with: 28 | fetch-depth: 0 29 | 30 | - uses: ./.github/actions/setup-node 31 | 32 | - 
uses: ./.github/actions/install-dependencies 33 | with: 34 | node-auth-token: ${{ secrets.NODE_AUTH_TOKEN }} 35 | github-token: ${{ secrets.GITHUB_TOKEN }} 36 | 37 | - name: Linting JS/TS 38 | run: npm run lint 39 | 40 | - name: Type checking 41 | run: npm run check-types 42 | 43 | create-release: 44 | name: Create release 45 | needs: [static-analysis] 46 | runs-on: ubuntu-latest 47 | steps: 48 | - name: Checkout code 49 | uses: actions/checkout@v4 50 | with: 51 | fetch-depth: 0 52 | # Default GITHUB_TOKEN has no permissions to push to locked branches, so we don't 53 | # persist credentials and use the NODE_AUTH_TOKEN personal access token instead 54 | persist-credentials: false 55 | 56 | - uses: ./.github/actions/setup-node 57 | with: 58 | node-version: 20 59 | 60 | # Semantic release 61 | - name: Semantic Release 62 | uses: cycjimmy/semantic-release-action@v3 63 | with: 64 | semantic_version: 19 65 | extra_plugins: | 66 | @semantic-release/changelog 67 | @semantic-release/git 68 | branch: main 69 | env: 70 | # Use NODE_AUTH_TOKEN as GITHUB_TOKEN to allow pushing commits into locked "master" branch 71 | GITHUB_TOKEN: ${{ secrets.NODE_AUTH_TOKEN }} 72 | NPM_TOKEN: ${{ secrets.NODE_AUTH_TOKEN }} 73 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | dist 2 | # Created by https://www.toptal.com/developers/gitignore/api/node,macos,windows,linux,webstorm,visualstudiocode 3 | # Edit at https://www.toptal.com/developers/gitignore?templates=node,macos,windows,linux,webstorm,visualstudiocode 4 | 5 | ### Linux ### 6 | *~ 7 | 8 | # temporary files which can be created if a process still has a handle open of a deleted file 9 | .fuse_hidden* 10 | 11 | # KDE directory preferences 12 | .directory 13 | 14 | # Linux trash folder which might appear on any partition or disk 15 | .Trash-* 16 | 17 | # .nfs files are created when an open file is removed but 
is still being accessed 18 | .nfs* 19 | 20 | ### macOS ### 21 | # General 22 | .DS_Store 23 | .AppleDouble 24 | .LSOverride 25 | 26 | # Icon must end with two \r 27 | Icon 28 | 29 | 30 | # Thumbnails 31 | ._* 32 | 33 | # Files that might appear in the root of a volume 34 | .DocumentRevisions-V100 35 | .fseventsd 36 | .Spotlight-V100 37 | .TemporaryItems 38 | .Trashes 39 | .VolumeIcon.icns 40 | .com.apple.timemachine.donotpresent 41 | 42 | # Directories potentially created on remote AFP share 43 | .AppleDB 44 | .AppleDesktop 45 | Network Trash Folder 46 | Temporary Items 47 | .apdisk 48 | 49 | ### macOS Patch ### 50 | # iCloud generated files 51 | *.icloud 52 | 53 | ### Node ### 54 | # Logs 55 | logs 56 | *.log 57 | npm-debug.log* 58 | yarn-debug.log* 59 | yarn-error.log* 60 | lerna-debug.log* 61 | .pnpm-debug.log* 62 | 63 | # Diagnostic reports (https://nodejs.org/api/report.html) 64 | report.[0-9]*.[0-9]*.[0-9]*.[0-9]*.json 65 | 66 | # Runtime data 67 | pids 68 | *.pid 69 | *.seed 70 | *.pid.lock 71 | 72 | # Directory for instrumented libs generated by jscoverage/JSCover 73 | lib-cov 74 | 75 | # Coverage directory used by tools like istanbul 76 | coverage 77 | *.lcov 78 | 79 | # nyc test coverage 80 | .nyc_output 81 | 82 | # Grunt intermediate storage (https://gruntjs.com/creating-plugins#storing-task-files) 83 | .grunt 84 | 85 | # Bower dependency directory (https://bower.io/) 86 | bower_components 87 | 88 | # node-waf configuration 89 | .lock-wscript 90 | 91 | # Compiled binary addons (https://nodejs.org/api/addons.html) 92 | build/Release 93 | 94 | # Dependency directories 95 | node_modules/ 96 | jspm_packages/ 97 | 98 | # Snowpack dependency directory (https://snowpack.dev/) 99 | web_modules/ 100 | 101 | # TypeScript cache 102 | *.tsbuildinfo 103 | 104 | # Optional npm cache directory 105 | .npm 106 | 107 | # Optional eslint cache 108 | .eslintcache 109 | 110 | # Optional stylelint cache 111 | .stylelintcache 112 | 113 | # Microbundle cache 114 | .rpt2_cache/ 
115 | .rts2_cache_cjs/ 116 | .rts2_cache_es/ 117 | .rts2_cache_umd/ 118 | 119 | # Optional REPL history 120 | .node_repl_history 121 | 122 | # Output of 'npm pack' 123 | *.tgz 124 | 125 | # Yarn Integrity file 126 | .yarn-integrity 127 | 128 | # dotenv environment variable files 129 | .env 130 | .env.development.local 131 | .env.test.local 132 | .env.production.local 133 | .env.local 134 | 135 | # parcel-bundler cache (https://parceljs.org/) 136 | .cache 137 | .parcel-cache 138 | 139 | # Next.js build output 140 | .next 141 | out 142 | 143 | # Nuxt.js build / generate output 144 | .nuxt 145 | dist 146 | 147 | # Gatsby files 148 | .cache/ 149 | # Comment in the public line in if your project uses Gatsby and not Next.js 150 | # https://nextjs.org/blog/next-9-1#public-directory-support 151 | # public 152 | 153 | # vuepress build output 154 | .vuepress/dist 155 | 156 | # vuepress v2.x temp and cache directory 157 | .temp 158 | 159 | # Docusaurus cache and generated files 160 | .docusaurus 161 | 162 | # Serverless directories 163 | .serverless/ 164 | 165 | # FuseBox cache 166 | .fusebox/ 167 | 168 | # DynamoDB Local files 169 | .dynamodb/ 170 | 171 | # TernJS port file 172 | .tern-port 173 | 174 | # Stores VSCode versions used for testing VSCode extensions 175 | .vscode-test 176 | 177 | # yarn v2 178 | .yarn/cache 179 | .yarn/unplugged 180 | .yarn/build-state.yml 181 | .yarn/install-state.gz 182 | .pnp.* 183 | 184 | ### Node Patch ### 185 | # Serverless Webpack directories 186 | .webpack/ 187 | 188 | # Optional stylelint cache 189 | 190 | # SvelteKit build / generate output 191 | .svelte-kit 192 | 193 | ### VisualStudioCode ### 194 | .vscode/* 195 | !.vscode/settings.json 196 | !.vscode/tasks.json 197 | !.vscode/launch.json 198 | !.vscode/extensions.json 199 | !.vscode/*.code-snippets 200 | 201 | # Local History for Visual Studio Code 202 | .history/ 203 | 204 | # Built Visual Studio Code Extensions 205 | *.vsix 206 | 207 | ### VisualStudioCode Patch ### 208 | # Ignore 
all local history of files 209 | .history 210 | .ionide 211 | 212 | ### WebStorm ### 213 | # Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio, WebStorm and Rider 214 | # Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 215 | 216 | # User-specific stuff 217 | .idea/**/workspace.xml 218 | .idea/**/tasks.xml 219 | .idea/**/usage.statistics.xml 220 | .idea/**/dictionaries 221 | .idea/**/shelf 222 | .idea 223 | 224 | # AWS User-specific 225 | .idea/**/aws.xml 226 | 227 | # Generated files 228 | .idea/**/contentModel.xml 229 | 230 | # Sensitive or high-churn files 231 | .idea/**/dataSources/ 232 | .idea/**/dataSources.ids 233 | .idea/**/dataSources.local.xml 234 | .idea/**/sqlDataSources.xml 235 | .idea/**/dynamic.xml 236 | .idea/**/uiDesigner.xml 237 | .idea/**/dbnavigator.xml 238 | 239 | # Gradle 240 | .idea/**/gradle.xml 241 | .idea/**/libraries 242 | 243 | # Gradle and Maven with auto-import 244 | # When using Gradle or Maven with auto-import, you should exclude module files, 245 | # since they will be recreated, and may cause churn. Uncomment if using 246 | # auto-import. 
247 | # .idea/artifacts 248 | # .idea/compiler.xml 249 | # .idea/jarRepositories.xml 250 | # .idea/modules.xml 251 | # .idea/*.iml 252 | # .idea/modules 253 | # *.iml 254 | # *.ipr 255 | 256 | # CMake 257 | cmake-build-*/ 258 | 259 | # Mongo Explorer plugin 260 | .idea/**/mongoSettings.xml 261 | 262 | # File-based project format 263 | *.iws 264 | 265 | # IntelliJ 266 | out/ 267 | 268 | # mpeltonen/sbt-idea plugin 269 | .idea_modules/ 270 | 271 | # JIRA plugin 272 | atlassian-ide-plugin.xml 273 | 274 | # Cursive Clojure plugin 275 | .idea/replstate.xml 276 | 277 | # SonarLint plugin 278 | .idea/sonarlint/ 279 | 280 | # Crashlytics plugin (for Android Studio and IntelliJ) 281 | com_crashlytics_export_strings.xml 282 | crashlytics.properties 283 | crashlytics-build.properties 284 | fabric.properties 285 | 286 | # Editor-based Rest Client 287 | .idea/httpRequests 288 | 289 | # Android studio 3.1+ serialized cache file 290 | .idea/caches/build_file_checksums.ser 291 | 292 | ### WebStorm Patch ### 293 | # Comment Reason: https://github.com/joeblau/gitignore.io/issues/186#issuecomment-215987721 294 | 295 | # *.iml 296 | # modules.xml 297 | # .idea/misc.xml 298 | # *.ipr 299 | 300 | # Sonarlint plugin 301 | # https://plugins.jetbrains.com/plugin/7973-sonarlint 302 | .idea/**/sonarlint/ 303 | 304 | # SonarQube Plugin 305 | # https://plugins.jetbrains.com/plugin/7238-sonarqube-community-plugin 306 | .idea/**/sonarIssues.xml 307 | 308 | # Markdown Navigator plugin 309 | # https://plugins.jetbrains.com/plugin/7896-markdown-navigator-enhanced 310 | .idea/**/markdown-navigator.xml 311 | .idea/**/markdown-navigator-enh.xml 312 | .idea/**/markdown-navigator/ 313 | 314 | # Cache file creation bug 315 | # See https://youtrack.jetbrains.com/issue/JBR-2257 316 | .idea/$CACHE_FILE$ 317 | 318 | # CodeStream plugin 319 | # https://plugins.jetbrains.com/plugin/12206-codestream 320 | .idea/codestream.xml 321 | 322 | # Azure Toolkit for IntelliJ plugin 323 | # 
https://plugins.jetbrains.com/plugin/8053-azure-toolkit-for-intellij 324 | .idea/**/azureSettings.xml 325 | 326 | .idea/**/*.xml 327 | 328 | ### Windows ### 329 | # Windows thumbnail cache files 330 | Thumbs.db 331 | Thumbs.db:encryptable 332 | ehthumbs.db 333 | ehthumbs_vista.db 334 | 335 | # Dump file 336 | *.stackdump 337 | 338 | # Folder config file 339 | [Dd]esktop.ini 340 | 341 | # Recycle Bin used on file shares 342 | $RECYCLE.BIN/ 343 | 344 | # Windows Installer files 345 | *.cab 346 | *.msi 347 | *.msix 348 | *.msm 349 | *.msp 350 | 351 | # Windows shortcuts 352 | *.lnk 353 | 354 | # End of https://www.toptal.com/developers/gitignore/api/node,macos,windows,linux,webstorm,visualstudiocode 355 | -------------------------------------------------------------------------------- /.husky/prepare-commit-msg: -------------------------------------------------------------------------------- 1 | exec < /dev/tty && npx cz --hook || true 2 | -------------------------------------------------------------------------------- /.releaserc: -------------------------------------------------------------------------------- 1 | { 2 | "branches": [ 3 | "master" 4 | ], 5 | "plugins": [ 6 | "@semantic-release/changelog", 7 | [ 8 | "@semantic-release/commit-analyzer", 9 | { 10 | "releaseRules": [ 11 | { "type": "chore", "release": "patch" }, 12 | { "type": "ci", "release": "patch" }, 13 | { "type": "improvement", "release": "minor" }, 14 | { "type": "perf", "release": "patch" }, 15 | { "type": "refactor", "release": "patch" }, 16 | { "type": "revert", "release": "patch" } 17 | ] 18 | } 19 | ], 20 | "@semantic-release/release-notes-generator", 21 | "@semantic-release/npm", 22 | "@semantic-release/git" 23 | ] 24 | } 25 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | ## [1.30.1](https://github.com/rows/X/compare/v1.30.0...v1.30.1) (2025-02-05) 2 | 3 | # 
[1.30.0](https://github.com/rows/X/compare/v1.29.1...v1.30.0) (2025-01-10) 4 | 5 | 6 | ### Features 7 | 8 | * **linkedin:** adapt to the new html ([#122](https://github.com/rows/X/issues/122)) ([019de39](https://github.com/rows/X/commit/019de3909a354820c6466c44f6c7057067922ede)) 9 | 10 | ## [1.29.1](https://github.com/rows/X/compare/v1.29.0...v1.29.1) (2025-01-09) 11 | 12 | # [1.29.0](https://github.com/rows/X/compare/v1.28.1...v1.29.0) (2024-12-05) 13 | 14 | 15 | ### Features 16 | 17 | * **clutch.co:** add clutch.co reviews scraper ([#120](https://github.com/rows/X/issues/120)) ([8f24454](https://github.com/rows/X/commit/8f24454c0d88c726c88faa7bdd307e796a65428b)) 18 | 19 | ## [1.28.1](https://github.com/rows/X/compare/v1.28.0...v1.28.1) (2024-08-21) 20 | 21 | # [1.28.0](https://github.com/rows/X/compare/v1.27.0...v1.28.0) (2024-07-31) 22 | 23 | 24 | ### Features 25 | 26 | * add facebook post comments scraper ([#116](https://github.com/rows/X/issues/116)) ([8f83de2](https://github.com/rows/X/commit/8f83de27473eb6b932b6b877d2c83f5a5c5a9628)) 27 | 28 | # [1.27.0](https://github.com/rows/X/compare/v1.26.0...v1.27.0) (2024-07-31) 29 | 30 | 31 | ### Features 32 | 33 | * add airbnb scraper ([#115](https://github.com/rows/X/issues/115)) ([43bd12f](https://github.com/rows/X/commit/43bd12f48737aa654eeb62b45110f57e29b58a34)) 34 | 35 | # [1.26.0](https://github.com/rows/X/compare/v1.25.0...v1.26.0) (2024-07-24) 36 | 37 | 38 | ### Features 39 | 40 | * add capterra scraper ([#114](https://github.com/rows/X/issues/114)) ([ea0aa74](https://github.com/rows/X/commit/ea0aa7400454cdbe5125ec41bb781935c5b96fef)) 41 | 42 | # [1.25.0](https://github.com/rows/X/compare/v1.24.1...v1.25.0) (2024-07-11) 43 | 44 | 45 | ### Features 46 | 47 | * add capterra.de scraper ([#112](https://github.com/rows/X/issues/112)) ([1e683a9](https://github.com/rows/X/commit/1e683a95b60d335778b7f274e80705f6f08e46a3)) 48 | 49 | ## [1.24.1](https://github.com/rows/X/compare/v1.24.0...v1.24.1) (2024-07-02) 50 | 51 
| 52 | ### Bug Fixes 53 | 54 | * **g2-scraper:** fix bug with pagination ([#111](https://github.com/rows/X/issues/111)) ([1eb7888](https://github.com/rows/X/commit/1eb788812bffb93be054556448deb359d8787fb1)) 55 | 56 | # [1.24.0](https://github.com/rows/X/compare/v1.23.1...v1.24.0) (2024-06-17) 57 | 58 | 59 | ### Features 60 | 61 | * **core:** add mobile.de park scraper ([#110](https://github.com/rows/X/issues/110)) ([e6bbef1](https://github.com/rows/X/commit/e6bbef18a8f9a250147a41b8686c45f76bce14c2)) 62 | 63 | ## [1.23.1](https://github.com/rows/X/compare/v1.23.0...v1.23.1) (2024-06-17) 64 | 65 | 66 | ### Bug Fixes 67 | 68 | * **core:** bump package manually ([#109](https://github.com/rows/X/issues/109)) ([20a9127](https://github.com/rows/X/commit/20a91278af38d30046fd0540f85d9bd97c0c264f)) 69 | 70 | # [1.23.0](https://github.com/rows/X/compare/v1.22.0...v1.23.0) (2024-06-05) 71 | 72 | 73 | ### Bug Fixes 74 | 75 | * **usage-metrics:** correctly report the current tab URL when `Opening in Rows` ([#105](https://github.com/rows/X/issues/105)) ([eaf5de3](https://github.com/rows/X/commit/eaf5de341ef9ab8560b11631ff70af1dcc47fff4)) 76 | 77 | 78 | ### Features 79 | 80 | * **scraper:** add racius.com scraper ([#104](https://github.com/rows/X/issues/104)) ([10e0774](https://github.com/rows/X/commit/10e0774972faf466b63b4731bf1f77dfc1ee2072)) 81 | 82 | # [1.22.0](https://github.com/rows/X/compare/v1.21.1...v1.22.0) (2024-05-29) 83 | 84 | 85 | ### Features 86 | 87 | * **core:** add uninstall form ([#103](https://github.com/rows/X/issues/103)) ([253aebb](https://github.com/rows/X/commit/253aebb5c5cc44d1580c6b076594d0b4c6a562c2)) 88 | 89 | ## [1.21.1](https://github.com/rows/X/compare/v1.21.0...v1.21.1) (2024-05-29) 90 | 91 | 92 | ### Bug Fixes 93 | 94 | * start reporting the usage when the user selects Open in Rows ([#102](https://github.com/rows/X/issues/102)) ([4c5110d](https://github.com/rows/X/commit/4c5110dac86780793ba0969def9dd190229de54e)) 95 | 96 | # 
[1.21.0](https://github.com/rows/X/compare/v1.20.0...v1.21.0) (2024-05-28) 97 | 98 | 99 | ### Features 100 | 101 | * **analytics:** report extension usage ([#101](https://github.com/rows/X/issues/101)) ([00a73e6](https://github.com/rows/X/commit/00a73e612265417cd5f602cfc5b07fb019ba17fd)) 102 | 103 | # [1.20.0](https://github.com/rows/X/compare/v1.19.1...v1.20.0) (2024-05-27) 104 | 105 | 106 | ### Features 107 | 108 | * **release-rules:** fix release rules ([#100](https://github.com/rows/X/issues/100)) ([5c83b80](https://github.com/rows/X/commit/5c83b80fd7d7a2f37b90e6604aeb2d8c6582b580)) 109 | 110 | ## [1.19.1](https://github.com/rows/X/compare/v1.19.0...v1.19.1) (2024-05-24) 111 | 112 | 113 | ### Bug Fixes 114 | 115 | * **scraper:** fix imovirtual scraper ([#97](https://github.com/rows/X/issues/97)) ([45bb9a6](https://github.com/rows/X/commit/45bb9a6455213116b61ffdd948325e56432130a0)) 116 | 117 | # [1.19.0](https://github.com/rows/X/compare/v1.18.2...v1.19.0) (2024-05-20) 118 | 119 | 120 | ### Features 121 | 122 | * add ikea scrapper ([#95](https://github.com/rows/X/issues/95)) ([68f194b](https://github.com/rows/X/commit/68f194b1deb36cbd15dada8202b141fdc99285cc)) 123 | 124 | ## [1.18.2](https://github.com/rows/X/compare/v1.18.1...v1.18.2) (2024-05-13) 125 | 126 | 127 | ### Bug Fixes 128 | 129 | * fix URL matching scrappers ([#93](https://github.com/rows/X/issues/93)) ([ad3adc9](https://github.com/rows/X/commit/ad3adc9f520eaefccd3a76ff8f222d9a4d327fc8)) 130 | 131 | ## [1.18.1](https://github.com/rows/X/compare/v1.18.0...v1.18.1) (2024-05-13) 132 | 133 | 134 | ### Bug Fixes 135 | 136 | * fix amazon scrapping ([#92](https://github.com/rows/X/issues/92)) ([a47b7d9](https://github.com/rows/X/commit/a47b7d9139e96511d56b433907419c3e52a74439)) 137 | 138 | # [1.18.0](https://github.com/rows/X/compare/v1.17.0...v1.18.0) (2024-05-13) 139 | 140 | 141 | ### Features 142 | 143 | * add scrapper to new sites ([#91](https://github.com/rows/X/issues/91)) 
([1e716b1](https://github.com/rows/X/commit/1e716b16f679d1bfb66ef9b16ba1d4a911ad0cb5)) 144 | 145 | # [1.17.0](https://github.com/rows/X/compare/v1.16.0...v1.17.0) (2024-05-06) 146 | 147 | 148 | ### Features 149 | 150 | * add scrapper for LinkedIn Groups ([#90](https://github.com/rows/X/issues/90)) ([f25ef5b](https://github.com/rows/X/commit/f25ef5b82bb56fe85cad95c98ad532474a0bf772)) 151 | 152 | # [1.16.0](https://github.com/rows/X/compare/v1.15.0...v1.16.0) (2024-05-01) 153 | 154 | 155 | ### Features 156 | 157 | * add support for videos in Youtube playlist ([#89](https://github.com/rows/X/issues/89)) ([47161e8](https://github.com/rows/X/commit/47161e8f756d5506a087533b696d446a4372ffc0)) 158 | 159 | # [1.15.0](https://github.com/rows/X/compare/v1.14.0...v1.15.0) (2024-04-23) 160 | 161 | 162 | ### Features 163 | 164 | * add support for Github issues ([#87](https://github.com/rows/X/issues/87)) ([360d820](https://github.com/rows/X/commit/360d82015cc0292eccf7ab694209b31b4839d56f)) 165 | 166 | # [1.14.0](https://github.com/rows/X/compare/v1.13.0...v1.14.0) (2024-04-19) 167 | 168 | 169 | ### Features 170 | 171 | * **bounty:** add report button that append data to a spreadsheet using Rows API ([#81](https://github.com/rows/X/issues/81)) ([578f7c5](https://github.com/rows/X/commit/578f7c5a55227fbc35ea3eb67c8fadfbd57ba6f5)) 172 | 173 | # [1.13.0](https://github.com/rows/X/compare/v1.12.4...v1.13.0) (2024-04-16) 174 | 175 | 176 | ### Features 177 | 178 | * **bounty:** add empty state when it's a internal chrome page ([#79](https://github.com/rows/X/issues/79)) ([80509e7](https://github.com/rows/X/commit/80509e7ec092562e8133745c3af3e8ed55e39a98)) 179 | 180 | ## [1.12.4](https://github.com/rows/X/compare/v1.12.3...v1.12.4) (2024-04-16) 181 | 182 | ## [1.12.3](https://github.com/rows/X/compare/v1.12.2...v1.12.3) (2024-04-15) 183 | 184 | ## [1.12.2](https://github.com/rows/X/compare/v1.12.1...v1.12.2) (2024-04-12) 185 | 186 | ## 
[1.12.1](https://github.com/rows/X/compare/v1.12.0...v1.12.1) (2024-03-15) 187 | 188 | # [1.12.0](https://github.com/rows/X/compare/v1.11.0...v1.12.0) (2024-03-14) 189 | 190 | 191 | ### Features 192 | 193 | * add apple newsroom scrapper ([#68](https://github.com/rows/X/issues/68)) ([9385f0c](https://github.com/rows/X/commit/9385f0c323dd9652cc36cb91b3adc5311be566b5)) 194 | 195 | # [1.11.0](https://github.com/rows/X/compare/v1.10.0...v1.11.0) (2024-03-04) 196 | 197 | 198 | ### Features 199 | 200 | * remove google maps scrapper ([#67](https://github.com/rows/X/issues/67)) ([d076b74](https://github.com/rows/X/commit/d076b74c1a4e5c566f016f1760846280e0cf72ab)) 201 | 202 | # [1.10.0](https://github.com/rows/X/compare/v1.9.1...v1.10.0) (2024-02-29) 203 | 204 | 205 | ### Features 206 | 207 | * add pinkorblue.nl and pinkorblue.it list of products scrapper ([#66](https://github.com/rows/X/issues/66)) ([03c0f5f](https://github.com/rows/X/commit/03c0f5f95b89de4f53a258a203b7b1a142ecdacf)) 208 | 209 | ## [1.9.1](https://github.com/rows/X/compare/v1.9.0...v1.9.1) (2024-02-28) 210 | 211 | 212 | ### Bug Fixes 213 | 214 | * add linkedIn post url to saved-posts scrapper ([#65](https://github.com/rows/X/issues/65)) ([a0ee654](https://github.com/rows/X/commit/a0ee6541e5b56038f890ead2dbed8114a4e26e7a)) 215 | 216 | # [1.9.0](https://github.com/rows/X/compare/v1.8.0...v1.9.0) (2024-02-28) 217 | 218 | 219 | ### Features 220 | 221 | * add LinkedIn likes scrapper on company and user's profiles ([#64](https://github.com/rows/X/issues/64)) ([cc62872](https://github.com/rows/X/commit/cc628729d14b6fbf03283704a9e0e24301d87994)) 222 | 223 | # [1.8.0](https://github.com/rows/X/compare/v1.7.0...v1.8.0) (2024-02-27) 224 | 225 | 226 | ### Features 227 | 228 | * add chaperone reviews list scrapper ([#63](https://github.com/rows/X/issues/63)) ([a9cfc93](https://github.com/rows/X/commit/a9cfc9397c34842386875be063927e8ada5ea522)) 229 | 230 | # [1.7.0](https://github.com/rows/X/compare/v1.6.0...v1.7.0) 
(2024-02-27) 231 | 232 | 233 | ### Features 234 | 235 | * enable the possibility to remove the header from the results ([#60](https://github.com/rows/X/issues/60)) ([3c237af](https://github.com/rows/X/commit/3c237af0bffda19a95aa16ce2347157f49e43856)) 236 | 237 | # [1.6.0](https://github.com/rows/X/compare/v1.5.0...v1.6.0) (2024-02-21) 238 | 239 | 240 | ### Features 241 | 242 | * add new scrappers ([#57](https://github.com/rows/X/issues/57)) ([8a4063c](https://github.com/rows/X/commit/8a4063c4f7f4c8816c7bc8fad625cda5836bb36e)) 243 | 244 | # [1.5.0](https://github.com/rows/X/compare/v1.4.4...v1.5.0) (2024-02-16) 245 | 246 | 247 | ### Features 248 | 249 | * add scrappers for google search results, supercoach & tilbudsportalen.dk ([#58](https://github.com/rows/X/issues/58)) ([cb633ab](https://github.com/rows/X/commit/cb633ab24f50b99c500943842153a309bba51f88)) 250 | 251 | ## [1.4.4](https://github.com/rows/X/compare/v1.4.3...v1.4.4) (2024-02-14) 252 | 253 | ## [1.4.3](https://github.com/rows/X/compare/v1.4.2...v1.4.3) (2024-02-12) 254 | 255 | ## [1.4.2](https://github.com/rows/X/compare/v1.4.1...v1.4.2) (2024-02-12) 256 | 257 | 258 | ### Bug Fixes 259 | 260 | * trigger workflow ci ([e4d374f](https://github.com/rows/X/commit/e4d374f385dceeff1b4fa77c980f7e561cd80603)) 261 | 262 | ## [1.4.1](https://github.com/rows/X/compare/v1.4.0...v1.4.1) (2024-02-12) 263 | 264 | # [1.4.0](https://github.com/rows/X/compare/v1.3.13...v1.4.0) (2024-02-12) 265 | 266 | 267 | ### Features 268 | 269 | * add description to the manifest.json ([#55](https://github.com/rows/X/issues/55)) ([a30febb](https://github.com/rows/X/commit/a30febb87d65e650471035a0093766cff3b7efe6)) 270 | 271 | ## [1.3.13](https://github.com/rows/X/compare/v1.3.12...v1.3.13) (2024-02-09) 272 | 273 | ## [1.3.12](https://github.com/rows/X/compare/v1.3.11...v1.3.12) (2024-02-09) 274 | 275 | ## [1.3.11](https://github.com/rows/X/compare/v1.3.10...v1.3.11) (2024-02-09) 276 | 277 | ## 
[1.3.10](https://github.com/rows/X/compare/v1.3.9...v1.3.10) (2024-02-09) 278 | 279 | ## [1.3.9](https://github.com/rows/X/compare/v1.3.8...v1.3.9) (2024-02-09) 280 | 281 | ## [1.3.8](https://github.com/rows/X/compare/v1.3.7...v1.3.8) (2024-02-09) 282 | 283 | ## [1.3.7](https://github.com/rows/X/compare/v1.3.6...v1.3.7) (2024-02-09) 284 | 285 | ## [1.3.6](https://github.com/rows/X/compare/v1.3.5...v1.3.6) (2024-02-09) 286 | 287 | ## [1.3.5](https://github.com/rows/X/compare/v1.3.4...v1.3.5) (2024-02-09) 288 | 289 | ## [1.3.4](https://github.com/rows/X/compare/v1.3.3...v1.3.4) (2024-02-09) 290 | 291 | ## [1.3.3](https://github.com/rows/X/compare/v1.3.2...v1.3.3) (2024-02-09) 292 | 293 | ## [1.3.2](https://github.com/rows/X/compare/v1.3.1...v1.3.2) (2024-02-09) 294 | 295 | ## [1.3.1](https://github.com/rows/X/compare/v1.3.0...v1.3.1) (2024-02-09) 296 | 297 | # [1.3.0](https://github.com/rows/X/compare/v1.2.0...v1.3.0) (2024-02-09) 298 | 299 | 300 | ### Features 301 | 302 | * fix release step in order to generate the correct version on chrome-extension deliverable ([44e2a85](https://github.com/rows/X/commit/44e2a85045f6fd2717f9d3e784be2d656bb7a71f)) 303 | 304 | # [1.2.0](https://github.com/rows/X/compare/v1.1.0...v1.2.0) (2024-02-09) 305 | 306 | 307 | ### Features 308 | 309 | * generate correct release ([#51](https://github.com/rows/X/issues/51)) ([7146cda](https://github.com/rows/X/commit/7146cda78783d046b8a300102b54b0ee5c95cb79)), closes [#52](https://github.com/rows/X/issues/52) [#53](https://github.com/rows/X/issues/53) [#54](https://github.com/rows/X/issues/54) 310 | 311 | # [1.1.0](https://github.com/rows/X/compare/v1.0.0...v1.1.0) (2024-02-09) 312 | 313 | 314 | ### Features 315 | 316 | * add .releaserc file ([#50](https://github.com/rows/X/issues/50)) ([cd31ab3](https://github.com/rows/X/commit/cd31ab3684ae019d14999f6a0714ad8d8c703032)) 317 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: 
-------------------------------------------------------------------------------- 1 | # Code of Conduct 2 | 3 | As contributors and maintainers of this project, we pledge to make participation in our community a harassment-free experience for everyone, regardless of age, body size, disability, ethnicity, gender identity and expression, level of experience, education, socio-economic status, nationality, personal appearance, race, religion, or sexual identity and orientation. 4 | 5 | ## Our Standards 6 | 7 | Examples of behavior that contributes to creating a positive environment include: 8 | 9 | - Using welcoming and inclusive language 10 | - Being respectful of differing viewpoints and experiences 11 | - Gracefully accepting constructive criticism 12 | - Focusing on what is best for the community 13 | - Showing empathy towards other community members 14 | 15 | Examples of unacceptable behavior by participants include: 16 | 17 | - The use of sexualized language or imagery and unwelcome sexual attention or advances 18 | - Trolling, insulting/derogatory comments, and personal or political attacks 19 | - Public or private harassment 20 | - Publishing others' private information, such as a physical or electronic address, without explicit permission 21 | - Other conduct which could reasonably be considered inappropriate in a professional setting 22 | 23 | ## Enforcement Responsibilities 24 | 25 | Community leaders are responsible for clarifying and enforcing our standards of acceptable behavior and will take appropriate and fair corrective action in response to any behavior that they deem inappropriate, threatening, offensive, or harmful. 26 | 27 | Community leaders have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned with this Code of Conduct, and will communicate reasons for moderation decisions when appropriate. 
28 | 29 | ## Scope 30 | 31 | This Code of Conduct applies within all project spaces, including the GitHub repository, issue trackers, chat channels, and any other forums created by the project team. 32 | 33 | ## Attribution 34 | 35 | This Code of Conduct is adapted from the [Contributor Covenant](https://www.contributor-covenant.org), version 2.0, available at https://www.contributor-covenant.org/version/2/0/code_of_conduct.html. 36 | 37 | For answers to common questions about this code of conduct, see the FAQ at https://www.contributor-covenant.org/faq. 38 | 39 | --- 40 | 41 | By participating in this project, you are expected to uphold this code of conduct. Please report unacceptable behavior to [project maintainers](mailto:email@example.com). 42 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing Guidelines 2 | 3 | Thank you for considering contributing to our project! Please follow these simple guidelines to make the contribution process smooth and effective. 4 | 5 | ## Getting Started 6 | 7 | 1. **Clone the Repository** 8 | - Clone the forked repository to your local machine: 9 | ```bash 10 | git clone https://github.com/rows/x.git 11 | ``` 12 | 13 | 2. **Create a Branch** 14 | - Create a new branch for your changes: 15 | ```bash 16 | git checkout -b feature-branch-name 17 | ``` 18 | 19 | 3. **Make Changes** 20 | - Make your desired changes or additions to the codebase. 21 | 22 | 4. **Commit Changes** 23 | - Commit your changes with a descriptive commit message: 24 | ```bash 25 | git add . 26 | git commit -m "Your detailed commit message" 27 | ``` 28 | 29 | 5. **Push Changes** 30 | - Push your changes to the repository: 31 | ```bash 32 | git push origin feature-branch-name 33 | ``` 34 | 35 | 6. 
**Submit a Pull Request** 36 | - Go to the repository and submit a pull request (PR) from your branch to the `main` branch. 37 | 38 | ## Additional Guidelines 39 | 40 | - **Describe Your Changes**: Provide a brief description of the changes introduced by your PR. 41 | - **Follow Code Standards**: Ensure your code follows the project's coding conventions and style guidelines. 42 | - **Testing**: Include test cases or ensure existing tests pass, if applicable. 43 | 44 | ## Code of Conduct 45 | 46 | Please note that we have a [Code of Conduct](CODE_OF_CONDUCT.md) in place. Contributors are expected to adhere to it in all interactions within this repository. 47 | 48 | ## License 49 | 50 | By contributing to this project, you agree that your contributions will be licensed under the [MIT License](LICENSE). 51 | 52 | ## Notes 53 | 54 | - Pull requests will be reviewed by project maintainers. 55 | - Be respectful to others and their contributions. 56 | 57 | We appreciate your contributions to our project! If you have any questions or need further assistance, feel free to reach out. 58 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 Rows 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # RowsX 2 | 3 | RowsX is a Chrome extension that performs simple web scraping tasks for business users. It was built to help users load data from websites with HTML tables into spreadsheets, and is developed by [Rows.com](https://rows.com). 4 | 5 | [Demo video](https://www.youtube.com/watch?v=RjOLjgCvayM) · [Install extension from the Chrome Web Store](https://chromewebstore.google.com/detail/rowsx/abkccndhocmfdombbpmnhfjidcdcjjeo) · [X](https://twitter.com/RowsHQ) · [LinkedIn](https://www.linkedin.com/company/rowshq) · [Discord](https://discord.gg/MqnBDJdf) 6 | 7 | 8 | ![RowsX](https://github.com/rows/X/assets/31993620/c80634eb-27d5-443f-b5de-bb8c2c21e1b3) 9 | 10 | 11 | ## About Rows 12 | 13 | [Rows](https://rows.com) is a modern spreadsheet. It is the easiest way to import, transform and share data in a spreadsheet. 14 | 15 | ## Why Open-source RowsX? 16 | 17 | We're opening RowsX to the community and invite everyone to contribute with new features, support to new websites or new ideas to improve it. 18 | 19 | RowsX was born during a team hackathon in January of 2024. We built it to solve a problem we see our customers struggle with everyday: getting data from the web to a spreadsheet. 
The Rows platform lets you import data from files, via Integrations with APIs and SaaS services, and connectors on 3rd party platforms that call our Rows API. RowsX extends the importing options to the Browser! Since launching it, more than two thousand people have used it to import lists of data from countless sites, internal tools and back office systems. 20 | 21 | 22 | ## Open Bounties Program 23 | 24 | Join our Open Bounties Program and get rewarded for enhancing the extension! 25 | 26 | Current Open Bounties: 27 | 28 | - [ ] Port extension to Safari: $250 29 | - [ ] Support parsing tables in PDFs: $150. [Example](https://www.oecd.org/pisa/OECD_2022_PISA_Results_Comparing%20countries%E2%80%99%20and%20economies%E2%80%99%20performance%20in%20mathematics.pdf). 30 | 31 | To ensure quality and alignment with our goals, features are only eligible for a bounty once they are approved by our team and made publicly available. If you have submitted code to solve one of the bounties, let us know at security@rows.com. 32 | 33 | Note: RowsX is a tool for business people to use that translates what they see into a spreadsheet. Atm we don't aim to build a fully automated scraper platform that operates beyond the actions of the user and what the user can see. 34 | 35 | ## Get Started 36 | 37 | To start, you'll need to install the project on your local machine. 38 | This requires Node.js to be installed on your system. 39 | Once you've cloned the repository, execute the following command in your terminal to install the project's dependencies: 40 | 41 | ```bash 42 | npm i 43 | ``` 44 | 45 | Once you've installed the project's dependencies, 46 | you can initiate development mode by running the following command in your terminal: 47 | 48 | ```bash 49 | npm run dev 50 | ``` 51 | 52 | Once your development environment is set up, follow these steps to start using our Chrome extension on your machine: 53 | 54 | > [!NOTE] 55 | > You need to do this just one time. 56 | > 57 | > 1. 
Open the Chrome extensions page at [chrome://extensions](chrome://extensions/). 58 | > 2. In the top right corner, you have a switch called "Developer Mode". Just activate it. 59 | > 3. Click the "Load unpacked" button. 60 | > 4. Select the directory containing your extension project. For example `~/repos/rows/x/dist`. 61 | > 5. Your extension should now be loaded and running in development mode. 62 | > 6. Pin the extension to reach it easily :smiley: 63 | > 7. You can make changes to your extension files, and they will automatically be reflected in the browser. 64 | 65 | ### .env File 66 | 67 | The environment configuration file plays a crucial role in managing the application's integration with the Rows API (this is needed to store user feedback; if you are not doing any work related to that, you can leave it empty on your local machine). The env file contains four specific variables essential for ensuring that the application can securely and accurately interact with the Rows API. Here’s a brief explanation of each variable: 68 | 69 | - `VITE_ROWS_API_KEY`: This variable stores the Rows API key, which is necessary for authenticating requests made from our application to the Rows service. It ensures that our application has the permission to access and modify the spreadsheet data. 70 | - `VITE_SPREADSHEET_ID`: This variable holds the ID of the spreadsheet we want to access. It specifies the target spreadsheet within the Rows platform where all the data from our application is stored or retrieved from. 71 | - `VITE_TABLE_ID`: This variable contains the ID of the specific table within the spreadsheet mentioned above. It identifies the exact location within the spreadsheet where data entries should be made or updated. 72 | - `VITE_TABLE_ID_USAGE`: This variable contains the ID of the specific table within the spreadsheet mentioned above. It identifies the exact location within the spreadsheet where data entries should be made or updated.
The table is then used to store this extension usage. 73 | 74 | By storing these keys in the environment configuration file and automating its creation during the release process via GitHub Actions, we ensure that the setup is secure, efficient, and less prone to errors, enabling seamless integration and data management. 75 | 76 | ## How to add a new scraper? 77 | 78 | There are 2 different ways of building a custom scraper: 79 | 80 | 1. **The data is loaded from a list** 81 | 82 | ```js 83 | { 84 | header: 'ProductHunt results', 85 | listElementsQuery: '', 86 | elementParser: [ 87 | //... 88 | { title: 'Product image', query: '', type: 'image' }, 89 | { title: 'Product name', query: '', type: 'text' }, 90 | //... 91 | ] 92 | } 93 | ``` 94 | 95 | In this configuration the only thing that changes is the `type`, and it could be of different types: 96 | 97 | - `image`, it will extract the src link of the image and will be used as `=IMAGE("")` on cells 98 | - `text` will extract all the text of the element 99 | - `clean-url` will get the src without query parameters, this is helpful in sites like LinkedIn. 100 | - `link` it will return the href src 101 | - `get-attribute`, is the most exotic one, because it will get the HTML value of a specific attribute because some elements have descriptions as aria-label. For example, G2.com has data for lazy loading, and the real image source is at the attribute `data-deferred-image-src`, for that scenario we need to use this parameter like this `{ title: 'Logo', query: '[class*="product-listing__img"] > img', type: 'get-attribute', attribute: 'data-deferred-image-src' },`. 102 | 103 | 2. **The data is loaded from a DIV table (not the conventional HTML table)** - There is an example of a configuration for those scenarios: 104 | 105 | ```js 106 | parseTables: { 107 | header: "Custom div parser", // <- title that will presented on RowsX UI. 
108 | tables: [ 109 | { rows: '', cols: '' }, 110 | { rows: '', cols: '' }, 111 | ], 112 | mergeTablesBy: 'row' // <- optional: merge the tables by 'row' or by 'column' 113 | } 114 | ``` 115 | 116 | > [!TIP] 117 | > The `mergeTablesBy` property defines the strategy for combining multiple tables into a single dataset. This parameter is optional. If set to `row`, tables are merged by rows, resulting in a single table with all rows combined. If set to `column`, tables are merged by columns, resulting in a single table with all columns combined. 118 | 119 | ### When to use `.example` and `[class*="example"]` 120 | 121 | `.example` and `[class*="example"]` are both CSS selectors that can be used to select elements in an HTML document. However, they have different purposes and should be used in different situations. 122 | 123 | - The selector `.example` selects all elements that have the class `example`. This is a simple way to select elements with a specific class. 124 | - The selector `[class*="example"]` selects all elements whose `class` attribute contains the substring `example`. This selector is more versatile than `.example`, because it matches class names that start with `example`, end with `example`, or have `example` anywhere in the middle. 125 | 126 | > [!TIP] 127 | > The last selector (`[class*="example"]`) is more versatile but could lead to undesirable results, so use it with caution!
128 | 129 | #### Example of different selectors' usage 130 | 131 | image 132 | 133 | For example, if we want to extract the `title` attribute of an item on idealista.pt, we can use the following configuration: 134 | 135 | ```js 136 | { title: 'Description', query: '.item-link', type: 'get-attribute', attribute: 'title' } 137 | ``` 138 | 139 | image 140 | 141 | To identify the list of elements to extract information from, we can use: 142 | 143 | ```js 144 | { 145 | listElementsQuery: '[data-test*="post-item-"]', 146 | } 147 | ``` 148 | 149 | ## How does this work with Rows? :thinking: 150 | 151 | In the following image, you can see how it works; each step is explained in more detail below: 152 | 153 | ![image](https://github.com/rows/X/assets/7489569/1425f71f-153c-4e8a-9bb2-cff78ef80a97) 154 | 155 | 1. **User Initiates Action** - The user clicks on the RowsX icon in their browser, prompting the extension to take action. This action triggers the opening of the RowsX UI, which is the main interface for interacting with the extension. 156 | 2. **RowsX UI Display and Event Trigger** - Initially, the RowsX UI displays an empty state component, indicating that no data has been extracted. This signifies the extension is ready to start extracting data from the current web page. After rendering everything, it triggers the `rows-x:scrap` event. 157 | 3. **Background Script Activation** - Upon displaying the empty state component, the RowsX UI emits an event named `rows-x:scrap`. This event is listened to by the service worker running the `background.js` script, which is responsible for handling background tasks and communication with the extension's popup window. 158 | 4. **Data Extraction Process** - The background.js script receives the `rows-x:scrap` event and starts the data extraction process. It first attempts to identify a suitable scraper based on the URL of the current web page.
If a matching scraper is found, it utilizes that scraper to extract the relevant data from the page. If no matching scraper is found, the script falls back to extracting data directly from HTML tables on the page. 159 | 5. **Transmitting Extracted Data** - Upon completing data extraction, the service worker sends the extracted data to the RowsX UI as JSON. The JSON response follows a structured format that the UI can readily parse and display. 160 | 161 | ```json 162 | [ 163 | ..., 164 | { 165 | "title": "Best Amazon products", 166 | "table": [ 167 | ["header_1", "header_2", "header_3"], 168 | ["cell_1", "cell_2", "cell_3"], 169 | ["cell_4", "cell_5", "cell_6"], 170 | ["cell_7", "cell_8", "cell_9"] 171 | ] 172 | } 173 | ... 174 | ] 175 | ``` 176 | 177 | 6. **Sharing Extracted Data with Rows App** - When the user clicks on the "Open in Rows" button, it initiates the transfer of extracted data to the Rows app. This triggers an event named `rows_x:store`, which signals the service worker to convert the extracted data into a TSV format. The converted data is then packaged and prepared for transfer to the Rows app. 178 | 7. **Injecting Data into Rows App** - Once the data is prepared, the service worker opens a new tab and navigates to the Rows app's URL, https://rows.com/new. The `background.js` will inject the prepared TSV data into the `localStorage` of the Rows app. This allows the Rows app to access and utilize the extracted data directly, enabling the user to further manipulate and analyze the data within the Rows app environment. 179 | The data will be stored under the key `rows_x` and has the following structure: 180 | 181 | ```json 182 | { "source": "%ROWS_X%", "data": "header_1\theader_2\theader_3\ncell_1\t..." } 183 | ``` 184 | 185 | 8. The app renders and looks for the value of `rows_x` in `localStorage`; if there is any data, it copies it to the clipboard. 186 | 9.
After that the app will trigger a paste event that will load the TSV into a new Table. 187 | 10. The user sees the scraped information in Table 1 of a new Page. 188 | 189 | 190 | ## Contributions 191 | 192 | Contributions to RowsX are welcome! If you have issues or suggestions for improving the extension, please feel free to open an issue or submit a pull request on the GitHub repository. 193 | 194 | Happy scraping with RowsX! 195 | -------------------------------------------------------------------------------- /app_icons/128x128.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rows/X/0163083888a66e5b91a1afd93d879558d29f1e31/app_icons/128x128.png -------------------------------------------------------------------------------- /app_icons/16x16.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rows/X/0163083888a66e5b91a1afd93d879558d29f1e31/app_icons/16x16.png -------------------------------------------------------------------------------- /app_icons/48x48.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rows/X/0163083888a66e5b91a1afd93d879558d29f1e31/app_icons/48x48.png -------------------------------------------------------------------------------- /config/jest-preset.cjs: -------------------------------------------------------------------------------- 1 | const ts_preset = require('ts-jest/jest-preset'); 2 | const puppeteer_preset = require('jest-puppeteer/jest-preset'); 3 | 4 | module.exports = Object.assign(ts_preset, puppeteer_preset); 5 | -------------------------------------------------------------------------------- /e2e/amazon/test.yml: -------------------------------------------------------------------------------- 1 | url: https://www.amazon.es/s?k=kindle&crid=3R2H4DS46XOMN&sprefix=%2Caps%2C115&ref=nb_sb_ss_recent_1_0_recent 2 | 
-------------------------------------------------------------------------------- /e2e/ebay/test.yml: -------------------------------------------------------------------------------- 1 | url: https://www.ebay.com/sch/i.html?_from=R40&_nkw=cr7+signed+shirt&_sacat=0&_dmd=1&rt=nc&_odkw=sapo&_osacat=0 2 | -------------------------------------------------------------------------------- /e2e/exhibitors-ces/test.yml: -------------------------------------------------------------------------------- 1 | url: https://exhibitors.ces.tech/8_0/#/searchtype/category/search/229/show/all 2 | -------------------------------------------------------------------------------- /e2e/idealista/test.yml: -------------------------------------------------------------------------------- 1 | url: https://www.idealista.pt/comprar-casas/matosinhos/sao-mamede-de-infesta-e-senhora-da-hora/ 2 | -------------------------------------------------------------------------------- /e2e/immobiliare/test.yml: -------------------------------------------------------------------------------- 1 | url: https://www.immobiliare.it/vendita-case/puglia/?criterio=rilevanza&prezzoMinimo=5000000 2 | -------------------------------------------------------------------------------- /e2e/imovirtual/test.yml: -------------------------------------------------------------------------------- 1 | url: https://www.imovirtual.com/comprar/apartamento/ 2 | -------------------------------------------------------------------------------- /e2e/linkedin-connections/test.yml: -------------------------------------------------------------------------------- 1 | url: https://www.linkedin.com/mynetwork/invite-connect/connections/ 2 | -------------------------------------------------------------------------------- /e2e/linkedin-jobs/test.yml: -------------------------------------------------------------------------------- 1 | url: 
https://www.linkedin.com/jobs/search/?currentJobId=3806260395&keywords=senior%20frontend%20developer&origin=SWITCH_SEARCH_VERTICAL 2 | -------------------------------------------------------------------------------- /e2e/linkedin-search-people/test.yml: -------------------------------------------------------------------------------- 1 | url: https://www.linkedin.com/search/results/people/?keywords=rows&origin=SWITCH_SEARCH_VERTICAL&sid=LBg 2 | -------------------------------------------------------------------------------- /e2e/netflix/test.yml: -------------------------------------------------------------------------------- 1 | url: https://www.netflix.com/browse/genre/3979 2 | -------------------------------------------------------------------------------- /e2e/product-hunt/test.yml: -------------------------------------------------------------------------------- 1 | url: https://www.producthunt.com/leaderboard/daily/2024/1/18 2 | -------------------------------------------------------------------------------- /e2e/sample.index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | -------------------------------------------------------------------------------- /e2e/scrappers.test.ts: -------------------------------------------------------------------------------- 1 | import { readdirSync, promises as fs } from 'fs'; 2 | import { resolve } from 'path'; 3 | import * as yaml from 'js-yaml'; 4 | 5 | const sleep = (ms: number) => new Promise((resolve) => setTimeout(resolve, ms)); 6 | 7 | function listDirectories(path: string) { 8 | const directories = readdirSync(path, { withFileTypes: true }) 9 | .filter((dir) => dir.isDirectory() && dir.name !== '__snapshots__') 10 | .map((dir) => dir.name); 11 | 12 | return directories; 13 | } 14 | 15 | async function getExtensionId() { 16 | const workerTarget = await browser.waitForTarget((target) => target.type() === 'service_worker', { 17 | timeout: 3000, 18 | 
}); 19 | 20 | const urlRegex = /chrome-extension:\/\/(?<id>[a-z]+)/; 21 | const match = urlRegex.exec(workerTarget.url()); 22 | 23 | if (!match || !match.groups) { 24 | throw new Error('Extension URL does not match expected format'); 25 | } 26 | 27 | return match.groups.id; 28 | } 29 | 30 | describe('RowsX - scrappers tests', () => { 31 | const tests = listDirectories(__dirname); 32 | let extensionId = ''; 33 | 34 | beforeAll(async () => { 35 | extensionId = await getExtensionId(); 36 | }); 37 | 38 | it.each(tests)('Scrapping - %s', async (domain) => { 39 | // 1) get info about the e2e test 40 | const specData = await fs.readFile(resolve(__dirname, `./${domain}/test.yml`)); 41 | const spec = yaml.load(specData.toString()) as { url: string; result: string }; 42 | const extensionUrl = `chrome-extension://${extensionId}/index.html`; 43 | const data = await fs.readFile(resolve(__dirname, `./${domain}/index.html`)); 44 | 45 | // 2) open website to scrap 46 | const appPage = await browser.newPage(); 47 | const extensionPage = await browser.newPage(); 48 | 49 | // 3) mock the requests 50 | await appPage.setRequestInterception(true); 51 | 52 | appPage.on('request', async (request) => { 53 | if (request.resourceType() === 'document') { 54 | request.respond({ status: 200, contentType: 'text/html', body: data.toString() }); 55 | } else { 56 | request.abort(); 57 | } 58 | }); 59 | 60 | await appPage.bringToFront(); 61 | await appPage.goto(spec.url, { waitUntil: 'domcontentloaded' }); 62 | 63 | const client = await appPage.target().createCDPSession(); 64 | await client.send('Browser.setPermission', { 65 | origin: new URL(spec.url), 66 | permission: { 67 | name: 'clipboard-write', 68 | allowWithoutSanitization: true, 69 | }, 70 | setting: 'granted', 71 | }); 72 | 73 | await extensionPage.goto(extensionUrl, { waitUntil: 'domcontentloaded' }); 74 | await sleep(250); 75 | await extensionPage.bringToFront(); 76 | const button = await extensionPage.waitForSelector('.copy-btn'); 77 |
await button.click(); 78 | await sleep(180); 79 | await appPage.bringToFront(); 80 | 81 | const clipboard = await appPage.evaluate(async () => await navigator.clipboard.readText()); 82 | 83 | // close pages 84 | await appPage.close(); 85 | 86 | expect(clipboard.trimEnd()).toMatchSnapshot(); 87 | }); 88 | }); 89 | -------------------------------------------------------------------------------- /e2e/tik-tok-user/test.yml: -------------------------------------------------------------------------------- 1 | url: https://www.tiktok.com/search/user?q=rows&t=1706293199992 2 | -------------------------------------------------------------------------------- /e2e/tik-tok-videos/test.yml: -------------------------------------------------------------------------------- 1 | url: https://www.tiktok.com/search/video?q=rows&t=1706293199992 2 | -------------------------------------------------------------------------------- /e2e/trulia/test.yml: -------------------------------------------------------------------------------- 1 | url: https://www.trulia.com/NY/New_York/ 2 | -------------------------------------------------------------------------------- /e2e/twitter/test.yml: -------------------------------------------------------------------------------- 1 | url: https://twitter.com/torbschulz/verified_followers 2 | -------------------------------------------------------------------------------- /e2e/wikipedia/test.yml: -------------------------------------------------------------------------------- 1 | url: https://en.wikipedia.org/wiki/List_of_most-visited_museums 2 | -------------------------------------------------------------------------------- /e2e/yahoo-finance/test.yml: -------------------------------------------------------------------------------- 1 | url: https://finance.yahoo.com/quote/GOOGL/financials 2 | -------------------------------------------------------------------------------- /e2e/youtube/test.yml: 
-------------------------------------------------------------------------------- 1 | url: https://www.youtube.com/results?search_query=rows 2 | -------------------------------------------------------------------------------- /index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | Rows - X 8 | 9 | 10 |
11 | 12 | 13 | 14 | -------------------------------------------------------------------------------- /jest-puppeteer.config.cjs: -------------------------------------------------------------------------------- 1 | module.exports = { 2 | launch: { 3 | dumpio: false, 4 | headless: true, 5 | args: [ 6 | '--disable-extensions-except=./dist', 7 | '--load-extension=./dist', 8 | '--no-sandbox', 9 | '--disable-setuid-sandbox', 10 | ], 11 | }, 12 | }; 13 | -------------------------------------------------------------------------------- /jest.config.cjs: -------------------------------------------------------------------------------- 1 | /** @type {import('ts-jest').JestConfigWithTsJest} */ 2 | 3 | module.exports = { 4 | setupFilesAfterEnv: ['expect-puppeteer'], 5 | preset: './config/jest-preset.cjs', 6 | testTimeout: 5000, 7 | }; 8 | -------------------------------------------------------------------------------- /manifest.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "RowsX", 3 | "description": "RowsX is your go-to tool for efficiently extracting tables and lists from web pages and move that data to Rows.", 4 | "manifest_version": 3, 5 | "background": { 6 | "service_worker": "src/background.ts", 7 | "type": "module" 8 | }, 9 | "permissions": ["tabs", "activeTab", "scripting"], 10 | "host_permissions": [ 11 | "https://rows.com/*", 12 | "https://rows.new/*", 13 | "https://*.rows.com/*" 14 | ], 15 | "action": { 16 | "default_icon": "app_icons/48x48.png", 17 | "default_popup": "index.html" 18 | }, 19 | "icons": { 20 | "16": "app_icons/16x16.png", 21 | "128": "app_icons/128x128.png" 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "rows-x", 3 | "private": true, 4 | "version": "1.30.1", 5 | "type": "module", 6 | "scripts": { 7 | "dev": "vite", 8 | "build": 
"tsc && vite build", 9 | "test:e2e": "tsc && vite build --mode=e2e && jest --maxWorkers=1", 10 | "preview": "vite preview", 11 | "lint": "eslint src -c ./.eslintrc.cjs --ext .jsx,.js,.ts,.tsx", 12 | "check-types": "tsc --noEmit", 13 | "prepare": "husky", 14 | "clean": "rm -rf dist" 15 | }, 16 | "prettier": "@rows/prettier-config", 17 | "dependencies": { 18 | "@types/expect-puppeteer": "^5.0.6", 19 | "@types/jest-environment-puppeteer": "^5.0.6", 20 | "@types/puppeteer": "^7.0.4", 21 | "eslint-plugin-import": "^2.29.1", 22 | "preact": "^10.19.3", 23 | "ua-parser-js": "^1.0.37" 24 | }, 25 | "devDependencies": { 26 | "@crxjs/vite-plugin": "^2.0.0-beta.23", 27 | "@modyfi/vite-plugin-yaml": "^1.1.0", 28 | "@preact/preset-vite": "^2.8.1", 29 | "@types/chrome": "^0.0.293", 30 | "@types/jest": "^29.5.11", 31 | "@types/js-yaml": "^4.0.9", 32 | "@types/ua-parser-js": "^0.7.39", 33 | "@typescript-eslint/eslint-plugin": "^4.28.3", 34 | "@typescript-eslint/parser": "^4.0.0", 35 | "commitizen": "^4.3.0", 36 | "cz-conventional-changelog": "^3.3.0", 37 | "eslint": "^7.0.0", 38 | "eslint-plugin-prettier": "^3.1.1", 39 | "husky": "^9.0.10", 40 | "jest": "^29.7.0", 41 | "jest-puppeteer": "^11.0.0", 42 | "js-yaml": "^4.1.0", 43 | "prettier": "2.4", 44 | "puppeteer": "^23.11.1", 45 | "ts-jest": "^29.2.5", 46 | "typescript": "^4.5.0", 47 | "vite": "^5.4.12" 48 | }, 49 | "config": { 50 | "commitizen": { 51 | "path": "./node_modules/cz-conventional-changelog" 52 | } 53 | } 54 | } 55 | -------------------------------------------------------------------------------- /public/empty.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | -------------------------------------------------------------------------------- /public/fonts/Output Sans Regular.woff2: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/rows/X/0163083888a66e5b91a1afd93d879558d29f1e31/public/fonts/Output Sans Regular.woff2 -------------------------------------------------------------------------------- /public/fonts/Output Sans.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rows/X/0163083888a66e5b91a1afd93d879558d29f1e31/public/fonts/Output Sans.woff2 -------------------------------------------------------------------------------- /public/icons/close.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | -------------------------------------------------------------------------------- /public/icons/copy.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | -------------------------------------------------------------------------------- /public/logo.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | -------------------------------------------------------------------------------- /src/App.tsx: -------------------------------------------------------------------------------- 1 | import { FunctionalComponent } from 'preact'; 2 | import { useEffect, useState, useReducer } from 'preact/hooks'; 3 | import './index.css'; 4 | import FeedbackForm from './components/feedback-form'; 5 | import NoResults from './components/no-results'; 6 | import Header from './components/header'; 7 | import Preview from './components/preview'; 8 | import LoadingSkeleton from './components/loading-skeleton'; 9 | import { ExceptionMessage } from "./types"; 10 | // True when the reply is a { code, message } exception; a missing (null/undefined) reply is not an exception 11 | function isResponseIsAnException(response?: ExceptionMessage | null) { 12 | return response != null && response.code >= 0 && typeof response.message === 'string'; 13 | } 14 | 15 | const App: FunctionalComponent = () => { 16 | const 
[isLoading, setLoading] = useState(true); 17 | const [exceptionOnScrapperResult, setException] = useState(""); 18 | const [results, setResults] = useState([]); 19 | const [isReportFormOpen, toggleReportTab] = useReducer((isOpen) => !isOpen, false); 20 | 21 | const hasExceptions = Boolean(exceptionOnScrapperResult); 22 | const showLoading = !hasExceptions && isLoading; 23 | const showResults = !showLoading && results.length > 0; 24 | const noResults = !showLoading && (!hasExceptions && results.length === 0); 25 | // Request one scrap from the background script when the popup mounts 26 | useEffect(() => { 27 | chrome.runtime.sendMessage({ action: 'rows-x:scrap' }, (response) => { 28 | if (isResponseIsAnException(response)) { 29 | setResults([]); 30 | setException(response.message); 31 | } else { 32 | setResults(Array.isArray(response) ? response : []); 33 | setException(''); 34 | } 35 | 36 | setLoading(false); 37 | }); 38 | }, []); 39 | 40 | return ( 41 | <> 42 |
43 |
44 | {isReportFormOpen ? ( 45 | 46 | ) : ( 47 | <> 48 | {showLoading && ()} 49 | {showResults && } 50 | {noResults && } 51 | {hasExceptions && } 52 | 53 | )} 54 |
55 | 56 | ); 57 | }; 58 | 59 | export default App; 60 | -------------------------------------------------------------------------------- /src/background.ts: -------------------------------------------------------------------------------- 1 | /* eslint-disable @typescript-eslint/no-non-null-assertion */ 2 | import { ERROR_MESSAGES, ErrorCodes } from './error-codes'; 3 | import { getCurrentTab, runScrapper } from './utils/chrome'; 4 | import { reportUsage } from './utils/rows-api/report'; 5 | import { getScrapperOptionsByUrl } from './utils/scrapperUtils'; 6 | // Scrapes the current tab; resolves to undefined when no usable tab exists, or to a { code, message } exception for chrome:// pages 7 | async function scrap() { 8 | const tab = await getCurrentTab(); 9 | 10 | if (!tab || !tab.url || !tab.title) { 11 | return; 12 | } 13 | // startsWith: only URLs whose scheme is chrome:// are internal pages, not URLs that merely contain that text 14 | if (tab.url.startsWith('chrome://')) { 15 | return { 16 | code: ErrorCodes.GOOGLE_CHROME_INTERNAL_PAGES, 17 | message: ERROR_MESSAGES.get(ErrorCodes.GOOGLE_CHROME_INTERNAL_PAGES) 18 | }; 19 | } 20 | 21 | const options = getScrapperOptionsByUrl(tab.url, tab.title); 22 | 23 | return await runScrapper(tab, options); 24 | } 25 | // Opens a new Rows tab, stores the TSV in its localStorage and reports the usage; settles once all steps have finished 26 | async function openInRows(message: { data: string; }) { 27 | const tab = await getCurrentTab(); 28 | 29 | if (!tab || !tab.url || !tab.title) { 30 | return; 31 | } 32 | 33 | const tabUrl = tab.url; 34 | 35 | const rowsTab = await chrome.tabs.create({ url: 'https://rows.com/new' }); 36 | await storeRowsXData(message.data, rowsTab.id!); 37 | await reportUsage({ action: 'open_in_Rows', url: tabUrl }); 38 | } 39 | // Stores the TSV payload under the rows_x localStorage key of the given tab 40 | async function storeRowsXData(tsv: string, tabId: number) { 41 | await chrome.scripting.executeScript({ 42 | target: { tabId }, 43 | args: [tsv], 44 | func: (tsv) => { 45 | window.localStorage.setItem('rows_x', JSON.stringify({ source: '%ROWS_X%', data: tsv })); 46 | }, 47 | }); 48 | } 49 | // Every handled action answers through sendResponse so the sender's pending sendMessage call settles 50 | chrome.runtime.onMessage.addListener((message, _, sendResponse) => { 51 | switch (message.action) { 52 | case 'rows-x:scrap': 53 | scrap().then((data) => sendResponse(data ?? []), (error) => { console.error('rows-x:scrap failed:', error); sendResponse([]); }); 54 | break; 55 | case 'rows-x:store': 56 | openInRows(message).catch((error) => console.error('rows-x:store failed:', error)).then(() => sendResponse()); 57 | 
break; 58 | default: 59 | break; 60 | } 61 | 62 | return true; // return true to indicate you want to send a response asynchronously 63 | }); 64 | 65 | // On first install, register the survey page that Chrome opens when the extension gets uninstalled 66 | chrome.runtime.onInstalled.addListener(details => { 67 | if (details.reason === chrome.runtime.OnInstalledReason.INSTALL) { 68 | const uninstall_form_link = 'https://rows.com/share/uninstall-survey-1N9rGowAFzUfdRn4BLihHnOb6qUq1pdLRfHSEEHa9eoE'; 69 | chrome.runtime.setUninstallURL(uninstall_form_link); 70 | } 71 | }); 72 | -------------------------------------------------------------------------------- /src/components/button.css: -------------------------------------------------------------------------------- 1 | .btn { 2 | border: none; 3 | cursor: pointer; 4 | border-radius: 0.25rem; 5 | background-color: transparent; 6 | margin: 0; 7 | display: flex; 8 | justify-content: center; 9 | align-items: center; 10 | width: auto; 11 | height: 2.5rem; 12 | padding: 0.75rem; 13 | } 14 | 15 | .small { 16 | width: 22px; 17 | height: 22px; 18 | padding: 0; 19 | } 20 | 21 | .btn:hover { 22 | background-color: var(--light-grey); 23 | } 24 | 25 | .primary { 26 | background: var(--button-primary); 27 | padding: 0.5rem 2rem; 28 | } 29 | 30 | .text { 31 | width: auto; 32 | padding: 0; 33 | color: var(--grey); 34 | } 35 | 36 | .text:hover { 37 | background: transparent; 38 | } 39 | 40 | .primary:hover { 41 | background: var(--button-primary-hover); 42 | } 43 | 44 | .secondary { 45 | background: var(--button-secondary); 46 | } 47 | 48 | .secondary:hover { 49 | background: var(--button-secondary-hover); 50 | } 51 | -------------------------------------------------------------------------------- /src/components/button.tsx: -------------------------------------------------------------------------------- 1 | import './button.css'; 2 | import { FunctionComponent, ComponentChildren } from 'preact'; 3 | 4 | interface Props { 5 | onClick?: () => void; 6 | children: ComponentChildren; 7 | 
variant?: 'text' | 'primary' | 'secondary' | 'from'; 8 | type?: string; 9 | className?: string; 10 | size?: 'small'; 11 | } 12 | 13 | const Button: FunctionComponent = ({ 14 | onClick, 15 | className = '', 16 | variant = 'text', 17 | type, 18 | size = '', 19 | children, 20 | }) => { 21 | return ( 22 | 25 | ); 26 | }; 27 | 28 | export default Button; 29 | -------------------------------------------------------------------------------- /src/components/feedback-form.tsx: -------------------------------------------------------------------------------- 1 | import { JSX } from 'preact'; 2 | import { useState, useRef, useEffect } from 'preact/hooks'; 3 | import {createNewReportEntryRow} from '../utils/rows-api/report'; 4 | import Button from './button'; 5 | 6 | const FeedbackForm = (): JSX.Element => { 7 | const [reason, setReason] = useState('table not detected'); 8 | const [feedback, setFeedback] = useState(''); 9 | const inputElement = useRef(null); 10 | const shouldShowInput = reason === 'other'; 11 | 12 | useEffect(() => { 13 | if (inputElement.current && shouldShowInput) { 14 | inputElement.current.focus(); 15 | } 16 | }, [reason]); 17 | 18 | const handleRadioInputChange: JSX.GenericEventHandler = (event) => { 19 | setReason(event.currentTarget.id); 20 | setFeedback(''); 21 | } 22 | 23 | const handleInputTextChange: JSX.GenericEventHandler = (event) => { 24 | setFeedback(event.currentTarget.value); 25 | } 26 | 27 | const handleSubmit: JSX.SubmitEventHandler = (event) => { 28 | event.preventDefault(); 29 | setTimeout(() => window.close(), 150); 30 | createNewReportEntryRow(feedback ? feedback : reason); 31 | } 32 | 33 | return ( 34 |
35 |
36 | 43 | 44 |
45 | 46 |
47 | 54 | 55 |
56 | 57 |
58 | 65 | 66 |
67 | 68 | 69 |
70 | 79 |
80 | 81 | 84 |
85 | ); 86 | }; 87 | 88 | export default FeedbackForm; 89 | -------------------------------------------------------------------------------- /src/components/header.css: -------------------------------------------------------------------------------- 1 | .header { 2 | display: flex; 3 | justify-content: space-between; 4 | } 5 | 6 | .options { 7 | display: flex; 8 | align-items: center; 9 | gap: 1rem; 10 | } 11 | -------------------------------------------------------------------------------- /src/components/header.tsx: -------------------------------------------------------------------------------- 1 | import { JSX } from 'preact'; 2 | import { Dispatch } from 'preact/compat'; 3 | 4 | import './header.css'; 5 | import Button from './button'; 6 | 7 | interface Props { 8 | onReportClick: Dispatch; 9 | } 10 | 11 | const Header = ({ onReportClick }: Props): JSX.Element => { 12 | return ( 13 |
14 | 15 |
16 | 17 | 20 |
21 |
22 | ); 23 | }; 24 | 25 | export default Header; 26 | -------------------------------------------------------------------------------- /src/components/loading-skeleton.tsx: -------------------------------------------------------------------------------- 1 | import { FunctionalComponent } from 'preact'; 2 | 3 | function randomNumber(min: number, max: number) { 4 | return Math.floor(Math.random() * (max - min + 1)) + min; 5 | } 6 | 7 | const LoadingSkeleton: FunctionalComponent = () => { 8 | return ( 9 |
10 |
11 |
12 | 16 |
17 |
18 |
19 |
20 | 21 | {new Array(5).fill(0).map((_, index) => ( 22 | 23 | 33 | 43 | 53 | 54 | ))} 55 |
24 |
32 |
34 |
42 |
44 |
52 |
56 |
57 |
58 |
59 |
60 |
61 | ); 62 | }; 63 | 64 | export default LoadingSkeleton; 65 | -------------------------------------------------------------------------------- /src/components/no-results.css: -------------------------------------------------------------------------------- 1 | .no-results-container { 2 | display: flex; 3 | justify-content: center; 4 | align-items: center; 5 | width: 100%; 6 | color: var(--grey); 7 | } 8 | 9 | .no-results { 10 | display: flex; 11 | justify-content: center; 12 | align-items: center; 13 | height: 15rem; 14 | flex-direction: column; 15 | border: 1px dashed var(--light-grey); 16 | border-radius: 8px; 17 | color: var(--grey); 18 | padding: 1.25rem 1.5rem; 19 | text-align: center; 20 | } 21 | 22 | .no-results > img { 23 | margin: 1rem; 24 | } 25 | 26 | .btn-container { 27 | margin-top: 1rem; 28 | margin-bottom: 2rem; 29 | } 30 | -------------------------------------------------------------------------------- /src/components/no-results.tsx: -------------------------------------------------------------------------------- 1 | import { JSX } from 'preact'; 2 | import './no-results.css'; 3 | import { createNewReportEntryRow} from '../utils/rows-api/report'; 4 | import Button from './button'; 5 | 6 | interface Props { 7 | message?: string; 8 | } 9 | 10 | const NoResults = ({ message }: Props): JSX.Element => { 11 | const redirectToFeedback = () => { 12 | setTimeout(() => window.close(), 150); 13 | createNewReportEntryRow(); 14 | }; 15 | 16 | return ( 17 |
18 |
19 | 20 | No results 21 | {message ?? 'Would you like RowsX to support this website?'} 22 |
23 | 26 |
27 |
28 |
29 | ); 30 | }; 31 | 32 | export default NoResults; 33 | -------------------------------------------------------------------------------- /src/components/preview.css: -------------------------------------------------------------------------------- 1 | .results { 2 | gap: 1rem; 3 | display: flex; 4 | flex-direction: column; 5 | } 6 | 7 | .title { 8 | display: -webkit-box; 9 | -webkit-box-orient: vertical; 10 | -webkit-line-clamp: 2; /* From which line on to truncate */ 11 | overflow: hidden; 12 | text-align: left; 13 | } 14 | 15 | .table-preview img { 16 | height: 16px; 17 | width: 16px; 18 | } 19 | 20 | .table-header { 21 | display: grid; 22 | grid-template-columns: auto max-content; 23 | gap: 0.5rem; 24 | margin-bottom: 1.5rem; 25 | } 26 | 27 | .table-body { 28 | background-color: var(--lighter-grey); 29 | border: 1px solid var(--light-grey); 30 | padding: 0.5rem; 31 | border-radius: 0.5rem; 32 | } 33 | 34 | .pill { 35 | background-color: var(--light-grey); 36 | border-radius: 0.25rem; 37 | padding: 0.25rem 0.5rem; 38 | margin: auto 0; 39 | font-size: 12px; 40 | } 41 | 42 | table, 43 | th, 44 | td { 45 | font-weight: 400; 46 | height: auto; 47 | letter-spacing: 0.2px; 48 | line-height: 16px; 49 | overflow-wrap: break-word; 50 | tab-size: 4; 51 | text-align: start; 52 | background: white; 53 | } 54 | 55 | td { 56 | min-width: 7.5rem; 57 | max-width: 7.5rem; 58 | } 59 | 60 | tr:first-of-type > * { 61 | font-weight: 700; 62 | } 63 | 64 | th, 65 | td, 66 | tr { 67 | padding: 6px 8px; 68 | border: 1px solid #e1e1e1; 69 | white-space: nowrap; 70 | overflow: hidden; 71 | text-overflow: ellipsis; 72 | } 73 | 74 | table { 75 | border-collapse: collapse; 76 | border-style: hidden; 77 | } 78 | 79 | .table-container { 80 | position: relative; 81 | overflow: hidden; 82 | } 83 | 84 | .shade { 85 | position: absolute; 86 | width: 100%; 87 | bottom: 0; 88 | height: 4rem; 89 | background: linear-gradient( 90 | 180deg, 91 | rgba(247, 247, 247, 0) 0%, 92 | var(--lighter-grey) 100% 
93 | ); 94 | } 95 | 96 | .table-actions { 97 | display: flex; 98 | justify-content: end; 99 | gap: 0.75rem; 100 | margin-top: 0.5rem; 101 | } 102 | -------------------------------------------------------------------------------- /src/components/preview.tsx: -------------------------------------------------------------------------------- 1 | import Button from './button'; 2 | import './preview.css'; 3 | import { array2tsv, hasImage } from '../utils/copy'; 4 | import { FunctionComponent } from 'preact'; 5 | import { ScrapperResults } from '../utils/chrome'; 6 | import { reportUsage } from '../utils/rows-api/report'; 7 | 8 | interface Props { 9 | results: ScrapperResults; 10 | } 11 | 12 | const Preview: FunctionComponent = ({ results = [] }) => { 13 | const openInRows = async (table: string[][]) => { 14 | try { 15 | await chrome.runtime.sendMessage({ 16 | action: 'rows-x:store', 17 | data: array2tsv(table), 18 | }); 19 | } catch (error) { 20 | console.error("Failed to open data in Rows:", error); 21 | } 22 | }; 23 | 24 | const copyToClipboard = async (result: { 25 | title?: string; 26 | table: string[][]; 27 | includeHeader?: boolean; 28 | }) => { 29 | let tableToCopy = result.table; 30 | if (!result.includeHeader) { 31 | tableToCopy = result.table.slice(1); // Remove the first row (header) 32 | } 33 | 34 | try { 35 | await navigator.clipboard.writeText(array2tsv(tableToCopy)); 36 | 37 | // Send usage report 38 | await reportUsage({action: 'copy_values'}); 39 | } catch (error) { 40 | console.error("Failed to copy data to clipboard:", error); 41 | } 42 | 43 | setTimeout(() => window.close(), 200); 44 | }; 45 | 46 | const renderCell = (cell: string) => { 47 | if (hasImage(cell)) { 48 | return rows_x_image; 49 | } 50 | 51 | return cell; 52 | }; 53 | 54 | return ( 55 |
56 | {results.map((result) => { 57 | return ( 58 |
59 |
60 | {result.title} 61 |
{`${result.table.length - 1} records`}
62 |
63 |
64 |
65 | 66 | {result.table.slice(0, 6).map((row) => ( 67 | 68 | {row.map((col) => ( 69 | 70 | ))} 71 | 72 | ))} 73 |
{renderCell(col)}
74 | {result.table.length > 5 &&
} 75 |
76 |
77 | 84 | 91 |
92 |
93 |
94 | ); 95 | })} 96 |
97 | ); 98 | }; 99 | 100 | export default Preview; 101 | -------------------------------------------------------------------------------- /src/error-codes.ts: -------------------------------------------------------------------------------- 1 | export enum ErrorCodes { 2 | GOOGLE_CHROME_INTERNAL_PAGES 3 | } 4 | 5 | export const ERROR_MESSAGES = new Map( 6 | [ 7 | [ErrorCodes.GOOGLE_CHROME_INTERNAL_PAGES, "Open a page with a table, then try again!"] 8 | ] 9 | ); 10 | 11 | -------------------------------------------------------------------------------- /src/index.css: -------------------------------------------------------------------------------- 1 | :root { 2 | --lighter-grey: #f7f7f7; 3 | --light-grey: #eaeaea; 4 | --grey: #6f6f6f; 5 | --button-secondary: #e1e1e1; 6 | --button-secondary-hover: #e1e1e1; 7 | --button-primary: #ffc800; 8 | --button-primary-hover: #f0bc00; 9 | } 10 | 11 | @font-face { 12 | font-family: Output Sans; 13 | src: url("../fonts/Output Sans Regular.woff2") format("woff2"); 14 | font-style: normal; 15 | font-weight: 100; 16 | font-display: swap; 17 | } 18 | 19 | @font-face { 20 | font-family: Output Sans; 21 | src: url("../fonts/Output Sans.woff2") format("woff2"); 22 | font-weight: 700; 23 | font-style: normal; 24 | font-display: swap; 25 | } 26 | 27 | html, 28 | body, 29 | #root { 30 | margin: 0; 31 | padding: 0; 32 | font-size: 14px; 33 | color: black; 34 | } 35 | 36 | header { 37 | height: 48px; 38 | border-bottom: 1px solid #e1e1e1; 39 | display: flex; 40 | padding: 16px; 41 | align-items: center; 42 | box-sizing: border-box; 43 | } 44 | 45 | * { 46 | font-family: "Output Sans", sans-serif; 47 | font-size: 14px; 48 | font-weight: 400; 49 | line-height: 20px; 50 | letter-spacing: 0.2px; 51 | } 52 | 53 | b, 54 | strong { 55 | color: black; 56 | font-weight: 700; 57 | } 58 | 59 | a:visited, 60 | a:link { 61 | color: var(--grey); 62 | } 63 | 64 | .container { 65 | padding: 1rem; 66 | width: 22.5rem; 67 | max-height: 600px; 68 | } 69 | 70 | 
.input-wrapper { 71 | display: flex; 72 | align-content: center; 73 | margin-bottom: 0.5rem; 74 | } 75 | 76 | input[type="radio"] { 77 | margin: 0; 78 | accent-color: black; 79 | margin-right: 0.5rem; 80 | } 81 | 82 | input[type="text"] { 83 | border: 1px solid var(--light-grey); 84 | border-radius: 4px; 85 | background: var(--lighter-grey); 86 | padding: 0.5rem; 87 | width: 80%; 88 | -webkit-transition: 0.5s; 89 | transition: 300ms; 90 | } 91 | 92 | input[type="text"]:focus { 93 | border: 1px solid var(--grey); 94 | outline: none; 95 | } 96 | 97 | input[type="text"]:disabled { 98 | opacity: 0.5; 99 | cursor: not-allowed; 100 | } 101 | 102 | .skeleton { 103 | border-radius: 4px; 104 | animation: skeleton-loading 1s linear infinite alternate; 105 | } 106 | 107 | @keyframes skeleton-loading { 108 | 0% { 109 | background-color: var(--lighter-grey); 110 | } 111 | 100% { 112 | background-color: #c7c7c7; 113 | } 114 | } 115 | -------------------------------------------------------------------------------- /src/main.tsx: -------------------------------------------------------------------------------- 1 | import { render } from 'preact'; 2 | import App from './App'; 3 | import './index.css'; 4 | 5 | // eslint-disable-next-line @typescript-eslint/no-non-null-assertion 6 | render(, document.getElementById('root')!); 7 | -------------------------------------------------------------------------------- /src/scrappers/airbnb.yml: -------------------------------------------------------------------------------- 1 | url: https://www.airbnb.com* 2 | listElementsQuery: '[itemprop="itemListElement"]' 3 | elementParser: 4 | - title: Title 5 | query: '[data-testid="listing-card-title"]' 6 | type: text 7 | 8 | - title: Sub-title 9 | query: '[data-testid="listing-card-subtitle"]' 10 | type: text 11 | 12 | - title: Price per night 13 | query: '[data-testid="price-availability-row"] > .dir-ltr > div > div > span > div' 14 | type: text 15 | 16 | - title: Price total 17 | query: 
'[data-testid="price-availability-row"] > .dir-ltr > div > div > span:nth-child(3) > div > button > div > div' 18 | type: text 19 | 20 | - title: Link 21 | query: '[data-testid="card-container"] > a' 22 | type: clean-url 23 | -------------------------------------------------------------------------------- /src/scrappers/ajio.yml: -------------------------------------------------------------------------------- 1 | url: https://www.ajio.com/* 2 | listElementsQuery: .rilrtl-products-list__item 3 | elementParser: 4 | - title: Description 5 | query: .nameCls 6 | type: text 7 | 8 | - title: Brand 9 | query: .brand 10 | type: text 11 | 12 | - title: Price 13 | query: .price 14 | type: text 15 | 16 | - title: Product URL 17 | query: a 18 | type: link 19 | -------------------------------------------------------------------------------- /src/scrappers/amazon.yml: -------------------------------------------------------------------------------- 1 | url: 'https://www.amazon.*' 2 | header: Amazon search results 3 | listElementsQuery: '[class*="sg-"][data-cel-widget*="search_result_"]' 4 | elementParser: 5 | - title: Product image 6 | query: img 7 | type: image 8 | 9 | - title: Product name 10 | query: '[data-cy="title-recipe"]' 11 | type: text 12 | 13 | - title: Price 14 | query: .a-price .a-offscreen 15 | type: text 16 | 17 | - title: Rating 18 | query: .a-row.a-size-small > span 19 | type: get-attribute 20 | attribute: aria-label 21 | 22 | - title: Amazon link 23 | query: '[data-cy="title-recipe"] a' 24 | type: link 25 | -------------------------------------------------------------------------------- /src/scrappers/annuairevert-magasins.yml: -------------------------------------------------------------------------------- 1 | url: https://www.annuairevert.com/magasins* 2 | listElementsQuery: .company 3 | elementParser: 4 | - title: Image 5 | query: img 6 | type: image 7 | 8 | - title: Title 9 | query: .title-container > p 10 | type: text 11 | 12 | - title: Address 13 | query: 
address > span 14 | type: text -------------------------------------------------------------------------------- /src/scrappers/annuairevert-produits.yml: -------------------------------------------------------------------------------- 1 | url: https://www.annuairevert.com/produits* 2 | listElementsQuery: '[data-type="product"] > .row > a' 3 | elementParser: 4 | - title: Image 5 | query: img 6 | type: image 7 | 8 | - title: Title 9 | query: .title-container > p 10 | type: text 11 | 12 | - title: Fabricant 13 | query: .manufacturer > .entreprise-name 14 | type: text 15 | 16 | - title: Distributeur 17 | query: .distributor > .distributor-name 18 | type: text -------------------------------------------------------------------------------- /src/scrappers/apartments.yml: -------------------------------------------------------------------------------- 1 | url: https://www.apartments.com/* 2 | listElementsQuery: '.placard:not(.reinforcement)' 3 | elementParser: 4 | - title: Name 5 | query: .property-title 6 | type: text 7 | 8 | - title: Address 9 | query: .property-address 10 | type: text 11 | 12 | - title: Price 13 | query: .property-pricing, .property-rents, .price-range 14 | type: text 15 | 16 | - title: Beds 17 | query: .property-beds, .bed-range 18 | type: text 19 | 20 | - title: Specials 21 | query: .property-specials 22 | type: text 23 | 24 | - title: Details 25 | query: .property-amenities 26 | type: text 27 | 28 | - title: Phone Number 29 | query: .phone-link 30 | type: text 31 | 32 | - title: Apartments URL 33 | query: .property-link 34 | type: clean-url 35 | -------------------------------------------------------------------------------- /src/scrappers/apple-newsroom.yml: -------------------------------------------------------------------------------- 1 | url: https://www.apple.com/newsroom/archive/* 2 | header: Apple News 3 | listElementsQuery: .result__item 4 | elementParser: 5 | - title: Tag 6 | query: .item__category 7 | type: text 8 | - title: Title 9 | 
query: .item__headline 10 | type: text 11 | - title: Date of publication 12 | query: .item__date 13 | type: text 14 | - title: URL 15 | query: href 16 | type: self-link -------------------------------------------------------------------------------- /src/scrappers/autotrader-uk.yml: -------------------------------------------------------------------------------- 1 | url: https://www.autotrader.co.uk/car-search* 2 | listElementsQuery: '[data-testid="advertCard"]' 3 | elementParser: 4 | - title: Car brand 5 | query: '[data-testid="search-listing-title"]' 6 | type: text 7 | 8 | - title: Details 9 | query: '[data-testid="search-listing-subtitle"]' 10 | type: text 11 | 12 | - title: Specifications 13 | query: '[data-testid="search-listing-specs"]' 14 | type: text 15 | 16 | - title: Price 17 | query: 'p span' 18 | type: text 19 | 20 | - title: Autotrader-UK URL 21 | query: a 22 | type: link 23 | -------------------------------------------------------------------------------- /src/scrappers/autotrader.yml: -------------------------------------------------------------------------------- 1 | url: https://www.autotrader.com/* 2 | header: 'Autotrader search results' 3 | listElementsQuery: '[data-cmp="inventoryListing"]' 4 | elementParser: 5 | - title: "Car image" 6 | query: 'img[data-cmp="inventoryImage"]' 7 | type: "image" 8 | 9 | - title: "Description" 10 | query: '[data-cmp="link"]' 11 | type: "text" 12 | 13 | - title: "Price" 14 | query: '[data-cmp="firstPrice"]' 15 | type: "text" 16 | 17 | - title: "Number of miles" 18 | query: '[data-cmp="ownerDistance"]' 19 | type: "text" 20 | 21 | - title: "Contact" 22 | query: '[data-cmp="phoneNumber"]' 23 | type: "text" 24 | 25 | - title: "Autotrader URL" 26 | query: '[data-cmp="link"]' 27 | type: "clean-url" 28 | -------------------------------------------------------------------------------- /src/scrappers/babymarket-list-products.yml: -------------------------------------------------------------------------------- 1 | url: 2 | - 
https://www.babymarkt.de/*/* 3 | listElementsQuery: '.products__item' 4 | elementParser: 5 | - title: Item Brand 6 | query: '.product__brand' 7 | type: text 8 | 9 | - title: Item model 10 | query: '.product__model' 11 | type: text 12 | 13 | - title: Item Description 14 | query: '.product__title' 15 | type: text 16 | 17 | - title: RRP 18 | query: '.product__price--old' 19 | type: text 20 | 21 | - title: Price 22 | query: '.product__price' 23 | type: text 24 | 25 | - title: URL 26 | query: .product__link 27 | type: link -------------------------------------------------------------------------------- /src/scrappers/babymarket-product-detail-page.yml: -------------------------------------------------------------------------------- 1 | url: https://www.babymarkt.de/* 2 | listElementsQuery: .site-main 3 | includeHeader: false 4 | elementParser: 5 | 6 | - title: Item Description 7 | query: 'h1' 8 | type: text 9 | 10 | - title: Item Number 11 | query: '#product-description-content > div > p:nth-child(2)' 12 | type: text 13 | 14 | - title: EAN 15 | query: '#product-description-content > div > p:nth-child(3)' 16 | type: text 17 | 18 | - title: RRP 19 | query: '#buybox-options > div.product-options__prices > div:nth-child(1) > div.col-sm-9' 20 | type: text 21 | 22 | - title: Price 23 | query: '[itemprop="price"]' 24 | type: text -------------------------------------------------------------------------------- /src/scrappers/babyone.yml: -------------------------------------------------------------------------------- 1 | url: 2 | - https://www.babyone.de/* 3 | listElementsQuery: .product-box 4 | elementParser: 5 | - title: Item brand 6 | query: .product-brand 7 | type: text 8 | 9 | - title: Item model 10 | query: 'product-model' 11 | type: text 12 | 13 | - title: Item Description 14 | query: .product-name 15 | type: text 16 | 17 | - title: RRP 18 | query: .list-price 19 | type: text 20 | 21 | - title: Price 22 | query: .sale-price 23 | type: text 24 | 25 | - title: URL 26 | 
query: .product-link 27 | type: link -------------------------------------------------------------------------------- /src/scrappers/babypark-de-list-products.yml: -------------------------------------------------------------------------------- 1 | url: 2 | - https://www.babypark.de/search/* 3 | listElementsQuery: '#kuLandingProductsListUl > li' 4 | elementParser: 5 | 6 | - title: Item brand 7 | query: 'kuBrand' 8 | type: text 9 | 10 | - title: Item model 11 | query: 'kuModel' 12 | type: text 13 | 14 | - title: Item Description 15 | query: '.kuName' 16 | type: text 17 | 18 | - title: RRP 19 | query: '.kuOrigPrice' 20 | type: text 21 | 22 | - title: Price 23 | query: '.kuSalePrice' 24 | type: text 25 | 26 | - title: URL 27 | query: a 28 | type: link -------------------------------------------------------------------------------- /src/scrappers/babypark-de-product-detail-page.yml: -------------------------------------------------------------------------------- 1 | url: https://www.babypark.de/* 2 | listElementsQuery: .column 3 | includeHeader: false 4 | elementParser: 5 | 6 | - title: Item Description 7 | query: .page-title 8 | type: text 9 | 10 | - title: Item Number 11 | query: '[data-th="Artikelnummer"]' 12 | type: text 13 | 14 | - title: EAN 15 | query: '[data-th="EAN Nummer"]' 16 | type: text 17 | 18 | - title: RRP 19 | query: '.advice-price-box' 20 | type: text 21 | 22 | - title: Price 23 | query: '.price' 24 | type: text -------------------------------------------------------------------------------- /src/scrappers/babypark-nl-list-products.yml: -------------------------------------------------------------------------------- 1 | url: 2 | - https://www.babypark.nl/search/* 3 | listElementsQuery: '#kuLandingProductsListUl > li' 4 | elementParser: 5 | - title: Item brand 6 | query: 'kuBrand' 7 | type: text 8 | 9 | - title: Item model 10 | query: 'kuModel' 11 | type: text 12 | 13 | - title: Item Description 14 | query: '.kuName' 15 | type: text 16 | 17 | - title: 
RRP 18 | query: '.kuOrigPrice' 19 | type: text 20 | 21 | - title: Price 22 | query: '.kuSalePrice' 23 | type: text 24 | 25 | - title: URL 26 | query: a 27 | type: link -------------------------------------------------------------------------------- /src/scrappers/babypark-nl-product-detail-page.yml: -------------------------------------------------------------------------------- 1 | url: https://www.babypark.nl/* 2 | listElementsQuery: .column 3 | includeHeader: false 4 | elementParser: 5 | 6 | - title: Item Description 7 | query: .page-title 8 | type: text 9 | 10 | - title: Item Number 11 | query: '[data-th="Artikelnummer"]' 12 | type: text 13 | 14 | - title: EAN 15 | query: '[data-th="EAN Code"]' 16 | type: text 17 | 18 | - title: RRP 19 | query: '.advice-price-box' 20 | type: text 21 | 22 | - title: Price 23 | query: '.price' 24 | type: text -------------------------------------------------------------------------------- /src/scrappers/bcf.yml: -------------------------------------------------------------------------------- 1 | url: https://www.bcf.com.au/* 2 | listElementsQuery: .product-tile 3 | elementParser: 4 | - title: Product 5 | query: img 6 | type: image 7 | 8 | - title: Name 9 | query: .product-name 10 | type: text 11 | 12 | - title: Price 13 | query: .product-sales-price 14 | type: text 15 | 16 | - title: Link 17 | query: a 18 | type: clean-url 19 | -------------------------------------------------------------------------------- /src/scrappers/bebe9.yml: -------------------------------------------------------------------------------- 1 | url: 2 | - https://www.bebe9.com/* 3 | listElementsQuery: '.products-list > article' 4 | elementParser: 5 | - title: Item brand 6 | query: .product-brand 7 | type: text 8 | 9 | - title: Item model 10 | query: '.product-model' 11 | type: text 12 | 13 | - title: Item Description 14 | query: .product-name 15 | type: text 16 | 17 | - title: RRP 18 | query: .product-public-sale-price 19 | type: text 20 | 21 | - title: 
Price 22 | query: .product-price 23 | type: text 24 | 25 | - title: URL 26 | query: a 27 | type: link -------------------------------------------------------------------------------- /src/scrappers/bol.yml: -------------------------------------------------------------------------------- 1 | url: 2 | - https://www.bol.com/* 3 | listElementsQuery: .product-item, .product-item--row 4 | elementParser: 5 | - title: Item brand 6 | query: '.product-brand' 7 | type: text 8 | 9 | - title: Item model 10 | query: '.product-model' 11 | type: text 12 | 13 | - title: Item Description 14 | query: .product-title 15 | type: text 16 | 17 | - title: RRP 18 | query: '[data-test="list-price"], [data-test="from-price"]' 19 | type: text 20 | 21 | - title: Price 22 | query: .promo-price 23 | type: text 24 | 25 | - title: URL 26 | query: a 27 | type: link -------------------------------------------------------------------------------- /src/scrappers/booking.yml: -------------------------------------------------------------------------------- 1 | url: https://www.booking.com/searchresults* 2 | header: Booking.com search results 3 | listElementsQuery: '[data-testid="property-card"]' 4 | elementParser: 5 | - title: Title 6 | query: '[data-testid="title"]' 7 | type: text 8 | 9 | - title: Review score 10 | query: '[data-testid="review-score"] div:nth-child(1)' 11 | type: text 12 | 13 | - title: Distance 14 | query: '[data-testid="distance"]' 15 | type: text 16 | 17 | - title: Price w/ discounts 18 | query: '[data-testid="price-and-discounted-price"]' 19 | type: text 20 | 21 | - title: Offers 22 | query: '[data-testid="gallery-ribbon"]' 23 | type: text 24 | 25 | - title: Booking URL 26 | query: a 27 | type: clean-url 28 | -------------------------------------------------------------------------------- /src/scrappers/bpi.yml: -------------------------------------------------------------------------------- 1 | url: https://bpinet.bancobpi.pt/BPINet_Contas/Movimentos.aspx 2 | header: BPI Bank 
Account Transactions 3 | listElementsQuery: .TableRecords > tbody > tr 4 | elementParser: 5 | - title: Date Movement 6 | query: 'td:nth-child(1) > span' 7 | type: text 8 | 9 | - title: Date Movement 10 | query: 'td:nth-child(2) > div > span' 11 | type: text 12 | 13 | - title: Description 14 | query: 'td:nth-child(3) > div > div:nth-child(2) > div > div:nth-child(2) > a' 15 | type: text 16 | 17 | - title: Value 18 | query: 'td:nth-child(4) > div > span' 19 | type: text 20 | 21 | - title: Balance 22 | query: 'td:nth-child(5) > div > span' 23 | type: text 24 | -------------------------------------------------------------------------------- /src/scrappers/byggdinframtid.yml: -------------------------------------------------------------------------------- 1 | url: https://byggdinframtid.se/se* 2 | listElementsQuery: .school 3 | elementParser: 4 | - title: Logo 5 | query: .logo > img 6 | type: image 7 | 8 | - title: Type 9 | query: .type 10 | type: text 11 | 12 | - title: Name 13 | query: h3 14 | type: text 15 | 16 | - title: Description 17 | query: .desc > div > p 18 | type: text 19 | 20 | - title: Specializations 21 | query: .item:nth-child(1) 22 | type: text 23 | 24 | - title: Occupations 25 | query: .item:nth-child(2) 26 | type: text 27 | 28 | - title: Website 29 | query: .info-contact > .item:nth-child(1) > a 30 | type: link 31 | 32 | - title: Contact info 1 33 | query: .info-contact > .item:nth-child(2) > a 34 | type: link 35 | 36 | - title: Contact info 2 37 | query: .info-contact > .item:nth-child(3) 38 | type: text 39 | 40 | - title: Contact info 3 41 | query: .info-contact > .item:nth-child(4) 42 | type: text -------------------------------------------------------------------------------- /src/scrappers/cabral-moncada.yml: -------------------------------------------------------------------------------- 1 | url: https://www.cml.pt/leiloes/fa/online/* 2 | listElementsQuery: '.lotCard[id*="lot"]' 3 | header: 'Cml search results' 4 | elementParser: 5 | - title: 
Photo 6 | query: img 7 | type: image 8 | 9 | - title: Article no. 10 | query: .lotNumber 11 | type: text 12 | 13 | - title: Name 14 | query: .lotTitle 15 | type: text 16 | 17 | - title: Description 18 | query: .lotDescription 19 | type: text 20 | 21 | - title: Initial price (€) 22 | query: .lotBaseValue 23 | type: float 24 | 25 | - title: Current price (€) 26 | query: '[mattooltip="Vendido"], .lotCurrentValue span:nth-child(2)' 27 | type: float 28 | 29 | - title: Link 30 | query: a 31 | type: link 32 | -------------------------------------------------------------------------------- /src/scrappers/capterra-de.yml: -------------------------------------------------------------------------------- 1 | url: 'https://www.capterra.com.de/*' 2 | header: Capterra reviews 3 | listElementsQuery: '[data-container-view="ca-review"]' 4 | elementParser: 5 | - title: Title 6 | query: '.row > div:nth-child(2) > h3' 7 | type: text 8 | 9 | - title: Rating 10 | query: '.mos-star-rating' 11 | type: text 12 | 13 | - title: Comment 14 | query: '.row > div:nth-child(2) > p > span:nth-child(2)' 15 | type: text 16 | 17 | - title: Advantages 18 | query: '.row > div:nth-child(2) > p.fw-bold + p' 19 | type: text 20 | 21 | - title: Disadvantages 22 | query: '.row > div:nth-child(2) > p:last-of-type' 23 | type: text -------------------------------------------------------------------------------- /src/scrappers/capterra.yml: -------------------------------------------------------------------------------- 1 | url: 'https://www.capterra.com/*' 2 | header: Capterra reviews 3 | listElementsQuery: '[data-test-id="review-card"]' 4 | elementParser: 5 | - title: Title 6 | query: '[data-testid="review-content"] > div:nth-child(1)' 7 | type: text 8 | 9 | - title: Rating 10 | query: '.star-rating-label' 11 | type: text 12 | 13 | - title: Comment 14 | query: '[data-testid="overall-content"]' 15 | type: text 16 | 17 | - title: Advantages 18 | query: '[data-testid="pros-content"]' 19 | type: text 20 | 21 | - 
title: Disadvantages 22 | query: '[data-testid="cons-content"]' 23 | type: text -------------------------------------------------------------------------------- /src/scrappers/cdiscount.yml: -------------------------------------------------------------------------------- 1 | url: 2 | - https://www.cdiscount.com/* 3 | listElementsQuery: ul > li[data-sku] 4 | elementParser: 5 | - title: Item brand 6 | query: '.brand' 7 | type: text 8 | 9 | - title: Item model 10 | query: '.model' 11 | type: text 12 | 13 | - title: Item Description 14 | query: h2 15 | type: text 16 | 17 | - title: RRP 18 | query: '.c-price-s > s' 19 | type: text 20 | 21 | - title: Price 22 | query: .prdtBILPrice > .price 23 | type: text 24 | 25 | - title: URL 26 | query: a 27 | type: link -------------------------------------------------------------------------------- /src/scrappers/chaperone-review-list.yml: -------------------------------------------------------------------------------- 1 | url: 'https://chaperone.online/wp-admin/admin.php?page=review-list' 2 | header: Review list 3 | listElementsQuery: 'tr:has(td)' # The has(td) css selector allows to remove the empty table rows 4 | elementParser: 5 | - title: Ref no 6 | query: td:nth-child(1) 7 | type: text 8 | 9 | - title: Student Name 10 | query: td:nth-child(2) 11 | type: text 12 | 13 | - title: Consultant name 14 | query: td:nth-child(3) 15 | type: text 16 | 17 | - title: Rating 18 | query: td:nth-child(4) 19 | type: text 20 | 21 | - title: Comment 22 | query: td:nth-child(5) 23 | type: text 24 | 25 | - title: Email 26 | query: td:nth-child(6) 27 | type: text 28 | 29 | - title: Created 30 | query: td:nth-child(7) 31 | type: text -------------------------------------------------------------------------------- /src/scrappers/clutch.yml: -------------------------------------------------------------------------------- 1 | url: https://clutch.co* 2 | listElementsQuery: '.profile-review' 3 | elementParser: 4 | - title: Title 5 | query: 
'.profile-review__header > h4' 6 | type: text 7 | 8 | - title: Project services 9 | query: '.profile-review__data > ul > li:nth-child(1)' 10 | type: text 11 | 12 | - title: Project size 13 | query: '.profile-review__data > ul > li:nth-child(2)' 14 | type: text 15 | 16 | - title: Project length 17 | query: '.profile-review__data > ul > li:nth-child(3)' 18 | type: text 19 | 20 | - title: Project summary 21 | query: '.profile-review__summary > p:nth-child(2)' 22 | type: text 23 | 24 | - title: Rating 25 | query: '.sg-rating__number' 26 | type: text 27 | 28 | - title: Review 29 | query: '.profile-review__quote' 30 | type: text 31 | 32 | - title: Review date 33 | query: '.profile-review__date' 34 | type: text 35 | 36 | - title: Review date 37 | query: '.profile-review__feedback > p:nth-child(2)' 38 | type: text 39 | 40 | - title: Reviewer position 41 | query: '.reviewer_position' 42 | type: text 43 | 44 | - title: Reviewer name 45 | query: '.reviewer_card--name' 46 | type: text 47 | 48 | - title: Reviewer information 49 | query: '.reviewer_list' 50 | type: text 51 | -------------------------------------------------------------------------------- /src/scrappers/craigslist.yml: -------------------------------------------------------------------------------- 1 | url: https://*.craigslist.org/search/* 2 | header: 'Craigslist search results' 3 | listElementsQuery: '.result-node-wide, .result-node' 4 | elementParser: 5 | - title: Image 6 | query: img 7 | type: image 8 | 9 | - title: Product name 10 | query: .posting-title 11 | type: text 12 | 13 | - title: Price 14 | query: .priceinfo 15 | type: text 16 | 17 | - title: Location 18 | query: .supertitle 19 | type: text 20 | 21 | - title: Craigslist URL 22 | query: '.cl-app-anchor' 23 | type: 'link' 24 | -------------------------------------------------------------------------------- /src/scrappers/deliveroo.yml: -------------------------------------------------------------------------------- 1 | url: 
https://deliveroo.co.uk/restaurants/* 2 | header: Deliveroo search results 3 | listElementsQuery: 'a[class*="HomeFeedUICard-"]' 4 | elementParser: 5 | - title: Restaurant 6 | query: p 7 | type: text 8 | 9 | - title: Description 10 | type: get-attribute 11 | attribute: aria-label 12 | 13 | - title: Rating 14 | query: 'li:nth-child(2) > span:nth-child(3) > span' 15 | type: text 16 | 17 | - title: Delivery time 18 | query: '[class*="Bubble-"]' 19 | type: text 20 | 21 | - title: Promotions 22 | query: '[class*="BadgesOverlay-"]' 23 | type: text 24 | 25 | - title: Restaurant link 26 | type: clean-url 27 | -------------------------------------------------------------------------------- /src/scrappers/dreambaby.yml: -------------------------------------------------------------------------------- 1 | url: 2 | - https://www.dreambaby.be/* 3 | listElementsQuery: '[data-name="Product"]' 4 | elementParser: 5 | - title: Item brand 6 | query: '.brand' 7 | type: text 8 | 9 | - title: Item model 10 | query: '.model' 11 | type: text 12 | 13 | - title: Item Description 14 | query: 'h3' 15 | type: text 16 | 17 | - title: RRP 18 | query: 'empty' 19 | type: text 20 | 21 | - title: Price 22 | query: '[itemprop="price"]' 23 | type: text 24 | 25 | - title: URL 26 | query: a 27 | type: link -------------------------------------------------------------------------------- /src/scrappers/ebay.yml: -------------------------------------------------------------------------------- 1 | url: https://www.ebay.com/sch/* 2 | listElementsQuery: 'ul > [id*="item"]' 3 | elementParser: 4 | - title: Image 5 | query: img 6 | type: image 7 | 8 | - title: Name 9 | query: .s-item__title 10 | type: text 11 | 12 | - title: Price 13 | query: .s-item__price 14 | type: text 15 | 16 | - title: State 17 | query: .s-item__subtitle 18 | type: text 19 | 20 | - title: From 21 | query: .s-item__itemLocation 22 | type: text 23 | 24 | - title: Seller info 25 | query: .s-item__seller-info-text 26 | type: text 27 | 28 | - 
title: Product link 29 | query: .s-item__info > a 30 | type: clean-url 31 | -------------------------------------------------------------------------------- /src/scrappers/exhibitors-ces.yml: -------------------------------------------------------------------------------- 1 | url: https://exhibitors.ces.tech/* 2 | header: Exhibitors CES 2024 3 | listElementsQuery: .card 4 | elementParser: 5 | - title: Logo 6 | query: img 7 | type: image 8 | 9 | - title: Name 10 | query: .card-Title 11 | type: text 12 | 13 | - title: exhibitors URL 14 | query: a 15 | type: link 16 | 17 | - title: Description 18 | query: .card-Desc 19 | type: text 20 | 21 | - title: Booth 22 | query: .card-Subtitle 23 | type: text 24 | -------------------------------------------------------------------------------- /src/scrappers/exhibitors-expandnorthstar.yml: -------------------------------------------------------------------------------- 1 | url: https://exhibitors.expandnorthstar.com/* 2 | listElementsQuery: .list-group-item 3 | elementParser: 4 | - title: Logo 5 | query: img 6 | type: image 7 | 8 | - title: Name 9 | query: .heading 10 | type: text 11 | 12 | - title: Description 13 | query: .list-group-item-text 14 | type: text 15 | 16 | - title: Booth 17 | query: .web p:nth-child(1) 18 | type: text 19 | 20 | - title: Categories 21 | query: .sector_block 22 | type: text 23 | 24 | - title: URL 25 | query: a 26 | type: link 27 | -------------------------------------------------------------------------------- /src/scrappers/f6s_companies.yml: -------------------------------------------------------------------------------- 1 | url: https://www.f6s.com/companies/* 2 | header: F6S Companies 3 | listElementsQuery: '.b16.company-block' 4 | elementParser: 5 | - title: Company Name 6 | type: text 7 | query: h2.company-entry-title a.t-accent.t-heavy 8 | - title: Link 9 | type: clean-url 10 | query: h2.company-entry-title a.t-accent.t-heavy 11 | - title: Subtitle 12 | type: text 13 | query: h3.mt4.mb8 14 | - 
title: Location 15 | type: text 16 | query: div.centered-content.g8 17 | - title: Founding Year 18 | type: text 19 | query: p.centered-content.g8.mt12 20 | - title: Funding Info 21 | type: text 22 | query: div.centered-content.mt12 div.overview-line-content -------------------------------------------------------------------------------- /src/scrappers/facebook.yml: -------------------------------------------------------------------------------- 1 | url: https://www.facebook.com* 2 | listElementsQuery: '[role="dialog"] [role="article"]' 3 | elementParser: 4 | - title: Name 5 | query: 'div:nth-child(2) > div > div > div > div > span' 6 | type: text 7 | 8 | - title: Comment 9 | query: 'div:nth-child(2) > div > div > div > div > div > span > div' 10 | type: text 11 | -------------------------------------------------------------------------------- /src/scrappers/g2-reviews.yml: -------------------------------------------------------------------------------- 1 | url: https://www.g2.com/*/reviews* 2 | header: 'G2 product reviews' 3 | listElementsQuery: '[id*="survey-response"]' 4 | elementParser: 5 | - title: "Review date" 6 | query: '.x-current-review-date > time' 7 | type: "text" 8 | 9 | - title: "Author" 10 | query: '[itemprop="author"]' 11 | type: "text" 12 | 13 | - title: "Title" 14 | query: 'div[itemprop="name"]' 15 | type: "text" 16 | 17 | - title: "What do you like?" 18 | query: 'div[itemprop="reviewBody"] > div:nth-child(1) > div:nth-child(2)' 19 | type: "text" 20 | 21 | - title: "What do you dislike?" 22 | query: 'div[itemprop="reviewBody"] > div:nth-child(2) > div:nth-child(2)' 23 | type: "text" 24 | 25 | - title: "What are the problems that we solve?" 
26 | query: 'div[itemprop="reviewBody"] > div:nth-child(3) > div:nth-child(2)' 27 | type: "text" 28 | -------------------------------------------------------------------------------- /src/scrappers/g2-search.yml: -------------------------------------------------------------------------------- 1 | url: https://www.g2.com/search* 2 | header: 'G2 search results' 3 | listElementsQuery: '[class*="paper mb-1"]' 4 | elementParser: 5 | - title: 'Logo' 6 | query: '[class*="product-listing__img"] > img' 7 | type: 'get-attribute' 8 | attribute: 'data-deferred-image-src' 9 | 10 | - title: 'Product name' 11 | query: '.product-listing__product-name > a > div' 12 | type: 'text' 13 | 14 | - title: 'Total reviews' 15 | query: '.px-4th' 16 | type: 'text' 17 | 18 | - title: 'Rating' 19 | query: '.link--header-color' 20 | type: 'text' 21 | 22 | - title: 'Categories' 23 | query: '.product-listing__search-footer > .cell' 24 | type: 'text' 25 | -------------------------------------------------------------------------------- /src/scrappers/gads-keyword-planner.yml: -------------------------------------------------------------------------------- 1 | url: https://ads.google.com/* 2 | header: Google Ads Keyword Planner 3 | listElementsQuery: '.particle-table-row' 4 | elementParser: 5 | - title: Keyword 6 | query: 'ess-cell[essfield="keyword"] .keyword' 7 | type: text 8 | 9 | - title: Avg. 
monthly searches 10 | query: 'ess-cell[essfield="stats.search_volume"] .value-text' 11 | type: text 12 | 13 | - title: Three month change 14 | query: 'ess-cell[essfield="stats.recent_search_trend_change"] text-field' 15 | type: text 16 | 17 | - title: YoY change 18 | query: 'ess-cell[essfield="stats.recent_yoy_search_trend_change"] text-field' 19 | type: text 20 | 21 | - title: Competition 22 | query: 'ess-cell[essfield="competition"] text-field' 23 | type: text 24 | 25 | - title: Ad impression share 26 | query: 'ess-cell[essfield="stats.ad_impression_share"] text-field' 27 | type: text 28 | 29 | - title: Top of page bid (low range) 30 | query: 'ess-cell[essfield="stats.bid_min"] text-field' 31 | type: text 32 | 33 | - title: Top of page bid (high range) 34 | query: 'ess-cell[essfield="stats.bid_max"] text-field' 35 | type: text 36 | 37 | - title: Account Status 38 | query: 'ess-cell[essfield="is_in_account"] status-chips' 39 | type: text 40 | -------------------------------------------------------------------------------- /src/scrappers/github.yml: -------------------------------------------------------------------------------- 1 | url: https://github.com/* 2 | header: GitHub issues 3 | listElementsQuery: .Box-row 4 | elementParser: 5 | - title: Link 6 | query: .Link--primary 7 | type: link 8 | - title: Title 9 | query: .Link--primary 10 | type: text -------------------------------------------------------------------------------- /src/scrappers/google.yml: -------------------------------------------------------------------------------- 1 | url: https://www.google.*/search?* 2 | listElementsQuery: '[data-snc]' 3 | elementParser: 4 | - title: Title 5 | query: h3 6 | type: text 7 | 8 | - title: Description 9 | query: 'div[style="-webkit-line-clamp:2"]' 10 | type: text 11 | 12 | - title: URL 13 | query: a 14 | type: clean-url 15 | -------------------------------------------------------------------------------- /src/scrappers/gumroad.yml: 
-------------------------------------------------------------------------------- 1 | url: https://gumroad.com* 2 | listElementsQuery: article 3 | elementParser: 4 | - title: Name 5 | query: h3 6 | type: text 7 | 8 | - title: User 9 | query: .user 10 | type: text 11 | 12 | - title: User profile URL 13 | query: .user 14 | type: link 15 | 16 | - title: Link 17 | query: .stretched-link 18 | type: link 19 | 20 | - title: Price 21 | query: .price 22 | type: text 23 | 24 | - title: Rating/Reviews 25 | query: .rating 26 | type: text 27 | -------------------------------------------------------------------------------- /src/scrappers/homes.yml: -------------------------------------------------------------------------------- 1 | url: https://www.homes.com/* 2 | listElementsQuery: .placard-container 3 | elementParser: 4 | - title: Address 5 | query: address 6 | type: text 7 | 8 | - title: Price 9 | query: .price-container, .detailed-info-container li:nth-child(1) 10 | type: text 11 | 12 | - title: Description 13 | query: .property-description 14 | type: text 15 | 16 | - title: Agent name 17 | query: .agent-name 18 | type: text 19 | 20 | - title: Agency name 21 | query: .agency-name 22 | type: text 23 | 24 | - title: Property link 25 | query: a 26 | type: clean-url 27 | -------------------------------------------------------------------------------- /src/scrappers/idealista.yml: -------------------------------------------------------------------------------- 1 | url: https://www.idealista.* 2 | header: Idealista search results 3 | listElementsQuery: .item 4 | elementParser: 5 | - title: Home 6 | query: .item-link 7 | type: text 8 | 9 | - title: Price 10 | query: .item-price 11 | type: text 12 | 13 | - title: Typology 14 | query: '.item-detail-char > .item-detail:nth-child(1)' 15 | type: text 16 | 17 | - title: Area 18 | query: '.item-detail-char > .item-detail:nth-child(2)' 19 | type: text 20 | 21 | - title: Description 22 | query: .item-description 23 | type: text 24 | 25 | - 
title: Link 26 | query: .item-link 27 | type: link 28 | -------------------------------------------------------------------------------- /src/scrappers/ikea.yml: -------------------------------------------------------------------------------- 1 | url: https://www.ikea.* 2 | listElementsQuery: '[data-testid="plp-product-card"]' 3 | elementParser: 4 | - title: Product image 5 | query: img 6 | type: image 7 | 8 | - title: Product name 9 | query: .plp-price-module__product-name 10 | type: text 11 | 12 | - title: Product description 13 | query: .plp-price-module__description 14 | type: text 15 | 16 | - title: Price 17 | query: .plp-price__nowrap 18 | type: text -------------------------------------------------------------------------------- /src/scrappers/immobiliare.yml: -------------------------------------------------------------------------------- 1 | url: https://www.immobiliare.it/* 2 | listElementsQuery: .in-reListCard 3 | elementParser: 4 | - title: Home image 5 | query: img 6 | type: image 7 | 8 | - title: Price 9 | query: .in-reListCardPrice 10 | type: text 11 | 12 | - title: Name 13 | query: .in-reListCard__title 14 | type: text 15 | 16 | - title: Area 17 | query: '[aria-label="superficie"]' 18 | type: text 19 | 20 | - title: Description 21 | query: .in-reListCardDescription 22 | type: text 23 | 24 | - title: Immobiliare URL 25 | query: a 26 | type: link 27 | -------------------------------------------------------------------------------- /src/scrappers/immobilienscout24.yml: -------------------------------------------------------------------------------- 1 | url: https://www.immobilienscout24.de/Suche/* 2 | listElementsQuery: '[data-item="result"]' 3 | elementParser: 4 | - title: Title 5 | query: h2 6 | type: text 7 | 8 | - title: Address 9 | query: .result-list-entry__address 10 | type: text 11 | 12 | - title: Price 13 | query: .result-list-entry__primary-criterion 14 | type: text 15 | 16 | - title: Area 17 | query: 
.result-list-entry__primary-criterion:nth-child(2) dd 18 | type: text 19 | 20 | - title: Property link 21 | query: a 22 | type: link 23 | -------------------------------------------------------------------------------- /src/scrappers/imovirtual.yml: -------------------------------------------------------------------------------- 1 | url: https://www.imovirtual.com/* 2 | listElementsQuery: article 3 | elementParser: 4 | - title: Home 5 | query: '[data-testid="listing-item-link"]' 6 | type: text 7 | 8 | - title: Typology 9 | query: '[data-testid="advert-card-specs-list"] > dl > dd:nth-child(2)' 10 | type: text 11 | 12 | - title: Area 13 | query: '[data-testid="advert-card-specs-list"] > dl > dd:nth-child(4)' 14 | type: text 15 | 16 | - title: Price 17 | query: '[data-testid="listing-item-header"] > span' 18 | type: text 19 | 20 | - title: URL 21 | query: a 22 | type: link 23 | -------------------------------------------------------------------------------- /src/scrappers/index.ts: -------------------------------------------------------------------------------- 1 | import { getDomainName, ScrapperOptions } from '../utils/chrome'; 2 | 3 | // eslint-disable-next-line @typescript-eslint/ban-ts-comment 4 | // @ts-ignore 5 | const data = import.meta.glob('./*.yml', { eager: true }); 6 | const scrappers = new Map<string, Array<ScrapperOptions>>(); 7 | 8 | for (const scrapperPath in data) { 9 | const url = data[scrapperPath].default.url; 10 | let hostname = ''; 11 | 12 | if (Array.isArray(url)) { 13 | hostname = getDomainName(url[0]); 14 | } else { 15 | hostname = getDomainName(url); 16 | } 17 | 18 | if (hostname) { 19 | if (scrappers.has(hostname)) { 20 | // eslint-disable-next-line @typescript-eslint/no-non-null-assertion 21 | const options = scrappers.get(hostname)!; 22 | scrappers.set(hostname, [...options, data[scrapperPath].default]); 23 | } else { 24 | scrappers.set(hostname, [data[scrapperPath].default]); 25 | } 26 | } 27 | } 28 | 29 | export default scrappers; 30 | 
-------------------------------------------------------------------------------- /src/scrappers/instagram.yml: -------------------------------------------------------------------------------- 1 | url: 2 | - https://www.instagram.com/*/following/ 3 | - https://www.instagram.com/*/followers/ 4 | listElementsQuery: '[role="dialog"] > div > div:nth-child(2) > div > div > div:nth-child(3) > div > div > div, [role="dialog"] > div > div:nth-child(2) > div > div > div:nth-child(4) > div > div > div' 5 | elementParser: 6 | - title: Follower handler 7 | query: 'a[role="link"] span' 8 | type: text 9 | 10 | - title: Follower name 11 | query: 'span[dir="auto"]:nth-child(2)' 12 | type: text 13 | 14 | - title: Profile link 15 | query: 'a' 16 | type: clean-url 17 | -------------------------------------------------------------------------------- /src/scrappers/johnlewis.yml: -------------------------------------------------------------------------------- 1 | url: 2 | - https://www.johnlewis.com/* 3 | listElementsQuery: article 4 | elementParser: 5 | - title: Item brand 6 | query: '[data-test="product-title"] > h2 > span:nth-child(1)' 7 | type: text 8 | 9 | - title: Item model 10 | query: '.model' 11 | type: text 12 | 13 | - title: Item Description 14 | query: '[data-test="product-title"] > h2 > span:nth-child(2)' 15 | type: text 16 | 17 | - title: RRP 18 | query: '[data-test="product-card-price-prev"]' 19 | type: text 20 | 21 | - title: Price 22 | query: '[data-test="product-card-price-now"]' 23 | type: text 24 | 25 | - title: URL 26 | query: a 27 | type: link -------------------------------------------------------------------------------- /src/scrappers/joinef-portfolio.yml: -------------------------------------------------------------------------------- 1 | url: 'https://www.joinef.com/portfolio/*' 2 | header: Portfolio 3 | listElementsQuery: .tile--company--row 4 | elementParser: 5 | - title: Name 6 | query: .tile__name 7 | type: text 8 | 9 | - title: Description 10 | query: 
.tile__description 11 | type: text 12 | 13 | - title: Tags 14 | query: .tile__tags 15 | type: text 16 | 17 | - title: Link 18 | query: .tile__link 19 | type: link 20 | -------------------------------------------------------------------------------- /src/scrappers/joinef-posts.yml: -------------------------------------------------------------------------------- 1 | url: 'https://www.joinef.com/posts/' 2 | header: Posts 3 | listElementsQuery: article 4 | elementParser: 5 | - title: Title 6 | query: .tile__link 7 | type: text 8 | 9 | - title: Categories 10 | query: .tile__categories 11 | type: text 12 | 13 | - title: Link 14 | query: .tile__link 15 | type: link 16 | -------------------------------------------------------------------------------- /src/scrappers/jollyroom.yml: -------------------------------------------------------------------------------- 1 | url: 2 | - https://www.jollyroom.se/* 3 | listElementsQuery: '.product-info' 4 | elementParser: 5 | 6 | - title: Item brand 7 | query: '[itemprop="brand"]' 8 | type: text 9 | 10 | - title: Item model 11 | query: '[itemprop="model"]' 12 | type: text 13 | 14 | - title: Item Description 15 | query: h3 16 | type: text 17 | 18 | - title: RRP 19 | query: .recommmended-price 20 | type: text 21 | 22 | - title: Price 23 | query: .price 24 | type: text 25 | 26 | - title: URL 27 | query: a 28 | type: link -------------------------------------------------------------------------------- /src/scrappers/kuanto-kusta-product.yml: -------------------------------------------------------------------------------- 1 | url: https://www.kuantokusta.pt/p/* 2 | listElementsQuery: '[data-test-id="offer-card"]' 3 | elementParser: 4 | - title: Name 5 | query: '[data-test-id="offer-product-name"]' 6 | type: text 7 | 8 | - title: Price 9 | query: '[data-test-id="offer-price"]' 10 | type: text 11 | 12 | - title: Shipping costs 13 | query: '[data-test-id="offer-shipping"]' 14 | type: text 15 | 16 | - title: Kuanto kusta link 17 | type: link 18 | 
-------------------------------------------------------------------------------- /src/scrappers/kuanto-kusta.yml: -------------------------------------------------------------------------------- 1 | url: https://www.kuantokusta.pt/search 2 | listElementsQuery: '[data-test-id="product-card"]' 3 | elementParser: 4 | - title: Product image 5 | query: ".card-header img:nth-child(2)" 6 | type: "image" 7 | 8 | - title: Name 9 | query: '[data-test-id="product-card-name"]' 10 | type: text 11 | 12 | - title: Minimum price 13 | query: '[data-test-id="product-card-offers-price-min"] > span' 14 | type: text 15 | 16 | - title: Link 17 | query: 'a' 18 | type: "link" 19 | -------------------------------------------------------------------------------- /src/scrappers/linkedIn-company-profile-likes.yml: -------------------------------------------------------------------------------- 1 | url: 2 | - https://www.linkedin.com/company/* 3 | - https://www.linkedin.com/in/* 4 | - https://www.linkedin.com/showcase/* 5 | - https://www.linkedin.com/posts/* 6 | header: Linkedin Likes 7 | listElementsQuery: .artdeco-list__item 8 | elementParser: 9 | - title: Name 10 | query: .artdeco-entity-lockup__title 11 | type: text 12 | 13 | - title: Job title 14 | query: .artdeco-entity-lockup__caption 15 | type: text 16 | 17 | - title: Profile url 18 | query: a 19 | type: link -------------------------------------------------------------------------------- /src/scrappers/linkedin-groups.yml: -------------------------------------------------------------------------------- 1 | url: https://www.linkedin.com/groups/* 2 | header: Linkedin groups 3 | listElementsQuery: .scaffold-finite-scroll__content > div > ul > li 4 | elementParser: 5 | - title: Avatar 6 | query: img 7 | type: image 8 | 9 | - title: Name 10 | query: '[class*="group-listing-item__title-link-"]' 11 | type: text 12 | 13 | - title: Link 14 | query: '[class*="group-listing-item__title-link-"]' 15 | type: link 16 | 17 | - title: Number of 
members 18 | query: .artdeco-entity-lockup__metadata 19 | type: text 20 | -------------------------------------------------------------------------------- /src/scrappers/linkedin-jobs.yml: -------------------------------------------------------------------------------- 1 | url: https://www.linkedin.com/jobs/search/* 2 | header: Linkedin jobs search results 3 | listElementsQuery: .jobs-search-results__list-item 4 | elementParser: 5 | - title: Position 6 | query: .job-card-container__link 7 | type: text 8 | 9 | - title: Company 10 | query: .job-card-container__primary-description 11 | type: text 12 | 13 | - title: Location 14 | query: .job-card-container__metadata-wrapper 15 | type: text 16 | 17 | - title: URL 18 | query: .job-card-container__link 19 | type: clean-url 20 | -------------------------------------------------------------------------------- /src/scrappers/linkedin-my-network.yml: -------------------------------------------------------------------------------- 1 | url: https://www.linkedin.com/mynetwork/invite-connect/connections/ 2 | header: Linkedin search results 3 | listElementsQuery: '[data-chameleon-result-urn*="urn:li:member:"], .mn-connection-card' 4 | elementParser: 5 | - title: Avatar 6 | query: img 7 | type: image 8 | 9 | - title: Name 10 | query: '.entity-result__title-text > .app-aware-link span[aria-hidden="true"], .mn-connection-card__name' 11 | type: text 12 | 13 | - title: Job 14 | query: .entity-result__primary-subtitle, .mn-connection-card__occupation 15 | type: text 16 | 17 | - title: Location 18 | query: .entity-result__secondary-subtitle 19 | type: text 20 | 21 | - title: Profile url 22 | query: .entity-result__title-text > .app-aware-link, .mn-connection-card__link 23 | type: clean-url 24 | -------------------------------------------------------------------------------- /src/scrappers/linkedin-post.yml: -------------------------------------------------------------------------------- 1 | url: https://www.linkedin.com/feed/update/* 2 | 
header: Linkedin Post - Profiles who liked 3 | listElementsQuery: .artdeco-list__item 4 | elementParser: 5 | - title: Name 6 | query: .artdeco-entity-lockup__title 7 | type: text 8 | 9 | - title: Job title 10 | query: .artdeco-entity-lockup__caption 11 | type: text 12 | 13 | - title: Profile url 14 | query: a 15 | type: link -------------------------------------------------------------------------------- /src/scrappers/linkedin-sales-leads-search.yml: -------------------------------------------------------------------------------- 1 | url: https://www.linkedin.com/sales/lists/people/* 2 | header: Linkedin Sales Navigator Leads List 3 | listElementsQuery: 'tr.artdeco-models-table-row' 4 | elementParser: 5 | - title: Avatar 6 | query: img 7 | type: image 8 | - title: Name 9 | query: '[data-anonymize="person-name"]' 10 | type: text 11 | - title: Job 12 | query: '[data-anonymize="job-title"]' 13 | type: text 14 | - title: Company 15 | query: '[data-anonymize="company-name"]' 16 | type: text 17 | - title: Location 18 | query: '[data-anonymize="location"]' 19 | type: text 20 | - title: Profile url 21 | query: '[data-anonymize="person-name"]' 22 | type: clean-url -------------------------------------------------------------------------------- /src/scrappers/linkedin-sales-search.yml: -------------------------------------------------------------------------------- 1 | url: https://www.linkedin.com/sales/search/* 2 | header: Linkedin Sales Navigator search results 3 | listElementsQuery: '.artdeco-list__item' 4 | elementParser: 5 | - title: Avatar 6 | query: img 7 | type: image 8 | - title: Name 9 | query: '.artdeco-entity-lockup__title > a' 10 | type: text 11 | - title: Job 12 | query: '.artdeco-entity-lockup__subtitle > span:nth-child(1)' 13 | type: text 14 | - title: Company 15 | query: '.artdeco-entity-lockup__subtitle > a' 16 | type: text 17 | - title: Location 18 | query: '.artdeco-entity-lockup__caption' 19 | type: text 20 | - title: Profile url 21 | query: 
'.artdeco-entity-lockup__title > a' 22 | type: clean-url -------------------------------------------------------------------------------- /src/scrappers/linkedin-saved-posts.yml: -------------------------------------------------------------------------------- 1 | url: https://www.linkedin.com/my-items/saved-posts/ 2 | header: Linkedin Saved Posts 3 | listElementsQuery: '.reusable-search__result-container' 4 | elementParser: 5 | - title: Name 6 | query: '.entity-result__title-line--2-lines > span > a > span > span:nth-child(1)' 7 | type: text 8 | 9 | - title: Job title 10 | query: '.linked-area.flex-1.cursor-pointer > div' 11 | type: text 12 | 13 | - title: Profile url 14 | query: a 15 | type: link 16 | 17 | - title: Post url 18 | query: .entity-result__content-inner-container > div.mh4 > a.app-aware-link, .entity-result__content-inner-container > a.app-aware-link 19 | type: link 20 | 21 | - title: Post content 22 | query: '.entity-result__content-inner-container > div > p' 23 | type: text 24 | -------------------------------------------------------------------------------- /src/scrappers/linkedin.yml: -------------------------------------------------------------------------------- 1 | url: https://www.linkedin.com/search/results/* 2 | header: Linkedin search results 3 | listElementsQuery: 'ul[role="list"] > li > div > div > div' 4 | elementParser: 5 | - title: Avatar 6 | query: 'div > img' 7 | type: image 8 | 9 | - title: Name 10 | query: 'div:nth-child(2) > div > div > div > span > span > a > span > span' 11 | type: text 12 | 13 | - title: Job 14 | query: 'div:nth-child(2) > div > div:nth-child(2)' 15 | type: text 16 | 17 | - title: Location 18 | query: 'div:nth-child(2) > div > div:nth-child(3)' 19 | type: text 20 | 21 | - title: Profile url 22 | query: 'div > a' 23 | type: clean-url 24 | -------------------------------------------------------------------------------- /src/scrappers/merrill.yml: 
-------------------------------------------------------------------------------- 1 | url: https://fa.ml.com/find-an-advisor/* 2 | listElementsQuery: .advisorpod 3 | elementParser: 4 | - title: Photo 5 | query: img 6 | type: image 7 | 8 | - title: Name 9 | query: .primaryname 10 | type: text 11 | 12 | - title: Job 13 | query: .advisor-title 14 | type: text 15 | 16 | - title: Company 17 | query: .office-advisor-count 18 | type: text 19 | 20 | - title: Office Address 21 | query: .office-location 22 | type: text 23 | 24 | - title: Phone Number 25 | query: .office-phone 26 | type: text 27 | 28 | - title: Website 29 | query: .office-website > a 30 | type: link 31 | -------------------------------------------------------------------------------- /src/scrappers/milanuncios.yml: -------------------------------------------------------------------------------- 1 | url: https://www.milanuncios.com/* 2 | listElementsQuery: article 3 | elementParser: 4 | - title: Title 5 | query: .ma-AdCardListingV2-TitleLink 6 | type: text 7 | 8 | - title: Price 9 | query: .ma-AdPrice-value 10 | type: text 11 | 12 | - title: Address 13 | query: .ma-AdLocation-text 14 | type: text 15 | 16 | - title: Details 17 | query: .ma-AdCardV2-description 18 | type: text 19 | 20 | - title: Link 21 | query: .ma-AdCardListingV2-TitleLink 22 | type: link -------------------------------------------------------------------------------- /src/scrappers/netflix.yml: -------------------------------------------------------------------------------- 1 | url: https://www.netflix.com/browse/* 2 | header: Netflix browse results 3 | listElementsQuery: .title-card 4 | elementParser: 5 | - title: Cover 6 | query: img 7 | type: image 8 | 9 | - title: Title 10 | query: .fallback-text 11 | type: text 12 | 13 | - title: Link 14 | query: a 15 | type: clean-url 16 | -------------------------------------------------------------------------------- /src/scrappers/notion.yml: 
-------------------------------------------------------------------------------- 1 | url: https://www.notion.so* 2 | header: Templates 3 | listElementsQuery: .template-grid > section 4 | elementParser: 5 | - title: Title 6 | query: '[class*="UserBaseInfo_textInfoContainer"]' 7 | type: text 8 | 9 | - title: Userbase rights 10 | query: '[class*="templatePreview_userBaseRight"]' 11 | type: text 12 | 13 | - title: Link 14 | query: '[class*="DynamicModal_trigger"]' 15 | type: link 16 | -------------------------------------------------------------------------------- /src/scrappers/ocean.io.yml: -------------------------------------------------------------------------------- 1 | url: 'https://www.ocean.io/lookalike-search*' 2 | header: Search results 3 | listElementsQuery: 'tr:nth-child(n+3)' 4 | elementParser: 5 | - title: Score 6 | query: 'td:nth-child(1)' 7 | type: text 8 | 9 | - title: Company 10 | query: 'td:nth-child(2)' 11 | type: text 12 | 13 | - title: Size 14 | query: 'td:nth-child(3)' 15 | type: text 16 | 17 | - title: Country 18 | query: 'td:nth-child(4)' 19 | type: text 20 | 21 | - title: Domain 22 | query: 'td> div > div:nth-child(3) > div > a:nth-child(1)' 23 | type: link 24 | 25 | - title: Link 1 26 | query: 'td> div > div:nth-child(3) > div > a:nth-child(2)' 27 | type: link 28 | 29 | - title: Link 2 30 | query: 'td> div > div:nth-child(3) > div > a:nth-child(3)' 31 | type: link 32 | 33 | - title: Link 3 34 | query: 'td> div > div:nth-child(3) > div > a:nth-child(4)' 35 | type: link -------------------------------------------------------------------------------- /src/scrappers/oddsportal-12.yml: -------------------------------------------------------------------------------- 1 | url: 2 | - https://www.oddsportal.com/*basketball* 3 | - https://www.oddsportal.com/*baseball* 4 | - https://www.oddsportal.com/*tennis* 5 | - https://www.oddsportal.com/*american-football* 6 | - https://www.oddsportal.com/*esports* 7 | - https://www.oddsportal.com/*aussie-rules* 8 
| - https://www.oddsportal.com/*badminton* 9 | - https://www.oddsportal.com/*cricket* 10 | - https://www.oddsportal.com/*darts* 11 | - https://www.oddsportal.com/*mma* 12 | - https://www.oddsportal.com/*snooker* 13 | - https://www.oddsportal.com/*volleyball* 14 | header: Results 15 | listElementsQuery: '[class*="eventRow"]' 16 | elementParser: 17 | - title: Date 18 | query: '[data-testid="date-header"]' 19 | type: text 20 | 21 | - title: Time 22 | query: '[data-testid="game-row"] [data-testid="time-item"]' 23 | type: text 24 | 25 | - title: Team 1 26 | query: '[data-testid="event-participants"] a.items-start .participant-name' 27 | type: text 28 | 29 | - title: Result 1 30 | query: '[data-testid="event-participants"] a.items-start .flex' 31 | type: text 32 | 33 | - title: Team 2 34 | query: '[data-testid="event-participants"] a.items-center .participant-name' 35 | type: text 36 | 37 | - title: Result 2 38 | query: '[data-testid="event-participants"] a.items-center .flex' 39 | type: text 40 | 41 | - title: Odd 1 42 | query: 'div.border-black-main:nth-child(2)' 43 | type: text 44 | 45 | - title: Odd 2 46 | query: 'div.border-black-main:nth-child(3)' 47 | type: text 48 | -------------------------------------------------------------------------------- /src/scrappers/oddsportal-1x2.yml: -------------------------------------------------------------------------------- 1 | url: 2 | - https://www.oddsportal.com/*football* 3 | - https://www.oddsportal.com/*hockey* 4 | - https://www.oddsportal.com/*water-polo* 5 | - https://www.oddsportal.com/*rugby* 6 | - https://www.oddsportal.com/*handball* 7 | - https://www.oddsportal.com/*futsal* 8 | - https://www.oddsportal.com/*floorball* 9 | - https://www.oddsportal.com/*boxing* 10 | header: 1x2 Results 11 | listElementsQuery: '[class*="eventRow"]' 12 | elementParser: 13 | - title: Date 14 | query: '[data-testid="date-header"]' 15 | type: text 16 | 17 | - title: Time 18 | query: '[data-testid="game-row"] [data-testid="time-item"]' 19 
| type: text 20 | 21 | - title: Team 1 22 | query: '[data-testid="event-participants"] a.items-start .participant-name' 23 | type: text 24 | 25 | - title: Result 1 26 | query: '[data-testid="event-participants"] a.items-start .flex' 27 | type: text 28 | 29 | - title: Team 2 30 | query: '[data-testid="event-participants"] a.items-center .participant-name' 31 | type: text 32 | 33 | - title: Result 2 34 | query: '[data-testid="event-participants"] a.items-center .flex' 35 | type: text 36 | 37 | - title: Odd 1 38 | query: 'div.border-black-main:nth-child(2)' 39 | type: text 40 | 41 | - title: Odd X 42 | query: 'div.border-black-main:nth-child(3)' 43 | type: text 44 | 45 | - title: Odd 2 46 | query: 'div.border-black-main:nth-child(4)' 47 | type: text 48 | -------------------------------------------------------------------------------- /src/scrappers/openviewpartners.yml: -------------------------------------------------------------------------------- 1 | url: https://openviewpartners.com/portfolio/* 2 | header: OpenView portfolio 3 | listElementsQuery: .portfolio-grid-logo-container 4 | elementParser: 5 | - title: Company name 6 | type: get-attribute 7 | attribute: data-name 8 | 9 | - title: Acquired By 10 | query: .tag-company 11 | type: text 12 | 13 | - title: State 14 | query: .grid-ipo-tag 15 | type: text 16 | 17 | - title: Status 18 | query: .grid-exited-tag 19 | type: text 20 | 21 | - title: Company website 22 | query: a 23 | type: link 24 | 25 | -------------------------------------------------------------------------------- /src/scrappers/oportunity-leiloes-list.yml: -------------------------------------------------------------------------------- 1 | url: https://oportunityleiloes.auctionserver.net/ 2 | header: 'Oportunity Leilões search result' 3 | listElementsQuery: .auclting 4 | elementParser: 5 | - title: Banner 6 | query: img 7 | type: image 8 | 9 | - title: Auction 10 | query: h6 11 | type: text 12 | 13 | - title: Status 14 | query: .aucdes span .row-link 
15 | type: text 16 | 17 | - title: Time interval 18 | query: '[id*="aucdate"]' 19 | type: text 20 | 21 | - title: Auction URL 22 | query: h6 > a 23 | type: link 24 | -------------------------------------------------------------------------------- /src/scrappers/oportunity-leiloes.yml: -------------------------------------------------------------------------------- 1 | url: https://oportunityleiloes.auctionserver.net/m/view-auctions/info/id/* 2 | listElementsQuery: .auc_items 3 | elementParser: 4 | - title: Article 5 | query: img 6 | type: image 7 | 8 | - title: Code 9 | query: span 10 | type: text 11 | 12 | - title: Description 13 | query: h4 14 | type: text 15 | 16 | - title: Auction URL 17 | query: a 18 | type: clean-url 19 | 20 | -------------------------------------------------------------------------------- /src/scrappers/pinkorblue-list-products-it.yml: -------------------------------------------------------------------------------- 1 | url: 2 | - https://www.pinkorblue.it/*/* 3 | listElementsQuery: .products__item 4 | elementParser: 5 | - title: Item Brand 6 | query: '.product__brand' 7 | type: text 8 | 9 | - title: Item model 10 | query: '.product__model' 11 | type: text 12 | 13 | - title: Item Description 14 | query: '.product__title' 15 | type: text 16 | 17 | - title: RRP 18 | query: '.product__price--old' 19 | type: text 20 | 21 | - title: Price 22 | query: '.product__price' 23 | type: text 24 | 25 | - title: URL 26 | query: .product__link 27 | type: link -------------------------------------------------------------------------------- /src/scrappers/pinkorblue-list-products-nl.yml: -------------------------------------------------------------------------------- 1 | url: 2 | - https://www.pinkorblue.nl/*/* 3 | listElementsQuery: .products__item 4 | elementParser: 5 | - title: Item Brand 6 | query: '.product__brand' 7 | type: text 8 | 9 | - title: Item model 10 | query: '.product__model' 11 | type: text 12 | 13 | - title: Item Description 14 | query: 
'.product__title' 15 | type: text 16 | 17 | - title: RRP 18 | query: '.product__price--old' 19 | type: text 20 | 21 | - title: Price 22 | query: '.product__price' 23 | type: text 24 | 25 | - title: URL 26 | query: .product__link 27 | type: link -------------------------------------------------------------------------------- /src/scrappers/pinkorblue-list-products.yml: -------------------------------------------------------------------------------- 1 | url: 2 | - https://www.pinkorblue.be/*/* 3 | listElementsQuery: .products__item 4 | elementParser: 5 | - title: Item Brand 6 | query: '.product__brand' 7 | type: text 8 | 9 | - title: Item model 10 | query: '.product__model' 11 | type: text 12 | 13 | - title: Item Description 14 | query: '.product__title' 15 | type: text 16 | 17 | - title: RRP 18 | query: '.product__price--old' 19 | type: text 20 | 21 | - title: Price 22 | query: '.product__price' 23 | type: text 24 | 25 | - title: URL 26 | query: .product__link 27 | type: link -------------------------------------------------------------------------------- /src/scrappers/pinkorblue-product-detail-page.yml: -------------------------------------------------------------------------------- 1 | url: https://www.pinkorblue.* 2 | listElementsQuery: .site-main 3 | includeHeader: false 4 | elementParser: 5 | 6 | - title: Item Description 7 | query: 'h1' 8 | type: text 9 | 10 | - title: Item Number 11 | query: '#product-description-content > div > p:nth-child(2)' 12 | type: text 13 | 14 | - title: EAN 15 | query: '#product-description-content > div > p:nth-child(3)' 16 | type: text 17 | 18 | - title: RRP 19 | query: '#buybox-options > div.product-options__prices > div:nth-child(1) > div.col-sm-9' 20 | type: text 21 | 22 | - title: Price 23 | query: '[itemprop="price"]' 24 | type: text -------------------------------------------------------------------------------- /src/scrappers/pitchbook.yml: -------------------------------------------------------------------------------- 1 | 
url: 2 | - https://my.pitchbook.com/search-results/*/deals 3 | - https://my.pitchbook.com/search-results/*/companies 4 | - https://my.pitchbook.com/search-results/*/investors 5 | parseTables: 6 | tables: 7 | - rows: '#search-results-data-table-left .data-table__row' 8 | cols: .data-table__cell 9 | 10 | - rows: '#search-results-data-table-right .data-table__row, #search-results-data-table-right .data-table__headers' 11 | cols: .data-table__cell 12 | 13 | mergeTablesBy: column 14 | -------------------------------------------------------------------------------- /src/scrappers/prenatal-com.yml: -------------------------------------------------------------------------------- 1 | url: 2 | - https://www.prenatal.com/* 3 | listElementsQuery: '[data-hits] [x-bind="card"]' 4 | elementParser: 5 | - title: Item Brand 6 | query: 'div.relative.px-2.py-2 > span' 7 | type: text 8 | 9 | - title: Item model 10 | query: '.no_model' 11 | type: text 12 | 13 | - title: Item Description 14 | query: '[x-html="hit.title"]' 15 | type: text 16 | 17 | - title: RRP 18 | query: 'div.relative.px-2.py-2 > div > div' 19 | type: text 20 | 21 | - title: Price 22 | query: 'div.relative.px-2.py-2 > div.flex.items-center.pb-2 > div.flex.flex-row.items-center > span' 23 | type: text 24 | 25 | - title: URL 26 | query: 'div.swiper-slide.swiper-slide-active > a' 27 | type: link 28 | -------------------------------------------------------------------------------- /src/scrappers/prenatal-nl.yml: -------------------------------------------------------------------------------- 1 | url: 2 | - https://www.prenatal.nl/* 3 | listElementsQuery: .search-result-items > li 4 | elementParser: 5 | - title: Item Brand 6 | query: '.no_brand' 7 | type: text 8 | 9 | - title: Item model 10 | query: '.no_model' 11 | type: text 12 | 13 | - title: Item Description 14 | query: '.name-link' 15 | type: text 16 | 17 | - title: RRP 18 | query: '.product-standard-price' 19 | type: text 20 | 21 | - title: Price 22 | query: 
'.product-sales-price' 23 | type: text 24 | 25 | - title: URL 26 | query: .thumb-link 27 | type: link -------------------------------------------------------------------------------- /src/scrappers/product-hunt.yml: -------------------------------------------------------------------------------- 1 | url: https://www.producthunt.com/* 2 | header: ProductHunt results 3 | listElementsQuery: '[class*="styles_item_"][data-test*="post-item-"],[class*="styles_item_"][data-test*="ad-slot"],[class*="styles_item_"][data-test*="product-"]' 4 | elementParser: 5 | - title: Product image 6 | query: 'img,video' 7 | type: image 8 | 9 | - title: Product name 10 | query: '[data-test*="post-name"], a[href*="/products"] div:nth-child(1), [class*="titleTaglineItem"]' 11 | type: text 12 | 13 | - title: Description 14 | query: '[class*="styles_tagline"], a[href*="/products"] div:nth-child(2), [class*="_extraInfo"], [class*="styles_adMeta"]' 15 | type: text 16 | 17 | - title: Up votes 18 | query: '[data-test="vote-button"]' 19 | type: text 20 | 21 | - title: Product hunt link 22 | query: 'a[data-test*="post-name"], a[href*="/products"]' 23 | type: link 24 | -------------------------------------------------------------------------------- /src/scrappers/qubika.yml: -------------------------------------------------------------------------------- 1 | url: https://qubika.com/blog/* 2 | listElementsQuery: .event-list li 3 | elementParser: 4 | - title: Event name 5 | query: .event 6 | type: text 7 | 8 | - title: Address 9 | query: .city 10 | type: text 11 | 12 | - title: Date 13 | query: .date 14 | type: text 15 | 16 | - title: Link 17 | query: a 18 | type: link 19 | -------------------------------------------------------------------------------- /src/scrappers/racius.yml: -------------------------------------------------------------------------------- 1 | url: https://www.racius.com/* 2 | listElementsQuery: article 3 | elementParser: 4 | - title: Name 5 | query: .results__name 6 | type: text 7 
| 8 | - title: Activity 9 | query: .results__activity 10 | type: text 11 | 12 | - title: Location 13 | query: .results__col-location 14 | type: text -------------------------------------------------------------------------------- /src/scrappers/raymondjames.yml: -------------------------------------------------------------------------------- 1 | url: https://www.raymondjames.com/find-an-advisor 2 | listElementsQuery: .faa-result 3 | elementParser: 4 | - title: Photo 5 | query: img 6 | type: image 7 | 8 | - title: Name 9 | query: .advisor-name .media-heading 10 | type: text 11 | 12 | - title: Position 13 | query: .advisor-name h3:nth-child(2) 14 | type: text 15 | 16 | - title: Advisor phone number 17 | query: .advisor-phone 18 | type: text 19 | 20 | - title: Company 21 | query: .location-name 22 | type: text 23 | 24 | - title: Office Address 25 | query: .location-address 26 | type: text 27 | 28 | - title: Office Phone Number 29 | query: .location-phone 30 | type: text 31 | 32 | - title: Get directions link 33 | query: .result-links > a:nth-child(1) 34 | type: link 35 | 36 | - title: Contact branch link 37 | query: .result-links > a:nth-child(2) 38 | type: link 39 | -------------------------------------------------------------------------------- /src/scrappers/realstate.yml: -------------------------------------------------------------------------------- 1 | url: https://www.realestate.com.au/* 2 | listElementsQuery: '[data-testid="ResidentialCard"]' 3 | elementParser: 4 | - title: Address 5 | query: .residential-card__address-heading 6 | type: text 7 | 8 | - title: Property type 9 | query: '.piped-content:nth-child(2) .piped-content__inner:nth-child(2)' 10 | type: text 11 | 12 | - title: Price 13 | query: .residential-card__price 14 | type: text 15 | 16 | - title: Area 17 | query: '[aria-label*="building size"]' 18 | type: text 19 | 20 | - title: Bedrooms 21 | query: '[aria-label*="bedroom"]' 22 | type: text 23 | 24 | - title: Bathrooms 25 | query: 
'[aria-label*="bathroom"]' 26 | type: text 27 | 28 | - title: Garages 29 | query: '[aria-label*="parking"]' 30 | type: text 31 | 32 | - title: Property link 33 | query: a 34 | type: link 35 | -------------------------------------------------------------------------------- /src/scrappers/realtor.yml: -------------------------------------------------------------------------------- 1 | url: https://www.realtor.com/realestateandhomes-search/* 2 | listElementsQuery: '[id*="placeholder_property_"]' 3 | elementParser: 4 | - title: Property Type 5 | query: '[data-testid="card-description"]' 6 | type: text 7 | 8 | - title: Price 9 | query: '[data-testid="card-price"]' 10 | type: text 11 | 12 | - title: Address 13 | query: '[data-testid="card-address-1"]' 14 | type: text 15 | 16 | - title: Location 17 | query: '[data-testid="card-address-2"]' 18 | type: text 19 | 20 | - title: Bedrooms 21 | query: '[data-testid="property-meta-beds"]' 22 | type: text 23 | 24 | - title: Bathrooms 25 | query: '[data-testid="property-meta-baths"]' 26 | type: text 27 | 28 | - title: Area (sqft) 29 | query: '[data-testid="property-meta-sqft"] > span > .meta-value' 30 | type: text 31 | 32 | - title: Realtor URL 33 | query: a 34 | type: clean-url 35 | -------------------------------------------------------------------------------- /src/scrappers/recheio.yml: -------------------------------------------------------------------------------- 1 | url: https://www.recheio.pt/* 2 | listElementsQuery: ul.rch-search-layout li 3 | elementParser: 4 | - title: Image 5 | query: img 6 | type: image 7 | 8 | - title: Description 9 | query: span 10 | type: text -------------------------------------------------------------------------------- /src/scrappers/redfin.yml: -------------------------------------------------------------------------------- 1 | url: https://www.redfin.com/* 2 | listElementsQuery: '[id*="MapHomeCard_"]' 3 | elementParser: 4 | - title: Image 5 | query: img 6 | type: image 7 | 8 | - title: 
Address 9 | query: '[data-rf-test-id="abp-homeinfo-homeaddress"]' 10 | type: text 11 | 12 | - title: Price 13 | query: '[data-rf-test-name="homecard-price"]' 14 | type: text 15 | 16 | - title: Bedrooms 17 | query: '.HomeStatsV2 > .stats:nth-child(1)' 18 | type: text 19 | 20 | - title: Bathrooms 21 | query: '.HomeStatsV2 > .stats:nth-child(2)' 22 | type: text 23 | 24 | - title: Area 25 | query: '.HomeStatsV2 > .stats:nth-child(3)' 26 | type: text 27 | 28 | - title: Redfin link 29 | query: a 30 | type: link 31 | -------------------------------------------------------------------------------- /src/scrappers/rightmove.yml: -------------------------------------------------------------------------------- 1 | url: https://www.rightmove.co.uk/* 2 | listElementsQuery: '[data-test*="propertyCard-"]' 3 | elementParser: 4 | - title: Address 5 | query: '[itemprop="address"]' 6 | type: text 7 | 8 | - title: Description 9 | query: '[itemprop="description"]' 10 | type: text 11 | 12 | - title: Property Type 13 | query: .text 14 | type: text 15 | 16 | - title: Price 17 | query: .propertyCard-priceValue 18 | type: text 19 | 20 | - title: Bedrooms 21 | query: .property-information .text:nth-child(3) 22 | type: text 23 | 24 | - title: Bathrooms 25 | query: .property-information .text:nth-child(5) 26 | type: text 27 | 28 | 29 | - title: Rightmove URL 30 | query: .propertyCard-link 31 | type: clean-url 32 | 33 | 34 | 35 | 36 | 37 | 38 | -------------------------------------------------------------------------------- /src/scrappers/rosaoazul-es-list-products.yml: -------------------------------------------------------------------------------- 1 | url: 2 | - https://www.rosaoazul.es/search/* 3 | - https://www.rosaoazul.es/cochecitos/* 4 | - https://www.rosaoazul.es/sillas-de-coche/* 5 | - https://www.rosaoazul.es/salud-y-cuidados/* 6 | - https://www.rosaoazul.es/hora-de-comer/* 7 | - https://www.rosaoazul.es/dormitorio/* 8 | - https://www.rosaoazul.es/juguetes/* 9 | - 
https://www.rosaoazul.es/moda/* 10 | - https://www.rosaoazul.es/ofertas/* 11 | listElementsQuery: .products__item 12 | elementParser: 13 | - title: Item Brand 14 | query: '.product__brand' 15 | type: text 16 | 17 | - title: Item model 18 | query: '.product__model' 19 | type: text 20 | 21 | - title: Item Description 22 | query: '.product__title' 23 | type: text 24 | 25 | - title: RRP 26 | query: '.product__price--old' 27 | type: text 28 | 29 | - title: Price 30 | query: '.product__price' 31 | type: text 32 | 33 | - title: URL 34 | query: .product__link 35 | type: link -------------------------------------------------------------------------------- /src/scrappers/rosaoazul-es-product-detail-page.yml: -------------------------------------------------------------------------------- 1 | url: https://www.rosaoazul.es/* 2 | listElementsQuery: .site-main 3 | includeHeader: false 4 | elementParser: 5 | 6 | - title: Item Description 7 | query: 'h1' 8 | type: text 9 | 10 | - title: Item Number 11 | query: '#product-description-content > div > p:nth-child(2)' 12 | type: text 13 | 14 | - title: EAN 15 | query: '#product-description-content > div > p:nth-child(3)' 16 | type: text 17 | 18 | - title: Price 19 | query: '[itemprop="price"]' 20 | type: text 21 | 22 | - title: RRP 23 | query: '#buybox-options > div.product-options__prices > div:nth-child(1) > div.col-sm-9' 24 | type: text 25 | -------------------------------------------------------------------------------- /src/scrappers/roseoubleu-fr-list-products.yml: -------------------------------------------------------------------------------- 1 | url: 2 | - https://www.roseoubleu.fr/search/* 3 | - https://www.roseoubleu.fr/poussette/* 4 | - https://www.roseoubleu.fr/siege-auto/* 5 | - https://www.roseoubleu.fr/toilette/* 6 | - https://www.roseoubleu.fr/repas/* 7 | - https://www.roseoubleu.fr/chambre/* 8 | - https://www.roseoubleu.fr/eveil-jouet/* 9 | - https://www.roseoubleu.fr/mode/* 10 | - https://www.roseoubleu.fr/promos/* 
# Scrapper definition for Shopify App Store listings.
# `*` in the pattern is expanded to `.*` when matching the tab URL.
url: https://apps.shopify.com/*
# Each app card becomes one table row.
listElementsQuery: '[data-controller="app-card"]'
# One entry per column; queries are resolved inside each app card.
elementParser:
  # image: the element's `poster` if set, otherwise its `src`.
  - title: Logo
    query: img
    type: image

  - title: Application name
    query: .tw-text-body-md
    type: text

  - title: Price plan
    query: .tw-text-ellipsis
    type: text

  - title: Description
    query: .tw-text-fg-tertiary
    type: text

  # link: the `href` of the first anchor inside the card.
  - title: App link
    query: a
    type: link
# Scrapper definition for Standvirtual vehicle listings.
# `url` may be a list; the definition applies when any pattern matches
# (`*` is expanded to `.*`, and the whole pattern is anchored).
url:
  - https://www.standvirtual.com/autocaravanas*
  - https://www.standvirtual.com/comerciais*
  - https://www.standvirtual.com/motos*
  - https://www.standvirtual.com/carros*
  - https://www.standvirtual.com/pesados*
# Each matching article becomes one table row.
listElementsQuery: 'article[data-id]'
elementParser:
  - title: Brand
    query: h1
    type: text

  - title: Specs
    query: p
    type: text

  # get-attribute reads the named attribute from the matched element
  # (newlines replaced by spaces, then trimmed).
  - title: Price
    query: '[data-testid="financing-widget"]'
    type: get-attribute
    attribute: data-price

  - title: Mileage
    query: '[data-parameter="mileage"]'
    type: text

  - title: Fuel type
    query: '[data-parameter="fuel_type"]'
    type: text

  - title: Gearbox
    query: '[data-parameter="gearbox"]'
    type: text

  - title: First registration year
    query: '[data-parameter="first_registration_year"]'
    type: text
# Scrapper definition for SuperCoach (Daily Telegraph) news article lists.
# `*` in the pattern is expanded to `.*` when matching the tab URL.
url: https://supercoach.dailytelegraph.com.au/*
# Each article container becomes one table row.
listElementsQuery: '[class*="vm-NewsArticlesTabsSectionComponent-itemContainer"]'
# text: the element's innerText with newlines replaced by spaces, trimmed.
elementParser:
  - title: Title
    query: .vm-NewsArticlesTabsSectionComponent-title
    type: text

  - title: Description
    query: .vm-NewsArticlesTabsSectionComponent-desc
    type: text
# Scrapper definition for TikTok video search results.
# `*` in the pattern is expanded to `.*` when matching the tab URL.
url: https://www.tiktok.com/search/video*
# Used as the title of the resulting table.
header: TikTok Search Results
# Each search result container becomes one table row.
# Substring class matches ([class*=…]) are used throughout this definition.
listElementsQuery: '[class*="-DivItemContainerForSearch"]'
elementParser:
  - title: Description
    query: '[class*="-SpanText"]'
    type: text

  - title: Author
    query: '[class*="-PUniqueId"]'
    type: text

  - title: Views count
    query: '[class*="-StrongVideoCount"]'
    type: text

  # link: the `href` of the matched anchor.
  - title: Video URL
    query: '[class*="-DivWrapper"] > a'
    type: link
# Scrapper definition for Trulia for-sale listings.
# `*` in the pattern is expanded to `.*` when matching the tab URL.
url: https://www.trulia.com/*
# Each home card becomes one table row.
listElementsQuery: '[data-testid="home-card-sale"]'
# One entry per column; queries are resolved inside each home card.
elementParser:
  - title: Address
    query: '[data-testid="property-address"]'
    type: text

  - title: Bedrooms
    query: '[data-testid="property-beds"]'
    type: text

  - title: Bathrooms
    query: '[data-testid="property-baths"]'
    type: text

  - title: Price
    query: '[data-testid="property-price"]'
    type: text

  - title: Area
    query: '[data-testid="property-floorSpace"]'
    type: text

  # NOTE(review): "Tulia" looks like a typo for "Trulia". The title is emitted as
  # a column header, so renaming it changes output (and any e2e snapshot) — confirm before fixing.
  - title: Tulia link
    query: '[data-testid="property-card-link"]'
    type: link
# Scrapper definition for Yahoo Finance "financials" pages, whose tables are
# built from <div>s rather than <table> elements.
# Because `parseTables` is present, runScrapper (src/utils/chrome.ts) dispatches
# to scrapDivHTMLTables instead of customScrapper.
url: https://finance.yahoo.com/quote/*/financials
parseTables:
  # Each entry describes one div-table: `rows` is run against the document,
  # `cols` against every matched row; each cell is the column's trimmed innerText.
  tables:
    # Matches rows nested inside a (tbhg) element.
    - rows: '[class*="(tbhg)"]>[class*="(tbr)"]'
      cols: div > span

    # Matches every (tbr) row in the document.
    # NOTE(review): this selector also matches the rows picked up by the first entry — confirm the overlap is intended.
    - rows: '[class*="(tbr)"]'
      cols: '[data-test="fin-col"], [title]'

  # row: the rows of all tables are concatenated, in order, into a single table.
  mergeTablesBy: row
# Scrapper definition for Yellow Pages search results.
# `*` in the pattern is expanded to `.*` when matching the tab URL.
url: https://www.yellowpages.com/search*
# Each `.result` element becomes one table row.
listElementsQuery: .result
# One entry per column; queries are resolved inside each result.
elementParser:
  # image: the element's `poster` if set, otherwise its `src`.
  - title: Logo
    query: img
    type: image

  - title: Name
    query: .business-name
    type: text

  - title: Phone number
    query: .phone
    type: text

  - title: Address
    query: .adr
    type: text

  - title: Categories
    query: .categories
    type: text

  # link: the `href` of the matched element.
  - title: Website
    query: .track-visit-website
    type: link
# Scrapper definition for YouTube history, search results and playlist pages.
# The definition applies when any of the patterns matches (`*` is expanded to `.*`).
url:
  - https://www.youtube.com/feed/history*
  - https://www.youtube.com/results*
  - https://www.youtube.com/playlist*
# A selector list: any of the three renderer elements becomes one table row.
listElementsQuery: 'ytd-video-renderer, ytd-playlist-video-renderer, ytd-rich-item-renderer'
# One entry per column; only the first match of each query inside the row is used.
elementParser:
  - title: Video thumbnail
    query: "img"
    type: image

  - title: Video title
    query: "#video-title"
    type: text

  - title: Video views
    query: "#metadata-line > span, yt-formatted-string > span"
    type: text

  - title: Video description
    query: ".metadata-snippet-text"
    type: text

  - title: Video duration
    query: "ytd-thumbnail-overlay-time-status-renderer > div"
    type: text

  # Same element as "Video title", read as a link (its `href`) instead of text.
  - title: Video URL
    query: "#video-title"
    type: link

  - title: Channel
    query: ".ytd-channel-name a"
    type: text

  # Same element as "Channel", read as a link.
  - title: Channel URL
    query: ".ytd-channel-name a"
    type: link
/**
 * Returns the label of a URL's hostname that sits directly in front of its
 * public suffix, e.g. `https://www.redfin.com/city` → `redfin`.
 *
 * The previous implementation chose between a one- and a two-label suffix with
 * `hostname.endsWith(lastLabel)`. That test is always true, so exactly one label
 * was removed and `www.rightmove.co.uk` produced `co`. Two-label suffixes are now
 * detected heuristically: a two-letter final label preceded by one of the common
 * second-level labels (`co`, `com`, `org`, `net`, `gov`, `edu`, `ac`).
 * This is not a full public-suffix lookup.
 *
 * @param url Absolute URL. Wildcards in the host (e.g. `www.yelp.*`) are accepted by `URL`.
 * @returns The domain label, or `''` when the host has no label before its suffix (e.g. `localhost`).
 * @throws TypeError when `url` cannot be parsed by the `URL` constructor.
 */
export function getDomainName(url: string): string {
  // Second-level labels commonly registered under two-letter country-code TLDs.
  const secondLevelLabels = ['co', 'com', 'org', 'net', 'gov', 'edu', 'ac'];

  const hostnameParts = new URL(url).hostname.split('.');
  const topLevel = hostnameParts[hostnameParts.length - 1] ?? '';
  const secondLevel = hostnameParts[hostnameParts.length - 2] ?? '';

  // Require at least three labels so that a bare `co.uk`-style host is not emptied out.
  const hasTwoPartTld =
    hostnameParts.length >= 3 && topLevel.length === 2 && secondLevelLabels.includes(secondLevel);

  const tldCount = hasTwoPartTld ? 2 : 1;
  const nameIndex = hostnameParts.length - tldCount - 1;

  return nameIndex >= 0 ? hostnameParts[nameIndex] : '';
}
/**
 * Heuristically decides whether a cell holds an image URL.
 *
 * The value must start with `http` and contain one of a fixed set of markers.
 * `/image/`, `/images/` and `thumbnail` are matched case-insensitively; every
 * other marker (file extensions, `photo`, the googleusercontent path) is
 * matched against the value exactly as given.
 */
export function hasImage(cell: string): boolean {
  if (!cell || !cell.startsWith('http')) {
    return false;
  }

  const lowered = cell.toLowerCase();
  const caseInsensitiveMarkers = ['/image/', '/images/', 'thumbnail'];
  const caseSensitiveMarkers = [
    'googleusercontent.com/p/',
    'photo',
    '.jpg',
    '.jpeg',
    '.png',
    '.gif',
    '.svg',
    '.webp',
  ];

  return (
    caseInsensitiveMarkers.some((marker) => lowered.includes(marker)) ||
    caseSensitiveMarkers.some((marker) => cell.includes(marker))
  );
}

/**
 * Converts one cell into the text that is placed in the TSV output:
 * empty values become `''`, values starting with `+` are wrapped as `='…'`,
 * image URLs are wrapped in an `=IMAGE("…")` formula, anything else is kept as is.
 */
function processCell(cell: string) {
  if (!cell) {
    return '';
  }

  if (cell.startsWith('+')) {
    return `='${cell}'`;
  }

  return hasImage(cell) ? `=IMAGE("${cell}")` : cell;
}

/**
 * Serialises a table to tab-separated values: cells are joined with tabs,
 * rows with newlines. An omitted table yields an empty string.
 */
export function array2tsv(data: string[][] = []): string {
  const lines = data.map((row) => row.map((cell) => processCell(cell)).join('\t'));

  return lines.join('\n');
}
/**
 * Appends one feedback row to the Rows report table for the active tab.
 *
 * The row holds: UTC timestamp, tab URL, tab hostname, browser name, browser
 * version and the feedback text (`'no table detected'` when none is given).
 *
 * @param feedback Optional free-text feedback.
 * @returns `null` when there is no active tab; otherwise resolves with no value
 *          once the append request has completed.
 * @throws TypeError when the tab has no `url`, because `new URL` rejects it.
 */
export async function createNewReportEntryRow(feedback?: string) {
  const activeTab = await getCurrentTab();

  if (!activeTab) {
    return null;
  }

  const timestamp = new Date().toUTCString();
  const hostname = new URL(activeTab.url!).hostname;
  const browser = new UAParser(navigator.userAgent).getBrowser();

  const row_cells = [
    timestamp,
    activeTab.url,
    hostname,
    browser.name,
    browser.version,
    feedback ?? 'no table detected',
  ];

  // The spreadsheet and table ids are injected at build time through import.meta.env.
  // eslint-disable-next-line @typescript-eslint/ban-ts-comment
  // @ts-ignore
  await fetch.post(`https://api.rows.com/v1/spreadsheets/${import.meta.env.VITE_SPREADSHEET_ID}/tables/${import.meta.env.VITE_TABLE_ID}/values/A1:F:append`, {
    values: [row_cells],
  });
}
/**
 * Finds the scrapper definition that applies to a URL.
 *
 * Definitions are looked up by the URL's domain name and then filtered by their
 * `url` pattern(s); the first definition with a matching pattern wins.
 *
 * When the matched definition has no usable `header`, `title` is used instead.
 * The fallback is applied after spreading the definition, so a present-but-falsy
 * header (for example YAML `header:` parsed as null) can no longer overwrite it,
 * which the previous `{ header: title, ...options }` allowed.
 *
 * @param url   URL of the page to scrape.
 * @param title Fallback table title.
 * @returns The matching options, or `null` when no definition applies.
 */
export function getScrapperOptionsByUrl(url: string, title: string): ScrapperOptions | null {
  const domain = getDomainName(url);

  if (!domain) {
    return null;
  }

  const scrappers = scrapperOptions.get(domain);

  if (!scrappers) {
    return null;
  }

  const options = scrappers.find((scrapper) => {
    // `url` may be a single pattern or a list of patterns.
    const patterns = Array.isArray(scrapper.url) ? scrapper.url : [scrapper.url];

    return patterns.some((pattern: string) => urlMatchesPatternUrl(url, pattern));
  });

  if (!options) {
    return null;
  }

  return options.header ? options : { ...options, header: title };
}
/**
 * Scrapes the current document into one table, driven by a declarative
 * scrapper definition.
 *
 * Every element matched by `options.listElementsQuery` becomes a row; every
 * entry of `options.elementParser` becomes a column whose value is extracted
 * according to the entry's `type`.
 *
 * This function is passed as `func` to `chrome.scripting.executeScript`
 * (see `runScrapper`), so all helpers are declared inside it and it must not
 * reference module-level values at runtime.
 *
 * Fixes over the previous version:
 * - `clean-url` no longer throws (aborting the whole scrape) when the target has
 *   no `href` or an unparsable one; the cell becomes `''`.
 * - `image` and `link` return `''` instead of `undefined` when the property is missing.
 * - `float` returns `''` instead of the literal `NaN` when no number can be parsed.
 *
 * @param options Scrapper definition; `listElementsQuery` and `elementParser` are required here.
 * @returns `[]` when nothing matched, otherwise a single-item array with the
 *          table (header row first), its title and the `includeHeader` flag (default `true`).
 */
export async function customScrapper(options: ScrapperOptions) {
  // Resolves the node a parser entry targets: the first descendant matching
  // `query`, or the element itself when no query is given.
  function resolveTarget(element?: DOM_Element, query?: string) {
    let elem = element;

    if (query) {
      elem = element?.querySelector(query);
    }

    return elem;
  }

  // innerText with newlines replaced by spaces, trimmed.
  function getText(element?: DOM_Element, query?: string) {
    return resolveTarget(element, query)?.innerText?.replaceAll('\n', ' ')?.trim() ?? '';
  }

  // `poster` when defined, otherwise `src`.
  function getImageSrc(element?: DOM_Element, query?: string) {
    const elem = resolveTarget(element, query);

    return elem?.poster ?? elem?.src ?? '';
  }

  // href of the target; falls back to the list element's own href when the query matches nothing.
  function getLink(element?: DOM_Element, query?: string) {
    const elem = resolveTarget(element, query);

    return (elem ?? element)?.href ?? '';
  }

  // href reduced to origin + pathname (query string and hash removed).
  function getCleanUrl(element?: DOM_Element, query?: string) {
    const href = resolveTarget(element, query)?.href;

    if (!href) {
      return '';
    }

    try {
      const url = new URL(href);
      return url.origin + url.pathname;
    } catch {
      // Not a parsable absolute URL: leave the cell empty rather than failing the scrape.
      return '';
    }
  }

  // Value of `attribute` on the target, newlines replaced by spaces, trimmed.
  function getAttribute(element?: DOM_Element, query?: string, attribute?: string) {
    const elem = resolveTarget(element, query);

    return elem?.getAttribute(attribute!)?.replaceAll('\n', ' ').trim() ?? '';
  }

  // Text with letters, whitespace and a few currency/punctuation symbols removed, parsed as a float.
  function getFloat(element?: DOM_Element, query?: string) {
    const text = getText(element, query);
    const value = Number.parseFloat(text.replace(/[a-zA-Z_€$!#?&]|\s/g, ''));

    return Number.isNaN(value) ? '' : value.toString();
  }

  // Dispatches one parser entry to its extractor; unknown types yield ''.
  function parse(element: DOM_Element, query?: string, type?: string, attribute?: string) {
    switch (type) {
      case 'text':
        return getText(element, query);
      case 'image':
        return getImageSrc(element, query);
      case 'clean-url':
        return getCleanUrl(element, query);
      case 'link':
        return getLink(element, query);
      case 'get-attribute':
        return getAttribute(element, query, attribute);
      case 'float':
        return getFloat(element, query);
      case 'self-link':
        return element ? element.href : '';
      default:
        return '';
    }
  }

  const listElements = Array.from(
    document.querySelectorAll(options.listElementsQuery!) as Iterable<DOM_Element>
  );

  const tableElements = listElements.map((element) =>
    options.elementParser!.map((parserInfo) =>
      parse(element, parserInfo?.query, parserInfo.type, parserInfo?.attribute)
    )
  );

  if (tableElements.length <= 0) {
    return [];
  }

  const headerRow = options.elementParser!.map((element) => element.title);

  return [
    {
      title: options.header,
      table: [headerRow, ...tableElements],
      includeHeader: options.includeHeader ?? true,
    },
  ];
}
/**
 * Scrapes every `<table>` of the current document.
 *
 * For each table a title is taken from the preceding sibling's text. When that
 * is empty or starts with `.`, the table and then each ancestor in turn are
 * searched for a caption/heading/title element. `colspan` and `rowspan` are
 * expanded by repeating the cell value, columns that are empty in every row are
 * removed, and tables without any non-empty cell are dropped.
 *
 * This function is passed as `func` to `chrome.scripting.executeScript`
 * (see `runScrapper`), so all helpers are declared inside it.
 *
 * Fix over the previous version: the title search stopped only once a usable
 * title was found, so a page without one looped forever after walking past the
 * document root. The walk now ends at the root and keeps whatever title it has.
 *
 * @returns One `{ title, table, includeHeader: true }` item per non-empty table.
 */
// eslint-disable-next-line @typescript-eslint/explicit-module-boundary-types
export async function scrapHTMLTables() {
  // Drops the columns that have no truthy value in any row.
  // The set of columns is derived from the first row.
  function removeEmptyColumns(arr: Table) {
    const columnHasContent = (arr[0] || []).map((_: string, index: number) =>
      arr.some((row) => row[index])
    );

    return arr.map((row) => row.filter((_: string, index: number) => columnHasContent[index]));
  }

  // Expands colspan/rowspan into repeated cells and reduces the cells to their values.
  function toArray(table: TableWithRowAndColSpan) {
    const data: TableWithRowAndColSpan = [];

    // First pass: repeat each cell `colspan` times within its row.
    for (let i = 0; i < table.length; i++) {
      const tr = table[i];

      for (let j = 0; j < tr.length; j++) {
        const td = tr[j];

        for (let c = 0; c < td.colspan; c++) {
          if (!data[i]) {
            data[i] = [];
          }

          data[i].push({ ...td, colspan: 1 });
        }
      }
    }

    // Second pass: insert each spanning cell into the following `rowspan - 1` rows
    // at the same column index.
    for (let i = 0; i < data.length; i++) {
      const tr = data[i];

      for (let j = 0; j < tr.length; j++) {
        const td = tr[j];

        for (let r = 1; r < td.rowspan; r++) {
          if (!data[i + r]) {
            data[i + r] = [];
          }

          data[i + r].splice(j, 0, { ...td, rowspan: 1 });
        }
      }
    }

    return removeEmptyColumns(data.map((row) => row.map((col) => col.value)));
  }

  const titles: string[] = [];

  const tableElements = Array.from(
    document.querySelectorAll('table') as Iterable<DOM_Element>
  );

  const rowSelector = 'tr';
  const colSelector = 'td,th';

  const tables = tableElements.map((tableElement) => {
    let title = tableElement?.previousElementSibling?.innerText?.trim() ?? '';
    let scrapElement = tableElement;

    // Search the table, then each ancestor, for a caption or heading.
    // Stop at the document root instead of spinning on a missing title.
    while (scrapElement && (title === '' || title.startsWith('.'))) {
      const titleElement = scrapElement.querySelector(
        'caption,h6,h5,h4,h3,h2,h1,title'
      ) as DOM_Element;

      if (titleElement) {
        title = titleElement?.innerText?.replaceAll('\n', ' ')?.trim() ?? '';
      }

      scrapElement = scrapElement.parentElement as DOM_Element;
    }

    titles.push(title);

    // eslint-disable-next-line @typescript-eslint/no-non-null-assertion
    return Array.from(tableElement!.querySelectorAll(rowSelector)).map((tr) => {
      if (!tr) {
        return [];
      }

      return Array.from(tr.querySelectorAll(colSelector) as Iterable<DOM_Element>).map((td) => {
        if (!td) {
          return { value: '', colspan: 1, rowspan: 1 };
        }

        // Missing, zero or non-numeric span attributes fall back to 1.
        const rowspan = +(td.getAttribute('rowspan') || 1) || 1;
        const colspan = +(td.getAttribute('colspan') || 1) || 1;

        const value = td.innerText || '';

        return {
          // Tabs and newlines would break the TSV produced later.
          value: value.replaceAll('\n', ' ').replaceAll('\t', ' ').trim(),
          colspan,
          rowspan,
        };
      });
    });
  });

  // Maps the tables to the result structure and then removes the empty ones.
  return tables
    .map((table, index: number) => ({
      title: titles[index],
      table: toArray(table),
      includeHeader: true,
    }))
    .filter((table) => !table.table.every((row: Row) => row.every((col) => col === '')));
}
'.*') + '$'); 7 | return regex.test(url); 8 | } -------------------------------------------------------------------------------- /tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "target": "ES2021", 4 | "useDefineForClassFields": true, 5 | "module": "ESNext", 6 | "lib": ["ES2021", "DOM", "DOM.Iterable"], 7 | "skipLibCheck": true, 8 | "paths": { 9 | "react": ["./node_modules/preact/compat/"], 10 | "react-dom": ["./node_modules/preact/compat/"] 11 | }, 12 | /* Bundler mode */ 13 | "moduleResolution": "node", 14 | "resolveJsonModule": true, 15 | "isolatedModules": false, 16 | "esModuleInterop": true, 17 | "noEmit": true, 18 | "jsx": "react-jsx", 19 | "jsxImportSource": "preact", 20 | /* Linting */ 21 | "strict": true, 22 | "noUnusedLocals": true, 23 | "noUnusedParameters": true, 24 | "noFallthroughCasesInSwitch": true, 25 | }, 26 | "include": ["src"], 27 | "references": [{ "path": "./tsconfig.node.json" }], 28 | "exclude": [ 29 | "e2e/**/*.test.ts" 30 | ] 31 | } 32 | -------------------------------------------------------------------------------- /tsconfig.node.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "composite": true, 4 | "skipLibCheck": true, 5 | "module": "ESNext", 6 | "moduleResolution": "bundler", 7 | "allowSyntheticDefaultImports": true 8 | }, 9 | "include": ["vite.config.ts"] 10 | } 11 | -------------------------------------------------------------------------------- /vite.config.ts: -------------------------------------------------------------------------------- 1 | import { defineConfig } from 'vite'; 2 | import preact from '@preact/preset-vite'; 3 | import { crx } from '@crxjs/vite-plugin'; 4 | import viteYaml from '@modyfi/vite-plugin-yaml'; 5 | import manifest from './manifest'; 6 | import npmPackage from './package'; 7 | 8 | const extensionManifest = { 9 | version: npmPackage.version, 10 | 
// Manifest variant used when Vite runs in `e2e` mode. It is the regular
// extension manifest with host access widened to every URL.
// NOTE(review): `<all_urls>` restores what looks like an extraction-stripped
// token (the value was an empty string, which is not a valid match pattern
// and grants no host access) — confirm against the repository.
const e2eTestManifest = {
  ...extensionManifest,
  host_permissions: ['<all_urls>'],
};

// Vite configuration: Preact, YAML imports, and the CRX plugin, which receives
// the e2e manifest only when the build mode is `e2e`.
export default defineConfig(({ mode }) => ({
  plugins: [
    preact(),
    viteYaml(),
    crx({
      manifest: mode === 'e2e' ? e2eTestManifest : extensionManifest,
    }),
  ],
}));