├── jsconfig.json
├── postcss.config.cjs
├── public
└── img
│ ├── icon128.png
│ ├── icon16.png
│ ├── icon32.png
│ ├── icon48.png
│ ├── icon-success16.png
│ ├── icon-success32.png
│ ├── icon-success48.png
│ ├── icon-success128.png
│ ├── icon.svg
│ └── icon-success.svg
├── .prettierignore
├── src
├── components
│ ├── Loading.jsx
│ ├── PageData.jsx
│ ├── ClipboardToast.jsx
│ ├── Navbar.jsx
│ ├── PageProps.jsx
│ └── Footer.jsx
├── Popup.css
├── index.jsx
├── contexts
│ ├── ClipboardProvider.jsx
│ ├── ThemeProvider.jsx
│ └── PageDataProvider.jsx
├── manifest.js
├── scripts
│ ├── background.js
│ └── content.js
└── Popup.jsx
├── .prettierrc
├── tailwind.config.js
├── .gitignore
├── popup.html
├── .npmignore
├── vite.config.js
├── .github
└── workflows
│ └── chrome-webstore-publish.yml
├── LICENSE
├── PrivacyPolicy.md
├── package.json
└── README.md
/jsconfig.json:
--------------------------------------------------------------------------------
1 | { "typeAcquisition": { "include": ["chrome"] } }
2 |
--------------------------------------------------------------------------------
/postcss.config.cjs:
--------------------------------------------------------------------------------
1 | module.exports = {
2 | plugins: [require('tailwindcss'), require('autoprefixer')],
3 | }
4 |
--------------------------------------------------------------------------------
/public/img/icon128.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/marksantiago290/Web-Scraping-Extension/HEAD/public/img/icon128.png
--------------------------------------------------------------------------------
/public/img/icon16.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/marksantiago290/Web-Scraping-Extension/HEAD/public/img/icon16.png
--------------------------------------------------------------------------------
/public/img/icon32.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/marksantiago290/Web-Scraping-Extension/HEAD/public/img/icon32.png
--------------------------------------------------------------------------------
/public/img/icon48.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/marksantiago290/Web-Scraping-Extension/HEAD/public/img/icon48.png
--------------------------------------------------------------------------------
/.prettierignore:
--------------------------------------------------------------------------------
1 | # Ignore artifacts:
2 | build
3 | coverage
4 | node_modules
5 | pnpm-lock.yaml
6 | pnpm-workspace.yaml
7 |
--------------------------------------------------------------------------------
/src/components/Loading.jsx:
--------------------------------------------------------------------------------
1 | export const Loading = () => {
2 | return
3 | }
4 |
--------------------------------------------------------------------------------
/public/img/icon-success16.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/marksantiago290/Web-Scraping-Extension/HEAD/public/img/icon-success16.png
--------------------------------------------------------------------------------
/public/img/icon-success32.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/marksantiago290/Web-Scraping-Extension/HEAD/public/img/icon-success32.png
--------------------------------------------------------------------------------
/public/img/icon-success48.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/marksantiago290/Web-Scraping-Extension/HEAD/public/img/icon-success48.png
--------------------------------------------------------------------------------
/public/img/icon-success128.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/marksantiago290/Web-Scraping-Extension/HEAD/public/img/icon-success128.png
--------------------------------------------------------------------------------
/.prettierrc:
--------------------------------------------------------------------------------
1 | {
2 | "jsxSingleQuote": false,
3 | "singleQuote": true,
4 | "trailingComma": "all",
5 | "endOfLine": "lf",
6 | "printWidth": 100,
7 | "semi": false,
8 | "tabWidth": 2,
9 | "useTabs": false
10 | }
11 |
--------------------------------------------------------------------------------
/tailwind.config.js:
--------------------------------------------------------------------------------
1 | /** @type {import('tailwindcss').Config} */
2 | export default {
3 | content: ['./src/**/*.{js,ts,jsx,tsx}', './popup.html'],
4 | theme: {
5 | extend: {},
6 | },
7 | plugins: [require('daisyui')],
8 | daisyui: {
9 | themes: ['corporate', 'business'],
10 | },
11 | }
12 |
--------------------------------------------------------------------------------
/src/Popup.css:
--------------------------------------------------------------------------------
1 | @tailwind base;
2 | @tailwind components;
3 | @tailwind utilities;
4 |
5 | html,
6 | body {
7 | height: 600px;
8 | width: 800px;
9 | top: 0;
10 | }
11 |
12 | .toast-hidden {
13 | opacity: 0;
14 | transition: all 250ms linear 3s;
15 | }
16 |
17 | .toast-shown {
18 | opacity: 1;
19 | transition: all 250ms linear;
20 | }
21 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # See https://help.github.com/ignore-files/ for more about ignoring files.
2 |
3 | # dependencies
4 | /node_modules
5 |
6 | # testing
7 | /coverage
8 |
9 | # production
10 | /build
11 | /package
12 |
13 | # misc
14 | .DS_Store
15 | .env.local
16 | .env.development.local
17 | .env.test.local
18 | .env.production.local
19 | .history
20 | *.log
21 |
22 | # secrets
23 | secrets.*.js
24 |
25 | client_secret_*.json
--------------------------------------------------------------------------------
/popup.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 | NextScraper
8 |
9 |
10 |
11 |
12 |
13 |
14 |
--------------------------------------------------------------------------------
/.npmignore:
--------------------------------------------------------------------------------
1 | # OS
2 | .DS_Store
3 |
4 | # ignore node dependency directories & lock
5 | node_modules
6 | yarn.lock
7 | pnpm-lock.yaml
8 | package-lock.json
9 |
10 | # ignore log files and local
11 | *.log
12 | *.local
13 | .env.local
14 | .env.development.local
15 | .env.test.local
16 | .env.production.local
17 | .history
18 |
19 | # ignore compiled files
20 | build
21 | types
22 | coverage
23 |
24 | # ignore ide settings
25 | .idea
26 | .vscode
27 |
--------------------------------------------------------------------------------
/src/components/PageData.jsx:
--------------------------------------------------------------------------------
1 | import { useContext } from 'react'
2 | import { PageDataContext } from '../contexts/PageDataProvider'
3 |
4 | export const PageData = () => {
5 | const { jsonRef, pageEnabled } = useContext(PageDataContext)
6 |
7 | return (
8 |
14 | )
15 | }
16 |
--------------------------------------------------------------------------------
/src/components/ClipboardToast.jsx:
--------------------------------------------------------------------------------
1 | import { useContext } from 'react'
2 | import { ClipboardContext } from '../contexts/ClipboardProvider'
3 |
4 | export const ClipboardToast = () => {
5 | const { showToast } = useContext(ClipboardContext)
6 |
7 | return (
8 |
9 |
10 | Copied to clipboard!
11 |
12 |
13 | )
14 | }
15 |
--------------------------------------------------------------------------------
/src/index.jsx:
--------------------------------------------------------------------------------
1 | import React from 'react'
2 | import ReactDOM from 'react-dom/client'
3 | import { Popup } from './Popup'
4 | import { PageDataProvider } from './contexts/PageDataProvider'
5 | import { ThemeProvider } from './contexts/ThemeProvider'
6 | import { ClipboardProvider } from './contexts/ClipboardProvider'
7 |
8 | ReactDOM.createRoot(document.getElementById('app')).render(
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 | ,
18 | )
19 |
--------------------------------------------------------------------------------
/vite.config.js:
--------------------------------------------------------------------------------
1 | import { defineConfig } from 'vite'
2 | import { crx } from '@crxjs/vite-plugin'
3 | import react from '@vitejs/plugin-react'
4 | import manifest from './src/manifest.js'
5 |
6 | // https://vitejs.dev/config/
7 | export default defineConfig(({ mode }) => {
8 | return {
9 | build: {
10 | emptyOutDir: true,
11 | outDir: 'build',
12 | rollupOptions: {
13 | output: {
14 | chunkFileNames: 'assets/chunk-[hash].js',
15 | },
16 | },
17 | },
18 | server: {
19 | port: 5173,
20 | strictPort: true,
21 | hmr: {
22 | port: 5173,
23 | },
24 | },
25 | plugins: [crx({ manifest }), react()],
26 | }
27 | })
28 |
--------------------------------------------------------------------------------
/src/contexts/ClipboardProvider.jsx:
--------------------------------------------------------------------------------
1 | import { createContext, useState } from 'react'
2 |
3 | export const ClipboardContext = createContext()
4 |
5 | export const ClipboardProvider = ({ children }) => {
6 | const [showToast, setShowToast] = useState(false)
7 |
/**
 * Writes `text` to the system clipboard and, only when the write succeeds,
 * shows the "copied" toast for three seconds.
 *
 * @param {string} text - Text to place on the clipboard.
 * @returns {Promise<void>} Settles once the write attempt has finished; never rejects.
 */
const copyToClipboard = async (text) => {
  // Without the async Clipboard API nothing can be copied, so no toast is shown
  if (!navigator.clipboard?.writeText) return

  try {
    await navigator.clipboard.writeText(text)
  } catch (err) {
    // The write can be rejected; report it instead of claiming success
    console.error('Failed to copy to clipboard:', err)
    return
  }

  setShowToast(true)
  setTimeout(() => {
    setShowToast(false)
  }, 3000)
}
15 |
16 | return (
17 |
20 | {children}
21 |
22 | )
23 | }
24 |
--------------------------------------------------------------------------------
/src/manifest.js:
--------------------------------------------------------------------------------
1 | import { defineManifest } from '@crxjs/vite-plugin'
2 | import packageData from '../package.json' assert { type: 'json' }
3 |
4 | export default defineManifest({
5 | name: packageData.displayName,
6 | description: packageData.description,
7 | version: packageData.version,
8 | manifest_version: 3,
9 | icons: {
10 | 16: 'img/icon16.png',
11 | 32: 'img/icon32.png',
12 | 48: 'img/icon48.png',
13 | 128: 'img/icon128.png',
14 | },
15 | action: {
16 | default_popup: 'popup.html',
17 | default_icon: 'img/icon48.png',
18 | },
19 | content_scripts: [
20 | {
21 | matches: ['http://*/*', 'https://*/*'],
22 | js: ['src/scripts/content.js'],
23 | },
24 | ],
25 | background: {
26 | service_worker: 'src/scripts/background.js',
27 | },
28 | permissions: ['activeTab', 'storage', 'webNavigation'],
29 | })
30 |
--------------------------------------------------------------------------------
/src/scripts/background.js:
--------------------------------------------------------------------------------
1 | let extensionActive = false
2 | let currentPageData = {}
3 |
// When an http(s) page finishes loading, ask that tab's content script whether
// scrapeable page data exists and switch the tab's toolbar icon accordingly.
chrome.webNavigation.onCompleted.addListener(
  ({ tabId, frameId }) => {
    // The event also fires for sub-frames; only the top-level document (frameId 0) is relevant
    if (frameId !== 0) return

    // Message the tab that actually navigated rather than whichever tab is focused
    chrome.tabs.sendMessage(tabId, { type: 'update_icon' }, (res) => {
      // Reading lastError marks a failed delivery as handled (e.g. nothing in the tab answered)
      if (chrome.runtime.lastError || !res) return

      const prefix = res.pageEnabled ? '../../img/icon-success' : '../../img/icon'
      chrome.action.setIcon({
        path: {
          16: prefix + '16.png',
          32: prefix + '32.png',
          48: prefix + '48.png',
          128: prefix + '128.png',
        },
        tabId,
      })
    })
  },
  { url: [{ schemes: ['http', 'https'] }] },
)
25 |
/**
 * Sets the action popup depending on `extensionActive`: an empty popup path
 * when the flag is set, `popup.html` otherwise.
 *
 * @param {number} [tabId] - Tab to apply the popup to; passed through to
 *   `chrome.action.setPopup` as-is (undefined when omitted).
 */
const updatePopup = (tabId) => {
  chrome.action.setPopup({
    popup: extensionActive ? '' : 'popup.html',
    tabId,
  })
}
32 |
--------------------------------------------------------------------------------
/.github/workflows/chrome-webstore-publish.yml:
--------------------------------------------------------------------------------
1 | name: Chrome Webstore Publish
2 |
3 | on:
4 | push:
5 | branches:
6 | - publish
7 |
8 | jobs:
9 | build:
10 | runs-on: ubuntu-latest
11 |
12 | steps:
13 | - name: Checkout code
14 | uses: actions/checkout@v4
15 |
16 | - name: Set up Node.js
17 | uses: actions/setup-node@v4
18 | with:
19 | node-version: 21.5.0
20 |
21 | - name: Install dependencies
22 | run: npm ci
23 |
24 | - name: Build
25 | run: npm run build
26 |
27 | - name: Zip build files
28 | run: (cd build && zip -r ../build.zip .)
29 |
30 | # Docs: https://github.com/marketplace/actions/chrome-extension-upload-action
31 | - name: Publish to Chrome Webstore
32 | uses: mnao305/chrome-extension-upload@v4.0.1
33 | with:
34 | file-path: build.zip
35 | extension-id: ${{ secrets.EXTENSION_ID }}
36 | client-id: ${{ secrets.CLIENT_ID }}
37 | client-secret: ${{ secrets.CLIENT_SECRET }}
38 | refresh-token: ${{ secrets.REFRESH_TOKEN }}
39 |
--------------------------------------------------------------------------------
/public/img/icon.svg:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/public/img/icon-success.svg:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | The MIT License (MIT)
2 |
3 | Copyright (c) 2023-present, Peter Rauscher
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in
13 | all copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21 | THE SOFTWARE.
22 |
--------------------------------------------------------------------------------
/PrivacyPolicy.md:
--------------------------------------------------------------------------------
1 | # NextScraper Privacy Policy
2 |
3 | NextScraper is a Chrome extension developed by Peter Rauscher that has a strong commitment to user privacy. I believe in transparency and want to ensure you understand how your privacy is protected when using this extension.
4 |
5 | ## Data Collection
6 |
7 | NextScraper **does not** collect, use, or distribute any user data whatsoever. We do not access, store, or transmit any personal information or browsing data from your Chrome browser. Your data remains entirely private and secure.
8 |
9 | ## Data Usage
10 |
11 | Since NextScraper does not collect any user data, there is no data usage related to personal information, browsing history, or any other user-related data.
12 |
13 | ## Data Sharing
14 |
15 | We do not share any user data with third parties as we do not collect or store such data. Your privacy is our top priority.
16 |
17 | ## Contact Us
18 |
19 | If you have any questions or concerns regarding your privacy while using NextScraper, please feel free to contact me at [peter@peterrauscher.com].
20 |
21 | Thank you for using NextScraper and trusting us to protect your privacy.
22 |
23 | _Last updated: [01/02/2024]_
24 |
--------------------------------------------------------------------------------
/src/contexts/ThemeProvider.jsx:
--------------------------------------------------------------------------------
1 | import { useState, useEffect } from 'react'
2 | import { createContext } from 'react'
3 |
4 | export const ThemeContext = createContext()
5 |
6 | export const ThemeProvider = ({ children }) => {
7 | const [theme, setTheme] = useState('corporate')
8 | const [themeLoading, setThemeLoading] = useState(true)
9 |
// Switches between the two themes from a checkbox change event:
// checked selects 'business', unchecked selects 'corporate'.
const toggleTheme = (e) => {
  // Ignore toggles while the theme is still loading
  if (themeLoading) return
  setTheme(e.target.checked ? 'business' : 'corporate')
}
16 |
17 | useEffect(() => {
18 | if (!themeLoading) {
19 | chrome.storage.local.set({ theme: theme }).catch((err) => console.error(err))
20 | document.querySelector('html').setAttribute('data-theme', theme)
21 | }
22 | }, [theme, themeLoading])
23 |
24 | useEffect(() => {
25 | setThemeLoading(true)
26 | chrome.storage.local.get(['theme']).then((result) => {
27 | if (result.theme) {
28 | setTheme(result.theme)
29 | document.querySelector('html').setAttribute('data-theme', result.theme)
30 | }
31 | setThemeLoading(false)
32 | })
33 | }, [])
34 |
35 | return (
36 |
43 | {children}
44 |
45 | )
46 | }
47 |
--------------------------------------------------------------------------------
/src/Popup.jsx:
--------------------------------------------------------------------------------
1 | import { useContext } from 'react'
2 |
3 | import './Popup.css'
4 | import { PageProps } from './components/PageProps'
5 | import { Navbar } from './components/Navbar'
6 | import { Footer } from './components/Footer'
7 | import { PageData } from './components/PageData'
8 | import { PageDataContext } from './contexts/PageDataProvider'
9 | import { ThemeContext } from './contexts/ThemeProvider'
10 | import { Loading } from './components/Loading'
11 | import { ClipboardToast } from './components/ClipboardToast'
12 |
13 | export const Popup = () => {
14 | const { loading, pageEnabled } = useContext(PageDataContext)
15 | const { themeLoading } = useContext(ThemeContext)
16 |
17 | if (loading || themeLoading) return
18 | else
19 | return (
20 | <>
21 |
22 |
23 | {pageEnabled ? (
24 | <>
25 |
29 |
30 | >
31 | ) : (
32 |
33 |
No Data 🚫
34 |
This page does not use React or Next.js.
35 |
36 | )}
37 | >
38 | )
39 | }
40 |
41 | export default Popup
42 |
--------------------------------------------------------------------------------
/src/contexts/PageDataProvider.jsx:
--------------------------------------------------------------------------------
1 | import { useRef, useState, useEffect } from 'react'
2 | import { createContext } from 'react'
3 |
4 | export const PageDataContext = createContext()
5 |
6 | export const PageDataProvider = ({ children }) => {
7 | const jsonRef = useRef(null)
8 | const [loading, setLoading] = useState(true)
9 | const [scrapedData, setScrapedData] = useState({
10 | url: '',
11 | stateType: '',
12 | jsonString: '{}',
13 | data: {},
14 | pageEnabled: false,
15 | })
16 |
/**
 * Asks the content script in the active tab for its scraped page data and
 * stores the answer in `scrapedData`.
 *
 * Always ends with `loading` set to false, including when there is no active
 * tab or the tab does not answer; in the latter case the stored state falls
 * back to the empty defaults instead of losing its fields.
 */
const scrapeData = async () => {
  setLoading(true)
  chrome.tabs.query({ active: true, lastFocusedWindow: true }, ([tab] = []) => {
    // The query can match no tab at all; there is nothing to message then
    if (tab?.id === undefined) {
      setLoading(false)
      return
    }

    chrome.tabs.sendMessage(tab.id, { type: 'retreive_dom' }, (res) => {
      // Reading lastError marks a failed delivery as handled; `res` is undefined in that case
      void chrome.runtime.lastError

      setScrapedData({
        // Defaults first so every field consumers read stays defined without a response
        stateType: '',
        jsonString: '{}',
        data: {},
        pageEnabled: false,
        url: tab.url ?? '',
        ...res,
      })
      setLoading(false)
    })
  })
}
29 |
30 | useEffect(() => {
31 | if (jsonRef.current && scrapedData.jsonString)
32 | jsonRef.current.textContent = scrapedData.jsonString
33 | }, [scrapedData])
34 |
35 | useEffect(() => {
36 | scrapeData()
37 | }, [])
38 |
39 | return (
40 |
48 | {children}
49 |
50 | )
51 | }
52 |
--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "nextscraper",
3 | "displayName": "NextScraper",
4 | "version": "1.0.0",
5 | "author": "Peter Rauscher",
6 | "description": "Easily view & export data from Next.js/React sites. Generate snippets in multiple languages to jumpstart your web scraping project.",
7 | "type": "module",
8 | "license": "MIT",
9 | "keywords": [
10 | "chrome-extension",
11 | "react",
12 | "nextjs",
13 | "web-scraping"
14 | ],
15 | "engines": {
16 | "node": ">=14.18.0"
17 | },
18 | "scripts": {
19 | "dev": "vite",
20 | "build": "vite build",
21 | "preview": "vite preview",
22 | "fmt": "prettier --write '**/*.{jsx,js,json,css,scss,md}'"
23 | },
24 | "dependencies": {
25 | "@fortawesome/fontawesome-svg-core": "^6.5.1",
26 | "@fortawesome/free-brands-svg-icons": "^6.5.1",
27 | "@fortawesome/free-solid-svg-icons": "^6.5.1",
28 | "@fortawesome/react-fontawesome": "^0.2.0",
29 | "clipboard": "^2.0.11",
30 | "highlight.js": "^11.9.0",
31 | "react": "^18.2.0",
32 | "react-dom": "^18.2.0"
33 | },
34 | "devDependencies": {
35 | "@crxjs/vite-plugin": "^2.0.0-beta.19",
36 | "@types/react": "^18.2.28",
37 | "@types/react-dom": "^18.2.13",
38 | "@vitejs/plugin-react": "^4.1.0",
39 | "autoprefixer": "^10.4.16",
40 | "daisyui": "^4.5.0",
41 | "glob": "^10.3.10",
42 | "postcss": "^8.4.32",
43 | "prettier": "^3.0.3",
44 | "tailwindcss": "^3.4.0",
45 | "vite": "^4.4.11"
46 | }
47 | }
48 |
--------------------------------------------------------------------------------
/src/components/Navbar.jsx:
--------------------------------------------------------------------------------
1 | import { FontAwesomeIcon } from '@fortawesome/react-fontawesome'
2 | import { faGithub } from '@fortawesome/free-brands-svg-icons'
3 | import { faSun, faMoon } from '@fortawesome/free-solid-svg-icons'
4 | import { useContext } from 'react'
5 | import { ThemeContext } from '../contexts/ThemeProvider'
6 | import { PageDataContext } from '../contexts/PageDataProvider'
7 |
// Opens `url` in a newly created browser tab.
const openLink = (url) => {
  const createProperties = { url }
  chrome.tabs.create(createProperties)
}
11 |
12 | export const Navbar = () => {
13 | const { theme, toggleTheme } = useContext(ThemeContext)
14 | const { pageEnabled, scrapedData } = useContext(PageDataContext)
15 |
16 | return (
17 |
18 |
19 |
25 |
26 |
27 |
28 | {pageEnabled ? `${scrapedData.stateType} Data Found ✅` : 'NextScraper 🚀'}
29 |
30 |
31 |
32 |
42 |
43 |
44 | )
45 | }
46 |
--------------------------------------------------------------------------------
/src/scripts/content.js:
--------------------------------------------------------------------------------
1 | // navigator.serviceWorker.controller.postMessage({ message: 'Page visited' })
2 |
// Answers requests sent to this content script:
//   'retreive_dom' -> the full scrape result
//   'update_icon'  -> only whether page data is present
// Any other message type is left unanswered.
chrome.runtime.onMessage.addListener((request, _sender, sendResponse) => {
  switch (request.type) {
    case 'retreive_dom':
      sendResponse(scrapePageData())
      break
    case 'update_icon':
      sendResponse({ pageEnabled: checkForPageData() })
      break
  }
})
10 |
/**
 * Reports whether the document contains any of the state scripts the scraper
 * looks for: `script#__NEXT_DATA__`, `script[data-name=query]`, or a script
 * whose text assigns `window.__PRELOADED_STATE__`.
 *
 * @returns {boolean} true when at least one of them is present.
 */
const checkForPageData = () => {
  if (document.querySelector('script#__NEXT_DATA__')) return true
  if (document.querySelector('script[data-name=query]')) return true

  const allScripts = Array.from(document.getElementsByTagName('script'))
  return allScripts.some((script) => script.text.includes('window.__PRELOADED_STATE__ = {'))
}
19 |
/**
 * Extracts the page's embedded state object, trying in order:
 *   1. Next.js – the text of `script#__NEXT_DATA__`
 *   2. React   – `script[data-name=query]`, everything after the first `=`
 *   3. Redux   – a script assigning `window.__PRELOADED_STATE__`, up to
 *                `window.__BUILD_CONTEXT__` when that follows
 *
 * @returns {{ jsonString: string, data: object, stateType: string, pageEnabled: boolean }}
 *   The parsed state with a pretty-printed copy, or the empty result
 *   (`'{}'`, `{}`, `''`, `false`) when nothing usable is found or the
 *   payload is not valid JSON. Never throws on malformed page content.
 */
const scrapePageData = () => {
  const emptyResult = { jsonString: '{}', data: {}, stateType: '', pageEnabled: false }
  const stripTrailingSemicolons = (source) => source.trim().replace(/;+$/, '')

  // Search for NextJS state object
  let jsonString = document.querySelector('script#__NEXT_DATA__')?.text
  let stateType = jsonString ? 'Next.js' : ''

  if (!jsonString) {
    // If NextJS data is not found, search for React data
    const queryScript = document.querySelector('script[data-name=query]')
    if (queryScript) {
      // Cut at the FIRST '=' only: the JSON payload itself may contain '=' characters
      const assignAt = queryScript.text.indexOf('=')
      jsonString =
        assignAt === -1 ? '' : stripTrailingSemicolons(queryScript.text.slice(assignAt + 1))
      stateType = 'React'
    } else {
      // If React data is not found, search for Redux data
      const reduxSource = Array.from(document.getElementsByTagName('script')).find((script) =>
        script.text.includes('window.__PRELOADED_STATE__ = {'),
      )?.text
      if (reduxSource) {
        jsonString = stripTrailingSemicolons(
          reduxSource
            .split('window.__PRELOADED_STATE__ =')[1]
            .split('window.__BUILD_CONTEXT__')[0],
        )
        stateType = 'Redux'
      }
    }
  }

  if (!jsonString) return emptyResult

  try {
    const data = JSON.parse(jsonString)
    return {
      jsonString: JSON.stringify(data, null, 2),
      data: data,
      stateType: stateType,
      pageEnabled: stateType !== '',
    }
  } catch (err) {
    // Page content is untrusted; an unparsable payload is treated as "no data"
    console.warn('NextScraper: could not parse embedded page data:', err)
    return emptyResult
  }
}
53 |
54 | scrapePageData()
55 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
2 | ## Install
3 |
4 | [![Chrome][Chrome-image]][Chrome-url]
5 |
6 | [Chrome-image]: https://img.shields.io/badge/Google_chrome-4285F4?style=for-the-badge&logo=Google-chrome&logoColor=white
7 | [Chrome-url]: https://chromewebstore.google.com/detail/nextscraper/kjlhnflincmlpkgahnidgebbngieobod
8 |
9 | ## Features
10 |
11 | - **Scrape Data from React/Next.js Sites:** Extract information from dynamic web applications instantly. Analyze and explore it right in NextScraper.
12 | - **Export Data:** Export data from Next.js, React, and Redux sites with ease. Download JSON files in one click.
13 | - **Code Snippet Generator:** Save time by letting NextScraper create fast code snippets that allow you to scrape data _**without**_ loading a headless browser.
14 |
15 | ## Privacy
16 |
17 | No data leaves your browser. Ever. This extension does not collect any analytics and the [source code is openly available on Github](https://github.com/marksantiago290/Web-Scraping-Extension.git). Feel free to build it yourself with the instructions below for your peace of mind.
18 |
19 | Read the full [Privacy Policy](/PrivacyPolicy.md) here.
20 |
21 | ## Building Locally
22 |
23 | 1. Install Node.js >= 14.18.0. Ideally use the latest LTS version.
24 | 2. Clone the Github repository.
25 | 3. Switch to the `publish` branch for the latest release.
26 | 4. Install dependencies and build:
27 | ```bash
28 | npm ci
29 | npm run build
30 | ```
31 | 5. Visit `chrome://extensions` in Google Chrome.
32 | 6. Enable developer mode and select `Load Unpacked`
33 | 7. Select the build folder that was created in the project directory.
34 | 8. The extension is now installed.
35 |
36 | ## Development
37 |
38 | ### Getting Started
39 |
40 | 1. Install Node.js >= 14.18.0. Ideally use the latest LTS version.
41 | 2. Fork or clone the repo.
42 | 3. In the project folder:
43 | ```bash
44 | npm ci
45 | npm run dev
46 | ```
47 | This should create a `build` folder in the project directory.
48 |
49 | 4. Visit `chrome://extensions` in Google Chrome.
50 | 5. Enable developer mode and select `Load Unpacked`
51 | 6. Select the build folder that was created.
52 | 7. Vite should live-reload any changes as you make them!
53 |
54 | ### Adding a Feature
55 | The ideal workflow looks something like this.
56 |
57 | 1. Fork the repository on GitHub.
58 | 2. Create a new branch describing the feature you'll work on.
59 | 3. Open a pull request to merge your branch with `main`.
60 | 4. Await/request review.
61 | 5. Your changes will be merged when approved.
62 |
--------------------------------------------------------------------------------
/src/components/PageProps.jsx:
--------------------------------------------------------------------------------
1 | import { useContext } from 'react'
2 | import { PageDataContext } from '../contexts/PageDataProvider'
3 | import { ClipboardContext } from '../contexts/ClipboardProvider'
4 |
5 | export const PageProps = () => {
6 | const { scrapedData } = useContext(PageDataContext)
7 | const { copyToClipboard } = useContext(ClipboardContext)
8 |
9 | const isNextJs = scrapedData.stateType && scrapedData.stateType === 'Next.js'
10 | const hasQuery = scrapedData.data && scrapedData.data.query
11 | const queryStringified = hasQuery ? JSON.stringify(scrapedData.data.query) : ''
12 |
13 | return (
14 |
15 |
16 |
17 | {scrapedData.url && (
18 | copyToClipboard(scrapedData.url)}
21 | >
22 | | URL |
23 | {scrapedData.url} |
24 |
25 | )}
26 | {scrapedData.data && scrapedData.data.page && isNextJs && (
27 | copyToClipboard(scrapedData.data.page)}
30 | >
31 | | page |
32 | {scrapedData.data.page} |
33 |
34 | )}
35 | {hasQuery && isNextJs && (
36 | copyToClipboard(queryStringified)}
39 | >
40 | | query |
41 | {queryStringified} |
42 |
43 | )}
44 | {scrapedData.data && scrapedData.data.assetPrefix && isNextJs && (
45 | copyToClipboard(scrapedData.data.assetPrefix)}
48 | >
49 | | assetPrefix |
50 | {scrapedData.data.assetPrefix} |
51 |
52 | )}
53 | {scrapedData.data && scrapedData.data.buildId && isNextJs && (
54 | copyToClipboard(scrapedData.data.buildId)}
57 | >
58 | | buildId |
59 | {scrapedData.data.buildId} |
60 |
61 | )}
62 |
63 |
64 |
65 | )
66 | }
67 |
--------------------------------------------------------------------------------
/src/components/Footer.jsx:
--------------------------------------------------------------------------------
1 | import { FontAwesomeIcon } from '@fortawesome/react-fontawesome'
2 | import { faCopy, faFileExport, faCode } from '@fortawesome/free-solid-svg-icons'
3 | import { useContext } from 'react'
4 | import { PageDataContext } from '../contexts/PageDataProvider'
5 | import { ClipboardContext } from '../contexts/ClipboardProvider'
6 |
7 | export const Footer = () => {
8 | const { scrapedData, pageEnabled, jsonRef } = useContext(PageDataContext)
9 | const { copyToClipboard } = useContext(ClipboardContext)
10 |
/**
 * Copies a starter scraping script for the current page URL to the clipboard.
 *
 * @param {string} language - 'javascript' (axios + cheerio) or 'python'
 *   (requests + BeautifulSoup); any other value is a no-op.
 */
const copySnippet = (language) => {
  // JSON.stringify emits an escaped, double-quoted literal, so an unusual URL
  // cannot break out of the string in the generated script
  const quotedUrl = JSON.stringify(scrapedData.url ?? '')
  let snippet = ''
  switch (language) {
    case 'javascript':
      // `cachedData` is declared with `let` because the generated script reassigns it
      snippet = `const axios = require("axios");
const cheerio = require("cheerio");

const targetUrl = ${quotedUrl};

axios
  .get(targetUrl)
  .then((response) => {
    if (response.status === 200) {
      const html = response.data;
      const $ = cheerio.load(html);
      // Check for a Next.js cache
      let cachedData = $("script#__NEXT_DATA__").text();
      if (!cachedData) {
        // Otherwise, check for a React state cache
        const queryScript = $("script[data-name=query]").text();
        const assignAt = queryScript.indexOf("=");
        if (assignAt !== -1) {
          cachedData = queryScript
            .slice(assignAt + 1)
            .trim()
            .replace(/;+$/, "");
        }
      }
      if (cachedData) {
        const parsedData = JSON.parse(cachedData);
        /**
         * TODO:
         * Do what you wish with the parsed Next.js/React data here!
         **/
      }
    } else {
      // You may need to spoof some headers in your request to bypass anti-scraping measures
      console.error("Failed to retrieve the page.");
    }
  })
  .catch((error) => {
    console.error("Error:", error.message);
  });
`
      break
    case 'python':
      // soup.find() returns None when the tag is absent, so each lookup is guarded
      snippet = `import requests
from bs4 import BeautifulSoup
import json

target_url = ${quotedUrl}

try:
    response = requests.get(target_url)
    if response.status_code == 200:
        html = response.text
        soup = BeautifulSoup(html, 'html.parser')

        # Check for a Next.js cache
        next_script = soup.find('script', id='__NEXT_DATA__')
        cached_data = next_script.text if next_script else None
        if not cached_data:
            # Otherwise, check for a React state cache
            query_script = soup.find('script', attrs={"data-name": "query"})
            if query_script and '=' in query_script.text:
                cached_data = query_script.text.split('=', 1)[1]

        if cached_data:
            # Remove any trailing semicolons
            cached_data = cached_data.strip().rstrip(';')
            # Extract JSON data
            parsed_data = json.loads(cached_data)

            # TODO: Do what you wish with the parsed Next.js/React data here!
        else:
            print("Next.js/React data not found on the page.")
    else:
        # You may need to spoof some headers in your request to bypass anti-scraping measures
        print("Failed to retrieve the page.")

except Exception as e:
    print("Error:", str(e))
`
      break
    default:
      return
  }
  copyToClipboard(snippet)
}
95 |
/**
 * Downloads the scraped JSON as `<statetype>_page_data_<timestamp>.json`
 * through a temporary anchor element. Does nothing when there is no data
 * (missing or `'{}'` jsonString).
 */
const exportToFile = () => {
  const { jsonString, stateType } = scrapedData
  // jsonString can be missing altogether, not just the empty '{}' placeholder
  if (!jsonString || jsonString === '{}') return

  const blob = new Blob([jsonString], {
    type: 'application/json',
  })
  const url = URL.createObjectURL(blob)
  const a = document.createElement('a')
  a.href = url
  a.download = `${(stateType || 'unknown')
    .toLowerCase()
    .replace('.', '')}_page_data_${Date.now()}.json`

  try {
    document.body.appendChild(a)
    a.click()
    document.body.removeChild(a)
  } finally {
    // Release the blob URL even if triggering the download failed
    URL.revokeObjectURL(url)
  }
}
112 |
113 | return (
114 |
149 | )
150 | }
151 |
--------------------------------------------------------------------------------