├── .gitignore ├── README.md ├── screenshot.png ├── vercel.json ├── package.json ├── scrapCheck.js ├── index.js └── instagram_cookies.json /.gitignore: -------------------------------------------------------------------------------- 1 | node_modules 2 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/akpadia02/imposter-x/HEAD/README.md -------------------------------------------------------------------------------- /screenshot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/akpadia02/imposter-x/HEAD/screenshot.png -------------------------------------------------------------------------------- /vercel.json: -------------------------------------------------------------------------------- 1 | { 2 | "version": 2, 3 | "builds": [ 4 | { 5 | "src": "index.js", 6 | "use": "@vercel/node", 7 | "config": { 8 | "includeFiles": [ 9 | "node_modules/puppeteer-extra/**", 10 | "node_modules/puppeteer-extra-plugin-stealth/**" 11 | ] 12 | } 13 | } 14 | ], 15 | "routes": [ 16 | { 17 | "src": "/(.*)", 18 | "dest": "index.js" 19 | } 20 | ] 21 | } 22 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "selenium-scrapper", 3 | "version": "1.0.0", 4 | "main": "index.js", 5 | "type": "module", 6 | "scripts": { 7 | "test": "echo \"Error: no test specified\" && exit 1", 8 | "start": "node index.js", 9 | "dev": "nodemon index.js" 10 | }, 11 | "keywords": [], 12 | "author": "", 13 | "license": "ISC", 14 | "description": "", 15 | "dependencies": { 16 | "@wdio/cli": "^9.7.2", 17 | "@wdio/local-runner": "^9.7.2", 18 | "@wdio/mocha-framework": "^9.7.2", 19 | "@wdio/selenium-standalone-service": "^8.14.0", 20 | "cors": "^2.8.5", 21 | "dotenv": "^16.4.7", 22 | "express": "^4.21.2", 23 | "nodemon": "^3.1.9", 24 | "puppeteer": "^24.2.0", 25 | "puppeteer-extra": "^3.3.6", 26 | "puppeteer-extra-plugin-stealth": "^2.11.2", 27 | "selenium-webdriver": "^4.28.1" 28 | } 29 | } 30 | -------------------------------------------------------------------------------- /scrapCheck.js: -------------------------------------------------------------------------------- 1 | import * as fs from "fs"; 2 | import * as cheerio from "cheerio"; 3 | 4 | const scrapeFromHtml = (filePath) => { 5 | try { 6 | // Read the HTML file 7 | const htmlContent = filepath 8 | 9 | // Load the HTML into Cheerio 10 | const $ = cheerio.load(htmlContent); 11 | 12 | // Find the span elements containing the numbers 13 | const stats = $("header").find("span.x5n08af"); 14 | 15 | if (stats.length >= 3) { 16 | const posts = $(stats[0]).text(); 17 | const followers = $(stats[1]).text(); 18 | const following = $(stats[2]).text(); 19 | 20 | console.log(`Followers: ${followers}`); 21 | console.log(`Following: ${following}`); 22 | console.log(`Posts: ${posts}`); 23 | } else { 24 | console.error("Could not find the necessary data. Check the selectors."); 25 | } 26 | } catch (error) { 27 | console.error("Error reading or parsing the file:", error); 28 | } 29 | }; 30 | 31 | // Example usage: Pass the HTML file path as an argument 32 | const filePath = "instagram_profile.html"; // Replace with your actual HTML file path 33 | scrapeFromHtml(filePath); 34 | -------------------------------------------------------------------------------- /index.js: -------------------------------------------------------------------------------- 1 | import express from "express"; 2 | import cors from "cors"; 3 | import puppeteer from "puppeteer-extra"; 4 | import StealthPlugin from "puppeteer-extra-plugin-stealth"; 5 | import * as cheerio from "cheerio"; 6 | import fs from "fs"; 7 | 8 | puppeteer.use(StealthPlugin()); 9 | 10 | const app = express(); 11 | const PORT = process.env.PORT || 3000; 12 | const username = "im.osterx.in"; 13 | const password = "imposter@15#12"; 14 | const cookiesFilePath = "./instagram_cookies.json"; 15 | 16 | // Enable CORS and JSON body parsing 17 | app.use(cors()); 18 | app.use(express.json()); 19 | 20 | const scrapeInstagram = async (profileUrl) => { 21 | const browser = await puppeteer.launch({ headless: true }); 22 | const page = await browser.newPage(); 23 | 24 | await page.setUserAgent( 25 | "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36" 26 | ); 27 | await page.setViewport({ width: 1280, height: 800 }); 28 | 29 | if (fs.existsSync(cookiesFilePath)) { 30 | const cookies = JSON.parse(fs.readFileSync(cookiesFilePath, "utf8")); 31 | await page.setCookie(...cookies); 32 | } 33 | 34 | await page.goto("https://www.instagram.com/accounts/login/", { 35 | waitUntil: "networkidle2", 36 | }); 37 | 38 | if (page.url() !== "https://www.instagram.com/") { 39 | await page.waitForSelector('input[name="username"]', { visible: true }); 40 | await page.type('input[name="username"]', username); 41 | await page.type('input[name="password"]', password); 42 | await page.click('button[type="submit"]'); 43 | await page.waitForNavigation({ waitUntil: "networkidle2" }); 44 | 45 | const cookies = await page.cookies(); 46 | fs.writeFileSync(cookiesFilePath, JSON.stringify(cookies, null, 2)); 47 | } 48 | 49 | await page.goto(profileUrl, { waitUntil: "networkidle2" }); 50 | await page.waitForSelector("body"); 51 | 52 | const htmlContent = await page.content(); 53 | const $ = cheerio.load(htmlContent); 54 | 55 | let followers = $("a[href$='/followers/'] > span").text().trim(); 56 | let following = $("a[href$='/following/'] > span").text().trim(); 57 | 58 | await browser.close(); 59 | return { followers: followers || "Not Found", following: following || "Not Found" }; 60 | }; 61 | 62 | // POST endpoint to scrape Instagram profile 63 | app.post("/scrape", async (req, res) => { 64 | const { profile } = req.body; // Accept profile URL from request body 65 | if (!profile) { 66 | return res.status(400).json({ error: "Profile URL is required" }); 67 | } 68 | 69 | try { 70 | const data = await scrapeInstagram(profile); 71 | res.json(data); 72 | } catch (error) { 73 | res.status(500).json({ error: "Failed to scrape profile", details: error.message }); 74 | } 75 | }); 76 | 77 | app.listen(PORT, () => { 78 | console.log(`🚀 Server running on http://localhost:${PORT}`); 79 | }); 80 | -------------------------------------------------------------------------------- /instagram_cookies.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "name": "ds_user_id", 4 | "value": "72541531538", 5 | "domain": ".instagram.com", 6 | "path": "/", 7 | "expires": 1746614979.677732, 8 | "size": 21, 9 | "httpOnly": false, 10 | "secure": true, 11 | "session": false, 12 | "priority": "Medium", 13 | "sameParty": false, 14 | "sourceScheme": "Secure" 15 | }, 16 | { 17 | "name": "rur", 18 | "value": "\"HIL\\05472541531538\\0541770375017:01f772b6d7130d0a4c38d42f57579979473b612e0c6efcbff6d3d31ee5f1edc6318cac18\"", 19 | "domain": ".instagram.com", 20 | "path": "/", 21 | "expires": -1, 22 | "size": 110, 23 | "httpOnly": true, 24 | "secure": true, 25 | "session": true, 26 | "sameSite": "Lax", 27 | "priority": "Medium", 28 | "sameParty": false, 29 | "sourceScheme": "Secure" 30 | }, 31 | { 32 | "name": "csrftoken", 33 | "value": "NRNYJV9paxDyrPIT3jdFOyoGOBOgxAEQ", 34 | "domain": ".instagram.com", 35 | "path": "/", 36 | "expires": 1770288579.677384, 37 | "size": 41, 38 | "httpOnly": false, 39 | "secure": true, 40 | "session": false, 41 | "priority": "Medium", 42 | "sameParty": false, 43 | "sourceScheme": "Secure" 44 | }, 45 | { 46 | "name": "datr", 47 | "value": "oJOkZyXeLMBf_aLKsivlbiQm", 48 | "domain": ".instagram.com", 49 | "path": "/", 50 | "expires": 1773398906.610287, 51 | "size": 28, 52 | "httpOnly": true, 53 | "secure": true, 54 | "session": false, 55 | "sameSite": "None", 56 | "priority": "Medium", 57 | "sameParty": false, 58 | "sourceScheme": "Secure" 59 | }, 60 | { 61 | "name": "ig_did", 62 | "value": "DBA233D6-315D-49DC-9149-A18860BF652F", 63 | "domain": ".instagram.com", 64 | "path": "/", 65 | "expires": 1770374911.436698, 66 | "size": 42, 67 | "httpOnly": true, 68 | "secure": true, 69 | "session": false, 70 | "priority": "Medium", 71 | "sameParty": false, 72 | "sourceScheme": "Secure" 73 | }, 74 | { 75 | "name": "sessionid", 76 | "value": "72541531538%3Az3PpPATq98qPI3%3A21%3AAYfiw9J4yG4rQUtu0GeppqP5nbi2pieJzMho4LO7oA", 77 | "domain": ".instagram.com", 78 | "path": "/", 79 | "expires": 1770374979.677948, 80 | "size": 87, 81 | "httpOnly": true, 82 | "secure": true, 83 | "session": false, 84 | "priority": "Medium", 85 | "sameParty": false, 86 | "sourceScheme": "Secure" 87 | }, 88 | { 89 | "name": "dpr", 90 | "value": "1.0000000149011612", 91 | "domain": ".instagram.com", 92 | "path": "/", 93 | "expires": 1739443779, 94 | "size": 21, 95 | "httpOnly": false, 96 | "secure": true, 97 | "session": false, 98 | "sameSite": "None", 99 | "priority": "Medium", 100 | "sameParty": false, 101 | "sourceScheme": "Secure" 102 | }, 103 | { 104 | "name": "wd", 105 | "value": "1280x800", 106 | "domain": ".instagram.com", 107 | "path": "/", 108 | "expires": 1739443756, 109 | "size": 10, 110 | "httpOnly": false, 111 | "secure": true, 112 | "session": false, 113 | "sameSite": "Lax", 114 | "priority": "Medium", 115 | "sameParty": false, 116 | "sourceScheme": "Secure" 117 | }, 118 | { 119 | "name": "mid", 120 | "value": "Z6SToAALAAFEv-hfzZ0fQLkTWy7i", 121 | "domain": ".instagram.com", 122 | "path": "/", 123 | "expires": 1773398908, 124 | "size": 31, 125 | "httpOnly": false, 126 | "secure": true, 127 | "session": false, 128 | "priority": "Medium", 129 | "sameParty": false, 130 | "sourceScheme": "Secure" 131 | }, 132 | { 133 | "name": "ig_nrcb", 134 | "value": "1", 135 | "domain": ".instagram.com", 136 | "path": "/", 137 | "expires": 1770374908.109154, 138 | "size": 8, 139 | "httpOnly": false, 140 | "secure": true, 141 | "session": false, 142 | "priority": "Medium", 143 | "sameParty": false, 144 | "sourceScheme": "Secure" 145 | } 146 | ] --------------------------------------------------------------------------------