├── .gitattributes ├── LICENSE ├── manual-scrape.js ├── auto-scrape-batch.js ├── scrape-with-original-tweet.js └── README.md /.gitattributes: -------------------------------------------------------------------------------- 1 | # Auto detect text files and perform LF normalization 2 | * text=auto 3 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2025 Arjun Aditya 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /manual-scrape.js: -------------------------------------------------------------------------------- 1 | (() => { 2 | const scraped = new Set(); 3 | const results = []; 4 | 5 | const extractTweets = () => { 6 | const articles = document.querySelectorAll("article"); 7 | 8 | articles.forEach((article) => { 9 | const textEl = article.querySelector('div[data-testid="tweetText"]'); 10 | const userEl = article.querySelector('div[dir="ltr"] > span'); 11 | 12 | const statGroup = article.querySelector('div[role="group"]'); 13 | if (!statGroup) return; 14 | 15 | let replies = null, reposts = null, likes = null, views = null; 16 | 17 | const statElements = statGroup.querySelectorAll('[aria-label]'); 18 | statElements.forEach((el) => { 19 | const label = el.getAttribute("aria-label")?.toLowerCase() || ""; 20 | const match = label.match(/([\d.,Kk]+)/); 21 | const value = match ? match[1].replace(/,/g, "") : null; 22 | 23 | if (label.includes("reply")) replies = value; 24 | else if (label.includes("repost")) reposts = value; 25 | else if (label.includes("like")) likes = value; 26 | else if (label.includes("view")) views = value; 27 | }); 28 | 29 | const text = textEl?.innerText?.trim(); 30 | const username = userEl?.innerText?.trim(); 31 | 32 | if (text && username) { 33 | const id = `${username}::${text}`; 34 | if (!scraped.has(id)) { 35 | scraped.add(id); 36 | results.push({ username, text, replies, reposts, likes, views }); 37 | console.log(`@${username} — 💬 ${replies} 🔁 ${reposts} ❤️ ${likes} 👁️ ${views}\n> ${text}`); 38 | } 39 | } 40 | }); 41 | }; 42 | 43 | extractTweets(); 44 | 45 | const observer = new MutationObserver(() => { 46 | extractTweets(); 47 | }); 48 | 49 | observer.observe(document.body, { childList: true, subtree: true }); 50 | 51 | console.log("Scraper is live... 
just keep scrolling!"); 52 | console.log("Use `downloadTweets()` to save as json."); 53 | 54 | window.downloadTweets = () => { 55 | const blob = new Blob([JSON.stringify(results, null, 2)], { type: "application/json" }); 56 | const url = URL.createObjectURL(blob); 57 | const a = document.createElement("a"); 58 | a.href = url; 59 | a.download = "tweets_with_stats.json"; 60 | a.click(); 61 | URL.revokeObjectURL(url); 62 | const message = `Downloaded ${results.length} tweets as tweets_with_stats.json`; 63 | console.log(message); 64 | return message; 65 | }; 66 | 67 | })(); 68 | -------------------------------------------------------------------------------- /auto-scrape-batch.js: -------------------------------------------------------------------------------- 1 | (() => { 2 | window.currentChunk = []; 3 | const scraped = new Set(); 4 | let chunk = 1; 5 | const CHUNK_SIZE = 100; 6 | 7 | const saveChunk = () => { 8 | const blob = new Blob([JSON.stringify(window.currentChunk, null, 2)], { type: "application/json" }); 9 | const a = document.createElement("a"); 10 | a.href = URL.createObjectURL(blob); 11 | a.download = `tweets_${chunk++}.json`; 12 | a.click(); 13 | URL.revokeObjectURL(a.href); 14 | console.log(`💾 Saved ${CHUNK_SIZE} tweets as tweets_${chunk - 1}.json`); 15 | window.currentChunk = []; // 🔥 delete them from memory! 16 | }; 17 | 18 | const extractTweets = () => { 19 | const articles = document.querySelectorAll("article"); 20 | articles.forEach((article) => { 21 | const textEl = article.querySelector('div[data-testid="tweetText"]'); 22 | const userEl = article.querySelector('div[dir="ltr"] > span'); 23 | const statGroup = article.querySelector('div[role="group"]'); 24 | if (!textEl || !userEl || !statGroup) return; 25 | 26 | let replies = null, reposts = null, likes = null, views = null; 27 | statGroup.querySelectorAll('[aria-label]').forEach((el) => { 28 | const label = el.getAttribute("aria-label")?.toLowerCase() || ""; 29 | const value = label.match(/([\d.,Kk]+)/)?.[1]?.replace(/,/g, "") || null; 30 | if (label.includes("reply")) replies = value; 31 | else if (label.includes("repost")) reposts = value; 32 | else if (label.includes("like")) likes = value; 33 | else if (label.includes("view")) views = value; 34 | }); 35 | 36 | const text = textEl?.innerText?.trim(); 37 | const username = userEl?.innerText?.trim(); 38 | const id = `${username}::${text}`; 39 | if (text && username && !scraped.has(id)) { 40 | window.currentChunk.push({ username, text, replies, reposts, likes, views }); 41 | scraped.add(id); 42 | console.log(`[${window.currentChunk.length}] @${username}: ${text}`); 43 | if (window.currentChunk.length >= CHUNK_SIZE) saveChunk(); 44 | } 45 | }); 46 | }; 47 | 48 | const observer = new MutationObserver(() => extractTweets()); 49 | observer.observe(document.body, { childList: true, subtree: true }); 50 | 51 | window.scrollInterval = setInterval(() => window.scrollBy(0, 1000), 1500); 52 | 53 | window.stopScroll = () => { 54 | clearInterval(window.scrollInterval); 55 | if (window.currentChunk.length > 0) { 56 | const blob = new Blob([JSON.stringify(window.currentChunk, null, 2)], { type: "application/json" }); 57 | const a = document.createElement("a"); 58 | a.href = URL.createObjectURL(blob); 59 | a.download = `tweets_final_${window.currentChunk.length}.json`; 60 | a.click(); 61 | URL.revokeObjectURL(a.href); 62 | console.log("🛑 Final partial chunk saved."); 63 | } else { 64 | console.log("🛑 Stopped. 
No tweets left to save."); 65 | } 66 | }; 67 | 68 | console.log("🚀 Scraper started. Will auto-save every 100 tweets and flush memory each time."); 69 | })(); 70 | -------------------------------------------------------------------------------- /scrape-with-original-tweet.js: -------------------------------------------------------------------------------- 1 | (() => { 2 | window.currentChunk = []; 3 | const scraped = new Set(); 4 | let chunk = 1; 5 | const CHUNK_SIZE = 100; 6 | 7 | const saveChunk = () => { 8 | const blob = new Blob([JSON.stringify(window.currentChunk, null, 2)], { type: "application/json" }); 9 | const a = document.createElement("a"); 10 | a.href = URL.createObjectURL(blob); 11 | a.download = `tweets_${chunk++}.json`; 12 | a.click(); 13 | URL.revokeObjectURL(a.href); 14 | console.log(`💾 Saved ${CHUNK_SIZE} tweets as tweets_${chunk - 1}.json`); 15 | window.currentChunk = []; // 🔥 delete them from memory! 16 | }; 17 | 18 | const extractTweetId = (article) => { 19 | // Method 1: Try to find a link with tweet ID pattern 20 | const tweetLink = article.querySelector('a[href*="/status/"]'); 21 | if (tweetLink) { 22 | const href = tweetLink.getAttribute('href'); 23 | const match = href.match(/\/status\/(\d+)/); 24 | if (match) return match[1]; 25 | } 26 | 27 | // Method 2: Try to find time element with datetime attribute 28 | const timeEl = article.querySelector('time'); 29 | if (timeEl) { 30 | const nearestLink = timeEl.closest('a') || timeEl.parentElement?.querySelector('a'); 31 | if (nearestLink) { 32 | const href = nearestLink.getAttribute('href'); 33 | const match = href?.match(/\/status\/(\d+)/); 34 | if (match) return match[1]; 35 | } 36 | } 37 | 38 | // Method 3: Search all links in the article for status pattern 39 | const allLinks = article.querySelectorAll('a[href]'); 40 | for (const link of allLinks) { 41 | const href = link.getAttribute('href'); 42 | const match = href?.match(/\/status\/(\d+)/); 43 | if (match) return match[1]; 44 | } 45 | 46 | return null; 47 | }; 48 | 49 | const extractUsername = (article) => { 50 | // Method 1: Try to extract from any link that contains a username pattern 51 | const links = article.querySelectorAll('a[href]'); 52 | for (const link of links) { 53 | const href = link.getAttribute('href'); 54 | // Look for pattern like /username or /username/status/... 
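(i.e. grab the first path segment of a relative href; reserved routes like status, search, and home are filtered out just below)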
55 | const match = href?.match(/^\/([^\/]+)(?:\/|$)/); 56 | if (match && match[1] && !match[1].includes('status') && !match[1].includes('search') && !match[1].includes('home')) { 57 | return match[1]; 58 | } 59 | } 60 | 61 | // Method 2: Look for elements that might contain @username 62 | const spanElements = article.querySelectorAll('span'); 63 | for (const span of spanElements) { 64 | const text = span.innerText?.trim(); 65 | if (text && text.startsWith('@')) { 66 | return text.substring(1); // Remove @ symbol 67 | } 68 | } 69 | 70 | // Method 3: Try to find username in data attributes or other patterns 71 | const userLinks = article.querySelectorAll('a[href*="/"]'); 72 | for (const link of userLinks) { 73 | const href = link.getAttribute('href'); 74 | if (href?.startsWith('/') && !href.includes('/status/') && !href.includes('/search') && !href.includes('/home')) { 75 | const username = href.substring(1).split('/')[0]; 76 | if (username && username.length > 0 && !username.includes('?')) { 77 | return username; 78 | } 79 | } 80 | } 81 | 82 | return null; 83 | }; 84 | 85 | const extractDisplayName = (article) => { 86 | // Try to find the display name (full name) 87 | const nameSelectors = [ 88 | 'div[dir="ltr"] > span', 89 | 'a[role="link"] span', 90 | 'div[data-testid="User-Name"] span' 91 | ]; 92 | 93 | for (const selector of nameSelectors) { 94 | const element = article.querySelector(selector); 95 | if (element && element.innerText?.trim()) { 96 | const text = element.innerText.trim(); 97 | // Make sure it's not a username (doesn't start with @) 98 | if (!text.startsWith('@')) { 99 | return text; 100 | } 101 | } 102 | } 103 | 104 | return null; 105 | }; 106 | 107 | const extractTweets = () => { 108 | const articles = document.querySelectorAll("article"); 109 | articles.forEach((article) => { 110 | const textEl = article.querySelector('div[data-testid="tweetText"]'); 111 | const statGroup = article.querySelector('div[role="group"]'); 112 | 113 | if (!textEl || !statGroup) return; 114 | 115 | // Extract engagement stats 116 | let replies = null, reposts = null, likes = null, views = null; 117 | statGroup.querySelectorAll('[aria-label]').forEach((el) => { 118 | const label = el.getAttribute("aria-label")?.toLowerCase() || ""; 119 | const value = label.match(/([\d.,Kk]+)/)?.[1]?.replace(/,/g, "") || null; 120 | if (label.includes("reply")) replies = value; 121 | else if (label.includes("repost")) reposts = value; 122 | else if (label.includes("like")) likes = value; 123 | else if (label.includes("view")) views = value; 124 | }); 125 | 126 | // Extract basic info 127 | const text = textEl?.innerText?.trim(); 128 | const username = extractUsername(article); 129 | const displayName = extractDisplayName(article); 130 | const tweetId = extractTweetId(article); 131 | 132 | // Create tweet URL if we have the ID and username 133 | let tweetUrl = null; 134 | if (tweetId && username) { 135 | tweetUrl = `https://x.com/${username}/status/${tweetId}`; 136 | } 137 | 138 | const id = `${username}::${text}`; 139 | 140 | if (text && username && !scraped.has(id)) { 141 | const tweetData = { 142 | username, 143 | displayName, 144 | text, 145 | replies, 146 | reposts, 147 | likes, 148 | views, 149 | tweetId, 150 | tweetUrl 151 | }; 152 | 153 | window.currentChunk.push(tweetData); 154 | scraped.add(id); 155 | console.log(`[${window.currentChunk.length}] @${username} (${displayName}): ${text}`); 156 | if (tweetUrl) console.log(` 🔗 ${tweetUrl}`); 157 | 158 | if (window.currentChunk.length >= CHUNK_SIZE) 
saveChunk();
159 | }
160 | });
161 | };
162 | 
163 | const observer = new MutationObserver(() => extractTweets());
164 | observer.observe(document.body, { childList: true, subtree: true });
165 | 
166 | window.scrollInterval = setInterval(() => window.scrollBy(0, 1000), 1500);
167 | 
168 | window.stopScroll = () => {
169 | clearInterval(window.scrollInterval);
170 | observer.disconnect(); // Stop observing when done
171 | 
172 | if (window.currentChunk.length > 0) {
173 | const blob = new Blob([JSON.stringify(window.currentChunk, null, 2)], { type: "application/json" });
174 | const a = document.createElement("a");
175 | a.href = URL.createObjectURL(blob);
176 | a.download = `tweets_final_${window.currentChunk.length}.json`;
177 | a.click();
178 | URL.revokeObjectURL(a.href);
179 | console.log("🛑 Final partial chunk saved.");
180 | } else {
181 | console.log("🛑 Stopped. No tweets left to save.");
182 | }
183 | };
184 | 
185 | console.log("🚀 Enhanced scraper started. Will auto-save every 100 tweets with tweet URLs!");
186 | console.log("📝 Each tweet now includes: username, displayName, text, engagement stats, tweetId, and tweetUrl");
187 | console.log("⏹️ Call window.stopScroll() to stop and save remaining tweets");
188 | })();
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Scrape Tweets while Scrolling
2 | 
3 | Scrape tweets, complete with engagement stats, straight from your browser console while you scroll.
4 | 
5 | 
6 | 
7 | 
8 | 1. Open Chrome
9 | 
10 | 
11 | 2. Go to X.com
12 | 
13 | 
14 | 3. Open your browser console and paste:
15 | 
16 | ```js
17 | (() => {
18 | const scraped = new Set();
19 | const results = [];
20 | 
21 | const extractTweets = () => {
22 | const articles = document.querySelectorAll("article");
23 | 
24 | articles.forEach((article) => {
25 | const textEl = article.querySelector('div[data-testid="tweetText"]');
26 | const userEl = article.querySelector('div[dir="ltr"] > span');
27 | 
28 | const statGroup = article.querySelector('div[role="group"]');
29 | if (!statGroup) return;
30 | 
31 | let replies = null, reposts = null, likes = null, views = null;
32 | 
33 | const statElements = statGroup.querySelectorAll('[aria-label]');
34 | statElements.forEach((el) => {
35 | const label = el.getAttribute("aria-label")?.toLowerCase() || "";
36 | const match = label.match(/([\d.,Kk]+)/);
37 | const value = match ? match[1].replace(/,/g, "") : null;
38 | 
39 | if (label.includes("reply")) replies = value;
40 | else if (label.includes("repost")) reposts = value;
41 | else if (label.includes("like")) likes = value;
42 | else if (label.includes("view")) views = value;
43 | });
44 | 
45 | const text = textEl?.innerText?.trim();
46 | const username = userEl?.innerText?.trim();
47 | 
48 | if (text && username) {
49 | const id = `${username}::${text}`;
50 | if (!scraped.has(id)) {
51 | scraped.add(id);
52 | results.push({ username, text, replies, reposts, likes, views });
53 | console.log(`@${username} — 💬 ${replies} 🔁 ${reposts} ❤️ ${likes} 👁️ ${views}\n> ${text}`);
54 | }
55 | }
56 | });
57 | };
58 | 
59 | extractTweets();
60 | 
61 | const observer = new MutationObserver(() => {
62 | extractTweets();
63 | });
64 | 
65 | observer.observe(document.body, { childList: true, subtree: true });
66 | 
67 | console.log("Scraper is live... 
just keep scrolling!"); 68 | console.log("Use `downloadTweets()` to save as json."); 69 | 70 | window.downloadTweets = () => { 71 | const blob = new Blob([JSON.stringify(results, null, 2)], { type: "application/json" }); 72 | const url = URL.createObjectURL(blob); 73 | const a = document.createElement("a"); 74 | a.href = url; 75 | a.download = "tweets_with_stats.json"; 76 | a.click(); 77 | URL.revokeObjectURL(url); 78 | const message = `Downloaded ${results.length} tweets as tweets_with_stats.json`; 79 | console.log(message); 80 | return message; 81 | }; 82 | 83 | })(); 84 | ``` 85 | 86 | Voila you're done 87 | 88 | download via thia 89 | ```js 90 | downloadTweets() 91 | ``` 92 | 93 | 94 | very random but this the graphql endpoint 95 | 96 |
97 | X Graphql Endpoint 98 | 99 | ```bash 100 | https://x.com/i/api/graphql/0uQE4rvNofAr4pboHOZWVA/UserTweets?variables={ 101 | "userId": "1654221044503408640", 102 | "count": 20, 103 | "includePromotedContent": true, 104 | "withQuickPromoteEligibilityTweetFields": true, 105 | "withVoice": true 106 | }&features={ 107 | "rweb_video_screen_enabled": false, 108 | "payments_enabled": false, 109 | "profile_label_improvements_pcf_label_in_post_enabled": true, 110 | "rweb_tipjar_consumption_enabled": true, 111 | "verified_phone_label_enabled": true, 112 | "creator_subscriptions_tweet_preview_api_enabled": true, 113 | "responsive_web_graphql_timeline_navigation_enabled": true, 114 | "responsive_web_graphql_skip_user_profile_image_extensions_enabled": false, 115 | "premium_content_api_read_enabled": false, 116 | "communities_web_enable_tweet_community_results_fetch": true, 117 | "c9s_tweet_anatomy_moderator_badge_enabled": true, 118 | "responsive_web_grok_analyze_button_fetch_trends_enabled": false, 119 | "responsive_web_grok_analyze_post_followups_enabled": true, 120 | "responsive_web_jetfuel_frame": true, 121 | "responsive_web_grok_share_attachment_enabled": true, 122 | "articles_preview_enabled": true, 123 | "responsive_web_edit_tweet_api_enabled": true, 124 | "graphql_is_translatable_rweb_tweet_is_translatable_enabled": true, 125 | "view_counts_everywhere_api_enabled": true, 126 | "longform_notetweets_consumption_enabled": true, 127 | "responsive_web_twitter_article_tweet_consumption_enabled": true, 128 | "tweet_awards_web_tipping_enabled": false, 129 | "responsive_web_grok_show_grok_translated_post": false, 130 | "responsive_web_grok_analysis_button_from_backend": true, 131 | "creator_subscriptions_quote_tweet_preview_enabled": false, 132 | "freedom_of_speech_not_reach_fetch_enabled": true, 133 | "standardized_nudges_misinfo": true, 134 | "tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled": true, 135 | "longform_notetweets_rich_text_read_enabled": true, 136 | "longform_notetweets_inline_media_enabled": true, 137 | "responsive_web_grok_image_annotation_enabled": true, 138 | "responsive_web_grok_community_note_auto_translation_is_enabled": false, 139 | "responsive_web_enhance_cards_enabled": false 140 | }&fieldToggles={ 141 | "withArticlePlainText": false 142 | } 143 | ``` 144 | 145 | ```bash 146 | curl 'https://x.com/i/api/graphql/0uQE4rvNofAr4pboHOZWVA/UserTweets?variables=...' \ 147 | -H 'authorization: Bearer AAAAAAAAAAAAAAAAANRegergerAAAAAnNwIzUejRCOuH5...' \ 148 | -H 'x-csrf-token: ' \ 149 | -H 'cookie: auth_token=...; ct0=...' \ 150 | -H 'x-twitter-auth-type: OAuth2Session' \ 151 | -H 'x-twitter-active-user: yes' 152 | ``` 153 | 154 |
155 | 156 | you can do whatever the heck u want wit this info and pls use your web console it's love 157 | 158 | alsooooo 159 | 160 | Most likely i'm banned from twitter for this basic thing or maybe just winning + freedom of speech is a joke and flawed with their own standards. 161 | 162 | But again use this for educational purposes only and don't misuse this but one of my main reason to build this is to replicate a persona of my fav twitter creators and write tweets like them :3 163 | 164 | Wait are you lazy? You need Auto Scroll 165 | 166 | ### Auto Scroll with Batch Scraping 167 | 168 | 1. first step to start scraping 169 | ```js 170 | (() => { 171 | window.currentChunk = []; 172 | const scraped = new Set(); 173 | let chunk = 1; 174 | const CHUNK_SIZE = 100; 175 | 176 | const saveChunk = () => { 177 | const blob = new Blob([JSON.stringify(window.currentChunk, null, 2)], { type: "application/json" }); 178 | const a = document.createElement("a"); 179 | a.href = URL.createObjectURL(blob); 180 | a.download = `tweets_${chunk++}.json`; 181 | a.click(); 182 | URL.revokeObjectURL(a.href); 183 | console.log(`💾 Saved ${CHUNK_SIZE} tweets as tweets_${chunk - 1}.json`); 184 | window.currentChunk = []; // 🔥 delete them from memory! 185 | }; 186 | 187 | const extractTweets = () => { 188 | const articles = document.querySelectorAll("article"); 189 | articles.forEach((article) => { 190 | const textEl = article.querySelector('div[data-testid="tweetText"]'); 191 | const userEl = article.querySelector('div[dir="ltr"] > span'); 192 | const statGroup = article.querySelector('div[role="group"]'); 193 | if (!textEl || !userEl || !statGroup) return; 194 | 195 | let replies = null, reposts = null, likes = null, views = null; 196 | statGroup.querySelectorAll('[aria-label]').forEach((el) => { 197 | const label = el.getAttribute("aria-label")?.toLowerCase() || ""; 198 | const value = label.match(/([\d.,Kk]+)/)?.[1]?.replace(/,/g, "") || null; 199 | if (label.includes("reply")) replies = value; 200 | else if (label.includes("repost")) reposts = value; 201 | else if (label.includes("like")) likes = value; 202 | else if (label.includes("view")) views = value; 203 | }); 204 | 205 | const text = textEl?.innerText?.trim(); 206 | const username = userEl?.innerText?.trim(); 207 | const id = `${username}::${text}`; 208 | if (text && username && !scraped.has(id)) { 209 | window.currentChunk.push({ username, text, replies, reposts, likes, views }); 210 | scraped.add(id); 211 | console.log(`[${window.currentChunk.length}] @${username}: ${text}`); 212 | if (window.currentChunk.length >= CHUNK_SIZE) saveChunk(); 213 | } 214 | }); 215 | }; 216 | 217 | const observer = new MutationObserver(() => extractTweets()); 218 | observer.observe(document.body, { childList: true, subtree: true }); 219 | 220 | window.scrollInterval = setInterval(() => window.scrollBy(0, 1000), 1500); 221 | 222 | window.stopScroll = () => { 223 | clearInterval(window.scrollInterval); 224 | if (window.currentChunk.length > 0) { 225 | const blob = new Blob([JSON.stringify(window.currentChunk, null, 2)], { type: "application/json" }); 226 | const a = document.createElement("a"); 227 | a.href = URL.createObjectURL(blob); 228 | a.download = `tweets_final_${window.currentChunk.length}.json`; 229 | a.click(); 230 | URL.revokeObjectURL(a.href); 231 | console.log("🛑 Final partial chunk saved."); 232 | } else { 233 | console.log("🛑 Stopped. No tweets left to save."); 234 | } 235 | }; 236 | 237 | console.log("🚀 Scraper started. 
Will auto-save every 100 tweets and flush memory each time.");
238 | })();
239 | ```
240 | 
241 | 2. Stop scroll
242 | ```js
243 | stopScroll();
244 | ```
245 | 
246 | This downloads any tweets still sitting in memory as a final chunk. (If the run produced several chunk files, see the merge sketch at the end of this README for combining them.)
247 | 
248 | 
249 | 
250 | 
251 | 3. Cleanup (reset everything) ~ optional
252 | 
253 | ```js
254 | delete window.currentChunk;
255 | delete window.scrollInterval;
256 | delete window.stopScroll;
257 | ```
258 | 
259 | 
260 | 
261 | 
262 | ## Want the original tweet URL and the username in the scraped data?
263 | > Try this:
264 | ```js
265 | 
266 | (() => {
267 | window.currentChunk = [];
268 | const scraped = new Set();
269 | let chunk = 1;
270 | const CHUNK_SIZE = 100;
271 | 
272 | const saveChunk = () => {
273 | const blob = new Blob([JSON.stringify(window.currentChunk, null, 2)], { type: "application/json" });
274 | const a = document.createElement("a");
275 | a.href = URL.createObjectURL(blob);
276 | a.download = `tweets_${chunk++}.json`;
277 | a.click();
278 | URL.revokeObjectURL(a.href);
279 | console.log(`💾 Saved ${CHUNK_SIZE} tweets as tweets_${chunk - 1}.json`);
280 | window.currentChunk = []; // 🔥 delete them from memory!
281 | };
282 | 
283 | const extractTweetId = (article) => {
284 | // Method 1: Try to find a link with tweet ID pattern
285 | const tweetLink = article.querySelector('a[href*="/status/"]');
286 | if (tweetLink) {
287 | const href = tweetLink.getAttribute('href');
288 | const match = href.match(/\/status\/(\d+)/);
289 | if (match) return match[1];
290 | }
291 | 
292 | // Method 2: Try to find time element with datetime attribute
293 | const timeEl = article.querySelector('time');
294 | if (timeEl) {
295 | const nearestLink = timeEl.closest('a') || timeEl.parentElement?.querySelector('a');
296 | if (nearestLink) {
297 | const href = nearestLink.getAttribute('href');
298 | const match = href?.match(/\/status\/(\d+)/);
299 | if (match) return match[1];
300 | }
301 | }
302 | 
303 | // Method 3: Search all links in the article for status pattern
304 | const allLinks = article.querySelectorAll('a[href]');
305 | for (const link of allLinks) {
306 | const href = link.getAttribute('href');
307 | const match = href?.match(/\/status\/(\d+)/);
308 | if (match) return match[1];
309 | }
310 | 
311 | return null;
312 | };
313 | 
314 | const extractUsername = (article) => {
315 | // Method 1: Try to extract from any link that contains a username pattern
316 | const links = article.querySelectorAll('a[href]');
317 | for (const link of links) {
318 | const href = link.getAttribute('href');
319 | // Look for pattern like /username or /username/status/... 
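(i.e. grab the first path segment of a relative href; reserved routes like status, search, and home are filtered out just below)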
320 | const match = href?.match(/^\/([^\/]+)(?:\/|$)/); 321 | if (match && match[1] && !match[1].includes('status') && !match[1].includes('search') && !match[1].includes('home')) { 322 | return match[1]; 323 | } 324 | } 325 | 326 | // Method 2: Look for elements that might contain @username 327 | const spanElements = article.querySelectorAll('span'); 328 | for (const span of spanElements) { 329 | const text = span.innerText?.trim(); 330 | if (text && text.startsWith('@')) { 331 | return text.substring(1); // Remove @ symbol 332 | } 333 | } 334 | 335 | // Method 3: Try to find username in data attributes or other patterns 336 | const userLinks = article.querySelectorAll('a[href*="/"]'); 337 | for (const link of userLinks) { 338 | const href = link.getAttribute('href'); 339 | if (href?.startsWith('/') && !href.includes('/status/') && !href.includes('/search') && !href.includes('/home')) { 340 | const username = href.substring(1).split('/')[0]; 341 | if (username && username.length > 0 && !username.includes('?')) { 342 | return username; 343 | } 344 | } 345 | } 346 | 347 | return null; 348 | }; 349 | 350 | const extractDisplayName = (article) => { 351 | // Try to find the display name (full name) 352 | const nameSelectors = [ 353 | 'div[dir="ltr"] > span', 354 | 'a[role="link"] span', 355 | 'div[data-testid="User-Name"] span' 356 | ]; 357 | 358 | for (const selector of nameSelectors) { 359 | const element = article.querySelector(selector); 360 | if (element && element.innerText?.trim()) { 361 | const text = element.innerText.trim(); 362 | // Make sure it's not a username (doesn't start with @) 363 | if (!text.startsWith('@')) { 364 | return text; 365 | } 366 | } 367 | } 368 | 369 | return null; 370 | }; 371 | 372 | const extractTweets = () => { 373 | const articles = document.querySelectorAll("article"); 374 | articles.forEach((article) => { 375 | const textEl = article.querySelector('div[data-testid="tweetText"]'); 376 | const statGroup = article.querySelector('div[role="group"]'); 377 | 378 | if (!textEl || !statGroup) return; 379 | 380 | // Extract engagement stats 381 | let replies = null, reposts = null, likes = null, views = null; 382 | statGroup.querySelectorAll('[aria-label]').forEach((el) => { 383 | const label = el.getAttribute("aria-label")?.toLowerCase() || ""; 384 | const value = label.match(/([\d.,Kk]+)/)?.[1]?.replace(/,/g, "") || null; 385 | if (label.includes("reply")) replies = value; 386 | else if (label.includes("repost")) reposts = value; 387 | else if (label.includes("like")) likes = value; 388 | else if (label.includes("view")) views = value; 389 | }); 390 | 391 | // Extract basic info 392 | const text = textEl?.innerText?.trim(); 393 | const username = extractUsername(article); 394 | const displayName = extractDisplayName(article); 395 | const tweetId = extractTweetId(article); 396 | 397 | // Create tweet URL if we have the ID and username 398 | let tweetUrl = null; 399 | if (tweetId && username) { 400 | tweetUrl = `https://x.com/${username}/status/${tweetId}`; 401 | } 402 | 403 | const id = `${username}::${text}`; 404 | 405 | if (text && username && !scraped.has(id)) { 406 | const tweetData = { 407 | username, 408 | displayName, 409 | text, 410 | replies, 411 | reposts, 412 | likes, 413 | views, 414 | tweetId, 415 | tweetUrl 416 | }; 417 | 418 | window.currentChunk.push(tweetData); 419 | scraped.add(id); 420 | console.log(`[${window.currentChunk.length}] @${username} (${displayName}): ${text}`); 421 | if (tweetUrl) console.log(` 🔗 ${tweetUrl}`); 422 | 423 | if 
(window.currentChunk.length >= CHUNK_SIZE) saveChunk();
424 | }
425 | });
426 | };
427 | 
428 | const observer = new MutationObserver(() => extractTweets());
429 | observer.observe(document.body, { childList: true, subtree: true });
430 | 
431 | window.scrollInterval = setInterval(() => window.scrollBy(0, 1000), 1500);
432 | 
433 | window.stopScroll = () => {
434 | clearInterval(window.scrollInterval);
435 | observer.disconnect(); // Stop observing when done
436 | 
437 | if (window.currentChunk.length > 0) {
438 | const blob = new Blob([JSON.stringify(window.currentChunk, null, 2)], { type: "application/json" });
439 | const a = document.createElement("a");
440 | a.href = URL.createObjectURL(blob);
441 | a.download = `tweets_final_${window.currentChunk.length}.json`;
442 | a.click();
443 | URL.revokeObjectURL(a.href);
444 | console.log("🛑 Final partial chunk saved.");
445 | } else {
446 | console.log("🛑 Stopped. No tweets left to save.");
447 | }
448 | };
449 | 
450 | console.log("🚀 Enhanced scraper started. Will auto-save every 100 tweets with tweet URLs!");
451 | console.log("📝 Each tweet now includes: username, displayName, text, engagement stats, tweetId, and tweetUrl");
452 | console.log("⏹️ Call window.stopScroll() to stop and save remaining tweets");
453 | })();
454 | ```
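455 | 
456 | ## Bonus: working with the scraped JSON
457 | 
458 | The engagement counts come out as display strings (`"1.2K"`, `"304"`, or `null`). Here's a minimal normalizing sketch; `parseCount` is just a name I picked, and it only understands `K`/`M` suffixes. (Heads-up: the scraper regexes above only capture digits and a `K`, so counts in the millions may already arrive truncated.)
459 | 
460 | ```js
461 | // Sketch: turn X display counts into plain numbers ("1.2K" -> 1200).
462 | const parseCount = (value) => {
463 |   if (value == null) return null;
464 |   const match = String(value).match(/^([\d.]+)([KkMm])?$/);
465 |   if (!match) return null;
466 |   const n = parseFloat(match[1]);
467 |   if (!match[2]) return n;
468 |   return Math.round(n * (match[2].toLowerCase() === "k" ? 1e3 : 1e6));
469 | };
470 | 
471 | parseCount("1.2K"); // 1200
472 | parseCount("304");  // 304
473 | ```
474 | 
475 | And if the batch scraper left you with a pile of `tweets_1.json`, `tweets_2.json`, ... plus a `tweets_final_*.json`, here's a rough Node sketch to merge and dedupe them. It assumes the chunk files sit together in one folder; `merge-tweets.js` is just a name I picked.
476 | 
477 | ```js
478 | // Sketch: node merge-tweets.js <folder-with-chunk-files>
479 | const fs = require("fs");
480 | const path = require("path");
481 | 
482 | const dir = process.argv[2] || ".";
483 | const files = fs
484 |   .readdirSync(dir)
485 |   .filter((f) => /^tweets_.*\.json$/.test(f) && f !== "tweets_merged.json");
486 | 
487 | const seen = new Set();
488 | const merged = [];
489 | for (const file of files) {
490 |   for (const tweet of JSON.parse(fs.readFileSync(path.join(dir, file), "utf8"))) {
491 |     const id = `${tweet.username}::${tweet.text}`; // same dedupe key as the scrapers
492 |     if (!seen.has(id)) {
493 |       seen.add(id);
494 |       merged.push(tweet);
495 |     }
496 |   }
497 | }
498 | 
499 | fs.writeFileSync(path.join(dir, "tweets_merged.json"), JSON.stringify(merged, null, 2));
500 | console.log(`Merged ${merged.length} unique tweets from ${files.length} files.`);
501 | ```
--------------------------------------------------------------------------------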