├── extract_tweets.js ├── extract_urls.js ├── extract_likes.js ├── extract_usernames.js ├── username_collector.js ├── likes_collector.js ├── README.md └── tweet_collector.js /extract_tweets.js: -------------------------------------------------------------------------------- 1 | var dictstring = JSON.stringify(dict); 2 | var myBlob = new Blob([dictstring], {type: 'text/plain'}); 3 | var url = window.URL.createObjectURL(myBlob); 4 | var anchor = document.createElement("a"); 5 | anchor.href = url; 6 | anchor.download = "tweets.txt"; 7 | anchor.click(); 8 | window.URL.revokeObjectURL(url); -------------------------------------------------------------------------------- /extract_urls.js: -------------------------------------------------------------------------------- 1 | var urlString = Array.from(window.tweetCollectorUrlSet).join('\n'); 2 | var myBlob = new Blob([urlString], {type: 'text/plain'}); 3 | var url = window.URL.createObjectURL(myBlob); 4 | var anchor = document.createElement("a"); 5 | anchor.href = url; 6 | anchor.download = "urls.txt"; 7 | anchor.click(); 8 | window.URL.revokeObjectURL(url); 9 | -------------------------------------------------------------------------------- /extract_likes.js: -------------------------------------------------------------------------------- 1 | var itemCountsString = JSON.stringify(itemCounts, null, 2); 2 | 3 | var myBlob = new Blob([itemCountsString], {type: 'application/json'}); 4 | var url = window.URL.createObjectURL(myBlob); 5 | var anchor = document.createElement("a"); 6 | anchor.href = url; 7 | anchor.download = "itemCounts.json"; // Change the file name to reflect the content 8 | anchor.click(); 9 | window.URL.revokeObjectURL(url); -------------------------------------------------------------------------------- /extract_usernames.js: -------------------------------------------------------------------------------- 1 | var followers = "" 2 | set.forEach(function(values) 3 | { 4 | followers+=values+"\n" 5 | }); 6 | var 
myBlob = new Blob([followers], {type: 'text/plain'}); 7 | var url = window.URL.createObjectURL(myBlob); 8 | var anchor = document.createElement("a"); 9 | anchor.href = url; 10 | anchor.download = "followers.txt"; 11 | anchor.click(); 12 | window.URL.revokeObjectURL(url); -------------------------------------------------------------------------------- /username_collector.js: -------------------------------------------------------------------------------- 1 | var i = 1; 2 | var set = new Set(); 3 | 4 | function exit() { 5 | window.collectedUsernames = Array.from(set); 6 | clearTimeout(window.usernameCollectorLoopTimeout); 7 | } 8 | 9 | function loop() { 10 | window.usernameCollectorLoopTimeout = setTimeout(function() { 11 | window.scrollBy(0,1000); 12 | var element = document.getElementsByClassName("css-901oao css-bfa6kz r-14j79pv r-18u37iz r-37j5jr r-a023e6 r-16dba41 r-rjixqe r-bcqeeo r-qvutc0"); 13 | for(var j=0;j { 15 | if (div.getAttribute('data-testid') && div.getAttribute('data-testid').includes('UserAvatar-Container-') && div.closest(`[role="list"]`)) { 16 | var itemId = div.getAttribute('data-testid').replace('UserAvatar-Container-', ""); 17 | if (itemCounts[itemId]) { 18 | itemCounts[itemId]++; 19 | } else { 20 | itemCounts[itemId] = 1; 21 | } 22 | } 23 | }); 24 | i++; 25 | if (i < 10000) { 26 | loop(); 27 | } else { 28 | exit(); 29 | } 30 | }, 500) 31 | } 32 | loop(); 33 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Twitter JS Scraper 2 | [![License: MIT](https://img.shields.io/badge/License-MIT-green.svg)](https://opensource.org/licenses/MIT) 3 | ## Introduction 4 | There are many tools available for collecting tweets. Some of these tools make use of the official Twitter API, which has a number of limitations. Other tools collect tweets using selenium and Python. 
This repository allows you to collect tweets using just your web browser. 5 | 6 | ## Usage 7 | - Open your browser. For me, it is Firefox. 8 | - Search on Twitter for the phrase you want. As an example, I search for [#ElonMusk](https://twitter.com/search?q=%23ElonMusk&src=typeahead_click&f=live). 9 | - Click the Latest tab to see the latest tweets. Advanced Search can be used as well. 10 | - Right-click the screen and click Inspect (Q). 11 | - Then click the Console tab. 12 | 13 | ### Tweet collector 14 | 15 | Use the code in the [`tweet_collector.js`](https://github.com/vahidbaghi/twitter-js-scraper/blob/main/tweet_collector.js) file to collect tweet text, number of likes, retweets, replies, tweet ID (Status), time, and tweet URL (which contains the username). Copy the code and paste it into the Console tab, and then press Enter. By doing this, the script will run and automatically scroll the page and collect tweets. 16 | 17 | To stop the collection process at any time, you can type `exit()` in the console. This will stop the automatic scrolling and make the collected data available through window variables. Otherwise, the script will continue until it reaches the scroll limit. In either case, you can then use [`extract_tweets.js`](https://github.com/vahidbaghi/twitter-js-scraper/blob/main/extract_tweets.js) to save the collected tweets. 18 | 19 | In the [`tweet_collector.js`](https://github.com/vahidbaghi/twitter-js-scraper/blob/main/tweet_collector.js) file, the number 100000 is the maximum number of times the page is scrolled, and the number 500 is the delay between scrolls in milliseconds. Adjust these values to suit your needs. 20 | 21 | ### Username collector 22 | 23 | To collect the usernames of: 24 | - People who liked a tweet 25 | - People who retweeted a tweet 26 | - Followers of a person 27 | - Accounts a person is following 28 | Use the code in the [`username_collector.js`](https://github.com/vahidbaghi/twitter-js-scraper/blob/main/username_collector.js) file. 
For example, I open the [Elon Musk following](https://twitter.com/elonmusk/following) page. Then paste the above code in the Console tab and then press Enter. You can stop the collection at any time by typing `exit()` in the console. 29 | 30 | ### Extract usernames 31 | 32 | Use the code in the [`extract_usernames.js`](https://github.com/vahidbaghi/twitter-js-scraper/blob/main/extract_usernames.js) file to save the collected usernames. 33 | 34 | If you open the page of people who liked / retweeted a tweet and use the code inside [`username_collector.js`](https://github.com/vahidbaghi/twitter-js-scraper/blob/main/username_collector.js) file, you will notice that the scrolling does not work properly. This problem is solved using the following method: 35 | 36 | ![](https://i.imgur.com/07w6i5V.gif) 37 | 38 | ## License 39 | 40 | The project is available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT). 41 | -------------------------------------------------------------------------------- /tweet_collector.js: -------------------------------------------------------------------------------- 1 | if (typeof window.tweetCollectorDict === 'undefined') { 2 | window.tweetCollectorDict = {}; 3 | } 4 | if (typeof window.tweetCollectorUrlSet === 'undefined') { 5 | window.tweetCollectorUrlSet = new Set(); 6 | } 7 | 8 | function collectTweetData() { 9 | let tweetElements = document.querySelectorAll('[data-testid="tweet"]'); 10 | tweetElements.forEach(element => { 11 | try { 12 | let replyElement = element.querySelector('[data-testid="reply"]'); 13 | let retweetElement = element.querySelector('[data-testid="retweet"]'); 14 | let likeElement = element.querySelector('[data-testid="like"]'); 15 | let timeElement = element.querySelector('time'); 16 | let textElement = element.querySelector('[data-testid="tweetText"]'); 17 | let tweetLink = element.querySelector('a[href*="/status/"]'); 18 | 19 | let reply = replyElement ? 
replyElement.textContent.match(/(\d+)/) : null; 20 | let retweet = retweetElement ? retweetElement.textContent.match(/(\d+)/) : null; 21 | let like = likeElement ? likeElement.textContent.match(/(\d+)/) : null; 22 | let time = timeElement ? timeElement.getAttribute('datetime') : null; 23 | let status = tweetLink ? tweetLink.href.match(/\/status\/(\d+)/) : null; 24 | 25 | if (textElement && time && tweetLink) { 26 | let href = tweetLink.getAttribute('href'); 27 | let userId = element.querySelector('a[href^="/"][role="link"]').getAttribute('href').replace('/', ''); 28 | let fullUrl = `https://x.com${href.replace('/i/status/', `/${userId}/status/`)}`; 29 | 30 | if (!fullUrl.match(/\/(photo|video|media)\/\d+$/)) { 31 | window.tweetCollectorUrlSet.add(fullUrl); 32 | window.tweetCollectorDict[textElement.textContent.split("\n")[0] + "-" + time] = { 33 | "like": like ? like[1] : "0", 34 | "reply": reply ? reply[1] : "0", 35 | "retweet": retweet ? retweet[1] : "0", 36 | "time": time, 37 | "text": textElement.textContent, 38 | "status": status ? status[1] : "", 39 | "url": fullUrl 40 | }; 41 | } 42 | } 43 | } catch (error) { 44 | console.log(error); 45 | } 46 | }); 47 | } 48 | 49 | function exit() { 50 | window.collectedUrls = Array.from(window.tweetCollectorUrlSet); 51 | clearTimeout(window.tweetCollectorLoopTimeout); 52 | } 53 | 54 | function loop() { 55 | let i = 0; 56 | function iterate() { 57 | window.tweetCollectorLoopTimeout = setTimeout(function() { 58 | window.scrollBy(0, 1000); 59 | collectTweetData(); 60 | 61 | i++; 62 | if (i < 100000) { 63 | iterate(); 64 | } else { 65 | exit(); 66 | } 67 | }, 500); 68 | } 69 | iterate(); 70 | } 71 | 72 | if (typeof window.tweetCollectorLoopTimeout === 'undefined') { 73 | loop(); 74 | } 75 | --------------------------------------------------------------------------------