├── .env
├── .gitignore
├── .idea
│   ├── Scweet.iml
│   ├── inspectionProfiles
│   │   └── profiles_settings.xml
│   ├── misc.xml
│   ├── modules.xml
│   ├── vcs.xml
│   └── workspace.xml
├── LICENSE.txt
├── README.md
├── Scweet.egg-info
│   ├── PKG-INFO
│   ├── SOURCES.txt
│   ├── dependency_links.txt
│   ├── requires.txt
│   └── top_level.txt
├── Scweet
│   ├── __init__.py
│   ├── __version__.py
│   ├── const.py
│   ├── debug.log
│   ├── mailtm.py
│   ├── scweet.py
│   ├── scweet_v1.8.py
│   ├── user.py
│   └── utils.py
├── example.py
├── images
│   ├── 1_1.jpg
│   ├── 2_1.jpg
│   ├── 3_1.jpg
│   ├── 4_1.jpg
│   ├── 5_1.jpg
│   ├── 6_1.jpg
│   ├── 7_1.jpg
│   └── 8_1.jpg
├── requirements.txt
├── setup.cfg
└── setup.py

/.env:
--------------------------------------------------------------------------------
1 | EMAIL=''
2 | PASSWORD=''
3 | EMAIL_PASSWORD=''
4 | USERNAME=''
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | # Ignore packaging metadata
 2 | *.egg-info/
 3 | dist/
 4 | build/
 5 |
 6 | # Ignore IDE settings
 7 | .idea/
 8 |
 9 | # Ignore images if not used in README
10 | images/
11 |
12 | # Ignore specific files inside Scweet/
13 | Scweet/debug.log
14 | Scweet/scweet_v1.8.py
15 | Scweet/user.py
16 |
--------------------------------------------------------------------------------
/.idea/Scweet.iml:
--------------------------------------------------------------------------------
(XML content not captured in this dump)
--------------------------------------------------------------------------------
/.idea/inspectionProfiles/profiles_settings.xml:
--------------------------------------------------------------------------------
(XML content not captured in this dump)
--------------------------------------------------------------------------------
/.idea/misc.xml:
--------------------------------------------------------------------------------
(XML content not captured in this dump)
--------------------------------------------------------------------------------
/.idea/modules.xml:
--------------------------------------------------------------------------------
(XML content not captured in this dump)
--------------------------------------------------------------------------------
/.idea/vcs.xml:
--------------------------------------------------------------------------------
(XML content not captured in this dump)
--------------------------------------------------------------------------------
/.idea/workspace.xml:
--------------------------------------------------------------------------------
(XML content not captured in this dump)
--------------------------------------------------------------------------------
/LICENSE.txt:
--------------------------------------------------------------------------------
1 | MIT License
2 | Copyright (c) 2020 Altimis Teams
3 | Permission is hereby granted, free of charge, to any person obtaining a copy
4 | of this software and associated documentation files (the "Software"), to deal
5 | in the Software without restriction, including without limitation the rights
6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7 | copies of the Software, and to permit persons to whom the Software is
8 | furnished to do so, subject to the following conditions:
9 | The above copyright notice and this 
permission notice shall be included in all 10 | copies or substantial portions of the Software. 11 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 12 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 13 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 14 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 15 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 16 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 17 | SOFTWARE. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # 🐦 Scweet: A Simple and Unlimited Twitter Scraper in Python 2 | 3 | [![Scweet Actor Status](https://apify.com/actor-badge?actor=altimis/scweet)](https://apify.com/altimis/scweet) 4 | [![PyPI Downloads](https://static.pepy.tech/badge/scweet/month)](https://pepy.tech/projects/scweet) 5 | [![PyPI Version](https://img.shields.io/pypi/v/scweet.svg)](https://pypi.org/project/scweet/) 6 | [![License](https://img.shields.io/github/license/Altimis/scweet)](https://github.com/Altimis/scweet/blob/main/LICENSE) 7 | 8 | > **Note:** Scweet is **not affiliated with Twitter/X**. Use responsibly and lawfully. 9 | 10 | --- 11 | 12 | ## 🚀 Scweet on Apify: Cloud-Powered Scraping 13 | 14 | For heavy-duty scraping, we recommend using [**Scweet on Apify**](https://apify.com/altimis/scweet?fpr=a40q9&fp_sid=jeb97) – a cloud-based solution that offers: 15 | - **Zero setup:** No need to install or maintain infrastructure. 16 | - **Incredible Speed:** Up to **1000 tweets per minute**. 17 | - **High Reliability:** Managed and isolated runs for consistent performance. 18 | - **Free Usage Tier:** Get started for free with a generous quota—perfect for experiments, small projects, or learning how Scweet works. Once you exceed the free quota, you'll pay only **$0.30 per 1,000 tweets**. 19 | 20 | [![Run on Apify](https://apify.com/static/run-on-apify-button.svg)](https://apify.com/altimis/scweet?fpr=a40q9&fp_sid=jeb97) 21 | 22 | --- 23 | 24 | ## 🚀 Recent X Platform Changes & Scweet v3 Update 25 | 26 | Scweet has recently encountered challenges due to major changes on **X (formerly Twitter)**. In response, we’re excited to announce the new **Scweet v3** release! 27 | 28 | ### ✨ What’s New in v3: 29 | - ✅ Fully **asynchronous architecture** for faster, smoother scraping 30 | - 🧠 **No more manual Chromedriver setup** – Scweet handles Chromium internally with **[Nodriver](https://github.com/ultrafunkamsterdam/nodriver)** 31 | - 🚀 Enhanced for **personal and research-level scraping** 32 | - 🧑‍🤝‍🧑 **Follower & following scraping is back!** (see below 👇) 33 | 34 | --- 35 | 36 | ## 📌 What is Scweet? 37 | 38 | Scweet is a Python-based scraping tool designed to fetch tweets and user data **without relying on traditional Twitter APIs**, which have become increasingly restricted. 39 | 40 | With Scweet, you can: 41 | - Scrape tweets by keywords, hashtags, mentions, accounts, or timeframes 42 | - Get detailed user profile information 43 | - ✅ Retrieve followers, following, and verified followers! 44 | 45 | --- 46 | 47 | ## 🔧 Key Features 48 | 49 | ### 🐤 `scrape()` – Tweet Scraper 50 | 51 | Scrape tweets between two dates using keywords, hashtags, mentions, or specific accounts. 
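For example, a minimal call looks like the sketch below (the `scweet` object is the configured `Scweet` instance from the "Class Initialization & Configuration" section further down; the argument values are illustrative):

```python
# Minimal sketch: English tweets about bitcoin posted during one week of October 2022.
# Assumes `scweet` was created as shown in "Class Initialization & Configuration" below.
results = scweet.scrape(
    since="2022-10-01",
    until="2022-10-06",
    words=["bitcoin"],
    lang="en",
    limit=20,
    save_dir="outputs",
)
print(len(results))  # results are also written to a CSV file inside `outputs/`
```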
52 | 53 | **✅ Available arguments include:** 54 | ```python 55 | - since, until 56 | - words 57 | - from_account, to_account, mention_account 58 | - hashtag, lang 59 | - limit, display_type, resume 60 | - filter_replies, proximity, geocode 61 | - minlikes, minretweets, minreplies 62 | - save_dir, custom_csv_name 63 | ``` 64 | 65 | --- 66 | 67 | ### 👤 `get_user_information()` – User Info Scraper 68 | 69 | Fetch profile details for a list of handles. Returns a dictionary with: 70 | - `username`, `verified_followers` 71 | - `following`, `location`, `website`, `join_date`, `description` 72 | 73 | **🧩 Arguments:** 74 | ```python 75 | - handles # List of Twitter/X handles 76 | - login (bool) # Required for complete data 77 | ``` 78 | 79 | --- 80 | 81 | ### 🧑‍🤝‍🧑 `get_followers()`, `get_following()`, `get_verified_followers()` – NEW! 🎉 82 | 83 | Scweet now supports scraping followers and followings again! 84 | 85 | > ⚠️ **Important Note:** This functionality relies on browser rendering and may trigger rate-limiting or account lockouts. Use with caution and always stay logged in during scraping. 86 | 87 | **🧩 Example Usage:** 88 | ```python 89 | handle = "x_born_to_die_x" 90 | 91 | # Get followers 92 | followers = scweet.get_followers(handle=handle, login=True, stay_logged_in=True, sleep=1) 93 | 94 | # Get following 95 | following = scweet.get_following(handle=handle, login=True, stay_logged_in=True, sleep=1) 96 | 97 | # Get only verified followers 98 | verified = scweet.get_verified_followers(handle=handle, login=True, stay_logged_in=True, sleep=1) 99 | ``` 100 | 101 | --- 102 | 103 | ## 🛠️ Class Initialization & Configuration 104 | 105 | Customize Scweet’s behavior during setup: 106 | 107 | ```python 108 | scweet = Scweet( 109 | proxy=None, # Dict or None 110 | cookies=None, # Nodriver-based cookie handling 111 | cookies_path='cookies', # Folder for saving/loading cookies 112 | user_agent=None, # Optional custom user agent 113 | disable_images=True, # Speeds up scraping 114 | env_path='.env', # Path to your .env file 115 | n_splits=-1, # Date range splitting 116 | concurrency=5, # Number of concurrent tabs 117 | headless=True, # Headless scraping 118 | scroll_ratio=100 # Adjust for scroll depth/speed 119 | ) 120 | ``` 121 | 122 | --- 123 | 124 | ## 🔐 Authentication 125 | 126 | Scweet requires login for tweets, user info, and followers/following. 127 | 128 | Set up your `.env` file like this: 129 | 130 | ```env 131 | EMAIL=your_email@example.com 132 | EMAIL_PASSWORD=your_email_password 133 | USERNAME=your_username 134 | PASSWORD=your_password 135 | ``` 136 | 137 | Need a temp email? Use built-in MailTM integration: 138 | 139 | ```python 140 | from Scweet.utils import create_mailtm_email 141 | email, password = create_mailtm_email() 142 | ``` 143 | 144 | --- 145 | 146 | ## 🔧 Installation 147 | 148 | ```bash 149 | pip install Scweet 150 | ``` 151 | Requires **Python 3.7+** and a Chromium-based browser. 
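One quick way to confirm the installation worked (a sketch, assuming a standard install from PyPI) is to import the package and print its version:

```python
# Post-install sanity check: the package ships a version module.
from Scweet.__version__ import __version__

print(__version__)  # e.g. "3.1"
```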
152 | 153 | --- 154 | 155 | ## 💡 Example Usage 156 | 157 | ### 🐍 Python Script 158 | 159 | ```python 160 | from Scweet.scweet import Scweet 161 | from Scweet.utils import create_mailtm_email 162 | 163 | scweet = Scweet(proxy=None, cookies=None, cookies_path='cookies', 164 | user_agent=None, disable_images=True, env_path='.env', 165 | n_splits=-1, concurrency=5, headless=False, scroll_ratio=100) 166 | 167 | # Get followers (⚠️ requires login) 168 | followers = scweet.get_followers(handle="x_born_to_die_x", login=True, stay_logged_in=True, sleep=1) 169 | print(followers) 170 | 171 | # Get user profile data 172 | infos = scweet.get_user_information(handles=["x_born_to_die_x", "Nabila_Gl"], login=True) 173 | print(infos) 174 | 175 | # Scrape tweets 176 | results = scweet.scrape( 177 | since="2022-10-01", 178 | until="2022-10-06", 179 | words=["bitcoin", "ethereum"], 180 | lang="en", 181 | limit=20, 182 | minlikes=10, 183 | minretweets=10, 184 | save_dir='outputs', 185 | custom_csv_name='crypto.csv' 186 | ) 187 | print(len(results)) 188 | ``` 189 | 190 | --- 191 | 192 | ## 📝 Example Output 193 | 194 | | tweetId | UserScreenName | Text | Likes | Retweets | Timestamp | 195 | |--------|----------------|------|-------|----------|-----------| 196 | | ... | @elonmusk | ... | 18787 | 1000 | 2022-10-05T17:44:46.000Z | 197 | 198 | > Full CSV output includes user info, tweet text, stats, embedded replies, media, and more. 199 | 200 | --- 201 | 202 | ## ☁️ Scweet on Apify (Cloud) 203 | 204 | Need powerful, scalable, high-volume scraping? 205 | Try [**Scweet on Apify**](https://apify.com/altimis/scweet): 206 | 207 | - 🚀 Up to **1000 tweets/minute** 208 | - 📦 Export to datasets 209 | - 🔒 Secure, isolated browser instances 210 | - 🔁 Ideal for automation & research projects 211 | 212 | --- 213 | 214 | ## 🙏 Responsible Use 215 | 216 | We care deeply about ethical scraping. 217 | 218 | > **Please:** Use Scweet for research, education, and lawful purposes only. Respect platform terms and user privacy. 219 | 220 | --- 221 | 222 | ## 📎 Resources 223 | 224 | - 📄 [Example Script](https://github.com/Altimis/Scweet/blob/master/example.py) 225 | - 🐞 [Issues / Bugs](https://github.com/Altimis/Scweet/issues) 226 | - 🌐 [Scweet on Apify](https://apify.com/altimis/scweet) 227 | 228 | --- 229 | 230 | ## ⭐ Star & Contribute 231 | 232 | If you find Scweet useful, consider **starring** the repo ⭐ 233 | We welcome **PRs**, bug reports, and feature suggestions! 
234 | 235 | --- 236 | 237 | MIT License • © 2020–2025 Altimis -------------------------------------------------------------------------------- /Scweet.egg-info/PKG-INFO: -------------------------------------------------------------------------------- 1 | Metadata-Version: 2.4 2 | Name: Scweet 3 | Version: 3.1 4 | Summary: Tool for scraping Tweets 5 | Home-page: https://github.com/Altimis/Scweet 6 | Download-URL: https://github.com/Altimis/Scweet/archive/v3.0.tar.gz 7 | Author: Yassine AIT JEDDI 8 | Author-email: aitjeddiyassine@gmail.com 9 | License: MIT 10 | Keywords: twitter,scraper,python,crawl,following,followers,twitter-scraper,tweets 11 | Classifier: Development Status :: 4 - Beta 12 | Classifier: Intended Audience :: Developers 13 | Classifier: Topic :: Software Development :: Build Tools 14 | Classifier: License :: OSI Approved :: MIT License 15 | Classifier: Programming Language :: Python :: 3.7 16 | Classifier: Programming Language :: Python :: 3.8 17 | Classifier: Programming Language :: Python :: 3.9 18 | Classifier: Programming Language :: Python :: 3.10 19 | Classifier: Programming Language :: Python :: 3.11 20 | Description-Content-Type: text/markdown 21 | License-File: LICENSE.txt 22 | Requires-Dist: certifi 23 | Requires-Dist: python-dotenv 24 | Requires-Dist: urllib3 25 | Requires-Dist: PyVirtualDisplay 26 | Requires-Dist: beautifulsoup4==4.12.3 27 | Requires-Dist: nodriver==0.38.post1 28 | Requires-Dist: requests 29 | Dynamic: author 30 | Dynamic: author-email 31 | Dynamic: classifier 32 | Dynamic: description 33 | Dynamic: description-content-type 34 | Dynamic: download-url 35 | Dynamic: home-page 36 | Dynamic: keywords 37 | Dynamic: license 38 | Dynamic: license-file 39 | Dynamic: requires-dist 40 | Dynamic: summary 41 | 42 | 43 | 44 | # 🐦 Scweet: A Simple and Unlimited Twitter Scraper in Python 45 | 46 | [![Scweet Actor Status](https://apify.com/actor-badge?actor=altimis/scweet)](https://apify.com/altimis/scweet) 47 | [![PyPI Downloads](https://static.pepy.tech/badge/scweet/month)](https://pepy.tech/projects/scweet) 48 | [![PyPI Version](https://img.shields.io/pypi/v/scweet.svg)](https://pypi.org/project/scweet/) 49 | [![License](https://img.shields.io/github/license/Altimis/scweet)](https://github.com/Altimis/scweet/blob/main/LICENSE) 50 | 51 | > **Note:** Scweet is **not affiliated with Twitter/X**. Use responsibly and lawfully. 52 | 53 | --- 54 | 55 | ## 🚀 Recent X Platform Changes & Scweet v3.0 Update 56 | 57 | Scweet has recently encountered challenges due to major changes on **X (formerly Twitter)**. In response, we’re excited to announce the new **Scweet v3.0** release! 58 | 59 | ### ✨ What’s New in v3.0: 60 | - ✅ Fully **asynchronous architecture** for faster, smoother scraping 61 | - 🧠 **No more manual Chromedriver setup** – Scweet handles Chromium internally 62 | - 🚀 Enhanced for **personal and research-level scraping** 63 | - ⚠️ **Follower/following scraping temporarily disabled** (to return in future updates) 64 | 65 | > 🔧 For heavy-duty scraping, we recommend using **[Scweet on Apify](https://apify.com/altimis/scweet)** – a cloud-based solution offering higher throughput and stability (up to **1000 tweets/minute**), no infrastructure setup needed. 66 | 67 | ⚠️ **Responsible Use Reminder** 68 | Whether running locally or in the cloud, **always scrape tweets ethically, lawfully, and respectfully**. 69 | 70 | --- 71 | 72 | ## 📌 What is Scweet? 
73 | 74 | Scweet is a Python-based scraping tool designed to fetch tweets and user data **without relying on traditional Twitter APIs**, which have become increasingly restricted. 75 | 76 | With Scweet, you can: 77 | - Scrape tweets by keywords, hashtags, mentions, accounts, or timeframes 78 | - Get detailed user profile information 79 | - (Coming soon) Retrieve followers/following lists again! 80 | 81 | --- 82 | 83 | ## 🔧 Key Features 84 | 85 | ### 🐤 `scrape()` – Tweet Scraper 86 | 87 | Scrape tweets between two dates using keywords, hashtags, mentions, or specific accounts. 88 | 89 | **✅ Available arguments include:** 90 | ```python 91 | - since, until # Date range (format: YYYY-MM-DD) 92 | - words # Keywords (string or list, use "//" separator for strings) 93 | - from_account # Tweets from a user 94 | - to_account # Tweets to a user 95 | - mention_account # Tweets mentioning a user 96 | - hashtag # Search by hashtag 97 | - lang # Language code (e.g. "en") 98 | - limit # Max number of tweets 99 | - display_type # "Top" or "Latest" 100 | - resume # Resume from previous CSV 101 | - filter_replies # Include/exclude replies 102 | - proximity # Local tweet filtering 103 | - geocode # Geolocation filtering 104 | - minlikes # Tweets with minimum likes count 105 | - minretweets # Tweets with minimum retweets count 106 | - minreplies # Tweets with minimum replies count 107 | - save_dir # Output directory 108 | - custom_csv_name # Output csv name 109 | ``` 110 | --- 111 | 112 | ### 👤 `get_user_information()` – User Info Scraper 113 | 114 | Fetch profile details for a list of handles. Returns a dictionary with: 115 | - `username` (display name) 116 | - `following` (number of accounts they follow) 117 | - `verified_followers` (number of verified followers) 118 | - `location`, `website`, `join_date`, `description` 119 | 120 | **🧩 Arguments:** 121 | ```python 122 | - handles # List of Twitter/X handles 123 | - login (bool) # Set True to login and access full data 124 | ``` 125 | 126 | --- 127 | 128 | ### 🔒 `get_users_followers()` & `get_users_following()` 129 | ⚠️ **Currently Disabled due to platform changes** 130 | These will be re-enabled in future versions as we work around new limitations. 131 | 132 | --- 133 | 134 | ## 🛠️ Class Initialization & Configuration 135 | 136 | You can customize Scweet’s behavior during initialization: 137 | 138 | ```python 139 | scweet = Scweet( 140 | proxy=None, # Dict or None {host, post, username, pasword} 141 | cookies=None, # Use saved cookies file 142 | cookies_path='cookies', # Folder path where cookies will be saved/loaded in future usage 143 | user_agent=None, # Custom user agent string 144 | env_path='.env', # Environment variables 145 | n_splits=-1, # Split date interval (-1 for daily) 146 | concurrency=5, # Concurrent tabs 147 | headless=True, # Run headlessly 148 | scroll_ratio=100, # Adjust scroll behavior 149 | code_callback=None # Optional custom login code handler. Scweet only handles MailTM emails to get the code if X asks for it. 150 | ) 151 | ``` 152 | 153 | --- 154 | 155 | ## 🔐 Authentication 156 | 157 | Scweet requires login to fetch tweets. 
Set up your `.env` file like this: 158 | 159 | ```env 160 | EMAIL=your_email@example.com 161 | EMAIL_PASSWORD=your_email_password 162 | USERNAME=your_username 163 | PASSWORD=your_password 164 | ``` 165 | 166 | Use the built-in helper to create disposable login emails: 167 | 168 | ```python 169 | from Scweet.utils import create_mailtm_email 170 | ``` 171 | 172 | For custom email providers, pass your own `code_callback`. 173 | 174 | --- 175 | 176 | ## 🔧 Installation 177 | 178 | ```bash 179 | pip install Scweet==3.0 180 | ``` 181 | Make sure your environment is set up with Python 3.7+, chrome browser and pip is available. 182 | 183 | ## 💡 Example Usage 184 | 185 | ### 🐍 Python Script 186 | 187 | ```python 188 | from Scweet.scweet import Scweet 189 | from Scweet.user import get_user_information 190 | 191 | scweet = Scweet(proxy=None, cookies=None, cookies_path='cookies', 192 | user_agent=None, disable_images=True, env_path='.env', 193 | n_splits=-1, concurrency=5, headless=True, scroll_ratio=100) 194 | 195 | # Get user profile info 196 | handles = ['nagouzil', 'yassineaitjeddi', 'TahaAlamIdrissi'] 197 | infos = scweet.get_user_information(handles=handles, login=True) 198 | print(infos) 199 | 200 | # Scrape tweets with keywords 201 | results = scweet.scrape( 202 | since="2022-10-01", 203 | until="2022-10-06", 204 | words=['bitcoin', 'ethereum'], 205 | lang="en", 206 | limit=20, 207 | display_type="Top", 208 | resume=False, 209 | filter_replies=False, 210 | minlikes=10, 211 | minretweets=10, 212 | save_dir='outputs', 213 | custom_csv_name='crypto.csv' 214 | ) 215 | print(len(results)) 216 | scweet.close() 217 | ``` 218 | 219 | ### 📝 Example Output 220 | 221 | When you scrape tweets using the scrape() function, the results will be written to a CSV file, with each row representing a tweet. Here’s an example of what the output might look like: 222 | 223 | 224 | | tweetId | UserScreenName | UserName | Timestamp | Text | Embedded_text | Emojis | Comments | Likes | Retweets | Image link | Tweet URL | 225 | |--------------------|----------------|-----------|--------------------------|-------------------------------------------------------------------------------------------|--------------------------|--------|----------|-------|----------|--------------------------------------------------------------------------------------------------|--------------------------------------------------| 226 | | 1577716440299442187 | @elonmusk | Elon Musk | 2022-10-05T17:44:46.000Z | 10.69.3 will actually be a major upgrade. We’re keeping .69 just because haha. 
| Replying to@WholeMarsBlog | | 1256 | 18787 | 1000 | https://pbs.twimg.com/profile_images/1683899100922511378/5lY42eHs_bigger.jpg | /elonmusk/status/1577716440299442187 | 227 | | 1577737664689848326 | @elonmusk | Elon Musk | 2022-10-05T19:09:06.000Z | Twitter is an accelerant to fulfilling the original http://X.com vision | Replying to@TEDchris | | 967 | 10967 | 931 | https://pbs.twimg.com/profile_images/1683899100922511378/5lY42eHs_bigger.jpg | /elonmusk/status/1577737664689848326 | 228 | | 1577747565533069312 | @elonmusk | Elon Musk | 2022-10-05T19:48:27.000Z | That wouldn’t be hard to do | Replying to@ashleevance | | 1326 | 31734 | 1011 | https://pbs.twimg.com/profile_images/1683899100922511378/5lY42eHs_bigger.jpg | /elonmusk/status/1577747565533069312 | 229 | | 1577732106784051214 | @elonmusk | Elon Musk | 2022-10-05T18:47:01.000Z | *"I do not think it is simple at all, but I have yet to hear any realistic path to peace.* | | | – | – | – | – | /elonmusk/status/1577732106784051214 | 230 | 231 | 232 | **Columns description**: 233 | 234 | - **tweetId**: The unique identifier for the tweet. 235 | - **UserScreenName**: The Twitter/X handle of the user who posted the tweet. 236 | - **UserName**: The display name of the user. 237 | - **Timestamp**: The date and time the tweet was posted. 238 | - **Text**: The content of the tweet. 239 | - **Embedded_text**: If the tweet is a reply, this will show the user being replied to. 240 | - **Emojis**: Any emojis used in the tweet. 241 | - **Comments**: Number of replies to the tweet. 242 | - **Likes**: Number of likes the tweet received. 243 | - **Retweets**: Number of retweets the tweet received. 244 | - **Image link**: A link to the image(s) attached to the tweet, if any. 245 | - **Tweet URL**: Direct URL to the tweet. 246 | 247 | --- 248 | 249 | ## ☁️ Scweet on Apify (Cloud) 250 | 251 | Need powerful, scalable, high-volume scraping? 252 | Try [**Scweet on Apify**](https://apify.com/altimis/scweet) – a no-setup cloud solution: 253 | 254 | - 🚀 Up to **1000 tweets/minute** 255 | - 📦 Exports to datasets or files 256 | - 🔒 Secure, isolated runs 257 | - 🔁 Ideal for automation, long-term projects 258 | 259 | --- 260 | 261 | ## 🙏 Responsible Use 262 | 263 | We care deeply about ethical scraping. 264 | 265 | > **Please:** Use Scweet for research, archiving, and lawful purposes only. 266 | 267 | --- 268 | 269 | ## 📎 Resources 270 | 271 | - 📄 [Example Script](https://github.com/Altimis/Scweet/blob/master/example.py) 272 | - 🐞 [Issues / Bugs](https://github.com/Altimis/Scweet/issues) 273 | - 🌐 [Scweet on Apify](https://apify.com/altimis/scweet) 274 | 275 | --- 276 | 277 | ## ⭐ Star & Contribute 278 | 279 | If you find Scweet useful, consider **starring** the repo ⭐ 280 | We welcome **PRs**, bug reports, and ideas for new features! 
281 | 282 | --- 283 | 284 | MIT License • © 2020–2025 Altimis 285 | -------------------------------------------------------------------------------- /Scweet.egg-info/SOURCES.txt: -------------------------------------------------------------------------------- 1 | LICENSE.txt 2 | README.md 3 | setup.cfg 4 | setup.py 5 | Scweet/__init__.py 6 | Scweet/__version__.py 7 | Scweet/const.py 8 | Scweet/mailtm.py 9 | Scweet/scweet.py 10 | Scweet/scweet_v1.8.py 11 | Scweet/user.py 12 | Scweet/utils.py 13 | Scweet.egg-info/PKG-INFO 14 | Scweet.egg-info/SOURCES.txt 15 | Scweet.egg-info/dependency_links.txt 16 | Scweet.egg-info/requires.txt 17 | Scweet.egg-info/top_level.txt -------------------------------------------------------------------------------- /Scweet.egg-info/dependency_links.txt: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /Scweet.egg-info/requires.txt: -------------------------------------------------------------------------------- 1 | certifi 2 | python-dotenv 3 | urllib3 4 | PyVirtualDisplay 5 | beautifulsoup4==4.12.3 6 | nodriver==0.38.post1 7 | requests 8 | -------------------------------------------------------------------------------- /Scweet.egg-info/top_level.txt: -------------------------------------------------------------------------------- 1 | Scweet 2 | -------------------------------------------------------------------------------- /Scweet/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Altimis/Scweet/74e23e61e4a73aa71cdf76c1ea9981f83802ddb1/Scweet/__init__.py -------------------------------------------------------------------------------- /Scweet/__version__.py: -------------------------------------------------------------------------------- 1 | VERSION = (3, 1) 2 | 3 | __version__ = '.'.join(map(str, VERSION)) -------------------------------------------------------------------------------- /Scweet/const.py: -------------------------------------------------------------------------------- 1 | import dotenv 2 | import os 3 | from pathlib import Path 4 | 5 | current_dir = Path(__file__).parent.absolute() 6 | 7 | 8 | # env_file = os.getenv("SCWEET_ENV_FILE", current_dir.parent.joinpath(".env")) 9 | # dotenv.load_dotenv(env_file, verbose=True) 10 | 11 | 12 | def load_env_variable(key, default_value=None, none_allowed=False): 13 | v = os.getenv(key, default=default_value) 14 | if v is None and not none_allowed: 15 | raise RuntimeError(f"{key} returned {v} but this is not allowed!") 16 | return v 17 | 18 | 19 | def get_email(env): 20 | dotenv.load_dotenv(env, verbose=True, override=True) 21 | return load_env_variable("EMAIL", none_allowed=False) 22 | 23 | 24 | def get_email_password(env): 25 | dotenv.load_dotenv(env, verbose=True, override=True) 26 | return load_env_variable("EMAIL_PASSWORD", none_allowed=True) 27 | 28 | 29 | def get_password(env): 30 | dotenv.load_dotenv(env, verbose=True, override=True) 31 | return load_env_variable("PASSWORD", none_allowed=False) 32 | 33 | 34 | def get_username(env): 35 | dotenv.load_dotenv(env, verbose=True, override=True) 36 | return load_env_variable("USERNAME", none_allowed=False) 37 | -------------------------------------------------------------------------------- /Scweet/debug.log: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Altimis/Scweet/74e23e61e4a73aa71cdf76c1ea9981f83802ddb1/Scweet/debug.log -------------------------------------------------------------------------------- /Scweet/mailtm.py: -------------------------------------------------------------------------------- 1 | """ 2 | Scweet - Twitter Scraping Tool 3 | Author: Yassine Ait Jeddi (@altimis) 4 | License: MIT 5 | Repository: https://github.com/Altimis/scweet 6 | """ 7 | 8 | import requests 9 | 10 | 11 | class Domain: 12 | def __init__(self, domainJson): 13 | self.domain = domainJson["domain"] 14 | self.id = domainJson["id"] 15 | 16 | 17 | class Mail: 18 | def __init__(self, emailJson, token): 19 | self.fromAddress = emailJson["from"]["address"] 20 | self.toAddress = [] 21 | for receiver in emailJson["to"]: 22 | self.toAddress.append(receiver["address"]) 23 | self.session = requests.Session() 24 | self.token = token 25 | self.fromName = emailJson["from"]["name"] 26 | self.subject = emailJson["subject"] 27 | self.size = emailJson["size"] 28 | self.id = emailJson["id"] 29 | self.text = self.read()["text"] 30 | 31 | def read(self): 32 | r = self.session.get( 33 | "https://api.mail.tm/messages/" + self.id, 34 | headers={ 35 | "Authorization": "Bearer " + self.token, 36 | "Content-Type": "application/json", 37 | }, 38 | ) 39 | 40 | return r.json() 41 | 42 | def delete(self): 43 | r = self.session.delete( 44 | "https://api.mail.tm/messages/" + self.id, 45 | headers={ 46 | "Authorization": "Bearer " + self.token, 47 | "Content-Type": "application/json", 48 | }, 49 | ) 50 | 51 | if r.status_code == 204: 52 | return 0 53 | if r.status_code == 404: 54 | return 1 55 | 56 | 57 | class MailTMClient: 58 | def getAvailableDomains(self): 59 | r = self.session.get("https://api.mail.tm/domains") 60 | domains = [] 61 | for domainJson in r.json()["hydra:member"]: 62 | # Only fetch public & active domains for now. 
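# (Each entry of the "hydra:member" collection exposes isActive / isPrivate flags.)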
63 | if domainJson["isActive"] == True and domainJson["isPrivate"] == False: 64 | domains.append(Domain(domainJson)) 65 | return domains 66 | 67 | def register(self, address, password): 68 | r = self.session.post( 69 | "https://api.mail.tm/accounts", 70 | json={ 71 | "address": address, 72 | "password": password, 73 | }, 74 | ) 75 | 76 | if r.status_code == 201 or r.status_code == 200: 77 | (responseCode, response) = self.login(address, password) 78 | if responseCode == 0: 79 | return (0, response) 80 | elif r.status_code == 400: 81 | return (1, r.json()["detail"]) 82 | elif r.status_code == 422: 83 | return (2, r.json()["detail"]) 84 | print(f'response {r.status_code}') 85 | return -1, None 86 | 87 | def login(self, address, password): 88 | r = self.session.post( 89 | "https://api.mail.tm/token", 90 | json={ 91 | "address": address, 92 | "password": password, 93 | }, 94 | ) 95 | 96 | if r.status_code == 200: 97 | return (0, r.json()["token"]) 98 | if r.status_code == 401: 99 | return (1, r.json()["message"]) 100 | 101 | def getInbox(self): 102 | r = self.session.get( 103 | "https://api.mail.tm/messages", 104 | headers={ 105 | "Authorization": "Bearer " + self.token, 106 | "Content-Type": "application/json", 107 | }, 108 | ) 109 | 110 | inbox = [] 111 | for emailJson in r.json()["hydra:member"]: 112 | inbox.append(Mail(emailJson, self.token)) 113 | return inbox 114 | 115 | def __init__(self, token=None): 116 | self.session = requests.Session() 117 | self.token = token if token is not None else token 118 | -------------------------------------------------------------------------------- /Scweet/scweet.py: -------------------------------------------------------------------------------- 1 | """ 2 | Scweet - Twitter Scraping Tool 3 | Author: Yassine Ait Jeddi (@altimis) 4 | License: MIT 5 | Repository: https://github.com/Altimis/scweet 6 | """ 7 | 8 | import asyncio 9 | import logging 10 | import argparse 11 | import csv 12 | import json 13 | import re 14 | import os 15 | import math 16 | from datetime import datetime, timedelta, date 17 | from typing import Awaitable, Callable, Optional, Union, List 18 | 19 | import platform 20 | 21 | import nodriver as uc 22 | from requests.cookies import create_cookie 23 | from bs4 import BeautifulSoup 24 | from pyvirtualdisplay import Display 25 | 26 | from .const import get_username, get_password, get_email, get_email_password 27 | from .utils import (check_element_if_exists_by_text, check_element_if_exists_by_css, 28 | get_code_from_email, extract_count_from_aria_label) 29 | 30 | logging.getLogger('urllib3').setLevel(logging.WARNING) 31 | logging.getLogger('seleniumwire').setLevel(logging.ERROR) 32 | logging.getLogger('selenium').setLevel(logging.ERROR) 33 | logging.basicConfig(level=logging.INFO, format='%(asctime)s:%(message)s') 34 | 35 | # display = Display(visible=0, size=(1024, 768)) 36 | 37 | 38 | def parse_followers(text): 39 | text = text.split(' ')[0] 40 | if 'K' in text: 41 | followers = int(float(text.replace('K', '')) * 1000) 42 | elif 'M' in text: 43 | followers = int(float(text.replace('M', '')) * 1000000) 44 | else: 45 | text = text.replace(',', '') 46 | followers = int(text) 47 | return followers 48 | 49 | 50 | class Scweet: 51 | main_tab: uc.Tab 52 | def __init__(self, proxy=None, cookies=None, cookies_path=None, user_agent=None, 53 | disable_images=False, env_path=None, n_splits=5, concurrency=5, headless=True, scroll_ratio=30, 54 | code_callback: Optional[Callable[[str, str], Awaitable[str]]] = None): 55 | self.driver = None 56 | 
self.proxy = proxy 57 | self.cookies = cookies 58 | self.user_agent = user_agent 59 | self.cookies_path = cookies_path 60 | self.disable_images = disable_images 61 | self.env_path = env_path 62 | self.n_splits = n_splits 63 | self.concurrency = concurrency 64 | self.headless = headless 65 | self.scroll_ratio = scroll_ratio 66 | self.logged_in = False 67 | self.suspended = False 68 | # If no custom code callback is provided, use the default get_code_from_email for mailtm 69 | self.code_callback = code_callback or get_code_from_email 70 | self.display = None 71 | if self.headless: 72 | if self.headless and platform.system() in ["Linux"]: 73 | logging.info("Starting pyvirtualdisplay for Linux headless mode") 74 | self.display = Display(visible=0, size=(1024, 768)) 75 | self.display.start() 76 | 77 | async def init_nodriver(self): 78 | config = uc.Config() 79 | config.lang = "en-US" 80 | # Enable built-in headless mode for Windows and macOS 81 | if self.headless and platform.system() in ["Windows", "Darwin"]: 82 | logging.info("Using nodriver's headless mode for Windows/macOS") 83 | config.headless = True 84 | 85 | if self.proxy: 86 | logging.info(f"setting proxy : {self.proxy['host']}:{self.proxy['port']}") 87 | config.add_argument(f"--proxy-server={self.proxy['host']}:{self.proxy['port']}") 88 | if self.user_agent: 89 | config.add_argument(f'--user-agent={self.user_agent}') 90 | if self.disable_images: 91 | config.add_argument(f'--blink-settings=imagesEnabled=false') 92 | self.driver = await uc.start(config) 93 | self.main_tab = await self.driver.get("draft:,") 94 | if self.proxy: 95 | self.main_tab.add_handler(uc.cdp.fetch.RequestPaused, self.req_paused) 96 | self.main_tab.add_handler( 97 | uc.cdp.fetch.AuthRequired, self.auth_challenge_handler 98 | ) 99 | 100 | await self.main_tab.send(uc.cdp.fetch.enable(handle_auth_requests=True)) 101 | page = await self.driver.get("https://www.whatismyip.com/") 102 | await asyncio.sleep(5) 103 | 104 | async def auth_challenge_handler(self, event: uc.cdp.fetch.AuthRequired): 105 | # Split the credentials 106 | # Respond to the authentication challenge 107 | asyncio.create_task( 108 | self.main_tab.send( 109 | uc.cdp.fetch.continue_with_auth( 110 | request_id=event.request_id, 111 | auth_challenge_response=uc.cdp.fetch.AuthChallengeResponse( 112 | response="ProvideCredentials", 113 | username=self.proxy['username'], 114 | password=self.proxy['password'], 115 | ), 116 | ) 117 | ) 118 | ) 119 | 120 | async def req_paused(self, event: uc.cdp.fetch.RequestPaused): 121 | asyncio.create_task( 122 | self.main_tab.send( 123 | uc.cdp.fetch.continue_request(request_id=event.request_id) 124 | ) 125 | ) 126 | 127 | async def enter_code(self, code): 128 | try: 129 | code_el = await self.main_tab.select("input[data-testid=ocfEnterTextTextInput]") 130 | await self.main_tab.sleep(15) 131 | if not code: 132 | return False 133 | await code_el.send_keys(code) 134 | await self.main_tab.sleep(2) 135 | try: 136 | next = await self.main_tab.find("Suivant", best_match=True) 137 | except Exception as e: 138 | next = await self.main_tab.find("Next", best_match=True) 139 | except Exception as err: 140 | next = await self.main_tab.find("Se Connecter", best_match=True) 141 | except Exception as eerr: 142 | next = await self.main_tab.find("Login", best_match=True) 143 | 144 | await next.click() 145 | await self.main_tab.sleep(2) 146 | return True 147 | 148 | except Exception as e: 149 | print(f"couldn't enter code : {e}") 150 | return False 151 | 152 | async def enter_username(self, 
username): 153 | try: 154 | username_el = await self.main_tab.select("input[data-testid=ocfEnterTextTextInput]") 155 | await username_el.send_keys(username) 156 | await self.main_tab.sleep(1) 157 | try: 158 | next = await self.main_tab.find("Suivant", best_match=True) 159 | except Exception as e: 160 | next = await self.main_tab.find("Next", best_match=True) 161 | except Exception as err: 162 | next = await self.main_tab.find("Se Connecter", best_match=True) 163 | except Exception as eerr: 164 | next = await self.main_tab.find("Login", best_match=True) 165 | 166 | await next.click() 167 | await self.main_tab.sleep(1) 168 | except Exception as e: 169 | print(f"Error entering username : {e}") 170 | pass 171 | 172 | async def normal_login(self, account): 173 | # enter username 174 | email_el = await self.main_tab.select("input[autocomplete=username]") 175 | await email_el.send_keys(account['email_address']) 176 | await self.main_tab.sleep(1) 177 | logging.info('Entered email') 178 | 179 | # click next 180 | try: 181 | next = await self.main_tab.find("Suivant", best_match=True) 182 | except: 183 | next = await self.main_tab.find("Next", best_match=True) 184 | await next.click() 185 | await self.main_tab.sleep(1) 186 | 187 | # check if username is required and enter 188 | try: 189 | await self.main_tab.sleep(1) 190 | await self.main_tab.find( 191 | "Entrez votre adresse email ou votre nom d'utilisateur.") # Enter your phone number or username 192 | await self.enter_username(account['username']) 193 | logging.info('entered username') 194 | except: 195 | pass 196 | 197 | try: 198 | await self.main_tab.sleep(1) 199 | await self.main_tab.find( 200 | "Enter your phone number or username") # Enter your phone number or username 201 | await self.enter_username(account['username']) 202 | logging.info('Entered username') 203 | except: 204 | pass 205 | 206 | # enter password 207 | password_el = await self.main_tab.select("input[autocomplete=current-password]") 208 | await password_el.send_keys(account['password']) 209 | await self.main_tab.sleep(2) 210 | logging.info('Entered password') 211 | 212 | # click login 213 | try: 214 | next = await self.main_tab.find("Se Connecter", best_match=True) 215 | except Exception as e: 216 | next = await self.main_tab.find("Login", best_match=True) 217 | except Exception as err: 218 | pass 219 | 220 | await self.main_tab.sleep(1) 221 | await next.click() 222 | 223 | if await self._is_logged_in(): 224 | logging.info("Logged in successfully.") 225 | self.cookies = await self.driver.cookies.get_all(requests_cookie_format=True) 226 | if self.cookies_path: 227 | await self.driver.cookies.save(f"{self.cookies_path}/{account['username']}_cookies.dat") 228 | return self.main_tab, True, "", self.cookies 229 | 230 | # wait for code to be sent if required 231 | if (await check_element_if_exists_by_text(self.main_tab, "Code de confirmation") or 232 | await check_element_if_exists_by_text(self.main_tab, "Confirmation code")): 233 | # code = input("Enter the code you received in your email : ") 234 | await self.main_tab.sleep(10) 235 | code = await self.code_callback(account.get('email_address'), account.get('email_password')) 236 | code_status = await self.enter_code(code) 237 | if not code_status: 238 | return self.main_tab, False, "code_not_found", None 239 | logging.info('Entered Confirmation code') 240 | 241 | 242 | if (await check_element_if_exists_by_text(self.main_tab, 243 | "Please verify your email address.", timeout=20) or 244 | await 
check_element_if_exists_by_text(self.main_tab, 245 | 'Your account has been locked.', timeout=20)): 246 | return self.main_tab, False, "Account locked.", None 247 | 248 | # check if login is successful 249 | if await self._is_logged_in(): 250 | logging.info("Logged in successfully.") 251 | self.cookies = await self.driver.cookies.get_all(requests_cookie_format=True) 252 | if self.cookies_path: 253 | await self.driver.cookies.save(f"{self.cookies_path}/{account['username']}_cookies.dat") 254 | return self.main_tab, True, "", self.cookies 255 | else: 256 | return None, False, "Locked", None 257 | 258 | async def login(self): 259 | # await self.init_nodriver() 260 | if self.logged_in: 261 | return self.main_tab, True, "", self.cookies 262 | account = { 263 | "email_address": get_email(self.env_path), 264 | "password": get_password(self.env_path), 265 | "username": get_username(self.env_path), 266 | "email_password": get_email_password(self.env_path) 267 | } 268 | if not account.get('email_address') or not account.get('password') or not account.get('username'): 269 | logging.info(f"Provide twitter account credentials to login.") 270 | return self.main_tab, False, "Account_creds_required", None 271 | self.main_tab = await self.driver.get("https://x.com/login") 272 | await self.main_tab.sleep(2) 273 | if os.path.exists(f"{self.cookies_path}/{account['username']}_cookies.dat"): 274 | logging.info(f"Loading cookies from path {self.cookies_path} ...") 275 | await self.driver.cookies.load(f"{self.cookies_path}/{account['username']}_cookies.dat") 276 | self.main_tab = await self.driver.get("https://x.com/login") 277 | await self.main_tab.sleep(3) 278 | elif self.cookies: 279 | logging.info(f"Loading cookies from file ...") 280 | await self.load_cookies(self.cookies) 281 | self.main_tab = await self.driver.get("https://x.com/login") 282 | await self.main_tab.sleep(3) 283 | 284 | if await self._is_logged_in(): 285 | logging.info(f"Logged in successfully to {account.get('username')}") 286 | return self.main_tab, True, "", self.cookies 287 | 288 | if await check_element_if_exists_by_css(self.main_tab, "input[autocomplete=username]"): 289 | logging.info(f"Login in from scratch to {account.get('username')}") 290 | return await self.normal_login(account) 291 | else: 292 | logging.info("Something unexpected happened. Aborting.") 293 | return self.main_tab, False, "Other", None 294 | 295 | async def _is_logged_in(self): 296 | try: 297 | home = await self.main_tab.select("a[href='/home']") 298 | self.logged_in = True 299 | return True 300 | except Exception as e: 301 | return False 302 | 303 | async def load_cookies(self, cookie_dicts): 304 | for cdict in cookie_dicts: 305 | # Recreate the cookie using requests' create_cookie function 306 | c = create_cookie( 307 | name=cdict["name"], 308 | value=cdict["value"], 309 | domain=cdict["domain"], 310 | path=cdict["path"], 311 | expires=cdict["expires"], 312 | secure=cdict["secure"] 313 | ) 314 | self.driver.cookies.set_cookie(c) 315 | 316 | async def get_data(self, post_soup): 317 | # username 318 | username_tag = post_soup.find('span') 319 | username = username_tag.get_text(strip=True) if username_tag else "" 320 | 321 | # handle: a span with '@' 322 | handle_tag = post_soup.find('span', text=lambda t: t and '@' in t) 323 | handle = handle_tag.get_text(strip=True) if handle_tag else "" 324 | 325 | # postdate: