├── .env
├── .gitignore
├── .idea
├── Scweet.iml
├── inspectionProfiles
│ └── profiles_settings.xml
├── misc.xml
├── modules.xml
├── vcs.xml
└── workspace.xml
├── LICENSE.txt
├── README.md
├── Scweet.egg-info
├── PKG-INFO
├── SOURCES.txt
├── dependency_links.txt
├── requires.txt
└── top_level.txt
├── Scweet
├── __init__.py
├── __version__.py
├── const.py
├── debug.log
├── mailtm.py
├── scweet.py
├── scweet_v1.8.py
├── user.py
└── utils.py
├── example.py
├── images
├── 1_1.jpg
├── 2_1.jpg
├── 3_1.jpg
├── 4_1.jpg
├── 5_1.jpg
├── 6_1.jpg
├── 7_1.jpg
└── 8_1.jpg
├── requirements.txt
├── setup.cfg
└── setup.py
/.env:
--------------------------------------------------------------------------------
1 | EMAIL=''
2 | PASSWORD=''
3 | EMAIL_PASSWORD=''
4 | USERNAME=''
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Ignore packaging metadata
2 | *.egg-info/
3 | dist/
4 | build/
5 |
6 | # Ignore IDE settings
7 | .idea/
8 |
9 | # Ignore images if not used in README
10 | images/
11 |
12 | # Ignore specific files inside Scweet/
13 | Scweet/debug.log
14 | Scweet/scweet_v1.8.py
15 | Scweet/user.py
16 |
--------------------------------------------------------------------------------
/.idea/Scweet.iml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
--------------------------------------------------------------------------------
/.idea/inspectionProfiles/profiles_settings.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
--------------------------------------------------------------------------------
/.idea/misc.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
--------------------------------------------------------------------------------
/.idea/modules.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
--------------------------------------------------------------------------------
/.idea/vcs.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
--------------------------------------------------------------------------------
/.idea/workspace.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 |
42 |
43 |
44 |
45 |
46 |
47 |
48 |
49 |
50 |
51 |
52 |
53 |
54 |
55 |
56 |
57 |
58 |
59 |
60 |
61 |
62 |
63 |
64 |
65 | 1628461210353
66 |
67 |
68 | 1628461210353
69 |
70 |
71 | 1641220203456
72 |
73 |
74 |
75 | 1641220203456
76 |
77 |
78 | 1641221379276
79 |
80 |
81 |
82 | 1641221379276
83 |
84 |
85 | 1641221862491
86 |
87 |
88 |
89 | 1641221862491
90 |
91 |
92 |
93 |
94 |
95 |
104 |
105 |
106 |
107 |
108 |
109 |
110 |
--------------------------------------------------------------------------------
/LICENSE.txt:
--------------------------------------------------------------------------------
1 | MIT License
2 | Copyright (c) 2020 Altimis Teams
3 | Permission is hereby granted, free of charge, to any person obtaining a copy
4 | of this software and associated documentation files (the "Software"), to deal
5 | in the Software without restriction, including without limitation the rights
6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7 | copies of the Software, and to permit persons to whom the Software is
8 | furnished to do so, subject to the following conditions:
9 | The above copyright notice and this permission notice shall be included in all
10 | copies or substantial portions of the Software.
11 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
12 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
13 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
14 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
15 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
16 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
17 | SOFTWARE.
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # 🐦 Scweet: A Simple and Unlimited Twitter Scraper in Python
2 |
3 | [](https://apify.com/altimis/scweet)
4 | [](https://pepy.tech/projects/scweet)
5 | [](https://pypi.org/project/scweet/)
6 | [](https://github.com/Altimis/scweet/blob/main/LICENSE)
7 |
8 | > **Note:** Scweet is **not affiliated with Twitter/X**. Use responsibly and lawfully.
9 |
10 | ---
11 |
12 | ## 🚀 Scweet on Apify: Cloud-Powered Scraping
13 |
14 | For heavy-duty scraping, we recommend using [**Scweet on Apify**](https://apify.com/altimis/scweet?fpr=a40q9&fp_sid=jeb97) – a cloud-based solution that offers:
15 | - **Zero setup:** No need to install or maintain infrastructure.
16 | - **Incredible Speed:** Up to **1000 tweets per minute**.
17 | - **High Reliability:** Managed and isolated runs for consistent performance.
18 | - **Free Usage Tier:** Get started for free with a generous quota—perfect for experiments, small projects, or learning how Scweet works. Once you exceed the free quota, you'll pay only **$0.30 per 1,000 tweets**.
19 |
20 | [](https://apify.com/altimis/scweet?fpr=a40q9&fp_sid=jeb97)
21 |
22 | ---
23 |
24 | ## 🚀 Recent X Platform Changes & Scweet v3 Update
25 |
26 | Scweet has recently encountered challenges due to major changes on **X (formerly Twitter)**. In response, we’re excited to announce the new **Scweet v3** release!
27 |
28 | ### ✨ What’s New in v3:
29 | - ✅ Fully **asynchronous architecture** for faster, smoother scraping
30 | - 🧠 **No more manual Chromedriver setup** – Scweet handles Chromium internally with **[Nodriver](https://github.com/ultrafunkamsterdam/nodriver)**
31 | - 🚀 Enhanced for **personal and research-level scraping**
32 | - 🧑🤝🧑 **Follower & following scraping is back!** (see below 👇)
33 |
34 | ---
35 |
36 | ## 📌 What is Scweet?
37 |
38 | Scweet is a Python-based scraping tool designed to fetch tweets and user data **without relying on traditional Twitter APIs**, which have become increasingly restricted.
39 |
40 | With Scweet, you can:
41 | - Scrape tweets by keywords, hashtags, mentions, accounts, or timeframes
42 | - Get detailed user profile information
43 | - ✅ Retrieve followers, following, and verified followers!
44 |
45 | ---
46 |
47 | ## 🔧 Key Features
48 |
49 | ### 🐤 `scrape()` – Tweet Scraper
50 |
51 | Scrape tweets between two dates using keywords, hashtags, mentions, or specific accounts.
52 |
53 | **✅ Available arguments include:**
54 | ```python
55 | - since, until
56 | - words
57 | - from_account, to_account, mention_account
58 | - hashtag, lang
59 | - limit, display_type, resume
60 | - filter_replies, proximity, geocode
61 | - minlikes, minretweets, minreplies
62 | - save_dir, custom_csv_name
63 | ```
64 |
65 | ---
66 |
67 | ### 👤 `get_user_information()` – User Info Scraper
68 |
69 | Fetch profile details for a list of handles. Returns a dictionary with:
70 | - `username`, `verified_followers`
71 | - `following`, `location`, `website`, `join_date`, `description`
72 |
73 | **🧩 Arguments:**
74 | ```python
75 | - handles # List of Twitter/X handles
76 | - login (bool) # Required for complete data
77 | ```
78 |
79 | ---
80 |
81 | ### 🧑🤝🧑 `get_followers()`, `get_following()`, `get_verified_followers()` – NEW! 🎉
82 |
83 | Scweet now supports scraping followers and followings again!
84 |
85 | > ⚠️ **Important Note:** This functionality relies on browser rendering and may trigger rate-limiting or account lockouts. Use with caution and always stay logged in during scraping.
86 |
87 | **🧩 Example Usage:**
88 | ```python
89 | handle = "x_born_to_die_x"
90 |
91 | # Get followers
92 | followers = scweet.get_followers(handle=handle, login=True, stay_logged_in=True, sleep=1)
93 |
94 | # Get following
95 | following = scweet.get_following(handle=handle, login=True, stay_logged_in=True, sleep=1)
96 |
97 | # Get only verified followers
98 | verified = scweet.get_verified_followers(handle=handle, login=True, stay_logged_in=True, sleep=1)
99 | ```
100 |
101 | ---
102 |
103 | ## 🛠️ Class Initialization & Configuration
104 |
105 | Customize Scweet’s behavior during setup:
106 |
107 | ```python
108 | scweet = Scweet(
109 | proxy=None, # Dict or None
110 | cookies=None, # Nodriver-based cookie handling
111 | cookies_path='cookies', # Folder for saving/loading cookies
112 | user_agent=None, # Optional custom user agent
113 | disable_images=True, # Speeds up scraping
114 | env_path='.env', # Path to your .env file
115 | n_splits=-1, # Date range splitting
116 | concurrency=5, # Number of concurrent tabs
117 | headless=True, # Headless scraping
118 | scroll_ratio=100 # Adjust for scroll depth/speed
119 | )
120 | ```
121 |
122 | ---
123 |
124 | ## 🔐 Authentication
125 |
126 | Scweet requires login for tweets, user info, and followers/following.
127 |
128 | Set up your `.env` file like this:
129 |
130 | ```env
131 | EMAIL=your_email@example.com
132 | EMAIL_PASSWORD=your_email_password
133 | USERNAME=your_username
134 | PASSWORD=your_password
135 | ```
136 |
137 | Need a temp email? Use built-in MailTM integration:
138 |
139 | ```python
140 | from Scweet.utils import create_mailtm_email
141 | email, password = create_mailtm_email()
142 | ```
143 |
144 | ---
145 |
146 | ## 🔧 Installation
147 |
148 | ```bash
149 | pip install Scweet
150 | ```
151 | Requires **Python 3.7+** and a Chromium-based browser.
152 |
153 | ---
154 |
155 | ## 💡 Example Usage
156 |
157 | ### 🐍 Python Script
158 |
159 | ```python
160 | from Scweet.scweet import Scweet
161 | from Scweet.utils import create_mailtm_email
162 |
163 | scweet = Scweet(proxy=None, cookies=None, cookies_path='cookies',
164 | user_agent=None, disable_images=True, env_path='.env',
165 | n_splits=-1, concurrency=5, headless=False, scroll_ratio=100)
166 |
167 | # Get followers (⚠️ requires login)
168 | followers = scweet.get_followers(handle="x_born_to_die_x", login=True, stay_logged_in=True, sleep=1)
169 | print(followers)
170 |
171 | # Get user profile data
172 | infos = scweet.get_user_information(handles=["x_born_to_die_x", "Nabila_Gl"], login=True)
173 | print(infos)
174 |
175 | # Scrape tweets
176 | results = scweet.scrape(
177 | since="2022-10-01",
178 | until="2022-10-06",
179 | words=["bitcoin", "ethereum"],
180 | lang="en",
181 | limit=20,
182 | minlikes=10,
183 | minretweets=10,
184 | save_dir='outputs',
185 | custom_csv_name='crypto.csv'
186 | )
187 | print(len(results))
188 | ```
189 |
190 | ---
191 |
192 | ## 📝 Example Output
193 |
194 | | tweetId | UserScreenName | Text | Likes | Retweets | Timestamp |
195 | |--------|----------------|------|-------|----------|-----------|
196 | | ... | @elonmusk | ... | 18787 | 1000 | 2022-10-05T17:44:46.000Z |
197 |
198 | > Full CSV output includes user info, tweet text, stats, embedded replies, media, and more.
199 |
200 | ---
201 |
202 | ## ☁️ Scweet on Apify (Cloud)
203 |
204 | Need powerful, scalable, high-volume scraping?
205 | Try [**Scweet on Apify**](https://apify.com/altimis/scweet):
206 |
207 | - 🚀 Up to **1000 tweets/minute**
208 | - 📦 Export to datasets
209 | - 🔒 Secure, isolated browser instances
210 | - 🔁 Ideal for automation & research projects
211 |
212 | ---
213 |
214 | ## 🙏 Responsible Use
215 |
216 | We care deeply about ethical scraping.
217 |
218 | > **Please:** Use Scweet for research, education, and lawful purposes only. Respect platform terms and user privacy.
219 |
220 | ---
221 |
222 | ## 📎 Resources
223 |
224 | - 📄 [Example Script](https://github.com/Altimis/Scweet/blob/master/example.py)
225 | - 🐞 [Issues / Bugs](https://github.com/Altimis/Scweet/issues)
226 | - 🌐 [Scweet on Apify](https://apify.com/altimis/scweet)
227 |
228 | ---
229 |
230 | ## ⭐ Star & Contribute
231 |
232 | If you find Scweet useful, consider **starring** the repo ⭐
233 | We welcome **PRs**, bug reports, and feature suggestions!
234 |
235 | ---
236 |
237 | MIT License • © 2020–2025 Altimis
--------------------------------------------------------------------------------
/Scweet.egg-info/PKG-INFO:
--------------------------------------------------------------------------------
1 | Metadata-Version: 2.4
2 | Name: Scweet
3 | Version: 3.1
4 | Summary: Tool for scraping Tweets
5 | Home-page: https://github.com/Altimis/Scweet
6 | Download-URL: https://github.com/Altimis/Scweet/archive/v3.1.tar.gz
7 | Author: Yassine AIT JEDDI
8 | Author-email: aitjeddiyassine@gmail.com
9 | License: MIT
10 | Keywords: twitter,scraper,python,crawl,following,followers,twitter-scraper,tweets
11 | Classifier: Development Status :: 4 - Beta
12 | Classifier: Intended Audience :: Developers
13 | Classifier: Topic :: Software Development :: Build Tools
14 | Classifier: License :: OSI Approved :: MIT License
15 | Classifier: Programming Language :: Python :: 3.7
16 | Classifier: Programming Language :: Python :: 3.8
17 | Classifier: Programming Language :: Python :: 3.9
18 | Classifier: Programming Language :: Python :: 3.10
19 | Classifier: Programming Language :: Python :: 3.11
20 | Description-Content-Type: text/markdown
21 | License-File: LICENSE.txt
22 | Requires-Dist: certifi
23 | Requires-Dist: python-dotenv
24 | Requires-Dist: urllib3
25 | Requires-Dist: PyVirtualDisplay
26 | Requires-Dist: beautifulsoup4==4.12.3
27 | Requires-Dist: nodriver==0.38.post1
28 | Requires-Dist: requests
29 | Dynamic: author
30 | Dynamic: author-email
31 | Dynamic: classifier
32 | Dynamic: description
33 | Dynamic: description-content-type
34 | Dynamic: download-url
35 | Dynamic: home-page
36 | Dynamic: keywords
37 | Dynamic: license
38 | Dynamic: license-file
39 | Dynamic: requires-dist
40 | Dynamic: summary
41 |
42 |
43 |
44 | # 🐦 Scweet: A Simple and Unlimited Twitter Scraper in Python
45 |
46 | [](https://apify.com/altimis/scweet)
47 | [](https://pepy.tech/projects/scweet)
48 | [](https://pypi.org/project/scweet/)
49 | [](https://github.com/Altimis/scweet/blob/main/LICENSE)
50 |
51 | > **Note:** Scweet is **not affiliated with Twitter/X**. Use responsibly and lawfully.
52 |
53 | ---
54 |
55 | ## 🚀 Recent X Platform Changes & Scweet v3.0 Update
56 |
57 | Scweet has recently encountered challenges due to major changes on **X (formerly Twitter)**. In response, we’re excited to announce the new **Scweet v3.0** release!
58 |
59 | ### ✨ What’s New in v3.0:
60 | - ✅ Fully **asynchronous architecture** for faster, smoother scraping
61 | - 🧠 **No more manual Chromedriver setup** – Scweet handles Chromium internally
62 | - 🚀 Enhanced for **personal and research-level scraping**
63 | - ⚠️ **Follower/following scraping temporarily disabled** (to return in future updates)
64 |
65 | > 🔧 For heavy-duty scraping, we recommend using **[Scweet on Apify](https://apify.com/altimis/scweet)** – a cloud-based solution offering higher throughput and stability (up to **1000 tweets/minute**), no infrastructure setup needed.
66 |
67 | ⚠️ **Responsible Use Reminder**
68 | Whether running locally or in the cloud, **always scrape tweets ethically, lawfully, and respectfully**.
69 |
70 | ---
71 |
72 | ## 📌 What is Scweet?
73 |
74 | Scweet is a Python-based scraping tool designed to fetch tweets and user data **without relying on traditional Twitter APIs**, which have become increasingly restricted.
75 |
76 | With Scweet, you can:
77 | - Scrape tweets by keywords, hashtags, mentions, accounts, or timeframes
78 | - Get detailed user profile information
79 | - (Coming soon) Retrieve followers/following lists again!
80 |
81 | ---
82 |
83 | ## 🔧 Key Features
84 |
85 | ### 🐤 `scrape()` – Tweet Scraper
86 |
87 | Scrape tweets between two dates using keywords, hashtags, mentions, or specific accounts.
88 |
89 | **✅ Available arguments include:**
90 | ```python
91 | - since, until # Date range (format: YYYY-MM-DD)
92 | - words # Keywords (string or list, use "//" separator for strings)
93 | - from_account # Tweets from a user
94 | - to_account # Tweets to a user
95 | - mention_account # Tweets mentioning a user
96 | - hashtag # Search by hashtag
97 | - lang # Language code (e.g. "en")
98 | - limit # Max number of tweets
99 | - display_type # "Top" or "Latest"
100 | - resume # Resume from previous CSV
101 | - filter_replies # Include/exclude replies
102 | - proximity # Local tweet filtering
103 | - geocode # Geolocation filtering
104 | - minlikes # Tweets with minimum likes count
105 | - minretweets # Tweets with minimum retweets count
106 | - minreplies # Tweets with minimum replies count
107 | - save_dir # Output directory
108 | - custom_csv_name # Output csv name
109 | ```
110 | ---
111 |
112 | ### 👤 `get_user_information()` – User Info Scraper
113 |
114 | Fetch profile details for a list of handles. Returns a dictionary with:
115 | - `username` (display name)
116 | - `following` (number of accounts they follow)
117 | - `verified_followers` (number of verified followers)
118 | - `location`, `website`, `join_date`, `description`
119 |
120 | **🧩 Arguments:**
121 | ```python
122 | - handles # List of Twitter/X handles
123 | - login (bool) # Set True to login and access full data
124 | ```
125 |
126 | ---
127 |
128 | ### 🔒 `get_users_followers()` & `get_users_following()`
129 | ⚠️ **Currently Disabled due to platform changes**
130 | These will be re-enabled in future versions as we work around new limitations.
131 |
132 | ---
133 |
134 | ## 🛠️ Class Initialization & Configuration
135 |
136 | You can customize Scweet’s behavior during initialization:
137 |
138 | ```python
139 | scweet = Scweet(
140 | proxy=None, # Dict or None {host, port, username, password}
141 | cookies=None, # Use saved cookies file
142 | cookies_path='cookies', # Folder path where cookies will be saved/loaded in future usage
143 | user_agent=None, # Custom user agent string
144 | env_path='.env', # Environment variables
145 | n_splits=-1, # Split date interval (-1 for daily)
146 | concurrency=5, # Concurrent tabs
147 | headless=True, # Run headlessly
148 | scroll_ratio=100, # Adjust scroll behavior
149 | code_callback=None # Optional custom login code handler. Scweet only handles MailTM emails to get the code if X asks for it.
150 | )
151 | ```
152 |
153 | ---
154 |
155 | ## 🔐 Authentication
156 |
157 | Scweet requires login to fetch tweets. Set up your `.env` file like this:
158 |
159 | ```env
160 | EMAIL=your_email@example.com
161 | EMAIL_PASSWORD=your_email_password
162 | USERNAME=your_username
163 | PASSWORD=your_password
164 | ```
165 |
166 | Use the built-in helper to create disposable login emails:
167 |
168 | ```python
169 | from Scweet.utils import create_mailtm_email
170 | ```
171 |
172 | For custom email providers, pass your own `code_callback`.
173 |
174 | ---
175 |
176 | ## 🔧 Installation
177 |
178 | ```bash
179 | pip install Scweet==3.0
180 | ```
181 | Make sure your environment has Python 3.7+, a Chrome browser, and pip available.
182 |
183 | ## 💡 Example Usage
184 |
185 | ### 🐍 Python Script
186 |
187 | ```python
188 | from Scweet.scweet import Scweet
189 | from Scweet.user import get_user_information
190 |
191 | scweet = Scweet(proxy=None, cookies=None, cookies_path='cookies',
192 | user_agent=None, disable_images=True, env_path='.env',
193 | n_splits=-1, concurrency=5, headless=True, scroll_ratio=100)
194 |
195 | # Get user profile info
196 | handles = ['nagouzil', 'yassineaitjeddi', 'TahaAlamIdrissi']
197 | infos = scweet.get_user_information(handles=handles, login=True)
198 | print(infos)
199 |
200 | # Scrape tweets with keywords
201 | results = scweet.scrape(
202 | since="2022-10-01",
203 | until="2022-10-06",
204 | words=['bitcoin', 'ethereum'],
205 | lang="en",
206 | limit=20,
207 | display_type="Top",
208 | resume=False,
209 | filter_replies=False,
210 | minlikes=10,
211 | minretweets=10,
212 | save_dir='outputs',
213 | custom_csv_name='crypto.csv'
214 | )
215 | print(len(results))
216 | scweet.close()
217 | ```
218 |
219 | ### 📝 Example Output
220 |
221 | When you scrape tweets using the scrape() function, the results will be written to a CSV file, with each row representing a tweet. Here’s an example of what the output might look like:
222 |
223 |
224 | | tweetId | UserScreenName | UserName | Timestamp | Text | Embedded_text | Emojis | Comments | Likes | Retweets | Image link | Tweet URL |
225 | |--------------------|----------------|-----------|--------------------------|-------------------------------------------------------------------------------------------|--------------------------|--------|----------|-------|----------|--------------------------------------------------------------------------------------------------|--------------------------------------------------|
226 | | 1577716440299442187 | @elonmusk | Elon Musk | 2022-10-05T17:44:46.000Z | 10.69.3 will actually be a major upgrade. We’re keeping .69 just because haha. | Replying to@WholeMarsBlog | | 1256 | 18787 | 1000 | https://pbs.twimg.com/profile_images/1683899100922511378/5lY42eHs_bigger.jpg | /elonmusk/status/1577716440299442187 |
227 | | 1577737664689848326 | @elonmusk | Elon Musk | 2022-10-05T19:09:06.000Z | Twitter is an accelerant to fulfilling the original http://X.com vision | Replying to@TEDchris | | 967 | 10967 | 931 | https://pbs.twimg.com/profile_images/1683899100922511378/5lY42eHs_bigger.jpg | /elonmusk/status/1577737664689848326 |
228 | | 1577747565533069312 | @elonmusk | Elon Musk | 2022-10-05T19:48:27.000Z | That wouldn’t be hard to do | Replying to@ashleevance | | 1326 | 31734 | 1011 | https://pbs.twimg.com/profile_images/1683899100922511378/5lY42eHs_bigger.jpg | /elonmusk/status/1577747565533069312 |
229 | | 1577732106784051214 | @elonmusk | Elon Musk | 2022-10-05T18:47:01.000Z | *"I do not think it is simple at all, but I have yet to hear any realistic path to peace.* | | | – | – | – | – | /elonmusk/status/1577732106784051214 |
230 |
231 |
232 | **Columns description**:
233 |
234 | - **tweetId**: The unique identifier for the tweet.
235 | - **UserScreenName**: The Twitter/X handle of the user who posted the tweet.
236 | - **UserName**: The display name of the user.
237 | - **Timestamp**: The date and time the tweet was posted.
238 | - **Text**: The content of the tweet.
239 | - **Embedded_text**: If the tweet is a reply, this will show the user being replied to.
240 | - **Emojis**: Any emojis used in the tweet.
241 | - **Comments**: Number of replies to the tweet.
242 | - **Likes**: Number of likes the tweet received.
243 | - **Retweets**: Number of retweets the tweet received.
244 | - **Image link**: A link to the image(s) attached to the tweet, if any.
245 | - **Tweet URL**: Direct URL to the tweet.
246 |
247 | ---
248 |
249 | ## ☁️ Scweet on Apify (Cloud)
250 |
251 | Need powerful, scalable, high-volume scraping?
252 | Try [**Scweet on Apify**](https://apify.com/altimis/scweet) – a no-setup cloud solution:
253 |
254 | - 🚀 Up to **1000 tweets/minute**
255 | - 📦 Exports to datasets or files
256 | - 🔒 Secure, isolated runs
257 | - 🔁 Ideal for automation, long-term projects
258 |
259 | ---
260 |
261 | ## 🙏 Responsible Use
262 |
263 | We care deeply about ethical scraping.
264 |
265 | > **Please:** Use Scweet for research, archiving, and lawful purposes only.
266 |
267 | ---
268 |
269 | ## 📎 Resources
270 |
271 | - 📄 [Example Script](https://github.com/Altimis/Scweet/blob/master/example.py)
272 | - 🐞 [Issues / Bugs](https://github.com/Altimis/Scweet/issues)
273 | - 🌐 [Scweet on Apify](https://apify.com/altimis/scweet)
274 |
275 | ---
276 |
277 | ## ⭐ Star & Contribute
278 |
279 | If you find Scweet useful, consider **starring** the repo ⭐
280 | We welcome **PRs**, bug reports, and ideas for new features!
281 |
282 | ---
283 |
284 | MIT License • © 2020–2025 Altimis
285 |
--------------------------------------------------------------------------------
/Scweet.egg-info/SOURCES.txt:
--------------------------------------------------------------------------------
1 | LICENSE.txt
2 | README.md
3 | setup.cfg
4 | setup.py
5 | Scweet/__init__.py
6 | Scweet/__version__.py
7 | Scweet/const.py
8 | Scweet/mailtm.py
9 | Scweet/scweet.py
10 | Scweet/scweet_v1.8.py
11 | Scweet/user.py
12 | Scweet/utils.py
13 | Scweet.egg-info/PKG-INFO
14 | Scweet.egg-info/SOURCES.txt
15 | Scweet.egg-info/dependency_links.txt
16 | Scweet.egg-info/requires.txt
17 | Scweet.egg-info/top_level.txt
--------------------------------------------------------------------------------
/Scweet.egg-info/dependency_links.txt:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/Scweet.egg-info/requires.txt:
--------------------------------------------------------------------------------
1 | certifi
2 | python-dotenv
3 | urllib3
4 | PyVirtualDisplay
5 | beautifulsoup4==4.12.3
6 | nodriver==0.38.post1
7 | requests
8 |
--------------------------------------------------------------------------------
/Scweet.egg-info/top_level.txt:
--------------------------------------------------------------------------------
1 | Scweet
2 |
--------------------------------------------------------------------------------
/Scweet/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Altimis/Scweet/74e23e61e4a73aa71cdf76c1ea9981f83802ddb1/Scweet/__init__.py
--------------------------------------------------------------------------------
/Scweet/__version__.py:
--------------------------------------------------------------------------------
1 | VERSION = (3, 1)
2 |
3 | __version__ = '.'.join(map(str, VERSION))
--------------------------------------------------------------------------------
/Scweet/const.py:
--------------------------------------------------------------------------------
1 | import dotenv
2 | import os
3 | from pathlib import Path
4 |
5 | current_dir = Path(__file__).parent.absolute()
6 |
7 |
8 | # env_file = os.getenv("SCWEET_ENV_FILE", current_dir.parent.joinpath(".env"))
9 | # dotenv.load_dotenv(env_file, verbose=True)
10 |
11 |
def load_env_variable(key, default_value=None, none_allowed=False):
    """Read *key* from the process environment.

    Falls back to *default_value* when the variable is unset. Raises
    RuntimeError when the resolved value is None, unless *none_allowed*
    is True (used for optional credentials such as EMAIL_PASSWORD).
    """
    v = os.getenv(key, default=default_value)
    if not none_allowed and v is None:
        raise RuntimeError(f"{key} returned {v} but this is not allowed!")
    return v
17 |
18 |
def _load_from_env(env, key, none_allowed=False):
    """Reload the .env file at *env* and return the value for *key*."""
    # Re-read the file on every call (override=True) so credential changes
    # are picked up without restarting the process.
    dotenv.load_dotenv(env, verbose=True, override=True)
    return load_env_variable(key, none_allowed=none_allowed)


def get_email(env):
    """Return the EMAIL entry from the .env file at *env* (required)."""
    return _load_from_env(env, "EMAIL")


def get_email_password(env):
    """Return the EMAIL_PASSWORD entry from the .env file at *env* (may be None)."""
    return _load_from_env(env, "EMAIL_PASSWORD", none_allowed=True)


def get_password(env):
    """Return the PASSWORD entry from the .env file at *env* (required)."""
    return _load_from_env(env, "PASSWORD")


def get_username(env):
    """Return the USERNAME entry from the .env file at *env* (required)."""
    return _load_from_env(env, "USERNAME")
37 |
--------------------------------------------------------------------------------
/Scweet/debug.log:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Altimis/Scweet/74e23e61e4a73aa71cdf76c1ea9981f83802ddb1/Scweet/debug.log
--------------------------------------------------------------------------------
/Scweet/mailtm.py:
--------------------------------------------------------------------------------
1 | """
2 | Scweet - Twitter Scraping Tool
3 | Author: Yassine Ait Jeddi (@altimis)
4 | License: MIT
5 | Repository: https://github.com/Altimis/scweet
6 | """
7 |
8 | import requests
9 |
10 |
class Domain:
    """A mail.tm email domain as returned by the /domains endpoint."""

    def __init__(self, domainJson):
        """Keep only the fields Scweet needs: the domain name and its API id."""
        self.domain, self.id = domainJson["domain"], domainJson["id"]
15 |
16 |
class Mail:
    """A single message in a mail.tm inbox.

    The full message body is downloaded eagerly during construction via a
    follow-up API call (see ``read``).
    """

    def __init__(self, emailJson, token):
        """Populate message metadata from *emailJson* and fetch its text."""
        self.session = requests.Session()
        self.token = token
        self.fromAddress = emailJson["from"]["address"]
        self.fromName = emailJson["from"]["name"]
        self.toAddress = [receiver["address"] for receiver in emailJson["to"]]
        self.subject = emailJson["subject"]
        self.size = emailJson["size"]
        self.id = emailJson["id"]
        # Fetch the full payload right away; the text is read via the API.
        self.text = self.read()["text"]

    def _headers(self):
        # Bearer-token headers required by the per-message endpoints.
        return {
            "Authorization": "Bearer " + self.token,
            "Content-Type": "application/json",
        }

    def read(self):
        """Return the full message payload from the mail.tm API as a dict."""
        r = self.session.get(
            "https://api.mail.tm/messages/" + self.id,
            headers=self._headers(),
        )

        return r.json()

    def delete(self):
        """Delete this message; returns 0 on success (204), 1 if missing (404)."""
        r = self.session.delete(
            "https://api.mail.tm/messages/" + self.id,
            headers=self._headers(),
        )

        if r.status_code == 204:
            return 0
        if r.status_code == 404:
            return 1
55 |
56 |
class MailTMClient:
    """Minimal client for the https://mail.tm disposable-email REST API."""

    def __init__(self, token=None):
        """Create a client; *token* is a bearer token from a prior login, or None."""
        self.session = requests.Session()
        # FIX: the original wrote `token if token is not None else token`,
        # which is a tautology; a plain assignment expresses the intent.
        self.token = token

    def getAvailableDomains(self):
        """Return the currently available domains as a list of Domain objects."""
        r = self.session.get("https://api.mail.tm/domains")
        domains = []
        for domainJson in r.json()["hydra:member"]:
            # Only fetch public & active domains for now.
            if domainJson["isActive"] and not domainJson["isPrivate"]:
                domains.append(Domain(domainJson))
        return domains

    def register(self, address, password):
        """Create an account and immediately log in.

        Returns (0, token) on success, (1, detail) on a 400 error,
        (2, detail) on a 422 error, and (-1, None) for anything unexpected.
        """
        r = self.session.post(
            "https://api.mail.tm/accounts",
            json={
                "address": address,
                "password": password,
            },
        )

        if r.status_code in (200, 201):
            (responseCode, response) = self.login(address, password)
            if responseCode == 0:
                return (0, response)
        elif r.status_code == 400:
            return (1, r.json()["detail"])
        elif r.status_code == 422:
            return (2, r.json()["detail"])
        print(f'response {r.status_code}')
        return -1, None

    def login(self, address, password):
        """Authenticate and return (0, token) on success or (1, message) on 401.

        FIX: the original returned None for any other status code, which made
        callers that tuple-unpack the result (e.g. register) crash with a
        TypeError; unexpected statuses now return (-1, None) instead.
        """
        r = self.session.post(
            "https://api.mail.tm/token",
            json={
                "address": address,
                "password": password,
            },
        )

        if r.status_code == 200:
            return (0, r.json()["token"])
        if r.status_code == 401:
            return (1, r.json()["message"])
        return (-1, None)

    def getInbox(self):
        """Return every message in the authenticated inbox as Mail objects."""
        r = self.session.get(
            "https://api.mail.tm/messages",
            headers={
                "Authorization": "Bearer " + self.token,
                "Content-Type": "application/json",
            },
        )

        inbox = []
        for emailJson in r.json()["hydra:member"]:
            inbox.append(Mail(emailJson, self.token))
        return inbox
--------------------------------------------------------------------------------
/Scweet/scweet.py:
--------------------------------------------------------------------------------
1 | """
2 | Scweet - Twitter Scraping Tool
3 | Author: Yassine Ait Jeddi (@altimis)
4 | License: MIT
5 | Repository: https://github.com/Altimis/scweet
6 | """
7 |
8 | import asyncio
9 | import logging
10 | import argparse
11 | import csv
12 | import json
13 | import re
14 | import os
15 | import math
16 | from datetime import datetime, timedelta, date
17 | from typing import Awaitable, Callable, Optional, Union, List
18 |
19 | import platform
20 |
21 | import nodriver as uc
22 | from requests.cookies import create_cookie
23 | from bs4 import BeautifulSoup
24 | from pyvirtualdisplay import Display
25 |
26 | from .const import get_username, get_password, get_email, get_email_password
27 | from .utils import (check_element_if_exists_by_text, check_element_if_exists_by_css,
28 | get_code_from_email, extract_count_from_aria_label)
29 |
30 | logging.getLogger('urllib3').setLevel(logging.WARNING)
31 | logging.getLogger('seleniumwire').setLevel(logging.ERROR)
32 | logging.getLogger('selenium').setLevel(logging.ERROR)
33 | logging.basicConfig(level=logging.INFO, format='%(asctime)s:%(message)s')
34 |
35 | # display = Display(visible=0, size=(1024, 768))
36 |
37 |
def parse_followers(text):
    """Convert a follower-count label (e.g. '10.5K Followers') to an int.

    Only the first whitespace-separated token is parsed. Handles plain
    numbers with thousands separators ('1,234') and the K/M abbreviations
    Twitter uses; generalized to accept lowercase suffixes and 'B'
    (billions) as well, which the original rejected.

    Raises ValueError if the token is not numeric.
    """
    token = text.split(' ')[0].replace(',', '')
    multipliers = {'K': 1_000, 'M': 1_000_000, 'B': 1_000_000_000}
    if token and token[-1].upper() in multipliers:
        return int(float(token[:-1]) * multipliers[token[-1].upper()])
    return int(token)
48 |
49 |
50 | class Scweet:
51 | main_tab: uc.Tab
52 | def __init__(self, proxy=None, cookies=None, cookies_path=None, user_agent=None,
53 | disable_images=False, env_path=None, n_splits=5, concurrency=5, headless=True, scroll_ratio=30,
54 | code_callback: Optional[Callable[[str, str], Awaitable[str]]] = None):
55 | self.driver = None
56 | self.proxy = proxy
57 | self.cookies = cookies
58 | self.user_agent = user_agent
59 | self.cookies_path = cookies_path
60 | self.disable_images = disable_images
61 | self.env_path = env_path
62 | self.n_splits = n_splits
63 | self.concurrency = concurrency
64 | self.headless = headless
65 | self.scroll_ratio = scroll_ratio
66 | self.logged_in = False
67 | self.suspended = False
68 | # If no custom code callback is provided, use the default get_code_from_email for mailtm
69 | self.code_callback = code_callback or get_code_from_email
70 | self.display = None
71 | if self.headless:
72 | if self.headless and platform.system() in ["Linux"]:
73 | logging.info("Starting pyvirtualdisplay for Linux headless mode")
74 | self.display = Display(visible=0, size=(1024, 768))
75 | self.display.start()
76 |
77 | async def init_nodriver(self):
78 | config = uc.Config()
79 | config.lang = "en-US"
80 | # Enable built-in headless mode for Windows and macOS
81 | if self.headless and platform.system() in ["Windows", "Darwin"]:
82 | logging.info("Using nodriver's headless mode for Windows/macOS")
83 | config.headless = True
84 |
85 | if self.proxy:
86 | logging.info(f"setting proxy : {self.proxy['host']}:{self.proxy['port']}")
87 | config.add_argument(f"--proxy-server={self.proxy['host']}:{self.proxy['port']}")
88 | if self.user_agent:
89 | config.add_argument(f'--user-agent={self.user_agent}')
90 | if self.disable_images:
91 | config.add_argument(f'--blink-settings=imagesEnabled=false')
92 | self.driver = await uc.start(config)
93 | self.main_tab = await self.driver.get("draft:,")
94 | if self.proxy:
95 | self.main_tab.add_handler(uc.cdp.fetch.RequestPaused, self.req_paused)
96 | self.main_tab.add_handler(
97 | uc.cdp.fetch.AuthRequired, self.auth_challenge_handler
98 | )
99 |
100 | await self.main_tab.send(uc.cdp.fetch.enable(handle_auth_requests=True))
101 | page = await self.driver.get("https://www.whatismyip.com/")
102 | await asyncio.sleep(5)
103 |
104 | async def auth_challenge_handler(self, event: uc.cdp.fetch.AuthRequired):
105 | # Split the credentials
106 | # Respond to the authentication challenge
107 | asyncio.create_task(
108 | self.main_tab.send(
109 | uc.cdp.fetch.continue_with_auth(
110 | request_id=event.request_id,
111 | auth_challenge_response=uc.cdp.fetch.AuthChallengeResponse(
112 | response="ProvideCredentials",
113 | username=self.proxy['username'],
114 | password=self.proxy['password'],
115 | ),
116 | )
117 | )
118 | )
119 |
120 | async def req_paused(self, event: uc.cdp.fetch.RequestPaused):
121 | asyncio.create_task(
122 | self.main_tab.send(
123 | uc.cdp.fetch.continue_request(request_id=event.request_id)
124 | )
125 | )
126 |
127 | async def enter_code(self, code):
128 | try:
129 | code_el = await self.main_tab.select("input[data-testid=ocfEnterTextTextInput]")
130 | await self.main_tab.sleep(15)
131 | if not code:
132 | return False
133 | await code_el.send_keys(code)
134 | await self.main_tab.sleep(2)
135 | try:
136 | next = await self.main_tab.find("Suivant", best_match=True)
137 | except Exception as e:
138 | next = await self.main_tab.find("Next", best_match=True)
139 | except Exception as err:
140 | next = await self.main_tab.find("Se Connecter", best_match=True)
141 | except Exception as eerr:
142 | next = await self.main_tab.find("Login", best_match=True)
143 |
144 | await next.click()
145 | await self.main_tab.sleep(2)
146 | return True
147 |
148 | except Exception as e:
149 | print(f"couldn't enter code : {e}")
150 | return False
151 |
152 | async def enter_username(self, username):
153 | try:
154 | username_el = await self.main_tab.select("input[data-testid=ocfEnterTextTextInput]")
155 | await username_el.send_keys(username)
156 | await self.main_tab.sleep(1)
157 | try:
158 | next = await self.main_tab.find("Suivant", best_match=True)
159 | except Exception as e:
160 | next = await self.main_tab.find("Next", best_match=True)
161 | except Exception as err:
162 | next = await self.main_tab.find("Se Connecter", best_match=True)
163 | except Exception as eerr:
164 | next = await self.main_tab.find("Login", best_match=True)
165 |
166 | await next.click()
167 | await self.main_tab.sleep(1)
168 | except Exception as e:
169 | print(f"Error entering username : {e}")
170 | pass
171 |
172 | async def normal_login(self, account):
173 | # enter username
174 | email_el = await self.main_tab.select("input[autocomplete=username]")
175 | await email_el.send_keys(account['email_address'])
176 | await self.main_tab.sleep(1)
177 | logging.info('Entered email')
178 |
179 | # click next
180 | try:
181 | next = await self.main_tab.find("Suivant", best_match=True)
182 | except:
183 | next = await self.main_tab.find("Next", best_match=True)
184 | await next.click()
185 | await self.main_tab.sleep(1)
186 |
187 | # check if username is required and enter
188 | try:
189 | await self.main_tab.sleep(1)
190 | await self.main_tab.find(
191 | "Entrez votre adresse email ou votre nom d'utilisateur.") # Enter your phone number or username
192 | await self.enter_username(account['username'])
193 | logging.info('entered username')
194 | except:
195 | pass
196 |
197 | try:
198 | await self.main_tab.sleep(1)
199 | await self.main_tab.find(
200 | "Enter your phone number or username") # Enter your phone number or username
201 | await self.enter_username(account['username'])
202 | logging.info('Entered username')
203 | except:
204 | pass
205 |
206 | # enter password
207 | password_el = await self.main_tab.select("input[autocomplete=current-password]")
208 | await password_el.send_keys(account['password'])
209 | await self.main_tab.sleep(2)
210 | logging.info('Entered password')
211 |
212 | # click login
213 | try:
214 | next = await self.main_tab.find("Se Connecter", best_match=True)
215 | except Exception as e:
216 | next = await self.main_tab.find("Login", best_match=True)
217 | except Exception as err:
218 | pass
219 |
220 | await self.main_tab.sleep(1)
221 | await next.click()
222 |
223 | if await self._is_logged_in():
224 | logging.info("Logged in successfully.")
225 | self.cookies = await self.driver.cookies.get_all(requests_cookie_format=True)
226 | if self.cookies_path:
227 | await self.driver.cookies.save(f"{self.cookies_path}/{account['username']}_cookies.dat")
228 | return self.main_tab, True, "", self.cookies
229 |
230 | # wait for code to be sent if required
231 | if (await check_element_if_exists_by_text(self.main_tab, "Code de confirmation") or
232 | await check_element_if_exists_by_text(self.main_tab, "Confirmation code")):
233 | # code = input("Enter the code you received in your email : ")
234 | await self.main_tab.sleep(10)
235 | code = await self.code_callback(account.get('email_address'), account.get('email_password'))
236 | code_status = await self.enter_code(code)
237 | if not code_status:
238 | return self.main_tab, False, "code_not_found", None
239 | logging.info('Entered Confirmation code')
240 |
241 |
242 | if (await check_element_if_exists_by_text(self.main_tab,
243 | "Please verify your email address.", timeout=20) or
244 | await check_element_if_exists_by_text(self.main_tab,
245 | 'Your account has been locked.', timeout=20)):
246 | return self.main_tab, False, "Account locked.", None
247 |
248 | # check if login is successful
249 | if await self._is_logged_in():
250 | logging.info("Logged in successfully.")
251 | self.cookies = await self.driver.cookies.get_all(requests_cookie_format=True)
252 | if self.cookies_path:
253 | await self.driver.cookies.save(f"{self.cookies_path}/{account['username']}_cookies.dat")
254 | return self.main_tab, True, "", self.cookies
255 | else:
256 | return None, False, "Locked", None
257 |
258 | async def login(self):
259 | # await self.init_nodriver()
260 | if self.logged_in:
261 | return self.main_tab, True, "", self.cookies
262 | account = {
263 | "email_address": get_email(self.env_path),
264 | "password": get_password(self.env_path),
265 | "username": get_username(self.env_path),
266 | "email_password": get_email_password(self.env_path)
267 | }
268 | if not account.get('email_address') or not account.get('password') or not account.get('username'):
269 | logging.info(f"Provide twitter account credentials to login.")
270 | return self.main_tab, False, "Account_creds_required", None
271 | self.main_tab = await self.driver.get("https://x.com/login")
272 | await self.main_tab.sleep(2)
273 | if os.path.exists(f"{self.cookies_path}/{account['username']}_cookies.dat"):
274 | logging.info(f"Loading cookies from path {self.cookies_path} ...")
275 | await self.driver.cookies.load(f"{self.cookies_path}/{account['username']}_cookies.dat")
276 | self.main_tab = await self.driver.get("https://x.com/login")
277 | await self.main_tab.sleep(3)
278 | elif self.cookies:
279 | logging.info(f"Loading cookies from file ...")
280 | await self.load_cookies(self.cookies)
281 | self.main_tab = await self.driver.get("https://x.com/login")
282 | await self.main_tab.sleep(3)
283 |
284 | if await self._is_logged_in():
285 | logging.info(f"Logged in successfully to {account.get('username')}")
286 | return self.main_tab, True, "", self.cookies
287 |
288 | if await check_element_if_exists_by_css(self.main_tab, "input[autocomplete=username]"):
289 | logging.info(f"Login in from scratch to {account.get('username')}")
290 | return await self.normal_login(account)
291 | else:
292 | logging.info("Something unexpected happened. Aborting.")
293 | return self.main_tab, False, "Other", None
294 |
295 | async def _is_logged_in(self):
296 | try:
297 | home = await self.main_tab.select("a[href='/home']")
298 | self.logged_in = True
299 | return True
300 | except Exception as e:
301 | return False
302 |
303 | async def load_cookies(self, cookie_dicts):
304 | for cdict in cookie_dicts:
305 | # Recreate the cookie using requests' create_cookie function
306 | c = create_cookie(
307 | name=cdict["name"],
308 | value=cdict["value"],
309 | domain=cdict["domain"],
310 | path=cdict["path"],
311 | expires=cdict["expires"],
312 | secure=cdict["secure"]
313 | )
314 | self.driver.cookies.set_cookie(c)
315 |
316 | async def get_data(self, post_soup):
317 | # username
318 | username_tag = post_soup.find('span')
319 | username = username_tag.get_text(strip=True) if username_tag else ""
320 |
321 | # handle: a span with '@'
322 | handle_tag = post_soup.find('span', text=lambda t: t and '@' in t)
323 | handle = handle_tag.get_text(strip=True) if handle_tag else ""
324 |
325 | # postdate: