├── .env
├── .gitignore
├── .idea
│   ├── Scweet.iml
│   ├── inspectionProfiles
│   │   └── profiles_settings.xml
│   ├── misc.xml
│   ├── modules.xml
│   ├── vcs.xml
│   └── workspace.xml
├── LICENSE.txt
├── README.md
├── Scweet.egg-info
│   ├── PKG-INFO
│   ├── SOURCES.txt
│   ├── dependency_links.txt
│   ├── requires.txt
│   └── top_level.txt
├── Scweet
│   ├── __init__.py
│   ├── __version__.py
│   ├── const.py
│   ├── debug.log
│   ├── mailtm.py
│   ├── scweet.py
│   ├── scweet_v1.8.py
│   ├── user.py
│   └── utils.py
├── example.py
├── images
│   ├── 1_1.jpg
│   ├── 2_1.jpg
│   ├── 3_1.jpg
│   ├── 4_1.jpg
│   ├── 5_1.jpg
│   ├── 6_1.jpg
│   ├── 7_1.jpg
│   └── 8_1.jpg
├── requirements.txt
├── setup.cfg
└── setup.py

/.env:
--------------------------------------------------------------------------------
1 | EMAIL=''
2 | PASSWORD=''
3 | EMAIL_PASSWORD=''
4 | USERNAME=''
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | # Ignore packaging metadata
 2 | *.egg-info/
 3 | dist/
 4 | build/
 5 |
 6 | # Ignore IDE settings
 7 | .idea/
 8 |
 9 | # Ignore images if not used in README
10 | images/
11 |
12 | # Ignore specific files inside Scweet/
13 | Scweet/debug.log
14 | Scweet/scweet_v1.8.py
15 | Scweet/user.py
16 |
--------------------------------------------------------------------------------
/.idea/Scweet.iml:
--------------------------------------------------------------------------------
(XML content not captured in this dump)
--------------------------------------------------------------------------------
/.idea/inspectionProfiles/profiles_settings.xml:
--------------------------------------------------------------------------------
(XML content not captured in this dump)
--------------------------------------------------------------------------------
/.idea/misc.xml:
--------------------------------------------------------------------------------
(XML content not captured in this dump)
--------------------------------------------------------------------------------
/.idea/modules.xml:
--------------------------------------------------------------------------------
(XML content not captured in this dump)
--------------------------------------------------------------------------------
/.idea/vcs.xml:
--------------------------------------------------------------------------------
(XML content not captured in this dump)
--------------------------------------------------------------------------------
/.idea/workspace.xml:
--------------------------------------------------------------------------------
(XML content not captured in this dump)
--------------------------------------------------------------------------------
/LICENSE.txt:
--------------------------------------------------------------------------------
1 | MIT License
2 | Copyright (c) 2020 Altimis Teams
3 | Permission is hereby granted, free of charge, to any person obtaining a copy
4 | of this software and associated documentation files (the "Software"), to deal
5 | in the Software without restriction, including without limitation the rights
6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7 | copies of the Software, and to permit persons to whom the Software is
8 | furnished to do so, subject to the following conditions:
9 | The above copyright notice and this 
permission notice shall be included in all 10 | copies or substantial portions of the Software. 11 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 12 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 13 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 14 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 15 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 16 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 17 | SOFTWARE. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # 🐦 Scweet: A Simple and Unlimited Twitter Scraper in Python 2 | 3 | [![Scweet Actor Status](https://apify.com/actor-badge?actor=altimis/scweet)](https://apify.com/altimis/scweet) 4 | [![PyPI Downloads](https://static.pepy.tech/badge/scweet/month)](https://pepy.tech/projects/scweet) 5 | [![PyPI Version](https://img.shields.io/pypi/v/scweet.svg)](https://pypi.org/project/scweet/) 6 | [![License](https://img.shields.io/github/license/Altimis/scweet)](https://github.com/Altimis/scweet/blob/main/LICENSE) 7 | 8 | > **Note:** Scweet is **not affiliated with Twitter/X**. Use responsibly and lawfully. 9 | 10 | --- 11 | 12 | ## 🚀 Scweet on Apify: Cloud-Powered Scraping 13 | 14 | For heavy-duty scraping, we recommend using [**Scweet on Apify**](https://apify.com/altimis/scweet?fpr=a40q9&fp_sid=jeb97) – a cloud-based solution that offers: 15 | - **Zero setup:** No need to install or maintain infrastructure. 16 | - **Incredible Speed:** Up to **1000 tweets per minute**. 17 | - **High Reliability:** Managed and isolated runs for consistent performance. 18 | - **Free Usage Tier:** Get started for free with a generous quota—perfect for experiments, small projects, or learning how Scweet works. Once you exceed the free quota, you'll pay only **$0.30 per 1,000 tweets**. 19 | 20 | [![Run on Apify](https://apify.com/static/run-on-apify-button.svg)](https://apify.com/altimis/scweet?fpr=a40q9&fp_sid=jeb97) 21 | 22 | --- 23 | 24 | ## 🚀 Recent X Platform Changes & Scweet v3 Update 25 | 26 | Scweet has recently encountered challenges due to major changes on **X (formerly Twitter)**. In response, we’re excited to announce the new **Scweet v3** release! 27 | 28 | ### ✨ What’s New in v3: 29 | - ✅ Fully **asynchronous architecture** for faster, smoother scraping 30 | - 🧠 **No more manual Chromedriver setup** – Scweet handles Chromium internally with **[Nodriver](https://github.com/ultrafunkamsterdam/nodriver)** 31 | - 🚀 Enhanced for **personal and research-level scraping** 32 | - 🧑‍🤝‍🧑 **Follower & following scraping is back!** (see below 👇) 33 | 34 | --- 35 | 36 | ## 📌 What is Scweet? 37 | 38 | Scweet is a Python-based scraping tool designed to fetch tweets and user data **without relying on traditional Twitter APIs**, which have become increasingly restricted. 39 | 40 | With Scweet, you can: 41 | - Scrape tweets by keywords, hashtags, mentions, accounts, or timeframes 42 | - Get detailed user profile information 43 | - ✅ Retrieve followers, following, and verified followers! 44 | 45 | --- 46 | 47 | ## 🔧 Key Features 48 | 49 | ### 🐤 `scrape()` – Tweet Scraper 50 | 51 | Scrape tweets between two dates using keywords, hashtags, mentions, or specific accounts. 
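For example, a minimal call looks like the sketch below (the `scweet` object is the configured `Scweet` instance from the "Class Initialization & Configuration" section further down; the argument values are illustrative):

```python
# Minimal sketch: English tweets about bitcoin posted during one week of October 2022.
# Assumes `scweet` was created as shown in "Class Initialization & Configuration" below.
results = scweet.scrape(
    since="2022-10-01",
    until="2022-10-06",
    words=["bitcoin"],
    lang="en",
    limit=20,
    save_dir="outputs",
)
print(len(results))  # results are also written to a CSV file inside `outputs/`
```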
52 | 53 | **✅ Available arguments include:** 54 | ```python 55 | - since, until 56 | - words 57 | - from_account, to_account, mention_account 58 | - hashtag, lang 59 | - limit, display_type, resume 60 | - filter_replies, proximity, geocode 61 | - minlikes, minretweets, minreplies 62 | - save_dir, custom_csv_name 63 | ``` 64 | 65 | --- 66 | 67 | ### 👤 `get_user_information()` – User Info Scraper 68 | 69 | Fetch profile details for a list of handles. Returns a dictionary with: 70 | - `username`, `verified_followers` 71 | - `following`, `location`, `website`, `join_date`, `description` 72 | 73 | **🧩 Arguments:** 74 | ```python 75 | - handles # List of Twitter/X handles 76 | - login (bool) # Required for complete data 77 | ``` 78 | 79 | --- 80 | 81 | ### 🧑‍🤝‍🧑 `get_followers()`, `get_following()`, `get_verified_followers()` – NEW! 🎉 82 | 83 | Scweet now supports scraping followers and followings again! 84 | 85 | > ⚠️ **Important Note:** This functionality relies on browser rendering and may trigger rate-limiting or account lockouts. Use with caution and always stay logged in during scraping. 86 | 87 | **🧩 Example Usage:** 88 | ```python 89 | handle = "x_born_to_die_x" 90 | 91 | # Get followers 92 | followers = scweet.get_followers(handle=handle, login=True, stay_logged_in=True, sleep=1) 93 | 94 | # Get following 95 | following = scweet.get_following(handle=handle, login=True, stay_logged_in=True, sleep=1) 96 | 97 | # Get only verified followers 98 | verified = scweet.get_verified_followers(handle=handle, login=True, stay_logged_in=True, sleep=1) 99 | ``` 100 | 101 | --- 102 | 103 | ## 🛠️ Class Initialization & Configuration 104 | 105 | Customize Scweet’s behavior during setup: 106 | 107 | ```python 108 | scweet = Scweet( 109 | proxy=None, # Dict or None 110 | cookies=None, # Nodriver-based cookie handling 111 | cookies_path='cookies', # Folder for saving/loading cookies 112 | user_agent=None, # Optional custom user agent 113 | disable_images=True, # Speeds up scraping 114 | env_path='.env', # Path to your .env file 115 | n_splits=-1, # Date range splitting 116 | concurrency=5, # Number of concurrent tabs 117 | headless=True, # Headless scraping 118 | scroll_ratio=100 # Adjust for scroll depth/speed 119 | ) 120 | ``` 121 | 122 | --- 123 | 124 | ## 🔐 Authentication 125 | 126 | Scweet requires login for tweets, user info, and followers/following. 127 | 128 | Set up your `.env` file like this: 129 | 130 | ```env 131 | EMAIL=your_email@example.com 132 | EMAIL_PASSWORD=your_email_password 133 | USERNAME=your_username 134 | PASSWORD=your_password 135 | ``` 136 | 137 | Need a temp email? Use built-in MailTM integration: 138 | 139 | ```python 140 | from Scweet.utils import create_mailtm_email 141 | email, password = create_mailtm_email() 142 | ``` 143 | 144 | --- 145 | 146 | ## 🔧 Installation 147 | 148 | ```bash 149 | pip install Scweet 150 | ``` 151 | Requires **Python 3.7+** and a Chromium-based browser. 
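One quick way to confirm the installation worked (a sketch, assuming a standard install from PyPI) is to import the package and print its version:

```python
# Post-install sanity check: the package ships a version module.
from Scweet.__version__ import __version__

print(__version__)  # e.g. "3.1"
```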
152 | 153 | --- 154 | 155 | ## 💡 Example Usage 156 | 157 | ### 🐍 Python Script 158 | 159 | ```python 160 | from Scweet.scweet import Scweet 161 | from Scweet.utils import create_mailtm_email 162 | 163 | scweet = Scweet(proxy=None, cookies=None, cookies_path='cookies', 164 | user_agent=None, disable_images=True, env_path='.env', 165 | n_splits=-1, concurrency=5, headless=False, scroll_ratio=100) 166 | 167 | # Get followers (⚠️ requires login) 168 | followers = scweet.get_followers(handle="x_born_to_die_x", login=True, stay_logged_in=True, sleep=1) 169 | print(followers) 170 | 171 | # Get user profile data 172 | infos = scweet.get_user_information(handles=["x_born_to_die_x", "Nabila_Gl"], login=True) 173 | print(infos) 174 | 175 | # Scrape tweets 176 | results = scweet.scrape( 177 | since="2022-10-01", 178 | until="2022-10-06", 179 | words=["bitcoin", "ethereum"], 180 | lang="en", 181 | limit=20, 182 | minlikes=10, 183 | minretweets=10, 184 | save_dir='outputs', 185 | custom_csv_name='crypto.csv' 186 | ) 187 | print(len(results)) 188 | ``` 189 | 190 | --- 191 | 192 | ## 📝 Example Output 193 | 194 | | tweetId | UserScreenName | Text | Likes | Retweets | Timestamp | 195 | |--------|----------------|------|-------|----------|-----------| 196 | | ... | @elonmusk | ... | 18787 | 1000 | 2022-10-05T17:44:46.000Z | 197 | 198 | > Full CSV output includes user info, tweet text, stats, embedded replies, media, and more. 199 | 200 | --- 201 | 202 | ## ☁️ Scweet on Apify (Cloud) 203 | 204 | Need powerful, scalable, high-volume scraping? 205 | Try [**Scweet on Apify**](https://apify.com/altimis/scweet): 206 | 207 | - 🚀 Up to **1000 tweets/minute** 208 | - 📦 Export to datasets 209 | - 🔒 Secure, isolated browser instances 210 | - 🔁 Ideal for automation & research projects 211 | 212 | --- 213 | 214 | ## 🙏 Responsible Use 215 | 216 | We care deeply about ethical scraping. 217 | 218 | > **Please:** Use Scweet for research, education, and lawful purposes only. Respect platform terms and user privacy. 219 | 220 | --- 221 | 222 | ## 📎 Resources 223 | 224 | - 📄 [Example Script](https://github.com/Altimis/Scweet/blob/master/example.py) 225 | - 🐞 [Issues / Bugs](https://github.com/Altimis/Scweet/issues) 226 | - 🌐 [Scweet on Apify](https://apify.com/altimis/scweet) 227 | 228 | --- 229 | 230 | ## ⭐ Star & Contribute 231 | 232 | If you find Scweet useful, consider **starring** the repo ⭐ 233 | We welcome **PRs**, bug reports, and feature suggestions! 
234 | 235 | --- 236 | 237 | MIT License • © 2020–2025 Altimis -------------------------------------------------------------------------------- /Scweet.egg-info/PKG-INFO: -------------------------------------------------------------------------------- 1 | Metadata-Version: 2.4 2 | Name: Scweet 3 | Version: 3.1 4 | Summary: Tool for scraping Tweets 5 | Home-page: https://github.com/Altimis/Scweet 6 | Download-URL: https://github.com/Altimis/Scweet/archive/v3.0.tar.gz 7 | Author: Yassine AIT JEDDI 8 | Author-email: aitjeddiyassine@gmail.com 9 | License: MIT 10 | Keywords: twitter,scraper,python,crawl,following,followers,twitter-scraper,tweets 11 | Classifier: Development Status :: 4 - Beta 12 | Classifier: Intended Audience :: Developers 13 | Classifier: Topic :: Software Development :: Build Tools 14 | Classifier: License :: OSI Approved :: MIT License 15 | Classifier: Programming Language :: Python :: 3.7 16 | Classifier: Programming Language :: Python :: 3.8 17 | Classifier: Programming Language :: Python :: 3.9 18 | Classifier: Programming Language :: Python :: 3.10 19 | Classifier: Programming Language :: Python :: 3.11 20 | Description-Content-Type: text/markdown 21 | License-File: LICENSE.txt 22 | Requires-Dist: certifi 23 | Requires-Dist: python-dotenv 24 | Requires-Dist: urllib3 25 | Requires-Dist: PyVirtualDisplay 26 | Requires-Dist: beautifulsoup4==4.12.3 27 | Requires-Dist: nodriver==0.38.post1 28 | Requires-Dist: requests 29 | Dynamic: author 30 | Dynamic: author-email 31 | Dynamic: classifier 32 | Dynamic: description 33 | Dynamic: description-content-type 34 | Dynamic: download-url 35 | Dynamic: home-page 36 | Dynamic: keywords 37 | Dynamic: license 38 | Dynamic: license-file 39 | Dynamic: requires-dist 40 | Dynamic: summary 41 | 42 | 43 | 44 | # 🐦 Scweet: A Simple and Unlimited Twitter Scraper in Python 45 | 46 | [![Scweet Actor Status](https://apify.com/actor-badge?actor=altimis/scweet)](https://apify.com/altimis/scweet) 47 | [![PyPI Downloads](https://static.pepy.tech/badge/scweet/month)](https://pepy.tech/projects/scweet) 48 | [![PyPI Version](https://img.shields.io/pypi/v/scweet.svg)](https://pypi.org/project/scweet/) 49 | [![License](https://img.shields.io/github/license/Altimis/scweet)](https://github.com/Altimis/scweet/blob/main/LICENSE) 50 | 51 | > **Note:** Scweet is **not affiliated with Twitter/X**. Use responsibly and lawfully. 52 | 53 | --- 54 | 55 | ## 🚀 Recent X Platform Changes & Scweet v3.0 Update 56 | 57 | Scweet has recently encountered challenges due to major changes on **X (formerly Twitter)**. In response, we’re excited to announce the new **Scweet v3.0** release! 58 | 59 | ### ✨ What’s New in v3.0: 60 | - ✅ Fully **asynchronous architecture** for faster, smoother scraping 61 | - 🧠 **No more manual Chromedriver setup** – Scweet handles Chromium internally 62 | - 🚀 Enhanced for **personal and research-level scraping** 63 | - ⚠️ **Follower/following scraping temporarily disabled** (to return in future updates) 64 | 65 | > 🔧 For heavy-duty scraping, we recommend using **[Scweet on Apify](https://apify.com/altimis/scweet)** – a cloud-based solution offering higher throughput and stability (up to **1000 tweets/minute**), no infrastructure setup needed. 66 | 67 | ⚠️ **Responsible Use Reminder** 68 | Whether running locally or in the cloud, **always scrape tweets ethically, lawfully, and respectfully**. 69 | 70 | --- 71 | 72 | ## 📌 What is Scweet? 
73 | 74 | Scweet is a Python-based scraping tool designed to fetch tweets and user data **without relying on traditional Twitter APIs**, which have become increasingly restricted. 75 | 76 | With Scweet, you can: 77 | - Scrape tweets by keywords, hashtags, mentions, accounts, or timeframes 78 | - Get detailed user profile information 79 | - (Coming soon) Retrieve followers/following lists again! 80 | 81 | --- 82 | 83 | ## 🔧 Key Features 84 | 85 | ### 🐤 `scrape()` – Tweet Scraper 86 | 87 | Scrape tweets between two dates using keywords, hashtags, mentions, or specific accounts. 88 | 89 | **✅ Available arguments include:** 90 | ```python 91 | - since, until # Date range (format: YYYY-MM-DD) 92 | - words # Keywords (string or list, use "//" separator for strings) 93 | - from_account # Tweets from a user 94 | - to_account # Tweets to a user 95 | - mention_account # Tweets mentioning a user 96 | - hashtag # Search by hashtag 97 | - lang # Language code (e.g. "en") 98 | - limit # Max number of tweets 99 | - display_type # "Top" or "Latest" 100 | - resume # Resume from previous CSV 101 | - filter_replies # Include/exclude replies 102 | - proximity # Local tweet filtering 103 | - geocode # Geolocation filtering 104 | - minlikes # Tweets with minimum likes count 105 | - minretweets # Tweets with minimum retweets count 106 | - minreplies # Tweets with minimum replies count 107 | - save_dir # Output directory 108 | - custom_csv_name # Output csv name 109 | ``` 110 | --- 111 | 112 | ### 👤 `get_user_information()` – User Info Scraper 113 | 114 | Fetch profile details for a list of handles. Returns a dictionary with: 115 | - `username` (display name) 116 | - `following` (number of accounts they follow) 117 | - `verified_followers` (number of verified followers) 118 | - `location`, `website`, `join_date`, `description` 119 | 120 | **🧩 Arguments:** 121 | ```python 122 | - handles # List of Twitter/X handles 123 | - login (bool) # Set True to login and access full data 124 | ``` 125 | 126 | --- 127 | 128 | ### 🔒 `get_users_followers()` & `get_users_following()` 129 | ⚠️ **Currently Disabled due to platform changes** 130 | These will be re-enabled in future versions as we work around new limitations. 131 | 132 | --- 133 | 134 | ## 🛠️ Class Initialization & Configuration 135 | 136 | You can customize Scweet’s behavior during initialization: 137 | 138 | ```python 139 | scweet = Scweet( 140 | proxy=None, # Dict or None {host, post, username, pasword} 141 | cookies=None, # Use saved cookies file 142 | cookies_path='cookies', # Folder path where cookies will be saved/loaded in future usage 143 | user_agent=None, # Custom user agent string 144 | env_path='.env', # Environment variables 145 | n_splits=-1, # Split date interval (-1 for daily) 146 | concurrency=5, # Concurrent tabs 147 | headless=True, # Run headlessly 148 | scroll_ratio=100, # Adjust scroll behavior 149 | code_callback=None # Optional custom login code handler. Scweet only handles MailTM emails to get the code if X asks for it. 150 | ) 151 | ``` 152 | 153 | --- 154 | 155 | ## 🔐 Authentication 156 | 157 | Scweet requires login to fetch tweets. 
Set up your `.env` file like this: 158 | 159 | ```env 160 | EMAIL=your_email@example.com 161 | EMAIL_PASSWORD=your_email_password 162 | USERNAME=your_username 163 | PASSWORD=your_password 164 | ``` 165 | 166 | Use the built-in helper to create disposable login emails: 167 | 168 | ```python 169 | from Scweet.utils import create_mailtm_email 170 | ``` 171 | 172 | For custom email providers, pass your own `code_callback`. 173 | 174 | --- 175 | 176 | ## 🔧 Installation 177 | 178 | ```bash 179 | pip install Scweet==3.0 180 | ``` 181 | Make sure your environment is set up with Python 3.7+, chrome browser and pip is available. 182 | 183 | ## 💡 Example Usage 184 | 185 | ### 🐍 Python Script 186 | 187 | ```python 188 | from Scweet.scweet import Scweet 189 | from Scweet.user import get_user_information 190 | 191 | scweet = Scweet(proxy=None, cookies=None, cookies_path='cookies', 192 | user_agent=None, disable_images=True, env_path='.env', 193 | n_splits=-1, concurrency=5, headless=True, scroll_ratio=100) 194 | 195 | # Get user profile info 196 | handles = ['nagouzil', 'yassineaitjeddi', 'TahaAlamIdrissi'] 197 | infos = scweet.get_user_information(handles=handles, login=True) 198 | print(infos) 199 | 200 | # Scrape tweets with keywords 201 | results = scweet.scrape( 202 | since="2022-10-01", 203 | until="2022-10-06", 204 | words=['bitcoin', 'ethereum'], 205 | lang="en", 206 | limit=20, 207 | display_type="Top", 208 | resume=False, 209 | filter_replies=False, 210 | minlikes=10, 211 | minretweets=10, 212 | save_dir='outputs', 213 | custom_csv_name='crypto.csv' 214 | ) 215 | print(len(results)) 216 | scweet.close() 217 | ``` 218 | 219 | ### 📝 Example Output 220 | 221 | When you scrape tweets using the scrape() function, the results will be written to a CSV file, with each row representing a tweet. Here’s an example of what the output might look like: 222 | 223 | 224 | | tweetId | UserScreenName | UserName | Timestamp | Text | Embedded_text | Emojis | Comments | Likes | Retweets | Image link | Tweet URL | 225 | |--------------------|----------------|-----------|--------------------------|-------------------------------------------------------------------------------------------|--------------------------|--------|----------|-------|----------|--------------------------------------------------------------------------------------------------|--------------------------------------------------| 226 | | 1577716440299442187 | @elonmusk | Elon Musk | 2022-10-05T17:44:46.000Z | 10.69.3 will actually be a major upgrade. We’re keeping .69 just because haha. 
| Replying to@WholeMarsBlog | | 1256 | 18787 | 1000 | https://pbs.twimg.com/profile_images/1683899100922511378/5lY42eHs_bigger.jpg | /elonmusk/status/1577716440299442187 | 227 | | 1577737664689848326 | @elonmusk | Elon Musk | 2022-10-05T19:09:06.000Z | Twitter is an accelerant to fulfilling the original http://X.com vision | Replying to@TEDchris | | 967 | 10967 | 931 | https://pbs.twimg.com/profile_images/1683899100922511378/5lY42eHs_bigger.jpg | /elonmusk/status/1577737664689848326 | 228 | | 1577747565533069312 | @elonmusk | Elon Musk | 2022-10-05T19:48:27.000Z | That wouldn’t be hard to do | Replying to@ashleevance | | 1326 | 31734 | 1011 | https://pbs.twimg.com/profile_images/1683899100922511378/5lY42eHs_bigger.jpg | /elonmusk/status/1577747565533069312 | 229 | | 1577732106784051214 | @elonmusk | Elon Musk | 2022-10-05T18:47:01.000Z | *"I do not think it is simple at all, but I have yet to hear any realistic path to peace.* | | | – | – | – | – | /elonmusk/status/1577732106784051214 | 230 | 231 | 232 | **Columns description**: 233 | 234 | - **tweetId**: The unique identifier for the tweet. 235 | - **UserScreenName**: The Twitter/X handle of the user who posted the tweet. 236 | - **UserName**: The display name of the user. 237 | - **Timestamp**: The date and time the tweet was posted. 238 | - **Text**: The content of the tweet. 239 | - **Embedded_text**: If the tweet is a reply, this will show the user being replied to. 240 | - **Emojis**: Any emojis used in the tweet. 241 | - **Comments**: Number of replies to the tweet. 242 | - **Likes**: Number of likes the tweet received. 243 | - **Retweets**: Number of retweets the tweet received. 244 | - **Image link**: A link to the image(s) attached to the tweet, if any. 245 | - **Tweet URL**: Direct URL to the tweet. 246 | 247 | --- 248 | 249 | ## ☁️ Scweet on Apify (Cloud) 250 | 251 | Need powerful, scalable, high-volume scraping? 252 | Try [**Scweet on Apify**](https://apify.com/altimis/scweet) – a no-setup cloud solution: 253 | 254 | - 🚀 Up to **1000 tweets/minute** 255 | - 📦 Exports to datasets or files 256 | - 🔒 Secure, isolated runs 257 | - 🔁 Ideal for automation, long-term projects 258 | 259 | --- 260 | 261 | ## 🙏 Responsible Use 262 | 263 | We care deeply about ethical scraping. 264 | 265 | > **Please:** Use Scweet for research, archiving, and lawful purposes only. 266 | 267 | --- 268 | 269 | ## 📎 Resources 270 | 271 | - 📄 [Example Script](https://github.com/Altimis/Scweet/blob/master/example.py) 272 | - 🐞 [Issues / Bugs](https://github.com/Altimis/Scweet/issues) 273 | - 🌐 [Scweet on Apify](https://apify.com/altimis/scweet) 274 | 275 | --- 276 | 277 | ## ⭐ Star & Contribute 278 | 279 | If you find Scweet useful, consider **starring** the repo ⭐ 280 | We welcome **PRs**, bug reports, and ideas for new features! 
281 | 282 | --- 283 | 284 | MIT License • © 2020–2025 Altimis 285 | -------------------------------------------------------------------------------- /Scweet.egg-info/SOURCES.txt: -------------------------------------------------------------------------------- 1 | LICENSE.txt 2 | README.md 3 | setup.cfg 4 | setup.py 5 | Scweet/__init__.py 6 | Scweet/__version__.py 7 | Scweet/const.py 8 | Scweet/mailtm.py 9 | Scweet/scweet.py 10 | Scweet/scweet_v1.8.py 11 | Scweet/user.py 12 | Scweet/utils.py 13 | Scweet.egg-info/PKG-INFO 14 | Scweet.egg-info/SOURCES.txt 15 | Scweet.egg-info/dependency_links.txt 16 | Scweet.egg-info/requires.txt 17 | Scweet.egg-info/top_level.txt -------------------------------------------------------------------------------- /Scweet.egg-info/dependency_links.txt: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /Scweet.egg-info/requires.txt: -------------------------------------------------------------------------------- 1 | certifi 2 | python-dotenv 3 | urllib3 4 | PyVirtualDisplay 5 | beautifulsoup4==4.12.3 6 | nodriver==0.38.post1 7 | requests 8 | -------------------------------------------------------------------------------- /Scweet.egg-info/top_level.txt: -------------------------------------------------------------------------------- 1 | Scweet 2 | -------------------------------------------------------------------------------- /Scweet/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Altimis/Scweet/74e23e61e4a73aa71cdf76c1ea9981f83802ddb1/Scweet/__init__.py -------------------------------------------------------------------------------- /Scweet/__version__.py: -------------------------------------------------------------------------------- 1 | VERSION = (3, 1) 2 | 3 | __version__ = '.'.join(map(str, VERSION)) -------------------------------------------------------------------------------- /Scweet/const.py: -------------------------------------------------------------------------------- 1 | import dotenv 2 | import os 3 | from pathlib import Path 4 | 5 | current_dir = Path(__file__).parent.absolute() 6 | 7 | 8 | # env_file = os.getenv("SCWEET_ENV_FILE", current_dir.parent.joinpath(".env")) 9 | # dotenv.load_dotenv(env_file, verbose=True) 10 | 11 | 12 | def load_env_variable(key, default_value=None, none_allowed=False): 13 | v = os.getenv(key, default=default_value) 14 | if v is None and not none_allowed: 15 | raise RuntimeError(f"{key} returned {v} but this is not allowed!") 16 | return v 17 | 18 | 19 | def get_email(env): 20 | dotenv.load_dotenv(env, verbose=True, override=True) 21 | return load_env_variable("EMAIL", none_allowed=False) 22 | 23 | 24 | def get_email_password(env): 25 | dotenv.load_dotenv(env, verbose=True, override=True) 26 | return load_env_variable("EMAIL_PASSWORD", none_allowed=True) 27 | 28 | 29 | def get_password(env): 30 | dotenv.load_dotenv(env, verbose=True, override=True) 31 | return load_env_variable("PASSWORD", none_allowed=False) 32 | 33 | 34 | def get_username(env): 35 | dotenv.load_dotenv(env, verbose=True, override=True) 36 | return load_env_variable("USERNAME", none_allowed=False) 37 | -------------------------------------------------------------------------------- /Scweet/debug.log: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Altimis/Scweet/74e23e61e4a73aa71cdf76c1ea9981f83802ddb1/Scweet/debug.log -------------------------------------------------------------------------------- /Scweet/mailtm.py: -------------------------------------------------------------------------------- 1 | """ 2 | Scweet - Twitter Scraping Tool 3 | Author: Yassine Ait Jeddi (@altimis) 4 | License: MIT 5 | Repository: https://github.com/Altimis/scweet 6 | """ 7 | 8 | import requests 9 | 10 | 11 | class Domain: 12 | def __init__(self, domainJson): 13 | self.domain = domainJson["domain"] 14 | self.id = domainJson["id"] 15 | 16 | 17 | class Mail: 18 | def __init__(self, emailJson, token): 19 | self.fromAddress = emailJson["from"]["address"] 20 | self.toAddress = [] 21 | for receiver in emailJson["to"]: 22 | self.toAddress.append(receiver["address"]) 23 | self.session = requests.Session() 24 | self.token = token 25 | self.fromName = emailJson["from"]["name"] 26 | self.subject = emailJson["subject"] 27 | self.size = emailJson["size"] 28 | self.id = emailJson["id"] 29 | self.text = self.read()["text"] 30 | 31 | def read(self): 32 | r = self.session.get( 33 | "https://api.mail.tm/messages/" + self.id, 34 | headers={ 35 | "Authorization": "Bearer " + self.token, 36 | "Content-Type": "application/json", 37 | }, 38 | ) 39 | 40 | return r.json() 41 | 42 | def delete(self): 43 | r = self.session.delete( 44 | "https://api.mail.tm/messages/" + self.id, 45 | headers={ 46 | "Authorization": "Bearer " + self.token, 47 | "Content-Type": "application/json", 48 | }, 49 | ) 50 | 51 | if r.status_code == 204: 52 | return 0 53 | if r.status_code == 404: 54 | return 1 55 | 56 | 57 | class MailTMClient: 58 | def getAvailableDomains(self): 59 | r = self.session.get("https://api.mail.tm/domains") 60 | domains = [] 61 | for domainJson in r.json()["hydra:member"]: 62 | # Only fetch public & active domains for now. 
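# (Each entry of the "hydra:member" collection exposes isActive / isPrivate flags.)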
63 | if domainJson["isActive"] == True and domainJson["isPrivate"] == False: 64 | domains.append(Domain(domainJson)) 65 | return domains 66 | 67 | def register(self, address, password): 68 | r = self.session.post( 69 | "https://api.mail.tm/accounts", 70 | json={ 71 | "address": address, 72 | "password": password, 73 | }, 74 | ) 75 | 76 | if r.status_code == 201 or r.status_code == 200: 77 | (responseCode, response) = self.login(address, password) 78 | if responseCode == 0: 79 | return (0, response) 80 | elif r.status_code == 400: 81 | return (1, r.json()["detail"]) 82 | elif r.status_code == 422: 83 | return (2, r.json()["detail"]) 84 | print(f'response {r.status_code}') 85 | return -1, None 86 | 87 | def login(self, address, password): 88 | r = self.session.post( 89 | "https://api.mail.tm/token", 90 | json={ 91 | "address": address, 92 | "password": password, 93 | }, 94 | ) 95 | 96 | if r.status_code == 200: 97 | return (0, r.json()["token"]) 98 | if r.status_code == 401: 99 | return (1, r.json()["message"]) 100 | 101 | def getInbox(self): 102 | r = self.session.get( 103 | "https://api.mail.tm/messages", 104 | headers={ 105 | "Authorization": "Bearer " + self.token, 106 | "Content-Type": "application/json", 107 | }, 108 | ) 109 | 110 | inbox = [] 111 | for emailJson in r.json()["hydra:member"]: 112 | inbox.append(Mail(emailJson, self.token)) 113 | return inbox 114 | 115 | def __init__(self, token=None): 116 | self.session = requests.Session() 117 | self.token = token if token is not None else token 118 | -------------------------------------------------------------------------------- /Scweet/scweet.py: -------------------------------------------------------------------------------- 1 | """ 2 | Scweet - Twitter Scraping Tool 3 | Author: Yassine Ait Jeddi (@altimis) 4 | License: MIT 5 | Repository: https://github.com/Altimis/scweet 6 | """ 7 | 8 | import asyncio 9 | import logging 10 | import argparse 11 | import csv 12 | import json 13 | import re 14 | import os 15 | import math 16 | from datetime import datetime, timedelta, date 17 | from typing import Awaitable, Callable, Optional, Union, List 18 | 19 | import platform 20 | 21 | import nodriver as uc 22 | from requests.cookies import create_cookie 23 | from bs4 import BeautifulSoup 24 | from pyvirtualdisplay import Display 25 | 26 | from .const import get_username, get_password, get_email, get_email_password 27 | from .utils import (check_element_if_exists_by_text, check_element_if_exists_by_css, 28 | get_code_from_email, extract_count_from_aria_label) 29 | 30 | logging.getLogger('urllib3').setLevel(logging.WARNING) 31 | logging.getLogger('seleniumwire').setLevel(logging.ERROR) 32 | logging.getLogger('selenium').setLevel(logging.ERROR) 33 | logging.basicConfig(level=logging.INFO, format='%(asctime)s:%(message)s') 34 | 35 | # display = Display(visible=0, size=(1024, 768)) 36 | 37 | 38 | def parse_followers(text): 39 | text = text.split(' ')[0] 40 | if 'K' in text: 41 | followers = int(float(text.replace('K', '')) * 1000) 42 | elif 'M' in text: 43 | followers = int(float(text.replace('M', '')) * 1000000) 44 | else: 45 | text = text.replace(',', '') 46 | followers = int(text) 47 | return followers 48 | 49 | 50 | class Scweet: 51 | main_tab: uc.Tab 52 | def __init__(self, proxy=None, cookies=None, cookies_path=None, user_agent=None, 53 | disable_images=False, env_path=None, n_splits=5, concurrency=5, headless=True, scroll_ratio=30, 54 | code_callback: Optional[Callable[[str, str], Awaitable[str]]] = None): 55 | self.driver = None 56 | 
self.proxy = proxy 57 | self.cookies = cookies 58 | self.user_agent = user_agent 59 | self.cookies_path = cookies_path 60 | self.disable_images = disable_images 61 | self.env_path = env_path 62 | self.n_splits = n_splits 63 | self.concurrency = concurrency 64 | self.headless = headless 65 | self.scroll_ratio = scroll_ratio 66 | self.logged_in = False 67 | self.suspended = False 68 | # If no custom code callback is provided, use the default get_code_from_email for mailtm 69 | self.code_callback = code_callback or get_code_from_email 70 | self.display = None 71 | if self.headless: 72 | if self.headless and platform.system() in ["Linux"]: 73 | logging.info("Starting pyvirtualdisplay for Linux headless mode") 74 | self.display = Display(visible=0, size=(1024, 768)) 75 | self.display.start() 76 | 77 | async def init_nodriver(self): 78 | config = uc.Config() 79 | config.lang = "en-US" 80 | # Enable built-in headless mode for Windows and macOS 81 | if self.headless and platform.system() in ["Windows", "Darwin"]: 82 | logging.info("Using nodriver's headless mode for Windows/macOS") 83 | config.headless = True 84 | 85 | if self.proxy: 86 | logging.info(f"setting proxy : {self.proxy['host']}:{self.proxy['port']}") 87 | config.add_argument(f"--proxy-server={self.proxy['host']}:{self.proxy['port']}") 88 | if self.user_agent: 89 | config.add_argument(f'--user-agent={self.user_agent}') 90 | if self.disable_images: 91 | config.add_argument(f'--blink-settings=imagesEnabled=false') 92 | self.driver = await uc.start(config) 93 | self.main_tab = await self.driver.get("draft:,") 94 | if self.proxy: 95 | self.main_tab.add_handler(uc.cdp.fetch.RequestPaused, self.req_paused) 96 | self.main_tab.add_handler( 97 | uc.cdp.fetch.AuthRequired, self.auth_challenge_handler 98 | ) 99 | 100 | await self.main_tab.send(uc.cdp.fetch.enable(handle_auth_requests=True)) 101 | page = await self.driver.get("https://www.whatismyip.com/") 102 | await asyncio.sleep(5) 103 | 104 | async def auth_challenge_handler(self, event: uc.cdp.fetch.AuthRequired): 105 | # Split the credentials 106 | # Respond to the authentication challenge 107 | asyncio.create_task( 108 | self.main_tab.send( 109 | uc.cdp.fetch.continue_with_auth( 110 | request_id=event.request_id, 111 | auth_challenge_response=uc.cdp.fetch.AuthChallengeResponse( 112 | response="ProvideCredentials", 113 | username=self.proxy['username'], 114 | password=self.proxy['password'], 115 | ), 116 | ) 117 | ) 118 | ) 119 | 120 | async def req_paused(self, event: uc.cdp.fetch.RequestPaused): 121 | asyncio.create_task( 122 | self.main_tab.send( 123 | uc.cdp.fetch.continue_request(request_id=event.request_id) 124 | ) 125 | ) 126 | 127 | async def enter_code(self, code): 128 | try: 129 | code_el = await self.main_tab.select("input[data-testid=ocfEnterTextTextInput]") 130 | await self.main_tab.sleep(15) 131 | if not code: 132 | return False 133 | await code_el.send_keys(code) 134 | await self.main_tab.sleep(2) 135 | try: 136 | next = await self.main_tab.find("Suivant", best_match=True) 137 | except Exception as e: 138 | next = await self.main_tab.find("Next", best_match=True) 139 | except Exception as err: 140 | next = await self.main_tab.find("Se Connecter", best_match=True) 141 | except Exception as eerr: 142 | next = await self.main_tab.find("Login", best_match=True) 143 | 144 | await next.click() 145 | await self.main_tab.sleep(2) 146 | return True 147 | 148 | except Exception as e: 149 | print(f"couldn't enter code : {e}") 150 | return False 151 | 152 | async def enter_username(self, 
username): 153 | try: 154 | username_el = await self.main_tab.select("input[data-testid=ocfEnterTextTextInput]") 155 | await username_el.send_keys(username) 156 | await self.main_tab.sleep(1) 157 | try: 158 | next = await self.main_tab.find("Suivant", best_match=True) 159 | except Exception as e: 160 | next = await self.main_tab.find("Next", best_match=True) 161 | except Exception as err: 162 | next = await self.main_tab.find("Se Connecter", best_match=True) 163 | except Exception as eerr: 164 | next = await self.main_tab.find("Login", best_match=True) 165 | 166 | await next.click() 167 | await self.main_tab.sleep(1) 168 | except Exception as e: 169 | print(f"Error entering username : {e}") 170 | pass 171 | 172 | async def normal_login(self, account): 173 | # enter username 174 | email_el = await self.main_tab.select("input[autocomplete=username]") 175 | await email_el.send_keys(account['email_address']) 176 | await self.main_tab.sleep(1) 177 | logging.info('Entered email') 178 | 179 | # click next 180 | try: 181 | next = await self.main_tab.find("Suivant", best_match=True) 182 | except: 183 | next = await self.main_tab.find("Next", best_match=True) 184 | await next.click() 185 | await self.main_tab.sleep(1) 186 | 187 | # check if username is required and enter 188 | try: 189 | await self.main_tab.sleep(1) 190 | await self.main_tab.find( 191 | "Entrez votre adresse email ou votre nom d'utilisateur.") # Enter your phone number or username 192 | await self.enter_username(account['username']) 193 | logging.info('entered username') 194 | except: 195 | pass 196 | 197 | try: 198 | await self.main_tab.sleep(1) 199 | await self.main_tab.find( 200 | "Enter your phone number or username") # Enter your phone number or username 201 | await self.enter_username(account['username']) 202 | logging.info('Entered username') 203 | except: 204 | pass 205 | 206 | # enter password 207 | password_el = await self.main_tab.select("input[autocomplete=current-password]") 208 | await password_el.send_keys(account['password']) 209 | await self.main_tab.sleep(2) 210 | logging.info('Entered password') 211 | 212 | # click login 213 | try: 214 | next = await self.main_tab.find("Se Connecter", best_match=True) 215 | except Exception as e: 216 | next = await self.main_tab.find("Login", best_match=True) 217 | except Exception as err: 218 | pass 219 | 220 | await self.main_tab.sleep(1) 221 | await next.click() 222 | 223 | if await self._is_logged_in(): 224 | logging.info("Logged in successfully.") 225 | self.cookies = await self.driver.cookies.get_all(requests_cookie_format=True) 226 | if self.cookies_path: 227 | await self.driver.cookies.save(f"{self.cookies_path}/{account['username']}_cookies.dat") 228 | return self.main_tab, True, "", self.cookies 229 | 230 | # wait for code to be sent if required 231 | if (await check_element_if_exists_by_text(self.main_tab, "Code de confirmation") or 232 | await check_element_if_exists_by_text(self.main_tab, "Confirmation code")): 233 | # code = input("Enter the code you received in your email : ") 234 | await self.main_tab.sleep(10) 235 | code = await self.code_callback(account.get('email_address'), account.get('email_password')) 236 | code_status = await self.enter_code(code) 237 | if not code_status: 238 | return self.main_tab, False, "code_not_found", None 239 | logging.info('Entered Confirmation code') 240 | 241 | 242 | if (await check_element_if_exists_by_text(self.main_tab, 243 | "Please verify your email address.", timeout=20) or 244 | await 
check_element_if_exists_by_text(self.main_tab, 245 | 'Your account has been locked.', timeout=20)): 246 | return self.main_tab, False, "Account locked.", None 247 | 248 | # check if login is successful 249 | if await self._is_logged_in(): 250 | logging.info("Logged in successfully.") 251 | self.cookies = await self.driver.cookies.get_all(requests_cookie_format=True) 252 | if self.cookies_path: 253 | await self.driver.cookies.save(f"{self.cookies_path}/{account['username']}_cookies.dat") 254 | return self.main_tab, True, "", self.cookies 255 | else: 256 | return None, False, "Locked", None 257 | 258 | async def login(self): 259 | # await self.init_nodriver() 260 | if self.logged_in: 261 | return self.main_tab, True, "", self.cookies 262 | account = { 263 | "email_address": get_email(self.env_path), 264 | "password": get_password(self.env_path), 265 | "username": get_username(self.env_path), 266 | "email_password": get_email_password(self.env_path) 267 | } 268 | if not account.get('email_address') or not account.get('password') or not account.get('username'): 269 | logging.info(f"Provide twitter account credentials to login.") 270 | return self.main_tab, False, "Account_creds_required", None 271 | self.main_tab = await self.driver.get("https://x.com/login") 272 | await self.main_tab.sleep(2) 273 | if os.path.exists(f"{self.cookies_path}/{account['username']}_cookies.dat"): 274 | logging.info(f"Loading cookies from path {self.cookies_path} ...") 275 | await self.driver.cookies.load(f"{self.cookies_path}/{account['username']}_cookies.dat") 276 | self.main_tab = await self.driver.get("https://x.com/login") 277 | await self.main_tab.sleep(3) 278 | elif self.cookies: 279 | logging.info(f"Loading cookies from file ...") 280 | await self.load_cookies(self.cookies) 281 | self.main_tab = await self.driver.get("https://x.com/login") 282 | await self.main_tab.sleep(3) 283 | 284 | if await self._is_logged_in(): 285 | logging.info(f"Logged in successfully to {account.get('username')}") 286 | return self.main_tab, True, "", self.cookies 287 | 288 | if await check_element_if_exists_by_css(self.main_tab, "input[autocomplete=username]"): 289 | logging.info(f"Login in from scratch to {account.get('username')}") 290 | return await self.normal_login(account) 291 | else: 292 | logging.info("Something unexpected happened. Aborting.") 293 | return self.main_tab, False, "Other", None 294 | 295 | async def _is_logged_in(self): 296 | try: 297 | home = await self.main_tab.select("a[href='/home']") 298 | self.logged_in = True 299 | return True 300 | except Exception as e: 301 | return False 302 | 303 | async def load_cookies(self, cookie_dicts): 304 | for cdict in cookie_dicts: 305 | # Recreate the cookie using requests' create_cookie function 306 | c = create_cookie( 307 | name=cdict["name"], 308 | value=cdict["value"], 309 | domain=cdict["domain"], 310 | path=cdict["path"], 311 | expires=cdict["expires"], 312 | secure=cdict["secure"] 313 | ) 314 | self.driver.cookies.set_cookie(c) 315 | 316 | async def get_data(self, post_soup): 317 | # username 318 | username_tag = post_soup.find('span') 319 | username = username_tag.get_text(strip=True) if username_tag else "" 320 | 321 | # handle: a span with '@' 322 | handle_tag = post_soup.find('span', text=lambda t: t and '@' in t) 323 | handle = handle_tag.get_text(strip=True) if handle_tag else "" 324 | 325 | # postdate: