├── .gitignore ├── LICENSE ├── assets ├── account.gif ├── scrape.gif ├── search.gif ├── spaces-audio.gif ├── spaces-transcript-01.gif └── spaces-transcript-02.gif ├── examples ├── example.ipynb └── simple_example.py ├── readme.md ├── scripts ├── clean.sh └── update.py ├── setup.py ├── setup.sh └── twitter ├── __init__.py ├── __version__.py ├── account.py ├── constants.py ├── login.py ├── scraper.py ├── search.py └── util.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 Trevor Hobenshield 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /assets/account.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/trevorhobenshield/twitter-api-client/c150f1a3492ce3db15b954f2bc18b4976500a73b/assets/account.gif -------------------------------------------------------------------------------- /assets/scrape.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/trevorhobenshield/twitter-api-client/c150f1a3492ce3db15b954f2bc18b4976500a73b/assets/scrape.gif -------------------------------------------------------------------------------- /assets/search.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/trevorhobenshield/twitter-api-client/c150f1a3492ce3db15b954f2bc18b4976500a73b/assets/search.gif -------------------------------------------------------------------------------- /assets/spaces-audio.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/trevorhobenshield/twitter-api-client/c150f1a3492ce3db15b954f2bc18b4976500a73b/assets/spaces-audio.gif -------------------------------------------------------------------------------- /assets/spaces-transcript-01.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/trevorhobenshield/twitter-api-client/c150f1a3492ce3db15b954f2bc18b4976500a73b/assets/spaces-transcript-01.gif -------------------------------------------------------------------------------- /assets/spaces-transcript-02.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/trevorhobenshield/twitter-api-client/c150f1a3492ce3db15b954f2bc18b4976500a73b/assets/spaces-transcript-02.gif -------------------------------------------------------------------------------- /examples/simple_example.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | from twitter.util import find_key 3 | from twitter.scraper import Scraper 4 | 5 | 6 | def parse_tweets(data: list | dict) -> pd.DataFrame: 7 | """ 8 | Parse small subset of relevant features into a DataFrame. 9 | 10 | Note: structure of GraphQL response is not consistent, this example may not work in all cases. 11 | 12 | @param data: tweets (raw GraphQL response data) 13 | @return: DataFrame of tweets 14 | """ 15 | df = ( 16 | pd.json_normalize(( 17 | x.get('result', {}).get('tweet', {}).get('legacy') for x in find_key(data, 'tweet_results')), 18 | max_level=1 19 | ) 20 | .assign(created_at=lambda x: pd.to_datetime(x['created_at'], format="%a %b %d %H:%M:%S %z %Y")) 21 | .sort_values('created_at', ascending=False) 22 | .reset_index(drop=True) 23 | ) 24 | numeric = [ 25 | 'user_id_str', 26 | 'id_str', 27 | 'favorite_count', 28 | 'quote_count', 29 | 'reply_count', 30 | 'retweet_count', 31 | ] 32 | df[numeric] = df[numeric].apply(pd.to_numeric, errors='coerce') 33 | df = df[[ 34 | 'id_str', 35 | 'user_id_str', 36 | 'created_at', 37 | 'full_text', 38 | 'favorite_count', 39 | 'quote_count', 40 | 'reply_count', 41 | 'retweet_count', 42 | 'lang', 43 | ]] 44 | return df 45 | 46 | 47 | if __name__ == '__main__': 48 | ## sign-in with credentials 49 | email, username, password = ..., ..., ... 
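# note: the Ellipsis values above are placeholders; substitute real credentials, or skip them and resume from cookies as shown below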
50 | scraper = Scraper(email, username, password) 51 | 52 | ## or, resume session using cookies 53 | # scraper = Scraper(cookies={"ct0": ..., "auth_token": ...}) 54 | 55 | tweets = scraper.tweets([ 56 | ..., # tweet ids 57 | ]) 58 | 59 | df = parse_tweets(tweets) 60 | 61 | df.to_csv('tweets.csv') 62 | # df.to_parquet('tweets.parquet', engine='pyarrow') 63 | -------------------------------------------------------------------------------- /scripts/clean.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/bash 2 | 3 | if [ -d '../dist' ] ; then 4 | rm -r ../dist 5 | fi 6 | if [ -d '../build' ] ; then 7 | rm -r ../build 8 | fi 9 | if [ -d '../twitter_api_client.egg-info' ] ; then 10 | rm -r ../twitter_api_client.egg-info 11 | fi -------------------------------------------------------------------------------- /scripts/update.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import logging.config 3 | import platform 4 | import random 5 | import re 6 | import subprocess 7 | from asyncio import Semaphore 8 | from functools import partial 9 | from logging import getLogger, Logger 10 | from pathlib import Path 11 | from typing import Generator 12 | 13 | import aiofiles 14 | import chompjs 15 | import orjson 16 | from httpx import AsyncClient, Response, Limits, Client 17 | from selectolax.lexbor import LexborHTMLParser 18 | from tqdm.asyncio import tqdm_asyncio 19 | 20 | try: 21 | get_ipython() 22 | import nest_asyncio 23 | 24 | nest_asyncio.apply() 25 | except: 26 | ... 27 | 28 | if platform.system() != 'Windows': 29 | try: 30 | import uvloop 31 | 32 | uvloop.install() 33 | except: 34 | ... 35 | 36 | dump_json = partial(orjson.dumps, option=orjson.OPT_INDENT_2 | orjson.OPT_SORT_KEYS) 37 | 38 | 39 | def mkdir(path: str | Path) -> Path: 40 | p = Path(path) 41 | p.mkdir(exist_ok=True, parents=True) 42 | return p 43 | 44 | 45 | logging.config.dictConfig({ 46 | 'version': 1, 47 | 'disable_existing_loggers': False, 48 | 'formatters': { 49 | 'standard': { 50 | 'format': '%(asctime)s.%(msecs)03d [%(levelname)s] :: %(message)s', 51 | 'datefmt': '%Y-%m-%d %H:%M:%S' 52 | } 53 | }, 54 | 'handlers': { 55 | 'file': { 56 | 'class': 'logging.FileHandler', 57 | 'level': 'DEBUG', 58 | 'formatter': 'standard', 59 | 'filename': 'log.log', 60 | 'mode': 'a' 61 | }, 62 | 'console_warning': { 63 | 'class': 'logging.StreamHandler', 64 | 'level': 'WARNING', 65 | 'formatter': 'standard' 66 | }, 67 | 'console_info': { 68 | 'class': 'logging.StreamHandler', 69 | 'level': 'INFO', 70 | 'formatter': 'standard', 71 | 'filters': ['info_only'] 72 | } 73 | }, 74 | 'filters': { 75 | 'info_only': { 76 | '()': lambda: lambda record: record.levelno == logging.INFO 77 | } 78 | }, 79 | 'loggers': { 80 | 'my_logger': { 81 | 'handlers': ['file', 'console_warning', 'console_info'], 82 | 'level': 'DEBUG' 83 | } 84 | } 85 | }) 86 | logger = getLogger(list(Logger.manager.loggerDict)[-1]) 87 | 88 | PATH_DATA = mkdir('data') 89 | 90 | PATH_HOMEPAGE = PATH_DATA / 'x.html' 91 | PATH_INITIAL_STATE = PATH_DATA / 'initial_state.json' 92 | PATH_FEATURES = PATH_DATA / 'features.json' 93 | PATH_LIMITS = PATH_DATA / 'limits.json' 94 | PATH_OPS = PATH_DATA / 'ops.json' 95 | PATH_MAIN = PATH_DATA / 'main.js' 96 | PATH_URLS = PATH_DATA / 'csp.txt' 97 | STRINGS = PATH_DATA / 'strings.txt' 98 | PATHS = PATH_DATA / 'paths.txt' 99 | JS_FILES_MAP = PATH_DATA / 'js.json' 100 | JS_FILES = mkdir(PATH_DATA / 'js') 101 | OPERATIONS = PATH_DATA / 'operations' 102 | 
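# note: the paths above collect this script's outputs: the raw x.com homepage, main.js and the other JS bundles, the parsed window.__INITIAL_STATE__, the GraphQL operation map (ops.json), boolean feature flags, and numeric limits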
103 | USER_AGENTS = [ 104 | 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.3.1 Safari/605.1.1', 105 | 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.3', 106 | 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.6 Safari/605.1.1', 107 | 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36 Edg/122.0.0.', 108 | ] 109 | 110 | _a = 'a.js' 111 | _base = 'https://abs.twimg.com/responsive-web/client-web' 112 | 113 | 114 | async def backoff(fn: callable, sem: Semaphore, *args, m: int = 20, b: int = 2, max_retries: int = 8, **kwargs) -> any: 115 | ignore_status_codes = kwargs.pop('ignore_status_codes', []) 116 | for i in range(max_retries + 1): 117 | try: 118 | async with sem: 119 | r = await fn(*args, **kwargs) 120 | if r.status_code in ignore_status_codes: 121 | return r 122 | r.raise_for_status() 123 | return r 124 | except Exception as e: 125 | if i == max_retries: 126 | logger.warning(f'Max retries exceeded\n{e}') 127 | return 128 | t = min(random.random() * (b ** i), m) 129 | logger.info(f'Retrying in {f"{t:.2f}"} seconds\n{e}') 130 | await asyncio.sleep(t) 131 | 132 | 133 | def download(urls: list[str], out: str = 'tmp', sz: int = None, fname_fn: partial = None, **kwargs) -> Generator: 134 | async def get(client: AsyncClient, sem: Semaphore, url: str): 135 | fname = url.split('/')[-1] if not fname_fn else fname_fn(url) 136 | async with aiofiles.open(f'{_out}/{fname}', 'wb') as fp: 137 | r = await backoff(client.get, sem, url, **kwargs) 138 | async for chunk in r.aiter_bytes(sz): 139 | await fp.write(chunk) 140 | return r 141 | 142 | _out = mkdir(out) 143 | return (partial(get, url=u) for u in urls) 144 | 145 | 146 | def send(cfgs: list[dict], **kwargs) -> Generator: 147 | async def f(client: AsyncClient, sem: Semaphore, cfg: dict) -> Response: 148 | return await backoff(client.request, sem, **cfg, **kwargs) 149 | 150 | return (partial(f, cfg=cfg) for cfg in cfgs) 151 | 152 | 153 | async def process(fns: Generator, max_connections: int = 2000, **kwargs): 154 | client_defaults = { 155 | 'cookies': kwargs.pop('cookies', None), 156 | 'headers': {'user-agent': random.choice(USER_AGENTS)} | kwargs.pop('headers', {}), 157 | 'timeout': kwargs.pop('timeout', 30.0), 158 | 'verify': kwargs.pop('verify', False), 159 | 'http2': kwargs.pop('http2', True), 160 | 'follow_redirects': kwargs.pop('follow_redirects', True), 161 | 'limits': kwargs.pop('limits', Limits( 162 | max_connections=max_connections, 163 | max_keepalive_connections=None, 164 | keepalive_expiry=5.0, 165 | )) 166 | } 167 | # tqdm 168 | desc = kwargs.pop('desc', None) 169 | sem = Semaphore(max_connections) 170 | async with AsyncClient(**client_defaults, **kwargs) as client: 171 | tasks = (fn(client=client, sem=sem) for fn in fns) 172 | if desc: 173 | return await tqdm_asyncio.gather(*tasks, desc=desc) 174 | return await asyncio.gather(*tasks) 175 | 176 | 177 | def _get_endpoints(res: Response, out: Path = JS_FILES_MAP) -> dict: 178 | temp = re.findall('\+"\."\+(\{.*\})\[e\]\+?' 
+ '"' + _a + '"', res.text)[0] 179 | endpoints = orjson.loads(temp.replace('vendor:', '"vendor":').replace('api:', '"api":')) 180 | if out: 181 | out.write_bytes(dump_json(endpoints)) 182 | return endpoints 183 | 184 | 185 | def get_js_files(r: Response, out: Path = JS_FILES) -> None: 186 | endpoints = _get_endpoints(r) 187 | csp = sorted({x.strip(';') for x in r.headers.get("content-security-policy").split() if x.startswith("https://")}) 188 | PATH_URLS.write_text('\n'.join(csp)) 189 | urls = [ 190 | f'{_base}/{k}.{v}{_a}' 191 | for k, v in endpoints.items() 192 | if not re.search(r'participantreaction|\.countries-|emojipicker|i18n|icons\/', k, flags=re.I) 193 | ] 194 | asyncio.run(process(download(urls, out=out), desc='Downloading JS files')) 195 | 196 | 197 | def parse_matches(matches: list[tuple]) -> dict: 198 | d = {} 199 | for m in matches: 200 | d[m[1]] = { 201 | "queryId": m[0], 202 | "operationName": m[1], 203 | "operationType": m[2], 204 | "featureSwitches": sorted(re.sub(r'[\s"\']', '', x) for x in (m[3].split(',') if m[3] else [])), 205 | "fieldToggles": sorted(re.sub(r'[\s"\']', '', x) for x in (m[4].split(',') if m[4] else [])) 206 | } 207 | return d 208 | 209 | 210 | def main(): 211 | client = Client(headers={'user-agent': random.choice(USER_AGENTS)}, follow_redirects=True, http2=True) 212 | r1 = client.get('https://x.com') 213 | PATH_HOMEPAGE.write_text(r1.text) 214 | 215 | try: 216 | get_js_files(r1) 217 | except Exception as e: 218 | logger.warning(f'Failed to get js files\t\t{e}') 219 | 220 | main_js = re.findall(r'href="(https\:\/\/abs\.twimg\.com\/responsive-web\/client-web\/main\.\w+\.js)"', r1.text)[0] 221 | r2 = client.get(main_js) 222 | PATH_MAIN.write_text(r2.text) 223 | 224 | expr = r'\{[^{}]*queryId:\s?"([^"]+)",\s*operationName:\s?"([^"]+)",\s*operationType:\s?"([^"]+)",\s*metadata:\s?\{\s*featureSwitches:\s?\[(.*?)\],\s*fieldToggles:\s?\[(.*?)\]\s*\}\s*\}' 225 | 226 | matches = re.findall(expr, r2.text, flags=re.A) 227 | ops = parse_matches(matches) 228 | 229 | # search all js files for more GraphQL operation definitions 230 | for p in JS_FILES.iterdir(): 231 | matches = re.findall(expr, p.read_text(), flags=re.A) 232 | ops |= parse_matches(matches) 233 | 234 | PATH_OPS.write_bytes(dump_json(ops)) 235 | html = LexborHTMLParser(PATH_HOMEPAGE.read_text()) 236 | k = 'window.__INITIAL_STATE__=' 237 | PATH_INITIAL_STATE.write_bytes(dump_json(chompjs.parse_js_object([x for x in html.css('script') if k in x.text()][0].text().replace(k, '').strip(';')))) 238 | 239 | data = orjson.loads(PATH_INITIAL_STATE.read_bytes()) 240 | config = data['featureSwitch']['defaultConfig'] | data['featureSwitch']['user']['config'] 241 | features = {k: v.get('value') for k, v in config.items() if isinstance(v.get('value'), bool)} 242 | numeric = {k: v.get('value') for k, v in config.items() if isinstance(v.get('value'), int) and not isinstance(v.get('value'), bool)} 243 | PATH_FEATURES.write_bytes(dump_json(features)) 244 | PATH_LIMITS.write_bytes(dump_json(numeric)) 245 | 246 | 247 | if __name__ == '__main__': 248 | main() 249 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from textwrap import dedent 2 | from setuptools import find_packages, setup 3 | from pathlib import Path 4 | 5 | install_requires = [ 6 | 'aiofiles', 7 | 'nest_asyncio', 8 | 'httpx', 9 | 'tqdm', 10 | 'orjson', 11 | 'm3u8', 12 | 'websockets', 13 | 'uvloop; platform_system != "Windows"', 14 
| ] 15 | 16 | about = {} 17 | exec((Path().cwd() / 'twitter' / '__version__.py').read_text(), about) 18 | 19 | setup( 20 | name=about['__title__'], 21 | version=about['__version__'], 22 | author=about['__author__'], 23 | description=about['__description__'], 24 | license=about['__license__'], 25 | long_description=dedent(''' 26 | 27 | ## Implementation of X/Twitter v1, v2, and GraphQL APIs. 28 | 29 | 30 | ## Table of Contents 31 | 32 | * [Installation](#installation) 33 | * [Automation](#automation) 34 | * [Scraping](#scraping) 35 | * [Get all user/tweet data](#get-all-usertweet-data) 36 | * [Resume Pagination](#resume-pagination) 37 | * [Search](#search) 38 | * [Spaces](#spaces) 39 | * [Live Audio Capture](#live-audio-capture) 40 | * [Live Transcript Capture](#live-transcript-capture) 41 | * [Search and Metadata](#search-and-metadata) 42 | * [Automated Solvers](#automated-solvers) 43 | * [Example API Responses](#example-api-responses) 44 | 45 | ### Installation 46 | 47 | ```bash 48 | pip install twitter-api-client 49 | ``` 50 | 51 | ### Automation 52 | 53 | ```python 54 | from twitter.account import Account 55 | 56 | ## sign-in with credentials 57 | email, username, password = ..., ..., ... 58 | account = Account(email, username, password) 59 | 60 | ## or, resume session using cookies 61 | # account = Account(cookies={"ct0": ..., "auth_token": ...}) 62 | 63 | ## or, resume session using cookies (JSON file) 64 | # account = Account(cookies='twitter.cookies') 65 | 66 | 67 | account.tweet('test 123') 68 | account.untweet(123456) 69 | account.retweet(123456) 70 | account.unretweet(123456) 71 | account.reply('foo', tweet_id=123456) 72 | account.quote('bar', tweet_id=123456) 73 | account.schedule_tweet('schedule foo', 1681851240) 74 | account.unschedule_tweet(123456) 75 | 76 | account.tweet('hello world', media=[ 77 | {'media': 'test.jpg', 'alt': 'some alt text', 'tagged_users': [123]}, 78 | {'media': 'test.jpeg', 'alt': 'some alt text', 'tagged_users': [123]}, 79 | {'media': 'test.png', 'alt': 'some alt text', 'tagged_users': [123]}, 80 | {'media': 'test.jfif', 'alt': 'some alt text', 'tagged_users': [123]}, 81 | ]) 82 | 83 | account.schedule_tweet('foo bar', '2023-04-18 15:42', media=[ 84 | {'media': 'test.gif', 'alt': 'some alt text'}, 85 | ]) 86 | 87 | account.schedule_reply('hello world', '2023-04-19 15:42', tweet_id=123456, media=[ 88 | {'media': 'test.gif', 'alt': 'some alt text'}, 89 | ]) 90 | 91 | account.dm('my message', [1234], media='test.jpg') 92 | 93 | account.create_poll('test poll 123', ['hello', 'world', 'foo', 'bar'], 10080) 94 | 95 | # tweets 96 | account.like(123456) 97 | account.unlike(123456) 98 | account.bookmark(123456) 99 | account.unbookmark(123456) 100 | account.pin(123456) 101 | account.unpin(123456) 102 | 103 | # users 104 | account.follow(1234) 105 | account.unfollow(1234) 106 | account.mute(1234) 107 | account.unmute(1234) 108 | account.enable_notifications(1234) 109 | account.disable_notifications(1234) 110 | account.block(1234) 111 | account.unblock(1234) 112 | 113 | # user profile 114 | account.update_profile_image('test.jpg') 115 | account.update_profile_banner('test.png') 116 | account.update_profile_info(name='Foo Bar', description='test 123', location='Victoria, BC') 117 | 118 | # topics 119 | account.follow_topic(111) 120 | account.unfollow_topic(111) 121 | 122 | # lists 123 | account.create_list('My List', 'description of my list', private=False) 124 | account.update_list(222, 'My Updated List', 'some updated description', private=False) 125 | 
account.update_list_banner(222, 'test.png') 126 | account.delete_list_banner(222) 127 | account.add_list_member(222, 1234) 128 | account.remove_list_member(222, 1234) 129 | account.delete_list(222) 130 | account.pin_list(222) 131 | account.unpin_list(222) 132 | 133 | # refresh all pinned lists in this order 134 | account.update_pinned_lists([222, 111, 333]) 135 | 136 | # unpin all lists 137 | account.update_pinned_lists([]) 138 | 139 | # get timelines 140 | timeline = account.home_timeline() 141 | latest_timeline = account.home_latest_timeline(limit=500) 142 | 143 | # get bookmarks 144 | bookmarks = account.bookmarks() 145 | 146 | # get DM inbox metadata 147 | inbox = account.dm_inbox() 148 | 149 | # get DMs from all conversations 150 | dms = account.dm_history() 151 | 152 | # get DMs from specific conversations 153 | dms = account.dm_history(['123456-789012', '345678-901234']) 154 | 155 | # search DMs by keyword 156 | dms = account.dm_search('test123') 157 | 158 | # delete entire conversation 159 | account.dm_delete(conversation_id='123456-789012') 160 | 161 | # delete (hide) specific DM 162 | account.dm_delete(message_id='123456') 163 | 164 | # get all scheduled tweets 165 | scheduled_tweets = account.scheduled_tweets() 166 | 167 | # delete a scheduled tweet 168 | account.delete_scheduled_tweet(12345678) 169 | 170 | # get all draft tweets 171 | draft_tweets = account.draft_tweets() 172 | 173 | # delete a draft tweet 174 | account.delete_draft_tweet(12345678) 175 | 176 | # delete all scheduled tweets 177 | account.clear_scheduled_tweets() 178 | 179 | # delete all draft tweets 180 | account.clear_draft_tweets() 181 | 182 | # example configuration 183 | account.update_settings({ 184 | "address_book_live_sync_enabled": False, 185 | "allow_ads_personalization": False, 186 | "allow_authenticated_periscope_requests": True, 187 | "allow_dm_groups_from": "following", 188 | "allow_dms_from": "following", 189 | "allow_location_history_personalization": False, 190 | "allow_logged_out_device_personalization": False, 191 | "allow_media_tagging": "none", 192 | "allow_sharing_data_for_third_party_personalization": False, 193 | "alt_text_compose_enabled": None, 194 | "always_use_https": True, 195 | "autoplay_disabled": False, 196 | "country_code": "us", 197 | "discoverable_by_email": False, 198 | "discoverable_by_mobile_phone": False, 199 | "display_sensitive_media": False, 200 | "dm_quality_filter": "enabled", 201 | "dm_receipt_setting": "all_disabled", 202 | "geo_enabled": False, 203 | "include_alt_text_compose": True, 204 | "include_mention_filter": True, 205 | "include_nsfw_admin_flag": True, 206 | "include_nsfw_user_flag": True, 207 | "include_ranked_timeline": True, 208 | "language": "en", 209 | "mention_filter": "unfiltered", 210 | "nsfw_admin": False, 211 | "nsfw_user": False, 212 | "personalized_trends": True, 213 | "protected": False, 214 | "ranked_timeline_eligible": None, 215 | "ranked_timeline_setting": None, 216 | "require_password_login": False, 217 | "requires_login_verification": False, 218 | "sleep_time": { 219 | "enabled": False, 220 | "end_time": None, 221 | "start_time": None 222 | }, 223 | "translator_type": "none", 224 | "universal_quality_filtering_enabled": "enabled", 225 | "use_cookie_personalization": False, 226 | }) 227 | 228 | # example configuration 229 | account.update_search_settings({ 230 | "optInFiltering": True, # filter nsfw content 231 | "optInBlocking": True, # filter blocked accounts 232 | }) 233 | 234 | notifications = account.notifications() 235 | 236 | 
account.change_password('old pwd', 'new pwd') 237 | 238 | ``` 239 | 240 | ### Scraping 241 | 242 | #### Get all user/tweet data 243 | 244 | Two special batch queries `scraper.tweets_by_ids` and `scraper.users_by_ids` should be preferred when applicable. These endpoints are much more efficient and generally have higher rate limits than their unbatched counterparts. See the table below for a comparison. 245 | 246 | | Endpoint | Batch Size | Rate Limit | 247 | |---------------|----------------|---------------| 248 | | tweets_by_ids | ~220 | 500 / 15 mins | 249 | | tweets_by_id | 1 | 50 / 15 mins | 250 | | users_by_ids | ~220 | 100 / 15 mins | 251 | | users_by_id | 1 | 500 / 15 mins | 252 | 253 | *As of Fall 2023, login by username/password is unstable. Using cookies is now recommended.* 254 | 255 | ```python 256 | from twitter.scraper import Scraper 257 | 258 | ## sign-in with credentials 259 | email, username, password = ..., ..., ... 260 | scraper = Scraper(email, username, password) 261 | 262 | ## or, resume session using cookies 263 | # scraper = Scraper(cookies={"ct0": ..., "auth_token": ...}) 264 | 265 | ## or, resume session using cookies (JSON file) 266 | # scraper = Scraper(cookies='twitter.cookies') 267 | 268 | ## or, initialize guest session (limited endpoints) 269 | # from twitter.util import init_session 270 | # scraper = Scraper(session=init_session()) 271 | 272 | # user data 273 | users = scraper.users(['foo', 'bar', 'hello', 'world']) 274 | users = scraper.users_by_ids([123, 234, 345]) # preferred 275 | users = scraper.users_by_id([123, 234, 345]) 276 | tweets = scraper.tweets([123, 234, 345]) 277 | likes = scraper.likes([123, 234, 345]) 278 | tweets_and_replies = scraper.tweets_and_replies([123, 234, 345]) 279 | media = scraper.media([123, 234, 345]) 280 | following = scraper.following([123, 234, 345]) 281 | followers = scraper.followers([123, 234, 345]) 282 | scraper.tweet_stats([111111, 222222, 333333]) 283 | 284 | # get recommended users based on user 285 | scraper.recommended_users() 286 | scraper.recommended_users([123]) 287 | 288 | # tweet data 289 | tweets = scraper.tweets_by_ids([987, 876, 754]) # preferred 290 | tweets = scraper.tweets_by_id([987, 876, 754]) 291 | tweet_details = scraper.tweets_details([987, 876, 754]) 292 | retweeters = scraper.retweeters([987, 876, 754]) 293 | favoriters = scraper.favoriters([987, 876, 754]) 294 | 295 | scraper.download_media([ 296 | 111111, 297 | 222222, 298 | 333333, 299 | 444444, 300 | ]) 301 | 302 | # trends 303 | scraper.trends() 304 | ``` 305 | 306 | #### Resume Pagination 307 | **Pagination is already done by default**; however, there are circumstances where you may need to resume pagination from a specific cursor. For example, the `Followers` endpoint only allows for 50 requests every 15 minutes. In this case, we can resume from where we left off by providing a specific cursor value. 308 | ```python 309 | from twitter.scraper import Scraper 310 | 311 | email, username, password = ..., ..., ... 312 | scraper = Scraper(email, username, password) 313 | 314 | user_id = 44196397 315 | cursor = '1767341853908517597|1663601806447476672' # example cursor 316 | limit = 100 # arbitrary limit for demonstration 317 | follower_subset, last_cursor = scraper.followers([user_id], limit=limit, cursor=cursor) 318 | 319 | # use last_cursor to resume pagination 320 | ``` 321 | 322 | #### Search 323 | 324 | ```python 325 | from twitter.search import Search 326 | 327 | email, username, password = ..., ..., ...
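# each query is a dict with a 'category' (Top, Latest, People, Photos, Videos) and a 'query' string built from standard search operators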
328 | # default output directory is `data/search_results` if save=True 329 | search = Search(email, username, password, save=True, debug=1) 330 | 331 | res = search.run( 332 | limit=37, 333 | retries=5, 334 | queries=[ 335 | { 336 | 'category': 'Top', 337 | 'query': 'paperswithcode -tensorflow -tf' 338 | }, 339 | { 340 | 'category': 'Latest', 341 | 'query': 'test' 342 | }, 343 | { 344 | 'category': 'People', 345 | 'query': 'brasil portugal -argentina' 346 | }, 347 | { 348 | 'category': 'Photos', 349 | 'query': 'greece' 350 | }, 351 | { 352 | 'category': 'Videos', 353 | 'query': 'italy' 354 | }, 355 | ], 356 | ) 357 | ``` 358 | 359 | **Search Operators Reference** 360 | 361 | https://developer.twitter.com/en/docs/twitter-api/v1/rules-and-filtering/search-operators 362 | 363 | https://developer.twitter.com/en/docs/twitter-api/tweets/search/integrate/build-a-query 364 | 365 | ### Spaces 366 | 367 | #### Live Audio Capture 368 | 369 | Capture live audio for up to 500 streams per IP 370 | 371 | ```python 372 | from twitter.scraper import Scraper 373 | from twitter.util import init_session 374 | 375 | session = init_session() # initialize guest session, no login required 376 | scraper = Scraper(session=session) 377 | 378 | rooms = [...] 379 | scraper.spaces_live(rooms=rooms) # capture live audio from list of rooms 380 | ``` 381 | 382 | #### Live Transcript Capture 383 | 384 | **Raw transcript chunks** 385 | 386 | ```python 387 | from twitter.scraper import Scraper 388 | from twitter.util import init_session 389 | 390 | session = init_session() # initialize guest session, no login required 391 | scraper = Scraper(session=session) 392 | 393 | # room must be live, i.e. in "Running" state 394 | scraper.space_live_transcript('1zqKVPlQNApJB', frequency=2) # word-level live transcript. (dirty, on-the-fly transcription before post-processing) 395 | ``` 396 | 397 | **Processed (final) transcript chunks** 398 | 399 | ```python 400 | from twitter.scraper import Scraper 401 | from twitter.util import init_session 402 | 403 | session = init_session() # initialize guest session, no login required 404 | scraper = Scraper(session=session) 405 | 406 | # room must be live, i.e. in "Running" state 407 | scraper.space_live_transcript('1zqKVPlQNApJB', frequency=1) # finalized live transcript. (clean) 408 | ``` 409 | 410 | #### Search and Metadata 411 | ```python 412 | from twitter.scraper import Scraper 413 | from twitter.util import init_session 414 | from twitter.constants import SpaceCategory 415 | 416 | session = init_session() # initialize guest session, no login required 417 | scraper = Scraper(session=session) 418 | 419 | # download audio and chat-log from space 420 | spaces = scraper.spaces(rooms=['1eaJbrAPnBVJX', '1eaJbrAlZjjJX'], audio=True, chat=True) 421 | 422 | # pull metadata only 423 | spaces = scraper.spaces(rooms=['1eaJbrAPnBVJX', '1eaJbrAlZjjJX']) 424 | 425 | # search for spaces in "Upcoming", "Top" and "Live" categories 426 | spaces = scraper.spaces(search=[ 427 | { 428 | 'filter': SpaceCategory.Upcoming, 429 | 'query': 'hello' 430 | }, 431 | { 432 | 'filter': SpaceCategory.Top, 433 | 'query': 'world' 434 | }, 435 | { 436 | 'filter': SpaceCategory.Live, 437 | 'query': 'foo bar' 438 | } 439 | ]) 440 | ``` 441 | 442 | ### Automated Solvers 443 | 444 | > This requires installation of the [proton-api-client](https://pypi.org/project/proton-api-client) package 445 | 446 | To set up automated email confirmation/verification solvers, add your Proton Mail credentials below as shown. 
447 | This removes the need to manually solve email challenges via the web app. These credentials can be used 448 | in `Scraper`, `Account`, and `Search` constructors. 449 | 450 | E.g. 451 | 452 | ```python 453 | from twitter.account import Account 454 | from twitter.util import get_code 455 | from proton.client import ProtonMail 456 | 457 | proton_username, proton_password = ..., ... 458 | proton = lambda: get_code(ProtonMail(proton_username, proton_password)) 459 | 460 | email, username, password = ..., ..., ... 461 | account = Account(email, username, password, proton=proton) 462 | ``` 463 | 464 | '''), 465 | python_requires=">=3.10.10", 466 | long_description_content_type='text/markdown', 467 | author_email='trevorhobenshield@gmail.com', 468 | url='https://github.com/trevorhobenshield/twitter-api-client', 469 | install_requires=install_requires, 470 | keywords='twitter api client async search automation bot scrape', 471 | packages=find_packages(), 472 | include_package_data=True, 473 | classifiers=[ 474 | 'Environment :: Web Environment', 475 | 'Intended Audience :: Developers', 476 | 'Natural Language :: English', 477 | 'Operating System :: Unix', 478 | 'Operating System :: MacOS :: MacOS X', 479 | 'Operating System :: Microsoft :: Windows', 480 | 'Programming Language :: Python', 481 | 'Programming Language :: Python :: 3', 482 | 'Programming Language :: Python :: 3.10', 483 | 'Programming Language :: Python :: 3.11', 484 | 'Programming Language :: Python :: 3.12', 485 | 'Topic :: Internet :: WWW/HTTP', 486 | 'Topic :: Software Development :: Libraries', 487 | 'Topic :: Software Development :: Libraries :: Python Modules', 488 | ] 489 | ) 490 | -------------------------------------------------------------------------------- /setup.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/bash 2 | 3 | python -m build 4 | python -m twine upload dist/* -------------------------------------------------------------------------------- /twitter/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/trevorhobenshield/twitter-api-client/c150f1a3492ce3db15b954f2bc18b4976500a73b/twitter/__init__.py -------------------------------------------------------------------------------- /twitter/__version__.py: -------------------------------------------------------------------------------- 1 | __title__ = "twitter-api-client" 2 | __description__ = "Implementation of X/Twitter v1, v2, and GraphQL APIs." 3 | __version__ = "0.10.22" 4 | __author__ = "Trevor Hobenshield" 5 | __license__ = "MIT" -------------------------------------------------------------------------------- /twitter/account.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import hashlib 3 | import logging.config 4 | import math 5 | import mimetypes 6 | import platform 7 | from copy import deepcopy 8 | from datetime import datetime 9 | from string import ascii_letters 10 | from uuid import uuid1, getnode 11 | 12 | from httpx import AsyncClient, Limits 13 | from tqdm import tqdm 14 | from tqdm.asyncio import tqdm_asyncio 15 | 16 | from .constants import * 17 | from .login import login 18 | from .util import * 19 | 20 | try: 21 | if get_ipython().__class__.__name__ == 'ZMQInteractiveShell': 22 | import nest_asyncio 23 | nest_asyncio.apply() 24 | except: 25 | ... 
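# nest_asyncio (above) permits re-entering an already-running notebook event loop; uvloop (below) is an optional faster event loop on non-Windows platforms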
26 | 27 | if platform.system() != 'Windows': 28 | try: 29 | import uvloop 30 | uvloop.install() 31 | except ImportError as e: 32 | ... 33 | 34 | 35 | class Account: 36 | 37 | def __init__(self, email: str = None, username: str = None, password: str = None, session: Client = None, **kwargs): 38 | self.save = kwargs.get('save', True) 39 | self.debug = kwargs.get('debug', 0) 40 | self.gql_api = 'https://twitter.com/i/api/graphql' 41 | self.v1_api = 'https://api.twitter.com/1.1' 42 | self.v2_api = 'https://twitter.com/i/api/2' 43 | self.logger = self._init_logger(**kwargs) 44 | self.session = self._validate_session(email, username, password, session, **kwargs) 45 | self.rate_limits = {} 46 | 47 | def gql(self, method: str, operation: tuple, variables: dict, features: dict = Operation.default_features) -> dict: 48 | qid, op = operation 49 | params = { 50 | 'queryId': qid, 51 | 'features': features, 52 | 'variables': Operation.default_variables | variables 53 | } 54 | if method == 'POST': 55 | data = {'json': params} 56 | else: 57 | data = {'params': {k: orjson.dumps(v).decode() for k, v in params.items()}} 58 | r = self.session.request( 59 | method=method, 60 | url=f'{self.gql_api}/{qid}/{op}', 61 | headers=get_headers(self.session), 62 | **data 63 | ) 64 | self.rate_limits[op] = {k: int(v) for k, v in r.headers.items() if 'rate-limit' in k} 65 | if self.debug: 66 | log(self.logger, self.debug, r) 67 | return r.json() 68 | 69 | def v1(self, path: str, params: dict) -> dict: 70 | headers = get_headers(self.session) 71 | headers['content-type'] = 'application/x-www-form-urlencoded' 72 | r = self.session.post(f'{self.v1_api}/{path}', headers=headers, data=urlencode(params)) 73 | if self.debug: 74 | log(self.logger, self.debug, r) 75 | return r.json() 76 | 77 | def create_poll(self, text: str, choices: list[str], poll_duration: int) -> dict: 78 | options = { 79 | "twitter:card": "poll4choice_text_only", 80 | "twitter:api:api:endpoint": "1", 81 | "twitter:long:duration_minutes": poll_duration # max: 10080 82 | } 83 | for i, c in enumerate(choices): 84 | options[f"twitter:string:choice{i + 1}_label"] = c 85 | 86 | headers = get_headers(self.session) 87 | headers['content-type'] = 'application/x-www-form-urlencoded' 88 | url = 'https://caps.twitter.com/v2/cards/create.json' 89 | r = self.session.post(url, headers=headers, params={'card_data': orjson.dumps(options).decode()}) 90 | card_uri = r.json()['card_uri'] 91 | r = self.tweet(text, poll_params={'card_uri': card_uri}) 92 | return r 93 | 94 | def dm(self, text: str, receivers: list[int], media: str = '') -> dict: 95 | variables = { 96 | "message": {}, 97 | "requestId": str(uuid1(getnode())), 98 | "target": {"participant_ids": receivers}, 99 | } 100 | if media: 101 | media_id = self._upload_media(media, is_dm=True) 102 | variables['message']['media'] = {'id': media_id, 'text': text} 103 | else: 104 | variables['message']['text'] = {'text': text} 105 | res = self.gql('POST', Operation.useSendMessageMutation, variables) 106 | if find_key(res, 'dm_validation_failure_type'): 107 | if self.debug: 108 | self.logger.debug(f"{RED}Failed to send DM(s) to {receivers}{RESET}") 109 | return res 110 | 111 | def tweet(self, text: str, *, media: any = None, **kwargs) -> dict: 112 | variables = { 113 | 'tweet_text': text, 114 | 'dark_request': False, 115 | 'media': { 116 | 'media_entities': [], 117 | 'possibly_sensitive': False, 118 | }, 119 | 'semantic_annotation_ids': [], 120 | } 121 | 122 | if reply_params := kwargs.get('reply_params', {}): 123 | variables |= 
reply_params 124 | if quote_params := kwargs.get('quote_params', {}): 125 | variables |= quote_params 126 | if poll_params := kwargs.get('poll_params', {}): 127 | variables |= poll_params 128 | 129 | draft = kwargs.get('draft') 130 | schedule = kwargs.get('schedule') 131 | 132 | if draft or schedule: 133 | variables = { 134 | 'post_tweet_request': { 135 | 'auto_populate_reply_metadata': False, 136 | 'status': text, 137 | 'exclude_reply_user_ids': [], 138 | 'media_ids': [], 139 | }, 140 | } 141 | if media: 142 | for m in media: 143 | media_id = self._upload_media(m['media']) 144 | variables['post_tweet_request']['media_ids'].append(media_id) 145 | if alt := m.get('alt'): 146 | self._add_alt_text(media_id, alt) 147 | 148 | if schedule: 149 | variables['execute_at'] = ( 150 | datetime.strptime(schedule, "%Y-%m-%d %H:%M").timestamp() 151 | if isinstance(schedule, str) 152 | else schedule 153 | ) 154 | return self.gql('POST', Operation.CreateScheduledTweet, variables) 155 | 156 | return self.gql('POST', Operation.CreateDraftTweet, variables) 157 | 158 | # regular tweet 159 | if media: 160 | for m in media: 161 | media_id = self._upload_media(m['media']) 162 | variables['media']['media_entities'].append({ 163 | 'media_id': media_id, 164 | 'tagged_users': m.get('tagged_users', []) 165 | }) 166 | if alt := m.get('alt'): 167 | self._add_alt_text(media_id, alt) 168 | 169 | return self.gql('POST', Operation.CreateTweet, variables) 170 | 171 | def schedule_tweet(self, text: str, date: int | str, *, media: list = None) -> dict: 172 | variables = { 173 | 'post_tweet_request': { 174 | 'auto_populate_reply_metadata': False, 175 | 'status': text, 176 | 'exclude_reply_user_ids': [], 177 | 'media_ids': [], 178 | }, 179 | 'execute_at': ( 180 | datetime.strptime(date, "%Y-%m-%d %H:%M").timestamp() 181 | if isinstance(date, str) 182 | else date 183 | ), 184 | } 185 | if media: 186 | for m in media: 187 | media_id = self._upload_media(m['media']) 188 | variables['post_tweet_request']['media_ids'].append(media_id) 189 | if alt := m.get('alt'): 190 | self._add_alt_text(media_id, alt) 191 | return self.gql('POST', Operation.CreateScheduledTweet, variables) 192 | 193 | def schedule_reply(self, text: str, date: int | str, tweet_id: int, *, media: list = None) -> dict: 194 | variables = { 195 | 'post_tweet_request': { 196 | 'auto_populate_reply_metadata': True, 197 | 'in_reply_to_status_id': tweet_id, 198 | 'status': text, 199 | 'exclude_reply_user_ids': [], 200 | 'media_ids': [], 201 | }, 202 | 'execute_at': ( 203 | datetime.strptime(date, "%Y-%m-%d %H:%M").timestamp() 204 | if isinstance(date, str) 205 | else date 206 | ), 207 | } 208 | if media: 209 | for m in media: 210 | media_id = self._upload_media(m['media']) 211 | variables['post_tweet_request']['media_ids'].append(media_id) 212 | if alt := m.get('alt'): 213 | self._add_alt_text(media_id, alt) 214 | return self.gql('POST', Operation.CreateScheduledTweet, variables) 215 | 216 | def unschedule_tweet(self, tweet_id: int) -> dict: 217 | variables = {'scheduled_tweet_id': tweet_id} 218 | return self.gql('POST', Operation.DeleteScheduledTweet, variables) 219 | 220 | def untweet(self, tweet_id: int) -> dict: 221 | variables = {'tweet_id': tweet_id, 'dark_request': False} 222 | return self.gql('POST', Operation.DeleteTweet, variables) 223 | 224 | def reply(self, text: str, tweet_id: int) -> dict: 225 | variables = { 226 | 'tweet_text': text, 227 | 'reply': { 228 | 'in_reply_to_tweet_id': tweet_id, 229 | 'exclude_reply_user_ids': [], 230 | }, 231 | 'batch_compose': 
'BatchSubsequent', 232 | 'dark_request': False, 233 | 'media': { 234 | 'media_entities': [], 235 | 'possibly_sensitive': False, 236 | }, 237 | 'semantic_annotation_ids': [], 238 | } 239 | return self.gql('POST', Operation.CreateTweet, variables) 240 | 241 | def quote(self, text: str, tweet_id: int) -> dict: 242 | variables = { 243 | 'tweet_text': text, 244 | # can use `i` as it resolves to screen_name 245 | 'attachment_url': f'https://twitter.com/i/status/{tweet_id}', 246 | 'dark_request': False, 247 | 'media': { 248 | 'media_entities': [], 249 | 'possibly_sensitive': False, 250 | }, 251 | 'semantic_annotation_ids': [], 252 | } 253 | return self.gql('POST', Operation.CreateTweet, variables) 254 | 255 | def retweet(self, tweet_id: int) -> dict: 256 | variables = {"tweet_id": tweet_id, "dark_request": False} 257 | return self.gql('POST', Operation.CreateRetweet, variables) 258 | 259 | def unretweet(self, tweet_id: int) -> dict: 260 | variables = {"source_tweet_id": tweet_id, "dark_request": False} 261 | return self.gql('POST', Operation.DeleteRetweet, variables) 262 | 263 | def like(self, tweet_id: int) -> dict: 264 | variables = {'tweet_id': tweet_id} 265 | return self.gql('POST', Operation.FavoriteTweet, variables) 266 | 267 | def unlike(self, tweet_id: int) -> dict: 268 | variables = {'tweet_id': tweet_id} 269 | return self.gql('POST', Operation.UnfavoriteTweet, variables) 270 | 271 | def bookmark(self, tweet_id: int) -> dict: 272 | variables = {'tweet_id': tweet_id} 273 | return self.gql('POST', Operation.CreateBookmark, variables) 274 | 275 | def unbookmark(self, tweet_id: int) -> dict: 276 | variables = {'tweet_id': tweet_id} 277 | return self.gql('POST', Operation.DeleteBookmark, variables) 278 | 279 | def create_list(self, name: str, description: str, private: bool) -> dict: 280 | variables = { 281 | "isPrivate": private, 282 | "name": name, 283 | "description": description, 284 | } 285 | return self.gql('POST', Operation.CreateList, variables) 286 | 287 | def update_list(self, list_id: int, name: str, description: str, private: bool) -> dict: 288 | variables = { 289 | "listId": list_id, 290 | "isPrivate": private, 291 | "name": name, 292 | "description": description, 293 | } 294 | return self.gql('POST', Operation.UpdateList, variables) 295 | 296 | def update_pinned_lists(self, list_ids: list[int]) -> dict: 297 | """ 298 | Update pinned lists. 299 | Reset all pinned lists and pin all specified lists in the order they are provided. 
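Passing an empty list unpins all lists.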
300 | 301 | @param list_ids: list of list ids to pin 302 | @return: response 303 | """ 304 | return self.gql('POST', Operation.ListsPinMany, {'listIds': list_ids}) 305 | 306 | def pin_list(self, list_id: int) -> dict: 307 | return self.gql('POST', Operation.ListPinOne, {'listId': list_id}) 308 | 309 | def unpin_list(self, list_id: int) -> dict: 310 | return self.gql('POST', Operation.ListUnpinOne, {'listId': list_id}) 311 | 312 | def add_list_member(self, list_id: int, user_id: int) -> dict: 313 | return self.gql('POST', Operation.ListAddMember, {'listId': list_id, "userId": user_id}) 314 | 315 | def remove_list_member(self, list_id: int, user_id: int) -> dict: 316 | return self.gql('POST', Operation.ListRemoveMember, {'listId': list_id, "userId": user_id}) 317 | 318 | def delete_list(self, list_id: int) -> dict: 319 | return self.gql('POST', Operation.DeleteList, {'listId': list_id}) 320 | 321 | def update_list_banner(self, list_id: int, media: str) -> dict: 322 | media_id = self._upload_media(media) 323 | variables = {'listId': list_id, 'mediaId': media_id} 324 | return self.gql('POST', Operation.EditListBanner, variables) 325 | 326 | def delete_list_banner(self, list_id: int) -> dict: 327 | return self.gql('POST', Operation.DeleteListBanner, {'listId': list_id}) 328 | 329 | def follow_topic(self, topic_id: int) -> dict: 330 | return self.gql('POST', Operation.TopicFollow, {'topicId': str(topic_id)}) 331 | 332 | def unfollow_topic(self, topic_id: int) -> dict: 333 | return self.gql('POST', Operation.TopicUnfollow, {'topicId': str(topic_id)}) 334 | 335 | def pin(self, tweet_id: int) -> dict: 336 | return self.v1('account/pin_tweet.json', {'tweet_mode': 'extended', 'id': tweet_id}) 337 | 338 | def unpin(self, tweet_id: int) -> dict: 339 | return self.v1('account/unpin_tweet.json', {'tweet_mode': 'extended', 'id': tweet_id}) 340 | 341 | def follow(self, user_id: int) -> dict: 342 | settings = deepcopy(follow_settings) 343 | settings |= {"user_id": user_id} 344 | return self.v1('friendships/create.json', settings) 345 | 346 | def unfollow(self, user_id: int) -> dict: 347 | settings = deepcopy(follow_settings) 348 | settings |= {"user_id": user_id} 349 | return self.v1('friendships/destroy.json', settings) 350 | 351 | def mute(self, user_id: int) -> dict: 352 | return self.v1('mutes/users/create.json', {'user_id': user_id}) 353 | 354 | def unmute(self, user_id: int) -> dict: 355 | return self.v1('mutes/users/destroy.json', {'user_id': user_id}) 356 | 357 | def enable_follower_notifications(self, user_id: int) -> dict: 358 | settings = deepcopy(follower_notification_settings) 359 | settings |= {'id': user_id, 'device': 'true'} 360 | return self.v1('friendships/update.json', settings) 361 | 362 | def disable_follower_notifications(self, user_id: int) -> dict: 363 | settings = deepcopy(follower_notification_settings) 364 | settings |= {'id': user_id, 'device': 'false'} 365 | return self.v1('friendships/update.json', settings) 366 | 367 | def block(self, user_id: int) -> dict: 368 | return self.v1('blocks/create.json', {'user_id': user_id}) 369 | 370 | def unblock(self, user_id: int) -> dict: 371 | return self.v1('blocks/destroy.json', {'user_id': user_id}) 372 | 373 | def update_profile_image(self, media: str) -> Response: 374 | media_id = self._upload_media(media, is_profile=True) 375 | url = f'{self.v1_api}/account/update_profile_image.json' 376 | headers = get_headers(self.session) 377 | params = {'media_id': media_id} 378 | r = self.session.post(url, headers=headers, params=params) 379 | 
return r 380 | 381 | def update_profile_banner(self, media: str) -> Response: 382 | media_id = self._upload_media(media, is_profile=True) 383 | url = f'{self.v1_api}/account/update_profile_banner.json' 384 | headers = get_headers(self.session) 385 | params = {'media_id': media_id} 386 | r = self.session.post(url, headers=headers, params=params) 387 | return r 388 | 389 | def update_profile_info(self, **kwargs) -> Response: 390 | url = f'{self.v1_api}/account/update_profile.json' 391 | headers = get_headers(self.session) 392 | r = self.session.post(url, headers=headers, params=kwargs) 393 | return r 394 | 395 | def update_search_settings(self, settings: dict) -> Response: 396 | twid = int(self.session.cookies.get('twid').split('=')[-1].strip('"')) 397 | headers = get_headers(self.session) 398 | r = self.session.post( 399 | url=f'{self.v1_api}/strato/column/User/{twid}/search/searchSafety', 400 | headers=headers, 401 | json=settings, 402 | ) 403 | return r 404 | 405 | def update_settings(self, settings: dict) -> dict: 406 | return self.v1('account/settings.json', settings) 407 | 408 | def change_password(self, old: str, new: str) -> dict: 409 | params = { 410 | 'current_password': old, 411 | 'password': new, 412 | 'password_confirmation': new 413 | } 414 | headers = get_headers(self.session) 415 | headers['content-type'] = 'application/x-www-form-urlencoded' 416 | url = 'https://twitter.com/i/api/i/account/change_password.json' 417 | r = self.session.post(url, headers=headers, data=urlencode(params)) 418 | return r.json() 419 | 420 | def remove_interests(self, *args): 421 | """ 422 | Pass 'all' to remove all interests 423 | """ 424 | r = self.session.get( 425 | f'{self.v1_api}/account/personalization/twitter_interests.json', 426 | headers=get_headers(self.session) 427 | ) 428 | current_interests = r.json()['interested_in'] 429 | if args == 'all': 430 | disabled_interests = [x['id'] for x in current_interests] 431 | else: 432 | disabled_interests = [x['id'] for x in current_interests if x['display_name'] in args] 433 | payload = { 434 | "preferences": { 435 | "interest_preferences": { 436 | "disabled_interests": disabled_interests, 437 | "disabled_partner_interests": [] 438 | } 439 | } 440 | } 441 | r = self.session.post( 442 | f'{self.v1_api}/account/personalization/p13n_preferences.json', 443 | headers=get_headers(self.session), 444 | json=payload 445 | ) 446 | return r 447 | 448 | def home_timeline(self, limit=math.inf) -> list[dict]: 449 | return self._paginate('POST', Operation.HomeTimeline, Operation.default_variables, limit) 450 | 451 | def home_latest_timeline(self, limit=math.inf) -> list[dict]: 452 | return self._paginate('POST', Operation.HomeLatestTimeline, Operation.default_variables, limit) 453 | 454 | def bookmarks(self, limit=math.inf) -> list[dict]: 455 | return self._paginate('GET', Operation.Bookmarks, {}, limit) 456 | 457 | def _paginate(self, method: str, operation: tuple, variables: dict, limit: int) -> list[dict]: 458 | initial_data = self.gql(method, operation, variables) 459 | res = [initial_data] 460 | ids = set(find_key(initial_data, 'rest_id')) 461 | dups = 0 462 | DUP_LIMIT = 3 463 | 464 | cursor = get_cursor(initial_data) 465 | while (dups < DUP_LIMIT) and cursor: 466 | prev_len = len(ids) 467 | if prev_len >= limit: 468 | return res 469 | 470 | variables['cursor'] = cursor 471 | data = self.gql(method, operation, variables) 472 | 473 | cursor = get_cursor(data) 474 | ids |= set(find_key(data, 'rest_id')) 475 | 476 | if self.debug: 477 | 
self.logger.debug(f'cursor: {cursor}\tunique results: {len(ids)}') 478 | 479 | if prev_len == len(ids): 480 | dups += 1 481 | 482 | res.append(data) 483 | return res 484 | 485 | def _upload_media(self, filename: str, is_dm: bool = False, is_profile=False) -> int | None: 486 | """ 487 | https://developer.twitter.com/en/docs/twitter-api/v1/media/upload-media/uploading-media/media-best-practices 488 | """ 489 | 490 | def check_media(category: str, size: int) -> None: 491 | fmt = lambda x: f'{(x / 1e6):.2f} MB' 492 | msg = lambda x: f'cannot upload {fmt(size)} {category}, max size is {fmt(x)}' 493 | if category == 'image' and size > MAX_IMAGE_SIZE: 494 | raise Exception(msg(MAX_IMAGE_SIZE)) 495 | if category == 'gif' and size > MAX_GIF_SIZE: 496 | raise Exception(msg(MAX_GIF_SIZE)) 497 | if category == 'video' and size > MAX_VIDEO_SIZE: 498 | raise Exception(msg(MAX_VIDEO_SIZE)) 499 | 500 | # if is_profile: 501 | # url = 'https://upload.twitter.com/i/media/upload.json' 502 | # else: 503 | # url = 'https://upload.twitter.com/1.1/media/upload.json' 504 | 505 | url = 'https://upload.twitter.com/i/media/upload.json' 506 | 507 | file = Path(filename) 508 | total_bytes = file.stat().st_size 509 | headers = get_headers(self.session) 510 | 511 | upload_type = 'dm' if is_dm else 'tweet' 512 | media_type = mimetypes.guess_type(file)[0] 513 | media_category = f'{upload_type}_gif' if 'gif' in media_type else f'{upload_type}_{media_type.split("/")[0]}' 514 | 515 | check_media(media_category, total_bytes) 516 | 517 | params = {'command': 'INIT', 'media_type': media_type, 'total_bytes': total_bytes, 518 | 'media_category': media_category} 519 | r = self.session.post(url=url, headers=headers, params=params) 520 | 521 | if r.status_code >= 400: 522 | raise Exception(f'{r.text}') 523 | 524 | media_id = r.json()['media_id'] 525 | 526 | desc = f"uploading: {file.name}" 527 | with tqdm(total=total_bytes, desc=desc, unit='B', unit_scale=True, unit_divisor=1024) as pbar: 528 | with open(file, 'rb') as fp: 529 | i = 0 530 | while chunk := fp.read(UPLOAD_CHUNK_SIZE): 531 | params = {'command': 'APPEND', 'media_id': media_id, 'segment_index': i} 532 | try: 533 | pad = bytes(''.join(random.choices(ascii_letters, k=16)), encoding='utf-8') 534 | data = b''.join([ 535 | b'------WebKitFormBoundary', 536 | pad, 537 | b'\r\nContent-Disposition: form-data; name="media"; filename="blob"', 538 | b'\r\nContent-Type: application/octet-stream', 539 | b'\r\n\r\n', 540 | chunk, 541 | b'\r\n------WebKitFormBoundary', 542 | pad, 543 | b'--\r\n', 544 | ]) 545 | _headers = {b'content-type': b'multipart/form-data; boundary=----WebKitFormBoundary' + pad} 546 | r = self.session.post(url=url, headers=headers | _headers, params=params, content=data) 547 | except Exception as e: 548 | if self.debug: 549 | self.logger.error(f'Failed to upload chunk, trying alternative method\n{e}') 550 | try: 551 | files = {'media': chunk} 552 | r = self.session.post(url=url, headers=headers, params=params, files=files) 553 | except Exception as e: 554 | if self.debug: 555 | self.logger.error(f'Failed to upload chunk\n{e}') 556 | return 557 | 558 | if r.status_code < 200 or r.status_code > 299: 559 | if self.debug: 560 | self.logger.debug(f'{RED}{r.status_code} {r.text}{RESET}') 561 | 562 | i += 1 563 | pbar.update(fp.tell() - pbar.n) 564 | 565 | params = {'command': 'FINALIZE', 'media_id': media_id, 'allow_async': 'true'} 566 | if is_dm: 567 | params |= {'original_md5': hashlib.md5(file.read_bytes()).hexdigest()} 568 | r = self.session.post(url=url, 
headers=headers, params=params) 569 | if r.status_code == 400: 570 | if self.debug: 571 | self.logger.debug(f'{RED}{r.status_code} {r.text}{RESET}') 572 | return 573 | 574 | # self.logger.debug(f'processing, please wait...') 575 | processing_info = r.json().get('processing_info') 576 | while processing_info: 577 | state = processing_info['state'] 578 | if error := processing_info.get("error"): 579 | if self.debug: 580 | self.logger.debug(f'{RED}{error}{RESET}') 581 | return 582 | if state == MEDIA_UPLOAD_SUCCEED: 583 | break 584 | if state == MEDIA_UPLOAD_FAIL: 585 | if self.debug: 586 | self.logger.debug(f'{RED}{r.status_code} {r.text} {RESET}') 587 | return 588 | check_after_secs = processing_info.get('check_after_secs', random.randint(1, 5)) 589 | time.sleep(check_after_secs) 590 | params = {'command': 'STATUS', 'media_id': media_id} 591 | r = self.session.get(url=url, headers=headers, params=params) 592 | processing_info = r.json().get('processing_info') 593 | # self.logger.debug('processing complete') 594 | return media_id 595 | 596 | def _add_alt_text(self, media_id: int, text: str) -> Response: 597 | params = {"media_id": media_id, "alt_text": {"text": text}} 598 | url = f'{self.v1_api}/media/metadata/create.json' 599 | r = self.session.post(url, headers=get_headers(self.session), json=params) 600 | return r 601 | 602 | def _init_logger(self, **kwargs) -> Logger: 603 | if kwargs.get('debug'): 604 | cfg = kwargs.get('log_config') 605 | logging.config.dictConfig(cfg or LOG_CONFIG) 606 | 607 | # only support one logger 608 | logger_name = list(LOG_CONFIG['loggers'].keys())[0] 609 | 610 | # set level of all other loggers to ERROR 611 | for name in logging.root.manager.loggerDict: 612 | if name != logger_name: 613 | logging.getLogger(name).setLevel(logging.ERROR) 614 | 615 | return logging.getLogger(logger_name) 616 | 617 | @staticmethod 618 | def _validate_session(*args, **kwargs): 619 | email, username, password, session = args 620 | 621 | # validate credentials 622 | if all((email, username, password)): 623 | session = login(email, username, password, **kwargs) 624 | session._init_with_cookies = False 625 | return session 626 | 627 | # invalid credentials, try validating session 628 | if session and all(session.cookies.get(c) for c in {'ct0', 'auth_token'}): 629 | session._init_with_cookies = True 630 | return session 631 | 632 | # invalid credentials and session 633 | cookies = kwargs.get('cookies') 634 | 635 | # try validating cookies dict 636 | if isinstance(cookies, dict) and all(cookies.get(c) for c in {'ct0', 'auth_token'}): 637 | _session = Client(cookies=cookies, follow_redirects=True) 638 | _session._init_with_cookies = True 639 | _session.headers.update(get_headers(_session)) 640 | return _session 641 | 642 | # try validating cookies from file 643 | if isinstance(cookies, str): 644 | _session = Client(cookies=orjson.loads(Path(cookies).read_bytes()), follow_redirects=True) 645 | _session._init_with_cookies = True 646 | _session.headers.update(get_headers(_session)) 647 | return _session 648 | 649 | raise Exception('Session not authenticated. ' 650 | 'Please use an authenticated session or remove the `session` argument and try again.') 651 | 652 | def dm_inbox(self) -> dict: 653 | """ 654 | Get DM inbox metadata. 
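The conversation ids it contains can be passed to `dm_history()`.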
655 | 656 | @return: inbox as dict 657 | """ 658 | r = self.session.get( 659 | f'{self.v1_api}/dm/inbox_initial_state.json', 660 | headers=get_headers(self.session), 661 | params=dm_params 662 | ) 663 | return r.json() 664 | 665 | def dm_history(self, conversation_ids: list[str] = None) -> list[dict]: 666 | """ 667 | Get DM history. 668 | 669 | Call without arguments to get all DMS from all conversations. 670 | 671 | @param conversation_ids: optional list of conversation ids 672 | @return: list of messages as dicts 673 | """ 674 | 675 | async def get(session: AsyncClient, conversation_id: str): 676 | params = deepcopy(dm_params) 677 | r = await session.get( 678 | f'{self.v1_api}/dm/conversation/{conversation_id}.json', 679 | params=params, 680 | ) 681 | res = r.json().get('conversation_timeline', {}) 682 | data = [x.get('message') for x in res.get('entries', [])] 683 | entry_id = res.get('min_entry_id') 684 | while entry_id: 685 | params['max_id'] = entry_id 686 | r = await session.get( 687 | f'{self.v1_api}/dm/conversation/{conversation_id}.json', 688 | params=params, 689 | ) 690 | res = r.json().get('conversation_timeline', {}) 691 | data.extend(x['message'] for x in res.get('entries', [])) 692 | entry_id = res.get('min_entry_id') 693 | return data 694 | 695 | async def process(ids): 696 | limits = Limits(max_connections=100) 697 | headers, cookies = get_headers(self.session), self.session.cookies 698 | async with AsyncClient(limits=limits, headers=headers, cookies=cookies, timeout=20) as c: 699 | return await tqdm_asyncio.gather(*(get(c, _id) for _id in ids), desc="Getting DMs") 700 | 701 | if conversation_ids: 702 | ids = conversation_ids 703 | else: 704 | # get all conversations 705 | inbox = self.dm_inbox() 706 | ids = list(inbox['inbox_initial_state']['conversations']) 707 | 708 | return asyncio.run(process(ids)) 709 | 710 | def dm_delete(self, *, conversation_id: str = None, message_id: str = None) -> dict: 711 | """ 712 | Delete operations 713 | 714 | - delete (hide) a single DM 715 | - delete an entire conversation 716 | 717 | @param conversation_id: the conversation id 718 | @param message_id: the message id 719 | @return: result metadata 720 | """ 721 | self.session.headers.update(headers=get_headers(self.session)) 722 | results = {'conversation': None, 'message': None} 723 | if conversation_id: 724 | results['conversation'] = self.session.post( 725 | f'{self.v1_api}/dm/conversation/{conversation_id}/delete.json', 726 | ).text # not json response 727 | if message_id: 728 | # delete single message 729 | _id, op = Operation.DMMessageDeleteMutation 730 | results['message'] = self.session.post( 731 | f'{self.gql_api}/{_id}/{op}', 732 | json={'queryId': _id, 'variables': {'messageId': message_id}}, 733 | ).json() 734 | return results 735 | 736 | def dm_search(self, query: str) -> dict: 737 | """ 738 | Search DMs by keyword 739 | 740 | @param query: search term 741 | @return: search results as dict 742 | """ 743 | 744 | def get(cursor=None): 745 | if cursor: 746 | params['variables']['cursor'] = cursor.pop() 747 | _id, op = Operation.DmAllSearchSlice 748 | r = self.session.get( 749 | f'{self.gql_api}/{_id}/{op}', 750 | params=build_params(params), 751 | ) 752 | res = r.json() 753 | cursor = find_key(res, 'next_cursor') 754 | return res, cursor 755 | 756 | self.session.headers.update(headers=get_headers(self.session)) 757 | variables = deepcopy(Operation.default_variables) 758 | variables['count'] = 50 # strict limit, errors thrown if exceeded 759 | variables['query'] = query 760 | 
params = {'variables': variables, 'features': Operation.default_features} 761 | res, cursor = get() 762 | data = [res] 763 | while cursor: 764 | res, cursor = get(cursor) 765 | data.append(res) 766 | return {'query': query, 'data': data} 767 | 768 | def scheduled_tweets(self, ascending: bool = True) -> dict: 769 | variables = {"ascending": ascending} 770 | return self.gql('GET', Operation.FetchScheduledTweets, variables) 771 | 772 | def delete_scheduled_tweet(self, tweet_id: int) -> dict: 773 | """duplicate, same as `unschedule_tweet()`""" 774 | variables = {'scheduled_tweet_id': tweet_id} 775 | return self.gql('POST', Operation.DeleteScheduledTweet, variables) 776 | 777 | def clear_scheduled_tweets(self) -> None: 778 | user_id = int(re.findall('"u=(\d+)"', self.session.cookies.get('twid'))[0]) 779 | drafts = self.gql('GET', Operation.FetchScheduledTweets, {"ascending": True}) 780 | for _id in set(find_key(drafts, 'rest_id')): 781 | if _id != user_id: 782 | self.gql('POST', Operation.DeleteScheduledTweet, {'scheduled_tweet_id': _id}) 783 | 784 | def draft_tweets(self, ascending: bool = True) -> dict: 785 | variables = {"ascending": ascending} 786 | return self.gql('GET', Operation.FetchDraftTweets, variables) 787 | 788 | def delete_draft_tweet(self, tweet_id: int) -> dict: 789 | variables = {'draft_tweet_id': tweet_id} 790 | return self.gql('POST', Operation.DeleteDraftTweet, variables) 791 | 792 | def clear_draft_tweets(self) -> None: 793 | user_id = int(re.findall('"u=(\d+)"', self.session.cookies.get('twid'))[0]) 794 | drafts = self.gql('GET', Operation.FetchDraftTweets, {"ascending": True}) 795 | for _id in set(find_key(drafts, 'rest_id')): 796 | if _id != user_id: 797 | self.gql('POST', Operation.DeleteDraftTweet, {'draft_tweet_id': _id}) 798 | 799 | def notifications(self, params: dict = None) -> dict: 800 | r = self.session.get( 801 | f'{self.v2_api}/notifications/all.json', 802 | headers=get_headers(self.session), 803 | params=params or live_notification_params 804 | ) 805 | if self.debug: 806 | log(self.logger, self.debug, r) 807 | return r.json() 808 | 809 | def recommendations(self, params: dict = None) -> dict: 810 | r = self.session.get( 811 | f'{self.v1_api}/users/recommendations.json', 812 | headers=get_headers(self.session), 813 | params=params or recommendations_params 814 | ) 815 | if self.debug: 816 | log(self.logger, self.debug, r) 817 | return r.json() 818 | 819 | def fleetline(self, params: dict = None) -> dict: 820 | r = self.session.get( 821 | 'https://twitter.com/i/api/fleets/v1/fleetline', 822 | headers=get_headers(self.session), 823 | params=params or {} 824 | ) 825 | if self.debug: 826 | log(self.logger, self.debug, r) 827 | return r.json() 828 | 829 | @property 830 | def id(self) -> int: 831 | """ Get User ID """ 832 | return int(re.findall('"u=(\d+)"', self.session.cookies.get('twid'))[0]) 833 | 834 | def save_cookies(self, fname: str = None): 835 | """ Save cookies to file """ 836 | cookies = self.session.cookies 837 | Path(f'{fname or cookies.get("username")}.cookies').write_bytes(orjson.dumps(dict(cookies))) 838 | -------------------------------------------------------------------------------- /twitter/constants.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | 3 | # todo: not accurate measure. value will decrease as new gql features/variables are required. (actual limitation is request size, i.e. 
new gql features an variables contribute to total request size) 4 | MAX_GQL_CHAR_LIMIT = 4_200 5 | 6 | MAX_ENDPOINT_LIMIT = 500 # 500/15 mins 7 | 8 | MAX_IMAGE_SIZE = 5_242_880 # ~5 MB 9 | MAX_GIF_SIZE = 15_728_640 # ~15 MB 10 | MAX_VIDEO_SIZE = 536_870_912 # ~530 MB 11 | 12 | UPLOAD_CHUNK_SIZE = 4 * 1024 * 1024 13 | MEDIA_UPLOAD_SUCCEED = 'succeeded' 14 | MEDIA_UPLOAD_FAIL = 'failed' 15 | 16 | BLACK = '\x1b[30m' 17 | RED = '\x1b[31m' 18 | GREEN = '\x1b[32m' 19 | YELLOW = '\x1b[33m' 20 | ORANGE = '\x1b[38;5;208m' 21 | BLUE = '\x1b[34m' 22 | MAGENTA = '\x1b[35m' 23 | CYAN = '\x1b[36m' 24 | WHITE = '\x1b[37m' 25 | BOLD = '\x1b[1m' 26 | RESET = '\x1b[0m' 27 | 28 | LOG_CONFIG = { 29 | 'version': 1, 30 | 'disable_existing_loggers': False, 31 | 'formatters': { 32 | 'standard': { 33 | 'format': '%(asctime)s.%(msecs)03d [%(levelname)s] :: %(message)s', 34 | 'datefmt': '%Y-%m-%d %H:%M:%S' 35 | }, 36 | }, 37 | 'handlers': { 38 | 'console': { 39 | 'class': 'logging.StreamHandler', 40 | 'level': 'DEBUG', 41 | 'formatter': 'standard', 42 | 'stream': 'ext://sys.stdout', 43 | }, 44 | 'file': { 45 | 'class': 'logging.FileHandler', 46 | 'level': 'DEBUG', 47 | 'formatter': 'standard', 48 | 'filename': 'twitter.log', 49 | 'mode': 'a', 50 | }, 51 | }, 52 | 'loggers': { 53 | 'twitter': { 54 | 'handlers': ['console', 'file'], 55 | 'level': 'DEBUG', 56 | } 57 | } 58 | } 59 | 60 | USER_AGENTS = [ 61 | 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36', 62 | 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.3', 63 | 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/116.0', 64 | 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Safari/537.36 Edg/115.0.1901.20', 65 | 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Safari/537.3', 66 | 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Safari/537.36', 67 | 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.5.2 Safari/605.1.15', 68 | 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:109.0) Gecko/20100101 Firefox/116.0', 69 | 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.5.1 Safari/605.1.15', 70 | ] 71 | 72 | 73 | @dataclass 74 | class SearchCategory: 75 | Top = 'Top' 76 | Latest = 'Latest' 77 | People = 'People' 78 | Photos = 'Photos' 79 | Videos = 'Videos' 80 | 81 | 82 | @dataclass 83 | class SpaceCategory: 84 | Top = 'Top' 85 | Live = 'Live' 86 | Upcoming = 'Upcoming' 87 | 88 | 89 | @dataclass 90 | class SpaceState: 91 | Ended = 'Ended' 92 | Canceled = 'Canceled' 93 | NotStarted = 'NotStarted' 94 | PrePublished = 'PrePublished' 95 | Running = 'Running' 96 | TimedOut = 'TimedOut' 97 | 98 | 99 | @dataclass 100 | class Operation: 101 | # todo: dynamically update 102 | SearchTimeline = {'rawQuery': str, 'product': str}, 'nK1dw4oV3k4w5TdtcAdSww', 'SearchTimeline' 103 | AudioSpaceById = {'id': str}, 'fYAuJHiY3TmYdBmrRtIKhA', 'AudioSpaceById' 104 | AudioSpaceSearch = {'filter': str, 'query': str}, 'NTq79TuSz6fHj8lQaferJw', 'AudioSpaceSearch', 105 | UserByScreenName = {'screen_name': str}, 'sLVLhk0bGj3MVFEKTdax1w', 'UserByScreenName' 106 | UserTweets = {'userId': int}, 'HuTx74BxAnezK1gWvYY7zg', 'UserTweets' 107 | ProfileSpotlightsQuery = {'screen_name': str}, 
'9zwVLJ48lmVUk8u_Gh9DmA', 'ProfileSpotlightsQuery' 108 | UserByRestId = {'userId': int}, 'GazOglcBvgLigl3ywt6b3Q', 'UserByRestId' 109 | UsersByRestIds = {'userIds': list}, 'OJBgJQIrij6e3cjqQ3Zu1Q', 'UsersByRestIds' 110 | UserMedia = {'userId': int}, 'YqiE3JL1KNgf9nSljYdxaA', 'UserMedia' 111 | UserTweetsAndReplies = {'userId': int}, 'RIWc55YCNyUJ-U3HHGYkdg', 'UserTweetsAndReplies' 112 | TweetResultByRestId = {'tweetId': int}, 'D_jNhjWZeRZT5NURzfJZSQ', 'TweetResultByRestId' 113 | TweetResultsByRestIds = {'tweetIds': list[int | str]}, 'BWy5aoI-WvwbeSiHUIf2Hw', 'TweetResultsByRestIds' 114 | TweetDetail = {'focalTweetId': int}, 'zXaXQgfyR4GxE21uwYQSyA', 'TweetDetail' 115 | TweetStats = {'rest_id': int}, 'EvbTkPDT-xQCfupPu0rWMA', 'TweetStats' 116 | Likes = {'userId': int}, 'nXEl0lfN_XSznVMlprThgQ', 'Likes' 117 | Followers = {'userId': int}, 'pd8Tt1qUz1YWrICegqZ8cw', 'Followers' 118 | Following = {'userId': int}, 'wjvx62Hye2dGVvnvVco0xA', 'Following' 119 | Retweeters = {'tweetId': int}, '0BoJlKAxoNPQUHRftlwZ2w', 'Retweeters' 120 | Favoriters = {'tweetId': int}, 'XRRjv1-uj1HZn3o324etOQ', 'Favoriters' 121 | ConnectTabTimeline = {'context': dict}, 'lq02A-gEzbLefqTgD_PFzQ', 'ConnectTabTimeline' 122 | 123 | # Account Operations 124 | useSendMessageMutation = 'MaxK2PKX1F9Z-9SwqwavTw', 'useSendMessageMutation' 125 | CreateTweet = '7TKRKCPuAGsmYde0CudbVg', 'CreateTweet' 126 | DeleteTweet = 'VaenaVgh5q5ih7kvyVjgtg', 'DeleteTweet' 127 | CreateScheduledTweet = 'LCVzRQGxOaGnOnYH01NQXg', 'CreateScheduledTweet' 128 | DeleteScheduledTweet = 'CTOVqej0JBXAZSwkp1US0g', 'DeleteScheduledTweet' 129 | CreateRetweet = 'ojPdsZsimiJrUGLR1sjUtA', 'CreateRetweet' 130 | DeleteRetweet = 'iQtK4dl5hBmXewYZuEOKVw', 'DeleteRetweet' 131 | FavoriteTweet = 'lI07N6Otwv1PhnEgXILM7A', 'FavoriteTweet' 132 | UnfavoriteTweet = 'ZYKSe-w7KEslx3JhSIk5LA', 'UnfavoriteTweet' 133 | CreateBookmark = 'aoDbu3RHznuiSkQ9aNM67Q', 'CreateBookmark' 134 | DeleteBookmark = 'Wlmlj2-xzyS1GN3a6cj-mQ', 'DeleteBookmark' 135 | CreateList = 'hQAsnViq2BrMLbPuQ9umDA', 'CreateList' 136 | UpdateList = '4dCEFWtxEbhnSLcJdJ6PNg', 'UpdateList' 137 | ListsPinMany = '2X4Vqu6XLneR-XZnGK5MAw', 'ListsPinMany' 138 | ListPinOne = '2pYlo-kjdXoNOZJoLzI6KA', 'ListPinOne' 139 | ListUnpinOne = 'c4ce-hzx6V4heV5IzdeBkA', 'ListUnpinOne' 140 | ListAddMember = 'P8tyfv2_0HzofrB5f6_ugw', 'ListAddMember' 141 | ListRemoveMember = 'DBZowzFN492FFkBPBptCwg', 'ListRemoveMember' 142 | DeleteList = 'UnN9Th1BDbeLjpgjGSpL3Q', 'DeleteList' 143 | EditListBanner = 'Uk0ZwKSMYng56aQdeJD1yw', 'EditListBanner' 144 | DeleteListBanner = '-bOKetDVCMl20qXn7YDXIA', 'DeleteListBanner' 145 | TopicFollow = 'ElqSLWFmsPL4NlZI5e1Grg', 'TopicFollow' 146 | TopicUnfollow = 'srwjU6JM_ZKTj_QMfUGNcw', 'TopicUnfollow' 147 | HomeLatestTimeline = 'zhX91JE87mWvfprhYE97xA', 'HomeLatestTimeline' 148 | HomeTimeline = 'HCosKfLNW1AcOo3la3mMgg', 'HomeTimeline' 149 | Bookmarks = 'tmd4ifV8RHltzn8ymGg1aw', 'Bookmarks' 150 | 151 | # misc/not implemented 152 | AdAccounts = 'a8KxGfFQAmm3WxqemuqSRA', 'AdAccounts' 153 | ArticleTimeline = 'o9FyvnC-xg8mVBXqL4g-rg', 'ArticleTimeline' 154 | ArticleTweetsTimeline = 'x4ywSpvg6BesoDszkfbFQg', 'ArticleTweetsTimeline' 155 | AudienceEstimate = '1LYVUabJBYkPlUAWRabB3g', 'AudienceEstimate' 156 | AuthenticatedUserTFLists = 'QjN8ZdavFDqxUjNn3r9cig', 'AuthenticatedUserTFLists' 157 | BirdwatchAliasSelect = '3ss48WFwGokBH_gj8t_8aQ', 'BirdwatchAliasSelect' 158 | BirdwatchCreateAppeal = 'TKdL0YFsX4DMOpMKeneLvA', 'BirdwatchCreateAppeal' 159 | BirdwatchCreateNote = '36EUZZyaciVmNrq4CRZcmw', 
'BirdwatchCreateNote' 160 | BirdwatchCreateRating = 'bD3AEK9BMCSpRods_ng2fA', 'BirdwatchCreateRating' 161 | BirdwatchDeleteNote = 'IKS_qrShkDyor6Ri1ahd9g', 'BirdwatchDeleteNote' 162 | BirdwatchDeleteRating = 'OpvCOyOoQClUND66zDzrnA', 'BirdwatchDeleteRating' 163 | BirdwatchEditNotificationSettings = 'FLgLReVIssXjB_ui3wcrRQ', 'BirdwatchEditNotificationSettings' 164 | BirdwatchFetchAliasSelfSelectOptions = 'szoXMke8AZOErso908iglw', 'BirdwatchFetchAliasSelfSelectOptions' 165 | BirdwatchFetchAliasSelfSelectStatus = 'LUEdtkcpBlGktUtms4BvwA', 'BirdwatchFetchAliasSelfSelectStatus' 166 | BirdwatchFetchAuthenticatedUserProfile = 'pMbW6Y4LuS5MzlSOEqERJQ', 'BirdwatchFetchAuthenticatedUserProfile' 167 | BirdwatchFetchBirdwatchProfile = 'btgGtchypc3D491MJ7XXWA', 'BirdwatchFetchBirdwatchProfile' 168 | BirdwatchFetchContributorNotesSlice = 't6r3Wq7wripUW9gB3FQNBw', 'BirdwatchFetchContributorNotesSlice' 169 | BirdwatchFetchGlobalTimeline = 'L3LftPt6fhYqoQ5Vnxm7UQ', 'BirdwatchFetchGlobalTimeline' 170 | BirdwatchFetchNotes = 'ZGMhf1M7kPKMOhEk1nz0Yw', 'BirdwatchFetchNotes' 171 | BirdwatchFetchOneNote = 'GO8BR2MM2WZB63cdOoC7lw', 'BirdwatchFetchOneNote' 172 | BirdwatchFetchPublicData = '9bDdJ6AL26RLkcUShEcF-A', 'BirdwatchFetchPublicData' 173 | BirdwatchProfileAcknowledgeEarnOut = 'cED9wJy8Nd1kZCCYuIq9zQ', 'BirdwatchProfileAcknowledgeEarnOut' 174 | BizProfileFetchUser = '6OFpJ3TH3p8JpwOSgfgyhg', 'BizProfileFetchUser' 175 | BlockedAccountsAll = 'h52d1F7dumWGE1tJAhQBpg', 'BlockedAccountsAll' 176 | BlockedAccountsAutoBlock = '8w-D2OhT0jmGzXaNY--UQA', 'BlockedAccountsAutoBlock' 177 | BlockedAccountsImported = '8LDNeOEm0kA98uoDsqXvMg', 'BlockedAccountsImported' 178 | BookmarkFolderTimeline = '13H7EUATwethsj-XxX5ohw', 'BookmarkFolderTimeline' 179 | BookmarkFoldersSlice = 'i78YDd0Tza-dV4SYs58kRg', 'BookmarkFoldersSlice' 180 | BookmarksAllDelete = 'skiACZKC1GDYli-M8RzEPQ', 'BookmarksAllDelete' 181 | Budgets = 'mbK3oSQotwcJXyQIBE3uYw', 'Budgets' 182 | CardPreviewByTweetText = 'jnwTSDR-Eo_HWlSkXPcMGA', 'CardPreviewByTweetText' 183 | CheckTweetForNudge = 'C2dcvh7H69JALtomErxWlA', 'CheckTweetForNudge' 184 | CombinedLists = 'rIxum3avpCu7APi7mxTNjw', 'CombinedLists' 185 | CommunitiesMainDiscoveryModule = '8UB2fhB8TiYIW2M6vbBFXg', 'CommunitiesMainDiscoveryModule' 186 | CommunitiesMainPageTimeline = 'DzcxPzkGYVQk-BD0pqAcZw', 'CommunitiesMainPageTimeline' 187 | CommunitiesMembershipsSlice = 's8-oxdVsoJ3w2CFD0nFt9g', 'CommunitiesMembershipsSlice' 188 | CommunitiesMembershipsTimeline = 'QXo-eKTsvhpCyFotNz2u6g', 'CommunitiesMembershipsTimeline' 189 | CommunityAboutTimeline = 'plOgdpBzpVVQbTOEVuRc_A', 'CommunityAboutTimeline' 190 | CommunityByRestId = 'bCVwRBDPi15jrdJQ7NCENQ', 'CommunityByRestId' 191 | CommunityCreateRule = 'dShPoN6voXRusgxC1uvGog', 'CommunityCreateRule' 192 | CommunityDiscoveryTimeline = 'b3rceNUXWRyo5mSwVZF74Q', 'CommunityDiscoveryTimeline' 193 | CommunityEditBannerMedia = 'KVkZwp8Q6xy6iyhlQE5d7Q', 'CommunityEditBannerMedia' 194 | CommunityEditName = 'SKToKhvm3Z4Rir8ENCJ3YQ', 'CommunityEditName' 195 | CommunityEditPurpose = 'eMat-u2kx6KocreGTAt-hA', 'CommunityEditPurpose' 196 | CommunityEditRule = '9nEl5bNcdteuPGbGCdvEFA', 'CommunityEditRule' 197 | CommunityEditTheme = '4OhW6gWJwiu-JTAgBPsU1w', 'CommunityEditTheme' 198 | CommunityHashtagsTimeline = 'hril1TsnshopHbmnjdUmhQ', 'CommunityHashtagsTimeline' 199 | CommunityMemberRelationshipTypeahead = 'NEwac2-8ONgf0756ne8oXA', 'CommunityMemberRelationshipTypeahead' 200 | CommunityModerationKeepTweet = 'f_YqrHSCc1mPlG-aB7pFRw', 'CommunityModerationKeepTweet' 201 | 
CommunityModerationTweetCasesSlice = 'V-iC7tjWOlzBJ44SanqGzw', 'CommunityModerationTweetCasesSlice' 202 | CommunityRemoveBannerMedia = 'lSdK1v30qVhm37rDTgHq0Q', 'CommunityRemoveBannerMedia' 203 | CommunityRemoveRule = 'EI_g43Ss_Ixg0EC4K7nzlQ', 'CommunityRemoveRule' 204 | CommunityReorderRules = 'VwluNMGnl5uaNZ3LnlCQ_A', 'CommunityReorderRules' 205 | CommunityTweetsRankedTimeline = 'P38EspBBPhAfSKPP74-s2Q', 'CommunityTweetsRankedTimeline' 206 | CommunityTweetsTimeline = '2JgHOlqfeLusxAT0yGQJjg', 'CommunityTweetsTimeline' 207 | CommunityUpdateRole = '5eq76kkUqfdCzInCtcxQOA', 'CommunityUpdateRole' 208 | CommunityUserInvite = 'x8hUNaBCOV2tSalqB9cwWQ', 'CommunityUserInvite' 209 | CommunityUserRelationshipTypeahead = 'gi_UGcUurYp6N6p2BaLJqQ', 'CommunityUserRelationshipTypeahead' 210 | ConversationControlChange = 'hb1elGcj6769uT8qVYqtjw', 'ConversationControlChange' 211 | ConversationControlDelete = 'OoMO_aSZ1ZXjegeamF9QmA', 'ConversationControlDelete' 212 | ConvertRitoSuggestedActions = '2njnYoE69O2jdUM7KMEnDw', 'ConvertRitoSuggestedActions' 213 | Coupons = 'R1h43jnAl2bsDoUkgZb7NQ', 'Coupons' 214 | CreateCommunity = 'lRjZKTRcWuqwtYwCWGy9_w', 'CreateCommunity' 215 | CreateCustomerPortalSession = '2LHXrd1uYeaMWhciZgPZFw', 'CreateCustomerPortalSession' 216 | CreateDraftTweet = 'cH9HZWz_EW9gnswvA4ZRiQ', 'CreateDraftTweet' 217 | CreateNoteTweet = 'Pyx6nga4XtTVhfTh1gtX1A', 'CreateNoteTweet' 218 | CreateQuickPromotion = 'oDSoVgHhJxnd5IkckgPZdg', 'CreateQuickPromotion' 219 | CreateTrustedFriendsList = '2tP8XUYeLHKjq5RHvuvpZw', 'CreateTrustedFriendsList' 220 | CreateTweetDownvote = 'Eo65jl-gww30avDgrXvhUA', 'CreateTweetDownvote' 221 | CreateTweetReaction = 'D7M6X3h4-mJE8UB1Ap3_dQ', 'CreateTweetReaction' 222 | DataSaverMode = 'xF6sXnKJfS2AOylzxRjf6A', 'DataSaverMode' 223 | DeleteBookmarkFolder = '2UTTsO-6zs93XqlEUZPsSg', 'DeleteBookmarkFolder' 224 | DeleteDraftTweet = 'bkh9G3FGgTldS9iTKWWYYw', 'DeleteDraftTweet' 225 | DeletePaymentMethod = 'VaaLGwK5KNLoc7wsOmp4uw', 'DeletePaymentMethod' 226 | DeleteTweetDownvote = 'VNEvEGXaUAMfiExP8Tbezw', 'DeleteTweetDownvote' 227 | DeleteTweetReaction = 'GKwK0Rj4EdkfwdHQMZTpuw', 'DeleteTweetReaction' 228 | DisableUserAccountLabel = '_ckHEj05gan2VfNHG6thBA', 'DisableUserAccountLabel' 229 | DisableVerifiedPhoneLabel = 'g2m0pAOamawNtVIfjXNMJg', 'DisableVerifiedPhoneLabel' 230 | DismissRitoSuggestedAction = 'jYvwa61cv3NwNP24iUru6g', 'DismissRitoSuggestedAction' 231 | DmAllSearchSlice = 'U-QXVRZ6iddb1QuZweh5DQ', 'DmAllSearchSlice' 232 | DmGroupSearchSlice = '5zpY1dCR-8NyxQJS_CFJoQ', 'DmGroupSearchSlice' 233 | DmMutedTimeline = 'lrcWa13oyrQc7L33wRdLAQ', 'DmMutedTimeline' 234 | DMMessageDeleteMutation = 'BJ6DtxA2llfjnRoRjaiIiw', 'DMMessageDeleteMutation' 235 | DmNsfwMediaFilterUpdate = 'of_N6O33zfyD4qsFJMYFxA', 'DmNsfwMediaFilterUpdate' 236 | DmPeopleSearchSlice = 'xYSm8m5kJnzm_gFCn5GH-w', 'DmPeopleSearchSlice' 237 | EditBookmarkFolder = 'a6kPp1cS1Dgbsjhapz1PNw', 'EditBookmarkFolder' 238 | EditDraftTweet = 'JIeXE-I6BZXHfxsgOkyHYQ', 'EditDraftTweet' 239 | EditScheduledTweet = '_mHkQ5LHpRRjSXKOcG6eZw', 'EditScheduledTweet' 240 | EnableLoggedOutWebNotifications = 'BqIHKmwZKtiUBPi07jKctg', 'EnableLoggedOutWebNotifications' 241 | EnableVerifiedPhoneLabel = 'C3RJFfMsb_KcEytpKmRRkw', 'EnableVerifiedPhoneLabel' 242 | EnrollCoupon = 'SOyGmNGaEXcvk15s5bqDrA', 'EnrollCoupon' 243 | ExplorePage = 'fkypGKlR9Xz9kLvUZDLoXw', 'ExplorePage' 244 | FeatureSettingsUpdate = '-btar_vkBwWA7s3YWfp_9g', 'FeatureSettingsUpdate' 245 | FetchDraftTweets = 'ZkqIq_xRhiUme0PBJNpRtg', 'FetchDraftTweets' 246 | 
FetchScheduledTweets = 'ITtjAzvlZni2wWXwf295Qg', 'FetchScheduledTweets' 247 | FollowersYouKnow = 'RvojYJJB90VwJ0rdVhbjMQ', 'FollowersYouKnow' 248 | ForYouExplore = 'wVEXnyTWzQlEsIuLq_D3tw', 'ForYouExplore' 249 | GenericTimelineById = 'LZfAdxTdNolKXw6ZkoY_kA', 'GenericTimelineById' 250 | GetSafetyModeSettings = 'AhxTX0lkbIos4WG53xwzSA', 'GetSafetyModeSettings' 251 | GetTweetReactionTimeline = 'ihIcULrtrtPGlCuprduRrA', 'GetTweetReactionTimeline' 252 | GetUserClaims = 'lFi3xnx0auUUnyG4YwpCNw', 'GetUserClaims' 253 | GraphQLError = '2V2W3HIBuMW83vEMtfo_Rg', 'GraphQLError' 254 | ImmersiveMedia = 'UGQD_VslAJBJ4XzigsBYAA', 'ImmersiveMedia' 255 | JoinCommunity = 'PXO-mA1KfmLqB9I6R-lOng', 'JoinCommunity' 256 | LeaveCommunity = 'AtiTdhEyRN8ruNFW069ewQ', 'LeaveCommunity' 257 | ListByRestId = 'wXzyA5vM_aVkBL9G8Vp3kw', 'ListByRestId' 258 | ListBySlug = '3-E3eSWorCv24kYkK3CCiQ', 'ListBySlug' 259 | ListCreationRecommendedUsers = 'Zf8ZwG57EKtss-rPlryIqg', 'ListCreationRecommendedUsers' 260 | ListEditRecommendedUsers = '-F4wsOirYNXjjg-ZjccQpQ', 'ListEditRecommendedUsers' 261 | ListLatestTweetsTimeline = '2TemLyqrMpTeAmysdbnVqw', 'ListLatestTweetsTimeline' 262 | ListMembers = 'vA952kfgGw6hh8KatWnbqw', 'ListMembers' 263 | ListMemberships = 'BlEXXdARdSeL_0KyKHHvvg', 'ListMemberships' 264 | ListOwnerships = 'wQcOSjSQ8NtgxIwvYl1lMg', 'ListOwnerships' 265 | ListPins = 'J0JOhmi8HSsle8LfSWv0cw', 'ListPins' 266 | ListProductSubscriptions = 'wwdBYgScze0_Jnan79jEUw', 'ListProductSubscriptions' 267 | ListRankedTweetsTimeline = '07lytXX9oG9uCld1RY4b0w', 'ListRankedTweetsTimeline' 268 | ListSubscribe = 'FjvrQI3k-97JIUbEE6Gxcw', 'ListSubscribe' 269 | ListSubscribers = 'e57wIELAAe0fYt4Hmqsk6g', 'ListSubscribers' 270 | ListUnsubscribe = 'bXyvW9HoS_Omy4ADhexj8A', 'ListUnsubscribe' 271 | ListsDiscovery = 'ehnzbxPHA69pyaV2EydN1g', 'ListsDiscovery' 272 | ListsManagementPageTimeline = 'nhYp4n09Hi5n2hQWseQztg', 'ListsManagementPageTimeline' 273 | LiveCommerceItemsSlice = '-lnNX56S2YrZYrLzbccFAQ', 'LiveCommerceItemsSlice' 274 | ModerateTweet = 'pjFnHGVqCjTcZol0xcBJjw', 'ModerateTweet' 275 | ModeratedTimeline = 'hnaqw2Vok5OETdBVa_uexw', 'ModeratedTimeline' 276 | MuteList = 'ZYyanJsskNUcltu9bliMLA', 'MuteList' 277 | MutedAccounts = '-G9eXTmseyiSenbqjrEG6w', 'MutedAccounts' 278 | NoteworthyAccountsPage = '3fOJzEwYMnVyzwgLTLIBkw', 'NoteworthyAccountsPage' 279 | PaymentMethods = 'mPF_G9okpbZuLcD6mN8K9g', 'PaymentMethods' 280 | PinReply = 'GA2_1uKP9b_GyR4MVAQXAw', 'PinReply' 281 | ProfileUserPhoneState = '5kUWP8C1hcd6omvg6HXXTQ', 'ProfileUserPhoneState' 282 | PutClientEducationFlag = 'IjQ-egg0uPkY11NyPMfRMQ', 'PutClientEducationFlag' 283 | QuickPromoteEligibility = 'LtpCXh66W-uXh7u7XSRA8Q', 'QuickPromoteEligibility' 284 | RemoveFollower = 'QpNfg0kpPRfjROQ_9eOLXA', 'RemoveFollower' 285 | RemoveTweetFromBookmarkFolder = '2Qbj9XZvtUvyJB4gFwWfaA', 'RemoveTweetFromBookmarkFolder' 286 | RequestToJoinCommunity = '6G66cW5zuxPXmHOeBOjF2w', 'RequestToJoinCommunity' 287 | RitoActionedTweetsTimeline = 'px9Zbs48D-YdQPEROK6-nA', 'RitoActionedTweetsTimeline' 288 | RitoFlaggedAccountsTimeline = 'lMzaBZHIbD6GuPqJJQubMg', 'RitoFlaggedAccountsTimeline' 289 | RitoFlaggedTweetsTimeline = 'iCuXMibh6yj9AelyjKXDeA', 'RitoFlaggedTweetsTimeline' 290 | RitoSuggestedActionsFacePile = 'GnQKeEdL1LyeK3dTQCS1yw', 'RitoSuggestedActionsFacePile' 291 | SetDefault = 'QEMLEzEMzoPNbeauKCCLbg', 'SetDefault' 292 | SetSafetyModeSettings = 'qSJIPIpf4gA7Wn21bT3D4w', 'SetSafetyModeSettings' 293 | SharingAudiospacesListeningDataWithFollowersUpdate = '5h0kNbk3ii97rmfY6CdgAA', 
'SharingAudiospacesListeningDataWithFollowersUpdate' 294 | SubscribeToScheduledSpace = 'Sxn4YOlaAwEKjnjWV0h7Mw', 'SubscribeToScheduledSpace' 295 | SubscriptionCheckoutUrlWithEligibility = 'hKfOOObQr5JmfmxW0YtPvg', 'SubscriptionCheckoutUrlWithEligibility' 296 | SubscriptionProductDetails = 'f0dExZDmFWFSWMCPQSAemQ', 'SubscriptionProductDetails' 297 | SubscriptionProductFeaturesFetch = 'Me2CVcAXxvK2WMr-Nh_Qqg', 'SubscriptionProductFeaturesFetch' 298 | SuperFollowers = 'o0YtPFnd4Lk_pOQb9alCvA', 'SuperFollowers' 299 | TopicByRestId = '4OUZZOonV2h60I0wdlQb_w', 'TopicByRestId' 300 | TopicLandingPage = 'mAKQjs1kyTS75VLZzuIXXw', 'TopicLandingPage' 301 | TopicNotInterested = 'cPCFdDAaqRjlMRYInZzoDA', 'TopicNotInterested' 302 | TopicToFollowSidebar = 'RPWVYYupHVZkJOnokbt2cw', 'TopicToFollowSidebar' 303 | TopicUndoNotInterested = '4tVnt6FoSxaX8L-mDDJo4Q', 'TopicUndoNotInterested' 304 | TopicsManagementPage = 'Jvdjpe8qzsJD84BpK3qdkQ', 'TopicsManagementPage' 305 | TopicsPickerPage = 'UvG-XXtWNcJN1LzF0u3ByA', 'TopicsPickerPage' 306 | TopicsPickerPageById = 't6kH4v2c_VzWKljc2yNwHA', 'TopicsPickerPageById' 307 | TrustedFriendsTypeahead = 'RRnOwHttRGscWKC1zY9VRA', 'TrustedFriendsTypeahead' 308 | TweetEditHistory = '8eaWKjHszkS-G_hprUd9AA', 'TweetEditHistory' 309 | TwitterArticleByRestId = 'hwrvh-Qt24lcprL-BDfqRA', 'TwitterArticleByRestId' 310 | TwitterArticleCreate = 'aV-sm-IkvwplcxdYDoLZHQ', 'TwitterArticleCreate' 311 | TwitterArticleDelete = '6st-stMDc7KBqLT8KvWhHg', 'TwitterArticleDelete' 312 | TwitterArticleUpdateCoverImage = 'fpcVRSAsjvkwmCiN1HheqQ', 'TwitterArticleUpdateCoverImage' 313 | TwitterArticleUpdateData = 'XpBTYp_QXwyZ0XT0JXCBJw', 'TwitterArticleUpdateData' 314 | TwitterArticleUpdateMedia = '3ojmmegfBC_oHyrmPhxj-g', 'TwitterArticleUpdateMedia' 315 | TwitterArticleUpdateTitle = 'dvH6Ql989I4e5jWEV7HfaQ', 'TwitterArticleUpdateTitle' 316 | TwitterArticleUpdateVisibility = '8M35gHyfpcy3S4UXejUGfA', 'TwitterArticleUpdateVisibility' 317 | TwitterArticlesSlice = 'UUPSi_aS8_kHDFTWqSBPUA', 'TwitterArticlesSlice' 318 | UnmentionUserFromConversation = 'xVW9j3OqoBRY9d6_2OONEg', 'UnmentionUserFromConversation' 319 | UnmoderateTweet = 'pVSyu6PA57TLvIE4nN2tsA', 'UnmoderateTweet' 320 | UnmuteList = 'pMZrHRNsmEkXgbn3tOyr7Q', 'UnmuteList' 321 | UnpinReply = 'iRe6ig5OV1EzOtldNIuGDQ', 'UnpinReply' 322 | UnsubscribeFromScheduledSpace = 'Zevhh76Msw574ZSs2NQHGQ', 'UnsubscribeFromScheduledSpace' 323 | UrtFixtures = 'I_0j1mjMwv94SdS66S4pqw', 'UrtFixtures' 324 | UserAboutTimeline = 'dm7ReTFJoeU0qkiZCO1E1g', 'UserAboutTimeline' 325 | UserAccountLabel = 'rD5gLxVmMvtdtYU1UHWlFQ', 'UserAccountLabel' 326 | UserBusinessProfileTeamTimeline = 'dq1eUCn3N8v0BywlP4nT7A', 'UserBusinessProfileTeamTimeline' 327 | UserPromotableTweets = 'jF-OgMv-9vAym3JaCPUnhQ', 'UserPromotableTweets' 328 | UserSessionsList = 'vJ-XatpmQSG8bDch8-t9Jw', 'UserSessionsList' 329 | UserSuperFollowTweets = '1by3q8-AJWdNYhtltjlPTQ', 'UserSuperFollowTweets' 330 | Viewer = 'okNaf-6AQWu2DD2H_MAoVw', 'Viewer' 331 | ViewerEmailSettings = 'JpjlNgn4sLGvS6tgpTzYBg', 'ViewerEmailSettings' 332 | ViewerTeams = 'D8mVcJSVv66_3NcR7fOf6g', 'ViewerTeams' 333 | ViewingOtherUsersTopicsPage = 'tYXo6h_rpnHXbdLUFMatZA', 'ViewingOtherUsersTopicsPage' 334 | WriteDataSaverPreferences = 'H03etWvZGz41YASxAU2YPg', 'WriteDataSaverPreferences' 335 | WriteEmailNotificationSettings = '2qKKYFQift8p5-J1k6kqxQ', 'WriteEmailNotificationSettings' 336 | adFreeArticleDomains = 'zwTrX9CtnMvWlBXjsx95RQ', 'adFreeArticleDomains' 337 | articleNudgeDomains = '88Bu08U2ddaVVjKmmXjVYg', 
'articleNudgeDomains' 338 | bookmarkTweetToFolder = '4KHZvvNbHNf07bsgnL9gWA', 'bookmarkTweetToFolder' 339 | createBookmarkFolder = '6Xxqpq8TM_CREYiuof_h5w', 'createBookmarkFolder' 340 | getAltTextPromptPreference = 'PFIxTk8owMoZgiMccP0r4g', 'getAltTextPromptPreference' 341 | getCaptionsAlwaysDisplayPreference = 'BwgMOGpOViDS0ri7VUgglg', 'getCaptionsAlwaysDisplayPreference' 342 | timelinesFeedback = 'vfVbgvTPTQ-dF_PQ5lD1WQ', 'timelinesFeedback' 343 | updateAltTextPromptPreference = 'aQKrduk_DA46XfOQDkcEng', 'updateAltTextPromptPreference' 344 | updateCaptionsAlwaysDisplayPreference = 'uCUQhvZ5sJ9qHinRp6CFlQ', 'updateCaptionsAlwaysDisplayPreference' 345 | 346 | default_variables = { 347 | 'count': 1000, 348 | 'withSafetyModeUserFields': True, 349 | 'includePromotedContent': True, 350 | 'withQuickPromoteEligibilityTweetFields': True, 351 | 'withVoice': True, 352 | 'withV2Timeline': True, 353 | 'withDownvotePerspective': False, 354 | 'withBirdwatchNotes': True, 355 | 'withCommunity': True, 356 | 'withSuperFollowsUserFields': True, 357 | 'withReactionsMetadata': False, 358 | 'withReactionsPerspective': False, 359 | 'withSuperFollowsTweetFields': True, 360 | 'isMetatagsQuery': False, 361 | 'withReplays': True, 362 | 'withClientEventToken': False, 363 | 'withAttachments': True, 364 | 'withConversationQueryHighlights': True, 365 | 'withMessageQueryHighlights': True, 366 | 'withMessages': True, 367 | } 368 | default_features = { 369 | # new 370 | 'c9s_tweet_anatomy_moderator_badge_enabled': True, 371 | 'responsive_web_home_pinned_timelines_enabled': True, 372 | 373 | 'blue_business_profile_image_shape_enabled': True, 374 | 'creator_subscriptions_tweet_preview_api_enabled': True, 375 | 'freedom_of_speech_not_reach_fetch_enabled': True, 376 | 'graphql_is_translatable_rweb_tweet_is_translatable_enabled': True, 377 | 'graphql_timeline_v2_bookmark_timeline': True, 378 | 'hidden_profile_likes_enabled': True, 379 | 'highlights_tweets_tab_ui_enabled': True, 380 | 'interactive_text_enabled': True, 381 | 'longform_notetweets_consumption_enabled': True, 382 | 'longform_notetweets_inline_media_enabled': True, 383 | 'longform_notetweets_rich_text_read_enabled': True, 384 | 'longform_notetweets_richtext_consumption_enabled': True, 385 | 'profile_foundations_tweet_stats_enabled': True, 386 | 'profile_foundations_tweet_stats_tweet_frequency': True, 387 | 'responsive_web_birdwatch_note_limit_enabled': True, 388 | 'responsive_web_edit_tweet_api_enabled': True, 389 | 'responsive_web_enhance_cards_enabled': False, 390 | 'responsive_web_graphql_exclude_directive_enabled': True, 391 | 'responsive_web_graphql_skip_user_profile_image_extensions_enabled': False, 392 | 'responsive_web_graphql_timeline_navigation_enabled': True, 393 | 'responsive_web_media_download_video_enabled': False, 394 | 'responsive_web_text_conversations_enabled': False, 395 | 'responsive_web_twitter_article_data_v2_enabled': True, 396 | 'responsive_web_twitter_article_tweet_consumption_enabled': False, 397 | 'responsive_web_twitter_blue_verified_badge_is_enabled': True, 398 | 'rweb_lists_timeline_redesign_enabled': True, 399 | 'spaces_2022_h2_clipping': True, 400 | 'spaces_2022_h2_spaces_communities': True, 401 | 'standardized_nudges_misinfo': True, 402 | 'subscriptions_verification_info_verified_since_enabled': True, 403 | 'tweet_awards_web_tipping_enabled': False, 404 | 'tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled': True, 405 | 'tweetypie_unmention_optimization_enabled': True, 406 | 'verified_phone_label_enabled': False, 
407 | 'vibe_api_enabled': True, 408 | 'view_counts_everywhere_api_enabled': True 409 | } 410 | 411 | 412 | trending_params = { 413 | 'include_profile_interstitial_type': '1', 414 | 'include_blocking': '1', 415 | 'include_blocked_by': '1', 416 | 'include_followed_by': '1', 417 | 'include_want_retweets': '1', 418 | 'include_mute_edge': '1', 419 | 'include_can_dm': '1', 420 | 'include_can_media_tag': '1', 421 | 'include_ext_has_nft_avatar': '1', 422 | 'include_ext_is_blue_verified': '1', 423 | 'include_ext_verified_type': '1', 424 | 'skip_status': '1', 425 | 'cards_platform': 'Web-12', 426 | 'include_cards': '1', 427 | 'include_ext_alt_text': 'true', 428 | 'include_ext_limited_action_results': 'false', 429 | 'include_quote_count': 'true', 430 | 'include_reply_count': '1', 431 | 'tweet_mode': 'extended', 432 | 'include_ext_views': 'true', 433 | 'include_entities': 'true', 434 | 'include_user_entities': 'true', 435 | 'include_ext_media_color': 'true', 436 | 'include_ext_media_availability': 'true', 437 | 'include_ext_sensitive_media_warning': 'true', 438 | 'include_ext_trusted_friends_metadata': 'true', 439 | 'send_error_codes': 'true', 440 | 'simple_quoted_tweet': 'true', 441 | 'count': 1000, 442 | 'requestContext': 'launch', 443 | 'include_page_configuration': 'true', 444 | 'initial_tab_id': 'trending', 445 | 'entity_tokens': 'false', 446 | 'ext': 'mediaStats,highlightedLabel,hasNftAvatar,voiceInfo,birdwatchPivot,enrichments,superFollowMetadata,unmentionInfo,editControl,vibe' 447 | } 448 | 449 | account_settings = { 450 | 'address_book_live_sync_enabled': False, 451 | 'allow_ads_personalization': False, 452 | 'allow_authenticated_periscope_requests': True, 453 | 'allow_dm_groups_from': 'following', 454 | 'allow_dms_from': 'following', # all 455 | 'allow_location_history_personalization': False, 456 | 'allow_logged_out_device_personalization': False, 457 | 'allow_media_tagging': 'none', # all, following 458 | 'allow_sharing_data_for_third_party_personalization': False, 459 | 'alt_text_compose_enabled': None, 460 | 'always_use_https': True, 461 | 'autoplay_disabled': False, 462 | 'country_code': 'us', 463 | 'discoverable_by_email': False, 464 | 'discoverable_by_mobile_phone': False, 465 | 'display_sensitive_media': True, 466 | 'dm_quality_filter': 'enabled', # disabled 467 | 'dm_receipt_setting': 'all_disabled', # all_enabled 468 | 'geo_enabled': False, 469 | 'include_alt_text_compose': True, 470 | 'include_mention_filter': True, 471 | 'include_nsfw_admin_flag': True, 472 | 'include_nsfw_user_flag': True, 473 | 'include_ranked_timeline': True, 474 | 'language': 'en', 475 | 'mention_filter': 'unfiltered', 476 | 'nsfw_admin': False, 477 | 'nsfw_user': False, 478 | 'personalized_trends': True, 479 | 'protected': False, 480 | 'ranked_timeline_eligible': None, 481 | 'ranked_timeline_setting': None, 482 | 'require_password_login': False, 483 | 'requires_login_verification': False, 484 | 'settings_metadata': {}, 485 | 'sleep_time': { 486 | 'enabled': False, 487 | 'end_time': None, 488 | 'start_time': None 489 | }, 490 | 'translator_type': 'none', 491 | 'universal_quality_filtering_enabled': 'enabled', 492 | 'use_cookie_personalization': False, 493 | ## todo: not yet implemented - requires additional steps 494 | # 'allow_contributor_request': 'all', 495 | # 'protect_password_reset': False, 496 | } 497 | follower_notification_settings = { 498 | 'cursor': '-1', 499 | 'include_profile_interstitial_type': '1', 500 | 'include_blocking': '1', 501 | 'include_blocked_by': '1', 502 | 'include_followed_by': '1', 
503 | 'include_want_retweets': '1', 504 | 'include_mute_edge': '1', 505 | 'include_can_dm': '1', 506 | 'include_can_media_tag': '1', 507 | 'include_ext_has_nft_avatar': '1', 508 | 'include_ext_is_blue_verified': '1', 509 | 'include_ext_verified_type': '1', 510 | 'skip_status': '1', 511 | } 512 | 513 | follow_settings = { 514 | 'include_profile_interstitial_type': '1', 515 | 'include_blocking': '1', 516 | 'include_blocked_by': '1', 517 | 'include_followed_by': '1', 518 | 'include_want_retweets': '1', 519 | 'include_mute_edge': '1', 520 | 'include_can_dm': '1', 521 | 'include_can_media_tag': '1', 522 | 'include_ext_has_nft_avatar': '1', 523 | 'include_ext_is_blue_verified': '1', 524 | 'include_ext_verified_type': '1', 525 | 'skip_status': '1', 526 | } 527 | 528 | account_search_settings = { 529 | 'optInFiltering': True, # filter out nsfw content 530 | 'optInBlocking': True, # filter out blocked accounts 531 | } 532 | 533 | profile_settings = { 534 | 'birthdate_day': int, 535 | 'birthdate_month': int, 536 | 'birthdate_year': int, # 1985 537 | 'birthdate_visibility': str, # 'self', 538 | 'birthdate_year_visibility': str, # 'self', 539 | 'displayNameMaxLength': int, # '50', 540 | 'url': str, # 'https://example.com', 541 | 'name': str, # 'foo', 542 | 'description': str, # 'bar', 543 | 'location': str, # 'world', 544 | } 545 | 546 | search_config = { 547 | 'include_profile_interstitial_type': 1, 548 | 'include_blocking': 1, 549 | 'include_blocked_by': 1, 550 | 'include_followed_by': 1, 551 | 'include_want_retweets': 1, 552 | 'include_mute_edge': 1, 553 | 'include_can_dm': 1, 554 | 'include_can_media_tag': 1, 555 | 'include_ext_has_nft_avatar': 1, 556 | 'include_ext_is_blue_verified': 1, 557 | 'include_ext_verified_type': 1, 558 | 'skip_status': 1, 559 | 'cards_platform': 'Web-12', 560 | 'include_cards': 1, 561 | 'include_ext_alt_text': 'true', 562 | 'include_ext_limited_action_results': 'false', 563 | 'include_quote_count': 'true', 564 | 'include_reply_count': 1, 565 | 'tweet_mode': 'extended', 566 | 'include_ext_collab_control': 'true', 567 | 'include_ext_views': 'true', 568 | 'include_entities': 'true', 569 | 'include_user_entities': 'true', 570 | 'include_ext_media_color': 'true', 571 | 'include_ext_media_availability': 'true', 572 | 'include_ext_sensitive_media_warning': 'true', 573 | 'include_ext_trusted_friends_metadata': 'true', 574 | 'send_error_codes': 'true', 575 | 'simple_quoted_tweet': 'true', 576 | 'query_source': 'typed_query', 577 | 'count': 1000, 578 | 'q': '', 579 | 'requestContext': 'launch', 580 | 'pc': 1, 581 | 'spelling_corrections': 1, 582 | 'include_ext_edit_control': 'true', 583 | 'ext': 'mediaStats,highlightedLabel,hasNftAvatar,voiceInfo,birdwatchPivot,enrichments,superFollowMetadata,unmentionInfo,editControl,collab_control,vibe' 584 | } 585 | 586 | dm_params = { 587 | 'context': 'FETCH_DM_CONVERSATION', 588 | 'include_profile_interstitial_type': '1', 589 | 'include_blocking': '1', 590 | 'include_blocked_by': '1', 591 | 'include_followed_by': '1', 592 | 'include_want_retweets': '1', 593 | 'include_mute_edge': '1', 594 | 'include_can_dm': '1', 595 | 'include_can_media_tag': '1', 596 | 'include_ext_has_nft_avatar': '1', 597 | 'include_ext_is_blue_verified': '1', 598 | 'include_ext_verified_type': '1', 599 | 'include_ext_profile_image_shape': '1', 600 | 'skip_status': '1', 601 | 'dm_secret_conversations_enabled': 'false', 602 | 'krs_registration_enabled': 'true', 603 | 'cards_platform': 'Web-12', 604 | 'include_cards': '1', 605 | 'include_ext_alt_text': 'true', 606 | 
'include_ext_limited_action_results': 'false', 607 | 'include_quote_count': 'true', 608 | 'include_reply_count': '1', 609 | 'tweet_mode': 'extended', 610 | 'include_ext_views': 'true', 611 | 'dm_users': 'false', 612 | 'include_groups': 'true', 613 | 'include_inbox_timelines': 'true', 614 | 'include_ext_media_color': 'true', 615 | 'supports_reactions': 'true', 616 | 'include_conversation_info': 'true', 617 | 'ext': 'mediaColor,altText,mediaStats,highlightedLabel,hasNftAvatar,voiceInfo,birdwatchPivot,superFollowMetadata,unmentionInfo,editControl', 618 | } 619 | 620 | live_notification_params = params = { 621 | "cards_platform": "Web-12", 622 | "count": "50", # max value 623 | "ext": "mediaStats,highlightedLabel,hasNftAvatar,voiceInfo,birdwatchPivot,superFollowMetadata,unmentionInfo,editControl", 624 | "include_blocked_by": "1", 625 | "include_blocking": "1", 626 | "include_can_dm": "1", 627 | "include_can_media_tag": "1", 628 | "include_cards": "1", 629 | "include_entities": "true", 630 | "include_ext_alt_text": "true", 631 | "include_ext_has_nft_avatar": "1", 632 | "include_ext_is_blue_verified": "1", 633 | "include_ext_limited_action_results": "true", 634 | "include_ext_media_availability": "true", 635 | "include_ext_media_color": "true", 636 | "include_ext_profile_image_shape": "1", 637 | "include_ext_sensitive_media_warning": "true", 638 | "include_ext_trusted_friends_metadata": "true", 639 | "include_ext_verified_type": "1", 640 | "include_ext_views": "true", 641 | "include_followed_by": "1", 642 | "include_mute_edge": "1", 643 | "include_profile_interstitial_type": "1", 644 | "include_quote_count": "true", 645 | "include_reply_count": "1", 646 | "include_user_entities": "true", 647 | "include_want_retweets": "1", 648 | "send_error_codes": "true", 649 | "simple_quoted_tweet": "true", 650 | "skip_status": "1", 651 | "tweet_mode": "extended" 652 | } 653 | 654 | recommendations_params = { 655 | 'include_profile_interstitial_type': '1', 656 | 'include_blocking': '1', 657 | 'include_blocked_by': '1', 658 | 'include_followed_by': '1', 659 | 'include_want_retweets': '1', 660 | 'include_mute_edge': '1', 661 | 'include_can_dm': '1', 662 | 'include_can_media_tag': '1', 663 | 'include_ext_has_nft_avatar': '1', 664 | 'include_ext_is_blue_verified': '1', 665 | 'include_ext_verified_type': '1', 666 | 'include_ext_profile_image_shape': '1', 667 | 'skip_status': '1', 668 | 'pc': 'true', 669 | 'display_location': 'profile_accounts_sidebar', 670 | 'limit': 100, 671 | 'ext': 'mediaStats,highlightedLabel,hasNftAvatar,voiceInfo,birdwatchPivot,superFollowMetadata,unmentionInfo,editControl' 672 | } 673 | -------------------------------------------------------------------------------- /twitter/login.py: -------------------------------------------------------------------------------- 1 | import random 2 | import sys 3 | 4 | from httpx import Client 5 | 6 | from .constants import YELLOW, RED, BOLD, RESET, USER_AGENTS 7 | from .util import find_key 8 | 9 | def update_token(client: Client, key: str, url: str, **kwargs) -> Client: 10 | caller_name = sys._getframe(1).f_code.co_name 11 | try: 12 | headers = { 13 | 'x-guest-token': client.cookies.get('guest_token', ''), 14 | 'x-csrf-token': client.cookies.get('ct0', ''), 15 | 'x-twitter-auth-type': 'OAuth2Client' if client.cookies.get('auth_token') else '', 16 | } 17 | client.headers.update(headers) 18 | r = client.post(url, **kwargs) 19 | info = r.json() 20 | 21 | for task in info.get('subtasks', []): 22 | if task.get('enter_text', {}).get('keyboard_type') == 
'email': 23 | print(f"[{YELLOW}warning{RESET}] {' '.join(find_key(task, 'text'))}") 24 | client.cookies.set('confirm_email', 'true') # signal that email challenge must be solved 25 | 26 | if task.get('subtask_id') == 'LoginAcid': 27 | if task['enter_text']['hint_text'].casefold() == 'confirmation code': 28 | print(f"[{YELLOW}warning{RESET}] email confirmation code challenge.") 29 | client.cookies.set('confirmation_code', 'true') 30 | 31 | client.cookies.set(key, info[key]) 32 | 33 | except KeyError as e: 34 | client.cookies.set('flow_errors', 'true') # signal that an error occurred somewhere in the flow 35 | print(f'[{RED}error{RESET}] failed to update token at {BOLD}{caller_name}{RESET}\n{e}') 36 | return client 37 | 38 | 39 | def init_guest_token(client: Client) -> Client: 40 | return update_token(client, 'guest_token', 'https://api.twitter.com/1.1/guest/activate.json') 41 | 42 | 43 | def flow_start(client: Client) -> Client: 44 | return update_token(client, 'flow_token', 'https://api.twitter.com/1.1/onboarding/task.json', 45 | params={'flow_name': 'login'}, 46 | json={ 47 | "input_flow_data": { 48 | "flow_context": { 49 | "debug_overrides": {}, 50 | "start_location": {"location": "splash_screen"} 51 | } 52 | }, "subtask_versions": {} 53 | }) 54 | 55 | 56 | def flow_instrumentation(client: Client) -> Client: 57 | return update_token(client, 'flow_token', 'https://api.twitter.com/1.1/onboarding/task.json', json={ 58 | "flow_token": client.cookies.get('flow_token'), 59 | "subtask_inputs": [{ 60 | "subtask_id": "LoginJsInstrumentationSubtask", 61 | "js_instrumentation": {"response": "{}", "link": "next_link"} 62 | }], 63 | }) 64 | 65 | 66 | def flow_username(client: Client) -> Client: 67 | return update_token(client, 'flow_token', 'https://api.twitter.com/1.1/onboarding/task.json', json={ 68 | "flow_token": client.cookies.get('flow_token'), 69 | "subtask_inputs": [{ 70 | "subtask_id": "LoginEnterUserIdentifierSSO", 71 | "settings_list": { 72 | "setting_responses": [{ 73 | "key": "user_identifier", 74 | "response_data": {"text_data": {"result": client.cookies.get('username')}} 75 | }], "link": "next_link"}}], 76 | }) 77 | 78 | 79 | def flow_password(client: Client) -> Client: 80 | return update_token(client, 'flow_token', 'https://api.twitter.com/1.1/onboarding/task.json', json={ 81 | "flow_token": client.cookies.get('flow_token'), 82 | "subtask_inputs": [{ 83 | "subtask_id": "LoginEnterPassword", 84 | "enter_password": {"password": client.cookies.get('password'), "link": "next_link"}}] 85 | }) 86 | 87 | 88 | def flow_duplication_check(client: Client) -> Client: 89 | return update_token(client, 'flow_token', 'https://api.twitter.com/1.1/onboarding/task.json', json={ 90 | "flow_token": client.cookies.get('flow_token'), 91 | "subtask_inputs": [{ 92 | "subtask_id": "AccountDuplicationCheck", 93 | "check_logged_in_account": {"link": "AccountDuplicationCheck_false"}, 94 | }], 95 | }) 96 | 97 | 98 | def confirm_email(client: Client) -> Client: 99 | return update_token(client, 'flow_token', 'https://api.twitter.com/1.1/onboarding/task.json', json={ 100 | "flow_token": client.cookies.get('flow_token'), 101 | "subtask_inputs": [ 102 | { 103 | "subtask_id": "LoginAcid", 104 | "enter_text": { 105 | "text": client.cookies.get('email'), 106 | "link": "next_link" 107 | } 108 | }] 109 | }) 110 | 111 | 112 | def solve_confirmation_challenge(client: Client, **kwargs) -> Client: 113 | if fn := kwargs.get('proton'): 114 | confirmation_code = fn() 115 | return update_token(client, 'flow_token', 
'https://api.twitter.com/1.1/onboarding/task.json', json={ 116 | "flow_token": client.cookies.get('flow_token'), 117 | 'subtask_inputs': [ 118 | { 119 | 'subtask_id': 'LoginAcid', 120 | 'enter_text': { 121 | 'text': confirmation_code, 122 | 'link': 'next_link', 123 | }, 124 | }, 125 | ], 126 | }) 127 | 128 | 129 | def execute_login_flow(client: Client, **kwargs) -> Client | None: 130 | client = init_guest_token(client) 131 | for fn in [flow_start, flow_instrumentation, flow_username, flow_password, flow_duplication_check]: 132 | client = fn(client) 133 | 134 | # solve email challenge 135 | if client.cookies.get('confirm_email') == 'true': 136 | client = confirm_email(client) 137 | 138 | # solve confirmation challenge (Proton Mail only) 139 | if client.cookies.get('confirmation_code') == 'true': 140 | if not kwargs.get('proton'): 141 | print(f'[{RED}warning{RESET}] Please check your email for a confirmation code' 142 | f' and log in again using the web app. If you wish to automatically solve' 143 | f' email confirmation challenges, add a Proton Mail account in your account settings') 144 | return 145 | client = solve_confirmation_challenge(client, **kwargs) 146 | return client 147 | 148 | 149 | def login(email: str, username: str, password: str, **kwargs) -> Client: 150 | client = Client( 151 | cookies={ 152 | "email": email, 153 | "username": username, 154 | "password": password, 155 | "guest_token": None, 156 | "flow_token": None, 157 | }, 158 | headers={ 159 | 'authorization': 'Bearer AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA', 160 | 'content-type': 'application/json', 161 | 'user-agent': random.choice(USER_AGENTS), 162 | 'x-twitter-active-user': 'yes', 163 | 'x-twitter-client-language': 'en', 164 | }, 165 | follow_redirects=True 166 | ) 167 | client = execute_login_flow(client, **kwargs) 168 | if not client or client.cookies.get('flow_errors') == 'true': 169 | raise Exception(f'[{RED}error{RESET}] {BOLD}{username}{RESET} login failed') 170 | return client 171 | -------------------------------------------------------------------------------- /twitter/scraper.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import logging.config 3 | import math 4 | import platform 5 | import sys 6 | from functools import partial 7 | from typing import Generator 8 | 9 | import websockets 10 | from httpx import AsyncClient, Limits, ReadTimeout, URL 11 | from tqdm.asyncio import tqdm_asyncio 12 | 13 | from .constants import * 14 | from .login import login 15 | from .util import * 16 | 17 | try: 18 | if get_ipython().__class__.__name__ == 'ZMQInteractiveShell': 19 | import nest_asyncio 20 | 21 | nest_asyncio.apply() 22 | except: 23 | ... 24 | 25 | if platform.system() != 'Windows': 26 | try: 27 | import uvloop 28 | 29 | uvloop.install() 30 | except ImportError as e: 31 | ... 
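# Usage sketch: the Scraper below accepts either login credentials or a previously
# authenticated session/cookies, mirroring the `_validate_session` handling shown in
# twitter/account.py above. The account values, screen names, and ids here are placeholders.
#
#     from twitter.scraper import Scraper
#
#     scraper = Scraper('email@example.com', 'username', 'password')   # credential login
#     # scraper = Scraper(cookies='username.cookies')                  # or reuse saved cookies
#
#     users = scraper.users(['SpaceX'])             # UserByScreenName lookups
#     tweets = scraper.tweets_by_ids([123, 456])    # batched TweetResultsByRestIds query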
32 | 33 | 34 | class Scraper: 35 | def __init__(self, email: str = None, username: str = None, password: str = None, session: Client = None, **kwargs): 36 | self.save = kwargs.get('save', True) 37 | self.debug = kwargs.get('debug', 0) 38 | self.pbar = kwargs.get('pbar', True) 39 | self.out = Path(kwargs.get('out', 'data')) 40 | self.guest = False 41 | self.logger = self._init_logger(**kwargs) 42 | self.session = self._validate_session(email, username, password, session, **kwargs) 43 | self.rate_limits = {} 44 | 45 | def users(self, screen_names: list[str], **kwargs) -> list[dict]: 46 | """ 47 | Get user data by screen names. 48 | 49 | @param screen_names: list of screen names (usernames) 50 | @param kwargs: optional keyword arguments 51 | @return: list of user data as dicts 52 | """ 53 | return self._run(Operation.UserByScreenName, screen_names, **kwargs) 54 | 55 | def tweets_by_id(self, tweet_ids: list[int | str], **kwargs) -> list[dict]: 56 | """ 57 | Get tweet metadata by tweet ids. 58 | 59 | @param tweet_ids: list of tweet ids 60 | @param kwargs: optional keyword arguments 61 | @return: list of tweet data as dicts 62 | """ 63 | return self._run(Operation.TweetResultByRestId, tweet_ids, **kwargs) 64 | 65 | def tweets_by_ids(self, tweet_ids: list[int | str], **kwargs) -> list[dict]: 66 | """ 67 | Get tweet metadata by tweet ids. 68 | 69 | Special batch query for tweet data. Most efficient way to get tweets. 70 | 71 | @param tweet_ids: list of tweet ids 72 | @param kwargs: optional keyword arguments 73 | @return: list of tweet data as dicts 74 | """ 75 | return self._run(Operation.TweetResultsByRestIds, batch_ids(tweet_ids), **kwargs) 76 | 77 | def tweets_details(self, tweet_ids: list[int], **kwargs) -> list[dict]: 78 | """ 79 | Get tweet data by tweet ids. 80 | 81 | Includes tweet metadata as well as comments, replies, etc. 82 | 83 | @param tweet_ids: list of tweet ids 84 | @param kwargs: optional keyword arguments 85 | @return: list of tweet data as dicts 86 | """ 87 | return self._run(Operation.TweetDetail, tweet_ids, **kwargs) 88 | 89 | def tweets(self, user_ids: list[int], **kwargs) -> list[dict]: 90 | """ 91 | Get tweets by user ids. 92 | 93 | Metadata for users tweets. 94 | 95 | @param user_ids: list of user ids 96 | @param kwargs: optional keyword arguments 97 | @return: list of tweet data as dicts 98 | """ 99 | return self._run(Operation.UserTweets, user_ids, **kwargs) 100 | 101 | def tweets_and_replies(self, user_ids: list[int], **kwargs) -> list[dict]: 102 | """ 103 | Get tweets and replies by user ids. 104 | 105 | Tweet metadata, including replies. 106 | 107 | @param user_ids: list of user ids 108 | @param kwargs: optional keyword arguments 109 | @return: list of tweet data as dicts 110 | """ 111 | return self._run(Operation.UserTweetsAndReplies, user_ids, **kwargs) 112 | 113 | def media(self, user_ids: list[int], **kwargs) -> list[dict]: 114 | """ 115 | Get media by user ids. 116 | 117 | Tweet metadata, filtered for tweets containing media. 118 | 119 | @param user_ids: list of user ids 120 | @param kwargs: optional keyword arguments 121 | @return: list of tweet data as dicts 122 | """ 123 | return self._run(Operation.UserMedia, user_ids, **kwargs) 124 | 125 | def likes(self, user_ids: list[int], **kwargs) -> list[dict]: 126 | """ 127 | Get likes by user ids. 128 | 129 | Tweet metadata for tweets liked by users. 
130 | 131 | @param user_ids: list of user ids 132 | @param kwargs: optional keyword arguments 133 | @return: list of tweet data as dicts 134 | """ 135 | return self._run(Operation.Likes, user_ids, **kwargs) 136 | 137 | def followers(self, user_ids: list[int], **kwargs) -> list[dict]: 138 | """ 139 | Get followers by user ids. 140 | 141 | User data for users followers list. 142 | 143 | @param user_ids: list of user ids 144 | @param kwargs: optional keyword arguments 145 | @return: list of user data as dicts 146 | """ 147 | return self._run(Operation.Followers, user_ids, **kwargs) 148 | 149 | def following(self, user_ids: list[int], **kwargs) -> list[dict]: 150 | """ 151 | Get following by user ids. 152 | 153 | User metadata for users following list. 154 | 155 | @param user_ids: list of user ids 156 | @param kwargs: optional keyword arguments 157 | @return: list of user data as dicts 158 | """ 159 | return self._run(Operation.Following, user_ids, **kwargs) 160 | 161 | def favoriters(self, tweet_ids: list[int], **kwargs) -> list[dict]: 162 | """ 163 | Get favoriters by tweet ids. 164 | 165 | User data for users who liked these tweets. 166 | 167 | @param tweet_ids: list of tweet ids 168 | @param kwargs: optional keyword arguments 169 | @return: list of user data as dicts 170 | """ 171 | return self._run(Operation.Favoriters, tweet_ids, **kwargs) 172 | 173 | def retweeters(self, tweet_ids: list[int], **kwargs) -> list[dict]: 174 | """ 175 | Get retweeters by tweet ids. 176 | 177 | User data for users who retweeted these tweets. 178 | 179 | @param tweet_ids: list of tweet ids 180 | @param kwargs: optional keyword arguments 181 | @return: list of user data as dicts 182 | """ 183 | return self._run(Operation.Retweeters, tweet_ids, **kwargs) 184 | 185 | def tweet_stats(self, user_ids: list[int], **kwargs) -> list[dict]: 186 | """ 187 | Get tweet statistics by user ids. 188 | 189 | @param user_ids: list of user ids 190 | @param kwargs: optional keyword arguments 191 | @return: list of tweet statistics as dicts 192 | """ 193 | return self._run(Operation.TweetStats, user_ids, **kwargs) 194 | 195 | def users_by_ids(self, user_ids: list[int], **kwargs) -> list[dict]: 196 | """ 197 | Get user data by user ids. 198 | 199 | Special batch query for user data. Most efficient way to get user data. 200 | 201 | @param user_ids: list of user ids 202 | @param kwargs: optional keyword arguments 203 | @return: list of user data as dicts 204 | """ 205 | return self._run(Operation.UsersByRestIds, batch_ids(user_ids), **kwargs) 206 | 207 | def recommended_users(self, user_ids: list[int] = None, **kwargs) -> list[dict]: 208 | """ 209 | Get recommended users by user ids, or general recommendations if no user ids are provided. 210 | 211 | @param user_ids: list of user ids 212 | @param kwargs: optional keyword arguments 213 | @return: list of recommended users data as dicts 214 | """ 215 | if user_ids: 216 | contexts = [{"context": orjson.dumps({"contextualUserId": x}).decode()} for x in user_ids] 217 | else: 218 | contexts = [{'context': None}] 219 | return self._run(Operation.ConnectTabTimeline, contexts, **kwargs) 220 | 221 | def profile_spotlights(self, screen_names: list[str], **kwargs) -> list[dict]: 222 | """ 223 | Get user data by screen names. 224 | 225 | This endpoint is included for completeness only. 226 | Use the batched query `users_by_ids` instead if you wish to pull user profile data. 
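        For example (where `scraper` is an instance of this class; names and ids are placeholders):

            >>> scraper.profile_spotlights(['user1', 'user2'])   # one lookup per screen name
            >>> scraper.users_by_ids([123, 456])                 # preferred batched query by rest id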
227 | 228 | @param screen_names: list of user screen names (usernames) 229 | @param kwargs: optional keyword arguments 230 | @return: list of user data as dicts 231 | """ 232 | return self._run(Operation.ProfileSpotlightsQuery, screen_names, **kwargs) 233 | 234 | def users_by_id(self, user_ids: list[int], **kwargs) -> list[dict]: 235 | """ 236 | Get user data by user ids. 237 | 238 | This endpoint is included for completeness only. 239 | Use the batched query `users_by_ids` instead if you wish to pull user profile data. 240 | 241 | 242 | @param user_ids: list of user ids 243 | @param kwargs: optional keyword arguments 244 | @return: list of user data as dicts 245 | """ 246 | return self._run(Operation.UserByRestId, user_ids, **kwargs) 247 | 248 | def download_media(self, ids: list[int], photos: bool = True, videos: bool = True, cards: bool = True, hq_img_variant: bool = True, video_thumb: bool = False, out: str = 'media', 249 | metadata_out: str = 'media.json', **kwargs) -> dict: 250 | """ 251 | Download and extract media metadata from Tweets 252 | 253 | @param ids: list of Tweet IDs 254 | @param photos: download images 255 | @param videos: download videos 256 | @param cards: download cards 257 | @param hq_img_variant: download highest quality image, options: {"orig", "4096x4096"} 258 | @param video_thumb: download video thumbnails 259 | @param out: output file for media 260 | @param metadata_out: output file for media metadata 261 | @return: media data 262 | """ 263 | 264 | async def process(fns: Generator) -> list: 265 | limits = { 266 | 'max_connections': kwargs.pop('max_connections', 1000), 267 | 'max_keepalive_connections': kwargs.pop('max_keepalive_connections', None), 268 | 'keepalive_expiry': kwargs.pop('keepalive_expiry', 5.0), 269 | } 270 | headers = {'user-agent': random.choice(USER_AGENTS)} 271 | async with AsyncClient(limits=Limits(**limits), headers=headers, http2=True, verify=False, timeout=60, follow_redirects=True) as client: 272 | return await tqdm_asyncio.gather(*(fn(client=client) for fn in fns), desc='Downloading Media') 273 | 274 | def download(urls: list[tuple], out: str) -> Generator: 275 | out = Path(out) 276 | out.mkdir(parents=True, exist_ok=True) 277 | chunk_size = kwargs.pop('chunk_size', None) 278 | 279 | async def get(client: AsyncClient, url: str): 280 | tid, cdn_url = url 281 | ext = urlsplit(cdn_url).path.split('/')[-1] 282 | fname = out / f'{tid}_{ext}' 283 | async with aiofiles.open(fname, 'wb') as fp: 284 | async with client.stream('GET', cdn_url) as r: 285 | async for chunk in r.aiter_raw(chunk_size): 286 | await fp.write(chunk) 287 | 288 | return (partial(get, url=u) for u in urls) 289 | 290 | tweets = self.tweets_by_ids(ids, **kwargs) 291 | media = {} 292 | for data in tweets: 293 | for tweet in data.get('data', {}).get('tweetResult', []): 294 | # TweetWithVisibilityResults and Tweet have different structures 295 | root = tweet.get('result', {}).get('tweet', {}) or tweet.get('result', {}) 296 | if _id := root.get('rest_id'): 297 | date = root.get('legacy', {}).get('created_at', '') 298 | uid = root.get('legacy', {}).get('user_id_str', '') 299 | media[_id] = {'date': date, 'uid': uid, 'img': set(), 'video': {'thumb': set(), 'video_info': {}, 'hq': set()}, 'card': []} 300 | for _media in (y for x in find_key(root, 'media') for y in x if isinstance(x, list)): 301 | if videos: 302 | if vinfo := _media.get('video_info'): 303 | hq = sorted(vinfo.get('variants', []), key=lambda x: -x.get('bitrate', 0))[0]['url'] 304 | media[_id]['video']['video_info'] |= 
vinfo 305 | media[_id]['video']['hq'].add(hq) 306 | if video_thumb: 307 | if url := _media.get('media_url_https', ''): 308 | media[_id]['video']['thumb'].add(url) 309 | if photos: 310 | if (url := _media.get('media_url_https', '')) and "_video_thumb" not in url: 311 | if hq_img_variant: 312 | url = f'{url}?name=orig' 313 | media[_id]['img'].add(url) 314 | if cards: 315 | if card := root.get('card', {}).get('legacy', {}): 316 | media[_id]['card'].extend(card.get('binding_values', [])) 317 | if metadata_out: 318 | media = set2list(media) 319 | metadata_out = Path(metadata_out) 320 | metadata_out.parent.mkdir(parents=True, exist_ok=True) # if user specifies subdir 321 | metadata_out.write_bytes(orjson.dumps(media)) 322 | 323 | res = [] 324 | for k, v in media.items(): 325 | tmp = [] 326 | if photos: 327 | tmp.extend(v['img']) 328 | if videos: 329 | tmp.extend(v['video']['hq']) 330 | if video_thumb: 331 | tmp.extend(v['video']['thumb']) 332 | if cards: 333 | tmp.extend(parse_card_media(v['card'])) 334 | res.extend([(k, m) for m in tmp]) 335 | asyncio.run(process(download(res, out))) 336 | return media 337 | 338 | def trends(self, utc: list[str] = None) -> dict: 339 | """ 340 | Get trends for all UTC offsets 341 | 342 | @param utc: optional list of specific UTC offsets 343 | @return: dict of trends 344 | """ 345 | 346 | async def get_trends(client: AsyncClient, offset: str, url: str): 347 | try: 348 | client.headers['x-twitter-utcoffset'] = offset 349 | r = await client.get(url) 350 | trends = find_key(r.json(), 'item') 351 | return {t['content']['trend']['name']: t for t in trends} 352 | except Exception as e: 353 | if self.debug: 354 | self.logger.error(f'[{RED}error{RESET}] Failed to get trends\n{e}') 355 | 356 | async def process(): 357 | url = set_qs('https://twitter.com/i/api/2/guide.json', trending_params) 358 | offsets = utc or ["-1200", "-1100", "-1000", "-0900", "-0800", "-0700", "-0600", "-0500", "-0400", "-0300", 359 | "-0200", "-0100", "+0000", "+0100", "+0200", "+0300", "+0400", "+0500", "+0600", "+0700", 360 | "+0800", "+0900", "+1000", "+1100", "+1200", "+1300", "+1400"] 361 | async with AsyncClient(headers=get_headers(self.session)) as client: 362 | tasks = (get_trends(client, o, url) for o in offsets) 363 | if self.pbar: 364 | return await tqdm_asyncio.gather(*tasks, desc='Getting trends') 365 | return await asyncio.gather(*tasks) 366 | 367 | trends = asyncio.run(process()) 368 | out = self.out / 'raw' / 'trends' 369 | out.mkdir(parents=True, exist_ok=True) 370 | (out / f'{time.time_ns()}.json').write_text(orjson.dumps( 371 | {k: v for d in trends for k, v in d.items()}, 372 | option=orjson.OPT_INDENT_2 | orjson.OPT_SORT_KEYS).decode(), encoding='utf-8') 373 | return trends 374 | 375 | def spaces(self, *, rooms: list[str] = None, search: list[dict] = None, audio: bool = False, chat: bool = False, 376 | **kwargs) -> list[dict]: 377 | """ 378 | Get Twitter spaces data 379 | 380 | - Get data for specific rooms or search for rooms. 381 | - Get audio and/or chat data for rooms. 
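For example (illustrative; the room id is a placeholder and `scraper` stands for an instance of this class), `scraper.spaces(rooms=['1dRJZEpyjlNGB'], audio=True, chat=True)` fetches metadata for that room, downloads its audio to disk, and returns its chat history.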
382 | 383 | @param rooms: list of room ids 384 | @param search: list of dicts containing search parameters 385 | @param audio: flag to include audio data 386 | @param chat: flag to include chat data 387 | @param kwargs: optional keyword arguments 388 | @return: list of spaces data 389 | """ 390 | if rooms: 391 | spaces = self._run(Operation.AudioSpaceById, rooms, **kwargs) 392 | else: 393 | res = self._run(Operation.AudioSpaceSearch, search, **kwargs) 394 | search_results = set(find_key(res, 'rest_id')) 395 | spaces = self._run(Operation.AudioSpaceById, search_results, **kwargs) 396 | if audio or chat: 397 | return self._get_space_data(spaces, audio, chat) 398 | return spaces 399 | 400 | def _get_space_data(self, spaces: list[dict], audio=True, chat=True): 401 | streams = self._check_streams(spaces) 402 | chat_data = None 403 | if chat: 404 | temp = [] # get necessary keys instead of passing large dicts 405 | for stream in filter(lambda x: x['stream'], streams): 406 | meta = stream['space']['data']['audioSpace']['metadata'] 407 | if meta['state'] not in {SpaceState.Running, SpaceState.NotStarted}: 408 | temp.append({ 409 | 'rest_id': meta['rest_id'], 410 | 'chat_token': stream['stream']['chatToken'], 411 | 'media_key': meta['media_key'], 412 | 'state': meta['state'], 413 | }) 414 | chat_data = self._get_chat_data(temp) 415 | if audio: 416 | temp = [] 417 | for stream in streams: 418 | if stream.get('stream'): 419 | chunks = self._get_chunks(stream['stream']['source']['location']) 420 | temp.append({ 421 | 'rest_id': stream['space']['data']['audioSpace']['metadata']['rest_id'], 422 | 'chunks': chunks, 423 | }) 424 | self._download_audio(temp) 425 | return chat_data 426 | 427 | async def _get_stream(self, client: AsyncClient, media_key: str) -> dict | None: 428 | params = { 429 | 'client': 'web', 430 | 'use_syndication_guest_id': 'false', 431 | 'cookie_set_host': 'twitter.com', 432 | } 433 | url = f'https://twitter.com/i/api/1.1/live_video_stream/status/{media_key}' 434 | try: 435 | r = await client.get(url, params=params) 436 | return r.json() 437 | except Exception as e: 438 | if self.debug: 439 | self.logger.error(f'stream not available for playback\n{e}') 440 | 441 | async def _init_chat(self, client: AsyncClient, chat_token: str) -> dict: 442 | payload = {'chat_token': chat_token} # stream['chatToken'] 443 | url = 'https://proxsee.pscp.tv/api/v2/accessChatPublic' 444 | r = await client.post(url, json=payload) 445 | return r.json() 446 | 447 | async def _get_chat(self, client: AsyncClient, endpoint: str, access_token: str, cursor: str = '') -> list[dict]: 448 | payload = { 449 | 'access_token': access_token, 450 | 'cursor': cursor, 451 | 'limit': 1000, # or 0 452 | 'since': None, 453 | 'quick_get': True, 454 | } 455 | url = f"{endpoint}/chatapi/v1/history" 456 | r = await client.post(url, json=payload) 457 | data = r.json() 458 | res = [data] 459 | while cursor := data.get('cursor'): 460 | try: 461 | r = await client.post(url, json=payload | {'cursor': cursor}) 462 | if r.status_code == 503: 463 | # not our fault, service error, something went wrong with the stream 464 | break 465 | data = r.json() 466 | res.append(data) 467 | except ReadTimeout as e: 468 | if self.debug: 469 | self.logger.debug(f'End of chat data\n{e}') 470 | break 471 | 472 | parsed = [] 473 | for r in res: 474 | messages = r.get('messages', []) 475 | for msg in messages: 476 | try: 477 | msg['payload'] = orjson.loads(msg.get('payload', '{}')) 478 | msg['payload']['body'] = orjson.loads(msg['payload'].get('body')) 479 
| except Exception as e: 480 | if self.debug: 481 | self.logger.error(f'Failed to parse chat message\n{e}') 482 | parsed.extend(messages) 483 | return parsed 484 | 485 | def _get_chunks(self, location: str) -> list[str]: 486 | try: 487 | url = URL(location) 488 | stream_type = url.params.get('type') 489 | r = self.session.get( 490 | url=location, 491 | params={'type': stream_type}, 492 | headers={'authority': url.host} 493 | ) 494 | # don't need an m3u8 parser 495 | chunks = re.findall('\n(chunk_.*)\n', r.text, flags=re.I) 496 | url = '/'.join(location.split('/')[:-1]) 497 | return [f'{url}/{chunk}' for chunk in chunks] 498 | except Exception as e: 499 | if self.debug: 500 | self.logger.error(f'Failed to get chunks\n{e}') 501 | 502 | def _get_chat_data(self, keys: list[dict]) -> list[dict]: 503 | async def get(c: AsyncClient, key: dict) -> dict: 504 | info = await self._init_chat(c, key['chat_token']) 505 | chat = await self._get_chat(c, info['endpoint'], info['access_token']) 506 | if self.save: 507 | (self.out / 'raw' / f"chat_{key['rest_id']}.json").write_bytes(orjson.dumps(chat)) 508 | return { 509 | 'space': key['rest_id'], 510 | 'chat': chat, 511 | 'info': info, 512 | } 513 | 514 | async def process(): 515 | (self.out / 'raw').mkdir(parents=True, exist_ok=True) 516 | limits = Limits(max_connections=100, max_keepalive_connections=10) 517 | headers = self.session.headers if self.guest else get_headers(self.session) 518 | cookies = self.session.cookies 519 | async with AsyncClient(limits=limits, headers=headers, cookies=cookies, timeout=20) as c: 520 | tasks = (get(c, key) for key in keys) 521 | if self.pbar: 522 | return await tqdm_asyncio.gather(*tasks, desc='Downloading chat data') 523 | return await asyncio.gather(*tasks) 524 | 525 | return asyncio.run(process()) 526 | 527 | def _download_audio(self, data: list[dict]) -> None: 528 | async def get(s: AsyncClient, chunk: str, rest_id: str) -> tuple: 529 | r = await s.get(chunk) 530 | return rest_id, r 531 | 532 | async def process(data: list[dict]) -> list: 533 | limits = Limits(max_connections=100, max_keepalive_connections=10) 534 | headers = self.session.headers if self.guest else get_headers(self.session) 535 | cookies = self.session.cookies 536 | async with AsyncClient(limits=limits, headers=headers, cookies=cookies, timeout=20) as c: 537 | tasks = [] 538 | for d in data: 539 | tasks.extend([get(c, chunk, d['rest_id']) for chunk in d['chunks']]) 540 | if self.pbar: 541 | return await tqdm_asyncio.gather(*tasks, desc='Downloading audio') 542 | return await asyncio.gather(*tasks) 543 | 544 | chunks = asyncio.run(process(data)) 545 | streams = {} 546 | [streams.setdefault(_id, []).append(chunk) for _id, chunk in chunks] 547 | # ensure chunks are in correct order 548 | for k, v in streams.items(): 549 | streams[k] = sorted(v, key=lambda x: int(re.findall('_(\d+)_\w\.aac$', x.url.path)[0])) 550 | out = self.out / 'audio' 551 | out.mkdir(parents=True, exist_ok=True) 552 | for space_id, chunks in streams.items(): 553 | # 1hr ~= 50mb 554 | with open(out / f'{space_id}.aac', 'wb') as fp: 555 | [fp.write(c.content) for c in chunks] 556 | 557 | def _check_streams(self, keys: list[dict]) -> list[dict]: 558 | async def get(c: AsyncClient, space: dict) -> dict: 559 | media_key = space['data']['audioSpace']['metadata']['media_key'] 560 | stream = await self._get_stream(c, media_key) 561 | return {'space': space, 'stream': stream} 562 | 563 | async def process(): 564 | limits = Limits(max_connections=100, max_keepalive_connections=10) 565 | 
headers = self.session.headers if self.guest else get_headers(self.session) 566 | cookies = self.session.cookies 567 | async with AsyncClient(limits=limits, headers=headers, cookies=cookies, timeout=20) as c: 568 | return await asyncio.gather(*(get(c, key) for key in keys)) 569 | 570 | return asyncio.run(process()) 571 | 572 | def _run(self, operation: tuple[dict, str, str], queries: set | list[int | str | list | dict], **kwargs): 573 | keys, qid, name = operation 574 | # stay within rate-limits 575 | if (l := len(queries)) > MAX_ENDPOINT_LIMIT: 576 | if self.debug: 577 | self.logger.warning(f'Got {l} queries, truncating to first {MAX_ENDPOINT_LIMIT}.') 578 | queries = list(queries)[:MAX_ENDPOINT_LIMIT] 579 | 580 | if all(isinstance(q, dict) for q in queries): 581 | data = asyncio.run(self._process(operation, list(queries), **kwargs)) 582 | return get_json(data, **kwargs) 583 | 584 | # queries are of type set | list[int|str], need to convert to list[dict] 585 | _queries = [{k: q} for q in queries for k, v in keys.items()] 586 | res = asyncio.run(self._process(operation, _queries, **kwargs)) 587 | data = get_json(res, **kwargs) 588 | return data.pop() if kwargs.get('cursor') else flatten(data) 589 | 590 | async def _query(self, client: AsyncClient, operation: tuple, **kwargs) -> Response: 591 | keys, qid, name = operation 592 | params = { 593 | 'variables': Operation.default_variables | keys | kwargs, 594 | 'features': Operation.default_features, 595 | } 596 | r = await client.get(f'https://twitter.com/i/api/graphql/{qid}/{name}', params=build_params(params)) 597 | 598 | try: 599 | self.rate_limits[name] = {k: int(v) for k, v in r.headers.items() if 'rate-limit' in k} 600 | except Exception as e: 601 | self.logger.debug(f'{e}') 602 | 603 | if self.debug: 604 | log(self.logger, self.debug, r) 605 | if self.save: 606 | await save_json(r, self.out, name, **kwargs) 607 | return r 608 | 609 | async def _process(self, operation: tuple, queries: list[dict], **kwargs): 610 | headers = self.session.headers if self.guest else get_headers(self.session) 611 | cookies = self.session.cookies 612 | async with AsyncClient(limits=Limits(max_connections=MAX_ENDPOINT_LIMIT), headers=headers, cookies=cookies, timeout=20) as c: 613 | tasks = (self._paginate(c, operation, **q, **kwargs) for q in queries) 614 | if self.pbar: 615 | return await tqdm_asyncio.gather(*tasks, desc=operation[-1]) 616 | return await asyncio.gather(*tasks) 617 | 618 | async def _paginate(self, client: AsyncClient, operation: tuple, **kwargs): 619 | limit = kwargs.pop('limit', math.inf) 620 | cursor = kwargs.pop('cursor', None) 621 | is_resuming = False 622 | dups = 0 623 | DUP_LIMIT = 3 624 | if cursor: 625 | is_resuming = True 626 | res = [] 627 | ids = set() 628 | else: 629 | try: 630 | r = await self._query(client, operation, **kwargs) 631 | initial_data = r.json() 632 | res = [r] 633 | ids = {x for x in find_key(initial_data, 'rest_id') if x[0].isnumeric()} 634 | 635 | cursor = get_cursor(initial_data) 636 | except Exception as e: 637 | if self.debug: 638 | self.logger.error(f'Failed to get initial pagination data: {e}') 639 | return 640 | while (dups < DUP_LIMIT) and cursor: 641 | prev_len = len(ids) 642 | if prev_len >= limit: 643 | break 644 | try: 645 | r = await self._query(client, operation, cursor=cursor, **kwargs) 646 | data = r.json() 647 | except Exception as e: 648 | if self.debug: 649 | self.logger.error(f'Failed to get pagination data\n{e}') 650 | return 651 | cursor = get_cursor(data) 652 | ids |= {x for x in find_key(data, 'rest_id') 
if x[0].isnumeric()} 653 | 654 | if self.debug: 655 | self.logger.debug(f'Unique results: {len(ids)}\tcursor: {cursor}') 656 | if prev_len == len(ids): 657 | dups += 1 658 | res.append(r) 659 | if is_resuming: 660 | return res, cursor 661 | return res 662 | 663 | async def _space_listener(self, chat: dict, frequency: int): 664 | rand_color = lambda: random.choice([RED, GREEN, RESET, BLUE, CYAN, MAGENTA, YELLOW]) 665 | uri = f"wss://{URL(chat['endpoint']).host}/chatapi/v1/chatnow" 666 | with open('chatlog.jsonl', 'ab') as fp: 667 | async with websockets.connect(uri) as ws: 668 | await ws.send(orjson.dumps({ 669 | "payload": orjson.dumps({"access_token": chat['access_token']}).decode(), 670 | "kind": 3 671 | }).decode()) 672 | await ws.send(orjson.dumps({ 673 | "payload": orjson.dumps({ 674 | "body": orjson.dumps({ 675 | "room": chat['room_id'] 676 | }).decode(), 677 | "kind": 1 678 | }).decode(), 679 | "kind": 2 680 | }).decode()) 681 | 682 | prev_message = '' 683 | prev_user = '' 684 | while True: 685 | msg = await ws.recv() 686 | temp = orjson.loads(msg) 687 | kind = temp.get('kind') 688 | if kind == 1: 689 | signature = temp.get('signature') 690 | payload = orjson.loads(temp.get('payload')) 691 | payload['body'] = orjson.loads(payload.get('body')) 692 | res = { 693 | 'kind': kind, 694 | 'payload': payload, 695 | 'signature': signature, 696 | } 697 | fp.write(orjson.dumps(res) + b'\n') 698 | body = payload['body'] 699 | message = body.get('body') 700 | user = body.get('username') 701 | # user_id = body.get('user_id') 702 | final = body.get('final') 703 | 704 | if frequency == 1: 705 | if final: 706 | if user != prev_user: 707 | print() 708 | print(f"({rand_color()}{user}{RESET})") 709 | prev_user = user 710 | # print(message, end=' ') 711 | print(message) 712 | 713 | # dirty 714 | if frequency == 2: 715 | if user and (not final): 716 | if user != prev_user: 717 | print() 718 | print(f"({rand_color()}{user}{RESET})") 719 | prev_user = user 720 | new_message = re.sub(f'^({prev_message})', '', message, flags=re.I).strip() 721 | if len(new_message) < 100: 722 | print(new_message, end=' ') 723 | prev_message = message 724 | 725 | async def _get_live_chats(self, client: Client, spaces: list[dict]): 726 | async def get(c: AsyncClient, space: dict) -> list[dict]: 727 | media_key = space['data']['audioSpace']['metadata']['media_key'] 728 | r = await c.get( 729 | url=f'https://twitter.com/i/api/1.1/live_video_stream/status/{media_key}', 730 | params={ 731 | 'client': 'web', 732 | 'use_syndication_guest_id': 'false', 733 | 'cookie_set_host': 'twitter.com', 734 | }) 735 | r = await c.post( 736 | url='https://proxsee.pscp.tv/api/v2/accessChatPublic', 737 | json={'chat_token': r.json()['chatToken']} 738 | ) 739 | return r.json() 740 | 741 | limits = Limits(max_connections=100) 742 | async with AsyncClient(headers=client.headers, limits=limits, timeout=30) as c: 743 | tasks = (get(c, _id) for _id in spaces) 744 | if self.pbar: 745 | return await tqdm_asyncio.gather(*tasks, desc='Getting live transcripts') 746 | return await asyncio.gather(*tasks) 747 | 748 | def space_live_transcript(self, room: str, frequency: int = 1): 749 | """ 750 | Log live transcript of a space 751 | 752 | @param room: room id 753 | @param frequency: granularity of transcript. 
1 for real-time, 2 for post-processed or "finalized" transcript 754 | @return: None 755 | """ 756 | 757 | async def get(spaces: list[dict]): 758 | client = init_session() 759 | chats = await self._get_live_chats(client, spaces) 760 | await asyncio.gather(*(self._space_listener(c, frequency) for c in chats)) 761 | 762 | spaces = self.spaces(rooms=[room]) 763 | asyncio.run(get(spaces)) 764 | 765 | def spaces_live(self, rooms: list[str]): 766 | """ 767 | Capture live audio stream from spaces 768 | 769 | Limited to 500 rooms per IP, as defined by twitter's rate limits. 770 | 771 | @param rooms: list of room ids 772 | @return: None 773 | """ 774 | chunk_idx = lambda chunk: re.findall('_(\d+)_\w\.aac', chunk)[0] 775 | sort_chunks = lambda chunks: sorted(chunks, key=lambda x: int(chunk_idx(x))) 776 | parse_chunks = lambda txt: re.findall('\n(chunk_.*)\n', txt, flags=re.I) 777 | 778 | async def get_m3u8(client: AsyncClient, space: dict) -> dict: 779 | try: 780 | media_key = space['data']['audioSpace']['metadata']['media_key'] 781 | r = await client.get( 782 | url=f'https://twitter.com/i/api/1.1/live_video_stream/status/{media_key}', 783 | params={'client': 'web', 'use_syndication_guest_id': 'false', 'cookie_set_host': 'twitter.com'} 784 | ) 785 | data = r.json() 786 | room = data['shareUrl'].split('/')[-1] 787 | return {"url": data['source']['location'], "room": room} 788 | except Exception as e: 789 | room = space['data']['audioSpace']['metadata']['rest_id'] 790 | if self.debug: 791 | self.logger.error(f'Failed to get stream info for https://twitter.com/i/spaces/{room}\n{e}') 792 | 793 | async def get_chunks(client: AsyncClient, url: str) -> list[str]: 794 | try: 795 | url = URL(url) 796 | r = await client.get( 797 | url=url, 798 | params={'type': url.params.get('type')}, 799 | headers={'authority': url.host} 800 | ) 801 | base = '/'.join(str(url).split('/')[:-1]) 802 | return [f'{base}/{c}' for c in parse_chunks(r.text)] 803 | except Exception as e: 804 | if self.debug: 805 | self.logger.error(f'Failed to get chunks\n{e}') 806 | 807 | async def poll_space(client: AsyncClient, space: dict) -> dict | None: 808 | curr = 0 809 | lim = 10 810 | all_chunks = set() 811 | playlist = await get_m3u8(client, space) 812 | if not playlist: return 813 | chunks = await get_chunks(client, playlist['url']) 814 | if not chunks: return 815 | out = self.out / 'live' 816 | out.mkdir(parents=True, exist_ok=True) 817 | async with aiofiles.open(out / f'{playlist["room"]}.aac', 'wb') as fp: 818 | while curr < lim: 819 | chunks = await get_chunks(client, playlist['url']) 820 | if not chunks: 821 | return {'space': space, 'chunks': sort_chunks(all_chunks)} 822 | new_chunks = set(chunks) - all_chunks 823 | all_chunks |= new_chunks 824 | for c in sort_chunks(new_chunks): 825 | try: 826 | if self.debug: 827 | self.logger.debug(f"write: chunk [{chunk_idx(c)}]\t{c}") 828 | r = await client.get(c) 829 | await fp.write(r.content) 830 | except Exception as e: 831 | if self.debug: 832 | self.logger.error(f'Failed to write chunk {c}\n{e}') 833 | curr = 0 if new_chunks else curr + 1 834 | # wait for new chunks. 
dynamic playlist is updated every 2-3 seconds 835 | await asyncio.sleep(random.random() + 1.5) 836 | return {'space': space, 'chunks': sort_chunks(all_chunks)} 837 | 838 | async def process(spaces: list[dict]): 839 | limits = Limits(max_connections=100) 840 | headers, cookies = self.session.headers, self.session.cookies 841 | async with AsyncClient(limits=limits, headers=headers, cookies=cookies, timeout=20) as c: 842 | return await asyncio.gather(*(poll_space(c, space) for space in spaces)) 843 | 844 | spaces = self.spaces(rooms=rooms) 845 | return asyncio.run(process(spaces)) 846 | 847 | def _init_logger(self, **kwargs) -> Logger: 848 | if kwargs.get('debug'): 849 | cfg = kwargs.get('log_config') 850 | logging.config.dictConfig(cfg or LOG_CONFIG) 851 | 852 | # only support one logger 853 | logger_name = list(LOG_CONFIG['loggers'].keys())[0] 854 | 855 | # set level of all other loggers to ERROR 856 | for name in logging.root.manager.loggerDict: 857 | if name != logger_name: 858 | logging.getLogger(name).setLevel(logging.ERROR) 859 | 860 | return logging.getLogger(logger_name) 861 | 862 | def _validate_session(self, *args, **kwargs): 863 | email, username, password, session = args 864 | 865 | # validate credentials 866 | if all((email, username, password)): 867 | return login(email, username, password, **kwargs) 868 | 869 | # invalid credentials, try validating session 870 | if session and all(session.cookies.get(c) for c in {'ct0', 'auth_token'}): 871 | return session 872 | 873 | # invalid credentials and session 874 | cookies = kwargs.get('cookies') 875 | 876 | # try validating cookies dict 877 | if isinstance(cookies, dict) and all(cookies.get(c) for c in {'ct0', 'auth_token'}): 878 | _session = Client(cookies=cookies, follow_redirects=True) 879 | _session.headers.update(get_headers(_session)) 880 | return _session 881 | 882 | # try validating cookies from file 883 | if isinstance(cookies, str): 884 | _session = Client(cookies=orjson.loads(Path(cookies).read_bytes()), follow_redirects=True) 885 | _session.headers.update(get_headers(_session)) 886 | return _session 887 | 888 | # no session, credentials, or cookies provided. use guest session. 
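# (illustrative) e.g. cookies={'ct0': '...', 'auth_token': '...'} or cookies='account.cookies' (a file previously written by save_cookies()) both satisfy the branches above; names shown are placeholders.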
889 | if self.debug: 890 | self.logger.warning(f'{RED}This is a guest session, some endpoints cannot be accessed.{RESET}\n') 891 | self.guest = True 892 | return session 893 | 894 | @property 895 | def id(self) -> int: 896 | """ Get User ID """ 897 | return int(re.findall('"u=(\d+)"', self.session.cookies.get('twid'))[0]) 898 | 899 | def save_cookies(self, fname: str = None): 900 | """ Save cookies to file """ 901 | cookies = self.session.cookies 902 | Path(f'{fname or cookies.get("username")}.cookies').write_bytes(orjson.dumps(dict(cookies))) 903 | 904 | def _v1_rate_limits(self): 905 | return self.session.get('https://api.twitter.com/1.1/application/rate_limit_status.json').json() 906 | -------------------------------------------------------------------------------- /twitter/search.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import logging.config 3 | import math 4 | import platform 5 | import random 6 | import re 7 | import time 8 | from logging import Logger 9 | from pathlib import Path 10 | 11 | import orjson 12 | from httpx import AsyncClient, Client 13 | 14 | from .constants import * 15 | from .login import login 16 | from .util import get_headers, find_key, build_params 17 | 18 | reset = '\x1b[0m' 19 | colors = [f'\x1b[{i}m' for i in range(31, 37)] 20 | 21 | try: 22 | if get_ipython().__class__.__name__ == 'ZMQInteractiveShell': 23 | import nest_asyncio 24 | 25 | nest_asyncio.apply() 26 | except: 27 | ... 28 | 29 | if platform.system() != 'Windows': 30 | try: 31 | import uvloop 32 | 33 | uvloop.install() 34 | except ImportError as e: 35 | ... 36 | 37 | 38 | class Search: 39 | def __init__(self, email: str = None, username: str = None, password: str = None, session: Client = None, **kwargs): 40 | self.save = kwargs.get('save', True) 41 | self.debug = kwargs.get('debug', 0) 42 | self.logger = self._init_logger(**kwargs) 43 | self.session = self._validate_session(email, username, password, session, **kwargs) 44 | 45 | def run(self, queries: list[dict], limit: int = math.inf, out: str = 'data/search_results', **kwargs): 46 | out = Path(out) 47 | out.mkdir(parents=True, exist_ok=True) 48 | return asyncio.run(self.process(queries, limit, out, **kwargs)) 49 | 50 | async def process(self, queries: list[dict], limit: int, out: Path, **kwargs) -> list: 51 | async with AsyncClient(headers=get_headers(self.session)) as s: 52 | return await asyncio.gather(*(self.paginate(s, q, limit, out, **kwargs) for q in queries)) 53 | 54 | async def paginate(self, client: AsyncClient, query: dict, limit: int, out: Path, **kwargs) -> list[dict]: 55 | params = { 56 | 'variables': { 57 | 'count': 20, 58 | 'querySource': 'typed_query', 59 | 'rawQuery': query['query'], 60 | 'product': query['category'] 61 | }, 62 | 'features': Operation.default_features, 63 | 'fieldToggles': {'withArticleRichContentState': False}, 64 | } 65 | 66 | res = [] 67 | cursor = '' 68 | total = set() 69 | while True: 70 | if cursor: 71 | params['variables']['cursor'] = cursor 72 | data, entries, cursor = await self.backoff(lambda: self.get(client, params), **kwargs) 73 | res.extend(entries) 74 | if len(entries) <= 2 or len(total) >= limit: # just cursors 75 | if self.debug: 76 | self.logger.debug(f'[{GREEN}success{RESET}] Returned {len(total)} search results for {query["query"]}') 77 | return res 78 | total |= set(find_key(entries, 'entryId')) 79 | if self.debug: 80 | self.logger.debug(f'{query["query"]}') 81 | if self.save: 82 | (out / 
f'{time.time_ns()}.json').write_bytes(orjson.dumps(entries)) 83 | 84 | async def get(self, client: AsyncClient, params: dict) -> tuple: 85 | _, qid, name = Operation.SearchTimeline 86 | r = await client.get(f'https://twitter.com/i/api/graphql/{qid}/{name}', params=build_params(params)) 87 | data = r.json() 88 | cursor = self.get_cursor(data) 89 | entries = [y for x in find_key(data, 'entries') for y in x if re.search(r'^(tweet|user)-', y['entryId'])] 90 | # add on query info 91 | for e in entries: 92 | e['query'] = params['variables']['rawQuery'] 93 | return data, entries, cursor 94 | 95 | def get_cursor(self, data: list[dict]): 96 | for e in find_key(data, 'content'): 97 | if e.get('cursorType') == 'Bottom': 98 | return e['value'] 99 | 100 | async def backoff(self, fn, **kwargs): 101 | retries = kwargs.get('retries', 3) 102 | for i in range(retries + 1): 103 | try: 104 | data, entries, cursor = await fn() 105 | if errors := data.get('errors'): 106 | for e in errors: 107 | if self.debug: 108 | self.logger.warning(f'{YELLOW}{e.get("message")}{RESET}') 109 | return [], [], '' 110 | ids = set(find_key(data, 'entryId')) 111 | if len(ids) >= 2: 112 | return data, entries, cursor 113 | except Exception as e: 114 | if i == retries: 115 | if self.debug: 116 | self.logger.debug(f'Max retries exceeded\n{e}') 117 | return 118 | t = 2 ** i + random.random() 119 | if self.debug: 120 | self.logger.debug(f'Retrying in {f"{t:.2f}"} seconds\t\t{e}') 121 | await asyncio.sleep(t) 122 | 123 | def _init_logger(self, **kwargs) -> Logger: 124 | if kwargs.get('debug'): 125 | cfg = kwargs.get('log_config') 126 | logging.config.dictConfig(cfg or LOG_CONFIG) 127 | 128 | # only support one logger 129 | logger_name = list(LOG_CONFIG['loggers'].keys())[0] 130 | 131 | # set level of all other loggers to ERROR 132 | for name in logging.root.manager.loggerDict: 133 | if name != logger_name: 134 | logging.getLogger(name).setLevel(logging.ERROR) 135 | 136 | return logging.getLogger(logger_name) 137 | 138 | @staticmethod 139 | def _validate_session(*args, **kwargs): 140 | email, username, password, session = args 141 | 142 | # validate credentials 143 | if all((email, username, password)): 144 | return login(email, username, password, **kwargs) 145 | 146 | # invalid credentials, try validating session 147 | if session and all(session.cookies.get(c) for c in {'ct0', 'auth_token'}): 148 | return session 149 | 150 | # invalid credentials and session 151 | cookies = kwargs.get('cookies') 152 | 153 | # try validating cookies dict 154 | if isinstance(cookies, dict) and all(cookies.get(c) for c in {'ct0', 'auth_token'}): 155 | _session = Client(cookies=cookies, follow_redirects=True) 156 | _session.headers.update(get_headers(_session)) 157 | return _session 158 | 159 | # try validating cookies from file 160 | if isinstance(cookies, str): 161 | _session = Client(cookies=orjson.loads(Path(cookies).read_bytes()), follow_redirects=True) 162 | _session.headers.update(get_headers(_session)) 163 | return _session 164 | 165 | raise Exception('Session not authenticated. 
' 166 | 'Please use an authenticated session or remove the `session` argument and try again.') 167 | 168 | @property 169 | def id(self) -> int: 170 | """ Get User ID """ 171 | return int(re.findall('"u=(\d+)"', self.session.cookies.get('twid'))[0]) 172 | 173 | def save_cookies(self, fname: str = None): 174 | """ Save cookies to file """ 175 | cookies = self.session.cookies 176 | Path(f'{fname or cookies.get("username")}.cookies').write_bytes(orjson.dumps(dict(cookies))) 177 | -------------------------------------------------------------------------------- /twitter/util.py: -------------------------------------------------------------------------------- 1 | import random 2 | import re 3 | import time 4 | from logging import Logger 5 | from pathlib import Path 6 | from urllib.parse import urlsplit, urlencode, urlunsplit, parse_qs, quote 7 | 8 | import aiofiles 9 | import orjson 10 | from aiofiles.os import makedirs 11 | from httpx import Response, Client 12 | from textwrap import dedent 13 | 14 | from .constants import GREEN, MAGENTA, RED, RESET, MAX_GQL_CHAR_LIMIT, USER_AGENTS, ORANGE 15 | 16 | 17 | def init_session(): 18 | client = Client(headers={ 19 | 'authorization': 'Bearer AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs=1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA', 20 | 'user-agent': random.choice(USER_AGENTS), 21 | }, follow_redirects=True) 22 | r = client.post('https://api.twitter.com/1.1/guest/activate.json').json() 23 | client.headers.update({ 24 | 'content-type': 'application/json', 25 | 'x-guest-token': r['guest_token'], 26 | 'x-twitter-active-user': 'yes', 27 | }) 28 | return client 29 | 30 | 31 | def batch_ids(ids: list[int | str], char_limit: int = MAX_GQL_CHAR_LIMIT) -> list[list]: 32 | """To avoid 431 errors""" 33 | res, batch, length = [], [], 0 34 | for x in map(str, ids): 35 | if length + len(x) > char_limit: 36 | res.append(batch) 37 | batch, length = [], 0 38 | batch.append(x) 39 | length += len(x) 40 | res.append(batch) if batch else ... 41 | # print(f'Batched {sum(map(len, res))} ids into {len(res)} requests') 42 | return res 43 | 44 | 45 | def build_params(params: dict) -> dict: 46 | return {k: orjson.dumps(v).decode() for k, v in params.items()} 47 | 48 | 49 | async def save_json(r: Response, path: str | Path, name: str, **kwargs): 50 | try: 51 | data = r.json() 52 | kwargs.pop('cursor', None) 53 | 54 | # special case: only 2 endpoints have batch requests as of Dec 2023 55 | if name in {'TweetResultsByRestIds', 'UsersByRestIds'}: 56 | out = f'{path}/batch' 57 | else: 58 | out = f'{path}/{"_".join(map(str, kwargs.values()))}' 59 | await makedirs(out, exist_ok=True) 60 | async with aiofiles.open(f'{out}/{time.time_ns()}_{name}.json', 'wb') as fp: 61 | await fp.write(orjson.dumps(data)) 62 | 63 | except Exception as e: 64 | print(f'Failed to save JSON data for {kwargs}\n{e}') 65 | 66 | 67 | def flatten(seq: list | tuple) -> list: 68 | flat = [] 69 | for e in seq: 70 | if isinstance(e, list | tuple): 71 | flat.extend(flatten(e)) 72 | else: 73 | flat.append(e) 74 | return flat 75 | 76 | 77 | def get_json(res: list[Response], **kwargs) -> list: 78 | cursor = kwargs.get('cursor') 79 | temp = res 80 | if any(isinstance(r, (list, tuple)) for r in res): 81 | temp = flatten(res) 82 | results = [] 83 | for r in temp: 84 | try: 85 | data = r.json() 86 | if cursor: 87 | results.append([data, cursor]) 88 | else: 89 | results.append(data) 90 | except Exception as e: 91 | print('Cannot parse JSON response', e) 92 | print(dedent(f'''{ORANGE} 93 | Checklist: 94 | 1. 
Log-in via the browser and confirm your account is not blocked, or has pending security challenges. 95 | 2. Copy the `ct0` and `auth_token` cookies from the browser. 96 | 3. Re-run your program using these new cookies. 97 | {RESET}''')) 98 | return results 99 | 100 | 101 | def set_qs(url: str, qs: dict, update=False, **kwargs) -> str: 102 | *_, q, f = urlsplit(url) 103 | return urlunsplit((*_, urlencode(qs | parse_qs(q) if update else qs, doseq=True, quote_via=quote, 104 | safe=kwargs.get('safe', '')), f)) 105 | 106 | 107 | def get_cursor(data: list | dict) -> str: 108 | # inefficient, but need to deal with arbitrary schema 109 | entries = find_key(data, 'entries') 110 | if entries: 111 | for entry in entries.pop(): 112 | entry_id = entry.get('entryId', '') 113 | if ('cursor-bottom' in entry_id) or ('cursor-showmorethreads' in entry_id): 114 | content = entry['content'] 115 | if itemContent := content.get('itemContent'): 116 | return itemContent['value'] # v2 cursor 117 | return content['value'] # v1 cursor 118 | 119 | 120 | def get_headers(session, **kwargs) -> dict: 121 | """ 122 | Get the headers required for authenticated requests 123 | """ 124 | cookies = session.cookies 125 | # todo httpx cookie issues 126 | try: 127 | if session._init_with_cookies: 128 | cookies.delete('ct0', domain='.twitter.com') 129 | except: 130 | ... 131 | headers = kwargs | { 132 | 'authorization': 'Bearer AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs=1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA', 133 | 'cookie': '; '.join(f'{k}={v}' for k, v in cookies.items()), 134 | 'referer': 'https://twitter.com/', 135 | 'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/113.0.0.0 Safari/537.36', 136 | 'x-csrf-token': cookies.get('ct0', ''), 137 | 'x-guest-token': cookies.get('guest_token', ''), 138 | 'x-twitter-auth-type': 'OAuth2Session' if cookies.get('auth_token') else '', 139 | 'x-twitter-active-user': 'yes', 140 | 'x-twitter-client-language': 'en', 141 | } 142 | return dict(sorted({k.lower(): v for k, v in headers.items()}.items())) 143 | 144 | 145 | def find_key(obj: any, key: str) -> list: 146 | """ 147 | Find all values of a given key within a nested dict or list of dicts 148 | 149 | Most data of interest is nested, and sometimes defined by different schemas. 150 | It is not worth our time to enumerate all absolute paths to a given key, then update 151 | the paths in our parsing functions every time Twitter changes their API. 152 | Instead, we recursively search for the key here, then run post-processing functions on the results. 
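For example (illustrative), `find_key({'a': {'rest_id': '1', 'b': [{'rest_id': '2'}]}}, 'rest_id')` returns `['1', '2']`.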
153 | 154 | @param obj: dictionary or list of dictionaries 155 | @param key: key to search for 156 | @return: list of values 157 | """ 158 | 159 | def helper(obj: any, key: str, L: list) -> list: 160 | if not obj: 161 | return L 162 | 163 | if isinstance(obj, list): 164 | for e in obj: 165 | L.extend(helper(e, key, [])) 166 | return L 167 | 168 | if isinstance(obj, dict) and obj.get(key): 169 | L.append(obj[key]) 170 | 171 | if isinstance(obj, dict) and obj: 172 | for k in obj: 173 | L.extend(helper(obj[k], key, [])) 174 | return L 175 | 176 | return helper(obj, key, []) 177 | 178 | 179 | def log(logger: Logger, level: int, r: Response): 180 | def stat(r, txt, data): 181 | if level >= 1: 182 | logger.debug(f'{r.url.path}') 183 | if level >= 2: 184 | logger.debug(f'{r.url}') 185 | if level >= 3: 186 | logger.debug(f'{txt}') 187 | if level >= 4: 188 | logger.debug(f'{data}') 189 | 190 | try: 191 | limits = {k: v for k, v in r.headers.items() if 'x-rate-limit' in k} 192 | current_time = int(time.time()) 193 | wait = int(r.headers.get('x-rate-limit-reset', current_time)) - current_time 194 | remaining = limits.get('x-rate-limit-remaining') 195 | limit = limits.get('x-rate-limit-limit') 196 | logger.debug(f"remaining: {MAGENTA}{remaining}/{limit}{RESET} requests") 197 | logger.debug(f'reset: {MAGENTA}{(wait / 60):.2f}{RESET} minutes') 198 | except Exception as e: 199 | logger.error(f'Rate limit info unavailable: {e}') 200 | 201 | try: 202 | status = r.status_code 203 | txt, data, = r.text, r.json() 204 | if 'json' in r.headers.get('content-type', ''): 205 | if data.get('errors') and not find_key(data, 'instructions'): 206 | logger.error(f'[{RED}error{RESET}] {status} {data}') 207 | else: 208 | logger.debug(fmt_status(status)) 209 | stat(r, txt, data) 210 | else: 211 | logger.debug(fmt_status(status)) 212 | stat(r, txt, {}) 213 | except Exception as e: 214 | logger.error(f'Failed to log: {e}') 215 | 216 | 217 | def fmt_status(status: int) -> str: 218 | color = None 219 | if 200 <= status < 300: 220 | color = GREEN 221 | elif 300 <= status < 400: 222 | color = MAGENTA 223 | elif 400 <= status < 600: 224 | color = RED 225 | return f'[{color}{status}{RESET}]' 226 | 227 | 228 | def get_code(cls, retries=5) -> str | None: 229 | """ Get verification code from Proton Mail inbox """ 230 | 231 | def poll_inbox(): 232 | inbox = cls.inbox() 233 | for c in inbox.get('Conversations', []): 234 | if c['Senders'][0]['Address'] in {'info@twitter.com', 'info@x.com'}: 235 | exprs = ['Your Twitter confirmation code is (.+)', '(.+) is your Twitter verification code'] 236 | if temp := list(filter(None, (re.search(expr, c['Subject']) for expr in exprs))): 237 | return temp[0].group(1) 238 | 239 | for i in range(retries + 1): 240 | if code := poll_inbox(): 241 | return code 242 | if i == retries: 243 | print(f'Max retries exceeded') 244 | return 245 | t = 2 ** i + random.random() 246 | print(f'Retrying in {f"{t:.2f}"} seconds') 247 | time.sleep(t) 248 | 249 | 250 | def parse_card_media(cards): 251 | res = [] 252 | for c in cards: 253 | img = c.get('value', {}).get('image_value', {}) 254 | if c.get('key') == 'photo_image_full_size_original': 255 | url = img.get('url') 256 | res.append([url, img.get('width', 0) * img.get('height', 0)]) 257 | return [t[0] for t in sorted(res, key=lambda x: -x[1])] 258 | 259 | 260 | def set2list(d): 261 | if isinstance(d, dict): 262 | return {k: set2list(v) for k, v in d.items()} 263 | if isinstance(d, set): 264 | return list(d) 265 | return d 266 | 
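# --- illustrative usage sketch (hypothetical examples, not part of the library) ---
# batch_ids(range(10), char_limit=5)   ->  [['0', '1', '2', '3', '4'], ['5', '6', '7', '8', '9']]
# set2list({'img': {'a.jpg'}, 'n': 1}) ->  {'img': ['a.jpg'], 'n': 1}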
--------------------------------------------------------------------------------