├── .gitignore ├── LICENSE ├── assets ├── account.gif ├── scrape.gif ├── search.gif ├── spaces-audio.gif ├── spaces-transcript-01.gif └── spaces-transcript-02.gif ├── examples ├── example.ipynb └── simple_example.py ├── readme.md ├── scripts ├── clean.sh └── update.py ├── setup.py ├── setup.sh └── twitter ├── __init__.py ├── __version__.py ├── account.py ├── constants.py ├── login.py ├── scraper.py ├── search.py └── util.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 Trevor Hobenshield 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /assets/account.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/trevorhobenshield/twitter-api-client/c150f1a3492ce3db15b954f2bc18b4976500a73b/assets/account.gif -------------------------------------------------------------------------------- /assets/scrape.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/trevorhobenshield/twitter-api-client/c150f1a3492ce3db15b954f2bc18b4976500a73b/assets/scrape.gif -------------------------------------------------------------------------------- /assets/search.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/trevorhobenshield/twitter-api-client/c150f1a3492ce3db15b954f2bc18b4976500a73b/assets/search.gif -------------------------------------------------------------------------------- /assets/spaces-audio.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/trevorhobenshield/twitter-api-client/c150f1a3492ce3db15b954f2bc18b4976500a73b/assets/spaces-audio.gif -------------------------------------------------------------------------------- /assets/spaces-transcript-01.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/trevorhobenshield/twitter-api-client/c150f1a3492ce3db15b954f2bc18b4976500a73b/assets/spaces-transcript-01.gif -------------------------------------------------------------------------------- /assets/spaces-transcript-02.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/trevorhobenshield/twitter-api-client/c150f1a3492ce3db15b954f2bc18b4976500a73b/assets/spaces-transcript-02.gif -------------------------------------------------------------------------------- /examples/simple_example.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | from twitter.util import find_key 3 | from twitter.scraper import Scraper 4 | 5 | 6 | def parse_tweets(data: list | dict) -> pd.DataFrame: 7 | """ 8 | Parse small subset of relevant features into a DataFrame. 9 | 10 | Note: structure of GraphQL response is not consistent, this example may not work in all cases. 11 | 12 | @param data: tweets (raw GraphQL response data) 13 | @return: DataFrame of tweets 14 | """ 15 | df = ( 16 | pd.json_normalize(( 17 | x.get('result', {}).get('tweet', {}).get('legacy') for x in find_key(data, 'tweet_results')), 18 | max_level=1 19 | ) 20 | .assign(created_at=lambda x: pd.to_datetime(x['created_at'], format="%a %b %d %H:%M:%S %z %Y")) 21 | .sort_values('created_at', ascending=False) 22 | .reset_index(drop=True) 23 | ) 24 | numeric = [ 25 | 'user_id_str', 26 | 'id_str', 27 | 'favorite_count', 28 | 'quote_count', 29 | 'reply_count', 30 | 'retweet_count', 31 | ] 32 | df[numeric] = df[numeric].apply(pd.to_numeric, errors='coerce') 33 | df = df[[ 34 | 'id_str', 35 | 'user_id_str', 36 | 'created_at', 37 | 'full_text', 38 | 'favorite_count', 39 | 'quote_count', 40 | 'reply_count', 41 | 'retweet_count', 42 | 'lang', 43 | ]] 44 | return df 45 | 46 | 47 | if __name__ == '__main__': 48 | ## sign-in with credentials 49 | email, username, password = ..., ..., ... 
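# note: the Ellipsis values above are placeholders; substitute real credentials, or skip them and resume from cookies as shown below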
50 | scraper = Scraper(email, username, password) 51 | 52 | ## or, resume session using cookies 53 | # scraper = Scraper(cookies={"ct0": ..., "auth_token": ...}) 54 | 55 | tweets = scraper.tweets([ 56 | ..., # tweet ids 57 | ]) 58 | 59 | df = parse_tweets(tweets) 60 | 61 | df.to_csv('tweets.csv') 62 | # df.to_parquet('tweets.parquet', engine='pyarrow') 63 | -------------------------------------------------------------------------------- /scripts/clean.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/bash 2 | 3 | if [ -d '../dist' ] ; then 4 | rm -r ../dist 5 | fi 6 | if [ -d '../build' ] ; then 7 | rm -r ../build 8 | fi 9 | if [ -d '../twitter_api_client.egg-info' ] ; then 10 | rm -r ../twitter_api_client.egg-info 11 | fi -------------------------------------------------------------------------------- /scripts/update.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import logging.config 3 | import platform 4 | import random 5 | import re 6 | import subprocess 7 | from asyncio import Semaphore 8 | from functools import partial 9 | from logging import getLogger, Logger 10 | from pathlib import Path 11 | from typing import Generator 12 | 13 | import aiofiles 14 | import chompjs 15 | import orjson 16 | from httpx import AsyncClient, Response, Limits, Client 17 | from selectolax.lexbor import LexborHTMLParser 18 | from tqdm.asyncio import tqdm_asyncio 19 | 20 | try: 21 | get_ipython() 22 | import nest_asyncio 23 | 24 | nest_asyncio.apply() 25 | except: 26 | ... 27 | 28 | if platform.system() != 'Windows': 29 | try: 30 | import uvloop 31 | 32 | uvloop.install() 33 | except: 34 | ... 35 | 36 | dump_json = partial(orjson.dumps, option=orjson.OPT_INDENT_2 | orjson.OPT_SORT_KEYS) 37 | 38 | 39 | def mkdir(path: str | Path) -> Path: 40 | p = Path(path) 41 | p.mkdir(exist_ok=True, parents=True) 42 | return p 43 | 44 | 45 | logging.config.dictConfig({ 46 | 'version': 1, 47 | 'disable_existing_loggers': False, 48 | 'formatters': { 49 | 'standard': { 50 | 'format': '%(asctime)s.%(msecs)03d [%(levelname)s] :: %(message)s', 51 | 'datefmt': '%Y-%m-%d %H:%M:%S' 52 | } 53 | }, 54 | 'handlers': { 55 | 'file': { 56 | 'class': 'logging.FileHandler', 57 | 'level': 'DEBUG', 58 | 'formatter': 'standard', 59 | 'filename': 'log.log', 60 | 'mode': 'a' 61 | }, 62 | 'console_warning': { 63 | 'class': 'logging.StreamHandler', 64 | 'level': 'WARNING', 65 | 'formatter': 'standard' 66 | }, 67 | 'console_info': { 68 | 'class': 'logging.StreamHandler', 69 | 'level': 'INFO', 70 | 'formatter': 'standard', 71 | 'filters': ['info_only'] 72 | } 73 | }, 74 | 'filters': { 75 | 'info_only': { 76 | '()': lambda: lambda record: record.levelno == logging.INFO 77 | } 78 | }, 79 | 'loggers': { 80 | 'my_logger': { 81 | 'handlers': ['file', 'console_warning', 'console_info'], 82 | 'level': 'DEBUG' 83 | } 84 | } 85 | }) 86 | logger = getLogger(list(Logger.manager.loggerDict)[-1]) 87 | 88 | PATH_DATA = mkdir('data') 89 | 90 | PATH_HOMEPAGE = PATH_DATA / 'x.html' 91 | PATH_INITIAL_STATE = PATH_DATA / 'initial_state.json' 92 | PATH_FEATURES = PATH_DATA / 'features.json' 93 | PATH_LIMITS = PATH_DATA / 'limits.json' 94 | PATH_OPS = PATH_DATA / 'ops.json' 95 | PATH_MAIN = PATH_DATA / 'main.js' 96 | PATH_URLS = PATH_DATA / 'csp.txt' 97 | STRINGS = PATH_DATA / 'strings.txt' 98 | PATHS = PATH_DATA / 'paths.txt' 99 | JS_FILES_MAP = PATH_DATA / 'js.json' 100 | JS_FILES = mkdir(PATH_DATA / 'js') 101 | OPERATIONS = PATH_DATA / 'operations' 102 | 
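# note: the paths above collect this script's outputs: the raw x.com homepage, main.js and the other JS bundles, the parsed window.__INITIAL_STATE__, the GraphQL operation map (ops.json), boolean feature flags, and numeric limits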
103 | USER_AGENTS = [ 104 | 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.3.1 Safari/605.1.1', 105 | 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.3', 106 | 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.6 Safari/605.1.1', 107 | 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36 Edg/122.0.0.', 108 | ] 109 | 110 | _a = 'a.js' 111 | _base = 'https://abs.twimg.com/responsive-web/client-web' 112 | 113 | 114 | async def backoff(fn: callable, sem: Semaphore, *args, m: int = 20, b: int = 2, max_retries: int = 8, **kwargs) -> any: 115 | ignore_status_codes = kwargs.pop('ignore_status_codes', []) 116 | for i in range(max_retries + 1): 117 | try: 118 | async with sem: 119 | r = await fn(*args, **kwargs) 120 | if r.status_code in ignore_status_codes: 121 | return r 122 | r.raise_for_status() 123 | return r 124 | except Exception as e: 125 | if i == max_retries: 126 | logger.warning(f'Max retries exceeded\n{e}') 127 | return 128 | t = min(random.random() * (b ** i), m) 129 | logger.info(f'Retrying in {f"{t:.2f}"} seconds\n{e}') 130 | await asyncio.sleep(t) 131 | 132 | 133 | def download(urls: list[str], out: str = 'tmp', sz: int = None, fname_fn: partial = None, **kwargs) -> Generator: 134 | async def get(client: AsyncClient, sem: Semaphore, url: str): 135 | fname = url.split('/')[-1] if not fname_fn else fname_fn(url) 136 | async with aiofiles.open(f'{_out}/{fname}', 'wb') as fp: 137 | r = await backoff(client.get, sem, url, **kwargs) 138 | async for chunk in r.aiter_bytes(sz): 139 | await fp.write(chunk) 140 | return r 141 | 142 | _out = mkdir(out) 143 | return (partial(get, url=u) for u in urls) 144 | 145 | 146 | def send(cfgs: list[dict], **kwargs) -> Generator: 147 | async def f(client: AsyncClient, sem: Semaphore, cfg: dict) -> Response: 148 | return await backoff(client.request, sem, **cfg, **kwargs) 149 | 150 | return (partial(f, cfg=cfg) for cfg in cfgs) 151 | 152 | 153 | async def process(fns: Generator, max_connections: int = 2000, **kwargs): 154 | client_defaults = { 155 | 'cookies': kwargs.pop('cookies', None), 156 | 'headers': {'user-agent': random.choice(USER_AGENTS)} | kwargs.pop('headers', {}), 157 | 'timeout': kwargs.pop('timeout', 30.0), 158 | 'verify': kwargs.pop('verify', False), 159 | 'http2': kwargs.pop('http2', True), 160 | 'follow_redirects': kwargs.pop('follow_redirects', True), 161 | 'limits': kwargs.pop('limits', Limits( 162 | max_connections=max_connections, 163 | max_keepalive_connections=None, 164 | keepalive_expiry=5.0, 165 | )) 166 | } 167 | # tqdm 168 | desc = kwargs.pop('desc', None) 169 | sem = Semaphore(max_connections) 170 | async with AsyncClient(**client_defaults, **kwargs) as client: 171 | tasks = (fn(client=client, sem=sem) for fn in fns) 172 | if desc: 173 | return await tqdm_asyncio.gather(*tasks, desc=desc) 174 | return await asyncio.gather(*tasks) 175 | 176 | 177 | def _get_endpoints(res: Response, out: Path = JS_FILES_MAP) -> dict: 178 | temp = re.findall('\+"\."\+(\{.*\})\[e\]\+?' 
+ '"' + _a + '"', res.text)[0] 179 | endpoints = orjson.loads(temp.replace('vendor:', '"vendor":').replace('api:', '"api":')) 180 | if out: 181 | out.write_bytes(dump_json(endpoints)) 182 | return endpoints 183 | 184 | 185 | def get_js_files(r: Response, out: Path = JS_FILES) -> None: 186 | endpoints = _get_endpoints(r) 187 | csp = sorted({x.strip(';') for x in r.headers.get("content-security-policy").split() if x.startswith("https://")}) 188 | PATH_URLS.write_text('\n'.join(csp)) 189 | urls = [ 190 | f'{_base}/{k}.{v}{_a}' 191 | for k, v in endpoints.items() 192 | if not re.search(r'participantreaction|\.countries-|emojipicker|i18n|icons\/', k, flags=re.I) 193 | ] 194 | asyncio.run(process(download(urls, out=out), desc='Downloading JS files')) 195 | 196 | 197 | def parse_matches(matches: list[tuple]) -> dict: 198 | d = {} 199 | for m in matches: 200 | d[m[1]] = { 201 | "queryId": m[0], 202 | "operationName": m[1], 203 | "operationType": m[2], 204 | "featureSwitches": sorted(re.sub(r'[\s"\']', '', x) for x in (m[3].split(',') if m[3] else [])), 205 | "fieldToggles": sorted(re.sub(r'[\s"\']', '', x) for x in (m[4].split(',') if m[4] else [])) 206 | } 207 | return d 208 | 209 | 210 | def main(): 211 | client = Client(headers={'user-agent': random.choice(USER_AGENTS)}, follow_redirects=True, http2=True) 212 | r1 = client.get('https://x.com') 213 | PATH_HOMEPAGE.write_text(r1.text) 214 | 215 | try: 216 | get_js_files(r1) 217 | except Exception as e: 218 | logger.warning(f'Failed to get js files\t\t{e}') 219 | 220 | main_js = re.findall(r'href="(https\:\/\/abs\.twimg\.com\/responsive-web\/client-web\/main\.\w+\.js)"', r1.text)[0] 221 | r2 = client.get(main_js) 222 | PATH_MAIN.write_text(r2.text) 223 | 224 | expr = r'\{[^{}]*queryId:\s?"([^"]+)",\s*operationName:\s?"([^"]+)",\s*operationType:\s?"([^"]+)",\s*metadata:\s?\{\s*featureSwitches:\s?\[(.*?)\],\s*fieldToggles:\s?\[(.*?)\]\s*\}\s*\}' 225 | 226 | matches = re.findall(expr, r2.text, flags=re.A) 227 | ops = parse_matches(matches) 228 | 229 | # search all js files for more GraphQL operation definitions 230 | for p in JS_FILES.iterdir(): 231 | matches = re.findall(expr, p.read_text(), flags=re.A) 232 | ops |= parse_matches(matches) 233 | 234 | PATH_OPS.write_bytes(dump_json(ops)) 235 | html = LexborHTMLParser(PATH_HOMEPAGE.read_text()) 236 | k = 'window.__INITIAL_STATE__=' 237 | PATH_INITIAL_STATE.write_bytes(dump_json(chompjs.parse_js_object([x for x in html.css('script') if k in x.text()][0].text().replace(k, '').strip(';')))) 238 | 239 | data = orjson.loads(PATH_INITIAL_STATE.read_bytes()) 240 | config = data['featureSwitch']['defaultConfig'] | data['featureSwitch']['user']['config'] 241 | features = {k: v.get('value') for k, v in config.items() if isinstance(v.get('value'), bool)} 242 | numeric = {k: v.get('value') for k, v in config.items() if isinstance(v.get('value'), int) and not isinstance(v.get('value'), bool)} 243 | PATH_FEATURES.write_bytes(dump_json(features)) 244 | PATH_LIMITS.write_bytes(dump_json(numeric)) 245 | 246 | 247 | if __name__ == '__main__': 248 | main() 249 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from textwrap import dedent 2 | from setuptools import find_packages, setup 3 | from pathlib import Path 4 | 5 | install_requires = [ 6 | 'aiofiles', 7 | 'nest_asyncio', 8 | 'httpx', 9 | 'tqdm', 10 | 'orjson', 11 | 'm3u8', 12 | 'websockets', 13 | 'uvloop; platform_system != "Windows"', 14 
| ] 15 | 16 | about = {} 17 | exec((Path().cwd() / 'twitter' / '__version__.py').read_text(), about) 18 | 19 | setup( 20 | name=about['__title__'], 21 | version=about['__version__'], 22 | author=about['__author__'], 23 | description=about['__description__'], 24 | license=about['__license__'], 25 | long_description=dedent(''' 26 | 27 | ## Implementation of X/Twitter v1, v2, and GraphQL APIs. 28 | 29 | 30 | ## Table of Contents 31 | 32 | * [Installation](#installation) 33 | * [Automation](#automation) 34 | * [Scraping](#scraping) 35 | * [Get all user/tweet data](#get-all-usertweet-data) 36 | * [Resume Pagination](#resume-pagination) 37 | * [Search](#search) 38 | * [Spaces](#spaces) 39 | * [Live Audio Capture](#live-audio-capture) 40 | * [Live Transcript Capture](#live-transcript-capture) 41 | * [Search and Metadata](#search-and-metadata) 42 | * [Automated Solvers](#automated-solvers) 43 | * [Example API Responses](#example-api-responses) 44 | 45 | ### Installation 46 | 47 | ```bash 48 | pip install twitter-api-client 49 | ``` 50 | 51 | ### Automation 52 | 53 | ```python 54 | from twitter.account import Account 55 | 56 | ## sign-in with credentials 57 | email, username, password = ..., ..., ... 58 | account = Account(email, username, password) 59 | 60 | ## or, resume session using cookies 61 | # account = Account(cookies={"ct0": ..., "auth_token": ...}) 62 | 63 | ## or, resume session using cookies (JSON file) 64 | # account = Account(cookies='twitter.cookies') 65 | 66 | 67 | account.tweet('test 123') 68 | account.untweet(123456) 69 | account.retweet(123456) 70 | account.unretweet(123456) 71 | account.reply('foo', tweet_id=123456) 72 | account.quote('bar', tweet_id=123456) 73 | account.schedule_tweet('schedule foo', 1681851240) 74 | account.unschedule_tweet(123456) 75 | 76 | account.tweet('hello world', media=[ 77 | {'media': 'test.jpg', 'alt': 'some alt text', 'tagged_users': [123]}, 78 | {'media': 'test.jpeg', 'alt': 'some alt text', 'tagged_users': [123]}, 79 | {'media': 'test.png', 'alt': 'some alt text', 'tagged_users': [123]}, 80 | {'media': 'test.jfif', 'alt': 'some alt text', 'tagged_users': [123]}, 81 | ]) 82 | 83 | account.schedule_tweet('foo bar', '2023-04-18 15:42', media=[ 84 | {'media': 'test.gif', 'alt': 'some alt text'}, 85 | ]) 86 | 87 | account.schedule_reply('hello world', '2023-04-19 15:42', tweet_id=123456, media=[ 88 | {'media': 'test.gif', 'alt': 'some alt text'}, 89 | ]) 90 | 91 | account.dm('my message', [1234], media='test.jpg') 92 | 93 | account.create_poll('test poll 123', ['hello', 'world', 'foo', 'bar'], 10080) 94 | 95 | # tweets 96 | account.like(123456) 97 | account.unlike(123456) 98 | account.bookmark(123456) 99 | account.unbookmark(123456) 100 | account.pin(123456) 101 | account.unpin(123456) 102 | 103 | # users 104 | account.follow(1234) 105 | account.unfollow(1234) 106 | account.mute(1234) 107 | account.unmute(1234) 108 | account.enable_notifications(1234) 109 | account.disable_notifications(1234) 110 | account.block(1234) 111 | account.unblock(1234) 112 | 113 | # user profile 114 | account.update_profile_image('test.jpg') 115 | account.update_profile_banner('test.png') 116 | account.update_profile_info(name='Foo Bar', description='test 123', location='Victoria, BC') 117 | 118 | # topics 119 | account.follow_topic(111) 120 | account.unfollow_topic(111) 121 | 122 | # lists 123 | account.create_list('My List', 'description of my list', private=False) 124 | account.update_list(222, 'My Updated List', 'some updated description', private=False) 125 | 
account.update_list_banner(222, 'test.png') 126 | account.delete_list_banner(222) 127 | account.add_list_member(222, 1234) 128 | account.remove_list_member(222, 1234) 129 | account.delete_list(222) 130 | account.pin_list(222) 131 | account.unpin_list(222) 132 | 133 | # refresh all pinned lists in this order 134 | account.update_pinned_lists([222, 111, 333]) 135 | 136 | # unpin all lists 137 | account.update_pinned_lists([]) 138 | 139 | # get timelines 140 | timeline = account.home_timeline() 141 | latest_timeline = account.home_latest_timeline(limit=500) 142 | 143 | # get bookmarks 144 | bookmarks = account.bookmarks() 145 | 146 | # get DM inbox metadata 147 | inbox = account.dm_inbox() 148 | 149 | # get DMs from all conversations 150 | dms = account.dm_history() 151 | 152 | # get DMs from specific conversations 153 | dms = account.dm_history(['123456-789012', '345678-901234']) 154 | 155 | # search DMs by keyword 156 | dms = account.dm_search('test123') 157 | 158 | # delete entire conversation 159 | account.dm_delete(conversation_id='123456-789012') 160 | 161 | # delete (hide) specific DM 162 | account.dm_delete(message_id='123456') 163 | 164 | # get all scheduled tweets 165 | scheduled_tweets = account.scheduled_tweets() 166 | 167 | # delete a scheduled tweet 168 | account.delete_scheduled_tweet(12345678) 169 | 170 | # get all draft tweets 171 | draft_tweets = account.draft_tweets() 172 | 173 | # delete a draft tweet 174 | account.delete_draft_tweet(12345678) 175 | 176 | # delete all scheduled tweets 177 | account.clear_scheduled_tweets() 178 | 179 | # delete all draft tweets 180 | account.clear_draft_tweets() 181 | 182 | # example configuration 183 | account.update_settings({ 184 | "address_book_live_sync_enabled": False, 185 | "allow_ads_personalization": False, 186 | "allow_authenticated_periscope_requests": True, 187 | "allow_dm_groups_from": "following", 188 | "allow_dms_from": "following", 189 | "allow_location_history_personalization": False, 190 | "allow_logged_out_device_personalization": False, 191 | "allow_media_tagging": "none", 192 | "allow_sharing_data_for_third_party_personalization": False, 193 | "alt_text_compose_enabled": None, 194 | "always_use_https": True, 195 | "autoplay_disabled": False, 196 | "country_code": "us", 197 | "discoverable_by_email": False, 198 | "discoverable_by_mobile_phone": False, 199 | "display_sensitive_media": False, 200 | "dm_quality_filter": "enabled", 201 | "dm_receipt_setting": "all_disabled", 202 | "geo_enabled": False, 203 | "include_alt_text_compose": True, 204 | "include_mention_filter": True, 205 | "include_nsfw_admin_flag": True, 206 | "include_nsfw_user_flag": True, 207 | "include_ranked_timeline": True, 208 | "language": "en", 209 | "mention_filter": "unfiltered", 210 | "nsfw_admin": False, 211 | "nsfw_user": False, 212 | "personalized_trends": True, 213 | "protected": False, 214 | "ranked_timeline_eligible": None, 215 | "ranked_timeline_setting": None, 216 | "require_password_login": False, 217 | "requires_login_verification": False, 218 | "sleep_time": { 219 | "enabled": False, 220 | "end_time": None, 221 | "start_time": None 222 | }, 223 | "translator_type": "none", 224 | "universal_quality_filtering_enabled": "enabled", 225 | "use_cookie_personalization": False, 226 | }) 227 | 228 | # example configuration 229 | account.update_search_settings({ 230 | "optInFiltering": True, # filter nsfw content 231 | "optInBlocking": True, # filter blocked accounts 232 | }) 233 | 234 | notifications = account.notifications() 235 | 236 | 
account.change_password('old pwd', 'new pwd') 237 | 238 | ``` 239 | 240 | ### Scraping 241 | 242 | #### Get all user/tweet data 243 | 244 | Two special batch queries `scraper.tweets_by_ids` and `scraper.users_by_ids` should be preferred when applicable. These endpoints are much more efficient and generally have higher rate limits than their unbatched counterparts. See the table below for a comparison. 245 | 246 | | Endpoint | Batch Size | Rate Limit | 247 | |---------------|----------------|---------------| 248 | | tweets_by_ids | ~220 | 500 / 15 mins | 249 | | tweets_by_id | 1 | 50 / 15 mins | 250 | | users_by_ids | ~220 | 100 / 15 mins | 251 | | users_by_id | 1 | 500 / 15 mins | 252 | 253 | *As of Fall 2023, login by username/password is unstable. Using cookies is now recommended.* 254 | 255 | ```python 256 | from twitter.scraper import Scraper 257 | 258 | ## sign-in with credentials 259 | email, username, password = ..., ..., ... 260 | scraper = Scraper(email, username, password) 261 | 262 | ## or, resume session using cookies 263 | # scraper = Scraper(cookies={"ct0": ..., "auth_token": ...}) 264 | 265 | ## or, resume session using cookies (JSON file) 266 | # scraper = Scraper(cookies='twitter.cookies') 267 | 268 | ## or, initialize guest session (limited endpoints) 269 | # from twitter.util import init_session 270 | # scraper = Scraper(session=init_session()) 271 | 272 | # user data 273 | users = scraper.users(['foo', 'bar', 'hello', 'world']) 274 | users = scraper.users_by_ids([123, 234, 345]) # preferred 275 | users = scraper.users_by_id([123, 234, 345]) 276 | tweets = scraper.tweets([123, 234, 345]) 277 | likes = scraper.likes([123, 234, 345]) 278 | tweets_and_replies = scraper.tweets_and_replies([123, 234, 345]) 279 | media = scraper.media([123, 234, 345]) 280 | following = scraper.following([123, 234, 345]) 281 | followers = scraper.followers([123, 234, 345]) 282 | scraper.tweet_stats([111111, 222222, 333333]) 283 | 284 | # get recommended users based on user 285 | scraper.recommended_users() 286 | scraper.recommended_users([123]) 287 | 288 | # tweet data 289 | tweets = scraper.tweets_by_ids([987, 876, 754]) # preferred 290 | tweets = scraper.tweets_by_id([987, 876, 754]) 291 | tweet_details = scraper.tweets_details([987, 876, 754]) 292 | retweeters = scraper.retweeters([987, 876, 754]) 293 | favoriters = scraper.favoriters([987, 876, 754]) 294 | 295 | scraper.download_media([ 296 | 111111, 297 | 222222, 298 | 333333, 299 | 444444, 300 | ]) 301 | 302 | # trends 303 | scraper.trends() 304 | ``` 305 | 306 | #### Resume Pagination 307 | **Pagination is already done by default**; however, there are circumstances where you may need to resume pagination from a specific cursor. For example, the `Followers` endpoint only allows for 50 requests every 15 minutes. In this case, we can resume from where we left off by providing a specific cursor value. 308 | ```python 309 | from twitter.scraper import Scraper 310 | 311 | email, username, password = ..., ..., ... 312 | scraper = Scraper(email, username, password) 313 | 314 | user_id = 44196397 315 | cursor = '1767341853908517597|1663601806447476672' # example cursor 316 | limit = 100 # arbitrary limit for demonstration 317 | follower_subset, last_cursor = scraper.followers([user_id], limit=limit, cursor=cursor) 318 | 319 | # use last_cursor to resume pagination 320 | ``` 321 | 322 | #### Search 323 | 324 | ```python 325 | from twitter.search import Search 326 | 327 | email, username, password = ..., ..., ...
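# each query is a dict with a 'category' (Top, Latest, People, Photos, Videos) and a 'query' string built from standard search operators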
328 | # default output directory is `data/search_results` if save=True 329 | search = Search(email, username, password, save=True, debug=1) 330 | 331 | res = search.run( 332 | limit=37, 333 | retries=5, 334 | queries=[ 335 | { 336 | 'category': 'Top', 337 | 'query': 'paperswithcode -tensorflow -tf' 338 | }, 339 | { 340 | 'category': 'Latest', 341 | 'query': 'test' 342 | }, 343 | { 344 | 'category': 'People', 345 | 'query': 'brasil portugal -argentina' 346 | }, 347 | { 348 | 'category': 'Photos', 349 | 'query': 'greece' 350 | }, 351 | { 352 | 'category': 'Videos', 353 | 'query': 'italy' 354 | }, 355 | ], 356 | ) 357 | ``` 358 | 359 | **Search Operators Reference** 360 | 361 | https://developer.twitter.com/en/docs/twitter-api/v1/rules-and-filtering/search-operators 362 | 363 | https://developer.twitter.com/en/docs/twitter-api/tweets/search/integrate/build-a-query 364 | 365 | ### Spaces 366 | 367 | #### Live Audio Capture 368 | 369 | Capture live audio for up to 500 streams per IP 370 | 371 | ```python 372 | from twitter.scraper import Scraper 373 | from twitter.util import init_session 374 | 375 | session = init_session() # initialize guest session, no login required 376 | scraper = Scraper(session=session) 377 | 378 | rooms = [...] 379 | scraper.spaces_live(rooms=rooms) # capture live audio from list of rooms 380 | ``` 381 | 382 | #### Live Transcript Capture 383 | 384 | **Raw transcript chunks** 385 | 386 | ```python 387 | from twitter.scraper import Scraper 388 | from twitter.util import init_session 389 | 390 | session = init_session() # initialize guest session, no login required 391 | scraper = Scraper(session=session) 392 | 393 | # room must be live, i.e. in "Running" state 394 | scraper.space_live_transcript('1zqKVPlQNApJB', frequency=2) # word-level live transcript. (dirty, on-the-fly transcription before post-processing) 395 | ``` 396 | 397 | **Processed (final) transcript chunks** 398 | 399 | ```python 400 | from twitter.scraper import Scraper 401 | from twitter.util import init_session 402 | 403 | session = init_session() # initialize guest session, no login required 404 | scraper = Scraper(session=session) 405 | 406 | # room must be live, i.e. in "Running" state 407 | scraper.space_live_transcript('1zqKVPlQNApJB', frequency=1) # finalized live transcript. (clean) 408 | ``` 409 | 410 | #### Search and Metadata 411 | ```python 412 | from twitter.scraper import Scraper 413 | from twitter.util import init_session 414 | from twitter.constants import SpaceCategory 415 | 416 | session = init_session() # initialize guest session, no login required 417 | scraper = Scraper(session=session) 418 | 419 | # download audio and chat-log from space 420 | spaces = scraper.spaces(rooms=['1eaJbrAPnBVJX', '1eaJbrAlZjjJX'], audio=True, chat=True) 421 | 422 | # pull metadata only 423 | spaces = scraper.spaces(rooms=['1eaJbrAPnBVJX', '1eaJbrAlZjjJX']) 424 | 425 | # search for spaces in "Upcoming", "Top" and "Live" categories 426 | spaces = scraper.spaces(search=[ 427 | { 428 | 'filter': SpaceCategory.Upcoming, 429 | 'query': 'hello' 430 | }, 431 | { 432 | 'filter': SpaceCategory.Top, 433 | 'query': 'world' 434 | }, 435 | { 436 | 'filter': SpaceCategory.Live, 437 | 'query': 'foo bar' 438 | } 439 | ]) 440 | ``` 441 | 442 | ### Automated Solvers 443 | 444 | > This requires installation of the [proton-api-client](https://pypi.org/project/proton-api-client) package 445 | 446 | To set up automated email confirmation/verification solvers, add your Proton Mail credentials below as shown. 
447 | This removes the need to manually solve email challenges via the web app. These credentials can be used 448 | in `Scraper`, `Account`, and `Search` constructors. 449 | 450 | E.g. 451 | 452 | ```python 453 | from twitter.account import Account 454 | from twitter.util import get_code 455 | from proton.client import ProtonMail 456 | 457 | proton_username, proton_password = ..., ... 458 | proton = lambda: get_code(ProtonMail(proton_username, proton_password)) 459 | 460 | email, username, password = ..., ..., ... 461 | account = Account(email, username, password, proton=proton) 462 | ``` 463 | 464 | '''), 465 | python_requires=">=3.10.10", 466 | long_description_content_type='text/markdown', 467 | author_email='trevorhobenshield@gmail.com', 468 | url='https://github.com/trevorhobenshield/twitter-api-client', 469 | install_requires=install_requires, 470 | keywords='twitter api client async search automation bot scrape', 471 | packages=find_packages(), 472 | include_package_data=True, 473 | classifiers=[ 474 | 'Environment :: Web Environment', 475 | 'Intended Audience :: Developers', 476 | 'Natural Language :: English', 477 | 'Operating System :: Unix', 478 | 'Operating System :: MacOS :: MacOS X', 479 | 'Operating System :: Microsoft :: Windows', 480 | 'Programming Language :: Python', 481 | 'Programming Language :: Python :: 3', 482 | 'Programming Language :: Python :: 3.10', 483 | 'Programming Language :: Python :: 3.11', 484 | 'Programming Language :: Python :: 3.12', 485 | 'Topic :: Internet :: WWW/HTTP', 486 | 'Topic :: Software Development :: Libraries', 487 | 'Topic :: Software Development :: Libraries :: Python Modules', 488 | ] 489 | ) 490 | -------------------------------------------------------------------------------- /setup.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/bash 2 | 3 | python -m build 4 | python -m twine upload dist/* -------------------------------------------------------------------------------- /twitter/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/trevorhobenshield/twitter-api-client/c150f1a3492ce3db15b954f2bc18b4976500a73b/twitter/__init__.py -------------------------------------------------------------------------------- /twitter/__version__.py: -------------------------------------------------------------------------------- 1 | __title__ = "twitter-api-client" 2 | __description__ = "Implementation of X/Twitter v1, v2, and GraphQL APIs." 3 | __version__ = "0.10.22" 4 | __author__ = "Trevor Hobenshield" 5 | __license__ = "MIT" -------------------------------------------------------------------------------- /twitter/account.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import hashlib 3 | import logging.config 4 | import math 5 | import mimetypes 6 | import platform 7 | from copy import deepcopy 8 | from datetime import datetime 9 | from string import ascii_letters 10 | from uuid import uuid1, getnode 11 | 12 | from httpx import AsyncClient, Limits 13 | from tqdm import tqdm 14 | from tqdm.asyncio import tqdm_asyncio 15 | 16 | from .constants import * 17 | from .login import login 18 | from .util import * 19 | 20 | try: 21 | if get_ipython().__class__.__name__ == 'ZMQInteractiveShell': 22 | import nest_asyncio 23 | nest_asyncio.apply() 24 | except: 25 | ... 
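# nest_asyncio (above) permits re-entering an already-running notebook event loop; uvloop (below) is an optional faster event loop on non-Windows platforms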
26 | 27 | if platform.system() != 'Windows': 28 | try: 29 | import uvloop 30 | uvloop.install() 31 | except ImportError as e: 32 | ... 33 | 34 | 35 | class Account: 36 | 37 | def __init__(self, email: str = None, username: str = None, password: str = None, session: Client = None, **kwargs): 38 | self.save = kwargs.get('save', True) 39 | self.debug = kwargs.get('debug', 0) 40 | self.gql_api = 'https://twitter.com/i/api/graphql' 41 | self.v1_api = 'https://api.twitter.com/1.1' 42 | self.v2_api = 'https://twitter.com/i/api/2' 43 | self.logger = self._init_logger(**kwargs) 44 | self.session = self._validate_session(email, username, password, session, **kwargs) 45 | self.rate_limits = {} 46 | 47 | def gql(self, method: str, operation: tuple, variables: dict, features: dict = Operation.default_features) -> dict: 48 | qid, op = operation 49 | params = { 50 | 'queryId': qid, 51 | 'features': features, 52 | 'variables': Operation.default_variables | variables 53 | } 54 | if method == 'POST': 55 | data = {'json': params} 56 | else: 57 | data = {'params': {k: orjson.dumps(v).decode() for k, v in params.items()}} 58 | r = self.session.request( 59 | method=method, 60 | url=f'{self.gql_api}/{qid}/{op}', 61 | headers=get_headers(self.session), 62 | **data 63 | ) 64 | self.rate_limits[op] = {k: int(v) for k, v in r.headers.items() if 'rate-limit' in k} 65 | if self.debug: 66 | log(self.logger, self.debug, r) 67 | return r.json() 68 | 69 | def v1(self, path: str, params: dict) -> dict: 70 | headers = get_headers(self.session) 71 | headers['content-type'] = 'application/x-www-form-urlencoded' 72 | r = self.session.post(f'{self.v1_api}/{path}', headers=headers, data=urlencode(params)) 73 | if self.debug: 74 | log(self.logger, self.debug, r) 75 | return r.json() 76 | 77 | def create_poll(self, text: str, choices: list[str], poll_duration: int) -> dict: 78 | options = { 79 | "twitter:card": "poll4choice_text_only", 80 | "twitter:api:api:endpoint": "1", 81 | "twitter:long:duration_minutes": poll_duration # max: 10080 82 | } 83 | for i, c in enumerate(choices): 84 | options[f"twitter:string:choice{i + 1}_label"] = c 85 | 86 | headers = get_headers(self.session) 87 | headers['content-type'] = 'application/x-www-form-urlencoded' 88 | url = 'https://caps.twitter.com/v2/cards/create.json' 89 | r = self.session.post(url, headers=headers, params={'card_data': orjson.dumps(options).decode()}) 90 | card_uri = r.json()['card_uri'] 91 | r = self.tweet(text, poll_params={'card_uri': card_uri}) 92 | return r 93 | 94 | def dm(self, text: str, receivers: list[int], media: str = '') -> dict: 95 | variables = { 96 | "message": {}, 97 | "requestId": str(uuid1(getnode())), 98 | "target": {"participant_ids": receivers}, 99 | } 100 | if media: 101 | media_id = self._upload_media(media, is_dm=True) 102 | variables['message']['media'] = {'id': media_id, 'text': text} 103 | else: 104 | variables['message']['text'] = {'text': text} 105 | res = self.gql('POST', Operation.useSendMessageMutation, variables) 106 | if find_key(res, 'dm_validation_failure_type'): 107 | if self.debug: 108 | self.logger.debug(f"{RED}Failed to send DM(s) to {receivers}{RESET}") 109 | return res 110 | 111 | def tweet(self, text: str, *, media: any = None, **kwargs) -> dict: 112 | variables = { 113 | 'tweet_text': text, 114 | 'dark_request': False, 115 | 'media': { 116 | 'media_entities': [], 117 | 'possibly_sensitive': False, 118 | }, 119 | 'semantic_annotation_ids': [], 120 | } 121 | 122 | if reply_params := kwargs.get('reply_params', {}): 123 | variables |= 
reply_params 124 | if quote_params := kwargs.get('quote_params', {}): 125 | variables |= quote_params 126 | if poll_params := kwargs.get('poll_params', {}): 127 | variables |= poll_params 128 | 129 | draft = kwargs.get('draft') 130 | schedule = kwargs.get('schedule') 131 | 132 | if draft or schedule: 133 | variables = { 134 | 'post_tweet_request': { 135 | 'auto_populate_reply_metadata': False, 136 | 'status': text, 137 | 'exclude_reply_user_ids': [], 138 | 'media_ids': [], 139 | }, 140 | } 141 | if media: 142 | for m in media: 143 | media_id = self._upload_media(m['media']) 144 | variables['post_tweet_request']['media_ids'].append(media_id) 145 | if alt := m.get('alt'): 146 | self._add_alt_text(media_id, alt) 147 | 148 | if schedule: 149 | variables['execute_at'] = ( 150 | datetime.strptime(schedule, "%Y-%m-%d %H:%M").timestamp() 151 | if isinstance(schedule, str) 152 | else schedule 153 | ) 154 | return self.gql('POST', Operation.CreateScheduledTweet, variables) 155 | 156 | return self.gql('POST', Operation.CreateDraftTweet, variables) 157 | 158 | # regular tweet 159 | if media: 160 | for m in media: 161 | media_id = self._upload_media(m['media']) 162 | variables['media']['media_entities'].append({ 163 | 'media_id': media_id, 164 | 'tagged_users': m.get('tagged_users', []) 165 | }) 166 | if alt := m.get('alt'): 167 | self._add_alt_text(media_id, alt) 168 | 169 | return self.gql('POST', Operation.CreateTweet, variables) 170 | 171 | def schedule_tweet(self, text: str, date: int | str, *, media: list = None) -> dict: 172 | variables = { 173 | 'post_tweet_request': { 174 | 'auto_populate_reply_metadata': False, 175 | 'status': text, 176 | 'exclude_reply_user_ids': [], 177 | 'media_ids': [], 178 | }, 179 | 'execute_at': ( 180 | datetime.strptime(date, "%Y-%m-%d %H:%M").timestamp() 181 | if isinstance(date, str) 182 | else date 183 | ), 184 | } 185 | if media: 186 | for m in media: 187 | media_id = self._upload_media(m['media']) 188 | variables['post_tweet_request']['media_ids'].append(media_id) 189 | if alt := m.get('alt'): 190 | self._add_alt_text(media_id, alt) 191 | return self.gql('POST', Operation.CreateScheduledTweet, variables) 192 | 193 | def schedule_reply(self, text: str, date: int | str, tweet_id: int, *, media: list = None) -> dict: 194 | variables = { 195 | 'post_tweet_request': { 196 | 'auto_populate_reply_metadata': True, 197 | 'in_reply_to_status_id': tweet_id, 198 | 'status': text, 199 | 'exclude_reply_user_ids': [], 200 | 'media_ids': [], 201 | }, 202 | 'execute_at': ( 203 | datetime.strptime(date, "%Y-%m-%d %H:%M").timestamp() 204 | if isinstance(date, str) 205 | else date 206 | ), 207 | } 208 | if media: 209 | for m in media: 210 | media_id = self._upload_media(m['media']) 211 | variables['post_tweet_request']['media_ids'].append(media_id) 212 | if alt := m.get('alt'): 213 | self._add_alt_text(media_id, alt) 214 | return self.gql('POST', Operation.CreateScheduledTweet, variables) 215 | 216 | def unschedule_tweet(self, tweet_id: int) -> dict: 217 | variables = {'scheduled_tweet_id': tweet_id} 218 | return self.gql('POST', Operation.DeleteScheduledTweet, variables) 219 | 220 | def untweet(self, tweet_id: int) -> dict: 221 | variables = {'tweet_id': tweet_id, 'dark_request': False} 222 | return self.gql('POST', Operation.DeleteTweet, variables) 223 | 224 | def reply(self, text: str, tweet_id: int) -> dict: 225 | variables = { 226 | 'tweet_text': text, 227 | 'reply': { 228 | 'in_reply_to_tweet_id': tweet_id, 229 | 'exclude_reply_user_ids': [], 230 | }, 231 | 'batch_compose': 
'BatchSubsequent', 232 | 'dark_request': False, 233 | 'media': { 234 | 'media_entities': [], 235 | 'possibly_sensitive': False, 236 | }, 237 | 'semantic_annotation_ids': [], 238 | } 239 | return self.gql('POST', Operation.CreateTweet, variables) 240 | 241 | def quote(self, text: str, tweet_id: int) -> dict: 242 | variables = { 243 | 'tweet_text': text, 244 | # can use `i` as it resolves to screen_name 245 | 'attachment_url': f'https://twitter.com/i/status/{tweet_id}', 246 | 'dark_request': False, 247 | 'media': { 248 | 'media_entities': [], 249 | 'possibly_sensitive': False, 250 | }, 251 | 'semantic_annotation_ids': [], 252 | } 253 | return self.gql('POST', Operation.CreateTweet, variables) 254 | 255 | def retweet(self, tweet_id: int) -> dict: 256 | variables = {"tweet_id": tweet_id, "dark_request": False} 257 | return self.gql('POST', Operation.CreateRetweet, variables) 258 | 259 | def unretweet(self, tweet_id: int) -> dict: 260 | variables = {"source_tweet_id": tweet_id, "dark_request": False} 261 | return self.gql('POST', Operation.DeleteRetweet, variables) 262 | 263 | def like(self, tweet_id: int) -> dict: 264 | variables = {'tweet_id': tweet_id} 265 | return self.gql('POST', Operation.FavoriteTweet, variables) 266 | 267 | def unlike(self, tweet_id: int) -> dict: 268 | variables = {'tweet_id': tweet_id} 269 | return self.gql('POST', Operation.UnfavoriteTweet, variables) 270 | 271 | def bookmark(self, tweet_id: int) -> dict: 272 | variables = {'tweet_id': tweet_id} 273 | return self.gql('POST', Operation.CreateBookmark, variables) 274 | 275 | def unbookmark(self, tweet_id: int) -> dict: 276 | variables = {'tweet_id': tweet_id} 277 | return self.gql('POST', Operation.DeleteBookmark, variables) 278 | 279 | def create_list(self, name: str, description: str, private: bool) -> dict: 280 | variables = { 281 | "isPrivate": private, 282 | "name": name, 283 | "description": description, 284 | } 285 | return self.gql('POST', Operation.CreateList, variables) 286 | 287 | def update_list(self, list_id: int, name: str, description: str, private: bool) -> dict: 288 | variables = { 289 | "listId": list_id, 290 | "isPrivate": private, 291 | "name": name, 292 | "description": description, 293 | } 294 | return self.gql('POST', Operation.UpdateList, variables) 295 | 296 | def update_pinned_lists(self, list_ids: list[int]) -> dict: 297 | """ 298 | Update pinned lists. 299 | Reset all pinned lists and pin all specified lists in the order they are provided. 
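Passing an empty list unpins all lists.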
300 | 301 | @param list_ids: list of list ids to pin 302 | @return: response 303 | """ 304 | return self.gql('POST', Operation.ListsPinMany, {'listIds': list_ids}) 305 | 306 | def pin_list(self, list_id: int) -> dict: 307 | return self.gql('POST', Operation.ListPinOne, {'listId': list_id}) 308 | 309 | def unpin_list(self, list_id: int) -> dict: 310 | return self.gql('POST', Operation.ListUnpinOne, {'listId': list_id}) 311 | 312 | def add_list_member(self, list_id: int, user_id: int) -> dict: 313 | return self.gql('POST', Operation.ListAddMember, {'listId': list_id, "userId": user_id}) 314 | 315 | def remove_list_member(self, list_id: int, user_id: int) -> dict: 316 | return self.gql('POST', Operation.ListRemoveMember, {'listId': list_id, "userId": user_id}) 317 | 318 | def delete_list(self, list_id: int) -> dict: 319 | return self.gql('POST', Operation.DeleteList, {'listId': list_id}) 320 | 321 | def update_list_banner(self, list_id: int, media: str) -> dict: 322 | media_id = self._upload_media(media) 323 | variables = {'listId': list_id, 'mediaId': media_id} 324 | return self.gql('POST', Operation.EditListBanner, variables) 325 | 326 | def delete_list_banner(self, list_id: int) -> dict: 327 | return self.gql('POST', Operation.DeleteListBanner, {'listId': list_id}) 328 | 329 | def follow_topic(self, topic_id: int) -> dict: 330 | return self.gql('POST', Operation.TopicFollow, {'topicId': str(topic_id)}) 331 | 332 | def unfollow_topic(self, topic_id: int) -> dict: 333 | return self.gql('POST', Operation.TopicUnfollow, {'topicId': str(topic_id)}) 334 | 335 | def pin(self, tweet_id: int) -> dict: 336 | return self.v1('account/pin_tweet.json', {'tweet_mode': 'extended', 'id': tweet_id}) 337 | 338 | def unpin(self, tweet_id: int) -> dict: 339 | return self.v1('account/unpin_tweet.json', {'tweet_mode': 'extended', 'id': tweet_id}) 340 | 341 | def follow(self, user_id: int) -> dict: 342 | settings = deepcopy(follow_settings) 343 | settings |= {"user_id": user_id} 344 | return self.v1('friendships/create.json', settings) 345 | 346 | def unfollow(self, user_id: int) -> dict: 347 | settings = deepcopy(follow_settings) 348 | settings |= {"user_id": user_id} 349 | return self.v1('friendships/destroy.json', settings) 350 | 351 | def mute(self, user_id: int) -> dict: 352 | return self.v1('mutes/users/create.json', {'user_id': user_id}) 353 | 354 | def unmute(self, user_id: int) -> dict: 355 | return self.v1('mutes/users/destroy.json', {'user_id': user_id}) 356 | 357 | def enable_follower_notifications(self, user_id: int) -> dict: 358 | settings = deepcopy(follower_notification_settings) 359 | settings |= {'id': user_id, 'device': 'true'} 360 | return self.v1('friendships/update.json', settings) 361 | 362 | def disable_follower_notifications(self, user_id: int) -> dict: 363 | settings = deepcopy(follower_notification_settings) 364 | settings |= {'id': user_id, 'device': 'false'} 365 | return self.v1('friendships/update.json', settings) 366 | 367 | def block(self, user_id: int) -> dict: 368 | return self.v1('blocks/create.json', {'user_id': user_id}) 369 | 370 | def unblock(self, user_id: int) -> dict: 371 | return self.v1('blocks/destroy.json', {'user_id': user_id}) 372 | 373 | def update_profile_image(self, media: str) -> Response: 374 | media_id = self._upload_media(media, is_profile=True) 375 | url = f'{self.v1_api}/account/update_profile_image.json' 376 | headers = get_headers(self.session) 377 | params = {'media_id': media_id} 378 | r = self.session.post(url, headers=headers, params=params) 379 | 
return r 380 | 381 | def update_profile_banner(self, media: str) -> Response: 382 | media_id = self._upload_media(media, is_profile=True) 383 | url = f'{self.v1_api}/account/update_profile_banner.json' 384 | headers = get_headers(self.session) 385 | params = {'media_id': media_id} 386 | r = self.session.post(url, headers=headers, params=params) 387 | return r 388 | 389 | def update_profile_info(self, **kwargs) -> Response: 390 | url = f'{self.v1_api}/account/update_profile.json' 391 | headers = get_headers(self.session) 392 | r = self.session.post(url, headers=headers, params=kwargs) 393 | return r 394 | 395 | def update_search_settings(self, settings: dict) -> Response: 396 | twid = int(self.session.cookies.get('twid').split('=')[-1].strip('"')) 397 | headers = get_headers(self.session) 398 | r = self.session.post( 399 | url=f'{self.v1_api}/strato/column/User/{twid}/search/searchSafety', 400 | headers=headers, 401 | json=settings, 402 | ) 403 | return r 404 | 405 | def update_settings(self, settings: dict) -> dict: 406 | return self.v1('account/settings.json', settings) 407 | 408 | def change_password(self, old: str, new: str) -> dict: 409 | params = { 410 | 'current_password': old, 411 | 'password': new, 412 | 'password_confirmation': new 413 | } 414 | headers = get_headers(self.session) 415 | headers['content-type'] = 'application/x-www-form-urlencoded' 416 | url = 'https://twitter.com/i/api/i/account/change_password.json' 417 | r = self.session.post(url, headers=headers, data=urlencode(params)) 418 | return r.json() 419 | 420 | def remove_interests(self, *args): 421 | """ 422 | Pass 'all' to remove all interests 423 | """ 424 | r = self.session.get( 425 | f'{self.v1_api}/account/personalization/twitter_interests.json', 426 | headers=get_headers(self.session) 427 | ) 428 | current_interests = r.json()['interested_in'] 429 | if args == 'all': 430 | disabled_interests = [x['id'] for x in current_interests] 431 | else: 432 | disabled_interests = [x['id'] for x in current_interests if x['display_name'] in args] 433 | payload = { 434 | "preferences": { 435 | "interest_preferences": { 436 | "disabled_interests": disabled_interests, 437 | "disabled_partner_interests": [] 438 | } 439 | } 440 | } 441 | r = self.session.post( 442 | f'{self.v1_api}/account/personalization/p13n_preferences.json', 443 | headers=get_headers(self.session), 444 | json=payload 445 | ) 446 | return r 447 | 448 | def home_timeline(self, limit=math.inf) -> list[dict]: 449 | return self._paginate('POST', Operation.HomeTimeline, Operation.default_variables, limit) 450 | 451 | def home_latest_timeline(self, limit=math.inf) -> list[dict]: 452 | return self._paginate('POST', Operation.HomeLatestTimeline, Operation.default_variables, limit) 453 | 454 | def bookmarks(self, limit=math.inf) -> list[dict]: 455 | return self._paginate('GET', Operation.Bookmarks, {}, limit) 456 | 457 | def _paginate(self, method: str, operation: tuple, variables: dict, limit: int) -> list[dict]: 458 | initial_data = self.gql(method, operation, variables) 459 | res = [initial_data] 460 | ids = set(find_key(initial_data, 'rest_id')) 461 | dups = 0 462 | DUP_LIMIT = 3 463 | 464 | cursor = get_cursor(initial_data) 465 | while (dups < DUP_LIMIT) and cursor: 466 | prev_len = len(ids) 467 | if prev_len >= limit: 468 | return res 469 | 470 | variables['cursor'] = cursor 471 | data = self.gql(method, operation, variables) 472 | 473 | cursor = get_cursor(data) 474 | ids |= set(find_key(data, 'rest_id')) 475 | 476 | if self.debug: 477 | 
self.logger.debug(f'cursor: {cursor}\tunique results: {len(ids)}') 478 | 479 | if prev_len == len(ids): 480 | dups += 1 481 | 482 | res.append(data) 483 | return res 484 | 485 | def _upload_media(self, filename: str, is_dm: bool = False, is_profile=False) -> int | None: 486 | """ 487 | https://developer.twitter.com/en/docs/twitter-api/v1/media/upload-media/uploading-media/media-best-practices 488 | """ 489 | 490 | def check_media(category: str, size: int) -> None: 491 | fmt = lambda x: f'{(x / 1e6):.2f} MB' 492 | msg = lambda x: f'cannot upload {fmt(size)} {category}, max size is {fmt(x)}' 493 | if category == 'image' and size > MAX_IMAGE_SIZE: 494 | raise Exception(msg(MAX_IMAGE_SIZE)) 495 | if category == 'gif' and size > MAX_GIF_SIZE: 496 | raise Exception(msg(MAX_GIF_SIZE)) 497 | if category == 'video' and size > MAX_VIDEO_SIZE: 498 | raise Exception(msg(MAX_VIDEO_SIZE)) 499 | 500 | # if is_profile: 501 | # url = 'https://upload.twitter.com/i/media/upload.json' 502 | # else: 503 | # url = 'https://upload.twitter.com/1.1/media/upload.json' 504 | 505 | url = 'https://upload.twitter.com/i/media/upload.json' 506 | 507 | file = Path(filename) 508 | total_bytes = file.stat().st_size 509 | headers = get_headers(self.session) 510 | 511 | upload_type = 'dm' if is_dm else 'tweet' 512 | media_type = mimetypes.guess_type(file)[0] 513 | media_category = f'{upload_type}_gif' if 'gif' in media_type else f'{upload_type}_{media_type.split("/")[0]}' 514 | 515 | check_media(media_category, total_bytes) 516 | 517 | params = {'command': 'INIT', 'media_type': media_type, 'total_bytes': total_bytes, 518 | 'media_category': media_category} 519 | r = self.session.post(url=url, headers=headers, params=params) 520 | 521 | if r.status_code >= 400: 522 | raise Exception(f'{r.text}') 523 | 524 | media_id = r.json()['media_id'] 525 | 526 | desc = f"uploading: {file.name}" 527 | with tqdm(total=total_bytes, desc=desc, unit='B', unit_scale=True, unit_divisor=1024) as pbar: 528 | with open(file, 'rb') as fp: 529 | i = 0 530 | while chunk := fp.read(UPLOAD_CHUNK_SIZE): 531 | params = {'command': 'APPEND', 'media_id': media_id, 'segment_index': i} 532 | try: 533 | pad = bytes(''.join(random.choices(ascii_letters, k=16)), encoding='utf-8') 534 | data = b''.join([ 535 | b'------WebKitFormBoundary', 536 | pad, 537 | b'\r\nContent-Disposition: form-data; name="media"; filename="blob"', 538 | b'\r\nContent-Type: application/octet-stream', 539 | b'\r\n\r\n', 540 | chunk, 541 | b'\r\n------WebKitFormBoundary', 542 | pad, 543 | b'--\r\n', 544 | ]) 545 | _headers = {b'content-type': b'multipart/form-data; boundary=----WebKitFormBoundary' + pad} 546 | r = self.session.post(url=url, headers=headers | _headers, params=params, content=data) 547 | except Exception as e: 548 | if self.debug: 549 | self.logger.error(f'Failed to upload chunk, trying alternative method\n{e}') 550 | try: 551 | files = {'media': chunk} 552 | r = self.session.post(url=url, headers=headers, params=params, files=files) 553 | except Exception as e: 554 | if self.debug: 555 | self.logger.error(f'Failed to upload chunk\n{e}') 556 | return 557 | 558 | if r.status_code < 200 or r.status_code > 299: 559 | if self.debug: 560 | self.logger.debug(f'{RED}{r.status_code} {r.text}{RESET}') 561 | 562 | i += 1 563 | pbar.update(fp.tell() - pbar.n) 564 | 565 | params = {'command': 'FINALIZE', 'media_id': media_id, 'allow_async': 'true'} 566 | if is_dm: 567 | params |= {'original_md5': hashlib.md5(file.read_bytes()).hexdigest()} 568 | r = self.session.post(url=url, 
headers=headers, params=params) 569 | if r.status_code == 400: 570 | if self.debug: 571 | self.logger.debug(f'{RED}{r.status_code} {r.text}{RESET}') 572 | return 573 | 574 | # self.logger.debug(f'processing, please wait...') 575 | processing_info = r.json().get('processing_info') 576 | while processing_info: 577 | state = processing_info['state'] 578 | if error := processing_info.get("error"): 579 | if self.debug: 580 | self.logger.debug(f'{RED}{error}{RESET}') 581 | return 582 | if state == MEDIA_UPLOAD_SUCCEED: 583 | break 584 | if state == MEDIA_UPLOAD_FAIL: 585 | if self.debug: 586 | self.logger.debug(f'{RED}{r.status_code} {r.text} {RESET}') 587 | return 588 | check_after_secs = processing_info.get('check_after_secs', random.randint(1, 5)) 589 | time.sleep(check_after_secs) 590 | params = {'command': 'STATUS', 'media_id': media_id} 591 | r = self.session.get(url=url, headers=headers, params=params) 592 | processing_info = r.json().get('processing_info') 593 | # self.logger.debug('processing complete') 594 | return media_id 595 | 596 | def _add_alt_text(self, media_id: int, text: str) -> Response: 597 | params = {"media_id": media_id, "alt_text": {"text": text}} 598 | url = f'{self.v1_api}/media/metadata/create.json' 599 | r = self.session.post(url, headers=get_headers(self.session), json=params) 600 | return r 601 | 602 | def _init_logger(self, **kwargs) -> Logger: 603 | if kwargs.get('debug'): 604 | cfg = kwargs.get('log_config') 605 | logging.config.dictConfig(cfg or LOG_CONFIG) 606 | 607 | # only support one logger 608 | logger_name = list(LOG_CONFIG['loggers'].keys())[0] 609 | 610 | # set level of all other loggers to ERROR 611 | for name in logging.root.manager.loggerDict: 612 | if name != logger_name: 613 | logging.getLogger(name).setLevel(logging.ERROR) 614 | 615 | return logging.getLogger(logger_name) 616 | 617 | @staticmethod 618 | def _validate_session(*args, **kwargs): 619 | email, username, password, session = args 620 | 621 | # validate credentials 622 | if all((email, username, password)): 623 | session = login(email, username, password, **kwargs) 624 | session._init_with_cookies = False 625 | return session 626 | 627 | # invalid credentials, try validating session 628 | if session and all(session.cookies.get(c) for c in {'ct0', 'auth_token'}): 629 | session._init_with_cookies = True 630 | return session 631 | 632 | # invalid credentials and session 633 | cookies = kwargs.get('cookies') 634 | 635 | # try validating cookies dict 636 | if isinstance(cookies, dict) and all(cookies.get(c) for c in {'ct0', 'auth_token'}): 637 | _session = Client(cookies=cookies, follow_redirects=True) 638 | _session._init_with_cookies = True 639 | _session.headers.update(get_headers(_session)) 640 | return _session 641 | 642 | # try validating cookies from file 643 | if isinstance(cookies, str): 644 | _session = Client(cookies=orjson.loads(Path(cookies).read_bytes()), follow_redirects=True) 645 | _session._init_with_cookies = True 646 | _session.headers.update(get_headers(_session)) 647 | return _session 648 | 649 | raise Exception('Session not authenticated. ' 650 | 'Please use an authenticated session or remove the `session` argument and try again.') 651 | 652 | def dm_inbox(self) -> dict: 653 | """ 654 | Get DM inbox metadata. 
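The conversation ids it contains can be passed to `dm_history()`.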
655 | 656 | @return: inbox as dict 657 | """ 658 | r = self.session.get( 659 | f'{self.v1_api}/dm/inbox_initial_state.json', 660 | headers=get_headers(self.session), 661 | params=dm_params 662 | ) 663 | return r.json() 664 | 665 | def dm_history(self, conversation_ids: list[str] = None) -> list[dict]: 666 | """ 667 | Get DM history. 668 | 669 | Call without arguments to get all DMS from all conversations. 670 | 671 | @param conversation_ids: optional list of conversation ids 672 | @return: list of messages as dicts 673 | """ 674 | 675 | async def get(session: AsyncClient, conversation_id: str): 676 | params = deepcopy(dm_params) 677 | r = await session.get( 678 | f'{self.v1_api}/dm/conversation/{conversation_id}.json', 679 | params=params, 680 | ) 681 | res = r.json().get('conversation_timeline', {}) 682 | data = [x.get('message') for x in res.get('entries', [])] 683 | entry_id = res.get('min_entry_id') 684 | while entry_id: 685 | params['max_id'] = entry_id 686 | r = await session.get( 687 | f'{self.v1_api}/dm/conversation/{conversation_id}.json', 688 | params=params, 689 | ) 690 | res = r.json().get('conversation_timeline', {}) 691 | data.extend(x['message'] for x in res.get('entries', [])) 692 | entry_id = res.get('min_entry_id') 693 | return data 694 | 695 | async def process(ids): 696 | limits = Limits(max_connections=100) 697 | headers, cookies = get_headers(self.session), self.session.cookies 698 | async with AsyncClient(limits=limits, headers=headers, cookies=cookies, timeout=20) as c: 699 | return await tqdm_asyncio.gather(*(get(c, _id) for _id in ids), desc="Getting DMs") 700 | 701 | if conversation_ids: 702 | ids = conversation_ids 703 | else: 704 | # get all conversations 705 | inbox = self.dm_inbox() 706 | ids = list(inbox['inbox_initial_state']['conversations']) 707 | 708 | return asyncio.run(process(ids)) 709 | 710 | def dm_delete(self, *, conversation_id: str = None, message_id: str = None) -> dict: 711 | """ 712 | Delete operations 713 | 714 | - delete (hide) a single DM 715 | - delete an entire conversation 716 | 717 | @param conversation_id: the conversation id 718 | @param message_id: the message id 719 | @return: result metadata 720 | """ 721 | self.session.headers.update(headers=get_headers(self.session)) 722 | results = {'conversation': None, 'message': None} 723 | if conversation_id: 724 | results['conversation'] = self.session.post( 725 | f'{self.v1_api}/dm/conversation/{conversation_id}/delete.json', 726 | ).text # not json response 727 | if message_id: 728 | # delete single message 729 | _id, op = Operation.DMMessageDeleteMutation 730 | results['message'] = self.session.post( 731 | f'{self.gql_api}/{_id}/{op}', 732 | json={'queryId': _id, 'variables': {'messageId': message_id}}, 733 | ).json() 734 | return results 735 | 736 | def dm_search(self, query: str) -> dict: 737 | """ 738 | Search DMs by keyword 739 | 740 | @param query: search term 741 | @return: search results as dict 742 | """ 743 | 744 | def get(cursor=None): 745 | if cursor: 746 | params['variables']['cursor'] = cursor.pop() 747 | _id, op = Operation.DmAllSearchSlice 748 | r = self.session.get( 749 | f'{self.gql_api}/{_id}/{op}', 750 | params=build_params(params), 751 | ) 752 | res = r.json() 753 | cursor = find_key(res, 'next_cursor') 754 | return res, cursor 755 | 756 | self.session.headers.update(headers=get_headers(self.session)) 757 | variables = deepcopy(Operation.default_variables) 758 | variables['count'] = 50 # strict limit, errors thrown if exceeded 759 | variables['query'] = query 760 | 
params = {'variables': variables, 'features': Operation.default_features} 761 | res, cursor = get() 762 | data = [res] 763 | while cursor: 764 | res, cursor = get(cursor) 765 | data.append(res) 766 | return {'query': query, 'data': data} 767 | 768 | def scheduled_tweets(self, ascending: bool = True) -> dict: 769 | variables = {"ascending": ascending} 770 | return self.gql('GET', Operation.FetchScheduledTweets, variables) 771 | 772 | def delete_scheduled_tweet(self, tweet_id: int) -> dict: 773 | """duplicate, same as `unschedule_tweet()`""" 774 | variables = {'scheduled_tweet_id': tweet_id} 775 | return self.gql('POST', Operation.DeleteScheduledTweet, variables) 776 | 777 | def clear_scheduled_tweets(self) -> None: 778 | user_id = int(re.findall('"u=(\d+)"', self.session.cookies.get('twid'))[0]) 779 | drafts = self.gql('GET', Operation.FetchScheduledTweets, {"ascending": True}) 780 | for _id in set(find_key(drafts, 'rest_id')): 781 | if _id != user_id: 782 | self.gql('POST', Operation.DeleteScheduledTweet, {'scheduled_tweet_id': _id}) 783 | 784 | def draft_tweets(self, ascending: bool = True) -> dict: 785 | variables = {"ascending": ascending} 786 | return self.gql('GET', Operation.FetchDraftTweets, variables) 787 | 788 | def delete_draft_tweet(self, tweet_id: int) -> dict: 789 | variables = {'draft_tweet_id': tweet_id} 790 | return self.gql('POST', Operation.DeleteDraftTweet, variables) 791 | 792 | def clear_draft_tweets(self) -> None: 793 | user_id = int(re.findall('"u=(\d+)"', self.session.cookies.get('twid'))[0]) 794 | drafts = self.gql('GET', Operation.FetchDraftTweets, {"ascending": True}) 795 | for _id in set(find_key(drafts, 'rest_id')): 796 | if _id != user_id: 797 | self.gql('POST', Operation.DeleteDraftTweet, {'draft_tweet_id': _id}) 798 | 799 | def notifications(self, params: dict = None) -> dict: 800 | r = self.session.get( 801 | f'{self.v2_api}/notifications/all.json', 802 | headers=get_headers(self.session), 803 | params=params or live_notification_params 804 | ) 805 | if self.debug: 806 | log(self.logger, self.debug, r) 807 | return r.json() 808 | 809 | def recommendations(self, params: dict = None) -> dict: 810 | r = self.session.get( 811 | f'{self.v1_api}/users/recommendations.json', 812 | headers=get_headers(self.session), 813 | params=params or recommendations_params 814 | ) 815 | if self.debug: 816 | log(self.logger, self.debug, r) 817 | return r.json() 818 | 819 | def fleetline(self, params: dict = None) -> dict: 820 | r = self.session.get( 821 | 'https://twitter.com/i/api/fleets/v1/fleetline', 822 | headers=get_headers(self.session), 823 | params=params or {} 824 | ) 825 | if self.debug: 826 | log(self.logger, self.debug, r) 827 | return r.json() 828 | 829 | @property 830 | def id(self) -> int: 831 | """ Get User ID """ 832 | return int(re.findall('"u=(\d+)"', self.session.cookies.get('twid'))[0]) 833 | 834 | def save_cookies(self, fname: str = None): 835 | """ Save cookies to file """ 836 | cookies = self.session.cookies 837 | Path(f'{fname or cookies.get("username")}.cookies').write_bytes(orjson.dumps(dict(cookies))) 838 | -------------------------------------------------------------------------------- /twitter/constants.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | 3 | # todo: not accurate measure. value will decrease as new gql features/variables are required. (actual limitation is request size, i.e. 
new gql features an variables contribute to total request size) 4 | MAX_GQL_CHAR_LIMIT = 4_200 5 | 6 | MAX_ENDPOINT_LIMIT = 500 # 500/15 mins 7 | 8 | MAX_IMAGE_SIZE = 5_242_880 # ~5 MB 9 | MAX_GIF_SIZE = 15_728_640 # ~15 MB 10 | MAX_VIDEO_SIZE = 536_870_912 # ~530 MB 11 | 12 | UPLOAD_CHUNK_SIZE = 4 * 1024 * 1024 13 | MEDIA_UPLOAD_SUCCEED = 'succeeded' 14 | MEDIA_UPLOAD_FAIL = 'failed' 15 | 16 | BLACK = '\x1b[30m' 17 | RED = '\x1b[31m' 18 | GREEN = '\x1b[32m' 19 | YELLOW = '\x1b[33m' 20 | ORANGE = '\x1b[38;5;208m' 21 | BLUE = '\x1b[34m' 22 | MAGENTA = '\x1b[35m' 23 | CYAN = '\x1b[36m' 24 | WHITE = '\x1b[37m' 25 | BOLD = '\x1b[1m' 26 | RESET = '\x1b[0m' 27 | 28 | LOG_CONFIG = { 29 | 'version': 1, 30 | 'disable_existing_loggers': False, 31 | 'formatters': { 32 | 'standard': { 33 | 'format': '%(asctime)s.%(msecs)03d [%(levelname)s] :: %(message)s', 34 | 'datefmt': '%Y-%m-%d %H:%M:%S' 35 | }, 36 | }, 37 | 'handlers': { 38 | 'console': { 39 | 'class': 'logging.StreamHandler', 40 | 'level': 'DEBUG', 41 | 'formatter': 'standard', 42 | 'stream': 'ext://sys.stdout', 43 | }, 44 | 'file': { 45 | 'class': 'logging.FileHandler', 46 | 'level': 'DEBUG', 47 | 'formatter': 'standard', 48 | 'filename': 'twitter.log', 49 | 'mode': 'a', 50 | }, 51 | }, 52 | 'loggers': { 53 | 'twitter': { 54 | 'handlers': ['console', 'file'], 55 | 'level': 'DEBUG', 56 | } 57 | } 58 | } 59 | 60 | USER_AGENTS = [ 61 | 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36', 62 | 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.3', 63 | 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/116.0', 64 | 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Safari/537.36 Edg/115.0.1901.20', 65 | 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Safari/537.3', 66 | 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Safari/537.36', 67 | 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.5.2 Safari/605.1.15', 68 | 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:109.0) Gecko/20100101 Firefox/116.0', 69 | 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.5.1 Safari/605.1.15', 70 | ] 71 | 72 | 73 | @dataclass 74 | class SearchCategory: 75 | Top = 'Top' 76 | Latest = 'Latest' 77 | People = 'People' 78 | Photos = 'Photos' 79 | Videos = 'Videos' 80 | 81 | 82 | @dataclass 83 | class SpaceCategory: 84 | Top = 'Top' 85 | Live = 'Live' 86 | Upcoming = 'Upcoming' 87 | 88 | 89 | @dataclass 90 | class SpaceState: 91 | Ended = 'Ended' 92 | Canceled = 'Canceled' 93 | NotStarted = 'NotStarted' 94 | PrePublished = 'PrePublished' 95 | Running = 'Running' 96 | TimedOut = 'TimedOut' 97 | 98 | 99 | @dataclass 100 | class Operation: 101 | # todo: dynamically update 102 | SearchTimeline = {'rawQuery': str, 'product': str}, 'nK1dw4oV3k4w5TdtcAdSww', 'SearchTimeline' 103 | AudioSpaceById = {'id': str}, 'fYAuJHiY3TmYdBmrRtIKhA', 'AudioSpaceById' 104 | AudioSpaceSearch = {'filter': str, 'query': str}, 'NTq79TuSz6fHj8lQaferJw', 'AudioSpaceSearch', 105 | UserByScreenName = {'screen_name': str}, 'sLVLhk0bGj3MVFEKTdax1w', 'UserByScreenName' 106 | UserTweets = {'userId': int}, 'HuTx74BxAnezK1gWvYY7zg', 'UserTweets' 107 | ProfileSpotlightsQuery = {'screen_name': str}, 
'9zwVLJ48lmVUk8u_Gh9DmA', 'ProfileSpotlightsQuery' 108 | UserByRestId = {'userId': int}, 'GazOglcBvgLigl3ywt6b3Q', 'UserByRestId' 109 | UsersByRestIds = {'userIds': list}, 'OJBgJQIrij6e3cjqQ3Zu1Q', 'UsersByRestIds' 110 | UserMedia = {'userId': int}, 'YqiE3JL1KNgf9nSljYdxaA', 'UserMedia' 111 | UserTweetsAndReplies = {'userId': int}, 'RIWc55YCNyUJ-U3HHGYkdg', 'UserTweetsAndReplies' 112 | TweetResultByRestId = {'tweetId': int}, 'D_jNhjWZeRZT5NURzfJZSQ', 'TweetResultByRestId' 113 | TweetResultsByRestIds = {'tweetIds': list[int | str]}, 'BWy5aoI-WvwbeSiHUIf2Hw', 'TweetResultsByRestIds' 114 | TweetDetail = {'focalTweetId': int}, 'zXaXQgfyR4GxE21uwYQSyA', 'TweetDetail' 115 | TweetStats = {'rest_id': int}, 'EvbTkPDT-xQCfupPu0rWMA', 'TweetStats' 116 | Likes = {'userId': int}, 'nXEl0lfN_XSznVMlprThgQ', 'Likes' 117 | Followers = {'userId': int}, 'pd8Tt1qUz1YWrICegqZ8cw', 'Followers' 118 | Following = {'userId': int}, 'wjvx62Hye2dGVvnvVco0xA', 'Following' 119 | Retweeters = {'tweetId': int}, '0BoJlKAxoNPQUHRftlwZ2w', 'Retweeters' 120 | Favoriters = {'tweetId': int}, 'XRRjv1-uj1HZn3o324etOQ', 'Favoriters' 121 | ConnectTabTimeline = {'context': dict}, 'lq02A-gEzbLefqTgD_PFzQ', 'ConnectTabTimeline' 122 | 123 | # Account Operations 124 | useSendMessageMutation = 'MaxK2PKX1F9Z-9SwqwavTw', 'useSendMessageMutation' 125 | CreateTweet = '7TKRKCPuAGsmYde0CudbVg', 'CreateTweet' 126 | DeleteTweet = 'VaenaVgh5q5ih7kvyVjgtg', 'DeleteTweet' 127 | CreateScheduledTweet = 'LCVzRQGxOaGnOnYH01NQXg', 'CreateScheduledTweet' 128 | DeleteScheduledTweet = 'CTOVqej0JBXAZSwkp1US0g', 'DeleteScheduledTweet' 129 | CreateRetweet = 'ojPdsZsimiJrUGLR1sjUtA', 'CreateRetweet' 130 | DeleteRetweet = 'iQtK4dl5hBmXewYZuEOKVw', 'DeleteRetweet' 131 | FavoriteTweet = 'lI07N6Otwv1PhnEgXILM7A', 'FavoriteTweet' 132 | UnfavoriteTweet = 'ZYKSe-w7KEslx3JhSIk5LA', 'UnfavoriteTweet' 133 | CreateBookmark = 'aoDbu3RHznuiSkQ9aNM67Q', 'CreateBookmark' 134 | DeleteBookmark = 'Wlmlj2-xzyS1GN3a6cj-mQ', 'DeleteBookmark' 135 | CreateList = 'hQAsnViq2BrMLbPuQ9umDA', 'CreateList' 136 | UpdateList = '4dCEFWtxEbhnSLcJdJ6PNg', 'UpdateList' 137 | ListsPinMany = '2X4Vqu6XLneR-XZnGK5MAw', 'ListsPinMany' 138 | ListPinOne = '2pYlo-kjdXoNOZJoLzI6KA', 'ListPinOne' 139 | ListUnpinOne = 'c4ce-hzx6V4heV5IzdeBkA', 'ListUnpinOne' 140 | ListAddMember = 'P8tyfv2_0HzofrB5f6_ugw', 'ListAddMember' 141 | ListRemoveMember = 'DBZowzFN492FFkBPBptCwg', 'ListRemoveMember' 142 | DeleteList = 'UnN9Th1BDbeLjpgjGSpL3Q', 'DeleteList' 143 | EditListBanner = 'Uk0ZwKSMYng56aQdeJD1yw', 'EditListBanner' 144 | DeleteListBanner = '-bOKetDVCMl20qXn7YDXIA', 'DeleteListBanner' 145 | TopicFollow = 'ElqSLWFmsPL4NlZI5e1Grg', 'TopicFollow' 146 | TopicUnfollow = 'srwjU6JM_ZKTj_QMfUGNcw', 'TopicUnfollow' 147 | HomeLatestTimeline = 'zhX91JE87mWvfprhYE97xA', 'HomeLatestTimeline' 148 | HomeTimeline = 'HCosKfLNW1AcOo3la3mMgg', 'HomeTimeline' 149 | Bookmarks = 'tmd4ifV8RHltzn8ymGg1aw', 'Bookmarks' 150 | 151 | # misc/not implemented 152 | AdAccounts = 'a8KxGfFQAmm3WxqemuqSRA', 'AdAccounts' 153 | ArticleTimeline = 'o9FyvnC-xg8mVBXqL4g-rg', 'ArticleTimeline' 154 | ArticleTweetsTimeline = 'x4ywSpvg6BesoDszkfbFQg', 'ArticleTweetsTimeline' 155 | AudienceEstimate = '1LYVUabJBYkPlUAWRabB3g', 'AudienceEstimate' 156 | AuthenticatedUserTFLists = 'QjN8ZdavFDqxUjNn3r9cig', 'AuthenticatedUserTFLists' 157 | BirdwatchAliasSelect = '3ss48WFwGokBH_gj8t_8aQ', 'BirdwatchAliasSelect' 158 | BirdwatchCreateAppeal = 'TKdL0YFsX4DMOpMKeneLvA', 'BirdwatchCreateAppeal' 159 | BirdwatchCreateNote = '36EUZZyaciVmNrq4CRZcmw', 
'BirdwatchCreateNote' 160 | BirdwatchCreateRating = 'bD3AEK9BMCSpRods_ng2fA', 'BirdwatchCreateRating' 161 | BirdwatchDeleteNote = 'IKS_qrShkDyor6Ri1ahd9g', 'BirdwatchDeleteNote' 162 | BirdwatchDeleteRating = 'OpvCOyOoQClUND66zDzrnA', 'BirdwatchDeleteRating' 163 | BirdwatchEditNotificationSettings = 'FLgLReVIssXjB_ui3wcrRQ', 'BirdwatchEditNotificationSettings' 164 | BirdwatchFetchAliasSelfSelectOptions = 'szoXMke8AZOErso908iglw', 'BirdwatchFetchAliasSelfSelectOptions' 165 | BirdwatchFetchAliasSelfSelectStatus = 'LUEdtkcpBlGktUtms4BvwA', 'BirdwatchFetchAliasSelfSelectStatus' 166 | BirdwatchFetchAuthenticatedUserProfile = 'pMbW6Y4LuS5MzlSOEqERJQ', 'BirdwatchFetchAuthenticatedUserProfile' 167 | BirdwatchFetchBirdwatchProfile = 'btgGtchypc3D491MJ7XXWA', 'BirdwatchFetchBirdwatchProfile' 168 | BirdwatchFetchContributorNotesSlice = 't6r3Wq7wripUW9gB3FQNBw', 'BirdwatchFetchContributorNotesSlice' 169 | BirdwatchFetchGlobalTimeline = 'L3LftPt6fhYqoQ5Vnxm7UQ', 'BirdwatchFetchGlobalTimeline' 170 | BirdwatchFetchNotes = 'ZGMhf1M7kPKMOhEk1nz0Yw', 'BirdwatchFetchNotes' 171 | BirdwatchFetchOneNote = 'GO8BR2MM2WZB63cdOoC7lw', 'BirdwatchFetchOneNote' 172 | BirdwatchFetchPublicData = '9bDdJ6AL26RLkcUShEcF-A', 'BirdwatchFetchPublicData' 173 | BirdwatchProfileAcknowledgeEarnOut = 'cED9wJy8Nd1kZCCYuIq9zQ', 'BirdwatchProfileAcknowledgeEarnOut' 174 | BizProfileFetchUser = '6OFpJ3TH3p8JpwOSgfgyhg', 'BizProfileFetchUser' 175 | BlockedAccountsAll = 'h52d1F7dumWGE1tJAhQBpg', 'BlockedAccountsAll' 176 | BlockedAccountsAutoBlock = '8w-D2OhT0jmGzXaNY--UQA', 'BlockedAccountsAutoBlock' 177 | BlockedAccountsImported = '8LDNeOEm0kA98uoDsqXvMg', 'BlockedAccountsImported' 178 | BookmarkFolderTimeline = '13H7EUATwethsj-XxX5ohw', 'BookmarkFolderTimeline' 179 | BookmarkFoldersSlice = 'i78YDd0Tza-dV4SYs58kRg', 'BookmarkFoldersSlice' 180 | BookmarksAllDelete = 'skiACZKC1GDYli-M8RzEPQ', 'BookmarksAllDelete' 181 | Budgets = 'mbK3oSQotwcJXyQIBE3uYw', 'Budgets' 182 | CardPreviewByTweetText = 'jnwTSDR-Eo_HWlSkXPcMGA', 'CardPreviewByTweetText' 183 | CheckTweetForNudge = 'C2dcvh7H69JALtomErxWlA', 'CheckTweetForNudge' 184 | CombinedLists = 'rIxum3avpCu7APi7mxTNjw', 'CombinedLists' 185 | CommunitiesMainDiscoveryModule = '8UB2fhB8TiYIW2M6vbBFXg', 'CommunitiesMainDiscoveryModule' 186 | CommunitiesMainPageTimeline = 'DzcxPzkGYVQk-BD0pqAcZw', 'CommunitiesMainPageTimeline' 187 | CommunitiesMembershipsSlice = 's8-oxdVsoJ3w2CFD0nFt9g', 'CommunitiesMembershipsSlice' 188 | CommunitiesMembershipsTimeline = 'QXo-eKTsvhpCyFotNz2u6g', 'CommunitiesMembershipsTimeline' 189 | CommunityAboutTimeline = 'plOgdpBzpVVQbTOEVuRc_A', 'CommunityAboutTimeline' 190 | CommunityByRestId = 'bCVwRBDPi15jrdJQ7NCENQ', 'CommunityByRestId' 191 | CommunityCreateRule = 'dShPoN6voXRusgxC1uvGog', 'CommunityCreateRule' 192 | CommunityDiscoveryTimeline = 'b3rceNUXWRyo5mSwVZF74Q', 'CommunityDiscoveryTimeline' 193 | CommunityEditBannerMedia = 'KVkZwp8Q6xy6iyhlQE5d7Q', 'CommunityEditBannerMedia' 194 | CommunityEditName = 'SKToKhvm3Z4Rir8ENCJ3YQ', 'CommunityEditName' 195 | CommunityEditPurpose = 'eMat-u2kx6KocreGTAt-hA', 'CommunityEditPurpose' 196 | CommunityEditRule = '9nEl5bNcdteuPGbGCdvEFA', 'CommunityEditRule' 197 | CommunityEditTheme = '4OhW6gWJwiu-JTAgBPsU1w', 'CommunityEditTheme' 198 | CommunityHashtagsTimeline = 'hril1TsnshopHbmnjdUmhQ', 'CommunityHashtagsTimeline' 199 | CommunityMemberRelationshipTypeahead = 'NEwac2-8ONgf0756ne8oXA', 'CommunityMemberRelationshipTypeahead' 200 | CommunityModerationKeepTweet = 'f_YqrHSCc1mPlG-aB7pFRw', 'CommunityModerationKeepTweet' 201 | 
CommunityModerationTweetCasesSlice = 'V-iC7tjWOlzBJ44SanqGzw', 'CommunityModerationTweetCasesSlice' 202 | CommunityRemoveBannerMedia = 'lSdK1v30qVhm37rDTgHq0Q', 'CommunityRemoveBannerMedia' 203 | CommunityRemoveRule = 'EI_g43Ss_Ixg0EC4K7nzlQ', 'CommunityRemoveRule' 204 | CommunityReorderRules = 'VwluNMGnl5uaNZ3LnlCQ_A', 'CommunityReorderRules' 205 | CommunityTweetsRankedTimeline = 'P38EspBBPhAfSKPP74-s2Q', 'CommunityTweetsRankedTimeline' 206 | CommunityTweetsTimeline = '2JgHOlqfeLusxAT0yGQJjg', 'CommunityTweetsTimeline' 207 | CommunityUpdateRole = '5eq76kkUqfdCzInCtcxQOA', 'CommunityUpdateRole' 208 | CommunityUserInvite = 'x8hUNaBCOV2tSalqB9cwWQ', 'CommunityUserInvite' 209 | CommunityUserRelationshipTypeahead = 'gi_UGcUurYp6N6p2BaLJqQ', 'CommunityUserRelationshipTypeahead' 210 | ConversationControlChange = 'hb1elGcj6769uT8qVYqtjw', 'ConversationControlChange' 211 | ConversationControlDelete = 'OoMO_aSZ1ZXjegeamF9QmA', 'ConversationControlDelete' 212 | ConvertRitoSuggestedActions = '2njnYoE69O2jdUM7KMEnDw', 'ConvertRitoSuggestedActions' 213 | Coupons = 'R1h43jnAl2bsDoUkgZb7NQ', 'Coupons' 214 | CreateCommunity = 'lRjZKTRcWuqwtYwCWGy9_w', 'CreateCommunity' 215 | CreateCustomerPortalSession = '2LHXrd1uYeaMWhciZgPZFw', 'CreateCustomerPortalSession' 216 | CreateDraftTweet = 'cH9HZWz_EW9gnswvA4ZRiQ', 'CreateDraftTweet' 217 | CreateNoteTweet = 'Pyx6nga4XtTVhfTh1gtX1A', 'CreateNoteTweet' 218 | CreateQuickPromotion = 'oDSoVgHhJxnd5IkckgPZdg', 'CreateQuickPromotion' 219 | CreateTrustedFriendsList = '2tP8XUYeLHKjq5RHvuvpZw', 'CreateTrustedFriendsList' 220 | CreateTweetDownvote = 'Eo65jl-gww30avDgrXvhUA', 'CreateTweetDownvote' 221 | CreateTweetReaction = 'D7M6X3h4-mJE8UB1Ap3_dQ', 'CreateTweetReaction' 222 | DataSaverMode = 'xF6sXnKJfS2AOylzxRjf6A', 'DataSaverMode' 223 | DeleteBookmarkFolder = '2UTTsO-6zs93XqlEUZPsSg', 'DeleteBookmarkFolder' 224 | DeleteDraftTweet = 'bkh9G3FGgTldS9iTKWWYYw', 'DeleteDraftTweet' 225 | DeletePaymentMethod = 'VaaLGwK5KNLoc7wsOmp4uw', 'DeletePaymentMethod' 226 | DeleteTweetDownvote = 'VNEvEGXaUAMfiExP8Tbezw', 'DeleteTweetDownvote' 227 | DeleteTweetReaction = 'GKwK0Rj4EdkfwdHQMZTpuw', 'DeleteTweetReaction' 228 | DisableUserAccountLabel = '_ckHEj05gan2VfNHG6thBA', 'DisableUserAccountLabel' 229 | DisableVerifiedPhoneLabel = 'g2m0pAOamawNtVIfjXNMJg', 'DisableVerifiedPhoneLabel' 230 | DismissRitoSuggestedAction = 'jYvwa61cv3NwNP24iUru6g', 'DismissRitoSuggestedAction' 231 | DmAllSearchSlice = 'U-QXVRZ6iddb1QuZweh5DQ', 'DmAllSearchSlice' 232 | DmGroupSearchSlice = '5zpY1dCR-8NyxQJS_CFJoQ', 'DmGroupSearchSlice' 233 | DmMutedTimeline = 'lrcWa13oyrQc7L33wRdLAQ', 'DmMutedTimeline' 234 | DMMessageDeleteMutation = 'BJ6DtxA2llfjnRoRjaiIiw', 'DMMessageDeleteMutation' 235 | DmNsfwMediaFilterUpdate = 'of_N6O33zfyD4qsFJMYFxA', 'DmNsfwMediaFilterUpdate' 236 | DmPeopleSearchSlice = 'xYSm8m5kJnzm_gFCn5GH-w', 'DmPeopleSearchSlice' 237 | EditBookmarkFolder = 'a6kPp1cS1Dgbsjhapz1PNw', 'EditBookmarkFolder' 238 | EditDraftTweet = 'JIeXE-I6BZXHfxsgOkyHYQ', 'EditDraftTweet' 239 | EditScheduledTweet = '_mHkQ5LHpRRjSXKOcG6eZw', 'EditScheduledTweet' 240 | EnableLoggedOutWebNotifications = 'BqIHKmwZKtiUBPi07jKctg', 'EnableLoggedOutWebNotifications' 241 | EnableVerifiedPhoneLabel = 'C3RJFfMsb_KcEytpKmRRkw', 'EnableVerifiedPhoneLabel' 242 | EnrollCoupon = 'SOyGmNGaEXcvk15s5bqDrA', 'EnrollCoupon' 243 | ExplorePage = 'fkypGKlR9Xz9kLvUZDLoXw', 'ExplorePage' 244 | FeatureSettingsUpdate = '-btar_vkBwWA7s3YWfp_9g', 'FeatureSettingsUpdate' 245 | FetchDraftTweets = 'ZkqIq_xRhiUme0PBJNpRtg', 'FetchDraftTweets' 246 | 
FetchScheduledTweets = 'ITtjAzvlZni2wWXwf295Qg', 'FetchScheduledTweets' 247 | FollowersYouKnow = 'RvojYJJB90VwJ0rdVhbjMQ', 'FollowersYouKnow' 248 | ForYouExplore = 'wVEXnyTWzQlEsIuLq_D3tw', 'ForYouExplore' 249 | GenericTimelineById = 'LZfAdxTdNolKXw6ZkoY_kA', 'GenericTimelineById' 250 | GetSafetyModeSettings = 'AhxTX0lkbIos4WG53xwzSA', 'GetSafetyModeSettings' 251 | GetTweetReactionTimeline = 'ihIcULrtrtPGlCuprduRrA', 'GetTweetReactionTimeline' 252 | GetUserClaims = 'lFi3xnx0auUUnyG4YwpCNw', 'GetUserClaims' 253 | GraphQLError = '2V2W3HIBuMW83vEMtfo_Rg', 'GraphQLError' 254 | ImmersiveMedia = 'UGQD_VslAJBJ4XzigsBYAA', 'ImmersiveMedia' 255 | JoinCommunity = 'PXO-mA1KfmLqB9I6R-lOng', 'JoinCommunity' 256 | LeaveCommunity = 'AtiTdhEyRN8ruNFW069ewQ', 'LeaveCommunity' 257 | ListByRestId = 'wXzyA5vM_aVkBL9G8Vp3kw', 'ListByRestId' 258 | ListBySlug = '3-E3eSWorCv24kYkK3CCiQ', 'ListBySlug' 259 | ListCreationRecommendedUsers = 'Zf8ZwG57EKtss-rPlryIqg', 'ListCreationRecommendedUsers' 260 | ListEditRecommendedUsers = '-F4wsOirYNXjjg-ZjccQpQ', 'ListEditRecommendedUsers' 261 | ListLatestTweetsTimeline = '2TemLyqrMpTeAmysdbnVqw', 'ListLatestTweetsTimeline' 262 | ListMembers = 'vA952kfgGw6hh8KatWnbqw', 'ListMembers' 263 | ListMemberships = 'BlEXXdARdSeL_0KyKHHvvg', 'ListMemberships' 264 | ListOwnerships = 'wQcOSjSQ8NtgxIwvYl1lMg', 'ListOwnerships' 265 | ListPins = 'J0JOhmi8HSsle8LfSWv0cw', 'ListPins' 266 | ListProductSubscriptions = 'wwdBYgScze0_Jnan79jEUw', 'ListProductSubscriptions' 267 | ListRankedTweetsTimeline = '07lytXX9oG9uCld1RY4b0w', 'ListRankedTweetsTimeline' 268 | ListSubscribe = 'FjvrQI3k-97JIUbEE6Gxcw', 'ListSubscribe' 269 | ListSubscribers = 'e57wIELAAe0fYt4Hmqsk6g', 'ListSubscribers' 270 | ListUnsubscribe = 'bXyvW9HoS_Omy4ADhexj8A', 'ListUnsubscribe' 271 | ListsDiscovery = 'ehnzbxPHA69pyaV2EydN1g', 'ListsDiscovery' 272 | ListsManagementPageTimeline = 'nhYp4n09Hi5n2hQWseQztg', 'ListsManagementPageTimeline' 273 | LiveCommerceItemsSlice = '-lnNX56S2YrZYrLzbccFAQ', 'LiveCommerceItemsSlice' 274 | ModerateTweet = 'pjFnHGVqCjTcZol0xcBJjw', 'ModerateTweet' 275 | ModeratedTimeline = 'hnaqw2Vok5OETdBVa_uexw', 'ModeratedTimeline' 276 | MuteList = 'ZYyanJsskNUcltu9bliMLA', 'MuteList' 277 | MutedAccounts = '-G9eXTmseyiSenbqjrEG6w', 'MutedAccounts' 278 | NoteworthyAccountsPage = '3fOJzEwYMnVyzwgLTLIBkw', 'NoteworthyAccountsPage' 279 | PaymentMethods = 'mPF_G9okpbZuLcD6mN8K9g', 'PaymentMethods' 280 | PinReply = 'GA2_1uKP9b_GyR4MVAQXAw', 'PinReply' 281 | ProfileUserPhoneState = '5kUWP8C1hcd6omvg6HXXTQ', 'ProfileUserPhoneState' 282 | PutClientEducationFlag = 'IjQ-egg0uPkY11NyPMfRMQ', 'PutClientEducationFlag' 283 | QuickPromoteEligibility = 'LtpCXh66W-uXh7u7XSRA8Q', 'QuickPromoteEligibility' 284 | RemoveFollower = 'QpNfg0kpPRfjROQ_9eOLXA', 'RemoveFollower' 285 | RemoveTweetFromBookmarkFolder = '2Qbj9XZvtUvyJB4gFwWfaA', 'RemoveTweetFromBookmarkFolder' 286 | RequestToJoinCommunity = '6G66cW5zuxPXmHOeBOjF2w', 'RequestToJoinCommunity' 287 | RitoActionedTweetsTimeline = 'px9Zbs48D-YdQPEROK6-nA', 'RitoActionedTweetsTimeline' 288 | RitoFlaggedAccountsTimeline = 'lMzaBZHIbD6GuPqJJQubMg', 'RitoFlaggedAccountsTimeline' 289 | RitoFlaggedTweetsTimeline = 'iCuXMibh6yj9AelyjKXDeA', 'RitoFlaggedTweetsTimeline' 290 | RitoSuggestedActionsFacePile = 'GnQKeEdL1LyeK3dTQCS1yw', 'RitoSuggestedActionsFacePile' 291 | SetDefault = 'QEMLEzEMzoPNbeauKCCLbg', 'SetDefault' 292 | SetSafetyModeSettings = 'qSJIPIpf4gA7Wn21bT3D4w', 'SetSafetyModeSettings' 293 | SharingAudiospacesListeningDataWithFollowersUpdate = '5h0kNbk3ii97rmfY6CdgAA', 
'SharingAudiospacesListeningDataWithFollowersUpdate' 294 | SubscribeToScheduledSpace = 'Sxn4YOlaAwEKjnjWV0h7Mw', 'SubscribeToScheduledSpace' 295 | SubscriptionCheckoutUrlWithEligibility = 'hKfOOObQr5JmfmxW0YtPvg', 'SubscriptionCheckoutUrlWithEligibility' 296 | SubscriptionProductDetails = 'f0dExZDmFWFSWMCPQSAemQ', 'SubscriptionProductDetails' 297 | SubscriptionProductFeaturesFetch = 'Me2CVcAXxvK2WMr-Nh_Qqg', 'SubscriptionProductFeaturesFetch' 298 | SuperFollowers = 'o0YtPFnd4Lk_pOQb9alCvA', 'SuperFollowers' 299 | TopicByRestId = '4OUZZOonV2h60I0wdlQb_w', 'TopicByRestId' 300 | TopicLandingPage = 'mAKQjs1kyTS75VLZzuIXXw', 'TopicLandingPage' 301 | TopicNotInterested = 'cPCFdDAaqRjlMRYInZzoDA', 'TopicNotInterested' 302 | TopicToFollowSidebar = 'RPWVYYupHVZkJOnokbt2cw', 'TopicToFollowSidebar' 303 | TopicUndoNotInterested = '4tVnt6FoSxaX8L-mDDJo4Q', 'TopicUndoNotInterested' 304 | TopicsManagementPage = 'Jvdjpe8qzsJD84BpK3qdkQ', 'TopicsManagementPage' 305 | TopicsPickerPage = 'UvG-XXtWNcJN1LzF0u3ByA', 'TopicsPickerPage' 306 | TopicsPickerPageById = 't6kH4v2c_VzWKljc2yNwHA', 'TopicsPickerPageById' 307 | TrustedFriendsTypeahead = 'RRnOwHttRGscWKC1zY9VRA', 'TrustedFriendsTypeahead' 308 | TweetEditHistory = '8eaWKjHszkS-G_hprUd9AA', 'TweetEditHistory' 309 | TwitterArticleByRestId = 'hwrvh-Qt24lcprL-BDfqRA', 'TwitterArticleByRestId' 310 | TwitterArticleCreate = 'aV-sm-IkvwplcxdYDoLZHQ', 'TwitterArticleCreate' 311 | TwitterArticleDelete = '6st-stMDc7KBqLT8KvWhHg', 'TwitterArticleDelete' 312 | TwitterArticleUpdateCoverImage = 'fpcVRSAsjvkwmCiN1HheqQ', 'TwitterArticleUpdateCoverImage' 313 | TwitterArticleUpdateData = 'XpBTYp_QXwyZ0XT0JXCBJw', 'TwitterArticleUpdateData' 314 | TwitterArticleUpdateMedia = '3ojmmegfBC_oHyrmPhxj-g', 'TwitterArticleUpdateMedia' 315 | TwitterArticleUpdateTitle = 'dvH6Ql989I4e5jWEV7HfaQ', 'TwitterArticleUpdateTitle' 316 | TwitterArticleUpdateVisibility = '8M35gHyfpcy3S4UXejUGfA', 'TwitterArticleUpdateVisibility' 317 | TwitterArticlesSlice = 'UUPSi_aS8_kHDFTWqSBPUA', 'TwitterArticlesSlice' 318 | UnmentionUserFromConversation = 'xVW9j3OqoBRY9d6_2OONEg', 'UnmentionUserFromConversation' 319 | UnmoderateTweet = 'pVSyu6PA57TLvIE4nN2tsA', 'UnmoderateTweet' 320 | UnmuteList = 'pMZrHRNsmEkXgbn3tOyr7Q', 'UnmuteList' 321 | UnpinReply = 'iRe6ig5OV1EzOtldNIuGDQ', 'UnpinReply' 322 | UnsubscribeFromScheduledSpace = 'Zevhh76Msw574ZSs2NQHGQ', 'UnsubscribeFromScheduledSpace' 323 | UrtFixtures = 'I_0j1mjMwv94SdS66S4pqw', 'UrtFixtures' 324 | UserAboutTimeline = 'dm7ReTFJoeU0qkiZCO1E1g', 'UserAboutTimeline' 325 | UserAccountLabel = 'rD5gLxVmMvtdtYU1UHWlFQ', 'UserAccountLabel' 326 | UserBusinessProfileTeamTimeline = 'dq1eUCn3N8v0BywlP4nT7A', 'UserBusinessProfileTeamTimeline' 327 | UserPromotableTweets = 'jF-OgMv-9vAym3JaCPUnhQ', 'UserPromotableTweets' 328 | UserSessionsList = 'vJ-XatpmQSG8bDch8-t9Jw', 'UserSessionsList' 329 | UserSuperFollowTweets = '1by3q8-AJWdNYhtltjlPTQ', 'UserSuperFollowTweets' 330 | Viewer = 'okNaf-6AQWu2DD2H_MAoVw', 'Viewer' 331 | ViewerEmailSettings = 'JpjlNgn4sLGvS6tgpTzYBg', 'ViewerEmailSettings' 332 | ViewerTeams = 'D8mVcJSVv66_3NcR7fOf6g', 'ViewerTeams' 333 | ViewingOtherUsersTopicsPage = 'tYXo6h_rpnHXbdLUFMatZA', 'ViewingOtherUsersTopicsPage' 334 | WriteDataSaverPreferences = 'H03etWvZGz41YASxAU2YPg', 'WriteDataSaverPreferences' 335 | WriteEmailNotificationSettings = '2qKKYFQift8p5-J1k6kqxQ', 'WriteEmailNotificationSettings' 336 | adFreeArticleDomains = 'zwTrX9CtnMvWlBXjsx95RQ', 'adFreeArticleDomains' 337 | articleNudgeDomains = '88Bu08U2ddaVVjKmmXjVYg', 
'articleNudgeDomains' 338 | bookmarkTweetToFolder = '4KHZvvNbHNf07bsgnL9gWA', 'bookmarkTweetToFolder' 339 | createBookmarkFolder = '6Xxqpq8TM_CREYiuof_h5w', 'createBookmarkFolder' 340 | getAltTextPromptPreference = 'PFIxTk8owMoZgiMccP0r4g', 'getAltTextPromptPreference' 341 | getCaptionsAlwaysDisplayPreference = 'BwgMOGpOViDS0ri7VUgglg', 'getCaptionsAlwaysDisplayPreference' 342 | timelinesFeedback = 'vfVbgvTPTQ-dF_PQ5lD1WQ', 'timelinesFeedback' 343 | updateAltTextPromptPreference = 'aQKrduk_DA46XfOQDkcEng', 'updateAltTextPromptPreference' 344 | updateCaptionsAlwaysDisplayPreference = 'uCUQhvZ5sJ9qHinRp6CFlQ', 'updateCaptionsAlwaysDisplayPreference' 345 | 346 | default_variables = { 347 | 'count': 1000, 348 | 'withSafetyModeUserFields': True, 349 | 'includePromotedContent': True, 350 | 'withQuickPromoteEligibilityTweetFields': True, 351 | 'withVoice': True, 352 | 'withV2Timeline': True, 353 | 'withDownvotePerspective': False, 354 | 'withBirdwatchNotes': True, 355 | 'withCommunity': True, 356 | 'withSuperFollowsUserFields': True, 357 | 'withReactionsMetadata': False, 358 | 'withReactionsPerspective': False, 359 | 'withSuperFollowsTweetFields': True, 360 | 'isMetatagsQuery': False, 361 | 'withReplays': True, 362 | 'withClientEventToken': False, 363 | 'withAttachments': True, 364 | 'withConversationQueryHighlights': True, 365 | 'withMessageQueryHighlights': True, 366 | 'withMessages': True, 367 | } 368 | default_features = { 369 | # new 370 | 'c9s_tweet_anatomy_moderator_badge_enabled': True, 371 | 'responsive_web_home_pinned_timelines_enabled': True, 372 | 373 | 'blue_business_profile_image_shape_enabled': True, 374 | 'creator_subscriptions_tweet_preview_api_enabled': True, 375 | 'freedom_of_speech_not_reach_fetch_enabled': True, 376 | 'graphql_is_translatable_rweb_tweet_is_translatable_enabled': True, 377 | 'graphql_timeline_v2_bookmark_timeline': True, 378 | 'hidden_profile_likes_enabled': True, 379 | 'highlights_tweets_tab_ui_enabled': True, 380 | 'interactive_text_enabled': True, 381 | 'longform_notetweets_consumption_enabled': True, 382 | 'longform_notetweets_inline_media_enabled': True, 383 | 'longform_notetweets_rich_text_read_enabled': True, 384 | 'longform_notetweets_richtext_consumption_enabled': True, 385 | 'profile_foundations_tweet_stats_enabled': True, 386 | 'profile_foundations_tweet_stats_tweet_frequency': True, 387 | 'responsive_web_birdwatch_note_limit_enabled': True, 388 | 'responsive_web_edit_tweet_api_enabled': True, 389 | 'responsive_web_enhance_cards_enabled': False, 390 | 'responsive_web_graphql_exclude_directive_enabled': True, 391 | 'responsive_web_graphql_skip_user_profile_image_extensions_enabled': False, 392 | 'responsive_web_graphql_timeline_navigation_enabled': True, 393 | 'responsive_web_media_download_video_enabled': False, 394 | 'responsive_web_text_conversations_enabled': False, 395 | 'responsive_web_twitter_article_data_v2_enabled': True, 396 | 'responsive_web_twitter_article_tweet_consumption_enabled': False, 397 | 'responsive_web_twitter_blue_verified_badge_is_enabled': True, 398 | 'rweb_lists_timeline_redesign_enabled': True, 399 | 'spaces_2022_h2_clipping': True, 400 | 'spaces_2022_h2_spaces_communities': True, 401 | 'standardized_nudges_misinfo': True, 402 | 'subscriptions_verification_info_verified_since_enabled': True, 403 | 'tweet_awards_web_tipping_enabled': False, 404 | 'tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled': True, 405 | 'tweetypie_unmention_optimization_enabled': True, 406 | 'verified_phone_label_enabled': False, 
407 | 'vibe_api_enabled': True, 408 | 'view_counts_everywhere_api_enabled': True 409 | } 410 | 411 | 412 | trending_params = { 413 | 'include_profile_interstitial_type': '1', 414 | 'include_blocking': '1', 415 | 'include_blocked_by': '1', 416 | 'include_followed_by': '1', 417 | 'include_want_retweets': '1', 418 | 'include_mute_edge': '1', 419 | 'include_can_dm': '1', 420 | 'include_can_media_tag': '1', 421 | 'include_ext_has_nft_avatar': '1', 422 | 'include_ext_is_blue_verified': '1', 423 | 'include_ext_verified_type': '1', 424 | 'skip_status': '1', 425 | 'cards_platform': 'Web-12', 426 | 'include_cards': '1', 427 | 'include_ext_alt_text': 'true', 428 | 'include_ext_limited_action_results': 'false', 429 | 'include_quote_count': 'true', 430 | 'include_reply_count': '1', 431 | 'tweet_mode': 'extended', 432 | 'include_ext_views': 'true', 433 | 'include_entities': 'true', 434 | 'include_user_entities': 'true', 435 | 'include_ext_media_color': 'true', 436 | 'include_ext_media_availability': 'true', 437 | 'include_ext_sensitive_media_warning': 'true', 438 | 'include_ext_trusted_friends_metadata': 'true', 439 | 'send_error_codes': 'true', 440 | 'simple_quoted_tweet': 'true', 441 | 'count': 1000, 442 | 'requestContext': 'launch', 443 | 'include_page_configuration': 'true', 444 | 'initial_tab_id': 'trending', 445 | 'entity_tokens': 'false', 446 | 'ext': 'mediaStats,highlightedLabel,hasNftAvatar,voiceInfo,birdwatchPivot,enrichments,superFollowMetadata,unmentionInfo,editControl,vibe' 447 | } 448 | 449 | account_settings = { 450 | 'address_book_live_sync_enabled': False, 451 | 'allow_ads_personalization': False, 452 | 'allow_authenticated_periscope_requests': True, 453 | 'allow_dm_groups_from': 'following', 454 | 'allow_dms_from': 'following', # all 455 | 'allow_location_history_personalization': False, 456 | 'allow_logged_out_device_personalization': False, 457 | 'allow_media_tagging': 'none', # all, following 458 | 'allow_sharing_data_for_third_party_personalization': False, 459 | 'alt_text_compose_enabled': None, 460 | 'always_use_https': True, 461 | 'autoplay_disabled': False, 462 | 'country_code': 'us', 463 | 'discoverable_by_email': False, 464 | 'discoverable_by_mobile_phone': False, 465 | 'display_sensitive_media': True, 466 | 'dm_quality_filter': 'enabled', # disabled 467 | 'dm_receipt_setting': 'all_disabled', # all_enabled 468 | 'geo_enabled': False, 469 | 'include_alt_text_compose': True, 470 | 'include_mention_filter': True, 471 | 'include_nsfw_admin_flag': True, 472 | 'include_nsfw_user_flag': True, 473 | 'include_ranked_timeline': True, 474 | 'language': 'en', 475 | 'mention_filter': 'unfiltered', 476 | 'nsfw_admin': False, 477 | 'nsfw_user': False, 478 | 'personalized_trends': True, 479 | 'protected': False, 480 | 'ranked_timeline_eligible': None, 481 | 'ranked_timeline_setting': None, 482 | 'require_password_login': False, 483 | 'requires_login_verification': False, 484 | 'settings_metadata': {}, 485 | 'sleep_time': { 486 | 'enabled': False, 487 | 'end_time': None, 488 | 'start_time': None 489 | }, 490 | 'translator_type': 'none', 491 | 'universal_quality_filtering_enabled': 'enabled', 492 | 'use_cookie_personalization': False, 493 | ## todo: not yet implemented - requires additional steps 494 | # 'allow_contributor_request': 'all', 495 | # 'protect_password_reset': False, 496 | } 497 | follower_notification_settings = { 498 | 'cursor': '-1', 499 | 'include_profile_interstitial_type': '1', 500 | 'include_blocking': '1', 501 | 'include_blocked_by': '1', 502 | 'include_followed_by': '1', 
503 | 'include_want_retweets': '1', 504 | 'include_mute_edge': '1', 505 | 'include_can_dm': '1', 506 | 'include_can_media_tag': '1', 507 | 'include_ext_has_nft_avatar': '1', 508 | 'include_ext_is_blue_verified': '1', 509 | 'include_ext_verified_type': '1', 510 | 'skip_status': '1', 511 | } 512 | 513 | follow_settings = { 514 | 'include_profile_interstitial_type': '1', 515 | 'include_blocking': '1', 516 | 'include_blocked_by': '1', 517 | 'include_followed_by': '1', 518 | 'include_want_retweets': '1', 519 | 'include_mute_edge': '1', 520 | 'include_can_dm': '1', 521 | 'include_can_media_tag': '1', 522 | 'include_ext_has_nft_avatar': '1', 523 | 'include_ext_is_blue_verified': '1', 524 | 'include_ext_verified_type': '1', 525 | 'skip_status': '1', 526 | } 527 | 528 | account_search_settings = { 529 | 'optInFiltering': True, # filter out nsfw content 530 | 'optInBlocking': True, # filter out blocked accounts 531 | } 532 | 533 | profile_settings = { 534 | 'birthdate_day': int, 535 | 'birthdate_month': int, 536 | 'birthdate_year': int, # 1985 537 | 'birthdate_visibility': str, # 'self', 538 | 'birthdate_year_visibility': str, # 'self', 539 | 'displayNameMaxLength': int, # '50', 540 | 'url': str, # 'https://example.com', 541 | 'name': str, # 'foo', 542 | 'description': str, # 'bar', 543 | 'location': str, # 'world', 544 | } 545 | 546 | search_config = { 547 | 'include_profile_interstitial_type': 1, 548 | 'include_blocking': 1, 549 | 'include_blocked_by': 1, 550 | 'include_followed_by': 1, 551 | 'include_want_retweets': 1, 552 | 'include_mute_edge': 1, 553 | 'include_can_dm': 1, 554 | 'include_can_media_tag': 1, 555 | 'include_ext_has_nft_avatar': 1, 556 | 'include_ext_is_blue_verified': 1, 557 | 'include_ext_verified_type': 1, 558 | 'skip_status': 1, 559 | 'cards_platform': 'Web-12', 560 | 'include_cards': 1, 561 | 'include_ext_alt_text': 'true', 562 | 'include_ext_limited_action_results': 'false', 563 | 'include_quote_count': 'true', 564 | 'include_reply_count': 1, 565 | 'tweet_mode': 'extended', 566 | 'include_ext_collab_control': 'true', 567 | 'include_ext_views': 'true', 568 | 'include_entities': 'true', 569 | 'include_user_entities': 'true', 570 | 'include_ext_media_color': 'true', 571 | 'include_ext_media_availability': 'true', 572 | 'include_ext_sensitive_media_warning': 'true', 573 | 'include_ext_trusted_friends_metadata': 'true', 574 | 'send_error_codes': 'true', 575 | 'simple_quoted_tweet': 'true', 576 | 'query_source': 'typed_query', 577 | 'count': 1000, 578 | 'q': '', 579 | 'requestContext': 'launch', 580 | 'pc': 1, 581 | 'spelling_corrections': 1, 582 | 'include_ext_edit_control': 'true', 583 | 'ext': 'mediaStats,highlightedLabel,hasNftAvatar,voiceInfo,birdwatchPivot,enrichments,superFollowMetadata,unmentionInfo,editControl,collab_control,vibe' 584 | } 585 | 586 | dm_params = { 587 | 'context': 'FETCH_DM_CONVERSATION', 588 | 'include_profile_interstitial_type': '1', 589 | 'include_blocking': '1', 590 | 'include_blocked_by': '1', 591 | 'include_followed_by': '1', 592 | 'include_want_retweets': '1', 593 | 'include_mute_edge': '1', 594 | 'include_can_dm': '1', 595 | 'include_can_media_tag': '1', 596 | 'include_ext_has_nft_avatar': '1', 597 | 'include_ext_is_blue_verified': '1', 598 | 'include_ext_verified_type': '1', 599 | 'include_ext_profile_image_shape': '1', 600 | 'skip_status': '1', 601 | 'dm_secret_conversations_enabled': 'false', 602 | 'krs_registration_enabled': 'true', 603 | 'cards_platform': 'Web-12', 604 | 'include_cards': '1', 605 | 'include_ext_alt_text': 'true', 606 | 
'include_ext_limited_action_results': 'false', 607 | 'include_quote_count': 'true', 608 | 'include_reply_count': '1', 609 | 'tweet_mode': 'extended', 610 | 'include_ext_views': 'true', 611 | 'dm_users': 'false', 612 | 'include_groups': 'true', 613 | 'include_inbox_timelines': 'true', 614 | 'include_ext_media_color': 'true', 615 | 'supports_reactions': 'true', 616 | 'include_conversation_info': 'true', 617 | 'ext': 'mediaColor,altText,mediaStats,highlightedLabel,hasNftAvatar,voiceInfo,birdwatchPivot,superFollowMetadata,unmentionInfo,editControl', 618 | } 619 | 620 | live_notification_params = params = { 621 | "cards_platform": "Web-12", 622 | "count": "50", # max value 623 | "ext": "mediaStats,highlightedLabel,hasNftAvatar,voiceInfo,birdwatchPivot,superFollowMetadata,unmentionInfo,editControl", 624 | "include_blocked_by": "1", 625 | "include_blocking": "1", 626 | "include_can_dm": "1", 627 | "include_can_media_tag": "1", 628 | "include_cards": "1", 629 | "include_entities": "true", 630 | "include_ext_alt_text": "true", 631 | "include_ext_has_nft_avatar": "1", 632 | "include_ext_is_blue_verified": "1", 633 | "include_ext_limited_action_results": "true", 634 | "include_ext_media_availability": "true", 635 | "include_ext_media_color": "true", 636 | "include_ext_profile_image_shape": "1", 637 | "include_ext_sensitive_media_warning": "true", 638 | "include_ext_trusted_friends_metadata": "true", 639 | "include_ext_verified_type": "1", 640 | "include_ext_views": "true", 641 | "include_followed_by": "1", 642 | "include_mute_edge": "1", 643 | "include_profile_interstitial_type": "1", 644 | "include_quote_count": "true", 645 | "include_reply_count": "1", 646 | "include_user_entities": "true", 647 | "include_want_retweets": "1", 648 | "send_error_codes": "true", 649 | "simple_quoted_tweet": "true", 650 | "skip_status": "1", 651 | "tweet_mode": "extended" 652 | } 653 | 654 | recommendations_params = { 655 | 'include_profile_interstitial_type': '1', 656 | 'include_blocking': '1', 657 | 'include_blocked_by': '1', 658 | 'include_followed_by': '1', 659 | 'include_want_retweets': '1', 660 | 'include_mute_edge': '1', 661 | 'include_can_dm': '1', 662 | 'include_can_media_tag': '1', 663 | 'include_ext_has_nft_avatar': '1', 664 | 'include_ext_is_blue_verified': '1', 665 | 'include_ext_verified_type': '1', 666 | 'include_ext_profile_image_shape': '1', 667 | 'skip_status': '1', 668 | 'pc': 'true', 669 | 'display_location': 'profile_accounts_sidebar', 670 | 'limit': 100, 671 | 'ext': 'mediaStats,highlightedLabel,hasNftAvatar,voiceInfo,birdwatchPivot,superFollowMetadata,unmentionInfo,editControl' 672 | } 673 | -------------------------------------------------------------------------------- /twitter/login.py: -------------------------------------------------------------------------------- 1 | import random 2 | import sys 3 | 4 | from httpx import Client 5 | 6 | from .constants import YELLOW, RED, BOLD, RESET, USER_AGENTS 7 | from .util import find_key 8 | 9 | def update_token(client: Client, key: str, url: str, **kwargs) -> Client: 10 | caller_name = sys._getframe(1).f_code.co_name 11 | try: 12 | headers = { 13 | 'x-guest-token': client.cookies.get('guest_token', ''), 14 | 'x-csrf-token': client.cookies.get('ct0', ''), 15 | 'x-twitter-auth-type': 'OAuth2Client' if client.cookies.get('auth_token') else '', 16 | } 17 | client.headers.update(headers) 18 | r = client.post(url, **kwargs) 19 | info = r.json() 20 | 21 | for task in info.get('subtasks', []): 22 | if task.get('enter_text', {}).get('keyboard_type') == 
'email': 23 | print(f"[{YELLOW}warning{RESET}] {' '.join(find_key(task, 'text'))}") 24 | client.cookies.set('confirm_email', 'true') # signal that email challenge must be solved 25 | 26 | if task.get('subtask_id') == 'LoginAcid': 27 | if task['enter_text']['hint_text'].casefold() == 'confirmation code': 28 | print(f"[{YELLOW}warning{RESET}] email confirmation code challenge.") 29 | client.cookies.set('confirmation_code', 'true') 30 | 31 | client.cookies.set(key, info[key]) 32 | 33 | except KeyError as e: 34 | client.cookies.set('flow_errors', 'true') # signal that an error occurred somewhere in the flow 35 | print(f'[{RED}error{RESET}] failed to update token at {BOLD}{caller_name}{RESET}\n{e}') 36 | return client 37 | 38 | 39 | def init_guest_token(client: Client) -> Client: 40 | return update_token(client, 'guest_token', 'https://api.twitter.com/1.1/guest/activate.json') 41 | 42 | 43 | def flow_start(client: Client) -> Client: 44 | return update_token(client, 'flow_token', 'https://api.twitter.com/1.1/onboarding/task.json', 45 | params={'flow_name': 'login'}, 46 | json={ 47 | "input_flow_data": { 48 | "flow_context": { 49 | "debug_overrides": {}, 50 | "start_location": {"location": "splash_screen"} 51 | } 52 | }, "subtask_versions": {} 53 | }) 54 | 55 | 56 | def flow_instrumentation(client: Client) -> Client: 57 | return update_token(client, 'flow_token', 'https://api.twitter.com/1.1/onboarding/task.json', json={ 58 | "flow_token": client.cookies.get('flow_token'), 59 | "subtask_inputs": [{ 60 | "subtask_id": "LoginJsInstrumentationSubtask", 61 | "js_instrumentation": {"response": "{}", "link": "next_link"} 62 | }], 63 | }) 64 | 65 | 66 | def flow_username(client: Client) -> Client: 67 | return update_token(client, 'flow_token', 'https://api.twitter.com/1.1/onboarding/task.json', json={ 68 | "flow_token": client.cookies.get('flow_token'), 69 | "subtask_inputs": [{ 70 | "subtask_id": "LoginEnterUserIdentifierSSO", 71 | "settings_list": { 72 | "setting_responses": [{ 73 | "key": "user_identifier", 74 | "response_data": {"text_data": {"result": client.cookies.get('username')}} 75 | }], "link": "next_link"}}], 76 | }) 77 | 78 | 79 | def flow_password(client: Client) -> Client: 80 | return update_token(client, 'flow_token', 'https://api.twitter.com/1.1/onboarding/task.json', json={ 81 | "flow_token": client.cookies.get('flow_token'), 82 | "subtask_inputs": [{ 83 | "subtask_id": "LoginEnterPassword", 84 | "enter_password": {"password": client.cookies.get('password'), "link": "next_link"}}] 85 | }) 86 | 87 | 88 | def flow_duplication_check(client: Client) -> Client: 89 | return update_token(client, 'flow_token', 'https://api.twitter.com/1.1/onboarding/task.json', json={ 90 | "flow_token": client.cookies.get('flow_token'), 91 | "subtask_inputs": [{ 92 | "subtask_id": "AccountDuplicationCheck", 93 | "check_logged_in_account": {"link": "AccountDuplicationCheck_false"}, 94 | }], 95 | }) 96 | 97 | 98 | def confirm_email(client: Client) -> Client: 99 | return update_token(client, 'flow_token', 'https://api.twitter.com/1.1/onboarding/task.json', json={ 100 | "flow_token": client.cookies.get('flow_token'), 101 | "subtask_inputs": [ 102 | { 103 | "subtask_id": "LoginAcid", 104 | "enter_text": { 105 | "text": client.cookies.get('email'), 106 | "link": "next_link" 107 | } 108 | }] 109 | }) 110 | 111 | 112 | def solve_confirmation_challenge(client: Client, **kwargs) -> Client: 113 | if fn := kwargs.get('proton'): 114 | confirmation_code = fn() 115 | return update_token(client, 'flow_token', 
'https://api.twitter.com/1.1/onboarding/task.json', json={ 116 | "flow_token": client.cookies.get('flow_token'), 117 | 'subtask_inputs': [ 118 | { 119 | 'subtask_id': 'LoginAcid', 120 | 'enter_text': { 121 | 'text': confirmation_code, 122 | 'link': 'next_link', 123 | }, 124 | }, 125 | ], 126 | }) 127 | 128 | 129 | def execute_login_flow(client: Client, **kwargs) -> Client | None: 130 | client = init_guest_token(client) 131 | for fn in [flow_start, flow_instrumentation, flow_username, flow_password, flow_duplication_check]: 132 | client = fn(client) 133 | 134 | # solve email challenge 135 | if client.cookies.get('confirm_email') == 'true': 136 | client = confirm_email(client) 137 | 138 | # solve confirmation challenge (Proton Mail only) 139 | if client.cookies.get('confirmation_code') == 'true': 140 | if not kwargs.get('proton'): 141 | print(f'[{RED}warning{RESET}] Please check your email for a confirmation code' 142 | f' and log in again using the web app. If you wish to automatically solve' 143 | f' email confirmation challenges, add a Proton Mail account in your account settings') 144 | return 145 | client = solve_confirmation_challenge(client, **kwargs) 146 | return client 147 | 148 | 149 | def login(email: str, username: str, password: str, **kwargs) -> Client: 150 | client = Client( 151 | cookies={ 152 | "email": email, 153 | "username": username, 154 | "password": password, 155 | "guest_token": None, 156 | "flow_token": None, 157 | }, 158 | headers={ 159 | 'authorization': 'Bearer AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA', 160 | 'content-type': 'application/json', 161 | 'user-agent': random.choice(USER_AGENTS), 162 | 'x-twitter-active-user': 'yes', 163 | 'x-twitter-client-language': 'en', 164 | }, 165 | follow_redirects=True 166 | ) 167 | client = execute_login_flow(client, **kwargs) 168 | if not client or client.cookies.get('flow_errors') == 'true': 169 | raise Exception(f'[{RED}error{RESET}] {BOLD}{username}{RESET} login failed') 170 | return client 171 | -------------------------------------------------------------------------------- /twitter/scraper.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import logging.config 3 | import math 4 | import platform 5 | import sys 6 | from functools import partial 7 | from typing import Generator 8 | 9 | import websockets 10 | from httpx import AsyncClient, Limits, ReadTimeout, URL 11 | from tqdm.asyncio import tqdm_asyncio 12 | 13 | from .constants import * 14 | from .login import login 15 | from .util import * 16 | 17 | try: 18 | if get_ipython().__class__.__name__ == 'ZMQInteractiveShell': 19 | import nest_asyncio 20 | 21 | nest_asyncio.apply() 22 | except: 23 | ... 24 | 25 | if platform.system() != 'Windows': 26 | try: 27 | import uvloop 28 | 29 | uvloop.install() 30 | except ImportError as e: 31 | ... 
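# Usage sketch: the Scraper below accepts either login credentials or a previously
# authenticated session/cookies, mirroring the `_validate_session` handling shown in
# twitter/account.py above. The account values, screen names, and ids here are placeholders.
#
#     from twitter.scraper import Scraper
#
#     scraper = Scraper('email@example.com', 'username', 'password')   # credential login
#     # scraper = Scraper(cookies='username.cookies')                  # or reuse saved cookies
#
#     users = scraper.users(['SpaceX'])             # UserByScreenName lookups
#     tweets = scraper.tweets_by_ids([123, 456])    # batched TweetResultsByRestIds query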
32 | 33 | 34 | class Scraper: 35 | def __init__(self, email: str = None, username: str = None, password: str = None, session: Client = None, **kwargs): 36 | self.save = kwargs.get('save', True) 37 | self.debug = kwargs.get('debug', 0) 38 | self.pbar = kwargs.get('pbar', True) 39 | self.out = Path(kwargs.get('out', 'data')) 40 | self.guest = False 41 | self.logger = self._init_logger(**kwargs) 42 | self.session = self._validate_session(email, username, password, session, **kwargs) 43 | self.rate_limits = {} 44 | 45 | def users(self, screen_names: list[str], **kwargs) -> list[dict]: 46 | """ 47 | Get user data by screen names. 48 | 49 | @param screen_names: list of screen names (usernames) 50 | @param kwargs: optional keyword arguments 51 | @return: list of user data as dicts 52 | """ 53 | return self._run(Operation.UserByScreenName, screen_names, **kwargs) 54 | 55 | def tweets_by_id(self, tweet_ids: list[int | str], **kwargs) -> list[dict]: 56 | """ 57 | Get tweet metadata by tweet ids. 58 | 59 | @param tweet_ids: list of tweet ids 60 | @param kwargs: optional keyword arguments 61 | @return: list of tweet data as dicts 62 | """ 63 | return self._run(Operation.TweetResultByRestId, tweet_ids, **kwargs) 64 | 65 | def tweets_by_ids(self, tweet_ids: list[int | str], **kwargs) -> list[dict]: 66 | """ 67 | Get tweet metadata by tweet ids. 68 | 69 | Special batch query for tweet data. Most efficient way to get tweets. 70 | 71 | @param tweet_ids: list of tweet ids 72 | @param kwargs: optional keyword arguments 73 | @return: list of tweet data as dicts 74 | """ 75 | return self._run(Operation.TweetResultsByRestIds, batch_ids(tweet_ids), **kwargs) 76 | 77 | def tweets_details(self, tweet_ids: list[int], **kwargs) -> list[dict]: 78 | """ 79 | Get tweet data by tweet ids. 80 | 81 | Includes tweet metadata as well as comments, replies, etc. 82 | 83 | @param tweet_ids: list of tweet ids 84 | @param kwargs: optional keyword arguments 85 | @return: list of tweet data as dicts 86 | """ 87 | return self._run(Operation.TweetDetail, tweet_ids, **kwargs) 88 | 89 | def tweets(self, user_ids: list[int], **kwargs) -> list[dict]: 90 | """ 91 | Get tweets by user ids. 92 | 93 | Metadata for users tweets. 94 | 95 | @param user_ids: list of user ids 96 | @param kwargs: optional keyword arguments 97 | @return: list of tweet data as dicts 98 | """ 99 | return self._run(Operation.UserTweets, user_ids, **kwargs) 100 | 101 | def tweets_and_replies(self, user_ids: list[int], **kwargs) -> list[dict]: 102 | """ 103 | Get tweets and replies by user ids. 104 | 105 | Tweet metadata, including replies. 106 | 107 | @param user_ids: list of user ids 108 | @param kwargs: optional keyword arguments 109 | @return: list of tweet data as dicts 110 | """ 111 | return self._run(Operation.UserTweetsAndReplies, user_ids, **kwargs) 112 | 113 | def media(self, user_ids: list[int], **kwargs) -> list[dict]: 114 | """ 115 | Get media by user ids. 116 | 117 | Tweet metadata, filtered for tweets containing media. 118 | 119 | @param user_ids: list of user ids 120 | @param kwargs: optional keyword arguments 121 | @return: list of tweet data as dicts 122 | """ 123 | return self._run(Operation.UserMedia, user_ids, **kwargs) 124 | 125 | def likes(self, user_ids: list[int], **kwargs) -> list[dict]: 126 | """ 127 | Get likes by user ids. 128 | 129 | Tweet metadata for tweets liked by users. 
130 | 131 | @param user_ids: list of user ids 132 | @param kwargs: optional keyword arguments 133 | @return: list of tweet data as dicts 134 | """ 135 | return self._run(Operation.Likes, user_ids, **kwargs) 136 | 137 | def followers(self, user_ids: list[int], **kwargs) -> list[dict]: 138 | """ 139 | Get followers by user ids. 140 | 141 | User data for users followers list. 142 | 143 | @param user_ids: list of user ids 144 | @param kwargs: optional keyword arguments 145 | @return: list of user data as dicts 146 | """ 147 | return self._run(Operation.Followers, user_ids, **kwargs) 148 | 149 | def following(self, user_ids: list[int], **kwargs) -> list[dict]: 150 | """ 151 | Get following by user ids. 152 | 153 | User metadata for users following list. 154 | 155 | @param user_ids: list of user ids 156 | @param kwargs: optional keyword arguments 157 | @return: list of user data as dicts 158 | """ 159 | return self._run(Operation.Following, user_ids, **kwargs) 160 | 161 | def favoriters(self, tweet_ids: list[int], **kwargs) -> list[dict]: 162 | """ 163 | Get favoriters by tweet ids. 164 | 165 | User data for users who liked these tweets. 166 | 167 | @param tweet_ids: list of tweet ids 168 | @param kwargs: optional keyword arguments 169 | @return: list of user data as dicts 170 | """ 171 | return self._run(Operation.Favoriters, tweet_ids, **kwargs) 172 | 173 | def retweeters(self, tweet_ids: list[int], **kwargs) -> list[dict]: 174 | """ 175 | Get retweeters by tweet ids. 176 | 177 | User data for users who retweeted these tweets. 178 | 179 | @param tweet_ids: list of tweet ids 180 | @param kwargs: optional keyword arguments 181 | @return: list of user data as dicts 182 | """ 183 | return self._run(Operation.Retweeters, tweet_ids, **kwargs) 184 | 185 | def tweet_stats(self, user_ids: list[int], **kwargs) -> list[dict]: 186 | """ 187 | Get tweet statistics by user ids. 188 | 189 | @param user_ids: list of user ids 190 | @param kwargs: optional keyword arguments 191 | @return: list of tweet statistics as dicts 192 | """ 193 | return self._run(Operation.TweetStats, user_ids, **kwargs) 194 | 195 | def users_by_ids(self, user_ids: list[int], **kwargs) -> list[dict]: 196 | """ 197 | Get user data by user ids. 198 | 199 | Special batch query for user data. Most efficient way to get user data. 200 | 201 | @param user_ids: list of user ids 202 | @param kwargs: optional keyword arguments 203 | @return: list of user data as dicts 204 | """ 205 | return self._run(Operation.UsersByRestIds, batch_ids(user_ids), **kwargs) 206 | 207 | def recommended_users(self, user_ids: list[int] = None, **kwargs) -> list[dict]: 208 | """ 209 | Get recommended users by user ids, or general recommendations if no user ids are provided. 210 | 211 | @param user_ids: list of user ids 212 | @param kwargs: optional keyword arguments 213 | @return: list of recommended users data as dicts 214 | """ 215 | if user_ids: 216 | contexts = [{"context": orjson.dumps({"contextualUserId": x}).decode()} for x in user_ids] 217 | else: 218 | contexts = [{'context': None}] 219 | return self._run(Operation.ConnectTabTimeline, contexts, **kwargs) 220 | 221 | def profile_spotlights(self, screen_names: list[str], **kwargs) -> list[dict]: 222 | """ 223 | Get user data by screen names. 224 | 225 | This endpoint is included for completeness only. 226 | Use the batched query `users_by_ids` instead if you wish to pull user profile data. 
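        For example (where `scraper` is an instance of this class; names and ids are placeholders):

            >>> scraper.profile_spotlights(['user1', 'user2'])   # one lookup per screen name
            >>> scraper.users_by_ids([123, 456])                 # preferred batched query by rest id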
227 | 228 | @param screen_names: list of user screen names (usernames) 229 | @param kwargs: optional keyword arguments 230 | @return: list of user data as dicts 231 | """ 232 | return self._run(Operation.ProfileSpotlightsQuery, screen_names, **kwargs) 233 | 234 | def users_by_id(self, user_ids: list[int], **kwargs) -> list[dict]: 235 | """ 236 | Get user data by user ids. 237 | 238 | This endpoint is included for completeness only. 239 | Use the batched query `users_by_ids` instead if you wish to pull user profile data. 240 | 241 | 242 | @param user_ids: list of user ids 243 | @param kwargs: optional keyword arguments 244 | @return: list of user data as dicts 245 | """ 246 | return self._run(Operation.UserByRestId, user_ids, **kwargs) 247 | 248 | def download_media(self, ids: list[int], photos: bool = True, videos: bool = True, cards: bool = True, hq_img_variant: bool = True, video_thumb: bool = False, out: str = 'media', 249 | metadata_out: str = 'media.json', **kwargs) -> dict: 250 | """ 251 | Download and extract media metadata from Tweets 252 | 253 | @param ids: list of Tweet IDs 254 | @param photos: download images 255 | @param videos: download videos 256 | @param cards: download cards 257 | @param hq_img_variant: download highest quality image, options: {"orig", "4096x4096"} 258 | @param video_thumb: download video thumbnails 259 | @param out: output file for media 260 | @param metadata_out: output file for media metadata 261 | @return: media data 262 | """ 263 | 264 | async def process(fns: Generator) -> list: 265 | limits = { 266 | 'max_connections': kwargs.pop('max_connections', 1000), 267 | 'max_keepalive_connections': kwargs.pop('max_keepalive_connections', None), 268 | 'keepalive_expiry': kwargs.pop('keepalive_expiry', 5.0), 269 | } 270 | headers = {'user-agent': random.choice(USER_AGENTS)} 271 | async with AsyncClient(limits=Limits(**limits), headers=headers, http2=True, verify=False, timeout=60, follow_redirects=True) as client: 272 | return await tqdm_asyncio.gather(*(fn(client=client) for fn in fns), desc='Downloading Media') 273 | 274 | def download(urls: list[tuple], out: str) -> Generator: 275 | out = Path(out) 276 | out.mkdir(parents=True, exist_ok=True) 277 | chunk_size = kwargs.pop('chunk_size', None) 278 | 279 | async def get(client: AsyncClient, url: str): 280 | tid, cdn_url = url 281 | ext = urlsplit(cdn_url).path.split('/')[-1] 282 | fname = out / f'{tid}_{ext}' 283 | async with aiofiles.open(fname, 'wb') as fp: 284 | async with client.stream('GET', cdn_url) as r: 285 | async for chunk in r.aiter_raw(chunk_size): 286 | await fp.write(chunk) 287 | 288 | return (partial(get, url=u) for u in urls) 289 | 290 | tweets = self.tweets_by_ids(ids, **kwargs) 291 | media = {} 292 | for data in tweets: 293 | for tweet in data.get('data', {}).get('tweetResult', []): 294 | # TweetWithVisibilityResults and Tweet have different structures 295 | root = tweet.get('result', {}).get('tweet', {}) or tweet.get('result', {}) 296 | if _id := root.get('rest_id'): 297 | date = root.get('legacy', {}).get('created_at', '') 298 | uid = root.get('legacy', {}).get('user_id_str', '') 299 | media[_id] = {'date': date, 'uid': uid, 'img': set(), 'video': {'thumb': set(), 'video_info': {}, 'hq': set()}, 'card': []} 300 | for _media in (y for x in find_key(root, 'media') for y in x if isinstance(x, list)): 301 | if videos: 302 | if vinfo := _media.get('video_info'): 303 | hq = sorted(vinfo.get('variants', []), key=lambda x: -x.get('bitrate', 0))[0]['url'] 304 | media[_id]['video']['video_info'] |= 
vinfo 305 | media[_id]['video']['hq'].add(hq) 306 | if video_thumb: 307 | if url := _media.get('media_url_https', ''): 308 | media[_id]['video']['thumb'].add(url) 309 | if photos: 310 | if (url := _media.get('media_url_https', '')) and "_video_thumb" not in url: 311 | if hq_img_variant: 312 | url = f'{url}?name=orig' 313 | media[_id]['img'].add(url) 314 | if cards: 315 | if card := root.get('card', {}).get('legacy', {}): 316 | media[_id]['card'].extend(card.get('binding_values', [])) 317 | if metadata_out: 318 | media = set2list(media) 319 | metadata_out = Path(metadata_out) 320 | metadata_out.parent.mkdir(parents=True, exist_ok=True) # if user specifies subdir 321 | metadata_out.write_bytes(orjson.dumps(media)) 322 | 323 | res = [] 324 | for k, v in media.items(): 325 | tmp = [] 326 | if photos: 327 | tmp.extend(v['img']) 328 | if videos: 329 | tmp.extend(v['video']['hq']) 330 | if video_thumb: 331 | tmp.extend(v['video']['thumb']) 332 | if cards: 333 | tmp.extend(parse_card_media(v['card'])) 334 | res.extend([(k, m) for m in tmp]) 335 | asyncio.run(process(download(res, out))) 336 | return media 337 | 338 | def trends(self, utc: list[str] = None) -> dict: 339 | """ 340 | Get trends for all UTC offsets 341 | 342 | @param utc: optional list of specific UTC offsets 343 | @return: dict of trends 344 | """ 345 | 346 | async def get_trends(client: AsyncClient, offset: str, url: str): 347 | try: 348 | client.headers['x-twitter-utcoffset'] = offset 349 | r = await client.get(url) 350 | trends = find_key(r.json(), 'item') 351 | return {t['content']['trend']['name']: t for t in trends} 352 | except Exception as e: 353 | if self.debug: 354 | self.logger.error(f'[{RED}error{RESET}] Failed to get trends\n{e}') 355 | 356 | async def process(): 357 | url = set_qs('https://twitter.com/i/api/2/guide.json', trending_params) 358 | offsets = utc or ["-1200", "-1100", "-1000", "-0900", "-0800", "-0700", "-0600", "-0500", "-0400", "-0300", 359 | "-0200", "-0100", "+0000", "+0100", "+0200", "+0300", "+0400", "+0500", "+0600", "+0700", 360 | "+0800", "+0900", "+1000", "+1100", "+1200", "+1300", "+1400"] 361 | async with AsyncClient(headers=get_headers(self.session)) as client: 362 | tasks = (get_trends(client, o, url) for o in offsets) 363 | if self.pbar: 364 | return await tqdm_asyncio.gather(*tasks, desc='Getting trends') 365 | return await asyncio.gather(*tasks) 366 | 367 | trends = asyncio.run(process()) 368 | out = self.out / 'raw' / 'trends' 369 | out.mkdir(parents=True, exist_ok=True) 370 | (out / f'{time.time_ns()}.json').write_text(orjson.dumps( 371 | {k: v for d in trends for k, v in d.items()}, 372 | option=orjson.OPT_INDENT_2 | orjson.OPT_SORT_KEYS).decode(), encoding='utf-8') 373 | return trends 374 | 375 | def spaces(self, *, rooms: list[str] = None, search: list[dict] = None, audio: bool = False, chat: bool = False, 376 | **kwargs) -> list[dict]: 377 | """ 378 | Get Twitter spaces data 379 | 380 | - Get data for specific rooms or search for rooms. 381 | - Get audio and/or chat data for rooms. 
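For example (illustrative; the room id is a placeholder and `scraper` stands for an instance of this class), `scraper.spaces(rooms=['1dRJZEpyjlNGB'], audio=True, chat=True)` fetches metadata for that room, downloads its audio to disk, and returns its chat history.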
382 | 383 | @param rooms: list of room ids 384 | @param search: list of dicts containing search parameters 385 | @param audio: flag to include audio data 386 | @param chat: flag to include chat data 387 | @param kwargs: optional keyword arguments 388 | @return: list of spaces data 389 | """ 390 | if rooms: 391 | spaces = self._run(Operation.AudioSpaceById, rooms, **kwargs) 392 | else: 393 | res = self._run(Operation.AudioSpaceSearch, search, **kwargs) 394 | search_results = set(find_key(res, 'rest_id')) 395 | spaces = self._run(Operation.AudioSpaceById, search_results, **kwargs) 396 | if audio or chat: 397 | return self._get_space_data(spaces, audio, chat) 398 | return spaces 399 | 400 | def _get_space_data(self, spaces: list[dict], audio=True, chat=True): 401 | streams = self._check_streams(spaces) 402 | chat_data = None 403 | if chat: 404 | temp = [] # get necessary keys instead of passing large dicts 405 | for stream in filter(lambda x: x['stream'], streams): 406 | meta = stream['space']['data']['audioSpace']['metadata'] 407 | if meta['state'] not in {SpaceState.Running, SpaceState.NotStarted}: 408 | temp.append({ 409 | 'rest_id': meta['rest_id'], 410 | 'chat_token': stream['stream']['chatToken'], 411 | 'media_key': meta['media_key'], 412 | 'state': meta['state'], 413 | }) 414 | chat_data = self._get_chat_data(temp) 415 | if audio: 416 | temp = [] 417 | for stream in streams: 418 | if stream.get('stream'): 419 | chunks = self._get_chunks(stream['stream']['source']['location']) 420 | temp.append({ 421 | 'rest_id': stream['space']['data']['audioSpace']['metadata']['rest_id'], 422 | 'chunks': chunks, 423 | }) 424 | self._download_audio(temp) 425 | return chat_data 426 | 427 | async def _get_stream(self, client: AsyncClient, media_key: str) -> dict | None: 428 | params = { 429 | 'client': 'web', 430 | 'use_syndication_guest_id': 'false', 431 | 'cookie_set_host': 'twitter.com', 432 | } 433 | url = f'https://twitter.com/i/api/1.1/live_video_stream/status/{media_key}' 434 | try: 435 | r = await client.get(url, params=params) 436 | return r.json() 437 | except Exception as e: 438 | if self.debug: 439 | self.logger.error(f'stream not available for playback\n{e}') 440 | 441 | async def _init_chat(self, client: AsyncClient, chat_token: str) -> dict: 442 | payload = {'chat_token': chat_token} # stream['chatToken'] 443 | url = 'https://proxsee.pscp.tv/api/v2/accessChatPublic' 444 | r = await client.post(url, json=payload) 445 | return r.json() 446 | 447 | async def _get_chat(self, client: AsyncClient, endpoint: str, access_token: str, cursor: str = '') -> list[dict]: 448 | payload = { 449 | 'access_token': access_token, 450 | 'cursor': cursor, 451 | 'limit': 1000, # or 0 452 | 'since': None, 453 | 'quick_get': True, 454 | } 455 | url = f"{endpoint}/chatapi/v1/history" 456 | r = await client.post(url, json=payload) 457 | data = r.json() 458 | res = [data] 459 | while cursor := data.get('cursor'): 460 | try: 461 | r = await client.post(url, json=payload | {'cursor': cursor}) 462 | if r.status_code == 503: 463 | # not our fault, service error, something went wrong with the stream 464 | break 465 | data = r.json() 466 | res.append(data) 467 | except ReadTimeout as e: 468 | if self.debug: 469 | self.logger.debug(f'End of chat data\n{e}') 470 | break 471 | 472 | parsed = [] 473 | for r in res: 474 | messages = r.get('messages', []) 475 | for msg in messages: 476 | try: 477 | msg['payload'] = orjson.loads(msg.get('payload', '{}')) 478 | msg['payload']['body'] = orjson.loads(msg['payload'].get('body')) 479 
| except Exception as e: 480 | if self.debug: 481 | self.logger.error(f'Failed to parse chat message\n{e}') 482 | parsed.extend(messages) 483 | return parsed 484 | 485 | def _get_chunks(self, location: str) -> list[str]: 486 | try: 487 | url = URL(location) 488 | stream_type = url.params.get('type') 489 | r = self.session.get( 490 | url=location, 491 | params={'type': stream_type}, 492 | headers={'authority': url.host} 493 | ) 494 | # don't need an m3u8 parser 495 | chunks = re.findall('\n(chunk_.*)\n', r.text, flags=re.I) 496 | url = '/'.join(location.split('/')[:-1]) 497 | return [f'{url}/{chunk}' for chunk in chunks] 498 | except Exception as e: 499 | if self.debug: 500 | self.logger.error(f'Failed to get chunks\n{e}') 501 | 502 | def _get_chat_data(self, keys: list[dict]) -> list[dict]: 503 | async def get(c: AsyncClient, key: dict) -> dict: 504 | info = await self._init_chat(c, key['chat_token']) 505 | chat = await self._get_chat(c, info['endpoint'], info['access_token']) 506 | if self.save: 507 | (self.out / 'raw' / f"chat_{key['rest_id']}.json").write_bytes(orjson.dumps(chat)) 508 | return { 509 | 'space': key['rest_id'], 510 | 'chat': chat, 511 | 'info': info, 512 | } 513 | 514 | async def process(): 515 | (self.out / 'raw').mkdir(parents=True, exist_ok=True) 516 | limits = Limits(max_connections=100, max_keepalive_connections=10) 517 | headers = self.session.headers if self.guest else get_headers(self.session) 518 | cookies = self.session.cookies 519 | async with AsyncClient(limits=limits, headers=headers, cookies=cookies, timeout=20) as c: 520 | tasks = (get(c, key) for key in keys) 521 | if self.pbar: 522 | return await tqdm_asyncio.gather(*tasks, desc='Downloading chat data') 523 | return await asyncio.gather(*tasks) 524 | 525 | return asyncio.run(process()) 526 | 527 | def _download_audio(self, data: list[dict]) -> None: 528 | async def get(s: AsyncClient, chunk: str, rest_id: str) -> tuple: 529 | r = await s.get(chunk) 530 | return rest_id, r 531 | 532 | async def process(data: list[dict]) -> list: 533 | limits = Limits(max_connections=100, max_keepalive_connections=10) 534 | headers = self.session.headers if self.guest else get_headers(self.session) 535 | cookies = self.session.cookies 536 | async with AsyncClient(limits=limits, headers=headers, cookies=cookies, timeout=20) as c: 537 | tasks = [] 538 | for d in data: 539 | tasks.extend([get(c, chunk, d['rest_id']) for chunk in d['chunks']]) 540 | if self.pbar: 541 | return await tqdm_asyncio.gather(*tasks, desc='Downloading audio') 542 | return await asyncio.gather(*tasks) 543 | 544 | chunks = asyncio.run(process(data)) 545 | streams = {} 546 | [streams.setdefault(_id, []).append(chunk) for _id, chunk in chunks] 547 | # ensure chunks are in correct order 548 | for k, v in streams.items(): 549 | streams[k] = sorted(v, key=lambda x: int(re.findall('_(\d+)_\w\.aac$', x.url.path)[0])) 550 | out = self.out / 'audio' 551 | out.mkdir(parents=True, exist_ok=True) 552 | for space_id, chunks in streams.items(): 553 | # 1hr ~= 50mb 554 | with open(out / f'{space_id}.aac', 'wb') as fp: 555 | [fp.write(c.content) for c in chunks] 556 | 557 | def _check_streams(self, keys: list[dict]) -> list[dict]: 558 | async def get(c: AsyncClient, space: dict) -> dict: 559 | media_key = space['data']['audioSpace']['metadata']['media_key'] 560 | stream = await self._get_stream(c, media_key) 561 | return {'space': space, 'stream': stream} 562 | 563 | async def process(): 564 | limits = Limits(max_connections=100, max_keepalive_connections=10) 565 | 
headers = self.session.headers if self.guest else get_headers(self.session) 566 | cookies = self.session.cookies 567 | async with AsyncClient(limits=limits, headers=headers, cookies=cookies, timeout=20) as c: 568 | return await asyncio.gather(*(get(c, key) for key in keys)) 569 | 570 | return asyncio.run(process()) 571 | 572 | def _run(self, operation: tuple[dict, str, str], queries: set | list[int | str | list | dict], **kwargs): 573 | keys, qid, name = operation 574 | # stay within rate-limits 575 | if (l := len(queries)) > MAX_ENDPOINT_LIMIT: 576 | if self.debug: 577 | self.logger.warning(f'Got {l} queries, truncating to first {MAX_ENDPOINT_LIMIT}.') 578 | queries = list(queries)[:MAX_ENDPOINT_LIMIT] 579 | 580 | if all(isinstance(q, dict) for q in queries): 581 | data = asyncio.run(self._process(operation, list(queries), **kwargs)) 582 | return get_json(data, **kwargs) 583 | 584 | # queries are of type set | list[int|str], need to convert to list[dict] 585 | _queries = [{k: q} for q in queries for k, v in keys.items()] 586 | res = asyncio.run(self._process(operation, _queries, **kwargs)) 587 | data = get_json(res, **kwargs) 588 | return data.pop() if kwargs.get('cursor') else flatten(data) 589 | 590 | async def _query(self, client: AsyncClient, operation: tuple, **kwargs) -> Response: 591 | keys, qid, name = operation 592 | params = { 593 | 'variables': Operation.default_variables | keys | kwargs, 594 | 'features': Operation.default_features, 595 | } 596 | r = await client.get(f'https://twitter.com/i/api/graphql/{qid}/{name}', params=build_params(params)) 597 | 598 | try: 599 | self.rate_limits[name] = {k: int(v) for k, v in r.headers.items() if 'rate-limit' in k} 600 | except Exception as e: 601 | self.logger.debug(f'{e}') 602 | 603 | if self.debug: 604 | log(self.logger, self.debug, r) 605 | if self.save: 606 | await save_json(r, self.out, name, **kwargs) 607 | return r 608 | 609 | async def _process(self, operation: tuple, queries: list[dict], **kwargs): 610 | headers = self.session.headers if self.guest else get_headers(self.session) 611 | cookies = self.session.cookies 612 | async with AsyncClient(limits=Limits(max_connections=MAX_ENDPOINT_LIMIT), headers=headers, cookies=cookies, timeout=20) as c: 613 | tasks = (self._paginate(c, operation, **q, **kwargs) for q in queries) 614 | if self.pbar: 615 | return await tqdm_asyncio.gather(*tasks, desc=operation[-1]) 616 | return await asyncio.gather(*tasks) 617 | 618 | async def _paginate(self, client: AsyncClient, operation: tuple, **kwargs): 619 | limit = kwargs.pop('limit', math.inf) 620 | cursor = kwargs.pop('cursor', None) 621 | is_resuming = False 622 | dups = 0 623 | DUP_LIMIT = 3 624 | if cursor: 625 | is_resuming = True 626 | res = [] 627 | ids = set() 628 | else: 629 | try: 630 | r = await self._query(client, operation, **kwargs) 631 | initial_data = r.json() 632 | res = [r] 633 | ids = {x for x in find_key(initial_data, 'rest_id') if x[0].isnumeric()} 634 | 635 | cursor = get_cursor(initial_data) 636 | except Exception as e: 637 | if self.debug: 638 | self.logger.error(f'Failed to get initial pagination data: {e}') 639 | return 640 | while (dups < DUP_LIMIT) and cursor: 641 | prev_len = len(ids) 642 | if prev_len >= limit: 643 | break 644 | try: 645 | r = await self._query(client, operation, cursor=cursor, **kwargs) 646 | data = r.json() 647 | except Exception as e: 648 | if self.debug: 649 | self.logger.error(f'Failed to get pagination data\n{e}') 650 | return 651 | cursor = get_cursor(data) 652 | ids |= {x for x in find_key(data, 'rest_id') 
if x[0].isnumeric()} 653 | 654 | if self.debug: 655 | self.logger.debug(f'Unique results: {len(ids)}\tcursor: {cursor}') 656 | if prev_len == len(ids): 657 | dups += 1 658 | res.append(r) 659 | if is_resuming: 660 | return res, cursor 661 | return res 662 | 663 | async def _space_listener(self, chat: dict, frequency: int): 664 | rand_color = lambda: random.choice([RED, GREEN, RESET, BLUE, CYAN, MAGENTA, YELLOW]) 665 | uri = f"wss://{URL(chat['endpoint']).host}/chatapi/v1/chatnow" 666 | with open('chatlog.jsonl', 'ab') as fp: 667 | async with websockets.connect(uri) as ws: 668 | await ws.send(orjson.dumps({ 669 | "payload": orjson.dumps({"access_token": chat['access_token']}).decode(), 670 | "kind": 3 671 | }).decode()) 672 | await ws.send(orjson.dumps({ 673 | "payload": orjson.dumps({ 674 | "body": orjson.dumps({ 675 | "room": chat['room_id'] 676 | }).decode(), 677 | "kind": 1 678 | }).decode(), 679 | "kind": 2 680 | }).decode()) 681 | 682 | prev_message = '' 683 | prev_user = '' 684 | while True: 685 | msg = await ws.recv() 686 | temp = orjson.loads(msg) 687 | kind = temp.get('kind') 688 | if kind == 1: 689 | signature = temp.get('signature') 690 | payload = orjson.loads(temp.get('payload')) 691 | payload['body'] = orjson.loads(payload.get('body')) 692 | res = { 693 | 'kind': kind, 694 | 'payload': payload, 695 | 'signature': signature, 696 | } 697 | fp.write(orjson.dumps(res) + b'\n') 698 | body = payload['body'] 699 | message = body.get('body') 700 | user = body.get('username') 701 | # user_id = body.get('user_id') 702 | final = body.get('final') 703 | 704 | if frequency == 1: 705 | if final: 706 | if user != prev_user: 707 | print() 708 | print(f"({rand_color()}{user}{RESET})") 709 | prev_user = user 710 | # print(message, end=' ') 711 | print(message) 712 | 713 | # dirty 714 | if frequency == 2: 715 | if user and (not final): 716 | if user != prev_user: 717 | print() 718 | print(f"({rand_color()}{user}{RESET})") 719 | prev_user = user 720 | new_message = re.sub(f'^({prev_message})', '', message, flags=re.I).strip() 721 | if len(new_message) < 100: 722 | print(new_message, end=' ') 723 | prev_message = message 724 | 725 | async def _get_live_chats(self, client: Client, spaces: list[dict]): 726 | async def get(c: AsyncClient, space: dict) -> list[dict]: 727 | media_key = space['data']['audioSpace']['metadata']['media_key'] 728 | r = await c.get( 729 | url=f'https://twitter.com/i/api/1.1/live_video_stream/status/{media_key}', 730 | params={ 731 | 'client': 'web', 732 | 'use_syndication_guest_id': 'false', 733 | 'cookie_set_host': 'twitter.com', 734 | }) 735 | r = await c.post( 736 | url='https://proxsee.pscp.tv/api/v2/accessChatPublic', 737 | json={'chat_token': r.json()['chatToken']} 738 | ) 739 | return r.json() 740 | 741 | limits = Limits(max_connections=100) 742 | async with AsyncClient(headers=client.headers, limits=limits, timeout=30) as c: 743 | tasks = (get(c, _id) for _id in spaces) 744 | if self.pbar: 745 | return await tqdm_asyncio.gather(*tasks, desc='Getting live transcripts') 746 | return await asyncio.gather(*tasks) 747 | 748 | def space_live_transcript(self, room: str, frequency: int = 1): 749 | """ 750 | Log live transcript of a space 751 | 752 | @param room: room id 753 | @param frequency: granularity of transcript. 
1 for real-time, 2 for post-processed or "finalized" transcript 754 | @return: None 755 | """ 756 | 757 | async def get(spaces: list[dict]): 758 | client = init_session() 759 | chats = await self._get_live_chats(client, spaces) 760 | await asyncio.gather(*(self._space_listener(c, frequency) for c in chats)) 761 | 762 | spaces = self.spaces(rooms=[room]) 763 | asyncio.run(get(spaces)) 764 | 765 | def spaces_live(self, rooms: list[str]): 766 | """ 767 | Capture live audio stream from spaces 768 | 769 | Limited to 500 rooms per IP, as defined by twitter's rate limits. 770 | 771 | @param rooms: list of room ids 772 | @return: None 773 | """ 774 | chunk_idx = lambda chunk: re.findall('_(\d+)_\w\.aac', chunk)[0] 775 | sort_chunks = lambda chunks: sorted(chunks, key=lambda x: int(chunk_idx(x))) 776 | parse_chunks = lambda txt: re.findall('\n(chunk_.*)\n', txt, flags=re.I) 777 | 778 | async def get_m3u8(client: AsyncClient, space: dict) -> dict: 779 | try: 780 | media_key = space['data']['audioSpace']['metadata']['media_key'] 781 | r = await client.get( 782 | url=f'https://twitter.com/i/api/1.1/live_video_stream/status/{media_key}', 783 | params={'client': 'web', 'use_syndication_guest_id': 'false', 'cookie_set_host': 'twitter.com'} 784 | ) 785 | data = r.json() 786 | room = data['shareUrl'].split('/')[-1] 787 | return {"url": data['source']['location'], "room": room} 788 | except Exception as e: 789 | room = space['data']['audioSpace']['metadata']['rest_id'] 790 | if self.debug: 791 | self.logger.error(f'Failed to get stream info for https://twitter.com/i/spaces/{room}\n{e}') 792 | 793 | async def get_chunks(client: AsyncClient, url: str) -> list[str]: 794 | try: 795 | url = URL(url) 796 | r = await client.get( 797 | url=url, 798 | params={'type': url.params.get('type')}, 799 | headers={'authority': url.host} 800 | ) 801 | base = '/'.join(str(url).split('/')[:-1]) 802 | return [f'{base}/{c}' for c in parse_chunks(r.text)] 803 | except Exception as e: 804 | if self.debug: 805 | self.logger.error(f'Failed to get chunks\n{e}') 806 | 807 | async def poll_space(client: AsyncClient, space: dict) -> dict | None: 808 | curr = 0 809 | lim = 10 810 | all_chunks = set() 811 | playlist = await get_m3u8(client, space) 812 | if not playlist: return 813 | chunks = await get_chunks(client, playlist['url']) 814 | if not chunks: return 815 | out = self.out / 'live' 816 | out.mkdir(parents=True, exist_ok=True) 817 | async with aiofiles.open(out / f'{playlist["room"]}.aac', 'wb') as fp: 818 | while curr < lim: 819 | chunks = await get_chunks(client, playlist['url']) 820 | if not chunks: 821 | return {'space': space, 'chunks': sort_chunks(all_chunks)} 822 | new_chunks = set(chunks) - all_chunks 823 | all_chunks |= new_chunks 824 | for c in sort_chunks(new_chunks): 825 | try: 826 | if self.debug: 827 | self.logger.debug(f"write: chunk [{chunk_idx(c)}]\t{c}") 828 | r = await client.get(c) 829 | await fp.write(r.content) 830 | except Exception as e: 831 | if self.debug: 832 | self.logger.error(f'Failed to write chunk {c}\n{e}') 833 | curr = 0 if new_chunks else curr + 1 834 | # wait for new chunks. 
dynamic playlist is updated every 2-3 seconds 835 | await asyncio.sleep(random.random() + 1.5) 836 | return {'space': space, 'chunks': sort_chunks(all_chunks)} 837 | 838 | async def process(spaces: list[dict]): 839 | limits = Limits(max_connections=100) 840 | headers, cookies = self.session.headers, self.session.cookies 841 | async with AsyncClient(limits=limits, headers=headers, cookies=cookies, timeout=20) as c: 842 | return await asyncio.gather(*(poll_space(c, space) for space in spaces)) 843 | 844 | spaces = self.spaces(rooms=rooms) 845 | return asyncio.run(process(spaces)) 846 | 847 | def _init_logger(self, **kwargs) -> Logger: 848 | if kwargs.get('debug'): 849 | cfg = kwargs.get('log_config') 850 | logging.config.dictConfig(cfg or LOG_CONFIG) 851 | 852 | # only support one logger 853 | logger_name = list(LOG_CONFIG['loggers'].keys())[0] 854 | 855 | # set level of all other loggers to ERROR 856 | for name in logging.root.manager.loggerDict: 857 | if name != logger_name: 858 | logging.getLogger(name).setLevel(logging.ERROR) 859 | 860 | return logging.getLogger(logger_name) 861 | 862 | def _validate_session(self, *args, **kwargs): 863 | email, username, password, session = args 864 | 865 | # validate credentials 866 | if all((email, username, password)): 867 | return login(email, username, password, **kwargs) 868 | 869 | # invalid credentials, try validating session 870 | if session and all(session.cookies.get(c) for c in {'ct0', 'auth_token'}): 871 | return session 872 | 873 | # invalid credentials and session 874 | cookies = kwargs.get('cookies') 875 | 876 | # try validating cookies dict 877 | if isinstance(cookies, dict) and all(cookies.get(c) for c in {'ct0', 'auth_token'}): 878 | _session = Client(cookies=cookies, follow_redirects=True) 879 | _session.headers.update(get_headers(_session)) 880 | return _session 881 | 882 | # try validating cookies from file 883 | if isinstance(cookies, str): 884 | _session = Client(cookies=orjson.loads(Path(cookies).read_bytes()), follow_redirects=True) 885 | _session.headers.update(get_headers(_session)) 886 | return _session 887 | 888 | # no session, credentials, or cookies provided. use guest session. 
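# (illustrative) e.g. cookies={'ct0': '...', 'auth_token': '...'} or cookies='account.cookies' (a file previously written by save_cookies()) both satisfy the branches above; names shown are placeholders.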
889 | if self.debug: 890 | self.logger.warning(f'{RED}This is a guest session, some endpoints cannot be accessed.{RESET}\n') 891 | self.guest = True 892 | return session 893 | 894 | @property 895 | def id(self) -> int: 896 | """ Get User ID """ 897 | return int(re.findall('"u=(\d+)"', self.session.cookies.get('twid'))[0]) 898 | 899 | def save_cookies(self, fname: str = None): 900 | """ Save cookies to file """ 901 | cookies = self.session.cookies 902 | Path(f'{fname or cookies.get("username")}.cookies').write_bytes(orjson.dumps(dict(cookies))) 903 | 904 | def _v1_rate_limits(self): 905 | return self.session.get('https://api.twitter.com/1.1/application/rate_limit_status.json').json() 906 | -------------------------------------------------------------------------------- /twitter/search.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import logging.config 3 | import math 4 | import platform 5 | import random 6 | import re 7 | import time 8 | from logging import Logger 9 | from pathlib import Path 10 | 11 | import orjson 12 | from httpx import AsyncClient, Client 13 | 14 | from .constants import * 15 | from .login import login 16 | from .util import get_headers, find_key, build_params 17 | 18 | reset = '\x1b[0m' 19 | colors = [f'\x1b[{i}m' for i in range(31, 37)] 20 | 21 | try: 22 | if get_ipython().__class__.__name__ == 'ZMQInteractiveShell': 23 | import nest_asyncio 24 | 25 | nest_asyncio.apply() 26 | except: 27 | ... 28 | 29 | if platform.system() != 'Windows': 30 | try: 31 | import uvloop 32 | 33 | uvloop.install() 34 | except ImportError as e: 35 | ... 36 | 37 | 38 | class Search: 39 | def __init__(self, email: str = None, username: str = None, password: str = None, session: Client = None, **kwargs): 40 | self.save = kwargs.get('save', True) 41 | self.debug = kwargs.get('debug', 0) 42 | self.logger = self._init_logger(**kwargs) 43 | self.session = self._validate_session(email, username, password, session, **kwargs) 44 | 45 | def run(self, queries: list[dict], limit: int = math.inf, out: str = 'data/search_results', **kwargs): 46 | out = Path(out) 47 | out.mkdir(parents=True, exist_ok=True) 48 | return asyncio.run(self.process(queries, limit, out, **kwargs)) 49 | 50 | async def process(self, queries: list[dict], limit: int, out: Path, **kwargs) -> list: 51 | async with AsyncClient(headers=get_headers(self.session)) as s: 52 | return await asyncio.gather(*(self.paginate(s, q, limit, out, **kwargs) for q in queries)) 53 | 54 | async def paginate(self, client: AsyncClient, query: dict, limit: int, out: Path, **kwargs) -> list[dict]: 55 | params = { 56 | 'variables': { 57 | 'count': 20, 58 | 'querySource': 'typed_query', 59 | 'rawQuery': query['query'], 60 | 'product': query['category'] 61 | }, 62 | 'features': Operation.default_features, 63 | 'fieldToggles': {'withArticleRichContentState': False}, 64 | } 65 | 66 | res = [] 67 | cursor = '' 68 | total = set() 69 | while True: 70 | if cursor: 71 | params['variables']['cursor'] = cursor 72 | data, entries, cursor = await self.backoff(lambda: self.get(client, params), **kwargs) 73 | res.extend(entries) 74 | if len(entries) <= 2 or len(total) >= limit: # just cursors 75 | if self.debug: 76 | self.logger.debug(f'[{GREEN}success{RESET}] Returned {len(total)} search results for {query["query"]}') 77 | return res 78 | total |= set(find_key(entries, 'entryId')) 79 | if self.debug: 80 | self.logger.debug(f'{query["query"]}') 81 | if self.save: 82 | (out / 
f'{time.time_ns()}.json').write_bytes(orjson.dumps(entries)) 83 | 84 | async def get(self, client: AsyncClient, params: dict) -> tuple: 85 | _, qid, name = Operation.SearchTimeline 86 | r = await client.get(f'https://twitter.com/i/api/graphql/{qid}/{name}', params=build_params(params)) 87 | data = r.json() 88 | cursor = self.get_cursor(data) 89 | entries = [y for x in find_key(data, 'entries') for y in x if re.search(r'^(tweet|user)-', y['entryId'])] 90 | # add on query info 91 | for e in entries: 92 | e['query'] = params['variables']['rawQuery'] 93 | return data, entries, cursor 94 | 95 | def get_cursor(self, data: list[dict]): 96 | for e in find_key(data, 'content'): 97 | if e.get('cursorType') == 'Bottom': 98 | return e['value'] 99 | 100 | async def backoff(self, fn, **kwargs): 101 | retries = kwargs.get('retries', 3) 102 | for i in range(retries + 1): 103 | try: 104 | data, entries, cursor = await fn() 105 | if errors := data.get('errors'): 106 | for e in errors: 107 | if self.debug: 108 | self.logger.warning(f'{YELLOW}{e.get("message")}{RESET}') 109 | return [], [], '' 110 | ids = set(find_key(data, 'entryId')) 111 | if len(ids) >= 2: 112 | return data, entries, cursor 113 | except Exception as e: 114 | if i == retries: 115 | if self.debug: 116 | self.logger.debug(f'Max retries exceeded\n{e}') 117 | return 118 | t = 2 ** i + random.random() 119 | if self.debug: 120 | self.logger.debug(f'Retrying in {f"{t:.2f}"} seconds\t\t{e}') 121 | await asyncio.sleep(t) 122 | 123 | def _init_logger(self, **kwargs) -> Logger: 124 | if kwargs.get('debug'): 125 | cfg = kwargs.get('log_config') 126 | logging.config.dictConfig(cfg or LOG_CONFIG) 127 | 128 | # only support one logger 129 | logger_name = list(LOG_CONFIG['loggers'].keys())[0] 130 | 131 | # set level of all other loggers to ERROR 132 | for name in logging.root.manager.loggerDict: 133 | if name != logger_name: 134 | logging.getLogger(name).setLevel(logging.ERROR) 135 | 136 | return logging.getLogger(logger_name) 137 | 138 | @staticmethod 139 | def _validate_session(*args, **kwargs): 140 | email, username, password, session = args 141 | 142 | # validate credentials 143 | if all((email, username, password)): 144 | return login(email, username, password, **kwargs) 145 | 146 | # invalid credentials, try validating session 147 | if session and all(session.cookies.get(c) for c in {'ct0', 'auth_token'}): 148 | return session 149 | 150 | # invalid credentials and session 151 | cookies = kwargs.get('cookies') 152 | 153 | # try validating cookies dict 154 | if isinstance(cookies, dict) and all(cookies.get(c) for c in {'ct0', 'auth_token'}): 155 | _session = Client(cookies=cookies, follow_redirects=True) 156 | _session.headers.update(get_headers(_session)) 157 | return _session 158 | 159 | # try validating cookies from file 160 | if isinstance(cookies, str): 161 | _session = Client(cookies=orjson.loads(Path(cookies).read_bytes()), follow_redirects=True) 162 | _session.headers.update(get_headers(_session)) 163 | return _session 164 | 165 | raise Exception('Session not authenticated. 
' 166 | 'Please use an authenticated session or remove the `session` argument and try again.') 167 | 168 | @property 169 | def id(self) -> int: 170 | """ Get User ID """ 171 | return int(re.findall('"u=(\d+)"', self.session.cookies.get('twid'))[0]) 172 | 173 | def save_cookies(self, fname: str = None): 174 | """ Save cookies to file """ 175 | cookies = self.session.cookies 176 | Path(f'{fname or cookies.get("username")}.cookies').write_bytes(orjson.dumps(dict(cookies))) 177 | -------------------------------------------------------------------------------- /twitter/util.py: -------------------------------------------------------------------------------- 1 | import random 2 | import re 3 | import time 4 | from logging import Logger 5 | from pathlib import Path 6 | from urllib.parse import urlsplit, urlencode, urlunsplit, parse_qs, quote 7 | 8 | import aiofiles 9 | import orjson 10 | from aiofiles.os import makedirs 11 | from httpx import Response, Client 12 | from textwrap import dedent 13 | 14 | from .constants import GREEN, MAGENTA, RED, RESET, MAX_GQL_CHAR_LIMIT, USER_AGENTS, ORANGE 15 | 16 | 17 | def init_session(): 18 | client = Client(headers={ 19 | 'authorization': 'Bearer AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs=1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA', 20 | 'user-agent': random.choice(USER_AGENTS), 21 | }, follow_redirects=True) 22 | r = client.post('https://api.twitter.com/1.1/guest/activate.json').json() 23 | client.headers.update({ 24 | 'content-type': 'application/json', 25 | 'x-guest-token': r['guest_token'], 26 | 'x-twitter-active-user': 'yes', 27 | }) 28 | return client 29 | 30 | 31 | def batch_ids(ids: list[int | str], char_limit: int = MAX_GQL_CHAR_LIMIT) -> list[list]: 32 | """To avoid 431 errors""" 33 | res, batch, length = [], [], 0 34 | for x in map(str, ids): 35 | if length + len(x) > char_limit: 36 | res.append(batch) 37 | batch, length = [], 0 38 | batch.append(x) 39 | length += len(x) 40 | res.append(batch) if batch else ... 41 | # print(f'Batched {sum(map(len, res))} ids into {len(res)} requests') 42 | return res 43 | 44 | 45 | def build_params(params: dict) -> dict: 46 | return {k: orjson.dumps(v).decode() for k, v in params.items()} 47 | 48 | 49 | async def save_json(r: Response, path: str | Path, name: str, **kwargs): 50 | try: 51 | data = r.json() 52 | kwargs.pop('cursor', None) 53 | 54 | # special case: only 2 endpoints have batch requests as of Dec 2023 55 | if name in {'TweetResultsByRestIds', 'UsersByRestIds'}: 56 | out = f'{path}/batch' 57 | else: 58 | out = f'{path}/{"_".join(map(str, kwargs.values()))}' 59 | await makedirs(out, exist_ok=True) 60 | async with aiofiles.open(f'{out}/{time.time_ns()}_{name}.json', 'wb') as fp: 61 | await fp.write(orjson.dumps(data)) 62 | 63 | except Exception as e: 64 | print(f'Failed to save JSON data for {kwargs}\n{e}') 65 | 66 | 67 | def flatten(seq: list | tuple) -> list: 68 | flat = [] 69 | for e in seq: 70 | if isinstance(e, list | tuple): 71 | flat.extend(flatten(e)) 72 | else: 73 | flat.append(e) 74 | return flat 75 | 76 | 77 | def get_json(res: list[Response], **kwargs) -> list: 78 | cursor = kwargs.get('cursor') 79 | temp = res 80 | if any(isinstance(r, (list, tuple)) for r in res): 81 | temp = flatten(res) 82 | results = [] 83 | for r in temp: 84 | try: 85 | data = r.json() 86 | if cursor: 87 | results.append([data, cursor]) 88 | else: 89 | results.append(data) 90 | except Exception as e: 91 | print('Cannot parse JSON response', e) 92 | print(dedent(f'''{ORANGE} 93 | Checklist: 94 | 1. 
Log-in via the browser and confirm your account is not blocked, or has pending security challenges. 95 | 2. Copy the `ct0` and `auth_token` cookies from the browser. 96 | 3. Re-run your program using these new cookies. 97 | {RESET}''')) 98 | return results 99 | 100 | 101 | def set_qs(url: str, qs: dict, update=False, **kwargs) -> str: 102 | *_, q, f = urlsplit(url) 103 | return urlunsplit((*_, urlencode(qs | parse_qs(q) if update else qs, doseq=True, quote_via=quote, 104 | safe=kwargs.get('safe', '')), f)) 105 | 106 | 107 | def get_cursor(data: list | dict) -> str: 108 | # inefficient, but need to deal with arbitrary schema 109 | entries = find_key(data, 'entries') 110 | if entries: 111 | for entry in entries.pop(): 112 | entry_id = entry.get('entryId', '') 113 | if ('cursor-bottom' in entry_id) or ('cursor-showmorethreads' in entry_id): 114 | content = entry['content'] 115 | if itemContent := content.get('itemContent'): 116 | return itemContent['value'] # v2 cursor 117 | return content['value'] # v1 cursor 118 | 119 | 120 | def get_headers(session, **kwargs) -> dict: 121 | """ 122 | Get the headers required for authenticated requests 123 | """ 124 | cookies = session.cookies 125 | # todo httpx cookie issues 126 | try: 127 | if session._init_with_cookies: 128 | cookies.delete('ct0', domain='.twitter.com') 129 | except: 130 | ... 131 | headers = kwargs | { 132 | 'authorization': 'Bearer AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs=1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA', 133 | 'cookie': '; '.join(f'{k}={v}' for k, v in cookies.items()), 134 | 'referer': 'https://twitter.com/', 135 | 'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/113.0.0.0 Safari/537.36', 136 | 'x-csrf-token': cookies.get('ct0', ''), 137 | 'x-guest-token': cookies.get('guest_token', ''), 138 | 'x-twitter-auth-type': 'OAuth2Session' if cookies.get('auth_token') else '', 139 | 'x-twitter-active-user': 'yes', 140 | 'x-twitter-client-language': 'en', 141 | } 142 | return dict(sorted({k.lower(): v for k, v in headers.items()}.items())) 143 | 144 | 145 | def find_key(obj: any, key: str) -> list: 146 | """ 147 | Find all values of a given key within a nested dict or list of dicts 148 | 149 | Most data of interest is nested, and sometimes defined by different schemas. 150 | It is not worth our time to enumerate all absolute paths to a given key, then update 151 | the paths in our parsing functions every time Twitter changes their API. 152 | Instead, we recursively search for the key here, then run post-processing functions on the results. 
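For example (illustrative), `find_key({'a': {'rest_id': '1', 'b': [{'rest_id': '2'}]}}, 'rest_id')` returns `['1', '2']`.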
153 | 154 | @param obj: dictionary or list of dictionaries 155 | @param key: key to search for 156 | @return: list of values 157 | """ 158 | 159 | def helper(obj: any, key: str, L: list) -> list: 160 | if not obj: 161 | return L 162 | 163 | if isinstance(obj, list): 164 | for e in obj: 165 | L.extend(helper(e, key, [])) 166 | return L 167 | 168 | if isinstance(obj, dict) and obj.get(key): 169 | L.append(obj[key]) 170 | 171 | if isinstance(obj, dict) and obj: 172 | for k in obj: 173 | L.extend(helper(obj[k], key, [])) 174 | return L 175 | 176 | return helper(obj, key, []) 177 | 178 | 179 | def log(logger: Logger, level: int, r: Response): 180 | def stat(r, txt, data): 181 | if level >= 1: 182 | logger.debug(f'{r.url.path}') 183 | if level >= 2: 184 | logger.debug(f'{r.url}') 185 | if level >= 3: 186 | logger.debug(f'{txt}') 187 | if level >= 4: 188 | logger.debug(f'{data}') 189 | 190 | try: 191 | limits = {k: v for k, v in r.headers.items() if 'x-rate-limit' in k} 192 | current_time = int(time.time()) 193 | wait = int(r.headers.get('x-rate-limit-reset', current_time)) - current_time 194 | remaining = limits.get('x-rate-limit-remaining') 195 | limit = limits.get('x-rate-limit-limit') 196 | logger.debug(f"remaining: {MAGENTA}{remaining}/{limit}{RESET} requests") 197 | logger.debug(f'reset: {MAGENTA}{(wait / 60):.2f}{RESET} minutes') 198 | except Exception as e: 199 | logger.error(f'Rate limit info unavailable: {e}') 200 | 201 | try: 202 | status = r.status_code 203 | txt, data, = r.text, r.json() 204 | if 'json' in r.headers.get('content-type', ''): 205 | if data.get('errors') and not find_key(data, 'instructions'): 206 | logger.error(f'[{RED}error{RESET}] {status} {data}') 207 | else: 208 | logger.debug(fmt_status(status)) 209 | stat(r, txt, data) 210 | else: 211 | logger.debug(fmt_status(status)) 212 | stat(r, txt, {}) 213 | except Exception as e: 214 | logger.error(f'Failed to log: {e}') 215 | 216 | 217 | def fmt_status(status: int) -> str: 218 | color = None 219 | if 200 <= status < 300: 220 | color = GREEN 221 | elif 300 <= status < 400: 222 | color = MAGENTA 223 | elif 400 <= status < 600: 224 | color = RED 225 | return f'[{color}{status}{RESET}]' 226 | 227 | 228 | def get_code(cls, retries=5) -> str | None: 229 | """ Get verification code from Proton Mail inbox """ 230 | 231 | def poll_inbox(): 232 | inbox = cls.inbox() 233 | for c in inbox.get('Conversations', []): 234 | if c['Senders'][0]['Address'] in {'info@twitter.com', 'info@x.com'}: 235 | exprs = ['Your Twitter confirmation code is (.+)', '(.+) is your Twitter verification code'] 236 | if temp := list(filter(None, (re.search(expr, c['Subject']) for expr in exprs))): 237 | return temp[0].group(1) 238 | 239 | for i in range(retries + 1): 240 | if code := poll_inbox(): 241 | return code 242 | if i == retries: 243 | print(f'Max retries exceeded') 244 | return 245 | t = 2 ** i + random.random() 246 | print(f'Retrying in {f"{t:.2f}"} seconds') 247 | time.sleep(t) 248 | 249 | 250 | def parse_card_media(cards): 251 | res = [] 252 | for c in cards: 253 | img = c.get('value', {}).get('image_value', {}) 254 | if c.get('key') == 'photo_image_full_size_original': 255 | url = img.get('url') 256 | res.append([url, img.get('width', 0) * img.get('height', 0)]) 257 | return [t[0] for t in sorted(res, key=lambda x: -x[1])] 258 | 259 | 260 | def set2list(d): 261 | if isinstance(d, dict): 262 | return {k: set2list(v) for k, v in d.items()} 263 | if isinstance(d, set): 264 | return list(d) 265 | return d 266 | 
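# --- illustrative usage sketch (hypothetical examples, not part of the library) ---
# batch_ids(range(10), char_limit=5)   ->  [['0', '1', '2', '3', '4'], ['5', '6', '7', '8', '9']]
# set2list({'img': {'a.jpg'}, 'n': 1}) ->  {'img': ['a.jpg'], 'n': 1}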
--------------------------------------------------------------------------------