├── .gitignore ├── README.md ├── TwitterSpace.py ├── const.py.example ├── index.py ├── log.py ├── requirements.txt └── twspace.py /.gitignore: -------------------------------------------------------------------------------- 1 | * 2 | !.gitignore 3 | !requirements.txt 4 | !const.py.example 5 | !index.py 6 | !TwitterSpace.py 7 | !twspace.py 8 | !log.py -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # auto-twitter-space 2 | ### Overview 3 | A script that tracks twitter spaces and can send it to a discord webhook. 4 | With twitter api v2 being paywalled, this script now uses Twitter's private API using guest tokens to find twitter spaces of the host and then the m3u8 url for the space is found and will have it printed on console and posted using a discord webhook. 5 | Optionally, it can also download the twitter space after it ends. 6 | 7 | ### Installation and Requirements 8 | This script requires a few non-standard modules all of which can be installed using the requirements text file. A requirements text file has been included and the command `pip3 install -r requirements.txt` (or pip) can be used to install the required dependencies(except [FFMPEG](https://ffmpeg.org/)) 9 | 10 | So far this has only been tested on Windows 11. 11 | 12 | ### How To Use 13 | Fill out applicable informations inside the `const.py`(if you haven't already renamed `const.py.example` to `const.py`, do so now) 14 | Optionally, obtain a discord webhook url and put it in `const.py` if you want Twitter Space notification to also be posted onto your discord channel. 15 | Optionally you can also specify whether to download the Twitter Space and/or the download location. After the download the files will optionally be posted and sent through a discord webhook. `twspace.py` can also be ran as a standalone script to manually download twitter spaces. 16 | 17 | Cookies such as `AUTH_TOKEN` and `CT0`(CSRF token) must be obtained and can be found via 18 | 19 | `browser's developer tool` > `Application` > `Storage` > `Cookies` 20 | 21 | and provide obtained values in `const.py`. Also, due to rate-limiting, adjust sleep to optimal value. 22 | 23 | With all the setting up out of the way this script can run by calling the main/index file `index.py` 24 | 25 | 26 | 27 | 28 | -------------------------------------------------------------------------------- /TwitterSpace.py: -------------------------------------------------------------------------------- 1 | import dataclasses 2 | from datetime import datetime 3 | import re 4 | 5 | 6 | @dataclasses.dataclass 7 | class TwitterSpace: 8 | handle_id: str 9 | handle_name: str 10 | handle_image: str = None 11 | space_title: str = "Twitter Space" 12 | space_state: str = None 13 | space_creator_id: str = None 14 | space_creator_name: str = None 15 | space_participant_title: str = None # admin or speaker 16 | space_was_running: bool = False 17 | space_started_at: int = 0 18 | space_ended_at: int = 0 19 | space_url: str = None 20 | m3u8_url: str = None 21 | space_notified: bool = False 22 | space_downloaded: bool = False 23 | space_duration: float = 0 24 | rest_id: str = None 25 | media_key: str = None 26 | 27 | def get_strftime(self): 28 | # initial timestamp is in milliseconds and not seconds 29 | return datetime.fromtimestamp(self.space_started_at/1000).strftime("%Y%m%d") 30 | 31 | def get_m3u8_id(self): 32 | return re.search("(.*\/Transcoding\/v1\/hls\/(.*)(\/non_transcode.*))", self.m3u8_url).group(2) 33 | 34 | def get_server(self): 35 | reg_result = re.search("(https:\/\/)((?:[^-]*-){2})(.*)(\.pscp.*)", self.m3u8_url) 36 | # regex will return something like 'prod-fastly-' so remove the last dash 37 | deployment_server = reg_result.group(2)[:-1] 38 | periscope_server = reg_result.group(3) 39 | server = (deployment_server, periscope_server) 40 | return server 41 | 42 | def set_space_duration(self): 43 | self.space_duration = self.space_ended_at/1000.0 - self.space_started_at/1000.0 44 | 45 | # def set_server(self): 46 | # reg_result = re.search("(https:\/\/)((?:[^-]*-){2})(.*)(\.pscp.*)", self.m3u8_url) 47 | # # regex will return something like 'prod-fastly-' so remove the last dash 48 | # self.deployment_server = reg_result.group(2)[:-1] 49 | # self.periscope_server = reg_result.group(3) 50 | 51 | def set_space_details(self, space_details): 52 | self.handle_image = space_details['creator_results']['result']['legacy']['profile_image_url_https'] 53 | self.space_title = space_details.get('title', "") 54 | self.space_url = f"https://twitter.com/i/spaces/{self.rest_id}" 55 | self.space_started_at = int(space_details.get('started_at', 0)) 56 | self.space_state = space_details['state'] 57 | self.space_was_running = True 58 | self.space_ended_at = int(space_details.get('ended_at', 0)) 59 | 60 | def reset_default(self): 61 | self.handle_id: str = self.handle_id 62 | self.handle_name: str = self.handle_name 63 | self.handle_image: str = None 64 | self.space_title: str = None 65 | self.space_state: str = None 66 | self.space_creator_id: str = None 67 | self.space_creator_name: str = None 68 | self.space_participant_title: str = None 69 | self.space_was_running: bool = False 70 | self.space_started_at: int = 0 71 | self.space_ended_at: int = 0 72 | self.space_url: str = None 73 | self.m3u8_url: str = None 74 | self.space_notified: bool = False 75 | self.space_downloaded: bool = False 76 | self.space_duration: float = 0 77 | self.rest_id: str = None 78 | self.media_key: str = None 79 | 80 | 81 | -------------------------------------------------------------------------------- /const.py.example: -------------------------------------------------------------------------------- 1 | # duration in seconds and configure if rate-limited 2 | SLEEP_TIME = 60 3 | 4 | WEBHOOK_URL = "" 5 | WEBHOOK_DOWNLOAD_URL = "" 6 | 7 | # Cookies: auth_token and ct0(CSRF Token) can be found in your browser's cookies under Application tab in dev's tool 8 | BEARER_TOKEN = "Bearer AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA" 9 | AUTH_TOKEN = "" 10 | CT0 = "" 11 | 12 | # Specify whether to send downloaded twitter space to the discord webhook 13 | SEND_DOWNLOAD = False 14 | 15 | # Either True or just download path to a specified directory to enable download 16 | DOWNLOAD = "" 17 | 18 | # Track and download all spaces on the user timeline(user hosted spaces or retweeting other spaces) 19 | ALL_SPACE_TIMELINE = True 20 | 21 | LOGGING = True 22 | 23 | ''' 24 | Prepopulated Hololive and Other creators to monitor 25 | Following the same format add whoever you want to monitor in the form of {"creator_name": creator_id} 26 | ''' 27 | # Gamers IDs 28 | twitter_ids = [{"inugamikorone": 1109748792721432577}, {"k5r6n3": 1543196930032906241}, {"nekomataokayu": 1109751762733301760}, 29 | {"MO_OKAYU_GU": 1508759123285856256}, {"ookamimio": 1063337246231687169}, {"ookamimio_sab": 1593114800938135552}, 30 | {"shirakamifubuki": 997786053124616192}] 31 | # Gen 0 IDs 32 | twitter_ids += [{'robocosan': 960340787782299648}, {'maybe_robochan': 1598143974006747138}, 33 | {'sakuramiko35': 979891380616019968}, {'mikochisub': 1637740181813084161}, {'tokino_sora': 880317891249188864}, 34 | {'suisei_hosimati': 975275878673408001}, {'suisei_submati': 1656215492728877058}, {'AZKi_VDiVA': 1062499145267605504}] 35 | # Gen 1 IDs 36 | twitter_ids += [{'yozoramel': 985703615758123008}, {'YozoramelS': 1200743208805748736}, {'mel_kapu': 1596643514888368129}, {'akirosenthal': 996643748862836736}, 37 | {'akaihaato': 998336069992001537}, {'natsuiromatsuri': 996645451045617664}, {'7216_2nd': 1122810226153938944}] 38 | # Gen 2 IDs 39 | twitter_ids += [{'minatoaqua': 1024528894940987392}, {'nakiriayame': 1024532356554608640}, {'YODAYOsub': 1598245359083347968}, 40 | {'murasakishionch': 1024533638879166464}, {'shionchan_o': 1554007042779594752}, 41 | {'yuzukichococh': 1024970912859189248}, {'oozorasubaru': 1027853566780698624}] 42 | # Gen 3 IDs 43 | twitter_ids += [{'houshoumarine': 1153192638645821440}, {'shiroganenoel': 1153195295573856256}, {'danchou_sub': 1609453459618828288}, 44 | {'shiranuiflare': 1154304634569150464}, {'fuu_tan_sub': 1650527824145682432}, {'usadapekora': 1133215093246664706}, {'uraakapeko': 1590623757990645761}] 45 | # Gen 4 IDs 46 | twitter_ids += [{'himemoriluna': 1200396798281445376}, {'lunatan_nanora': 1574391126525546502}, {'amanekanatach': 1200396304360206337}, 47 | {'tokoyamitowa': 1200357161747939328}, {'tsunomakiwatame': 1200397643479805957}] 48 | # Gen 5 IDs 49 | twitter_ids += [{'omarupolka': 1270551806993547265}, {'yukihanalamy': 1255013740799356929}, {'YukihanaWamy': 1645752800369061888}, 50 | {'shishirobotan': 1255015814979186689}, {'momosuzunene': 1255017971363090432}, {'_nenechidayo': 1561593840590794753}] 51 | # Gen 6 IDs 52 | twitter_ids += [{'LaplusDarknesss': 1433657158067896325}, {'wagahaida_L': 1648179101247864832}, {'takanelui': 1433660866063339527}, 53 | {'hakuikoyori': 1433667543806267393}, 54 | {'sakamatachloe': 1433669866406375432}, {'kazamairohach': 1434755250049589252}] 55 | # Other IDs 56 | twitter_ids += [{'ksononair': 733990222787018753}, {'tanigox': 2006101}, {'achan_UGA': 1064352899705143297}, 57 | {'daidoushinove': 1156797715319160832}, {"kotone": 986871577890312192}] 58 | 59 | # Holostars Gen 1 IDs 60 | twitter_ids += [{'miyabihanasaki': 1132832428353966081}, {'arurandeisu': 1156841498479955968}, 61 | {'rikkaroid': 1174223248655114246}, {'kanadeizuru': 1132924263441227776}] 62 | # Holostars Gen 2 SunTempo IDs 63 | twitter_ids += [{'kishidotemma': 1194519616472543232}, {'astelleda': 1181889913517572096}, 64 | {'yukokuroberu': 1194520283446530051}] 65 | # Holostars Gen 3 MaFia IDs 66 | twitter_ids += [{'kageyamashien': 1248565757207695361}, {'aragamioga': 1248567107173773313}] 67 | # Holostars Gen 4 Uproar IDs 68 | twitter_ids += [{'yatogamifuma': 1490584094086164480}, {'minaserioch': 1490583416768970752}, 69 | {'hizakigamma': 1490585175331201024}, {'utsugiuyu': 1490587283849809923}] 70 | # Holostars EN TEMPUS IDs 71 | twitter_ids +=[{'regisaltare': 1536575088996524032}, {'axelsyrios': 1536577295632441344}, 72 | {'magnidezmond': 1536576325296996352}, {'noirvesper_en': 1536579341332516864}] 73 | # Holostars EN TEMPUS 2 IDs 74 | twitter_ids += [{'gavisbettel': 1582926739684339712}, {'machinaxflayon': 1582922712166825986}, 75 | {'banzoinhakka': 1582927907206631425}, {'josuijishinri': 1582925071546732544}] 76 | 77 | # HoloID Gen 1 IDs 78 | twitter_ids += [{'ayunda_risu': 1234752200145899520}, {'moonahoshinova': 1234753886520393729}, 79 | {'airaniiofifteen': 1235180878449397764}] 80 | # HoloID Gen 2 IDs 81 | twitter_ids += [{'anyamelfissa': 1328277750000492545}, {'kureijiollie': 1328277233492844544}, 82 | {'pavoliareine': 1328275136575799297}] 83 | # HoloID Gen 3 IDs 84 | twitter_ids += [{'kaelakovalskia': 1486636197908602880}, {'vestiazeta': 1486633489101307907}, 85 | {'kobokanaeru': 1486629076005634049}] 86 | 87 | # HoloEN Gen 1 Myth IDs 88 | twitter_ids += [{'moricalliope': 1283653858510598144}, {'takanashikiara': 1283646922406760448}, 89 | {'ninomaeinanis': 1283650008835743744}] 90 | twitter_ids += [{'gawrgura': 1283657064410017793}, {'watsonameliaEN': 1283656034305769472}] 91 | 92 | # HoloEN Project Hope ID 93 | twitter_ids += [{'irys_en': 1363705980261855232}] 94 | 95 | # HoloEN Gen 2 Council IDs 96 | twitter_ids += [{'tsukumosana': 1409819816194576394}, {'ceresfauna': 1409784760805650436}, 97 | {'ourokronii': 1409817096523968513}] 98 | twitter_ids += [{'nanashimumei_en': 1409817941705515015}, {'hakosbaelz': 1409783149211443200}] 99 | -------------------------------------------------------------------------------- /index.py: -------------------------------------------------------------------------------- 1 | import json 2 | import sys 3 | import time 4 | import urllib3 5 | import requests 6 | from requests.adapters import HTTPAdapter, Retry 7 | 8 | import twspace 9 | import threading 10 | import re 11 | import const 12 | from datetime import datetime 13 | from log import create_logger 14 | import logging 15 | from TwitterSpace import TwitterSpace 16 | 17 | 18 | # TODO: Look into how to handle retweeted spaces and scheduled spaces -> avoid space being downloaded multiple times(create a download list to check spaces being downloaded?) 19 | # TODO: Look into how to better handle and show twitter participant level(speaker, listener, etc) 20 | 21 | # Major Changes: Setted TwitterSpaces which is a dictionary of TwitterSpace objects e.g.{'user_id': TwitterSpace(handle_id='sam', handle_name='sam', handle_image=None, space_id=None, space_title=None, space_started_at='20230409', space_url=None, m3u8_url=None, space_notified=False, space_downloaded=False, space_duration=0, periscope_server=None, deployment_server=None, rest_id=None, media_key=None)} 22 | SLEEP_TIME = const.SLEEP_TIME 23 | ALL_SPACE_TIMELINE = const.ALL_SPACE_TIMELINE 24 | 25 | WEBHOOK_URL = const.WEBHOOK_URL 26 | DOWNLOAD = const.DOWNLOAD 27 | 28 | BEARER_TOKEN = const.BEARER_TOKEN 29 | AUTH_TOKEN = const.AUTH_TOKEN 30 | CSRF_TOKEN = const.CT0 31 | 32 | # List of twitter creators to monitor 33 | twitter_ids = const.twitter_ids 34 | TwitterSpaces = {} 35 | 36 | ################################# 37 | """ 38 | API FUNCTIONS 39 | """ 40 | 41 | 42 | def set_logger(logger=None): 43 | if logger is None: 44 | logger = logging.getLogger(__name__) 45 | if len(logger.handlers) != 0: 46 | return logger 47 | console_handler = logging.StreamHandler() 48 | console_handler.setLevel(logging.INFO) 49 | formatter = logging.Formatter('%(asctime)s [%(filename)s:%(lineno)d] %(levelname)s | %(message)s', 50 | datefmt='%Y-%m-%d %H:%M:%S') 51 | console_handler.setFormatter(formatter) 52 | logger.addHandler(console_handler) 53 | return logger 54 | 55 | 56 | # def fix_up_user_array(logger=None): 57 | # global twitter_ids 58 | # logger = set_logger(logger) 59 | # 60 | # handle_name = None 61 | # for user_dict in const.twitter_ids: 62 | # try: 63 | # handle_name, handle_id = user_dict.popitem() 64 | # twitter_ids.append({'name': handle_name, 'id': str(handle_id)}) 65 | # raise Exception 66 | # except Exception as e: 67 | # logger.error(f"Issue with providing user IDs {handle_name if handle_name is not None else ''}: Error {e}") 68 | # sys.exit() 69 | 70 | def create_session(): 71 | headers = {"Authorization": BEARER_TOKEN, "X-Csrf-Token": CSRF_TOKEN} 72 | cookies = {"auth_token": AUTH_TOKEN, "ct0": CSRF_TOKEN} 73 | session = requests.Session() 74 | retry = Retry(total=5, backoff_factor=1, status_forcelist=[400, 401, 403, 404, 429, 500, 502, 503, 504]) 75 | session.mount("https://", HTTPAdapter(max_retries=retry)) 76 | session.headers = headers 77 | session.cookies.update(cookies) 78 | return session 79 | 80 | 81 | def handle_rate_limit(handle_name, header_json, error_code=None,logger=None): 82 | logger = set_logger(logger) 83 | logger.debug(f"[{handle_name}] Headers: {header_json}") 84 | x_rate_limit_remaining = int(header_json.get("x-rate-limit-remaining", 0)) 85 | x_rate_limit_reset = int(header_json.get("x-rate-limit-reset", 0)) 86 | 87 | if x_rate_limit_remaining == 0 or error_code in (88, 429): 88 | rate_limit_duration = (datetime.fromtimestamp(x_rate_limit_reset) - datetime.now()).total_seconds() if x_rate_limit_reset != 0 else 900 89 | logger.warning( 90 | f"[{handle_name}] Rate-limited until {datetime.fromtimestamp(x_rate_limit_reset)}, sleeping for {int(rate_limit_duration)} seconds...") 91 | time.sleep(rate_limit_duration or SLEEP_TIME) 92 | 93 | 94 | # Alternate method to usertweets to find twitter spaces 95 | def get_spaces_by_avatar_content(user_ids_list, logger=None, session=None): 96 | user_spaces = {"users": {}, "refresh_delay_secs": 0} 97 | for i, user_ids in enumerate(user_ids_list): 98 | if 0 < i <= len(user_ids_list)-1: 99 | logger.debug(f"{user_ids} Sleeping for {SLEEP_TIME} seconds to avoid rate-limit") 100 | time.sleep(SLEEP_TIME) 101 | 102 | space_id_url = f"https://twitter.com/i/api/fleets/v1/avatar_content?user_ids={','.join(user_ids)}&only_spaces=true" 103 | 104 | try: 105 | res = session.get(space_id_url) 106 | logger.debug(f'URL: {space_id_url}') 107 | logger.debug(f'Header: {res.headers}') 108 | if res.status_code == 200: 109 | res_json = res.json() 110 | logger.debug(f"User Spaces: {res_json}") 111 | user_spaces['users'].update(res_json['users']) 112 | elif res.status_code == 429: 113 | logger.error(f"Rate-limited error {res.status_code} {res.text}, sleeping for {SLEEP_TIME} seconds...") 114 | time.sleep(SLEEP_TIME) 115 | continue 116 | elif res.status_code == 401: 117 | logger.error(f"Authentication error {res.status_code} {res.text}") 118 | 119 | except requests.exceptions.RetryError as reqError: 120 | logger.debug(reqError, exc_info=True) 121 | except Exception as e: 122 | logger.error(e, exc_info=True) 123 | return user_spaces 124 | 125 | 126 | # Get the creator of the space and title of the current user(admin, speaker or None) 127 | def get_space_participant(user, space_details): 128 | # Check if space is created by the current space user and not a retweeted space on timeline,etc 129 | space_details_json = space_details.json() 130 | space_creator_id = space_details_json['data']['audioSpace']['metadata']['creator_results']['result']['rest_id'] 131 | space_creator_name = space_details_json['data']['audioSpace']['metadata']['creator_results']['result']['legacy']['screen_name'] 132 | 133 | if user.handle_id in json.dumps(space_details_json['data']['audioSpace']['participants']['admins']): 134 | participant_title = 'admin' 135 | elif user.handle_id in json.dumps(space_details_json['data']['audioSpace']['participants']['speakers']): 136 | participant_title = 'speaker' 137 | elif user.handle_id in json.dumps(space_details_json['data']['audioSpace']['participants']['listeners']): 138 | participant_title = 'listener' 139 | else: 140 | participant_title = 'unknown' 141 | 142 | return space_creator_id, space_creator_name, participant_title 143 | 144 | 145 | # Gets the first twitter space on the timeline/user profile 146 | # Returns the space id 147 | def get_space_tweet_id(handle_id, handle_name, logger=None, session=None): 148 | logger = set_logger(logger) 149 | 150 | space_id_pattern = r'"expanded_url":"https://twitter\.com/i/spaces/(.*?)"' 151 | 152 | # See UserTweets.json for example json response 153 | space_id_url = "https://twitter.com/i/api/graphql/rIIwMe1ObkGh_ByBtTCtRQ/UserTweets" 154 | headers = {"Authorization": BEARER_TOKEN, "X-Csrf-Token": CSRF_TOKEN} 155 | cookies = {"auth_token": AUTH_TOKEN, "ct0": CSRF_TOKEN} 156 | 157 | params = { 158 | "variables": f'{{"userId":"{handle_id}",' 159 | '"count":10,' 160 | '"includePromotedContent":false,' 161 | '"withQuickPromoteEligibilityTweetFields":false,' 162 | '"withVoice":true,' 163 | '"withV2Timeline":true}', 164 | "features": '{"rweb_lists_timeline_redesign_enabled": true,' 165 | '"responsive_web_graphql_exclude_directive_enabled": false,' 166 | '"verified_phone_label_enabled": false,' 167 | '"creator_subscriptions_tweet_preview_api_enabled": true,' 168 | '"responsive_web_graphql_timeline_navigation_enabled": true,' 169 | '"responsive_web_graphql_skip_user_profile_image_extensions_enabled": false,' 170 | '"tweetypie_unmention_optimization_enabled": true,' 171 | '"responsive_web_edit_tweet_api_enabled": true,' 172 | '"graphql_is_translatable_rweb_tweet_is_translatable_enabled": true,' 173 | '"view_counts_everywhere_api_enabled": true,' 174 | '"longform_notetweets_consumption_enabled": true,' 175 | '"responsive_web_twitter_article_tweet_consumption_enabled": false,' 176 | '"tweet_awards_web_tipping_enabled": false,' 177 | '"freedom_of_speech_not_reach_fetch_enabled": false,' 178 | '"standardized_nudges_misinfo": true,' 179 | '"tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled": false,' 180 | '"longform_notetweets_rich_text_read_enabled": true,' 181 | '"longform_notetweets_inline_media_enabled": true,' 182 | '"responsive_web_media_download_video_enabled": false,' 183 | '"responsive_web_enhance_cards_enabled": false}' 184 | } 185 | try: 186 | rest_id_response = session.get(url=space_id_url, params=params) 187 | except (requests.exceptions.ConnectionError, requests.exceptions.RetryError, requests.exceptions.ReadTimeout) as r_exception: 188 | logger.debug(r_exception) 189 | logger.debug(f"[{handle_name}] Connection issue occurred while looking for twitter space...") 190 | return None 191 | except requests.exceptions.RequestException as req_exceptions: 192 | logger.error(f"[{handle_name}] {req_exceptions}", exc_info=True) 193 | return None 194 | except Exception as e: 195 | logger.error(e, exc_info=True) 196 | return None 197 | 198 | # Error check 199 | if rest_id_response.status_code != 200: 200 | try: 201 | rest_id_json = rest_id_response.json() 202 | except requests.exceptions.JSONDecodeError: 203 | logger.debug(f"[{handle_name}] JSONDecodeError: {rest_id_response}", exc_info=True) 204 | logger.debug(f"[{handle_name}] JSONDecodeError: {rest_id_response.headers}") 205 | logger.warning( 206 | f"[{handle_name}] Issue finding space with error code {rest_id_response.status_code} {rest_id_response.text.strip()}") 207 | if rest_id_response.status_code == 429: 208 | logger.debug(f"[{handle_name}] JSONDecodeError: {rest_id_response.headers}") 209 | handle_rate_limit(handle_name, rest_id_response.headers, error_code=429, logger=logger) 210 | return None 211 | 212 | try: 213 | # try except so script can work after a long period of inactivity(sleep) 214 | if 'error' in rest_id_json or 'errors' in rest_id_json: 215 | logger.debug(f"[{handle_name}] {rest_id_json}") 216 | if isinstance(rest_id_json.get('errors'), list): 217 | logger.debug(f"[{handle_name}] {rest_id_json} Error {rest_id_json['errors'][0]['code']} {rest_id_json['errors'][0]['message']}") 218 | # Needed anymore? 219 | if rest_id_json['errors'][0]['code'] == 88: 220 | handle_rate_limit(handle_name, rest_id_response.headers, error_code=88, logger=logger) 221 | elif rest_id_json.get('errors').get(0).get('code') == 239: 222 | logger.debug( 223 | f"[{handle_name}] Issue finding space with error code {rest_id_response.status_code} {rest_id_response.text.strip()}") 224 | logger.debug(f"[{handle_name}] JSONDecodeError: {rest_id_response.headers}") 225 | elif rest_id_json.get('errors').get(0).get('code') in (32, 353): 226 | logger.error( 227 | f"[{handle_name}] Issue finding space, may need to get new tokens with error code {rest_id_response.status_code} {rest_id_response.text.strip()}") 228 | logger.debug(f"[{handle_name}] JSONDecodeError: {rest_id_response.headers}") 229 | else: 230 | logger.error( 231 | f"[{handle_name}] Issue finding space, may need to get new tokens with error code {rest_id_response.status_code} {rest_id_response.json().get('errors').get(0).get('message')}") 232 | except AttributeError as aError: 233 | logger.debug(aError, exc_info=True) 234 | 235 | else: 236 | logger.warning(f"[{handle_name}] Issue finding space with error code {rest_id_response.status_code} {rest_id_response.text.strip()}") 237 | elif 'data' not in rest_id_response.json(): 238 | logger.debug(f"[{handle_name}] {rest_id_response}") 239 | handle_rate_limit(handle_name, rest_id_response.headers, logger=logger) 240 | 241 | # space_id = space_id_response.json()['data']['user']['result']['timeline_v2']['timeline']['instructions'][0]['entries'][0]['result']['legacy']['extended_entities']['media']['media_key'] 242 | rest_id = None 243 | try: 244 | rest_id = re.search(string=rest_id_response.text, pattern=space_id_pattern).group(1) 245 | logger.debug(f"Space ID for {handle_name}({handle_id}): {rest_id}") 246 | except AttributeError: 247 | # No space found 248 | return rest_id 249 | 250 | return rest_id 251 | 252 | 253 | # Gets detailed information/status of the twitter space 254 | # Returns a media key which is used to get information about the video stream(m3u8 url) 255 | def get_space_details(handle_name, rest_id, logger=None, session=None): 256 | logger = set_logger(logger) 257 | 258 | # See AudioSpaceById for example json response 259 | space_id_url = "https://twitter.com/i/api/graphql/kZ9wfR8EBtiP0As3sFFrBA/AudioSpaceById" 260 | 261 | params = { 262 | "variables": f'{{"id":"{rest_id}",' 263 | '"isMetatagsQuery":false,' 264 | '"withListeners":true,' 265 | '"withReplays":true}', 266 | "features": '{"spaces_2022_h2_clipping":true,' 267 | '"spaces_2022_h2_spaces_communities":true,' 268 | '"responsive_web_graphql_exclude_directive_enabled":false,' 269 | '"verified_phone_label_enabled":false,' 270 | '"creator_subscriptions_tweet_preview_api_enabled":true,' 271 | '"responsive_web_graphql_skip_user_profile_image_extensions_enabled":false,' 272 | '"tweetypie_unmention_optimization_enabled":true,' 273 | '"responsive_web_edit_tweet_api_enabled":true,' 274 | '"graphql_is_translatable_rweb_tweet_is_translatable_enabled":true,' 275 | '"view_counts_everywhere_api_enabled":true,' 276 | '"longform_notetweets_consumption_enabled":true,' 277 | '"responsive_web_twitter_article_tweet_consumption_enabled":false,' 278 | '"tweet_awards_web_tipping_enabled":false,' 279 | '"freedom_of_speech_not_reach_fetch_enabled":true,' 280 | '"freedom_of_speech_not_reach_appeal_label_enabled":false,' 281 | '"standardized_nudges_misinfo":true,' 282 | '"tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled":true,' 283 | '"responsive_web_graphql_timeline_navigation_enabled":true,' 284 | '"longform_notetweets_rich_text_read_enabled":true,' 285 | '"longform_notetweets_inline_media_enabled":true,' 286 | '"responsive_web_media_download_video_enabled":false,' 287 | '"responsive_web_enhance_cards_enabled":false}' 288 | } 289 | try: 290 | space_id_response = session.get(url=space_id_url, params=params) 291 | except (requests.exceptions.ConnectionError, requests.exceptions.RetryError, requests.exceptions.ReadTimeout) as r_exception: 292 | logger.debug(r_exception) 293 | logger.debug(f"[{handle_name}] Connection issue occurred while looking for twitter space...") 294 | return None 295 | except requests.exceptions.RequestException as req_exceptions: 296 | logger.error(f"[{handle_name}] {req_exceptions}", exc_info=True) 297 | return None 298 | except Exception as e: 299 | logger.error(e, exc_info=True) 300 | return None 301 | # Error check 302 | try: 303 | space_id_json = space_id_response.json() 304 | logger.debug(f"[{handle_name}] Space Details: {space_id_json}") 305 | logger.debug(f"[{handle_name}] Space ID Json Headers: {space_id_response.headers}") 306 | except requests.exceptions.JSONDecodeError: 307 | logger.error(f"[{handle_name}] Issue getting space details with error code {space_id_response.status_code} {space_id_response.text.strip()}") 308 | logger.debug(space_id_response) 309 | return None 310 | 311 | if space_id_response.status_code == 429: 312 | handle_rate_limit(handle_name, space_id_response.headers, error_code=429, logger=logger) 313 | logger.debug(f"[{handle_name}] Error {space_id_response.status_code} {space_id_response.text}", exc_info=True) 314 | 315 | if 'data' not in space_id_json or space_id_response.status_code != 200: 316 | if 'error' in space_id_json: 317 | # Not needed anymore? 318 | if space_id_json['errors'][0]['code'] in (32, 353): 319 | logger.info(f"[{handle_name}] Bad guest token, renewing...") 320 | logger.error( 321 | f"[{handle_name}] Issue finding space, may need to get new tokens with error code {space_id_json.status_code} {space_id_json.text.strip()}") 322 | logger.error(f"[{handle_name}] Issue getting media key with error code {space_id_response.status_code} {space_id_json}") 323 | return None 324 | else: 325 | # {'data': {'audioSpace': {}}} 326 | logger.debug("Empty object received...") 327 | return None 328 | elif space_id_json.get('data').get('audioSpace') == {}: 329 | # {'data': {'audioSpace': {}}} 330 | return None 331 | 332 | handle_rate_limit(handle_name, space_id_response.headers, logger=logger) 333 | return space_id_response 334 | 335 | 336 | def get_media_key(handle_name, space_detail, logger=None): 337 | logger = set_logger(logger) 338 | media_key = None 339 | # media_key_pattern = r'"media_key":"(.*?)"' 340 | # space_id = space_id_response.json()['data']['user']['result']['timeline_v2']['timeline']['instructions'][0]['entries'][0]['result']['legacy']['extended_entities']['media']['media_key'] 341 | try: 342 | # media_key = re.search(string=space_detail.text, pattern=media_key_pattern).group(1) 343 | media_key = space_detail['media_key'] 344 | logger.debug(f"[{handle_name}] Media Key: {media_key}") 345 | except (AttributeError, KeyError): 346 | logger.error(f"[{handle_name}] Issue finding media key") 347 | logger.debug(f"[{handle_name}] {space_detail}") 348 | return media_key 349 | 350 | 351 | # Gets detailed information about the video/media stream 352 | # Returns m3u8 url 353 | def get_space_source(handle_name, media_key, logger=None, session=None): 354 | logger = set_logger(logger) 355 | location_url = None 356 | # See live_video_stream for example json response 357 | space_source_url = f"https://api.twitter.com/1.1/live_video_stream/status/{media_key}" 358 | 359 | try: 360 | space_source_response = session.get(url=space_source_url) 361 | except (requests.exceptions.RequestException, urllib3.exceptions.MaxRetryError, requests.exceptions.RetryError) as e: 362 | logger.error(f"[{handle_name}] {e}") 363 | 364 | if space_source_response.status_code != 200: 365 | logger.error(f"[{handle_name}] Issue getting space source with error code {space_source_response.status_code}") 366 | logger.debug(f"[{handle_name}] Space Source Headers: {space_source_response.headers}") 367 | handle_rate_limit(handle_name, space_source_response.headers, logger=logger) 368 | return location_url 369 | 370 | space_source = space_source_response.json() 371 | location_url = space_source["source"]["location"].replace("dynamic", "master").replace("?type=live", "") 372 | logger.debug(f"[{handle_name}] Space Source: {space_source}") 373 | 374 | return location_url 375 | 376 | 377 | def create_users(): 378 | for user in const.twitter_ids: 379 | user_name, user_id = next(iter(user.items())) 380 | TwitterSpaces[user_id] = TwitterSpace(handle_id=str(user_id), handle_name=user_name) 381 | 382 | 383 | # list of user ids list of up to 100 ids per list 384 | def get_user_ids(): 385 | user_ids = [] 386 | split_twitter_id_list = [] 387 | if len(twitter_ids) // 100 != 0: 388 | for split in range(len(twitter_ids) // 100): 389 | split_twitter_id_list += [twitter_ids[split * 100:(split + 1) * 100]] 390 | if len(twitter_ids) % 100 != 0: 391 | split_twitter_id_list += [twitter_ids[(len(twitter_ids) // 100) * 100:]] 392 | else: 393 | split_twitter_id_list = [twitter_ids] 394 | 395 | for twitter_user_list in split_twitter_id_list: 396 | temp_id = [] 397 | for twitter_user in twitter_user_list: 398 | temp_id.append(str(*twitter_user.values())) 399 | user_ids.append(temp_id) 400 | return user_ids 401 | 402 | 403 | def fix_up_spaces_by_avatar_content(user_spaces_list): 404 | user_spaces = {} 405 | for user_id in user_spaces_list['users']: 406 | # Tuple of (user_id, broadcast_id) where broadcast_id is equivalent to rest_id i.e. Space ID 407 | try: 408 | user_spaces[user_id] = user_spaces_list['users'][user_id]['spaces']['live_content']['audiospace']['broadcast_id'] 409 | except Exception as e: 410 | logger.error(e, exc_info=True) 411 | logger.debug(user_spaces_list['users'][user_id]) 412 | return user_spaces 413 | 414 | 415 | def get_spaces(user_ids, logger=None, session=None): 416 | user_spaces = get_spaces_by_avatar_content(user_ids, logger=logger, session=session) 417 | space_ids = fix_up_spaces_by_avatar_content(user_spaces) 418 | logger.debug(f"User Spaces: {user_spaces}") 419 | logger.debug(f"Space IDs: {space_ids}") 420 | logger.debug(f"TwitterSpaces: {TwitterSpaces}") 421 | # if space_ids == {}: 422 | # return 423 | 424 | for user in TwitterSpaces.values(): 425 | # if user.handle_id not in space_ids.keys() and user.space_state != 'Running': 426 | # # if user has no live space and user space object has no space running 427 | # # logger.debug(f"[{user.handle_name}] {user.handle_id not in space_ids.keys()}, {user.space_state != 'Running'}") 428 | # continue 429 | # if user.space_state == 'Running' and user.space_downloaded: 430 | # # if user space object is running and already downloaded 431 | # # logger.debug( 432 | # # f"[{user.handle_name}] {user.space_state == 'Running'}, {user.space_downloaded}") 433 | # continue 434 | 435 | # this rest_id here is basically a check for whether space is still live 436 | rest_id = space_ids.get(user.handle_id) 437 | try: 438 | # if rest_id is None or (user.handle_id in space_ids.keys() and user.space_notified): 439 | # # Not Live(continue)->On Live->Still On Live(continue)->Just Offline->Offline(continue) 440 | # logger.debug(f"[{user.handle_name}] Skipping...") 441 | # continue 442 | 443 | if user.rest_id == rest_id or (rest_id is None and user.space_downloaded): 444 | # Not Live(continue)->On Live->Still On Live(continue)->Just Offline->Offline(continue) 445 | logger.debug(f"[{user.handle_name}] Skipping...") 446 | continue 447 | 448 | try: 449 | if rest_id is None: 450 | # set rest_id when space is offline to be able to download 451 | rest_id = user.rest_id 452 | logger.debug(f"[{user.handle_name}] Looking for spaces...") 453 | space_details_res = get_space_details(user.handle_name, rest_id, logger=logger, session=session) 454 | except Exception as e: 455 | logger.error(e, exc_info=True) 456 | 457 | if space_details_res is None: 458 | logger.debug(f"[{user.handle_name}] Unable to get space details...") 459 | continue 460 | else: 461 | space_details = space_details_res.json()['data']['audioSpace']['metadata'] 462 | logger.debug(f"[{user.handle_name}] {space_details_res.json()}") 463 | # If space has already been queried(also ensure new space isn't skipped if previous space hasn't been downloaded) 464 | # or is a past space that has not been queried then skip 465 | if user.space_state == space_details['state'] and user.rest_id != space_details['rest_id'] or user.space_state is None and space_details['state'] == 'Ended': 466 | logger.debug(f"[{user.handle_name}] Past space, skipping...") 467 | continue 468 | 469 | # Handling new spaces 470 | if user.space_state == 'Ended' and space_details['state'] == 'Running': 471 | user.reset_default() 472 | logger.debug(f"Resetting default values for {user.handle_name}") 473 | 474 | # Handling scheduled space 475 | if user.space_state == 'NotStarted' and space_details['state'] == 'Running': 476 | user.reset_default() 477 | logger.info(f"Scheduled space from {user.handle_name} is now live") 478 | logger.debug(f"Resetting default values for {user.handle_name}") 479 | 480 | try: 481 | space_creator_id, space_creator_name, participant_title = get_space_participant(user, space_details_res) 482 | except (KeyError, requests.exceptions.JSONDecodeError) as cError: 483 | logger.debug(cError) 484 | space_creator_id, space_creator_name, participant_title = user.space_creator_id, user.handle_name, None 485 | 486 | # TODO: Add another check to not track space if it's a retweeted where host is also on the list 487 | # If current user isn't hosting the space or participating and should not be tracked 488 | # if user.handle_id != space_creator_id and participant_title is None and not ALL_SPACE_TIMELINE: 489 | # continue 490 | 491 | user.rest_id = rest_id 492 | media_key = get_media_key(user.handle_name, space_details, logger=logger) 493 | user.media_key = media_key 494 | user.set_space_details(space_details) 495 | user.space_creator_id = space_creator_id 496 | user.space_creator_name = space_creator_name 497 | user.space_participant_title = participant_title 498 | logger.debug(f"[{user.handle_name}] {space_details}") 499 | 500 | try: 501 | if user.space_state == "Running" and not user.space_notified: 502 | notify_space(user, logger=logger, session=session) 503 | except Exception as e: 504 | logger.error(f"[{user.handle_name}] Issue notifying space", exc_info=True) 505 | logger.debug(e, exc_info=True) 506 | 507 | except Exception as e: 508 | logger.error(f"[{user.handle_name}] Issue getting latest space id", exc_info=True) 509 | logger.debug(e, exc_info=True) 510 | continue 511 | 512 | 513 | def download(ended_spaces, logger=None): 514 | if DOWNLOAD is not None or False: 515 | downloaded = [] 516 | for ended_space in ended_spaces: 517 | # if int(ended_space.space_duration) == 0: 518 | # duration = datetime.timestamp(datetime.now()) - ended_space.space_started_at/1000.0 519 | # ended_space.space_duration = duration 520 | # logger.debug(f"Setting custom duration of {duration} for {ended_space.handle_name}") 521 | 522 | # ended_space.m3u8_url = get_space_source(media_key=ended_space.media_key, logger=logger) 523 | # print(" " * 70, end='\n') 524 | 525 | # Add a check to avoid duplicate download for retweeted/joined space between two or more tracked user 526 | if ended_space.rest_id in downloaded: 527 | logger.warning(f"[{ended_space.handle_name}] {ended_space.rest_id} has already been downloaded, skipping...") 528 | ended_space.space_downloaded = True 529 | continue 530 | 531 | logger.info(f"{ended_space.space_creator_name} is now offline at {ended_space.rest_id}") 532 | 533 | try: 534 | if ended_space.m3u8_url is None: 535 | ended_space.m3u8_url = get_space_source(handle_name=ended_space.space_creator_name, media_key=ended_space.media_key, logger=logger, session=session) 536 | if ended_space.m3u8_url is None: 537 | logger.error( 538 | f"[{ended_space.handle_name}] Can not download space for {ended_space.space_creator_name}, unable to find m3u8 url...") 539 | return 540 | 541 | ended_space.set_space_duration() 542 | server = ended_space.get_server() 543 | m3u8_id = ended_space.get_m3u8_id() 544 | space_date = ended_space.get_strftime() 545 | logger.debug(ended_space) 546 | threading.Thread(target=twspace.download, 547 | args=[m3u8_id, ended_space.rest_id, ended_space.space_creator_name, 548 | ended_space.handle_name, ended_space.space_title, server, 549 | ended_space.space_duration, space_date, logger]).start() 550 | ended_space.space_downloaded = True 551 | downloaded.append(ended_space.rest_id) 552 | except Exception as thread_exception: 553 | logger.error(thread_exception, exc_info=True) 554 | ended_space.space_downloaded = False 555 | 556 | 557 | def loading_text(): 558 | loading_string = "Waiting for live twitter spaces " 559 | animation = [" ", ". ", ".. ", "... ", ".... ", "....."] 560 | idx = 0 561 | while True: 562 | print(f"[INFO] {datetime.now().replace(microsecond=0)} | " + loading_string + animation[idx % len(animation)], 563 | end="\r") 564 | time.sleep(0.3) 565 | idx += 1 566 | if idx == 6: 567 | idx = 0 568 | 569 | 570 | def notify_space(space, logger=None, session=None): 571 | logger.debug(f"[{space.space_creator_name}] Space Object: {str(space)}") 572 | # logger.debug(f"Space Details: {str(space[0]['data'])}") 573 | # logger.debug(f"User Details: {str(space[1]['data'])}") 574 | space_id = space.rest_id 575 | status = 'live' if space.space_state == 'Running' else space.space_state 576 | creator_profile_image = space.handle_image 577 | 578 | space_creator = space.space_creator_name 579 | space_handle_name = space.handle_name 580 | space_started_at = space.get_strftime() 581 | space_title = space.space_title 582 | 583 | space_url = f"https://twitter.com/i/spaces/{space_id}" 584 | 585 | # Get and send the m3u8 url 586 | counter = 0 587 | m3u8_url = None 588 | while counter <= 5: 589 | m3u8_url = get_space_source(handle_name=space_creator, media_key=space.media_key, logger=logger, session=session) 590 | if m3u8_url is None: 591 | counter += 1 592 | time.sleep(20) 593 | logger.warning(f"[{space.handle_name}]Retrying to get m3u8 url {counter}/{5}") 594 | continue 595 | else: 596 | break 597 | 598 | space.m3u8_url = m3u8_url 599 | logger.debug(space) 600 | # Todo maybe consider changing space_creator to `space_creator` to avoid underscore error 601 | if space.handle_id == space.space_creator_id: 602 | logger.info(f"{space_creator} is now {status} at {space_url}") 603 | else: 604 | logger.info(f"[{space.space_creator_name}] {space_handle_name} is participating at {space_url}") 605 | logger.info(f"M3U8: {m3u8_url}") 606 | 607 | if space.handle_id == space.space_creator_id: 608 | description = f"{space_creator} is now {status} at <{space_url}> ```{m3u8_url}```" 609 | else: 610 | description = f"{space_handle_name} is participating at <{space_url}> ```{m3u8_url}```" 611 | message = {"embeds": [{ 612 | "color": 1942002, 613 | "author": { 614 | "name": f"{space_creator}", 615 | "icon_url": creator_profile_image 616 | }, 617 | "fields": [ 618 | { 619 | "name": space_title, 620 | "value": description 621 | } 622 | ], 623 | "thumbnail": { 624 | "url": creator_profile_image.replace("normal", "200x200") 625 | } 626 | }] 627 | } 628 | if WEBHOOK_URL is not None: 629 | retry = 0 630 | max_retry = 5 631 | while retry < max_retry: 632 | try: 633 | session.post(WEBHOOK_URL, json=message, timeout=5) 634 | space.space_notified = True 635 | break 636 | except requests.exceptions.ConnectionError: 637 | retry += 1 638 | logger.debug(f"[{space_creator}] Issue notifying space {space_id}", exc_info=True) 639 | logger.debug(f"[{space_creator}] Re-notifying space") 640 | except Exception as e: 641 | retry += 1 642 | logger.debug(f"[{space_creator}] Issue notifying space {space_id}", exc_info=True) 643 | logger.debug(f"[{space_creator}] Re-notifying space") 644 | if retry == max_retry: 645 | logger.debug(f"[{space_creator}] Issue notifying space", exc_info=True) 646 | 647 | 648 | 649 | if __name__ == "__main__": 650 | logger = create_logger("logfile.log") 651 | logger.info("Starting program") 652 | threading.Thread(target=loading_text).start() 653 | session = create_session() 654 | # loading_string = "[INFO] Waiting for live twitter spaces" 655 | 656 | create_users() 657 | user_ids = get_user_ids() 658 | logger.debug(f"TwitterSpaces: {TwitterSpaces}") 659 | logger.debug(f"User Ids: {user_ids}") 660 | while True: 661 | try: 662 | get_spaces(user_ids, logger=logger, session=session) 663 | # space_list = [space for space in TwitterSpaces.values() if space.space_state == "Running" and not space.space_notified] 664 | 665 | to_download = [space for space in TwitterSpaces.values() if 666 | (space.space_state == "Ended" or space.space_ended_at != 0) and 667 | (not space.space_downloaded and space.space_was_running)] 668 | # if space_list is None: 669 | # continue 670 | 671 | # Download spaces that have ended and have yet to be downloaded 672 | if to_download is not None: 673 | download(to_download, logger=logger) 674 | 675 | except SystemExit: 676 | sys.exit("Error, Exiting") 677 | except OSError: 678 | sys.exit("Error, Exiting") 679 | except KeyboardInterrupt: 680 | sys.exit("Error, Exiting") 681 | except Exception as e: 682 | logger.error(e, exc_info=True) 683 | -------------------------------------------------------------------------------- /log.py: -------------------------------------------------------------------------------- 1 | import gzip 2 | import logging 3 | from logging.handlers import TimedRotatingFileHandler 4 | import os.path 5 | import const 6 | 7 | 8 | def namer(name): 9 | return name + ".gz" 10 | 11 | 12 | def rotator(source, dest): 13 | with open(source, 'rb') as f_in: 14 | with gzip.open(dest, 'wb') as f_out: 15 | f_out.write(f_in.read()) 16 | os.remove(source) 17 | 18 | 19 | # Filter subclass that does not allow the file logging of sleeping messages 20 | class NoParsingFilter(logging.Filter): 21 | def filter(self, record): 22 | # if len(record.message) < 37: 23 | # record.message = f"{record.message}{' '*20}" 24 | return not record.getMessage().startswith('Sleeping') 25 | 26 | 27 | # Filter subclass to allow and not allow stack traceback filtering 28 | class TracebackInfoFilter(logging.Filter): 29 | """Clear or restore the exception on log records""" 30 | def __init__(self, clear=True): 31 | self.clear = clear 32 | 33 | def filter(self, record): 34 | if self.clear: 35 | record._exc_info_hidden, record.exc_info = record.exc_info, None 36 | # clear the exception traceback text cache, if created. 37 | record.exc_text = None 38 | elif hasattr(record, "_exc_info_hidden"): 39 | record.exc_info = record._exc_info_hidden 40 | del record._exc_info_hidden 41 | return True 42 | 43 | 44 | def create_logger(logfile_name): 45 | # Check if log dir exist and if not create it 46 | logging.handlers.TimedRotatingFileHandler 47 | log_dir = os.getcwd()+"\\logs" 48 | if not os.path.isdir(log_dir): 49 | os.makedirs(log_dir) 50 | 51 | # Get the logger object 52 | logger = logging.getLogger(__name__) 53 | 54 | # If logger has already been created then return it(for the imported modules) 55 | if len(logger.handlers) != 0: 56 | return logger 57 | 58 | # Set logging level and log path 59 | logger.setLevel(logging.DEBUG) 60 | log_path = log_dir + "\\" + logfile_name 61 | 62 | # Create a new log file everyday 63 | handler = TimedRotatingFileHandler(log_path, when="midnight", interval=1, encoding='utf-8') 64 | formatter = logging.Formatter('%(asctime)s [%(filename)s:%(lineno)d] %(levelname)-8s %(message)s', datefmt='%Y-%m-%d %H:%M:%S') 65 | handler.setFormatter(formatter) 66 | handler.suffix = "%Y%m%d" # file suffix to be changed 67 | handler.addFilter(NoParsingFilter()) 68 | handler.rotator = rotator 69 | handler.namer = namer 70 | # Set handler to allow stacktraceback logging 71 | handler.addFilter(TracebackInfoFilter(clear=False)) 72 | 73 | # logging.basicConfig(level=logging.INFO, 74 | # format='%(asctime)s [%(filename)s:%(lineno)d] %(levelname)-5s %(message)s', 75 | # datefmt='%Y-%m-%d %H:%M', 76 | # filename=log_path) 77 | 78 | # define a Handler which writes DEBUG messages or higher to the sys.stderr 79 | console = logging.StreamHandler() 80 | console.setLevel(logging.INFO) 81 | console.addFilter(TracebackInfoFilter()) 82 | # set a format which is simpler for console use 83 | console_formatter = logging.Formatter(f'[%(levelname)s] %(asctime)s | %(message)s {" "*10}', datefmt='%Y-%m-%d %H:%M:%S') 84 | # tell the handler to use this format 85 | console.setFormatter(console_formatter) 86 | # add the handlers to the root logger 87 | logger.addHandler(console) 88 | logger.addHandler(handler) 89 | 90 | # If logging is not enabled then remove the root log handler but keep the stream handler 91 | if not const.LOGGING: 92 | try: 93 | lhStdout = logger.handlers[1] 94 | logger.removeHandler(lhStdout) 95 | except IndexError as ierror: 96 | logger.error(ierror) 97 | return logger 98 | return logger 99 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | dataclasses==0.6 2 | discord.py==2.0.1 3 | requests==2.31.0 4 | urllib3==1.26.12 5 | discord==2.3.0 -------------------------------------------------------------------------------- /twspace.py: -------------------------------------------------------------------------------- 1 | from urllib import error 2 | import subprocess 3 | import requests 4 | from requests.adapters import HTTPAdapter, MaxRetryError 5 | from urllib3 import Retry 6 | import const 7 | import discord 8 | from log import create_logger 9 | import os 10 | import re 11 | import time 12 | 13 | 14 | # Function takes in the file name and check if it contains illegal characters 15 | # If it contains an illegal character then remove it and return the new file name without the illegal character 16 | def checkFileName(fileName): 17 | invalidName = re.compile(r"[\\*?<>:\"/\|]") 18 | newFileName = fileName 19 | if re.search(invalidName, fileName) is not None: 20 | newFileName = re.sub(invalidName, "_", fileName) 21 | # If file name has multiple lines then join them together(because stripping newline doesn't work) 22 | if "\n" in fileName: 23 | title_array = fileName.splitlines() 24 | newFileName = " ".join(title_array) 25 | return newFileName 26 | 27 | 28 | def send_file(file_path, space_id, twitter_name, space_title, space_date): 29 | logger = create_logger("logfile_twspace.log") 30 | if os.path.isfile(file_path): 31 | webhook = discord.Webhook.from_url(const.WEBHOOK_DOWNLOAD_URL, adapter=discord.RequestsWebhookAdapter()) 32 | space_file = discord.File(file_path) 33 | content = f"[{twitter_name}] The twitter space for {twitter_name} was downloaded\n`[{space_date}] {twitter_name} - {space_title} ({space_id})`" 34 | try: 35 | webhook.send(content=content, file=space_file) 36 | except discord.HTTPException as e: 37 | logger.error(e.text, exc_info=True) 38 | else: 39 | logger.error(f"[{twitter_name}] Could not find space file to send", exc_info=True) 40 | 41 | 42 | def get_m3u8_chunk(base_url, master_url, logger, session): 43 | # Get the playlist m3u8 44 | t = session.get(master_url).content.decode('utf-8') 45 | # logger.debug(t) 46 | master_playlist = re.findall(".*m3u8$", t, re.MULTILINE) 47 | for i in master_playlist: 48 | master_playlist = i 49 | logger.debug(master_playlist) 50 | # Get the playlist m3u8 content and replace the chunk url with the appropriate prefix 51 | chunk_m3u8 = base_url + master_playlist 52 | logger.debug(chunk_m3u8) 53 | return chunk_m3u8 54 | 55 | 56 | def check_correct_duration(t, duration, logger): 57 | if duration is None: 58 | return True, duration 59 | moe = 30 60 | reg = re.compile("#EXTINF:(\d.\d{3})") 61 | result = re.findall(reg, t) 62 | m3u8_duration = sum(map(float, result)) 63 | logger.debug(f"Space duration: {duration - moe} <= {m3u8_duration} <= {duration + moe}") 64 | if duration - moe <= m3u8_duration <= duration + moe: 65 | return True, m3u8_duration 66 | else: 67 | return False, m3u8_duration 68 | 69 | 70 | def download(m3u8_id, rest_id, space_creator, handle_name, space_title, space_server, space_duration, space_date, logger=None): 71 | session = requests.Session() 72 | retry = Retry(total=5, connect=5, backoff_factor=1, status_forcelist=[400, 401, 403, 404, 429, 500, 502, 503, 504]) 73 | session.mount("https://", HTTPAdapter(max_retries=retry)) 74 | if logger is None: 75 | logger = create_logger("logfile.log") 76 | DOWNLOAD_PATH = os.path.join(const.DOWNLOAD, space_creator) 77 | SEND_DOWNLOAD = const.SEND_DOWNLOAD 78 | if DOWNLOAD_PATH == "True": 79 | DOWNLOAD_PATH = os.path.join(os.getcwd(), space_creator) 80 | elif not os.path.exists(DOWNLOAD_PATH): 81 | os.makedirs(DOWNLOAD_PATH) 82 | 83 | deployment_server, periscope_server = space_server 84 | 85 | base_url = f'https://{deployment_server}-{periscope_server}.pscp.tv' 86 | base_addon = '/Transcoding/v1/hls/' 87 | 88 | file_name = checkFileName(space_title) 89 | 90 | # Remove .video from the periscope_server string 91 | periscope_server = periscope_server.removesuffix('.video') 92 | end_masterurl = "/non_transcode/us-east-1/periscope-replay-direct-prod-us-east-1-public/audio-space/master_playlist.m3u8" 93 | end_chunkurl = f'/non_transcode/{periscope_server}/periscope-replay-direct-prod-{periscope_server}-public/audio-space/chunk' 94 | master_url = base_url+base_addon+m3u8_id+end_masterurl 95 | logger.debug(master_url) 96 | 97 | # Retry on 404 error 98 | retry = 0 99 | MAX_RETRY = 20 100 | while retry < MAX_RETRY: 101 | try: 102 | # Get the chunk m3u8 103 | chunk_m3u8 = get_m3u8_chunk(base_url, master_url, logger, session) 104 | t = session.get(chunk_m3u8).content.decode('utf-8') 105 | correct_duration, m3u8_duration = check_correct_duration(t, space_duration, logger) 106 | logger.debug(f"[{space_creator}] Expected M3U8 Duration: {m3u8_duration}") 107 | if not correct_duration: 108 | # raise error.HTTPError(url=chunk_m3u8, code=102, msg="M3U8 is incomplete", hdrs=None, fp=None) 109 | retry += 1 110 | logger.warning(f"[{space_creator}] Incorrect duration, M3U8 playlist download retry({retry}/{MAX_RETRY}) ...{' ' * 10}") 111 | logger.debug(chunk_m3u8) 112 | time.sleep(const.SLEEP_TIME) 113 | continue 114 | break 115 | except (MaxRetryError, requests.exceptions.RetryError, requests.exceptions.ConnectionError) as retryError: 116 | retry += 1 117 | logger.debug(retryError, exc_info=True) 118 | logger.warning(f"[{space_creator}] Retrying({retry}/{MAX_RETRY}) m3u8 playlist download...{' ' * 10}") 119 | time.sleep(const.SLEEP_TIME) 120 | except error.HTTPError as httpError: 121 | retry += 1 122 | logger.debug(httpError, exc_info=True) 123 | logger.warning(f"[{space_creator}] Retrying({retry}/{MAX_RETRY}) m3u8 playlist download...{' '*10}") 124 | time.sleep(const.SLEEP_TIME) 125 | except Exception as e: 126 | retry += 1 127 | logger.error(e, exc_info=True) 128 | logger.warning(f"[{space_creator}] Retrying({retry}/{MAX_RETRY}) m3u8 playlist download...{' ' * 10}") 129 | time.sleep(const.SLEEP_TIME) 130 | t = t.replace('chunk', base_url+base_addon+m3u8_id+end_chunkurl) 131 | logger.debug(t) 132 | filename = f'{rest_id}.m3u8' 133 | output = f'{DOWNLOAD_PATH}\\{space_date} - {space_creator} - {file_name} ({rest_id}).m4a' 134 | command = ['ffmpeg', '-n', '-loglevel', 'info', '-protocol_whitelist', 'file,crypto,https,tcp,tls'] 135 | command += ['-i', filename, '-metadata', f'date={space_date}'] 136 | command += ['-metadata', f'comment=feat.{handle_name}'] if space_creator != handle_name else ['-metadata', f'comment={master_url}'] 137 | command += ['-metadata', f'artist={space_creator}', '-metadata', f'title={space_title}', '-c', 'copy', output] 138 | 139 | # Check if the file already exist and if it does remove it 140 | try: 141 | if os.path.isfile(filename): 142 | os.remove(filename) 143 | except PermissionError as perm_error: 144 | logger.error(perm_error, exc_info=True) 145 | try: 146 | # Create a new file with the appropriately replaced chunk url 147 | with open(filename, 'w') as f: 148 | f.write(t) 149 | 150 | download_result = subprocess.run(command, capture_output=True, text=True) 151 | logger.debug(download_result.stderr) 152 | 153 | if SEND_DOWNLOAD: 154 | send_file(output, rest_id, space_creator, space_title, space_date) 155 | if retry >= MAX_RETRY: 156 | if m3u8_duration is not None: 157 | logger.warning(f"[{space_creator}] Download completed for {rest_id}, but may not be completely downloaded with a duration of {round(m3u8_duration/60, 2)} minutes") 158 | else: 159 | logger.warning( 160 | f"[{space_creator}] Download completed for {rest_id}, but may not be completely downloaded") 161 | elif "HTTP error 404 Not Found" in download_result.stderr: 162 | logger.warning(f"[{space_creator}] AAC chunk(s) returning 404 Error Not Found, download incomplete...") 163 | else: 164 | logger.info(f"[{space_creator}] Download completed for {rest_id + ' ' * 10}") 165 | except Exception: 166 | logger.error(exc_info=True) 167 | finally: 168 | # Check if the file already exist and if it does remove it 169 | try: 170 | if os.path.isfile(filename): 171 | os.remove(filename) 172 | except PermissionError as perm_error: 173 | logger.error(perm_error, exc_info=True) 174 | return True 175 | 176 | 177 | if __name__ == "__main__": 178 | import threading 179 | 180 | def loading_text(): 181 | loading_string = f"[INFO] Downloading twitter space {space_id} " 182 | animation = [" ", ". ", ".. ", "... ", ".... ", "....."] 183 | idx = 0 184 | while status: 185 | print(loading_string + animation[idx % len(animation)], end="\r") 186 | time.sleep(0.3) 187 | idx += 1 188 | if idx == 6: 189 | idx = 0 190 | 191 | def get_space_server(m3u8_url): 192 | reg_result = re.search("(https:\/\/)((?:[^-]*-){2})(.*)(\.pscp.*)", m3u8_url) 193 | # regex will return something like 'prod-fastly-' so remove the last dash 194 | deployment_server = reg_result.group(2)[:-1] 195 | periscope_server = reg_result.group(3) 196 | server = (deployment_server, periscope_server) 197 | return server 198 | 199 | try: 200 | status = True 201 | m3u8_url = input("m3u8 Url: ") 202 | space_id = input("space id: ") 203 | twitter_name = input("twitter name: ") 204 | space_title = input("space title: ") 205 | space_date = input("space date(YYYYMMDD): ") 206 | # space_date = datetime.strptime(space_date, "%Y%m%d").timestamp() * 1000 207 | m3u8_id = re.search("(.*\/Transcoding\/v1\/hls\/(.*)(\/non_transcode.*))", m3u8_url).group(2) 208 | server = get_space_server(m3u8_url) 209 | # m3u8_id, rest_id, space_creator_name, handle_name, space_title, space_server, space_duration, space_date, logger=None 210 | t1 = threading.Thread(target=loading_text) 211 | t1.start() 212 | download(m3u8_id=m3u8_id, rest_id=space_id, space_creator=twitter_name, handle_name=twitter_name, space_title=space_title, 213 | space_server=server, space_duration=None, space_date=space_date) 214 | status = False 215 | input("Download complete, press any key to exit...") 216 | exit() 217 | except Exception as e: 218 | print(f"\rError encountered...{' '*40}\n{e}") 219 | while True: 220 | input("Exit...") 221 | exit() 222 | --------------------------------------------------------------------------------