├── .gitignore
├── README.md
├── TwitterSpace.py
├── const.py.example
├── index.py
├── log.py
├── requirements.txt
└── twspace.py


/.gitignore:
--------------------------------------------------------------------------------
1 | *
2 | !.gitignore
3 | !requirements.txt
4 | !const.py.example
5 | !index.py
6 | !TwitterSpace.py
7 | !twspace.py
8 | !log.py


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # auto-twitter-space
 2 | ### Overview
 3 | A script that tracks twitter spaces and can send it to a discord webhook. 
 4 | With twitter api v2 being paywalled, this script now uses Twitter's private API using guest tokens to find twitter spaces of the host and then the m3u8 url for the space is found and will have it printed on console and posted using a discord webhook. 
 5 | Optionally, it can also download the twitter space after it ends.
 6 | 
 7 | ### Installation and Requirements
 8 | This script requires a few non-standard modules all of which can be installed using the requirements text file. A requirements text file has been included and the command `pip3 install -r requirements.txt` (or pip) can be used to install the required dependencies(except [FFMPEG](https://ffmpeg.org/))
 9 | 
10 | So far this has only been tested on Windows 11. 
11 | 
12 | ### How To Use
13 | Fill out applicable informations inside the `const.py`(if you haven't already renamed `const.py.example` to `const.py`, do so now)
14 | Optionally, obtain a discord webhook url and put it in `const.py` if you want Twitter Space notification to also be posted onto your discord channel.
15 | Optionally you can also specify whether to download the Twitter Space and/or the download location. After the download the files will optionally be posted and sent through a discord webhook. `twspace.py` can also be ran as a standalone script to manually download twitter spaces.
16 | 
17 | Cookies such as `AUTH_TOKEN` and `CT0`(CSRF token) must be obtained and can be found via 
18 | 
19 | `browser's developer tool` > `Application` > `Storage` > `Cookies` 
20 | 
21 | and provide obtained values in `const.py`. Also, due to rate-limiting, adjust sleep to optimal value. 
22 | 
23 | With all the setting up out of the way this script can run by calling the main/index file `index.py`
24 | 
25 | 
26 | 
27 | 
28 | 


--------------------------------------------------------------------------------
/TwitterSpace.py:
--------------------------------------------------------------------------------
 1 | import dataclasses
 2 | from datetime import datetime
 3 | import re
 4 | 
 5 | 
 6 | @dataclasses.dataclass
 7 | class TwitterSpace:
 8 |     handle_id: str
 9 |     handle_name: str
10 |     handle_image: str = None
11 |     space_title: str = "Twitter Space"
12 |     space_state: str = None
13 |     space_creator_id: str = None
14 |     space_creator_name: str = None
15 |     space_participant_title: str = None  # admin or speaker
16 |     space_was_running: bool = False
17 |     space_started_at: int = 0
18 |     space_ended_at: int = 0
19 |     space_url: str = None
20 |     m3u8_url: str = None
21 |     space_notified: bool = False
22 |     space_downloaded: bool = False
23 |     space_duration: float = 0
24 |     rest_id: str = None
25 |     media_key: str = None
26 | 
27 |     def get_strftime(self):
28 |         # initial timestamp is in milliseconds and not seconds
29 |         return datetime.fromtimestamp(self.space_started_at/1000).strftime("%Y%m%d")
30 | 
31 |     def get_m3u8_id(self):
32 |         return re.search("(.*\/Transcoding\/v1\/hls\/(.*)(\/non_transcode.*))", self.m3u8_url).group(2)
33 | 
34 |     def get_server(self):
35 |         reg_result = re.search("(https:\/\/)((?:[^-]*-){2})(.*)(\.pscp.*)", self.m3u8_url)
36 |         # regex will return something like 'prod-fastly-' so remove the last dash
37 |         deployment_server = reg_result.group(2)[:-1]
38 |         periscope_server = reg_result.group(3)
39 |         server = (deployment_server, periscope_server)
40 |         return server
41 | 
42 |     def set_space_duration(self):
43 |         self.space_duration = self.space_ended_at/1000.0 - self.space_started_at/1000.0
44 | 
45 |     # def set_server(self):
46 |     #     reg_result = re.search("(https:\/\/)((?:[^-]*-){2})(.*)(\.pscp.*)", self.m3u8_url)
47 |     #     # regex will return something like 'prod-fastly-' so remove the last dash
48 |     #     self.deployment_server = reg_result.group(2)[:-1]
49 |     #     self.periscope_server = reg_result.group(3)
50 | 
51 |     def set_space_details(self, space_details):
52 |         self.handle_image = space_details['creator_results']['result']['legacy']['profile_image_url_https']
53 |         self.space_title = space_details.get('title', "")
54 |         self.space_url = f"https://twitter.com/i/spaces/{self.rest_id}"
55 |         self.space_started_at = int(space_details.get('started_at', 0))
56 |         self.space_state = space_details['state']
57 |         self.space_was_running = True
58 |         self.space_ended_at = int(space_details.get('ended_at', 0))
59 | 
60 |     def reset_default(self):
61 |         self.handle_id: str = self.handle_id
62 |         self.handle_name: str = self.handle_name
63 |         self.handle_image: str = None
64 |         self.space_title: str = None
65 |         self.space_state: str = None
66 |         self.space_creator_id: str = None
67 |         self.space_creator_name: str = None
68 |         self.space_participant_title: str = None
69 |         self.space_was_running: bool = False
70 |         self.space_started_at: int = 0
71 |         self.space_ended_at: int = 0
72 |         self.space_url: str = None
73 |         self.m3u8_url: str = None
74 |         self.space_notified: bool = False
75 |         self.space_downloaded: bool = False
76 |         self.space_duration: float = 0
77 |         self.rest_id: str = None
78 |         self.media_key: str = None
79 | 
80 | 
81 | 


--------------------------------------------------------------------------------
/const.py.example:
--------------------------------------------------------------------------------
 1 | # duration in seconds and configure if rate-limited
 2 | SLEEP_TIME = 60
 3 | 
 4 | WEBHOOK_URL = ""
 5 | WEBHOOK_DOWNLOAD_URL = ""
 6 | 
 7 | # Cookies: auth_token and ct0(CSRF Token) can be found in your browser's cookies under Application tab in dev's tool
 8 | BEARER_TOKEN = "Bearer AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA"
 9 | AUTH_TOKEN = ""
10 | CT0 = ""
11 | 
12 | # Specify whether to send downloaded twitter space to the discord webhook
13 | SEND_DOWNLOAD = False
14 | 
15 | # Either True or just download path to a specified directory to enable download
16 | DOWNLOAD = ""
17 | 
18 | # Track and download all spaces on the user timeline(user hosted spaces or retweeting other spaces)
19 | ALL_SPACE_TIMELINE = True
20 | 
21 | LOGGING = True
22 | 
23 | '''
24 | Prepopulated Hololive and Other creators to monitor
25 | Following the same format add whoever you want to monitor in the form of {"creator_name": creator_id}
26 | '''
27 | # Gamers IDs
28 | twitter_ids = [{"inugamikorone": 1109748792721432577}, {"k5r6n3": 1543196930032906241}, {"nekomataokayu": 1109751762733301760},
29 |                {"MO_OKAYU_GU": 1508759123285856256}, {"ookamimio": 1063337246231687169}, {"ookamimio_sab": 1593114800938135552},
30 |                {"shirakamifubuki": 997786053124616192}]
31 | # Gen 0 IDs
32 | twitter_ids += [{'robocosan': 960340787782299648}, {'maybe_robochan': 1598143974006747138},
33 |                 {'sakuramiko35': 979891380616019968}, {'mikochisub': 1637740181813084161}, {'tokino_sora': 880317891249188864},
34 |                 {'suisei_hosimati': 975275878673408001}, {'suisei_submati': 1656215492728877058}, {'AZKi_VDiVA': 1062499145267605504}]
35 | # Gen 1 IDs
36 | twitter_ids += [{'yozoramel': 985703615758123008}, {'YozoramelS': 1200743208805748736}, {'mel_kapu': 1596643514888368129}, {'akirosenthal': 996643748862836736},
37 |                 {'akaihaato': 998336069992001537}, {'natsuiromatsuri': 996645451045617664}, {'7216_2nd': 1122810226153938944}]
38 | # Gen 2 IDs
39 | twitter_ids += [{'minatoaqua': 1024528894940987392}, {'nakiriayame': 1024532356554608640}, {'YODAYOsub': 1598245359083347968},
40 |                 {'murasakishionch': 1024533638879166464}, {'shionchan_o': 1554007042779594752},
41 |                 {'yuzukichococh': 1024970912859189248}, {'oozorasubaru': 1027853566780698624}]
42 | # Gen 3 IDs
43 | twitter_ids += [{'houshoumarine': 1153192638645821440}, {'shiroganenoel': 1153195295573856256}, {'danchou_sub': 1609453459618828288},
44 |                 {'shiranuiflare': 1154304634569150464}, {'fuu_tan_sub': 1650527824145682432}, {'usadapekora': 1133215093246664706}, {'uraakapeko': 1590623757990645761}]
45 | # Gen 4 IDs
46 | twitter_ids += [{'himemoriluna': 1200396798281445376}, {'lunatan_nanora': 1574391126525546502}, {'amanekanatach': 1200396304360206337},
47 |                 {'tokoyamitowa': 1200357161747939328}, {'tsunomakiwatame': 1200397643479805957}]
48 | # Gen 5 IDs
49 | twitter_ids += [{'omarupolka': 1270551806993547265}, {'yukihanalamy': 1255013740799356929}, {'YukihanaWamy': 1645752800369061888},
50 |                 {'shishirobotan': 1255015814979186689}, {'momosuzunene': 1255017971363090432}, {'_nenechidayo': 1561593840590794753}]
51 | # Gen 6 IDs
52 | twitter_ids += [{'LaplusDarknesss': 1433657158067896325}, {'wagahaida_L': 1648179101247864832}, {'takanelui': 1433660866063339527},
53 |                 {'hakuikoyori': 1433667543806267393},
54 |                 {'sakamatachloe': 1433669866406375432}, {'kazamairohach': 1434755250049589252}]
55 | # Other IDs
56 | twitter_ids += [{'ksononair': 733990222787018753}, {'tanigox': 2006101}, {'achan_UGA': 1064352899705143297},
57 |                 {'daidoushinove': 1156797715319160832}, {"kotone": 986871577890312192}]
58 | 
59 | # Holostars Gen 1 IDs
60 | twitter_ids += [{'miyabihanasaki': 1132832428353966081}, {'arurandeisu': 1156841498479955968},
61 |                 {'rikkaroid': 1174223248655114246}, {'kanadeizuru': 1132924263441227776}]
62 | # Holostars Gen 2 SunTempo IDs
63 | twitter_ids += [{'kishidotemma': 1194519616472543232}, {'astelleda': 1181889913517572096},
64 |                 {'yukokuroberu': 1194520283446530051}]
65 | # Holostars Gen 3 MaFia IDs
66 | twitter_ids += [{'kageyamashien': 1248565757207695361}, {'aragamioga': 1248567107173773313}]
67 | # Holostars Gen 4 Uproar IDs
68 | twitter_ids += [{'yatogamifuma': 1490584094086164480}, {'minaserioch': 1490583416768970752},
69 |                 {'hizakigamma': 1490585175331201024}, {'utsugiuyu': 1490587283849809923}]
70 | # Holostars EN TEMPUS IDs
71 | twitter_ids +=[{'regisaltare': 1536575088996524032}, {'axelsyrios': 1536577295632441344},
72 |                {'magnidezmond': 1536576325296996352}, {'noirvesper_en': 1536579341332516864}]
73 | # Holostars EN TEMPUS 2 IDs
74 | twitter_ids += [{'gavisbettel': 1582926739684339712}, {'machinaxflayon': 1582922712166825986},
75 |                 {'banzoinhakka': 1582927907206631425}, {'josuijishinri': 1582925071546732544}]
76 | 
77 | # HoloID Gen 1 IDs
78 | twitter_ids += [{'ayunda_risu': 1234752200145899520}, {'moonahoshinova': 1234753886520393729},
79 |                 {'airaniiofifteen': 1235180878449397764}]
80 | # HoloID Gen 2 IDs
81 | twitter_ids += [{'anyamelfissa': 1328277750000492545}, {'kureijiollie': 1328277233492844544},
82 |                 {'pavoliareine': 1328275136575799297}]
83 | # HoloID Gen 3 IDs
84 | twitter_ids += [{'kaelakovalskia': 1486636197908602880}, {'vestiazeta': 1486633489101307907},
85 |                 {'kobokanaeru': 1486629076005634049}]
86 | 
87 | # HoloEN Gen 1 Myth IDs
88 | twitter_ids += [{'moricalliope': 1283653858510598144}, {'takanashikiara': 1283646922406760448},
89 |                 {'ninomaeinanis': 1283650008835743744}]
90 | twitter_ids += [{'gawrgura': 1283657064410017793}, {'watsonameliaEN': 1283656034305769472}]
91 | 
92 | # HoloEN Project Hope ID
93 | twitter_ids += [{'irys_en': 1363705980261855232}]
94 | 
95 | # HoloEN Gen 2 Council IDs
96 | twitter_ids += [{'tsukumosana': 1409819816194576394}, {'ceresfauna': 1409784760805650436},
97 |                 {'ourokronii': 1409817096523968513}]
98 | twitter_ids += [{'nanashimumei_en': 1409817941705515015}, {'hakosbaelz': 1409783149211443200}]
99 | 


--------------------------------------------------------------------------------
/index.py:
--------------------------------------------------------------------------------
  1 | import json
  2 | import sys
  3 | import time
  4 | import urllib3
  5 | import requests
  6 | from requests.adapters import HTTPAdapter, Retry
  7 | 
  8 | import twspace
  9 | import threading
 10 | import re
 11 | import const
 12 | from datetime import datetime
 13 | from log import create_logger
 14 | import logging
 15 | from TwitterSpace import TwitterSpace
 16 | 
 17 | 
 18 | # TODO: Look into how to handle retweeted spaces and scheduled spaces -> avoid space being downloaded multiple times(create a download list to check spaces being downloaded?)
 19 | # TODO: Look into how to better handle and show twitter participant level(speaker, listener, etc)
 20 | 
 21 | # Major Changes: Setted TwitterSpaces which is a dictionary of TwitterSpace objects e.g.{'user_id': TwitterSpace(handle_id='sam', handle_name='sam', handle_image=None, space_id=None, space_title=None, space_started_at='20230409', space_url=None, m3u8_url=None, space_notified=False, space_downloaded=False, space_duration=0, periscope_server=None, deployment_server=None, rest_id=None, media_key=None)}
 22 | SLEEP_TIME = const.SLEEP_TIME
 23 | ALL_SPACE_TIMELINE = const.ALL_SPACE_TIMELINE
 24 | 
 25 | WEBHOOK_URL = const.WEBHOOK_URL
 26 | DOWNLOAD = const.DOWNLOAD
 27 | 
 28 | BEARER_TOKEN = const.BEARER_TOKEN
 29 | AUTH_TOKEN = const.AUTH_TOKEN
 30 | CSRF_TOKEN = const.CT0
 31 | 
 32 | # List of twitter creators to monitor
 33 | twitter_ids = const.twitter_ids
 34 | TwitterSpaces = {}
 35 | 
 36 | #################################
 37 | """
 38 | API FUNCTIONS
 39 | """
 40 | 
 41 | 
 42 | def set_logger(logger=None):
 43 |     if logger is None:
 44 |         logger = logging.getLogger(__name__)
 45 |         if len(logger.handlers) != 0:
 46 |             return logger
 47 |         console_handler = logging.StreamHandler()
 48 |         console_handler.setLevel(logging.INFO)
 49 |         formatter = logging.Formatter('%(asctime)s [%(filename)s:%(lineno)d] %(levelname)s | %(message)s',
 50 |                                       datefmt='%Y-%m-%d %H:%M:%S')
 51 |         console_handler.setFormatter(formatter)
 52 |         logger.addHandler(console_handler)
 53 |     return logger
 54 | 
 55 | 
 56 | # def fix_up_user_array(logger=None):
 57 | #     global twitter_ids
 58 | #     logger = set_logger(logger)
 59 | #
 60 | #     handle_name = None
 61 | #     for user_dict in const.twitter_ids:
 62 | #         try:
 63 | #             handle_name, handle_id = user_dict.popitem()
 64 | #             twitter_ids.append({'name': handle_name, 'id': str(handle_id)})
 65 | #             raise Exception
 66 | #         except Exception as e:
 67 | #             logger.error(f"Issue with providing user IDs {handle_name if handle_name is not None else ''}: Error {e}")
 68 | #             sys.exit()
 69 | 
 70 | def create_session():
 71 |     headers = {"Authorization": BEARER_TOKEN, "X-Csrf-Token": CSRF_TOKEN}
 72 |     cookies = {"auth_token": AUTH_TOKEN, "ct0": CSRF_TOKEN}
 73 |     session = requests.Session()
 74 |     retry = Retry(total=5, backoff_factor=1, status_forcelist=[400, 401, 403, 404, 429, 500, 502, 503, 504])
 75 |     session.mount("https://", HTTPAdapter(max_retries=retry))
 76 |     session.headers = headers
 77 |     session.cookies.update(cookies)
 78 |     return session
 79 | 
 80 | 
 81 | def handle_rate_limit(handle_name, header_json, error_code=None,logger=None):
 82 |     logger = set_logger(logger)
 83 |     logger.debug(f"[{handle_name}] Headers: {header_json}")
 84 |     x_rate_limit_remaining = int(header_json.get("x-rate-limit-remaining", 0))
 85 |     x_rate_limit_reset = int(header_json.get("x-rate-limit-reset", 0))
 86 | 
 87 |     if x_rate_limit_remaining == 0 or error_code in (88, 429):
 88 |         rate_limit_duration = (datetime.fromtimestamp(x_rate_limit_reset) - datetime.now()).total_seconds() if x_rate_limit_reset != 0 else 900
 89 |         logger.warning(
 90 |             f"[{handle_name}] Rate-limited until {datetime.fromtimestamp(x_rate_limit_reset)}, sleeping for {int(rate_limit_duration)} seconds...")
 91 |         time.sleep(rate_limit_duration or SLEEP_TIME)
 92 | 
 93 | 
 94 | # Alternate method to usertweets to find twitter spaces
 95 | def get_spaces_by_avatar_content(user_ids_list, logger=None, session=None):
 96 |     user_spaces = {"users": {}, "refresh_delay_secs": 0}
 97 |     for i, user_ids in enumerate(user_ids_list):
 98 |         if 0 < i <= len(user_ids_list)-1:
 99 |             logger.debug(f"{user_ids} Sleeping for {SLEEP_TIME} seconds to avoid rate-limit")
100 |             time.sleep(SLEEP_TIME)
101 | 
102 |         space_id_url = f"https://twitter.com/i/api/fleets/v1/avatar_content?user_ids={','.join(user_ids)}&only_spaces=true"
103 | 
104 |         try:
105 |             res = session.get(space_id_url)
106 |             logger.debug(f'URL: {space_id_url}')
107 |             logger.debug(f'Header: {res.headers}')
108 |             if res.status_code == 200:
109 |                 res_json = res.json()
110 |                 logger.debug(f"User Spaces: {res_json}")
111 |                 user_spaces['users'].update(res_json['users'])
112 |             elif res.status_code == 429:
113 |                 logger.error(f"Rate-limited error {res.status_code} {res.text}, sleeping for {SLEEP_TIME} seconds...")
114 |                 time.sleep(SLEEP_TIME)
115 |                 continue
116 |             elif res.status_code == 401:
117 |                 logger.error(f"Authentication error {res.status_code} {res.text}")
118 | 
119 |         except requests.exceptions.RetryError as reqError:
120 |             logger.debug(reqError, exc_info=True)
121 |         except Exception as e:
122 |             logger.error(e, exc_info=True)
123 |     return user_spaces
124 | 
125 | 
126 | # Get the creator of the space and title of the current user(admin, speaker or None)
127 | def get_space_participant(user, space_details):
128 |     # Check if space is created by the current space user and not a retweeted space on timeline,etc
129 |     space_details_json = space_details.json()
130 |     space_creator_id = space_details_json['data']['audioSpace']['metadata']['creator_results']['result']['rest_id']
131 |     space_creator_name = space_details_json['data']['audioSpace']['metadata']['creator_results']['result']['legacy']['screen_name']
132 | 
133 |     if user.handle_id in json.dumps(space_details_json['data']['audioSpace']['participants']['admins']):
134 |         participant_title = 'admin'
135 |     elif user.handle_id in json.dumps(space_details_json['data']['audioSpace']['participants']['speakers']):
136 |         participant_title = 'speaker'
137 |     elif user.handle_id in json.dumps(space_details_json['data']['audioSpace']['participants']['listeners']):
138 |         participant_title = 'listener'
139 |     else:
140 |         participant_title = 'unknown'
141 | 
142 |     return space_creator_id, space_creator_name, participant_title
143 | 
144 | 
145 | # Gets the first twitter space on the timeline/user profile
146 | # Returns the space id
147 | def get_space_tweet_id(handle_id, handle_name, logger=None, session=None):
148 |     logger = set_logger(logger)
149 | 
150 |     space_id_pattern = r'"expanded_url":"https://twitter\.com/i/spaces/(.*?)"'
151 | 
152 |     # See UserTweets.json for example json response
153 |     space_id_url = "https://twitter.com/i/api/graphql/rIIwMe1ObkGh_ByBtTCtRQ/UserTweets"
154 |     headers = {"Authorization": BEARER_TOKEN, "X-Csrf-Token": CSRF_TOKEN}
155 |     cookies = {"auth_token": AUTH_TOKEN, "ct0": CSRF_TOKEN}
156 | 
157 |     params = {
158 |         "variables": f'{{"userId":"{handle_id}",'
159 |                      '"count":10,'
160 |                      '"includePromotedContent":false,'
161 |                      '"withQuickPromoteEligibilityTweetFields":false,'
162 |                      '"withVoice":true,'
163 |                      '"withV2Timeline":true}',
164 |         "features": '{"rweb_lists_timeline_redesign_enabled": true,'
165 |                      '"responsive_web_graphql_exclude_directive_enabled": false,'
166 |                      '"verified_phone_label_enabled": false,'
167 |                      '"creator_subscriptions_tweet_preview_api_enabled": true,'
168 |                      '"responsive_web_graphql_timeline_navigation_enabled": true,'
169 |                      '"responsive_web_graphql_skip_user_profile_image_extensions_enabled": false,'
170 |                      '"tweetypie_unmention_optimization_enabled": true,'
171 |                      '"responsive_web_edit_tweet_api_enabled": true,'
172 |                      '"graphql_is_translatable_rweb_tweet_is_translatable_enabled": true,'
173 |                      '"view_counts_everywhere_api_enabled": true,'
174 |                      '"longform_notetweets_consumption_enabled": true,'
175 |                      '"responsive_web_twitter_article_tweet_consumption_enabled": false,'
176 |                      '"tweet_awards_web_tipping_enabled": false,'
177 |                      '"freedom_of_speech_not_reach_fetch_enabled": false,'
178 |                      '"standardized_nudges_misinfo": true,'
179 |                      '"tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled": false,'
180 |                      '"longform_notetweets_rich_text_read_enabled": true,'
181 |                      '"longform_notetweets_inline_media_enabled": true,'
182 |                      '"responsive_web_media_download_video_enabled": false,'
183 |                      '"responsive_web_enhance_cards_enabled": false}'
184 |     }
185 |     try:
186 |         rest_id_response = session.get(url=space_id_url, params=params)
187 |     except (requests.exceptions.ConnectionError, requests.exceptions.RetryError, requests.exceptions.ReadTimeout) as r_exception:
188 |         logger.debug(r_exception)
189 |         logger.debug(f"[{handle_name}] Connection issue occurred while looking for twitter space...")
190 |         return None
191 |     except requests.exceptions.RequestException as req_exceptions:
192 |         logger.error(f"[{handle_name}] {req_exceptions}", exc_info=True)
193 |         return None
194 |     except Exception as e:
195 |         logger.error(e, exc_info=True)
196 |         return None
197 | 
198 |     # Error check
199 |     if rest_id_response.status_code != 200:
200 |         try:
201 |             rest_id_json = rest_id_response.json()
202 |         except requests.exceptions.JSONDecodeError:
203 |             logger.debug(f"[{handle_name}] JSONDecodeError: {rest_id_response}", exc_info=True)
204 |             logger.debug(f"[{handle_name}] JSONDecodeError: {rest_id_response.headers}")
205 |             logger.warning(
206 |                 f"[{handle_name}] Issue finding space with error code {rest_id_response.status_code} {rest_id_response.text.strip()}")
207 |             if rest_id_response.status_code == 429:
208 |                 logger.debug(f"[{handle_name}] JSONDecodeError: {rest_id_response.headers}")
209 |                 handle_rate_limit(handle_name, rest_id_response.headers, error_code=429, logger=logger)
210 |             return None
211 | 
212 |         try:
213 |             # try except so script can work after a long period of inactivity(sleep)
214 |             if 'error' in rest_id_json or 'errors' in rest_id_json:
215 |                 logger.debug(f"[{handle_name}] {rest_id_json}")
216 |                 if isinstance(rest_id_json.get('errors'), list):
217 |                     logger.debug(f"[{handle_name}] {rest_id_json} Error {rest_id_json['errors'][0]['code']} {rest_id_json['errors'][0]['message']}")
218 |                     # Needed anymore?
219 |                     if rest_id_json['errors'][0]['code'] == 88:
220 |                         handle_rate_limit(handle_name, rest_id_response.headers, error_code=88, logger=logger)
221 |                 elif rest_id_json.get('errors').get(0).get('code') == 239:
222 |                     logger.debug(
223 |                         f"[{handle_name}] Issue finding space with error code {rest_id_response.status_code} {rest_id_response.text.strip()}")
224 |                     logger.debug(f"[{handle_name}] JSONDecodeError: {rest_id_response.headers}")
225 |                 elif rest_id_json.get('errors').get(0).get('code') in (32, 353):
226 |                     logger.error(
227 |                         f"[{handle_name}] Issue finding space, may need to get new tokens with error code {rest_id_response.status_code} {rest_id_response.text.strip()}")
228 |                     logger.debug(f"[{handle_name}] JSONDecodeError: {rest_id_response.headers}")
229 |                 else:
230 |                     logger.error(
231 |                         f"[{handle_name}] Issue finding space, may need to get new tokens with error code {rest_id_response.status_code} {rest_id_response.json().get('errors').get(0).get('message')}")
232 |         except AttributeError as aError:
233 |             logger.debug(aError, exc_info=True)
234 | 
235 |         else:
236 |             logger.warning(f"[{handle_name}] Issue finding space with error code {rest_id_response.status_code} {rest_id_response.text.strip()}")
237 |     elif 'data' not in rest_id_response.json():
238 |         logger.debug(f"[{handle_name}] {rest_id_response}")
239 |     handle_rate_limit(handle_name, rest_id_response.headers, logger=logger)
240 | 
241 |     # space_id = space_id_response.json()['data']['user']['result']['timeline_v2']['timeline']['instructions'][0]['entries'][0]['result']['legacy']['extended_entities']['media']['media_key']
242 |     rest_id = None
243 |     try:
244 |         rest_id = re.search(string=rest_id_response.text, pattern=space_id_pattern).group(1)
245 |         logger.debug(f"Space ID for {handle_name}({handle_id}): {rest_id}")
246 |     except AttributeError:
247 |         # No space found
248 |         return rest_id
249 | 
250 |     return rest_id
251 | 
252 | 
253 | # Gets detailed information/status of the twitter space
254 | # Returns a media key which is used to get information about the video stream(m3u8 url)
255 | def get_space_details(handle_name, rest_id, logger=None, session=None):
256 |     logger = set_logger(logger)
257 | 
258 |     # See AudioSpaceById for example json response
259 |     space_id_url = "https://twitter.com/i/api/graphql/kZ9wfR8EBtiP0As3sFFrBA/AudioSpaceById"
260 | 
261 |     params = {
262 |         "variables": f'{{"id":"{rest_id}",'
263 |                      '"isMetatagsQuery":false,'
264 |                      '"withListeners":true,'
265 |                      '"withReplays":true}',
266 |         "features": '{"spaces_2022_h2_clipping":true,'
267 |                      '"spaces_2022_h2_spaces_communities":true,'
268 |                      '"responsive_web_graphql_exclude_directive_enabled":false,'
269 |                      '"verified_phone_label_enabled":false,'
270 |                      '"creator_subscriptions_tweet_preview_api_enabled":true,'
271 |                      '"responsive_web_graphql_skip_user_profile_image_extensions_enabled":false,'
272 |                      '"tweetypie_unmention_optimization_enabled":true,'
273 |                      '"responsive_web_edit_tweet_api_enabled":true,'
274 |                      '"graphql_is_translatable_rweb_tweet_is_translatable_enabled":true,'
275 |                      '"view_counts_everywhere_api_enabled":true,'
276 |                      '"longform_notetweets_consumption_enabled":true,'
277 |                      '"responsive_web_twitter_article_tweet_consumption_enabled":false,'
278 |                      '"tweet_awards_web_tipping_enabled":false,'
279 |                      '"freedom_of_speech_not_reach_fetch_enabled":true,'
280 |                      '"freedom_of_speech_not_reach_appeal_label_enabled":false,'
281 |                      '"standardized_nudges_misinfo":true,'
282 |                      '"tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled":true,'
283 |                      '"responsive_web_graphql_timeline_navigation_enabled":true,'
284 |                      '"longform_notetweets_rich_text_read_enabled":true,'
285 |                      '"longform_notetweets_inline_media_enabled":true,'
286 |                      '"responsive_web_media_download_video_enabled":false,'
287 |                      '"responsive_web_enhance_cards_enabled":false}'
288 |     }
289 |     try:
290 |         space_id_response = session.get(url=space_id_url, params=params)
291 |     except (requests.exceptions.ConnectionError, requests.exceptions.RetryError, requests.exceptions.ReadTimeout) as r_exception:
292 |         logger.debug(r_exception)
293 |         logger.debug(f"[{handle_name}] Connection issue occurred while looking for twitter space...")
294 |         return None
295 |     except requests.exceptions.RequestException as req_exceptions:
296 |         logger.error(f"[{handle_name}] {req_exceptions}", exc_info=True)
297 |         return None
298 |     except Exception as e:
299 |         logger.error(e, exc_info=True)
300 |         return None
301 |     # Error check
302 |     try:
303 |         space_id_json = space_id_response.json()
304 |         logger.debug(f"[{handle_name}] Space Details: {space_id_json}")
305 |         logger.debug(f"[{handle_name}] Space ID Json Headers: {space_id_response.headers}")
306 |     except requests.exceptions.JSONDecodeError:
307 |         logger.error(f"[{handle_name}] Issue getting space details with error code {space_id_response.status_code} {space_id_response.text.strip()}")
308 |         logger.debug(space_id_response)
309 |         return None
310 | 
311 |     if space_id_response.status_code == 429:
312 |         handle_rate_limit(handle_name, space_id_response.headers, error_code=429, logger=logger)
313 |         logger.debug(f"[{handle_name}] Error {space_id_response.status_code} {space_id_response.text}", exc_info=True)
314 | 
315 |     if 'data' not in space_id_json or space_id_response.status_code != 200:
316 |         if 'error' in space_id_json:
317 |             # Not needed anymore?
318 |             if space_id_json['errors'][0]['code'] in (32, 353):
319 |                 logger.info(f"[{handle_name}] Bad guest token, renewing...")
320 |                 logger.error(
321 |                     f"[{handle_name}] Issue finding space, may need to get new tokens with error code {space_id_json.status_code} {space_id_json.text.strip()}")
322 |             logger.error(f"[{handle_name}] Issue getting media key with error code {space_id_response.status_code} {space_id_json}")
323 |             return None
324 |         else:
325 |             # {'data': {'audioSpace': {}}}
326 |             logger.debug("Empty object received...")
327 |             return None
328 |     elif space_id_json.get('data').get('audioSpace') == {}:
329 |         # {'data': {'audioSpace': {}}}
330 |         return None
331 | 
332 |     handle_rate_limit(handle_name, space_id_response.headers, logger=logger)
333 |     return space_id_response
334 | 
335 | 
336 | def get_media_key(handle_name, space_detail, logger=None):
337 |     logger = set_logger(logger)
338 |     media_key = None
339 |     # media_key_pattern = r'"media_key":"(.*?)"'
340 |     # space_id = space_id_response.json()['data']['user']['result']['timeline_v2']['timeline']['instructions'][0]['entries'][0]['result']['legacy']['extended_entities']['media']['media_key']
341 |     try:
342 |         # media_key = re.search(string=space_detail.text, pattern=media_key_pattern).group(1)
343 |         media_key = space_detail['media_key']
344 |         logger.debug(f"[{handle_name}] Media Key: {media_key}")
345 |     except (AttributeError, KeyError):
346 |         logger.error(f"[{handle_name}] Issue finding media key")
347 |         logger.debug(f"[{handle_name}] {space_detail}")
348 |     return media_key
349 | 
350 | 
351 | # Gets detailed information about the video/media stream
352 | # Returns m3u8 url
353 | def get_space_source(handle_name, media_key, logger=None, session=None):
354 |     logger = set_logger(logger)
355 |     location_url = None
356 |     # See live_video_stream for example json response
357 |     space_source_url = f"https://api.twitter.com/1.1/live_video_stream/status/{media_key}"
358 | 
359 |     try:
360 |         space_source_response = session.get(url=space_source_url)
361 |     except (requests.exceptions.RequestException, urllib3.exceptions.MaxRetryError, requests.exceptions.RetryError) as e:
362 |         logger.error(f"[{handle_name}] {e}")
363 | 
364 |     if space_source_response.status_code != 200:
365 |         logger.error(f"[{handle_name}] Issue getting space source with error code {space_source_response.status_code}")
366 |         logger.debug(f"[{handle_name}] Space Source Headers: {space_source_response.headers}")
367 |         handle_rate_limit(handle_name, space_source_response.headers, logger=logger)
368 |         return location_url
369 | 
370 |     space_source = space_source_response.json()
371 |     location_url = space_source["source"]["location"].replace("dynamic", "master").replace("?type=live", "")
372 |     logger.debug(f"[{handle_name}] Space Source: {space_source}")
373 | 
374 |     return location_url
375 | 
376 | 
377 | def create_users():
378 |     for user in const.twitter_ids:
379 |         user_name, user_id = next(iter(user.items()))
380 |         TwitterSpaces[user_id] = TwitterSpace(handle_id=str(user_id), handle_name=user_name)
381 | 
382 | 
383 | # list of user ids list of up to 100 ids per list
384 | def get_user_ids():
385 |     user_ids = []
386 |     split_twitter_id_list = []
387 |     if len(twitter_ids) // 100 != 0:
388 |         for split in range(len(twitter_ids) // 100):
389 |             split_twitter_id_list += [twitter_ids[split * 100:(split + 1) * 100]]
390 |         if len(twitter_ids) % 100 != 0:
391 |             split_twitter_id_list += [twitter_ids[(len(twitter_ids) // 100) * 100:]]
392 |     else:
393 |         split_twitter_id_list = [twitter_ids]
394 | 
395 |     for twitter_user_list in split_twitter_id_list:
396 |         temp_id = []
397 |         for twitter_user in twitter_user_list:
398 |             temp_id.append(str(*twitter_user.values()))
399 |         user_ids.append(temp_id)
400 |     return user_ids
401 | 
402 | 
403 | def fix_up_spaces_by_avatar_content(user_spaces_list):
404 |     user_spaces = {}
405 |     for user_id in user_spaces_list['users']:
406 |         # Tuple of (user_id, broadcast_id) where broadcast_id is equivalent to rest_id i.e. Space ID
407 |         try:
408 |             user_spaces[user_id] = user_spaces_list['users'][user_id]['spaces']['live_content']['audiospace']['broadcast_id']
409 |         except Exception as e:
410 |             logger.error(e, exc_info=True)
411 |             logger.debug(user_spaces_list['users'][user_id])
412 |     return user_spaces
413 | 
414 | 
415 | def get_spaces(user_ids, logger=None, session=None):
416 |     user_spaces = get_spaces_by_avatar_content(user_ids, logger=logger, session=session)
417 |     space_ids = fix_up_spaces_by_avatar_content(user_spaces)
418 |     logger.debug(f"User Spaces: {user_spaces}")
419 |     logger.debug(f"Space IDs: {space_ids}")
420 |     logger.debug(f"TwitterSpaces: {TwitterSpaces}")
421 |     # if space_ids == {}:
422 |     #     return
423 | 
424 |     for user in TwitterSpaces.values():
425 |         # if user.handle_id not in space_ids.keys() and user.space_state != 'Running':
426 |         #     # if user has no live space and user space object has no space running
427 |         #     # logger.debug(f"[{user.handle_name}] {user.handle_id not in space_ids.keys()}, {user.space_state != 'Running'}")
428 |         #     continue
429 |         # if user.space_state == 'Running' and user.space_downloaded:
430 |         #     # if user space object is running and already downloaded
431 |         #     # logger.debug(
432 |         #     #     f"[{user.handle_name}] {user.space_state == 'Running'}, {user.space_downloaded}")
433 |         #     continue
434 | 
435 |         # this rest_id here is basically a check for whether space is still live
436 |         rest_id = space_ids.get(user.handle_id)
437 |         try:
438 |             # if rest_id is None or (user.handle_id in space_ids.keys() and user.space_notified):
439 |             #     # Not Live(continue)->On Live->Still On Live(continue)->Just Offline->Offline(continue)
440 |             #     logger.debug(f"[{user.handle_name}] Skipping...")
441 |             #     continue
442 | 
443 |             if user.rest_id == rest_id or (rest_id is None and user.space_downloaded):
444 |                 # Not Live(continue)->On Live->Still On Live(continue)->Just Offline->Offline(continue)
445 |                 logger.debug(f"[{user.handle_name}] Skipping...")
446 |                 continue
447 | 
448 |             try:
449 |                 if rest_id is None:
450 |                     # set rest_id when space is offline to be able to download
451 |                     rest_id = user.rest_id
452 |                 logger.debug(f"[{user.handle_name}] Looking for spaces...")
453 |                 space_details_res = get_space_details(user.handle_name, rest_id, logger=logger, session=session)
454 |             except Exception as e:
455 |                 logger.error(e, exc_info=True)
456 | 
457 |             if space_details_res is None:
458 |                 logger.debug(f"[{user.handle_name}] Unable to get space details...")
459 |                 continue
460 |             else:
461 |                 space_details = space_details_res.json()['data']['audioSpace']['metadata']
462 |                 logger.debug(f"[{user.handle_name}] {space_details_res.json()}")
463 |                 # If space has already been queried(also ensure new space isn't skipped if previous space hasn't been downloaded)
464 |                 # or is a past space that has not been queried then skip
465 |                 if user.space_state == space_details['state'] and user.rest_id != space_details['rest_id'] or user.space_state is None and space_details['state'] == 'Ended':
466 |                     logger.debug(f"[{user.handle_name}] Past space, skipping...")
467 |                     continue
468 | 
469 |                 # Handling new spaces
470 |                 if user.space_state == 'Ended' and space_details['state'] == 'Running':
471 |                     user.reset_default()
472 |                     logger.debug(f"Resetting default values for {user.handle_name}")
473 | 
474 |                 # Handling scheduled space
475 |                 if user.space_state == 'NotStarted' and space_details['state'] == 'Running':
476 |                     user.reset_default()
477 |                     logger.info(f"Scheduled space from {user.handle_name} is now live")
478 |                     logger.debug(f"Resetting default values for {user.handle_name}")
479 | 
480 |             try:
481 |                 space_creator_id, space_creator_name, participant_title = get_space_participant(user, space_details_res)
482 |             except (KeyError, requests.exceptions.JSONDecodeError) as cError:
483 |                 logger.debug(cError)
484 |                 space_creator_id, space_creator_name, participant_title = user.space_creator_id, user.handle_name, None
485 | 
486 |             # TODO: Add another check to not track space if it's a retweeted where host is also on the list
487 |             # If current user isn't hosting the space or participating and should not be tracked
488 |             # if user.handle_id != space_creator_id and participant_title is None and not ALL_SPACE_TIMELINE:
489 |             #     continue
490 | 
491 |             user.rest_id = rest_id
492 |             media_key = get_media_key(user.handle_name, space_details, logger=logger)
493 |             user.media_key = media_key
494 |             user.set_space_details(space_details)
495 |             user.space_creator_id = space_creator_id
496 |             user.space_creator_name = space_creator_name
497 |             user.space_participant_title = participant_title
498 |             logger.debug(f"[{user.handle_name}] {space_details}")
499 | 
500 |             try:
501 |                 if user.space_state == "Running" and not user.space_notified:
502 |                     notify_space(user, logger=logger, session=session)
503 |             except Exception as e:
504 |                 logger.error(f"[{user.handle_name}] Issue notifying space", exc_info=True)
505 |                 logger.debug(e, exc_info=True)
506 | 
507 |         except Exception as e:
508 |             logger.error(f"[{user.handle_name}] Issue getting latest space id", exc_info=True)
509 |             logger.debug(e, exc_info=True)
510 |             continue
511 | 
512 | 
513 | def download(ended_spaces, logger=None):
514 |     if DOWNLOAD is not None or False:
515 |         downloaded = []
516 |         for ended_space in ended_spaces:
517 |             # if int(ended_space.space_duration) == 0:
518 |             #     duration = datetime.timestamp(datetime.now()) - ended_space.space_started_at/1000.0
519 |             #     ended_space.space_duration = duration
520 |             #     logger.debug(f"Setting custom duration of {duration} for {ended_space.handle_name}")
521 | 
522 |             # ended_space.m3u8_url = get_space_source(media_key=ended_space.media_key, logger=logger)
523 |             # print(" " * 70, end='\n')
524 | 
525 |             # Add a check to avoid duplicate download for retweeted/joined space between two or more tracked user
526 |             if ended_space.rest_id in downloaded:
527 |                 logger.warning(f"[{ended_space.handle_name}] {ended_space.rest_id} has already been downloaded, skipping...")
528 |                 ended_space.space_downloaded = True
529 |                 continue
530 | 
531 |             logger.info(f"{ended_space.space_creator_name} is now offline at {ended_space.rest_id}")
532 | 
533 |             try:
534 |                 if ended_space.m3u8_url is None:
535 |                     ended_space.m3u8_url = get_space_source(handle_name=ended_space.space_creator_name, media_key=ended_space.media_key, logger=logger, session=session)
536 |                     if ended_space.m3u8_url is None:
537 |                         logger.error(
538 |                             f"[{ended_space.handle_name}] Can not download space for {ended_space.space_creator_name}, unable to find m3u8 url...")
539 |                         return
540 | 
541 |                 ended_space.set_space_duration()
542 |                 server = ended_space.get_server()
543 |                 m3u8_id = ended_space.get_m3u8_id()
544 |                 space_date = ended_space.get_strftime()
545 |                 logger.debug(ended_space)
546 |                 threading.Thread(target=twspace.download,
547 |                                  args=[m3u8_id, ended_space.rest_id, ended_space.space_creator_name,
548 |                                        ended_space.handle_name, ended_space.space_title, server,
549 |                                        ended_space.space_duration, space_date, logger]).start()
550 |                 ended_space.space_downloaded = True
551 |                 downloaded.append(ended_space.rest_id)
552 |             except Exception as thread_exception:
553 |                 logger.error(thread_exception, exc_info=True)
554 |                 ended_space.space_downloaded = False
555 | 
556 | 
557 | def loading_text():
558 |     loading_string = "Waiting for live twitter spaces "
559 |     animation = ["     ", ".    ", "..   ", "...  ", ".... ", "....."]
560 |     idx = 0
561 |     while True:
562 |         print(f"[INFO] {datetime.now().replace(microsecond=0)} | " + loading_string + animation[idx % len(animation)],
563 |               end="\r")
564 |         time.sleep(0.3)
565 |         idx += 1
566 |         if idx == 6:
567 |             idx = 0
568 | 
569 | 
570 | def notify_space(space, logger=None, session=None):
571 |     logger.debug(f"[{space.space_creator_name}] Space Object: {str(space)}")
572 |     # logger.debug(f"Space Details: {str(space[0]['data'])}")
573 |     # logger.debug(f"User Details: {str(space[1]['data'])}")
574 |     space_id = space.rest_id
575 |     status = 'live' if space.space_state == 'Running' else space.space_state
576 |     creator_profile_image = space.handle_image
577 | 
578 |     space_creator = space.space_creator_name
579 |     space_handle_name = space.handle_name
580 |     space_started_at = space.get_strftime()
581 |     space_title = space.space_title
582 | 
583 |     space_url = f"https://twitter.com/i/spaces/{space_id}"
584 | 
585 |     # Get and send the m3u8 url
586 |     counter = 0
587 |     m3u8_url = None
588 |     while counter <= 5:
589 |         m3u8_url = get_space_source(handle_name=space_creator, media_key=space.media_key, logger=logger, session=session)
590 |         if m3u8_url is None:
591 |             counter += 1
592 |             time.sleep(20)
593 |             logger.warning(f"[{space.handle_name}]Retrying to get m3u8 url {counter}/{5}")
594 |             continue
595 |         else:
596 |             break
597 | 
598 |     space.m3u8_url = m3u8_url
599 |     logger.debug(space)
600 |     # Todo maybe consider changing space_creator to `space_creator` to avoid underscore error
601 |     if space.handle_id == space.space_creator_id:
602 |         logger.info(f"{space_creator} is now {status} at {space_url}")
603 |     else:
604 |         logger.info(f"[{space.space_creator_name}] {space_handle_name} is participating at {space_url}")
605 |     logger.info(f"M3U8: {m3u8_url}")
606 | 
607 |     if space.handle_id == space.space_creator_id:
608 |         description = f"{space_creator} is now {status} at <{space_url}> ```{m3u8_url}```"
609 |     else:
610 |         description = f"{space_handle_name} is participating at <{space_url}> ```{m3u8_url}```"
611 |     message = {"embeds": [{
612 |         "color": 1942002,
613 |         "author": {
614 |             "name": f"{space_creator}",
615 |             "icon_url": creator_profile_image
616 |         },
617 |         "fields": [
618 |             {
619 |                 "name": space_title,
620 |                 "value": description
621 |             }
622 |         ],
623 |         "thumbnail": {
624 |             "url": creator_profile_image.replace("normal", "200x200")
625 |         }
626 |     }]
627 |     }
628 |     if WEBHOOK_URL is not None:
629 |         retry = 0
630 |         max_retry = 5
631 |         while retry < max_retry:
632 |             try:
633 |                 session.post(WEBHOOK_URL, json=message, timeout=5)
634 |                 space.space_notified = True
635 |                 break
636 |             except requests.exceptions.ConnectionError:
637 |                 retry += 1
638 |                 logger.debug(f"[{space_creator}] Issue notifying space {space_id}", exc_info=True)
639 |                 logger.debug(f"[{space_creator}] Re-notifying space")
640 |             except Exception as e:
641 |                 retry += 1
642 |                 logger.debug(f"[{space_creator}] Issue notifying space {space_id}", exc_info=True)
643 |                 logger.debug(f"[{space_creator}] Re-notifying space")
644 |         if retry == max_retry:
645 |             logger.debug(f"[{space_creator}] Issue notifying space", exc_info=True)
646 | 
647 | 
648 | 
649 | if __name__ == "__main__":
650 |     logger = create_logger("logfile.log")
651 |     logger.info("Starting program")
652 |     threading.Thread(target=loading_text).start()
653 |     session = create_session()
654 |     # loading_string = "[INFO] Waiting for live twitter spaces"
655 | 
656 |     create_users()
657 |     user_ids = get_user_ids()
658 |     logger.debug(f"TwitterSpaces: {TwitterSpaces}")
659 |     logger.debug(f"User Ids: {user_ids}")
660 |     while True:
661 |         try:
662 |             get_spaces(user_ids, logger=logger, session=session)
663 |             # space_list = [space for space in TwitterSpaces.values() if space.space_state == "Running" and not space.space_notified]
664 | 
665 |             to_download = [space for space in TwitterSpaces.values() if
666 |                            (space.space_state == "Ended" or space.space_ended_at != 0) and
667 |                            (not space.space_downloaded and space.space_was_running)]
668 |             # if space_list is None:
669 |             #     continue
670 | 
671 |             # Download spaces that have ended and have yet to be downloaded
672 |             if to_download is not None:
673 |                 download(to_download, logger=logger)
674 | 
675 |         except SystemExit:
676 |             sys.exit("Error, Exiting")
677 |         except OSError:
678 |             sys.exit("Error, Exiting")
679 |         except KeyboardInterrupt:
680 |             sys.exit("Error, Exiting")
681 |         except Exception as e:
682 |             logger.error(e, exc_info=True)
683 | 


--------------------------------------------------------------------------------
/log.py:
--------------------------------------------------------------------------------
 1 | import gzip
 2 | import logging
 3 | from logging.handlers import TimedRotatingFileHandler
 4 | import os.path
 5 | import const
 6 | 
 7 | 
 8 | def namer(name):
 9 |     return name + ".gz"
10 | 
11 | 
12 | def rotator(source, dest):
13 |     with open(source, 'rb') as f_in:
14 |         with gzip.open(dest, 'wb') as f_out:
15 |             f_out.write(f_in.read())
16 |     os.remove(source)
17 | 
18 | 
19 | # Filter subclass that does not allow the file logging of sleeping messages
20 | class NoParsingFilter(logging.Filter):
21 |     def filter(self, record):
22 |         # if len(record.message) < 37:
23 |         #     record.message = f"{record.message}{' '*20}"
24 |         return not record.getMessage().startswith('Sleeping')
25 | 
26 | 
27 | # Filter subclass to allow and not allow stack traceback filtering
28 | class TracebackInfoFilter(logging.Filter):
29 |     """Clear or restore the exception on log records"""
30 |     def __init__(self, clear=True):
31 |         self.clear = clear
32 | 
33 |     def filter(self, record):
34 |         if self.clear:
35 |             record._exc_info_hidden, record.exc_info = record.exc_info, None
36 |             # clear the exception traceback text cache, if created.
37 |             record.exc_text = None
38 |         elif hasattr(record, "_exc_info_hidden"):
39 |             record.exc_info = record._exc_info_hidden
40 |             del record._exc_info_hidden
41 |         return True
42 | 
43 | 
44 | def create_logger(logfile_name):
45 |     # Check if log dir exist and if not create it
46 |     logging.handlers.TimedRotatingFileHandler
47 |     log_dir = os.getcwd()+"\\logs"
48 |     if not os.path.isdir(log_dir):
49 |         os.makedirs(log_dir)
50 | 
51 |     # Get the logger object
52 |     logger = logging.getLogger(__name__)
53 | 
54 |     # If logger has already been created then return it(for the imported modules)
55 |     if len(logger.handlers) != 0:
56 |         return logger
57 | 
58 |     # Set logging level and log path
59 |     logger.setLevel(logging.DEBUG)
60 |     log_path = log_dir + "\\" + logfile_name
61 | 
62 |     # Create a new log file everyday
63 |     handler = TimedRotatingFileHandler(log_path, when="midnight", interval=1, encoding='utf-8')
64 |     formatter = logging.Formatter('%(asctime)s [%(filename)s:%(lineno)d] %(levelname)-8s %(message)s', datefmt='%Y-%m-%d %H:%M:%S')
65 |     handler.setFormatter(formatter)
66 |     handler.suffix = "%Y%m%d"   # file suffix to be changed
67 |     handler.addFilter(NoParsingFilter())
68 |     handler.rotator = rotator
69 |     handler.namer = namer
70 |     # Set handler to allow stacktraceback logging
71 |     handler.addFilter(TracebackInfoFilter(clear=False))
72 | 
73 |     # logging.basicConfig(level=logging.INFO,
74 |     #                     format='%(asctime)s [%(filename)s:%(lineno)d] %(levelname)-5s %(message)s',
75 |     #                     datefmt='%Y-%m-%d %H:%M',
76 |     #                     filename=log_path)
77 | 
78 |     # define a Handler which writes DEBUG messages or higher to the sys.stderr
79 |     console = logging.StreamHandler()
80 |     console.setLevel(logging.INFO)
81 |     console.addFilter(TracebackInfoFilter())
82 |     # set a format which is simpler for console use
83 |     console_formatter = logging.Formatter(f'[%(levelname)s] %(asctime)s | %(message)s {" "*10}', datefmt='%Y-%m-%d %H:%M:%S')
84 |     # tell the handler to use this format
85 |     console.setFormatter(console_formatter)
86 |     # add the handlers to the root logger
87 |     logger.addHandler(console)
88 |     logger.addHandler(handler)
89 | 
90 |     # If logging is not enabled then remove the root log handler but keep the stream handler
91 |     if not const.LOGGING:
92 |         try:
93 |             lhStdout = logger.handlers[1]
94 |             logger.removeHandler(lhStdout)
95 |         except IndexError as ierror:
96 |             logger.error(ierror)
97 |             return logger
98 |     return logger
99 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | dataclasses==0.6
2 | discord.py==2.0.1
3 | requests==2.31.0
4 | urllib3==1.26.12
5 | discord==2.3.0


--------------------------------------------------------------------------------
/twspace.py:
--------------------------------------------------------------------------------
  1 | from urllib import error
  2 | import subprocess
  3 | import requests
  4 | from requests.adapters import HTTPAdapter, MaxRetryError
  5 | from urllib3 import Retry
  6 | import const
  7 | import discord
  8 | from log import create_logger
  9 | import os
 10 | import re
 11 | import time
 12 | 
 13 | 
 14 | # Function takes in the file name and check if it contains illegal characters
 15 | # If it contains an illegal character then remove it and return the new file name without the illegal character
 16 | def checkFileName(fileName):
 17 |     invalidName = re.compile(r"[\\*?<>:\"/\|]")
 18 |     newFileName = fileName
 19 |     if re.search(invalidName, fileName) is not None:
 20 |         newFileName = re.sub(invalidName, "_", fileName)
 21 |     # If file name has multiple lines then join them together(because stripping newline doesn't work)
 22 |     if "\n" in fileName:
 23 |         title_array = fileName.splitlines()
 24 |         newFileName = " ".join(title_array)
 25 |     return newFileName
 26 | 
 27 | 
 28 | def send_file(file_path, space_id, twitter_name, space_title, space_date):
 29 |     logger = create_logger("logfile_twspace.log")
 30 |     if os.path.isfile(file_path):
 31 |         webhook = discord.Webhook.from_url(const.WEBHOOK_DOWNLOAD_URL, adapter=discord.RequestsWebhookAdapter())
 32 |         space_file = discord.File(file_path)
 33 |         content = f"[{twitter_name}] The twitter space for {twitter_name} was downloaded\n`[{space_date}] {twitter_name} - {space_title} ({space_id})`"
 34 |         try:
 35 |             webhook.send(content=content, file=space_file)
 36 |         except discord.HTTPException as e:
 37 |             logger.error(e.text, exc_info=True)
 38 |     else:
 39 |         logger.error(f"[{twitter_name}]  Could not find space file to send", exc_info=True)
 40 | 
 41 | 
 42 | def get_m3u8_chunk(base_url, master_url, logger, session):
 43 |     # Get the playlist m3u8
 44 |     t = session.get(master_url).content.decode('utf-8')
 45 |     # logger.debug(t)
 46 |     master_playlist = re.findall(".*m3u8$", t, re.MULTILINE)
 47 |     for i in master_playlist:
 48 |         master_playlist = i
 49 |     logger.debug(master_playlist)
 50 |     # Get the playlist m3u8 content and replace the chunk url with the appropriate prefix
 51 |     chunk_m3u8 = base_url + master_playlist
 52 |     logger.debug(chunk_m3u8)
 53 |     return chunk_m3u8
 54 | 
 55 | 
 56 | def check_correct_duration(t, duration, logger):
 57 |     if duration is None:
 58 |         return True, duration
 59 |     moe = 30
 60 |     reg = re.compile("#EXTINF:(\d.\d{3})")
 61 |     result = re.findall(reg, t)
 62 |     m3u8_duration = sum(map(float, result))
 63 |     logger.debug(f"Space duration: {duration - moe} <= {m3u8_duration} <= {duration + moe}")
 64 |     if duration - moe <= m3u8_duration <= duration + moe:
 65 |         return True, m3u8_duration
 66 |     else:
 67 |         return False, m3u8_duration
 68 | 
 69 | 
 70 | def download(m3u8_id, rest_id, space_creator, handle_name, space_title, space_server, space_duration, space_date, logger=None):
 71 |     session = requests.Session()
 72 |     retry = Retry(total=5, connect=5, backoff_factor=1, status_forcelist=[400, 401, 403, 404, 429, 500, 502, 503, 504])
 73 |     session.mount("https://", HTTPAdapter(max_retries=retry))
 74 |     if logger is None:
 75 |         logger = create_logger("logfile.log")
 76 |     DOWNLOAD_PATH = os.path.join(const.DOWNLOAD, space_creator)
 77 |     SEND_DOWNLOAD = const.SEND_DOWNLOAD
 78 |     if DOWNLOAD_PATH == "True":
 79 |         DOWNLOAD_PATH = os.path.join(os.getcwd(), space_creator)
 80 |     elif not os.path.exists(DOWNLOAD_PATH):
 81 |         os.makedirs(DOWNLOAD_PATH)
 82 | 
 83 |     deployment_server, periscope_server = space_server
 84 | 
 85 |     base_url = f'https://{deployment_server}-{periscope_server}.pscp.tv'
 86 |     base_addon = '/Transcoding/v1/hls/'
 87 | 
 88 |     file_name = checkFileName(space_title)
 89 | 
 90 |     # Remove .video from the periscope_server string
 91 |     periscope_server = periscope_server.removesuffix('.video')
 92 |     end_masterurl = "/non_transcode/us-east-1/periscope-replay-direct-prod-us-east-1-public/audio-space/master_playlist.m3u8"
 93 |     end_chunkurl = f'/non_transcode/{periscope_server}/periscope-replay-direct-prod-{periscope_server}-public/audio-space/chunk'
 94 |     master_url = base_url+base_addon+m3u8_id+end_masterurl
 95 |     logger.debug(master_url)
 96 | 
 97 |     # Retry on 404 error
 98 |     retry = 0
 99 |     MAX_RETRY = 20
100 |     while retry < MAX_RETRY:
101 |         try:
102 |             # Get the chunk m3u8
103 |             chunk_m3u8 = get_m3u8_chunk(base_url, master_url, logger, session)
104 |             t = session.get(chunk_m3u8).content.decode('utf-8')
105 |             correct_duration, m3u8_duration = check_correct_duration(t, space_duration, logger)
106 |             logger.debug(f"[{space_creator}] Expected M3U8 Duration: {m3u8_duration}")
107 |             if not correct_duration:
108 |                 # raise error.HTTPError(url=chunk_m3u8, code=102, msg="M3U8 is incomplete", hdrs=None, fp=None)
109 |                 retry += 1
110 |                 logger.warning(f"[{space_creator}] Incorrect duration, M3U8 playlist download retry({retry}/{MAX_RETRY}) ...{' ' * 10}")
111 |                 logger.debug(chunk_m3u8)
112 |                 time.sleep(const.SLEEP_TIME)
113 |                 continue
114 |             break
115 |         except (MaxRetryError, requests.exceptions.RetryError, requests.exceptions.ConnectionError) as retryError:
116 |             retry += 1
117 |             logger.debug(retryError, exc_info=True)
118 |             logger.warning(f"[{space_creator}] Retrying({retry}/{MAX_RETRY}) m3u8 playlist download...{' ' * 10}")
119 |             time.sleep(const.SLEEP_TIME)
120 |         except error.HTTPError as httpError:
121 |             retry += 1
122 |             logger.debug(httpError, exc_info=True)
123 |             logger.warning(f"[{space_creator}] Retrying({retry}/{MAX_RETRY}) m3u8 playlist download...{' '*10}")
124 |             time.sleep(const.SLEEP_TIME)
125 |         except Exception as e:
126 |             retry += 1
127 |             logger.error(e, exc_info=True)
128 |             logger.warning(f"[{space_creator}] Retrying({retry}/{MAX_RETRY}) m3u8 playlist download...{' ' * 10}")
129 |             time.sleep(const.SLEEP_TIME)
130 |     t = t.replace('chunk', base_url+base_addon+m3u8_id+end_chunkurl)
131 |     logger.debug(t)
132 |     filename = f'{rest_id}.m3u8'
133 |     output = f'{DOWNLOAD_PATH}\\{space_date} - {space_creator} - {file_name} ({rest_id}).m4a'
134 |     command = ['ffmpeg', '-n', '-loglevel', 'info', '-protocol_whitelist', 'file,crypto,https,tcp,tls']
135 |     command += ['-i', filename, '-metadata', f'date={space_date}']
136 |     command += ['-metadata', f'comment=feat.{handle_name}'] if space_creator != handle_name else ['-metadata', f'comment={master_url}']
137 |     command += ['-metadata', f'artist={space_creator}', '-metadata', f'title={space_title}', '-c', 'copy', output]
138 | 
139 |     # Check if the file already exist and if it does remove it
140 |     try:
141 |         if os.path.isfile(filename):
142 |             os.remove(filename)
143 |     except PermissionError as perm_error:
144 |         logger.error(perm_error, exc_info=True)
145 |     try:
146 |         # Create a new file with the appropriately replaced chunk url
147 |         with open(filename, 'w') as f:
148 |             f.write(t)
149 | 
150 |         download_result = subprocess.run(command, capture_output=True, text=True)
151 |         logger.debug(download_result.stderr)
152 | 
153 |         if SEND_DOWNLOAD:
154 |             send_file(output, rest_id, space_creator, space_title, space_date)
155 |         if retry >= MAX_RETRY:
156 |             if m3u8_duration is not None:
157 |                 logger.warning(f"[{space_creator}] Download completed for {rest_id}, but may not be completely downloaded with a duration of {round(m3u8_duration/60, 2)} minutes")
158 |             else:
159 |                 logger.warning(
160 |                     f"[{space_creator}] Download completed for {rest_id}, but may not be completely downloaded")
161 |         elif "HTTP error 404 Not Found" in download_result.stderr:
162 |             logger.warning(f"[{space_creator}] AAC chunk(s) returning 404 Error Not Found, download incomplete...")
163 |         else:
164 |             logger.info(f"[{space_creator}] Download completed for {rest_id + ' ' * 10}")
165 |     except Exception:
166 |         logger.error(exc_info=True)
167 |     finally:
168 |         # Check if the file already exist and if it does remove it
169 |         try:
170 |             if os.path.isfile(filename):
171 |                 os.remove(filename)
172 |         except PermissionError as perm_error:
173 |             logger.error(perm_error, exc_info=True)
174 |     return True
175 | 
176 | 
177 | if __name__ == "__main__":
178 |     import threading
179 | 
180 |     def loading_text():
181 |         loading_string = f"[INFO] Downloading twitter space {space_id} "
182 |         animation = ["     ", ".    ", "..   ", "...  ", ".... ", "....."]
183 |         idx = 0
184 |         while status:
185 |             print(loading_string + animation[idx % len(animation)], end="\r")
186 |             time.sleep(0.3)
187 |             idx += 1
188 |             if idx == 6:
189 |                 idx = 0
190 | 
191 |     def get_space_server(m3u8_url):
192 |         reg_result = re.search("(https:\/\/)((?:[^-]*-){2})(.*)(\.pscp.*)", m3u8_url)
193 |         # regex will return something like 'prod-fastly-' so remove the last dash
194 |         deployment_server = reg_result.group(2)[:-1]
195 |         periscope_server = reg_result.group(3)
196 |         server = (deployment_server, periscope_server)
197 |         return server
198 | 
199 |     try:
200 |         status = True
201 |         m3u8_url = input("m3u8 Url: ")
202 |         space_id = input("space id: ")
203 |         twitter_name = input("twitter name: ")
204 |         space_title = input("space title: ")
205 |         space_date = input("space date(YYYYMMDD): ")
206 |         # space_date = datetime.strptime(space_date, "%Y%m%d").timestamp() * 1000
207 |         m3u8_id = re.search("(.*\/Transcoding\/v1\/hls\/(.*)(\/non_transcode.*))", m3u8_url).group(2)
208 |         server = get_space_server(m3u8_url)
209 |         # m3u8_id, rest_id, space_creator_name, handle_name, space_title, space_server, space_duration, space_date, logger=None
210 |         t1 = threading.Thread(target=loading_text)
211 |         t1.start()
212 |         download(m3u8_id=m3u8_id, rest_id=space_id, space_creator=twitter_name, handle_name=twitter_name, space_title=space_title,
213 |                  space_server=server, space_duration=None, space_date=space_date)
214 |         status = False
215 |         input("Download complete, press any key to exit...")
216 |         exit()
217 |     except Exception as e:
218 |         print(f"\rError encountered...{' '*40}\n{e}")
219 |         while True:
220 |             input("Exit...")
221 |             exit()
222 | 


--------------------------------------------------------------------------------