├── requirements.txt ├── .gitignore ├── README.md └── surveillor.py /requirements.txt: -------------------------------------------------------------------------------- 1 | certifi==2021.10.8 2 | charset-normalizer==2.0.9 3 | ffmpy==0.3.0 4 | idna==3.3 5 | requests==2.26.0 6 | urllib3==1.26.7 7 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | models_followed.txt 2 | vids_preprocessed/ 3 | testing.ipynb 4 | chromedriver 5 | .ipynb_checkpoints 6 | tryout_ffmpy.ipynb 7 | test.mkv 8 | env 9 | strategy.txt 10 | data_dump 11 | surveil.log 12 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | This script records models from stripchat.com/xhamsterlive.com continuously by exploiting a publicly accessible API of the site and agnostic HLS routing. 2 | 3 | This script is tested on Ubuntu with ffmpeg version 4.2.4-1ubuntu0.1. Whether it works on other operating systems or with other ffmpeg binaries is not known. 4 | 5 | The easiest way to run this script (in Ubuntu): 6 | 7 | ``` 8 | python3 surveillor.py 9 | ``` 10 | 11 | To ease repeated recording of a long list of followed models, create models_followed.txt and write each model username in lower-case, separated by newlines, into this text file. Then execute the script without arguments. 12 | -------------------------------------------------------------------------------- /surveillor.py: -------------------------------------------------------------------------------- 1 | #!
#!/usr/bin/env python
"""Continuously record online streams of followed models via the site's
public model API and plain HLS URLs, then stitch the clips per model."""

import concurrent.futures
import json
import logging
import multiprocessing
import os
import sys
import threading
from datetime import datetime
from time import sleep

import ffmpy
import requests


def _get_logger():
    """Return the module logger, attaching the file handler only once.

    The previous implementation added a fresh FileHandler on every logit()
    call, which duplicated every log line and leaked file descriptors.
    """
    logger = logging.getLogger(__name__)
    if not logger.handlers:
        logger.setLevel(logging.INFO)
        handler = logging.FileHandler("surveil.log")
        handler.setLevel(logging.INFO)
        logger.addHandler(handler)
    return logger


def logit(message: str):
    """Append a timestamped message to surveil.log."""
    _get_logger().info(f"{datetime_tag()}: {message}")


def datetime_tag() -> str:
    """Current local time as a sortable 'yymmdd_HHMMSS' tag."""
    return datetime.now().strftime("%y%m%d_%H%M%S")


def m3u8_link_recorder(m3u8_link: str, model_username: str, sleep_time: int):
    """Record one HLS stream for roughly sleep_time seconds.

    Runs ffmpeg (via ffmpy) in a background thread, lets it record for
    sleep_time seconds, then terminates the ffmpeg process. Output goes to
    vids_preprocessed/<model_username>/<datetime_tag>.mkv. Called once per
    stream by concurrent_stream_recording, in a separate worker process.
    """
    vids_preprocessed_dir = "vids_preprocessed"
    model_path = os.path.join(vids_preprocessed_dir, model_username)
    vid_path = os.path.join(model_path, f"{datetime_tag()}.mkv")

    # makedirs is race-free across the concurrent worker processes,
    # unlike the isdir()/mkdir() pair it replaces.
    os.makedirs(model_path, exist_ok=True)

    ff = ffmpy.FFmpeg(
        inputs={m3u8_link: None},
        outputs={vid_path: "-c copy"},
    )

    logit(f"{model_username} is being recorded")

    recorder = threading.Thread(target=ff.run)
    recorder.start()
    # Sleep for the full recording duration FIRST, then stop ffmpeg.
    # The old `while not ff.process: sleep(sleep_time)` loop terminated
    # immediately (a zero-length recording) whenever ffmpeg had already
    # started before the first poll.
    sleep(sleep_time)
    while recorder.is_alive() and not ff.process:
        # ffmpeg has not spawned yet (slow start); poll briefly.
        sleep(1)
    if ff.process:
        ff.process.terminate()
    recorder.join()

    logit(f"{model_username} recording stopped")


def model_list_grabber():
    """Ask the site's public API which models are currently online.

    Returns a 2-tuple of:
      * a list of (model_id, username, has_480p) triples, and
      * the raw model dicts from the API response.
    The raw payload is also dumped to disk via model_list_saver.
    """
    url = "https://xhamsterlive.com/api/front/v2/models?topLimit=10000"
    r = requests.get(url, timeout=30)
    req = r.json()
    # NOTE(review): block index 5 is assumed to hold the model list; this is
    # fragile against API layout changes — verify periodically.
    models = req.get("blocks")[5].get("models")
    model_list_saver(models)

    models_online_resolution_option_480p = []
    for model in models:
        model_id = str(model.get("id"))  # avoid shadowing builtin id()
        # A present "testing" preset is treated as "a 480p stream exists".
        # Guard against missing broadcastSettings/presets keys.
        presets = (model.get("broadcastSettings") or {}).get("presets") or {}
        resolution_option_480p = presets.get("testing") is not None
        uname = str(model.get("username"))
        models_online_resolution_option_480p.append(
            (model_id, uname, resolution_option_480p))

    return models_online_resolution_option_480p, models


def model_list_saver(model_list):
    """Dump the raw API model list to data_dump/<datetime_tag>.json
    (called to accumulate a dataset over time)."""
    data_dump_dir = "data_dump"
    os.makedirs(data_dump_dir, exist_ok=True)
    json_file_path = os.path.join(data_dump_dir, f"{datetime_tag()}.json")
    with open(json_file_path, "w") as fp:
        json.dump(model_list, fp)


def stream_download_decider(all_model_names_480_option):
    """Select which currently-online models should be recorded.

    Followed usernames come from the command line (sys.argv[1:]) or, when
    the script runs without arguments, from models_followed.txt (one
    lower-case username per line). Returns the (model_id, username,
    has_480p) triples of followed models that are online, in
    followed-list order (earlier lines get priority when the recording
    slots run out).
    """
    if len(sys.argv) == 1:
        with open("models_followed.txt", "r") as f:
            models_followed = [line.strip() for line in f if line.strip()]
    else:
        models_followed = sys.argv[1:]

    models_followed_online = []
    for model_followed in models_followed:
        for entry in all_model_names_480_option:
            if model_followed == entry[1].lower():
                models_followed_online.append(entry)

    if models_followed_online:
        print(models_followed_online)
    else:
        print("none of your models are online")

    return models_followed_online


def concurrent_stream_recording(models_online_followed, sleep_time: int,
                                models_to_record: int):
    """Record up to models_to_record streams in parallel processes.

    Builds one HLS URL per followed online model (480p variant when the
    model offers it) and fans m3u8_link_recorder out over a
    ProcessPoolExecutor.
    """
    m3u8_links = []
    usernames = []
    for model_id, uname, option_480p in models_online_followed:
        variant = f"{model_id}_480p" if option_480p else model_id
        m3u8_links.append(
            f"https://b-hls-01.strpst.com/hls/{model_id}/{variant}.m3u8")
        usernames.append(uname)

    with concurrent.futures.ProcessPoolExecutor() as executor:
        # map() stops at the shortest iterable, so slicing the first two
        # lists and repeating sleep_time models_to_record times caps the
        # number of simultaneous recordings.
        executor.map(
            m3u8_link_recorder,
            m3u8_links[:models_to_record],
            usernames[:models_to_record],
            [sleep_time] * models_to_record,
        )


def video_stitcher():
    """Concatenate the recorded clips of each model into a single .mkv.

    For every subdirectory of vids_preprocessed holding more than one
    file, writes an ffmpeg concat list (clips sorted chronologically by
    the yymmdd_HHMMSS embedded in their names), stitches them with
    `-c copy`, then removes the source clips and the list file.
    """
    vids_preprocessed_dir = "vids_preprocessed"
    if not os.path.isdir(vids_preprocessed_dir):
        # Nothing recorded yet; the old code crashed here.
        return

    logit("video_stitcher started")

    for subdir in os.listdir(vids_preprocessed_dir):
        dir_and_subdir = os.path.join(vids_preprocessed_dir, subdir)
        if not os.path.isdir(dir_and_subdir):
            continue  # ignore stray files
        vids = os.listdir(dir_and_subdir)
        if len(vids) <= 1:
            continue

        list_txt_dir = os.path.join(dir_and_subdir, "my_list.txt")
        output_dir = os.path.join(
            dir_and_subdir, f"concat_{datetime_tag()}.mkv")

        def _timestamp_key(name):
            # Last two '_'-separated fields are the date and time tags;
            # works for both "yymmdd_HHMMSS.mkv" and "concat_..." names.
            date_part, time_part = name.split("_")[-2:]
            return int(date_part + time_part.replace(".mkv", ""))

        # sorted() keeps duplicates; the old dict-keyed sort silently
        # dropped clips that shared a timestamp.
        vids_sorted = sorted(vids, key=_timestamp_key)

        with open(list_txt_dir, "w") as fp:
            for vid in vids_sorted:
                fp.write(f"file {vid}\n")

        ff = ffmpy.FFmpeg(
            global_options={"-f concat -safe 0"},
            inputs={list_txt_dir: None},
            outputs={output_dir: "-c copy"},
        )
        ff.run()

        for vid in vids_sorted:
            os.remove(os.path.join(dir_and_subdir, vid))
        os.remove(list_txt_dir)
        logit(f"video_stitcher concatenated {subdir}")


def main():
    """Run forever: three grab/decide/record rounds, then stitch clips."""
    while True:
        for _ in range(3):
            models_online, _models = model_list_grabber()
            # Old message wrongly said "followed models" for the count of
            # ALL online models.
            logit(f"{len(models_online)} models are online")
            models_online_followed = stream_download_decider(models_online)
            logit(f"{len(models_online_followed)} followed models are online")
            concurrent_stream_recording(
                models_online_followed, 60, multiprocessing.cpu_count())
        video_stitcher()


if __name__ == "__main__":
    main()