├── .gitignore ├── LICENSE ├── README.md ├── drivers └── .gitkeep ├── requirements.txt ├── setup.py └── volafile-downloader ├── config.py ├── downloader.py ├── utils.py └── volafile-downloader.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | eggs/ 15 | .eggs/ 16 | lib/ 17 | lib64/ 18 | parts/ 19 | sdist/ 20 | var/ 21 | wheels/ 22 | *.egg-info/ 23 | .installed.cfg 24 | *.egg 25 | 26 | # PyInstaller 27 | # Usually these files are written by a python script from a template 28 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 29 | *.manifest 30 | *.spec 31 | 32 | # Installer logs 33 | pip-log.txt 34 | pip-delete-this-directory.txt 35 | 36 | # Unit test / coverage reports 37 | htmlcov/ 38 | .tox/ 39 | .coverage 40 | .coverage.* 41 | .cache 42 | nosetests.xml 43 | coverage.xml 44 | *.cover 45 | .hypothesis/ 46 | 47 | # Translations 48 | *.mo 49 | *.pot 50 | 51 | # PyBuilder 52 | target/ 53 | 54 | # Jupyter Notebook 55 | .ipynb_checkpoints 56 | 57 | # pyenv 58 | .python-version 59 | 60 | # Environments 61 | .env 62 | .venv 63 | env/ 64 | venv/ 65 | ENV/ 66 | 67 | *.log 68 | downloads/ 69 | .vscode/ 70 | **/drivers/chromedriver.exe -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 the-okn3 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Volafile Downloader 2 | 3 | volafile.org files and chat downloader 4 | 5 | Tested with **Python 3.6.2** on **Windows 10** (It should work anywhere). 6 | 7 | ## Install 8 | 9 | Install all the required libraries: 10 | 11 | ``` 12 | pip install -r requirements.txt 13 | ``` 14 | 15 | Feel free to edit the config file with your own options, most of the stuff in there have comments with a little explanation. You don't have to edit it, it should work as is. 16 | 17 | ``` 18 | config.py 19 | ``` 20 | 21 | You need to have the chromium drivers, you can download the chromium drivers from their official website, go to http://chromedriver.chromium.org/downloads and download the drivers and place them in a directory, example: "drivers/**chromedriver.exe**". 22 | 23 | If you are using **Linux**, **MacOS**, **other OS** or you have the chrome driver installed in other path, just change the path of the chromedriver in the config file. You might need to give permissions to the file, example: `sudo chmod +x chromedriver` 24 | 25 | ## Usage & Examples 26 | 27 | Download all files from a room: 28 | 29 | ``` 30 | λ python volafile-downloader.py -r roomname 31 | ``` 32 | 33 | And with a password 34 | 35 | ``` 36 | λ python volafile-downloader.py -r roomname -p 123456 37 | ``` 38 | 39 | Download all files from a room and loops to check if new files were added (you can also change the time delay between the loop with the **-ld** option): 40 | 41 | ``` 42 | λ python volafile-downloader.py -r roomname -l 43 | ``` 44 | 45 | Download all files from a room and loops to check if new files were added and also downloads the chat logs (The download of chat logs only work with the download loop (-l argument), check the FAQ for more information) 46 | 47 | ``` 48 | λ python volafile-downloader.py -r roomname -l -cl 49 | ``` 50 | 51 | Download all files from a room with password and loops to check if new files were added and archives the files by creation date: 52 | 53 | ``` 54 | -r : Room name 55 | -p : Room password 56 | -l : Loops to check for new files 57 | -a : Archives 58 | -at : Archive type CREATION_DATE (Default) OR DOWNLOAD_DATE 59 | -cl : Downloads the chat logs 60 | ``` 61 | 62 | ``` 63 | λ python volafile-downloader.py -r roomname -p 123456 -l -a -at CREATION_DATE -cl 64 | ``` 65 | 66 | Show all the available options: 67 | Note: Some options/arguments will override some config variables. 68 | 69 | ``` 70 | λ python volafile-downloader.py -h 71 | usage: volafile-downloader.py [-h] [-o OUTPUT_DIR] [-l] [-p PASSWORD] [-a] 72 | [-at ARCHIVE_TYPE] [-cl] [-ld LOOP_DELAY] 73 | [-ms MAX_ALLOWED_SIZE] [-nl] -r ROOM 74 | 75 | optional arguments: 76 | -h, --help show this help message and exit 77 | -o OUTPUT_DIR, --output-dir OUTPUT_DIR 78 | Output directory 79 | -l, --loop Download all the files in the room and loops to check 80 | if new files were added 81 | -p PASSWORD, --password PASSWORD 82 | Room password 83 | -a, --archive Archive room 84 | -at ARCHIVE_TYPE, --archive-type ARCHIVE_TYPE 85 | Archive type CREATION_DATE or DOWNLOAD_DATE 86 | -cl, --chat-log Download chat log 87 | -ld LOOP_DELAY, --loop-delay LOOP_DELAY 88 | Time delay when downloading in loop 89 | -ms MAX_ALLOWED_SIZE, --max-allowed-size MAX_ALLOWED_SIZE 90 | Max allowed size to download a file (in bytes) 91 | -nl, --no-logs Disable the logging to text files when a file is 92 | downloaded, it's too big and when there was an error 93 | 94 | required arguments: 95 | -r ROOM, --room ROOM Room 96 | ``` 97 | 98 | ## FAQ (Frequently asked questions) 99 | 100 | **Where can I download the drivers?** 101 | 102 | You can download the drivers from the official chromium website http://chromedriver.chromium.org/downloads 103 | 104 | **Why I only can download the chat log with the loop argument (-l)?** 105 | 106 | Because when you enter a room you don't have access to the chat messages that were sent before you entered the room, unless you were in the room when the other people sent the message. I believe the messages are not being saved in the Volafile servers and only are being send and received by Web Sockets and then saved in your computer. 107 | 108 | ## Feel free to buy me a coffee 109 | 110 | Other Methods: Buy Me A Coffee 111 | 112 | Bitcoin (BTC): **12hPw1663w6Ne71g5zU1gFTjPh2syUTbeq** 113 | 114 | Ethereum (ETH): **0x25b9318c17ef4f27b960c89bb90b855f09aa299f** 115 | 116 | Litecoin (LTC): **LVwr7RhcdkTGTAgoy6iyxGrZKXvmE293HH** 117 | 118 | Ripple (XRP): **12hPw1663w6Ne71g5zU1gFTjPh2syUTbeq** 119 | 120 | ## License 121 | 122 | Copyright 2018 the-okn3 123 | 124 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 125 | 126 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 127 | 128 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 129 | -------------------------------------------------------------------------------- /drivers/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/the-okn3/volafile-downloader/91bb98a2e3384cc522c549aac2734d1f21fecaea/drivers/.gitkeep -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | tqdm==4.15.0 2 | humanfriendly==4.4.1 3 | requests==2.18.4 4 | selenium==3.5.0 5 | colorlog==3.1.4 -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | 3 | setup( 4 | name="volafile-downloader", 5 | version="2.2.0", 6 | scripts=["volafile-downloader"], 7 | description="Volafile.org files downloader", 8 | author="Okn3", 9 | author_email="okn3@protonmail.com", 10 | url="https://github.com/the-okn3/volafile-downloader", 11 | keywords=["volafile", "downloader", "download", "files", "chat"] 12 | ) 13 | -------------------------------------------------------------------------------- /volafile-downloader/config.py: -------------------------------------------------------------------------------- 1 | # NOTE: Some config parameters can be overwrite by using command line arguments 2 | 3 | # Download 4 | # The size is in bytes, 314572800 = 300MB 5 | max_allowed_size = 800572800 6 | 7 | download_output_dir = "../downloads" 8 | download_users_to_ignore = [ 9 | "ExampleNameHere2233" 10 | ] 11 | 12 | chat_log = True 13 | chat_messages_to_ignore = [ 14 | """Volafile can also be found here: https://twitter.com/volafile https://facebook.com/volafile""" 15 | ] 16 | chat_nicks_to_ignore = [ 17 | "News" 18 | ] 19 | 20 | # Archive will create and put the files in separated folders by date 21 | # note: the date that is created is the date the file was added, so if the 22 | # same file was added some other day and it was added again it will download 23 | # again the file 24 | # ex: /2018-10-13 25 | archive = False 26 | # Archive types: 27 | # CREATION_DATE = The date the file was created 28 | # DOWNLOAD_DATE = The date the file was downloaded 29 | archive_type = "CREATION_DATE" 30 | # https://docs.python.org/3/library/datetime.html#strftime-and-strptime-behavior 31 | archive_date_format = "%Y-%m-%d" 32 | 33 | # Time to wait when downloading in loop, ie: (Download everything in the room, 34 | # Wait 60 seconds, Download everything in the room, and so on), note: each file 35 | # it's only downloaded if the file wasn't downloaded before 36 | download_loop_delay = 60 37 | 38 | # To start with the oldest files (that will expire first) first 39 | download_oldest_first = True 40 | 41 | # Extensions to not download 42 | extensions_blacklist = [".mp3", ".wav"] 43 | 44 | filenames_blacklist = [ 45 | "donotdownloadme_lalalalalalalalalal.jpg" 46 | ] 47 | 48 | # This can be edited or removed, the only thing needed is the user-agent 49 | headers = { 50 | 'User-Agent': "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:55.0) " 51 | "Gecko/20100101 Firefox/55.0", 52 | "DNT": "1", 53 | "Upgrade-Insecure-Requests": "1" 54 | } 55 | 56 | # This shouldn't be edited or removed, it's required to download the files 57 | cookies = { 58 | "allow-download": "1" 59 | } 60 | 61 | base_url = "https://volafile.org/r/" 62 | 63 | # Driver 64 | # For development it's better to set the "driver_headless" to False, to show 65 | # the window with the website. 66 | driver_path = "../drivers/chromedriver.exe" 67 | driver_headless = True 68 | driver_log_level = 3 69 | 70 | # Logs 71 | log_download_archive = True 72 | log_download_error = True 73 | log_download_too_big = True 74 | 75 | logger_stream_format = "[%(asctime)s] %(log_color)s[%(levelname)s] - " \ 76 | "%(message)s%(reset)s" 77 | logger_stream_level = "DEBUG" 78 | logger_stream_date_format = "%m-%d-%y %H:%M:%S" 79 | 80 | logger_file_active = True 81 | logger_file_format = "[%(asctime)s] [%(levelname)s] [%(module)s] - %(message)s" 82 | logger_file_level = "INFO" 83 | logger_file_date_format = "%m-%d-%y %H:%M:%S" 84 | logger_file_path = "../downloads/volafile-downloader.log" 85 | -------------------------------------------------------------------------------- /volafile-downloader/downloader.py: -------------------------------------------------------------------------------- 1 | 2 | import os 3 | import re 4 | import sys 5 | import time 6 | import config 7 | import logging 8 | import humanfriendly 9 | 10 | from enum import Enum 11 | from utils import download_file, log, sanitize_file_name, prepare_url, \ 12 | get_file_id_and_name, get_file_extension, expiration_to_date, \ 13 | log_file, get_logged_files 14 | from selenium import webdriver 15 | from selenium.webdriver.support import ui 16 | from selenium.common.exceptions import TimeoutException 17 | from datetime import datetime 18 | 19 | 20 | class Result(Enum): 21 | SUCCESS = 1 22 | ERROR = 2 23 | 24 | 25 | class Downloader(): 26 | 27 | PASSWORD_INPUT_CSS_SELECTOR = """#room_content_fixed """ \ 28 | """div.ui_frame_container div.ui_frame_body.ui_frame_body_bar """ \ 29 | """input[type="password"]""" 30 | 31 | PASSWORD_BUTTON_XPATH = """//*[@id="room_content_fixed"]/div[1]/div/""" \ 32 | """div[3]/span[2]""" 33 | 34 | MODAL_18_WARNING_XPATH = """//*[@id="room_content_fixed"]/div""" \ 35 | """[1]/div/div[3]/span[2]""" 36 | 37 | def __init__(self, 38 | room, 39 | password, 40 | output_dir, 41 | max_allowed_size=config.max_allowed_size, 42 | do_log=True, 43 | archive=config.archive, 44 | archive_type=config.archive_type, 45 | chat_log=config.chat_log): 46 | 47 | self.logger = logging.getLogger("root") 48 | self.driver = None 49 | self.looping = False 50 | self.room = room 51 | self.password = password 52 | self.output_dir = output_dir 53 | self.max_allowed_size = max_allowed_size 54 | self.do_log = do_log 55 | self.archive = archive 56 | self.archive_type = archive_type 57 | self.chat_log = chat_log 58 | self.old_chat_messages = [] 59 | 60 | self.logger.info('Initializing...') 61 | self.logger.info("Room: %s" % (self.room)) 62 | self.logger.info("Password: %s" % (self.password)) 63 | self.logger.info("Archive: %s" % (self.archive)) 64 | 65 | # Create necessary directories 66 | self.download_directory = os.path.join(self.output_dir, self.room) 67 | if not os.path.exists(self.download_directory): 68 | os.makedirs(self.download_directory) 69 | 70 | self.downloaded_files = get_logged_files(self.download_directory) 71 | 72 | def downloadLoop(self, loop_delay=60): 73 | self.looping = True 74 | 75 | self.initDriver() 76 | 77 | try: 78 | while self.looping: 79 | self.logger.info("Downloading room '%s'" % (self.room)) 80 | 81 | downloaded = self.downloadFiles(False) 82 | 83 | if self.chat_log: 84 | self.downloadChatLog() 85 | 86 | if not downloaded: 87 | self.logger.info("There is no files to download") 88 | 89 | self.logger.info("[Sleeping for %s seconds]" % (loop_delay)) 90 | time.sleep(int(loop_delay)) 91 | except Exception: 92 | self.closeDriver() 93 | self.logger.warning("Something went wrong, restarting...") 94 | self.logger.info("[Sleeping for %s seconds]" % (loop_delay)) 95 | time.sleep(int(loop_delay)) 96 | return self.downloadLoop(loop_delay) 97 | 98 | self.closeDriver() 99 | 100 | def download(self): 101 | """ Download all the files from an entire room """ 102 | 103 | self.initDriver() 104 | self.downloadFiles(True) 105 | 106 | def downloadFiles(self, close_driver=True): 107 | try: 108 | # List of files 109 | self.logger.info("Downloading the list of files...") 110 | 111 | result, files = self.getFilesList() 112 | if result == Result.ERROR: 113 | self.logger.error("Error while trying to fetch the list " 114 | "of files, maybe there is no files " 115 | "to download") 116 | return False 117 | 118 | self.logger.info("List of files downloaded") 119 | 120 | if close_driver: 121 | self.closeDriver() 122 | 123 | except Exception as ex: 124 | self.logger.warning( 125 | "The Website might be offline or another error " 126 | "occurred: " + str(ex)) 127 | 128 | if close_driver: 129 | self.closeDriver() 130 | 131 | return False 132 | 133 | file_index = 1 134 | info = dict( 135 | total=len(files), 136 | downloaded=0, 137 | already_exist=0, 138 | too_big=0, 139 | failed=0, 140 | forbidden_extension=0, 141 | user_ignored=0 142 | ) 143 | 144 | for f in files: 145 | 146 | self.logger.info(u"[%s of %s] [%s] [%s] [%s] [%s] [by %s]" % ( 147 | file_index, 148 | info["total"], 149 | f["name"], 150 | f["extension"], 151 | humanfriendly.format_size(f["size"]), 152 | f["expiration"], 153 | f["tag"] 154 | )) 155 | 156 | download_directory_path = self.download_directory 157 | 158 | # Change directory if it's to archive 159 | if self.archive: 160 | archive_dir_name = datetime.now().strftime(config.archive_date_format) 161 | 162 | if self.archive_type == "CREATION_DATE": 163 | archive_dir_name = expiration_to_date(f["expiration"])\ 164 | .strftime(config.archive_date_format) 165 | 166 | download_directory_path = os.path.join( 167 | self.download_directory, archive_dir_name) 168 | 169 | if not os.path.exists(download_directory_path): 170 | os.makedirs(download_directory_path) 171 | 172 | file_id_name = f["name"] + " - " + str(f["id"]) + f["extension"] 173 | 174 | file_path = os.path.join(download_directory_path, file_id_name) 175 | 176 | file_index += 1 177 | 178 | # Check if the file already exists 179 | if os.path.exists(file_path) or \ 180 | file_id_name in self.downloaded_files: 181 | self.logger.info("File already exists") 182 | info["already_exist"] += 1 183 | continue 184 | 185 | # Check if we can download a file from this user 186 | if f["tag"].strip() in config.download_users_to_ignore: 187 | self.logger.info("User ignored (from the list in the config)") 188 | info["user_ignored"] += 1 189 | continue 190 | 191 | # Check if the file extension is blacklisted 192 | if f["extension"] in config.extensions_blacklist: 193 | self.logger.warning( 194 | "File Extension not allowed to download") 195 | info["forbidden_extension"] += 1 196 | continue 197 | 198 | # Check if the file name is blacklisted 199 | if f["name"] + f["extension"] in config.filenames_blacklist: 200 | self.logger.warning( 201 | "File Name not allowed to download") 202 | info["forbidden_extension"] += 1 203 | continue 204 | 205 | # Check if the file size is greater then allowed 206 | if f["size"] > self.max_allowed_size: 207 | self.logger.warning( 208 | "File size not allowed to download") 209 | if self.do_log: 210 | log("TOOBIG", self.download_directory, f) 211 | info["too_big"] += 1 212 | continue 213 | 214 | try: 215 | self.logger.info("Downloading...") 216 | download_file(f["url"], file_path) 217 | self.logger.info("Downloaded") 218 | 219 | self.downloaded_files.append(file_id_name) 220 | log_file(file_id_name, self.download_directory) 221 | 222 | if self.do_log: 223 | log("ARCHIVE", self.download_directory, f) 224 | 225 | info["downloaded"] += 1 226 | except Exception as ex: 227 | self.logger.error( 228 | "Error downloading file:" + str(ex)) 229 | if self.do_log: 230 | log("ERROR", self.download_directory, f) 231 | info["failed"] += 1 232 | 233 | self.logger.info("DONE") 234 | self.logger.info("%s of %s Files downloaded" % 235 | (info["downloaded"], info["total"])) 236 | self.logger.info("%s of %s Files already existed" % 237 | (info["already_exist"], info["total"])) 238 | self.logger.info("%s of %s Files were too big to download" % 239 | (info["too_big"], info["total"])) 240 | self.logger.info("%s of %s Files have extensions or name not allowed to " 241 | "download" % 242 | (info["forbidden_extension"], info["total"])) 243 | self.logger.info("%s of %s Files couldn't be downloaded (error " 244 | "downloading)" % 245 | (info["failed"], info["total"])) 246 | self.logger.info("%s of %s Files couldn't be downloaded (user " 247 | "ignored)" % 248 | (info["user_ignored"], info["total"])) 249 | return True 250 | 251 | def initDriver(self): 252 | if config.driver_path and not os.path.exists(config.driver_path): 253 | self.logger.error("The driver path in the config doesn't exist") 254 | print( 255 | "You can download the chromium drivers from their official" 256 | "website:" 257 | "\n\t- Access http://chromedriver.chromium.org/downloads and " 258 | "download the drivers." 259 | "\n\t- Place the drivers in the drivers folder or in other place" 260 | "\n\t- If you are on linux or macOS you might need to give " 261 | "\n\t permission to that file, ex: sudo chmod +x chromedriver" 262 | "\n\t- Edit the 'driver_path' in the config.py file with the path" 263 | "of the drivers you downloaded" 264 | ) 265 | sys.exit(1) 266 | 267 | # Create driver with all the arguments 268 | options = webdriver.ChromeOptions() 269 | options.add_argument("--log-level=%d" % int(config.driver_log_level)) 270 | options.add_argument("--disable-logging") 271 | options.add_argument("--disable-extensions") 272 | if config.driver_headless: 273 | options.add_argument("headless") 274 | 275 | self.driver = webdriver.Chrome(config.driver_path, 276 | service_log_path="NUL", 277 | chrome_options=options) 278 | 279 | wait = ui.WebDriverWait(self.driver, 3) 280 | 281 | # Go to the url 282 | self.driver.get(prepare_url(config.base_url, self.room)) 283 | 284 | # See if is asking for a password, if yes then type one 285 | if not self.typePasswordIfNeeded(self.password): 286 | return (Result.ERROR, None) 287 | 288 | # Try to wait for the +18 warning modal and click OK 289 | try: 290 | wait.until(lambda driver: driver.find_element_by_xpath( 291 | self.MODAL_18_WARNING_XPATH) 292 | ).click() 293 | except TimeoutException: 294 | self.logger.info( 295 | "Couldn't find the +18 warning modal, " 296 | "assuming there isn't one...") 297 | except Exception: 298 | self.logger.info("Nothing to download") 299 | 300 | def downloadChatLog(self): 301 | self.logger.info("Downloading chat log...") 302 | 303 | messages = self.driver.execute_async_script(""" 304 | var done = arguments[0]; 305 | window.indexedDB = window.indexedDB || window.mozIndexedDB || window.webkitIndexedDB || window.msIndexedDB; 306 | var db; 307 | var request = window.indexedDB.open("localforage", 2); 308 | request.onsuccess = function(event) { 309 | console.log(event); 310 | db = event.target.result; 311 | var transaction = db.transaction("keyvaluepairs", "readwrite"); 312 | var objectStore = transaction.objectStore("keyvaluepairs"); 313 | 314 | var test = objectStore.get("room:""" + self.room + """:messages"); 315 | test.onsuccess = function(event) { 316 | done(event.target.result); 317 | } 318 | }; 319 | """) 320 | 321 | if not messages: 322 | self.logger.info("No chat log to download") 323 | return 324 | 325 | # Create necessary directories 326 | path = os.path.join(self.output_dir, self.room) 327 | path = os.path.join(path, 328 | datetime.now().strftime(config.archive_date_format)) 329 | if not os.path.exists(path): 330 | os.makedirs(path) 331 | path = os.path.join(path, "chat.log") 332 | 333 | # Get only the new messages 334 | new_messages = [x for x in messages if x not in self.old_chat_messages] 335 | self.old_chat_messages = messages 336 | 337 | for message in new_messages: 338 | owner = "♕" if "owner" in message["options"] else "" 339 | 340 | texts = [] 341 | stop = False 342 | for m in message["message"]: 343 | text = str(m) 344 | 345 | if m["type"] == "text": 346 | text = m["value"] 347 | elif m["type"] == "file": 348 | text = "%s - %s (%s)" % (m["id"], 349 | m["name"], 350 | m["filetype"]) 351 | elif m["type"] == "url": 352 | text = "%s (%s)" % (m["text"], m["href"]) 353 | 354 | if text in config.chat_messages_to_ignore: 355 | stop = True 356 | continue 357 | 358 | texts.append(text) 359 | 360 | if (message["nick"] in config.chat_nicks_to_ignore) or stop: 361 | continue 362 | 363 | with open(path, "a+", encoding="utf-8") as f: 364 | f.write("%s%s: %s\n" % ( 365 | owner, 366 | message["nick"], 367 | "\n".join(texts)) 368 | ) 369 | 370 | self.logger.info( 371 | "Downloaded chat log with %d new messages" % len(new_messages)) 372 | 373 | def getFilesList(self): 374 | """Get the list of files from a room and prepare the information 375 | of each file 376 | """ 377 | 378 | wait = ui.WebDriverWait(self.driver, 3) 379 | 380 | # Wait for the list of files and get them 381 | try: 382 | files = wait.until(lambda driver: 383 | driver.find_elements_by_css_selector( 384 | "#file_list .filelist_file")) 385 | except TimeoutException: 386 | self.logger.error( 387 | "Couldn't find the list of files, aborting...") 388 | return (Result.ERROR, None) 389 | 390 | # Get all files information 391 | files_list_output = [] 392 | for file_elem in files: 393 | 394 | file_left_part = file_elem.find_element_by_class_name( 395 | "file_left_part") 396 | 397 | file_right_part = file_elem.find_element_by_class_name( 398 | "file_right_part") 399 | 400 | url = file_left_part.get_attribute("href") 401 | 402 | file_tag = file_left_part.find_element_by_class_name( 403 | "file_tag").get_attribute("innerHTML") 404 | 405 | file_size_expiration = file_right_part.get_attribute("innerHTML") 406 | size_expiration_pattern = re.compile(r"^(.*?)<.*>(.*)<\/span>") 407 | size_expiration_info = size_expiration_pattern.findall( 408 | file_size_expiration) 409 | 410 | file_size = size_expiration_info[0][0] 411 | file_expiration = size_expiration_info[0][1] 412 | 413 | file_id, real_file_name = get_file_id_and_name(url) 414 | 415 | file_name_without_extension, extension = get_file_extension( 416 | real_file_name) 417 | 418 | files_list_output.append({ 419 | "id": file_id, 420 | "url": url, 421 | "name": sanitize_file_name(file_name_without_extension), 422 | "extension": extension, 423 | "tag": file_tag, 424 | "size": humanfriendly.parse_size(file_size), 425 | "expiration": file_expiration 426 | }) 427 | 428 | if config.download_oldest_first: 429 | files_list_output = files_list_output[::-1] 430 | 431 | return (Result.SUCCESS, files_list_output) 432 | 433 | def isPasswordNeeded(self): 434 | wait = ui.WebDriverWait(self.driver, 2) 435 | try: 436 | wait.until( 437 | lambda driver: driver.find_element_by_css_selector( 438 | self.PASSWORD_INPUT_CSS_SELECTOR)) 439 | return True 440 | except TimeoutException: 441 | return False 442 | 443 | def typePasswordIfNeeded(self, password): 444 | wait = ui.WebDriverWait(self.driver, 5) 445 | try: 446 | password_input = wait.until( 447 | lambda driver: driver.find_element_by_css_selector( 448 | self.PASSWORD_INPUT_CSS_SELECTOR)) 449 | 450 | if not password: 451 | self.logger.error("This room requires a password and you " 452 | "didn't type one") 453 | return 454 | 455 | password_input.send_keys(password) 456 | 457 | wait.until( 458 | lambda driver: driver.find_element_by_xpath( 459 | self.PASSWORD_BUTTON_XPATH) 460 | ).click() 461 | except TimeoutException: 462 | self.logger.info("This room doesn't require a password") 463 | if password: 464 | self.logger.info("You typed a password for a room that " 465 | "doesn't require one") 466 | return True 467 | 468 | # Verify if is asking again for the password, if yes then the 469 | # password that we typed is wrong 470 | time.sleep(1) 471 | if self.isPasswordNeeded(): 472 | self.logger.error("This room required a password but you " 473 | "typed the wrong one") 474 | return False 475 | 476 | return True 477 | 478 | def closeDriver(self): 479 | """Close driver""" 480 | 481 | if self.driver: 482 | try: 483 | # self.driver.close() 484 | self.driver.quit() 485 | except Exception: 486 | self.logger.error("Something happened while trying to close " 487 | "the driver") 488 | 489 | def stop(self): 490 | self.looping = False 491 | self.closeDriver() 492 | -------------------------------------------------------------------------------- /volafile-downloader/utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | import re 3 | import config 4 | import requests 5 | 6 | from tqdm import tqdm 7 | from urllib.parse import unquote 8 | from os.path import splitext 9 | from datetime import datetime, timedelta 10 | 11 | 12 | def sanitize_file_name(file_name): 13 | """ 14 | Sanitize a file name by removing extra spaces, replaces spaces with 15 | underscores and escapes special characters 16 | """ 17 | 18 | file_name = str(file_name).strip().replace(' ', '_') 19 | return re.sub(r'(?u)[^-\w.]', '', file_name) 20 | 21 | 22 | def log(log_type, path, file_info): 23 | """ Log information to a file """ 24 | 25 | output_path = None 26 | 27 | if log_type.upper() == "ERROR" and config.log_download_error: 28 | output_path = os.path.join(path, "error.txt") 29 | 30 | elif log_type.upper() == "ARCHIVE" and config.log_download_archive: 31 | output_path = os.path.join(path, "archive.txt") 32 | 33 | elif log_type.upper() == "TOOBIG" and config.log_download_too_big: 34 | output_path = os.path.join(path, "toobig.txt") 35 | 36 | else: 37 | print("[-] Error: Fix the god damn code, there is a log " 38 | "type that doesn't exist: " + log_type.upper()) 39 | return 40 | 41 | if output_path: 42 | message = "%s - %s - %s - %s - %s\n" % (file_info["url"], 43 | file_info["name"], 44 | file_info["tag"], 45 | file_info["size"], 46 | file_info["expiration"]) 47 | 48 | with open(output_path, "a+", encoding="utf-8") as f: 49 | f.write(str(message)) 50 | 51 | 52 | def log_file(file_path_as_id, path): 53 | output_path = os.path.join(path, "files.txt") 54 | with open(output_path, "a+", encoding="utf-8") as f: 55 | f.write(str(file_path_as_id) + "\n") 56 | 57 | 58 | def get_logged_files(path): 59 | output_path = os.path.join(path, "files.txt") 60 | if not os.path.exists(output_path): 61 | return [] 62 | 63 | with open(output_path, "r", encoding="utf-8") as f: 64 | # splitlines will remove the '\n' in the end and return a list of line. 65 | return list(set(f.read().splitlines())) 66 | return [] 67 | 68 | 69 | def prepare_url(base_url, room): 70 | """ Prepare a URL by adding the room to the base URL """ 71 | 72 | if not base_url.endswith("/") and not room.startswith("/"): 73 | base_url += "/" 74 | elif base_url.endswith("/") and room.startswith("/"): 75 | base_url = base_url[:-1] 76 | 77 | return base_url + room 78 | 79 | 80 | def get_file_id_and_name(url): 81 | """ Get the file id and name from a URL """ 82 | 83 | pattern = re.compile(r"\/get\/([a-zA-Z0-9-_]+)\/(.*)") 84 | info = pattern.findall(url) 85 | file_id = info[0][0] 86 | file_name = unquote(info[0][1]) 87 | return file_id, file_name 88 | 89 | 90 | def get_file_extension(file_name): 91 | """ Get the file extension from a file name """ 92 | 93 | for ext in ['.tar.gz', '.tar.bz2']: 94 | if file_name.endswith(ext): 95 | return file_name[:-len(ext)], file_name[-len(ext):] 96 | return splitext(file_name) or "" 97 | 98 | 99 | def download_file(url, file_name=None): 100 | """ Downloads a file from Volafile and shows a progress bar """ 101 | 102 | chunk_size = 1024 103 | 104 | r = requests.get(url, stream=True, headers=config.headers, 105 | cookies=config.cookies) 106 | r.raise_for_status() 107 | 108 | if not r: 109 | return False 110 | 111 | total_size = int(r.headers.get("content-length", 0)) 112 | 113 | with open(file_name + ".part", "wb") as f: 114 | for data in tqdm(iterable=r.iter_content(chunk_size=chunk_size), 115 | total=total_size / chunk_size, unit="KB", 116 | unit_scale=True): 117 | f.write(data) 118 | 119 | # Remove the ".part" from the file name 120 | os.rename(file_name + ".part", file_name) 121 | 122 | 123 | def expiration_to_date(expiration): 124 | expiration = expiration.lower().strip() 125 | number, method = expiration.split(" ") 126 | max_expiration_days = 2 127 | date = datetime.now() + timedelta(days=-max_expiration_days) 128 | number = int(number) 129 | 130 | if method == "day" or method == "days": 131 | return date + timedelta(days=+number) 132 | elif method == "hour" or method == "hours": 133 | return date + timedelta(hours=+number) 134 | elif method == "min" or method == "mins": 135 | return date + timedelta(minutes=+number) 136 | elif method == "sec" or method == "secs": 137 | return date + timedelta(seconds=+number) 138 | 139 | return datetime.now() 140 | -------------------------------------------------------------------------------- /volafile-downloader/volafile-downloader.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | __author__ = "Okn3" 5 | __email__ = "okn3@protonmail.com" 6 | __license__ = "MIT" 7 | __version__ = "2.2.0" 8 | 9 | import sys 10 | import config 11 | import argparse 12 | import logging 13 | import colorlog 14 | 15 | from downloader import Downloader 16 | 17 | 18 | def get_args(): 19 | """ Get and prepare all the arguments """ 20 | 21 | parser = argparse.ArgumentParser() 22 | 23 | # Optional 24 | parser.add_argument("-o", "--output-dir", help="Output directory", 25 | required=False, default=config.download_output_dir) 26 | 27 | parser.add_argument("-l", "--loop", action="store_true", 28 | help="Download all the files in the room and " 29 | "loops to check if new files were added") 30 | parser.add_argument("-p", "--password", default=None, 31 | help="Room password") 32 | parser.add_argument("-a", "--archive", action="store_true", 33 | default=config.archive, 34 | help="Archive room") 35 | parser.add_argument("-at", "--archive-type", default=config.archive_type, 36 | help="Archive type CREATION_DATE (Default) or " 37 | "DOWNLOAD_DATE") 38 | parser.add_argument("-cl", "--chat-log", action="store_true", 39 | default=config.chat_log, 40 | help="Download chat log") 41 | parser.add_argument("-ld", "--loop-delay", 42 | default=config.download_loop_delay, 43 | help="Time delay when downloading in loop") 44 | parser.add_argument("-ms", "--max-allowed-size", 45 | default=config.max_allowed_size, 46 | help="Max allowed size to download a file (in bytes)") 47 | parser.add_argument("-nl", "--no-logs", 48 | default=True, action="store_false", 49 | help="Disable the logging to text files when a file " 50 | "is downloaded, it's too big and when there was " 51 | "an error") 52 | 53 | # Required 54 | required = parser.add_argument_group('required arguments') 55 | required.add_argument("-r", "--room", help="Room", required=True) 56 | 57 | return parser.parse_args() 58 | 59 | 60 | def init_logger(): 61 | """ Initialize the logger """ 62 | global logger 63 | 64 | logger = logging.getLogger("root") 65 | logger.setLevel(config.logger_stream_level) 66 | 67 | # Stream Handler 68 | logger_stream_handler = colorlog.StreamHandler() 69 | logger_stream_handler.setLevel(config.logger_stream_level) 70 | logger_stream_formatter = colorlog.ColoredFormatter( 71 | config.logger_stream_format, datefmt=config.logger_stream_date_format) 72 | logger_stream_handler.setFormatter(logger_stream_formatter) 73 | 74 | logger.addHandler(logger_stream_handler) 75 | 76 | # File Handler 77 | if config.logger_file_active: 78 | logger_file_handler = logging.FileHandler( 79 | config.logger_file_path, encoding='utf8') 80 | logger_file_handler.setLevel(config.logger_file_level) 81 | logger_file_formatter = logging.Formatter( 82 | config.logger_file_format, datefmt=config.logger_file_date_format) 83 | logger_file_handler.setFormatter(logger_file_formatter) 84 | 85 | logger.addHandler(logger_file_handler) 86 | 87 | 88 | def main(): 89 | """ Main function that is executed when running the program """ 90 | 91 | args = get_args() 92 | 93 | init_logger() 94 | 95 | global downloader 96 | downloader = Downloader(room=args.room, 97 | password=args.password, 98 | output_dir=args.output_dir, 99 | max_allowed_size=args.max_allowed_size, 100 | do_log=args.no_logs, 101 | archive=args.archive, 102 | archive_type=args.archive_type, 103 | chat_log=args.chat_log) 104 | 105 | if args.loop: 106 | downloader.downloadLoop(args.loop_delay) 107 | else: 108 | downloader.download() 109 | 110 | 111 | if __name__ == "__main__": 112 | try: 113 | main() 114 | except KeyboardInterrupt: 115 | logger.info("Interrupted by the user.") 116 | if downloader: 117 | downloader.stop() 118 | sys.exit() 119 | --------------------------------------------------------------------------------