├── .gitignore
├── LICENSE
├── README.md
├── drivers
└── .gitkeep
├── requirements.txt
├── setup.py
└── volafile-downloader
├── config.py
├── downloader.py
├── utils.py
└── volafile-downloader.py
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | eggs/
15 | .eggs/
16 | lib/
17 | lib64/
18 | parts/
19 | sdist/
20 | var/
21 | wheels/
22 | *.egg-info/
23 | .installed.cfg
24 | *.egg
25 |
26 | # PyInstaller
27 | # Usually these files are written by a python script from a template
28 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
29 | *.manifest
30 | *.spec
31 |
32 | # Installer logs
33 | pip-log.txt
34 | pip-delete-this-directory.txt
35 |
36 | # Unit test / coverage reports
37 | htmlcov/
38 | .tox/
39 | .coverage
40 | .coverage.*
41 | .cache
42 | nosetests.xml
43 | coverage.xml
44 | *.cover
45 | .hypothesis/
46 |
47 | # Translations
48 | *.mo
49 | *.pot
50 |
51 | # PyBuilder
52 | target/
53 |
54 | # Jupyter Notebook
55 | .ipynb_checkpoints
56 |
57 | # pyenv
58 | .python-version
59 |
60 | # Environments
61 | .env
62 | .venv
63 | env/
64 | venv/
65 | ENV/
66 |
67 | *.log
68 | downloads/
69 | .vscode/
70 | **/drivers/chromedriver.exe
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2018 the-okn3
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Volafile Downloader
2 |
3 | volafile.org files and chat downloader
4 |
5 | Tested with **Python 3.6.2** on **Windows 10** (It should work anywhere).
6 |
7 | ## Install
8 |
9 | Install all the required libraries:
10 |
11 | ```
12 | pip install -r requirements.txt
13 | ```
14 |
15 | Feel free to edit the config file with your own options; most of the settings in there have a comment with a short explanation. You don't have to edit it, it should work as is.
16 |
17 | ```
18 | config.py
19 | ```
20 |
21 | You need the Chromium driver (chromedriver). Download it from the official website, http://chromedriver.chromium.org/downloads, and place it in a directory, for example: "drivers/**chromedriver.exe**".
22 |
23 | If you are using **Linux**, **macOS** or **another OS**, or you have the chrome driver installed in another path, just change the path of the chromedriver in the config file. You might need to make the file executable, example: `sudo chmod +x chromedriver`
24 |
25 | ## Usage & Examples
26 |
27 | Download all files from a room:
28 |
29 | ```
30 | λ python volafile-downloader.py -r roomname
31 | ```
32 |
33 | And with a password
34 |
35 | ```
36 | λ python volafile-downloader.py -r roomname -p 123456
37 | ```
38 |
39 | Download all files from a room and loops to check if new files were added (you can also change the time delay between the loop with the **-ld** option):
40 |
41 | ```
42 | λ python volafile-downloader.py -r roomname -l
43 | ```
44 |
45 | Download all files from a room, loop to check if new files were added and also download the chat logs (the chat log download only works with the download loop (-l argument); check the FAQ for more information):
46 |
47 | ```
48 | λ python volafile-downloader.py -r roomname -l -cl
49 | ```
50 |
51 | Download all files from a room with password and loops to check if new files were added and archives the files by creation date:
52 |
53 | ```
54 | -r : Room name
55 | -p : Room password
56 | -l : Loops to check for new files
57 | -a : Archives
58 | -at : Archive type CREATION_DATE (Default) OR DOWNLOAD_DATE
59 | -cl : Downloads the chat logs
60 | ```
61 |
62 | ```
63 | λ python volafile-downloader.py -r roomname -p 123456 -l -a -at CREATION_DATE -cl
64 | ```
65 |
66 | Show all the available options:
67 | Note: Some options/arguments will override some config variables.
68 |
69 | ```
70 | λ python volafile-downloader.py -h
71 | usage: volafile-downloader.py [-h] [-o OUTPUT_DIR] [-l] [-p PASSWORD] [-a]
72 | [-at ARCHIVE_TYPE] [-cl] [-ld LOOP_DELAY]
73 | [-ms MAX_ALLOWED_SIZE] [-nl] -r ROOM
74 |
75 | optional arguments:
76 | -h, --help show this help message and exit
77 | -o OUTPUT_DIR, --output-dir OUTPUT_DIR
78 | Output directory
79 | -l, --loop Download all the files in the room and loops to check
80 | if new files were added
81 | -p PASSWORD, --password PASSWORD
82 | Room password
83 | -a, --archive Archive room
84 | -at ARCHIVE_TYPE, --archive-type ARCHIVE_TYPE
85 | Archive type CREATION_DATE or DOWNLOAD_DATE
86 | -cl, --chat-log Download chat log
87 | -ld LOOP_DELAY, --loop-delay LOOP_DELAY
88 | Time delay when downloading in loop
89 | -ms MAX_ALLOWED_SIZE, --max-allowed-size MAX_ALLOWED_SIZE
90 | Max allowed size to download a file (in bytes)
91 | -nl, --no-logs Disable the logging to text files when a file is
92 | downloaded, it's too big and when there was an error
93 |
94 | required arguments:
95 | -r ROOM, --room ROOM Room
96 | ```
97 |
98 | ## FAQ (Frequently asked questions)
99 |
100 | **Where can I download the drivers?**
101 |
102 | You can download the drivers from the official chromium website http://chromedriver.chromium.org/downloads
103 |
104 | **Why can I only download the chat log with the loop argument (-l)?**
105 |
106 | Because when you enter a room you don't have access to the chat messages that were sent before you entered; you only get the messages sent while you are in the room. I believe the messages are not saved on the Volafile servers; they are only sent and received through Web Sockets and then saved on your computer.
107 |
108 | ## Feel free to buy me a coffee
109 |
110 | Other Methods:
111 |
112 | Bitcoin (BTC): **12hPw1663w6Ne71g5zU1gFTjPh2syUTbeq**
113 |
114 | Ethereum (ETH): **0x25b9318c17ef4f27b960c89bb90b855f09aa299f**
115 |
116 | Litecoin (LTC): **LVwr7RhcdkTGTAgoy6iyxGrZKXvmE293HH**
117 |
118 | Ripple (XRP): **12hPw1663w6Ne71g5zU1gFTjPh2syUTbeq**
119 |
120 | ## License
121 |
122 | Copyright 2018 the-okn3
123 |
124 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
125 |
126 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
127 |
128 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
129 |
--------------------------------------------------------------------------------
/drivers/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/the-okn3/volafile-downloader/91bb98a2e3384cc522c549aac2734d1f21fecaea/drivers/.gitkeep
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | tqdm==4.15.0
2 | humanfriendly==4.4.1
3 | requests==2.18.4
4 | selenium==3.5.0
5 | colorlog==3.1.4
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
"""Packaging script for volafile-downloader."""
from setuptools import setup

setup(
    name="volafile-downloader",
    version="2.2.0",
    # ``scripts`` must list script *files*; "volafile-downloader" on its own
    # is the directory that contains the entry script.
    scripts=["volafile-downloader/volafile-downloader.py"],
    description="Volafile.org files downloader",
    author="Okn3",
    author_email="okn3@protonmail.com",
    url="https://github.com/the-okn3/volafile-downloader",
    keywords=["volafile", "downloader", "download", "files", "chat"],
)
13 |
--------------------------------------------------------------------------------
/volafile-downloader/config.py:
--------------------------------------------------------------------------------
1 | # NOTE: Some config parameters can be overridden by command line arguments
2 |
3 | # Download
4 | # The size is in bytes (for example, 314572800 = 300MB)
5 | max_allowed_size = 800572800
6 |
7 | download_output_dir = "../downloads"
8 | download_users_to_ignore = [
9 | "ExampleNameHere2233"
10 | ]
11 |
12 | chat_log = True
13 | chat_messages_to_ignore = [
14 | """Volafile can also be found here: https://twitter.com/volafile https://facebook.com/volafile"""
15 | ]
16 | chat_nicks_to_ignore = [
17 | "News"
18 | ]
19 |
20 | # Archive will create one folder per date and put the files in it.
21 | # Note: the date used is the date the file was added to the room, so if the
22 | # same file is added again on another day it will be downloaded again
23 | # into that day's folder.
24 | # ex: /2018-10-13
25 | archive = False
26 | # Archive types:
27 | # CREATION_DATE = The date the file was created
28 | # DOWNLOAD_DATE = The date the file was downloaded
29 | archive_type = "CREATION_DATE"
30 | # https://docs.python.org/3/library/datetime.html#strftime-and-strptime-behavior
31 | archive_date_format = "%Y-%m-%d"
32 |
33 | # Time to wait between passes when downloading in loop, i.e. (download
34 | # everything in the room, wait 60 seconds, download everything in the room,
35 | # and so on). Note: a file is only downloaded if it wasn't downloaded before
36 | download_loop_delay = 60
37 |
38 | # To start with the oldest files (that will expire first) first
39 | download_oldest_first = True
40 |
41 | # Extensions to not download
42 | extensions_blacklist = [".mp3", ".wav"]
43 |
44 | filenames_blacklist = [
45 | "donotdownloadme_lalalalalalalalalal.jpg"
46 | ]
47 |
48 | # This can be edited or removed, the only thing needed is the user-agent
49 | headers = {
50 | 'User-Agent': "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:55.0) "
51 | "Gecko/20100101 Firefox/55.0",
52 | "DNT": "1",
53 | "Upgrade-Insecure-Requests": "1"
54 | }
55 |
56 | # This shouldn't be edited or removed, it's required to download the files
57 | cookies = {
58 | "allow-download": "1"
59 | }
60 |
61 | base_url = "https://volafile.org/r/"
62 |
63 | # Driver
64 | # For development it's better to set the "driver_headless" to False, to show
65 | # the window with the website.
66 | driver_path = "../drivers/chromedriver.exe"
67 | driver_headless = True
68 | driver_log_level = 3
69 |
70 | # Logs
71 | log_download_archive = True
72 | log_download_error = True
73 | log_download_too_big = True
74 |
75 | logger_stream_format = "[%(asctime)s] %(log_color)s[%(levelname)s] - " \
76 | "%(message)s%(reset)s"
77 | logger_stream_level = "DEBUG"
78 | logger_stream_date_format = "%m-%d-%y %H:%M:%S"
79 |
80 | logger_file_active = True
81 | logger_file_format = "[%(asctime)s] [%(levelname)s] [%(module)s] - %(message)s"
82 | logger_file_level = "INFO"
83 | logger_file_date_format = "%m-%d-%y %H:%M:%S"
84 | logger_file_path = "../downloads/volafile-downloader.log"
85 |
--------------------------------------------------------------------------------
/volafile-downloader/downloader.py:
--------------------------------------------------------------------------------
1 |
2 | import os
3 | import re
4 | import sys
5 | import time
6 | import config
7 | import logging
8 | import humanfriendly
9 |
10 | from enum import Enum
11 | from utils import download_file, log, sanitize_file_name, prepare_url, \
12 | get_file_id_and_name, get_file_extension, expiration_to_date, \
13 | log_file, get_logged_files
14 | from selenium import webdriver
15 | from selenium.webdriver.support import ui
16 | from selenium.common.exceptions import TimeoutException
17 | from datetime import datetime
18 |
19 |
class Result(Enum):
    """Status flag returned together with a payload by Downloader helpers."""

    # The operation worked; the payload that accompanies it is usable.
    SUCCESS = 1
    # The operation failed; in this file it is always paired with None.
    ERROR = 2
23 |
24 |
25 | class Downloader():
26 |
# CSS selector used to locate the room password <input>.
PASSWORD_INPUT_CSS_SELECTOR = """#room_content_fixed """ \
    """div.ui_frame_container div.ui_frame_body.ui_frame_body_bar """ \
    """input[type="password"]"""

# XPath of the element that is clicked after the password has been typed.
PASSWORD_BUTTON_XPATH = """//*[@id="room_content_fixed"]/div[1]/div/""" \
    """div[3]/span[2]"""

# XPath of the element that is clicked to dismiss the +18 warning modal.
# NOTE(review): this concatenates to exactly the same XPath as
# PASSWORD_BUTTON_XPATH - presumably both dialogs share one layout; confirm.
MODAL_18_WARNING_XPATH = """//*[@id="room_content_fixed"]/div""" \
    """[1]/div/div[3]/span[2]"""
36 |
def __init__(self,
             room,
             password,
             output_dir,
             max_allowed_size=config.max_allowed_size,
             do_log=True,
             archive=config.archive,
             archive_type=config.archive_type,
             chat_log=config.chat_log):
    """Store the room settings and prepare the room's download directory.

    Args:
        room: Room name; also used as the sub-directory of ``output_dir``.
        password: Room password, or None when the room has none.
        output_dir: Base directory for all downloads.
        max_allowed_size: Files bigger than this (in bytes) are skipped.
        do_log: Whether per-file entries are written through ``log()``.
        archive: Whether files are sorted into per-date sub-directories.
        archive_type: "CREATION_DATE" or "DOWNLOAD_DATE".
        chat_log: Whether the loop also saves the chat log.
    """
    self.logger = logging.getLogger("root")
    self.driver = None
    self.looping = False
    self.room = room
    self.password = password
    self.output_dir = output_dir
    self.max_allowed_size = max_allowed_size
    self.do_log = do_log
    self.archive = archive
    self.archive_type = archive_type
    self.chat_log = chat_log
    self.old_chat_messages = []

    self.logger.info('Initializing...')
    self.logger.info("Room: %s" % (self.room))
    # The logger may also write to a file, so never emit the real password.
    self.logger.info(
        "Password: %s" % ("********" if self.password else None))
    self.logger.info("Archive: %s" % (self.archive))

    # Create necessary directories (exist_ok avoids the check/create race)
    self.download_directory = os.path.join(self.output_dir, self.room)
    os.makedirs(self.download_directory, exist_ok=True)

    # Names of the files already fetched in previous runs
    self.downloaded_files = get_logged_files(self.download_directory)
71 |
def downloadLoop(self, loop_delay=60):
    """Download the room over and over until ``stop()`` is called.

    Each pass downloads the new files and, when ``self.chat_log`` is set,
    the chat log, then sleeps ``loop_delay`` seconds. When a pass raises,
    the driver is closed and, after the same delay, a fresh driver is
    started. Restarts are done iteratively so a long run of failures
    cannot exhaust the call stack, and a restart never overrides a
    ``stop()`` request.

    Args:
        loop_delay: Seconds to wait between passes (anything ``int()``
            accepts).
    """
    delay = int(loop_delay)
    self.looping = True

    while self.looping:
        self.initDriver()

        try:
            while self.looping:
                self.logger.info("Downloading room '%s'" % (self.room))

                downloaded = self.downloadFiles(False)

                if self.chat_log:
                    self.downloadChatLog()

                if not downloaded:
                    self.logger.info("There is no files to download")

                self.logger.info(
                    "[Sleeping for %s seconds]" % (loop_delay))
                time.sleep(delay)
        except Exception:
            self.closeDriver()
            if not self.looping:
                # stop() was requested while a pass was running; the error
                # is most likely a side effect of that, so don't restart.
                break
            # exc_info keeps the traceback so the failure can be diagnosed
            self.logger.warning("Something went wrong, restarting...",
                                exc_info=True)
            self.logger.info("[Sleeping for %s seconds]" % (loop_delay))
            time.sleep(delay)

    self.closeDriver()
99 |
def download(self):
    """Download all the files from an entire room, once.

    Starts the browser with ``initDriver()`` and then runs
    ``downloadFiles`` with ``close_driver=True``. Neither return value is
    inspected here.
    """

    self.initDriver()
    self.downloadFiles(True)
105 |
def downloadFiles(self, close_driver=True):
    """Fetch the room's file list and download every file that is allowed.

    A file is skipped when it already exists on disk or in the downloaded
    files record, when its uploader, extension or name is listed in the
    config, or when it is larger than ``self.max_allowed_size``.

    Args:
        close_driver: Close the browser once the file list was read (or
            could not be read). The loop mode passes False to keep it.

    Returns:
        bool: False when the file list could not be obtained, True
        otherwise (even if individual downloads failed).
    """
    try:
        # List of files
        self.logger.info("Downloading the list of files...")
        result, files = self.getFilesList()
    except Exception as ex:
        self.logger.warning(
            "The Website might be offline or another error "
            "occurred: " + str(ex))
        return False
    finally:
        # Runs on every path, including the ERROR result below, so the
        # browser is never left running when the caller asked to close it.
        if close_driver:
            self.closeDriver()

    if result == Result.ERROR:
        self.logger.error("Error while trying to fetch the list "
                          "of files, maybe there is no files "
                          "to download")
        return False

    self.logger.info("List of files downloaded")

    info = dict(
        total=len(files),
        downloaded=0,
        already_exist=0,
        too_big=0,
        failed=0,
        forbidden_extension=0,
        user_ignored=0
    )

    for file_index, f in enumerate(files, start=1):

        self.logger.info(u"[%s of %s] [%s] [%s] [%s] [%s] [by %s]" % (
            file_index,
            info["total"],
            f["name"],
            f["extension"],
            humanfriendly.format_size(f["size"]),
            f["expiration"],
            f["tag"]
        ))

        download_directory_path = self.download_directory

        # Change directory if it's to archive
        if self.archive:
            archive_date = datetime.now()

            if self.archive_type == "CREATION_DATE":
                try:
                    archive_date = expiration_to_date(f["expiration"])
                except ValueError:
                    # Unparseable expiration label: keep the download date
                    # instead of aborting the whole pass.
                    self.logger.warning(
                        "Couldn't parse the expiration '%s', using the "
                        "download date" % (f["expiration"]))

            download_directory_path = os.path.join(
                self.download_directory,
                archive_date.strftime(config.archive_date_format))

            os.makedirs(download_directory_path, exist_ok=True)

        # The id keeps files with the same name apart
        file_id_name = f["name"] + " - " + str(f["id"]) + f["extension"]

        file_path = os.path.join(download_directory_path, file_id_name)

        # Check if the file already exists
        if os.path.exists(file_path) or \
                file_id_name in self.downloaded_files:
            self.logger.info("File already exists")
            info["already_exist"] += 1
            continue

        # Check if we can download a file from this user
        if f["tag"].strip() in config.download_users_to_ignore:
            self.logger.info("User ignored (from the list in the config)")
            info["user_ignored"] += 1
            continue

        # Check if the file extension is blacklisted
        if f["extension"] in config.extensions_blacklist:
            self.logger.warning(
                "File Extension not allowed to download")
            info["forbidden_extension"] += 1
            continue

        # Check if the file name is blacklisted (shares the counter with
        # the extensions, the summary reports them together)
        if f["name"] + f["extension"] in config.filenames_blacklist:
            self.logger.warning(
                "File Name not allowed to download")
            info["forbidden_extension"] += 1
            continue

        # Check if the file size is greater then allowed
        if f["size"] > self.max_allowed_size:
            self.logger.warning(
                "File size not allowed to download")
            if self.do_log:
                log("TOOBIG", self.download_directory, f)
            info["too_big"] += 1
            continue

        try:
            self.logger.info("Downloading...")
            download_file(f["url"], file_path)
            self.logger.info("Downloaded")

            self.downloaded_files.append(file_id_name)
            log_file(file_id_name, self.download_directory)

            if self.do_log:
                log("ARCHIVE", self.download_directory, f)

            info["downloaded"] += 1
        except Exception as ex:
            self.logger.error(
                "Error downloading file:" + str(ex))
            if self.do_log:
                log("ERROR", self.download_directory, f)
            info["failed"] += 1

    self.logger.info("DONE")
    self.logger.info("%s of %s Files downloaded" %
                     (info["downloaded"], info["total"]))
    self.logger.info("%s of %s Files already existed" %
                     (info["already_exist"], info["total"]))
    self.logger.info("%s of %s Files were too big to download" %
                     (info["too_big"], info["total"]))
    self.logger.info("%s of %s Files have extensions or name not allowed to "
                     "download" %
                     (info["forbidden_extension"], info["total"]))
    self.logger.info("%s of %s Files couldn't be downloaded (error "
                     "downloading)" %
                     (info["failed"], info["total"]))
    self.logger.info("%s of %s Files couldn't be downloaded (user "
                     "ignored)" %
                     (info["user_ignored"], info["total"]))
    return True
250 |
def initDriver(self):
    """Start Chrome, open the room and get past the entry dialogs.

    Exits the program with status 1 when ``config.driver_path`` is set
    but does not exist. Otherwise stores the new driver in
    ``self.driver``, types the room password when one is requested and
    clicks away the +18 warning modal when it shows up.

    Returns:
        ``(Result.ERROR, None)`` when the password step failed, None
        otherwise.
    """
    if config.driver_path and not os.path.exists(config.driver_path):
        self.logger.error("The driver path in the config doesn't exist")
        print(
            "You can download the chromium drivers from their official "
            "website:"
            "\n\t- Access http://chromedriver.chromium.org/downloads and "
            "download the drivers."
            "\n\t- Place the drivers in the drivers folder or in other place"
            "\n\t- If you are on linux or macOS you might need to give "
            "\n\t  permission to that file, ex: sudo chmod +x chromedriver"
            "\n\t- Edit the 'driver_path' in the config.py file with the path "
            "of the drivers you downloaded"
        )
        sys.exit(1)

    # Create driver with all the arguments
    options = webdriver.ChromeOptions()
    options.add_argument("--log-level=%d" % int(config.driver_log_level))
    options.add_argument("--disable-logging")
    options.add_argument("--disable-extensions")
    if config.driver_headless:
        options.add_argument("headless")

    # os.devnull is "nul" on Windows and "/dev/null" elsewhere, so the
    # service log is discarded on every platform instead of creating a
    # file literally named "NUL" outside Windows.
    self.driver = webdriver.Chrome(config.driver_path,
                                   service_log_path=os.devnull,
                                   chrome_options=options)

    wait = ui.WebDriverWait(self.driver, 3)

    # Go to the url
    self.driver.get(prepare_url(config.base_url, self.room))

    # See if is asking for a password, if yes then type one
    if not self.typePasswordIfNeeded(self.password):
        return (Result.ERROR, None)

    # Try to wait for the +18 warning modal and click OK
    try:
        wait.until(lambda driver: driver.find_element_by_xpath(
            self.MODAL_18_WARNING_XPATH)
        ).click()
    except TimeoutException:
        self.logger.info(
            "Couldn't find the +18 warning modal, "
            "assuming there isn't one...")
    except Exception:
        # Best effort: any other failure while dismissing the modal is
        # only reported, the driver stays usable for the caller.
        self.logger.info("Nothing to download")
299 |
def downloadChatLog(self):
    """Append the chat messages not yet seen to today's ``chat.log``.

    The messages are read from the page's "localforage" IndexedDB
    database (key ``room:<room>:messages``). Messages from nicks in
    ``config.chat_nicks_to_ignore`` and messages containing a text from
    ``config.chat_messages_to_ignore`` are left out of the file.
    """
    self.logger.info("Downloading chat log...")

    # The room name is handed over as a script argument instead of being
    # pasted into the JavaScript source, so a quote in the name can't
    # break the script. Selenium appends the completion callback as the
    # last argument. The onerror handlers report "no messages" instead of
    # leaving the script to run into its timeout.
    messages = self.driver.execute_async_script("""
        var room = arguments[0];
        var done = arguments[arguments.length - 1];
        window.indexedDB = window.indexedDB || window.mozIndexedDB || window.webkitIndexedDB || window.msIndexedDB;
        var db;
        var request = window.indexedDB.open("localforage", 2);
        request.onerror = function(event) {
            done(null);
        };
        request.onsuccess = function(event) {
            console.log(event);
            db = event.target.result;
            var transaction = db.transaction("keyvaluepairs", "readwrite");
            var objectStore = transaction.objectStore("keyvaluepairs");

            var test = objectStore.get("room:" + room + ":messages");
            test.onerror = function(event) {
                done(null);
            };
            test.onsuccess = function(event) {
                done(event.target.result);
            }
        };
    """, self.room)

    if not messages:
        self.logger.info("No chat log to download")
        return

    # Create necessary directories
    path = os.path.join(self.output_dir, self.room)
    path = os.path.join(path,
                        datetime.now().strftime(config.archive_date_format))
    os.makedirs(path, exist_ok=True)
    path = os.path.join(path, "chat.log")

    # Get only the new messages
    new_messages = [x for x in messages if x not in self.old_chat_messages]
    self.old_chat_messages = messages

    lines = []
    for message in new_messages:
        owner = "♕" if "owner" in message["options"] else ""

        texts = []
        ignored = False
        for m in message["message"]:
            # Unknown part types are kept in their raw form
            text = str(m)

            if m["type"] == "text":
                text = m["value"]
            elif m["type"] == "file":
                text = "%s - %s (%s)" % (m["id"],
                                         m["name"],
                                         m["filetype"])
            elif m["type"] == "url":
                text = "%s (%s)" % (m["text"], m["href"])

            if text in config.chat_messages_to_ignore:
                # One ignored part drops the whole message
                ignored = True
                break

            texts.append(text)

        if ignored or message["nick"] in config.chat_nicks_to_ignore:
            continue

        lines.append("%s%s: %s\n" % (
            owner,
            message["nick"],
            "\n".join(texts)))

    # One open/write for the whole batch; nothing is created when every
    # new message was filtered out.
    if lines:
        with open(path, "a+", encoding="utf-8") as f:
            f.writelines(lines)

    self.logger.info(
        "Downloaded chat log with %d new messages" % len(new_messages))
372 |
def getFilesList(self):
    """Read the room's file list from the page.

    Returns:
        ``(Result.SUCCESS, files)`` where ``files`` is a list of dicts
        with the keys id, url, name, extension, tag, size and expiration,
        or ``(Result.ERROR, None)`` when no file element showed up within
        three seconds. With ``config.download_oldest_first`` the list is
        returned in reverse page order.
    """

    def find_file_elements(driver):
        return driver.find_elements_by_css_selector(
            "#file_list .filelist_file")

    # Wait for the list of files and get them
    try:
        file_elements = ui.WebDriverWait(self.driver, 3).until(
            find_file_elements)
    except TimeoutException:
        self.logger.error(
            "Couldn't find the list of files, aborting...")
        return (Result.ERROR, None)

    # Splits the right part's HTML into the size text (before the first
    # tag) and the expiration text (inside the trailing span)
    size_expiration_pattern = re.compile(r"^(.*?)<.*>(.*)<\/span>")

    entries = []
    for element in file_elements:
        left_part = element.find_element_by_class_name("file_left_part")
        right_part = element.find_element_by_class_name("file_right_part")

        url = left_part.get_attribute("href")
        tag_element = left_part.find_element_by_class_name("file_tag")
        tag = tag_element.get_attribute("innerHTML")

        right_html = right_part.get_attribute("innerHTML")
        size_text, expiration_text = \
            size_expiration_pattern.findall(right_html)[0]

        file_id, real_file_name = get_file_id_and_name(url)
        base_name, extension = get_file_extension(real_file_name)

        entry = {}
        entry["id"] = file_id
        entry["url"] = url
        entry["name"] = sanitize_file_name(base_name)
        entry["extension"] = extension
        entry["tag"] = tag
        entry["size"] = humanfriendly.parse_size(size_text)
        entry["expiration"] = expiration_text
        entries.append(entry)

    if config.download_oldest_first:
        entries.reverse()

    return (Result.SUCCESS, entries)
432 |
def isPasswordNeeded(self):
    """Tell whether the password input appears within two seconds."""

    def find_password_input(driver):
        return driver.find_element_by_css_selector(
            self.PASSWORD_INPUT_CSS_SELECTOR)

    try:
        ui.WebDriverWait(self.driver, 2).until(find_password_input)
    except TimeoutException:
        return False

    return True
442 |
def typePasswordIfNeeded(self, password):
    """Type the room password when the page asks for one.

    Args:
        password: Password to type; may be None/empty.

    Returns:
        bool: True when the room needs no password or the password was
        accepted. False when a password is needed but none was given,
        the submit button could not be found, or the page asks for the
        password again (wrong password).
    """
    wait = ui.WebDriverWait(self.driver, 5)

    # Only this lookup decides whether the room is password protected
    try:
        password_input = wait.until(
            lambda driver: driver.find_element_by_css_selector(
                self.PASSWORD_INPUT_CSS_SELECTOR))
    except TimeoutException:
        self.logger.info("This room doesn't require a password")
        if password:
            self.logger.info("You typed a password for a room that "
                             "doesn't require one")
        return True

    if not password:
        self.logger.error("This room requires a password and you "
                          "didn't type one")
        return False

    password_input.send_keys(password)

    # A timeout here means the dialog is there but can't be submitted;
    # it must not be mistaken for "no password required".
    try:
        wait.until(
            lambda driver: driver.find_element_by_xpath(
                self.PASSWORD_BUTTON_XPATH)
        ).click()
    except TimeoutException:
        self.logger.error("Couldn't find the button to submit the room "
                          "password")
        return False

    # Verify if is asking again for the password, if yes then the
    # password that we typed is wrong
    time.sleep(1)
    if self.isPasswordNeeded():
        self.logger.error("This room required a password but you "
                          "typed the wrong one")
        return False

    return True
477 |
def closeDriver(self):
    """Quit the browser, if one is running, and forget it.

    Errors raised while quitting are logged, not propagated.
    ``self.driver`` is reset to None afterwards so repeated calls (the
    loop and ``stop()`` both close the driver) don't quit a dead session
    again.
    """

    if not self.driver:
        return

    try:
        self.driver.quit()
    except Exception:
        self.logger.error("Something happened while trying to close "
                          "the driver")
    finally:
        self.driver = None
488 |
def stop(self):
    """Clear the ``looping`` flag checked by ``downloadLoop`` and close
    the driver."""
    self.looping = False
    self.closeDriver()
492 |
--------------------------------------------------------------------------------
/volafile-downloader/utils.py:
--------------------------------------------------------------------------------
1 | import os
2 | import re
3 | import config
4 | import requests
5 |
6 | from tqdm import tqdm
7 | from urllib.parse import unquote
8 | from os.path import splitext
9 | from datetime import datetime, timedelta
10 |
11 |
def sanitize_file_name(file_name):
    """
    Make a name safe to use as a file name: trim the surrounding
    whitespace, turn every space into an underscore and drop every
    character that is not a word character, a dash or a dot
    """

    trimmed = str(file_name).strip()
    underscored = "_".join(trimmed.split(' '))
    forbidden = re.compile(r'(?u)[^-\w.]')
    return forbidden.sub('', underscored)
20 |
21 |
def log(log_type, path, file_info):
    """ Log information about a file to one of the per-room text files

    log_type: "ERROR", "ARCHIVE" or "TOOBIG" (case-insensitive)
    path: directory where the text file is created/appended
    file_info: dict with the keys url, name, tag, size and expiration

    Nothing is written when the matching ``log_download_*`` switch in the
    config is off. An unknown log type is only reported on stdout.
    """

    # log type -> (output file, name of the config switch that enables it)
    targets = {
        "ERROR": ("error.txt", "log_download_error"),
        "ARCHIVE": ("archive.txt", "log_download_archive"),
        "TOOBIG": ("toobig.txt", "log_download_too_big"),
    }

    kind = log_type.upper()
    if kind not in targets:
        print("[-] Error: Fix the god damn code, there is a log "
              "type that doesn't exist: " + kind)
        return

    file_name, config_switch = targets[kind]

    # A disabled log type is valid, it is simply skipped (it used to be
    # reported as a non-existing type).
    if not getattr(config, config_switch):
        return

    message = "%s - %s - %s - %s - %s\n" % (file_info["url"],
                                            file_info["name"],
                                            file_info["tag"],
                                            file_info["size"],
                                            file_info["expiration"])

    with open(os.path.join(path, file_name), "a+", encoding="utf-8") as f:
        f.write(message)
50 |
51 |
def log_file(file_path_as_id, path):
    """ Append one entry, on its own line, to "files.txt" inside path """

    entry = "%s\n" % (file_path_as_id,)
    record_path = os.path.join(path, "files.txt")
    with open(record_path, "a+", encoding="utf-8") as record:
        record.write(entry)
56 |
57 |
def get_logged_files(path):
    """ Read the entries stored in "files.txt" inside path

    Returns a list with every distinct line of the file (in no particular
    order), or an empty list when the file doesn't exist.
    """

    output_path = os.path.join(path, "files.txt")

    # Opening directly (instead of checking first) leaves no gap in which
    # the file could disappear between the check and the read.
    try:
        with open(output_path, "r", encoding="utf-8") as f:
            # splitlines drops the trailing '\n' of every line
            return list(set(f.read().splitlines()))
    except FileNotFoundError:
        return []
67 |
68 |
def prepare_url(base_url, room):
    """ Join the base URL and the room with exactly one slash at the seam
    when both, or neither, of them bring one """

    base_has_slash = base_url.endswith("/")
    room_has_slash = room.startswith("/")

    if base_has_slash and room_has_slash:
        # Both bring a slash: drop the one from the base
        return base_url[:-1] + room

    if not (base_has_slash or room_has_slash):
        # Neither brings a slash: add the missing one
        return base_url + "/" + room

    return base_url + room
78 |
79 |
def get_file_id_and_name(url):
    """ Extract the file id and the (URL-decoded) file name from a
    ".../get/<id>/<name>" URL """

    match = re.search(r"\/get\/([a-zA-Z0-9-_]+)\/(.*)", url)
    if match is None:
        # Same error a failed lookup of the first match produces
        raise IndexError("list index out of range")

    file_id, quoted_name = match.group(1), match.group(2)
    return file_id, unquote(quoted_name)
88 |
89 |
def get_file_extension(file_name):
    """ Split a file name into (name without extension, extension)

    ".tar.gz" and ".tar.bz2" are kept together as one extension; every
    other name is split by os.path.splitext. A 2-tuple is always returned
    (the extension is "" when there is none), so callers can unpack it.
    """

    for ext in ('.tar.gz', '.tar.bz2'):
        if file_name.endswith(ext):
            return file_name[:-len(ext)], ext
    return splitext(file_name)
97 |
98 |
def download_file(url, file_name=None):
    """ Downloads a file from Volafile and shows a progress bar

    url: URL of the file
    file_name: path to write to; when omitted, the last path segment of
        the URL (URL-decoded) is used, relative to the current directory

    The data is streamed into "<file_name>.part", which is moved to
    file_name once the transfer is complete. Raises requests' HTTPError
    for a 4xx/5xx answer and ValueError when no file name was given and
    none can be derived from the URL.
    """

    chunk_size = 1024

    if file_name is None:
        # basename() keeps a crafted URL from pointing outside the
        # current directory
        file_name = os.path.basename(
            unquote(url.split("?", 1)[0].rstrip("/")))
        if not file_name:
            raise ValueError("Can't derive a file name from: " + url)

    part_name = file_name + ".part"

    # The response is used as a context manager so the streamed
    # connection is released even when the download fails half-way.
    with requests.get(url, stream=True, headers=config.headers,
                      cookies=config.cookies) as r:
        r.raise_for_status()

        total_size = int(r.headers.get("content-length", 0))

        with open(part_name, "wb") as f:
            for data in tqdm(iterable=r.iter_content(chunk_size=chunk_size),
                             total=total_size / chunk_size, unit="KB",
                             unit_scale=True):
                f.write(data)

    # Remove the ".part" from the file name; unlike os.rename, os.replace
    # also works on Windows when the target already exists
    os.replace(part_name, file_name)
121 |
122 |
def expiration_to_date(expiration):
    """ Convert a text such as "2 days" or "5 hours" into a datetime.

    The result is the current time minus two days (max_expiration_days)
    plus the given amount. An unknown unit yields the current time.
    NOTE(review): presumably this estimates when a file was uploaded from
    the time it has left before expiring -- confirm against the caller.
    """
    amount_text, unit = expiration.lower().strip().split(" ")
    max_expiration_days = 2
    base_date = datetime.now() + timedelta(days=-max_expiration_days)
    amount = int(amount_text)

    # Maps every accepted unit spelling to the matching timedelta keyword.
    unit_to_keyword = {
        "day": "days", "days": "days",
        "hour": "hours", "hours": "hours",
        "min": "minutes", "mins": "minutes",
        "sec": "seconds", "secs": "seconds",
    }

    keyword = unit_to_keyword.get(unit)
    if keyword is None:
        return datetime.now()

    return base_date + timedelta(**{keyword: amount})
140 |
--------------------------------------------------------------------------------
/volafile-downloader/volafile-downloader.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 |
4 | __author__ = "Okn3"
5 | __email__ = "okn3@protonmail.com"
6 | __license__ = "MIT"
7 | __version__ = "2.2.0"
8 |
9 | import sys
10 | import config
11 | import argparse
12 | import logging
13 | import colorlog
14 |
15 | from downloader import Downloader
16 |
17 |
def get_args(argv=None):
    """ Get and prepare all the arguments

    argv -- Optional list of argument strings to parse; when None,
            argparse reads sys.argv[1:] as before.

    Returns the argparse namespace. Note that the "no_logs" attribute is
    True when logging to text files is ENABLED: "--no-logs" is a
    store_false flag whose default is True.
    """

    parser = argparse.ArgumentParser()

    # Optional
    parser.add_argument("-o", "--output-dir", help="Output directory",
                        required=False, default=config.download_output_dir)

    parser.add_argument("-l", "--loop", action="store_true",
                        help="Download all the files in the room and "
                        "loops to check if new files were added")
    parser.add_argument("-p", "--password", default=None,
                        help="Room password")
    parser.add_argument("-a", "--archive", action="store_true",
                        default=config.archive,
                        help="Archive room")
    parser.add_argument("-at", "--archive-type", default=config.archive_type,
                        help="Archive type CREATION_DATE (Default) or "
                        "DOWNLOAD_DATE")
    parser.add_argument("-cl", "--chat-log", action="store_true",
                        default=config.chat_log,
                        help="Download chat log")
    # Numeric options are converted explicitly: without `type`, a value
    # given on the command line would arrive as a string while the config
    # default is used as-is.
    parser.add_argument("-ld", "--loop-delay", type=int,
                        default=config.download_loop_delay,
                        help="Time delay when downloading in loop")
    parser.add_argument("-ms", "--max-allowed-size", type=int,
                        default=config.max_allowed_size,
                        help="Max allowed size to download a file (in bytes)")
    parser.add_argument("-nl", "--no-logs",
                        default=True, action="store_false",
                        help="Disable the logging to text files when a file "
                        "is downloaded, it's too big and when there was "
                        "an error")

    # Required
    required = parser.add_argument_group('required arguments')
    required.add_argument("-r", "--room", help="Room", required=True)

    return parser.parse_args(argv)
58 |
59 |
def init_logger():
    """ Initialize the logger

    Creates the module-global `logger` (the logger named "root") with a
    colored stream handler and, when config.logger_file_active is set, a
    file handler. Every handler filters with its own configured level.
    """
    global logger

    logger = logging.getLogger("root")

    # Stream Handler
    logger_stream_handler = colorlog.StreamHandler()
    logger_stream_handler.setLevel(config.logger_stream_level)
    logger_stream_formatter = colorlog.ColoredFormatter(
        config.logger_stream_format, datefmt=config.logger_stream_date_format)
    logger_stream_handler.setFormatter(logger_stream_formatter)

    logger.addHandler(logger_stream_handler)
    handler_levels = [logger_stream_handler.level]

    # File Handler
    if config.logger_file_active:
        logger_file_handler = logging.FileHandler(
            config.logger_file_path, encoding='utf8')
        logger_file_handler.setLevel(config.logger_file_level)
        logger_file_formatter = logging.Formatter(
            config.logger_file_format, datefmt=config.logger_file_date_format)
        logger_file_handler.setFormatter(logger_file_formatter)

        logger.addHandler(logger_file_handler)
        handler_levels.append(logger_file_handler.level)

    # The logger drops records below its own level before any handler sees
    # them, so it must be as verbose as the most verbose handler. Using
    # only the stream level would starve a more detailed file log.
    # (Handler.setLevel stores the level as an int, so min() is safe even
    # when the config uses level names.)
    logger.setLevel(min(handler_levels))
86 |
87 |
def main():
    """ Entry point: parse the arguments, set up logging and download.

    The Downloader is stored in the module-global `downloader`, which the
    interrupt handler of the script reads. With --loop the room is polled
    using the configured delay; otherwise it is downloaded once.
    """
    global downloader

    options = get_args()

    init_logger()

    downloader_settings = {
        "room": options.room,
        "password": options.password,
        "output_dir": options.output_dir,
        "max_allowed_size": options.max_allowed_size,
        # "no_logs" is True while logging is enabled (store_false flag).
        "do_log": options.no_logs,
        "archive": options.archive,
        "archive_type": options.archive_type,
        "chat_log": options.chat_log,
    }
    downloader = Downloader(**downloader_settings)

    if not options.loop:
        downloader.download()
        return

    downloader.downloadLoop(options.loop_delay)
109 |
110 |
if __name__ == "__main__":
    try:
        main()
    except KeyboardInterrupt:
        # `logger` and `downloader` are globals assigned inside main(); an
        # interrupt that arrives before they exist must not turn into a
        # NameError here, so look them up defensively.
        active_logger = globals().get("logger")
        if active_logger is not None:
            active_logger.info("Interrupted by the user.")
        active_downloader = globals().get("downloader")
        if active_downloader:
            active_downloader.stop()
        sys.exit()
119 |
--------------------------------------------------------------------------------