├── .github └── FUNDING.yml ├── DB.py ├── DiscordtoJson.py ├── DiscordtoJson_test.py ├── HashTable.py ├── KMPDownloader.py ├── KMPDownloader_test.py ├── LICENSE ├── LockingCounter.py ├── LockingCounter_test.py ├── PersistentCounter.py ├── README.md ├── Threadpool.py ├── example.txt ├── install_requirements.bat ├── jutils.py ├── patch for patoolib ├── p7rzip.py └── p7zip.py ├── requirements.txt ├── user-agent.txt └── zipextracter.py /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | # These are supported funding model platforms 2 | 3 | github: # Replace with up to 4 GitHub Sponsors-enabled usernames e.g., [user1, user2] 4 | patreon: # Replace with a single Patreon username 5 | open_collective: # Replace with a single Open Collective username 6 | ko_fi: jeffchen54 7 | tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel 8 | community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry 9 | liberapay: # Replace with a single Liberapay username 10 | issuehunt: # Replace with a single IssueHunt username 11 | otechie: # Replace with a single Otechie username 12 | lfx_crowdfunding: # Replace with a single LFX Crowdfunding project-name e.g., cloud-foundry 13 | custom: # Replace with up to 4 custom sponsorship URLs e.g., ['link1', 'link2'] 14 | -------------------------------------------------------------------------------- /DB.py: -------------------------------------------------------------------------------- 1 | import sqlite3 2 | from sqlite3 import Connection, Cursor 3 | from threading import Lock 4 | import threading 5 | 6 | class DB(): 7 | __db_name:str 8 | __connection:Connection 9 | __cursor:Cursor 10 | __lock:Lock 11 | 12 | def __init__(self, db_name:str) -> None: 13 | """ 14 | Creates a database or reopens a database if a database 15 | with the provided name already exists. 
16 | 17 | Args: 18 | db_name (str): name of the database 19 | Pre: db_name must have .db or other suffixes, some exceptions such as 20 | memory only database exists 21 | """ 22 | self.__db_name = db_name 23 | self.__connection = sqlite3.connect(db_name, check_same_thread=False) 24 | self.__cursor = self.__connection.cursor() 25 | self.__lock = threading.Lock() 26 | 27 | def execute(self, cmd:str|tuple) -> any: 28 | """ 29 | Thread safe; Executes a command, if the command returns something, 30 | it will be returned. 31 | 32 | Args: 33 | cmd (str): sql command 34 | Returns: anything the cmd returns 35 | """ 36 | self.__lock.acquire() 37 | 38 | if(isinstance(cmd, str)): 39 | content = self.__cursor.execute(cmd) 40 | 41 | else: 42 | content = self.__cursor.execute(*cmd) 43 | self.__lock.release() 44 | 45 | return content 46 | 47 | def executeBatch(self, cmds:list[str|tuple]) -> any: 48 | """ 49 | Thread safe; Executes batch commands, if the command returns something, 50 | it will be returned. 51 | 52 | Args: 53 | cmd (str): sql command 54 | Returns: list containing anything the cmd returns 55 | """ 56 | self.__lock.acquire() 57 | content = list[len(cmds)] 58 | for i, item in enumerate(cmds): 59 | if(isinstance(item, str)): 60 | content = self.__cursor.execute(item) 61 | else: 62 | content[i] = self.__cursor.execute(*item) 63 | self.__lock.release() 64 | 65 | return content 66 | 67 | def executeNCommit(self, cmd:str|tuple) -> any: 68 | """ 69 | Thread safe; Executes a command, if the command returns something, 70 | it will be returned. 
71 | 72 | Commit is done after the opertion has been completed 73 | 74 | Args: 75 | cmd (str): sql command 76 | Returns: anything the cmd returns 77 | """ 78 | self.__lock.acquire() 79 | 80 | if(isinstance(cmd, str)): 81 | content = self.__cursor.execute(cmd) 82 | 83 | else: 84 | content = self.__cursor.execute(*cmd) 85 | self.__connection.commit() 86 | self.__lock.release() 87 | 88 | return content 89 | 90 | def executeBatchNCommit(self, cmds:list[str|tuple]) -> any: 91 | """ 92 | Thread safe; Executes a command, if the commands returns something, 93 | it will be returned. 94 | 95 | Commit is done after the opertion has been completed 96 | 97 | Args: 98 | cmd (str): sql command 99 | Returns: list of anything anything the cmd returns 100 | """ 101 | self.__lock.acquire() 102 | content = list[len(cmds)] 103 | for i, item in enumerate(cmds): 104 | if(isinstance(item, str)): 105 | content = self.__cursor.execute(item) 106 | else: 107 | content[i] = self.__cursor.execute(*item) 108 | self.__connection.commit() 109 | self.__lock.release() 110 | 111 | return content 112 | 113 | def commit(self) -> None: 114 | """ 115 | Thread safe; Commits unsaved changes. 116 | """ 117 | self.__lock.acquire() 118 | self.__connection.commit() 119 | self.__lock.release() 120 | 121 | def closeNOpen(self)->None: 122 | """ 123 | Closes and reopens the database connection 124 | """ 125 | self.__connection.close() 126 | self.__connection = sqlite3.connect(self.__db_name, check_same_thread=False) 127 | self.__cursor = self.__connection.cursor() 128 | 129 | 130 | def close(self)->None: 131 | """ 132 | Closes the database connection 133 | """ 134 | self.__connection.close() 135 | -------------------------------------------------------------------------------- /DiscordtoJson.py: -------------------------------------------------------------------------------- 1 | """ 2 | Simple JSON scraper for Kemono.party discord content. 
class DiscordToJson():
    """
    Utility functions used for scraping Kemono Party's Discord to Json data.
    Offers functions for scraping Discord sub channel IDs and scraping the channels themselves.
    """
    # Most recent paging state of discord_channel_lookup():
    # {"channelID": <str>, "skip": <int offset of the next page>}
    __recent: dict = None

    def discord_lookup(self, discordID: str, scraper: CloudflareScraper) -> dict:
        """
        Looks up a discord id using Kemono.party's API and returns
        the result in JSON format

        Param:
            discordID: ID of discord channel to grab channel IDs from
            scraper: Scraper to use while scraping kemono
        Return: channelIDs in JSON format
        """
        # Link URL
        url = DISCORD_LOOKUP_API + discordID

        # Grab data
        # NOTE(review): the loop repeats while the response is falsy; if the
        # scraper returns a requests.Response, an HTTP error status is falsy
        # too, so this would retry forever on a permanent error -- confirm.
        data = None
        while not data:
            try:
                data = scraper.get(url, timeout=5, headers=HEADERS)
            except (requests.exceptions.ConnectionError, requests.exceptions.ConnectTimeout, requests.exceptions.ReadTimeout):
                logging.debug("Connection error, retrying")
                time.sleep(1)

        # Convert data
        js = data.json()
        logging.debug("Received " + str(js) + " from " + url)

        # Return json
        return js

    def discord_lookup_all(self, channelID: str | None, threads: int = 6, sessions: list = None) -> dict | list:
        """
        Similar to discord_channel_lookup() but processes everything, not just in segments.
        NOTE: will take a significant amount of time if discord channel is of considerable size

        Param:
            channelID: Discord channel id to scrape
            threads: Number of threads to use while looking up js
            sessions: list of sessions used when scraping, size must be >= threads.
                      If omitted, one session per thread is created here.
                      All sessions, including caller supplied ones, are closed
                      before returning.
        Raise: AssertionError if sessions is given but shorter than threads
        Return: list of scraped pages, indexed by page number
        """
        # Grab data
        js_buff = []

        # Generate threads and threading vars
        pool = ThreadPool(threads)
        pool.start_threads()
        js_buff_lock = Lock()
        main_sem = Semaphore(0)

        # Generate sessions for each thread
        if sessions:
            # Raised explicitly so the check still runs under python -O
            if len(sessions) < threads:
                raise AssertionError("sessions must contain at least one session per thread")
        else:
            # Built in a loop on purpose: `[obj] * threads` would repeat the
            # SAME session object, so every worker would share one session.
            sessions = [cfscrape.create_scraper(requests.Session()) for _ in range(threads)]
            for session in sessions:
                adapter = requests.adapters.HTTPAdapter(pool_connections=1, pool_maxsize=1, max_retries=0, pool_block=True)
                # NOTE(review): mounted on 'http://' only while the API
                # constants are https URLs -- confirm this is intended.
                session.mount('http://', adapter)

        # Seed one task per thread; every task re-enqueues its own follow-up
        # page until it reaches an empty page
        for i in range(0, threads):
            pool.enqueue((self.__discord_lookup_thread_job, (threads, DISCORD_CHANNEL_CONTENT_SKIP_INCRE, i * DISCORD_CHANNEL_CONTENT_SKIP_INCRE, channelID, sessions[i], main_sem, js_buff, js_buff_lock, pool)))

        # Sleep until done
        main_sem.acquire()

        # Kill threads
        pool.join_queue()
        pool.kill_threads()

        # Kill all adapters
        for session in sessions:
            session.close()

        # Return json
        return js_buff

    def __discord_lookup_thread_job(self, tcount: int, skip: int, curr: int, channelID: str, scraper: CloudflareScraper, main_sem: Semaphore, js_buff: list, js_buff_lock: Lock, pool: ThreadPool) -> None:
        """
        Thread job for worker threads in discord_lookup_all. Processes a segment of
        data then sends its next segment into thread queue

        Param:
            tcount: number of threads used within threadpool.
            main_sem: Semaphore used to wake up main thread
            skip: skip amount to access next page of content, will be the same for all threads
            curr: current skip number
            channelID: Discord channel id
            scraper: scraper to be used to scrape js
            js_buff: list used to store stuff
            js_buff_lock: lock for js_buff
            pool: Threadpool used for this function
        Pre: main_sem begins on zero
        Pre: tcount number of tasks were/is going to be submitted into threadpool
        NOTE: that cond isn't used because there is a situation where broadcast may be
        called before calling thread goes to sleep
        """
        data = None
        # Process current task
        url = DISCORD_CHANNEL_CONTENT_PRE_API + channelID + DISCORD_CHANNEL_CONTENT_SUF_API + str(curr)
        logging.info(f"scanning {url}")
        while not data:
            try:
                data = scraper.get(url, timeout=5, headers=HEADERS)
            except (requests.exceptions.ConnectionError, requests.exceptions.ConnectTimeout, requests.exceptions.ReadTimeout):
                logging.info("Connection error, retrying -> url: {s}".format(s=url))
                # Back off like discord_lookup() does instead of busy-retrying
                time.sleep(1)

        if not data:
            logging.critical("Invalid data scraped -> url: {s}".format(s=url))

        # Convert data
        js = data.json()

        # Add data to js_buff
        if len(js) > 0:
            with js_buff_lock:
                # Page number of this segment; curr advances in multiples of skip
                insert_pos = curr // skip

                # If js_buff is too small, extend it
                space_diff = self.__calculate_additional_list_slots(js_buff, insert_pos)
                if space_diff > 0:
                    js_buff += [None] * space_diff

                # Add into js buff
                js_buff[insert_pos] = js
                logging.debug("Received " + str(js) + " from " + url)

            # Create and add task back into threadpool
            pool.enqueue((self.__discord_lookup_thread_job, (tcount, DISCORD_CHANNEL_CONTENT_SKIP_INCRE, curr + tcount * DISCORD_CHANNEL_CONTENT_SKIP_INCRE, channelID, scraper, main_sem, js_buff, js_buff_lock, pool)))

        # If is done, broadcast to main thread
        else:
            main_sem.release()

    def __calculate_additional_list_slots(self, l: list, p: int) -> int:
        """
        Given the list l and position to insert element p, returns how many more list slots are
        needed in l to meet p

        Args:
            l (list): list
            p (int): position to insert element

        Returns:
            int: how many more list slots needed in l to meet p, if is <=0, no additional slots are needed
        """
        return p - (len(l) - 1)

    def discord_channel_lookup(self, channelID: str | None, scraper: CloudflareScraper) -> dict | list:
        """
        Looks up a channel's content and returns it. Content is returned in
        chunks and not all content is returned; however, subsequent calls will
        return results that will always be different.

        Param:
            channelID:
                channelID of channel to scrape.
                If is None, or matches the previously scraped channel,
                continue from the endpoint of the previous scrape.
                If is a different channel, scrape starting at offset 0
            scraper:
                scraper to use while scraping kemono
        Raise: AssertionError if channelID is None and there is no previous scrape
        Return: JSON object containing data from the file
        """
        # If None sent but no history, quit
        # (raised explicitly so the check still runs under python -O)
        if not channelID and not self.__recent:
            raise AssertionError("channelID is required when there is no previous scrape")

        # If no history, create initial history
        if not self.__recent:
            self.__recent = {"channelID": channelID, "skip": 0}

        # If history exists and matches, use old data
        if not channelID or channelID == self.__recent.get("channelID"):
            skip = self.__recent.get("skip")
            channelID = self.__recent.get("channelID")

        # If history exists but does not match, start from beginning
        else:
            skip = 0

        # Remember where the next call has to continue
        self.__recent = {"channelID": channelID, "skip": skip + DISCORD_CHANNEL_CONTENT_SKIP_INCRE}

        # Grab data
        data = None
        url = DISCORD_CHANNEL_CONTENT_PRE_API + channelID + DISCORD_CHANNEL_CONTENT_SUF_API + str(skip)
        while not data:
            try:
                data = scraper.get(url, timeout=5, headers=HEADERS)
            except (requests.exceptions.ConnectionError, requests.exceptions.ConnectTimeout, requests.exceptions.ReadTimeout):
                logging.debug("Connection error, retrying")
                # Back off like discord_lookup() does instead of busy-retrying
                time.sleep(1)

        # Convert data
        js = data.json()
        logging.debug("Received " + str(js) + " from " + url)

        # Return json
        return js
"652563554108571650", "name": "100\u5186_100yen"}, {"id": "652592073785999369", "name": "300\u30d3\u30fc\u30eb_beer"}, {"id": "652551798472835072", "name": "300\u30d3\u30fc\u30eb_beer"}, {"id": "634659046678593536", "name": "\u4f5c\u696d\u4e2d_wip"}]') 33 | 34 | def test_channel_lookup(self): 35 | """ 36 | Tests discord channel lookup 37 | """ 38 | # Get first 25 39 | # 634659046678593536 40 | js = self.js.discord_channel_lookup("634659046678593536", self.scraper) 41 | before = json.dumps(js) 42 | 43 | # Get next 25 using None 44 | js = self.js.discord_channel_lookup(None, self.scraper) 45 | after = json.dumps(js) 46 | self.assertNotEqual(before, after) 47 | 48 | # Get next 25 using channel name 49 | js = self.js.discord_channel_lookup("634659046678593536", self.scraper) 50 | last = json.dumps(js) 51 | self.assertNotEqual(last, before) 52 | self.assertNotEqual(last, after) 53 | 54 | # Switch to another channel 55 | js = self.js.discord_channel_lookup("652563554108571650", self.scraper) 56 | logging.info("first" + str(js[len(js) - 1].get('content'))) 57 | self.assertEqual(str(js[0].get('content')), r'**6月の$1GoogleDriveリンク June $1 Google Drive link: **https://drive.google.com/drive/folders/1nXscFvaEiLRVLf0d7di6ti1iH-7MdZ9F?usp=sharing @everyone') 58 | self.assertEqual(str(str(js[len(js) - 1].get('content'))), r'> > **6月の$1 MEGA リンク June $1 MEGA link: **https://mega.nz/folder/z4dk3AhK#0L7XjaYUbH3iffrVpkLIeQ @everyone') 59 | # End of channel 60 | js = self.js.discord_channel_lookup(None, self.scraper) 61 | self.assertEqual(len(js), 0) 62 | 63 | 64 | def tearDown(self) -> None: 65 | """ 66 | Deconstructs session 67 | """ 68 | self.scraper.close() 69 | 70 | if __name__ == '__main__': 71 | unittest.main() -------------------------------------------------------------------------------- /HashTable.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | from typing import TypeVar, Generic 3 | import mmh3 4 | 5 | T 
T = TypeVar('T')
V = TypeVar('V')
MINIMUM_SIZE = 2


class Error(Exception):
    """Base class for other exceptions"""
    pass


class MismatchTypeException(Error):
    """Raised when a comparison is made on 2 different types"""
    pass


class KVPair(Generic[V, T]):
    """
    Generic KVPair structure where:
        Key is generic V
        Value is generic T
        Tombstone is bool & optional
    The key is fixed at construction; the value and the tombstone flag can
    be changed through their setters.
    """
    __key: V
    __value: T
    __tombstone: bool

    def __init__(self, key: V, value: T) -> None:
        """
        Initializes KVPair. Tombstone is disabled by default.

        Param
            key: key (use to sort)
            value: value (data)
        """
        self.__value = value
        self.__key = key
        self.__tombstone = False

    def getKey(self) -> V:
        """
        Returns key
        Return: key
        """
        return self.__key

    def getValue(self) -> T:
        """
        Returns value
        Return: value
        """
        return self.__value

    def setValue(self, newValue: T) -> None:
        """
        Set value
        Param: newValue: value to store
        """
        self.__value = newValue

    def compareTo(self, other) -> int:
        """
        Compares self and other key value. Ignores generic typing

        Raise: MismatchTypeException if other is not a KVPair\n
        Return:
            self.getKey() > other.getKey() -> 1\n
            self.getKey() == other.getKey() -> 0\n
            self.getKey() < other.getKey() -> -1\n
        """
        # isinstance() is False for None, so this also rejects None
        if not isinstance(other, KVPair):
            raise MismatchTypeException("other is not of type KVPair(V,T)")

        if self.__key > other.getKey():
            return 1
        if self.__key == other.getKey():
            return 0
        return -1

    def __str__(self) -> str:
        """
        toString function which returns KVPair in json style formatting
        {key:, value:, Tomb:}

        value relies on T's __str__ function

        Return: KVPair in json style format
        """
        return "{key:" + str(self.__key) + ", value:" + str(self.__value) + ", Tomb:" + ("T" if self.__tombstone else "F") + "}"

    def setTombstone(self) -> None:
        """
        Turns on tombstone
        """
        self.__tombstone = True

    def disableTombstone(self) -> None:
        """
        Turns off tombstone
        """
        self.__tombstone = False

    def isTombstone(self) -> bool:
        """
        Returns tombstone status

        Return true if set, false if disabled
        """
        return self.__tombstone


class HashTable:
    """
    Closed, extensible hash table database storing KVPairs of any type
    Was built using code I wrote in Java for CS3114 with some slight functionality
    adjustments

    Keys are hashed through their str() form and collisions are resolved
    with quadratic probing. Deleted records are kept as tombstones.

    @author Jeff Chen
    @created 5/8/2022
    @Last modified 5/8/2022
    """
    __size: int
    __records: List[KVPair]
    __occupied: int

    def __init__(self, size):
        """
        Construct a hash table with initial size.

        Param:
            size: Initial hash table size, raised to MINIMUM_SIZE if smaller
        """
        self.__size = max(size, MINIMUM_SIZE)
        self.__records = [None] * self.__size
        self.__occupied = 0

    # Core Functions #################################################

    def hashtable_toarray(self) -> list:
        """
        Returns a list of nontombstone entries from the hashtable

        Returns:
            list: (key, value) tuples in table slot order
        """
        return [(rec.getKey(), rec.getValue()) for rec in self.__records
                if rec is not None and not rec.isTombstone()]

    def hashtable_add(self, pair: KVPair) -> None:
        """
        Adds a KVPair to the hash table and expands if needed
        If a duplicate entry exists, do nothing

        Param:
            pair: data to add to the hash table
        """
        # Check if a record exists in the table
        if self.hashtable_exist(pair) != -1:
            return

        # TableSz
        if self.__isHalfFull():
            self.__doubleTable()

        # Find insert position
        home = self.hash(str(pair.getKey()), self.__size)
        tombstone = -1
        curr = home

        step = 1
        while self.__records[curr] is not None:
            # Remember the first tombstone on the probe path so it is reused
            if self.__records[curr].isTombstone() and tombstone == -1:
                tombstone = curr

            curr = self.__quadraticProbe(home, step, self.__size)
            step += 1

        # Add to hash table
        if tombstone != -1:
            self.__records[tombstone] = pair
        else:
            self.__records[curr] = pair

        self.__occupied += 1

    def hashtable_lookup_value(self, searchKey) -> any:
        """
        Look up a KVPair and returns its value

        Param:
            searchKey: key to search for
        Return: value of matching KVPair, None if not found
        """
        # Get index from hash table
        index = self.hashtable_exist_by_key(searchKey)

        if index == -1:
            return None
        return self.__records[index].getValue()

    def hashtable_edit_value(self, searchKey, newValue) -> bool:
        """
        Searches for a key in hash table, if found, edits value to
        newValue

        Param
            searchKey: key of KVPair to look for
            newValue: new value to set KVPair to
        Pre: searchKey and newValue match generic type of KVPair
        Return: True if was successful, False if not
        """
        # Get index from hash table
        index = self.hashtable_exist_by_key(searchKey)

        if index < 0:
            return False

        # Edit value
        self.__records[index].setValue(newValue)
        return True

    def hashtable_delete(self, token: KVPair) -> bool:
        """
        Removes an item from the hash table
        Param
            token: record to remove from table
        Return true if removed, false if not
        """
        # Get position
        pos = self.hashtable_exist(token)

        if pos == -1:
            return False

        # Remove from table; the slot stays occupied by a tombstone so that
        # probe sequences running through it are not cut short
        self.__records[pos].setTombstone()
        self.__occupied -= 1
        return True

    def hashtable_exist_by_key(self, searchKey) -> int:
        """
        Check if a KVPair exists within a hash table.
        If a lengthy sequence of probes (>=10) is detected, table will
        be resized

        Params:
            searchKey: key of KVPair to search for
        Return: position if found, -1 if does not exists
        """
        if searchKey is None:
            return -1
        return self.__find(searchKey)

    def hashtable_exist(self, token: KVPair) -> int:
        """
        Check if a KVPair exists within a hash table.
        If a lengthy sequence of probes (>=10) is detected, table will
        be resized

        Params:
            token: KVPair to search, only its key is compared
        Return: position if found, -1 if not
        """
        return self.__find(token.getKey())

    def __find(self, key) -> int:
        """
        Probes the table for a live (non-tombstone) record whose key equals key.

        Shared by hashtable_exist() and hashtable_exist_by_key() so both
        follow the same probe sequence and resize rule.

        Param:
            key: key to search for
        Return: slot of the matching record, -1 if not found
        """
        home = self.hash(str(key), self.__size)
        curr = home

        step = 1
        while self.__records[curr] is not None:
            record = self.__records[curr]
            if not record.isTombstone() and record.getKey() == key:
                return curr

            # Hashtable expansion is an extremely costly operation, so it is
            # only done once a probe sequence reaches 10 steps.
            if step >= 10:
                self.__doubleTable()
                # The table size changed, so the home slot must be recomputed
                # as well; probing on from the old home would walk a sequence
                # no record was ever stored along.
                home = self.hash(str(key), self.__size)
                curr = home
                step = 1
            else:
                curr = self.__quadraticProbe(home, step, self.__size)
                step += 1
        return -1

    # Getters ########################################################
    def hashtable_getSize(self) -> int:
        """
        Get size of hash table

        Return: size of hash table
        """
        return self.__size

    def hashtable_getOccupied(self) -> int:
        """
        Get number of occupied slots

        Return: number of occupied slots
        """
        return self.__occupied

    def hashtable_print(self) -> None:
        """
        Prints table in the following format
        Index\tData
        1\t\t<record>
        2\t\t<record>
        ...
        Empty slots are printed with no data after the index
        """
        print("Index\tData")
        for i in range(0, self.__size):
            print(str(i + 1) + "\t\t" + ("" if self.__records[i] is None else str(self.__records[i])))

    # Utility #######################################################

    def __transfer(self, dest: List[KVPair], src: KVPair) -> None:
        """
        Transfers an existing record to dest
        Param
            dest: table to transfer records to
            src: record to transfer
        Pre: dest no tombstones and src not a tombstone.
            dest less than half full
        """
        home = self.hash(str(src.getKey()), len(dest))
        curr = home

        step = 1
        while dest[curr] is not None:
            curr = self.__quadraticProbe(home, step, len(dest))
            step += 1

        dest[curr] = src

    def __rehash(self, dest: List[KVPair]) -> None:
        """
        Rehashes and transfers over all non-tombstone entries to dest

        Param
            dest: table to transfer records to
        """
        # Walks the slots directly instead of counting down the occupied
        # counter, so a drifted counter cannot run past the end of the table
        for record in self.__records:
            if record is not None and not record.isTombstone():
                self.__transfer(dest, record)

    def __doubleTable(self) -> None:
        """
        Doubles and rehashes hash table
        """
        newRecords: List[KVPair] = [None] * self.__size * 2
        self.__rehash(newRecords)

        self.__records = newRecords
        self.__size = len(newRecords)

    def __isHalfFull(self) -> bool:
        """
        Checks if the table is half full

        Return: True if half full, false if not
        """
        return (self.__size - self.__occupied) <= self.__occupied

    def __quadraticProbe(self, home: int, step: int, tableSz: int) -> int:
        """
        Performs quadratic probe on home at step
        Param:
            home: home slot
            step: nth step in quadratic step
            tableSz: size of hash table
        Return: record slot at quadratic probe step
        """
        return (home + step * step) % tableSz

    def hash(self, s: str, m: int) -> int:
        """
        Hashing algorithm using murmurhash3 32-bit unsigned int

        Params
            s: string to hash
            m: size of table
        Return: home slot of s
        # Edit 2023, fixed broken hash function
        """
        return mmh3.hash(s, signed=False) % m
################# 14 | ################################################## 15 | 16 | Author: Jeff Chen 17 | Last modified: 6/6/2022 18 | """ 19 | class KMPTestCase(unittest.TestCase): 20 | 21 | def setUp(self) -> None: 22 | """ 23 | Sets up an Null KMP since tests require different paramaters 24 | """ 25 | logging.basicConfig(level=logging.INFO) 26 | self.KMP = None 27 | tname.id = None 28 | 29 | @classmethod 30 | def setUpClass(cls): 31 | """ 32 | Create temporary testing directory 33 | """ 34 | cls.tempdir = os.path.abspath('./') + '\\temp\\' 35 | 36 | if os.path.exists(cls.tempdir): 37 | logging.critical("Please remove before testing ->" + cls.tempdir) 38 | exit() 39 | 40 | def test_start_kill_threads(self) -> None: 41 | """ 42 | Tests the starting and killing of threads 43 | """ 44 | self.KMP = KMP(self.tempdir, False, tcount=None, chunksz=None, ext_blacklist=None) 45 | 46 | # Single thread 47 | self.KMP._KMP__threads = self.KMP._KMP__create_threads(1) 48 | self.KMP._KMP__kill_threads(self.KMP._KMP__threads) 49 | self.assertRaises(DeadThreadPoolException, self.KMP._KMP__call_and_interpret_url, 50 | "https://kemono.party/gumroad/user/9222612694494/post/AizNy") 51 | 52 | # 3 Threads 53 | self.KMP._KMP__threads = self.KMP._KMP__create_threads(3) 54 | self.KMP._KMP__kill_threads(self.KMP._KMP__threads) 55 | self.assertRaises(DeadThreadPoolException, self.KMP._KMP__call_and_interpret_url, 56 | "https://kemono.party/gumroad/user/9222612694494/post/AizNy") 57 | 58 | self.KMP.close() 59 | def test_trim_fname(self) -> None: 60 | """ 61 | Tests __trim_fname 62 | """ 63 | self.KMP = KMP(self.tempdir, False, tcount=None, chunksz=None, ext_blacklist=None) 64 | # 65 | # Download まとめDL用.zip 66 | # 67 | 68 | # Case 3 -> Space 69 | self.assertEqual(self.KMP._KMP__trim_fname( 70 | "Download まとめDL用.zip"), "まとめDL用.zip") 71 | 72 | # Case 3 -> Multiple spaces 73 | self.assertEqual(self.KMP._KMP__trim_fname( 74 | "Download 1_2 2016 aged whiskey.zip"), "1_2 2016 aged whiskey.zip") 75 | 
76 | # Case 2 -> Bad extension 77 | self.assertEqual(self.KMP._KMP__trim_fname( 78 | "/data/3d/68/3d68def31822e95ad249ceb2237fcdae29b644e6702366ddae761572be900955.jpg?f=https%3A//c10.patreonusercontent.\ 79 | com/3/e30%253D/patreon-media/p/post/30194248/7cffbc9604664ccab13f3b57fdc78e6f/1.jpe%3Ftoken-time%3D1570752000%26token\ 80 | -hash%3DLadY-wBiRPi84Qb5X-KI7NEgEP6HE6lljOLiHBm7qY8%253D"), "3d68def31822e95ad249ceb2237fcdae29b644e6702366ddae761572be900955.jpg") 81 | 82 | # Case 1 -> Good extension 83 | self.assertEqual(self.KMP._KMP__trim_fname( 84 | "/data/4f/83/4f83453fc625095401da81248a2242246b01b229bc5e1b2e1dd470da866f1980.jpg?f=b9ffc2f9-2c11-42c8-b5a2-7995a233ca41\ 85 | .jpg"), "b9ffc2f9-2c11-42c8-b5a2-7995a233ca41.jpg") 86 | 87 | self.assertEqual(self.KMP._KMP__trim_fname("/data/8b/e7/8be7e3fc0b0304c97b0bd5d9f7a66b2ad97c2d798808b52824642480e8dfe0d7.gif?f=BBS-Snoggler-Update.gif"), "BBS-Snoggler-Update.gif") 88 | self.KMP.close() 89 | def test_download_static_files(self) -> None: 90 | """ 91 | Tests downloading files under different circumstances 92 | """ 93 | 94 | # Single thread no image 95 | self.KMP = KMP(self.tempdir, unzip=False, tcount=1, chunksz=None, ext_blacklist=None) 96 | self.KMP.routine(unpacked=0, url= 97 | "https://kemono.party/patreon/user/33271853/post/36694748") 98 | self.KMP.close() 99 | 100 | # 3 Thread no image 101 | self.KMP = KMP(self.tempdir, unzip=False, tcount=3, chunksz=None, ext_blacklist=None) 102 | self.KMP.routine(unpacked=0, url= 103 | "https://kemono.party/patreon/user/33271853/post/47946953") 104 | self.KMP.close() 105 | 106 | # Single Thread, 2 image 107 | self.KMP = KMP(self.tempdir, unzip=False, tcount=1, chunksz=None, ext_blacklist=None) 108 | self.KMP.routine(unpacked=0, url= 109 | "https://kemono.party/patreon/user/33271853/post/36001529") 110 | self.KMP.close() 111 | 112 | # 3 Threads, 2 image 113 | self.KMP = KMP(self.tempdir, unzip=False, tcount=3, chunksz=None, ext_blacklist=None) 114 | self.KMP.routine(unpacked=0, 
url= 115 | "https://kemono.party/patreon/user/33271853/post/47255266") 116 | self.KMP.close() 117 | 118 | # 1 Thread, multi images 119 | self.KMP = KMP(self.tempdir, unzip=False, tcount=1, chunksz=None, ext_blacklist=None) 120 | self.KMP.routine(unpacked=0, url= 121 | "https://kemono.party/patreon/user/33271853/post/65647736") 122 | self.KMP.close() 123 | 124 | # 16 Thread multi images 125 | self.KMP = KMP(self.tempdir, unzip=False, tcount=16, chunksz=None, ext_blacklist=None) 126 | self.KMP.routine(unpacked=0, url= 127 | "https://kemono.party/patreon/user/33271853/post/52792630") 128 | self.KMP.close() 129 | 130 | # Verify content: 131 | self.assertEqual(os.stat( 132 | self.tempdir + "delcieno/NAPP 3.0 PREVIEW by delcieno from Patreon Kemono/0.png").st_size, 3692609) 133 | self.assertEqual(os.stat( 134 | self.tempdir + "delcieno/NAPP 3.0 PREVIEW by delcieno from Patreon Kemono/1.png").st_size, 3692609) 135 | self.assertEqual(os.stat( 136 | self.tempdir + "delcieno/NAPP 3.0 PREVIEW by delcieno from Patreon Kemono/2.png").st_size, 2752125) 137 | self.assertEqual(os.stat( 138 | self.tempdir + "delcieno/NAPP 3.0 PREVIEW by delcieno from Patreon Kemono/3.png").st_size, 3262789) 139 | self.assertEqual(os.stat( 140 | self.tempdir + "delcieno/NAPP 3.0 PREVIEW by delcieno from Patreon Kemono/4.png").st_size, 2392221) 141 | self.assertEqual(os.stat( 142 | self.tempdir + "delcieno/NAPP 3.0 PREVIEW by delcieno from Patreon Kemono/5.png").st_size, 2349839) 143 | self.assertEqual(os.stat( 144 | self.tempdir + "delcieno/NAPP 3.0 PREVIEW by delcieno from Patreon Kemono/6.png").st_size, 5652120) 145 | self.assertEqual(os.stat( 146 | self.tempdir + "delcieno/NAPP 3.0 PREVIEW by delcieno from Patreon Kemono/7.png").st_size, 1825005) 147 | self.assertEqual(os.stat( 148 | self.tempdir + "delcieno/NAPP 3.0 PREVIEW by delcieno from Patreon Kemono/8.png").st_size, 3002485) 149 | self.assertEqual(os.stat( 150 | self.tempdir + "delcieno/NAPP 3.0 PREVIEW by delcieno from Patreon 
Kemono/9.png").st_size, 4467542) 151 | 152 | self.assertEqual(os.stat( 153 | self.tempdir + "delcieno/ARMORS and NAPP status by delcieno from Patreon Kemono/0.png").st_size, 13444381) 154 | self.assertEqual(os.stat( 155 | self.tempdir + "delcieno/ARMORS and NAPP status by delcieno from Patreon Kemono/1.png").st_size, 13444381) 156 | self.assertEqual(os.stat( 157 | self.tempdir + "delcieno/ARMORS and NAPP status by delcieno from Patreon Kemono/2.png").st_size, 13854733) 158 | self.assertEqual(os.stat( 159 | self.tempdir + "delcieno/ARMORS and NAPP status by delcieno from Patreon Kemono/3.png").st_size, 13702259) 160 | self.assertEqual(os.stat( 161 | self.tempdir + "delcieno/ARMORS and NAPP status by delcieno from Patreon Kemono/4.png").st_size, 13802523) 162 | self.assertEqual(os.stat( 163 | self.tempdir + "delcieno/ARMORS and NAPP status by delcieno from Patreon Kemono/5.png").st_size, 13040955) 164 | self.assertEqual(os.stat( 165 | self.tempdir + "delcieno/ARMORS and NAPP status by delcieno from Patreon Kemono/6.png").st_size, 13911132) 166 | self.assertEqual(os.stat( 167 | self.tempdir + "delcieno/ARMORS and NAPP status by delcieno from Patreon Kemono/7.png").st_size, 13524999) 168 | self.assertEqual(os.stat( 169 | self.tempdir + "delcieno/ARMORS and NAPP status by delcieno from Patreon Kemono/8.png").st_size, 12743876) 170 | 171 | def test_download_static_attachments(self) -> None: 172 | """ 173 | Tests downloading static attachments 174 | """ 175 | # Single .pdf 176 | self.KMP = KMP(self.tempdir, unzip=False, tcount=1, chunksz=None, ext_blacklist=None) 177 | self.KMP.routine(unpacked=0, url= 178 | "https://kemono.party/gumroad/user/5563321775917/post/wSIJ") 179 | self.KMP.close() 180 | 181 | # Single .cvf 182 | self.KMP = KMP(self.tempdir, unzip=False, tcount=1, chunksz=None, ext_blacklist=None) 183 | self.KMP.routine(unpacked=0, url= 184 | "https://kemono.party/gumroad/user/5563321775917/post/mRSH") 185 | self.KMP.close() 186 | 187 | self.assertEqual(os.stat( 
188 | self.tempdir + "Gumroad Help Center/New Creator FAQ - All the Basics in One Place by Gumroad Help Center from Gumroad Kemono/Creatorpedia.pdf").st_size, 14704) 189 | self.assertEqual(os.stat( 190 | self.tempdir + "Gumroad Help Center/An Example CSV of Exported Sales Data by Gumroad Help Center from Gumroad Kemono/Sales_CSV_Example.csv").st_size, 2933) 191 | 192 | def test_download_animated_attachments(self) -> None: 193 | """ 194 | Tests downloaded animated gif files 195 | """ 196 | # 2 Threads, 2 gifs 197 | self.KMP = KMP(self.tempdir, unzip=False, tcount=2, chunksz=None, ext_blacklist=None) 198 | self.KMP.routine(unpacked=0, url= 199 | "https://kemono.party/patreon/user/523894/post/66527944") 200 | self.KMP.close() 201 | 202 | # Single .mp4 203 | self.KMP.routine(unpacked=0, url= 204 | "https://kemono.party/gumroad/user/5563321775917/post/jnBuO") 205 | 206 | # .mov 207 | self.KMP.routine(unpacked=0, url= 208 | "https://kemono.party/gumroad/user/8844596389936/post/WBlK") 209 | 210 | self.assertEqual(os.stat( 211 | self.tempdir + "Jasonafex/New Playable Build 0.6.6 by Jasonafex from Patreon Kemono/1.gif").st_size, 1930242) 212 | self.assertEqual(os.stat( 213 | self.tempdir + "Jasonafex/New Playable Build 0.6.6 by Jasonafex from Patreon Kemono/0.gif").st_size, 1930242) 214 | self.assertEqual(os.stat( 215 | self.tempdir + "Gumroad Help Center/Creating a Product - A Streaming Video Experience by Gumroad Help Center from Gumroad Kemono/Product_Creation_-_Export_1015.mp4").st_size, 58934883) 216 | self.assertEqual(os.stat( 217 | self.tempdir + "Katon Callaway/Topology Tips by Katon Callaway from Gumroad Kemono/topoJoints.mov").st_size, 448251813) 218 | 219 | def test_download_audio_attachments(self) -> None: 220 | """ 221 | Tests downloading audio files 222 | """ 223 | # 3 mp3 224 | self.KMP = KMP(self.tempdir, unzip=False, tcount=3, chunksz=None, ext_blacklist=None) 225 | self.KMP.routine(unpacked=0, url= 226 | 
"https://kemono.party/gumroad/user/5563321775917/post/moNG") 227 | 228 | # .sf2, .wav 229 | self.KMP.routine(unpacked=0, url= 230 | "https://kemono.party/gumroad/user/3915675902935/post/NTJQZ") 231 | 232 | self.assertEqual(os.stat(self.tempdir + "Truebones Motions Animation Studios/FREE STAR TREK SOUND FX INCLUDES SOUND FONT and .WAV file formats. by Truebones Motions Animation Studios from Gumroad Kemono/HS_StarTrekFX.sf2").st_size, 807090) 233 | self.assertEqual(os.stat(self.tempdir + "Truebones Motions Animation Studios/FREE STAR TREK SOUND FX INCLUDES SOUND FONT and .WAV file formats. by Truebones Motions Animation Studios from Gumroad Kemono/sf2_smpl.wav").st_size, 806998) 234 | self.assertEqual(os.stat( 235 | self.tempdir + "Gumroad Help Center/A Music Album - Jam time by Gumroad Help Center from Gumroad Kemono/BONUS_TRACK_Cant_Tail_Me_Nothing.mp3").st_size, 6541049) 236 | self.assertEqual(os.stat( 237 | self.tempdir + "Gumroad Help Center/A Music Album - Jam time by Gumroad Help Center from Gumroad Kemono/Tribute_to_1776.mp3").st_size, 6244398) 238 | self.assertEqual(os.stat( 239 | self.tempdir + "Gumroad Help Center/A Music Album - Jam time by Gumroad Help Center from Gumroad Kemono/Why_Am_I_Michael_Bluth.mp3").st_size, 1746643) 240 | self.KMP.close() 241 | 242 | def test_download_zip_attachments(self) -> None: 243 | """ 244 | Tests downloading of a zip file and unzipping of zip files 245 | """ 246 | # Single zip file 247 | self.KMP = KMP(self.tempdir, unzip=True, tcount=1, chunksz=None, ext_blacklist=None) 248 | self.KMP.routine(unpacked=0, url= 249 | "https://kemono.party/gumroad/user/samplescience/post/YeLB") 250 | 251 | size = self.getDirSz(self.tempdir + ( 252 | r"SampleScience Plugins Samples/SampleScience TR-626 HD by SampleScience Plugins Samples. 
from Gumroad Kemono/SampleScience_TR626_HD")) 253 | self.assertEqual(size, 4509259) 254 | self.KMP.close() 255 | 256 | def test_download_alternate_zip_attachments(self) -> None: 257 | """ 258 | Tests downloading of alternate zip files (7z...) and unzipping 259 | """ 260 | 261 | self.KMP = KMP(self.tempdir, unzip=True, tcount=1, chunksz=None, ext_blacklist=None) 262 | # Single 7z file 263 | # Has been tested in other tests 264 | 265 | # Single .rar file 266 | self.KMP.routine(unpacked=0, url= 267 | "https://kemono.party/gumroad/user/6075196025658/post/lWrr") 268 | size = self.getDirSz(self.tempdir + ( 269 | r"Joe Daniels/jd Flour Sack rig for Maya by Joe Daniels from Gumroad Kemono/jd_floursack")) 270 | self.assertEqual(size, 4878146) 271 | self.KMP.close() 272 | 273 | def test_download_non_file(self) -> None: 274 | """ 275 | Tests downloading an invalid file 276 | """ 277 | # If it don't crash, it pass 278 | self.KMP = KMP(self.tempdir, unzip=True, tcount=1, chunksz=None, ext_blacklist=None) 279 | self.KMP.routine(unpacked=0, url="https://kemono.party/patreon/user/8296916/post/59821763") 280 | self.KMP.close() 281 | 282 | def test_download_empty_window(self) -> None: 283 | """ 284 | Tests downloading an artist with no works 285 | """ 286 | # https://kemono.party/gumroad/user/gunwild 287 | self.KMP = KMP(self.tempdir, unzip=True, tcount=1, chunksz=None, ext_blacklist=None) 288 | self.KMP.routine(unpacked=0, url="https://kemono.party/patreon/user/8296916/post/59821763") 289 | self.assertEqual(self.getDirSz(os.path.join(self.tempdir, "Gunwild")), 0) 290 | self.KMP.close() 291 | 292 | def test_file_text(self) -> None: 293 | """ 294 | Tests downloading an artist with a link in their files segment 295 | """ 296 | self.KMP = KMP(self.tempdir, unzip=False, tcount=1, chunksz=None, ext_blacklist=None) 297 | 298 | # DNE 299 | self.KMP.routine(unpacked=0, url="https://kemono.party/fanbox/user/305765/post/3885644") 300 | 
self.assertFalse(os.path.exists(os.path.join(self.tempdir, "Y.P/NEXT by Y.P from Pixiv Fanbox Kemono/file__text.txt"))) 301 | 302 | # Exists 303 | self.KMP.routine(unpacked=0, url="https://kemono.party/patreon/user/5489259/post/30194248") 304 | with open(os.path.join(self.tempdir, "misswarmj/My First NSFW ASMR Video Preview by misswarmj from Patreon Kemono/file__text.txt"), 'r') as fd: 305 | self.assertEqual(fd.read(), "MissWarmJ on Twitter\n\ 306 | \n\ 307 | \n\ 308 | Hey,pls wear your earphone watch till the end! 1:38minuts~ It is the first NSFW #ASMR Preview. Would appriated Mega RT and Likes ^^ https://t.co/mwmfzS0cfb https://t.co/3n7bUYMzRD\n\ 309 | https://twitter.com/misswarmj/status/1176210868121546752\n\ 310 | \ 311 | ____________________________________________________________\n\ 312 | ") 313 | self.KMP.close() 314 | 315 | 316 | def test_download_bulk(self) -> None: 317 | """ 318 | Tests downloading bulk files 319 | """ 320 | temp = os.path.join(self.tempdir, "temp.txt") 321 | self.KMP = KMP(self.tempdir, unzip=True, tcount=6, chunksz=None, ext_blacklist=None) 322 | if os.path.exists(temp): 323 | os.remove(temp) 324 | 325 | 326 | # Single url, all works 327 | with open(temp, 'w+') as fd: 328 | fd.write("https://kemono.party/gumroad/user/5352387105120\n") 329 | fd.flush() 330 | fd.seek(0) 331 | self.KMP.routine(unpacked=0, url=fd.readlines()) 332 | 333 | # Single url, single work 334 | os.remove(temp) 335 | with open(temp, 'w+') as fd: 336 | fd.write("https://kemono.party/gumroad/user/650894809818/post/HskiT\n") 337 | fd.flush() 338 | fd.seek(0) 339 | self.KMP.routine(unpacked=0, url=fd.readlines()) 340 | 341 | # Multi url, all works 342 | os.remove(temp) 343 | with open(temp, 'w+') as fd: 344 | fd.write("https://kemono.party/gumroad/user/5252246151109\n") 345 | fd.write("https://kemono.party/gumroad/user/6100863138065\n") 346 | fd.flush() 347 | fd.seek(0) 348 | self.KMP.routine(unpacked=0, url=fd.readlines()) 349 | 350 | # Multi url, works only 351 | 
os.remove(temp) 352 | with open(temp, 'w+') as fd: 353 | fd.write("https://kemono.party/gumroad/user/5428435542017/post/wEhslZ\n") 354 | fd.write("https://kemono.party/gumroad/user/863606373292/post/FyiUu\n") 355 | fd.flush() 356 | fd.seek(0) 357 | self.KMP.routine(unpacked=0, url=fd.readlines()) 358 | 359 | # Multi url, mix of works 360 | os.remove(temp) 361 | with open(temp, 'w+') as fd: 362 | fd.write("https://kemono.party/gumroad/user/7331928256471/post/yPvnb\n") 363 | fd.write("https://kemono.party/gumroad/user/3340403173434\n") 364 | fd.write("https://kemono.party/gumroad/user/7331928256471/post/iMbJe\n") 365 | fd.flush() 366 | fd.seek(0) 367 | self.KMP.routine(unpacked=0, url=fd.readlines()) 368 | 369 | self.assertEqual(self.getDirSz(os.path.join(self.tempdir, "Perry Leijten/Maya skinning tools by Perry Leijten from Gumroad Kemono")), 6862522) 370 | self.assertEqual(self.getDirSz(os.path.join(self.tempdir, "Perry Leijten/Samus Rig by Perry Leijten from Gumroad Kemono")), 72827106) 371 | self.assertEqual(self.getDirSz(os.path.join(self.tempdir, "Jabir J3")), 952513) 372 | self.assertEqual(self.getDirSz(os.path.join(self.tempdir, "Kenzie Smith Piano")), 4362578) 373 | self.assertEqual(self.getDirSz(os.path.join(self.tempdir, "MortMort")), 80470) 374 | self.assertEqual(self.getDirSz(os.path.join(self.tempdir, "Nikolai Mamashev")), 98229752) 375 | self.assertEqual(self.getDirSz(os.path.join(self.tempdir, "Randy Bishop")), 70006225) 376 | self.assertEqual(self.getDirSz(os.path.join(self.tempdir, "Skylleon")), 4246089) 377 | self.assertEqual(self.getDirSz(os.path.join(self.tempdir, "Tatyworks")), 19828495) 378 | os.remove(temp) 379 | self.KMP.close() 380 | 381 | def test_download_polluted_href(self): 382 | """ 383 | Tests downloading files from a source that contains both internally hosted 384 | and externally hosted URLs. 
385 | """ 386 | self.KMP = KMP(self.tempdir, unzip=True, tcount=2, chunksz=None, ext_blacklist=None) 387 | self.KMP.routine(unpacked=0, url="https://kemono.party/fanbox/user/3102267/post/3841095") 388 | self.assertEqual(self.getDirSz(os.path.join(self.tempdir, "mochitaichi/抱き枕カバー用のラフ by mochitaichi from Pixiv Fanbox Kemono")), 3716346) 389 | self.KMP.close() 390 | 391 | def test_extract_same__dest(self): 392 | """ 393 | Tests extracting xzip files to the same directory 394 | """ 395 | # Zip File already exists 396 | 397 | second = KMP(self.tempdir, unzip=True, tcount=2, chunksz=None, ext_blacklist=None) 398 | second.routine(unpacked=0, url="https://kemono.party/fanbox/user/646778/post/3474562") 399 | 400 | self.assertFalse(os.path.exists(os.path.join(self.tempdir, "nbit/Basic 2022年 03月 by nbit from Pixiv Fanbox Kemono/01basic - Blue Archive Ako.zip"))) 401 | self.assertFalse(os.path.exists(os.path.join(self.tempdir, "nbit/Basic 2022年 03月 by nbit from Pixiv Fanbox Kemono/01basic - Blue Archive Ako.zip"))) 402 | size = self.getDirSz(self.tempdir + ( 403 | r"nbit/Basic 2022年 03月 by nbit from Pixiv Fanbox Kemono/cap")) 404 | size2 = self.getDirSz(self.tempdir + ( 405 | r"nbit/Basic 2022年 03月 by nbit from Pixiv Fanbox Kemono/(1)cap")) 406 | self.assertTrue(size == 63149843 or size == 103350378) 407 | self.assertTrue(size2 == 63149843 or size2 == 103350378) 408 | second.close() 409 | 410 | def test_duplicate_file(self): 411 | """ 412 | WARNING: Test takes significant time to complete 413 | 414 | Tests downloading a set of files and redownloading it, no new files 415 | should be added after redownloading 416 | """ 417 | # Download directory 418 | self.KMP = KMP(self.tempdir, unzip=True, tcount=12, chunksz=None, ext_blacklist=None) 419 | self.KMP.routine(unpacked=0, url='https://kemono.party/fanbox/user/39123643?o=25') 420 | 421 | # get size 422 | size = self.getDirSz(self.tempdir + (r"Belko")) 423 | logging.info(size) 424 | # Redownload 425 | self.KMP.reset() 426 | 
self.KMP.routine(unpacked=0, url='https://kemono.party/fanbox/user/39123643?o=25') 427 | 428 | # Confirm size is unchanged 429 | self.assertEqual(self.getDirSz(self.tempdir + (r"Belko")), size) 430 | self.KMP.close() 431 | 432 | def test_download_dead_image(self): 433 | """ 434 | Tests downloading a dead 3rd party link 435 | """ 436 | self.KMP = KMP(self.tempdir, unzip=True, tcount=2, chunksz=None, ext_blacklist=None) 437 | self.KMP.routine(unpacked=0, url="https://kemono.party/patreon/user/5489259/post/22660508") 438 | self.KMP.close() 439 | # If it does not crash, it passes 440 | 441 | def test_download_link_not_file(self): 442 | """ 443 | Tests downloading a page where the download section contains links instead 444 | of files, should be skipped 445 | """ 446 | self.KMP = KMP(self.tempdir, unzip=True, tcount=2, chunksz=None, ext_blacklist=None) 447 | self.KMP.routine(unpacked=0, url="https://kemono.party/patreon/user/5489259/post/29891980") 448 | self.KMP.close() 449 | # If it does not crash, it passes 450 | 451 | def test_download_non_image_img(self): 452 | """ 453 | Tests downloading an 'image' that isn't actually an image but a link 454 | """ 455 | self.KMP = KMP(self.tempdir, unzip=True, tcount=2, chunksz=None, ext_blacklist=None) 456 | self.KMP.routine(unpacked=0, url="https://kemono.party/patreon/user/5489259/post/16278266") 457 | 458 | self.assertTrue(os.path.exists(self.tempdir + "misswarmj/New Feet lover post on Twitter by misswarmj from Patreon Kemono/0.jpg")) 459 | self.assertTrue(os.path.exists(self.tempdir + "misswarmj/New Feet lover post on Twitter by misswarmj from Patreon Kemono/1.jpg")) 460 | self.assertFalse(os.path.exists(self.tempdir + "misswarmj/New Feet lover post on Twitter by misswarmj from Patreon Kemono/2.jpg")) 461 | self.assertTrue(os.path.exists(self.tempdir + "misswarmj/New Feet lover post on Twitter by misswarmj from Patreon Kemono/3.jpg")) 462 | self.KMP.close() 463 | 464 | def test_post_content(self) -> None: 465 | """ 466 | 
Tests the downloading of post content 467 | """ 468 | self.KMP = KMP(self.tempdir, unzip=True, tcount=1, chunksz=None, ext_blacklist=None) 469 | 470 | # Empty post content 471 | self.KMP.routine(unpacked=0, url="https://kemono.party/gumroad/user/6033318026591/post/uijUqf") 472 | self.assertFalse(os.path.exists(os.path.join(self.tempdir, "Motion Ape/Free Bounce Tool for After Effects by Motion Ape from Gumroad Kemono/post__content.txt"))) 473 | 474 | # Plaintext only 475 | self.KMP.routine(unpacked=0, url= 476 | "https://kemono.party/patreon/user/8296916/post/53546555") 477 | with open(os.path.join(self.tempdir, "dreamsavior/Translator Ver 3.7.11 by dreamsavior from Patreon Kemono/post__content.txt"), 'r') as fd: 478 | self.assertEqual(fd.read(), "There is a bug on ver 3.7.9 that prevents some users from installing the addon. This version fixes that bug.\ 479 | \nThis version also fixes a bug that caused Translator++ to write files with ASCII encoding characters despites the translation target being UTF8\ 480 | \nTranslator++ Ver 3.7.11\ 481 | \nUpdate : Renparser Ver 0.7.2\ 482 | \nFix : Renpy : Failed when exporting into a file\ 483 | \nFix : Renpy : Force write encoding into UTF-8 when detected as ASCII\ 484 | \nUpdate : KAGParser 1.2\ 485 | \nFix : KAG : Force write encoding into UTF-8 when detected as ASCII\ 486 | \nFix : Unable to install new addon\ 487 | \nI'm sorry for the inconvenience.\ 488 | \nEnjoy your day, and stay safe.\ 489 | \n(Yesterday I got the news that my mother and my brother were positive for covid)") 490 | 491 | # Links 492 | self.KMP.routine(unpacked=0, url= 493 | "https://kemono.party/patreon/user/8296916/post/52915682") 494 | with open(os.path.join(self.tempdir, "dreamsavior/RPG Maker MZ Is Free on steam by dreamsavior from Patreon Kemono/post__content.txt"), 'r') as fd: 495 | self.assertEqual(fd.read(), "Just a short notice. I'm not affiliated with Kadokawa nor endorsed by them. But \ 496 | when they make they software free ... 
even though for limited time I need to notice you all. You may need it to \ 497 | do minor editing to translate RMMZ based game.\nhttps://store.steampowered.com/app/1096900/RPG_Maker_MZ/\ 498 | \nhttps://store.steampowered.com/app/1096900/RPG_Maker_MZ/") 499 | 500 | # Images on KMP and on other website 501 | # https://kemono.party/patreon/user/8296916/post/52732723 502 | self.KMP.routine(unpacked=0, url="https://kemono.party/patreon/user/8296916/post/52732723") 503 | self.assertTrue(os.path.exists(os.path.join(self.tempdir, "dreamsavior/Error File list not found in init file by dreamsavior from Patreon Kemono/0.jpg"))) 504 | self.assertTrue(os.path.exists(os.path.join(self.tempdir, "dreamsavior/Error File list not found in init file by dreamsavior from Patreon Kemono/1.png"))) 505 | self.assertTrue(os.path.exists(os.path.join(self.tempdir, "dreamsavior/Error File list not found in init file by dreamsavior from Patreon Kemono/2.png"))) 506 | self.KMP.close() 507 | 508 | def test_post_comments(self) -> None: 509 | """ 510 | Tests downloading post comment 511 | """ 512 | self.KMP = KMP(self.tempdir, unzip=True, tcount=1, chunksz=None, ext_blacklist=None) 513 | 514 | # Empty 515 | self.KMP.routine(unpacked=0, url="https://kemono.party/patreon/user/5442365/post/19064809") 516 | self.assertFalse(os.path.exists(os.path.join(self.tempdir, "ZebraCorner/Patreon Rewards Update and follow us on Social Media by ZebraCorner from Patreon Kemono/post__comments.txt"))) 517 | # Gumroad 518 | self.KMP.routine(unpacked=0, url="https://kemono.party/gumroad/user/6033318026591/post/LRWjd") 519 | self.assertFalse(os.path.exists(os.path.join(self.tempdir, "Motion Ape/Free Folder Structurer Tool for After Effects by Motion Ape from Gumroad Kemono/post__comments.txt"))) 520 | # Fantia 521 | self.KMP.routine(unpacked=0, url="https://kemono.party/fantia/user/53451/post/775490") 522 | self.assertFalse(os.path.exists(os.path.join(self.tempdir, "おののいもこ/眼鏡子 by おののいもこ from Fantia 
Kemono/post__comments.txt"))) 523 | # SubscribeStar TODO 524 | self.KMP.routine(unpacked=0, url="https://kemono.party/subscribestar/user/sleepygimp/post/305785") 525 | self.assertFalse(os.path.exists(os.path.join(self.tempdir, "sleepygimp/Nancys Day Off r08Dear Supportersyet again just in t.. by sleepygimp from SubscribeStar Kemono/post__comments.txt"))) 526 | # DL site 527 | self.KMP.routine(unpacked=0, url="https://kemono.party/dlsite/user/RG44418/post/RE243414") 528 | self.assertFalse(os.path.exists(os.path.join(self.tempdir, "DLsite/FREE Life with Maid Kurumi-chan At Your Side Binaural by DLsite from DLsite Kemono/post__comments.txt"))) 529 | # Not empty 530 | self.KMP.routine(unpacked=0, url="https://kemono.party/fanbox/user/237083/post/3011863") 531 | self.assertTrue(os.path.exists(os.path.join(self.tempdir, "Blood Rouge/WIP唾吐きクソビッチと化した金城遙華 by Blood Rouge from Pixiv Fanbox Kemono/post__comments.txt"))) 532 | self.KMP.close() 533 | 534 | def test_broken_url(self): 535 | """ 536 | Tests downloading of a file with bad file extension 537 | """ 538 | self.KMP = KMP(self.tempdir, unzip=True, tcount=3, chunksz=None, ext_blacklist=None) 539 | self.KMP.routine(unpacked=0, url="https://kemono.party/patreon/user/5489259/post/30194248") 540 | self.assertTrue(os.path.exists(os.path.join(self.tempdir, "misswarmj/My First NSFW ASMR Video Preview by misswarmj from Patreon Kemono/0.jpg"))) 541 | self.assertTrue(os.path.exists(os.path.join(self.tempdir, "misswarmj/My First NSFW ASMR Video Preview by misswarmj from Patreon Kemono/1.jpg"))) 542 | self.KMP.close() 543 | 544 | def test_discord(self): 545 | """ 546 | Tests downloading discord content 547 | """ 548 | self.KMP = KMP(self.tempdir, unzip=True, tcount=12, chunksz=None, ext_blacklist=None) 549 | self.KMP.routine(unpacked=0, url="https://kemono.party/discord/server/634594002624184360") 550 | 551 | # 100 552 | self.assertEqual(os.stat(os.path.join(self.tempdir, 
"634594002624184360/100円_100yen/discord__content.txt")).st_size, 7845) 553 | self.assertEqual(self.getDirSz(os.path.join(self.tempdir, "634594002624184360/100円_100yen/images")), 75556767) 554 | 555 | # 100(1) 556 | self.assertEqual(os.stat(os.path.join(self.tempdir, "634594002624184360/100円_100yen(1)/discord__content.txt")).st_size, 528) 557 | self.assertEqual(self.getDirSz(os.path.join(self.tempdir, "634594002624184360/100円_100yen(1)/images")), 0) 558 | 559 | # 300 560 | self.assertEqual(os.stat(os.path.join(self.tempdir, "634594002624184360/300ビール_beer/discord__content.txt")).st_size, 20742) 561 | self.assertEqual(self.getDirSz(os.path.join(self.tempdir, "634594002624184360/300ビール_beer/images")), 277344917) 562 | 563 | # 300 564 | self.assertEqual(os.stat(os.path.join(self.tempdir, "634594002624184360/300ビール_beer(1)/discord__content.txt")).st_size, 4011) 565 | self.assertEqual(self.getDirSz(os.path.join(self.tempdir, "634594002624184360/300ビール_beer(1)/images")), 8197562) 566 | 567 | # wip 568 | self.assertEqual(os.stat(os.path.join(self.tempdir, "634594002624184360/作業中_wip/discord__content.txt")).st_size, 128463) 569 | self.assertEqual(self.getDirSz(os.path.join(self.tempdir, "634594002624184360/作業中_wip/images")), 487333680) 570 | 571 | self.KMP.close() 572 | 573 | 574 | def test_unpacked(self): 575 | """ 576 | Tests the unpacked download mode 577 | """ 578 | self.KMP = KMP(self.tempdir, unzip=True, tcount=10, chunksz=None, ext_blacklist=None) 579 | 580 | # No works 581 | self.KMP.routine(unpacked=2, url="https://kemono.party/gumroad/user/antijingoist") 582 | 583 | # All works 584 | self.KMP.routine(unpacked=2, url="https://kemono.party/fanbox/user/21587853") 585 | 586 | # Single page of works 587 | self.KMP.routine(unpacked=2, url="https://kemono.party/fanbox/user/144708?o=25") 588 | 589 | # Single work 590 | self.KMP.routine(unpacked=2, url="https://kemono.party/fanbox/user/24164271/post/2934828") 591 | 592 | 
self.assertEqual(self.getNumFiles(os.path.join(self.tempdir, "Abbie Gonzalez")), 0) 593 | self.assertEqual(self.getNumFiles(os.path.join(self.tempdir, "ie")), 1) 594 | self.assertEqual(self.getNumFiles(os.path.join(self.tempdir, "みこやん")), 144) 595 | self.assertEqual(self.getNumFiles(os.path.join(self.tempdir, "めかの工場")), 138) 596 | 597 | self.KMP.close() 598 | 599 | def test_download_undefined_char(self): 600 | """ 601 | Tests downloading a file whose name contains an 602 | undefined char 603 | """ 604 | self.KMP = KMP(self.tempdir, unzip=True, tcount=12, chunksz=None, ext_blacklist=None) 605 | self.KMP.routine(unpacked=0, url="https://kemono.party/patreon/user/38223307/post/43447399") 606 | self.assertEqual(self.getDirSz(os.path.join(self.tempdir, "HALkawa501/PSDChina Miku PSDClip Data by HALkawa501 from Patreon Kemono")), 140413258) 607 | self.KMP.close() 608 | 609 | def test_ext_blacklist(self): 610 | """ 611 | Tests downloading files with a blacklist 612 | """ 613 | # Non blacklisted 614 | self.KMP = KMP(self.tempdir, unzip=True, tcount=3, chunksz=None, ext_blacklist=['psd', 'rando', 'doesnotexists', 'chicken', 'bacon']) 615 | self.KMP.routine(unpacked=0, url="https://kemono.party/patreon/user/881792/post/64901768") 616 | self.KMP.close() 617 | 618 | # Some blacklisted 619 | self.KMP = KMP(self.tempdir, unzip=True, tcount=3, chunksz=None, ext_blacklist=['mp4']) 620 | self.KMP.routine(unpacked=0, url="https://kemono.party/patreon/user/881792/post/66258950") 621 | self.KMP.close() 622 | 623 | # All blacklisted 624 | self.KMP = KMP(self.tempdir, unzip=True, tcount=3, chunksz=None, ext_blacklist=['gif']) 625 | self.KMP.routine(unpacked=0, url="https://kemono.party/patreon/user/881792/post/63450534") 626 | self.KMP.close() 627 | 628 | self.assertEqual(self.getDirSz(self.tempdir + "diives/Melina SMALL VIDEO WITH AUDIO by diives from Patreon Kemono"), 11884327) 629 | self.assertEqual(self.getDirSz(self.tempdir + "diives/Nat The Bunny NDE by diives from Patreon 
Kemono"), 1101) 630 | self.assertEqual(self.getDirSz(self.tempdir + "diives/Rya by diives from Patreon Kemono"), 4708568) 631 | 632 | shutil.rmtree(self.tempdir + "diives/Nat The Bunny NDE by diives from Patreon Kemono") 633 | 634 | # All blacklisted with partial unpacking 635 | self.KMP = KMP(self.tempdir, unzip=True, tcount=3, chunksz=None, ext_blacklist=['gif']) 636 | self.KMP.routine(unpacked=1, url="https://kemono.party/patreon/user/881792/post/63450534") 637 | self.KMP.close() 638 | self.assertEqual(os.stat(self.tempdir + "diives/Nat The Bunny NDE by diives from Patreon Kemono - post__content.txt").st_size, 230) 639 | 640 | 641 | # All blacklisted with unpacking 642 | self.KMP = KMP(self.tempdir, unzip=True, tcount=3, chunksz=None, ext_blacklist=['gif']) 643 | self.KMP.routine(unpacked=2, url="https://kemono.party/patreon/user/881792/post/63450534") 644 | self.KMP.close() 645 | 646 | self.assertEqual(self.getNumFiles(self.tempdir + "diives"), 2) 647 | 648 | def test_partial_unpacked(self): 649 | """ 650 | Tests partially unpacked download mode 651 | """ 652 | self.KMP = KMP(self.tempdir, unzip=True, tcount=3, chunksz=None, ext_blacklist=None) 653 | 654 | # With files 655 | self.KMP.routine(unpacked=1, url="https://kemono.party/fanbox/user/49494721/post/3765544") 656 | 657 | self.assertEqual(self.getDirSz(self.tempdir + "soso\久岐忍 by soso from Pixiv Fanbox Kemono"), 1899290) 658 | 659 | # No files 660 | self.KMP.routine(unpacked=1, url="https://kemono.party/fanbox/user/49494721/post/2082281") 661 | 662 | self.assertTrue(os.path.exists(self.tempdir + "soso/リクエストボックス by soso from Pixiv Fanbox Kemono - post__comments.txt")) 663 | self.assertTrue(os.path.exists(self.tempdir + "soso/リクエストボックス by soso from Pixiv Fanbox Kemono - post__content.txt")) 664 | self.KMP.close() 665 | 666 | def test_partial_unpacked_blacklist(self): 667 | """ 668 | Tests partially unpacking a work that is empty after applying a blacklist 669 | """ 670 | self.KMP = KMP(self.tempdir, 
def test_exclude_posts(self):
    """
    Tests excluding posts by keyword.

    Two single posts are downloaded with exclusion lists that do not match
    them and each must produce 4 files; a user URL downloaded with the
    exclusion keyword "August" must leave the artist directory empty.
    """
    # https://kemono.party/fanbox/user/3316400/post/488806
    # No exclusions
    self.KMP = KMP(self.tempdir, unzip=False, tcount=3, chunksz=None, post_name_exclusion=[])
    self.KMP.routine("https://kemono.party/fanbox/user/3316400/post/532363", unpacked=None)
    self.assertEqual(self.getNumFiles(os.path.join(self.tempdir, "MだSたろう\\BRSその2-高画質版2枚 by MだSたろう from Pixiv Fanbox Kemono")), 4)
    self.KMP.close()

    self.KMP = KMP(self.tempdir, unzip=False, tcount=3, chunksz=None, post_name_exclusion=["Nothing"])
    self.KMP.routine("https://kemono.party/fanbox/user/3316400/post/490300", unpacked=None)
    self.assertEqual(self.getNumFiles(os.path.join(self.tempdir, "MだSたろう\\限定褐色 by MだSたろう from Pixiv Fanbox Kemono")), 4)
    self.KMP.close()

    # Exclusions
    self.KMP = KMP(self.tempdir, unzip=False, tcount=3, chunksz=None, post_name_exclusion=["August"])
    self.KMP.routine("https://kemono.party/gumroad/user/trylsc", unpacked=None)
    self.assertEqual(self.getNumFiles(os.path.join(self.tempdir, "TRYLSC")), 0)
    self.KMP.close()


def test_exclude_link(self):
    """
    Tests excluding links by keyword.

    Covers three situations: no exclusion keywords, keywords that match
    only some of a post's files, and a keyword given for a post whose only
    checked file must not be downloaded.
    """
    # No exclusions
    self.KMP = KMP(self.tempdir, unzip=False, tcount=3, chunksz=None, link_name_exclusion=[])
    self.KMP.routine("https://kemono.party/gumroad/user/2986488497406/post/bMhu", unpacked=None)
    # Backslashes are escaped explicitly: "\P", "\S" and "\s" are invalid
    # escape sequences that newer Python versions warn about
    self.assertTrue(os.path.exists(os.path.join(self.tempdir, "burningtides\\Phuture Noize - A New Day Remake FLP Presets by burningtides from Gumroad Kemono\\Phuture-Noize---A-New-Day-Remake-.zip")))
    self.KMP.close()

    # Some exclusions
    self.KMP = KMP(self.tempdir, unzip=False, tcount=3, chunksz=None, link_name_exclusion=["19","18"])
    self.KMP.routine("https://kemono.party/gumroad/user/5646205703539/post/xIMAi", unpacked=None)
    post_dir = "Pitiwazou - Cédric Lepiller\\SPEEDSCULPT by Pitiwazou - Cédric Lepiller from Gumroad Kemono\\"
    # (file name, whether it is expected to exist after the download)
    expectations = (
        ("speedsculpt_2_80_v_0_1_19.zip", False),
        ("speedsculpt_2_80_v_0_1_17.zip", True),
        ("speedsculpt_2_80_v_0_1_18.zip", False),
        ("speedsculpt_2_83_v_0_1_20.zip", True),
        ("speedsculpt_2_9_v_0_1_22.zip", True),
        ("speedsculpt_2_79_v_0_1_9.zip", True),
    )
    for file_name, should_exist in expectations:
        found = os.path.exists(os.path.join(self.tempdir, post_dir + file_name))
        if should_exist:
            self.assertTrue(found)
        else:
            self.assertFalse(found)
    self.KMP.close()

    # All excluded
    self.KMP = KMP(self.tempdir, unzip=False, tcount=3, chunksz=None, link_name_exclusion=["sfm"])
    self.KMP.routine("https://kemono.party/gumroad/user/6791944931428/post/nYFnj", unpacked=None)
    self.assertFalse(os.path.exists(os.path.join(self.tempdir, "Bluejuicyjuice\\18 Nidoqueen SFM model by Bluejuicyjuice from Gumroad Kemono\\NidoSFM.7z")))
    self.KMP.close()


def test_server_name(self):
    """
    Tests downloading of server name

    Runs with download_server_name_type=True and checks that both an image
    and an mp4 exist under the expected file names.
    """
    # Mp4 and images
    self.KMP = KMP(self.tempdir, unzip=False, tcount=3, chunksz=None, download_server_name_type=True)
    self.KMP.routine("https://kemono.party/fanbox/user/49494721/post/4072005", unpacked=None)
    # "\胡" was an invalid escape sequence, the backslash is now escaped
    self.assertTrue(os.path.exists(os.path.join(self.tempdir, "soso\\胡桃Live2Dアニメ by soso from Pixiv Fanbox Kemono\\d1c15668-08e1-4bea-a1bc-a55d25e59bc3.jpg")))
    self.assertTrue(os.path.exists(os.path.join(self.tempdir, "soso\\胡桃Live2Dアニメ by soso from Pixiv Fanbox Kemono\\胡桃_Live2D.mp4")))
    self.KMP.close()


def test_password_zip(self):
    """
    Tests download and extraction of a password
    protected zip
    """
    self.KMP = KMP(self.tempdir, unzip=True, tcount=3, chunksz=None, download_server_name_type=True)
    self.KMP.routine("https://kemono.party/fanbox/user/262147/post/4062214", unpacked=2)
    # NOTE(review): the directory and the name are concatenated without a
    # path separator, so this checks a sibling of the working directory -
    # confirm that this is the intended location
    self.assertFalse(os.path.exists(os.path.abspath("./") + "クレー計20枚パスワード付zip"))
    self.KMP.close()


def test_alt_routine(self):
    """
    Tests alternate download for all download types
    """
    self.KMP = KMP(self.tempdir, unzip=True, tcount=3, chunksz=None)
    # All Artist Page
    self.KMP.alt_routine("https://kemono.party/fanbox/user/836862", unpacked=2)
    # assertEqual replaces the assertEquals alias, which no longer exists in Python 3.12
    self.assertEqual(self.getNumFiles(os.path.join(self.tempdir, "tsumikisata")), 544)
    # Single Artist Page
    self.KMP.alt_routine("https://kemono.party/patreon/user/19467060?o=25", unpacked=2)
    self.assertEqual(self.getNumFiles(os.path.join(self.tempdir, "katecavanaugh")), 86)

    # Single Artist Work
    self.KMP.alt_routine("https://kemono.party/patreon/user/169359/post/27626311", unpacked=2)
    self.assertEqual(self.getNumFiles(os.path.join(self.tempdir, "seductionrl")), 8)

    # Discord Channel
    # TODO
    # Text file
    # TODO


def getDirSz(self, dir: str) -> int:
    """
    Returns directory and its content size

    Walks the directory tree rooted at dir and adds up the size of every
    file found; symbolic links are skipped.

    Args:
        dir (str): directory to measure
    Return directory and its content size in bytes
    """
    size = 0
    for dirpath, _, filenames in os.walk(dir):
        for f in filenames:
            fp = os.path.join(dirpath, f)
            # skip if it is symbolic link
            if not os.path.islink(fp):
                size += os.path.getsize(fp)
    return size
def getNumFiles(self, dir:str) -> int:
    """
    Counts the files located directly inside a directory

    Entries that are not files (for example subdirectories) are ignored and
    subdirectories are not searched.

    Args:
        dir (str): directory to inspect
    Return number of files in the directory
    """
    count = 0
    for entry in os.listdir(dir):
        if os.path.isfile(os.path.join(dir, entry)):
            count += 1
    return count
class LockingCounter():
    """
    Represents a thread safe counter with multiple operations

    A single condition variable protects the value and is used to wake
    threads blocked in wait_until() whenever the value changes.
    """
    __value:int         # Current counter value
    __cond:Condition    # Guards __value, notified on every change of the value

    def __init__(self, starting:int=0) -> None:
        """
        Initializes the counter with starting value and initializes any other
        required variables.

        starting: starting value of the counter
        """
        self.__value = starting
        self.__cond = Condition()

    def toggle(self) -> int:
        """
        Increments counter by 1 and wakes waiting threads

        Returns: counter value immediately after toggle
        """
        with self.__cond:
            self.__value += 1
            saved = self.__value
            # Waiters re-check their own target, so all of them are woken
            self.__cond.notify_all()
        return saved

    def wait_until(self, target:int) -> None:
        """
        Block until target is <= counter

        Returns immediately if the target has already been met.

        Args:
            target (int): value to block for
        """
        with self.__cond:
            # The predicate is evaluated while the condition's lock is held,
            # so a change of the value cannot slip in between check and wait
            self.__cond.wait_for(lambda: self.__compare(target, self.__value))

    def __compare(self, i1:int, i2:int) -> bool:
        """
        Performs i1 <= i2 and returns the result

        Args:
            i1 (int): int 1
            i2 (int): int 2

        Returns:
            bool: i1 <= i2 is returned
        """
        return i1 <= i2

    def get(self) -> int:
        """
        Returns counter value

        Returns:
            int: counter value
        """
        with self.__cond:
            return self.__value

    def set(self, target:int) -> None:
        """
        Set counter to target value and wakes waiting threads, the new value
        may satisfy a thread blocked in wait_until()

        target (int): value to set counter to
        """
        with self.__cond:
            self.__value = target
            self.__cond.notify_all()
class PersistentCounter():
    """
    Integer counter wrapped in an object, so the same counter can be handed
    around and modified in place. Not thread safe.
    """
    __value:int     # Value currently held by the counter

    def __init__(self, starting:int=0) -> None:
        """
        Creates the counter

        starting: value the counter begins at
        """
        self.__value = starting

    def toggle(self) -> int:
        """
        Adds 1 to the counter

        Returns: value of the counter right after the increment
        """
        incremented = self.__value + 1
        self.__value = incremented
        return incremented

    def get(self) -> int:
        """
        Reads the counter

        Returns:
            int: current value of the counter
        """
        current = self.__value
        return current

    def set(self, target:int) -> None:
        """
        Overwrites the counter

        Args:
            target (int): value the counter takes
        """
        self.__value = target
13 | - Download all files and any downloads in high resolution and correct extension. 14 | - Automatic file unzipping for .7z, .zip, and .rar files. 15 | - Extraction of a work's content and comments. 16 | - High degree of control over downloads. Includes blacklisting file extensions, posts with certain keywords, omission of certain download items, and much more! 17 | - Queuing system, download multiple URLs without user input 18 | - Multithreading support, significant download speed bonus. 19 | - Ease of use, cookies are for eating only! 20 | - Automatic artist work updates. 21 | 22 | 23 | ## Instructions: 24 | **Need in-depth details or want to view all features? Please visit the [wiki](https://github.com/Jeffchen54/KMP-Kemono-Downloader/wiki)!** 25 | 26 | Download Python >=3.10 27 | 28 | - Run install_requirements.bat. 29 | - Install 7z and add it to your Windows PATH. Line should be in the format "C:\Users\chenj\Downloads\7-Zip" 30 | - Run in your favorite command line software. Call "venv/Scripts/Activate" before running the program. 31 | - Read the command line arguments for instructions on how to run. 32 | - Enjoy! 33 | -------------------------------------------------------------------------------- /Threadpool.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import threading 3 | import queue 4 | """ 5 | Simple task sharing threadpool. Handles fully generic tasks of any kind. 6 | No way to get return values so tasks that return None are the only functions 7 | supported! 
tname = threading.local()   # TLV for thread name (.name) and thread id (.id)


class Kill_Queue():
    """
    Queue with a built in kill switch with sem == # of available items,
    to be used in multithreading
    """
    __queue:queue.Queue
    __kill:bool     # Kill switch for worker threads
    __tasks:any     # Semaphore counting the items available in the queue

    def __init__(self) -> None:
        """
        Create queue and set kill to false
        """
        self.__queue = queue.Queue(-1)
        self.__kill = False
        self.__tasks = threading.Semaphore(0)

    def kill(self) -> None:
        """
        Turns kill switch on
        """
        self.__kill = True

    def revive(self) -> None:
        """
        Turn kill switch off
        """
        self.__kill = False

    def status(self) -> bool:
        """
        Reports if the queue is dead or alive

        Return: True if dead, False if alive
        """
        return self.__kill

    def enqueue(self, task:any) -> None:
        """
        Put an item in the queue and increment the semaphore
        """
        self.__queue.put(task)
        self.__tasks.release()

    def acquire_resource(self) -> None:
        """
        Decrement semaphore keeping track of queue items, blocks while
        the semaphore is 0
        """
        self.__tasks.acquire()

    def release_resource(self) -> None:
        """
        Increment semaphore keeping track of queue items.
        Does not need to be called after enqueue as it
        increments the semaphore automatically
        """
        self.__tasks.release()

    def dequeue(self) -> any:
        """
        Removes an item

        Pre: acquire_resource was called first
        Return item in front of the queue
        """
        return self.__queue.get()

    def task_done(self) -> None:
        """
        Indicates queue task was completed

        Pre: dequeue was called, thread task was completed
        """
        self.__queue.task_done()

    def join_queue(self) -> None:
        """
        Blocks until all task queue items have been processed
        """
        self.__queue.join()

    def get_qsize(self) -> int:
        """
        Get queue size (unreliable)

        Return: queue size
        """
        return self.__queue.qsize()


class ThreadPool():
    """
    Very basic task sharing threadpool, does not support futures.
    Thread local variables:
        tname.name: Thread name
        tname.id: thread id

    """
    # Task queue, Contains tuples in the structure: (func(),(args1,args2,...))
    __task_queue:Kill_Queue
    __threads:list      # List of threads in the threadpool
    __tcount:int        # Number of threads
    __alive:bool        # Checks if the threadpool is alive

    def __init__(self, tcount:int) -> None:
        """
        Initializes a threadpool, threads are not started until
        start_threads() is called

        Param:
            tcount: Number of threads for the threadpool
        """
        self.__task_queue = Kill_Queue()
        self.__tcount = tcount
        self.__alive = False
        # Defined up front so kill_threads() is safe before start_threads()
        self.__threads = []

    def start_threads(self) -> None:
        """
        Creates tcount (given at construction) worker threads and starts them
        """
        self.__threads = []
        # Spawn threads
        for i in range(0, self.__tcount):
            worker = ThreadPool.TaskThread(i, self.__task_queue)
            self.__threads.append(worker)
            worker.start()
        self.__alive = True
        logging.debug(str(self.__tcount) + " threads have been started")

    def kill_threads(self) -> None:
        """
        Kills all threads in threadpool. Threads are restarted and killed using a
        switch, deadlocked or infinitely running threads cannot be killed using
        this function.
        """
        self.__task_queue.kill()

        # Wake every worker blocked on the semaphore so it can see the kill switch
        for _ in range(0, len(self.__threads)):
            self.__task_queue.release_resource()

        for worker in self.__threads:
            worker.join()
        self.__alive = False
        self.__task_queue.revive()
        logging.debug(str(len(self.__threads)) + " threads have been terminated")

    def enqueue(self, task:tuple) -> None:
        """
        Put an item in task queue

        Param:
            task: tuple in the structure (func(),(args1,args2,...))
        """
        logging.debug("Enqueued into task queue: " + str(task))
        self.__task_queue.enqueue(task)

    def enqueue_queue(self, task_list:queue.Queue) -> None:
        """
        Put an queue in task queue. Each queue element will be 'get()' and then
        task_done()

        The queue object itself is enqueued once per element it currently
        holds, a worker that dequeues it processes one of its elements.

        Param:
            task_list: queue of task tuples following the structure (func(),(args1,args2,...))
        """
        logging.debug("Enqueued into task queue: " + str(task_list))
        size = task_list.qsize()
        for _ in range(0, size):
            self.__task_queue.enqueue(task_list)

    def join_queue(self) -> None:
        """
        Blocks until all task queue items have been processed
        """
        logging.debug("Blocking until all tasks are complete")
        self.__task_queue.join_queue()

    def get_qsize(self) -> int:
        """
        Get queue size (unreliable)

        Return: task queue size
        """
        return self.__task_queue.get_qsize()

    def get_status(self) -> bool:
        """
        Check if the threadpool is alive

        Return: True if alive, false if not
        """
        return self.__alive

    class TaskThread(threading.Thread):
        """
        Fully generic threadpool where tasks of any kind is stored and retrieved in task_queue,
        threads are daemon threads and can be killed using kill variable.
        """
        __id: int
        __task_queue:Kill_Queue

        def __init__(self, id: int, task_queue:Kill_Queue) -> None:
            """
            Initializes thread with a thread name
            Param:
                id: thread identifier
                task_queue: Queue to get tasks from
            """
            self.__id = id
            self.__task_queue = task_queue
            super(ThreadPool.TaskThread, self).__init__(daemon=True)

        def run(self) -> None:
            """
            Worker thread job. Blocks until a task is available in the task
            queue, retrieves it and runs it.

            An exception raised by a task is logged and the worker keeps
            running. The task is always marked as done, otherwise a single
            failing task would make join_queue() block forever.
            """
            tname.name = "Thread #" + str(self.__id)
            tname.id = self.__id
            while True:
                # Wait until a task is available
                self.__task_queue.acquire_resource()

                # Check kill signal
                if self.__task_queue.status():
                    logging.debug(tname.name + " has terminated")
                    return

                # Pop queue and process it
                todo = self.__task_queue.dequeue()

                try:
                    # If dequeued element is a queue, we process it like its our queue
                    if isinstance(todo, queue.Queue):
                        monitored_todo = todo.get()
                        try:
                            logging.debug(tname.name + " (From SubQueue) Processing: " + str(monitored_todo))
                            monitored_todo[0](*monitored_todo[1])
                        finally:
                            todo.task_done()
                    # Else, process the task directly
                    else:
                        logging.debug(tname.name + " Processing: " + str(todo))
                        todo[0](*todo[1])
                except Exception:
                    logging.exception(tname.name + " task raised an exception")
                finally:
                    self.__task_queue.task_done()
def write_utf8(text:str, path:str, mode:str) -> None:
    """
    Writes utf-8 text to a file at path

    Param:
        text: text to write
        path: where file to write to is located including file name
        mode: mode to set File IO, for example 'w' or 'a'
    """
    with io.open(path, mode=mode, encoding='utf-8') as fd:
        fd.write(text)


def write_to_file(path:str, line: str, mutex=None) -> None:
    """
    Appends to a file, creates the file if it does not exists

    Param:
        path: file to write to, absolute path
        line: line to append to file
        mutex: (Optional) mutex lock associated with the file, held for the
                duration of the write
    """
    if mutex:
        mutex.acquire()
    try:
        write_utf8(line, path, 'a')
    finally:
        # Release even when the write fails, otherwise every later writer
        # sharing this mutex would block forever
        if mutex:
            mutex.release()


def getDirSz(dir: str) -> int:
    """
    Returns directory and its content size

    Walks the directory tree rooted at dir and adds up the size of every
    file found; symbolic links are skipped.

    Param:
        dir: directory to measure
    Return directory and its content size in bytes
    """
    size = 0
    for dirpath, _, filenames in os.walk(dir):
        for f in filenames:
            fp = os.path.join(dirpath, f)
            # skip if it is symbolic link
            if not os.path.islink(fp):
                size += os.path.getsize(fp)
    return size
8 | # 9 | # This program is distributed in the hope that it will be useful, 10 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | # GNU General Public License for more details. 13 | # 14 | # You should have received a copy of the GNU General Public License 15 | # along with this program. If not, see . 16 | """Archive commands for the 7zr program. 17 | 18 | 7zr is a light executable supporting only the 7z archive format. 19 | """ 20 | 21 | from .p7zip import create_7z 22 | 23 | def extract_7z(archive, compression, cmd, verbosity, interactive, outdir): 24 | """Extract a 7z archive.""" 25 | cmdlist = [cmd, 'x'] 26 | if not interactive: 27 | cmdlist.append('-y') 28 | cmdlist.extend(['-o%s' % outdir, '--', archive]) 29 | return cmdlist 30 | 31 | def list_7z(archive, compression, cmd, verbosity, interactive): 32 | """List a 7z archive.""" 33 | cmdlist = [cmd, 'l'] 34 | if not interactive: 35 | cmdlist.append('-y') 36 | cmdlist.extend(['--', archive]) 37 | return cmdlist 38 | 39 | def test_7z(archive, compression, cmd, verbosity, interactive): 40 | """Test a 7z archive.""" 41 | cmdlist = [cmd, 't'] 42 | if not interactive: 43 | cmdlist.append('-y') 44 | cmdlist.extend(['--', archive]) 45 | return cmdlist -------------------------------------------------------------------------------- /patch for patoolib/p7zip.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Copyright (C) 2010-2015 Bastian Kleineidam 3 | # 4 | # This program is free software: you can redistribute it and/or modify 5 | # it under the terms of the GNU General Public License as published by 6 | # the Free Software Foundation, either version 3 of the License, or 7 | # (at your option) any later version. 
def extract_7z(archive, compression, cmd, verbosity, interactive, outdir):
    """Build the 7z command line that extracts an archive into outdir.

    When not interactive, '-p-' and '-y' are passed so 7z does not stop to
    ask for a password or a confirmation.
    """
    unattended = [] if interactive else ['-p-', '-y']
    return [cmd, 'x', *unattended, '-o%s' % outdir, '--', archive]


def extract_7z_singlefile(archive, compression, cmd, verbosity, interactive, outdir):
    """Build the 7z command line that extracts a singlefile archive
    (eg. gzip or bzip2) with '7z e'.
    This makes sure a single file and no subdirectories are created,
    which would cause errors with patool repack."""
    unattended = [] if interactive else ['-p-', '-y']
    return [cmd, 'e', *unattended, '-o%s' % outdir, '--', archive]


# Single file formats are extracted with '7z e'
extract_bzip2 = extract_7z_singlefile
extract_gzip = extract_7z_singlefile
extract_compress = extract_7z_singlefile
extract_xz = extract_7z_singlefile
extract_lzma = extract_7z_singlefile

# Multi file formats are extracted with '7z x'
extract_zip = extract_7z
extract_rar = extract_7z
extract_cab = extract_7z
extract_arj = extract_7z
extract_cpio = extract_7z
extract_rpm = extract_7z
extract_deb = extract_7z
extract_iso = extract_7z
extract_vhd = extract_7z


def list_7z (archive, compression, cmd, verbosity, interactive):
    """Build the 7z command line that lists the content of an archive."""
    unattended = [] if interactive else ['-p-', '-y']
    return [cmd, 'l', *unattended, '--', archive]


# Every supported format is listed the same way
list_bzip2 = list_7z
list_gzip = list_7z
list_zip = list_7z
list_compress = list_7z
list_rar = list_7z
list_cab = list_7z
list_arj = list_7z
list_cpio = list_7z
list_rpm = list_7z
list_deb = list_7z
list_iso = list_7z
list_xz = list_7z
list_lzma = list_7z
list_vhd = list_7z
74 | list_lzma = \ 75 | list_vhd = \ 76 | list_7z 77 | 78 | 79 | def test_7z (archive, compression, cmd, verbosity, interactive): 80 | """Test a 7z archive.""" 81 | cmdlist = [cmd, 't'] 82 | if not interactive: 83 | cmdlist.extend(['-p-', '-y']) 84 | cmdlist.extend(['--', archive]) 85 | return cmdlist 86 | 87 | test_bzip2 = \ 88 | test_gzip = \ 89 | test_zip = \ 90 | test_compress = \ 91 | test_rar = \ 92 | test_cab = \ 93 | test_arj = \ 94 | test_cpio = \ 95 | test_rpm = \ 96 | test_deb = \ 97 | test_iso = \ 98 | test_xz = \ 99 | test_lzma = \ 100 | test_vhd = \ 101 | test_7z 102 | 103 | 104 | def create_7z(archive, compression, cmd, verbosity, interactive, filenames): 105 | """Create a 7z archive.""" 106 | cmdlist = [cmd, 'a'] 107 | if not interactive: 108 | cmdlist.append('-y') 109 | cmdlist.extend(['-t7z', '-mx=9', '--', archive]) 110 | cmdlist.extend(filenames) 111 | return cmdlist 112 | 113 | 114 | def create_zip(archive, compression, cmd, verbosity, interactive, filenames): 115 | """Create a ZIP archive.""" 116 | cmdlist = [cmd, 'a'] 117 | if not interactive: 118 | cmdlist.append('-y') 119 | cmdlist.extend(['-tzip', '-mx=9', '--', archive]) 120 | cmdlist.extend(filenames) 121 | return cmdlist 122 | 123 | 124 | def create_xz(archive, compression, cmd, verbosity, interactive, filenames): 125 | """Create an XZ archive.""" 126 | cmdlist = [cmd, 'a'] 127 | if not interactive: 128 | cmdlist.append('-y') 129 | cmdlist.extend(['-txz', '-mx=9', '--', archive]) 130 | cmdlist.extend(filenames) 131 | return cmdlist 132 | 133 | 134 | def create_gzip(archive, compression, cmd, verbosity, interactive, filenames): 135 | """Create a GZIP archive.""" 136 | cmdlist = [cmd, 'a'] 137 | if not interactive: 138 | cmdlist.append('-y') 139 | cmdlist.extend(['-tgzip', '-mx=9', '--', archive]) 140 | cmdlist.extend(filenames) 141 | return cmdlist 142 | 143 | 144 | def create_bzip2(archive, compression, cmd, verbosity, interactive, filenames): 145 | """Create a BZIP2 archive.""" 146 | 
def supported_zip_type(fname:str) -> bool:
    """
    Checks if a file is a zip file (7z, zip, rar), judged by its extension

    A name without any '.' has no extension and is never a zip file.

    Param:
        fname: zip file name or path, directories separated by '/'
    Return True if zip file, false if not
    """
    file = fname.rpartition('/')[2]
    # dot is empty when the name contains no '.', rpartition then places the
    # whole name in the last slot, which must not be mistaken for an extension
    _, dot, extension = file.rpartition('.')
    return bool(dot) and extension in ('zip', 'rar', '7z')


def extract_zip(zippath: str, destpath: str, temp:bool) -> bool:
    """
    Extracts a zip file to a destination. Zipfile is deleted if extraction is
    successful. On a patoolib error (for example a password protected file)
    nothing is extracted and the failure is logged.

    Cases:
    (1) Unzip directory does not exists -> Extract to directory
    (2) Unzip directory exists but size does not match extracted dir -> Extracted dir prepended with (#)
    (3) Unzip directory exists and size matches -> Counted as duplicate and skips unzip directory
    (4) Extracted files conflict with existing files -> Same as case 2
    # Unzip directory refers to directories within a zip folder, for example
    test.zip
        -> animals
            -> dog1.jpg
        -> cat1.png

    animals is a directory, this is an unzip directory

    Param:
        zippath: full path to zip file included zip file itself
        destpath: full path to destination
        temp: currently unused, extraction always goes through a temp dir
    Pre: Is a zip file, can be checked using supported_zip_type(). destpath exists
    Return: True on success, false on failure
    """
    destpath += '\\'
    backup_destpath = destpath

    # A tempdir is used to bypass Window's 255 char limit when unzipping files
    with tempfile.TemporaryDirectory(prefix="temp") as dirpath:
        dirpath += '\\'
        try:
            patoolib.extract_archive(zippath, outdir=dirpath + '\\', verbosity=-1, interactive=False)

            for f in os.listdir(dirpath):
                if os.path.isdir(os.path.abspath(dirpath + f)):
                    # Duplicate file name handler
                    downloaded = False
                    while not downloaded:
                        try:
                            shutil.copytree(os.path.abspath(dirpath + f), os.path.abspath(destpath + f), dirs_exist_ok=False)
                            downloaded = True
                        except FileExistsError as e:
                            # If duplicate dir is found, it will be stashed in the same dir but with (n) prepended
                            counter = 1
                            nextName = e.filename
                            currSz = jutils.getDirSz(os.path.abspath(dirpath + f.replace("\\", "")))
                            # Check directory size of dirpath vs destpath, if same size, we are done
                            done = False
                            while(not done):
                                # If the next directory does not exists, change destpath to that directory
                                if not os.path.exists(nextName):
                                    destpath = nextName
                                    done = True
                                else:
                                    # If directory with same size is found, we are done
                                    dirsize = jutils.getDirSz(nextName)
                                    if dirsize == currSz:
                                        done = True
                                        downloaded = True

                                # Adjust path for next iteration
                                if not done:
                                    nextName = destpath + '(' + str(counter) + ')'
                                    counter += 1

                    # Extracted copy in the temp dir is no longer needed
                    shutil.rmtree(os.path.abspath(dirpath + f), ignore_errors=True)

                else:
                    shutil.copy(os.path.abspath(dirpath + f), os.path.abspath(destpath + f))
                    os.remove(os.path.abspath(dirpath + f))

                # Reset destpath as it may have been modified due to dupe files
                destpath = backup_destpath

            os.remove(zippath)
            return True
        except util.PatoolError as e:
            logging.critical("Unzipping a non zip file has occured or password protected file, failure is described below:" +
                             "\n" + "File name: " + zippath + "\n" + "File size: " + str(os.stat(zippath).st_size))
            logging.critical(e)
            # Do not leave an empty destination directory behind
            d = os.listdir(destpath)
            if len(d) == 0:
                os.rmdir(destpath)
        except RuntimeError:
            logging.debug("File name: " + zippath + "\n" +
                          "File size: " + str(os.stat(zippath).st_size))
    return False


def main():
    """
    Extracts the zip file given as first command line argument into
    ./testing, prints a message if the argument is missing or is not a
    supported zip file
    """
    if len(sys.argv) < 2:
        print("Usage: zipextracter.py <zip file>")
        return
    if supported_zip_type(sys.argv[1]):
        extract_zip(os.path.abspath(sys.argv[1]), os.path.abspath("./testing") + '\\', True)
    else:
        print("Is not ZIP -> " + sys.argv[1])
while(not done): 81 | # If the next directory does not exists, change destpath to that directory 82 | if not os.path.exists(nextName): 83 | destpath = nextName 84 | done = True 85 | else: 86 | # If directory with same size is found, we are done 87 | dirsize = jutils.getDirSz(nextName) 88 | if dirsize == currSz: 89 | done = True 90 | downloaded = True 91 | 92 | # Adjust path for next iteration 93 | if not done: 94 | nextName = destpath + '(' + str(counter) + ')' 95 | counter += 1 96 | 97 | # Move files from dupe directory to new directory 98 | shutil.rmtree(os.path.abspath(dirpath + f), ignore_errors=True) 99 | 100 | else: 101 | shutil.copy(os.path.abspath(dirpath + f), os.path.abspath(destpath + f)) 102 | os.remove(os.path.abspath(dirpath + f)) 103 | 104 | # Reset destpath as it may have been modified due to dupe files 105 | destpath = backup_destpath 106 | 107 | os.remove(zippath) 108 | return True 109 | except util.PatoolError as e: 110 | logging.critical("Unzipping a non zip file has occured or password protected file, failure is described below:" + 111 | "\n + ""File name: " + zippath + "\n" + "File size: " + str(os.stat(zippath).st_size)) 112 | logging.critical(e) 113 | d = os.listdir(destpath) 114 | if len(d) == 0: 115 | os.rmdir(destpath) 116 | except RuntimeError: 117 | logging.debug("File name: " + zippath + "\n" + 118 | "File size: " + str(os.stat(zippath).st_size)) 119 | return False 120 | def main(): 121 | if supported_zip_type(sys.argv[1]): 122 | extract_zip(os.path.abspath(sys.argv[1]), os.path.abspath("./testing") + '\\', True) 123 | else: 124 | print("Is not ZIP -> " + sys.argv[1]) 125 | if __name__ == "__main__": 126 | main() --------------------------------------------------------------------------------