├── .github
    └── FUNDING.yml
├── DB.py
├── DiscordtoJson.py
├── DiscordtoJson_test.py
├── HashTable.py
├── KMPDownloader.py
├── KMPDownloader_test.py
├── LICENSE
├── LockingCounter.py
├── LockingCounter_test.py
├── PersistentCounter.py
├── README.md
├── Threadpool.py
├── example.txt
├── install_requirements.bat
├── jutils.py
├── patch for patoolib
    ├── p7rzip.py
    └── p7zip.py
├── requirements.txt
├── user-agent.txt
└── zipextracter.py


/.github/FUNDING.yml:
--------------------------------------------------------------------------------
 1 | # These are supported funding model platforms
 2 | 
 3 | github: # Replace with up to 4 GitHub Sponsors-enabled usernames e.g., [user1, user2]
 4 | patreon: # Replace with a single Patreon username
 5 | open_collective: # Replace with a single Open Collective username
 6 | ko_fi: jeffchen54
 7 | tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel
 8 | community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry
 9 | liberapay: # Replace with a single Liberapay username
10 | issuehunt: # Replace with a single IssueHunt username
11 | otechie: # Replace with a single Otechie username
12 | lfx_crowdfunding: # Replace with a single LFX Crowdfunding project-name e.g., cloud-foundry
13 | custom: # Replace with up to 4 custom sponsorship URLs e.g., ['link1', 'link2']
14 | 


--------------------------------------------------------------------------------
/DB.py:
--------------------------------------------------------------------------------
  1 | import sqlite3
  2 | from sqlite3 import Connection, Cursor
  3 | from threading import Lock
  4 | import threading
  5 | 
  6 | class DB():
  7 |     __db_name:str
  8 |     __connection:Connection
  9 |     __cursor:Cursor
 10 |     __lock:Lock
 11 |     
 12 |     def __init__(self, db_name:str) -> None:
 13 |         """
 14 |         Creates a database or reopens a database if a database
 15 |         with the provided name already exists.
 16 |         
 17 |         Args:
 18 |             db_name (str): name of the database
 19 |         Pre: db_name must have .db or other suffixes, some exceptions such as 
 20 |                 memory only database exists
 21 |         """
 22 |         self.__db_name = db_name
 23 |         self.__connection = sqlite3.connect(db_name, check_same_thread=False)
 24 |         self.__cursor = self.__connection.cursor()
 25 |         self.__lock = threading.Lock()
 26 |     
 27 |     def execute(self, cmd:str|tuple) -> any:
 28 |         """
 29 |         Thread safe; Executes a command, if the command returns something,
 30 |         it will be returned.
 31 | 
 32 |         Args:
 33 |             cmd (str): sql command
 34 |         Returns: anything the cmd returns
 35 |         """
 36 |         self.__lock.acquire()
 37 |         
 38 |         if(isinstance(cmd, str)):
 39 |             content = self.__cursor.execute(cmd)
 40 |         
 41 |         else:
 42 |             content = self.__cursor.execute(*cmd)
 43 |         self.__lock.release()
 44 |         
 45 |         return content
 46 |     
 47 |     def executeBatch(self, cmds:list[str|tuple]) -> any:
 48 |         """
 49 |         Thread safe; Executes batch commands, if the command returns something,
 50 |         it will be returned.
 51 | 
 52 |         Args:
 53 |             cmd (str): sql command
 54 |         Returns: list containing anything the cmd returns
 55 |         """
 56 |         self.__lock.acquire()
 57 |         content = list[len(cmds)]
 58 |         for i, item in enumerate(cmds):
 59 |             if(isinstance(item, str)):
 60 |                 content = self.__cursor.execute(item)
 61 |             else:
 62 |                 content[i] = self.__cursor.execute(*item)
 63 |         self.__lock.release()
 64 |         
 65 |         return content
 66 |     
 67 |     def executeNCommit(self, cmd:str|tuple) -> any:
 68 |         """
 69 |         Thread safe; Executes a command, if the command returns something,
 70 |         it will be returned. 
 71 |         
 72 |         Commit is done after the opertion has been completed
 73 | 
 74 |         Args:
 75 |             cmd (str): sql command
 76 |         Returns: anything the cmd returns
 77 |         """
 78 |         self.__lock.acquire()
 79 |         
 80 |         if(isinstance(cmd, str)):
 81 |             content = self.__cursor.execute(cmd)
 82 |         
 83 |         else:
 84 |             content = self.__cursor.execute(*cmd)
 85 |         self.__connection.commit()
 86 |         self.__lock.release()
 87 |         
 88 |         return content
 89 |     
 90 |     def executeBatchNCommit(self, cmds:list[str|tuple]) -> any:
 91 |         """
 92 |         Thread safe; Executes a command, if the commands returns something,
 93 |         it will be returned. 
 94 |         
 95 |         Commit is done after the opertion has been completed
 96 | 
 97 |         Args:
 98 |             cmd (str): sql command
 99 |         Returns: list of anything anything the cmd returns
100 |         """
101 |         self.__lock.acquire()
102 |         content = list[len(cmds)]
103 |         for i, item in enumerate(cmds):
104 |             if(isinstance(item, str)):
105 |                 content = self.__cursor.execute(item)
106 |             else:
107 |                 content[i] = self.__cursor.execute(*item)
108 |         self.__connection.commit()
109 |         self.__lock.release()
110 |         
111 |         return content
112 |     
113 |     def commit(self) -> None:
114 |         """
115 |         Thread safe; Commits unsaved changes.
116 |         """
117 |         self.__lock.acquire()
118 |         self.__connection.commit()
119 |         self.__lock.release()
120 |     
121 |     def closeNOpen(self)->None:
122 |         """
123 |         Closes and reopens the database connection
124 |         """
125 |         self.__connection.close()
126 |         self.__connection = sqlite3.connect(self.__db_name, check_same_thread=False)
127 |         self.__cursor = self.__connection.cursor()
128 |     
129 |     
130 |     def close(self)->None:
131 |         """
132 |         Closes the database connection
133 |         """
134 |         self.__connection.close()
135 |         


--------------------------------------------------------------------------------
/DiscordtoJson.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Simple JSON scraper for Kemono.party discord content.
  3 | 
  4 | @author: Jeff Chen
  5 | @last modified: 8/25/2022
  6 | """
  7 | import time
  8 | from cfscrape import CloudflareScraper
  9 | import logging
 10 | import requests.adapters
 11 | from Threadpool import ThreadPool
 12 | from threading import Semaphore
 13 | from threading import Lock
 14 | import cfscrape
 15 | 
 16 | 
 17 | 
# URL prefix for looking up a Discord server's channels; the server ID is appended.
DISCORD_LOOKUP_API = "https://www.kemono.su/api/v1/discord/channel/lookup/"
# URL prefix for fetching a channel's content; the channel ID is appended.
DISCORD_CHANNEL_CONTENT_PRE_API = "https://www.kemono.su/api/v1/discord/channel/"
# Query-string suffix placed after the channel ID; the skip offset is appended.
DISCORD_CHANNEL_CONTENT_SUF_API = "?o="
# Amount the skip offset advances between consecutive content requests.
DISCORD_CHANNEL_CONTENT_SKIP_INCRE = 150
# Headers sent with every request made by this module.
HEADERS={'User-agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.60 Safari/537.36'}
 23 | 
 24 | 
 25 | class DiscordToJson():
 26 |     """
 27 |     Utility functions used for scraping Kemono Party's Discord to Json data.
 28 |     Offers functions for scrapping Discord sub channel IDs and scraping the channels themselves.
 29 |     """
 30 |     __recent:dict = None
 31 |     def discord_lookup(self, discordID:str, scraper:CloudflareScraper) -> dict:
 32 |         """
 33 |         Looks up a discord id using Kemono.party's API and returns 
 34 |         the result in JSON format
 35 | 
 36 |         Param: 
 37 |             discordID: ID of discord channel to grab channel IDs from
 38 |             scraper: Scraper to use while scraping kemono 
 39 |         Return: channelIDs in JSON format
 40 |         """
 41 |         # Link URL
 42 |         url = DISCORD_LOOKUP_API + discordID
 43 |         
 44 |         # Grab data
 45 |         data = None
 46 |         while not data:
 47 |             try:
 48 |                 data = scraper.get(url, timeout=5, headers=HEADERS)
 49 |             except(requests.exceptions.ConnectionError, requests.exceptions.ConnectTimeout, requests.exceptions.ReadTimeout):
 50 |                 logging.debug("Connection error, retrying")
 51 |                 time.sleep(1)
 52 | 
 53 |         # Convert data
 54 |         js = data.json()
 55 |         logging.debug("Received " + str(js) + " from " + url)
 56 | 
 57 |         # Return json
 58 |         return js
 59 | 
 60 |     def discord_lookup_all(self, channelID:str|None, threads:int=6, sessions:list=None)->dict|list:
 61 |         """
 62 |         Similar to discord_channel_lookup() but processes everything, not just in segments.
 63 |         NOTE: will take a significant amount of time if discord channel is of considerable size
 64 |         
 65 |         Param:
 66 |             threads: Number of threads to use while looking up js
 67 |             sessions: list of sessions used when scraping, size must be >= threads
 68 |         """
 69 |         
 70 |         # Grab data
 71 |         js_buff = []
 72 | 
 73 |         # Generate threads and threading vars
 74 |         pool = ThreadPool(threads)
 75 |         pool.start_threads()
 76 |         js_buff_lock = Lock()
 77 |         main_sem = Semaphore(0)
 78 |         
 79 |         # Generate sessions for each thread
 80 |         if sessions:
 81 |             assert(len(sessions) >= threads)
 82 |         else:
 83 |             sessions = [cfscrape.create_scraper(requests.Session())] * threads
 84 |             adapters = [requests.adapters.HTTPAdapter(pool_connections=1, pool_maxsize=1, max_retries=0, pool_block=True)] * threads
 85 |             [session.mount('http://', adapter) for session,adapter in zip(sessions,adapters)]
 86 |         
 87 |         # Loop until no more data left
 88 |         [pool.enqueue((self.__discord_lookup_thread_job, (threads, DISCORD_CHANNEL_CONTENT_SKIP_INCRE, i * DISCORD_CHANNEL_CONTENT_SKIP_INCRE, channelID, sessions[i], main_sem, js_buff, js_buff_lock, pool)))\
 89 |             for i in range(0, threads)]
 90 | 
 91 |         
 92 |         # Sleep until done
 93 |         main_sem.acquire()
 94 |         
 95 |         # Kill threads
 96 |         pool.join_queue()
 97 |         pool.kill_threads()
 98 |         
 99 |         # Kill all adapters
100 |         [session.close() for session in sessions]
101 |         
102 |         # Return json
103 |         return js_buff
104 |     
105 |     def __discord_lookup_thread_job(self, tcount:int, skip:int, curr:int, channelID:str, scraper:CloudflareScraper, main_sem:Semaphore, js_buff:list, js_buff_lock:Lock, pool:ThreadPool) -> None:
106 |         """
107 |         Thread job for worker threads in discord_lookup_all. Processes a segment of 
108 |         data then sends its next segment into thread queue
109 |         
110 |         Param:
111 |             tcount: number of threads used within threadpool. 
112 |             main_sem: Semaphore used to wake up main thread
113 |             skip: skip amount to access next page of content, will be the same for all threads
114 |             curr: current skip number
115 |             channelID: Discord channel id
116 |             scraper: scraper to be used to scrape js
117 |             js_buff: list used to store stuff
118 |             js_buff_lock: lock for js_buff
119 |             pool: Threadpool used for this function
120 |         Pre: main_sem begins on zero
121 |         Pre: tcount number of tasks were/is going to be submitted into threadpool 
122 |         NOTE: that cond isn't used because there is a situation where broadcast may be 
123 |         called before calling thread goes to sleep
124 |         """
125 |         data = None
126 |         # Process current task
127 |         url = DISCORD_CHANNEL_CONTENT_PRE_API + channelID + DISCORD_CHANNEL_CONTENT_SUF_API + str(curr)
128 |         logging.info(f"scanning {url}")
129 |         while not data:
130 |             try:
131 |                 data = scraper.get(url, timeout=5, headers=HEADERS)
132 |             except(requests.exceptions.ConnectionError, requests.exceptions.ConnectTimeout, requests.exceptions.ReadTimeout):
133 |                 logging.info("Connection error, retrying -> url: {s}".format(s=url))
134 |                 
135 |         if not data:
136 |             logging.critical("Invalid data scraped -> url: {S}".format(s=url))
137 |         
138 |         # Convert data
139 |         js = data.json()    
140 |             
141 |         
142 |         # Add data to js_buff
143 |         if len(js) > 0:
144 |             js_buff_lock.acquire()
145 |             # If js_buff is too small, extend it
146 |             insert_pos = curr/skip
147 |             space_diff = self.__calculate_additional_list_slots(js_buff, insert_pos)
148 |             
149 |             if(space_diff > 0): 
150 |                 addon = [None] * int(space_diff)
151 |                 js_buff += addon
152 | 
153 |             # Add into js buff
154 |             js_buff[int(insert_pos)] = js
155 |             logging.debug("Received " + str(js) + " from " + url)
156 |             js_buff_lock.release()
157 |             
158 |             # Create and add task back into threadpool
159 |             pool.enqueue((self.__discord_lookup_thread_job, (tcount, DISCORD_CHANNEL_CONTENT_SKIP_INCRE, curr + tcount * DISCORD_CHANNEL_CONTENT_SKIP_INCRE, channelID, scraper, main_sem, js_buff, js_buff_lock, pool)))
160 |        
161 |         # If is done, broadcast to main thread
162 |         else:
163 |             main_sem.release()
164 |         
165 | 
166 |     def __calculate_additional_list_slots(self, l:list, p:int)->int:
167 |         """
168 |         Given the list l and position to insert element p, returns how many more list slots are 
169 |         needed in l to meet p
170 | 
171 |         Args:
172 |             l (list): list
173 |             p (int): position to insert element
174 | 
175 |         
176 |         Returns:
177 |             int: how many more list slots needed in l to meet p, if is <=0, no additional slots are needed
178 |         """
179 |         return p - (len(l) - 1)
180 |     
181 |     def discord_channel_lookup(self, channelID:str|None, scraper:CloudflareScraper)->dict|list:
182 |         """
183 |         Looks up a channel's content and returns it. Content is returned in 
184 |         chunks and not all content is returned; however, subsequent calls will
185 |         return results that will always be different.
186 | 
187 |         Param:
188 |             channelID: 
189 |                 channelID of channel to scrape. 
190 |                 If is None, scrape starting at the endpoint of the previous scrape
191 |                 If is not None, scrape starting the end of the channel
192 |             scarper:
193 |                 Scraper: scaraper to use while scraping kemono
194 | 
195 |         Return: JSON object containing data from the file
196 |         """
197 |         # If None sent but no history, quit
198 |         if not channelID:
199 |             assert(self.__recent)
200 | 
201 |         # If no history, create initial history
202 |         if not self.__recent:
203 |             self.__recent = {"channelID" : channelID, "skip" : 0}  # it doesn't exist yet, so initialize it
204 |         
205 |         # If history exists and matches, use old data
206 |         if(not channelID or channelID == self.__recent.get("channelID")):
207 |             skip = self.__recent.get("skip")
208 |             self.__recent = {"channelID" : self.__recent.get("channelID"), "skip" : skip + DISCORD_CHANNEL_CONTENT_SKIP_INCRE}
209 |             channelID = self.__recent.get("channelID")
210 | 
211 |         # If history exists but does not match, start from beginning
212 |         else:
213 |             skip = 0
214 |             self.__recent = {"channelID" : channelID, "skip" : skip + DISCORD_CHANNEL_CONTENT_SKIP_INCRE}
215 |         
216 |         # Grab data
217 |         data = None
218 |         url = DISCORD_CHANNEL_CONTENT_PRE_API + channelID + DISCORD_CHANNEL_CONTENT_SUF_API + str(skip)
219 |         while not data:
220 |             try:
221 |                 data = scraper.get(url, timeout=5, headers=HEADERS)
222 |             except(requests.exceptions.ConnectionError, requests.exceptions.ConnectTimeout, requests.exceptions.ReadTimeout):
223 |                 logging.debug("Connection error, retrying")
224 |         
225 |         # Convert data
226 |         js = data.json()
227 |         logging.debug("Received " + str(js) + " from " + url)
228 | 
229 |         # Return json
230 |         return js


--------------------------------------------------------------------------------
/DiscordtoJson_test.py:
--------------------------------------------------------------------------------
 1 | import unittest
 2 | from DiscordtoJson import DiscordToJson
 3 | import requests.adapters
 4 | import cfscrape
 5 | import json
 6 | import logging
 7 | 
 8 | 
 9 | class DiscordtoJsonTestCase(unittest.TestCase):
10 |     def setUp(self) -> None:
11 |         """
12 |         Creates a session
13 |         """
14 |         logging.basicConfig(level=logging.DEBUG)
15 |         self.scraper = cfscrape.create_scraper(requests.Session())
16 |         adapter = requests.adapters.HTTPAdapter(pool_connections=6, pool_maxsize=6, max_retries=0, pool_block=True)
17 |         self.scraper.mount('http://', adapter)
18 |         self.js = DiscordToJson()
19 | 
20 |     def test_discord_lookup(self):
21 |         """
22 |         Tests discord lookup
23 |         """
24 |         # No channels
25 | 
26 |         # One channel
27 |         js = self.js.discord_lookup("https://kemono.party/discord/server/344748294372720643".rpartition('/')[2], self.scraper)
28 |         self.assertEqual(json.dumps(js), '[{"id": "344748969991340033", "name": "tiffys-drawings"}]')
29 | 
30 |         # Multi channels
31 |         js = self.js.discord_lookup("https://kemono.party/discord/server/634594002624184360".rpartition('/')[2], self.scraper)
32 |         self.assertEqual(json.dumps(js), r'[{"id": "652592122951630850", "name": "100\u5186_100yen"}, {"id": "652563554108571650", "name": "100\u5186_100yen"}, {"id": "652592073785999369", "name": "300\u30d3\u30fc\u30eb_beer"}, {"id": "652551798472835072", "name": "300\u30d3\u30fc\u30eb_beer"}, {"id": "634659046678593536", "name": "\u4f5c\u696d\u4e2d_wip"}]')
33 | 
34 |     def test_channel_lookup(self):
35 |         """
36 |         Tests discord channel lookup
37 |         """
38 |         # Get first 25
39 |         # 634659046678593536
40 |         js = self.js.discord_channel_lookup("634659046678593536", self.scraper)
41 |         before = json.dumps(js)
42 | 
43 |         # Get next 25 using None
44 |         js = self.js.discord_channel_lookup(None, self.scraper)
45 |         after = json.dumps(js)
46 |         self.assertNotEqual(before, after)
47 | 
48 |         # Get next 25 using channel name
49 |         js = self.js.discord_channel_lookup("634659046678593536", self.scraper)
50 |         last = json.dumps(js)
51 |         self.assertNotEqual(last, before)
52 |         self.assertNotEqual(last, after)
53 | 
54 |         # Switch to another channel 
55 |         js = self.js.discord_channel_lookup("652563554108571650", self.scraper)
56 |         logging.info("first" + str(js[len(js) - 1].get('content')))
57 |         self.assertEqual(str(js[0].get('content')), r'**6月の$1GoogleDriveリンク June $1 Google Drive link: **https://drive.google.com/drive/folders/1nXscFvaEiLRVLf0d7di6ti1iH-7MdZ9F?usp=sharing @everyone')
58 |         self.assertEqual(str(str(js[len(js) - 1].get('content'))), r'> > **6月の$1 MEGA リンク June $1 MEGA link:  **https://mega.nz/folder/z4dk3AhK#0L7XjaYUbH3iffrVpkLIeQ @everyone')
59 |         # End of channel
60 |         js = self.js.discord_channel_lookup(None, self.scraper)
61 |         self.assertEqual(len(js), 0)
62 | 
63 | 
64 |     def tearDown(self) -> None:
65 |         """
66 |         Deconstructs session
67 |         """
68 |         self.scraper.close()
69 | 
# Run the test cases in this file when it is executed directly.
if __name__ == '__main__':
    unittest.main()


--------------------------------------------------------------------------------
/HashTable.py:
--------------------------------------------------------------------------------
  1 | from typing import List
  2 | from typing import TypeVar, Generic
  3 | import mmh3
  4 | 
# Type variable used for the value held by a KVPair.
T = TypeVar('T')
# Type variable used for the key held by a KVPair.
V = TypeVar('V')
# Smallest number of slots a HashTable is created with; smaller requested sizes are raised to this.
MINIMUM_SIZE = 2
  8 | class Error(Exception):
  9 |     """Base class for other exceptions"""
 10 |     pass
 11 | 
 12 | 
 13 | class MismatchTypeException(Error):
 14 |     """Raised when a comparison is made on 2 different types"""
 15 |     pass
 16 | 
 17 | 
 18 | class KVPair (Generic[V,T]):
 19 |     """
 20 |     Generic KVPair structure where:
 21 |         Key is generic V
 22 |         Value is generic T
 23 |         Tombstone is bool & optional
 24 |     Upon initiailization, data becomes read-only
 25 |     """
 26 |     __key: V
 27 |     __value: T
 28 |     __tombstone: bool
 29 | 
 30 |     def __init__(self, key: V, value: T) -> None:
 31 |         """
 32 |         Initializes KVPair. Tombstone is disabled by default.
 33 | 
 34 |         Param
 35 |             key: key (use to sort)
 36 |             value: value (data)
 37 | 
 38 |         """
 39 |         self.__value = value
 40 |         self.__key = key
 41 |         self.__tombstone = False
 42 | 
 43 |     def getKey(self) -> V:
 44 |         """
 45 |         Returns key
 46 |         Return: key
 47 |         """
 48 |         return self.__key
 49 | 
 50 |     def getValue(self) -> T:
 51 |         """
 52 |         Returns value
 53 |         Return: value
 54 |         """
 55 |         return self.__value
 56 |     
 57 |     def setValue(self, newValue:T)->None:
 58 |         """
 59 |         Set value
 60 |         Param: value
 61 |         """
 62 |         self.__value = newValue
 63 | 
 64 |     def compareTo(self, other) -> int:
 65 |         """
 66 |         Compares self and other key value. Ignores generic typing
 67 | 
 68 |         Raise: MismatchTypeException if other is not a KVPair\n
 69 |         Return:
 70 |             self.getKey() > other.getKey() -> 1\n
 71 |             self.getKey() == other.getKey() -> 0\n
 72 |             self.getKey() < other.getKey() -> -1\n
 73 | 
 74 |         """
 75 |         if other == None or not isinstance(other, KVPair):
 76 |             raise MismatchTypeException("other is not of type KVPair(V,T)")
 77 | 
 78 |         if self.__key > other.getKey():
 79 |             return 1
 80 |         if self.__key == other.getKey():
 81 |             return 0
 82 |         return -1
 83 | 
 84 |     def __str__(self) -> str:
 85 |         """
 86 |         toString function which returns KVPair in json style formatting
 87 |         {key:<keyval>, value:<val>, Tomb:<val>}
 88 | 
 89 |         value relies on T's __str__ function
 90 | 
 91 |         Return: KVPair in json style format
 92 |         """
 93 |         return "{key:" + str(self.__key) + ", value:" + str(self.__value) + ", Tomb:" + ("T" if self.__tombstone else "F") + "}"
 94 | 
 95 |     def setTombstone(self) -> None:
 96 |         """
 97 |         Turns on tombstone
 98 |         """
 99 |         self.__tombstone = True
100 | 
101 |     def disableTombstone(self) -> None:
102 |         """
103 |         Turns off tombstone
104 |         """
105 |         self.__tombstone = False
106 | 
107 |     def isTombstone(self) -> bool:
108 |         """
109 |         Returns tombstone status
110 | 
111 |         Return true if set, false if disabled
112 |         """
113 |         return self.__tombstone
114 | 
115 | 
116 | class HashTable:
117 |     """
118 |     Closed, extensible hash table database storing KVPairs of any type
119 |     Was built using code I wrote in Java for CS3114 with some slight functionality
120 |     adjustments
121 | 
122 |     @author Jeff Chen
123 |     @created 5/8/2022
124 |     @Last modified 5/8/2022
125 |     """
126 |     __size: int
127 |     __records: List[KVPair]
128 |     __occupied: int
129 | 
130 |     def __init__(self, size):
131 |         """
132 |         Construct a hash table with initial size. 
133 | 
134 |         Param:
135 |             initialSize: Initial hash table size
136 |         """
137 |         
138 |         
139 |         self.__size = max(size, MINIMUM_SIZE)
140 |         self.__records = [None] * self.__size
141 |         self.__occupied = 0
142 | 
143 |     # Core Functions #################################################
144 | 
145 |     def hashtable_toarray(self) -> list:
146 |         """
147 |         Returns a list of nontombstone values from the hashtable
148 | 
149 |         Returns:
150 |             list: _description_
151 |         """
152 |         return [(i.getKey(), i.getValue()) for i in self.__records if i and not i.isTombstone()]
153 |     
154 |     def hashtable_add(self, pair: KVPair) -> None:
155 |         """
156 |         Adds a KVPair to the  hash table and expands if needed
157 |         If is a duplicate entry exists, do nothing
158 | 
159 |         Param:
160 |             KVPair: data to add to the hash table
161 |         """
162 |         # Check if a record exists in the table
163 |         if(self.hashtable_exist(pair) != -1):
164 |             return
165 | 
166 |         # TableSz
167 |         if(self.__isHalfFull()):
168 |             self.__doubleTable()
169 | 
170 |         # Find insert position
171 |         home = self.hash(str(pair.getKey()), self.__size)
172 |         tombstone = -1
173 |         curr = home
174 | 
175 |         step = 1
176 |         while self.__records[curr] != None:
177 | 
178 |             if self.__records[curr].isTombstone() and tombstone == -1:
179 |                 tombstone = curr
180 | 
181 |             curr = self.__quadraticProbe(home, step, self.__size)
182 |             step += 1
183 |         
184 |         # Add to hash table
185 |         if tombstone != -1:
186 |             self.__records[tombstone] = pair
187 |         else:
188 |             self.__records[curr] = pair
189 |         
190 |         self.__occupied += 1
191 | 
192 |     def hashtable_lookup_value(self, searchKey)->any:
193 |         """
194 |         Look up a KVPair and returns its value
195 |         
196 |         Param:
197 |             searchKey: key to search for
198 |         Return: value of matching KVPair, None if not found
199 |         """
200 |         # Get index from hash table
201 |         index = self.hashtable_exist_by_key(searchKey)
202 | 
203 |         if(index == -1):
204 |             return None
205 |         return self.__records[index].getValue()
206 | 
207 |     def hashtable_edit_value(self, searchKey, newValue)->bool:
208 |         """
209 |         Searches for a key in hash table, if found, edits value to 
210 |         newValue
211 | 
212 |         Param
213 |             searchKey: key of KVPair to look for
214 |             newValue: new value to set KVPair to
215 |         Pre: searchKey and newValue match generic type of KVPair
216 |         Return: True if was successful, False if not
217 |         """
218 |         # Get index from hash table
219 |         index = self.hashtable_exist_by_key(searchKey)
220 | 
221 |         if(index < 0):
222 |             return False
223 | 
224 |         # Edit value
225 |         self.__records[index].setValue(newValue)
226 | 
227 |         return True
228 | 
229 |     def hashtable_delete(self, token:KVPair)->bool:
230 |         """
231 |         Removes an item from the hash table
232 |         Param
233 |             token: record to remove from table
234 |         Return true if removed, false if not
235 |         """
236 |         # Get position
237 |         pos = self.hashtable_exist(token)
238 |         
239 |         if pos == -1:
240 |             return False
241 |         
242 |         # Remove from table
243 |         self.__records[pos].setTombstone()
244 |         self.__occupied -= 1
245 |         return True
246 | 
247 |     def hashtable_exist_by_key(self, searchKey) -> int:
248 |         """
249 |         Check if a KVPair exists within a hash table. 
250 |         If a lengthy sequence of probes (>=10) is detected, table will
251 |         be resized 
252 | 
253 |         Params:
254 |             searchKey: key of KVPair to search for 
255 |         Return: position if found, -1 if does not exists
256 |         """
257 |         if searchKey == None:
258 |             return -1
259 |         
260 |         home = self.hash(str(searchKey), self.__size)
261 |         curr = home
262 | 
263 |         step = 1
264 |         while self.__records[curr] != None:
265 |             if not self.__records[curr].isTombstone() and self.__records[curr].getKey() == searchKey:
266 |                 return curr
267 |             
268 |             # Hashtable expansion is an extremely costly operation. 
269 |             # len / 4 is used to be more scalable with hash table size
270 |             if step >= 10:
271 |                 self.__doubleTable()
272 |                 step = 1
273 |                 curr = self.hash(str(searchKey), self.__size)
274 |             else:
275 |                 curr = self.__quadraticProbe(home, step, self.__size)
276 |                 step += 1
277 |         return -1
278 | 
279 |     def hashtable_exist(self, token: KVPair) -> int:
280 |         """
281 |         Check if a KVPair exists within a hash table. 
282 |         If a lengthy sequence of probes (>=10) is detected, table will
283 |         be resized 
284 | 
285 |         Params:
286 |             tokens: KVPair to search
287 |         Return: position if found, -1 if not
288 |         """
289 |         home = self.hash(str(token.getKey()), self.__size)
290 |         curr = home
291 | 
292 |         step = 1
293 |         while self.__records[curr] != None:
294 |             if not self.__records[curr].isTombstone() and self.__records[curr].compareTo(token) == 0:
295 |                 return curr
296 |             if step >= 10:
297 |                 self.__doubleTable()
298 |                 step = 1
299 |                 curr = self.hash(str(token.getKey()), self.__size)
300 |             else:
301 |                 curr = self.__quadraticProbe(home, step, self.__size)
302 |                 step += 1
303 |         return -1
304 | 
305 |     # Getters ########################################################
306 |     def hashtable_getSize(self) -> int:
307 |         """
308 |         Get size of hash table
309 | 
310 |         Return: size of hash table
311 |         """
312 |         return self.__size
313 | 
314 |     def hashtable_getOccupied(self) -> int:
315 |         """
316 |         Get number of occupied slots
317 | 
318 |         Return: number of occupied slots
319 |         """
320 |         return self.__occupied
321 |     
322 |     def hashtable_print(self) -> None:
323 |         """
324 |         Prints table in the following format
325 |         Index\tData
326 |         1\t\t<data1>
327 |         2\t\t<data2>
328 |         ...
329 |         None entry data will be shown as <None>
330 |         """
331 |         print("Index\tData")
332 |         for i in range(0, self.__size):
333 |             print(str(i + 1) + "\t\t" + ("<None>" if self.__records[i] == None else str(self.__records[i])))
334 | 
335 |     # Utility #######################################################
336 | 
337 |     def __transfer(self, dest: List[KVPair], src: KVPair) -> None:
338 |         """
339 |         Transfers an existing record to dest
340 |         Param
341 |             dest: table to transfer records to
342 |             src: record to transfer
343 |         Pre: dest no tombstones and src not a tombstone. 
344 |             dest less than half full
345 |         """
346 |         home = self.hash(str(src.getKey()), len(dest))
347 |         curr = home
348 | 
349 |         step = 1
350 |         while dest[curr] != None:
351 |             curr = self.__quadraticProbe(home, step, len(dest))
352 |             step += 1
353 | 
354 |         dest[curr] = src
355 | 
356 |     def __rehash(self, dest: List[KVPair]) -> None:
357 |         """
358 |         Rehashes and transfers over all non-tombstone entries to dest
359 | 
360 |         Param
361 |             dest: table to transfer records to
362 |         """
363 |         remain = self.__occupied
364 | 
365 |         i = 0
366 |         while remain > 0:
367 |             if self.__records[i] != None and not self.__records[i].isTombstone():
368 |                 self.__transfer(dest, self.__records[i])
369 |                 remain -= 1
370 |             i += 1
371 | 
372 |     def __doubleTable(self) -> None:
373 |         """
374 |         Doubles and rehashes hash table
375 |         """
376 |         newRecords: List[KVPair] = [None] * self.__size * 2
377 |         self.__rehash(newRecords)
378 | 
379 |         self.__records = newRecords
380 |         self.__size = len(newRecords)
381 | 
382 |     def __isHalfFull(self) -> bool:
383 |         """
384 |         Checks if the table is half fulll
385 | 
386 |         Return: True if half full, false if not
387 |         """
388 |         return (self.__size - self.__occupied) <= self.__occupied
389 | 
390 |     def __quadraticProbe(self, home: int, step: int, tableSz: int) -> int:
391 |         """
392 |         Performs quadratic probe on home at step
393 |         Param:
394 |             home: home slot
395 |             step: nth step in quadratic step
396 |             tableSz: size of hash table
397 |         Return: record slot at quadratic probe step
398 |         """
399 |         return (home + step * step) % tableSz
400 | 
401 |     def hash(self, s: str, m: int) -> int:
402 |         """
403 |         Hashing algorithm using murmurhash3 32-bit unsigned int
404 | 
405 |         Params
406 |             s: string to hash
407 |             m: size of table (used as the modulus, so it must be non-zero)
408 |         Return: home slot of s, i.e. the hash of s reduced modulo m
409 | 
410 |         Edit 2023, fixed broken hash function
411 |         """
412 |         # History: before the 2023 edit this method folded the characters
413 |         # of s into a sum, four at a time with base-256 weights:
414 |         #     mul = 1 if (i % 4 == 0) else mul * 256
415 |         #     sum += ord(s[i]) * mul
416 |         # That implementation was replaced by the mmh3 call below.
417 | 
418 |         return mmh3.hash(s, signed=False) % m
419 | 


--------------------------------------------------------------------------------
/KMPDownloader_test.py:
--------------------------------------------------------------------------------
  1 | from re import T
  2 | import shutil
  3 | from tkinter import N
  4 | import unittest
  5 | from KMPDownloader import KMP
  6 | import os
  7 | from KMPDownloader import DeadThreadPoolException
  8 | import logging
  9 | from Threadpool import tname
 10 | """
 11 | Tests KMPDownloader.py,
 12 | ##################################################
 13 | WARNING Contains unsafe works!!! #################
 14 | ##################################################
 15 | 
 16 | Author: Jeff Chen
 17 | Last modified: 6/6/2022
 18 | """
 19 | class KMPTestCase(unittest.TestCase):
 20 | 
 21 |     def setUp(self) -> None:
 22 |         """
 23 |         Sets up a null KMP, since each test builds its own with different parameters
 24 |         """
 25 |         logging.basicConfig(level=logging.INFO)
 26 |         self.KMP = None
 27 |         tname.id = None
 28 | 
 29 |     @classmethod
 30 |     def setUpClass(cls):
 31 |         """
 32 |         Create temporary testing directory
 33 |         """
 34 |         cls.tempdir = os.path.abspath('./') + '\\temp\\'
 35 | 
 36 |         if os.path.exists(cls.tempdir):
 37 |             logging.critical("Please remove before testing ->" + cls.tempdir)
 38 |             exit()
 39 | 
 40 |     def test_start_kill_threads(self) -> None:
 41 |         """
 42 |         Tests the starting and killing of threads
 43 |         """
 44 |         self.KMP = KMP(self.tempdir, False, tcount=None, chunksz=None, ext_blacklist=None)
 45 |         
 46 |         # Single thread
 47 |         self.KMP._KMP__threads = self.KMP._KMP__create_threads(1)
 48 |         self.KMP._KMP__kill_threads(self.KMP._KMP__threads)
 49 |         self.assertRaises(DeadThreadPoolException, self.KMP._KMP__call_and_interpret_url,
 50 |                           "https://kemono.party/gumroad/user/9222612694494/post/AizNy")
 51 | 
 52 |         # 3 Threads
 53 |         self.KMP._KMP__threads = self.KMP._KMP__create_threads(3)
 54 |         self.KMP._KMP__kill_threads(self.KMP._KMP__threads)
 55 |         self.assertRaises(DeadThreadPoolException, self.KMP._KMP__call_and_interpret_url,
 56 |                           "https://kemono.party/gumroad/user/9222612694494/post/AizNy")
 57 | 
 58 |         self.KMP.close()
 59 |     def test_trim_fname(self) -> None:
 60 |         """
 61 |         Tests __trim_fname on attachment link text and on /data/ hrefs carrying a ?f= name
 62 |         """
 63 |         self.KMP = KMP(self.tempdir, False, tcount=None, chunksz=None, ext_blacklist=None)
 64 |         # <a class="post__attachment-link" href="/data/ac/95/ac95d0d22d3bf2b76e66305ba8b45e573d08980419f7aca786e11945f53342c4.zip?f=%E3%81%BE%E3%81%A8%E3%82%81DL%E7%94%A8.zip">
 65 |         #    Download まとめDL用.zip
 66 |         #  </a>
 67 | 
 68 |         # Case 3 -> Link text with a single space: the leading "Download " is dropped
 69 |         self.assertEqual(self.KMP._KMP__trim_fname(
 70 |             "Download まとめDL用.zip"), "まとめDL用.zip")
 71 | 
 72 |         # Case 3 -> Multiple spaces: only the leading "Download " is dropped
 73 |         self.assertEqual(self.KMP._KMP__trim_fname(
 74 |             "Download 1_2 2016 aged whiskey.zip"), "1_2 2016 aged whiskey.zip")
 75 | 
 76 |         # Case 2 -> Bad extension in the ?f= name: the hashed name from the path is expected
 77 |         self.assertEqual(self.KMP._KMP__trim_fname(
 78 |             "/data/3d/68/3d68def31822e95ad249ceb2237fcdae29b644e6702366ddae761572be900955.jpg?f=https%3A//c10.patreonusercontent.\
 79 | com/3/e30%253D/patreon-media/p/post/30194248/7cffbc9604664ccab13f3b57fdc78e6f/1.jpe%3Ftoken-time%3D1570752000%26token\
 80 | -hash%3DLadY-wBiRPi84Qb5X-KI7NEgEP6HE6lljOLiHBm7qY8%253D"), "3d68def31822e95ad249ceb2237fcdae29b644e6702366ddae761572be900955.jpg")
 81 | 
 82 |         # Case 1 -> Good extension in the ?f= name: the ?f= name is expected
 83 |         self.assertEqual(self.KMP._KMP__trim_fname(
 84 |             "/data/4f/83/4f83453fc625095401da81248a2242246b01b229bc5e1b2e1dd470da866f1980.jpg?f=b9ffc2f9-2c11-42c8-b5a2-7995a233ca41\
 85 | .jpg"), "b9ffc2f9-2c11-42c8-b5a2-7995a233ca41.jpg")
 86 | 
 87 |         self.assertEqual(self.KMP._KMP__trim_fname("/data/8b/e7/8be7e3fc0b0304c97b0bd5d9f7a66b2ad97c2d798808b52824642480e8dfe0d7.gif?f=BBS-Snoggler-Update.gif"), "BBS-Snoggler-Update.gif")
 88 |         self.KMP.close()
 89 |     def test_download_static_files(self) -> None:
 90 |         """
 91 |         Tests downloading post images with different thread and image counts, then checks file sizes
 92 |         """
 93 | 
 94 |         # Single thread no image
 95 |         self.KMP = KMP(self.tempdir, unzip=False, tcount=1, chunksz=None, ext_blacklist=None)
 96 |         self.KMP.routine(unpacked=0, url=
 97 |             "https://kemono.party/patreon/user/33271853/post/36694748")
 98 |         self.KMP.close()
 99 | 
100 |         # 3 Thread no image
101 |         self.KMP = KMP(self.tempdir, unzip=False, tcount=3, chunksz=None, ext_blacklist=None)
102 |         self.KMP.routine(unpacked=0, url=
103 |             "https://kemono.party/patreon/user/33271853/post/47946953")
104 |         self.KMP.close()
105 | 
106 |         # Single Thread, 2 image
107 |         self.KMP = KMP(self.tempdir, unzip=False, tcount=1, chunksz=None, ext_blacklist=None)
108 |         self.KMP.routine(unpacked=0, url=
109 |             "https://kemono.party/patreon/user/33271853/post/36001529")
110 |         self.KMP.close()
111 | 
112 |         # 3 Threads, 2 image
113 |         self.KMP = KMP(self.tempdir, unzip=False, tcount=3, chunksz=None, ext_blacklist=None)
114 |         self.KMP.routine(unpacked=0, url=
115 |             "https://kemono.party/patreon/user/33271853/post/47255266")
116 |         self.KMP.close()
117 | 
118 |         # 1 Thread, multi images
119 |         self.KMP = KMP(self.tempdir, unzip=False, tcount=1, chunksz=None, ext_blacklist=None)
120 |         self.KMP.routine(unpacked=0, url=
121 |             "https://kemono.party/patreon/user/33271853/post/65647736")
122 |         self.KMP.close()
123 | 
124 |         # 16 Thread multi images
125 |         self.KMP = KMP(self.tempdir, unzip=False, tcount=16, chunksz=None, ext_blacklist=None)
126 |         self.KMP.routine(unpacked=0, url=
127 |             "https://kemono.party/patreon/user/33271853/post/52792630")
128 |         self.KMP.close()
129 | 
130 |         # Verify content: sizes in bytes of the image files written under tempdir
131 |         self.assertEqual(os.stat(
132 |             self.tempdir + "delcieno/NAPP 3.0 PREVIEW by delcieno from Patreon  Kemono/0.png").st_size, 3692609)
133 |         self.assertEqual(os.stat(
134 |             self.tempdir + "delcieno/NAPP 3.0 PREVIEW by delcieno from Patreon  Kemono/1.png").st_size, 3692609)
135 |         self.assertEqual(os.stat(
136 |             self.tempdir + "delcieno/NAPP 3.0 PREVIEW by delcieno from Patreon  Kemono/2.png").st_size, 2752125)
137 |         self.assertEqual(os.stat(
138 |             self.tempdir + "delcieno/NAPP 3.0 PREVIEW by delcieno from Patreon  Kemono/3.png").st_size, 3262789)
139 |         self.assertEqual(os.stat(
140 |             self.tempdir + "delcieno/NAPP 3.0 PREVIEW by delcieno from Patreon  Kemono/4.png").st_size, 2392221)
141 |         self.assertEqual(os.stat(
142 |             self.tempdir + "delcieno/NAPP 3.0 PREVIEW by delcieno from Patreon  Kemono/5.png").st_size, 2349839)
143 |         self.assertEqual(os.stat(
144 |             self.tempdir + "delcieno/NAPP 3.0 PREVIEW by delcieno from Patreon  Kemono/6.png").st_size, 5652120)
145 |         self.assertEqual(os.stat(
146 |             self.tempdir + "delcieno/NAPP 3.0 PREVIEW by delcieno from Patreon  Kemono/7.png").st_size, 1825005)
147 |         self.assertEqual(os.stat(
148 |             self.tempdir + "delcieno/NAPP 3.0 PREVIEW by delcieno from Patreon  Kemono/8.png").st_size, 3002485)
149 |         self.assertEqual(os.stat(
150 |             self.tempdir + "delcieno/NAPP 3.0 PREVIEW by delcieno from Patreon  Kemono/9.png").st_size, 4467542)
151 | 
152 |         self.assertEqual(os.stat(
153 |             self.tempdir + "delcieno/ARMORS and NAPP status by delcieno from Patreon  Kemono/0.png").st_size, 13444381)
154 |         self.assertEqual(os.stat(
155 |             self.tempdir + "delcieno/ARMORS and NAPP status by delcieno from Patreon  Kemono/1.png").st_size, 13444381)
156 |         self.assertEqual(os.stat(
157 |             self.tempdir + "delcieno/ARMORS and NAPP status by delcieno from Patreon  Kemono/2.png").st_size, 13854733)
158 |         self.assertEqual(os.stat(
159 |             self.tempdir + "delcieno/ARMORS and NAPP status by delcieno from Patreon  Kemono/3.png").st_size, 13702259)
160 |         self.assertEqual(os.stat(
161 |             self.tempdir + "delcieno/ARMORS and NAPP status by delcieno from Patreon  Kemono/4.png").st_size, 13802523)
162 |         self.assertEqual(os.stat(
163 |             self.tempdir + "delcieno/ARMORS and NAPP status by delcieno from Patreon  Kemono/5.png").st_size, 13040955)
164 |         self.assertEqual(os.stat(
165 |             self.tempdir + "delcieno/ARMORS and NAPP status by delcieno from Patreon  Kemono/6.png").st_size, 13911132)
166 |         self.assertEqual(os.stat(
167 |             self.tempdir + "delcieno/ARMORS and NAPP status by delcieno from Patreon  Kemono/7.png").st_size, 13524999)
168 |         self.assertEqual(os.stat(
169 |             self.tempdir + "delcieno/ARMORS and NAPP status by delcieno from Patreon  Kemono/8.png").st_size, 12743876)
170 | 
171 |     def test_download_static_attachments(self) -> None:
172 |         """
173 |         Tests downloading static attachments (.pdf and .csv) and checks their sizes
174 |         """
175 |         # Single .pdf
176 |         self.KMP = KMP(self.tempdir, unzip=False, tcount=1, chunksz=None, ext_blacklist=None)
177 |         self.KMP.routine(unpacked=0, url=
178 |             "https://kemono.party/gumroad/user/5563321775917/post/wSIJ")
179 |         self.KMP.close()
180 | 
181 |         # Single .csv
182 |         self.KMP = KMP(self.tempdir, unzip=False, tcount=1, chunksz=None, ext_blacklist=None)
183 |         self.KMP.routine(unpacked=0, url=
184 |             "https://kemono.party/gumroad/user/5563321775917/post/mRSH")
185 |         self.KMP.close()
186 | 
187 |         self.assertEqual(os.stat(
188 |             self.tempdir + "Gumroad Help Center/New Creator FAQ - All the Basics in One Place by Gumroad Help Center from Gumroad  Kemono/Creatorpedia.pdf").st_size, 14704)
189 |         self.assertEqual(os.stat(
190 |             self.tempdir + "Gumroad Help Center/An Example CSV of Exported Sales Data by Gumroad Help Center from Gumroad  Kemono/Sales_CSV_Example.csv").st_size, 2933)
191 | 
192 |     def test_download_animated_attachments(self) -> None:
193 |         """
194 |         Tests downloaded animated gif files
195 |         """
196 |         # 2 Threads, 2 gifs
197 |         self.KMP = KMP(self.tempdir, unzip=False, tcount=2, chunksz=None, ext_blacklist=None)
198 |         self.KMP.routine(unpacked=0, url=
199 |             "https://kemono.party/patreon/user/523894/post/66527944")
200 |         self.KMP.close()
201 | 
202 |         # Single .mp4
203 |         self.KMP.routine(unpacked=0, url=
204 |             "https://kemono.party/gumroad/user/5563321775917/post/jnBuO")
205 | 
206 |         # .mov
207 |         self.KMP.routine(unpacked=0, url=
208 |             "https://kemono.party/gumroad/user/8844596389936/post/WBlK")
209 | 
210 |         self.assertEqual(os.stat(
211 |             self.tempdir + "Jasonafex/New Playable Build 0.6.6 by Jasonafex from Patreon  Kemono/1.gif").st_size, 1930242)
212 |         self.assertEqual(os.stat(
213 |             self.tempdir + "Jasonafex/New Playable Build 0.6.6 by Jasonafex from Patreon  Kemono/0.gif").st_size, 1930242)
214 |         self.assertEqual(os.stat(
215 |             self.tempdir + "Gumroad Help Center/Creating a Product - A Streaming Video Experience by Gumroad Help Center from Gumroad  Kemono/Product_Creation_-_Export_1015.mp4").st_size, 58934883)
216 |         self.assertEqual(os.stat(
217 |             self.tempdir + "Katon Callaway/Topology Tips  by Katon Callaway from Gumroad  Kemono/topoJoints.mov").st_size, 448251813)
218 | 
219 |     def test_download_audio_attachments(self) -> None:
220 |         """
221 |         Tests downloading audio files (.mp3, .sf2, .wav) and checks their sizes
222 |         """
223 |         # 3 mp3
224 |         self.KMP = KMP(self.tempdir, unzip=False, tcount=3, chunksz=None, ext_blacklist=None)
225 |         self.KMP.routine(unpacked=0, url=
226 |             "https://kemono.party/gumroad/user/5563321775917/post/moNG")
227 | 
228 |         # .sf2, .wav
229 |         self.KMP.routine(unpacked=0, url=
230 |             "https://kemono.party/gumroad/user/3915675902935/post/NTJQZ")
231 | 
232 |         self.assertEqual(os.stat(self.tempdir + "Truebones Motions Animation Studios/FREE STAR TREK SOUND FX INCLUDES SOUND FONT and .WAV file formats. by Truebones Motions Animation Studios from Gumroad  Kemono/HS_StarTrekFX.sf2").st_size, 807090)
233 |         self.assertEqual(os.stat(self.tempdir + "Truebones Motions Animation Studios/FREE STAR TREK SOUND FX INCLUDES SOUND FONT and .WAV file formats. by Truebones Motions Animation Studios from Gumroad  Kemono/sf2_smpl.wav").st_size, 806998)
234 |         self.assertEqual(os.stat(
235 |             self.tempdir + "Gumroad Help Center/A Music Album - Jam time by Gumroad Help Center from Gumroad  Kemono/BONUS_TRACK_Cant_Tail_Me_Nothing.mp3").st_size, 6541049)
236 |         self.assertEqual(os.stat(
237 |             self.tempdir + "Gumroad Help Center/A Music Album - Jam time by Gumroad Help Center from Gumroad  Kemono/Tribute_to_1776.mp3").st_size, 6244398)
238 |         self.assertEqual(os.stat(
239 |             self.tempdir + "Gumroad Help Center/A Music Album - Jam time by Gumroad Help Center from Gumroad  Kemono/Why_Am_I_Michael_Bluth.mp3").st_size, 1746643)
240 |         self.KMP.close()
241 | 
242 |     def test_download_zip_attachments(self) -> None:
243 |         """
244 |         Tests downloading a zip file with unzip=True and checks the extracted directory size
245 |         """
246 |         # Single zip file
247 |         self.KMP = KMP(self.tempdir, unzip=True, tcount=1, chunksz=None, ext_blacklist=None)
248 |         self.KMP.routine(unpacked=0, url=
249 |             "https://kemono.party/gumroad/user/samplescience/post/YeLB")
250 | 
251 |         size = self.getDirSz(self.tempdir + (
252 |             r"SampleScience Plugins  Samples/SampleScience TR-626 HD by SampleScience Plugins  Samples. from Gumroad  Kemono/SampleScience_TR626_HD"))
253 |         self.assertEqual(size, 4509259)
254 |         self.KMP.close()
255 | 
256 |     def test_download_alternate_zip_attachments(self) -> None:
257 |         """
258 |         Tests downloading and unzipping of non-.zip archives; only a .rar is exercised here
259 |         """
260 | 
261 |         self.KMP = KMP(self.tempdir, unzip=True, tcount=1, chunksz=None, ext_blacklist=None)
262 |         # Single 7z file
263 |         # Not repeated here: already covered by other tests
264 | 
265 |         # Single .rar file
266 |         self.KMP.routine(unpacked=0, url=
267 |             "https://kemono.party/gumroad/user/6075196025658/post/lWrr")
268 |         size = self.getDirSz(self.tempdir + (
269 |             r"Joe Daniels/jd Flour Sack rig for Maya by Joe Daniels from Gumroad  Kemono/jd_floursack"))
270 |         self.assertEqual(size, 4878146)
271 |         self.KMP.close()
272 | 
273 |     def test_download_non_file(self) -> None:
274 |         """
275 |         Tests downloading an invalid file
276 |         """
277 |         # No assertions: the test passes as long as routine() does not raise
278 |         self.KMP = KMP(self.tempdir, unzip=True, tcount=1, chunksz=None, ext_blacklist=None)
279 |         self.KMP.routine(unpacked=0, url="https://kemono.party/patreon/user/8296916/post/59821763")
280 |         self.KMP.close()  
281 |     
282 |     def test_download_empty_window(self) -> None:
283 |         """
284 |         Tests downloading an artist with no works
285 |         """
286 |         # https://kemono.party/gumroad/user/gunwild
287 |         self.KMP = KMP(self.tempdir, unzip=True, tcount=1, chunksz=None, ext_blacklist=None)
288 |         self.KMP.routine(unpacked=0, url="https://kemono.party/patreon/user/8296916/post/59821763")
289 |         self.assertEqual(self.getDirSz(os.path.join(self.tempdir, "Gunwild")), 0)
290 |         self.KMP.close()
291 | 
292 |     def test_file_text(self) -> None:
293 |         """
294 |         Tests that file__text.txt is written only when a post's files segment contains a link
295 |         """
296 |         self.KMP = KMP(self.tempdir, unzip=False, tcount=1, chunksz=None, ext_blacklist=None)
297 |         
298 |         # DNE: no file__text.txt is expected for this post
299 |         self.KMP.routine(unpacked=0, url="https://kemono.party/fanbox/user/305765/post/3885644")
300 |         self.assertFalse(os.path.exists(os.path.join(self.tempdir, "Y.P/NEXT by Y.P from Pixiv Fanbox  Kemono/file__text.txt")))
301 | 
302 |         # Exists: the written text is compared verbatim, whitespace included
303 |         self.KMP.routine(unpacked=0, url="https://kemono.party/patreon/user/5489259/post/30194248")
304 |         with open(os.path.join(self.tempdir, "misswarmj/My First NSFW ASMR Video Preview  by misswarmj from Patreon  Kemono/file__text.txt"), 'r') as fd:
305 |             self.assertEqual(fd.read(), "MissWarmJ on Twitter\n\
306 |               \n\
307 | \n\
308 |                   Hey,pls wear your earphone watch till the end! 1:38minuts~ It is the first NSFW #ASMR Preview. Would appriated Mega RT and Likes ^^ https://t.co/mwmfzS0cfb https://t.co/3n7bUYMzRD\n\
309 | https://twitter.com/misswarmj/status/1176210868121546752\n\
310 | \
311 | ____________________________________________________________\n\
312 | ")
313 |         self.KMP.close()
314 | 
315 | 
316 |     def test_download_bulk(self) -> None:
317 |         """
318 |         Tests routine() with lists of URLs read back from a temp file, then checks directory sizes
319 |         """
320 |         temp = os.path.join(self.tempdir, "temp.txt")
321 |         self.KMP = KMP(self.tempdir, unzip=True, tcount=6, chunksz=None, ext_blacklist=None)
322 |         if os.path.exists(temp):
323 |             os.remove(temp)
324 | 
325 | 
326 |         # Single url, all works
327 |         with open(temp, 'w+') as fd:
328 |             fd.write("https://kemono.party/gumroad/user/5352387105120\n")
329 |             fd.flush()
330 |             fd.seek(0)
331 |             self.KMP.routine(unpacked=0, url=fd.readlines())
332 | 
333 |         # Single url, single work
334 |         os.remove(temp)
335 |         with open(temp, 'w+') as fd:
336 |             fd.write("https://kemono.party/gumroad/user/650894809818/post/HskiT\n")
337 |             fd.flush()
338 |             fd.seek(0)
339 |             self.KMP.routine(unpacked=0, url=fd.readlines())
340 | 
341 |         # Multi url, all works
342 |         os.remove(temp)
343 |         with open(temp, 'w+') as fd:
344 |             fd.write("https://kemono.party/gumroad/user/5252246151109\n")
345 |             fd.write("https://kemono.party/gumroad/user/6100863138065\n")
346 |             fd.flush()
347 |             fd.seek(0)
348 |             self.KMP.routine(unpacked=0, url=fd.readlines())
349 | 
350 |         # Multi url, works only
351 |         os.remove(temp)
352 |         with open(temp, 'w+') as fd:
353 |             fd.write("https://kemono.party/gumroad/user/5428435542017/post/wEhslZ\n")
354 |             fd.write("https://kemono.party/gumroad/user/863606373292/post/FyiUu\n")
355 |             fd.flush()
356 |             fd.seek(0)
357 |             self.KMP.routine(unpacked=0, url=fd.readlines())
358 | 
359 |         # Multi url, mix of works
360 |         os.remove(temp)
361 |         with open(temp, 'w+') as fd:
362 |             fd.write("https://kemono.party/gumroad/user/7331928256471/post/yPvnb\n")
363 |             fd.write("https://kemono.party/gumroad/user/3340403173434\n")
364 |             fd.write("https://kemono.party/gumroad/user/7331928256471/post/iMbJe\n")
365 |             fd.flush()
366 |             fd.seek(0)
367 |             self.KMP.routine(unpacked=0, url=fd.readlines())
368 |         
369 |         self.assertEqual(self.getDirSz(os.path.join(self.tempdir, "Perry Leijten/Maya skinning tools by Perry Leijten from Gumroad  Kemono")), 6862522)
370 |         self.assertEqual(self.getDirSz(os.path.join(self.tempdir, "Perry Leijten/Samus Rig by Perry Leijten from Gumroad  Kemono")), 72827106)
371 |         self.assertEqual(self.getDirSz(os.path.join(self.tempdir, "Jabir J3")), 952513)
372 |         self.assertEqual(self.getDirSz(os.path.join(self.tempdir, "Kenzie Smith Piano")), 4362578)
373 |         self.assertEqual(self.getDirSz(os.path.join(self.tempdir, "MortMort")), 80470)
374 |         self.assertEqual(self.getDirSz(os.path.join(self.tempdir, "Nikolai Mamashev")), 98229752)
375 |         self.assertEqual(self.getDirSz(os.path.join(self.tempdir, "Randy Bishop")), 70006225)
376 |         self.assertEqual(self.getDirSz(os.path.join(self.tempdir, "Skylleon")), 4246089)
377 |         self.assertEqual(self.getDirSz(os.path.join(self.tempdir, "Tatyworks")), 19828495)
378 |         os.remove(temp)
379 |         self.KMP.close()
380 | 
381 |     def test_download_polluted_href(self):
382 |         """
383 |         Tests downloading files from a post that contains both internally hosted
384 |         and externally hosted URLs, then checks the size of the post directory.
385 |         """
386 |         self.KMP = KMP(self.tempdir, unzip=True, tcount=2, chunksz=None, ext_blacklist=None)
387 |         self.KMP.routine(unpacked=0, url="https://kemono.party/fanbox/user/3102267/post/3841095")
388 |         self.assertEqual(self.getDirSz(os.path.join(self.tempdir, "mochitaichi/抱き枕カバー用のラフ by mochitaichi from Pixiv Fanbox  Kemono")), 3716346)
389 |         self.KMP.close()
390 | 
391 |     def test_extract_same__dest(self):
392 |         """
393 |         Tests extracting zip files whose contents go to the same directory name (cap and (1)cap)
394 |         """
395 |         # Zip File already exists
396 |         
397 |         second = KMP(self.tempdir, unzip=True, tcount=2, chunksz=None, ext_blacklist=None)
398 |         second.routine(unpacked=0, url="https://kemono.party/fanbox/user/646778/post/3474562")
399 |         # NOTE(review): the next two assertions are identical - confirm whether the second should name another archive
400 |         self.assertFalse(os.path.exists(os.path.join(self.tempdir, "nbit/Basic 2022年 03月 by nbit from Pixiv Fanbox  Kemono/01basic - Blue Archive Ako.zip")))
401 |         self.assertFalse(os.path.exists(os.path.join(self.tempdir, "nbit/Basic 2022年 03月 by nbit from Pixiv Fanbox  Kemono/01basic - Blue Archive Ako.zip")))
402 |         size = self.getDirSz(self.tempdir + (
403 |             r"nbit/Basic 2022年 03月 by nbit from Pixiv Fanbox  Kemono/cap"))
404 |         size2 = self.getDirSz(self.tempdir + (
405 |         r"nbit/Basic 2022年 03月 by nbit from Pixiv Fanbox  Kemono/(1)cap"))
406 |         self.assertTrue(size ==  63149843 or size == 103350378)
407 |         self.assertTrue(size2 == 63149843 or size2 == 103350378)
408 |         second.close()
409 |     
410 |     def test_duplicate_file(self):
411 |         """
412 |         WARNING: Test takes significant time to complete
413 | 
414 |         Tests downloading a set of files and redownloading it after reset();
415 |         the directory size must be unchanged after redownloading
416 |         """
417 |         # Download directory
418 |         self.KMP = KMP(self.tempdir, unzip=True, tcount=12, chunksz=None, ext_blacklist=None)
419 |         self.KMP.routine(unpacked=0, url='https://kemono.party/fanbox/user/39123643?o=25')
420 |         
421 |         # Record the size after the first download
422 |         size = self.getDirSz(self.tempdir + (r"Belko"))
423 |         logging.info(size)
424 |         # Redownload
425 |         self.KMP.reset()
426 |         self.KMP.routine(unpacked=0, url='https://kemono.party/fanbox/user/39123643?o=25')
427 | 
428 |         # Confirm size is unchanged
429 |         self.assertEqual(self.getDirSz(self.tempdir + (r"Belko")), size)
430 |         self.KMP.close()
431 | 
432 |     def test_download_dead_image(self):
433 |         """
434 |         Tests downloading a post that contains a dead 3rd party link
435 |         """
436 |         self.KMP = KMP(self.tempdir, unzip=True, tcount=2, chunksz=None, ext_blacklist=None)
437 |         self.KMP.routine(unpacked=0, url="https://kemono.party/patreon/user/5489259/post/22660508")
438 |         self.KMP.close()
439 |         # No assertions: the test passes as long as nothing above raises
440 |     
441 |     def test_download_link_not_file(self):
442 |         """
443 |         Tests downloading a page where the download section contains links instead
444 |         of files; the links should be skipped
445 |         """
446 |         self.KMP = KMP(self.tempdir, unzip=True, tcount=2, chunksz=None, ext_blacklist=None)
447 |         self.KMP.routine(unpacked=0, url="https://kemono.party/patreon/user/5489259/post/29891980")  
448 |         self.KMP.close()      
449 |         # No assertions: the test passes as long as nothing above raises
450 | 
451 |     def test_download_non_image_img(self):
452 |         """
453 |         Tests a post where one 'image' is really a link: 0, 1 and 3.jpg must exist, 2.jpg must not
454 |         """
455 |         self.KMP = KMP(self.tempdir, unzip=True, tcount=2, chunksz=None, ext_blacklist=None)
456 |         self.KMP.routine(unpacked=0, url="https://kemono.party/patreon/user/5489259/post/16278266")
457 | 
458 |         self.assertTrue(os.path.exists(self.tempdir + "misswarmj/New Feet lover post on Twitter by misswarmj from Patreon  Kemono/0.jpg"))
459 |         self.assertTrue(os.path.exists(self.tempdir + "misswarmj/New Feet lover post on Twitter by misswarmj from Patreon  Kemono/1.jpg"))
460 |         self.assertFalse(os.path.exists(self.tempdir + "misswarmj/New Feet lover post on Twitter by misswarmj from Patreon  Kemono/2.jpg"))
461 |         self.assertTrue(os.path.exists(self.tempdir + "misswarmj/New Feet lover post on Twitter by misswarmj from Patreon  Kemono/3.jpg"))
462 |         self.KMP.close()
463 | 
464 |     def test_post_content(self) -> None:
465 |         """
466 |         Tests the downloading of post content into post__content.txt (empty, plaintext, links, images)
467 |         """
468 |         self.KMP = KMP(self.tempdir, unzip=True, tcount=1, chunksz=None, ext_blacklist=None)
469 | 
470 |         # Empty post content: no post__content.txt is expected
471 |         self.KMP.routine(unpacked=0, url="https://kemono.party/gumroad/user/6033318026591/post/uijUqf")
472 |         self.assertFalse(os.path.exists(os.path.join(self.tempdir, "Motion Ape/Free Bounce Tool for After Effects by Motion Ape from Gumroad  Kemono/post__content.txt")))
473 | 
474 |         # Plaintext only
475 |         self.KMP.routine(unpacked=0, url=
476 |             "https://kemono.party/patreon/user/8296916/post/53546555")
477 |         with open(os.path.join(self.tempdir, "dreamsavior/Translator Ver 3.7.11 by dreamsavior from Patreon  Kemono/post__content.txt"), 'r') as fd:
478 |             self.assertEqual(fd.read(), "There is a bug on ver 3.7.9 that prevents some users from installing the addon. This version fixes that bug.\
479 | \nThis version also fixes a bug that caused Translator++ to write files with ASCII encoding characters despites the translation target being UTF8\
480 | \nTranslator++ Ver 3.7.11\
481 | \nUpdate : Renparser Ver 0.7.2\
482 | \nFix : Renpy : Failed when exporting into a file\
483 | \nFix : Renpy : Force write encoding into UTF-8 when detected as ASCII\
484 | \nUpdate : KAGParser 1.2\
485 | \nFix : KAG : Force write encoding into UTF-8 when detected as ASCII\
486 | \nFix : Unable to install new addon\
487 | \nI'm sorry for the inconvenience.\
488 | \nEnjoy your day, and stay safe.\
489 | \n(Yesterday I got the news that my mother and my brother were positive for covid)")
490 | 
491 |         # Links
492 |         self.KMP.routine(unpacked=0, url=
493 |             "https://kemono.party/patreon/user/8296916/post/52915682")
494 |         with open(os.path.join(self.tempdir, "dreamsavior/RPG Maker MZ Is Free on steam by dreamsavior from Patreon  Kemono/post__content.txt"), 'r') as fd:
495 |             self.assertEqual(fd.read(), "Just a short notice. I'm not affiliated with Kadokawa nor endorsed by them. But \
496 | when they make they software free ... even though for limited time I need to notice you all. You may need it to \
497 | do minor editing to translate RMMZ based game.\nhttps://store.steampowered.com/app/1096900/RPG_Maker_MZ/\
498 | \nhttps://store.steampowered.com/app/1096900/RPG_Maker_MZ/")
499 | 
500 |         # Images on KMP and on other website
501 |         # Only the existence of the three image files is checked here
502 |         self.KMP.routine(unpacked=0, url="https://kemono.party/patreon/user/8296916/post/52732723")
503 |         self.assertTrue(os.path.exists(os.path.join(self.tempdir, "dreamsavior/Error File list not found in init file by dreamsavior from Patreon  Kemono/0.jpg")))
504 |         self.assertTrue(os.path.exists(os.path.join(self.tempdir, "dreamsavior/Error File list not found in init file by dreamsavior from Patreon  Kemono/1.png")))
505 |         self.assertTrue(os.path.exists(os.path.join(self.tempdir, "dreamsavior/Error File list not found in init file by dreamsavior from Patreon  Kemono/2.png")))
506 |         self.KMP.close()
507 | 
508 |     def test_post_comments(self) -> None:
509 |         """
510 |         Tests that post__comments.txt is written only for the post expected to have comments
511 |         """
512 |         self.KMP = KMP(self.tempdir, unzip=True, tcount=1, chunksz=None, ext_blacklist=None)
513 | 
514 |         # Empty: no comments file is expected
515 |         self.KMP.routine(unpacked=0, url="https://kemono.party/patreon/user/5442365/post/19064809")
516 |         self.assertFalse(os.path.exists(os.path.join(self.tempdir, "ZebraCorner/Patreon Rewards Update and follow us on Social Media by ZebraCorner from Patreon  Kemono/post__comments.txt")))
517 |         # Gumroad
518 |         self.KMP.routine(unpacked=0, url="https://kemono.party/gumroad/user/6033318026591/post/LRWjd")
519 |         self.assertFalse(os.path.exists(os.path.join(self.tempdir, "Motion Ape/Free Folder Structurer Tool for After Effects by Motion Ape from Gumroad  Kemono/post__comments.txt")))
520 |         # Fantia
521 |         self.KMP.routine(unpacked=0, url="https://kemono.party/fantia/user/53451/post/775490")
522 |         self.assertFalse(os.path.exists(os.path.join(self.tempdir, "おののいもこ/眼鏡子 by おののいもこ from Fantia  Kemono/post__comments.txt")))
523 |         # SubscribeStar TODO
524 |         self.KMP.routine(unpacked=0, url="https://kemono.party/subscribestar/user/sleepygimp/post/305785")
525 |         self.assertFalse(os.path.exists(os.path.join(self.tempdir, "sleepygimp/Nancys Day Off r08Dear Supportersyet again just in t.. by sleepygimp from SubscribeStar  Kemono/post__comments.txt")))
526 |         # DL site
527 |         self.KMP.routine(unpacked=0, url="https://kemono.party/dlsite/user/RG44418/post/RE243414")
528 |         self.assertFalse(os.path.exists(os.path.join(self.tempdir, "DLsite/FREE Life with Maid Kurumi-chan At Your Side Binaural by DLsite from DLsite  Kemono/post__comments.txt")))
529 |         # Not empty: the comments file must exist
530 |         self.KMP.routine(unpacked=0, url="https://kemono.party/fanbox/user/237083/post/3011863")
531 |         self.assertTrue(os.path.exists(os.path.join(self.tempdir, "Blood Rouge/WIP唾吐きクソビッチと化した金城遙華 by Blood Rouge from Pixiv Fanbox  Kemono/post__comments.txt")))       
532 |         self.KMP.close()
533 | 
534 |     def test_broken_url(self):
535 |         """
536 |         Tests downloading of a file with bad file extension 
537 |         """
538 |         self.KMP = KMP(self.tempdir, unzip=True, tcount=3, chunksz=None, ext_blacklist=None)
539 |         self.KMP.routine(unpacked=0, url="https://kemono.party/patreon/user/5489259/post/30194248")
540 |         self.assertTrue(os.path.exists(os.path.join(self.tempdir, "misswarmj/My First NSFW ASMR Video Preview  by misswarmj from Patreon  Kemono/0.jpg")))
541 |         self.assertTrue(os.path.exists(os.path.join(self.tempdir, "misswarmj/My First NSFW ASMR Video Preview  by misswarmj from Patreon  Kemono/1.jpg")))
542 |         self.KMP.close()
543 | 
544 |     def test_discord(self):
545 |         """
546 |         Tests downloading discord content
547 |         """
548 |         self.KMP = KMP(self.tempdir, unzip=True, tcount=12, chunksz=None, ext_blacklist=None)
549 |         self.KMP.routine(unpacked=0, url="https://kemono.party/discord/server/634594002624184360")
550 | 
551 |         # 100
552 |         self.assertEqual(os.stat(os.path.join(self.tempdir, "634594002624184360/100円_100yen/discord__content.txt")).st_size, 7845)
553 |         self.assertEqual(self.getDirSz(os.path.join(self.tempdir, "634594002624184360/100円_100yen/images")), 75556767)
554 | 
555 |         # 100(1)
556 |         self.assertEqual(os.stat(os.path.join(self.tempdir, "634594002624184360/100円_100yen(1)/discord__content.txt")).st_size, 528)
557 |         self.assertEqual(self.getDirSz(os.path.join(self.tempdir, "634594002624184360/100円_100yen(1)/images")), 0)
558 |         
559 |         # 300 
560 |         self.assertEqual(os.stat(os.path.join(self.tempdir, "634594002624184360/300ビール_beer/discord__content.txt")).st_size, 20742)
561 |         self.assertEqual(self.getDirSz(os.path.join(self.tempdir, "634594002624184360/300ビール_beer/images")), 277344917)
562 | 
563 |         # 300
564 |         self.assertEqual(os.stat(os.path.join(self.tempdir, "634594002624184360/300ビール_beer(1)/discord__content.txt")).st_size, 4011)
565 |         self.assertEqual(self.getDirSz(os.path.join(self.tempdir, "634594002624184360/300ビール_beer(1)/images")), 8197562)
566 | 
567 |         # wip
568 |         self.assertEqual(os.stat(os.path.join(self.tempdir, "634594002624184360/作業中_wip/discord__content.txt")).st_size, 128463)
569 |         self.assertEqual(self.getDirSz(os.path.join(self.tempdir, "634594002624184360/作業中_wip/images")), 487333680)
570 | 
571 |         self.KMP.close()
572 | 
573 | 
574 |     def test_unpacked(self):
575 |         """
576 |         Tests the unpacked download mode
577 |         """
578 |         self.KMP = KMP(self.tempdir, unzip=True, tcount=10, chunksz=None, ext_blacklist=None)
579 | 
580 |         # No works
581 |         self.KMP.routine(unpacked=2, url="https://kemono.party/gumroad/user/antijingoist")
582 | 
583 |         # All works
584 |         self.KMP.routine(unpacked=2, url="https://kemono.party/fanbox/user/21587853")
585 | 
586 |         # Single page of works
587 |         self.KMP.routine(unpacked=2, url="https://kemono.party/fanbox/user/144708?o=25")
588 | 
589 |         # Single work
590 |         self.KMP.routine(unpacked=2, url="https://kemono.party/fanbox/user/24164271/post/2934828")
591 | 
592 |         self.assertEqual(self.getNumFiles(os.path.join(self.tempdir, "Abbie Gonzalez")), 0)
593 |         self.assertEqual(self.getNumFiles(os.path.join(self.tempdir, "ie")), 1)
594 |         self.assertEqual(self.getNumFiles(os.path.join(self.tempdir, "みこやん")), 144)
595 |         self.assertEqual(self.getNumFiles(os.path.join(self.tempdir, "めかの工場")), 138)
596 | 
597 |         self.KMP.close()
598 | 
599 |     def test_download_undefined_char(self):
600 |         """
601 |         Tests downloading a file whose name contains an 
602 |         undefined char
603 |         """
604 |         self.KMP = KMP(self.tempdir, unzip=True, tcount=12, chunksz=None, ext_blacklist=None)
605 |         self.KMP.routine(unpacked=0, url="https://kemono.party/patreon/user/38223307/post/43447399")
606 |         self.assertEqual(self.getDirSz(os.path.join(self.tempdir, "HALkawa501/PSDChina Miku PSDClip Data by HALkawa501 from Patreon  Kemono")), 140413258)
607 |         self.KMP.close()
608 | 
609 |     def test_ext_blacklist(self):
610 |         """
611 |         Tests downloading files with a blacklist
612 |         """
613 |         # Non blacklisted
614 |         self.KMP = KMP(self.tempdir, unzip=True, tcount=3, chunksz=None, ext_blacklist=['psd', 'rando', 'doesnotexists', 'chicken', 'bacon'])
615 |         self.KMP.routine(unpacked=0, url="https://kemono.party/patreon/user/881792/post/64901768")
616 |         self.KMP.close()
617 | 
618 |         # Some blacklisted
619 |         self.KMP = KMP(self.tempdir, unzip=True, tcount=3, chunksz=None, ext_blacklist=['mp4'])
620 |         self.KMP.routine(unpacked=0, url="https://kemono.party/patreon/user/881792/post/66258950")
621 |         self.KMP.close()
622 | 
623 |         # All blacklisted
624 |         self.KMP = KMP(self.tempdir, unzip=True, tcount=3, chunksz=None, ext_blacklist=['gif'])
625 |         self.KMP.routine(unpacked=0, url="https://kemono.party/patreon/user/881792/post/63450534")
626 |         self.KMP.close()
627 | 
628 |         self.assertEqual(self.getDirSz(self.tempdir + "diives/Melina SMALL VIDEO WITH AUDIO by diives from Patreon  Kemono"), 11884327)
629 |         self.assertEqual(self.getDirSz(self.tempdir + "diives/Nat The Bunny NDE by diives from Patreon  Kemono"), 1101)
630 |         self.assertEqual(self.getDirSz(self.tempdir + "diives/Rya by diives from Patreon  Kemono"), 4708568)
631 | 
632 |         shutil.rmtree(self.tempdir + "diives/Nat The Bunny NDE by diives from Patreon  Kemono")
633 | 
634 |         # All blacklisted with partial unpacking
635 |         self.KMP = KMP(self.tempdir, unzip=True, tcount=3, chunksz=None, ext_blacklist=['gif'])
636 |         self.KMP.routine(unpacked=1, url="https://kemono.party/patreon/user/881792/post/63450534")
637 |         self.KMP.close()
638 |         self.assertEqual(os.stat(self.tempdir + "diives/Nat The Bunny NDE by diives from Patreon  Kemono - post__content.txt").st_size, 230)
639 |     
640 | 
641 |         # All blacklisted with unpacking
642 |         self.KMP = KMP(self.tempdir, unzip=True, tcount=3, chunksz=None, ext_blacklist=['gif'])
643 |         self.KMP.routine(unpacked=2, url="https://kemono.party/patreon/user/881792/post/63450534")
644 |         self.KMP.close()
645 |         
646 |         self.assertEqual(self.getNumFiles(self.tempdir + "diives"), 2)
647 | 
648 |     def test_partial_unpacked(self):
649 |         """
650 |         Tests partially unpacked download mode
651 |         """
652 |         self.KMP = KMP(self.tempdir, unzip=True, tcount=3, chunksz=None, ext_blacklist=None)
653 |         
654 |         # With files
655 |         self.KMP.routine(unpacked=1, url="https://kemono.party/fanbox/user/49494721/post/3765544")
656 | 
657 |         self.assertEqual(self.getDirSz(self.tempdir + "soso\久岐忍 by soso from Pixiv Fanbox  Kemono"), 1899290)
658 | 
659 |         # No files
660 |         self.KMP.routine(unpacked=1, url="https://kemono.party/fanbox/user/49494721/post/2082281")
661 | 
662 |         self.assertTrue(os.path.exists(self.tempdir + "soso/リクエストボックス by soso from Pixiv Fanbox  Kemono - post__comments.txt"))
663 |         self.assertTrue(os.path.exists(self.tempdir + "soso/リクエストボックス by soso from Pixiv Fanbox  Kemono - post__content.txt"))
664 |         self.KMP.close()
665 |     
666 |     def test_partial_unpacked_blacklist(self):
667 |         """
668 |         Tests partially unpacking a work that is empty after applying a blacklist
669 |         """
670 |         self.KMP = KMP(self.tempdir, unzip=True, tcount=3, chunksz=None, ext_blacklist=['jpg'])
671 |         self.KMP.routine(unpacked=1, url="https://kemono.party/patreon/user/12281898/post/67498846")
672 |         self.assertTrue(os.path.exists(self.tempdir + "MANA/WIP by MANA from Patreon  Kemono - post__content.txt"))
673 |         self.assertFalse(os.path.exists(self.tempdir + "MANA/WIP by MANA from Patreon  Kemono"))
674 |         
675 |         self.KMP.close()
676 |             
677 |     def test_exclude_posts(self):
678 |         """
679 |         Tests excluding posts
680 |         """
681 |         # https://kemono.party/fanbox/user/3316400/post/488806
682 |         # No exclusions
683 |         self.KMP = KMP(self.tempdir, unzip=False, tcount=3, chunksz=None, post_name_exclusion=[])
684 |         self.KMP.routine("https://kemono.party/fanbox/user/3316400/post/532363", unpacked=None)
685 |         self.assertEqual(self.getNumFiles(os.path.join(self.tempdir, "MだSたろう\\BRSその２-高画質版2枚 by MだSたろう from Pixiv Fanbox  Kemono")), 4)
686 |         self.KMP.close()
687 |         
688 |         self.KMP = KMP(self.tempdir, unzip=False, tcount=3, chunksz=None, post_name_exclusion=["Nothing"])
689 |         self.KMP.routine("https://kemono.party/fanbox/user/3316400/post/490300", unpacked=None)
690 |         self.assertEqual(self.getNumFiles(os.path.join(self.tempdir, "MだSたろう\\限定褐色 by MだSたろう from Pixiv Fanbox  Kemono")), 4)        
691 |         self.KMP.close()
692 |         
693 |         # Exclusions
694 |         self.KMP = KMP(self.tempdir, unzip=False, tcount=3, chunksz=None, post_name_exclusion=["August"])
695 |         self.KMP.routine("https://kemono.party/gumroad/user/trylsc", unpacked=None)
696 |         self.assertEqual(self.getNumFiles(os.path.join(self.tempdir, "TRYLSC")), 0)        
697 |         self.KMP.close()
698 |         
699 | 
700 |     def test_exclude_link(self):
701 |         """
702 |         Tests excluding links
703 |         """
704 |         # No exclusions
705 |         self.KMP = KMP(self.tempdir, unzip=False, tcount=3, chunksz=None, link_name_exclusion=[])
706 |         self.KMP.routine("https://kemono.party/gumroad/user/2986488497406/post/bMhu", unpacked=None)
707 |         self.assertTrue(os.path.exists(os.path.join(self.tempdir, "burningtides\Phuture Noize - A New Day Remake  FLP  Presets by burningtides from Gumroad  Kemono\Phuture-Noize---A-New-Day-Remake-.zip")))
708 |         self.KMP.close()
709 |         
710 |         # Some exclusions
711 |         self.KMP = KMP(self.tempdir, unzip=False, tcount=3, chunksz=None, link_name_exclusion=["19","18"])
712 |         self.KMP.routine("https://kemono.party/gumroad/user/5646205703539/post/xIMAi", unpacked=None)
713 |         self.assertFalse(os.path.exists(os.path.join(self.tempdir, "Pitiwazou - Cédric Lepiller\SPEEDSCULPT by Pitiwazou - Cédric Lepiller from Gumroad  Kemono\speedsculpt_2_80_v_0_1_19.zip")))
714 |         self.assertTrue(os.path.exists(os.path.join(self.tempdir, "Pitiwazou - Cédric Lepiller\SPEEDSCULPT by Pitiwazou - Cédric Lepiller from Gumroad  Kemono\speedsculpt_2_80_v_0_1_17.zip")))
715 |         self.assertFalse(os.path.exists(os.path.join(self.tempdir, "Pitiwazou - Cédric Lepiller\SPEEDSCULPT by Pitiwazou - Cédric Lepiller from Gumroad  Kemono\speedsculpt_2_80_v_0_1_18.zip")))
716 |         self.assertTrue(os.path.exists(os.path.join(self.tempdir, "Pitiwazou - Cédric Lepiller\SPEEDSCULPT by Pitiwazou - Cédric Lepiller from Gumroad  Kemono\speedsculpt_2_83_v_0_1_20.zip")))
717 |         self.assertTrue(os.path.exists(os.path.join(self.tempdir, "Pitiwazou - Cédric Lepiller\SPEEDSCULPT by Pitiwazou - Cédric Lepiller from Gumroad  Kemono\speedsculpt_2_9_v_0_1_22.zip")))
718 |         self.assertTrue(os.path.exists(os.path.join(self.tempdir, "Pitiwazou - Cédric Lepiller\SPEEDSCULPT by Pitiwazou - Cédric Lepiller from Gumroad  Kemono\speedsculpt_2_79_v_0_1_9.zip")))
719 |         self.KMP.close()
720 |         
721 |         # All excluded
722 |         self.KMP = KMP(self.tempdir, unzip=False, tcount=3, chunksz=None, link_name_exclusion=["sfm"])
723 |         self.KMP.routine("https://kemono.party/gumroad/user/6791944931428/post/nYFnj", unpacked=None)
724 |         self.assertFalse(os.path.exists(os.path.join(self.tempdir, "Bluejuicyjuice\\18 Nidoqueen SFM model by Bluejuicyjuice from Gumroad  Kemono\\NidoSFM.7z")))
725 |         self.KMP.close()     
726 |         
727 |     def test_server_name(self):
728 |         """
729 |         Tests downloading of server name
730 |         """
731 |         # Mp4 and images
732 |         self.KMP = KMP(self.tempdir, unzip=False, tcount=3, chunksz=None, download_server_name_type=True)
733 |         self.KMP.routine("https://kemono.party/fanbox/user/49494721/post/4072005", unpacked=None)
734 |         self.assertTrue(os.path.exists(os.path.join(self.tempdir, "soso\胡桃Live2Dアニメ by soso from Pixiv Fanbox  Kemono\\d1c15668-08e1-4bea-a1bc-a55d25e59bc3.jpg")))
735 |         self.assertTrue(os.path.exists(os.path.join(self.tempdir, "soso\胡桃Live2Dアニメ by soso from Pixiv Fanbox  Kemono\\胡桃_Live2D.mp4")))
736 |         self.KMP.close()     
737 |     
738 |     def test_password_zip(self):
739 |         """
740 |         Tests download and extraction of a password 
741 |         protected zip
742 |         """
743 |         self.KMP = KMP(self.tempdir, unzip=True, tcount=3, chunksz=None, download_server_name_type=True)
744 |         self.KMP.routine("https://kemono.party/fanbox/user/262147/post/4062214", unpacked=2)
745 |         self.assertFalse(os.path.exists(os.path.abspath("./") + "クレー計20枚パスワード付zip"))
746 |         self.KMP.close()
747 |     def test_alt_routine(self):
748 |         """
749 |         Tests alternate download for all download types
750 |         """
751 |         self.KMP = KMP(self.tempdir, unzip=True, tcount=3, chunksz=None)
752 |         # All Artist Page
753 |         self.KMP.alt_routine("https://kemono.party/fanbox/user/836862", unpacked=2)
754 |         self.assertEquals(self.getNumFiles(os.path.join(self.tempdir, "tsumikisata")), 544)        
755 |         # Single Artist Page
756 |         self.KMP.alt_routine("https://kemono.party/patreon/user/19467060?o=25", unpacked=2)
757 |         self.assertEquals(self.getNumFiles(os.path.join(self.tempdir, "katecavanaugh")), 86)        
758 |        
759 |         # Single Artist Work
760 |         self.KMP.alt_routine("https://kemono.party/patreon/user/169359/post/27626311", unpacked=2)
761 |         self.assertEquals(self.getNumFiles(os.path.join(self.tempdir, "seductionrl")), 8)        
762 |         
763 |         # Discord Channel
764 |         # TODO
765 |         # Text file
766 |         # TODO
767 |     def getDirSz(self, dir: str) -> int:
768 |         """
769 |         Returns directory and its content size
770 | 
771 |         Return directory and its content size
772 |         """
773 |         size = 0
774 |         for dirpath, dirname, filenames in os.walk(dir):
775 |             for f in filenames:
776 |                 fp = os.path.join(dirpath, f)
777 |                 # skip if it is symbolic link
778 |                 if not os.path.islink(fp):
779 |                     size += os.path.getsize(fp)
780 |         return size
781 | 
782 |     
783 |     def getNumFiles(self, dir:str) -> int:
784 |         """
785 |         Returns the number of files in a directory
786 | 
787 |         Return number of files in a directory
788 |         """
789 |         return len([name for name in os.listdir(dir) if os.path.isfile(os.path.join(dir, name))])
790 | 
791 | 
792 | 
# Run the unittest command line runner only when this file is executed
# directly, not when it is imported.
if __name__ == '__main__':
    unittest.main()
795 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2023 jeff chen
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/LockingCounter.py:
--------------------------------------------------------------------------------
 1 | from threading import Lock, Condition
 2 | class LockingCounter():
 3 |     """
 4 |     Represents a thread safe counter with multiple operations
 5 |     """
 6 |     __value:int             # Current lock's value
 7 |     __mutex:Lock            # Lock used for counter itself
 8 |     __cond_list:list        # List of conditionals
 9 |     
10 |     def __init__(self, starting:int=0) -> None:
11 |         """
12 |         Initializes the counter with starting value and initializes any other
13 |         required variables.
14 |         
15 |         starting: starting value of the counter
16 |         """
17 |         self.__value = starting
18 |         self.__mutex = Lock()
19 |         self.__cond_list = list()
20 |         
21 |     def toggle(self) -> int:
22 |         """
23 |         Increments counter by 1
24 |         
25 |         Returns: counter value immediately after toggle
26 |         """
27 |         self.__mutex.acquire()
28 |         self.__value += 1
29 |         saved = self.__value        
30 |         self.__mutex.release()
31 |         
32 |         # Notify waiting threads 
33 |         for cond in self.__cond_list:
34 |             cond.acquire()
35 |             cond.notify()
36 |             cond.release()
37 |         return saved
38 |     
39 |     def wait_until(self, target:int) -> None:
40 |         """
41 |         Block until target is <= counter
42 | 
43 |         Args:
44 |             target (int): value to block for
45 |         """
46 |         # Get current value and see if target has already been met
47 |         curr = self.get()
48 |         
49 |         if curr >= target:
50 |             return
51 |         
52 |         # If not met, block until is met
53 |         cond = Condition()
54 |         self.__cond_list.append(cond)
55 |         cond.acquire()
56 |         cond.wait_for(predicate= lambda:self.__compare(target, self.get()))
57 |         cond.release()
58 |         self.__cond_list.remove(cond)
59 |         return
60 |     
61 |     def __compare(self, i1:int, i2:int) -> bool:
62 |         """
63 |         Performs i1 <= i2 and returns the result
64 | 
65 |         Args:
66 |             i1 (int): int 1 
67 |             i2 (int): int 2
68 | 
69 |         Returns:
70 |             bool: i1 <= i2 is returned
71 |         """
72 |         return i1 <= i2
73 |     
74 |     def get(self) -> int:
75 |         """
76 |         Returns counter value
77 | 
78 |         Returns:
79 |             int: counter value
80 |         """
81 |         self.__mutex.acquire()
82 |         saved = self.__value
83 |         self.__mutex.release()
84 |         return saved
85 |     
86 |     def set(self, target:int) -> None:
87 |         """
88 |         Set counter to target value
89 |         
90 |         target (int): value to set counter to 
91 |         """
92 |         self.__mutex.acquire()
93 |         self.__value = target
94 |         self.__mutex.release()


--------------------------------------------------------------------------------
/LockingCounter_test.py:
--------------------------------------------------------------------------------
 1 | import time
 2 | import unittest
 3 | from LockingCounter import LockingCounter
 4 | import threading
 5 | 
 6 | class LockingCounterTestCase(unittest.TestCase):
 7 |     def setUp(self) -> None:
 8 |         """
 9 |         Create counter starting at 0
10 |         """
11 |         self.counter = LockingCounter()
12 |         
13 |     def test_wait_for(self) -> None:
14 |         """
15 |         Tests a single wait_for cycle
16 |         """
17 |         # Generate a thread
18 |         t1 = threading.Thread(target=self.wait_thread, args=(5,))
19 |         t1.start()
20 |         # Keep incrementing until target is met
21 |         self.counter.toggle()
22 |         self.assertTrue(t1.is_alive())
23 |         self.counter.toggle()
24 |         self.assertTrue(t1.is_alive())
25 |         self.counter.toggle()
26 |         self.assertTrue(t1.is_alive())
27 |         self.counter.toggle()
28 |         self.assertTrue(t1.is_alive())
29 |         self.counter.toggle()
30 |         time.sleep(0.1)
31 |         self.assertFalse(t1.is_alive())
32 | 
33 |         # Generate another thread when target is met
34 |         t1 = threading.Thread(target=self.wait_thread, args=(5,))
35 |         self.assertFalse(t1.is_alive())
36 |         t1 = threading.Thread(target=self.wait_thread, args=(0,))
37 |         self.assertFalse(t1.is_alive())
38 |     def wait_thread(self, target:int)->None:
39 |         """
40 |         Wait until target is met, to be used with threading
41 | 
42 |         Args:
43 |             target (int): target to wait for
44 |         """
45 |         self.counter.wait_until(target)
46 | 
# Run the unittest command line runner only when this file is executed
# directly, not when it is imported.
if __name__ == '__main__':
    unittest.main()


--------------------------------------------------------------------------------
/PersistentCounter.py:
--------------------------------------------------------------------------------
 1 | class PersistentCounter():
 2 |     """
 3 |     A counter but an object instead of an int
 4 |     """
 5 |     __value:int             # Current lock's value
 6 |     
 7 |     def __init__(self, starting:int=0) -> None:
 8 |         """
 9 |         Initializes the counter with starting value
10 |         
11 |         starting: starting value of the counter
12 |         """
13 |         self.__value = starting
14 |     
15 |     def toggle(self) -> int:
16 |         """
17 |         Increments counter by 1
18 |         
19 |         Returns: counter value immediately after toggle
20 |         """
21 |       
22 |         self.__value += 1
23 |         return self.__value
24 |     
25 |     def get(self) -> int:
26 |         """
27 |         Returns the counter's value
28 | 
29 |         Returns:
30 |             int: counter value
31 |         """
32 |         return self.__value
33 | 
34 |     def set(self, target:int) -> None:
35 |         """
36 |         Sets the counter's value
37 | 
38 |         Args:
39 |             target (int): counter's new value
40 |         """
41 |         self.__value = target


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # KMPDownloader
 2 | Simple Kemono.party downloader with QOL features.
 3 | 
 4 | Functionality not guaranteed until 1.0, There are known bugs!
 5 | Can download everything from Files, save text and links in Content, and everything in Downloads. Can be set to automatically unzip files if they contain no password.
 6 | 
 7 | ![Screenshot 2022-05-17 114434 PNG](https://user-images.githubusercontent.com/78765964/168853513-b5b14b98-430f-4437-b63b-08ea93ddf014.jpg)
 8 | 
 9 | ## Current Features
10 | View changelog for more details on features not included here.
11 | - All services supported (Patreon, Pixiv Fanbox, Gumroad, SubscribeStar, DLSite, Fantia, Discord).
12 | - Can download a single artist work, all artist works, or a single page of artist works.
13 | - Download all files and any downloads in high resolution and correct extension.
14 | - Automatic file unzipping for .7z, .zip, and .rar files. 
15 | - Extraction of a work's content and comments.
16 | - High degree of control over downloads. Includes blacklisting file extensions, posts with certain keywords, omission of certain download items, and much more!
17 | - Queuing system, download multiple URLs without user input
18 | - Multithreading support, significant download speed bonus.
19 | - Ease of use, cookies are for eating only!  
20 | - Automatic artist work updates.
21 | 
22 | 
23 | ## Instructions:
24 | **Need in depth details or want to view all features? Please visit the [wiki](https://github.com/Jeffchen54/KMP-Kemono-Downloader/wiki)!**
25 | 
26 | Download Python >=3.10
27 | 
28 | - Run install_requirements.bat.
29 | - Install 7z and add it to your Windows PATH. The entry should be in the format "C:\Users\chenj\Downloads\7-Zip"
30 | - Run in your favorite command line software. Call "venv/Scripts/Activate" before running the program.
31 | - Read the command line arguments for instructions on how to run.
32 | - Enjoy!
33 | 


--------------------------------------------------------------------------------
/Threadpool.py:
--------------------------------------------------------------------------------
  1 | import logging
  2 | import threading
  3 | import queue
  4 | """
  5 | Simple task sharing threadpool. Handles fully generic tasks of any kind.
  6 | There is no way to get return values, so only tasks that return None are
  7 | supported!
  8 | 
  9 | Author: Jeff Chen
 10 | Last modified: 5/23/2022
 11 | """
 12 | tname = threading.local()   # TLV for thread name
 13 | 
 14 | class Kill_Queue():
 15 |     """
 16 |     Queue with a built in kill switch with sem == # of available items,
 17 |     to be used in multithreading
 18 |     """
 19 |     __queue:queue.Queue 
 20 |     __kill:bool     # Kill switch for downThreads
 21 |     __tasks:any     # Avalible downloadable resource device
 22 | 
 23 |     def __init__(self) -> None:
 24 |         """
 25 |         Create queue and set kill to false
 26 |         """
 27 |         self.__queue = queue.Queue(-1)
 28 |         self.__kill = False
 29 |         self.__tasks = threading.Semaphore(0)
 30 |     
 31 |     def kill(self) -> None:
 32 |         """
 33 |         Turns kill switch on
 34 |         """
 35 |         self.__kill = True
 36 |     
 37 |     def revive(self) -> None:
 38 |         """
 39 |         Turn kill switch off
 40 |         """
 41 |         self.__kill = False
 42 |     
 43 |     def status(self) -> bool:
 44 |         """
 45 |         Reports if the queue is dead or alive
 46 | 
 47 |         Return: True if dead, False if alive
 48 |         """
 49 |         return self.__kill
 50 |     
 51 |     def enqueue(self, task:any) -> None:
 52 |         """
 53 |         Put an item in the queue
 54 |         """
 55 |         self.__queue.put(task)
 56 |         self.__tasks.release()
 57 |     
 58 |     def acquire_resource(self) -> None:
 59 |         """
 60 |         Decrement semaphore keeping track of queue items
 61 |         """
 62 |         self.__tasks.acquire()
 63 | 
 64 |     def release_resource(self) -> None:
 65 |         """
 66 |         Increment semaphore keeping track of queue items.
 67 |         Does not need to be called after enqueue as it 
 68 |         increments the semaphore automatically
 69 |         """
 70 |         self.__tasks.release()
 71 | 
 72 |     def dequeue(self) -> any:
 73 |         """
 74 |         Removes an item
 75 | 
 76 |         Pre: acquire_resource was called first
 77 |         Return item in front of the queue
 78 |         """
 79 |         return self.__queue.get()
 80 |     
 81 |     def task_done(self) -> None:
 82 |         """
 83 |         Indicates queue task was completed
 84 | 
 85 |         Pre: dequeue was called, thread task was completed
 86 |         """
 87 |         self.__queue.task_done()
 88 |     
 89 |     def join_queue(self) -> None:
 90 |         """
 91 |         Blocks until all task queue items have been processed
 92 |         """
 93 |         self.__queue.join()
 94 | 
 95 |     def get_qsize(self) -> int:
 96 |         """
 97 |         Get queue size (unreliable)
 98 | 
 99 |         Return: queue size
100 |         """
101 |         return self.__queue.qsize()
102 | 
103 | 
104 | 
class ThreadPool():
    """
    Very basic task sharing threadpool, does not support futures.

    A task is a tuple in the structure (func, (arg1, arg2, ...)); a worker
    runs func(*args) and discards whatever it returns.

    Thread local variables set by every worker:
        tname.name: Thread name
        tname.id: thread id

    """
    # Task queue, contains tuples in the structure: (func,(args1,args2,...))
    __task_queue:Kill_Queue
    __threads:list  # List of threads in the threadpool
    __tcount:int    # Number of threads
    __alive:bool    # Checks if the threadpool is alive

    def __init__(self, tcount:int) -> None:
        """
        Initializes a threadpool, no thread is started yet

        Param:
            tcount: Number of threads for the threadpool
        """
        self.__task_queue = Kill_Queue()
        # Always defined so kill_threads() is safe before start_threads()
        self.__threads = []
        self.__tcount = tcount
        self.__alive = False

    def start_threads(self) -> None:
        """
        Creates the number of worker threads given at construction and
        starts them. Does nothing (besides logging a warning) if the pool
        is already running, so running workers are never orphaned.
        """
        if self.__alive:
            logging.warning("Threadpool is already running, start request ignored")
            return

        # Spawn threads
        self.__threads = [ThreadPool.TaskThread(i, self.__task_queue)
                          for i in range(self.__tcount)]
        for worker in self.__threads:
            worker.start()
        self.__alive = True
        logging.debug(str(self.__tcount) + " threads have been started")

    def kill_threads(self) -> None:
        """
        Kills all threads in threadpool. Threads are restarted and killed using a
        switch, deadlocked or infinitely running threads cannot be killed using
        this function. Does nothing if the pool is not running.
        """
        if not self.__alive:
            # Nothing to stop; skipping also avoids sending wake-ups no worker consumes
            logging.debug("Threadpool is not running, nothing to terminate")
            return

        self.__task_queue.kill()

        # One wake-up per worker so each can observe the kill switch
        # (presumably release_resource() unblocks one acquire_resource() call)
        for _ in self.__threads:
            self.__task_queue.release_resource()

        for worker in self.__threads:
            worker.join()
        self.__alive = False
        # Reset the kill switch so the pool can be started again
        self.__task_queue.revive()
        logging.debug(str(len(self.__threads)) + " threads have been terminated")

    def enqueue(self, task:tuple) -> None:
        """
        Put an item in task queue

        Param:
            task: tuple in the structure (func(),(args1,args2,...))
        """
        logging.debug("Enqueued into task queue: " + str(task))
        self.__task_queue.enqueue(task)

    def enqueue_queue(self, task_list:queue.Queue) -> None:
        """
        Put an queue in task queue. The queue object itself is enqueued once
        per item it holds at call time; each time a worker dequeues it, the
        worker takes one element with 'get()', runs it and then calls
        task_done() on that queue.

        Param:
            task_list: queue of task tuples following the structure (func(),(args1,args2,...))
        """
        logging.debug("Enqueued into task queue: " + str(task_list))
        for _ in range(task_list.qsize()):
            self.__task_queue.enqueue(task_list)

    def join_queue(self) -> None:
        """
        Blocks until all task queue items have been processed
        """
        logging.debug("Blocking until all tasks are complete")
        self.__task_queue.join_queue()

    def get_qsize(self) -> int:
        """
        Get queue size (unreliable)

        Return: task queue size
        """
        return self.__task_queue.get_qsize()

    def get_status(self) -> bool:
        """
        Check if the threadpool is alive

        Return: True if alive, false if not
        """
        return self.__alive

    class TaskThread(threading.Thread):
        """
        Fully generic worker thread, tasks of any kind are retrieved from task_queue.
        Threads are daemon threads and stop when the queue's kill switch is set.
        """
        __id: int
        __task_queue:Kill_Queue

        def __init__(self, id: int, task_queue:Kill_Queue) -> None:
            """
            Initializes thread with a thread id
            Param:
            id: thread identifier
            task_queue: Queue to get tasks from
            """
            self.__id = id
            self.__task_queue = task_queue
            super(ThreadPool.TaskThread, self).__init__(daemon=True)

        def run(self) -> None:
            """
            Worker thread job. Blocks until a task is available, retrieves it
            from the task queue and runs it. An exception raised by a task is
            logged and the worker keeps running; the task is still marked as
            done so join_queue() cannot block forever on a failed task.
            """
            tname.name = "Thread #" + str(self.__id)
            tname.id = self.__id
            while True:
                # Wait until a task is available
                self.__task_queue.acquire_resource()

                # Check kill signal
                if self.__task_queue.status():
                    logging.debug(tname.name + " has terminated")
                    return

                # Pop queue and process it
                todo = self.__task_queue.dequeue()

                try:
                    # If dequeued element is a queue, we process it like its our queue
                    if isinstance(todo, queue.Queue):
                        monitored_todo = todo.get()
                        try:
                            logging.debug(tname.name + " (From SubQueue) Processing: " + str(monitored_todo))
                            monitored_todo[0](*monitored_todo[1])
                        finally:
                            # The sub queue item was taken, so it must be marked done
                            todo.task_done()
                    # Else, process the task directly
                    else:
                        logging.debug(tname.name + " Processing: " + str(todo))
                        todo[0](*todo[1])
                except Exception:
                    # Keep the worker alive, a single failing task must not shrink the pool
                    logging.exception(tname.name + " Task raised an exception: " + str(todo))
                finally:
                    self.__task_queue.task_done()


--------------------------------------------------------------------------------
/example.txt:
--------------------------------------------------------------------------------
1 | https://kemono.party/patreon/user/572297/post/64470250
2 | 
3 | https://kemono.party/patreon/user/572297?o=50


--------------------------------------------------------------------------------
/install_requirements.bat:
--------------------------------------------------------------------------------
@echo off
REM Creates a virtual environment in .\venv, activates it, installs the
REM packages listed in requirements.txt and finally moves the files from
REM "patch for patoolib" into the environment's patoolib\programs folder.
REM The steps are chained with "&&", so a step only runs when the previous
REM one succeeded; "^" continues the command on the next line.

python -m venv venv && ^
venv\Scripts\activate && ^
pip install -r requirements.txt && ^
move /Y "patch for patoolib\*" venv\Lib\site-packages\patoolib\programs


--------------------------------------------------------------------------------
/jutils.py:
--------------------------------------------------------------------------------
 1 | import io
 2 | import os
 3 | 
 4 | """
 5 | Misc helpful utils, mainly related to File IO
 6 | 
 7 | @author Jeff Chen
 8 | @version 6/15/2022
 9 | """
10 | 
def write_utf8(text:str, path:str, mode:str) -> None:
    """
    Writes text to the file at path, encoded as utf-8

    Param:
        text: text to write
        path: file to write to, including the file name
        mode: file mode handed to io.open, for example 'w' or 'a'
    """
    handle = io.open(path, mode=mode, encoding='utf-8')
    try:
        handle.write(text)
    finally:
        # Same guarantee as a with-block: the file is closed even on failure
        handle.close()
22 | 
23 | 
def write_to_file(path:str, line: str, mutex) -> None:
    """
    Appends to a file, creates the file if it does not exists. When a mutex
    is given it is held for the duration of the write and is released even
    if the write fails.

    Param:
        path: file to write to, absolute path
        line: line to append to file
        mutex: (Optional) mutex lock associated with the file, skipped when falsy
    """
    if mutex:
        mutex.acquire()

    try:
        write_utf8(line, path, 'a')
    finally:
        # Release on every path, otherwise a failed write leaves the lock held
        if mutex:
            mutex.release()
46 | 
47 | 
def getDirSz(dir: str) -> int:
    """
    Adds up the size of every file found below a directory, symbolic
    links are not counted

    Param:
        dir: directory to measure
    Return: total size of the files in dir and all of its subdirectories
    """
    total = 0
    for root, _subdirs, files in os.walk(dir):
        for name in files:
            entry = os.path.join(root, name)
            # Symbolic links are skipped
            if os.path.islink(entry):
                continue
            total += os.path.getsize(entry)
    return total


--------------------------------------------------------------------------------
/patch for patoolib/p7rzip.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | # Copyright (C) 2016 Bastian Kleineidam
 3 | #
 4 | # This program is free software: you can redistribute it and/or modify
 5 | # it under the terms of the GNU General Public License as published by
 6 | # the Free Software Foundation, either version 3 of the License, or
 7 | # (at your option) any later version.
 8 | #
 9 | # This program is distributed in the hope that it will be useful,
10 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12 | # GNU General Public License for more details.
13 | #
14 | # You should have received a copy of the GNU General Public License
15 | # along with this program.  If not, see <http://www.gnu.org/licenses/>.
16 | """Archive commands for the 7zr program.
17 | 
18 | 7zr is a light executable supporting only the 7z archive format.
19 | """
20 | 
21 | from .p7zip import create_7z
22 | 
def extract_7z(archive, compression, cmd, verbosity, interactive, outdir):
    """Build the command line that extracts a 7z archive into outdir."""
    # '-y' is only added for non-interactive runs
    assume_yes = [] if interactive else ['-y']
    return [cmd, 'x'] + assume_yes + ['-o%s' % outdir, '--', archive]
30 | 
def list_7z(archive, compression, cmd, verbosity, interactive):
    """Build the command line that lists the content of a 7z archive."""
    # '-y' is only added for non-interactive runs
    assume_yes = [] if interactive else ['-y']
    return [cmd, 'l'] + assume_yes + ['--', archive]
38 | 
def test_7z(archive, compression, cmd, verbosity, interactive):
    """Build the command line that tests a 7z archive."""
    # '-y' is only added for non-interactive runs
    assume_yes = [] if interactive else ['-y']
    return [cmd, 't'] + assume_yes + ['--', archive]


--------------------------------------------------------------------------------
/patch for patoolib/p7zip.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | # Copyright (C) 2010-2015 Bastian Kleineidam
  3 | #
  4 | # This program is free software: you can redistribute it and/or modify
  5 | # it under the terms of the GNU General Public License as published by
  6 | # the Free Software Foundation, either version 3 of the License, or
  7 | # (at your option) any later version.
  8 | #
  9 | # This program is distributed in the hope that it will be useful,
 10 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
 11 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 12 | # GNU General Public License for more details.
 13 | #
 14 | # You should have received a copy of the GNU General Public License
 15 | # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 16 | """Archive commands for the 7z program."""
 17 | 
 18 | def extract_7z(archive, compression, cmd, verbosity, interactive, outdir):
 19 |     """Extract a 7z archive."""
 20 |     cmdlist = [cmd, 'x']
 21 |     if not interactive:
 22 |         cmdlist.extend(['-p-', '-y'])
 23 |     cmdlist.extend(['-o%s' % outdir, '--', archive])
 24 |     return cmdlist
 25 | 
 26 | def extract_7z_singlefile(archive, compression, cmd, verbosity, interactive, outdir):
 27 |     """Extract a singlefile archive (eg. gzip or bzip2) with '7z e'.
 28 |     This makes sure a single file and no subdirectories are created,
 29 |     which would cause errors with patool repack."""
 30 |     cmdlist = [cmd, 'e']
 31 |     if not interactive:
 32 |         cmdlist.extend(['-p-', '-y'])
 33 |     cmdlist.extend(['-o%s' % outdir, '--', archive])
 34 |     return cmdlist
 35 | 
 36 | extract_bzip2 = \
 37 |   extract_gzip = \
 38 |   extract_compress = \
 39 |   extract_xz = \
 40 |   extract_lzma = \
 41 |   extract_7z_singlefile
 42 | 
 43 | extract_zip = \
 44 |   extract_rar = \
 45 |   extract_cab = \
 46 |   extract_arj = \
 47 |   extract_cpio = \
 48 |   extract_rpm = \
 49 |   extract_deb = \
 50 |   extract_iso = \
 51 |   extract_vhd = \
 52 |   extract_7z
 53 | 
 54 | def list_7z (archive, compression, cmd, verbosity, interactive):
 55 |     """List a 7z archive."""
 56 |     cmdlist = [cmd, 'l']
 57 |     if not interactive:
 58 |         cmdlist.extend(['-p-', '-y'])
 59 |     cmdlist.extend(['--', archive])
 60 |     return cmdlist
 61 | 
 62 | list_bzip2 = \
 63 |   list_gzip = \
 64 |   list_zip = \
 65 |   list_compress = \
 66 |   list_rar = \
 67 |   list_cab = \
 68 |   list_arj = \
 69 |   list_cpio = \
 70 |   list_rpm = \
 71 |   list_deb = \
 72 |   list_iso = \
 73 |   list_xz = \
 74 |   list_lzma = \
 75 |   list_vhd = \
 76 |   list_7z
 77 | 
 78 | 
 79 | def test_7z (archive, compression, cmd, verbosity, interactive):
 80 |     """Test a 7z archive."""
 81 |     cmdlist = [cmd, 't']
 82 |     if not interactive:
 83 |         cmdlist.extend(['-p-', '-y'])
 84 |     cmdlist.extend(['--', archive])
 85 |     return cmdlist
 86 | 
 87 | test_bzip2 = \
 88 |   test_gzip = \
 89 |   test_zip = \
 90 |   test_compress = \
 91 |   test_rar = \
 92 |   test_cab = \
 93 |   test_arj = \
 94 |   test_cpio = \
 95 |   test_rpm = \
 96 |   test_deb = \
 97 |   test_iso = \
 98 |   test_xz = \
 99 |   test_lzma = \
100 |   test_vhd = \
101 |   test_7z
102 | 
103 | 
def create_7z(archive, compression, cmd, verbosity, interactive, filenames):
    """Build the command line that packs filenames into a 7z archive."""
    # '-y' is only added for non-interactive runs
    assume_yes = [] if interactive else ['-y']
    return [cmd, 'a'] + assume_yes + ['-t7z', '-mx=9', '--', archive] + list(filenames)
112 | 
113 | 
def create_zip(archive, compression, cmd, verbosity, interactive, filenames):
    """Build the command line that packs filenames into a ZIP archive."""
    # '-y' is only added for non-interactive runs
    assume_yes = [] if interactive else ['-y']
    return [cmd, 'a'] + assume_yes + ['-tzip', '-mx=9', '--', archive] + list(filenames)
122 | 
123 | 
def create_xz(archive, compression, cmd, verbosity, interactive, filenames):
    """Build the command line that packs filenames into an XZ archive."""
    # '-y' is only added for non-interactive runs
    assume_yes = [] if interactive else ['-y']
    return [cmd, 'a'] + assume_yes + ['-txz', '-mx=9', '--', archive] + list(filenames)
132 | 
133 | 
def create_gzip(archive, compression, cmd, verbosity, interactive, filenames):
    """Build the command line that packs filenames into a GZIP archive."""
    # '-y' is only added for non-interactive runs
    assume_yes = [] if interactive else ['-y']
    return [cmd, 'a'] + assume_yes + ['-tgzip', '-mx=9', '--', archive] + list(filenames)
142 | 
143 | 
def create_bzip2(archive, compression, cmd, verbosity, interactive, filenames):
    """Build the command line that packs filenames into a BZIP2 archive."""
    # '-y' is only added for non-interactive runs
    assume_yes = [] if interactive else ['-y']
    return [cmd, 'a'] + assume_yes + ['-tbzip2', '-mx=9', '--', archive] + list(filenames)


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Jeffchen54/KMP-Kemono-Downloader/79bf1945f34a4243d4dbcd4a68562acb74ea34d2/requirements.txt


--------------------------------------------------------------------------------
/user-agent.txt:
--------------------------------------------------------------------------------
1 | Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/117.0


--------------------------------------------------------------------------------
/zipextracter.py:
--------------------------------------------------------------------------------
  1 | import logging
  2 | import os
  3 | import shutil
  4 | import tempfile
  5 | import patoolib
  6 | from patoolib import util
  7 | import sys
  8 | 
  9 | import jutils
 10 | """
 11 | Extracts files using patoolib
 12 | 
 13 | @author Jeff chen
 14 | @version 6/15/2022
 15 | """
 16 | 
 17 | def supported_zip_type(fname:str) -> bool:
 18 |         """
 19 |         Checks if a file is a zip file (7z, zip, rar)
 20 | 
 21 |         Param:
 22 |             fname: zip file name or path
 23 |         Return True if zip file, false if not
 24 |         """
 25 |         file = fname.rpartition('/')[2]
 26 |         extension = file.rpartition('.')[2]
 27 |         return 'zip' == extension or 'rar' == extension or '7z' == extension
 28 |         
def extract_zip(zippath: str, destpath: str, temp:bool) -> bool:
        """
        Extracts an archive into a temporary directory with patoolib and
        copies the extracted entries into destpath. The archive is deleted
        when extraction and copying succeed.

        Cases:
            (1) Unzip directory does not exists ->  Extract to directory
            (2) Unzip directory exists but size does not match extracted dir -> Extracted dir prepended with (#)
            (3) Unzip directory exists and size matches -> Counted as duplicate and skips unzip directory
            (4) Extracted files conflict with existing files -> Same as case 2
        # Unzip directory refers to directories within a zip folder, for example
        test.zip
            -> animals
            -> dog1.jpg
            -> cat1.png

            animals is a directory, this is an unzip directory

        Param:
        zippath: full path to zip file included zip file itself
        destpath: full path to destination, a backslash is appended to it here
        temp: currently unused, extraction always goes through a temp dir
        Pre: Is a zip file, can be checked using supported_zip_type(). destpath exists
        Return: True on success, False when patoolib raises PatoolError or a
            RuntimeError is raised (both are logged)
        """
        # Paths are built by string concatenation with a Windows separator
        destpath += '\\'
        # destpath is redirected while resolving duplicates, keep the original
        backup_destpath = destpath

        # A tempdir is used to bypass Window's 255 char limit when unzipping files
        with tempfile.TemporaryDirectory(prefix="temp") as dirpath:
            dirpath += '\\'
            try:
                # NOTE(review): dirpath already ends with a backslash, so outdir ends with two
                patoolib.extract_archive(zippath, outdir=dirpath + '\\', verbosity=-1, interactive=False)

                # Copy every top level entry of the extraction into destpath
                for f in os.listdir(dirpath):
                    if os.path.isdir(os.path.abspath(dirpath + f)):
                        # Duplicate file name handler
                        downloaded = False
                        while not downloaded:
                            try:
                                shutil.copytree(os.path.abspath(dirpath + f), os.path.abspath(destpath + f), dirs_exist_ok=False)
                                downloaded = True
                            except FileExistsError as e:
                                # If duplicate dir is found, it will be stashed in the same dir but with (n) prepended
                                counter = 1
                                # presumably e.filename is the already existing target directory - TODO confirm
                                nextName = e.filename
                                currSz = jutils.getDirSz(os.path.abspath(dirpath + f.replace("\\", "")))
                                # Check directory size of dirpath vs destpath, if same size, we are done
                                done = False
                                while(not done):
                                    # If the next directory does not exists, change destpath to that directory
                                    if not os.path.exists(nextName):
                                        destpath = nextName
                                        done = True
                                    else:
                                        # If directory with same size is found, we are done
                                        dirsize = jutils.getDirSz(nextName)
                                        if dirsize == currSz:
                                            done = True
                                            downloaded = True

                                    # Adjust path for next iteration
                                    # NOTE(review): the candidate is destpath + "(n)" without f, while
                                    # the retried copy targets destpath + f - confirm this is intended
                                    if not done:
                                        nextName = destpath + '(' + str(counter) + ')'
                                        counter += 1

                        # The extracted directory was copied or skipped as duplicate, drop the temp copy
                        shutil.rmtree(os.path.abspath(dirpath + f), ignore_errors=True)

                    else:
                        # Plain files are copied directly into destpath
                        shutil.copy(os.path.abspath(dirpath + f), os.path.abspath(destpath + f))
                        os.remove(os.path.abspath(dirpath + f))

                    # Reset destpath as it may have been modified due to dupe files
                    destpath = backup_destpath

                # Everything was copied, the archive itself is no longer needed
                os.remove(zippath)
                return True
            except util.PatoolError as e:
                # NOTE(review): the adjacent literals "\n + " and "File name: " are joined,
                # so the logged text contains a literal " + " - looks like a typo
                logging.critical("Unzipping a non zip file has occured or password protected file, failure is described below:" +
                                "\n + ""File name: " + zippath + "\n" + "File size: " + str(os.stat(zippath).st_size))
                logging.critical(e)
                # Remove the destination directory again if nothing was put into it
                d = os.listdir(destpath)
                if len(d) == 0:
                    os.rmdir(destpath)
            except RuntimeError:
                logging.debug("File name: " + zippath + "\n" +
                            "File size: " + str(os.stat(zippath).st_size))
            # Only reached after one of the handled errors above
            return False
def main():
    """
    Command line entry point. Extracts the archive named by the first
    command line argument into ./testing, or reports that the argument is
    not a supported archive. Prints a usage hint when no argument is given.
    """
    # Without this check a missing argument ends in an IndexError traceback
    if len(sys.argv) < 2:
        print("Usage: python zipextracter.py ARCHIVE_PATH")
        return

    if supported_zip_type(sys.argv[1]):
        extract_zip(os.path.abspath(sys.argv[1]), os.path.abspath("./testing") + '\\', True)
    else:
        print("Is not ZIP -> " + sys.argv[1])
if __name__ == "__main__":
    main()


--------------------------------------------------------------------------------