├── LICENSE
├── README.md
├── pyproject.toml
├── requirements.txt
└── src
└── h2hdb
├── __init__.py
├── __main__.py
├── compress_gallery_to_cbz.py
├── config_loader.py
├── h2hdb_h2hdb.py
├── h2hdb_spec.py
├── hash_dict.py
├── information.py
├── logger.py
├── mysql_connector.py
├── py.typed
├── settings.py
├── sql_connector.py
├── table_comments.py
├── table_database_setting.py
├── table_files_dbids.py
├── table_gids.py
├── table_removed_gids.py
├── table_tags.py
├── table_times.py
├── table_titles.py
├── table_uploadaccounts.py
├── threading_tools.py
└── view_ginfo.py
/LICENSE:
--------------------------------------------------------------------------------
1 | GNU GENERAL PUBLIC LICENSE
2 | Version 3, 29 June 2007
3 |
  4 | Copyright (C) 2007 Free Software Foundation, Inc. <https://fsf.org/>
5 | Everyone is permitted to copy and distribute verbatim copies
6 | of this license document, but changing it is not allowed.
7 |
8 | Preamble
9 |
10 | The GNU General Public License is a free, copyleft license for
11 | software and other kinds of works.
12 |
13 | The licenses for most software and other practical works are designed
14 | to take away your freedom to share and change the works. By contrast,
15 | the GNU General Public License is intended to guarantee your freedom to
16 | share and change all versions of a program--to make sure it remains free
17 | software for all its users. We, the Free Software Foundation, use the
18 | GNU General Public License for most of our software; it applies also to
19 | any other work released this way by its authors. You can apply it to
20 | your programs, too.
21 |
22 | When we speak of free software, we are referring to freedom, not
23 | price. Our General Public Licenses are designed to make sure that you
24 | have the freedom to distribute copies of free software (and charge for
25 | them if you wish), that you receive source code or can get it if you
26 | want it, that you can change the software or use pieces of it in new
27 | free programs, and that you know you can do these things.
28 |
29 | To protect your rights, we need to prevent others from denying you
30 | these rights or asking you to surrender the rights. Therefore, you have
31 | certain responsibilities if you distribute copies of the software, or if
32 | you modify it: responsibilities to respect the freedom of others.
33 |
34 | For example, if you distribute copies of such a program, whether
35 | gratis or for a fee, you must pass on to the recipients the same
36 | freedoms that you received. You must make sure that they, too, receive
37 | or can get the source code. And you must show them these terms so they
38 | know their rights.
39 |
40 | Developers that use the GNU GPL protect your rights with two steps:
41 | (1) assert copyright on the software, and (2) offer you this License
42 | giving you legal permission to copy, distribute and/or modify it.
43 |
44 | For the developers' and authors' protection, the GPL clearly explains
45 | that there is no warranty for this free software. For both users' and
46 | authors' sake, the GPL requires that modified versions be marked as
47 | changed, so that their problems will not be attributed erroneously to
48 | authors of previous versions.
49 |
50 | Some devices are designed to deny users access to install or run
51 | modified versions of the software inside them, although the manufacturer
52 | can do so. This is fundamentally incompatible with the aim of
53 | protecting users' freedom to change the software. The systematic
54 | pattern of such abuse occurs in the area of products for individuals to
55 | use, which is precisely where it is most unacceptable. Therefore, we
56 | have designed this version of the GPL to prohibit the practice for those
57 | products. If such problems arise substantially in other domains, we
58 | stand ready to extend this provision to those domains in future versions
59 | of the GPL, as needed to protect the freedom of users.
60 |
61 | Finally, every program is threatened constantly by software patents.
62 | States should not allow patents to restrict development and use of
63 | software on general-purpose computers, but in those that do, we wish to
64 | avoid the special danger that patents applied to a free program could
65 | make it effectively proprietary. To prevent this, the GPL assures that
66 | patents cannot be used to render the program non-free.
67 |
68 | The precise terms and conditions for copying, distribution and
69 | modification follow.
70 |
71 | TERMS AND CONDITIONS
72 |
73 | 0. Definitions.
74 |
75 | "This License" refers to version 3 of the GNU General Public License.
76 |
77 | "Copyright" also means copyright-like laws that apply to other kinds of
78 | works, such as semiconductor masks.
79 |
80 | "The Program" refers to any copyrightable work licensed under this
81 | License. Each licensee is addressed as "you". "Licensees" and
82 | "recipients" may be individuals or organizations.
83 |
84 | To "modify" a work means to copy from or adapt all or part of the work
85 | in a fashion requiring copyright permission, other than the making of an
86 | exact copy. The resulting work is called a "modified version" of the
87 | earlier work or a work "based on" the earlier work.
88 |
89 | A "covered work" means either the unmodified Program or a work based
90 | on the Program.
91 |
92 | To "propagate" a work means to do anything with it that, without
93 | permission, would make you directly or secondarily liable for
94 | infringement under applicable copyright law, except executing it on a
95 | computer or modifying a private copy. Propagation includes copying,
96 | distribution (with or without modification), making available to the
97 | public, and in some countries other activities as well.
98 |
99 | To "convey" a work means any kind of propagation that enables other
100 | parties to make or receive copies. Mere interaction with a user through
101 | a computer network, with no transfer of a copy, is not conveying.
102 |
103 | An interactive user interface displays "Appropriate Legal Notices"
104 | to the extent that it includes a convenient and prominently visible
105 | feature that (1) displays an appropriate copyright notice, and (2)
106 | tells the user that there is no warranty for the work (except to the
107 | extent that warranties are provided), that licensees may convey the
108 | work under this License, and how to view a copy of this License. If
109 | the interface presents a list of user commands or options, such as a
110 | menu, a prominent item in the list meets this criterion.
111 |
112 | 1. Source Code.
113 |
114 | The "source code" for a work means the preferred form of the work
115 | for making modifications to it. "Object code" means any non-source
116 | form of a work.
117 |
118 | A "Standard Interface" means an interface that either is an official
119 | standard defined by a recognized standards body, or, in the case of
120 | interfaces specified for a particular programming language, one that
121 | is widely used among developers working in that language.
122 |
123 | The "System Libraries" of an executable work include anything, other
124 | than the work as a whole, that (a) is included in the normal form of
125 | packaging a Major Component, but which is not part of that Major
126 | Component, and (b) serves only to enable use of the work with that
127 | Major Component, or to implement a Standard Interface for which an
128 | implementation is available to the public in source code form. A
129 | "Major Component", in this context, means a major essential component
130 | (kernel, window system, and so on) of the specific operating system
131 | (if any) on which the executable work runs, or a compiler used to
132 | produce the work, or an object code interpreter used to run it.
133 |
134 | The "Corresponding Source" for a work in object code form means all
135 | the source code needed to generate, install, and (for an executable
136 | work) run the object code and to modify the work, including scripts to
137 | control those activities. However, it does not include the work's
138 | System Libraries, or general-purpose tools or generally available free
139 | programs which are used unmodified in performing those activities but
140 | which are not part of the work. For example, Corresponding Source
141 | includes interface definition files associated with source files for
142 | the work, and the source code for shared libraries and dynamically
143 | linked subprograms that the work is specifically designed to require,
144 | such as by intimate data communication or control flow between those
145 | subprograms and other parts of the work.
146 |
147 | The Corresponding Source need not include anything that users
148 | can regenerate automatically from other parts of the Corresponding
149 | Source.
150 |
151 | The Corresponding Source for a work in source code form is that
152 | same work.
153 |
154 | 2. Basic Permissions.
155 |
156 | All rights granted under this License are granted for the term of
157 | copyright on the Program, and are irrevocable provided the stated
158 | conditions are met. This License explicitly affirms your unlimited
159 | permission to run the unmodified Program. The output from running a
160 | covered work is covered by this License only if the output, given its
161 | content, constitutes a covered work. This License acknowledges your
162 | rights of fair use or other equivalent, as provided by copyright law.
163 |
164 | You may make, run and propagate covered works that you do not
165 | convey, without conditions so long as your license otherwise remains
166 | in force. You may convey covered works to others for the sole purpose
167 | of having them make modifications exclusively for you, or provide you
168 | with facilities for running those works, provided that you comply with
169 | the terms of this License in conveying all material for which you do
170 | not control copyright. Those thus making or running the covered works
171 | for you must do so exclusively on your behalf, under your direction
172 | and control, on terms that prohibit them from making any copies of
173 | your copyrighted material outside their relationship with you.
174 |
175 | Conveying under any other circumstances is permitted solely under
176 | the conditions stated below. Sublicensing is not allowed; section 10
177 | makes it unnecessary.
178 |
179 | 3. Protecting Users' Legal Rights From Anti-Circumvention Law.
180 |
181 | No covered work shall be deemed part of an effective technological
182 | measure under any applicable law fulfilling obligations under article
183 | 11 of the WIPO copyright treaty adopted on 20 December 1996, or
184 | similar laws prohibiting or restricting circumvention of such
185 | measures.
186 |
187 | When you convey a covered work, you waive any legal power to forbid
188 | circumvention of technological measures to the extent such circumvention
189 | is effected by exercising rights under this License with respect to
190 | the covered work, and you disclaim any intention to limit operation or
191 | modification of the work as a means of enforcing, against the work's
192 | users, your or third parties' legal rights to forbid circumvention of
193 | technological measures.
194 |
195 | 4. Conveying Verbatim Copies.
196 |
197 | You may convey verbatim copies of the Program's source code as you
198 | receive it, in any medium, provided that you conspicuously and
199 | appropriately publish on each copy an appropriate copyright notice;
200 | keep intact all notices stating that this License and any
201 | non-permissive terms added in accord with section 7 apply to the code;
202 | keep intact all notices of the absence of any warranty; and give all
203 | recipients a copy of this License along with the Program.
204 |
205 | You may charge any price or no price for each copy that you convey,
206 | and you may offer support or warranty protection for a fee.
207 |
208 | 5. Conveying Modified Source Versions.
209 |
210 | You may convey a work based on the Program, or the modifications to
211 | produce it from the Program, in the form of source code under the
212 | terms of section 4, provided that you also meet all of these conditions:
213 |
214 | a) The work must carry prominent notices stating that you modified
215 | it, and giving a relevant date.
216 |
217 | b) The work must carry prominent notices stating that it is
218 | released under this License and any conditions added under section
219 | 7. This requirement modifies the requirement in section 4 to
220 | "keep intact all notices".
221 |
222 | c) You must license the entire work, as a whole, under this
223 | License to anyone who comes into possession of a copy. This
224 | License will therefore apply, along with any applicable section 7
225 | additional terms, to the whole of the work, and all its parts,
226 | regardless of how they are packaged. This License gives no
227 | permission to license the work in any other way, but it does not
228 | invalidate such permission if you have separately received it.
229 |
230 | d) If the work has interactive user interfaces, each must display
231 | Appropriate Legal Notices; however, if the Program has interactive
232 | interfaces that do not display Appropriate Legal Notices, your
233 | work need not make them do so.
234 |
235 | A compilation of a covered work with other separate and independent
236 | works, which are not by their nature extensions of the covered work,
237 | and which are not combined with it such as to form a larger program,
238 | in or on a volume of a storage or distribution medium, is called an
239 | "aggregate" if the compilation and its resulting copyright are not
240 | used to limit the access or legal rights of the compilation's users
241 | beyond what the individual works permit. Inclusion of a covered work
242 | in an aggregate does not cause this License to apply to the other
243 | parts of the aggregate.
244 |
245 | 6. Conveying Non-Source Forms.
246 |
247 | You may convey a covered work in object code form under the terms
248 | of sections 4 and 5, provided that you also convey the
249 | machine-readable Corresponding Source under the terms of this License,
250 | in one of these ways:
251 |
252 | a) Convey the object code in, or embodied in, a physical product
253 | (including a physical distribution medium), accompanied by the
254 | Corresponding Source fixed on a durable physical medium
255 | customarily used for software interchange.
256 |
257 | b) Convey the object code in, or embodied in, a physical product
258 | (including a physical distribution medium), accompanied by a
259 | written offer, valid for at least three years and valid for as
260 | long as you offer spare parts or customer support for that product
261 | model, to give anyone who possesses the object code either (1) a
262 | copy of the Corresponding Source for all the software in the
263 | product that is covered by this License, on a durable physical
264 | medium customarily used for software interchange, for a price no
265 | more than your reasonable cost of physically performing this
266 | conveying of source, or (2) access to copy the
267 | Corresponding Source from a network server at no charge.
268 |
269 | c) Convey individual copies of the object code with a copy of the
270 | written offer to provide the Corresponding Source. This
271 | alternative is allowed only occasionally and noncommercially, and
272 | only if you received the object code with such an offer, in accord
273 | with subsection 6b.
274 |
275 | d) Convey the object code by offering access from a designated
276 | place (gratis or for a charge), and offer equivalent access to the
277 | Corresponding Source in the same way through the same place at no
278 | further charge. You need not require recipients to copy the
279 | Corresponding Source along with the object code. If the place to
280 | copy the object code is a network server, the Corresponding Source
281 | may be on a different server (operated by you or a third party)
282 | that supports equivalent copying facilities, provided you maintain
283 | clear directions next to the object code saying where to find the
284 | Corresponding Source. Regardless of what server hosts the
285 | Corresponding Source, you remain obligated to ensure that it is
286 | available for as long as needed to satisfy these requirements.
287 |
288 | e) Convey the object code using peer-to-peer transmission, provided
289 | you inform other peers where the object code and Corresponding
290 | Source of the work are being offered to the general public at no
291 | charge under subsection 6d.
292 |
293 | A separable portion of the object code, whose source code is excluded
294 | from the Corresponding Source as a System Library, need not be
295 | included in conveying the object code work.
296 |
297 | A "User Product" is either (1) a "consumer product", which means any
298 | tangible personal property which is normally used for personal, family,
299 | or household purposes, or (2) anything designed or sold for incorporation
300 | into a dwelling. In determining whether a product is a consumer product,
301 | doubtful cases shall be resolved in favor of coverage. For a particular
302 | product received by a particular user, "normally used" refers to a
303 | typical or common use of that class of product, regardless of the status
304 | of the particular user or of the way in which the particular user
305 | actually uses, or expects or is expected to use, the product. A product
306 | is a consumer product regardless of whether the product has substantial
307 | commercial, industrial or non-consumer uses, unless such uses represent
308 | the only significant mode of use of the product.
309 |
310 | "Installation Information" for a User Product means any methods,
311 | procedures, authorization keys, or other information required to install
312 | and execute modified versions of a covered work in that User Product from
313 | a modified version of its Corresponding Source. The information must
314 | suffice to ensure that the continued functioning of the modified object
315 | code is in no case prevented or interfered with solely because
316 | modification has been made.
317 |
318 | If you convey an object code work under this section in, or with, or
319 | specifically for use in, a User Product, and the conveying occurs as
320 | part of a transaction in which the right of possession and use of the
321 | User Product is transferred to the recipient in perpetuity or for a
322 | fixed term (regardless of how the transaction is characterized), the
323 | Corresponding Source conveyed under this section must be accompanied
324 | by the Installation Information. But this requirement does not apply
325 | if neither you nor any third party retains the ability to install
326 | modified object code on the User Product (for example, the work has
327 | been installed in ROM).
328 |
329 | The requirement to provide Installation Information does not include a
330 | requirement to continue to provide support service, warranty, or updates
331 | for a work that has been modified or installed by the recipient, or for
332 | the User Product in which it has been modified or installed. Access to a
333 | network may be denied when the modification itself materially and
334 | adversely affects the operation of the network or violates the rules and
335 | protocols for communication across the network.
336 |
337 | Corresponding Source conveyed, and Installation Information provided,
338 | in accord with this section must be in a format that is publicly
339 | documented (and with an implementation available to the public in
340 | source code form), and must require no special password or key for
341 | unpacking, reading or copying.
342 |
343 | 7. Additional Terms.
344 |
345 | "Additional permissions" are terms that supplement the terms of this
346 | License by making exceptions from one or more of its conditions.
347 | Additional permissions that are applicable to the entire Program shall
348 | be treated as though they were included in this License, to the extent
349 | that they are valid under applicable law. If additional permissions
350 | apply only to part of the Program, that part may be used separately
351 | under those permissions, but the entire Program remains governed by
352 | this License without regard to the additional permissions.
353 |
354 | When you convey a copy of a covered work, you may at your option
355 | remove any additional permissions from that copy, or from any part of
356 | it. (Additional permissions may be written to require their own
357 | removal in certain cases when you modify the work.) You may place
358 | additional permissions on material, added by you to a covered work,
359 | for which you have or can give appropriate copyright permission.
360 |
361 | Notwithstanding any other provision of this License, for material you
362 | add to a covered work, you may (if authorized by the copyright holders of
363 | that material) supplement the terms of this License with terms:
364 |
365 | a) Disclaiming warranty or limiting liability differently from the
366 | terms of sections 15 and 16 of this License; or
367 |
368 | b) Requiring preservation of specified reasonable legal notices or
369 | author attributions in that material or in the Appropriate Legal
370 | Notices displayed by works containing it; or
371 |
372 | c) Prohibiting misrepresentation of the origin of that material, or
373 | requiring that modified versions of such material be marked in
374 | reasonable ways as different from the original version; or
375 |
376 | d) Limiting the use for publicity purposes of names of licensors or
377 | authors of the material; or
378 |
379 | e) Declining to grant rights under trademark law for use of some
380 | trade names, trademarks, or service marks; or
381 |
382 | f) Requiring indemnification of licensors and authors of that
383 | material by anyone who conveys the material (or modified versions of
384 | it) with contractual assumptions of liability to the recipient, for
385 | any liability that these contractual assumptions directly impose on
386 | those licensors and authors.
387 |
388 | All other non-permissive additional terms are considered "further
389 | restrictions" within the meaning of section 10. If the Program as you
390 | received it, or any part of it, contains a notice stating that it is
391 | governed by this License along with a term that is a further
392 | restriction, you may remove that term. If a license document contains
393 | a further restriction but permits relicensing or conveying under this
394 | License, you may add to a covered work material governed by the terms
395 | of that license document, provided that the further restriction does
396 | not survive such relicensing or conveying.
397 |
398 | If you add terms to a covered work in accord with this section, you
399 | must place, in the relevant source files, a statement of the
400 | additional terms that apply to those files, or a notice indicating
401 | where to find the applicable terms.
402 |
403 | Additional terms, permissive or non-permissive, may be stated in the
404 | form of a separately written license, or stated as exceptions;
405 | the above requirements apply either way.
406 |
407 | 8. Termination.
408 |
409 | You may not propagate or modify a covered work except as expressly
410 | provided under this License. Any attempt otherwise to propagate or
411 | modify it is void, and will automatically terminate your rights under
412 | this License (including any patent licenses granted under the third
413 | paragraph of section 11).
414 |
415 | However, if you cease all violation of this License, then your
416 | license from a particular copyright holder is reinstated (a)
417 | provisionally, unless and until the copyright holder explicitly and
418 | finally terminates your license, and (b) permanently, if the copyright
419 | holder fails to notify you of the violation by some reasonable means
420 | prior to 60 days after the cessation.
421 |
422 | Moreover, your license from a particular copyright holder is
423 | reinstated permanently if the copyright holder notifies you of the
424 | violation by some reasonable means, this is the first time you have
425 | received notice of violation of this License (for any work) from that
426 | copyright holder, and you cure the violation prior to 30 days after
427 | your receipt of the notice.
428 |
429 | Termination of your rights under this section does not terminate the
430 | licenses of parties who have received copies or rights from you under
431 | this License. If your rights have been terminated and not permanently
432 | reinstated, you do not qualify to receive new licenses for the same
433 | material under section 10.
434 |
435 | 9. Acceptance Not Required for Having Copies.
436 |
437 | You are not required to accept this License in order to receive or
438 | run a copy of the Program. Ancillary propagation of a covered work
439 | occurring solely as a consequence of using peer-to-peer transmission
440 | to receive a copy likewise does not require acceptance. However,
441 | nothing other than this License grants you permission to propagate or
442 | modify any covered work. These actions infringe copyright if you do
443 | not accept this License. Therefore, by modifying or propagating a
444 | covered work, you indicate your acceptance of this License to do so.
445 |
446 | 10. Automatic Licensing of Downstream Recipients.
447 |
448 | Each time you convey a covered work, the recipient automatically
449 | receives a license from the original licensors, to run, modify and
450 | propagate that work, subject to this License. You are not responsible
451 | for enforcing compliance by third parties with this License.
452 |
453 | An "entity transaction" is a transaction transferring control of an
454 | organization, or substantially all assets of one, or subdividing an
455 | organization, or merging organizations. If propagation of a covered
456 | work results from an entity transaction, each party to that
457 | transaction who receives a copy of the work also receives whatever
458 | licenses to the work the party's predecessor in interest had or could
459 | give under the previous paragraph, plus a right to possession of the
460 | Corresponding Source of the work from the predecessor in interest, if
461 | the predecessor has it or can get it with reasonable efforts.
462 |
463 | You may not impose any further restrictions on the exercise of the
464 | rights granted or affirmed under this License. For example, you may
465 | not impose a license fee, royalty, or other charge for exercise of
466 | rights granted under this License, and you may not initiate litigation
467 | (including a cross-claim or counterclaim in a lawsuit) alleging that
468 | any patent claim is infringed by making, using, selling, offering for
469 | sale, or importing the Program or any portion of it.
470 |
471 | 11. Patents.
472 |
473 | A "contributor" is a copyright holder who authorizes use under this
474 | License of the Program or a work on which the Program is based. The
475 | work thus licensed is called the contributor's "contributor version".
476 |
477 | A contributor's "essential patent claims" are all patent claims
478 | owned or controlled by the contributor, whether already acquired or
479 | hereafter acquired, that would be infringed by some manner, permitted
480 | by this License, of making, using, or selling its contributor version,
481 | but do not include claims that would be infringed only as a
482 | consequence of further modification of the contributor version. For
483 | purposes of this definition, "control" includes the right to grant
484 | patent sublicenses in a manner consistent with the requirements of
485 | this License.
486 |
487 | Each contributor grants you a non-exclusive, worldwide, royalty-free
488 | patent license under the contributor's essential patent claims, to
489 | make, use, sell, offer for sale, import and otherwise run, modify and
490 | propagate the contents of its contributor version.
491 |
492 | In the following three paragraphs, a "patent license" is any express
493 | agreement or commitment, however denominated, not to enforce a patent
494 | (such as an express permission to practice a patent or covenant not to
495 | sue for patent infringement). To "grant" such a patent license to a
496 | party means to make such an agreement or commitment not to enforce a
497 | patent against the party.
498 |
499 | If you convey a covered work, knowingly relying on a patent license,
500 | and the Corresponding Source of the work is not available for anyone
501 | to copy, free of charge and under the terms of this License, through a
502 | publicly available network server or other readily accessible means,
503 | then you must either (1) cause the Corresponding Source to be so
504 | available, or (2) arrange to deprive yourself of the benefit of the
505 | patent license for this particular work, or (3) arrange, in a manner
506 | consistent with the requirements of this License, to extend the patent
507 | license to downstream recipients. "Knowingly relying" means you have
508 | actual knowledge that, but for the patent license, your conveying the
509 | covered work in a country, or your recipient's use of the covered work
510 | in a country, would infringe one or more identifiable patents in that
511 | country that you have reason to believe are valid.
512 |
513 | If, pursuant to or in connection with a single transaction or
514 | arrangement, you convey, or propagate by procuring conveyance of, a
515 | covered work, and grant a patent license to some of the parties
516 | receiving the covered work authorizing them to use, propagate, modify
517 | or convey a specific copy of the covered work, then the patent license
518 | you grant is automatically extended to all recipients of the covered
519 | work and works based on it.
520 |
521 | A patent license is "discriminatory" if it does not include within
522 | the scope of its coverage, prohibits the exercise of, or is
523 | conditioned on the non-exercise of one or more of the rights that are
524 | specifically granted under this License. You may not convey a covered
525 | work if you are a party to an arrangement with a third party that is
526 | in the business of distributing software, under which you make payment
527 | to the third party based on the extent of your activity of conveying
528 | the work, and under which the third party grants, to any of the
529 | parties who would receive the covered work from you, a discriminatory
530 | patent license (a) in connection with copies of the covered work
531 | conveyed by you (or copies made from those copies), or (b) primarily
532 | for and in connection with specific products or compilations that
533 | contain the covered work, unless you entered into that arrangement,
534 | or that patent license was granted, prior to 28 March 2007.
535 |
536 | Nothing in this License shall be construed as excluding or limiting
537 | any implied license or other defenses to infringement that may
538 | otherwise be available to you under applicable patent law.
539 |
540 | 12. No Surrender of Others' Freedom.
541 |
542 | If conditions are imposed on you (whether by court order, agreement or
543 | otherwise) that contradict the conditions of this License, they do not
544 | excuse you from the conditions of this License. If you cannot convey a
545 | covered work so as to satisfy simultaneously your obligations under this
546 | License and any other pertinent obligations, then as a consequence you may
547 | not convey it at all. For example, if you agree to terms that obligate you
548 | to collect a royalty for further conveying from those to whom you convey
549 | the Program, the only way you could satisfy both those terms and this
550 | License would be to refrain entirely from conveying the Program.
551 |
552 | 13. Use with the GNU Affero General Public License.
553 |
554 | Notwithstanding any other provision of this License, you have
555 | permission to link or combine any covered work with a work licensed
556 | under version 3 of the GNU Affero General Public License into a single
557 | combined work, and to convey the resulting work. The terms of this
558 | License will continue to apply to the part which is the covered work,
559 | but the special requirements of the GNU Affero General Public License,
560 | section 13, concerning interaction through a network will apply to the
561 | combination as such.
562 |
563 | 14. Revised Versions of this License.
564 |
565 | The Free Software Foundation may publish revised and/or new versions of
566 | the GNU General Public License from time to time. Such new versions will
567 | be similar in spirit to the present version, but may differ in detail to
568 | address new problems or concerns.
569 |
570 | Each version is given a distinguishing version number. If the
571 | Program specifies that a certain numbered version of the GNU General
572 | Public License "or any later version" applies to it, you have the
573 | option of following the terms and conditions either of that numbered
574 | version or of any later version published by the Free Software
575 | Foundation. If the Program does not specify a version number of the
576 | GNU General Public License, you may choose any version ever published
577 | by the Free Software Foundation.
578 |
579 | If the Program specifies that a proxy can decide which future
580 | versions of the GNU General Public License can be used, that proxy's
581 | public statement of acceptance of a version permanently authorizes you
582 | to choose that version for the Program.
583 |
584 | Later license versions may give you additional or different
585 | permissions. However, no additional obligations are imposed on any
586 | author or copyright holder as a result of your choosing to follow a
587 | later version.
588 |
589 | 15. Disclaimer of Warranty.
590 |
591 | THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
592 | APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
593 | HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
594 | OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
595 | THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
596 | PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
597 | IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
598 | ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
599 |
600 | 16. Limitation of Liability.
601 |
602 | IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
603 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
604 | THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
605 | GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
606 | USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
607 | DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
608 | PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
609 | EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
610 | SUCH DAMAGES.
611 |
612 | 17. Interpretation of Sections 15 and 16.
613 |
614 | If the disclaimer of warranty and limitation of liability provided
615 | above cannot be given local legal effect according to their terms,
616 | reviewing courts shall apply local law that most closely approximates
617 | an absolute waiver of all civil liability in connection with the
618 | Program, unless a warranty or assumption of liability accompanies a
619 | copy of the Program in return for a fee.
620 |
621 | END OF TERMS AND CONDITIONS
622 |
623 | How to Apply These Terms to Your New Programs
624 |
625 | If you develop a new program, and you want it to be of the greatest
626 | possible use to the public, the best way to achieve this is to make it
627 | free software which everyone can redistribute and change under these terms.
628 |
629 | To do so, attach the following notices to the program. It is safest
630 | to attach them to the start of each source file to most effectively
631 | state the exclusion of warranty; and each file should have at least
632 | the "copyright" line and a pointer to where the full notice is found.
633 |
634 |
635 | Copyright (C)
636 |
637 | This program is free software: you can redistribute it and/or modify
638 | it under the terms of the GNU General Public License as published by
639 | the Free Software Foundation, either version 3 of the License, or
640 | (at your option) any later version.
641 |
642 | This program is distributed in the hope that it will be useful,
643 | but WITHOUT ANY WARRANTY; without even the implied warranty of
644 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
645 | GNU General Public License for more details.
646 |
647 | You should have received a copy of the GNU General Public License
648 | along with this program. If not, see .
649 |
650 | Also add information on how to contact you by electronic and paper mail.
651 |
652 | If the program does terminal interaction, make it output a short
653 | notice like this when it starts in an interactive mode:
654 |
655 | Copyright (C)
656 | This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
657 | This is free software, and you are welcome to redistribute it
658 | under certain conditions; type `show c' for details.
659 |
660 | The hypothetical commands `show w' and `show c' should show the appropriate
661 | parts of the General Public License. Of course, your program's commands
662 | might be different; for a GUI interface, you would use an "about box".
663 |
664 | You should also get your employer (if you work as a programmer) or school,
665 | if any, to sign a "copyright disclaimer" for the program, if necessary.
666 | For more information on this, and how to apply and follow the GNU GPL, see
667 | .
668 |
669 | The GNU General Public License does not permit incorporating your program
670 | into proprietary programs. If your program is a subroutine library, you
671 | may consider it more useful to permit linking proprietary applications with
672 | the library. If this is what you want to do, use the GNU Lesser General
673 | Public License instead of this License. But first, please read
674 | .
675 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # H2HDB
2 |
3 | ## Description
4 |
5 | The `H2HDB` is a comprehensive database for organising and managing H@H comic collections. It offers a streamlined way to catalogue your comics, providing key information such as GID (Gallery ID), title, tags and more, ensuring your collection is always organised and accessible.
6 |
7 | ---
8 |
9 | ## Features
10 |
11 | - [x] Add new galleries to the database
12 | - [x] Compress H@H's galleries to a folder
13 | - [x] Record the removed GIDs in a separate list
14 | - [ ] Write document (need?)
15 |
16 | ---
17 |
18 | ## Installation and Usage
19 |
20 | 1. Install Python 3.13 or higher from [python.org](https://www.python.org/downloads/).
21 | 1. Install the required packages.
22 |
23 | ```bash
24 | pip install h2hdb
25 | ```
26 |
27 | 1. Run the script.
28 |
29 | ```bash
30 | python -m h2hdb --config [json-path]
31 | ```
32 |
33 | ### Config
34 |
35 | ```json
36 | {
37 | "h2h": {
38 | "download_path": "[str]", // The download path of H@H. The default is `download`.
39 | "cbz_path": "[str]", // The cbz in this path.
40 |         "cbz_max_size": "[int]", // The maximum of the minimum of width and height. The default is `768`.
41 | "cbz_grouping": "[str]", // `flat`, `date-yyyy`, `date-yyyy-mm`, or `date-yyyy-mm-dd`. The default is `flat`.
42 | "cbz_sort": "[str]" // `upload_time`, `download_time`, `pages`, or `pages+[num]`. The default is `no`.
43 | },
44 | "database": {
45 | "sql_type": "[str]", // Now only supports `mysql`. The default is `mysql`.
46 | "host": "[str]", // The default is `localhost`.
47 | "port": "[int]", // The default is `3306`.
48 | "user": "[str]", // The default is `root`.
49 | "password": "[str]" // The default is `password`.
50 | },
51 | "logger": {
52 | "level": "[str]" // One of NOTSET, DEBUG, INFO, WARNING, ERROR, CRITICAL.
53 | }
54 | }
55 | ```
56 |
57 | ---
58 |
59 | ## Q & A
60 |
61 | - Why are some images missing from the CBZ-files?
62 | `H2HDB` does not compress images that are considered spam according to certain rules. If you encounter any images that you believe should have been included, please report the issue.
63 |
64 | - Why are some images in some CBZ files and not in other CBZ-files?
65 |   `H2HDB` learns the spam rule from the previous CBZ files. If you delete the CBZ files containing these images, the new CBZ files will not contain these images.
66 |
67 | ---
68 |
69 | ## Credits
70 |
71 | The project was created by [Kuan-Lun Wang](https://www.klwang.tw/home/).
72 |
73 | ---
74 |
75 | ## License
76 |
77 | This project is distributed under the terms of the GNU General Public Licence (GPL). For detailed licence terms, see the `LICENSE` file included in this distribution.
78 |
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [build-system]
2 | requires = ["setuptools", "wheel"]
3 | build-backend = "setuptools.build_meta"
4 |
5 | [project]
6 | name = "h2hdb"
7 | version = "0.9.1.9"
8 | description = "A simple H@H database"
9 | readme = "README.md"
10 | authors = [{ name = "Kuan-Lun Wang" }]
11 | license = { text = "GNU Affero General Public License v3" } # NOTE(review): LICENSE file contains GPLv3, not AGPLv3 — confirm which license is intended
12 | dependencies = [
13 | "h2h-galleryinfo-parser>=0.2.2",
14 | "mysql-connector-python>=9.3.0,<10.0.0",
15 | "pillow>=11.2.1,<12.0.0",
16 | "pydantic>=2.11.4",
17 | ]
18 | classifiers = [
19 | "Development Status :: 3 - Alpha",
20 | "Intended Audience :: Developers",
21 | "Operating System :: OS Independent",
22 | "License :: OSI Approved :: GNU Affero General Public License v3 or later (AGPLv3+)",
23 | "Programming Language :: Python :: 3.13",
24 | ]
25 |
26 | [project.urls]
27 | Homepage = "https://github.com/Kuan-Lun/h2hdb"
28 | Source = "https://github.com/Kuan-Lun/h2hdb"
29 | Tracker = "https://github.com/Kuan-Lun/h2hdb/issues"
30 |
31 | [tool.setuptools]
32 | packages = ["h2hdb"]
33 | package-dir = { h2hdb = "src/h2hdb" }
34 | package-data = { h2hdb = ["py.typed"] }
35 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | setuptools>=80.7.1
--------------------------------------------------------------------------------
/src/h2hdb/__init__.py:
--------------------------------------------------------------------------------
1 | __all__ = [
2 | "H2HDB",
3 | "DatabaseConfig",
4 | "LoggerConfig",
5 | "H2HConfig",
6 | "H2HDBConfig",
7 | "load_config",
8 | "HentaiDBLogger",
9 | "setup_logger",
10 | ]
11 | __author__ = "Kuan-Lun Wang"
12 |
13 |
14 | from .h2hdb_h2hdb import H2HDB
15 | from .config_loader import (
16 | DatabaseConfig,
17 | LoggerConfig,
18 | H2HConfig,
19 | H2HDBConfig,
20 | load_config,
21 | )
22 | from .logger import HentaiDBLogger, setup_logger
23 |
--------------------------------------------------------------------------------
/src/h2hdb/__main__.py:
--------------------------------------------------------------------------------
1 | from h2hdb import H2HDB
2 | from .config_loader import load_config
3 |
if __name__ == "__main__":
    configuration = load_config()
    with H2HDB(config=configuration) as db:
        # Verify the database uses the expected character set and collation.
        db.check_database_character_set()
        db.check_database_collation()

        # Ensure all main tables exist before touching data.
        db.create_main_tables()

        # Register the contents of the H@H download directory.
        db.insert_h2h_download()

        # Recompute hashes for the files currently on disk.
        db.refresh_current_files_hashs()
--------------------------------------------------------------------------------
/src/h2hdb/compress_gallery_to_cbz.py:
--------------------------------------------------------------------------------
1 | __all__ = ["compress_images_and_create_cbz", "calculate_hash_of_file_in_cbz"]
2 |
3 | import hashlib
4 | import os
5 | import shutil
6 | import zipfile
7 |
8 | from PIL import Image, ImageFile # type: ignore
9 |
10 | Image.MAX_IMAGE_PIXELS = None
11 | ImageFile.LOAD_TRUNCATED_IMAGES = True
12 |
13 | from .settings import hash_function_by_file
14 |
15 | from .settings import FILE_NAME_LENGTH_LIMIT, COMPARISON_HASH_ALGORITHM
16 |
17 |
def compress_image(image_path: str, output_path: str, max_size: int) -> None:
    """Compress an image, saving it to the output path.

    Images with alpha (RGBA/LA) are flattened onto a white background and
    converted to RGB.  When ``max_size >= 1`` the image is downscaled so that
    its smaller dimension is at most ``max_size`` (aspect ratio preserved).
    Formats that do not convert well to JPEG (GIF/TIFF/ICO) are re-saved in
    their original format; everything else is saved as JPEG.
    """
    with Image.open(image_path) as image:
        # BUG FIX: capture the source format before any convert() call —
        # convert() returns a new Image whose .format is None, which made
        # the unsuitable-formats check below a dead branch for converted
        # (e.g. palette-mode GIF) images.
        original_format = image.format

        if image.mode in ("RGBA", "LA"):
            image = image.convert("RGBA")
            white_bg = Image.new("RGBA", image.size, (255, 255, 255, 255))
            image = Image.alpha_composite(white_bg, image)
            image = image.convert("RGB")
        if image.mode != "RGB":
            image = image.convert("RGB")

        if max_size >= 1:
            # Scale relative to the *smaller* dimension so that min(w, h)
            # ends up at most max_size.
            if image.height >= image.width:
                max_width = max_size
                scale = max_size / image.width
                max_height = int(image.height * scale)
            else:
                max_height = max_size
                scale = max_size / image.height
                max_width = int(image.width * scale)
            # BUG FIX: thumbnail() previously ran unconditionally, raising
            # NameError (max_width/max_height unbound) when max_size < 1.
            image.thumbnail((max_width, max_height), resample=Image.Resampling.LANCZOS)

        unsuitable_formats = ["GIF", "TIFF", "ICO"]
        if original_format in unsuitable_formats:
            image.save(output_path, original_format)
        else:
            if "xmp" in image.info:
                del image.info["xmp"]
            image.save(output_path, "JPEG")
47 |
48 |
def create_cbz(directory, output_path) -> None:
    """Create a CBZ (zip) file from all files in *directory*.

    Entries are added in sorted name order so the resulting archive is
    deterministic regardless of the filesystem's directory-listing order
    (``os.listdir`` gives arbitrary order).
    """
    with zipfile.ZipFile(output_path, "w") as cbz:
        for filename in sorted(os.listdir(directory)):
            cbz.write(os.path.join(directory, filename), filename)
54 |
55 |
def hash_and_process_file(
    input_directory: str,
    tmp_cbz_directory: str,
    filename: str,
    exclude_hashs: list[bytes],
    max_size: int,
) -> None:
    """Copy one gallery file into the temp CBZ directory, compressing images.

    Files whose COMPARISON_HASH_ALGORITHM digest appears in *exclude_hashs*
    are skipped entirely.  JPEG/PNG/BMP images are recompressed to ``.jpg``,
    GIFs are compressed under their original name, and any other file is
    copied verbatim.
    """
    source_path = os.path.join(input_directory, filename)
    file_hash = hash_function_by_file(source_path, COMPARISON_HASH_ALGORITHM)
    if file_hash in exclude_hashs:
        return  # Spam/duplicate image: do not include it in the CBZ.

    # BUG FIX: the BMP suffix was previously "bmp" (missing the dot), which
    # also matched any filename merely ending in the letters "bmp".
    if filename.lower().endswith((".jpg", ".jpeg", ".png", ".bmp")):
        new_filename = os.path.splitext(filename)[0] + ".jpg"
        compress_image(
            source_path,
            os.path.join(tmp_cbz_directory, new_filename),
            max_size,
        )
    elif filename.lower().endswith(".gif"):
        compress_image(
            source_path,
            os.path.join(tmp_cbz_directory, filename),
            max_size,
        )
    else:
        shutil.copy(
            source_path,
            os.path.join(tmp_cbz_directory, filename),
        )
85 |
86 |
# Compress images and create a CBZ file
def compress_images_and_create_cbz(
    input_directory: str,
    output_directory: str,
    tmp_directory: str,
    max_size: int,
    exclude_hashs: list[bytes],
) -> None:
    """Compress a gallery directory into a CBZ file in *output_directory*.

    Files are processed into a temporary working directory under
    *tmp_directory*, zipped into ``<gallery>.cbz``, and the working
    directory is removed afterwards.

    Raises:
        ValueError: If *input_directory* equals the output or tmp directory.
    """
    # BUG FIX: the previous check (len(set(...)) < 2) only rejected the case
    # where all three paths were identical, so input == output (or input ==
    # tmp, which would rmtree inside the source) slipped through.
    if input_directory in (output_directory, tmp_directory):
        raise ValueError("Input and output directories cannot be the same.")

    # Recreate a clean per-gallery working directory under tmp.
    gallery_name = os.path.basename(input_directory)
    tmp_cbz_directory = os.path.join(tmp_directory, gallery_name)
    if os.path.exists(tmp_cbz_directory):
        shutil.rmtree(tmp_cbz_directory)
    os.makedirs(tmp_cbz_directory)

    for filename in os.listdir(input_directory):
        hash_and_process_file(
            input_directory, tmp_cbz_directory, filename, exclude_hashs, max_size
        )

    # Create the CBZ file, then drop the working directory.
    os.makedirs(output_directory, exist_ok=True)
    cbzfile = os.path.join(
        output_directory, gallery_name_to_cbz_file_name(gallery_name)
    )
    create_cbz(tmp_cbz_directory, cbzfile)
    shutil.rmtree(tmp_cbz_directory)
117 |
118 |
def gallery_name_to_cbz_file_name(gallery_name: str) -> str:
    """Convert a gallery name to a CBZ file name.

    Characters are dropped from the *front* of the name until the UTF-8
    encoded result plus the 4-byte ".cbz" suffix fits within
    FILE_NAME_LENGTH_LIMIT bytes.
    """
    suffix = ".cbz"
    trimmed = gallery_name
    while len(trimmed.encode("utf-8")) + len(suffix) > FILE_NAME_LENGTH_LIMIT:
        trimmed = trimmed[1:]
    return trimmed + suffix
124 |
125 |
def calculate_hash_of_file_in_cbz(
    cbz_path: str, file_name: str, algorithm: str
) -> bytes:
    """Return the digest of *file_name* inside the CBZ archive at *cbz_path*.

    The digest is computed with ``hashlib.new(algorithm)``.  If *cbz_path*
    is not a valid zip archive, an empty ``bytes`` object is returned.
    """
    if not zipfile.is_zipfile(cbz_path):
        return b""
    hasher = hashlib.new(algorithm)
    with zipfile.ZipFile(cbz_path, "r") as archive:
        with archive.open(file_name) as member:
            hasher.update(member.read())
    return hasher.digest()
139 |
--------------------------------------------------------------------------------
/src/h2hdb/config_loader.py:
--------------------------------------------------------------------------------
1 | __all__ = ["DatabaseConfig", "LoggerConfig", "H2HConfig", "H2HDBConfig", "load_config"]
2 |
3 | import argparse
4 | import json
5 | import os
6 |
7 | from pydantic import BaseModel, Field, ConfigDict, field_validator
8 |
9 | from .settings import LOG_LEVEL, CBZ_GROUPING, CBZ_SORT
10 |
11 |
class ConfigError(Exception):
    """Raised when the application configuration is invalid.

    Attributes:
        message -- explanation of the error
    """

    def __init__(self, message: str) -> None:
        self.message = message
        super().__init__(message)
23 |
24 |
class ConfigModel(BaseModel):
    """
    Base class for configuration models.

    This class inherits from `pydantic.BaseModel` and is used to define the configuration
    structure for the application. It provides a way to validate and parse configuration data.
    """

    # Reject unknown keys so typos in config files fail loudly at load time.
    model_config = ConfigDict(extra="forbid")
34 |
35 |
class DatabaseConfig(ConfigModel):
    """Connection settings for the SQL database backend.

    All fields have defaults, so an empty "database" section in the JSON
    config yields a localhost MySQL connection.
    """

    sql_type: str = Field(
        default="mysql",
        description="Type of SQL database (e.g., mysql)",
    )
    host: str = Field(
        default="localhost",
        min_length=1,
        description="Host of the SQL database",
    )
    port: int = Field(
        default=3306,
        ge=1,
        le=65535,
        description="Port of the SQL database",
    )
    user: str = Field(
        default="root",
        min_length=1,
        description="User for the SQL database",
    )
    database: str = Field(
        default="h2h",
        min_length=1,
        description="Database name for the SQL database",
    )
    password: str = Field(
        default="password",
        description="Password for the SQL database",
    )
66 |
67 |
class LoggerConfig(ConfigModel):
    """Logger configuration.

    CONSISTENCY FIX: inherits ConfigModel (not bare BaseModel) so unknown
    keys in the "logger" config section are rejected (extra="forbid"),
    matching every other config section.
    """

    level: LOG_LEVEL = Field(
        default=LOG_LEVEL.info,
        description="Log level (case-insensitive): NOTSET, DEBUG, INFO, WARNING, ERROR, CRITICAL",
    )

    @field_validator("level", mode="before")
    @classmethod
    def normalize_level(cls, v) -> LOG_LEVEL:
        """Coerce a str (member name, any case), int (value), or LOG_LEVEL.

        Raises:
            ValueError: If the string/int does not map to a LOG_LEVEL member.
            TypeError: If the value has an unsupported type.
        """
        if isinstance(v, str):
            v_lower = v.lower()
            try:
                return LOG_LEVEL[v_lower]  # Enum lookup by member name
            except KeyError:
                raise ValueError(
                    f"Invalid log level '{v}'. Must be one of: "
                    + ", ".join(name.upper() for name in LOG_LEVEL.__members__)
                )
        elif isinstance(v, int):
            try:
                return LOG_LEVEL(v)  # Enum lookup by value
            except ValueError:
                raise ValueError(f"Invalid log level value: {v}")
        elif isinstance(v, LOG_LEVEL):
            return v
        else:
            raise TypeError(f"Invalid type for log level: {type(v)}")
95 |
96 |
class H2HConfig(ConfigModel):
    """Settings for the H@H download/compression pipeline."""

    download_path: str = Field(
        default="download",
        min_length=1,
        description="Path to download files",
    )
    cbz_path: str = Field(
        default="",
        min_length=0,
        description="Path to save CBZ files",
    )
    cbz_max_size: int = Field(
        default=768,
        ge=1,
        description="Maximum width or height (in pixels) allowed for each image in the CBZ file",
    )
    cbz_grouping: CBZ_GROUPING = Field(
        default=CBZ_GROUPING.flat,
        description="Grouping method for CBZ files: flat, date-yyyy, date-yyyy-mm, or date-yyyy-mm-dd",
    )
    cbz_sort: CBZ_SORT = Field(
        default=CBZ_SORT.no,
        description="Sorting method for CBZ files: no, upload_time, download_time, pages, or pages+[num]",
    )

    @property
    def cbz_tmp_directory(self) -> str:
        """Working directory for in-progress CBZ builds: ``<cbz_path>/tmp``."""
        return os.path.join(self.cbz_path, "tmp")
125 |
126 |
class H2HDBConfig(ConfigModel):
    """
    Configuration class for H2HDB.

    This class combines the configurations for H2H, database, and logger into a single
    configuration object. Each section falls back to its defaults when omitted
    from the JSON config (see the default_factory on each field).
    """

    h2h: H2HConfig = Field(
        default_factory=H2HConfig,
        description="Configuration for H2H",
    )
    database: DatabaseConfig = Field(
        default_factory=DatabaseConfig,
        description="Configuration for the database",
    )
    logger: LoggerConfig = Field(
        default_factory=LoggerConfig,
        description="Configuration for the logger",
    )
147 |
148 |
def load_config(config_path: str = "") -> H2HDBConfig:
    """Load and validate the application configuration.

    Args:
        config_path: Path to a JSON config file.  When empty, the path is
            taken from the ``--config`` command-line argument instead.

    Returns:
        A validated H2HDBConfig; when no config file is given, an empty
        dict is validated so every field falls back to its default.
    """
    if config_path:
        # BUG FIX: read config files as UTF-8 explicitly — the platform
        # default codec (e.g. cp1252 on Windows) breaks non-ASCII configs.
        with open(config_path, "r", encoding="utf-8") as f:
            raw = json.load(f)
    else:
        parser = argparse.ArgumentParser()
        parser.add_argument("--config")
        args = parser.parse_args()
        if args.config:
            with open(args.config, "r", encoding="utf-8") as f:
                raw = json.load(f)
        else:
            # No config given: validate an empty dict so defaults apply.
            raw = {}

    return H2HDBConfig.model_validate(raw)
164 |
--------------------------------------------------------------------------------
/src/h2hdb/h2hdb_h2hdb.py:
--------------------------------------------------------------------------------
1 | __all__ = ["H2HDB", "GALLERY_INFO_FILE_NAME"]
2 |
3 |
4 | import os
5 | from itertools import islice
6 | from time import sleep
7 |
8 | from h2h_galleryinfo_parser import (
9 | GalleryInfoParser,
10 | GalleryURLParser,
11 | parse_galleryinfo,
12 | )
13 |
14 | from .information import FileInformation, TagInformation
15 | from .settings import chunk_list, hash_function_by_file
16 | from .table_comments import H2HDBGalleriesComments
17 | from .table_files_dbids import H2HDBFiles
18 | from .table_removed_gids import H2HDBRemovedGalleries
19 | from .table_tags import H2HDBGalleriesTags
20 | from .threading_tools import run_in_parallel, SQLThreadsList
21 | from .view_ginfo import H2HDBGalleriesInfos
22 |
23 | from .hash_dict import HASH_ALGORITHMS
24 | from .settings import (
25 | COMPARISON_HASH_ALGORITHM,
26 | FOLDER_NAME_LENGTH_LIMIT,
27 | FILE_NAME_LENGTH_LIMIT,
28 | GALLERY_INFO_FILE_NAME,
29 | )
30 | from .threading_tools import POOL_CPU_LIMIT
31 |
32 |
def get_sorting_base_level(x: int = 20) -> int:
    """Return the sorting base level, clamped to a minimum of 1."""
    return max(x, 1)
36 |
37 |
38 | class H2HDB(
39 | H2HDBGalleriesInfos,
40 | H2HDBGalleriesComments,
41 | H2HDBGalleriesTags,
42 | H2HDBFiles,
43 | H2HDBRemovedGalleries,
44 | ):
    def _create_pending_gallery_removals_table(self) -> None:
        """Create the ``pending_gallery_removals`` table if it does not exist.

        The gallery name is stored split across several part-columns
        (presumably to fit MySQL index key-length limits — confirm against
        ``mysql_split_gallery_name_based_on_limit``); those parts form the
        primary key, while the unsplit name lives in ``full_name`` with a
        FULLTEXT index for searching.
        """
        with self.SQLConnector() as connector:
            table_name = "pending_gallery_removals"
            match self.config.database.sql_type.lower():
                case "mysql":
                    column_name = "name"
                    # Part-column names plus the DDL snippet declaring them.
                    column_name_parts, create_gallery_name_parts_sql = (
                        self.mysql_split_gallery_name_based_on_limit(column_name)
                    )
                    query = f"""
                    CREATE TABLE IF NOT EXISTS {table_name} (
                        PRIMARY KEY ({", ".join(column_name_parts)}),
                        {create_gallery_name_parts_sql},
                        full_name TEXT NOT NULL,
                        FULLTEXT (full_name)
                    )
                    """
            # NOTE(review): `query` is only bound in the mysql case; other
            # sql_type values would raise NameError here — confirm intended.
            connector.execute(query)
            self.logger.info(f"{table_name} table created.")
64 |
65 | def _count_duplicated_files_hashs_sha512(self) -> int:
66 | with self.SQLConnector() as connector:
67 | table_name = "duplicated_files_hashs_sha512"
68 | match self.config.database.sql_type.lower():
69 | case "mysql":
70 | query = f"""
71 | SELECT COUNT(*)
72 | FROM {table_name}
73 | """
74 | query_result = connector.fetch_one(query)
75 | return query_result[0]
76 |
    def _create_duplicated_galleries_tables(self) -> None:
        """Create the views used to detect duplicated files across galleries.

        Two views are created:
        * ``duplicated_files_hashs_sha512`` — hash ids that occur in three or
          more file rows (COUNT(*) >= 3).
        * ``duplicated_hash_values_by_count_artist_ratio`` — hash values whose
          distinct-artist count is more than twice the owning gallery's
          artist count.

        NOTE(review): the first view selects ``db_file_id`` while grouping
        only by ``db_hash_id`` — this relies on MySQL's permissive GROUP BY
        (an arbitrary row's value is returned); confirm ONLY_FULL_GROUP_BY
        is disabled on the target server.
        """
        with self.SQLConnector() as connector:
            match self.config.database.sql_type.lower():
                case "mysql":
                    query = """
                    CREATE VIEW IF NOT EXISTS duplicated_files_hashs_sha512 AS
                    SELECT db_file_id,
                        db_hash_id
                    FROM files_hashs_sha512
                    GROUP BY db_hash_id
                    HAVING COUNT(*) >= 3
                    """
                    connector.execute(query)

        with self.SQLConnector() as connector:
            match self.config.database.sql_type.lower():
                case "mysql":
                    query = """
                    CREATE VIEW IF NOT EXISTS duplicated_hash_values_by_count_artist_ratio AS WITH duplicated_db_dbids AS (
                        SELECT galleries_dbids.db_gallery_id AS db_gallery_id,
                            files_dbids.db_file_id AS db_file_id,
                            duplicated_files_hashs_sha512.db_hash_id AS db_hash_id,
                            galleries_tag_pairs_dbids.tag_value AS artist_value
                        FROM duplicated_files_hashs_sha512
                            LEFT JOIN files_hashs_sha512 ON duplicated_files_hashs_sha512.db_hash_id = files_hashs_sha512.db_hash_id
                            LEFT JOIN files_dbids ON files_hashs_sha512.db_file_id = files_dbids.db_file_id
                            LEFT JOIN galleries_dbids ON files_dbids.db_gallery_id = galleries_dbids.db_gallery_id
                            LEFT JOIN galleries_tags ON galleries_dbids.db_gallery_id = galleries_tags.db_gallery_id
                            LEFT JOIN galleries_tag_pairs_dbids ON galleries_tags.db_tag_pair_id = galleries_tag_pairs_dbids.db_tag_pair_id
                        WHERE galleries_tag_pairs_dbids.tag_name = 'artist'
                    ),
                    duplicated_count_artists_by_db_gallery_id AS(
                        SELECT COUNT(DISTINCT artist_value) AS artist_count,
                            db_gallery_id
                        FROM duplicated_db_dbids
                        GROUP BY db_gallery_id
                    )
                    SELECT files_hashs_sha512_dbids.hash_value AS hash_value
                    FROM duplicated_db_dbids
                        LEFT JOIN duplicated_count_artists_by_db_gallery_id ON duplicated_db_dbids.db_gallery_id = duplicated_count_artists_by_db_gallery_id.db_gallery_id
                        LEFT JOIN files_hashs_sha512_dbids ON duplicated_db_dbids.db_hash_id = files_hashs_sha512_dbids.db_hash_id
                    GROUP BY duplicated_db_dbids.db_hash_id
                    HAVING COUNT(DISTINCT duplicated_db_dbids.artist_value) / MAX(
                        duplicated_count_artists_by_db_gallery_id.artist_count
                    ) > 2
                    """
                    connector.execute(query)
124 |
    def insert_pending_gallery_removal(self, gallery_name: str) -> None:
        """Queue *gallery_name* for later removal (no-op if already queued).

        Raises:
            ValueError: If the name exceeds FOLDER_NAME_LENGTH_LIMIT characters.
        """
        with self.SQLConnector() as connector:
            if self.check_pending_gallery_removal(gallery_name) is False:
                table_name = "pending_gallery_removals"
                # Length check is in characters; storage splits the name into
                # fixed-size part-columns — TODO confirm the limit's unit
                # (chars vs bytes) matches the column definitions.
                if len(gallery_name) > FOLDER_NAME_LENGTH_LIMIT:
                    self.logger.error(
                        f"Gallery name '{gallery_name}' is too long. Must be {FOLDER_NAME_LENGTH_LIMIT} characters or less."
                    )
                    raise ValueError("Gallery name is too long.")
                gallery_name_parts = self._split_gallery_name(gallery_name)

                match self.config.database.sql_type.lower():
                    case "mysql":
                        column_name_parts, _ = (
                            self.mysql_split_gallery_name_based_on_limit("name")
                        )
                        # One %s per name part, plus one for full_name.
                        insert_query = f"""
                        INSERT INTO {table_name} ({", ".join(column_name_parts)}, full_name)
                        VALUES ({", ".join(["%s" for _ in column_name_parts])}, %s)
                        """
                        connector.execute(
                            insert_query, (*tuple(gallery_name_parts), gallery_name)
                        )
148 |
    def check_pending_gallery_removal(self, gallery_name: str) -> bool:
        """Return True if *gallery_name* is in the pending-removal queue.

        NOTE(review): assumes ``fetch_one`` returns an empty sequence (not
        None) when there is no matching row — ``len(None)`` would raise;
        confirm against the SQL connector implementation.
        """
        with self.SQLConnector() as connector:
            table_name = "pending_gallery_removals"
            gallery_name_parts = self._split_gallery_name(gallery_name)
            match self.config.database.sql_type.lower():
                case "mysql":
                    column_name_parts, _ = self.mysql_split_gallery_name_based_on_limit(
                        "name"
                    )
                    select_query = f"""
                    SELECT full_name
                    FROM {table_name}
                    WHERE {" AND ".join([f"{part} = %s" for part in column_name_parts])}
                    """
            query_result = connector.fetch_one(select_query, tuple(gallery_name_parts))
            return len(query_result) != 0
165 |
166 | def get_pending_gallery_removals(self) -> list[str]:
167 | with self.SQLConnector() as connector:
168 | table_name = "pending_gallery_removals"
169 | match self.config.database.sql_type.lower():
170 | case "mysql":
171 | select_query = f"""
172 | SELECT full_name
173 | FROM {table_name}
174 | """
175 |
176 | query_result = connector.fetch_all(select_query)
177 | pending_gallery_removals = [query[0] for query in query_result]
178 | return pending_gallery_removals
179 |
180 | def delete_pending_gallery_removal(self, gallery_name: str) -> None:
181 | with self.SQLConnector() as connector:
182 | table_name = "pending_gallery_removals"
183 | match self.config.database.sql_type.lower():
184 | case "mysql":
185 | column_name_parts, _ = self.mysql_split_gallery_name_based_on_limit(
186 | "name"
187 | )
188 | delete_query = f"""
189 | DELETE FROM {table_name} WHERE {" AND ".join([f"{part} = %s" for part in column_name_parts])}
190 | """
191 |
192 | gallery_name_parts = self._split_gallery_name(gallery_name)
193 | connector.execute(delete_query, tuple(gallery_name_parts))
194 |
195 | def delete_pending_gallery_removals(self) -> None:
196 | pending_gallery_removals = self.get_pending_gallery_removals()
197 | for gallery_name in pending_gallery_removals:
198 | self.delete_gallery_file(gallery_name)
199 | self.delete_gallery(gallery_name)
200 | self.delete_pending_gallery_removal(gallery_name)
201 |
    def delete_gallery_file(self, gallery_name: str) -> None:
        """Delete a gallery's image files.

        Currently an intentional no-op placeholder: on-disk files are left
        untouched; only database rows are removed elsewhere.
        """
        # self.logger.info(f"Gallery images for '{gallery_name}' deleted.")
        pass
205 |
    def delete_gallery(self, gallery_name: str) -> None:
        """Delete a gallery's row from ``galleries_dbids`` by its split name.

        Only ``galleries_dbids`` is touched here; child rows are presumably
        removed via ON DELETE CASCADE foreign keys — TODO confirm against the
        table definitions.
        """
        with self.SQLConnector() as connector:
            if not self._check_galleries_dbids_by_gallery_name(gallery_name):
                self.logger.debug(f"Gallery '{gallery_name}' does not exist.")
                return

            match self.config.database.sql_type.lower():
                case "mysql":
                    column_name_parts, _ = self.mysql_split_gallery_name_based_on_limit(
                        "name"
                    )
                    get_delete_gallery_id_query = f"""
                    DELETE FROM galleries_dbids
                    WHERE {" AND ".join([f"{part} = %s" for part in column_name_parts])}
                    """

            gallery_name_parts = self._split_gallery_name(gallery_name)
            connector.execute(get_delete_gallery_id_query, tuple(gallery_name_parts))
            self.logger.info(f"Gallery '{gallery_name}' deleted.")
225 |
226 | def optimize_database(self) -> None:
227 | with self.SQLConnector() as connector:
228 | match self.config.database.sql_type.lower():
229 | case "mysql":
230 | select_table_name_query = f"""
231 | SELECT TABLE_NAME
232 | FROM INFORMATION_SCHEMA.KEY_COLUMN_USAGE
233 | WHERE REFERENCED_TABLE_SCHEMA = '{self.config.database.database}'
234 | """
235 | table_names = connector.fetch_all(select_table_name_query)
236 | table_names = [t[0] for t in table_names]
237 |
238 | with self.SQLConnector() as connector:
239 | match self.config.database.sql_type.lower():
240 | case "mysql":
241 | get_optimize_query = lambda x: "OPTIMIZE TABLE {x}".format(x=x)
242 |
243 | for table_name in table_names:
244 | connector.execute(get_optimize_query(table_name))
245 | self.logger.info("Database optimized.")
246 |
    def _create_pending_download_gids_view(self) -> None:
        """Create the ``pending_download_gids`` view if it does not exist.

        The view lists gids whose redownload timestamp is at least 7 days
        old, filtered by upload/download-time conditions, newest uploads
        first.  NOTE(review): the WHERE clause mixes AND/OR without
        parentheses — AND binds tighter than OR in SQL, so the intended
        grouping should be confirmed.
        """
        with self.SQLConnector() as connector:
            match self.config.database.sql_type.lower():
                case "mysql":
                    query = """
                    CREATE VIEW IF NOT EXISTS pending_download_gids AS
                    SELECT gids.gid AS gid
                    FROM (SELECT *
                        FROM galleries_redownload_times AS grt0
                        WHERE DATE_ADD(grt0.time, INTERVAL 7 DAY) <= NOW()
                        )
                        AS grt
                        INNER JOIN galleries_download_times AS gdt
                        on grt.db_gallery_id = gdt.db_gallery_id
                        INNER JOIN galleries_upload_times AS gut
                        ON grt.db_gallery_id = gut.db_gallery_id
                        INNER JOIN galleries_gids AS gids
                        ON grt.db_gallery_id = gids.db_gallery_id
                    WHERE grt.time <= DATE_ADD(gut.time, INTERVAL 1 YEAR)
                        AND DATE_ADD(gut.time, INTERVAL 7 DAY) <= NOW()
                        OR DATE_ADD(gdt.time, INTERVAL 7 DAY) <= grt.time
                    ORDER BY gut.`time` DESC
                    """
                    connector.execute(query)
                    self.logger.info("pending_download_gids view created.")
272 |
273 | def get_pending_download_gids(self) -> list[int]:
274 | with self.SQLConnector() as connector:
275 | match self.config.database.sql_type.lower():
276 | case "mysql":
277 | query = """
278 | SELECT gid
279 | FROM pending_download_gids
280 | """
281 | query_result = connector.fetch_all(query)
282 | pending_download_gids = [query[0] for query in query_result]
283 | return pending_download_gids
284 |
285 | def _create_todelete_gids_table(self) -> None:
286 | with self.SQLConnector() as connector:
287 | table_name = "todelete_gids"
288 | match self.config.database.sql_type.lower():
289 | case "mysql":
290 | query = f"""
291 | CREATE TABLE IF NOT EXISTS {table_name} (
292 | PRIMARY KEY (gid),
293 | FOREIGN KEY (gid) REFERENCES galleries_gids(gid)
294 | ON UPDATE CASCADE
295 | ON DELETE CASCADE,
296 | gid INT UNSIGNED NOT NULL
297 | )
298 | """
299 | connector.execute(query)
300 | self.logger.info(f"{table_name} table created.")
301 |
    def _create_todelete_names_view(self) -> None:
        """Create the ``todelete_names`` view.

        Unions two sources of deletable gallery names:
        1. galleries whose GID is queued in ``todelete_gids``;
        2. older duplicates — ``galleries_infos`` rows sharing a GID where a
           newer ``download_time`` exists for the same GID.
        """
        with self.SQLConnector() as connector:
            table_name = "todelete_names"
            match self.config.database.sql_type.lower():
                case "mysql":
                    query = f"""
                        CREATE VIEW IF NOT EXISTS {table_name} AS
                        SELECT full_name
                        FROM
                            (SELECT galleries_names.full_name AS full_name
                             FROM todelete_gids
                             INNER JOIN galleries_gids
                                 ON galleries_gids.gid = todelete_gids.gid
                             INNER JOIN galleries_names
                                 ON galleries_names.db_gallery_id = galleries_gids.db_gallery_id) AS todelete_names
                        UNION
                        SELECT full_name
                        FROM (
                            SELECT gi.name AS full_name
                            FROM galleries_infos gi
                            JOIN (
                                SELECT gid, MAX(download_time) AS max_download_time
                                FROM galleries_infos
                                GROUP BY gid
                                HAVING COUNT(*) > 1
                            ) sub ON gi.gid = sub.gid
                            WHERE gi.download_time < sub.max_download_time
                        ) AS duplicated_gids_names
                    """
            connector.execute(query)
            self.logger.info(f"{table_name} table created.")
333 |
334 | def check_todelete_gid(self, gid: int) -> bool:
335 | with self.SQLConnector() as connector:
336 | table_name = "todelete_gids"
337 | match self.config.database.sql_type.lower():
338 | case "mysql":
339 | select_query = f"""
340 | SELECT gid
341 | FROM {table_name}
342 | WHERE gid = %s
343 | """
344 | query_result = connector.fetch_one(select_query, (gid,))
345 | return len(query_result) != 0
346 |
347 | def insert_todelete_gid(self, gid: int) -> None:
348 | if not self.check_todelete_gid(gid):
349 | with self.SQLConnector() as connector:
350 | table_name = "todelete_gids"
351 | match self.config.database.sql_type.lower():
352 | case "mysql":
353 | insert_query = f"""
354 | INSERT INTO {table_name} (gid) VALUES (%s)
355 | """
356 | connector.execute(insert_query, (gid,))
357 |
358 | def _create_todownload_gids_table(self) -> None:
359 | with self.SQLConnector() as connector:
360 | table_name = "todownload_gids"
361 | match self.config.database.sql_type.lower():
362 | case "mysql":
363 | query = f"""
364 | CREATE TABLE IF NOT EXISTS {table_name} (
365 | PRIMARY KEY (gid),
366 | gid INT UNSIGNED NOT NULL,
367 | url CHAR({self.innodb_index_prefix_limit}) NOT NULL
368 | )
369 | """
370 | connector.execute(query)
371 | self.logger.info(f"{table_name} table created.")
372 |
373 | def check_todownload_gid(self, gid: int, url: str) -> bool:
374 | with self.SQLConnector() as connector:
375 | table_name = "todownload_gids"
376 | match self.config.database.sql_type.lower():
377 | case "mysql":
378 | if url != "":
379 | select_query = f"""
380 | SELECT gid
381 | FROM {table_name}
382 | WHERE gid = %s AND url = %s
383 | """
384 | query_result = connector.fetch_one(select_query, (gid, url))
385 | else:
386 | select_query = f"""
387 | SELECT gid
388 | FROM {table_name}
389 | WHERE gid = %s
390 | """
391 | query_result = connector.fetch_one(select_query, (gid,))
392 | return len(query_result) != 0
393 |
394 | def insert_todownload_gid(self, gid: int, url: str) -> None:
395 | if url != "":
396 | gallery = GalleryURLParser(url)
397 | gid = gallery.gid
398 | if gallery.gid != gid and gid != 0:
399 | raise ValueError(
400 | f"Gallery GID {gid} does not match URL GID {gallery.gid}."
401 | )
402 | elif gid <= 0:
403 | raise ValueError("Gallery GID must be greater than zero.")
404 |
405 | if not self.check_todownload_gid(gid, url):
406 | if (url == "") or (not self.check_todownload_gid(gid, "")):
407 | with self.SQLConnector() as connector:
408 | table_name = "todownload_gids"
409 | match self.config.database.sql_type.lower():
410 | case "mysql":
411 | insert_query = f"""
412 | INSERT INTO {table_name} (gid, url) VALUES (%s, %s)
413 | """
414 | connector.execute(insert_query, (gid, url))
415 | else:
416 | self.update_todownload_gid(gid, url)
417 |
418 | def update_todownload_gid(self, gid: int, url: str) -> None:
419 | with self.SQLConnector() as connector:
420 | table_name = "todownload_gids"
421 | match self.config.database.sql_type.lower():
422 | case "mysql":
423 | update_query = f"""
424 | UPDATE {table_name} SET url = %s WHERE gid = %s
425 | """
426 | connector.execute(update_query, (url, gid))
427 |
428 | def remove_todownload_gid(self, gid: int) -> None:
429 | with self.SQLConnector() as connector:
430 | table_name = "todownload_gids"
431 | match self.config.database.sql_type.lower():
432 | case "mysql":
433 | delete_query = f"""
434 | DELETE FROM {table_name} WHERE gid = %s
435 | """
436 | connector.execute(delete_query, (gid,))
437 |
438 | def get_todownload_gids(self) -> list[tuple[int, str]]:
439 | with self.SQLConnector() as connector:
440 | table_name = "todownload_gids"
441 | match self.config.database.sql_type.lower():
442 | case "mysql":
443 | select_query = f"""
444 | SELECT gid, url
445 | FROM {table_name}
446 | """
447 | query_result = connector.fetch_all(select_query)
448 | todownload_gids = [(query[0], query[1]) for query in query_result]
449 | return todownload_gids
450 |
    def create_main_tables(self) -> None:
        """Create every table and view the application needs.

        The call order matters: tables referenced by foreign keys or views
        are created before their dependents (e.g. the time tables before the
        ``pending_download_gids`` view, the gids/names/infos objects before
        ``todelete_names``).
        """
        self.logger.debug("Creating main tables...")
        self._create_todownload_gids_table()
        self._create_pending_gallery_removals_table()
        self._create_galleries_names_table()
        self._create_galleries_gids_table()
        self._create_todelete_gids_table()
        self._create_galleries_download_times_table()
        self._create_galleries_redownload_times_table()
        self._create_galleries_upload_times_table()
        self._create_pending_download_gids_view()
        self._create_galleries_modified_times_table()
        self._create_galleries_access_times_table()
        self._create_galleries_titles_table()
        self._create_upload_account_table()
        self._create_galleries_comments_table()
        self._create_files_names_table()
        self._create_galleries_infos_view()
        self._create_todelete_names_view()
        self._create_galleries_files_hashs_tables()
        self._create_gallery_image_hash_view()
        self._create_duplicate_hash_in_gallery_view()
        self._create_removed_galleries_gids_table()
        self._create_galleries_tags_table()
        self._create_duplicated_galleries_tables()
        self.logger.info("Main tables created.")
477 |
478 | def update_redownload_time_to_now_by_gid(self, gid: int) -> None:
479 | db_gallery_id = self._get_db_gallery_id_by_gid(gid)
480 | table_name = "galleries_redownload_times"
481 | with self.SQLConnector() as connector:
482 | match self.config.database.sql_type.lower():
483 | case "mysql":
484 | update_query = f"""
485 | UPDATE {table_name} SET time = NOW() WHERE db_gallery_id = %s
486 | """
487 | connector.execute(update_query, (db_gallery_id,))
488 |
    def _insert_gallery_info(self, galleryinfo_params: GalleryInfoParser) -> None:
        """Insert one parsed gallery into all per-gallery tables.

        The gallery is flagged as a pending removal first and unflagged
        last, so a crash mid-insert leaves it marked for cleanup.
        """
        self.insert_pending_gallery_removal(galleryinfo_params.gallery_name)

        self._insert_gallery_name(galleryinfo_params.gallery_name)
        db_gallery_id = self._get_db_gallery_id_by_gallery_name(
            galleryinfo_params.gallery_name
        )

        # The per-attribute inserts are independent of each other, so they
        # run on parallel SQL threads.
        with SQLThreadsList() as threads:
            threads.append(
                target=self._insert_gallery_gid,
                args=(db_gallery_id, galleryinfo_params.gid),
            )
            threads.append(
                target=self._insert_gallery_title,
                args=(db_gallery_id, galleryinfo_params.title),
            )
            threads.append(
                target=self._insert_upload_time,
                args=(db_gallery_id, galleryinfo_params.upload_time),
            )
            threads.append(
                target=self._insert_gallery_comment,
                args=(db_gallery_id, galleryinfo_params.galleries_comments),
            )
            threads.append(
                target=self._insert_gallery_upload_account,
                args=(db_gallery_id, galleryinfo_params.upload_account),
            )
            threads.append(
                target=self._insert_download_time,
                args=(db_gallery_id, galleryinfo_params.download_time),
            )
            # NOTE(review): the access time is seeded from download_time —
            # confirm this is intentional.
            threads.append(
                target=self._insert_access_time,
                args=(db_gallery_id, galleryinfo_params.download_time),
            )
            threads.append(
                target=self._insert_modified_time,
                args=(db_gallery_id, galleryinfo_params.modified_time),
            )
            threads.append(
                target=self._insert_gallery_files,
                args=(db_gallery_id, galleryinfo_params.files_path),
            )

        # Map each file to its DB file id and record the content hashes.
        file_pairs: list[FileInformation] = list()
        for file_path in galleryinfo_params.files_path:
            db_file_id = self._get_db_file_id(db_gallery_id, file_path)
            absolute_file_path = os.path.join(
                galleryinfo_params.gallery_folder, file_path
            )
            file_pairs.append(FileInformation(absolute_file_path, db_file_id))
        self._insert_gallery_file_hash_for_db_gallery_id(file_pairs)

        # Store the (tag_name, tag_value) pairs for the gallery.
        taglist: list[TagInformation] = list()
        for tag in galleryinfo_params.tags:
            taglist.append(TagInformation(tag[0], tag[1]))
        self._insert_gallery_tags(db_gallery_id, taglist)

        self.delete_pending_gallery_removal(galleryinfo_params.gallery_name)
550 |
551 | def _check_gallery_info_file_hash(
552 | self, galleryinfo_params: GalleryInfoParser
553 | ) -> bool:
554 | if not self._check_galleries_dbids_by_gallery_name(
555 | galleryinfo_params.gallery_name
556 | ):
557 | return False
558 | db_gallery_id = self._get_db_gallery_id_by_gallery_name(
559 | galleryinfo_params.gallery_name
560 | )
561 |
562 | if not self._check_db_file_id(db_gallery_id, GALLERY_INFO_FILE_NAME):
563 | return False
564 | gallery_info_file_id = self._get_db_file_id(
565 | db_gallery_id, GALLERY_INFO_FILE_NAME
566 | )
567 | absolute_file_path = os.path.join(
568 | galleryinfo_params.gallery_folder, GALLERY_INFO_FILE_NAME
569 | )
570 |
571 | if not self._check_hash_value_by_file_id(
572 | gallery_info_file_id, COMPARISON_HASH_ALGORITHM
573 | ):
574 | return False
575 | original_hash_value = self.get_hash_value_by_file_id(
576 | gallery_info_file_id, COMPARISON_HASH_ALGORITHM
577 | )
578 | current_hash_value = hash_function_by_file(
579 | absolute_file_path, COMPARISON_HASH_ALGORITHM
580 | )
581 | issame = original_hash_value == current_hash_value
582 | return issame
583 |
584 | def _get_duplicated_hash_values_by_count_artist_ratio(self) -> list[bytes]:
585 | with self.SQLConnector() as connector:
586 | table_name = "duplicated_hash_values_by_count_artist_ratio"
587 | match self.config.database.sql_type.lower():
588 | case "mysql":
589 | select_query = f"""
590 | SELECT hash_value
591 | FROM {table_name}
592 | """
593 |
594 | query_result = connector.fetch_all(select_query)
595 | return [query[0] for query in query_result]
596 |
597 | def insert_gallery_info(self, gallery_folder: str) -> bool:
598 | galleryinfo_params = parse_galleryinfo(gallery_folder)
599 | is_thesame = self._check_gallery_info_file_hash(galleryinfo_params)
600 | is_insert = is_thesame is False
601 | if is_insert:
602 | self.logger.debug(
603 | f"Inserting gallery '{galleryinfo_params.gallery_name}'..."
604 | )
605 | self.delete_gallery_file(galleryinfo_params.gallery_name)
606 | self.delete_gallery(galleryinfo_params.gallery_name)
607 | self._insert_gallery_info(galleryinfo_params)
608 | self.logger.debug(f"Gallery '{galleryinfo_params.gallery_name}' inserted.")
609 | return is_insert
610 |
    def compress_gallery_to_cbz(
        self, gallery_folder: str, exclude_hashs: list[bytes]
    ) -> bool:
        """Compress *gallery_folder* into a CBZ under the configured path.

        The output sub-directory is picked by ``config.h2h.cbz_grouping``
        (by upload date, or flat).  An existing CBZ is rebuilt only when the
        hash of its embedded info file differs from the stored one.

        Args:
            gallery_folder: Source folder of the gallery.
            exclude_hashs: File hashes to leave out of the archive.

        Returns:
            bool: True when a CBZ was (re)created.
        """
        from .compress_gallery_to_cbz import (
            compress_images_and_create_cbz,
            calculate_hash_of_file_in_cbz,
        )

        galleryinfo_params = parse_galleryinfo(gallery_folder)
        # Choose the relative output directory from the grouping policy.
        match self.config.h2h.cbz_grouping:
            case "date-yyyy":
                upload_time = self.get_upload_time_by_gallery_name(
                    galleryinfo_params.gallery_name
                )
                relative_cbz_directory = str(upload_time.year).rjust(4, "0")
            case "date-yyyy-mm":
                upload_time = self.get_upload_time_by_gallery_name(
                    galleryinfo_params.gallery_name
                )
                relative_cbz_directory = os.path.join(
                    str(upload_time.year).rjust(4, "0"),
                    str(upload_time.month).rjust(2, "0"),
                )
            case "date-yyyy-mm-dd":
                upload_time = self.get_upload_time_by_gallery_name(
                    galleryinfo_params.gallery_name
                )
                relative_cbz_directory = os.path.join(
                    str(upload_time.year).rjust(4, "0"),
                    str(upload_time.month).rjust(2, "0"),
                    str(upload_time.day).rjust(2, "0"),
                )
            case "flat":
                relative_cbz_directory = ""
            case _:
                raise ValueError(
                    f"Invalid cbz_grouping value: {self.config.h2h.cbz_grouping}"
                )
        cbz_directory = os.path.join(self.config.h2h.cbz_path, relative_cbz_directory)
        cbz_tmp_directory = os.path.join(self.config.h2h.cbz_path, "tmp")

        def gallery_name2cbz_file_name(gallery_name: str) -> str:
            # Trim characters from the FRONT of the name until
            # name + ".cbz" fits the file-name byte limit (UTF-8 length).
            while (len(gallery_name.encode("utf-8")) + 4) > FILE_NAME_LENGTH_LIMIT:
                gallery_name = gallery_name[1:]
            return gallery_name + ".cbz"

        cbz_path = os.path.join(
            cbz_directory, gallery_name2cbz_file_name(galleryinfo_params.gallery_name)
        )
        if os.path.exists(cbz_path):
            # Rebuild only when the info file inside the CBZ is stale.
            db_gallery_id = self._get_db_gallery_id_by_gallery_name(
                galleryinfo_params.gallery_name
            )
            gallery_info_file_id = self._get_db_file_id(
                db_gallery_id, GALLERY_INFO_FILE_NAME
            )
            original_hash_value = self.get_hash_value_by_file_id(
                gallery_info_file_id, COMPARISON_HASH_ALGORITHM
            )
            cbz_hash_value = calculate_hash_of_file_in_cbz(
                cbz_path, GALLERY_INFO_FILE_NAME, COMPARISON_HASH_ALGORITHM
            )
            if original_hash_value != cbz_hash_value:
                compress_images_and_create_cbz(
                    gallery_folder,
                    cbz_directory,
                    cbz_tmp_directory,
                    self.config.h2h.cbz_max_size,
                    exclude_hashs,
                )
                result = True
            else:
                result = False
        else:
            compress_images_and_create_cbz(
                gallery_folder,
                cbz_directory,
                cbz_tmp_directory,
                self.config.h2h.cbz_max_size,
                exclude_hashs,
            )
            result = True
        return result
694 |
    def scan_current_galleries_folders(self) -> tuple[list[str], list[str]]:
        """Walk the H@H download folder and report the galleries on disk.

        Loads the on-disk gallery names into a temporary table, then marks
        database galleries with no matching folder as pending removals and
        purges them.

        Returns:
            tuple[list[str], list[str]]: (gallery folder paths, gallery names).
        """
        self.delete_pending_gallery_removals()

        with self.SQLConnector() as connector:
            tmp_table_name = "tmp_current_galleries"
            match self.config.database.sql_type.lower():
                case "mysql":
                    column_name = "name"
                    column_name_parts, create_gallery_name_parts_sql = (
                        self.mysql_split_gallery_name_based_on_limit(column_name)
                    )
                    query = f"""
                        CREATE TEMPORARY TABLE IF NOT EXISTS {tmp_table_name} (
                            PRIMARY KEY ({", ".join(column_name_parts)}),
                            {create_gallery_name_parts_sql}
                        )
                    """

            connector.execute(query)
            self.logger.info(f"{tmp_table_name} table created.")

            match self.config.database.sql_type.lower():
                case "mysql":
                    column_name_parts, _ = self.mysql_split_gallery_name_based_on_limit(
                        "name"
                    )
                    insert_query = f"""
                        INSERT INTO {tmp_table_name}
                        ({", ".join(column_name_parts)})
                        VALUES ({", ".join(["%s" for _ in column_name_parts])})
                    """

            # A folder is a gallery iff it contains the gallery info file.
            data: list[tuple] = list()
            current_galleries_folders: list[str] = list()
            current_galleries_names: list[str] = list()
            for root, _, files in os.walk(self.config.h2h.download_path):
                if GALLERY_INFO_FILE_NAME in files:
                    current_galleries_folders.append(root)
                    gallery_name = os.path.basename(current_galleries_folders[-1])
                    current_galleries_names.append(gallery_name)
                    gallery_name_parts = self._split_gallery_name(gallery_name)
                    data.append(tuple(gallery_name_parts))
            # Bulk-insert the names in batches of 5000 rows.
            group_size = 5000
            it = iter(data)
            for _ in range(0, len(data), group_size):
                connector.execute_many(insert_query, list(islice(it, group_size)))

            # Anti-join: DB galleries absent from the temporary table no
            # longer exist on disk.
            match self.config.database.sql_type.lower():
                case "mysql":
                    fetch_query = f"""
                        SELECT CONCAT({",".join(["galleries_dbids."+column_name for column_name in column_name_parts])})
                        FROM galleries_dbids
                        LEFT JOIN {tmp_table_name} USING ({",".join(column_name_parts)})
                        WHERE {tmp_table_name}.{column_name_parts[0]} IS NULL
                    """
            removed_galleries = connector.fetch_all(fetch_query)
            if len(removed_galleries) > 0:
                removed_galleries = [gallery[0] for gallery in removed_galleries]

            for removed_gallery in removed_galleries:
                self.insert_pending_gallery_removal(removed_gallery)

        self.delete_pending_gallery_removals()

        return (current_galleries_folders, current_galleries_names)
760 |
761 | def _refresh_current_cbz_files(self, current_galleries_names: list[str]) -> None:
762 | from .compress_gallery_to_cbz import gallery_name_to_cbz_file_name
763 |
764 | current_cbzs: dict[str, str] = dict()
765 | for root, _, files in os.walk(self.config.h2h.cbz_path):
766 | for file in files:
767 | current_cbzs[file] = root
768 | for key in set(current_cbzs.keys()) - set(
769 | gallery_name_to_cbz_file_name(name) for name in current_galleries_names
770 | ):
771 | os.remove(os.path.join(current_cbzs[key], key))
772 | self.logger.info(f"CBZ '{key}' removed.")
773 | self.logger.info("CBZ files refreshed.")
774 |
775 | while True:
776 | directory_removed = False
777 | for root, dirs, files in os.walk(self.config.h2h.cbz_path, topdown=False):
778 | if root == self.config.h2h.cbz_path:
779 | continue
780 | if max([len(dirs), len(files)]) == 0:
781 | directory_removed = True
782 | os.rmdir(root)
783 | self.logger.info(f"Directory '{root}' removed.")
784 | if not directory_removed:
785 | break
786 | self.logger.info("Empty directories removed.")
787 |
788 | def _refresh_current_files_hashs(self, algorithm: str) -> None:
789 | if algorithm not in HASH_ALGORITHMS:
790 | raise ValueError(
791 | f"Invalid hash algorithm: {algorithm} not in {HASH_ALGORITHMS}"
792 | )
793 |
794 | with self.SQLConnector() as connector:
795 | match self.config.database.sql_type.lower():
796 | case "mysql":
797 | get_delete_db_hash_id_query = (
798 | lambda x, y: f"""
799 | DELETE FROM {y}
800 | WHERE db_hash_id IN (
801 | SELECT db_hash_id
802 | FROM {x}
803 | RIGHT JOIN {y} USING (db_hash_id)
804 | WHERE {x}.db_hash_id IS NULL
805 | )
806 | """
807 | )
808 | hash_table_name = f"files_hashs_{algorithm.lower()}"
809 | db_table_name = f"files_hashs_{algorithm.lower()}_dbids"
810 | connector.execute(
811 | get_delete_db_hash_id_query(hash_table_name, db_table_name)
812 | )
813 |
814 | def refresh_current_files_hashs(self):
815 | with SQLThreadsList() as threads:
816 | for algorithm in HASH_ALGORITHMS:
817 | threads.append(
818 | target=self._refresh_current_files_hashs,
819 | args=(algorithm,),
820 | )
821 |
    def insert_h2h_download(self) -> None:
        """Scan the H@H download folder, insert new galleries into the
        database, and (when configured) compress them to CBZ.

        Repeats itself after a 30-minute pause as long as the previous pass
        inserted anything, then resets the redownload timers.
        """
        self.delete_pending_gallery_removals()

        current_galleries_folders, current_galleries_names = (
            self.scan_current_galleries_folders()
        )

        # Drop CBZ files whose gallery no longer exists on disk.
        self._refresh_current_cbz_files(current_galleries_names)

        self.logger.info("Inserting galleries...")
        # Choose the processing order of the galleries.
        if self.config.h2h.cbz_sort in ["upload_time", "download_time", "gid", "title"]:
            self.logger.info(f"Sorting by {self.config.h2h.cbz_sort}...")
            current_galleries_folders = sorted(
                current_galleries_folders,
                key=lambda x: getattr(parse_galleryinfo(x), self.config.h2h.cbz_sort),
                reverse=True,
            )
        elif "no" in self.config.h2h.cbz_sort:
            self.logger.info("No sorting...")
            pass
        elif "pages" in self.config.h2h.cbz_sort:
            self.logger.info("Sorting by pages...")
            # "pages+N" sorts by distance from N pages; the pivot defaults
            # to 20 when no "+N" suffix is given.
            zero_level = (
                max(1, int(self.config.h2h.cbz_sort.split("+")[-1]))
                if "+" in self.config.h2h.cbz_sort
                else 20
            )
            self.logger.info(
                f"Sorting by pages with adjustment based on {zero_level} pages..."
            )
            current_galleries_folders = sorted(
                current_galleries_folders,
                key=lambda x: abs(getattr(parse_galleryinfo(x), "pages") - zero_level),
            )
        else:
            # Fallback: ascending page count.
            current_galleries_folders = sorted(
                current_galleries_folders,
                key=lambda x: getattr(parse_galleryinfo(x), "pages"),
            )
        self.logger.info("Galleries sorted.")

        self.logger.info("Getting excluded hash values...")
        exclude_hashs = list[bytes]()
        previously_count_duplicated_files = 0
        self.logger.info("Excluded hash values obtained.")

        def calculate_exclude_hashs(
            previously_count_duplicated_files: int, exclude_hashs: list[bytes]
        ) -> tuple[int, list[bytes]]:
            # Recompute the exclusion list only when the number of
            # duplicated SHA-512 hashes grew since the previous check.
            self.logger.debug("Checking for duplicated files...")
            current_count_duplicated_files = self._count_duplicated_files_hashs_sha512()
            new_exclude_hashs = exclude_hashs
            if current_count_duplicated_files > previously_count_duplicated_files:
                self.logger.debug(
                    "Duplicated files found. Updating excluded hash values..."
                )
                previously_count_duplicated_files = current_count_duplicated_files
                new_exclude_hashs = (
                    self._get_duplicated_hash_values_by_count_artist_ratio()
                )
                self.logger.info("Excluded hash values updated.")
            return previously_count_duplicated_files, new_exclude_hashs

        total_inserted_in_database = 0
        total_created_cbz = 0
        is_insert_limit_reached = False
        # Process in chunks so a failure only forces one chunk to retry.
        chunked_galleries_folders = chunk_list(
            current_galleries_folders, 100 * POOL_CPU_LIMIT
        )
        self.logger.info("Inserting galleries in parallel...")
        for gallery_chunk in chunked_galleries_folders:
            # Insert gallery info to database
            is_insert_list: list[bool] = list()
            try:
                is_insert_list += run_in_parallel(
                    self.insert_gallery_info,
                    [(x,) for x in gallery_chunk],
                )
            except Exception as e:
                # Parallel insert failed: retry the whole chunk serially so
                # the failing gallery shows up in the log.
                self.logger.error(f"Error inserting galleries: {e}")
                self.logger.error("Retrying without parallel")
                for x in gallery_chunk:
                    self.logger.error(f"Retrying gallery '{x}'...")
                    is_insert_list.append(self.insert_gallery_info(x))
            if any(is_insert_list):
                self.logger.info("There are new galleries inserted in database.")
                is_insert_limit_reached |= True
                total_inserted_in_database += sum(is_insert_list)

            # Compress gallery to CBZ file
            if self.config.h2h.cbz_path != "":
                if any(is_insert_list):
                    # New inserts may change which hashes are duplicated.
                    previously_count_duplicated_files, exclude_hashs = (
                        calculate_exclude_hashs(
                            previously_count_duplicated_files, exclude_hashs
                        )
                    )
                is_new_list = run_in_parallel(
                    self.compress_gallery_to_cbz,
                    [(x, exclude_hashs) for x in gallery_chunk],
                )
                if any(is_new_list):
                    self.logger.info("There are new CBZ files created.")
                total_created_cbz += sum(is_new_list)
        self.logger.info(
            f"Total galleries inserted in database: {total_inserted_in_database}"
        )
        self.logger.info(f"Total CBZ files created: {total_created_cbz}")

        self.logger.info("Cleaning up database...")
        self.refresh_current_files_hashs()

        if is_insert_limit_reached:
            # NOTE(review): this recurses after every productive pass; a very
            # long backlog would deepen the call stack — confirm acceptable.
            self.logger.info("Sleeping for 30 minutes...")
            sleep(1800)
            self.logger.info("Refreshing database...")
            return self.insert_h2h_download()

        self._reset_redownload_times()
941 |
942 | def get_komga_metadata(self, gallery_name: str) -> dict:
943 | metadata: dict[str, str | list[dict[str, str]]] = dict()
944 | metadata["title"] = self.get_title_by_gallery_name(gallery_name)
945 | if self._check_gallery_comment_by_gallery_name(gallery_name):
946 | metadata["summary"] = self.get_comment_by_gallery_name(gallery_name)
947 | else:
948 | metadata["summary"] = ""
949 | upload_time = self.get_upload_time_by_gallery_name(gallery_name)
950 | metadata["releaseDate"] = "-".join(
951 | [
952 | str(upload_time.year),
953 | "{m:02d}".format(m=upload_time.month),
954 | "{d:02d}".format(d=upload_time.day),
955 | ]
956 | )
957 | tags = self.get_tag_pairs_by_gallery_name(gallery_name)
958 | metadata["authors"] = [
959 | {"name": value, "role": key} for key, value in tags if value != ""
960 | ]
961 | return metadata
962 |
--------------------------------------------------------------------------------
/src/h2hdb/h2hdb_spec.py:
--------------------------------------------------------------------------------
1 | import math
2 | import re
3 | from abc import ABCMeta, abstractmethod
4 | from functools import partial
5 |
6 |
7 | from .config_loader import H2HDBConfig
8 | from .logger import setup_logger
9 | from .settings import (
10 | FILE_NAME_LENGTH_LIMIT,
11 | FOLDER_NAME_LENGTH_LIMIT,
12 | )
13 |
14 |
15 | class H2HDBAbstract(metaclass=ABCMeta):
    # Fixed attribute set: connector parameters, the InnoDB index-prefix
    # character limit, the parsed config, the connector factory, and the
    # logger.  Using __slots__ also drops the per-instance __dict__.
    __slots__ = [
        "sql_connection_params",
        "innodb_index_prefix_limit",
        "config",
        "SQLConnector",
        "logger",
    ]
23 |
    def __init__(self, config: H2HDBConfig) -> None:
        """
        Initializes the H2HDBAbstract object.

        Args:
            config (H2HDBConfig): Parsed application configuration.

        Raises:
            ValueError: If the SQL type is unsupported (only "mysql" is
                implemented).
        """
        self.config = config
        self.logger = setup_logger(config.logger)

        # Set the appropriate connector based on the SQL type
        match self.config.database.sql_type.lower():
            case "mysql":
                # Imported lazily so the MySQL driver is only required when
                # a MySQL backend is actually configured.
                from .mysql_connector import MySQLConnectorParams, MySQLConnector

                self.sql_connection_params = MySQLConnectorParams(
                    host=self.config.database.host,
                    port=self.config.database.port,
                    user=self.config.database.user,
                    password=self.config.database.password,
                    database=self.config.database.database,
                )
                # Factory: each SQLConnector() call yields a connection
                # pre-bound to these parameters.
                self.SQLConnector = partial(
                    MySQLConnector, **self.sql_connection_params.model_dump()
                )
                # 191 — presumably the InnoDB utf8mb4 index-prefix limit;
                # long names are split into parts of this size. TODO confirm.
                self.innodb_index_prefix_limit = 191
            case _:
                raise ValueError("Unsupported SQL type")
52 |
    def __enter__(self) -> "H2HDBAbstract":
        """Enter the context manager; returns the instance unchanged."""
        return self
55 |
    def __exit__(
        self,
        exc_type: type[BaseException] | None,
        exc_value: BaseException | None,
        traceback: object | None,
    ) -> None:
        """Commit pending work on clean exit; skip the commit on exception."""
        if exc_type is None:
            # A fresh connector is opened purely to issue the final commit.
            with self.SQLConnector() as connector:
                connector.commit()
65 |
66 | def _split_gallery_name(self, gallery_name: str) -> list[str]:
67 | size = FOLDER_NAME_LENGTH_LIMIT // self.innodb_index_prefix_limit + (
68 | FOLDER_NAME_LENGTH_LIMIT % self.innodb_index_prefix_limit > 0
69 | )
70 | gallery_name_parts = re.findall(
71 | f".{{1,{self.innodb_index_prefix_limit}}}", gallery_name
72 | )
73 | gallery_name_parts += [""] * (size - len(gallery_name_parts))
74 | return gallery_name_parts
75 |
76 | def _mysql_split_name_based_on_limit(
77 | self, name: str, name_length_limit: int
78 | ) -> tuple[list[str], str]:
79 | num_parts = math.ceil(name_length_limit / self.innodb_index_prefix_limit)
80 | name_parts = [
81 | f"{name}_part{i} CHAR({self.innodb_index_prefix_limit}) NOT NULL"
82 | for i in range(1, name_length_limit // self.innodb_index_prefix_limit + 1)
83 | ]
84 | if name_length_limit % self.innodb_index_prefix_limit > 0:
85 | name_parts.append(
86 | f"{name}_part{num_parts} CHAR({name_length_limit % self.innodb_index_prefix_limit}) NOT NULL"
87 | )
88 | column_name_parts = [f"{name}_part{i}" for i in range(1, num_parts + 1)]
89 | create_name_parts_sql = ", ".join(name_parts)
90 | return column_name_parts, create_name_parts_sql
91 |
    def mysql_split_gallery_name_based_on_limit(
        self, name: str
    ) -> tuple[list[str], str]:
        """Split a gallery-name column using FOLDER_NAME_LENGTH_LIMIT.

        Returns:
            tuple[list[str], str]: part column names and the SQL
                column-definition fragment.
        """
        return self._mysql_split_name_based_on_limit(name, FOLDER_NAME_LENGTH_LIMIT)
96 |
    def mysql_split_file_name_based_on_limit(self, name: str) -> tuple[list[str], str]:
        """Split a file-name column using FILE_NAME_LENGTH_LIMIT.

        Returns:
            tuple[list[str], str]: part column names and the SQL
                column-definition fragment.
        """
        return self._mysql_split_name_based_on_limit(name, FILE_NAME_LENGTH_LIMIT)
99 |
    @abstractmethod
    def check_database_character_set(self) -> None:
        """
        Check the character set of the database.

        Abstract hook; concrete backends implement the actual check.
        """
        pass
106 |
    @abstractmethod
    def check_database_collation(self) -> None:
        """
        Check the collation of the database.

        Abstract hook; concrete backends implement the actual check.
        """
        pass
113 |
    @abstractmethod
    def create_main_tables(self) -> None:
        """
        Create the main tables (and views) for the comic database.
        """
        pass
120 |
    @abstractmethod
    def insert_gallery_info(self, gallery_path: str) -> bool:
        """
        Insert the gallery information into the database.

        Args:
            gallery_path (str): The path to the gallery folder.

        Returns:
            bool: True if an insert was performed.
        """
        pass
130 |
    @abstractmethod
    def insert_h2h_download(self) -> None:
        """
        Insert the H@H download information into the database.
        """
        pass
137 |
    @abstractmethod
    def get_gid_by_gallery_name(self, gallery_name: str) -> int:
        """
        Return the gallery GID stored for *gallery_name*.

        Args:
            gallery_name (str): The name of the gallery.

        Returns:
            int: The gallery GID.
        """
        pass
150 |
    @abstractmethod
    def get_gids(self) -> list[int]:
        """
        Return all GIDs recorded in the database.

        Returns:
            list[int]: The list of GIDs.
        """
        pass
160 |
    @abstractmethod
    def check_gid_by_gid(self, gid: int) -> bool:
        """
        Check whether *gid* exists in the database.

        Args:
            gid (int): The gallery GID.

        Returns:
            bool: True if the GID exists, False otherwise.
        """
        pass
173 |
    @abstractmethod
    def get_title_by_gallery_name(self, gallery_name: str) -> str:
        """
        Return the gallery title stored for *gallery_name*.

        Args:
            gallery_name (str): The name of the gallery.

        Returns:
            str: The gallery title.
        """
        pass
186 |
    @abstractmethod
    def update_access_time(self, gallery_name: str, time: str) -> None:
        """
        Update the access time for the gallery in the database.

        Args:
            gallery_name (str): The name of the gallery.
            time (str): The new access time.
        """
        pass
197 |
    @abstractmethod
    def get_upload_account_by_gallery_name(self, gallery_name: str) -> str:
        """
        Return the upload account stored for *gallery_name*.

        Args:
            gallery_name (str): The name of the gallery.

        Returns:
            str: The gallery upload account.
        """
        pass
210 |
    @abstractmethod
    def get_comment_by_gallery_name(self, gallery_name: str) -> str:
        """
        Return the gallery comment stored for *gallery_name*.

        Args:
            gallery_name (str): The name of the gallery.

        Returns:
            str: The gallery comment.
        """
        pass
223 |
    @abstractmethod
    def get_tag_value_by_gallery_name_and_tag_name(
        self, gallery_name: str, tag_name: str
    ) -> str:
        """
        Return the value of one tag of a gallery.

        Args:
            gallery_name (str): The name of the gallery.
            tag_name (str): The name of the tag.

        Returns:
            str: The value of the tag.
        """
        pass
239 |
    @abstractmethod
    def get_files_by_gallery_name(self, gallery_name: str) -> list[str]:
        """
        Selects the file names of the named gallery from the database.

        Args:
            gallery_name (str): The name of the gallery.

        Returns:
            list[str]: The list of files in the gallery.
        """
        pass
252 |
    @abstractmethod
    def delete_gallery_file(self, gallery_name: str) -> None:
        """
        Deletes the gallery's file records from the database.

        Args:
            gallery_name (str): The name of the gallery.
        """
        pass
262 |
    @abstractmethod
    def delete_gallery(self, gallery_name: str) -> None:
        """
        Deletes the gallery record itself from the database.

        Args:
            gallery_name (str): The name of the gallery.
        """
        pass
272 |
    @abstractmethod
    def insert_pending_gallery_removal(self, gallery_name: str) -> None:
        """
        Marks the gallery as pending removal by inserting it into the database.

        Args:
            gallery_name (str): The name of the gallery.
        """
        pass
282 |
    @abstractmethod
    def check_pending_gallery_removal(self, gallery_name: str) -> bool:
        """
        Checks whether the gallery is pending removal.

        Args:
            gallery_name (str): The name of the gallery.

        Returns:
            bool: True if the gallery is pending removal, False otherwise.
        """
        pass
292 |
    @abstractmethod
    def get_pending_gallery_removals(self) -> list[str]:
        """
        Selects every gallery name currently pending removal.

        Returns:
            list[str]: The list of pending gallery removals.
        """
        pass
302 |
    @abstractmethod
    def delete_pending_gallery_removal(self, gallery_name: str) -> None:
        """
        Removes a single pending-removal entry from the database.

        Args:
            gallery_name (str): The name of the gallery.
        """
        pass
312 |
    @abstractmethod
    def delete_pending_gallery_removals(self) -> None:
        """
        Clears the entire pending-removal list from the database.
        """
        pass
319 |
    @abstractmethod
    def scan_current_galleries_folders(self) -> tuple[list[str], list[str]]:
        """
        Scans the current galleries folders.

        Returns:
            tuple[list[str], list[str]]: Two lists of gallery folder paths.
                NOTE(review): the docstring previously claimed a single
                list; the meaning of each element of the pair is not visible
                here -- confirm against the concrete implementation.
        """
        pass
329 |
    @abstractmethod
    def refresh_current_files_hashs(self) -> None:
        """
        Recomputes and refreshes the current files' hashes in the database.
        """
        pass
336 |
    @abstractmethod
    def get_komga_metadata(self, gallery_name: str) -> dict:
        """
        Selects the Komga metadata of the named gallery from the database.

        Args:
            gallery_name (str): The name of the gallery.

        Returns:
            dict: The Komga metadata. NOTE(review): the key schema is not
                visible here -- see the concrete implementation.
        """
        pass
349 |
    @abstractmethod
    def check_todownload_gid(self, gid: int, url: str) -> bool:
        """
        Checks whether the GID is already queued for download.

        Args:
            gid (int): The gallery GID.
            url (str): The gallery URL.

        Returns:
            bool: True if the GID is to be downloaded, False otherwise.
        """
        pass
363 |
    @abstractmethod
    def insert_todownload_gid(self, gid: int, url: str) -> None:
        """
        Queues the GID for download by inserting it into the database.

        Args:
            gid (int): The gallery GID.
            url (str): The gallery URL.
        """
        pass
374 |
    @abstractmethod
    def get_todownload_gids(self) -> list[tuple[int, str]]:
        """
        Selects every queued download from the database.

        Returns:
            list[tuple[int, str]]: The queued downloads as (gid, url) pairs.
        """
        pass
384 |
    @abstractmethod
    def remove_todownload_gid(self, gid: int) -> None:
        """
        Removes the GID from the download queue in the database.

        Args:
            gid (int): The gallery GID.
        """
        pass
394 |
    @abstractmethod
    def get_pending_download_gids(self) -> list[int]:
        """
        Selects the GIDs whose download is still pending.

        Returns:
            list[int]: The list of pending download GIDs.
        """
        pass
404 |
    @abstractmethod
    def insert_removed_gallery_gid(self, gid: int) -> None:
        """
        Records a removed gallery's GID in the database.

        Args:
            gid (int): The gallery GID.
        """
        pass
414 |
    @abstractmethod
    def insert_todelete_gid(self, gid: int) -> None:
        """
        Marks the GID for deletion by inserting it into the database.

        Args:
            gid (int): The gallery GID.
        """
        pass
424 |
    @abstractmethod
    def update_redownload_time_to_now_by_gid(self, gid: int) -> None:
        """
        Sets the gallery's redownload time to the current time.

        Args:
            gid (int): The gallery GID.
        """
        pass
434 |
--------------------------------------------------------------------------------
/src/h2hdb/hash_dict.py:
--------------------------------------------------------------------------------
# Hash algorithms supported by the database schema, mapped to their digest
# size in bits.
HASH_ALGORITHMS: dict[str, int] = {
    "sha512": 512,
    "sha3_512": 512,
    "blake2b": 512,
}
2 |
--------------------------------------------------------------------------------
/src/h2hdb/information.py:
--------------------------------------------------------------------------------
1 | from .hash_dict import HASH_ALGORITHMS
2 | from .settings import hash_function
3 |
4 |
class FileInformation:
    """Tracks one gallery file on disk together with its database ids."""

    def __init__(self, absolute_path: str, db_file_id: int) -> None:
        self.absolute_path = absolute_path
        self.db_file_id = db_file_id
        # Becomes True once the digest attributes have been computed.
        self.issethash = False
        # Maps hash-algorithm name -> database hash id.
        self.db_hash_id: dict[str, int] = {}

    def sethash(self) -> None:
        """Compute and cache one digest attribute per supported algorithm.

        The file is read once; each algorithm name in HASH_ALGORITHMS becomes
        an attribute holding the corresponding digest. Subsequent calls are
        no-ops.
        """
        if self.issethash:
            return
        with open(self.absolute_path, "rb") as handle:
            content = handle.read()
            for name in HASH_ALGORITHMS:
                setattr(self, name, hash_function(content, name))
        self.issethash = True

    def setdb_hash_id(self, algorithm: str, db_hash_id: int) -> None:
        """Record the database id of this file's digest for *algorithm*."""
        self.db_hash_id[algorithm] = db_hash_id
22 |
23 |
class TagInformation:
    """Lightweight record of a gallery tag and, once known, its database id."""

    # Slotted: only these three attributes may ever exist on an instance.
    __slots__ = ["tag_name", "tag_value", "db_tag_id"]

    def __init__(self, tag_name: str, tag_value: str) -> None:
        self.tag_value = tag_value
        self.tag_name = tag_name

    def setdb_tag_id(self, db_tag_id: int) -> None:
        """Attach the database id assigned to this tag."""
        self.db_tag_id = db_tag_id
33 |
--------------------------------------------------------------------------------
/src/h2hdb/logger.py:
--------------------------------------------------------------------------------
1 | __all__ = ["logger"]
2 |
3 |
4 | import logging
5 | from abc import ABCMeta, abstractmethod
6 | from logging.handlers import MemoryHandler
7 |
8 | from .config_loader import LoggerConfig
9 |
10 |
def setup_screen_logger(level: int) -> logging.Logger:
    """Return the console logger, attaching its stream handler on first use."""
    logger = logging.getLogger("display_on_screen")
    logger.setLevel(level)

    # Attach the handler only once; later calls reuse the configured logger.
    if not logger.handlers:
        stream_handler = logging.StreamHandler()
        stream_handler.setFormatter(
            logging.Formatter("%(asctime)s [%(levelname)s] %(message)s")
        )
        logger.addHandler(stream_handler)
    return logger
21 |
22 |
def setup_file_logger(level: int) -> logging.Logger:
    """Return the CSV file logger, creating its buffered handler on first use.

    On first configuration the log file is truncated and a CSV header row is
    written; records are then buffered in memory and flushed to the file.
    """
    log_filename = "h2hdb.log"
    logger = logging.getLogger("write_to_file")
    logger.setLevel(level)

    if not logger.handlers:
        # Start a fresh log file with a CSV header row.
        with open(log_filename, "w", encoding="utf-8") as f:
            f.write('"time stamp","level","message"\n')

        file_handler = logging.FileHandler(log_filename, mode="a+", encoding="utf-8")
        file_handler.setFormatter(
            logging.Formatter('"%(asctime)s","%(levelname)-8s","%(message)s"')
        )

        # Buffer up to `capacity` records; flush on ERROR or when full.
        logger.addHandler(
            MemoryHandler(capacity=1024, target=file_handler, flushLevel=logging.ERROR)
        )

    return logger
43 |
44 |
class AbstractLogger(metaclass=ABCMeta):
    """Interface exposing the five standard log-severity methods."""

    @abstractmethod
    def debug(self, message: str) -> None:
        """Log *message* at DEBUG severity."""

    @abstractmethod
    def info(self, message: str) -> None:
        """Log *message* at INFO severity."""

    @abstractmethod
    def warning(self, message: str) -> None:
        """Log *message* at WARNING severity."""

    @abstractmethod
    def error(self, message: str) -> None:
        """Log *message* at ERROR severity."""

    @abstractmethod
    def critical(self, message: str) -> None:
        """Log *message* at CRITICAL severity."""
60 |
61 |
class HentaiDBLogger(AbstractLogger):
    """Logger facade that mirrors every record to the screen and file loggers.

    Args:
        level (int): Logging level applied to both underlying loggers.
    """

    def __init__(self, level: int) -> None:
        self.screen_logger = setup_screen_logger(level)
        self.file_logger = setup_file_logger(level)

    def debug(self, message: str) -> None:
        self._log_method("debug", message)

    def info(self, message: str) -> None:
        self._log_method("info", message)

    def warning(self, message: str) -> None:
        self._log_method("warning", message)

    def error(self, message: str) -> None:
        self._log_method("error", message)

    def critical(self, message: str) -> None:
        self._log_method("critical", message)

    def _log_method(self, level: str, message: str) -> None:
        # Dispatch by method name so every severity forwards to both loggers.
        getattr(self.screen_logger, level)(message)
        getattr(self.file_logger, level)(message)

    def hasHandlers(self) -> bool:
        """Return True if either underlying logger resolves any handlers."""
        return self.screen_logger.hasHandlers() or self.file_logger.hasHandlers()

    def removeHandlers(self) -> None:
        """Detach every handler from both underlying loggers.

        Bug fix: the previous implementation looped on
        ``while self.hasHandlers()`` and popped ``handlers[0]`` from BOTH
        loggers each iteration, which raised IndexError as soon as one logger
        ran out of handlers before the other; additionally ``hasHandlers()``
        also consults ancestor loggers, so the loop could spin on handlers it
        can never remove. Each logger is now drained independently using its
        own handler list.
        """
        for underlying in (self.screen_logger, self.file_logger):
            while underlying.handlers:
                underlying.removeHandler(underlying.handlers[0])

    def addHandler(self, handler: logging.Handler) -> None:
        """Attach *handler* to both underlying loggers."""
        self.screen_logger.addHandler(handler)
        self.file_logger.addHandler(handler)
99 |
100 |
def setup_logger(logger_config: LoggerConfig) -> HentaiDBLogger:
    """Build the application logger from *logger_config*'s level."""
    return HentaiDBLogger(level=logger_config.level)
105 |
--------------------------------------------------------------------------------
/src/h2hdb/mysql_connector.py:
--------------------------------------------------------------------------------
1 | from mysql.connector import connect as SQLConnect
2 | from mysql.connector.abstracts import MySQLConnectionAbstract
3 | from mysql.connector.pooling import PooledMySQLConnection
4 | from mysql.connector.errors import IntegrityError
5 |
6 | from pydantic import Field, field_validator
7 |
8 | from .sql_connector import SQLConnectorParams, SQLConnector, DatabaseDuplicateKeyError
9 |
10 |
11 | AUTO_COMMIT_KEYS = ["INSERT", "UPDATE", "DELETE", "CREATE", "DROP", "ALTER"]
12 |
13 |
class MySQLDuplicateKeyError(DatabaseDuplicateKeyError):
    """
    Custom exception class for MySQL duplicate key errors.

    Raised in place of MySQL Connector/Python's IntegrityError so that callers
    can catch the backend-agnostic DatabaseDuplicateKeyError; note that this
    class inherits from DatabaseDuplicateKeyError, not from IntegrityError.
    """

    def __init__(self, message: str) -> None:
        self.message = message
        super().__init__(self.message)
24 |
25 |
class MySQLConnectorParams(SQLConnectorParams):
    """
    Validated connection parameters for a MySQL database.

    Inherits from SQLConnectorParams, a pydantic model configured with
    extra="forbid", so unknown keys are rejected and all values are validated
    on construction.

    Attributes:
        host (str): Host name or IP address of the MySQL server (non-empty).
        port (int): TCP port of the MySQL server (1-65535).
        user (str): User name used to authenticate (non-empty).
        password (str): Password used to authenticate (may be empty).
        database (str): Name of the database to connect to (non-empty).
    """

    host: str = Field(
        min_length=1,
        description="Host of the MySQL database",
    )
    port: int = Field(
        ge=1,
        le=65535,
        description="Port of the MySQL database",
    )
    user: str = Field(
        min_length=1,
        description="User for the MySQL database",
    )
    password: str = Field(
        description="Password for the MySQL database",
    )
    database: str = Field(
        min_length=1,
        description="Database name for the MySQL database",
    )
63 |
64 |
class MySQLCursor:
    """Context manager that opens a buffered cursor on a MySQL connection
    and guarantees it is closed on exit."""

    def __init__(
        self, connection: PooledMySQLConnection | MySQLConnectionAbstract
    ) -> None:
        self.connection = connection

    def __enter__(self):
        # Buffered cursors fetch the whole result set eagerly, so fetchone/
        # fetchall work without "unread result" errors on this connection.
        self.cursor = self.connection.cursor(buffered=True)
        return self.cursor

    def __exit__(self, exc_type, exc_val, exc_tb):
        # Returns None, so any exception raised in the with-body propagates.
        self.cursor.close()
77 |
78 |
class MySQLConnector(SQLConnector):
    """
    Concrete SQLConnector for MySQL, backed by MySQL Connector/Python.

    Connection parameters are validated through MySQLConnectorParams at
    construction time; the connection itself is opened by `connect` (usually
    via the inherited context-manager protocol) and closed by `close`.
    Statements that modify data or schema are committed immediately after
    execution.
    """

    def __init__(
        self, host: str, port: int, user: str, password: str, database: str
    ) -> None:
        # Pydantic validation: non-empty host/user/database, port in 1-65535.
        self.params = MySQLConnectorParams(
            host=host, port=port, user=user, password=password, database=database
        )

    def connect(self) -> None:
        """Open the connection using the validated parameters."""
        self.connection = SQLConnect(**self.params.model_dump())

    def close(self) -> None:
        """Close the underlying MySQL connection."""
        self.connection.close()

    def check_table_exists(self, table_name: str) -> bool:
        """
        Return True if *table_name* exists in the current database.

        Fixes two defects of the previous version:
        * the table name was interpolated into the SQL text (injection risk);
          it is now passed as a bound parameter, and
        * the result was compared with ``is not None``, which was always true
          because `fetch_one` returns an empty tuple -- never None -- when
          there is no row, so the method always reported True.
        """
        result = self.fetch_one("SHOW TABLES LIKE %s", (table_name,))
        return len(result) != 0

    def commit(self) -> None:
        """Commit the current transaction."""
        self.connection.commit()

    def rollback(self) -> None:
        """Roll back the current transaction."""
        self.connection.rollback()

    def _commit_if_write(self, query: str) -> None:
        # Auto-commit data/schema-modifying statements so they are not left
        # pending in an open transaction.
        if any(key in query.upper() for key in AUTO_COMMIT_KEYS):
            self.commit()

    def execute(self, query: str, data: tuple = ()) -> None:
        """
        Execute a single SQL statement with optional bound parameters.

        Raises:
            MySQLDuplicateKeyError: If the statement violates a unique key.
        """
        with MySQLCursor(self.connection) as cursor:
            try:
                cursor.execute(query, data)
            except IntegrityError as e:
                raise MySQLDuplicateKeyError(str(e)) from e
            self._commit_if_write(query)

    def execute_many(self, query: str, data: list[tuple]) -> None:
        """
        Execute a SQL statement once per parameter tuple in *data*.

        Raises:
            MySQLDuplicateKeyError: If a statement violates a unique key.
        """
        with MySQLCursor(self.connection) as cursor:
            try:
                cursor.executemany(query, data)
            except IntegrityError as e:
                raise MySQLDuplicateKeyError(str(e)) from e
            self._commit_if_write(query)

    def fetch_one(self, query: str, data: tuple = ()) -> tuple:
        """Return the first result row, or an empty tuple when there is none."""
        with MySQLCursor(self.connection) as cursor:
            cursor.execute(query, data)
            row = cursor.fetchone()
        return row if isinstance(row, tuple) else tuple()

    def fetch_all(self, query: str, data: tuple = ()) -> list:
        """Return every result row of *query* as a list."""
        with MySQLCursor(self.connection) as cursor:
            cursor.execute(query, data)
            return cursor.fetchall()
160 |
--------------------------------------------------------------------------------
/src/h2hdb/py.typed:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Kuan-Lun/h2hdb/90477b90368947bcf40d7c100787aa726485842a/src/h2hdb/py.typed
--------------------------------------------------------------------------------
/src/h2hdb/settings.py:
--------------------------------------------------------------------------------
__all__ = [
    "FOLDER_NAME_LENGTH_LIMIT",
    "FILE_NAME_LENGTH_LIMIT",
    "COMPARISON_HASH_ALGORITHM",
    "GALLERY_INFO_FILE_NAME",
    # Previously missing public names that sibling modules import from here
    # (e.g. table_files_dbids imports chunk_list).
    "LOG_LEVEL",
    "CBZ_GROUPING",
    "CBZ_SORT",
    "hash_function",
    "hash_function_by_file",
    "chunk_list",
]

import logging
import hashlib
from enum import Enum

# Length limits applied to gallery folder and file names.
FOLDER_NAME_LENGTH_LIMIT = 255
FILE_NAME_LENGTH_LIMIT = 255
# Algorithm used when comparing files for equality.
COMPARISON_HASH_ALGORITHM = "sha512"
# Per-gallery metadata file name.
GALLERY_INFO_FILE_NAME = "galleryinfo.txt"
18 |
19 |
class LOG_LEVEL(int, Enum):
    """Log verbosity options, each mirroring the stdlib `logging` constant."""

    notset = logging.NOTSET
    debug = logging.DEBUG
    info = logging.INFO
    warning = logging.WARNING
    error = logging.ERROR
    critical = logging.CRITICAL
27 |
28 |
class CBZ_GROUPING(str, Enum):
    """How exported CBZ files are grouped into folders: flat, or nested by
    increasingly specific date components."""

    flat = "flat"
    date_yyyy = "date-yyyy"
    date_yyyy_mm = "date-yyyy-mm"
    date_yyyy_mm_dd = "date-yyyy-mm-dd"
34 |
35 |
class CBZ_SORT(str, Enum):
    """Sort key used when exporting CBZ files.

    NOTE(review): the exact semantics of "pages+[num]" are defined by the
    consumer of this enum -- confirm in compress_gallery_to_cbz.py.
    """

    no = "no"
    upload_time = "upload_time"
    download_time = "download_time"
    pages = "pages"
    pages_num = "pages+[num]"
42 |
43 |
def hash_function(x: bytes, algorithm: str) -> bytes:
    """Digest *x* with the named hashlib algorithm (name is case-insensitive)."""
    constructor = getattr(hashlib, algorithm.lower())
    return constructor(x).digest()
46 |
47 |
def hash_function_by_file(file_path: str, algorithm: str) -> bytes:
    """
    Digest the contents of *file_path* with the named hash algorithm.

    The file is read in fixed-size chunks so arbitrarily large files can be
    hashed without loading them fully into memory (the previous version read
    the whole file at once).

    Args:
        file_path (str): Path of the file to hash.
        algorithm (str): Algorithm name, case-insensitive (e.g. "sha512").

    Returns:
        bytes: The raw digest of the file contents.
    """
    # Same constructor lookup as hash_function, kept here so the digest can
    # be updated incrementally.
    digest = getattr(hashlib, algorithm.lower())()
    with open(file_path, "rb") as f:
        while chunk := f.read(1 << 20):  # 1 MiB per read
            digest.update(chunk)
    return digest.digest()
52 |
53 |
def chunk_list(input_list: list, chunk_size: int) -> list:
    """Split *input_list* into consecutive sublists of at most *chunk_size*
    elements; the final chunk may be shorter.

    Raises:
        ValueError: If *chunk_size* is not positive.
    """
    if chunk_size <= 0:
        raise ValueError("Chunk size must be greater than 0.")

    chunks = []
    start = 0
    while start < len(input_list):
        chunks.append(input_list[start : start + chunk_size])
        start += chunk_size
    return chunks
61 |
--------------------------------------------------------------------------------
/src/h2hdb/sql_connector.py:
--------------------------------------------------------------------------------
# Fixed: the previous __all__ exported "MySQLConnector", which is NOT defined
# in this module (it lives in mysql_connector.py, which itself imports from
# here), so a star-import would fail; it also omitted SQLConnector and
# DatabaseDuplicateKeyError, which ARE public names of this module.
__all__ = [
    "SQLConnectorParams",
    "SQLConnector",
    "DatabaseConfigurationError",
    "DatabaseKeyError",
    "DatabaseDuplicateKeyError",
    "DatabaseTableError",
]
8 |
9 |
10 | from abc import ABC, abstractmethod
11 |
12 | from pydantic import BaseModel, ConfigDict
13 |
14 |
class DatabaseConfigurationError(Exception):
    """Raised when a database configuration setting is invalid."""

    def __init__(self, message):
        super().__init__(message)
        # Kept as an attribute so callers can read the text without str(exc).
        self.message = message
25 |
26 |
class DatabaseKeyError(Exception):
    """Raised when a requested key/row does not exist in the database."""

    def __init__(self, message):
        super().__init__(message)
        # Kept as an attribute so callers can read the text without str(exc).
        self.message = message
37 |
38 |
class DatabaseDuplicateKeyError(Exception):
    """Raised when an insert/update violates a unique or primary key."""

    def __init__(self, message):
        super().__init__(message)
        # Kept as an attribute so callers can read the text without str(exc).
        self.message = message
49 |
50 |
class DatabaseTableError(Exception):
    """Raised when a database table is missing or otherwise unusable."""

    def __init__(self, message):
        super().__init__(message)
        # Kept as an attribute so callers can read the text without str(exc).
        self.message = message
61 |
62 |
class SQLConnectorParams(BaseModel):
    """Base pydantic model for SQL connection parameters."""

    # extra="forbid" makes construction fail on unexpected keyword arguments.
    model_config = ConfigDict(extra="forbid")
65 |
66 |
class SQLConnector(ABC):
    """
    Abstract interface for SQL database connections.

    Concrete subclasses (e.g. a MySQL connector) implement connecting,
    closing, statement execution, result fetching, and transaction control.
    Instances are usable as context managers: ``__enter__`` opens the
    connection and ``__exit__`` closes it.
    """

    @abstractmethod
    def __init__(self) -> None: ...

    @abstractmethod
    def connect(self) -> None:
        """Open a connection to the database using the stored credentials."""

    @abstractmethod
    def close(self) -> None:
        """Close the connection to the database."""

    def __enter__(self) -> "SQLConnector":
        """Open the connection and return self for use in a `with` block."""
        self.connect()
        return self

    @abstractmethod
    def check_table_exists(self, table_name: str) -> bool:
        """Return True if *table_name* exists in the database."""

    @abstractmethod
    def commit(self) -> None:
        """Commit the current transaction, making its changes permanent."""

    @abstractmethod
    def rollback(self) -> None:
        """Roll back the current transaction, discarding its changes."""

    def __exit__(self, exc_type, exc_value, traceback) -> None:
        """Close the connection when the `with` block exits.

        Args:
            exc_type (type): The type of the exception raised, if any.
            exc_value (Exception): The exception raised, if any.
            traceback (traceback): The associated traceback, if any.
        """
        self.close()

    @abstractmethod
    def execute(self, query: str, data: tuple = ()) -> None:
        """Execute a single SQL statement with optional bound parameters."""

    @abstractmethod
    def execute_many(self, query: str, data: list[tuple]) -> None:
        """Execute *query* once per parameter tuple in *data*."""

    @abstractmethod
    def fetch_one(self, query: str, data: tuple = ()) -> tuple:
        """Execute *query* and return the first row of the result set."""

    @abstractmethod
    def fetch_all(self, query: str, data: tuple = ()) -> list:
        """Execute *query* and return all rows of the result set as a list."""
242 |
--------------------------------------------------------------------------------
/src/h2hdb/table_comments.py:
--------------------------------------------------------------------------------
1 | from abc import ABCMeta
2 |
3 |
4 | from .table_gids import H2HDBGalleriesIDs
5 | from .h2hdb_spec import H2HDBAbstract
6 | from .sql_connector import DatabaseKeyError
7 |
8 |
class H2HDBGalleriesComments(H2HDBGalleriesIDs, H2HDBAbstract, metaclass=ABCMeta):
    """Mixin that manages the `galleries_comments` table (uploader comments,
    one row per gallery, keyed by db_gallery_id)."""

    def _create_galleries_comments_table(self) -> None:
        # Creates the comments table with a FULLTEXT index; cascades with the
        # owning row in galleries_dbids.
        with self.SQLConnector() as connector:
            table_name = "galleries_comments"
            match self.config.database.sql_type.lower():
                case "mysql":
                    # NOTE(review): `FULLTEXT (Comment)` vs the `comment` column
                    # relies on MySQL column identifiers being case-insensitive
                    # -- confirm intentional.
                    query = f"""
                    CREATE TABLE IF NOT EXISTS {table_name} (
                        PRIMARY KEY (db_gallery_id),
                        FOREIGN KEY (db_gallery_id) REFERENCES galleries_dbids(db_gallery_id)
                            ON UPDATE CASCADE
                            ON DELETE CASCADE,
                        db_gallery_id INT UNSIGNED NOT NULL,
                        comment TEXT NOT NULL,
                        FULLTEXT (Comment)
                    )
                """
            connector.execute(query)
            self.logger.info(f"{table_name} table created.")

    def _insert_gallery_comment(self, db_gallery_id: int, comment: str) -> None:
        # Empty comments are deliberately not stored.
        if comment != "":
            with self.SQLConnector() as connector:
                table_name = "galleries_comments"
                match self.config.database.sql_type.lower():
                    case "mysql":
                        insert_query = f"""
                            INSERT INTO {table_name} (db_gallery_id, comment) VALUES (%s, %s)
                        """
                connector.execute(insert_query, (db_gallery_id, comment))

    def _update_gallery_comment(self, db_gallery_id: int, comment: str) -> None:
        # Overwrites the stored comment for the given gallery row.
        with self.SQLConnector() as connector:
            table_name = "galleries_comments"
            match self.config.database.sql_type.lower():
                case "mysql":
                    update_query = f"""
                        UPDATE {table_name} SET Comment = %s WHERE db_gallery_id = %s
                    """
            connector.execute(update_query, (comment, db_gallery_id))

    def __get_gallery_comment_by_db_gallery_id(self, db_gallery_id: int) -> tuple:
        # Returns the raw fetch_one row: (comment,) or an empty tuple.
        with self.SQLConnector() as connector:
            table_name = "galleries_comments"
            match self.config.database.sql_type.lower():
                case "mysql":
                    select_query = f"""
                        SELECT Comment
                        FROM {table_name}
                        WHERE db_gallery_id = %s
                    """
            query_result = connector.fetch_one(select_query, (db_gallery_id,))
            return query_result

    def _check_gallery_comment_by_db_gallery_id(self, db_gallery_id: int) -> bool:
        # True when a comment row exists for this db_gallery_id.
        query_result = self.__get_gallery_comment_by_db_gallery_id(db_gallery_id)
        return len(query_result) != 0

    def _check_gallery_comment_by_gallery_name(self, gallery_name: str) -> bool:
        # Returns False when the gallery itself is unknown, avoiding a key error.
        ischeck = False
        if self._check_galleries_dbids_by_gallery_name(gallery_name):
            db_gallery_id = self._get_db_gallery_id_by_gallery_name(gallery_name)
            ischeck = self._check_gallery_comment_by_db_gallery_id(db_gallery_id)
        return ischeck

    def _select_gallery_comment(self, db_gallery_id: int) -> str:
        """Return the stored comment, or raise DatabaseKeyError if absent."""
        query_result = self.__get_gallery_comment_by_db_gallery_id(db_gallery_id)
        if query_result:
            comment = query_result[0]
        else:
            msg = (
                f"Uploader comment for gallery name ID {db_gallery_id} does not exist."
            )
            self.logger.error(msg)
            raise DatabaseKeyError(msg)
        return comment

    def get_comment_by_gallery_name(self, gallery_name: str) -> str:
        """Public accessor: look up the uploader comment by gallery name."""
        db_gallery_id = self._get_db_gallery_id_by_gallery_name(gallery_name)
        return self._select_gallery_comment(db_gallery_id)
89 |
--------------------------------------------------------------------------------
/src/h2hdb/table_database_setting.py:
--------------------------------------------------------------------------------
1 | from abc import ABCMeta
2 |
3 | from .h2hdb_spec import H2HDBAbstract
4 | from .sql_connector import DatabaseConfigurationError
5 |
6 |
class H2HDBCheckDatabaseSettings(H2HDBAbstract, metaclass=ABCMeta):
    """
    A class that checks the database settings for character set and collation.

    This class inherits from `H2HDBAbstract` and is used to ensure that the
    database character set and collation are valid. It provides methods to
    check the character set and collation of the database and raises an error
    if they are invalid.

    Methods:
        check_database_character_set: Checks the character set of the database.
        check_database_collation: Checks the collation of the database.
    """

    def check_database_character_set(self) -> None:
        """
        Checks the character set of the database and raises an error if it is invalid.

        Raises:
            DatabaseConfigurationError: If the database character set is invalid.
        """
        with self.SQLConnector() as connector:
            match self.config.database.sql_type.lower():
                case "mysql":
                    charset = "utf8mb4"
                    query = "SHOW VARIABLES LIKE 'character_set_database';"

            # NOTE(review): `charset`/`query` are only bound in the "mysql"
            # case; any other sql_type raises NameError below -- confirm
            # MySQL is the only supported backend.
            # The fetched row is ('Variable_name', value); index 1 is the value.
            charset_result: str = connector.fetch_one(query)[1]
            is_charset_valid: bool = charset_result == charset
            if not is_charset_valid:
                message = f"Invalid database character set. Must be '{charset}' but is '{charset_result}'."
                self.logger.error(message)
                raise DatabaseConfigurationError(message)
            self.logger.info("Database character set is valid.")

    def check_database_collation(self) -> None:
        """
        Checks the collation of the database and raises an error if it is invalid.

        Raises:
            DatabaseConfigurationError: If the database collation is invalid.
        """
        with self.SQLConnector() as connector:
            match self.config.database.sql_type.lower():
                case "mysql":
                    query = "SHOW VARIABLES LIKE 'collation_database';"
                    collation = "utf8mb4_bin"

            # NOTE(review): same single-backend binding pattern as above.
            collation_result: str = connector.fetch_one(query)[1]
            is_collation_valid: bool = collation_result == collation
            if not is_collation_valid:
                message = f"Invalid database collation. Must be '{collation}' but is '{collation_result}'."
                self.logger.error(message)
                raise DatabaseConfigurationError(message)
            self.logger.info("Database character set and collation are valid.")
64 |
--------------------------------------------------------------------------------
/src/h2hdb/table_files_dbids.py:
--------------------------------------------------------------------------------
1 | from abc import ABCMeta
2 | from itertools import chain
3 |
4 | from .hash_dict import HASH_ALGORITHMS
5 | from .settings import FILE_NAME_LENGTH_LIMIT
6 |
7 | from .settings import chunk_list
8 | from .table_gids import H2HDBGalleriesIDs
9 | from .information import FileInformation
10 | from .h2hdb_spec import H2HDBAbstract
11 | from .settings import hash_function_by_file
12 | from .sql_connector import (
13 | DatabaseKeyError,
14 | DatabaseDuplicateKeyError,
15 | )
16 |
17 |
18 | class H2HDBFiles(H2HDBGalleriesIDs, H2HDBAbstract, metaclass=ABCMeta):
19 | def _create_files_names_table(self) -> None:
20 | with self.SQLConnector() as connector:
21 | table_name = f"files_dbids"
22 | match self.config.database.sql_type.lower():
23 | case "mysql":
24 | column_name = "name"
25 | column_name_parts, create_gallery_name_parts_sql = (
26 | self.mysql_split_file_name_based_on_limit(column_name)
27 | )
28 | query = f"""
29 | CREATE TABLE IF NOT EXISTS {table_name} (
30 | PRIMARY KEY (db_file_id),
31 | db_file_id INT UNSIGNED AUTO_INCREMENT,
32 | db_gallery_id INT UNSIGNED NOT NULL,
33 | FOREIGN KEY (db_gallery_id) REFERENCES galleries_dbids(db_gallery_id)
34 | ON UPDATE CASCADE
35 | ON DELETE CASCADE,
36 | {create_gallery_name_parts_sql},
37 | UNIQUE real_primay_key (db_gallery_id, {", ".join(column_name_parts)}),
38 | UNIQUE db_file_to_gallery_id (db_file_id, db_gallery_id)
39 | )
40 | """
41 | connector.execute(query)
42 | self.logger.info(f"{table_name} table created.")
43 |
44 | table_name = f"files_names"
45 | match self.config.database.sql_type.lower():
46 | case "mysql":
47 | query = f"""
48 | CREATE TABLE IF NOT EXISTS {table_name} (
49 | PRIMARY KEY (db_file_id),
50 | FOREIGN KEY (db_file_id) REFERENCES files_dbids(db_file_id)
51 | ON UPDATE CASCADE
52 | ON DELETE CASCADE,
53 | db_file_id INT UNSIGNED NOT NULL,
54 | full_name TEXT NOT NULL,
55 | FULLTEXT (full_name)
56 | )
57 | """
58 | connector.execute(query)
59 | self.logger.info(f"{table_name} table created.")
60 |
61 | def _insert_gallery_files(
62 | self, db_gallery_id: int, file_names_list: list[str]
63 | ) -> None:
64 | with self.SQLConnector() as connector:
65 |
66 | file_name_parts_list: list[list[str]] = list()
67 | for file_name in file_names_list:
68 | if len(file_name) > FILE_NAME_LENGTH_LIMIT:
69 | self.logger.error(
70 | f"File name '{file_name}' is too long. Must be {FILE_NAME_LENGTH_LIMIT} characters or less."
71 | )
72 | raise ValueError("File name is too long.")
73 | file_name_parts_list.append(self._split_gallery_name(file_name))
74 |
75 | table_name = "files_dbids"
76 | match self.config.database.sql_type.lower():
77 | case "mysql":
78 | column_name_parts, _ = self.mysql_split_file_name_based_on_limit(
79 | "name"
80 | )
81 | insert_query_header = f"""
82 | INSERT INTO {table_name}
83 | (db_gallery_id, {", ".join(column_name_parts)})
84 | """ # VALUES (%s, {", ".join(["%s" for _ in column_name_parts])})
85 | insert_query_values = " ".join(
86 | [
87 | "VALUES",
88 | ", ".join(
89 | [
90 | f"(%s, {", ".join(["%s" for _ in column_name_parts])})"
91 | for _ in file_names_list
92 | ]
93 | ),
94 | ]
95 | )
96 | insert_query = f"{insert_query_header} {insert_query_values}"
97 | insert_parameter = tuple(
98 | chain(
99 | *[
100 | (db_gallery_id, *file_name_parts_list[n])
101 | for n in range(len(file_name_parts_list))
102 | ]
103 | )
104 | )
105 | connector.execute(
106 | insert_query,
107 | insert_parameter,
108 | )
109 |
110 | db_file_id_list = [
111 | self._get_db_file_id(db_gallery_id, file_name)
112 | for file_name in file_names_list
113 | ]
114 |
115 | table_name = "files_names"
116 | match self.config.database.sql_type.lower():
117 | case "mysql":
118 | column_name_parts, _ = self.mysql_split_file_name_based_on_limit(
119 | "name"
120 | )
121 | insert_query_header = f"""
122 | INSERT INTO {table_name}
123 | (db_file_id, full_name)
124 | """
125 | insert_query_values = " ".join(
126 | ["VALUES", ", ".join(["(%s, %s)" for _ in file_names_list])]
127 | )
128 | insert_query = f"{insert_query_header} {insert_query_values}"
129 |
130 | connector.execute(
131 | insert_query,
132 | tuple(
133 | chain(
134 | *[
135 | (db_file_id_list[n], file_names_list[n])
136 | for n in range(len(file_names_list))
137 | ]
138 | )
139 | ),
140 | )
141 |
142 | def __get_db_file_id(self, db_gallery_id: int, file_name: str) -> tuple:
143 | with self.SQLConnector() as connector:
144 | table_name = "files_dbids"
145 | file_name_parts = self._split_gallery_name(file_name)
146 | match self.config.database.sql_type.lower():
147 | case "mysql":
148 | column_name_parts, _ = self.mysql_split_file_name_based_on_limit(
149 | "name"
150 | )
151 | select_query = f"""
152 | SELECT db_file_id
153 | FROM {table_name}
154 | WHERE db_gallery_id = %s
155 | AND {" AND ".join([f"{part} = %s" for part in column_name_parts])}
156 | """
157 | data = (db_gallery_id, *file_name_parts)
158 | query_result = connector.fetch_one(select_query, data)
159 | return query_result
160 |
161 | def _check_db_file_id(self, db_gallery_id: int, file_name: str) -> bool:
162 | query_result = self.__get_db_file_id(db_gallery_id, file_name)
163 | return len(query_result) != 0
164 |
165 | def _get_db_file_id(self, db_gallery_id: int, file_name: str) -> int:
166 | query_result = self.__get_db_file_id(db_gallery_id, file_name)
167 | if query_result:
168 | gallery_image_id = query_result[0]
169 | else:
170 | msg = f"Image ID for gallery name ID {db_gallery_id} and file '{file_name}' does not exist."
171 | self.logger.error(msg)
172 | raise DatabaseKeyError(msg)
173 | return gallery_image_id
174 |
175 | def get_files_by_gallery_name(self, gallery_name: str) -> list[str]:
176 | with self.SQLConnector() as connector:
177 | db_gallery_id = self._get_db_gallery_id_by_gallery_name(gallery_name)
178 | table_name = "files_names"
179 | match self.config.database.sql_type.lower():
180 | case "mysql":
181 | select_query = f"""
182 | SELECT full_name
183 | FROM {table_name}
184 | WHERE db_gallery_id = %s
185 | """
186 | query_result = connector.fetch_all(select_query, (db_gallery_id,))
187 | if query_result:
188 | files = [query[0] for query in query_result]
189 | else:
190 | msg = f"Files for gallery name ID {db_gallery_id} do not exist."
191 | self.logger.error(msg)
192 | raise DatabaseKeyError(msg)
193 | return files
194 |
195 | def _create_galleries_files_hashs_table(
196 | self, algorithm: str, output_bits: int
197 | ) -> None:
198 | with self.SQLConnector() as connector:
199 | dbids_table_name = "files_hashs_%s_dbids" % algorithm.lower()
200 | match self.config.database.sql_type.lower():
201 | case "mysql":
202 | query = f"""
203 | CREATE TABLE IF NOT EXISTS {dbids_table_name} (
204 | PRIMARY KEY (db_hash_id),
205 | db_hash_id INT UNSIGNED AUTO_INCREMENT,
206 | hash_value BINARY({output_bits/8}) NOT NULL,
207 | UNIQUE (hash_value)
208 | )
209 | """
210 | connector.execute(query)
211 | self.logger.info(f"{dbids_table_name} table created.")
212 |
213 | table_name = "files_hashs_%s" % algorithm.lower()
214 | match self.config.database.sql_type.lower():
215 | case "mysql":
216 | query = f"""
217 | CREATE TABLE IF NOT EXISTS {table_name} (
218 | PRIMARY KEY (db_file_id),
219 | FOREIGN KEY (db_file_id) REFERENCES files_dbids(db_file_id)
220 | ON UPDATE CASCADE
221 | ON DELETE CASCADE,
222 | db_file_id INT UNSIGNED NOT NULL,
223 | FOREIGN KEY (db_hash_id) REFERENCES {dbids_table_name}(db_hash_id)
224 | ON UPDATE CASCADE,
225 | db_hash_id INT UNSIGNED NOT NULL,
226 | UNIQUE db_hash_id (db_hash_id, db_file_id)
227 | )
228 | """
229 | connector.execute(query)
230 | self.logger.info(f"{table_name} table created.")
231 |
232 | def _create_galleries_files_hashs_tables(self) -> None:
233 | self.logger.debug("Creating gallery image hash tables...")
234 | for algorithm, output_bits in HASH_ALGORITHMS.items():
235 | self._create_galleries_files_hashs_table(algorithm, output_bits)
236 | self.logger.info("Gallery image hash tables created.")
237 |
238 | def _create_gallery_image_hash_view(self) -> None:
239 | with self.SQLConnector() as connector:
240 | table_name = "files_hashs"
241 | match self.config.database.sql_type.lower():
242 | case "mysql":
243 | query = f"""
244 | CREATE VIEW IF NOT EXISTS {table_name} AS
245 | SELECT files_names.db_file_id AS db_file_id,
246 | galleries_titles.title AS gallery_title,
247 | galleries_names.full_name AS gallery_name,
248 | files_names.full_name AS file_name,
249 | files_hashs_sha512_dbids.hash_value AS sha512
250 | FROM files_names
251 | LEFT JOIN files_dbids USING (db_file_id)
252 | LEFT JOIN galleries_titles USING (db_gallery_id)
253 | LEFT JOIN galleries_names USING (db_gallery_id)
254 | LEFT JOIN files_hashs_sha512 USING (db_file_id)
255 | LEFT JOIN files_hashs_sha512_dbids USING (db_hash_id)
256 | """
257 | connector.execute(query)
258 | self.logger.info(f"{table_name} view created.")
259 |
260 | def _check_files_dbids_by_db_gallery_id(self, db_gallery_id: int) -> tuple | None:
261 | with self.SQLConnector() as connector:
262 | table_name = f"files_dbids"
263 | match self.config.database.sql_type.lower():
264 | case "mysql":
265 | select_query = f"""
266 | SELECT COUNT(*)
267 | FROM {table_name}
268 | WHERE db_gallery_id = %s
269 | """
270 | query_result = connector.fetch_one(select_query, (db_gallery_id,))
271 | return query_result[0] != 0
272 |
273 | def _insert_gallery_file_hash_for_db_gallery_id(
274 | self, fileinformations: list[FileInformation]
275 | ) -> None:
276 | for finfo in fileinformations:
277 | finfo.sethash()
278 |
279 | for algorithm in HASH_ALGORITHMS:
280 | toinsert: set[bytes] = set()
281 | for finfo in fileinformations:
282 | filehash: bytes = getattr(finfo, algorithm)
283 | if not self._check_db_hash_id_by_hash_value(filehash, algorithm):
284 | toinsert.add(filehash)
285 | self.insert_db_hash_id_by_hash_values(toinsert, algorithm)
286 |
287 | for finfo in fileinformations:
288 | for algorithm in HASH_ALGORITHMS:
289 | finfo.setdb_hash_id(
290 | algorithm,
291 | self.get_db_hash_id_by_hash_value(
292 | getattr(finfo, algorithm), algorithm
293 | ),
294 | )
295 | self.insert_hash_value_by_db_hash_ids(fileinformations)
296 |
297 | def _insert_gallery_file_hash(
298 | self, db_file_id: int, absolute_file_path: str
299 | ) -> None:
300 |
301 | for algorithm in HASH_ALGORITHMS:
302 | is_insert = False
303 | current_hash_value = hash_function_by_file(absolute_file_path, algorithm)
304 | if self._check_hash_value_by_file_id(db_file_id, algorithm):
305 | original_hash_value = self.get_hash_value_by_file_id(
306 | db_file_id, algorithm
307 | )
308 | if original_hash_value != current_hash_value:
309 | if self._check_db_hash_id_by_hash_value(
310 | current_hash_value, algorithm
311 | ):
312 | db_hash_id = self.get_db_hash_id_by_hash_value(
313 | current_hash_value, algorithm
314 | )
315 | self._update_gallery_file_hash_by_db_hash_id(
316 | db_file_id, db_hash_id, algorithm
317 | )
318 | else:
319 | is_insert |= True
320 | else:
321 | is_insert |= True
322 |
323 | if is_insert:
324 | if self._check_db_hash_id_by_hash_value(current_hash_value, algorithm):
325 | db_hash_id = self.get_db_hash_id_by_hash_value(
326 | current_hash_value, algorithm
327 | )
328 | else:
329 | with self.SQLConnector() as connector:
330 | table_name = f"files_hashs_{algorithm.lower()}_dbids"
331 | match self.config.database.sql_type.lower():
332 | case "mysql":
333 | insert_hash_value_query = f"""
334 | INSERT INTO {table_name} (hash_value) VALUES (UNHEX(%s))
335 | """
336 | try:
337 | connector.execute(
338 | insert_hash_value_query, (current_hash_value.hex(),)
339 | )
340 | except DatabaseDuplicateKeyError:
341 | self.logger.warning(
342 | f"Hash value {current_hash_value!r} already exists in the database."
343 | )
344 | except Exception as e:
345 | raise e
346 | db_hash_id = self.get_db_hash_id_by_hash_value(
347 | current_hash_value, algorithm
348 | )
349 |
350 | with self.SQLConnector() as connector:
351 | table_name = f"files_hashs_{algorithm.lower()}"
352 | match self.config.database.sql_type.lower():
353 | case "mysql":
354 | insert_db_hash_id_query = f"""
355 | INSERT INTO {table_name} (db_file_id, db_hash_id) VALUES (%s, %s)
356 | """
357 | connector.execute(insert_db_hash_id_query, (db_file_id, db_hash_id))
358 |
359 | def __get_db_hash_id_by_hash_value(
360 | self, hash_value: bytes, algorithm: str
361 | ) -> tuple:
362 | with self.SQLConnector() as connector:
363 | table_name = f"files_hashs_{algorithm.lower()}_dbids"
364 | match self.config.database.sql_type.lower():
365 | case "mysql":
366 | select_query = f"""
367 | SELECT db_hash_id
368 | FROM {table_name}
369 | WHERE hash_value = UNHEX(%s)
370 | """
371 | query_result = connector.fetch_one(select_query, (hash_value.hex(),))
372 | return query_result
373 |
374 | def _check_db_hash_id_by_hash_value(
375 | self, hash_value: bytes, algorithm: str
376 | ) -> bool:
377 | query_result = self.__get_db_hash_id_by_hash_value(hash_value, algorithm)
378 | return len(query_result) != 0
379 |
380 | def get_db_hash_id_by_hash_value(self, hash_value: bytes, algorithm: str) -> int:
381 | query_result = self.__get_db_hash_id_by_hash_value(hash_value, algorithm)
382 | if query_result:
383 | db_hash_id = query_result[0]
384 | else:
385 | msg = f"Image hash for image ID 0x{hash_value.hex()} does not exist."
386 | raise DatabaseKeyError(msg)
387 | return db_hash_id
388 |
389 | def insert_hash_value_by_db_hash_ids(
390 | self, fileinformations: list[FileInformation]
391 | ) -> None:
392 | for algorithm in HASH_ALGORITHMS:
393 | with self.SQLConnector() as connector:
394 | table_name = f"files_hashs_{algorithm.lower()}"
395 | match self.config.database.sql_type.lower():
396 | case "mysql":
397 | insert_query_header = f"""
398 | INSERT INTO {table_name} (db_file_id, db_hash_id)
399 | """
400 | insert_query_values = " ".join(
401 | ["VALUES", ", ".join(["(%s, %s)"] * len(fileinformations))]
402 | )
403 | insert_query = f"{insert_query_header} {insert_query_values}"
404 | parameters: list[int] = list()
405 | for fileinformation in fileinformations:
406 | parameters += [
407 | fileinformation.db_file_id,
408 | fileinformation.db_hash_id[algorithm],
409 | ]
410 | connector.execute(insert_query, tuple(parameters))
411 |
412 | def insert_db_hash_id_by_hash_value(
413 | self, hash_value: bytes, algorithm: str
414 | ) -> None:
415 | with self.SQLConnector() as connector:
416 | table_name = f"files_hashs_{algorithm.lower()}_dbids"
417 | match self.config.database.sql_type.lower():
418 | case "mysql":
419 | insert_query = f"""
420 | INSERT INTO {table_name} (hash_value) VALUES (UNHEX(%s))
421 | """
422 | connector.execute(insert_query, (hash_value.hex(),))
423 |
424 | def insert_db_hash_id_by_hash_values(
425 | self, hash_values: set[bytes], algorithm: str
426 | ) -> None:
427 | if not hash_values:
428 | return
429 |
430 | toinsert: list[str] = list()
431 | for hash_value in hash_values:
432 | if (hash_value not in toinsert) and (
433 | not self._check_db_hash_id_by_hash_value(hash_value, algorithm)
434 | ):
435 | toinsert.append(hash_value.hex())
436 | if not toinsert:
437 | return
438 |
439 | isretry = False
440 | with self.SQLConnector() as connector:
441 | table_name = f"files_hashs_{algorithm.lower()}_dbids"
442 | match self.config.database.sql_type.lower():
443 | case "mysql":
444 | insert_query_header = f"""
445 | INSERT INTO {table_name} (hash_value)
446 | """
447 | insert_query_values = " ".join(
448 | ["VALUES", ", ".join(["(UNHEX(%s))"] * len(toinsert))]
449 | )
450 | insert_query = f"{insert_query_header} {insert_query_values}"
451 | try:
452 | connector.execute(insert_query, tuple(toinsert))
453 | except DatabaseDuplicateKeyError:
454 | isretry = True
455 |
456 | if isretry:
457 | for hash_hex in toinsert:
458 | if not self._check_db_hash_id_by_hash_value(hash_value, algorithm):
459 | self.logger.warning(
460 | f"Retrying to insert hash value 0x{hash_hex} into the database."
461 | )
462 | self.insert_db_hash_id_by_hash_values(
463 | {bytes.fromhex(hash_hex)}, algorithm
464 | )
465 |
466 | def get_hash_value_by_db_hash_id(self, db_hash_id: int, algorithm: str) -> bytes:
467 | with self.SQLConnector() as connector:
468 | table_name = f"files_hashs_{algorithm.lower()}_dbids"
469 | match self.config.database.sql_type.lower():
470 | case "mysql":
471 | select_query = f"""
472 | SELECT hash_value
473 | FROM {table_name}
474 | WHERE db_hash_id = %s
475 | """
476 | query_result = connector.fetch_one(select_query, (db_hash_id,))
477 | if query_result:
478 | hash_value = query_result[0]
479 | else:
480 | msg = f"Image hash for image ID {db_hash_id} does not exist."
481 | raise DatabaseKeyError(msg)
482 | return hash_value
483 |
484 | def __get_hash_value_by_file_id(self, db_file_id: int, algorithm: str) -> tuple:
485 | with self.SQLConnector() as connector:
486 | table_name = f"files_hashs_{algorithm.lower()}"
487 | match self.config.database.sql_type.lower():
488 | case "mysql":
489 | select_query = f"""
490 | SELECT db_hash_id
491 | FROM {table_name}
492 | WHERE db_file_id = %s
493 | """
494 | query_result = connector.fetch_one(select_query, (db_file_id,))
495 | return query_result
496 |
497 | def _check_hash_value_by_file_id(self, db_file_id: int, algorithm: str) -> bool:
498 | query_result = self.__get_hash_value_by_file_id(db_file_id, algorithm)
499 | return len(query_result) != 0
500 |
501 | def get_hash_value_by_file_id(self, db_file_id: int, algorithm: str) -> bytes:
502 | query_result = self.__get_hash_value_by_file_id(db_file_id, algorithm)
503 | if query_result:
504 | db_hash_id = query_result[0]
505 | else:
506 | msg = f"Image hash for image ID {db_file_id} does not exist."
507 | raise DatabaseKeyError(msg)
508 | return self.get_hash_value_by_db_hash_id(db_hash_id, algorithm)
509 |
510 | def _update_gallery_file_hash_by_db_hash_id(
511 | self, db_file_id: int, db_hash_id: int, algorithm: str
512 | ) -> None:
513 | with self.SQLConnector() as connector:
514 | table_name = f"files_hashs_{algorithm.lower()}"
515 | match self.config.database.sql_type.lower():
516 | case "mysql":
517 | update_query = f"""
518 | UPDATE {table_name} SET db_hash_id = %s WHERE db_file_id = %s
519 | """
520 | connector.execute(update_query, (db_hash_id, db_file_id))
521 |
--------------------------------------------------------------------------------
/src/h2hdb/table_gids.py:
--------------------------------------------------------------------------------
1 | __all__ = ["H2HDBGalleriesIDs", "H2HDBGalleriesGIDs"]
2 |
3 | from abc import ABCMeta
4 |
5 | from .h2hdb_spec import H2HDBAbstract
6 | from .sql_connector import DatabaseKeyError
7 |
8 |
9 | class H2HDBGalleriesIDs(H2HDBAbstract, metaclass=ABCMeta):
10 | def _create_galleries_names_table(self) -> None:
11 | with self.SQLConnector() as connector:
12 | table_name = "galleries_dbids"
13 | match self.config.database.sql_type.lower():
14 | case "mysql":
15 | column_name = "name"
16 | column_name_parts, create_gallery_name_parts_sql = (
17 | self.mysql_split_gallery_name_based_on_limit(column_name)
18 | )
19 | id_query = f"""
20 | CREATE TABLE IF NOT EXISTS {table_name} (
21 | PRIMARY KEY (db_gallery_id),
22 | db_gallery_id INT UNSIGNED AUTO_INCREMENT,
23 | {create_gallery_name_parts_sql},
24 | UNIQUE real_primay_key ({", ".join(column_name_parts)})
25 | )
26 | """
27 | connector.execute(id_query)
28 |
29 | table_name = "galleries_names"
30 | match self.config.database.sql_type.lower():
31 | case "mysql":
32 | name_query = f"""
33 | CREATE TABLE IF NOT EXISTS {table_name} (
34 | PRIMARY KEY (db_gallery_id),
35 | FOREIGN KEY (db_gallery_id) REFERENCES galleries_dbids(db_gallery_id)
36 | ON UPDATE CASCADE
37 | ON DELETE CASCADE,
38 | db_gallery_id INT UNSIGNED NOT NULL,
39 | full_name TEXT NOT NULL,
40 | FULLTEXT (full_name)
41 | )
42 | """
43 | connector.execute(name_query)
44 | self.logger.info(f"{table_name} table created.")
45 |
46 | def _insert_gallery_name(self, gallery_name: str) -> None:
47 | with self.SQLConnector() as connector:
48 | table_name = "galleries_dbids"
49 | gallery_name_parts = self._split_gallery_name(gallery_name)
50 |
51 | match self.config.database.sql_type.lower():
52 | case "mysql":
53 | column_name_parts, _ = self.mysql_split_gallery_name_based_on_limit(
54 | "name"
55 | )
56 | insert_query = f"""
57 | INSERT INTO {table_name}
58 | ({", ".join(column_name_parts)})
59 | VALUES ({", ".join(["%s" for _ in column_name_parts])})
60 | """
61 | connector.execute(insert_query, tuple(gallery_name_parts))
62 |
63 | db_gallery_id = self._get_db_gallery_id_by_gallery_name(gallery_name)
64 |
65 | table_name = "galleries_names"
66 | gallery_name_parts = self._split_gallery_name(gallery_name)
67 |
68 | match self.config.database.sql_type.lower():
69 | case "mysql":
70 | column_name_parts, _ = self.mysql_split_gallery_name_based_on_limit(
71 | "name"
72 | )
73 | insert_query = f"""
74 | INSERT INTO {table_name}
75 | (db_gallery_id, full_name)
76 | VALUES (%s, %s)
77 | """
78 | connector.execute(insert_query, (db_gallery_id, gallery_name))
79 |
80 | def __get_db_gallery_id_by_gallery_name(self, gallery_name: str) -> tuple:
81 | with self.SQLConnector() as connector:
82 | table_name = "galleries_dbids"
83 | gallery_name_parts = self._split_gallery_name(gallery_name)
84 |
85 | match self.config.database.sql_type.lower():
86 | case "mysql":
87 | column_name_parts, _ = self.mysql_split_gallery_name_based_on_limit(
88 | "name"
89 | )
90 | select_query = f"""
91 | SELECT db_gallery_id
92 | FROM {table_name}
93 | WHERE {" AND ".join([f"{part} = %s" for part in column_name_parts])}
94 | """
95 |
96 | query_result = connector.fetch_one(select_query, tuple(gallery_name_parts))
97 | return query_result
98 |
99 | def _check_galleries_dbids_by_gallery_name(self, gallery_name: str) -> bool:
100 | query_result = self.__get_db_gallery_id_by_gallery_name(gallery_name)
101 | return len(query_result) != 0
102 |
103 | def _get_db_gallery_id_by_gallery_name(self, gallery_name: str) -> int:
104 | query_result = self.__get_db_gallery_id_by_gallery_name(gallery_name)
105 | if query_result:
106 | db_gallery_id = query_result[0]
107 | else:
108 | self.logger.debug(f"Gallery name '{gallery_name}' does not exist.")
109 | raise DatabaseKeyError(f"Gallery name '{gallery_name}' does not exist.")
110 | return db_gallery_id
111 |
112 | def _get_db_gallery_id_by_gid(self, gid: int) -> int:
113 | with self.SQLConnector() as connector:
114 | table_name = "galleries_gids"
115 | match self.config.database.sql_type.lower():
116 | case "mysql":
117 | select_query = f"""
118 | SELECT db_gallery_id
119 | FROM {table_name}
120 | WHERE gid = %s
121 | """
122 | query_result = connector.fetch_one(select_query, (gid,))
123 |
124 | if query_result:
125 | db_gallery_id = query_result[0]
126 | else:
127 | msg = f"Gallery name ID for GID {gid} does not exist."
128 | self.logger.error(msg)
129 | raise DatabaseKeyError(msg)
130 | return db_gallery_id
131 |
132 |
133 | class H2HDBGalleriesGIDs(H2HDBGalleriesIDs, H2HDBAbstract, metaclass=ABCMeta):
134 | """
135 | A class that handles the GIDs for galleries in the comic database.
136 |
137 | This class inherits from `H2HDBAbstract` and is used to manage the GIDs for galleries
138 |
139 | Attributes:
140 | sql_type (str): The type of SQL database being used.
141 | sql_connection_params (SQLConnectorParams): The parameters for establishing the SQL connection.
142 | connector (SQLConnector): The SQL connector object.
143 |
144 | Methods:
145 | _create_galleries_gids_table: Creates the galleries_gids table.
146 | _insert_gallery_gid: Inserts the GID for the gallery name ID into the galleries_gids table.
147 | get_gid_by_gallery_name: Selects the GID for the gallery name from the database.
148 | """
149 |
150 | def _create_galleries_gids_table(self) -> None:
151 | with self.SQLConnector() as connector:
152 | table_name = "galleries_gids"
153 | match self.config.database.sql_type.lower():
154 | case "mysql":
155 | query = f"""
156 | CREATE TABLE IF NOT EXISTS {table_name} (
157 | PRIMARY KEY (db_gallery_id),
158 | FOREIGN KEY (db_gallery_id) REFERENCES galleries_dbids(db_gallery_id)
159 | ON UPDATE CASCADE
160 | ON DELETE CASCADE,
161 | db_gallery_id INT UNSIGNED NOT NULL,
162 | gid INT UNSIGNED NOT NULL,
163 | INDEX (gid)
164 | )
165 | """
166 | connector.execute(query)
167 | self.logger.info(f"{table_name} table created.")
168 |
169 | def _insert_gallery_gid(self, db_gallery_id: int, gid: int) -> None:
170 | with self.SQLConnector() as connector:
171 | table_name = "galleries_gids"
172 | match self.config.database.sql_type.lower():
173 | case "mysql":
174 | insert_query = f"""
175 | INSERT INTO {table_name} (db_gallery_id, gid) VALUES (%s, %s)
176 | """
177 | connector.execute(insert_query, (db_gallery_id, gid))
178 |
179 | def _get_gid_by_db_gallery_id(self, db_gallery_id: int) -> int:
180 | with self.SQLConnector() as connector:
181 | table_name = "galleries_gids"
182 | match self.config.database.sql_type.lower():
183 | case "mysql":
184 | select_query = f"""
185 | SELECT gid
186 | FROM {table_name}
187 | WHERE db_gallery_id = %s
188 | """
189 | query_result = connector.fetch_one(select_query, (db_gallery_id,))
190 |
191 | if query_result:
192 | gid = query_result[0]
193 | else:
194 | msg = f"GID for gallery name ID {db_gallery_id} does not exist."
195 | self.logger.error(msg)
196 | raise DatabaseKeyError(msg)
197 | return gid
198 |
199 | def get_gid_by_gallery_name(self, gallery_name: str) -> int:
200 | db_gallery_id = self._get_db_gallery_id_by_gallery_name(gallery_name)
201 | return self._get_gid_by_db_gallery_id(db_gallery_id)
202 |
203 | def get_gids(self) -> list[int]:
204 | with self.SQLConnector() as connector:
205 | table_name = "galleries_gids"
206 | match self.config.database.sql_type.lower():
207 | case "mysql":
208 | select_query = f"""
209 | SELECT gid
210 | FROM {table_name}
211 | """
212 | query_result = connector.fetch_all(select_query)
213 | gids = [gid for gid, in query_result]
214 | return gids
215 |
216 | def check_gid_by_gid(self, gid: int) -> bool:
217 | with self.SQLConnector() as connector:
218 | table_name = "galleries_gids"
219 | match self.config.database.sql_type.lower():
220 | case "mysql":
221 | select_query = f"""
222 | SELECT gid
223 | FROM {table_name}
224 | WHERE gid = %s
225 | """
226 | query_result = connector.fetch_one(select_query, (gid,))
227 | return len(query_result) != 0
228 |
--------------------------------------------------------------------------------
/src/h2hdb/table_removed_gids.py:
--------------------------------------------------------------------------------
1 | from abc import ABCMeta
2 |
3 | from .table_gids import H2HDBGalleriesIDs
4 | from .h2hdb_spec import H2HDBAbstract
5 | from .sql_connector import DatabaseKeyError
6 |
7 |
8 | class H2HDBRemovedGalleries(H2HDBGalleriesIDs, H2HDBAbstract, metaclass=ABCMeta):
9 | def _create_removed_galleries_gids_table(self) -> None:
10 | with self.SQLConnector() as connector:
11 | table_name = "removed_galleries_gids"
12 | match self.config.database.sql_type.lower():
13 | case "mysql":
14 | query = f"""
15 | CREATE TABLE IF NOT EXISTS {table_name} (
16 | PRIMARY KEY (gid),
17 | gid INT UNSIGNED NOT NULL
18 | )
19 | """
20 | connector.execute(query)
21 | self.logger.info(f"{table_name} table created.")
22 |
23 | def insert_removed_gallery_gid(self, gid: int) -> None:
24 | with self.SQLConnector() as connector:
25 | table_name = "removed_galleries_gids"
26 | match self.config.database.sql_type.lower():
27 | case "mysql":
28 | insert_query = f"""
29 | INSERT INTO {table_name} (gid) VALUES (%s)
30 | """
31 | if self._check_removed_gallery_gid(gid):
32 | self.logger.warning(f"Removed gallery GID {gid} already exists.")
33 | else:
34 | connector.execute(insert_query, (gid,))
35 |
36 | def __get_removed_gallery_gid(self, gid: int) -> tuple:
37 | with self.SQLConnector() as connector:
38 | table_name = "removed_galleries_gids"
39 | match self.config.database.sql_type.lower():
40 | case "mysql":
41 | select_query = f"""
42 | SELECT gid
43 | FROM {table_name}
44 | WHERE gid = %s
45 | """
46 | query_result = connector.fetch_one(select_query, (gid,))
47 | return query_result
48 |
49 | def _check_removed_gallery_gid(self, gid: int) -> bool:
50 | query_result = self.__get_removed_gallery_gid(gid)
51 | return len(query_result) != 0
52 |
53 | def select_removed_gallery_gid(self, gid: int) -> int:
54 | query_result = self.__get_removed_gallery_gid(gid)
55 | if query_result:
56 | gid = query_result[0]
57 | self.logger.warning(f"Removed gallery GID {gid} exists.")
58 | else:
59 | msg = f"Removed gallery GID {gid} does not exist."
60 | self.logger.error(msg)
61 | raise DatabaseKeyError(msg)
62 | return gid
63 |
--------------------------------------------------------------------------------
/src/h2hdb/table_tags.py:
--------------------------------------------------------------------------------
1 | from abc import ABCMeta
2 | from typing import Callable
3 |
4 | from .table_gids import H2HDBGalleriesIDs
5 | from .h2hdb_spec import H2HDBAbstract
6 | from .information import TagInformation
7 | from .sql_connector import (
8 | DatabaseKeyError,
9 | DatabaseDuplicateKeyError,
10 | )
11 |
12 |
13 | class H2HDBGalleriesTags(H2HDBGalleriesIDs, H2HDBAbstract, metaclass=ABCMeta):
    def _create_galleries_tags_table(self) -> None:
        """
        Create the four tag tables: galleries_tags_names, galleries_tags_values,
        galleries_tag_pairs_dbids, and galleries_tags.
        """
        with self.SQLConnector() as connector:
            # Distinct tag names, keyed directly by the name itself.
            tag_name_table_name = f"galleries_tags_names"
            match self.config.database.sql_type.lower():
                case "mysql":
                    query = f"""
                    CREATE TABLE IF NOT EXISTS {tag_name_table_name} (
                        PRIMARY KEY (tag_name),
                        tag_name CHAR({self.innodb_index_prefix_limit}) NOT NULL
                    )
                    """
                    connector.execute(query)
                    self.logger.info(f"{tag_name_table_name} table created.")

            # Distinct tag values, keyed directly by the value itself.
            tag_value_table_name = f"galleries_tags_values"
            match self.config.database.sql_type.lower():
                case "mysql":
                    query = f"""
                    CREATE TABLE IF NOT EXISTS {tag_value_table_name} (
                        PRIMARY KEY (tag_value),
                        tag_value CHAR({self.innodb_index_prefix_limit}) NOT NULL
                    )
                    """
                    connector.execute(query)
                    self.logger.info(f"{tag_value_table_name} table created.")

            # Each unique (tag_name, tag_value) pair gets a surrogate ID.
            tag_pairs_table_name = f"galleries_tag_pairs_dbids"
            match self.config.database.sql_type.lower():
                case "mysql":
                    query = f"""
                    CREATE TABLE IF NOT EXISTS {tag_pairs_table_name} (
                        PRIMARY KEY (db_tag_pair_id),
                        db_tag_pair_id INT UNSIGNED AUTO_INCREMENT,
                        tag_name CHAR({self.innodb_index_prefix_limit}) NOT NULL,
                        FOREIGN KEY (tag_name) REFERENCES {tag_name_table_name}(tag_name)
                            ON UPDATE CASCADE
                            ON DELETE CASCADE,
                        tag_value CHAR({self.innodb_index_prefix_limit}) NOT NULL,
                        FOREIGN KEY (tag_value) REFERENCES {tag_value_table_name}(tag_value)
                            ON UPDATE CASCADE
                            ON DELETE CASCADE,
                        UNIQUE (tag_name, tag_value),
                        INDEX (tag_value)
                    )
                    """
                    connector.execute(query)
                    self.logger.info(f"{tag_pairs_table_name} table created.")

            # Many-to-many link between galleries and tag pairs; the extra
            # UNIQUE gives an index for lookups by tag pair.
            table_name = f"galleries_tags"
            match self.config.database.sql_type.lower():
                case "mysql":
                    query = f"""
                    CREATE TABLE IF NOT EXISTS {table_name} (
                        PRIMARY KEY (db_gallery_id, db_tag_pair_id),
                        db_gallery_id INT UNSIGNED NOT NULL,
                        FOREIGN KEY (db_gallery_id) REFERENCES galleries_dbids(db_gallery_id)
                            ON UPDATE CASCADE
                            ON DELETE CASCADE,
                        db_tag_pair_id INT UNSIGNED NOT NULL,
                        FOREIGN KEY (db_tag_pair_id) REFERENCES {tag_pairs_table_name}(db_tag_pair_id)
                            ON UPDATE CASCADE
                            ON DELETE CASCADE,
                        UNIQUE (db_tag_pair_id, db_gallery_id)
                    )
                    """
                    connector.execute(query)
                    self.logger.info(f"{table_name} table created.")
81 |
82 | def __get_db_tag_pair_id(self, tag_name: str, tag_value: str) -> tuple:
83 | with self.SQLConnector() as connector:
84 | match self.config.database.sql_type.lower():
85 | case "mysql":
86 | select_query = f"""
87 | SELECT db_tag_pair_id
88 | FROM galleries_tag_pairs_dbids
89 | WHERE tag_name = %s AND tag_value = %s
90 | """
91 | query_result = connector.fetch_one(select_query, (tag_name, tag_value))
92 | return query_result
93 |
94 | def _check_db_tag_pair_id(self, tag_name: str, tag_value: str) -> bool:
95 | query_result = self.__get_db_tag_pair_id(tag_name, tag_value)
96 | return len(query_result) != 0
97 |
98 | def _get_db_tag_pair_id(self, tag_name: str, tag_value: str) -> int:
99 | query_result = self.__get_db_tag_pair_id(tag_name, tag_value)
100 | if query_result:
101 | db_tag_id = query_result[0]
102 | else:
103 | self.logger.debug(f"Tag '{tag_value}' does not exist.")
104 | raise DatabaseKeyError(f"Tag '{tag_value}' does not exist.")
105 | return db_tag_id
106 |
107 | def _check_gallery_tag_name(self, tag_name: str) -> bool:
108 | with self.SQLConnector() as connector:
109 | table_name = f"galleries_tags_names"
110 | match self.config.database.sql_type.lower():
111 | case "mysql":
112 | select_query = f"""
113 | SELECT tag_name
114 | FROM {table_name}
115 | WHERE tag_name = %s
116 | """
117 | query_result = connector.fetch_one(select_query, (tag_name,))
118 | return len(query_result) != 0
119 |
120 | def _check_gallery_tag_value(self, tag_value: str) -> bool:
121 | with self.SQLConnector() as connector:
122 | table_name = f"galleries_tags_values"
123 | match self.config.database.sql_type.lower():
124 | case "mysql":
125 | select_query = f"""
126 | SELECT tag_value
127 | FROM {table_name}
128 | WHERE tag_value = %s
129 | """
130 | query_result = connector.fetch_one(select_query, (tag_value,))
131 | return len(query_result) != 0
132 |
133 | def __insert_tag_names_or_tag_values(
134 | self, n_or_v: str, tag_nvs: list[str], check_fun: Callable[[str], bool]
135 | ) -> None:
136 | toinsert_tag_nvs = list[str]()
137 | for tag_nv in tag_nvs:
138 | if not check_fun(tag_nv):
139 | toinsert_tag_nvs.append(tag_nv)
140 |
141 | if not toinsert_tag_nvs:
142 | return
143 |
144 | isretry = False
145 | with self.SQLConnector() as connector:
146 | match n_or_v.lower():
147 | case "name":
148 | table_name = "galleries_tags_names"
149 | column_name = "tag_name"
150 | case "value":
151 | table_name = "galleries_tags_values"
152 | column_name = "tag_value"
153 |
154 | match self.config.database.sql_type.lower():
155 | case "mysql":
156 | insert_query_header = f"""
157 | INSERT INTO {table_name} ({column_name})
158 | """
159 | insert_query_values = " ".join(
160 | ["VALUES", ", ".join(["(%s)" for _ in toinsert_tag_nvs])]
161 | )
162 | insert_query = f"{insert_query_header} {insert_query_values}"
163 | try:
164 | connector.execute(insert_query, tuple(toinsert_tag_nvs))
165 | except DatabaseDuplicateKeyError:
166 | isretry = True
167 | except Exception as e:
168 | raise e
169 |
170 | if isretry:
171 | self.__insert_tag_names_or_tag_values(n_or_v, toinsert_tag_nvs, check_fun)
172 |
173 | def _insert_tag_names(self, tag_names: list[str]) -> None:
174 | self.__insert_tag_names_or_tag_values(
175 | "name", tag_names, self._check_gallery_tag_name
176 | )
177 |
178 | def _insert_tag_values(self, tag_values: list[str]) -> None:
179 | self.__insert_tag_names_or_tag_values(
180 | "value", tag_values, self._check_gallery_tag_value
181 | )
182 |
183 | def _insert_tag_pairs_dbids(self, tags: list[TagInformation]) -> None:
184 | toinsert_db_tag_pair_id = list[TagInformation]()
185 | for tag in tags:
186 | if not self._check_db_tag_pair_id(tag.tag_name, tag.tag_value):
187 | toinsert_db_tag_pair_id.append(tag)
188 |
189 | if not toinsert_db_tag_pair_id:
190 | return
191 |
192 | isretry = False
193 | with self.SQLConnector() as connector:
194 | tag_pairs_table_name = f"galleries_tag_pairs_dbids"
195 | match self.config.database.sql_type.lower():
196 | case "mysql":
197 | insert_query_header = f"""
198 | INSERT INTO {tag_pairs_table_name} (tag_name, tag_value)
199 | """
200 | insert_query_values = " ".join(
201 | [
202 | "VALUES",
203 | ", ".join(["(%s, %s)" for _ in toinsert_db_tag_pair_id]),
204 | ]
205 | )
206 | insert_query = f"{insert_query_header} {insert_query_values}"
207 | parameter = list[str]()
208 | for tag in toinsert_db_tag_pair_id:
209 | parameter.extend([tag.tag_name, tag.tag_value])
210 | try:
211 | connector.execute(insert_query, tuple(parameter))
212 | except DatabaseDuplicateKeyError:
213 | isretry = True
214 | except Exception as e:
215 | raise e
216 |
217 | if isretry:
218 | self._insert_tag_pairs_dbids(toinsert_db_tag_pair_id)
219 |
220 | def _insert_gallery_tags(
221 | self, db_gallery_id: int, tags: list[TagInformation]
222 | ) -> None:
223 | toinsert_db_tag_pair_id = list[TagInformation]()
224 | for tag in tags:
225 | if not self._check_db_tag_pair_id(tag.tag_name, tag.tag_value):
226 | toinsert_db_tag_pair_id.append(tag)
227 |
228 | if not toinsert_db_tag_pair_id:
229 | return
230 |
231 | self._insert_tag_names(list({tag.tag_name for tag in tags}))
232 | self._insert_tag_values(list({tag.tag_value for tag in tags}))
233 |
234 | self._insert_tag_pairs_dbids(toinsert_db_tag_pair_id)
235 |
236 | db_tag_pair_ids = list[int]()
237 | for tag in tags:
238 | db_tag_pair_ids.append(
239 | self._get_db_tag_pair_id(tag.tag_name, tag.tag_value)
240 | )
241 |
242 | with self.SQLConnector() as connector:
243 | table_name = f"galleries_tags"
244 | match self.config.database.sql_type.lower():
245 | case "mysql":
246 | insert_query_header = f"""
247 | INSERT INTO {table_name} (db_gallery_id, db_tag_pair_id)
248 | """
249 | insert_query_values = " ".join(
250 | ["VALUES", ", ".join(["(%s, %s)" for _ in db_tag_pair_ids])]
251 | )
252 | insert_query = f"{insert_query_header} {insert_query_values}"
253 | parameter = list[int]()
254 | for db_tag_pair_id in db_tag_pair_ids:
255 | parameter.extend([db_gallery_id, db_tag_pair_id])
256 | connector.execute(insert_query, tuple(parameter))
257 |
258 | def _select_gallery_tag(self, db_gallery_id: int, tag_name: str) -> str:
259 | with self.SQLConnector() as connector:
260 | table_name = f"galleries_tags_{tag_name}"
261 | match self.config.database.sql_type.lower():
262 | case "mysql":
263 | select_query = f"""
264 | SELECT tag
265 | FROM {table_name}
266 | WHERE db_gallery_id = %s
267 | """
268 | query_result = connector.fetch_one(select_query, (db_gallery_id,))
269 | if query_result:
270 | tag = query_result[0]
271 | else:
272 | msg = f"Tag '{tag_name}' does not exist."
273 | self.logger.error(msg)
274 | raise DatabaseKeyError(msg)
275 | return tag
276 |
277 | def get_tag_value_by_gallery_name_and_tag_name(
278 | self, gallery_name: str, tag_name: str
279 | ) -> str:
280 | db_gallery_id = self._get_db_gallery_id_by_gallery_name(gallery_name)
281 | return self._select_gallery_tag(db_gallery_id, tag_name)
282 |
283 | def get_tag_pairs_by_gallery_name(self, gallery_name: str) -> list[tuple[str, str]]:
284 | db_gallery_id = self._get_db_gallery_id_by_gallery_name(gallery_name)
285 | db_tag_pair_ids = self._get_db_tag_pair_id_by_db_gallery_id(db_gallery_id)
286 | return [
287 | self._get_tag_pairs_by_db_tag_pair_id(db_tag_pair_id)
288 | for db_tag_pair_id in db_tag_pair_ids
289 | ]
290 |
291 | def _get_db_tag_pair_id_by_db_gallery_id(self, db_gallery_id: int) -> list[int]:
292 | with self.SQLConnector() as connector:
293 | table_name = "galleries_tags"
294 | match self.config.database.sql_type.lower():
295 | case "mysql":
296 | select_query = f"""
297 | SELECT db_tag_pair_id
298 | FROM {table_name}
299 | WHERE db_gallery_id = %s
300 | """
301 | query_result = connector.fetch_all(select_query, (db_gallery_id,))
302 | return [query[0] for query in query_result]
303 |
304 | def _get_tag_pairs_by_db_tag_pair_id(self, db_tag_pair_id: int) -> tuple[str, str]:
305 | with self.SQLConnector() as connector:
306 | table_name = "galleries_tag_pairs_dbids"
307 | match self.config.database.sql_type.lower():
308 | case "mysql":
309 | select_query = f"""
310 | SELECT tag_name, tag_value
311 | FROM {table_name}
312 | WHERE db_tag_pair_id = %s
313 | """
314 | query_result = connector.fetch_one(select_query, (db_tag_pair_id,))
315 | if query_result:
316 | tag_name, tag_value = query_result
317 | else:
318 | msg = f"Tag pair ID {db_tag_pair_id} does not exist."
319 | self.logger.error(msg)
320 | raise DatabaseKeyError(msg)
321 | return tag_name, tag_value
322 |
--------------------------------------------------------------------------------
/src/h2hdb/table_times.py:
--------------------------------------------------------------------------------
1 | from abc import ABCMeta
2 | import datetime
3 |
4 |
5 | from .table_gids import H2HDBGalleriesIDs
6 | from .h2hdb_spec import H2HDBAbstract
7 | from .sql_connector import DatabaseKeyError
8 |
9 |
class H2HDBTimes(H2HDBGalleriesIDs, H2HDBAbstract, metaclass=ABCMeta):
    """Mixin managing the per-gallery timestamp tables.

    Five tables share one (db_gallery_id -> time) layout: download,
    redownload, upload, modified and access times.
    """

    def _create_times_table(self, table_name: str) -> None:
        """Create *table_name* with the shared (db_gallery_id, time) layout."""
        with self.SQLConnector() as conn:
            if self.config.database.sql_type.lower() == "mysql":
                query = f"""
                    CREATE TABLE IF NOT EXISTS {table_name} (
                        PRIMARY KEY (db_gallery_id),
                        FOREIGN KEY (db_gallery_id) REFERENCES galleries_dbids(db_gallery_id)
                            ON UPDATE CASCADE
                            ON DELETE CASCADE,
                        db_gallery_id INT UNSIGNED NOT NULL,
                        time DATETIME NOT NULL,
                        INDEX (time)
                    )
                """
            conn.execute(query)
            self.logger.info(f"{table_name} table created.")

    def _insert_time(self, table_name: str, db_gallery_id: int, time: str) -> None:
        """Insert a timestamp row for *db_gallery_id* into *table_name*."""
        with self.SQLConnector() as conn:
            if self.config.database.sql_type.lower() == "mysql":
                sql = f"""
                    INSERT INTO {table_name} (db_gallery_id, time) VALUES (%s, %s)
                """
            conn.execute(sql, (db_gallery_id, time))

    def _select_time(self, table_name: str, db_gallery_id: int) -> datetime.datetime:
        """Fetch the timestamp for *db_gallery_id*.

        Raises:
            DatabaseKeyError: If the gallery has no row in *table_name*.
        """
        with self.SQLConnector() as conn:
            if self.config.database.sql_type.lower() == "mysql":
                sql = f"""
                    SELECT time
                    FROM {table_name}
                    WHERE db_gallery_id = %s
                """
            row = conn.fetch_one(sql, (db_gallery_id,))
            if not row:
                msg = f"Time for gallery name ID {db_gallery_id} does not exist in table '{table_name}'."
                self.logger.error(msg)
                raise DatabaseKeyError(msg)
            return row[0]

    def _update_time(self, table_name: str, db_gallery_id: int, time: str) -> None:
        """Overwrite the timestamp for *db_gallery_id* in *table_name*."""
        with self.SQLConnector() as conn:
            if self.config.database.sql_type.lower() == "mysql":
                sql = f"""
                    UPDATE {table_name} SET time = %s WHERE db_gallery_id = %s
                """
            conn.execute(sql, (time, db_gallery_id))

    def _create_galleries_download_times_table(self) -> None:
        """Create the download-times table."""
        self._create_times_table("galleries_download_times")

    def _create_galleries_redownload_times_table(self) -> None:
        """Create the redownload-times table."""
        self._create_times_table("galleries_redownload_times")

    def _insert_download_time(self, db_gallery_id: int, time: str) -> None:
        """Record a download; the redownload time starts out equal to it."""
        self._insert_time("galleries_download_times", db_gallery_id, time)
        self._insert_time("galleries_redownload_times", db_gallery_id, time)

    def update_redownload_time(self, db_gallery_id: int, time: str) -> None:
        """Set a new redownload timestamp for *db_gallery_id*."""
        self._update_time("galleries_redownload_times", db_gallery_id, time)

    def _reset_redownload_times(self) -> None:
        """Reset every diverged redownload time back to its download time."""
        with self.SQLConnector() as conn:
            if self.config.database.sql_type.lower() == "mysql":
                sql = """
                    UPDATE galleries_redownload_times
                    JOIN galleries_download_times
                    ON galleries_redownload_times.db_gallery_id = galleries_download_times.db_gallery_id
                    SET galleries_redownload_times.time = galleries_download_times.time
                    WHERE galleries_redownload_times.time <> galleries_download_times.time;
                """
            conn.execute(sql)

    def _create_galleries_upload_times_table(self) -> None:
        """Create the upload-times table."""
        self._create_times_table("galleries_upload_times")

    def _insert_upload_time(self, db_gallery_id: int, time: str) -> None:
        """Record the upload time for *db_gallery_id*."""
        self._insert_time("galleries_upload_times", db_gallery_id, time)

    def get_upload_time_by_gallery_name(self, gallery_name: str) -> datetime.datetime:
        """Return the upload time of the gallery called *gallery_name*."""
        db_gallery_id = self._get_db_gallery_id_by_gallery_name(gallery_name)
        return self._select_time("galleries_upload_times", db_gallery_id)

    def _create_galleries_modified_times_table(self) -> None:
        """Create the modified-times table."""
        self._create_times_table("galleries_modified_times")

    def _insert_modified_time(self, db_gallery_id: int, time: str) -> None:
        """Record the modified time for *db_gallery_id*."""
        self._insert_time("galleries_modified_times", db_gallery_id, time)

    def _create_galleries_access_times_table(self) -> None:
        """Create the access-times table."""
        self._create_times_table("galleries_access_times")

    def _insert_access_time(self, db_gallery_id: int, time: str) -> None:
        """Record the access time for *db_gallery_id*."""
        self._insert_time("galleries_access_times", db_gallery_id, time)

    def update_access_time(self, gallery_name: str, time: str) -> None:
        """Update the access time for the gallery called *gallery_name*."""
        db_gallery_id = self._get_db_gallery_id_by_gallery_name(gallery_name)
        self._update_time("galleries_access_times", db_gallery_id, time)
118 |
--------------------------------------------------------------------------------
/src/h2hdb/table_titles.py:
--------------------------------------------------------------------------------
1 | from abc import ABCMeta
2 |
3 |
4 | from .table_gids import H2HDBGalleriesIDs
5 | from .h2hdb_spec import H2HDBAbstract
6 | from .sql_connector import DatabaseKeyError
7 |
8 |
class H2HDBGalleriesTitles(H2HDBGalleriesIDs, H2HDBAbstract, metaclass=ABCMeta):
    """Mixin for the galleries_titles table (one full-text title per gallery)."""

    def _create_galleries_titles_table(self) -> None:
        """Create galleries_titles keyed by db_gallery_id with a FULLTEXT title."""
        with self.SQLConnector() as conn:
            if self.config.database.sql_type.lower() == "mysql":
                sql = """
                    CREATE TABLE IF NOT EXISTS galleries_titles (
                        PRIMARY KEY (db_gallery_id),
                        FOREIGN KEY (db_gallery_id) REFERENCES galleries_dbids(db_gallery_id)
                            ON UPDATE CASCADE
                            ON DELETE CASCADE,
                        db_gallery_id INT UNSIGNED NOT NULL,
                        title TEXT NOT NULL,
                        FULLTEXT (title)
                    )
                """
            conn.execute(sql)
            self.logger.info("galleries_titles table created.")

    def _insert_gallery_title(self, db_gallery_id: int, title: str) -> None:
        """Insert the title row for *db_gallery_id*."""
        with self.SQLConnector() as conn:
            if self.config.database.sql_type.lower() == "mysql":
                sql = """
                    INSERT INTO galleries_titles (db_gallery_id, title) VALUES (%s, %s)
                """
            conn.execute(sql, (db_gallery_id, title))

    def _get_title_by_db_gallery_id(self, db_gallery_id: int) -> str:
        """Return the title for *db_gallery_id*.

        Raises:
            DatabaseKeyError: If the gallery has no title row.
        """
        with self.SQLConnector() as conn:
            if self.config.database.sql_type.lower() == "mysql":
                sql = """
                    SELECT title
                    FROM galleries_titles
                    WHERE db_gallery_id = %s
                """
            row = conn.fetch_one(sql, (db_gallery_id,))
            if not row:
                msg = f"Title for gallery name ID {db_gallery_id} does not exist."
                self.logger.error(msg)
                raise DatabaseKeyError(msg)
            return row[0]

    def get_title_by_gallery_name(self, gallery_name: str) -> str:
        """Return the title of the gallery called *gallery_name*."""
        return self._get_title_by_db_gallery_id(
            self._get_db_gallery_id_by_gallery_name(gallery_name)
        )
61 |
--------------------------------------------------------------------------------
/src/h2hdb/table_uploadaccounts.py:
--------------------------------------------------------------------------------
1 | from abc import ABCMeta
2 |
3 | from .table_gids import H2HDBGalleriesIDs
4 | from .h2hdb_spec import H2HDBAbstract
5 | from .sql_connector import DatabaseKeyError
6 |
7 |
class H2HDBUploadAccounts(H2HDBGalleriesIDs, H2HDBAbstract, metaclass=ABCMeta):
    """Mixin for the galleries_upload_accounts table (uploader per gallery)."""

    def _create_upload_account_table(self) -> None:
        """Create galleries_upload_accounts keyed by db_gallery_id."""
        with self.SQLConnector() as conn:
            if self.config.database.sql_type.lower() == "mysql":
                sql = f"""
                    CREATE TABLE IF NOT EXISTS galleries_upload_accounts (
                        PRIMARY KEY (db_gallery_id),
                        FOREIGN KEY (db_gallery_id) REFERENCES galleries_dbids(db_gallery_id)
                            ON UPDATE CASCADE
                            ON DELETE CASCADE,
                        db_gallery_id INT UNSIGNED NOT NULL,
                        account CHAR({self.innodb_index_prefix_limit}) NOT NULL,
                        INDEX (account)
                    )
                """
            conn.execute(sql)
            self.logger.info("galleries_upload_accounts table created.")

    def _insert_gallery_upload_account(self, db_gallery_id: int, account: str) -> None:
        """Insert the uploader account row for *db_gallery_id*."""
        with self.SQLConnector() as conn:
            if self.config.database.sql_type.lower() == "mysql":
                sql = """
                    INSERT INTO galleries_upload_accounts (db_gallery_id, account) VALUES (%s, %s)
                """
            conn.execute(sql, (db_gallery_id, account))

    def _select_gallery_upload_account(self, db_gallery_id: int) -> str:
        """Return the uploader account for *db_gallery_id*.

        Raises:
            DatabaseKeyError: If the gallery has no upload-account row.
        """
        with self.SQLConnector() as conn:
            if self.config.database.sql_type.lower() == "mysql":
                sql = """
                    SELECT account
                    FROM galleries_upload_accounts
                    WHERE db_gallery_id = %s
                """
            row = conn.fetch_one(sql, (db_gallery_id,))
            if not row:
                msg = f"Upload account for gallery name ID {db_gallery_id} does not exist."
                self.logger.error(msg)
                raise DatabaseKeyError(msg)
            return row[0]

    def get_upload_account_by_gallery_name(self, gallery_name: str) -> str:
        """Return the uploader account of the gallery called *gallery_name*."""
        return self._select_gallery_upload_account(
            self._get_db_gallery_id_by_gallery_name(gallery_name)
        )
60 |
--------------------------------------------------------------------------------
/src/h2hdb/threading_tools.py:
--------------------------------------------------------------------------------
1 | import threading
2 | from threading import Thread
3 | from abc import ABCMeta, abstractmethod
4 | from typing import Callable
5 | from multiprocessing import cpu_count
6 | from multiprocessing.pool import Pool
7 | from contextlib import ExitStack
8 |
# Number of logical CPUs reported by the OS.
CPU_NUM = cpu_count()
# Worker-pool size: leave two cores for the rest of the process, minimum 1.
POOL_CPU_LIMIT = max(CPU_NUM - 2, 1)

# Cap on simultaneously running threads started by ThreadsList.__exit__.
MAX_THREADS = 2 * CPU_NUM
# Gate limiting how many threads may run SQL work at once (see SQLThreadsList).
SQL_SEMAPHORE = threading.Semaphore(POOL_CPU_LIMIT)
14 |
15 |
def wrap_thread_target_with_semaphores(
    target: Callable,
    semaphores: list[threading.Semaphore],
) -> Callable:
    """Wrap *target* so each call first acquires every semaphore in order.

    All semaphores are held for the duration of the call and released in
    reverse order afterwards (via ExitStack), even if *target* raises.

    Args:
        target: The callable to wrap.
        semaphores: Semaphores to acquire, in order, around each call.

    Returns:
        The wrapping callable. Fix: it now propagates *target*'s return
        value instead of discarding it (Thread targets ignore the return
        value, so existing callers are unaffected).
    """

    def wrapper(*args, **kwargs):
        with ExitStack() as stack:
            for semaphore in semaphores:
                stack.enter_context(semaphore)
            return target(*args, **kwargs)

    return wrapper
27 |
28 |
class ThreadsList(list[Thread], metaclass=ABCMeta):
    """A list of not-yet-started threads that runs them on context exit.

    Subclasses supply via get_semaphores() the semaphores each target must
    hold while running. On ``__exit__`` all queued threads are started with
    at most MAX_THREADS alive at once, and every thread is joined before
    the ``with`` block is left.
    """

    @abstractmethod
    def get_semaphores(self) -> list[threading.Semaphore]:
        """Return the semaphores every queued target must acquire."""

    def append(self, target, args):
        """Queue *target* (to be called with *args*) as an unstarted Thread."""
        thread = Thread(
            target=wrap_thread_target_with_semaphores(target, self.get_semaphores()),
            args=args,
        )
        super().append(thread)

    def __enter__(self) -> "ThreadsList":
        return self

    def __exit__(
        self,
        exc_type: type[BaseException] | None,
        exc_value: BaseException | None,
        traceback: object | None,
    ) -> None:
        """Start all queued threads (throttled) and wait for them to finish.

        Fixes over the previous version:
        - The throttle loop busy-spun at 100%% CPU while waiting for a slot;
          it now blocks briefly on join(timeout=...) instead.
        - Finished threads were removed from ``running_threads`` while
          iterating it, which skips list elements; we now rebuild the list.
        """
        running_threads: list[Thread] = []
        while self:
            self[0].start()
            running_threads.append(self.pop(0))
            while len(running_threads) >= MAX_THREADS:
                # Block until a thread likely finished, then reap the dead.
                running_threads[0].join(timeout=0.05)
                still_running: list[Thread] = []
                for thread in running_threads:
                    if thread.is_alive():
                        still_running.append(thread)
                    else:
                        thread.join()
                running_threads = still_running
        for thread in running_threads:
            thread.join()
62 |
class SQLThreadsList(ThreadsList):
    """ThreadsList whose targets all gate on the global SQL semaphore."""

    def get_semaphores(self) -> list[threading.Semaphore]:
        """Every queued SQL target must hold SQL_SEMAPHORE while running."""
        semaphores = [SQL_SEMAPHORE]
        return semaphores
66 |
67 |
def run_in_parallel(fun, args: list[tuple]) -> list:
    """Apply *fun* to every argument tuple using a process pool.

    Single-element tuples are unpacked and dispatched with ``map``; longer
    tuples with ``starmap``. Results come back in input order. An empty
    *args* returns an empty list without spawning a pool.
    """
    if not args:
        return []
    with Pool(POOL_CPU_LIMIT) as pool:
        if len(args[0]) > 1:
            return list(pool.starmap(fun, args))
        return list(pool.map(fun, [arg[0] for arg in args]))
77 |
--------------------------------------------------------------------------------
/src/h2hdb/view_ginfo.py:
--------------------------------------------------------------------------------
1 | from .table_uploadaccounts import H2HDBUploadAccounts
2 | from .table_titles import H2HDBGalleriesTitles
3 | from .table_times import H2HDBTimes
4 | from .table_gids import H2HDBGalleriesIDs, H2HDBGalleriesGIDs
5 | from .table_database_setting import H2HDBCheckDatabaseSettings
6 |
7 |
class H2HDBGalleriesInfos(
    H2HDBGalleriesTitles,
    H2HDBUploadAccounts,
    H2HDBTimes,
    H2HDBGalleriesGIDs,
    H2HDBGalleriesIDs,
    H2HDBCheckDatabaseSettings,
):
    """Mixin creating read-only SQL views aggregating per-gallery data."""

    def _create_galleries_infos_view(self) -> None:
        """Create galleries_infos: one denormalized row per gallery.

        NOTE(review): ``CREATE VIEW IF NOT EXISTS`` is MariaDB syntax; stock
        MySQL does not accept IF NOT EXISTS on views — confirm the target
        server.
        """
        with self.SQLConnector() as conn:
            if self.config.database.sql_type.lower() == "mysql":
                sql = """
                    CREATE VIEW IF NOT EXISTS galleries_infos AS
                    SELECT galleries_names.db_gallery_id AS db_gallery_id,
                        galleries_names.full_name AS name,
                        galleries_titles.title AS title,
                        galleries_gids.gid AS gid,
                        galleries_upload_accounts.account AS upload_account,
                        galleries_upload_times.time AS upload_time,
                        galleries_download_times.time AS download_time,
                        galleries_modified_times.time AS modified_time,
                        galleries_access_times.time AS access_time
                    FROM galleries_names
                        LEFT JOIN galleries_titles USING (db_gallery_id)
                        LEFT JOIN galleries_gids USING (db_gallery_id)
                        LEFT JOIN galleries_upload_accounts USING (db_gallery_id)
                        LEFT JOIN galleries_upload_times USING (db_gallery_id)
                        LEFT JOIN galleries_download_times USING (db_gallery_id)
                        LEFT JOIN galleries_modified_times USING (db_gallery_id)
                        LEFT JOIN galleries_access_times USING (db_gallery_id)
                """
            conn.execute(sql)
            self.logger.info("galleries_infos view created.")

    def _create_duplicate_hash_in_gallery_view(self) -> None:
        """Create duplicate_hash_in_gallery.

        Lists galleries (newest gid first) whose files are predominantly
        duplicates by SHA-512: the ratio of duplicate hash groups to
        remaining files exceeds 0.9.
        """
        with self.SQLConnector() as conn:
            if self.config.database.sql_type.lower() == "mysql":
                sql = """
                    CREATE VIEW IF NOT EXISTS duplicate_hash_in_gallery AS WITH Files AS (
                        SELECT files_dbids.db_gallery_id AS db_gallery_id,
                            files_hashs_sha512.db_hash_id AS hash_value
                        FROM files_dbids
                            JOIN files_hashs_sha512 ON files_dbids.db_file_id = files_hashs_sha512.db_file_id
                    ),
                    DuplicateCount AS (
                        SELECT db_gallery_id,
                            hash_value
                        FROM Files
                        GROUP BY db_gallery_id,
                            hash_value
                        HAVING COUNT(*) > 1
                    ),
                    TotalCount AS (
                        SELECT db_gallery_id,
                            COUNT(*) AS files_count
                        FROM files_dbids
                        GROUP BY db_gallery_id
                    ),
                    DuplicateGroupCount AS (
                        SELECT db_gallery_id,
                            COUNT(*) AS duplicate_groups
                        FROM DuplicateCount
                        GROUP BY db_gallery_id
                    )
                    SELECT tc.db_gallery_id AS db_gallery_id,
                        gg.gid AS gid,
                        gn.full_name AS gallery_name
                    FROM TotalCount AS tc
                        JOIN DuplicateGroupCount AS dg ON tc.db_gallery_id = dg.db_gallery_id
                        JOIN galleries_gids AS gg ON tc.db_gallery_id = gg.db_gallery_id
                        JOIN galleries_names AS gn ON gg.db_gallery_id = gn.db_gallery_id
                    WHERE CAST(dg.duplicate_groups AS FLOAT) / (
                            tc.files_count - CAST(dg.duplicate_groups AS FLOAT)
                        ) > 0.9
                    ORDER BY gid DESC;
                """
            conn.execute(sql)
            self.logger.info("duplicate_hash_in_gallery view created.")
88 |
--------------------------------------------------------------------------------