├── LICENSE ├── README.md ├── pyproject.toml ├── requirements.txt └── src └── h2hdb ├── __init__.py ├── __main__.py ├── compress_gallery_to_cbz.py ├── config_loader.py ├── h2hdb_h2hdb.py ├── h2hdb_spec.py ├── hash_dict.py ├── information.py ├── logger.py ├── mysql_connector.py ├── py.typed ├── settings.py ├── sql_connector.py ├── table_comments.py ├── table_database_setting.py ├── table_files_dbids.py ├── table_gids.py ├── table_removed_gids.py ├── table_tags.py ├── table_times.py ├── table_titles.py ├── table_uploadaccounts.py ├── threading_tools.py └── view_ginfo.py /LICENSE: -------------------------------------------------------------------------------- 1 | GNU GENERAL PUBLIC LICENSE 2 | Version 3, 29 June 2007 3 | 4 | Copyright (C) 2007 Free Software Foundation, Inc. 5 | Everyone is permitted to copy and distribute verbatim copies 6 | of this license document, but changing it is not allowed. 7 | 8 | Preamble 9 | 10 | The GNU General Public License is a free, copyleft license for 11 | software and other kinds of works. 12 | 13 | The licenses for most software and other practical works are designed 14 | to take away your freedom to share and change the works. By contrast, 15 | the GNU General Public License is intended to guarantee your freedom to 16 | share and change all versions of a program--to make sure it remains free 17 | software for all its users. We, the Free Software Foundation, use the 18 | GNU General Public License for most of our software; it applies also to 19 | any other work released this way by its authors. You can apply it to 20 | your programs, too. 21 | 22 | When we speak of free software, we are referring to freedom, not 23 | price. 
Our General Public Licenses are designed to make sure that you 24 | have the freedom to distribute copies of free software (and charge for 25 | them if you wish), that you receive source code or can get it if you 26 | want it, that you can change the software or use pieces of it in new 27 | free programs, and that you know you can do these things. 28 | 29 | To protect your rights, we need to prevent others from denying you 30 | these rights or asking you to surrender the rights. Therefore, you have 31 | certain responsibilities if you distribute copies of the software, or if 32 | you modify it: responsibilities to respect the freedom of others. 33 | 34 | For example, if you distribute copies of such a program, whether 35 | gratis or for a fee, you must pass on to the recipients the same 36 | freedoms that you received. You must make sure that they, too, receive 37 | or can get the source code. And you must show them these terms so they 38 | know their rights. 39 | 40 | Developers that use the GNU GPL protect your rights with two steps: 41 | (1) assert copyright on the software, and (2) offer you this License 42 | giving you legal permission to copy, distribute and/or modify it. 43 | 44 | For the developers' and authors' protection, the GPL clearly explains 45 | that there is no warranty for this free software. For both users' and 46 | authors' sake, the GPL requires that modified versions be marked as 47 | changed, so that their problems will not be attributed erroneously to 48 | authors of previous versions. 49 | 50 | Some devices are designed to deny users access to install or run 51 | modified versions of the software inside them, although the manufacturer 52 | can do so. This is fundamentally incompatible with the aim of 53 | protecting users' freedom to change the software. The systematic 54 | pattern of such abuse occurs in the area of products for individuals to 55 | use, which is precisely where it is most unacceptable. 
Therefore, we 56 | have designed this version of the GPL to prohibit the practice for those 57 | products. If such problems arise substantially in other domains, we 58 | stand ready to extend this provision to those domains in future versions 59 | of the GPL, as needed to protect the freedom of users. 60 | 61 | Finally, every program is threatened constantly by software patents. 62 | States should not allow patents to restrict development and use of 63 | software on general-purpose computers, but in those that do, we wish to 64 | avoid the special danger that patents applied to a free program could 65 | make it effectively proprietary. To prevent this, the GPL assures that 66 | patents cannot be used to render the program non-free. 67 | 68 | The precise terms and conditions for copying, distribution and 69 | modification follow. 70 | 71 | TERMS AND CONDITIONS 72 | 73 | 0. Definitions. 74 | 75 | "This License" refers to version 3 of the GNU General Public License. 76 | 77 | "Copyright" also means copyright-like laws that apply to other kinds of 78 | works, such as semiconductor masks. 79 | 80 | "The Program" refers to any copyrightable work licensed under this 81 | License. Each licensee is addressed as "you". "Licensees" and 82 | "recipients" may be individuals or organizations. 83 | 84 | To "modify" a work means to copy from or adapt all or part of the work 85 | in a fashion requiring copyright permission, other than the making of an 86 | exact copy. The resulting work is called a "modified version" of the 87 | earlier work or a work "based on" the earlier work. 88 | 89 | A "covered work" means either the unmodified Program or a work based 90 | on the Program. 91 | 92 | To "propagate" a work means to do anything with it that, without 93 | permission, would make you directly or secondarily liable for 94 | infringement under applicable copyright law, except executing it on a 95 | computer or modifying a private copy. 
Propagation includes copying, 96 | distribution (with or without modification), making available to the 97 | public, and in some countries other activities as well. 98 | 99 | To "convey" a work means any kind of propagation that enables other 100 | parties to make or receive copies. Mere interaction with a user through 101 | a computer network, with no transfer of a copy, is not conveying. 102 | 103 | An interactive user interface displays "Appropriate Legal Notices" 104 | to the extent that it includes a convenient and prominently visible 105 | feature that (1) displays an appropriate copyright notice, and (2) 106 | tells the user that there is no warranty for the work (except to the 107 | extent that warranties are provided), that licensees may convey the 108 | work under this License, and how to view a copy of this License. If 109 | the interface presents a list of user commands or options, such as a 110 | menu, a prominent item in the list meets this criterion. 111 | 112 | 1. Source Code. 113 | 114 | The "source code" for a work means the preferred form of the work 115 | for making modifications to it. "Object code" means any non-source 116 | form of a work. 117 | 118 | A "Standard Interface" means an interface that either is an official 119 | standard defined by a recognized standards body, or, in the case of 120 | interfaces specified for a particular programming language, one that 121 | is widely used among developers working in that language. 122 | 123 | The "System Libraries" of an executable work include anything, other 124 | than the work as a whole, that (a) is included in the normal form of 125 | packaging a Major Component, but which is not part of that Major 126 | Component, and (b) serves only to enable use of the work with that 127 | Major Component, or to implement a Standard Interface for which an 128 | implementation is available to the public in source code form. 
A 129 | "Major Component", in this context, means a major essential component 130 | (kernel, window system, and so on) of the specific operating system 131 | (if any) on which the executable work runs, or a compiler used to 132 | produce the work, or an object code interpreter used to run it. 133 | 134 | The "Corresponding Source" for a work in object code form means all 135 | the source code needed to generate, install, and (for an executable 136 | work) run the object code and to modify the work, including scripts to 137 | control those activities. However, it does not include the work's 138 | System Libraries, or general-purpose tools or generally available free 139 | programs which are used unmodified in performing those activities but 140 | which are not part of the work. For example, Corresponding Source 141 | includes interface definition files associated with source files for 142 | the work, and the source code for shared libraries and dynamically 143 | linked subprograms that the work is specifically designed to require, 144 | such as by intimate data communication or control flow between those 145 | subprograms and other parts of the work. 146 | 147 | The Corresponding Source need not include anything that users 148 | can regenerate automatically from other parts of the Corresponding 149 | Source. 150 | 151 | The Corresponding Source for a work in source code form is that 152 | same work. 153 | 154 | 2. Basic Permissions. 155 | 156 | All rights granted under this License are granted for the term of 157 | copyright on the Program, and are irrevocable provided the stated 158 | conditions are met. This License explicitly affirms your unlimited 159 | permission to run the unmodified Program. The output from running a 160 | covered work is covered by this License only if the output, given its 161 | content, constitutes a covered work. This License acknowledges your 162 | rights of fair use or other equivalent, as provided by copyright law. 
163 | 164 | You may make, run and propagate covered works that you do not 165 | convey, without conditions so long as your license otherwise remains 166 | in force. You may convey covered works to others for the sole purpose 167 | of having them make modifications exclusively for you, or provide you 168 | with facilities for running those works, provided that you comply with 169 | the terms of this License in conveying all material for which you do 170 | not control copyright. Those thus making or running the covered works 171 | for you must do so exclusively on your behalf, under your direction 172 | and control, on terms that prohibit them from making any copies of 173 | your copyrighted material outside their relationship with you. 174 | 175 | Conveying under any other circumstances is permitted solely under 176 | the conditions stated below. Sublicensing is not allowed; section 10 177 | makes it unnecessary. 178 | 179 | 3. Protecting Users' Legal Rights From Anti-Circumvention Law. 180 | 181 | No covered work shall be deemed part of an effective technological 182 | measure under any applicable law fulfilling obligations under article 183 | 11 of the WIPO copyright treaty adopted on 20 December 1996, or 184 | similar laws prohibiting or restricting circumvention of such 185 | measures. 186 | 187 | When you convey a covered work, you waive any legal power to forbid 188 | circumvention of technological measures to the extent such circumvention 189 | is effected by exercising rights under this License with respect to 190 | the covered work, and you disclaim any intention to limit operation or 191 | modification of the work as a means of enforcing, against the work's 192 | users, your or third parties' legal rights to forbid circumvention of 193 | technological measures. 194 | 195 | 4. Conveying Verbatim Copies. 
196 | 197 | You may convey verbatim copies of the Program's source code as you 198 | receive it, in any medium, provided that you conspicuously and 199 | appropriately publish on each copy an appropriate copyright notice; 200 | keep intact all notices stating that this License and any 201 | non-permissive terms added in accord with section 7 apply to the code; 202 | keep intact all notices of the absence of any warranty; and give all 203 | recipients a copy of this License along with the Program. 204 | 205 | You may charge any price or no price for each copy that you convey, 206 | and you may offer support or warranty protection for a fee. 207 | 208 | 5. Conveying Modified Source Versions. 209 | 210 | You may convey a work based on the Program, or the modifications to 211 | produce it from the Program, in the form of source code under the 212 | terms of section 4, provided that you also meet all of these conditions: 213 | 214 | a) The work must carry prominent notices stating that you modified 215 | it, and giving a relevant date. 216 | 217 | b) The work must carry prominent notices stating that it is 218 | released under this License and any conditions added under section 219 | 7. This requirement modifies the requirement in section 4 to 220 | "keep intact all notices". 221 | 222 | c) You must license the entire work, as a whole, under this 223 | License to anyone who comes into possession of a copy. This 224 | License will therefore apply, along with any applicable section 7 225 | additional terms, to the whole of the work, and all its parts, 226 | regardless of how they are packaged. This License gives no 227 | permission to license the work in any other way, but it does not 228 | invalidate such permission if you have separately received it. 
229 | 230 | d) If the work has interactive user interfaces, each must display 231 | Appropriate Legal Notices; however, if the Program has interactive 232 | interfaces that do not display Appropriate Legal Notices, your 233 | work need not make them do so. 234 | 235 | A compilation of a covered work with other separate and independent 236 | works, which are not by their nature extensions of the covered work, 237 | and which are not combined with it such as to form a larger program, 238 | in or on a volume of a storage or distribution medium, is called an 239 | "aggregate" if the compilation and its resulting copyright are not 240 | used to limit the access or legal rights of the compilation's users 241 | beyond what the individual works permit. Inclusion of a covered work 242 | in an aggregate does not cause this License to apply to the other 243 | parts of the aggregate. 244 | 245 | 6. Conveying Non-Source Forms. 246 | 247 | You may convey a covered work in object code form under the terms 248 | of sections 4 and 5, provided that you also convey the 249 | machine-readable Corresponding Source under the terms of this License, 250 | in one of these ways: 251 | 252 | a) Convey the object code in, or embodied in, a physical product 253 | (including a physical distribution medium), accompanied by the 254 | Corresponding Source fixed on a durable physical medium 255 | customarily used for software interchange. 
256 | 257 | b) Convey the object code in, or embodied in, a physical product 258 | (including a physical distribution medium), accompanied by a 259 | written offer, valid for at least three years and valid for as 260 | long as you offer spare parts or customer support for that product 261 | model, to give anyone who possesses the object code either (1) a 262 | copy of the Corresponding Source for all the software in the 263 | product that is covered by this License, on a durable physical 264 | medium customarily used for software interchange, for a price no 265 | more than your reasonable cost of physically performing this 266 | conveying of source, or (2) access to copy the 267 | Corresponding Source from a network server at no charge. 268 | 269 | c) Convey individual copies of the object code with a copy of the 270 | written offer to provide the Corresponding Source. This 271 | alternative is allowed only occasionally and noncommercially, and 272 | only if you received the object code with such an offer, in accord 273 | with subsection 6b. 274 | 275 | d) Convey the object code by offering access from a designated 276 | place (gratis or for a charge), and offer equivalent access to the 277 | Corresponding Source in the same way through the same place at no 278 | further charge. You need not require recipients to copy the 279 | Corresponding Source along with the object code. If the place to 280 | copy the object code is a network server, the Corresponding Source 281 | may be on a different server (operated by you or a third party) 282 | that supports equivalent copying facilities, provided you maintain 283 | clear directions next to the object code saying where to find the 284 | Corresponding Source. Regardless of what server hosts the 285 | Corresponding Source, you remain obligated to ensure that it is 286 | available for as long as needed to satisfy these requirements. 
287 | 288 | e) Convey the object code using peer-to-peer transmission, provided 289 | you inform other peers where the object code and Corresponding 290 | Source of the work are being offered to the general public at no 291 | charge under subsection 6d. 292 | 293 | A separable portion of the object code, whose source code is excluded 294 | from the Corresponding Source as a System Library, need not be 295 | included in conveying the object code work. 296 | 297 | A "User Product" is either (1) a "consumer product", which means any 298 | tangible personal property which is normally used for personal, family, 299 | or household purposes, or (2) anything designed or sold for incorporation 300 | into a dwelling. In determining whether a product is a consumer product, 301 | doubtful cases shall be resolved in favor of coverage. For a particular 302 | product received by a particular user, "normally used" refers to a 303 | typical or common use of that class of product, regardless of the status 304 | of the particular user or of the way in which the particular user 305 | actually uses, or expects or is expected to use, the product. A product 306 | is a consumer product regardless of whether the product has substantial 307 | commercial, industrial or non-consumer uses, unless such uses represent 308 | the only significant mode of use of the product. 309 | 310 | "Installation Information" for a User Product means any methods, 311 | procedures, authorization keys, or other information required to install 312 | and execute modified versions of a covered work in that User Product from 313 | a modified version of its Corresponding Source. The information must 314 | suffice to ensure that the continued functioning of the modified object 315 | code is in no case prevented or interfered with solely because 316 | modification has been made. 
317 | 318 | If you convey an object code work under this section in, or with, or 319 | specifically for use in, a User Product, and the conveying occurs as 320 | part of a transaction in which the right of possession and use of the 321 | User Product is transferred to the recipient in perpetuity or for a 322 | fixed term (regardless of how the transaction is characterized), the 323 | Corresponding Source conveyed under this section must be accompanied 324 | by the Installation Information. But this requirement does not apply 325 | if neither you nor any third party retains the ability to install 326 | modified object code on the User Product (for example, the work has 327 | been installed in ROM). 328 | 329 | The requirement to provide Installation Information does not include a 330 | requirement to continue to provide support service, warranty, or updates 331 | for a work that has been modified or installed by the recipient, or for 332 | the User Product in which it has been modified or installed. Access to a 333 | network may be denied when the modification itself materially and 334 | adversely affects the operation of the network or violates the rules and 335 | protocols for communication across the network. 336 | 337 | Corresponding Source conveyed, and Installation Information provided, 338 | in accord with this section must be in a format that is publicly 339 | documented (and with an implementation available to the public in 340 | source code form), and must require no special password or key for 341 | unpacking, reading or copying. 342 | 343 | 7. Additional Terms. 344 | 345 | "Additional permissions" are terms that supplement the terms of this 346 | License by making exceptions from one or more of its conditions. 347 | Additional permissions that are applicable to the entire Program shall 348 | be treated as though they were included in this License, to the extent 349 | that they are valid under applicable law. 
If additional permissions 350 | apply only to part of the Program, that part may be used separately 351 | under those permissions, but the entire Program remains governed by 352 | this License without regard to the additional permissions. 353 | 354 | When you convey a copy of a covered work, you may at your option 355 | remove any additional permissions from that copy, or from any part of 356 | it. (Additional permissions may be written to require their own 357 | removal in certain cases when you modify the work.) You may place 358 | additional permissions on material, added by you to a covered work, 359 | for which you have or can give appropriate copyright permission. 360 | 361 | Notwithstanding any other provision of this License, for material you 362 | add to a covered work, you may (if authorized by the copyright holders of 363 | that material) supplement the terms of this License with terms: 364 | 365 | a) Disclaiming warranty or limiting liability differently from the 366 | terms of sections 15 and 16 of this License; or 367 | 368 | b) Requiring preservation of specified reasonable legal notices or 369 | author attributions in that material or in the Appropriate Legal 370 | Notices displayed by works containing it; or 371 | 372 | c) Prohibiting misrepresentation of the origin of that material, or 373 | requiring that modified versions of such material be marked in 374 | reasonable ways as different from the original version; or 375 | 376 | d) Limiting the use for publicity purposes of names of licensors or 377 | authors of the material; or 378 | 379 | e) Declining to grant rights under trademark law for use of some 380 | trade names, trademarks, or service marks; or 381 | 382 | f) Requiring indemnification of licensors and authors of that 383 | material by anyone who conveys the material (or modified versions of 384 | it) with contractual assumptions of liability to the recipient, for 385 | any liability that these contractual assumptions directly impose on 
386 | those licensors and authors. 387 | 388 | All other non-permissive additional terms are considered "further 389 | restrictions" within the meaning of section 10. If the Program as you 390 | received it, or any part of it, contains a notice stating that it is 391 | governed by this License along with a term that is a further 392 | restriction, you may remove that term. If a license document contains 393 | a further restriction but permits relicensing or conveying under this 394 | License, you may add to a covered work material governed by the terms 395 | of that license document, provided that the further restriction does 396 | not survive such relicensing or conveying. 397 | 398 | If you add terms to a covered work in accord with this section, you 399 | must place, in the relevant source files, a statement of the 400 | additional terms that apply to those files, or a notice indicating 401 | where to find the applicable terms. 402 | 403 | Additional terms, permissive or non-permissive, may be stated in the 404 | form of a separately written license, or stated as exceptions; 405 | the above requirements apply either way. 406 | 407 | 8. Termination. 408 | 409 | You may not propagate or modify a covered work except as expressly 410 | provided under this License. Any attempt otherwise to propagate or 411 | modify it is void, and will automatically terminate your rights under 412 | this License (including any patent licenses granted under the third 413 | paragraph of section 11). 414 | 415 | However, if you cease all violation of this License, then your 416 | license from a particular copyright holder is reinstated (a) 417 | provisionally, unless and until the copyright holder explicitly and 418 | finally terminates your license, and (b) permanently, if the copyright 419 | holder fails to notify you of the violation by some reasonable means 420 | prior to 60 days after the cessation. 
421 | 422 | Moreover, your license from a particular copyright holder is 423 | reinstated permanently if the copyright holder notifies you of the 424 | violation by some reasonable means, this is the first time you have 425 | received notice of violation of this License (for any work) from that 426 | copyright holder, and you cure the violation prior to 30 days after 427 | your receipt of the notice. 428 | 429 | Termination of your rights under this section does not terminate the 430 | licenses of parties who have received copies or rights from you under 431 | this License. If your rights have been terminated and not permanently 432 | reinstated, you do not qualify to receive new licenses for the same 433 | material under section 10. 434 | 435 | 9. Acceptance Not Required for Having Copies. 436 | 437 | You are not required to accept this License in order to receive or 438 | run a copy of the Program. Ancillary propagation of a covered work 439 | occurring solely as a consequence of using peer-to-peer transmission 440 | to receive a copy likewise does not require acceptance. However, 441 | nothing other than this License grants you permission to propagate or 442 | modify any covered work. These actions infringe copyright if you do 443 | not accept this License. Therefore, by modifying or propagating a 444 | covered work, you indicate your acceptance of this License to do so. 445 | 446 | 10. Automatic Licensing of Downstream Recipients. 447 | 448 | Each time you convey a covered work, the recipient automatically 449 | receives a license from the original licensors, to run, modify and 450 | propagate that work, subject to this License. You are not responsible 451 | for enforcing compliance by third parties with this License. 452 | 453 | An "entity transaction" is a transaction transferring control of an 454 | organization, or substantially all assets of one, or subdividing an 455 | organization, or merging organizations. 
If propagation of a covered 456 | work results from an entity transaction, each party to that 457 | transaction who receives a copy of the work also receives whatever 458 | licenses to the work the party's predecessor in interest had or could 459 | give under the previous paragraph, plus a right to possession of the 460 | Corresponding Source of the work from the predecessor in interest, if 461 | the predecessor has it or can get it with reasonable efforts. 462 | 463 | You may not impose any further restrictions on the exercise of the 464 | rights granted or affirmed under this License. For example, you may 465 | not impose a license fee, royalty, or other charge for exercise of 466 | rights granted under this License, and you may not initiate litigation 467 | (including a cross-claim or counterclaim in a lawsuit) alleging that 468 | any patent claim is infringed by making, using, selling, offering for 469 | sale, or importing the Program or any portion of it. 470 | 471 | 11. Patents. 472 | 473 | A "contributor" is a copyright holder who authorizes use under this 474 | License of the Program or a work on which the Program is based. The 475 | work thus licensed is called the contributor's "contributor version". 476 | 477 | A contributor's "essential patent claims" are all patent claims 478 | owned or controlled by the contributor, whether already acquired or 479 | hereafter acquired, that would be infringed by some manner, permitted 480 | by this License, of making, using, or selling its contributor version, 481 | but do not include claims that would be infringed only as a 482 | consequence of further modification of the contributor version. For 483 | purposes of this definition, "control" includes the right to grant 484 | patent sublicenses in a manner consistent with the requirements of 485 | this License. 
486 | 487 | Each contributor grants you a non-exclusive, worldwide, royalty-free 488 | patent license under the contributor's essential patent claims, to 489 | make, use, sell, offer for sale, import and otherwise run, modify and 490 | propagate the contents of its contributor version. 491 | 492 | In the following three paragraphs, a "patent license" is any express 493 | agreement or commitment, however denominated, not to enforce a patent 494 | (such as an express permission to practice a patent or covenant not to 495 | sue for patent infringement). To "grant" such a patent license to a 496 | party means to make such an agreement or commitment not to enforce a 497 | patent against the party. 498 | 499 | If you convey a covered work, knowingly relying on a patent license, 500 | and the Corresponding Source of the work is not available for anyone 501 | to copy, free of charge and under the terms of this License, through a 502 | publicly available network server or other readily accessible means, 503 | then you must either (1) cause the Corresponding Source to be so 504 | available, or (2) arrange to deprive yourself of the benefit of the 505 | patent license for this particular work, or (3) arrange, in a manner 506 | consistent with the requirements of this License, to extend the patent 507 | license to downstream recipients. "Knowingly relying" means you have 508 | actual knowledge that, but for the patent license, your conveying the 509 | covered work in a country, or your recipient's use of the covered work 510 | in a country, would infringe one or more identifiable patents in that 511 | country that you have reason to believe are valid. 
512 | 513 | If, pursuant to or in connection with a single transaction or 514 | arrangement, you convey, or propagate by procuring conveyance of, a 515 | covered work, and grant a patent license to some of the parties 516 | receiving the covered work authorizing them to use, propagate, modify 517 | or convey a specific copy of the covered work, then the patent license 518 | you grant is automatically extended to all recipients of the covered 519 | work and works based on it. 520 | 521 | A patent license is "discriminatory" if it does not include within 522 | the scope of its coverage, prohibits the exercise of, or is 523 | conditioned on the non-exercise of one or more of the rights that are 524 | specifically granted under this License. You may not convey a covered 525 | work if you are a party to an arrangement with a third party that is 526 | in the business of distributing software, under which you make payment 527 | to the third party based on the extent of your activity of conveying 528 | the work, and under which the third party grants, to any of the 529 | parties who would receive the covered work from you, a discriminatory 530 | patent license (a) in connection with copies of the covered work 531 | conveyed by you (or copies made from those copies), or (b) primarily 532 | for and in connection with specific products or compilations that 533 | contain the covered work, unless you entered into that arrangement, 534 | or that patent license was granted, prior to 28 March 2007. 535 | 536 | Nothing in this License shall be construed as excluding or limiting 537 | any implied license or other defenses to infringement that may 538 | otherwise be available to you under applicable patent law. 539 | 540 | 12. No Surrender of Others' Freedom. 541 | 542 | If conditions are imposed on you (whether by court order, agreement or 543 | otherwise) that contradict the conditions of this License, they do not 544 | excuse you from the conditions of this License. 
If you cannot convey a 545 | covered work so as to satisfy simultaneously your obligations under this 546 | License and any other pertinent obligations, then as a consequence you may 547 | not convey it at all. For example, if you agree to terms that obligate you 548 | to collect a royalty for further conveying from those to whom you convey 549 | the Program, the only way you could satisfy both those terms and this 550 | License would be to refrain entirely from conveying the Program. 551 | 552 | 13. Use with the GNU Affero General Public License. 553 | 554 | Notwithstanding any other provision of this License, you have 555 | permission to link or combine any covered work with a work licensed 556 | under version 3 of the GNU Affero General Public License into a single 557 | combined work, and to convey the resulting work. The terms of this 558 | License will continue to apply to the part which is the covered work, 559 | but the special requirements of the GNU Affero General Public License, 560 | section 13, concerning interaction through a network will apply to the 561 | combination as such. 562 | 563 | 14. Revised Versions of this License. 564 | 565 | The Free Software Foundation may publish revised and/or new versions of 566 | the GNU General Public License from time to time. Such new versions will 567 | be similar in spirit to the present version, but may differ in detail to 568 | address new problems or concerns. 569 | 570 | Each version is given a distinguishing version number. If the 571 | Program specifies that a certain numbered version of the GNU General 572 | Public License "or any later version" applies to it, you have the 573 | option of following the terms and conditions either of that numbered 574 | version or of any later version published by the Free Software 575 | Foundation. If the Program does not specify a version number of the 576 | GNU General Public License, you may choose any version ever published 577 | by the Free Software Foundation. 
578 | 579 | If the Program specifies that a proxy can decide which future 580 | versions of the GNU General Public License can be used, that proxy's 581 | public statement of acceptance of a version permanently authorizes you 582 | to choose that version for the Program. 583 | 584 | Later license versions may give you additional or different 585 | permissions. However, no additional obligations are imposed on any 586 | author or copyright holder as a result of your choosing to follow a 587 | later version. 588 | 589 | 15. Disclaimer of Warranty. 590 | 591 | THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY 592 | APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT 593 | HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY 594 | OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, 595 | THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 596 | PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM 597 | IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF 598 | ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 599 | 600 | 16. Limitation of Liability. 601 | 602 | IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING 603 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS 604 | THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY 605 | GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE 606 | USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF 607 | DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD 608 | PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), 609 | EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF 610 | SUCH DAMAGES. 611 | 612 | 17. Interpretation of Sections 15 and 16. 
613 | 614 | If the disclaimer of warranty and limitation of liability provided 615 | above cannot be given local legal effect according to their terms, 616 | reviewing courts shall apply local law that most closely approximates 617 | an absolute waiver of all civil liability in connection with the 618 | Program, unless a warranty or assumption of liability accompanies a 619 | copy of the Program in return for a fee. 620 | 621 | END OF TERMS AND CONDITIONS 622 | 623 | How to Apply These Terms to Your New Programs 624 | 625 | If you develop a new program, and you want it to be of the greatest 626 | possible use to the public, the best way to achieve this is to make it 627 | free software which everyone can redistribute and change under these terms. 628 | 629 | To do so, attach the following notices to the program. It is safest 630 | to attach them to the start of each source file to most effectively 631 | state the exclusion of warranty; and each file should have at least 632 | the "copyright" line and a pointer to where the full notice is found. 633 | 634 | 635 | Copyright (C) 636 | 637 | This program is free software: you can redistribute it and/or modify 638 | it under the terms of the GNU General Public License as published by 639 | the Free Software Foundation, either version 3 of the License, or 640 | (at your option) any later version. 641 | 642 | This program is distributed in the hope that it will be useful, 643 | but WITHOUT ANY WARRANTY; without even the implied warranty of 644 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 645 | GNU General Public License for more details. 646 | 647 | You should have received a copy of the GNU General Public License 648 | along with this program. If not, see . 649 | 650 | Also add information on how to contact you by electronic and paper mail. 
651 | 652 | If the program does terminal interaction, make it output a short 653 | notice like this when it starts in an interactive mode: 654 | 655 | Copyright (C) 656 | This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'. 657 | This is free software, and you are welcome to redistribute it 658 | under certain conditions; type `show c' for details. 659 | 660 | The hypothetical commands `show w' and `show c' should show the appropriate 661 | parts of the General Public License. Of course, your program's commands 662 | might be different; for a GUI interface, you would use an "about box". 663 | 664 | You should also get your employer (if you work as a programmer) or school, 665 | if any, to sign a "copyright disclaimer" for the program, if necessary. 666 | For more information on this, and how to apply and follow the GNU GPL, see 667 | . 668 | 669 | The GNU General Public License does not permit incorporating your program 670 | into proprietary programs. If your program is a subroutine library, you 671 | may consider it more useful to permit linking proprietary applications with 672 | the library. If this is what you want to do, use the GNU Lesser General 673 | Public License instead of this License. But first, please read 674 | . 675 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # H2HDB 2 | 3 | ## Description 4 | 5 | The `H2HDB` is a comprehensive database for organising and managing H@H comic collections. It offers a streamlined way to catalogue your comics, providing key information such as GID (Gallery ID), title, tags and more, ensuring your collection is always organised and accessible. 6 | 7 | --- 8 | 9 | ## Features 10 | 11 | - [x] Add new galleries to the database 12 | - [x] Comporess H@H's galleries to a folder 13 | - [x] Record the removed GIDs in a separate list 14 | - [ ] Write document (need?) 
15 | 16 | --- 17 | 18 | ## Installation and Usage 19 | 20 | 1. Install Python 3.13 or higher from [python.org](https://www.python.org/downloads/). 21 | 1. Install the required packages. 22 | 23 | ```bash 24 | pip install h2hdb 25 | ``` 26 | 27 | 1. Run the script. 28 | 29 | ```bash 30 | python -m h2hdb --config [json-path] 31 | ``` 32 | 33 | ### Config 34 | 35 | ```json 36 | { 37 | "h2h": { 38 | "download_path": "[str]", // The download path of H@H. The default is `download`. 39 | "cbz_path": "[str]", // The cbz in this path. 40 | "cbz_max_size": "[int]", // The maxinum of the mininum of width and height height. The default is `768`. 41 | "cbz_grouping": "[str]", // `flat`, `date-yyyy`, `date-yyyy-mm`, or `date-yyyy-mm-dd`. The default is `flat`. 42 | "cbz_sort": "[str]" // `upload_time`, `download_time`, `pages`, or `pages+[num]`. The default is `no`. 43 | }, 44 | "database": { 45 | "sql_type": "[str]", // Now only supports `mysql`. The default is `mysql`. 46 | "host": "[str]", // The default is `localhost`. 47 | "port": "[int]", // The default is `3306`. 48 | "user": "[str]", // The default is `root`. 49 | "password": "[str]" // The default is `password`. 50 | }, 51 | "logger": { 52 | "level": "[str]" // One of NOTSET, DEBUG, INFO, WARNING, ERROR, CRITICAL. 53 | } 54 | } 55 | ``` 56 | 57 | --- 58 | 59 | ## Q & A 60 | 61 | - Why are some images missing from the CBZ-files? 62 | `H2HDB` does not compress images that are considered spam according to certain rules. If you encounter any images that you believe should have been included, please report the issue. 63 | 64 | - Why are some images in some CBZ files and not in other CBZ-files? 65 | `H2HDB` learns the spam rule from the previous CBZ files. If you kill the CBZ files containing these images, the new CBZ files will not contain these images. 66 | 67 | --- 68 | 69 | ## Credits 70 | 71 | The project was created by [Kuan-Lun Wang](https://www.klwang.tw/home/). 
72 | 73 | --- 74 | 75 | ## License 76 | 77 | This project is distributed under the terms of the GNU General Public Licence (GPL). For detailed licence terms, see the `LICENSE` file included in this distribution. 78 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["setuptools", "wheel"] 3 | build-backend = "setuptools.build_meta" 4 | 5 | [project] 6 | name = "h2hdb" 7 | version = "0.9.1.9" 8 | description = "A simple H@H database" 9 | readme = "README.md" 10 | authors = [{ name = "Kuan-Lun Wang" }] 11 | license = { text = "GNU Affero General Public License v3" } 12 | dependencies = [ 13 | "h2h-galleryinfo-parser>=0.2.2", 14 | "mysql-connector-python>=9.3.0,<10.0.0", 15 | "pillow>=11.2.1,<12.0.0", 16 | "pydantic>=2.11.4", 17 | ] 18 | classifiers = [ 19 | "Development Status :: 3 - Alpha", 20 | "Intended Audience :: Developers", 21 | "Operating System :: OS Independent", 22 | "License :: OSI Approved :: GNU Affero General Public License v3 or later (AGPLv3+)", 23 | "Programming Language :: Python :: 3.13", 24 | ] 25 | 26 | [project.urls] 27 | Homepage = "https://github.com/Kuan-Lun/h2hdb" 28 | Source = "https://github.com/Kuan-Lun/h2hdb" 29 | Tracker = "https://github.com/Kuan-Lun/h2hdb/issues" 30 | 31 | [tool.setuptools] 32 | packages = ["h2hdb"] 33 | package-dir = { h2hdb = "src/h2hdb" } 34 | package-data = { h2hdb = ["py.typed"] } 35 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | setuptools>=80.7.1 -------------------------------------------------------------------------------- /src/h2hdb/__init__.py: -------------------------------------------------------------------------------- 1 | __all__ = [ 2 | "H2HDB", 3 | "DatabaseConfig", 4 | "LoggerConfig", 5 | 
"H2HConfig", 6 | "H2HDBConfig", 7 | "load_config", 8 | "HentaiDBLogger", 9 | "setup_logger", 10 | ] 11 | __author__ = "Kuan-Lun Wang" 12 | 13 | 14 | from .h2hdb_h2hdb import H2HDB 15 | from .config_loader import ( 16 | DatabaseConfig, 17 | LoggerConfig, 18 | H2HConfig, 19 | H2HDBConfig, 20 | load_config, 21 | ) 22 | from .logger import HentaiDBLogger, setup_logger 23 | -------------------------------------------------------------------------------- /src/h2hdb/__main__.py: -------------------------------------------------------------------------------- 1 | from h2hdb import H2HDB 2 | from .config_loader import load_config 3 | 4 | if __name__ == "__main__": 5 | config = load_config() 6 | with H2HDB(config=config) as connector: 7 | # Check the database character set and collation 8 | connector.check_database_character_set() 9 | connector.check_database_collation() 10 | # Create the main tables 11 | connector.create_main_tables() 12 | 13 | # Insert the H2H download 14 | connector.insert_h2h_download() 15 | 16 | connector.refresh_current_files_hashs() 17 | -------------------------------------------------------------------------------- /src/h2hdb/compress_gallery_to_cbz.py: -------------------------------------------------------------------------------- 1 | __all__ = ["compress_images_and_create_cbz", "calculate_hash_of_file_in_cbz"] 2 | 3 | import hashlib 4 | import os 5 | import shutil 6 | import zipfile 7 | 8 | from PIL import Image, ImageFile # type: ignore 9 | 10 | Image.MAX_IMAGE_PIXELS = None 11 | ImageFile.LOAD_TRUNCATED_IMAGES = True 12 | 13 | from .settings import hash_function_by_file 14 | 15 | from .settings import FILE_NAME_LENGTH_LIMIT, COMPARISON_HASH_ALGORITHM 16 | 17 | 18 | def compress_image(image_path: str, output_path: str, max_size: int) -> None: 19 | """Compress an image, saving it to the output path.""" 20 | with Image.open(image_path) as image: 21 | if image.mode in ("RGBA", "LA"): 22 | image = image.convert("RGBA") 23 | white_bg = 
Image.new("RGBA", image.size, (255, 255, 255, 255)) 24 | image = Image.alpha_composite(white_bg, image) 25 | image = image.convert("RGB") 26 | if image.mode != "RGB": 27 | image = image.convert("RGB") 28 | 29 | if max_size >= 1: 30 | if image.height >= image.width: 31 | max_width = max_size 32 | scale = max_size / image.width 33 | max_height = int(image.height * scale) 34 | else: 35 | max_height = max_size 36 | scale = max_size / image.height 37 | max_width = int(image.width * scale) 38 | 39 | unsuitable_formats = ["GIF", "TIFF", "ICO"] 40 | image.thumbnail((max_width, max_height), resample=Image.Resampling.LANCZOS) 41 | if image.format in unsuitable_formats: 42 | image.save(output_path, image.format) 43 | else: 44 | if "xmp" in image.info: 45 | del image.info["xmp"] 46 | image.save(output_path, "JPEG") 47 | 48 | 49 | def create_cbz(directory, output_path) -> None: 50 | """Create a CBZ file from all images in a directory.""" 51 | with zipfile.ZipFile(output_path, "w") as cbz: 52 | for filename in os.listdir(directory): 53 | cbz.write(os.path.join(directory, filename), filename) 54 | 55 | 56 | def hash_and_process_file( 57 | input_directory: str, 58 | tmp_cbz_directory: str, 59 | filename: str, 60 | exclude_hashs: list[bytes], 61 | max_size: int, 62 | ) -> None: 63 | file_hash = hash_function_by_file( 64 | os.path.join(input_directory, filename), COMPARISON_HASH_ALGORITHM 65 | ) 66 | if file_hash not in exclude_hashs: 67 | if filename.lower().endswith((".jpg", ".jpeg", ".png", "bmp")): 68 | new_filename = os.path.splitext(filename)[0] + ".jpg" 69 | compress_image( 70 | os.path.join(input_directory, filename), 71 | os.path.join(tmp_cbz_directory, new_filename), 72 | max_size, 73 | ) 74 | elif filename.lower().endswith(".gif"): 75 | compress_image( 76 | os.path.join(input_directory, filename), 77 | os.path.join(tmp_cbz_directory, filename), 78 | max_size, 79 | ) 80 | else: 81 | shutil.copy( 82 | os.path.join(input_directory, filename), 83 | 
os.path.join(tmp_cbz_directory, filename), 84 | ) 85 | 86 | 87 | # Compress images and create a CBZ file 88 | def compress_images_and_create_cbz( 89 | input_directory: str, 90 | output_directory: str, 91 | tmp_directory: str, 92 | max_size: int, 93 | exclude_hashs: list[bytes], 94 | ) -> None: 95 | if len(set([input_directory, output_directory, tmp_directory])) < 2: 96 | raise ValueError("Input and output directories cannot be the same.") 97 | 98 | # Create the output directory 99 | gallery_name = os.path.basename(input_directory) 100 | tmp_cbz_directory = os.path.join(tmp_directory, gallery_name) 101 | if os.path.exists(tmp_cbz_directory): 102 | shutil.rmtree(tmp_cbz_directory) 103 | os.makedirs(tmp_cbz_directory) 104 | 105 | for filename in os.listdir(input_directory): 106 | hash_and_process_file( 107 | input_directory, tmp_cbz_directory, filename, exclude_hashs, max_size 108 | ) 109 | 110 | # Create the CBZ file 111 | os.makedirs(output_directory, exist_ok=True) 112 | cbzfile = os.path.join( 113 | output_directory, gallery_name_to_cbz_file_name(gallery_name) 114 | ) 115 | create_cbz(tmp_cbz_directory, cbzfile) 116 | shutil.rmtree(tmp_cbz_directory) 117 | 118 | 119 | def gallery_name_to_cbz_file_name(gallery_name: str) -> str: 120 | """Convert a gallery name to a CBZ file name.""" 121 | while (len(gallery_name.encode("utf-8")) + 4) > FILE_NAME_LENGTH_LIMIT: 122 | gallery_name = gallery_name[1:] 123 | return gallery_name + ".cbz" 124 | 125 | 126 | def calculate_hash_of_file_in_cbz( 127 | cbz_path: str, file_name: str, algorithm: str 128 | ) -> bytes: 129 | if zipfile.is_zipfile(cbz_path): 130 | with zipfile.ZipFile(cbz_path, "r") as myzip: 131 | with myzip.open(file_name) as myfile: 132 | file_content = myfile.read() 133 | hash_object = hashlib.new(algorithm) 134 | hash_object.update(file_content) 135 | hash_of_file = hash_object.digest() 136 | else: 137 | hash_of_file = bytes(0) 138 | return hash_of_file 139 | 
__all__ = ["DatabaseConfig", "LoggerConfig", "H2HConfig", "H2HDBConfig", "load_config"]

import argparse
import json
import os

from pydantic import BaseModel, Field, ConfigDict, field_validator

from .settings import LOG_LEVEL, CBZ_GROUPING, CBZ_SORT


class ConfigError(Exception):
    """
    Exception raised for errors in the configuration.

    Attributes:
        message -- explanation of the error
    """

    def __init__(self, message: str) -> None:
        self.message = message
        super().__init__(self.message)


class ConfigModel(BaseModel):
    """
    Base class for configuration models.

    Inherits from `pydantic.BaseModel` and forbids unknown keys
    (`extra="forbid"`), so a typo in a config file fails validation instead
    of being silently ignored.
    """

    model_config = ConfigDict(extra="forbid")


class DatabaseConfig(ConfigModel):
    """Connection settings for the SQL backend (currently MySQL only)."""

    sql_type: str = Field(
        default="mysql",
        description="Type of SQL database (e.g., mysql)",
    )
    host: str = Field(
        default="localhost",
        min_length=1,
        description="Host of the SQL database",
    )
    port: int = Field(
        default=3306,
        ge=1,
        le=65535,
        description="Port of the SQL database",
    )
    user: str = Field(
        default="root",
        min_length=1,
        description="User for the SQL database",
    )
    database: str = Field(
        default="h2h",
        min_length=1,
        description="Database name for the SQL database",
    )
    password: str = Field(
        default="password",
        description="Password for the SQL database",
    )


class LoggerConfig(ConfigModel):
    """Logging settings.

    FIX: base class changed from `BaseModel` to `ConfigModel` so the
    `logger` section also rejects unknown keys, consistent with every other
    configuration section and the schema documented in the README.
    """

    level: LOG_LEVEL = Field(
        default=LOG_LEVEL.info,
        description="Log level (case-insensitive): NOTSET, DEBUG, INFO, WARNING, ERROR, CRITICAL",
    )

    @field_validator("level", mode="before")
    @classmethod
    def normalize_level(cls, v) -> LOG_LEVEL:
        """Accept a level name (any case), a numeric level, or a LOG_LEVEL member."""
        if isinstance(v, str):
            v_lower = v.lower()
            try:
                return LOG_LEVEL[v_lower]  # Enum lookup by member name
            except KeyError:
                raise ValueError(
                    f"Invalid log level '{v}'. Must be one of: "
                    + ", ".join(name.upper() for name in LOG_LEVEL.__members__)
                )
        elif isinstance(v, int):
            try:
                return LOG_LEVEL(v)
            except ValueError:
                raise ValueError(f"Invalid log level value: {v}")
        elif isinstance(v, LOG_LEVEL):
            return v
        else:
            raise TypeError(f"Invalid type for log level: {type(v)}")


class H2HConfig(ConfigModel):
    """Paths and CBZ-generation settings for the H@H side."""

    download_path: str = Field(
        default="download",
        min_length=1,
        description="Path to download files",
    )
    cbz_path: str = Field(
        default="",
        min_length=0,
        description="Path to save CBZ files",
    )
    cbz_max_size: int = Field(
        default=768,
        ge=1,
        description="Maximum width or height (in pixels) allowed for each image in the CBZ file",
    )
    cbz_grouping: CBZ_GROUPING = Field(
        default=CBZ_GROUPING.flat,
        description="Grouping method for CBZ files: flat, date-yyyy, date-yyyy-mm, or date-yyyy-mm-dd",
    )
    cbz_sort: CBZ_SORT = Field(
        default=CBZ_SORT.no,
        description="Sorting method for CBZ files: no, upload_time, download_time, pages, or pages+[num]",
    )

    @property
    def cbz_tmp_directory(self) -> str:
        """Staging directory used while building a CBZ, under cbz_path."""
        return os.path.join(self.cbz_path, "tmp")


class H2HDBConfig(ConfigModel):
    """
    Configuration class for H2HDB.

    Combines the H2H, database, and logger configurations into a single
    validated configuration object.
    """

    h2h: H2HConfig = Field(
        default_factory=H2HConfig,
        description="Configuration for H2H",
    )
    database: DatabaseConfig = Field(
        default_factory=DatabaseConfig,
        description="Configuration for the database",
    )
    logger: LoggerConfig = Field(
        default_factory=LoggerConfig,
        description="Configuration for the logger",
    )


def load_config(config_path: str = "") -> H2HDBConfig:
    """Load configuration from a JSON file.

    Resolution order: an explicit ``config_path`` argument, then the
    ``--config`` command-line flag, then an empty config (every field falls
    back to its declared default).
    """
    if config_path:
        with open(config_path, "r") as f:
            raw = json.load(f)
    else:
        parser = argparse.ArgumentParser()
        parser.add_argument("--config")
        args = parser.parse_args()
        if args.config:
            with open(args.config, "r") as f:
                raw = json.load(f)
        else:
            # No config given: validate an empty dict so defaults fill in.
            raw = {}

    return H2HDBConfig.model_validate(raw)
.threading_tools import POOL_CPU_LIMIT 31 | 32 | 33 | def get_sorting_base_level(x: int = 20) -> int: 34 | zero_level = max(x, 1) 35 | return zero_level 36 | 37 | 38 | class H2HDB( 39 | H2HDBGalleriesInfos, 40 | H2HDBGalleriesComments, 41 | H2HDBGalleriesTags, 42 | H2HDBFiles, 43 | H2HDBRemovedGalleries, 44 | ): 45 | def _create_pending_gallery_removals_table(self) -> None: 46 | with self.SQLConnector() as connector: 47 | table_name = "pending_gallery_removals" 48 | match self.config.database.sql_type.lower(): 49 | case "mysql": 50 | column_name = "name" 51 | column_name_parts, create_gallery_name_parts_sql = ( 52 | self.mysql_split_gallery_name_based_on_limit(column_name) 53 | ) 54 | query = f""" 55 | CREATE TABLE IF NOT EXISTS {table_name} ( 56 | PRIMARY KEY ({", ".join(column_name_parts)}), 57 | {create_gallery_name_parts_sql}, 58 | full_name TEXT NOT NULL, 59 | FULLTEXT (full_name) 60 | ) 61 | """ 62 | connector.execute(query) 63 | self.logger.info(f"{table_name} table created.") 64 | 65 | def _count_duplicated_files_hashs_sha512(self) -> int: 66 | with self.SQLConnector() as connector: 67 | table_name = "duplicated_files_hashs_sha512" 68 | match self.config.database.sql_type.lower(): 69 | case "mysql": 70 | query = f""" 71 | SELECT COUNT(*) 72 | FROM {table_name} 73 | """ 74 | query_result = connector.fetch_one(query) 75 | return query_result[0] 76 | 77 | def _create_duplicated_galleries_tables(self) -> None: 78 | with self.SQLConnector() as connector: 79 | match self.config.database.sql_type.lower(): 80 | case "mysql": 81 | query = """ 82 | CREATE VIEW IF NOT EXISTS duplicated_files_hashs_sha512 AS 83 | SELECT db_file_id, 84 | db_hash_id 85 | FROM files_hashs_sha512 86 | GROUP BY db_hash_id 87 | HAVING COUNT(*) >= 3 88 | """ 89 | connector.execute(query) 90 | 91 | with self.SQLConnector() as connector: 92 | match self.config.database.sql_type.lower(): 93 | case "mysql": 94 | query = """ 95 | CREATE VIEW IF NOT EXISTS 
duplicated_hash_values_by_count_artist_ratio AS WITH duplicated_db_dbids AS ( 96 | SELECT galleries_dbids.db_gallery_id AS db_gallery_id, 97 | files_dbids.db_file_id AS db_file_id, 98 | duplicated_files_hashs_sha512.db_hash_id AS db_hash_id, 99 | galleries_tag_pairs_dbids.tag_value AS artist_value 100 | FROM duplicated_files_hashs_sha512 101 | LEFT JOIN files_hashs_sha512 ON duplicated_files_hashs_sha512.db_hash_id = files_hashs_sha512.db_hash_id 102 | LEFT JOIN files_dbids ON files_hashs_sha512.db_file_id = files_dbids.db_file_id 103 | LEFT JOIN galleries_dbids ON files_dbids.db_gallery_id = galleries_dbids.db_gallery_id 104 | LEFT JOIN galleries_tags ON galleries_dbids.db_gallery_id = galleries_tags.db_gallery_id 105 | LEFT JOIN galleries_tag_pairs_dbids ON galleries_tags.db_tag_pair_id = galleries_tag_pairs_dbids.db_tag_pair_id 106 | WHERE galleries_tag_pairs_dbids.tag_name = 'artist' 107 | ), 108 | duplicated_count_artists_by_db_gallery_id AS( 109 | SELECT COUNT(DISTINCT artist_value) AS artist_count, 110 | db_gallery_id 111 | FROM duplicated_db_dbids 112 | GROUP BY db_gallery_id 113 | ) 114 | SELECT files_hashs_sha512_dbids.hash_value AS hash_value 115 | FROM duplicated_db_dbids 116 | LEFT JOIN duplicated_count_artists_by_db_gallery_id ON duplicated_db_dbids.db_gallery_id = duplicated_count_artists_by_db_gallery_id.db_gallery_id 117 | LEFT JOIN files_hashs_sha512_dbids ON duplicated_db_dbids.db_hash_id = files_hashs_sha512_dbids.db_hash_id 118 | GROUP BY duplicated_db_dbids.db_hash_id 119 | HAVING COUNT(DISTINCT duplicated_db_dbids.artist_value) / MAX( 120 | duplicated_count_artists_by_db_gallery_id.artist_count 121 | ) > 2 122 | """ 123 | connector.execute(query) 124 | 125 | def insert_pending_gallery_removal(self, gallery_name: str) -> None: 126 | with self.SQLConnector() as connector: 127 | if self.check_pending_gallery_removal(gallery_name) is False: 128 | table_name = "pending_gallery_removals" 129 | if len(gallery_name) > FOLDER_NAME_LENGTH_LIMIT: 130 | 
self.logger.error( 131 | f"Gallery name '{gallery_name}' is too long. Must be {FOLDER_NAME_LENGTH_LIMIT} characters or less." 132 | ) 133 | raise ValueError("Gallery name is too long.") 134 | gallery_name_parts = self._split_gallery_name(gallery_name) 135 | 136 | match self.config.database.sql_type.lower(): 137 | case "mysql": 138 | column_name_parts, _ = ( 139 | self.mysql_split_gallery_name_based_on_limit("name") 140 | ) 141 | insert_query = f""" 142 | INSERT INTO {table_name} ({", ".join(column_name_parts)}, full_name) 143 | VALUES ({", ".join(["%s" for _ in column_name_parts])}, %s) 144 | """ 145 | connector.execute( 146 | insert_query, (*tuple(gallery_name_parts), gallery_name) 147 | ) 148 | 149 | def check_pending_gallery_removal(self, gallery_name: str) -> bool: 150 | with self.SQLConnector() as connector: 151 | table_name = "pending_gallery_removals" 152 | gallery_name_parts = self._split_gallery_name(gallery_name) 153 | match self.config.database.sql_type.lower(): 154 | case "mysql": 155 | column_name_parts, _ = self.mysql_split_gallery_name_based_on_limit( 156 | "name" 157 | ) 158 | select_query = f""" 159 | SELECT full_name 160 | FROM {table_name} 161 | WHERE {" AND ".join([f"{part} = %s" for part in column_name_parts])} 162 | """ 163 | query_result = connector.fetch_one(select_query, tuple(gallery_name_parts)) 164 | return len(query_result) != 0 165 | 166 | def get_pending_gallery_removals(self) -> list[str]: 167 | with self.SQLConnector() as connector: 168 | table_name = "pending_gallery_removals" 169 | match self.config.database.sql_type.lower(): 170 | case "mysql": 171 | select_query = f""" 172 | SELECT full_name 173 | FROM {table_name} 174 | """ 175 | 176 | query_result = connector.fetch_all(select_query) 177 | pending_gallery_removals = [query[0] for query in query_result] 178 | return pending_gallery_removals 179 | 180 | def delete_pending_gallery_removal(self, gallery_name: str) -> None: 181 | with self.SQLConnector() as connector: 182 | 
table_name = "pending_gallery_removals" 183 | match self.config.database.sql_type.lower(): 184 | case "mysql": 185 | column_name_parts, _ = self.mysql_split_gallery_name_based_on_limit( 186 | "name" 187 | ) 188 | delete_query = f""" 189 | DELETE FROM {table_name} WHERE {" AND ".join([f"{part} = %s" for part in column_name_parts])} 190 | """ 191 | 192 | gallery_name_parts = self._split_gallery_name(gallery_name) 193 | connector.execute(delete_query, tuple(gallery_name_parts)) 194 | 195 | def delete_pending_gallery_removals(self) -> None: 196 | pending_gallery_removals = self.get_pending_gallery_removals() 197 | for gallery_name in pending_gallery_removals: 198 | self.delete_gallery_file(gallery_name) 199 | self.delete_gallery(gallery_name) 200 | self.delete_pending_gallery_removal(gallery_name) 201 | 202 | def delete_gallery_file(self, gallery_name: str) -> None: 203 | # self.logger.info(f"Gallery images for '{gallery_name}' deleted.") 204 | pass 205 | 206 | def delete_gallery(self, gallery_name: str) -> None: 207 | with self.SQLConnector() as connector: 208 | if not self._check_galleries_dbids_by_gallery_name(gallery_name): 209 | self.logger.debug(f"Gallery '{gallery_name}' does not exist.") 210 | return 211 | 212 | match self.config.database.sql_type.lower(): 213 | case "mysql": 214 | column_name_parts, _ = self.mysql_split_gallery_name_based_on_limit( 215 | "name" 216 | ) 217 | get_delete_gallery_id_query = f""" 218 | DELETE FROM galleries_dbids 219 | WHERE {" AND ".join([f"{part} = %s" for part in column_name_parts])} 220 | """ 221 | 222 | gallery_name_parts = self._split_gallery_name(gallery_name) 223 | connector.execute(get_delete_gallery_id_query, tuple(gallery_name_parts)) 224 | self.logger.info(f"Gallery '{gallery_name}' deleted.") 225 | 226 | def optimize_database(self) -> None: 227 | with self.SQLConnector() as connector: 228 | match self.config.database.sql_type.lower(): 229 | case "mysql": 230 | select_table_name_query = f""" 231 | SELECT TABLE_NAME 232 
| FROM INFORMATION_SCHEMA.KEY_COLUMN_USAGE 233 | WHERE REFERENCED_TABLE_SCHEMA = '{self.config.database.database}' 234 | """ 235 | table_names = connector.fetch_all(select_table_name_query) 236 | table_names = [t[0] for t in table_names] 237 | 238 | with self.SQLConnector() as connector: 239 | match self.config.database.sql_type.lower(): 240 | case "mysql": 241 | get_optimize_query = lambda x: "OPTIMIZE TABLE {x}".format(x=x) 242 | 243 | for table_name in table_names: 244 | connector.execute(get_optimize_query(table_name)) 245 | self.logger.info("Database optimized.") 246 | 247 | def _create_pending_download_gids_view(self) -> None: 248 | with self.SQLConnector() as connector: 249 | match self.config.database.sql_type.lower(): 250 | case "mysql": 251 | query = """ 252 | CREATE VIEW IF NOT EXISTS pending_download_gids AS 253 | SELECT gids.gid AS gid 254 | FROM (SELECT * 255 | FROM galleries_redownload_times AS grt0 256 | WHERE DATE_ADD(grt0.time, INTERVAL 7 DAY) <= NOW() 257 | ) 258 | AS grt 259 | INNER JOIN galleries_download_times AS gdt 260 | on grt.db_gallery_id = gdt.db_gallery_id 261 | INNER JOIN galleries_upload_times AS gut 262 | ON grt.db_gallery_id = gut.db_gallery_id 263 | INNER JOIN galleries_gids AS gids 264 | ON grt.db_gallery_id = gids.db_gallery_id 265 | WHERE grt.time <= DATE_ADD(gut.time, INTERVAL 1 YEAR) 266 | AND DATE_ADD(gut.time, INTERVAL 7 DAY) <= NOW() 267 | OR DATE_ADD(gdt.time, INTERVAL 7 DAY) <= grt.time 268 | ORDER BY gut.`time` DESC 269 | """ 270 | connector.execute(query) 271 | self.logger.info("pending_download_gids view created.") 272 | 273 | def get_pending_download_gids(self) -> list[int]: 274 | with self.SQLConnector() as connector: 275 | match self.config.database.sql_type.lower(): 276 | case "mysql": 277 | query = """ 278 | SELECT gid 279 | FROM pending_download_gids 280 | """ 281 | query_result = connector.fetch_all(query) 282 | pending_download_gids = [query[0] for query in query_result] 283 | return pending_download_gids 284 | 
285 | def _create_todelete_gids_table(self) -> None: 286 | with self.SQLConnector() as connector: 287 | table_name = "todelete_gids" 288 | match self.config.database.sql_type.lower(): 289 | case "mysql": 290 | query = f""" 291 | CREATE TABLE IF NOT EXISTS {table_name} ( 292 | PRIMARY KEY (gid), 293 | FOREIGN KEY (gid) REFERENCES galleries_gids(gid) 294 | ON UPDATE CASCADE 295 | ON DELETE CASCADE, 296 | gid INT UNSIGNED NOT NULL 297 | ) 298 | """ 299 | connector.execute(query) 300 | self.logger.info(f"{table_name} table created.") 301 | 302 | def _create_todelete_names_view(self) -> None: 303 | with self.SQLConnector() as connector: 304 | table_name = "todelete_names" 305 | match self.config.database.sql_type.lower(): 306 | case "mysql": 307 | query = f""" 308 | CREATE VIEW IF NOT EXISTS {table_name} AS 309 | SELECT full_name 310 | FROM 311 | (SELECT galleries_names.full_name AS full_name 312 | FROM todelete_gids 313 | INNER JOIN galleries_gids 314 | ON galleries_gids.gid = todelete_gids.gid 315 | INNER JOIN galleries_names 316 | ON galleries_names.db_gallery_id = galleries_gids.db_gallery_id) AS todelete_names 317 | UNION 318 | SELECT full_name 319 | FROM ( 320 | SELECT gi.name AS full_name 321 | FROM galleries_infos gi 322 | JOIN ( 323 | SELECT gid, MAX(download_time) AS max_download_time 324 | FROM galleries_infos 325 | GROUP BY gid 326 | HAVING COUNT(*) > 1 327 | ) sub ON gi.gid = sub.gid 328 | WHERE gi.download_time < sub.max_download_time 329 | ) AS duplicated_gids_names 330 | """ 331 | connector.execute(query) 332 | self.logger.info(f"{table_name} table created.") 333 | 334 | def check_todelete_gid(self, gid: int) -> bool: 335 | with self.SQLConnector() as connector: 336 | table_name = "todelete_gids" 337 | match self.config.database.sql_type.lower(): 338 | case "mysql": 339 | select_query = f""" 340 | SELECT gid 341 | FROM {table_name} 342 | WHERE gid = %s 343 | """ 344 | query_result = connector.fetch_one(select_query, (gid,)) 345 | return len(query_result) 
!= 0 346 | 347 | def insert_todelete_gid(self, gid: int) -> None: 348 | if not self.check_todelete_gid(gid): 349 | with self.SQLConnector() as connector: 350 | table_name = "todelete_gids" 351 | match self.config.database.sql_type.lower(): 352 | case "mysql": 353 | insert_query = f""" 354 | INSERT INTO {table_name} (gid) VALUES (%s) 355 | """ 356 | connector.execute(insert_query, (gid,)) 357 | 358 | def _create_todownload_gids_table(self) -> None: 359 | with self.SQLConnector() as connector: 360 | table_name = "todownload_gids" 361 | match self.config.database.sql_type.lower(): 362 | case "mysql": 363 | query = f""" 364 | CREATE TABLE IF NOT EXISTS {table_name} ( 365 | PRIMARY KEY (gid), 366 | gid INT UNSIGNED NOT NULL, 367 | url CHAR({self.innodb_index_prefix_limit}) NOT NULL 368 | ) 369 | """ 370 | connector.execute(query) 371 | self.logger.info(f"{table_name} table created.") 372 | 373 | def check_todownload_gid(self, gid: int, url: str) -> bool: 374 | with self.SQLConnector() as connector: 375 | table_name = "todownload_gids" 376 | match self.config.database.sql_type.lower(): 377 | case "mysql": 378 | if url != "": 379 | select_query = f""" 380 | SELECT gid 381 | FROM {table_name} 382 | WHERE gid = %s AND url = %s 383 | """ 384 | query_result = connector.fetch_one(select_query, (gid, url)) 385 | else: 386 | select_query = f""" 387 | SELECT gid 388 | FROM {table_name} 389 | WHERE gid = %s 390 | """ 391 | query_result = connector.fetch_one(select_query, (gid,)) 392 | return len(query_result) != 0 393 | 394 | def insert_todownload_gid(self, gid: int, url: str) -> None: 395 | if url != "": 396 | gallery = GalleryURLParser(url) 397 | gid = gallery.gid 398 | if gallery.gid != gid and gid != 0: 399 | raise ValueError( 400 | f"Gallery GID {gid} does not match URL GID {gallery.gid}." 
401 | ) 402 | elif gid <= 0: 403 | raise ValueError("Gallery GID must be greater than zero.") 404 | 405 | if not self.check_todownload_gid(gid, url): 406 | if (url == "") or (not self.check_todownload_gid(gid, "")): 407 | with self.SQLConnector() as connector: 408 | table_name = "todownload_gids" 409 | match self.config.database.sql_type.lower(): 410 | case "mysql": 411 | insert_query = f""" 412 | INSERT INTO {table_name} (gid, url) VALUES (%s, %s) 413 | """ 414 | connector.execute(insert_query, (gid, url)) 415 | else: 416 | self.update_todownload_gid(gid, url) 417 | 418 | def update_todownload_gid(self, gid: int, url: str) -> None: 419 | with self.SQLConnector() as connector: 420 | table_name = "todownload_gids" 421 | match self.config.database.sql_type.lower(): 422 | case "mysql": 423 | update_query = f""" 424 | UPDATE {table_name} SET url = %s WHERE gid = %s 425 | """ 426 | connector.execute(update_query, (url, gid)) 427 | 428 | def remove_todownload_gid(self, gid: int) -> None: 429 | with self.SQLConnector() as connector: 430 | table_name = "todownload_gids" 431 | match self.config.database.sql_type.lower(): 432 | case "mysql": 433 | delete_query = f""" 434 | DELETE FROM {table_name} WHERE gid = %s 435 | """ 436 | connector.execute(delete_query, (gid,)) 437 | 438 | def get_todownload_gids(self) -> list[tuple[int, str]]: 439 | with self.SQLConnector() as connector: 440 | table_name = "todownload_gids" 441 | match self.config.database.sql_type.lower(): 442 | case "mysql": 443 | select_query = f""" 444 | SELECT gid, url 445 | FROM {table_name} 446 | """ 447 | query_result = connector.fetch_all(select_query) 448 | todownload_gids = [(query[0], query[1]) for query in query_result] 449 | return todownload_gids 450 | 451 | def create_main_tables(self) -> None: 452 | self.logger.debug("Creating main tables...") 453 | self._create_todownload_gids_table() 454 | self._create_pending_gallery_removals_table() 455 | self._create_galleries_names_table() 456 | 
def update_redownload_time_to_now_by_gid(self, gid: int) -> None:
    """Set the gallery's redownload timestamp to the server's NOW() by gid."""
    db_gallery_id = self._get_db_gallery_id_by_gid(gid)
    table_name = "galleries_redownload_times"
    with self.SQLConnector() as connector:
        match self.config.database.sql_type.lower():
            case "mysql":
                update_query = f"""
                UPDATE {table_name} SET time = NOW() WHERE db_gallery_id = %s
                """
        connector.execute(update_query, (db_gallery_id,))

def _insert_gallery_info(self, galleryinfo_params: GalleryInfoParser) -> None:
    """Insert one parsed gallery (metadata, files, hashes, tags) into the DB.

    The gallery is registered as "pending removal" for the duration of the
    insert, so a crash mid-way leaves it flagged for cleanup; the flag is
    cleared at the end on success.
    """
    self.insert_pending_gallery_removal(galleryinfo_params.gallery_name)

    # The name row must exist first: every other insert keys off its db id.
    self._insert_gallery_name(galleryinfo_params.gallery_name)
    db_gallery_id = self._get_db_gallery_id_by_gallery_name(
        galleryinfo_params.gallery_name
    )

    # Independent per-attribute inserts are fanned out onto worker threads;
    # they all target distinct tables keyed by db_gallery_id.
    with SQLThreadsList() as threads:
        threads.append(
            target=self._insert_gallery_gid,
            args=(db_gallery_id, galleryinfo_params.gid),
        )
        threads.append(
            target=self._insert_gallery_title,
            args=(db_gallery_id, galleryinfo_params.title),
        )
        threads.append(
            target=self._insert_upload_time,
            args=(db_gallery_id, galleryinfo_params.upload_time),
        )
        threads.append(
            target=self._insert_gallery_comment,
            args=(db_gallery_id, galleryinfo_params.galleries_comments),
        )
        threads.append(
            target=self._insert_gallery_upload_account,
            args=(db_gallery_id, galleryinfo_params.upload_account),
        )
        threads.append(
            target=self._insert_download_time,
            args=(db_gallery_id, galleryinfo_params.download_time),
        )
        threads.append(
            # Access time is initialized from the download time.
            target=self._insert_access_time,
            args=(db_gallery_id, galleryinfo_params.download_time),
        )
        threads.append(
            target=self._insert_modified_time,
            args=(db_gallery_id, galleryinfo_params.modified_time),
        )
        threads.append(
            target=self._insert_gallery_files,
            args=(db_gallery_id, galleryinfo_params.files_path),
        )

    # Hash every file on disk and record the hashes against its db file id.
    file_pairs: list[FileInformation] = list()
    for file_path in galleryinfo_params.files_path:
        db_file_id = self._get_db_file_id(db_gallery_id, file_path)
        absolute_file_path = os.path.join(
            galleryinfo_params.gallery_folder, file_path
        )
        file_pairs.append(FileInformation(absolute_file_path, db_file_id))
    self._insert_gallery_file_hash_for_db_gallery_id(file_pairs)

    # tags arrive as (name, value) pairs.
    taglist: list[TagInformation] = list()
    for tag in galleryinfo_params.tags:
        taglist.append(TagInformation(tag[0], tag[1]))
    self._insert_gallery_tags(db_gallery_id, taglist)

    self.delete_pending_gallery_removal(galleryinfo_params.gallery_name)
def _check_gallery_info_file_hash(
    self, galleryinfo_params: GalleryInfoParser
) -> bool:
    """Return True if the stored hash of the gallery's info file still
    matches the file currently on disk (i.e. nothing changed)."""
    gallery_name = galleryinfo_params.gallery_name

    # Unknown gallery: nothing stored to compare against.
    if not self._check_galleries_dbids_by_gallery_name(gallery_name):
        return False
    db_gallery_id = self._get_db_gallery_id_by_gallery_name(gallery_name)

    # The info file itself must be registered for this gallery.
    if not self._check_db_file_id(db_gallery_id, GALLERY_INFO_FILE_NAME):
        return False
    gallery_info_file_id = self._get_db_file_id(
        db_gallery_id, GALLERY_INFO_FILE_NAME
    )

    # And a hash must have been recorded under the comparison algorithm.
    if not self._check_hash_value_by_file_id(
        gallery_info_file_id, COMPARISON_HASH_ALGORITHM
    ):
        return False

    stored_hash = self.get_hash_value_by_file_id(
        gallery_info_file_id, COMPARISON_HASH_ALGORITHM
    )
    on_disk_hash = hash_function_by_file(
        os.path.join(galleryinfo_params.gallery_folder, GALLERY_INFO_FILE_NAME),
        COMPARISON_HASH_ALGORITHM,
    )
    return stored_hash == on_disk_hash
self._get_db_gallery_id_by_gallery_name( 559 | galleryinfo_params.gallery_name 560 | ) 561 | 562 | if not self._check_db_file_id(db_gallery_id, GALLERY_INFO_FILE_NAME): 563 | return False 564 | gallery_info_file_id = self._get_db_file_id( 565 | db_gallery_id, GALLERY_INFO_FILE_NAME 566 | ) 567 | absolute_file_path = os.path.join( 568 | galleryinfo_params.gallery_folder, GALLERY_INFO_FILE_NAME 569 | ) 570 | 571 | if not self._check_hash_value_by_file_id( 572 | gallery_info_file_id, COMPARISON_HASH_ALGORITHM 573 | ): 574 | return False 575 | original_hash_value = self.get_hash_value_by_file_id( 576 | gallery_info_file_id, COMPARISON_HASH_ALGORITHM 577 | ) 578 | current_hash_value = hash_function_by_file( 579 | absolute_file_path, COMPARISON_HASH_ALGORITHM 580 | ) 581 | issame = original_hash_value == current_hash_value 582 | return issame 583 | 584 | def _get_duplicated_hash_values_by_count_artist_ratio(self) -> list[bytes]: 585 | with self.SQLConnector() as connector: 586 | table_name = "duplicated_hash_values_by_count_artist_ratio" 587 | match self.config.database.sql_type.lower(): 588 | case "mysql": 589 | select_query = f""" 590 | SELECT hash_value 591 | FROM {table_name} 592 | """ 593 | 594 | query_result = connector.fetch_all(select_query) 595 | return [query[0] for query in query_result] 596 | 597 | def insert_gallery_info(self, gallery_folder: str) -> bool: 598 | galleryinfo_params = parse_galleryinfo(gallery_folder) 599 | is_thesame = self._check_gallery_info_file_hash(galleryinfo_params) 600 | is_insert = is_thesame is False 601 | if is_insert: 602 | self.logger.debug( 603 | f"Inserting gallery '{galleryinfo_params.gallery_name}'..." 
def compress_gallery_to_cbz(
    self, gallery_folder: str, exclude_hashs: list[bytes]
) -> bool:
    """Compress *gallery_folder* into a CBZ under the configured cbz path.

    The target sub-directory is chosen by ``config.h2h.cbz_grouping``
    (by upload year / year-month / year-month-day, or flat). An existing
    CBZ is rebuilt only if the gallery-info file hash stored in the DB
    differs from the one inside the CBZ.

    Args:
        gallery_folder: Source folder of the gallery.
        exclude_hashs: File hashes to skip when building the archive.

    Returns:
        bool: True if a CBZ was (re)created, False if it was up to date.

    Raises:
        ValueError: If ``cbz_grouping`` has an unrecognized value.
    """
    from .compress_gallery_to_cbz import (
        compress_images_and_create_cbz,
        calculate_hash_of_file_in_cbz,
    )

    galleryinfo_params = parse_galleryinfo(gallery_folder)
    match self.config.h2h.cbz_grouping:
        case "date-yyyy":
            upload_time = self.get_upload_time_by_gallery_name(
                galleryinfo_params.gallery_name
            )
            relative_cbz_directory = str(upload_time.year).rjust(4, "0")
        case "date-yyyy-mm":
            upload_time = self.get_upload_time_by_gallery_name(
                galleryinfo_params.gallery_name
            )
            relative_cbz_directory = os.path.join(
                str(upload_time.year).rjust(4, "0"),
                str(upload_time.month).rjust(2, "0"),
            )
        case "date-yyyy-mm-dd":
            upload_time = self.get_upload_time_by_gallery_name(
                galleryinfo_params.gallery_name
            )
            relative_cbz_directory = os.path.join(
                str(upload_time.year).rjust(4, "0"),
                str(upload_time.month).rjust(2, "0"),
                str(upload_time.day).rjust(2, "0"),
            )
        case "flat":
            relative_cbz_directory = ""
        case _:
            raise ValueError(
                f"Invalid cbz_grouping value: {self.config.h2h.cbz_grouping}"
            )
    cbz_directory = os.path.join(self.config.h2h.cbz_path, relative_cbz_directory)
    cbz_tmp_directory = os.path.join(self.config.h2h.cbz_path, "tmp")

    # Trim from the FRONT until the UTF-8 name plus the 4-byte ".cbz"
    # suffix fits the file-name limit (keeps the distinctive tail).
    # NOTE(review): a module-level gallery_name_to_cbz_file_name exists in
    # .compress_gallery_to_cbz — confirm this local copy stays in sync.
    def gallery_name2cbz_file_name(gallery_name: str) -> str:
        while (len(gallery_name.encode("utf-8")) + 4) > FILE_NAME_LENGTH_LIMIT:
            gallery_name = gallery_name[1:]
        return gallery_name + ".cbz"

    cbz_path = os.path.join(
        cbz_directory, gallery_name2cbz_file_name(galleryinfo_params.gallery_name)
    )
    if os.path.exists(cbz_path):
        # Rebuild only when the stored info-file hash and the CBZ's differ.
        db_gallery_id = self._get_db_gallery_id_by_gallery_name(
            galleryinfo_params.gallery_name
        )
        gallery_info_file_id = self._get_db_file_id(
            db_gallery_id, GALLERY_INFO_FILE_NAME
        )
        original_hash_value = self.get_hash_value_by_file_id(
            gallery_info_file_id, COMPARISON_HASH_ALGORITHM
        )
        cbz_hash_value = calculate_hash_of_file_in_cbz(
            cbz_path, GALLERY_INFO_FILE_NAME, COMPARISON_HASH_ALGORITHM
        )
        if original_hash_value != cbz_hash_value:
            compress_images_and_create_cbz(
                gallery_folder,
                cbz_directory,
                cbz_tmp_directory,
                self.config.h2h.cbz_max_size,
                exclude_hashs,
            )
            result = True
        else:
            result = False
    else:
        compress_images_and_create_cbz(
            gallery_folder,
            cbz_directory,
            cbz_tmp_directory,
            self.config.h2h.cbz_max_size,
            exclude_hashs,
        )
        result = True
    return result
def scan_current_galleries_folders(self) -> tuple[list[str], list[str]]:
    """Walk the H@H download path and diff it against the database.

    Every folder containing the gallery-info file is collected; the names
    are loaded into a temporary table and LEFT-JOINed against
    ``galleries_dbids`` so galleries present in the DB but gone from disk
    get queued (and flushed) as pending removals.

    Returns:
        tuple[list[str], list[str]]: (gallery folders, gallery names)
        currently on disk, index-aligned.
    """
    self.delete_pending_gallery_removals()

    with self.SQLConnector() as connector:
        tmp_table_name = "tmp_current_galleries"
        match self.config.database.sql_type.lower():
            case "mysql":
                # Gallery names are split into CHAR chunks so each part fits
                # the InnoDB index prefix limit.
                column_name = "name"
                column_name_parts, create_gallery_name_parts_sql = (
                    self.mysql_split_gallery_name_based_on_limit(column_name)
                )
                query = f"""
                CREATE TEMPORARY TABLE IF NOT EXISTS {tmp_table_name} (
                    PRIMARY KEY ({", ".join(column_name_parts)}),
                    {create_gallery_name_parts_sql}
                )
                """

        connector.execute(query)
        self.logger.info(f"{tmp_table_name} table created.")

        match self.config.database.sql_type.lower():
            case "mysql":
                column_name_parts, _ = self.mysql_split_gallery_name_based_on_limit(
                    "name"
                )
                insert_query = f"""
                INSERT INTO {tmp_table_name}
                    ({", ".join(column_name_parts)})
                VALUES ({", ".join(["%s" for _ in column_name_parts])})
                """

        data: list[tuple] = list()
        current_galleries_folders: list[str] = list()
        current_galleries_names: list[str] = list()
        for root, _, files in os.walk(self.config.h2h.download_path):
            # A folder is a gallery iff it contains the info file.
            if GALLERY_INFO_FILE_NAME in files:
                current_galleries_folders.append(root)
                gallery_name = os.path.basename(current_galleries_folders[-1])
                current_galleries_names.append(gallery_name)
                gallery_name_parts = self._split_gallery_name(gallery_name)
                data.append(tuple(gallery_name_parts))
        # Batch the inserts 5000 rows at a time via islice over one iterator.
        group_size = 5000
        it = iter(data)
        for _ in range(0, len(data), group_size):
            connector.execute_many(insert_query, list(islice(it, group_size)))

        match self.config.database.sql_type.lower():
            case "mysql":
                # Rows in galleries_dbids with no match in the temp table are
                # galleries that vanished from disk.
                fetch_query = f"""
                SELECT CONCAT({",".join(["galleries_dbids."+column_name for column_name in column_name_parts])})
                FROM galleries_dbids
                LEFT JOIN {tmp_table_name} USING ({",".join(column_name_parts)})
                WHERE {tmp_table_name}.{column_name_parts[0]} IS NULL
                """
        removed_galleries = connector.fetch_all(fetch_query)
        if len(removed_galleries) > 0:
            removed_galleries = [gallery[0] for gallery in removed_galleries]

        for removed_gallery in removed_galleries:
            self.insert_pending_gallery_removal(removed_gallery)

    self.delete_pending_gallery_removals()

    return (current_galleries_folders, current_galleries_names)
"name" 720 | ) 721 | insert_query = f""" 722 | INSERT INTO {tmp_table_name} 723 | ({", ".join(column_name_parts)}) 724 | VALUES ({", ".join(["%s" for _ in column_name_parts])}) 725 | """ 726 | 727 | data: list[tuple] = list() 728 | current_galleries_folders: list[str] = list() 729 | current_galleries_names: list[str] = list() 730 | for root, _, files in os.walk(self.config.h2h.download_path): 731 | if GALLERY_INFO_FILE_NAME in files: 732 | current_galleries_folders.append(root) 733 | gallery_name = os.path.basename(current_galleries_folders[-1]) 734 | current_galleries_names.append(gallery_name) 735 | gallery_name_parts = self._split_gallery_name(gallery_name) 736 | data.append(tuple(gallery_name_parts)) 737 | group_size = 5000 738 | it = iter(data) 739 | for _ in range(0, len(data), group_size): 740 | connector.execute_many(insert_query, list(islice(it, group_size))) 741 | 742 | match self.config.database.sql_type.lower(): 743 | case "mysql": 744 | fetch_query = f""" 745 | SELECT CONCAT({",".join(["galleries_dbids."+column_name for column_name in column_name_parts])}) 746 | FROM galleries_dbids 747 | LEFT JOIN {tmp_table_name} USING ({",".join(column_name_parts)}) 748 | WHERE {tmp_table_name}.{column_name_parts[0]} IS NULL 749 | """ 750 | removed_galleries = connector.fetch_all(fetch_query) 751 | if len(removed_galleries) > 0: 752 | removed_galleries = [gallery[0] for gallery in removed_galleries] 753 | 754 | for removed_gallery in removed_galleries: 755 | self.insert_pending_gallery_removal(removed_gallery) 756 | 757 | self.delete_pending_gallery_removals() 758 | 759 | return (current_galleries_folders, current_galleries_names) 760 | 761 | def _refresh_current_cbz_files(self, current_galleries_names: list[str]) -> None: 762 | from .compress_gallery_to_cbz import gallery_name_to_cbz_file_name 763 | 764 | current_cbzs: dict[str, str] = dict() 765 | for root, _, files in os.walk(self.config.h2h.cbz_path): 766 | for file in files: 767 | current_cbzs[file] = root 768 
def _refresh_current_files_hashs(self, algorithm: str) -> None:
    """Delete orphaned rows for *algorithm*'s hash table: hash ids present
    in ``files_hashs_<algo>_dbids`` but missing from ``files_hashs_<algo>``.

    Raises:
        ValueError: If *algorithm* is not a supported hash algorithm.
    """
    if algorithm not in HASH_ALGORITHMS:
        raise ValueError(
            f"Invalid hash algorithm: {algorithm} not in {HASH_ALGORITHMS}"
        )

    with self.SQLConnector() as connector:
        match self.config.database.sql_type.lower():
            case "mysql":
                # x = hash-value table, y = db-ids table; delete y rows whose
                # db_hash_id no longer exists in x.
                get_delete_db_hash_id_query = (
                    lambda x, y: f"""
                    DELETE FROM {y}
                    WHERE db_hash_id IN (
                        SELECT db_hash_id
                        FROM {x}
                        RIGHT JOIN {y} USING (db_hash_id)
                        WHERE {x}.db_hash_id IS NULL
                    )
                    """
                )
                hash_table_name = f"files_hashs_{algorithm.lower()}"
                db_table_name = f"files_hashs_{algorithm.lower()}_dbids"
                connector.execute(
                    get_delete_db_hash_id_query(hash_table_name, db_table_name)
                )

def refresh_current_files_hashs(self):
    """Run :meth:`_refresh_current_files_hashs` for every algorithm, one
    worker thread per algorithm."""
    with SQLThreadsList() as threads:
        for algorithm in HASH_ALGORITHMS:
            threads.append(
                target=self._refresh_current_files_hashs,
                args=(algorithm,),
            )

def insert_h2h_download(self) -> None:
    """Scan the H@H download folder, insert new galleries into the DB and
    (optionally) build CBZ files, in parallel chunks.

    If any chunk inserted something new, sleeps 30 minutes and recurses to
    pick up galleries that finished downloading in the meantime.
    """
    self.delete_pending_gallery_removals()

    current_galleries_folders, current_galleries_names = (
        self.scan_current_galleries_folders()
    )

    self._refresh_current_cbz_files(current_galleries_names)

    # Order the work according to config.h2h.cbz_sort.
    self.logger.info("Inserting galleries...")
    if self.config.h2h.cbz_sort in ["upload_time", "download_time", "gid", "title"]:
        self.logger.info(f"Sorting by {self.config.h2h.cbz_sort}...")
        current_galleries_folders = sorted(
            current_galleries_folders,
            key=lambda x: getattr(parse_galleryinfo(x), self.config.h2h.cbz_sort),
            reverse=True,
        )
    elif "no" in self.config.h2h.cbz_sort:
        self.logger.info("No sorting...")
        pass
    elif "pages" in self.config.h2h.cbz_sort:
        self.logger.info("Sorting by pages...")
        # "pages+N" sorts by distance from N pages; default pivot is 20.
        zero_level = (
            max(1, int(self.config.h2h.cbz_sort.split("+")[-1]))
            if "+" in self.config.h2h.cbz_sort
            else 20
        )
        self.logger.info(
            f"Sorting by pages with adjustment based on {zero_level} pages..."
        )
        current_galleries_folders = sorted(
            current_galleries_folders,
            key=lambda x: abs(getattr(parse_galleryinfo(x), "pages") - zero_level),
        )
    else:
        current_galleries_folders = sorted(
            current_galleries_folders,
            key=lambda x: getattr(parse_galleryinfo(x), "pages"),
        )
    self.logger.info("Galleries sorted.")

    self.logger.info("Getting excluded hash values...")
    exclude_hashs = list[bytes]()
    previously_count_duplicated_files = 0
    self.logger.info("Excluded hash values obtained.")

    def calculate_exclude_hashs(
        previously_count_duplicated_files: int, exclude_hashs: list[bytes]
    ) -> tuple[int, list[bytes]]:
        """Recompute the exclusion list only when the duplicate count grew."""
        self.logger.debug("Checking for duplicated files...")
        current_count_duplicated_files = self._count_duplicated_files_hashs_sha512()
        new_exclude_hashs = exclude_hashs
        if current_count_duplicated_files > previously_count_duplicated_files:
            self.logger.debug(
                "Duplicated files found. Updating excluded hash values..."
            )
            previously_count_duplicated_files = current_count_duplicated_files
            new_exclude_hashs = (
                self._get_duplicated_hash_values_by_count_artist_ratio()
            )
            self.logger.info("Excluded hash values updated.")
        return previously_count_duplicated_files, new_exclude_hashs

    total_inserted_in_database = 0
    total_created_cbz = 0
    is_insert_limit_reached = False
    chunked_galleries_folders = chunk_list(
        current_galleries_folders, 100 * POOL_CPU_LIMIT
    )
    self.logger.info("Inserting galleries in parallel...")
    for gallery_chunk in chunked_galleries_folders:
        # Insert gallery info to database
        is_insert_list: list[bool] = list()
        try:
            is_insert_list += run_in_parallel(
                self.insert_gallery_info,
                [(x,) for x in gallery_chunk],
            )
        except Exception as e:
            # Fall back to sequential inserts so one bad gallery doesn't
            # sink the whole chunk.
            self.logger.error(f"Error inserting galleries: {e}")
            self.logger.error("Retrying without parallel")
            for x in gallery_chunk:
                self.logger.error(f"Retrying gallery '{x}'...")
                is_insert_list.append(self.insert_gallery_info(x))
        if any(is_insert_list):
            self.logger.info("There are new galleries inserted in database.")
            is_insert_limit_reached |= True
        total_inserted_in_database += sum(is_insert_list)

        # Compress gallery to CBZ file
        if self.config.h2h.cbz_path != "":
            if any(is_insert_list):
                previously_count_duplicated_files, exclude_hashs = (
                    calculate_exclude_hashs(
                        previously_count_duplicated_files, exclude_hashs
                    )
                )
            is_new_list = run_in_parallel(
                self.compress_gallery_to_cbz,
                [(x, exclude_hashs) for x in gallery_chunk],
            )
            if any(is_new_list):
                self.logger.info("There are new CBZ files created.")
            total_created_cbz += sum(is_new_list)
    self.logger.info(
        f"Total galleries inserted in database: {total_inserted_in_database}"
    )
    self.logger.info(f"Total CBZ files created: {total_created_cbz}")

    self.logger.info("Cleaning up database...")
    self.refresh_current_files_hashs()

    if is_insert_limit_reached:
        # New material showed up this pass; wait and rescan for galleries
        # that were still downloading.
        self.logger.info("Sleeping for 30 minutes...")
        sleep(1800)
        self.logger.info("Refreshing database...")
        return self.insert_h2h_download()

    self._reset_redownload_times()
def get_komga_metadata(self, gallery_name: str) -> dict:
    """Build a Komga-style metadata dict (title, summary, releaseDate,
    authors) for *gallery_name* from the database."""
    metadata: dict[str, str | list[dict[str, str]]] = dict()
    metadata["title"] = self.get_title_by_gallery_name(gallery_name)
    if self._check_gallery_comment_by_gallery_name(gallery_name):
        metadata["summary"] = self.get_comment_by_gallery_name(gallery_name)
    else:
        metadata["summary"] = ""
    upload_time = self.get_upload_time_by_gallery_name(gallery_name)
    # ISO-style YYYY-MM-DD with zero-padded month and day.
    metadata["releaseDate"] = "-".join(
        [
            str(upload_time.year),
            "{m:02d}".format(m=upload_time.month),
            "{d:02d}".format(d=upload_time.day),
        ]
    )
    tags = self.get_tag_pairs_by_gallery_name(gallery_name)
    metadata["authors"] = [
        {"name": value, "role": key} for key, value in tags if value != ""
    ]
    return metadata


# ---- /src/h2hdb/h2hdb_spec.py ----
import math
import re
from abc import ABCMeta, abstractmethod
from functools import partial


from .config_loader import H2HDBConfig
from .logger import setup_logger
from .settings import (
    FILE_NAME_LENGTH_LIMIT,
    FOLDER_NAME_LENGTH_LIMIT,
)


class H2HDBAbstract(metaclass=ABCMeta):
    """Abstract interface of the H2H database backend."""

    __slots__ = [
        "sql_connection_params",
        "innodb_index_prefix_limit",
        "config",
        "SQLConnector",
        "logger",
    ]

    def __init__(self, config: H2HDBConfig) -> None:
        """
        Initializes the H2HDBAbstract object.

        Raises:
            ValueError: If the SQL type is unsupported.
        """
        self.config = config
        self.logger = setup_logger(config.logger)

        # Set the appropriate connector based on the SQL type
        match self.config.database.sql_type.lower():
            case "mysql":
                from .mysql_connector import MySQLConnectorParams, MySQLConnector

                self.sql_connection_params = MySQLConnectorParams(
                    host=self.config.database.host,
                    port=self.config.database.port,
                    user=self.config.database.user,
                    password=self.config.database.password,
                    database=self.config.database.database,
                )
                # SQLConnector is a zero-argument factory for new connections.
                self.SQLConnector = partial(
                    MySQLConnector, **self.sql_connection_params.model_dump()
                )
                # InnoDB index prefix limit (utf8mb4, 767/4 rounded): column
                # chunks are sized to stay indexable.
                self.innodb_index_prefix_limit = 191
            case _:
                raise ValueError("Unsupported SQL type")

    def __enter__(self) -> "H2HDBAbstract":
        return self

    def __exit__(
        self,
        exc_type: type[BaseException] | None,
        exc_value: BaseException | None,
        traceback: object | None,
    ) -> None:
        # Commit only when the with-block exited without an exception.
        if exc_type is None:
            with self.SQLConnector() as connector:
                connector.commit()

    def _split_gallery_name(self, gallery_name: str) -> list[str]:
        """Split *gallery_name* into fixed-size chunks (one per name-part
        column), padding with empty strings to the full column count."""
        size = FOLDER_NAME_LENGTH_LIMIT // self.innodb_index_prefix_limit + (
            FOLDER_NAME_LENGTH_LIMIT % self.innodb_index_prefix_limit > 0
        )
        gallery_name_parts = re.findall(
            f".{{1,{self.innodb_index_prefix_limit}}}", gallery_name
        )
        gallery_name_parts += [""] * (size - len(gallery_name_parts))
        return gallery_name_parts

    def _mysql_split_name_based_on_limit(
        self, name: str, name_length_limit: int
    ) -> tuple[list[str], str]:
        """Return the part-column names and the CHAR column DDL needed to
        store a *name_length_limit*-char value within the index prefix
        limit (last column holds the remainder)."""
        num_parts = math.ceil(name_length_limit / self.innodb_index_prefix_limit)
        name_parts = [
            f"{name}_part{i} CHAR({self.innodb_index_prefix_limit}) NOT NULL"
            for i in range(1, name_length_limit // self.innodb_index_prefix_limit + 1)
        ]
        if name_length_limit % self.innodb_index_prefix_limit > 0:
            name_parts.append(
                f"{name}_part{num_parts} CHAR({name_length_limit % self.innodb_index_prefix_limit}) NOT NULL"
            )
        column_name_parts = [f"{name}_part{i}" for i in range(1, num_parts + 1)]
        create_name_parts_sql = ", ".join(name_parts)
        return column_name_parts, create_name_parts_sql

    def mysql_split_gallery_name_based_on_limit(
        self, name: str
    ) -> tuple[list[str], str]:
        return self._mysql_split_name_based_on_limit(name, FOLDER_NAME_LENGTH_LIMIT)

    def mysql_split_file_name_based_on_limit(self, name: str) -> tuple[list[str], str]:
        return self._mysql_split_name_based_on_limit(name, FILE_NAME_LENGTH_LIMIT)

    @abstractmethod
    def check_database_character_set(self) -> None:
        """
        Checks the character set of the database.
        """
        pass

    @abstractmethod
    def check_database_collation(self) -> None:
        """
        Checks the collation of the database.
        """
        pass

    @abstractmethod
    def create_main_tables(self) -> None:
        """
        Creates the main tables for the comic database.
        """
        pass

    @abstractmethod
    def insert_gallery_info(self, gallery_path: str) -> bool:
        """
        Inserts the gallery information into the database.

        Args:
            gallery_path (str): The path to the gallery folder.
        """
        pass

    @abstractmethod
    def insert_h2h_download(self) -> None:
        """
        Inserts the H@H download information into the database.
        """
        pass

    @abstractmethod
    def get_gid_by_gallery_name(self, gallery_name: str) -> int:
        """
        Selects the gallery GID from the database.

        Args:
            gallery_name (str): The name of the gallery.

        Returns:
            int: The gallery GID.
        """
        pass

    @abstractmethod
    def get_gids(self) -> list[int]:
        """
        Selects the GIDs from the database.

        Returns:
            list[int]: The list of GIDs.
        """
        pass

    @abstractmethod
    def check_gid_by_gid(self, gid: int) -> bool:
        """
        Checks if the GID exists in the database.

        Args:
            gid (int): The gallery GID.

        Returns:
            bool: True if the GID exists, False otherwise.
        """
        pass

    @abstractmethod
    def get_title_by_gallery_name(self, gallery_name: str) -> str:
        """
        Selects the gallery title from the database.

        Args:
            gallery_name (str): The name of the gallery.

        Returns:
            str: The gallery title.
        """
        pass

    @abstractmethod
    def update_access_time(self, gallery_name: str, time: str) -> None:
        """
        Updates the access time for the gallery in the database.

        Args:
            gallery_name (str): The name of the gallery.
            time (str): The access time.
        """
        pass

    @abstractmethod
    def get_upload_account_by_gallery_name(self, gallery_name: str) -> str:
        """
        Selects the gallery upload account from the database.

        Args:
            gallery_name (str): The name of the gallery.

        Returns:
            str: The gallery upload account.
        """
        pass

    @abstractmethod
    def get_comment_by_gallery_name(self, gallery_name: str) -> str:
        """
        Selects the gallery comment from the database.

        Args:
            gallery_name (str): The name of the gallery.

        Returns:
            str: The gallery comment.
        """
        pass

    @abstractmethod
    def get_tag_value_by_gallery_name_and_tag_name(
        self, gallery_name: str, tag_name: str
    ) -> str:
        """
        Selects the gallery tag from the database.

        Args:
            gallery_name (str): The name of the gallery.
            tag_name (str): The name of the tag.

        Returns:
            str: The value of the tag.
        """
        pass

    @abstractmethod
    def get_files_by_gallery_name(self, gallery_name: str) -> list[str]:
        """
        Selects the gallery files from the database.

        Args:
            gallery_name (str): The name of the gallery.

        Returns:
            list[str]: The list of files in the gallery.
        """
        pass

    @abstractmethod
    def delete_gallery_file(self, gallery_name: str) -> None:
        """
        Deletes the gallery image from the database.

        Args:
            gallery_name (str): The name of the gallery.
        """
        pass

    @abstractmethod
    def delete_gallery(self, gallery_name: str) -> None:
        """
        Deletes the gallery from the database.

        Args:
            gallery_name (str): The name of the gallery.
        """
        pass

    @abstractmethod
    def insert_pending_gallery_removal(self, gallery_name: str) -> None:
        """
        Inserts the pending gallery removal into the database.

        Args:
            gallery_name (str): The name of the gallery.
        """
        pass

    @abstractmethod
    def check_pending_gallery_removal(self, gallery_name: str) -> bool:
        """
        Checks if the gallery is pending removal.

        Returns:
            bool: True if the gallery is pending removal, False otherwise.
        """
        pass

    @abstractmethod
    def get_pending_gallery_removals(self) -> list[str]:
        """
        Selects the pending gallery removals from the database.

        Returns:
            list[str]: The list of pending gallery removals.
        """
        pass

    @abstractmethod
    def delete_pending_gallery_removal(self, gallery_name: str) -> None:
        """
        Deletes the pending gallery removal from the database.

        Args:
            gallery_name (str): The name of the gallery.
        """
        pass

    @abstractmethod
    def delete_pending_gallery_removals(self) -> None:
        """
        Deletes all pending gallery removals from the database.
        """
        pass

    @abstractmethod
    def scan_current_galleries_folders(self) -> tuple[list[str], list[str]]:
        """
        Scans the current galleries folders.

        Returns:
            tuple[list[str], list[str]]: The current gallery folders and
                their gallery names.
        """
        pass

    @abstractmethod
    def refresh_current_files_hashs(self) -> None:
        """
        Refreshes the current files hashes in the database.
        """
        pass

    @abstractmethod
    def get_komga_metadata(self, gallery_name: str) -> dict:
        """
        Selects the Komga metadata from the database.

        Args:
            gallery_name (str): The name of the gallery.

        Returns:
            dict: The Komga metadata.
        """
        pass

    @abstractmethod
    def check_todownload_gid(self, gid: int, url: str) -> bool:
        """
        Checks if the GID is to be downloaded.

        Args:
            gid (int): The gallery GID.
            url (str): The gallery URL.

        Returns:
            bool: True if the GID is to be downloaded, False otherwise.
        """
        pass

    @abstractmethod
    def insert_todownload_gid(self, gid: int, url: str) -> None:
        """
        Inserts the GID to be downloaded into the database.

        Args:
            gid (int): The gallery GID.
            url (str): The gallery URL.
        """
        pass

    @abstractmethod
    def get_todownload_gids(self) -> list[tuple[int, str]]:
        """
        Selects the GIDs to be downloaded from the database.

        Returns:
            list[tuple[int, str]]: The list of GIDs to be downloaded.
        """
        pass

    @abstractmethod
    def remove_todownload_gid(self, gid: int) -> None:
        """
        Removes the GID to be downloaded from the database.

        Args:
            gid (int): The gallery GID.
        """
        pass

    @abstractmethod
    def get_pending_download_gids(self) -> list[int]:
        """
        Selects the pending download GIDs from the database.

        Returns:
            list[int]: The list of pending download GIDs.
        """
        pass

    @abstractmethod
    def insert_removed_gallery_gid(self, gid: int) -> None:
        """
        Inserts the removed gallery GID into the database.

        Args:
            gid (int): The gallery GID.
        """
        pass

    @abstractmethod
    def insert_todelete_gid(self, gid: int) -> None:
        """
        Inserts the GID to be deleted into the database.

        Args:
            gid (int): The gallery GID.
        """
        pass

    @abstractmethod
    def update_redownload_time_to_now_by_gid(self, gid: int) -> None:
        """
        Updates the redownload time to now by GID.

        Args:
            gid (int): The gallery GID.
        """
        pass
class FileInformation:
    """Mutable record describing one on-disk file tracked by the database."""

    def __init__(self, absolute_path: str, db_file_id: int) -> None:
        # absolute_path: filesystem location of the file.
        # db_file_id: primary key of the file in files_dbids.
        # issethash: guards sethash() so the file is hashed at most once.
        # db_hash_id: algorithm name -> db_hash_id row id, filled lazily.
        self.absolute_path = absolute_path
        self.db_file_id = db_file_id
        self.issethash = False
        self.db_hash_id: dict[str, int] = {}

    def sethash(self) -> None:
        """Read the file once and attach one digest attribute per algorithm."""
        if self.issethash:
            return
        # Imported lazily so constructing a FileInformation never touches
        # the hashing configuration.
        from .hash_dict import HASH_ALGORITHMS
        from .settings import hash_function

        with open(self.absolute_path, "rb") as file:
            file_content = file.read()
        for algorithm in HASH_ALGORITHMS:
            setattr(self, algorithm, hash_function(file_content, algorithm))
        self.issethash = True

    def setdb_hash_id(self, algorithm: str, db_hash_id: int) -> None:
        """Record the database row id of this file's hash for *algorithm*."""
        self.db_hash_id[algorithm] = db_hash_id


class TagInformation:
    """Lightweight (tag_name, tag_value) pair with a lazily-set DB id."""

    __slots__ = ["tag_name", "tag_value", "db_tag_id"]

    def __init__(self, tag_name: str, tag_value: str) -> None:
        self.tag_name = tag_name
        self.tag_value = tag_value

    def setdb_tag_id(self, db_tag_id: int) -> None:
        """Record the database row id of this tag."""
        self.db_tag_id = db_tag_id
def setup_screen_logger(level: int) -> logging.Logger:
    """
    Return the shared console logger, creating its handler on first use.

    Args:
        level (int): Logging level (e.g. logging.INFO).

    Returns:
        logging.Logger: The "display_on_screen" logger.
    """
    screen_logger = logging.getLogger("display_on_screen")
    screen_logger.setLevel(level)

    # Guard against adding a second handler when called repeatedly.
    if not screen_logger.handlers:
        handler = logging.StreamHandler()
        formatter = logging.Formatter("%(asctime)s [%(levelname)s] %(message)s")
        handler.setFormatter(formatter)
        screen_logger.addHandler(handler)
    return screen_logger


def setup_file_logger(level: int) -> logging.Logger:
    """
    Return the shared CSV file logger, creating its handlers on first use.

    The log file is truncated and a CSV header row is written the first time
    this is called in a process.

    Args:
        level (int): Logging level (e.g. logging.INFO).

    Returns:
        logging.Logger: The "write_to_file" logger.
    """
    log_filename = "h2hdb.log"
    file_logger = logging.getLogger("write_to_file")
    file_logger.setLevel(level)

    if not file_logger.handlers:
        # Start a fresh CSV log with a header row.
        with open(log_filename, "w", encoding="utf-8") as f:
            f.write('"time stamp","level","message"\n')

        # "a" suffices here; the old "a+" read/append mode served no purpose
        # because the handler only ever writes.
        file_handler = logging.FileHandler(log_filename, mode="a", encoding="utf-8")
        formatter = logging.Formatter('"%(asctime)s","%(levelname)-8s","%(message)s"')
        file_handler.setFormatter(formatter)

        # MemoryHandler buffers up to `capacity` RECORDS (the old comment
        # incorrectly said bytes) and flushes to the file handler when full
        # or when a record of ERROR or above arrives.
        memory_handler = MemoryHandler(
            capacity=1024, target=file_handler, flushLevel=logging.ERROR
        )
        file_logger.addHandler(memory_handler)

    return file_logger


class AbstractLogger(metaclass=ABCMeta):
    """Interface for loggers used by h2hdb: one method per severity level."""

    @abstractmethod
    def debug(self, message: str) -> None: ...

    @abstractmethod
    def info(self, message: str) -> None: ...

    @abstractmethod
    def warning(self, message: str) -> None: ...

    @abstractmethod
    def error(self, message: str) -> None: ...

    @abstractmethod
    def critical(self, message: str) -> None: ...
class HentaiDBLogger(AbstractLogger):
    """Fan-out logger: every message is sent to both the screen and file loggers."""

    def __init__(self, level: int) -> None:
        self.screen_logger = setup_screen_logger(level)
        self.file_logger = setup_file_logger(level)

    def debug(self, message: str) -> None:
        self._log_method("debug", message)

    def info(self, message: str) -> None:
        self._log_method("info", message)

    def warning(self, message: str) -> None:
        self._log_method("warning", message)

    def error(self, message: str) -> None:
        self._log_method("error", message)

    def critical(self, message: str) -> None:
        self._log_method("critical", message)

    def _log_method(self, level: str, message: str) -> None:
        # Dispatch by level name so the five public methods stay one-liners.
        log_method_screen = getattr(self.screen_logger, level)
        log_method_file = getattr(self.file_logger, level)
        log_method_screen(message)
        log_method_file(message)

    def hasHandlers(self) -> bool:
        # NOTE: Logger.hasHandlers() also consults ancestor loggers
        # (propagation chain), not just this logger's own handler list.
        return self.screen_logger.hasHandlers() or self.file_logger.hasHandlers()

    def removeHandlers(self) -> None:
        """
        Remove every handler attached directly to the two wrapped loggers.

        Bug fix: the previous implementation looped ``while self.hasHandlers()``
        and unconditionally removed ``handlers[0]`` from BOTH loggers.  Because
        hasHandlers() also sees ancestor handlers (e.g. on the root logger) and
        the two loggers may hold different handler counts, that raised
        IndexError or never terminated.
        """
        for wrapped in (self.screen_logger, self.file_logger):
            for handler in list(wrapped.handlers):
                wrapped.removeHandler(handler)

    def addHandler(self, handler: logging.Handler) -> None:
        """Attach *handler* to both wrapped loggers."""
        self.screen_logger.addHandler(handler)
        self.file_logger.addHandler(handler)


def setup_logger(
    logger_config: LoggerConfig,
) -> HentaiDBLogger:
    """Build the application logger from a LoggerConfig (uses its level)."""
    return HentaiDBLogger(level=logger_config.level)
from pydantic import Field

from .sql_connector import SQLConnectorParams, SQLConnector, DatabaseDuplicateKeyError


# Statements that modify data/schema are auto-committed after execution.
AUTO_COMMIT_KEYS = ["INSERT", "UPDATE", "DELETE", "CREATE", "DROP", "ALTER"]


class MySQLDuplicateKeyError(DatabaseDuplicateKeyError):
    """
    Custom exception class for MySQL duplicate key errors.

    Raised when the MySQL driver reports an IntegrityError for a duplicate
    key, translated into the backend-agnostic DatabaseDuplicateKeyError
    hierarchy so callers need not import driver exceptions.
    """

    def __init__(self, message) -> None:
        self.message = message
        super().__init__(self.message)


class MySQLConnectorParams(SQLConnectorParams):
    """
    Validated connection parameters for a MySQL database.

    Note: the unused ``field_validator`` import was removed; all validation
    here is declarative via Field constraints.
    """

    host: str = Field(
        min_length=1,
        description="Host of the MySQL database",
    )
    port: int = Field(
        ge=1,
        le=65535,
        description="Port of the MySQL database",
    )
    user: str = Field(
        min_length=1,
        description="User for the MySQL database",
    )
    password: str = Field(
        description="Password for the MySQL database",
    )
    database: str = Field(
        min_length=1,
        description="Database name for the MySQL database",
    )


class MySQLCursor:
    """Context manager yielding a buffered cursor that is closed on exit."""

    def __init__(
        self, connection: PooledMySQLConnection | MySQLConnectionAbstract
    ) -> None:
        self.connection = connection

    def __enter__(self):
        # buffered=True fetches the whole result set eagerly, so fetchone()
        # never leaves unread results on the connection.
        self.cursor = self.connection.cursor(buffered=True)
        return self.cursor

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.cursor.close()


class MySQLConnector(SQLConnector):
    """
    Concrete SQLConnector for MySQL, built on MySQL Connector/Python.

    Provides connection management (connect/close), query execution
    (execute/execute_many), result fetching (fetch_one/fetch_all) and
    transaction control (commit/rollback).  Statements containing any
    keyword in AUTO_COMMIT_KEYS are committed automatically.
    """
100 | """ 101 | 102 | def __init__( 103 | self, host: str, port: int, user: str, password: str, database: str 104 | ) -> None: 105 | self.params = MySQLConnectorParams( 106 | host=host, port=port, user=user, password=password, database=database 107 | ) 108 | 109 | def connect(self) -> None: 110 | self.connection = SQLConnect(**self.params.model_dump()) 111 | 112 | def close(self) -> None: 113 | self.connection.close() 114 | 115 | def check_table_exists(self, table_name: str) -> bool: 116 | query = f"SHOW TABLES LIKE '{table_name}'" 117 | result = self.fetch_one(query) 118 | return result is not None 119 | 120 | def commit(self) -> None: 121 | self.connection.commit() 122 | 123 | def rollback(self) -> None: 124 | self.connection.rollback() 125 | 126 | def execute(self, query: str, data: tuple = ()) -> None: 127 | with MySQLCursor(self.connection) as cursor: 128 | try: 129 | cursor.execute(query, data) 130 | except IntegrityError as e: 131 | raise MySQLDuplicateKeyError(str(e)) 132 | except Exception as e: 133 | raise e 134 | if any(key in query.upper() for key in AUTO_COMMIT_KEYS): 135 | self.commit() 136 | 137 | def execute_many(self, query: str, data: list[tuple]) -> None: 138 | with MySQLCursor(self.connection) as cursor: 139 | try: 140 | cursor.executemany(query, data) 141 | except IntegrityError as e: 142 | raise MySQLDuplicateKeyError(str(e)) 143 | if any(key in query.upper() for key in AUTO_COMMIT_KEYS): 144 | self.commit() 145 | 146 | def fetch_one(self, query: str, data: tuple = ()) -> tuple: 147 | with MySQLCursor(self.connection) as cursor: 148 | cursor.execute(query, data) 149 | vlist = cursor.fetchone() 150 | if isinstance(vlist, tuple): 151 | return vlist 152 | else: 153 | return tuple() 154 | 155 | def fetch_all(self, query: str, data: tuple = ()) -> list: 156 | with MySQLCursor(self.connection) as cursor: 157 | cursor.execute(query, data) 158 | vlist = cursor.fetchall() 159 | return vlist 160 | 
__all__ = [
    "FOLDER_NAME_LENGTH_LIMIT",
    "FILE_NAME_LENGTH_LIMIT",
    "COMPARISON_HASH_ALGORITHM",
    "GALLERY_INFO_FILE_NAME",
    "hash_function",
    "hash_function_by_file",
]

import logging
import hashlib
from enum import Enum

# Filesystem name-length limits enforced when inserting galleries/files.
FOLDER_NAME_LENGTH_LIMIT = 255
FILE_NAME_LENGTH_LIMIT = 255
# Algorithm used when comparing file contents across the database.
COMPARISON_HASH_ALGORITHM = "sha512"
GALLERY_INFO_FILE_NAME = "galleryinfo.txt"


class LOG_LEVEL(int, Enum):
    """Named logging levels mirroring the stdlib logging constants."""

    notset = logging.NOTSET
    debug = logging.DEBUG
    info = logging.INFO
    warning = logging.WARNING
    error = logging.ERROR
    critical = logging.CRITICAL


class CBZ_GROUPING(str, Enum):
    """How exported CBZ files are grouped into folders."""

    flat = "flat"
    date_yyyy = "date-yyyy"
    date_yyyy_mm = "date-yyyy-mm"
    date_yyyy_mm_dd = "date-yyyy-mm-dd"


class CBZ_SORT(str, Enum):
    """Sort key applied to exported CBZ files."""

    no = "no"
    upload_time = "upload_time"
    download_time = "download_time"
    pages = "pages"
    pages_num = "pages+[num]"


def hash_function(x: bytes, algorithm: str) -> bytes:
    """
    Return the binary digest of *x* under *algorithm* (case-insensitive,
    any hashlib-supported name, e.g. "sha512", "sha3_512", "blake2b").
    """
    return getattr(hashlib, algorithm.lower())(x).digest()


def hash_function_by_file(file_path: str, algorithm: str) -> bytes:
    """
    Return the binary digest of the file at *file_path* under *algorithm*.

    Improvement: the file is now hashed in 1 MiB chunks instead of being
    read entirely into memory, so arbitrarily large archives hash in
    constant memory while producing the identical digest.
    """
    digest = hashlib.new(algorithm.lower())
    with open(file_path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            digest.update(chunk)
    return digest.digest()


def chunk_list(input_list: list, chunk_size: int) -> list:
    """
    Split *input_list* into consecutive sublists of at most *chunk_size* items.

    Raises:
        ValueError: If chunk_size is not positive.
    """
    if chunk_size <= 0:
        raise ValueError("Chunk size must be greater than 0.")

    return [
        input_list[i : i + chunk_size] for i in range(0, len(input_list), chunk_size)
    ]
__all__ = [
    "SQLConnectorParams",
    "SQLConnector",
    "DatabaseConfigurationError",
    "DatabaseKeyError",
    "DatabaseDuplicateKeyError",
    "DatabaseTableError",
]
# Bug fix: __all__ previously exported "MySQLConnector", which this module
# never defines (it lives in mysql_connector.py), so `from .sql_connector
# import *` raised AttributeError; it also omitted DatabaseDuplicateKeyError.


from abc import ABC, abstractmethod

from pydantic import BaseModel, ConfigDict


class DatabaseConfigurationError(Exception):
    """
    Custom exception class for database configuration errors.

    Raised when the database's configuration (e.g. charset/collation)
    does not match what h2hdb requires.
    """

    def __init__(self, message):
        self.message = message
        super().__init__(self.message)


class DatabaseKeyError(Exception):
    """
    Custom exception class for database key errors.

    Raised when a lookup that is expected to succeed finds no row.
    """

    def __init__(self, message):
        self.message = message
        super().__init__(self.message)


class DatabaseDuplicateKeyError(Exception):
    """
    Custom exception class for database duplicate key errors.

    Backend-agnostic base for driver-specific duplicate-key exceptions
    (see MySQLDuplicateKeyError).
    """

    def __init__(self, message):
        self.message = message
        super().__init__(self.message)


class DatabaseTableError(Exception):
    """
    Custom exception class for database table errors.

    Raised for problems with table existence or structure.
    """

    def __init__(self, message):
        self.message = message
        super().__init__(self.message)
class SQLConnectorParams(BaseModel):
    """
    Base model for SQL connection parameters; concrete connectors subclass
    this and declare their fields.

    ``extra="forbid"`` makes unknown keyword arguments an error, so typos in
    connection settings fail fast instead of being silently ignored.
    """

    model_config = ConfigDict(extra="forbid")


class SQLConnector(ABC):
    """
    Abstract interface for SQL database connections.

    Concrete subclasses (e.g. MySQLConnector) implement connection
    management (connect/close), table inspection (check_table_exists),
    statement execution (execute/execute_many), result fetching
    (fetch_one/fetch_all) and transaction control (commit/rollback).

    The class is also a context manager: ``with connector: ...`` opens the
    connection on entry and closes it on exit.
    """

    @abstractmethod
    def __init__(self) -> None:
        """Store whatever parameters the concrete connector needs."""

    @abstractmethod
    def connect(self) -> None:
        """Open the connection to the database using the stored credentials."""

    @abstractmethod
    def close(self) -> None:
        """Close the connection to the database."""

    def __enter__(self) -> "SQLConnector":
        """Open the connection and return self for use in a ``with`` block."""
        self.connect()
        return self

    @abstractmethod
    def check_table_exists(self, table_name: str) -> bool:
        """Return True if *table_name* exists in the database."""

    @abstractmethod
    def commit(self) -> None:
        """Make the current transaction's changes permanent."""

    @abstractmethod
    def rollback(self) -> None:
        """Undo the current transaction's changes."""

    def __exit__(self, exc_type, exc_value, traceback) -> None:
        """Close the connection when the ``with`` block ends.

        Args:
            exc_type: Type of the in-flight exception, if any.
            exc_value: The in-flight exception instance, if any.
            traceback: Its traceback, if any.
        """
        self.close()

    @abstractmethod
    def execute(self, query: str, data: tuple = ()) -> None:
        """Run a single SQL statement with the given bound parameters."""

    @abstractmethod
    def execute_many(self, query: str, data: list[tuple]) -> None:
        """Run one SQL statement once per parameter tuple in *data*."""

    @abstractmethod
    def fetch_one(self, query: str, data: tuple = ()) -> tuple:
        """Run *query* and return the first result row."""

    @abstractmethod
    def fetch_all(self, query: str, data: tuple = ()) -> list:
        """Run *query* and return every result row as a list of tuples."""
%s) 37 | """ 38 | connector.execute(insert_query, (db_gallery_id, comment)) 39 | 40 | def _update_gallery_comment(self, db_gallery_id: int, comment: str) -> None: 41 | with self.SQLConnector() as connector: 42 | table_name = "galleries_comments" 43 | match self.config.database.sql_type.lower(): 44 | case "mysql": 45 | update_query = f""" 46 | UPDATE {table_name} SET Comment = %s WHERE db_gallery_id = %s 47 | """ 48 | connector.execute(update_query, (comment, db_gallery_id)) 49 | 50 | def __get_gallery_comment_by_db_gallery_id(self, db_gallery_id: int) -> tuple: 51 | with self.SQLConnector() as connector: 52 | table_name = "galleries_comments" 53 | match self.config.database.sql_type.lower(): 54 | case "mysql": 55 | select_query = f""" 56 | SELECT Comment 57 | FROM {table_name} 58 | WHERE db_gallery_id = %s 59 | """ 60 | query_result = connector.fetch_one(select_query, (db_gallery_id,)) 61 | return query_result 62 | 63 | def _check_gallery_comment_by_db_gallery_id(self, db_gallery_id: int) -> bool: 64 | query_result = self.__get_gallery_comment_by_db_gallery_id(db_gallery_id) 65 | return len(query_result) != 0 66 | 67 | def _check_gallery_comment_by_gallery_name(self, gallery_name: str) -> bool: 68 | ischeck = False 69 | if self._check_galleries_dbids_by_gallery_name(gallery_name): 70 | db_gallery_id = self._get_db_gallery_id_by_gallery_name(gallery_name) 71 | ischeck = self._check_gallery_comment_by_db_gallery_id(db_gallery_id) 72 | return ischeck 73 | 74 | def _select_gallery_comment(self, db_gallery_id: int) -> str: 75 | query_result = self.__get_gallery_comment_by_db_gallery_id(db_gallery_id) 76 | if query_result: 77 | comment = query_result[0] 78 | else: 79 | msg = ( 80 | f"Uploader comment for gallery name ID {db_gallery_id} does not exist." 
81 | ) 82 | self.logger.error(msg) 83 | raise DatabaseKeyError(msg) 84 | return comment 85 | 86 | def get_comment_by_gallery_name(self, gallery_name: str) -> str: 87 | db_gallery_id = self._get_db_gallery_id_by_gallery_name(gallery_name) 88 | return self._select_gallery_comment(db_gallery_id) 89 | -------------------------------------------------------------------------------- /src/h2hdb/table_database_setting.py: -------------------------------------------------------------------------------- 1 | from abc import ABCMeta 2 | 3 | from .h2hdb_spec import H2HDBAbstract 4 | from .sql_connector import DatabaseConfigurationError 5 | 6 | 7 | class H2HDBCheckDatabaseSettings(H2HDBAbstract, metaclass=ABCMeta): 8 | """ 9 | A class that checks the database settings for character set and collation. 10 | 11 | This class inherits from `H2HDBAbstract` and is used to ensure that the database 12 | character set and collation are valid. It provides methods to check the character set and 13 | collation of the database and raises an error if they are invalid. 14 | 15 | Attributes: 16 | sql_type (str): The type of SQL database being used. 17 | 18 | Methods: 19 | check_database_character_set: Checks the character set of the database. 20 | check_database_collation: Checks the collation of the database. 21 | """ 22 | 23 | def check_database_character_set(self) -> None: 24 | """ 25 | Checks the character set of the database and raises an error if it is invalid. 26 | 27 | Raises: 28 | DatabaseConfigurationError: If the database character set is invalid. 29 | """ 30 | with self.SQLConnector() as connector: 31 | match self.config.database.sql_type.lower(): 32 | case "mysql": 33 | charset = "utf8mb4" 34 | query = "SHOW VARIABLES LIKE 'character_set_database';" 35 | 36 | charset_result: str = connector.fetch_one(query)[1] 37 | is_charset_valid: bool = charset_result == charset 38 | if not is_charset_valid: 39 | message = f"Invalid database character set. 
Must be '{charset}' but is '{charset_result}'." 40 | self.logger.error(message) 41 | raise DatabaseConfigurationError(message) 42 | self.logger.info("Database character set is valid.") 43 | 44 | def check_database_collation(self) -> None: 45 | """ 46 | Checks the collation of the database and raises an error if it is invalid. 47 | 48 | Raises: 49 | DatabaseConfigurationError: If the database collation is invalid. 50 | """ 51 | with self.SQLConnector() as connector: 52 | match self.config.database.sql_type.lower(): 53 | case "mysql": 54 | query = "SHOW VARIABLES LIKE 'collation_database';" 55 | collation = "utf8mb4_bin" 56 | 57 | collation_result: str = connector.fetch_one(query)[1] 58 | is_collation_valid: bool = collation_result == collation 59 | if not is_collation_valid: 60 | message = f"Invalid database collation. Must be '{collation}' but is '{collation_result}'." 61 | self.logger.error(message) 62 | raise DatabaseConfigurationError(message) 63 | self.logger.info("Database character set and collation are valid.") 64 | -------------------------------------------------------------------------------- /src/h2hdb/table_files_dbids.py: -------------------------------------------------------------------------------- 1 | from abc import ABCMeta 2 | from itertools import chain 3 | 4 | from .hash_dict import HASH_ALGORITHMS 5 | from .settings import FILE_NAME_LENGTH_LIMIT 6 | 7 | from .settings import chunk_list 8 | from .table_gids import H2HDBGalleriesIDs 9 | from .information import FileInformation 10 | from .h2hdb_spec import H2HDBAbstract 11 | from .settings import hash_function_by_file 12 | from .sql_connector import ( 13 | DatabaseKeyError, 14 | DatabaseDuplicateKeyError, 15 | ) 16 | 17 | 18 | class H2HDBFiles(H2HDBGalleriesIDs, H2HDBAbstract, metaclass=ABCMeta): 19 | def _create_files_names_table(self) -> None: 20 | with self.SQLConnector() as connector: 21 | table_name = f"files_dbids" 22 | match self.config.database.sql_type.lower(): 23 | case "mysql": 24 | 
column_name = "name" 25 | column_name_parts, create_gallery_name_parts_sql = ( 26 | self.mysql_split_file_name_based_on_limit(column_name) 27 | ) 28 | query = f""" 29 | CREATE TABLE IF NOT EXISTS {table_name} ( 30 | PRIMARY KEY (db_file_id), 31 | db_file_id INT UNSIGNED AUTO_INCREMENT, 32 | db_gallery_id INT UNSIGNED NOT NULL, 33 | FOREIGN KEY (db_gallery_id) REFERENCES galleries_dbids(db_gallery_id) 34 | ON UPDATE CASCADE 35 | ON DELETE CASCADE, 36 | {create_gallery_name_parts_sql}, 37 | UNIQUE real_primay_key (db_gallery_id, {", ".join(column_name_parts)}), 38 | UNIQUE db_file_to_gallery_id (db_file_id, db_gallery_id) 39 | ) 40 | """ 41 | connector.execute(query) 42 | self.logger.info(f"{table_name} table created.") 43 | 44 | table_name = f"files_names" 45 | match self.config.database.sql_type.lower(): 46 | case "mysql": 47 | query = f""" 48 | CREATE TABLE IF NOT EXISTS {table_name} ( 49 | PRIMARY KEY (db_file_id), 50 | FOREIGN KEY (db_file_id) REFERENCES files_dbids(db_file_id) 51 | ON UPDATE CASCADE 52 | ON DELETE CASCADE, 53 | db_file_id INT UNSIGNED NOT NULL, 54 | full_name TEXT NOT NULL, 55 | FULLTEXT (full_name) 56 | ) 57 | """ 58 | connector.execute(query) 59 | self.logger.info(f"{table_name} table created.") 60 | 61 | def _insert_gallery_files( 62 | self, db_gallery_id: int, file_names_list: list[str] 63 | ) -> None: 64 | with self.SQLConnector() as connector: 65 | 66 | file_name_parts_list: list[list[str]] = list() 67 | for file_name in file_names_list: 68 | if len(file_name) > FILE_NAME_LENGTH_LIMIT: 69 | self.logger.error( 70 | f"File name '{file_name}' is too long. Must be {FILE_NAME_LENGTH_LIMIT} characters or less." 
71 | ) 72 | raise ValueError("File name is too long.") 73 | file_name_parts_list.append(self._split_gallery_name(file_name)) 74 | 75 | table_name = "files_dbids" 76 | match self.config.database.sql_type.lower(): 77 | case "mysql": 78 | column_name_parts, _ = self.mysql_split_file_name_based_on_limit( 79 | "name" 80 | ) 81 | insert_query_header = f""" 82 | INSERT INTO {table_name} 83 | (db_gallery_id, {", ".join(column_name_parts)}) 84 | """ # VALUES (%s, {", ".join(["%s" for _ in column_name_parts])}) 85 | insert_query_values = " ".join( 86 | [ 87 | "VALUES", 88 | ", ".join( 89 | [ 90 | f"(%s, {", ".join(["%s" for _ in column_name_parts])})" 91 | for _ in file_names_list 92 | ] 93 | ), 94 | ] 95 | ) 96 | insert_query = f"{insert_query_header} {insert_query_values}" 97 | insert_parameter = tuple( 98 | chain( 99 | *[ 100 | (db_gallery_id, *file_name_parts_list[n]) 101 | for n in range(len(file_name_parts_list)) 102 | ] 103 | ) 104 | ) 105 | connector.execute( 106 | insert_query, 107 | insert_parameter, 108 | ) 109 | 110 | db_file_id_list = [ 111 | self._get_db_file_id(db_gallery_id, file_name) 112 | for file_name in file_names_list 113 | ] 114 | 115 | table_name = "files_names" 116 | match self.config.database.sql_type.lower(): 117 | case "mysql": 118 | column_name_parts, _ = self.mysql_split_file_name_based_on_limit( 119 | "name" 120 | ) 121 | insert_query_header = f""" 122 | INSERT INTO {table_name} 123 | (db_file_id, full_name) 124 | """ 125 | insert_query_values = " ".join( 126 | ["VALUES", ", ".join(["(%s, %s)" for _ in file_names_list])] 127 | ) 128 | insert_query = f"{insert_query_header} {insert_query_values}" 129 | 130 | connector.execute( 131 | insert_query, 132 | tuple( 133 | chain( 134 | *[ 135 | (db_file_id_list[n], file_names_list[n]) 136 | for n in range(len(file_names_list)) 137 | ] 138 | ) 139 | ), 140 | ) 141 | 142 | def __get_db_file_id(self, db_gallery_id: int, file_name: str) -> tuple: 143 | with self.SQLConnector() as connector: 144 | table_name 
= "files_dbids" 145 | file_name_parts = self._split_gallery_name(file_name) 146 | match self.config.database.sql_type.lower(): 147 | case "mysql": 148 | column_name_parts, _ = self.mysql_split_file_name_based_on_limit( 149 | "name" 150 | ) 151 | select_query = f""" 152 | SELECT db_file_id 153 | FROM {table_name} 154 | WHERE db_gallery_id = %s 155 | AND {" AND ".join([f"{part} = %s" for part in column_name_parts])} 156 | """ 157 | data = (db_gallery_id, *file_name_parts) 158 | query_result = connector.fetch_one(select_query, data) 159 | return query_result 160 | 161 | def _check_db_file_id(self, db_gallery_id: int, file_name: str) -> bool: 162 | query_result = self.__get_db_file_id(db_gallery_id, file_name) 163 | return len(query_result) != 0 164 | 165 | def _get_db_file_id(self, db_gallery_id: int, file_name: str) -> int: 166 | query_result = self.__get_db_file_id(db_gallery_id, file_name) 167 | if query_result: 168 | gallery_image_id = query_result[0] 169 | else: 170 | msg = f"Image ID for gallery name ID {db_gallery_id} and file '{file_name}' does not exist." 171 | self.logger.error(msg) 172 | raise DatabaseKeyError(msg) 173 | return gallery_image_id 174 | 175 | def get_files_by_gallery_name(self, gallery_name: str) -> list[str]: 176 | with self.SQLConnector() as connector: 177 | db_gallery_id = self._get_db_gallery_id_by_gallery_name(gallery_name) 178 | table_name = "files_names" 179 | match self.config.database.sql_type.lower(): 180 | case "mysql": 181 | select_query = f""" 182 | SELECT full_name 183 | FROM {table_name} 184 | WHERE db_gallery_id = %s 185 | """ 186 | query_result = connector.fetch_all(select_query, (db_gallery_id,)) 187 | if query_result: 188 | files = [query[0] for query in query_result] 189 | else: 190 | msg = f"Files for gallery name ID {db_gallery_id} do not exist." 
191 | self.logger.error(msg) 192 | raise DatabaseKeyError(msg) 193 | return files 194 | 195 | def _create_galleries_files_hashs_table( 196 | self, algorithm: str, output_bits: int 197 | ) -> None: 198 | with self.SQLConnector() as connector: 199 | dbids_table_name = "files_hashs_%s_dbids" % algorithm.lower() 200 | match self.config.database.sql_type.lower(): 201 | case "mysql": 202 | query = f""" 203 | CREATE TABLE IF NOT EXISTS {dbids_table_name} ( 204 | PRIMARY KEY (db_hash_id), 205 | db_hash_id INT UNSIGNED AUTO_INCREMENT, 206 | hash_value BINARY({output_bits/8}) NOT NULL, 207 | UNIQUE (hash_value) 208 | ) 209 | """ 210 | connector.execute(query) 211 | self.logger.info(f"{dbids_table_name} table created.") 212 | 213 | table_name = "files_hashs_%s" % algorithm.lower() 214 | match self.config.database.sql_type.lower(): 215 | case "mysql": 216 | query = f""" 217 | CREATE TABLE IF NOT EXISTS {table_name} ( 218 | PRIMARY KEY (db_file_id), 219 | FOREIGN KEY (db_file_id) REFERENCES files_dbids(db_file_id) 220 | ON UPDATE CASCADE 221 | ON DELETE CASCADE, 222 | db_file_id INT UNSIGNED NOT NULL, 223 | FOREIGN KEY (db_hash_id) REFERENCES {dbids_table_name}(db_hash_id) 224 | ON UPDATE CASCADE, 225 | db_hash_id INT UNSIGNED NOT NULL, 226 | UNIQUE db_hash_id (db_hash_id, db_file_id) 227 | ) 228 | """ 229 | connector.execute(query) 230 | self.logger.info(f"{table_name} table created.") 231 | 232 | def _create_galleries_files_hashs_tables(self) -> None: 233 | self.logger.debug("Creating gallery image hash tables...") 234 | for algorithm, output_bits in HASH_ALGORITHMS.items(): 235 | self._create_galleries_files_hashs_table(algorithm, output_bits) 236 | self.logger.info("Gallery image hash tables created.") 237 | 238 | def _create_gallery_image_hash_view(self) -> None: 239 | with self.SQLConnector() as connector: 240 | table_name = "files_hashs" 241 | match self.config.database.sql_type.lower(): 242 | case "mysql": 243 | query = f""" 244 | CREATE VIEW IF NOT EXISTS {table_name} 
AS 245 | SELECT files_names.db_file_id AS db_file_id, 246 | galleries_titles.title AS gallery_title, 247 | galleries_names.full_name AS gallery_name, 248 | files_names.full_name AS file_name, 249 | files_hashs_sha512_dbids.hash_value AS sha512 250 | FROM files_names 251 | LEFT JOIN files_dbids USING (db_file_id) 252 | LEFT JOIN galleries_titles USING (db_gallery_id) 253 | LEFT JOIN galleries_names USING (db_gallery_id) 254 | LEFT JOIN files_hashs_sha512 USING (db_file_id) 255 | LEFT JOIN files_hashs_sha512_dbids USING (db_hash_id) 256 | """ 257 | connector.execute(query) 258 | self.logger.info(f"{table_name} view created.") 259 | 260 | def _check_files_dbids_by_db_gallery_id(self, db_gallery_id: int) -> tuple | None: 261 | with self.SQLConnector() as connector: 262 | table_name = f"files_dbids" 263 | match self.config.database.sql_type.lower(): 264 | case "mysql": 265 | select_query = f""" 266 | SELECT COUNT(*) 267 | FROM {table_name} 268 | WHERE db_gallery_id = %s 269 | """ 270 | query_result = connector.fetch_one(select_query, (db_gallery_id,)) 271 | return query_result[0] != 0 272 | 273 | def _insert_gallery_file_hash_for_db_gallery_id( 274 | self, fileinformations: list[FileInformation] 275 | ) -> None: 276 | for finfo in fileinformations: 277 | finfo.sethash() 278 | 279 | for algorithm in HASH_ALGORITHMS: 280 | toinsert: set[bytes] = set() 281 | for finfo in fileinformations: 282 | filehash: bytes = getattr(finfo, algorithm) 283 | if not self._check_db_hash_id_by_hash_value(filehash, algorithm): 284 | toinsert.add(filehash) 285 | self.insert_db_hash_id_by_hash_values(toinsert, algorithm) 286 | 287 | for finfo in fileinformations: 288 | for algorithm in HASH_ALGORITHMS: 289 | finfo.setdb_hash_id( 290 | algorithm, 291 | self.get_db_hash_id_by_hash_value( 292 | getattr(finfo, algorithm), algorithm 293 | ), 294 | ) 295 | self.insert_hash_value_by_db_hash_ids(fileinformations) 296 | 297 | def _insert_gallery_file_hash( 298 | self, db_file_id: int, 
absolute_file_path: str 299 | ) -> None: 300 | 301 | for algorithm in HASH_ALGORITHMS: 302 | is_insert = False 303 | current_hash_value = hash_function_by_file(absolute_file_path, algorithm) 304 | if self._check_hash_value_by_file_id(db_file_id, algorithm): 305 | original_hash_value = self.get_hash_value_by_file_id( 306 | db_file_id, algorithm 307 | ) 308 | if original_hash_value != current_hash_value: 309 | if self._check_db_hash_id_by_hash_value( 310 | current_hash_value, algorithm 311 | ): 312 | db_hash_id = self.get_db_hash_id_by_hash_value( 313 | current_hash_value, algorithm 314 | ) 315 | self._update_gallery_file_hash_by_db_hash_id( 316 | db_file_id, db_hash_id, algorithm 317 | ) 318 | else: 319 | is_insert |= True 320 | else: 321 | is_insert |= True 322 | 323 | if is_insert: 324 | if self._check_db_hash_id_by_hash_value(current_hash_value, algorithm): 325 | db_hash_id = self.get_db_hash_id_by_hash_value( 326 | current_hash_value, algorithm 327 | ) 328 | else: 329 | with self.SQLConnector() as connector: 330 | table_name = f"files_hashs_{algorithm.lower()}_dbids" 331 | match self.config.database.sql_type.lower(): 332 | case "mysql": 333 | insert_hash_value_query = f""" 334 | INSERT INTO {table_name} (hash_value) VALUES (UNHEX(%s)) 335 | """ 336 | try: 337 | connector.execute( 338 | insert_hash_value_query, (current_hash_value.hex(),) 339 | ) 340 | except DatabaseDuplicateKeyError: 341 | self.logger.warning( 342 | f"Hash value {current_hash_value!r} already exists in the database." 
343 | ) 344 | except Exception as e: 345 | raise e 346 | db_hash_id = self.get_db_hash_id_by_hash_value( 347 | current_hash_value, algorithm 348 | ) 349 | 350 | with self.SQLConnector() as connector: 351 | table_name = f"files_hashs_{algorithm.lower()}" 352 | match self.config.database.sql_type.lower(): 353 | case "mysql": 354 | insert_db_hash_id_query = f""" 355 | INSERT INTO {table_name} (db_file_id, db_hash_id) VALUES (%s, %s) 356 | """ 357 | connector.execute(insert_db_hash_id_query, (db_file_id, db_hash_id)) 358 | 359 | def __get_db_hash_id_by_hash_value( 360 | self, hash_value: bytes, algorithm: str 361 | ) -> tuple: 362 | with self.SQLConnector() as connector: 363 | table_name = f"files_hashs_{algorithm.lower()}_dbids" 364 | match self.config.database.sql_type.lower(): 365 | case "mysql": 366 | select_query = f""" 367 | SELECT db_hash_id 368 | FROM {table_name} 369 | WHERE hash_value = UNHEX(%s) 370 | """ 371 | query_result = connector.fetch_one(select_query, (hash_value.hex(),)) 372 | return query_result 373 | 374 | def _check_db_hash_id_by_hash_value( 375 | self, hash_value: bytes, algorithm: str 376 | ) -> bool: 377 | query_result = self.__get_db_hash_id_by_hash_value(hash_value, algorithm) 378 | return len(query_result) != 0 379 | 380 | def get_db_hash_id_by_hash_value(self, hash_value: bytes, algorithm: str) -> int: 381 | query_result = self.__get_db_hash_id_by_hash_value(hash_value, algorithm) 382 | if query_result: 383 | db_hash_id = query_result[0] 384 | else: 385 | msg = f"Image hash for image ID 0x{hash_value.hex()} does not exist." 
386 | raise DatabaseKeyError(msg) 387 | return db_hash_id 388 | 389 | def insert_hash_value_by_db_hash_ids( 390 | self, fileinformations: list[FileInformation] 391 | ) -> None: 392 | for algorithm in HASH_ALGORITHMS: 393 | with self.SQLConnector() as connector: 394 | table_name = f"files_hashs_{algorithm.lower()}" 395 | match self.config.database.sql_type.lower(): 396 | case "mysql": 397 | insert_query_header = f""" 398 | INSERT INTO {table_name} (db_file_id, db_hash_id) 399 | """ 400 | insert_query_values = " ".join( 401 | ["VALUES", ", ".join(["(%s, %s)"] * len(fileinformations))] 402 | ) 403 | insert_query = f"{insert_query_header} {insert_query_values}" 404 | parameters: list[int] = list() 405 | for fileinformation in fileinformations: 406 | parameters += [ 407 | fileinformation.db_file_id, 408 | fileinformation.db_hash_id[algorithm], 409 | ] 410 | connector.execute(insert_query, tuple(parameters)) 411 | 412 | def insert_db_hash_id_by_hash_value( 413 | self, hash_value: bytes, algorithm: str 414 | ) -> None: 415 | with self.SQLConnector() as connector: 416 | table_name = f"files_hashs_{algorithm.lower()}_dbids" 417 | match self.config.database.sql_type.lower(): 418 | case "mysql": 419 | insert_query = f""" 420 | INSERT INTO {table_name} (hash_value) VALUES (UNHEX(%s)) 421 | """ 422 | connector.execute(insert_query, (hash_value.hex(),)) 423 | 424 | def insert_db_hash_id_by_hash_values( 425 | self, hash_values: set[bytes], algorithm: str 426 | ) -> None: 427 | if not hash_values: 428 | return 429 | 430 | toinsert: list[str] = list() 431 | for hash_value in hash_values: 432 | if (hash_value not in toinsert) and ( 433 | not self._check_db_hash_id_by_hash_value(hash_value, algorithm) 434 | ): 435 | toinsert.append(hash_value.hex()) 436 | if not toinsert: 437 | return 438 | 439 | isretry = False 440 | with self.SQLConnector() as connector: 441 | table_name = f"files_hashs_{algorithm.lower()}_dbids" 442 | match self.config.database.sql_type.lower(): 443 | case 
"mysql": 444 | insert_query_header = f""" 445 | INSERT INTO {table_name} (hash_value) 446 | """ 447 | insert_query_values = " ".join( 448 | ["VALUES", ", ".join(["(UNHEX(%s))"] * len(toinsert))] 449 | ) 450 | insert_query = f"{insert_query_header} {insert_query_values}" 451 | try: 452 | connector.execute(insert_query, tuple(toinsert)) 453 | except DatabaseDuplicateKeyError: 454 | isretry = True 455 | 456 | if isretry: 457 | for hash_hex in toinsert: 458 | if not self._check_db_hash_id_by_hash_value(hash_value, algorithm): 459 | self.logger.warning( 460 | f"Retrying to insert hash value 0x{hash_hex} into the database." 461 | ) 462 | self.insert_db_hash_id_by_hash_values( 463 | {bytes.fromhex(hash_hex)}, algorithm 464 | ) 465 | 466 | def get_hash_value_by_db_hash_id(self, db_hash_id: int, algorithm: str) -> bytes: 467 | with self.SQLConnector() as connector: 468 | table_name = f"files_hashs_{algorithm.lower()}_dbids" 469 | match self.config.database.sql_type.lower(): 470 | case "mysql": 471 | select_query = f""" 472 | SELECT hash_value 473 | FROM {table_name} 474 | WHERE db_hash_id = %s 475 | """ 476 | query_result = connector.fetch_one(select_query, (db_hash_id,)) 477 | if query_result: 478 | hash_value = query_result[0] 479 | else: 480 | msg = f"Image hash for image ID {db_hash_id} does not exist." 
481 | raise DatabaseKeyError(msg) 482 | return hash_value 483 | 484 | def __get_hash_value_by_file_id(self, db_file_id: int, algorithm: str) -> tuple: 485 | with self.SQLConnector() as connector: 486 | table_name = f"files_hashs_{algorithm.lower()}" 487 | match self.config.database.sql_type.lower(): 488 | case "mysql": 489 | select_query = f""" 490 | SELECT db_hash_id 491 | FROM {table_name} 492 | WHERE db_file_id = %s 493 | """ 494 | query_result = connector.fetch_one(select_query, (db_file_id,)) 495 | return query_result 496 | 497 | def _check_hash_value_by_file_id(self, db_file_id: int, algorithm: str) -> bool: 498 | query_result = self.__get_hash_value_by_file_id(db_file_id, algorithm) 499 | return len(query_result) != 0 500 | 501 | def get_hash_value_by_file_id(self, db_file_id: int, algorithm: str) -> bytes: 502 | query_result = self.__get_hash_value_by_file_id(db_file_id, algorithm) 503 | if query_result: 504 | db_hash_id = query_result[0] 505 | else: 506 | msg = f"Image hash for image ID {db_file_id} does not exist." 
507 | raise DatabaseKeyError(msg) 508 | return self.get_hash_value_by_db_hash_id(db_hash_id, algorithm) 509 | 510 | def _update_gallery_file_hash_by_db_hash_id( 511 | self, db_file_id: int, db_hash_id: int, algorithm: str 512 | ) -> None: 513 | with self.SQLConnector() as connector: 514 | table_name = f"files_hashs_{algorithm.lower()}" 515 | match self.config.database.sql_type.lower(): 516 | case "mysql": 517 | update_query = f""" 518 | UPDATE {table_name} SET db_hash_id = %s WHERE db_file_id = %s 519 | """ 520 | connector.execute(update_query, (db_hash_id, db_file_id)) 521 | -------------------------------------------------------------------------------- /src/h2hdb/table_gids.py: -------------------------------------------------------------------------------- 1 | __all__ = ["H2HDBGalleriesIDs", "H2HDBGalleriesGIDs"] 2 | 3 | from abc import ABCMeta 4 | 5 | from .h2hdb_spec import H2HDBAbstract 6 | from .sql_connector import DatabaseKeyError 7 | 8 | 9 | class H2HDBGalleriesIDs(H2HDBAbstract, metaclass=ABCMeta): 10 | def _create_galleries_names_table(self) -> None: 11 | with self.SQLConnector() as connector: 12 | table_name = "galleries_dbids" 13 | match self.config.database.sql_type.lower(): 14 | case "mysql": 15 | column_name = "name" 16 | column_name_parts, create_gallery_name_parts_sql = ( 17 | self.mysql_split_gallery_name_based_on_limit(column_name) 18 | ) 19 | id_query = f""" 20 | CREATE TABLE IF NOT EXISTS {table_name} ( 21 | PRIMARY KEY (db_gallery_id), 22 | db_gallery_id INT UNSIGNED AUTO_INCREMENT, 23 | {create_gallery_name_parts_sql}, 24 | UNIQUE real_primay_key ({", ".join(column_name_parts)}) 25 | ) 26 | """ 27 | connector.execute(id_query) 28 | 29 | table_name = "galleries_names" 30 | match self.config.database.sql_type.lower(): 31 | case "mysql": 32 | name_query = f""" 33 | CREATE TABLE IF NOT EXISTS {table_name} ( 34 | PRIMARY KEY (db_gallery_id), 35 | FOREIGN KEY (db_gallery_id) REFERENCES galleries_dbids(db_gallery_id) 36 | ON UPDATE CASCADE 37 | 
ON DELETE CASCADE, 38 | db_gallery_id INT UNSIGNED NOT NULL, 39 | full_name TEXT NOT NULL, 40 | FULLTEXT (full_name) 41 | ) 42 | """ 43 | connector.execute(name_query) 44 | self.logger.info(f"{table_name} table created.") 45 | 46 | def _insert_gallery_name(self, gallery_name: str) -> None: 47 | with self.SQLConnector() as connector: 48 | table_name = "galleries_dbids" 49 | gallery_name_parts = self._split_gallery_name(gallery_name) 50 | 51 | match self.config.database.sql_type.lower(): 52 | case "mysql": 53 | column_name_parts, _ = self.mysql_split_gallery_name_based_on_limit( 54 | "name" 55 | ) 56 | insert_query = f""" 57 | INSERT INTO {table_name} 58 | ({", ".join(column_name_parts)}) 59 | VALUES ({", ".join(["%s" for _ in column_name_parts])}) 60 | """ 61 | connector.execute(insert_query, tuple(gallery_name_parts)) 62 | 63 | db_gallery_id = self._get_db_gallery_id_by_gallery_name(gallery_name) 64 | 65 | table_name = "galleries_names" 66 | gallery_name_parts = self._split_gallery_name(gallery_name) 67 | 68 | match self.config.database.sql_type.lower(): 69 | case "mysql": 70 | column_name_parts, _ = self.mysql_split_gallery_name_based_on_limit( 71 | "name" 72 | ) 73 | insert_query = f""" 74 | INSERT INTO {table_name} 75 | (db_gallery_id, full_name) 76 | VALUES (%s, %s) 77 | """ 78 | connector.execute(insert_query, (db_gallery_id, gallery_name)) 79 | 80 | def __get_db_gallery_id_by_gallery_name(self, gallery_name: str) -> tuple: 81 | with self.SQLConnector() as connector: 82 | table_name = "galleries_dbids" 83 | gallery_name_parts = self._split_gallery_name(gallery_name) 84 | 85 | match self.config.database.sql_type.lower(): 86 | case "mysql": 87 | column_name_parts, _ = self.mysql_split_gallery_name_based_on_limit( 88 | "name" 89 | ) 90 | select_query = f""" 91 | SELECT db_gallery_id 92 | FROM {table_name} 93 | WHERE {" AND ".join([f"{part} = %s" for part in column_name_parts])} 94 | """ 95 | 96 | query_result = connector.fetch_one(select_query, 
tuple(gallery_name_parts)) 97 | return query_result 98 | 99 | def _check_galleries_dbids_by_gallery_name(self, gallery_name: str) -> bool: 100 | query_result = self.__get_db_gallery_id_by_gallery_name(gallery_name) 101 | return len(query_result) != 0 102 | 103 | def _get_db_gallery_id_by_gallery_name(self, gallery_name: str) -> int: 104 | query_result = self.__get_db_gallery_id_by_gallery_name(gallery_name) 105 | if query_result: 106 | db_gallery_id = query_result[0] 107 | else: 108 | self.logger.debug(f"Gallery name '{gallery_name}' does not exist.") 109 | raise DatabaseKeyError(f"Gallery name '{gallery_name}' does not exist.") 110 | return db_gallery_id 111 | 112 | def _get_db_gallery_id_by_gid(self, gid: int) -> int: 113 | with self.SQLConnector() as connector: 114 | table_name = "galleries_gids" 115 | match self.config.database.sql_type.lower(): 116 | case "mysql": 117 | select_query = f""" 118 | SELECT db_gallery_id 119 | FROM {table_name} 120 | WHERE gid = %s 121 | """ 122 | query_result = connector.fetch_one(select_query, (gid,)) 123 | 124 | if query_result: 125 | db_gallery_id = query_result[0] 126 | else: 127 | msg = f"Gallery name ID for GID {gid} does not exist." 128 | self.logger.error(msg) 129 | raise DatabaseKeyError(msg) 130 | return db_gallery_id 131 | 132 | 133 | class H2HDBGalleriesGIDs(H2HDBGalleriesIDs, H2HDBAbstract, metaclass=ABCMeta): 134 | """ 135 | A class that handles the GIDs for galleries in the comic database. 136 | 137 | This class inherits from `H2HDBAbstract` and is used to manage the GIDs for galleries 138 | 139 | Attributes: 140 | sql_type (str): The type of SQL database being used. 141 | sql_connection_params (SQLConnectorParams): The parameters for establishing the SQL connection. 142 | connector (SQLConnector): The SQL connector object. 143 | 144 | Methods: 145 | _create_galleries_gids_table: Creates the galleries_gids table. 146 | _insert_gallery_gid: Inserts the GID for the gallery name ID into the galleries_gids table. 
147 | get_gid_by_gallery_name: Selects the GID for the gallery name from the database. 148 | """ 149 | 150 | def _create_galleries_gids_table(self) -> None: 151 | with self.SQLConnector() as connector: 152 | table_name = "galleries_gids" 153 | match self.config.database.sql_type.lower(): 154 | case "mysql": 155 | query = f""" 156 | CREATE TABLE IF NOT EXISTS {table_name} ( 157 | PRIMARY KEY (db_gallery_id), 158 | FOREIGN KEY (db_gallery_id) REFERENCES galleries_dbids(db_gallery_id) 159 | ON UPDATE CASCADE 160 | ON DELETE CASCADE, 161 | db_gallery_id INT UNSIGNED NOT NULL, 162 | gid INT UNSIGNED NOT NULL, 163 | INDEX (gid) 164 | ) 165 | """ 166 | connector.execute(query) 167 | self.logger.info(f"{table_name} table created.") 168 | 169 | def _insert_gallery_gid(self, db_gallery_id: int, gid: int) -> None: 170 | with self.SQLConnector() as connector: 171 | table_name = "galleries_gids" 172 | match self.config.database.sql_type.lower(): 173 | case "mysql": 174 | insert_query = f""" 175 | INSERT INTO {table_name} (db_gallery_id, gid) VALUES (%s, %s) 176 | """ 177 | connector.execute(insert_query, (db_gallery_id, gid)) 178 | 179 | def _get_gid_by_db_gallery_id(self, db_gallery_id: int) -> int: 180 | with self.SQLConnector() as connector: 181 | table_name = "galleries_gids" 182 | match self.config.database.sql_type.lower(): 183 | case "mysql": 184 | select_query = f""" 185 | SELECT gid 186 | FROM {table_name} 187 | WHERE db_gallery_id = %s 188 | """ 189 | query_result = connector.fetch_one(select_query, (db_gallery_id,)) 190 | 191 | if query_result: 192 | gid = query_result[0] 193 | else: 194 | msg = f"GID for gallery name ID {db_gallery_id} does not exist." 
195 | self.logger.error(msg) 196 | raise DatabaseKeyError(msg) 197 | return gid 198 | 199 | def get_gid_by_gallery_name(self, gallery_name: str) -> int: 200 | db_gallery_id = self._get_db_gallery_id_by_gallery_name(gallery_name) 201 | return self._get_gid_by_db_gallery_id(db_gallery_id) 202 | 203 | def get_gids(self) -> list[int]: 204 | with self.SQLConnector() as connector: 205 | table_name = "galleries_gids" 206 | match self.config.database.sql_type.lower(): 207 | case "mysql": 208 | select_query = f""" 209 | SELECT gid 210 | FROM {table_name} 211 | """ 212 | query_result = connector.fetch_all(select_query) 213 | gids = [gid for gid, in query_result] 214 | return gids 215 | 216 | def check_gid_by_gid(self, gid: int) -> bool: 217 | with self.SQLConnector() as connector: 218 | table_name = "galleries_gids" 219 | match self.config.database.sql_type.lower(): 220 | case "mysql": 221 | select_query = f""" 222 | SELECT gid 223 | FROM {table_name} 224 | WHERE gid = %s 225 | """ 226 | query_result = connector.fetch_one(select_query, (gid,)) 227 | return len(query_result) != 0 228 | -------------------------------------------------------------------------------- /src/h2hdb/table_removed_gids.py: -------------------------------------------------------------------------------- 1 | from abc import ABCMeta 2 | 3 | from .table_gids import H2HDBGalleriesIDs 4 | from .h2hdb_spec import H2HDBAbstract 5 | from .sql_connector import DatabaseKeyError 6 | 7 | 8 | class H2HDBRemovedGalleries(H2HDBGalleriesIDs, H2HDBAbstract, metaclass=ABCMeta): 9 | def _create_removed_galleries_gids_table(self) -> None: 10 | with self.SQLConnector() as connector: 11 | table_name = "removed_galleries_gids" 12 | match self.config.database.sql_type.lower(): 13 | case "mysql": 14 | query = f""" 15 | CREATE TABLE IF NOT EXISTS {table_name} ( 16 | PRIMARY KEY (gid), 17 | gid INT UNSIGNED NOT NULL 18 | ) 19 | """ 20 | connector.execute(query) 21 | self.logger.info(f"{table_name} table created.") 22 | 23 | def 
insert_removed_gallery_gid(self, gid: int) -> None: 24 | with self.SQLConnector() as connector: 25 | table_name = "removed_galleries_gids" 26 | match self.config.database.sql_type.lower(): 27 | case "mysql": 28 | insert_query = f""" 29 | INSERT INTO {table_name} (gid) VALUES (%s) 30 | """ 31 | if self._check_removed_gallery_gid(gid): 32 | self.logger.warning(f"Removed gallery GID {gid} already exists.") 33 | else: 34 | connector.execute(insert_query, (gid,)) 35 | 36 | def __get_removed_gallery_gid(self, gid: int) -> tuple: 37 | with self.SQLConnector() as connector: 38 | table_name = "removed_galleries_gids" 39 | match self.config.database.sql_type.lower(): 40 | case "mysql": 41 | select_query = f""" 42 | SELECT gid 43 | FROM {table_name} 44 | WHERE gid = %s 45 | """ 46 | query_result = connector.fetch_one(select_query, (gid,)) 47 | return query_result 48 | 49 | def _check_removed_gallery_gid(self, gid: int) -> bool: 50 | query_result = self.__get_removed_gallery_gid(gid) 51 | return len(query_result) != 0 52 | 53 | def select_removed_gallery_gid(self, gid: int) -> int: 54 | query_result = self.__get_removed_gallery_gid(gid) 55 | if query_result: 56 | gid = query_result[0] 57 | self.logger.warning(f"Removed gallery GID {gid} exists.") 58 | else: 59 | msg = f"Removed gallery GID {gid} does not exist." 
60 | self.logger.error(msg) 61 | raise DatabaseKeyError(msg) 62 | return gid 63 | -------------------------------------------------------------------------------- /src/h2hdb/table_tags.py: -------------------------------------------------------------------------------- 1 | from abc import ABCMeta 2 | from typing import Callable 3 | 4 | from .table_gids import H2HDBGalleriesIDs 5 | from .h2hdb_spec import H2HDBAbstract 6 | from .information import TagInformation 7 | from .sql_connector import ( 8 | DatabaseKeyError, 9 | DatabaseDuplicateKeyError, 10 | ) 11 | 12 | 13 | class H2HDBGalleriesTags(H2HDBGalleriesIDs, H2HDBAbstract, metaclass=ABCMeta): 14 | def _create_galleries_tags_table(self) -> None: 15 | with self.SQLConnector() as connector: 16 | tag_name_table_name = f"galleries_tags_names" 17 | match self.config.database.sql_type.lower(): 18 | case "mysql": 19 | query = f""" 20 | CREATE TABLE IF NOT EXISTS {tag_name_table_name} ( 21 | PRIMARY KEY (tag_name), 22 | tag_name CHAR({self.innodb_index_prefix_limit}) NOT NULL 23 | ) 24 | """ 25 | connector.execute(query) 26 | self.logger.info(f"{tag_name_table_name} table created.") 27 | 28 | tag_value_table_name = f"galleries_tags_values" 29 | match self.config.database.sql_type.lower(): 30 | case "mysql": 31 | query = f""" 32 | CREATE TABLE IF NOT EXISTS {tag_value_table_name} ( 33 | PRIMARY KEY (tag_value), 34 | tag_value CHAR({self.innodb_index_prefix_limit}) NOT NULL 35 | ) 36 | """ 37 | connector.execute(query) 38 | self.logger.info(f"{tag_value_table_name} table created.") 39 | 40 | tag_pairs_table_name = f"galleries_tag_pairs_dbids" 41 | match self.config.database.sql_type.lower(): 42 | case "mysql": 43 | query = f""" 44 | CREATE TABLE IF NOT EXISTS {tag_pairs_table_name} ( 45 | PRIMARY KEY (db_tag_pair_id), 46 | db_tag_pair_id INT UNSIGNED AUTO_INCREMENT, 47 | tag_name CHAR({self.innodb_index_prefix_limit}) NOT NULL, 48 | FOREIGN KEY (tag_name) REFERENCES {tag_name_table_name}(tag_name) 49 | ON UPDATE CASCADE 50 
| ON DELETE CASCADE, 51 | tag_value CHAR({self.innodb_index_prefix_limit}) NOT NULL, 52 | FOREIGN KEY (tag_value) REFERENCES {tag_value_table_name}(tag_value) 53 | ON UPDATE CASCADE 54 | ON DELETE CASCADE, 55 | UNIQUE (tag_name, tag_value), 56 | INDEX (tag_value) 57 | ) 58 | """ 59 | connector.execute(query) 60 | self.logger.info(f"{tag_pairs_table_name} table created.") 61 | 62 | table_name = f"galleries_tags" 63 | match self.config.database.sql_type.lower(): 64 | case "mysql": 65 | query = f""" 66 | CREATE TABLE IF NOT EXISTS {table_name} ( 67 | PRIMARY KEY (db_gallery_id, db_tag_pair_id), 68 | db_gallery_id INT UNSIGNED NOT NULL, 69 | FOREIGN KEY (db_gallery_id) REFERENCES galleries_dbids(db_gallery_id) 70 | ON UPDATE CASCADE 71 | ON DELETE CASCADE, 72 | db_tag_pair_id INT UNSIGNED NOT NULL, 73 | FOREIGN KEY (db_tag_pair_id) REFERENCES {tag_pairs_table_name}(db_tag_pair_id) 74 | ON UPDATE CASCADE 75 | ON DELETE CASCADE, 76 | UNIQUE (db_tag_pair_id, db_gallery_id) 77 | ) 78 | """ 79 | connector.execute(query) 80 | self.logger.info(f"{table_name} table created.") 81 | 82 | def __get_db_tag_pair_id(self, tag_name: str, tag_value: str) -> tuple: 83 | with self.SQLConnector() as connector: 84 | match self.config.database.sql_type.lower(): 85 | case "mysql": 86 | select_query = f""" 87 | SELECT db_tag_pair_id 88 | FROM galleries_tag_pairs_dbids 89 | WHERE tag_name = %s AND tag_value = %s 90 | """ 91 | query_result = connector.fetch_one(select_query, (tag_name, tag_value)) 92 | return query_result 93 | 94 | def _check_db_tag_pair_id(self, tag_name: str, tag_value: str) -> bool: 95 | query_result = self.__get_db_tag_pair_id(tag_name, tag_value) 96 | return len(query_result) != 0 97 | 98 | def _get_db_tag_pair_id(self, tag_name: str, tag_value: str) -> int: 99 | query_result = self.__get_db_tag_pair_id(tag_name, tag_value) 100 | if query_result: 101 | db_tag_id = query_result[0] 102 | else: 103 | self.logger.debug(f"Tag '{tag_value}' does not exist.") 104 | raise 
DatabaseKeyError(f"Tag '{tag_value}' does not exist.") 105 | return db_tag_id 106 | 107 | def _check_gallery_tag_name(self, tag_name: str) -> bool: 108 | with self.SQLConnector() as connector: 109 | table_name = f"galleries_tags_names" 110 | match self.config.database.sql_type.lower(): 111 | case "mysql": 112 | select_query = f""" 113 | SELECT tag_name 114 | FROM {table_name} 115 | WHERE tag_name = %s 116 | """ 117 | query_result = connector.fetch_one(select_query, (tag_name,)) 118 | return len(query_result) != 0 119 | 120 | def _check_gallery_tag_value(self, tag_value: str) -> bool: 121 | with self.SQLConnector() as connector: 122 | table_name = f"galleries_tags_values" 123 | match self.config.database.sql_type.lower(): 124 | case "mysql": 125 | select_query = f""" 126 | SELECT tag_value 127 | FROM {table_name} 128 | WHERE tag_value = %s 129 | """ 130 | query_result = connector.fetch_one(select_query, (tag_value,)) 131 | return len(query_result) != 0 132 | 133 | def __insert_tag_names_or_tag_values( 134 | self, n_or_v: str, tag_nvs: list[str], check_fun: Callable[[str], bool] 135 | ) -> None: 136 | toinsert_tag_nvs = list[str]() 137 | for tag_nv in tag_nvs: 138 | if not check_fun(tag_nv): 139 | toinsert_tag_nvs.append(tag_nv) 140 | 141 | if not toinsert_tag_nvs: 142 | return 143 | 144 | isretry = False 145 | with self.SQLConnector() as connector: 146 | match n_or_v.lower(): 147 | case "name": 148 | table_name = "galleries_tags_names" 149 | column_name = "tag_name" 150 | case "value": 151 | table_name = "galleries_tags_values" 152 | column_name = "tag_value" 153 | 154 | match self.config.database.sql_type.lower(): 155 | case "mysql": 156 | insert_query_header = f""" 157 | INSERT INTO {table_name} ({column_name}) 158 | """ 159 | insert_query_values = " ".join( 160 | ["VALUES", ", ".join(["(%s)" for _ in toinsert_tag_nvs])] 161 | ) 162 | insert_query = f"{insert_query_header} {insert_query_values}" 163 | try: 164 | connector.execute(insert_query, 
tuple(toinsert_tag_nvs)) 165 | except DatabaseDuplicateKeyError: 166 | isretry = True 167 | except Exception as e: 168 | raise e 169 | 170 | if isretry: 171 | self.__insert_tag_names_or_tag_values(n_or_v, toinsert_tag_nvs, check_fun) 172 | 173 | def _insert_tag_names(self, tag_names: list[str]) -> None: 174 | self.__insert_tag_names_or_tag_values( 175 | "name", tag_names, self._check_gallery_tag_name 176 | ) 177 | 178 | def _insert_tag_values(self, tag_values: list[str]) -> None: 179 | self.__insert_tag_names_or_tag_values( 180 | "value", tag_values, self._check_gallery_tag_value 181 | ) 182 | 183 | def _insert_tag_pairs_dbids(self, tags: list[TagInformation]) -> None: 184 | toinsert_db_tag_pair_id = list[TagInformation]() 185 | for tag in tags: 186 | if not self._check_db_tag_pair_id(tag.tag_name, tag.tag_value): 187 | toinsert_db_tag_pair_id.append(tag) 188 | 189 | if not toinsert_db_tag_pair_id: 190 | return 191 | 192 | isretry = False 193 | with self.SQLConnector() as connector: 194 | tag_pairs_table_name = f"galleries_tag_pairs_dbids" 195 | match self.config.database.sql_type.lower(): 196 | case "mysql": 197 | insert_query_header = f""" 198 | INSERT INTO {tag_pairs_table_name} (tag_name, tag_value) 199 | """ 200 | insert_query_values = " ".join( 201 | [ 202 | "VALUES", 203 | ", ".join(["(%s, %s)" for _ in toinsert_db_tag_pair_id]), 204 | ] 205 | ) 206 | insert_query = f"{insert_query_header} {insert_query_values}" 207 | parameter = list[str]() 208 | for tag in toinsert_db_tag_pair_id: 209 | parameter.extend([tag.tag_name, tag.tag_value]) 210 | try: 211 | connector.execute(insert_query, tuple(parameter)) 212 | except DatabaseDuplicateKeyError: 213 | isretry = True 214 | except Exception as e: 215 | raise e 216 | 217 | if isretry: 218 | self._insert_tag_pairs_dbids(toinsert_db_tag_pair_id) 219 | 220 | def _insert_gallery_tags( 221 | self, db_gallery_id: int, tags: list[TagInformation] 222 | ) -> None: 223 | toinsert_db_tag_pair_id = list[TagInformation]() 224 | 
for tag in tags: 225 | if not self._check_db_tag_pair_id(tag.tag_name, tag.tag_value): 226 | toinsert_db_tag_pair_id.append(tag) 227 | 228 | if not toinsert_db_tag_pair_id: 229 | return 230 | 231 | self._insert_tag_names(list({tag.tag_name for tag in tags})) 232 | self._insert_tag_values(list({tag.tag_value for tag in tags})) 233 | 234 | self._insert_tag_pairs_dbids(toinsert_db_tag_pair_id) 235 | 236 | db_tag_pair_ids = list[int]() 237 | for tag in tags: 238 | db_tag_pair_ids.append( 239 | self._get_db_tag_pair_id(tag.tag_name, tag.tag_value) 240 | ) 241 | 242 | with self.SQLConnector() as connector: 243 | table_name = f"galleries_tags" 244 | match self.config.database.sql_type.lower(): 245 | case "mysql": 246 | insert_query_header = f""" 247 | INSERT INTO {table_name} (db_gallery_id, db_tag_pair_id) 248 | """ 249 | insert_query_values = " ".join( 250 | ["VALUES", ", ".join(["(%s, %s)" for _ in db_tag_pair_ids])] 251 | ) 252 | insert_query = f"{insert_query_header} {insert_query_values}" 253 | parameter = list[int]() 254 | for db_tag_pair_id in db_tag_pair_ids: 255 | parameter.extend([db_gallery_id, db_tag_pair_id]) 256 | connector.execute(insert_query, tuple(parameter)) 257 | 258 | def _select_gallery_tag(self, db_gallery_id: int, tag_name: str) -> str: 259 | with self.SQLConnector() as connector: 260 | table_name = f"galleries_tags_{tag_name}" 261 | match self.config.database.sql_type.lower(): 262 | case "mysql": 263 | select_query = f""" 264 | SELECT tag 265 | FROM {table_name} 266 | WHERE db_gallery_id = %s 267 | """ 268 | query_result = connector.fetch_one(select_query, (db_gallery_id,)) 269 | if query_result: 270 | tag = query_result[0] 271 | else: 272 | msg = f"Tag '{tag_name}' does not exist." 
273 | self.logger.error(msg) 274 | raise DatabaseKeyError(msg) 275 | return tag 276 | 277 | def get_tag_value_by_gallery_name_and_tag_name( 278 | self, gallery_name: str, tag_name: str 279 | ) -> str: 280 | db_gallery_id = self._get_db_gallery_id_by_gallery_name(gallery_name) 281 | return self._select_gallery_tag(db_gallery_id, tag_name) 282 | 283 | def get_tag_pairs_by_gallery_name(self, gallery_name: str) -> list[tuple[str, str]]: 284 | db_gallery_id = self._get_db_gallery_id_by_gallery_name(gallery_name) 285 | db_tag_pair_ids = self._get_db_tag_pair_id_by_db_gallery_id(db_gallery_id) 286 | return [ 287 | self._get_tag_pairs_by_db_tag_pair_id(db_tag_pair_id) 288 | for db_tag_pair_id in db_tag_pair_ids 289 | ] 290 | 291 | def _get_db_tag_pair_id_by_db_gallery_id(self, db_gallery_id: int) -> list[int]: 292 | with self.SQLConnector() as connector: 293 | table_name = "galleries_tags" 294 | match self.config.database.sql_type.lower(): 295 | case "mysql": 296 | select_query = f""" 297 | SELECT db_tag_pair_id 298 | FROM {table_name} 299 | WHERE db_gallery_id = %s 300 | """ 301 | query_result = connector.fetch_all(select_query, (db_gallery_id,)) 302 | return [query[0] for query in query_result] 303 | 304 | def _get_tag_pairs_by_db_tag_pair_id(self, db_tag_pair_id: int) -> tuple[str, str]: 305 | with self.SQLConnector() as connector: 306 | table_name = "galleries_tag_pairs_dbids" 307 | match self.config.database.sql_type.lower(): 308 | case "mysql": 309 | select_query = f""" 310 | SELECT tag_name, tag_value 311 | FROM {table_name} 312 | WHERE db_tag_pair_id = %s 313 | """ 314 | query_result = connector.fetch_one(select_query, (db_tag_pair_id,)) 315 | if query_result: 316 | tag_name, tag_value = query_result 317 | else: 318 | msg = f"Tag pair ID {db_tag_pair_id} does not exist." 
from abc import ABCMeta
import datetime

from .table_gids import H2HDBGalleriesIDs
from .h2hdb_spec import H2HDBAbstract
from .sql_connector import DatabaseKeyError


class H2HDBTimes(H2HDBGalleriesIDs, H2HDBAbstract, metaclass=ABCMeta):
    """Timestamp bookkeeping for galleries.

    Every *_times table shares one layout (db_gallery_id -> DATETIME column
    ``time``); the generic helpers below are reused for the download,
    redownload, upload, modified and access time tables.
    """

    def _create_times_table(self, table_name: str) -> None:
        """Create the timestamp table *table_name* if it does not exist."""
        with self.SQLConnector() as conn:
            if self.config.database.sql_type.lower() == "mysql":
                ddl = f"""
                CREATE TABLE IF NOT EXISTS {table_name} (
                    PRIMARY KEY (db_gallery_id),
                    FOREIGN KEY (db_gallery_id) REFERENCES galleries_dbids(db_gallery_id)
                        ON UPDATE CASCADE
                        ON DELETE CASCADE,
                    db_gallery_id INT UNSIGNED NOT NULL,
                    time DATETIME NOT NULL,
                    INDEX (time)
                )
                """
                conn.execute(ddl)
            self.logger.info(f"{table_name} table created.")

    def _insert_time(self, table_name: str, db_gallery_id: int, time: str) -> None:
        """Insert *time* for *db_gallery_id* into *table_name*."""
        with self.SQLConnector() as conn:
            if self.config.database.sql_type.lower() == "mysql":
                insert_query = f"""
                INSERT INTO {table_name} (db_gallery_id, time) VALUES (%s, %s)
                """
                conn.execute(insert_query, (db_gallery_id, time))

    def _select_time(self, table_name: str, db_gallery_id: int) -> datetime.datetime:
        """Return the stored time; raise DatabaseKeyError when missing."""
        with self.SQLConnector() as conn:
            if self.config.database.sql_type.lower() == "mysql":
                select_query = f"""
                SELECT time
                FROM {table_name}
                WHERE db_gallery_id = %s
                """
            row = conn.fetch_one(select_query, (db_gallery_id,))
            if row:
                time = row[0]
            else:
                msg = f"Time for gallery name ID {db_gallery_id} does not exist in table '{table_name}'."
                self.logger.error(msg)
                raise DatabaseKeyError(msg)
            return time

    def _update_time(self, table_name: str, db_gallery_id: int, time: str) -> None:
        """Overwrite the stored time for *db_gallery_id* in *table_name*."""
        with self.SQLConnector() as conn:
            if self.config.database.sql_type.lower() == "mysql":
                update_query = f"""
                UPDATE {table_name} SET time = %s WHERE db_gallery_id = %s
                """
                conn.execute(update_query, (time, db_gallery_id))

    def _create_galleries_download_times_table(self) -> None:
        self._create_times_table("galleries_download_times")

    def _create_galleries_redownload_times_table(self) -> None:
        self._create_times_table("galleries_redownload_times")

    def _insert_download_time(self, db_gallery_id: int, time: str) -> None:
        # A fresh download also seeds the redownload time.
        self._insert_time("galleries_download_times", db_gallery_id, time)
        self._insert_time("galleries_redownload_times", db_gallery_id, time)

    def update_redownload_time(self, db_gallery_id: int, time: str) -> None:
        self._update_time("galleries_redownload_times", db_gallery_id, time)

    def _reset_redownload_times(self) -> None:
        """Reset every redownload time back to its original download time."""
        table_name = "galleries_redownload_times"
        with self.SQLConnector() as conn:
            if self.config.database.sql_type.lower() == "mysql":
                update_query = f"""
                UPDATE {table_name}
                JOIN galleries_download_times
                ON {table_name}.db_gallery_id = galleries_download_times.db_gallery_id
                SET {table_name}.time = galleries_download_times.time
                WHERE {table_name}.time <> galleries_download_times.time;
                """
                conn.execute(update_query)

    def _create_galleries_upload_times_table(self) -> None:
        self._create_times_table("galleries_upload_times")

    def _insert_upload_time(self, db_gallery_id: int, time: str) -> None:
        self._insert_time("galleries_upload_times", db_gallery_id, time)

    def get_upload_time_by_gallery_name(self, gallery_name: str) -> datetime.datetime:
        """Public lookup: gallery name -> upload time."""
        db_gallery_id = self._get_db_gallery_id_by_gallery_name(gallery_name)
        return self._select_time("galleries_upload_times", db_gallery_id)

    def _create_galleries_modified_times_table(self) -> None:
        self._create_times_table("galleries_modified_times")

    def _insert_modified_time(self, db_gallery_id: int, time: str) -> None:
        self._insert_time("galleries_modified_times", db_gallery_id, time)

    def _create_galleries_access_times_table(self) -> None:
        self._create_times_table("galleries_access_times")

    def _insert_access_time(self, db_gallery_id: int, time: str) -> None:
        self._insert_time("galleries_access_times", db_gallery_id, time)

    def update_access_time(self, gallery_name: str, time: str) -> None:
        """Public: set a gallery's access time, addressed by gallery name."""
        db_gallery_id = self._get_db_gallery_id_by_gallery_name(gallery_name)
        self._update_time("galleries_access_times", db_gallery_id, time)
from abc import ABCMeta

from .table_gids import H2HDBGalleriesIDs
from .h2hdb_spec import H2HDBAbstract
from .sql_connector import DatabaseKeyError


class H2HDBGalleriesTitles(H2HDBGalleriesIDs, H2HDBAbstract, metaclass=ABCMeta):
    """Mixin for the galleries_titles table (db_gallery_id -> title text)."""

    def _create_galleries_titles_table(self) -> None:
        """Create galleries_titles with a FULLTEXT index on the title."""
        table_name = "galleries_titles"
        with self.SQLConnector() as conn:
            if self.config.database.sql_type.lower() == "mysql":
                ddl = f"""
                CREATE TABLE IF NOT EXISTS {table_name} (
                    PRIMARY KEY (db_gallery_id),
                    FOREIGN KEY (db_gallery_id) REFERENCES galleries_dbids(db_gallery_id)
                        ON UPDATE CASCADE
                        ON DELETE CASCADE,
                    db_gallery_id INT UNSIGNED NOT NULL,
                    title TEXT NOT NULL,
                    FULLTEXT (title)
                )
                """
                conn.execute(ddl)
            self.logger.info(f"{table_name} table created.")

    def _insert_gallery_title(self, db_gallery_id: int, title: str) -> None:
        """Insert the title row for *db_gallery_id*."""
        table_name = "galleries_titles"
        with self.SQLConnector() as conn:
            if self.config.database.sql_type.lower() == "mysql":
                insert_query = f"""
                INSERT INTO {table_name} (db_gallery_id, title) VALUES (%s, %s)
                """
                conn.execute(insert_query, (db_gallery_id, title))

    def _get_title_by_db_gallery_id(self, db_gallery_id: int) -> str:
        """Return the title; raise DatabaseKeyError when absent."""
        table_name = "galleries_titles"
        with self.SQLConnector() as conn:
            if self.config.database.sql_type.lower() == "mysql":
                select_query = f"""
                SELECT title
                FROM {table_name}
                WHERE db_gallery_id = %s
                """
            row = conn.fetch_one(select_query, (db_gallery_id,))
            if row:
                title = row[0]
            else:
                msg = f"Title for gallery name ID {db_gallery_id} does not exist."
                self.logger.error(msg)
                raise DatabaseKeyError(msg)
            return title

    def get_title_by_gallery_name(self, gallery_name: str) -> str:
        """Public lookup: gallery name -> title."""
        db_gallery_id = self._get_db_gallery_id_by_gallery_name(gallery_name)
        return self._get_title_by_db_gallery_id(db_gallery_id)
from abc import ABCMeta

from .table_gids import H2HDBGalleriesIDs
from .h2hdb_spec import H2HDBAbstract
from .sql_connector import DatabaseKeyError


class H2HDBUploadAccounts(H2HDBGalleriesIDs, H2HDBAbstract, metaclass=ABCMeta):
    """Mixin for galleries_upload_accounts (db_gallery_id -> uploader account)."""

    def _create_upload_account_table(self) -> None:
        """Create the upload-accounts table if it does not exist."""
        table_name = "galleries_upload_accounts"
        with self.SQLConnector() as conn:
            if self.config.database.sql_type.lower() == "mysql":
                # CHAR sized to the InnoDB index-prefix limit so the
                # account column stays fully indexable.
                ddl = f"""
                CREATE TABLE IF NOT EXISTS {table_name} (
                    PRIMARY KEY (db_gallery_id),
                    FOREIGN KEY (db_gallery_id) REFERENCES galleries_dbids(db_gallery_id)
                        ON UPDATE CASCADE
                        ON DELETE CASCADE,
                    db_gallery_id INT UNSIGNED NOT NULL,
                    account CHAR({self.innodb_index_prefix_limit}) NOT NULL,
                    INDEX (account)
                )
                """
                conn.execute(ddl)
            self.logger.info(f"{table_name} table created.")

    def _insert_gallery_upload_account(self, db_gallery_id: int, account: str) -> None:
        """Insert the uploader account row for *db_gallery_id*."""
        table_name = "galleries_upload_accounts"
        with self.SQLConnector() as conn:
            if self.config.database.sql_type.lower() == "mysql":
                insert_query = f"""
                INSERT INTO {table_name} (db_gallery_id, account) VALUES (%s, %s)
                """
                conn.execute(insert_query, (db_gallery_id, account))

    def _select_gallery_upload_account(self, db_gallery_id: int) -> str:
        """Return the uploader account; raise DatabaseKeyError when absent."""
        table_name = "galleries_upload_accounts"
        with self.SQLConnector() as conn:
            if self.config.database.sql_type.lower() == "mysql":
                select_query = f"""
                SELECT account
                FROM {table_name}
                WHERE db_gallery_id = %s
                """
            row = conn.fetch_one(select_query, (db_gallery_id,))
            if row:
                account = row[0]
            else:
                msg = f"Upload account for gallery name ID {db_gallery_id} does not exist."
                self.logger.error(msg)
                raise DatabaseKeyError(msg)
            return account

    def get_upload_account_by_gallery_name(self, gallery_name: str) -> str:
        """Public lookup: gallery name -> uploader account."""
        db_gallery_id = self._get_db_gallery_id_by_gallery_name(gallery_name)
        return self._select_gallery_upload_account(db_gallery_id)
40 | 41 | def __enter__(self) -> "ThreadsList": 42 | return self 43 | 44 | def __exit__( 45 | self, 46 | exc_type: type[BaseException] | None, 47 | exc_value: BaseException | None, 48 | traceback: object | None, 49 | ) -> None: 50 | running_threads: list[Thread] = list() 51 | while self: 52 | self[0].start() 53 | running_threads.append(self.pop(0)) 54 | while len(running_threads) >= MAX_THREADS: 55 | for thread in running_threads: 56 | if not thread.is_alive(): 57 | thread.join() 58 | running_threads.remove(thread) 59 | for thread in running_threads: 60 | thread.join() 61 | 62 | 63 | class SQLThreadsList(ThreadsList): 64 | def get_semaphores(self) -> list[threading.Semaphore]: 65 | return [SQL_SEMAPHORE] 66 | 67 | 68 | def run_in_parallel(fun, args: list[tuple]) -> list: 69 | results = list() 70 | if args: 71 | with Pool(POOL_CPU_LIMIT) as pool: 72 | if len(args[0]) > 1: 73 | results += pool.starmap(fun, args) 74 | else: 75 | results += pool.map(fun, [arg[0] for arg in args]) 76 | return results 77 | -------------------------------------------------------------------------------- /src/h2hdb/view_ginfo.py: -------------------------------------------------------------------------------- 1 | from .table_uploadaccounts import H2HDBUploadAccounts 2 | from .table_titles import H2HDBGalleriesTitles 3 | from .table_times import H2HDBTimes 4 | from .table_gids import H2HDBGalleriesIDs, H2HDBGalleriesGIDs 5 | from .table_database_setting import H2HDBCheckDatabaseSettings 6 | 7 | 8 | class H2HDBGalleriesInfos( 9 | H2HDBGalleriesTitles, 10 | H2HDBUploadAccounts, 11 | H2HDBTimes, 12 | H2HDBGalleriesGIDs, 13 | H2HDBGalleriesIDs, 14 | H2HDBCheckDatabaseSettings, 15 | ): 16 | def _create_galleries_infos_view(self) -> None: 17 | with self.SQLConnector() as connector: 18 | match self.config.database.sql_type.lower(): 19 | case "mysql": 20 | query = """ 21 | CREATE VIEW IF NOT EXISTS galleries_infos AS 22 | SELECT galleries_names.db_gallery_id AS db_gallery_id, 23 | 
from .table_uploadaccounts import H2HDBUploadAccounts
from .table_titles import H2HDBGalleriesTitles
from .table_times import H2HDBTimes
from .table_gids import H2HDBGalleriesIDs, H2HDBGalleriesGIDs
from .table_database_setting import H2HDBCheckDatabaseSettings


class H2HDBGalleriesInfos(
    H2HDBGalleriesTitles,
    H2HDBUploadAccounts,
    H2HDBTimes,
    H2HDBGalleriesGIDs,
    H2HDBGalleriesIDs,
    H2HDBCheckDatabaseSettings,
):
    """Creates read-only SQL views aggregating per-gallery information."""

    def _create_galleries_infos_view(self) -> None:
        """Create the galleries_infos view (name/title/gid/account/times).

        NOTE(review): ``CREATE VIEW IF NOT EXISTS`` is MariaDB syntax; stock
        MySQL rejects IF NOT EXISTS on views — confirm the target server.
        """
        with self.SQLConnector() as conn:
            if self.config.database.sql_type.lower() == "mysql":
                view_query = """
                CREATE VIEW IF NOT EXISTS galleries_infos AS
                SELECT galleries_names.db_gallery_id AS db_gallery_id,
                    galleries_names.full_name AS name,
                    galleries_titles.title AS title,
                    galleries_gids.gid AS gid,
                    galleries_upload_accounts.account AS upload_account,
                    galleries_upload_times.time AS upload_time,
                    galleries_download_times.time AS download_time,
                    galleries_modified_times.time AS modified_time,
                    galleries_access_times.time AS access_time
                FROM galleries_names
                    LEFT JOIN galleries_titles USING (db_gallery_id)
                    LEFT JOIN galleries_gids USING (db_gallery_id)
                    LEFT JOIN galleries_upload_accounts USING (db_gallery_id)
                    LEFT JOIN galleries_upload_times USING (db_gallery_id)
                    LEFT JOIN galleries_download_times USING (db_gallery_id)
                    LEFT JOIN galleries_modified_times USING (db_gallery_id)
                    LEFT JOIN galleries_access_times USING (db_gallery_id)
                """
                conn.execute(view_query)
            self.logger.info("galleries_infos view created.")

    def _create_duplicate_hash_in_gallery_view(self) -> None:
        """Create a view listing galleries dominated by duplicate file hashes.

        The CTE counts, per gallery, how many SHA-512 hash groups occur more
        than once; galleries where that group count is more than 0.9 of the
        non-duplicate remainder are exposed, newest gid first.
        """
        with self.SQLConnector() as conn:
            if self.config.database.sql_type.lower() == "mysql":
                view_query = """
                CREATE VIEW IF NOT EXISTS duplicate_hash_in_gallery AS WITH Files AS (
                    SELECT files_dbids.db_gallery_id AS db_gallery_id,
                        files_hashs_sha512.db_hash_id AS hash_value
                    FROM files_dbids
                        JOIN files_hashs_sha512 ON files_dbids.db_file_id = files_hashs_sha512.db_file_id
                ),
                DuplicateCount AS (
                    SELECT db_gallery_id,
                        hash_value
                    FROM Files
                    GROUP BY db_gallery_id,
                        hash_value
                    HAVING COUNT(*) > 1
                ),
                TotalCount AS (
                    SELECT db_gallery_id,
                        COUNT(*) AS files_count
                    FROM files_dbids
                    GROUP BY db_gallery_id
                ),
                DuplicateGroupCount AS (
                    SELECT db_gallery_id,
                        COUNT(*) AS duplicate_groups
                    FROM DuplicateCount
                    GROUP BY db_gallery_id
                )
                SELECT tc.db_gallery_id AS db_gallery_id,
                    gg.gid AS gid,
                    gn.full_name AS gallery_name
                FROM TotalCount AS tc
                    JOIN DuplicateGroupCount AS dg ON tc.db_gallery_id = dg.db_gallery_id
                    JOIN galleries_gids AS gg ON tc.db_gallery_id = gg.db_gallery_id
                    JOIN galleries_names AS gn ON gg.db_gallery_id = gn.db_gallery_id
                WHERE CAST(dg.duplicate_groups AS FLOAT) / (
                        tc.files_count - CAST(dg.duplicate_groups AS FLOAT)
                    ) > 0.9
                ORDER BY gid DESC;
                """
                conn.execute(view_query)
            self.logger.info("duplicate_hash_in_gallery view created.")