├── .github └── workflows │ └── rust.yml ├── .gitignore ├── Cargo.toml ├── LICENSE.txt ├── README.md ├── src ├── archive.rs ├── doc.rs ├── lib.rs └── xmlutils.rs ├── test.epub └── tests ├── archive.rs ├── doc.rs ├── docs ├── Metamorphosis-jackson.epub ├── book2.epub └── fatbf.epub └── read.rs /.github/workflows/rust.yml: -------------------------------------------------------------------------------- 1 | name: Rust 2 | 3 | on: 4 | workflow_dispatch: 5 | push: 6 | paths: 7 | - "**/*.rs" 8 | - "**/*.yml" 9 | - "Cargo.toml" 10 | - "**/*.epub" 11 | pull_request: 12 | paths: 13 | - "**/*.rs" 14 | - "**/*.yml" 15 | - "Cargo.toml" 16 | - "**/*.epub" 17 | 18 | jobs: 19 | build: 20 | runs-on: ${{ matrix.os }} 21 | strategy: 22 | fail-fast: false 23 | matrix: 24 | os: [macos-latest, ubuntu-latest, windows-latest] 25 | rust: [stable, 1.75.0] 26 | steps: 27 | - uses: actions/checkout@v2 28 | - uses: dtolnay/rust-toolchain@master 29 | with: 30 | toolchain: ${{ matrix.rust }} 31 | - run: cargo build --verbose 32 | - run: cargo test --verbose 33 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | target 2 | Cargo.lock 3 | *.swp 4 | .DS_Store 5 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | authors = ["Daniel García Moreno "] 3 | description = """ 4 | Library to support the reading of epub files. 
5 | """ 6 | documentation = "https://docs.rs/epub/" 7 | exclude = ["test.epub"] 8 | keywords = ["epub", "ebook"] 9 | license = "GPL-3.0" 10 | name = "epub" 11 | repository = "https://github.com/danigm/epub-rs.git" 12 | version = "2.1.4" 13 | edition = "2021" 14 | 15 | [dependencies] 16 | xml-rs = "0.8.26" 17 | percent-encoding = "2.3.1" 18 | thiserror = "2.0.12" 19 | 20 | [features] 21 | mock = [] 22 | 23 | [dependencies.zip] 24 | version = "3.0.0" 25 | default-features = false 26 | features = [ 27 | "deflate", 28 | ] 29 | 30 | [dependencies.regex] 31 | version = "1.11.1" 32 | default-features = false 33 | # regex requires the standard library, so with default features disabled the `std` feature must be re-enabled explicitly. 34 | features = [ 35 | "std", 36 | "unicode", 37 | "perf-cache", 38 | "perf-dfa", 39 | "perf-inline", 40 | ] 41 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | GNU GENERAL PUBLIC LICENSE 2 | Version 3, 29 June 2007 3 | 4 | Copyright (C) 2007 Free Software Foundation, Inc. 5 | Everyone is permitted to copy and distribute verbatim copies 6 | of this license document, but changing it is not allowed. 7 | 8 | Preamble 9 | 10 | The GNU General Public License is a free, copyleft license for 11 | software and other kinds of works. 12 | 13 | The licenses for most software and other practical works are designed 14 | to take away your freedom to share and change the works. By contrast, 15 | the GNU General Public License is intended to guarantee your freedom to 16 | share and change all versions of a program--to make sure it remains free 17 | software for all its users. We, the Free Software Foundation, use the 18 | GNU General Public License for most of our software; it applies also to 19 | any other work released this way by its authors. You can apply it to 20 | your programs, too. 21 | 22 | When we speak of free software, we are referring to freedom, not 23 | price. 
Our General Public Licenses are designed to make sure that you 24 | have the freedom to distribute copies of free software (and charge for 25 | them if you wish), that you receive source code or can get it if you 26 | want it, that you can change the software or use pieces of it in new 27 | free programs, and that you know you can do these things. 28 | 29 | To protect your rights, we need to prevent others from denying you 30 | these rights or asking you to surrender the rights. Therefore, you have 31 | certain responsibilities if you distribute copies of the software, or if 32 | you modify it: responsibilities to respect the freedom of others. 33 | 34 | For example, if you distribute copies of such a program, whether 35 | gratis or for a fee, you must pass on to the recipients the same 36 | freedoms that you received. You must make sure that they, too, receive 37 | or can get the source code. And you must show them these terms so they 38 | know their rights. 39 | 40 | Developers that use the GNU GPL protect your rights with two steps: 41 | (1) assert copyright on the software, and (2) offer you this License 42 | giving you legal permission to copy, distribute and/or modify it. 43 | 44 | For the developers' and authors' protection, the GPL clearly explains 45 | that there is no warranty for this free software. For both users' and 46 | authors' sake, the GPL requires that modified versions be marked as 47 | changed, so that their problems will not be attributed erroneously to 48 | authors of previous versions. 49 | 50 | Some devices are designed to deny users access to install or run 51 | modified versions of the software inside them, although the manufacturer 52 | can do so. This is fundamentally incompatible with the aim of 53 | protecting users' freedom to change the software. The systematic 54 | pattern of such abuse occurs in the area of products for individuals to 55 | use, which is precisely where it is most unacceptable. 
Therefore, we 56 | have designed this version of the GPL to prohibit the practice for those 57 | products. If such problems arise substantially in other domains, we 58 | stand ready to extend this provision to those domains in future versions 59 | of the GPL, as needed to protect the freedom of users. 60 | 61 | Finally, every program is threatened constantly by software patents. 62 | States should not allow patents to restrict development and use of 63 | software on general-purpose computers, but in those that do, we wish to 64 | avoid the special danger that patents applied to a free program could 65 | make it effectively proprietary. To prevent this, the GPL assures that 66 | patents cannot be used to render the program non-free. 67 | 68 | The precise terms and conditions for copying, distribution and 69 | modification follow. 70 | 71 | TERMS AND CONDITIONS 72 | 73 | 0. Definitions. 74 | 75 | "This License" refers to version 3 of the GNU General Public License. 76 | 77 | "Copyright" also means copyright-like laws that apply to other kinds of 78 | works, such as semiconductor masks. 79 | 80 | "The Program" refers to any copyrightable work licensed under this 81 | License. Each licensee is addressed as "you". "Licensees" and 82 | "recipients" may be individuals or organizations. 83 | 84 | To "modify" a work means to copy from or adapt all or part of the work 85 | in a fashion requiring copyright permission, other than the making of an 86 | exact copy. The resulting work is called a "modified version" of the 87 | earlier work or a work "based on" the earlier work. 88 | 89 | A "covered work" means either the unmodified Program or a work based 90 | on the Program. 91 | 92 | To "propagate" a work means to do anything with it that, without 93 | permission, would make you directly or secondarily liable for 94 | infringement under applicable copyright law, except executing it on a 95 | computer or modifying a private copy. 
Propagation includes copying, 96 | distribution (with or without modification), making available to the 97 | public, and in some countries other activities as well. 98 | 99 | To "convey" a work means any kind of propagation that enables other 100 | parties to make or receive copies. Mere interaction with a user through 101 | a computer network, with no transfer of a copy, is not conveying. 102 | 103 | An interactive user interface displays "Appropriate Legal Notices" 104 | to the extent that it includes a convenient and prominently visible 105 | feature that (1) displays an appropriate copyright notice, and (2) 106 | tells the user that there is no warranty for the work (except to the 107 | extent that warranties are provided), that licensees may convey the 108 | work under this License, and how to view a copy of this License. If 109 | the interface presents a list of user commands or options, such as a 110 | menu, a prominent item in the list meets this criterion. 111 | 112 | 1. Source Code. 113 | 114 | The "source code" for a work means the preferred form of the work 115 | for making modifications to it. "Object code" means any non-source 116 | form of a work. 117 | 118 | A "Standard Interface" means an interface that either is an official 119 | standard defined by a recognized standards body, or, in the case of 120 | interfaces specified for a particular programming language, one that 121 | is widely used among developers working in that language. 122 | 123 | The "System Libraries" of an executable work include anything, other 124 | than the work as a whole, that (a) is included in the normal form of 125 | packaging a Major Component, but which is not part of that Major 126 | Component, and (b) serves only to enable use of the work with that 127 | Major Component, or to implement a Standard Interface for which an 128 | implementation is available to the public in source code form. 
A 129 | "Major Component", in this context, means a major essential component 130 | (kernel, window system, and so on) of the specific operating system 131 | (if any) on which the executable work runs, or a compiler used to 132 | produce the work, or an object code interpreter used to run it. 133 | 134 | The "Corresponding Source" for a work in object code form means all 135 | the source code needed to generate, install, and (for an executable 136 | work) run the object code and to modify the work, including scripts to 137 | control those activities. However, it does not include the work's 138 | System Libraries, or general-purpose tools or generally available free 139 | programs which are used unmodified in performing those activities but 140 | which are not part of the work. For example, Corresponding Source 141 | includes interface definition files associated with source files for 142 | the work, and the source code for shared libraries and dynamically 143 | linked subprograms that the work is specifically designed to require, 144 | such as by intimate data communication or control flow between those 145 | subprograms and other parts of the work. 146 | 147 | The Corresponding Source need not include anything that users 148 | can regenerate automatically from other parts of the Corresponding 149 | Source. 150 | 151 | The Corresponding Source for a work in source code form is that 152 | same work. 153 | 154 | 2. Basic Permissions. 155 | 156 | All rights granted under this License are granted for the term of 157 | copyright on the Program, and are irrevocable provided the stated 158 | conditions are met. This License explicitly affirms your unlimited 159 | permission to run the unmodified Program. The output from running a 160 | covered work is covered by this License only if the output, given its 161 | content, constitutes a covered work. This License acknowledges your 162 | rights of fair use or other equivalent, as provided by copyright law. 
163 | 164 | You may make, run and propagate covered works that you do not 165 | convey, without conditions so long as your license otherwise remains 166 | in force. You may convey covered works to others for the sole purpose 167 | of having them make modifications exclusively for you, or provide you 168 | with facilities for running those works, provided that you comply with 169 | the terms of this License in conveying all material for which you do 170 | not control copyright. Those thus making or running the covered works 171 | for you must do so exclusively on your behalf, under your direction 172 | and control, on terms that prohibit them from making any copies of 173 | your copyrighted material outside their relationship with you. 174 | 175 | Conveying under any other circumstances is permitted solely under 176 | the conditions stated below. Sublicensing is not allowed; section 10 177 | makes it unnecessary. 178 | 179 | 3. Protecting Users' Legal Rights From Anti-Circumvention Law. 180 | 181 | No covered work shall be deemed part of an effective technological 182 | measure under any applicable law fulfilling obligations under article 183 | 11 of the WIPO copyright treaty adopted on 20 December 1996, or 184 | similar laws prohibiting or restricting circumvention of such 185 | measures. 186 | 187 | When you convey a covered work, you waive any legal power to forbid 188 | circumvention of technological measures to the extent such circumvention 189 | is effected by exercising rights under this License with respect to 190 | the covered work, and you disclaim any intention to limit operation or 191 | modification of the work as a means of enforcing, against the work's 192 | users, your or third parties' legal rights to forbid circumvention of 193 | technological measures. 194 | 195 | 4. Conveying Verbatim Copies. 
196 | 197 | You may convey verbatim copies of the Program's source code as you 198 | receive it, in any medium, provided that you conspicuously and 199 | appropriately publish on each copy an appropriate copyright notice; 200 | keep intact all notices stating that this License and any 201 | non-permissive terms added in accord with section 7 apply to the code; 202 | keep intact all notices of the absence of any warranty; and give all 203 | recipients a copy of this License along with the Program. 204 | 205 | You may charge any price or no price for each copy that you convey, 206 | and you may offer support or warranty protection for a fee. 207 | 208 | 5. Conveying Modified Source Versions. 209 | 210 | You may convey a work based on the Program, or the modifications to 211 | produce it from the Program, in the form of source code under the 212 | terms of section 4, provided that you also meet all of these conditions: 213 | 214 | a) The work must carry prominent notices stating that you modified 215 | it, and giving a relevant date. 216 | 217 | b) The work must carry prominent notices stating that it is 218 | released under this License and any conditions added under section 219 | 7. This requirement modifies the requirement in section 4 to 220 | "keep intact all notices". 221 | 222 | c) You must license the entire work, as a whole, under this 223 | License to anyone who comes into possession of a copy. This 224 | License will therefore apply, along with any applicable section 7 225 | additional terms, to the whole of the work, and all its parts, 226 | regardless of how they are packaged. This License gives no 227 | permission to license the work in any other way, but it does not 228 | invalidate such permission if you have separately received it. 
229 | 230 | d) If the work has interactive user interfaces, each must display 231 | Appropriate Legal Notices; however, if the Program has interactive 232 | interfaces that do not display Appropriate Legal Notices, your 233 | work need not make them do so. 234 | 235 | A compilation of a covered work with other separate and independent 236 | works, which are not by their nature extensions of the covered work, 237 | and which are not combined with it such as to form a larger program, 238 | in or on a volume of a storage or distribution medium, is called an 239 | "aggregate" if the compilation and its resulting copyright are not 240 | used to limit the access or legal rights of the compilation's users 241 | beyond what the individual works permit. Inclusion of a covered work 242 | in an aggregate does not cause this License to apply to the other 243 | parts of the aggregate. 244 | 245 | 6. Conveying Non-Source Forms. 246 | 247 | You may convey a covered work in object code form under the terms 248 | of sections 4 and 5, provided that you also convey the 249 | machine-readable Corresponding Source under the terms of this License, 250 | in one of these ways: 251 | 252 | a) Convey the object code in, or embodied in, a physical product 253 | (including a physical distribution medium), accompanied by the 254 | Corresponding Source fixed on a durable physical medium 255 | customarily used for software interchange. 
256 | 257 | b) Convey the object code in, or embodied in, a physical product 258 | (including a physical distribution medium), accompanied by a 259 | written offer, valid for at least three years and valid for as 260 | long as you offer spare parts or customer support for that product 261 | model, to give anyone who possesses the object code either (1) a 262 | copy of the Corresponding Source for all the software in the 263 | product that is covered by this License, on a durable physical 264 | medium customarily used for software interchange, for a price no 265 | more than your reasonable cost of physically performing this 266 | conveying of source, or (2) access to copy the 267 | Corresponding Source from a network server at no charge. 268 | 269 | c) Convey individual copies of the object code with a copy of the 270 | written offer to provide the Corresponding Source. This 271 | alternative is allowed only occasionally and noncommercially, and 272 | only if you received the object code with such an offer, in accord 273 | with subsection 6b. 274 | 275 | d) Convey the object code by offering access from a designated 276 | place (gratis or for a charge), and offer equivalent access to the 277 | Corresponding Source in the same way through the same place at no 278 | further charge. You need not require recipients to copy the 279 | Corresponding Source along with the object code. If the place to 280 | copy the object code is a network server, the Corresponding Source 281 | may be on a different server (operated by you or a third party) 282 | that supports equivalent copying facilities, provided you maintain 283 | clear directions next to the object code saying where to find the 284 | Corresponding Source. Regardless of what server hosts the 285 | Corresponding Source, you remain obligated to ensure that it is 286 | available for as long as needed to satisfy these requirements. 
287 | 288 | e) Convey the object code using peer-to-peer transmission, provided 289 | you inform other peers where the object code and Corresponding 290 | Source of the work are being offered to the general public at no 291 | charge under subsection 6d. 292 | 293 | A separable portion of the object code, whose source code is excluded 294 | from the Corresponding Source as a System Library, need not be 295 | included in conveying the object code work. 296 | 297 | A "User Product" is either (1) a "consumer product", which means any 298 | tangible personal property which is normally used for personal, family, 299 | or household purposes, or (2) anything designed or sold for incorporation 300 | into a dwelling. In determining whether a product is a consumer product, 301 | doubtful cases shall be resolved in favor of coverage. For a particular 302 | product received by a particular user, "normally used" refers to a 303 | typical or common use of that class of product, regardless of the status 304 | of the particular user or of the way in which the particular user 305 | actually uses, or expects or is expected to use, the product. A product 306 | is a consumer product regardless of whether the product has substantial 307 | commercial, industrial or non-consumer uses, unless such uses represent 308 | the only significant mode of use of the product. 309 | 310 | "Installation Information" for a User Product means any methods, 311 | procedures, authorization keys, or other information required to install 312 | and execute modified versions of a covered work in that User Product from 313 | a modified version of its Corresponding Source. The information must 314 | suffice to ensure that the continued functioning of the modified object 315 | code is in no case prevented or interfered with solely because 316 | modification has been made. 
317 | 318 | If you convey an object code work under this section in, or with, or 319 | specifically for use in, a User Product, and the conveying occurs as 320 | part of a transaction in which the right of possession and use of the 321 | User Product is transferred to the recipient in perpetuity or for a 322 | fixed term (regardless of how the transaction is characterized), the 323 | Corresponding Source conveyed under this section must be accompanied 324 | by the Installation Information. But this requirement does not apply 325 | if neither you nor any third party retains the ability to install 326 | modified object code on the User Product (for example, the work has 327 | been installed in ROM). 328 | 329 | The requirement to provide Installation Information does not include a 330 | requirement to continue to provide support service, warranty, or updates 331 | for a work that has been modified or installed by the recipient, or for 332 | the User Product in which it has been modified or installed. Access to a 333 | network may be denied when the modification itself materially and 334 | adversely affects the operation of the network or violates the rules and 335 | protocols for communication across the network. 336 | 337 | Corresponding Source conveyed, and Installation Information provided, 338 | in accord with this section must be in a format that is publicly 339 | documented (and with an implementation available to the public in 340 | source code form), and must require no special password or key for 341 | unpacking, reading or copying. 342 | 343 | 7. Additional Terms. 344 | 345 | "Additional permissions" are terms that supplement the terms of this 346 | License by making exceptions from one or more of its conditions. 347 | Additional permissions that are applicable to the entire Program shall 348 | be treated as though they were included in this License, to the extent 349 | that they are valid under applicable law. 
If additional permissions 350 | apply only to part of the Program, that part may be used separately 351 | under those permissions, but the entire Program remains governed by 352 | this License without regard to the additional permissions. 353 | 354 | When you convey a copy of a covered work, you may at your option 355 | remove any additional permissions from that copy, or from any part of 356 | it. (Additional permissions may be written to require their own 357 | removal in certain cases when you modify the work.) You may place 358 | additional permissions on material, added by you to a covered work, 359 | for which you have or can give appropriate copyright permission. 360 | 361 | Notwithstanding any other provision of this License, for material you 362 | add to a covered work, you may (if authorized by the copyright holders of 363 | that material) supplement the terms of this License with terms: 364 | 365 | a) Disclaiming warranty or limiting liability differently from the 366 | terms of sections 15 and 16 of this License; or 367 | 368 | b) Requiring preservation of specified reasonable legal notices or 369 | author attributions in that material or in the Appropriate Legal 370 | Notices displayed by works containing it; or 371 | 372 | c) Prohibiting misrepresentation of the origin of that material, or 373 | requiring that modified versions of such material be marked in 374 | reasonable ways as different from the original version; or 375 | 376 | d) Limiting the use for publicity purposes of names of licensors or 377 | authors of the material; or 378 | 379 | e) Declining to grant rights under trademark law for use of some 380 | trade names, trademarks, or service marks; or 381 | 382 | f) Requiring indemnification of licensors and authors of that 383 | material by anyone who conveys the material (or modified versions of 384 | it) with contractual assumptions of liability to the recipient, for 385 | any liability that these contractual assumptions directly impose on 
386 | those licensors and authors. 387 | 388 | All other non-permissive additional terms are considered "further 389 | restrictions" within the meaning of section 10. If the Program as you 390 | received it, or any part of it, contains a notice stating that it is 391 | governed by this License along with a term that is a further 392 | restriction, you may remove that term. If a license document contains 393 | a further restriction but permits relicensing or conveying under this 394 | License, you may add to a covered work material governed by the terms 395 | of that license document, provided that the further restriction does 396 | not survive such relicensing or conveying. 397 | 398 | If you add terms to a covered work in accord with this section, you 399 | must place, in the relevant source files, a statement of the 400 | additional terms that apply to those files, or a notice indicating 401 | where to find the applicable terms. 402 | 403 | Additional terms, permissive or non-permissive, may be stated in the 404 | form of a separately written license, or stated as exceptions; 405 | the above requirements apply either way. 406 | 407 | 8. Termination. 408 | 409 | You may not propagate or modify a covered work except as expressly 410 | provided under this License. Any attempt otherwise to propagate or 411 | modify it is void, and will automatically terminate your rights under 412 | this License (including any patent licenses granted under the third 413 | paragraph of section 11). 414 | 415 | However, if you cease all violation of this License, then your 416 | license from a particular copyright holder is reinstated (a) 417 | provisionally, unless and until the copyright holder explicitly and 418 | finally terminates your license, and (b) permanently, if the copyright 419 | holder fails to notify you of the violation by some reasonable means 420 | prior to 60 days after the cessation. 
421 | 422 | Moreover, your license from a particular copyright holder is 423 | reinstated permanently if the copyright holder notifies you of the 424 | violation by some reasonable means, this is the first time you have 425 | received notice of violation of this License (for any work) from that 426 | copyright holder, and you cure the violation prior to 30 days after 427 | your receipt of the notice. 428 | 429 | Termination of your rights under this section does not terminate the 430 | licenses of parties who have received copies or rights from you under 431 | this License. If your rights have been terminated and not permanently 432 | reinstated, you do not qualify to receive new licenses for the same 433 | material under section 10. 434 | 435 | 9. Acceptance Not Required for Having Copies. 436 | 437 | You are not required to accept this License in order to receive or 438 | run a copy of the Program. Ancillary propagation of a covered work 439 | occurring solely as a consequence of using peer-to-peer transmission 440 | to receive a copy likewise does not require acceptance. However, 441 | nothing other than this License grants you permission to propagate or 442 | modify any covered work. These actions infringe copyright if you do 443 | not accept this License. Therefore, by modifying or propagating a 444 | covered work, you indicate your acceptance of this License to do so. 445 | 446 | 10. Automatic Licensing of Downstream Recipients. 447 | 448 | Each time you convey a covered work, the recipient automatically 449 | receives a license from the original licensors, to run, modify and 450 | propagate that work, subject to this License. You are not responsible 451 | for enforcing compliance by third parties with this License. 452 | 453 | An "entity transaction" is a transaction transferring control of an 454 | organization, or substantially all assets of one, or subdividing an 455 | organization, or merging organizations. 
If propagation of a covered 456 | work results from an entity transaction, each party to that 457 | transaction who receives a copy of the work also receives whatever 458 | licenses to the work the party's predecessor in interest had or could 459 | give under the previous paragraph, plus a right to possession of the 460 | Corresponding Source of the work from the predecessor in interest, if 461 | the predecessor has it or can get it with reasonable efforts. 462 | 463 | You may not impose any further restrictions on the exercise of the 464 | rights granted or affirmed under this License. For example, you may 465 | not impose a license fee, royalty, or other charge for exercise of 466 | rights granted under this License, and you may not initiate litigation 467 | (including a cross-claim or counterclaim in a lawsuit) alleging that 468 | any patent claim is infringed by making, using, selling, offering for 469 | sale, or importing the Program or any portion of it. 470 | 471 | 11. Patents. 472 | 473 | A "contributor" is a copyright holder who authorizes use under this 474 | License of the Program or a work on which the Program is based. The 475 | work thus licensed is called the contributor's "contributor version". 476 | 477 | A contributor's "essential patent claims" are all patent claims 478 | owned or controlled by the contributor, whether already acquired or 479 | hereafter acquired, that would be infringed by some manner, permitted 480 | by this License, of making, using, or selling its contributor version, 481 | but do not include claims that would be infringed only as a 482 | consequence of further modification of the contributor version. For 483 | purposes of this definition, "control" includes the right to grant 484 | patent sublicenses in a manner consistent with the requirements of 485 | this License. 
486 | 487 | Each contributor grants you a non-exclusive, worldwide, royalty-free 488 | patent license under the contributor's essential patent claims, to 489 | make, use, sell, offer for sale, import and otherwise run, modify and 490 | propagate the contents of its contributor version. 491 | 492 | In the following three paragraphs, a "patent license" is any express 493 | agreement or commitment, however denominated, not to enforce a patent 494 | (such as an express permission to practice a patent or covenant not to 495 | sue for patent infringement). To "grant" such a patent license to a 496 | party means to make such an agreement or commitment not to enforce a 497 | patent against the party. 498 | 499 | If you convey a covered work, knowingly relying on a patent license, 500 | and the Corresponding Source of the work is not available for anyone 501 | to copy, free of charge and under the terms of this License, through a 502 | publicly available network server or other readily accessible means, 503 | then you must either (1) cause the Corresponding Source to be so 504 | available, or (2) arrange to deprive yourself of the benefit of the 505 | patent license for this particular work, or (3) arrange, in a manner 506 | consistent with the requirements of this License, to extend the patent 507 | license to downstream recipients. "Knowingly relying" means you have 508 | actual knowledge that, but for the patent license, your conveying the 509 | covered work in a country, or your recipient's use of the covered work 510 | in a country, would infringe one or more identifiable patents in that 511 | country that you have reason to believe are valid. 
512 | 513 | If, pursuant to or in connection with a single transaction or 514 | arrangement, you convey, or propagate by procuring conveyance of, a 515 | covered work, and grant a patent license to some of the parties 516 | receiving the covered work authorizing them to use, propagate, modify 517 | or convey a specific copy of the covered work, then the patent license 518 | you grant is automatically extended to all recipients of the covered 519 | work and works based on it. 520 | 521 | A patent license is "discriminatory" if it does not include within 522 | the scope of its coverage, prohibits the exercise of, or is 523 | conditioned on the non-exercise of one or more of the rights that are 524 | specifically granted under this License. You may not convey a covered 525 | work if you are a party to an arrangement with a third party that is 526 | in the business of distributing software, under which you make payment 527 | to the third party based on the extent of your activity of conveying 528 | the work, and under which the third party grants, to any of the 529 | parties who would receive the covered work from you, a discriminatory 530 | patent license (a) in connection with copies of the covered work 531 | conveyed by you (or copies made from those copies), or (b) primarily 532 | for and in connection with specific products or compilations that 533 | contain the covered work, unless you entered into that arrangement, 534 | or that patent license was granted, prior to 28 March 2007. 535 | 536 | Nothing in this License shall be construed as excluding or limiting 537 | any implied license or other defenses to infringement that may 538 | otherwise be available to you under applicable patent law. 539 | 540 | 12. No Surrender of Others' Freedom. 541 | 542 | If conditions are imposed on you (whether by court order, agreement or 543 | otherwise) that contradict the conditions of this License, they do not 544 | excuse you from the conditions of this License. 
If you cannot convey a 545 | covered work so as to satisfy simultaneously your obligations under this 546 | License and any other pertinent obligations, then as a consequence you may 547 | not convey it at all. For example, if you agree to terms that obligate you 548 | to collect a royalty for further conveying from those to whom you convey 549 | the Program, the only way you could satisfy both those terms and this 550 | License would be to refrain entirely from conveying the Program. 551 | 552 | 13. Use with the GNU Affero General Public License. 553 | 554 | Notwithstanding any other provision of this License, you have 555 | permission to link or combine any covered work with a work licensed 556 | under version 3 of the GNU Affero General Public License into a single 557 | combined work, and to convey the resulting work. The terms of this 558 | License will continue to apply to the part which is the covered work, 559 | but the special requirements of the GNU Affero General Public License, 560 | section 13, concerning interaction through a network will apply to the 561 | combination as such. 562 | 563 | 14. Revised Versions of this License. 564 | 565 | The Free Software Foundation may publish revised and/or new versions of 566 | the GNU General Public License from time to time. Such new versions will 567 | be similar in spirit to the present version, but may differ in detail to 568 | address new problems or concerns. 569 | 570 | Each version is given a distinguishing version number. If the 571 | Program specifies that a certain numbered version of the GNU General 572 | Public License "or any later version" applies to it, you have the 573 | option of following the terms and conditions either of that numbered 574 | version or of any later version published by the Free Software 575 | Foundation. If the Program does not specify a version number of the 576 | GNU General Public License, you may choose any version ever published 577 | by the Free Software Foundation. 
578 | 579 | If the Program specifies that a proxy can decide which future 580 | versions of the GNU General Public License can be used, that proxy's 581 | public statement of acceptance of a version permanently authorizes you 582 | to choose that version for the Program. 583 | 584 | Later license versions may give you additional or different 585 | permissions. However, no additional obligations are imposed on any 586 | author or copyright holder as a result of your choosing to follow a 587 | later version. 588 | 589 | 15. Disclaimer of Warranty. 590 | 591 | THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY 592 | APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT 593 | HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY 594 | OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, 595 | THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 596 | PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM 597 | IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF 598 | ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 599 | 600 | 16. Limitation of Liability. 601 | 602 | IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING 603 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS 604 | THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY 605 | GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE 606 | USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF 607 | DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD 608 | PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), 609 | EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF 610 | SUCH DAMAGES. 611 | 612 | 17. Interpretation of Sections 15 and 16. 
613 | 614 | If the disclaimer of warranty and limitation of liability provided 615 | above cannot be given local legal effect according to their terms, 616 | reviewing courts shall apply local law that most closely approximates 617 | an absolute waiver of all civil liability in connection with the 618 | Program, unless a warranty or assumption of liability accompanies a 619 | copy of the Program in return for a fee. 620 | 621 | END OF TERMS AND CONDITIONS 622 | 623 | How to Apply These Terms to Your New Programs 624 | 625 | If you develop a new program, and you want it to be of the greatest 626 | possible use to the public, the best way to achieve this is to make it 627 | free software which everyone can redistribute and change under these terms. 628 | 629 | To do so, attach the following notices to the program. It is safest 630 | to attach them to the start of each source file to most effectively 631 | state the exclusion of warranty; and each file should have at least 632 | the "copyright" line and a pointer to where the full notice is found. 633 | 634 | {one line to give the program's name and a brief idea of what it does.} 635 | Copyright (C) {year} {name of author} 636 | 637 | This program is free software: you can redistribute it and/or modify 638 | it under the terms of the GNU General Public License as published by 639 | the Free Software Foundation, either version 3 of the License, or 640 | (at your option) any later version. 641 | 642 | This program is distributed in the hope that it will be useful, 643 | but WITHOUT ANY WARRANTY; without even the implied warranty of 644 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 645 | GNU General Public License for more details. 646 | 647 | You should have received a copy of the GNU General Public License 648 | along with this program. If not, see <http://www.gnu.org/licenses/>. 649 | 650 | Also add information on how to contact you by electronic and paper mail.
651 | 652 | If the program does terminal interaction, make it output a short 653 | notice like this when it starts in an interactive mode: 654 | 655 | {project} Copyright (C) {year} {fullname} 656 | This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'. 657 | This is free software, and you are welcome to redistribute it 658 | under certain conditions; type `show c' for details. 659 | 660 | The hypothetical commands `show w' and `show c' should show the appropriate 661 | parts of the General Public License. Of course, your program's commands 662 | might be different; for a GUI interface, you would use an "about box". 663 | 664 | You should also get your employer (if you work as a programmer) or school, 665 | if any, to sign a "copyright disclaimer" for the program, if necessary. 666 | For more information on this, and how to apply and follow the GNU GPL, see 667 | <http://www.gnu.org/licenses/>. 668 | 669 | The GNU General Public License does not permit incorporating your program 670 | into proprietary programs. If your program is a subroutine library, you 671 | may consider it more useful to permit linking proprietary applications with 672 | the library. If this is what you want to do, use the GNU Lesser General 673 | Public License instead of this License. But first, please read 674 | <http://www.gnu.org/philosophy/why-not-lgpl.html>. 675 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # epub-rs 2 | 3 | Rust library to support the reading of epub files. 4 | 5 | - Documentation: https://docs.rs/epub 6 | - Crate: https://crates.io/crates/epub 7 | 8 | ## Install 9 | 10 | Add this to your `Cargo.toml`: 11 | 12 | ```toml 13 | [dependencies] 14 | epub = "2.1.4" 15 | ``` 16 | 17 | ## MSRV 18 | 19 | The minimum supported Rust version is 1.75.0.
20 | -------------------------------------------------------------------------------- /src/archive.rs: -------------------------------------------------------------------------------- 1 | //! Manages the zip component part of the epub doc. 2 | //! 3 | //! Provides easy methods to navigate through the epub parts and to get 4 | //! the content as string. 5 | 6 | use std::fs::File; 7 | use std::io::BufReader; 8 | use std::path::{Path, PathBuf}; 9 | 10 | use std::io::{Read, Seek}; 11 | 12 | /// Epub archive struct. Here it's stored the file path and the list of 13 | /// files in the zip archive. 14 | #[derive(Clone, Debug)] 15 | pub struct EpubArchive { 16 | zip: zip::ZipArchive, 17 | pub path: PathBuf, 18 | pub files: Vec, 19 | } 20 | 21 | #[derive(Debug, thiserror::Error)] 22 | pub enum ArchiveError { 23 | #[error("I/O Error: {0}")] 24 | IO(#[from] std::io::Error), 25 | #[error("Zip Error: {0}")] 26 | Zip(#[from] zip::result::ZipError), 27 | #[error("Invalid UTF-8: {0}")] 28 | Utf8(#[from] std::str::Utf8Error), 29 | #[error("Invalid UTF-8 Path")] 30 | PathUtf8, 31 | } 32 | impl From for ArchiveError { 33 | fn from(e: std::string::FromUtf8Error) -> Self { 34 | Self::Utf8(e.utf8_error()) 35 | } 36 | } 37 | 38 | impl EpubArchive> { 39 | /// Opens the epub file in `path`. 40 | /// 41 | /// # Errors 42 | /// 43 | /// Returns an error if the zip is broken or if the file doesn't 44 | /// exists. 45 | pub fn new>(path: P) -> Result { 46 | let path = path.as_ref(); 47 | let file = File::open(path)?; 48 | let mut archive = Self::from_reader(BufReader::new(file))?; 49 | archive.path = path.to_path_buf(); 50 | Ok(archive) 51 | } 52 | } 53 | 54 | impl EpubArchive { 55 | /// Opens the epub contained in `reader`. 56 | /// 57 | /// # Errors 58 | /// 59 | /// Returns an error if the zip is broken. 
60 | pub fn from_reader(reader: R) -> Result { 61 | let zip = zip::ZipArchive::new(reader)?; 62 | 63 | let files: Vec = zip.file_names().map(String::from).collect(); 64 | 65 | Ok(Self { 66 | zip, 67 | path: PathBuf::new(), 68 | files, 69 | }) 70 | } 71 | 72 | /// Returns the content of the file by the `name` as `Vec`. 73 | /// 74 | /// # Errors 75 | /// 76 | /// Returns an error if the name doesn't exists in the zip archive. 77 | pub fn get_entry>(&mut self, name: P) -> Result, ArchiveError> { 78 | let mut entry: Vec = vec![]; 79 | 80 | let name = name.as_ref().to_str().ok_or(ArchiveError::PathUtf8)?; 81 | 82 | match self.zip.by_name(name) { 83 | Ok(mut zipfile) => { 84 | zipfile.read_to_end(&mut entry)?; 85 | return Ok(entry); 86 | } 87 | Err(zip::result::ZipError::FileNotFound) => {} 88 | Err(e) => { 89 | return Err(e.into()); 90 | } 91 | }; 92 | 93 | // try percent encoding 94 | let name = percent_encoding::percent_decode(name.as_bytes()).decode_utf8()?; 95 | let mut zipfile = self.zip.by_name(&name)?; 96 | zipfile.read_to_end(&mut entry)?; 97 | Ok(entry) 98 | } 99 | 100 | /// Returns the content of the file by the `name` as `String`. 101 | /// 102 | /// # Errors 103 | /// 104 | /// Returns an error if the name doesn't exists in the zip archive. 105 | pub fn get_entry_as_str>(&mut self, name: P) -> Result { 106 | let content = self.get_entry(name)?; 107 | String::from_utf8(content).map_err(ArchiveError::from) 108 | } 109 | 110 | /// Returns the content of container file "META-INF/container.xml". 111 | /// 112 | /// # Errors 113 | /// 114 | /// Returns an error if the epub doesn't have the container file. 115 | pub fn get_container_file(&mut self) -> Result, ArchiveError> { 116 | let content = self.get_entry("META-INF/container.xml")?; 117 | Ok(content) 118 | } 119 | } 120 | -------------------------------------------------------------------------------- /src/doc.rs: -------------------------------------------------------------------------------- 1 | //! 
Manages the epub doc. 2 | //! 3 | //! Provides easy methods to navigate through the epub content, cover, 4 | //! chapters, etc. 5 | //! 6 | //! Main references to EPUB specs: 7 | //! - https://www.w3.org/TR/epub-33 8 | //! - https://idpf.org/epub/201 9 | 10 | use std::cmp::Ordering; 11 | use std::collections::HashMap; 12 | use std::fs::File; 13 | use std::io::BufReader; 14 | use std::io::{Read, Seek}; 15 | use std::path::{Component, Path, PathBuf}; 16 | use xmlutils::XMLError; 17 | 18 | use crate::archive::EpubArchive; 19 | 20 | use crate::xmlutils; 21 | 22 | #[derive(Debug, thiserror::Error)] 23 | pub enum DocError { 24 | #[error("Archive Error: {0}")] 25 | ArchiveError(#[from] crate::archive::ArchiveError), 26 | #[error("XML Error: {0}")] 27 | XmlError(#[from] crate::xmlutils::XMLError), 28 | #[error("I/O Error: {0}")] 29 | IOError(#[from] std::io::Error), 30 | #[error("Invalid EPub")] 31 | InvalidEpub, 32 | } 33 | 34 | #[derive(Clone, Debug, PartialEq, PartialOrd)] 35 | pub enum EpubVersion { 36 | Version2_0, 37 | Version3_0, 38 | Unknown(String), 39 | } 40 | 41 | /// Struct that represent a navigation point in a table of content 42 | #[derive(Clone, Debug, Eq)] 43 | pub struct NavPoint { 44 | /// the title of this navpoint 45 | pub label: String, 46 | /// the resource path 47 | pub content: PathBuf, 48 | /// nested navpoints 49 | pub children: Vec, 50 | /// the order in the toc 51 | pub play_order: usize, 52 | } 53 | 54 | impl Ord for NavPoint { 55 | fn cmp(&self, other: &Self) -> Ordering { 56 | self.play_order.cmp(&other.play_order) 57 | } 58 | } 59 | 60 | impl PartialOrd for NavPoint { 61 | fn partial_cmp(&self, other: &Self) -> Option { 62 | Some(self.cmp(other)) 63 | } 64 | } 65 | 66 | impl PartialEq for NavPoint { 67 | fn eq(&self, other: &Self) -> bool { 68 | self.play_order == other.play_order 69 | } 70 | } 71 | 72 | /// An EPUB3 metadata subexpression. 73 | /// It is associated with another metadata expression. 
74 | /// The design follows EPUB3 but can be approximated when facing EPUB2 using attributes. 75 | #[derive(Clone, Debug)] 76 | pub struct MetadataRefinement { 77 | pub property: String, 78 | pub value: String, 79 | pub lang: Option, 80 | pub scheme: Option, 81 | } 82 | 83 | /// An EPUB3 Dublin Core metadata item. 84 | /// The design follows EPUB3's dcterms element but can draw information both 85 | /// dcterms and primary `` expressions. 86 | /// 87 | /// When facing EPUB2, it also draws information from XHTML1.1 ``. 88 | #[derive(Clone, Debug)] 89 | pub struct MetadataItem { 90 | pub(crate) id: Option, 91 | pub property: String, 92 | pub value: String, 93 | pub lang: Option, 94 | pub refined: Vec, 95 | } 96 | 97 | impl MetadataItem { 98 | pub fn refinement(&self, property: &str) -> Option<&MetadataRefinement> { 99 | self.refined.iter().find(|r| r.property == property) 100 | } 101 | } 102 | 103 | #[derive(Clone, Debug)] 104 | pub struct SpineItem { 105 | pub idref: String, 106 | pub id: Option, 107 | pub properties: Option, 108 | pub linear: bool, 109 | } 110 | 111 | #[derive(Clone, Debug)] 112 | pub struct ResourceItem { 113 | pub path: PathBuf, 114 | pub mime: String, 115 | pub properties: Option, 116 | } 117 | 118 | /// Struct to control the epub document 119 | /// 120 | /// The general policy for `EpubDoc` is to support both EPUB2 (commonly used) 121 | /// and EPUB3 (standard). Considering epub files that have mixed EPUB2 and 122 | /// EPUB3 features, the implementation of `EpubDoc` isn't strict and rejects 123 | /// something not in accordance with the specified version only when necessary. 
124 | #[derive(Clone, Debug)] 125 | pub struct EpubDoc { 126 | /// the zip archive 127 | archive: EpubArchive, 128 | 129 | /// The current chapter, is an spine index 130 | current: usize, 131 | 132 | /// epub spec version 133 | pub version: EpubVersion, 134 | 135 | /// epub spine ids 136 | pub spine: Vec, 137 | 138 | /// resource id -> (path, mime) 139 | pub resources: HashMap, 140 | 141 | /// table of content, list of `NavPoint` in the toc.ncx 142 | pub toc: Vec, 143 | 144 | /// title of toc 145 | pub toc_title: String, 146 | 147 | /// The epub metadata. 148 | /// 149 | /// # Examples 150 | /// 151 | /// ``` 152 | /// # use epub::doc::EpubDoc; 153 | /// # let doc = EpubDoc::new("test.epub"); 154 | /// # let doc = doc.unwrap(); 155 | /// let title = doc.metadata.iter().find(|d| d.property == "title"); 156 | /// assert_eq!(title.unwrap().value, "Todo es mío"); 157 | /// ``` 158 | /// 159 | /// See `mdata(property)` for a convenient method returning the first matching item. 160 | pub metadata: Vec, 161 | 162 | /// root file base path 163 | pub root_base: PathBuf, 164 | 165 | /// root file full path 166 | pub root_file: PathBuf, 167 | 168 | /// Custom css list to inject in every xhtml file 169 | pub extra_css: Vec, 170 | 171 | /// unique identifier 172 | pub unique_identifier: Option, 173 | } 174 | 175 | /// A EpubDoc used for testing purposes 176 | #[cfg(feature = "mock")] 177 | impl EpubDoc>> { 178 | pub fn mock() -> Result { 179 | // binary for empty zip file so that archive can be created 180 | let data: Vec = vec![ 181 | 0x50, 0x4b, 0x05, 0x06, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 00, 182 | 00, 00, 183 | ]; 184 | 185 | let archive = EpubArchive::from_reader(std::io::Cursor::new(data))?; 186 | Ok(Self { 187 | archive, 188 | spine: vec![], 189 | toc: vec![], 190 | resources: HashMap::new(), 191 | metadata: HashMap::new(), 192 | root_file: PathBuf::new(), 193 | root_base: PathBuf::new(), 194 | current: 0, 195 | extra_css: vec![], 196 | 
unique_identifier: None, 197 | cover_id: None, 198 | }) 199 | } 200 | } 201 | 202 | impl EpubDoc> { 203 | /// Opens the epub file in `path`. 204 | /// 205 | /// Initialize some internal variables to be able to access to the epub 206 | /// spine definition and to navigate through the epub. 207 | /// 208 | /// # Examples 209 | /// 210 | /// ``` 211 | /// use epub::doc::EpubDoc; 212 | /// 213 | /// let doc = EpubDoc::new("test.epub"); 214 | /// assert!(doc.is_ok()); 215 | /// ``` 216 | /// 217 | /// # Errors 218 | /// 219 | /// Returns an error if the epub is broken or if the file doesn't 220 | /// exists. 221 | pub fn new>(path: P) -> Result { 222 | let path = path.as_ref(); 223 | let file = File::open(path)?; 224 | let mut doc = Self::from_reader(BufReader::new(file))?; 225 | doc.archive.path = path.to_path_buf(); 226 | Ok(doc) 227 | } 228 | } 229 | 230 | impl EpubDoc { 231 | /// Opens the epub contained in `reader`. 232 | /// 233 | /// Initialize some internal variables to be able to access to the epub 234 | /// spine definition and to navigate through the epub. 235 | /// 236 | /// # Examples 237 | /// 238 | /// ``` 239 | /// use epub::doc::EpubDoc; 240 | /// use std::fs::File; 241 | /// use std::io::{Cursor, Read}; 242 | /// 243 | /// let mut file = File::open("test.epub").unwrap(); 244 | /// let mut buffer = Vec::new(); 245 | /// file.read_to_end(&mut buffer).unwrap(); 246 | /// 247 | /// let cursor = Cursor::new(buffer); 248 | /// 249 | /// let doc = EpubDoc::from_reader(cursor); 250 | /// assert!(doc.is_ok()); 251 | /// ``` 252 | /// 253 | /// # Errors 254 | /// 255 | /// Returns an error if the epub is broken. 
256 | pub fn from_reader(reader: R) -> Result { 257 | let mut archive = EpubArchive::from_reader(reader)?; 258 | 259 | let container = archive.get_container_file()?; 260 | let root_file = get_root_file(&container)?; 261 | let base_path = root_file.parent().expect("All files have a parent"); 262 | let mut doc = Self { 263 | archive, 264 | version: EpubVersion::Version2_0, 265 | spine: vec![], 266 | toc: vec![], 267 | toc_title: String::new(), 268 | resources: HashMap::new(), 269 | metadata: Vec::new(), 270 | root_file: root_file.clone(), 271 | root_base: base_path.to_path_buf(), 272 | current: 0, 273 | extra_css: vec![], 274 | unique_identifier: None, 275 | }; 276 | doc.fill_resources()?; 277 | Ok(doc) 278 | } 279 | 280 | /// Returns the first metadata found with this property name. 281 | /// 282 | /// # Examples 283 | /// 284 | /// ``` 285 | /// # use epub::doc::EpubDoc; 286 | /// # let doc = EpubDoc::new("test.epub"); 287 | /// # let doc = doc.unwrap(); 288 | /// let language = doc.mdata("language"); 289 | /// assert_eq!(language.unwrap().value, "es"); 290 | pub fn mdata(&self, property: &str) -> Option<&MetadataItem> { 291 | self.metadata.iter().find(|data| data.property == property) 292 | } 293 | 294 | /// Returns the title. 295 | /// 296 | /// An EPUB file may provide multiple titles. This method only returns the 297 | /// primary one. Access `metadata` directly to gain more control. 298 | pub fn get_title(&self) -> Option { 299 | self.mdata("title").map(|item| item.value.clone()) 300 | } 301 | 302 | /// Returns the id of the epub cover. 303 | /// 304 | /// # Examples 305 | /// 306 | /// ```rust 307 | /// use epub::doc::EpubDoc; 308 | /// 309 | /// let doc = EpubDoc::new("test.epub"); 310 | /// assert!(doc.is_ok()); 311 | /// let mut doc = doc.unwrap(); 312 | /// 313 | /// let cover_id = doc.get_cover_id(); 314 | /// ``` 315 | /// 316 | /// This returns the cover id, which can be used to get the cover data. 317 | /// The id is not guaranteed to be valid. 
318 | pub fn get_cover_id(&self) -> Option { 319 | match self.version { 320 | // EPUB3 requires zero or one cover-image resource 321 | EpubVersion::Version3_0 => self.resources.iter().find_map(|(id, resource)| { 322 | resource 323 | .properties 324 | .as_ref() 325 | .and_then(|ps| ps.split_ascii_whitespace().find(|p| *p == "cover-image")) 326 | .map(|_| id.clone()) 327 | }), 328 | // EPUB2 doesn't include cover identification, but a common practice is `` 329 | _ => self.mdata("cover").map(|item| item.value.clone()), 330 | } 331 | } 332 | 333 | /// Returns the id of the navigation document (EPUB3 only). 334 | /// 335 | /// **Relationship with `toc`**: 336 | /// "Navigation document" is a concept formalized in EPUB3, superseding NCX 337 | /// format used in EPUB2. NCX is required in EPUB2 and not EPUB3, though 338 | /// some authors provide both in the archive. `self.toc` (parsed from NCX) 339 | /// and this are independent on each other. 340 | pub fn get_nav_id(&self) -> Option { 341 | match self.version { 342 | // EPUB3 requires exactly one nav resource 343 | EpubVersion::Version3_0 => self.resources.iter().find_map(|(id, resource)| { 344 | resource 345 | .properties 346 | .as_ref() 347 | .and_then(|ps| ps.split_ascii_whitespace().find(|p| *p == "nav")) 348 | .map(|_| id.clone()) 349 | }), 350 | // The concept of navigation document doesn't exist in EPUB2. 
351 | _ => None, 352 | } 353 | } 354 | 355 | /// Returns the cover's content and mime-type 356 | /// 357 | /// # Examples 358 | /// 359 | /// ```rust,ignore 360 | /// use std::fs; 361 | /// use std::io::Write; 362 | /// use epub::doc::EpubDoc; 363 | /// 364 | /// let doc = EpubDoc::new("test.epub"); 365 | /// assert!(doc.is_ok()); 366 | /// let mut doc = doc.unwrap(); 367 | /// 368 | /// let cover_data = doc.get_cover().unwrap(); 369 | /// 370 | /// let f = fs::File::create("/tmp/cover.png"); 371 | /// assert!(f.is_ok()); 372 | /// let mut f = f.unwrap(); 373 | /// let resp = f.write_all(&cover_data); 374 | /// ``` 375 | /// 376 | /// Returns [`None`] if the cover can't be found. 377 | pub fn get_cover(&mut self) -> Option<(Vec, String)> { 378 | let cover_id = self.get_cover_id(); 379 | cover_id.and_then(|cid| self.get_resource(&cid)) 380 | } 381 | 382 | /// Returns Release Identifier defined at 383 | /// https://www.w3.org/publishing/epub32/epub-packages.html#sec-metadata-elem-identifiers-pid 384 | pub fn get_release_identifier(&self) -> Option { 385 | match ( 386 | self.unique_identifier.as_ref(), 387 | self.mdata("dcterms:modified"), 388 | ) { 389 | (Some(unique_identifier), Some(modified)) => { 390 | Some(format!("{}@{}", unique_identifier, modified.value)) 391 | } 392 | _ => None, 393 | } 394 | } 395 | 396 | /// Returns the resource content by full path in the epub archive 397 | /// 398 | /// Returns [`None`] if the path doesn't exist in the epub 399 | pub fn get_resource_by_path>(&mut self, path: P) -> Option> { 400 | self.archive.get_entry(path).ok() 401 | } 402 | 403 | /// Returns the resource content and mime-type by the id defined in the spine 404 | /// 405 | /// Returns [`None`] if the id doesn't exists in the epub 406 | pub fn get_resource(&mut self, id: &str) -> Option<(Vec, String)> { 407 | let ResourceItem { path, mime, .. 
} = self.resources.get(id)?; 408 | let path = path.clone(); 409 | let mime = mime.clone(); 410 | let content = self.get_resource_by_path(&path)?; 411 | Some((content, mime)) 412 | } 413 | 414 | /// Returns the resource content by full path in the epub archive, as String 415 | /// 416 | /// Returns [`None`] if the path doesn't exists in the epub 417 | pub fn get_resource_str_by_path>(&mut self, path: P) -> Option { 418 | self.archive.get_entry_as_str(path).ok() 419 | } 420 | 421 | /// Returns the resource content and mime-type by the id defined in the spine, as String 422 | /// 423 | /// Returns [`None`] if the id doesn't exists in the epub 424 | pub fn get_resource_str(&mut self, id: &str) -> Option<(String, String)> { 425 | let ResourceItem { path, mime, .. } = self.resources.get(id)?; 426 | let mime = mime.clone(); 427 | let path = path.clone(); 428 | let content = self.get_resource_str_by_path(path)?; 429 | Some((content, mime)) 430 | } 431 | 432 | /// Returns the resource mime-type 433 | /// 434 | /// # Examples 435 | /// 436 | /// ``` 437 | /// # use epub::doc::EpubDoc; 438 | /// # let doc = EpubDoc::new("test.epub"); 439 | /// # let doc = doc.unwrap(); 440 | /// let mime = doc.get_resource_mime("portada.png"); 441 | /// assert_eq!("image/png", mime.unwrap()); 442 | /// ``` 443 | /// 444 | /// Returns [`None`] the resource can't be found. 445 | pub fn get_resource_mime(&self, id: &str) -> Option { 446 | self.resources.get(id).map(|r| r.mime.clone()) 447 | } 448 | 449 | /// Returns the resource mime searching by source full path 450 | /// 451 | /// # Examples 452 | /// 453 | /// ``` 454 | /// # use epub::doc::EpubDoc; 455 | /// # let doc = EpubDoc::new("test.epub"); 456 | /// # let doc = doc.unwrap(); 457 | /// let mime = doc.get_resource_mime_by_path("OEBPS/Images/portada.png"); 458 | /// assert_eq!("image/png", mime.unwrap()); 459 | /// ``` 460 | /// 461 | /// Returns [`None`] the resource can't be found. 
462 | pub fn get_resource_mime_by_path>(&self, path: P) -> Option { 463 | let path = path.as_ref(); 464 | 465 | self.resources.iter().find_map(|(_, r)| { 466 | if r.path == path { 467 | Some(r.mime.clone()) 468 | } else { 469 | None 470 | } 471 | }) 472 | } 473 | 474 | /// Returns the current chapter content and mime-type 475 | /// 476 | /// The current follows the epub spine order. You can modify the current 477 | /// calling to `go_next`, `go_prev` or `set_current` methods. 478 | /// 479 | /// Can return [`None`] if the epub is broken. 480 | pub fn get_current(&mut self) -> Option<(Vec, String)> { 481 | let current_id = self.get_current_id()?; 482 | self.get_resource(¤t_id) 483 | } 484 | 485 | /// See [`Self::get_current`] 486 | pub fn get_current_str(&mut self) -> Option<(String, String)> { 487 | let current_id = self.get_current_id()?; 488 | self.get_resource_str(¤t_id) 489 | } 490 | 491 | /// Returns the current chapter data, with resource uris renamed so they 492 | /// have the epub:// prefix and all are relative to the root file 493 | /// 494 | /// This method is useful to render the content with a html engine, because inside the epub 495 | /// local paths are relatives, so you can provide that content, because the engine will look 496 | /// for the relative path in the filesystem and that file isn't there. 
You should provide files 497 | /// with epub:// using [`Self::get_resource_by_path`] 498 | /// 499 | /// # Examples 500 | /// 501 | /// ``` 502 | /// # use epub::doc::EpubDoc; 503 | /// # let mut doc = EpubDoc::new("test.epub").unwrap(); 504 | /// let current = doc.get_current_with_epub_uris().unwrap(); 505 | /// let text = String::from_utf8(current).unwrap(); 506 | /// assert!(text.contains("epub://OEBPS/Images/portada.png")); 507 | 508 | /// doc.go_next(); 509 | /// let current = doc.get_current_with_epub_uris().unwrap(); 510 | /// let text = String::from_utf8(current).unwrap(); 511 | /// assert!(text.contains("epub://OEBPS/Styles/stylesheet.css")); 512 | /// assert!(text.contains("http://creativecommons.org/licenses/by-sa/3.0/")); 513 | /// ``` 514 | /// 515 | /// # Errors 516 | /// 517 | /// Returns [`DocError::InvalidEpub`] if the epub is broken. 518 | pub fn get_current_with_epub_uris(&mut self) -> Result, DocError> { 519 | let path = self.get_current_path().ok_or(DocError::InvalidEpub)?; 520 | let (current, _mime) = self.get_current().ok_or(DocError::InvalidEpub)?; 521 | 522 | let resp = xmlutils::replace_attrs( 523 | current.as_slice(), 524 | |element, attr, value| match (element, attr) { 525 | ("link", "href") | ("image", "href") | ("a", "href") | ("img", "src") => { 526 | build_epub_uri(&path, value) 527 | } 528 | _ => String::from(value), 529 | }, 530 | &self.extra_css, 531 | ); 532 | 533 | resp.map_err(From::from) 534 | } 535 | 536 | /// Returns the current chapter mimetype 537 | /// 538 | /// # Examples 539 | /// 540 | /// ``` 541 | /// # use epub::doc::EpubDoc; 542 | /// # let doc = EpubDoc::new("test.epub"); 543 | /// # let doc = doc.unwrap(); 544 | /// let m = doc.get_current_mime(); 545 | /// assert_eq!("application/xhtml+xml", m.unwrap()); 546 | /// ``` 547 | /// 548 | /// Can return [`None`] if the epub is broken. 
549 | pub fn get_current_mime(&self) -> Option { 550 | let current_id = self.get_current_id()?; 551 | self.get_resource_mime(¤t_id) 552 | } 553 | 554 | /// Returns the current chapter full path 555 | /// 556 | /// # Examples 557 | /// 558 | /// ``` 559 | /// # use epub::doc::EpubDoc; 560 | /// # use std::path::Path; 561 | /// # let doc = EpubDoc::new("test.epub"); 562 | /// # let doc = doc.unwrap(); 563 | /// let p = doc.get_current_path(); 564 | /// assert_eq!(Path::new("OEBPS/Text/titlepage.xhtml"), p.unwrap()); 565 | /// ``` 566 | /// 567 | /// Can return [`None`] if the epub is broken. 568 | pub fn get_current_path(&self) -> Option { 569 | let current_id = self.get_current_id()?; 570 | self.resources.get(¤t_id).map(|r| r.path.clone()) 571 | } 572 | 573 | /// Returns the current chapter id 574 | /// 575 | /// # Examples 576 | /// 577 | /// ``` 578 | /// # use epub::doc::EpubDoc; 579 | /// # let doc = EpubDoc::new("test.epub"); 580 | /// # let doc = doc.unwrap(); 581 | /// let id = doc.get_current_id(); 582 | /// assert_eq!("titlepage.xhtml", id.unwrap()); 583 | /// ``` 584 | /// 585 | /// Can return [`None`] if the epub is broken. 
586 | pub fn get_current_id(&self) -> Option { 587 | self.spine.get(self.current).cloned().map(|i| i.idref) 588 | } 589 | 590 | /// Changes current to the next chapter 591 | /// 592 | /// # Examples 593 | /// 594 | /// ``` 595 | /// # use epub::doc::EpubDoc; 596 | /// # let doc = EpubDoc::new("test.epub"); 597 | /// # let mut doc = doc.unwrap(); 598 | /// doc.go_next(); 599 | /// assert_eq!("000.xhtml", doc.get_current_id().unwrap()); 600 | /// 601 | /// let len = doc.spine.len(); 602 | /// for i in 1..len { 603 | /// doc.go_next(); 604 | /// } 605 | /// assert!(!doc.go_next()); 606 | /// ``` 607 | /// 608 | /// Returns [`false`] if the current chapter is the last one 609 | pub fn go_next(&mut self) -> bool { 610 | if self.current + 1 >= self.spine.len() { 611 | false 612 | } else { 613 | self.current += 1; 614 | true 615 | } 616 | } 617 | 618 | /// Changes current to the prev chapter 619 | /// 620 | /// # Examples 621 | /// 622 | /// ``` 623 | /// # use epub::doc::EpubDoc; 624 | /// # let doc = EpubDoc::new("test.epub"); 625 | /// # let mut doc = doc.unwrap(); 626 | /// assert!(!doc.go_prev()); 627 | /// 628 | /// doc.go_next(); // 000.xhtml 629 | /// doc.go_next(); // 001.xhtml 630 | /// doc.go_next(); // 002.xhtml 631 | /// doc.go_prev(); // 001.xhtml 632 | /// assert_eq!("001.xhtml", doc.get_current_id().unwrap()); 633 | /// ``` 634 | /// 635 | /// Returns [`false`] if the current chapter is the first one 636 | pub fn go_prev(&mut self) -> bool { 637 | if self.current < 1 { 638 | false 639 | } else { 640 | self.current -= 1; 641 | true 642 | } 643 | } 644 | 645 | /// Returns the number of chapters 646 | /// 647 | /// # Examples 648 | /// 649 | /// ``` 650 | /// # use epub::doc::EpubDoc; 651 | /// # let doc = EpubDoc::new("test.epub"); 652 | /// # let mut doc = doc.unwrap(); 653 | /// assert_eq!(17, doc.get_num_pages()); 654 | /// ``` 655 | pub fn get_num_pages(&self) -> usize { 656 | self.spine.len() 657 | } 658 | 659 | /// Returns the current chapter number, 
starting from 0 660 | pub fn get_current_page(&self) -> usize { 661 | self.current 662 | } 663 | 664 | /// Changes the current page 665 | /// 666 | /// # Examples 667 | /// 668 | /// ``` 669 | /// # use epub::doc::EpubDoc; 670 | /// # let doc = EpubDoc::new("test.epub"); 671 | /// # let mut doc = doc.unwrap(); 672 | /// assert_eq!(0, doc.get_current_page()); 673 | /// doc.set_current_page(2); 674 | /// assert_eq!("001.xhtml", doc.get_current_id().unwrap()); 675 | /// assert_eq!(2, doc.get_current_page()); 676 | /// assert!(!doc.set_current_page(50)); 677 | /// ``` 678 | /// 679 | /// Returns [`false`] if the page is out of bounds 680 | pub fn set_current_page(&mut self, n: usize) -> bool { 681 | if n >= self.spine.len() { 682 | false 683 | } else { 684 | self.current = n; 685 | true 686 | } 687 | } 688 | 689 | /// This will inject this css in every html page getted with 690 | /// [`Self::get_current_with_epub_uris`] 691 | /// 692 | /// # Examples 693 | /// 694 | /// ``` 695 | /// # use epub::doc::EpubDoc; 696 | /// # let doc = EpubDoc::new("test.epub"); 697 | /// # let mut doc = doc.unwrap(); 698 | /// # let _ = doc.set_current_page(2); 699 | /// let extracss = "body { background-color: black; color: white }"; 700 | /// doc.add_extra_css(extracss); 701 | /// let current = doc.get_current_with_epub_uris().unwrap(); 702 | /// let text = String::from_utf8(current).unwrap(); 703 | /// assert!(text.contains(extracss)); 704 | /// ``` 705 | pub fn add_extra_css(&mut self, css: &str) { 706 | self.extra_css.push(String::from(css)); 707 | } 708 | 709 | /// Function to convert a resource path to a chapter number in the spine 710 | /// If the resource isn't in the spine list, None will be returned 711 | /// 712 | /// This method is useful to convert a toc [`NavPoint`] content to a chapter number 713 | /// to be able to navigate easily 714 | pub fn resource_uri_to_chapter(&self, uri: &PathBuf) -> Option { 715 | for (k, ResourceItem { path, .. 
}) in &self.resources { 716 | if path == uri { 717 | return self.resource_id_to_chapter(k); 718 | } 719 | } 720 | 721 | None 722 | } 723 | 724 | /// Function to convert a resource id to a chapter number in the spine 725 | /// If the resourse isn't in the spine list, None will be returned 726 | pub fn resource_id_to_chapter(&self, uri: &str) -> Option { 727 | self.spine.iter().position(|item| item.idref == uri) 728 | } 729 | 730 | fn fill_resources(&mut self) -> Result<(), DocError> { 731 | let container = self.archive.get_entry(&self.root_file)?; 732 | let root = xmlutils::XMLReader::parse(container.as_slice())?; 733 | self.version = match root.borrow().get_attr("version") { 734 | Some(v) if v == "2.0" => EpubVersion::Version2_0, 735 | Some(v) if v == "3.0" => EpubVersion::Version3_0, 736 | Some(v) => EpubVersion::Unknown(String::from(v)), 737 | _ => EpubVersion::Unknown(String::from("Unknown")), 738 | }; 739 | let unique_identifier_id = &root.borrow().get_attr("unique-identifier"); 740 | 741 | // resources from manifest 742 | // This should be run before everything else, because other functions relies on 743 | // self.resources and should be filled before calling `fill_toc` 744 | let manifest = root 745 | .borrow() 746 | .find("manifest") 747 | .ok_or(DocError::InvalidEpub)?; 748 | for r in &manifest.borrow().children { 749 | let item = r.borrow(); 750 | let _ = self.insert_resource(&item); 751 | } 752 | 753 | // items from spine 754 | let spine = root.borrow().find("spine").ok_or(DocError::InvalidEpub)?; 755 | for r in &spine.borrow().children { 756 | let item = r.borrow(); 757 | let _ = self.insert_spine(&item); 758 | } 759 | 760 | // toc.ncx 761 | if let Some(toc) = spine.borrow().get_attr("toc") { 762 | let _ = self.fill_toc(&toc); 763 | } 764 | 765 | // metadata 766 | let metadata_elem = root 767 | .borrow() 768 | .find("metadata") 769 | .ok_or(DocError::InvalidEpub)?; 770 | self.fill_metadata(&metadata_elem.borrow()); 771 | 772 | let identifier = if let 
Some(uid) = unique_identifier_id { 773 | // find identifier with id 774 | self.metadata 775 | .iter() 776 | .find(|d| d.property == "identifier" && d.id.as_ref().is_some_and(|id| id == uid)) 777 | } else { 778 | // fallback with the first identifier. 779 | self.metadata.iter().find(|d| d.property == "identifier") 780 | }; 781 | self.unique_identifier = identifier.map(|data| data.value.clone()); 782 | 783 | Ok(()) 784 | } 785 | 786 | fn fill_metadata(&mut self, elem: &xmlutils::XMLNode) { 787 | // refinements are inserted here with ID as key, these are later associated to metadata 788 | let mut refinements: HashMap> = HashMap::new(); 789 | for r in &elem.children { 790 | let item = r.borrow(); 791 | // for each acceptable element, either push a metadata item or push a refinement 792 | match (item.name.namespace_ref(), &item.name.local_name) { 793 | // dcterms 794 | (Some("http://purl.org/dc/elements/1.1/"), name) => { 795 | let id = item.get_attr("id"); 796 | let lang = item.get_attr("lang"); 797 | let property = name.clone(); 798 | let value = item.text.clone().unwrap_or_default(); 799 | 800 | let refined: Vec = 801 | if let EpubVersion::Version3_0 = self.version { 802 | vec![] 803 | } else { 804 | // treat it as EPUB2 dcterms, storing additional info in attributes 805 | item.attrs 806 | .iter() 807 | .filter_map(|attr| { 808 | if let Some("http://www.idpf.org/2007/opf") = 809 | attr.name.namespace_ref() 810 | { 811 | let property = attr.name.local_name.clone(); 812 | let value = attr.value.clone(); 813 | Some(MetadataRefinement { 814 | property, 815 | value, 816 | lang: None, 817 | scheme: None, 818 | }) 819 | } else { 820 | None 821 | } 822 | }) 823 | .collect() 824 | }; 825 | self.metadata.push(MetadataItem { 826 | id, 827 | property, 828 | value, 829 | lang, 830 | refined, 831 | }); 832 | } 833 | 834 | // 835 | (Some("http://www.idpf.org/2007/opf"), name) 836 | if name.eq_ignore_ascii_case("meta") => 837 | { 838 | if let Some(property) = 
item.get_attr("property") { 839 | // EPUB3 , value in its text content 840 | let value = item.text.clone().unwrap_or_default(); 841 | let lang = item.get_attr("lang"); 842 | if let Some(refines) = item.get_attr("refines") { 843 | // refinement (subexpression in EPUB3 terminology) 844 | let tid = refines.strip_prefix('#').unwrap_or_else(|| &refines); 845 | let scheme = item.get_attr("scheme"); 846 | let refinement = MetadataRefinement { 847 | property, 848 | value, 849 | lang, 850 | scheme, 851 | }; 852 | if let Some(refs) = refinements.get_mut(tid) { 853 | refs.push(refinement); 854 | } else { 855 | refinements.insert(tid.to_string(), vec![refinement]); 856 | } 857 | } else { 858 | // primary 859 | let id = item.get_attr("id"); 860 | self.metadata.push(MetadataItem { 861 | id, 862 | property, 863 | value, 864 | lang, 865 | refined: vec![], 866 | }); 867 | } 868 | } else if let (Some(property), Some(value)) = 869 | (item.get_attr("name"), item.get_attr("content")) 870 | { 871 | // Legacy XHTML1.1 872 | self.metadata.push(MetadataItem { 873 | id: None, 874 | property, 875 | value, 876 | lang: None, 877 | refined: vec![], 878 | }); 879 | } 880 | } 881 | 882 | _ => (), 883 | } 884 | } 885 | 886 | // associate refinements 887 | self.metadata.iter_mut().for_each(|item| { 888 | if let Some(id) = &item.id { 889 | if let Some(mut refs) = refinements.remove(id) { 890 | item.refined.append(&mut refs); 891 | } 892 | } 893 | }); 894 | } 895 | 896 | // Forcibly converts separators in a filepath to unix separators to 897 | // to ensure that ZipArchive's by_name method will retrieve the proper 898 | // file. Failing to convert to unix-style on Windows causes the 899 | // ZipArchive not to find the file. 
900 | fn convert_path_seps>(&self, href: P) -> PathBuf { 901 | let mut path = self.root_base.join(href); 902 | if cfg!(windows) { 903 | path = PathBuf::from(path.to_string_lossy().replace('\\', "/")); 904 | } 905 | path 906 | } 907 | 908 | fn insert_resource(&mut self, item: &xmlutils::XMLNode) -> Result<(), XMLError> { 909 | let id = item 910 | .get_attr("id") 911 | .ok_or_else(|| XMLError::AttrNotFound("id".into()))?; 912 | let href = item 913 | .get_attr("href") 914 | .ok_or_else(|| XMLError::AttrNotFound("href".into()))?; 915 | let mime = item 916 | .get_attr("media-type") 917 | .ok_or_else(|| XMLError::AttrNotFound("media-type".into()))?; 918 | let properties = item.get_attr("properties"); 919 | 920 | self.resources.insert( 921 | id, 922 | ResourceItem { 923 | path: self.convert_path_seps(href), 924 | mime, 925 | properties, 926 | }, 927 | ); 928 | Ok(()) 929 | } 930 | 931 | fn insert_spine(&mut self, item: &xmlutils::XMLNode) -> Result<(), DocError> { 932 | let idref = item 933 | .get_attr("idref") 934 | .ok_or_else(|| XMLError::AttrNotFound("idref".into()))?; 935 | let linear = item.get_attr("linear").unwrap_or("yes".into()) == "yes"; 936 | let properties = item.get_attr("properties"); 937 | let id = item.get_attr("id"); 938 | self.spine.push(SpineItem { 939 | idref, 940 | id, 941 | linear, 942 | properties, 943 | }); 944 | Ok(()) 945 | } 946 | 947 | fn fill_toc(&mut self, id: &str) -> Result<(), DocError> { 948 | let toc_res = self.resources.get(id).ok_or(DocError::InvalidEpub)?; // this should be turned into it's own error type, but 949 | 950 | let container = self.archive.get_entry(&toc_res.path)?; 951 | let root = xmlutils::XMLReader::parse(container.as_slice())?; 952 | 953 | self.toc_title = root 954 | .borrow() 955 | .find("docTitle") 956 | .and_then(|dt| { 957 | dt.borrow() 958 | .children 959 | .get(0) 960 | .and_then(|t| t.borrow().text.clone()) 961 | }) 962 | .unwrap_or_default(); 963 | 964 | let mapnode = root 965 | .borrow() 966 | .find("navMap") 
967 | .ok_or_else(|| XMLError::AttrNotFound("navMap".into()))?; 968 | 969 | self.toc.append(&mut self.get_navpoints(&mapnode.borrow())); 970 | self.toc.sort(); 971 | 972 | Ok(()) 973 | } 974 | 975 | /// Recursively extract all navpoints from a node. 976 | fn get_navpoints(&self, parent: &xmlutils::XMLNode) -> Vec { 977 | let mut navpoints = Vec::new(); 978 | 979 | // TODO: parse metadata (dtb:totalPageCount, dtb:depth, dtb:maxPageNumber) 980 | 981 | for nav in &parent.children { 982 | let item = nav.borrow(); 983 | if item.name.local_name != "navPoint" { 984 | continue; 985 | } 986 | let play_order = item.get_attr("playOrder").and_then(|n| n.parse().ok()); 987 | let content = item 988 | .find("content") 989 | .and_then(|c| c.borrow().get_attr("src").map(|p| self.root_base.join(p))); 990 | 991 | let label = item.find("navLabel").and_then(|l| { 992 | l.borrow() 993 | .children 994 | .get(0) 995 | .and_then(|t| t.borrow().text.clone()) 996 | }); 997 | 998 | if let (Some(o), Some(c), Some(l)) = (play_order, content, label) { 999 | let navpoint = NavPoint { 1000 | label: l.clone(), 1001 | content: c.clone(), 1002 | children: self.get_navpoints(&item), 1003 | play_order: o, 1004 | }; 1005 | navpoints.push(navpoint); 1006 | } 1007 | } 1008 | 1009 | navpoints.sort(); 1010 | navpoints 1011 | } 1012 | } 1013 | 1014 | fn get_root_file(container: &[u8]) -> Result { 1015 | let root = xmlutils::XMLReader::parse(container)?; 1016 | let el = root.borrow(); 1017 | let element = el 1018 | .find("rootfile") 1019 | .ok_or_else(|| XMLError::AttrNotFound("rootfile".into()))?; 1020 | let el2 = element.borrow(); 1021 | 1022 | let attr = el2 1023 | .get_attr("full-path") 1024 | .ok_or_else(|| XMLError::AttrNotFound("full-path".into()))?; 1025 | 1026 | Ok(PathBuf::from(attr)) 1027 | } 1028 | 1029 | fn build_epub_uri>(path: P, append: &str) -> String { 1030 | // allowing external links 1031 | if append.starts_with("http") { 1032 | return String::from(append); 1033 | } 1034 | 1035 | let path 
= path.as_ref(); 1036 | let mut cpath = path.to_path_buf(); 1037 | 1038 | // current file base dir 1039 | cpath.pop(); 1040 | for p in Path::new(append).components() { 1041 | match p { 1042 | Component::ParentDir => { 1043 | cpath.pop(); 1044 | } 1045 | Component::Normal(s) => { 1046 | cpath.push(s); 1047 | } 1048 | _ => {} 1049 | }; 1050 | } 1051 | 1052 | // If on Windows, replace all Windows path separators with Unix path separators 1053 | let path = if cfg!(windows) { 1054 | cpath.to_string_lossy().replace('\\', "/") 1055 | } else { 1056 | cpath.to_string_lossy().to_string() 1057 | }; 1058 | 1059 | format!("epub://{}", path) 1060 | } 1061 | -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | #![warn(clippy::pedantic, clippy::nursery)] 2 | #![allow( 3 | clippy::module_name_repetitions, 4 | clippy::let_underscore_drop, 5 | 6 | // for MSRV 7 | clippy::unnested_or_patterns, 8 | clippy::uninlined_format_args, 9 | clippy::missing_const_for_fn, 10 | )] 11 | 12 | //! EPUB library 13 | //! lib to read and navigate through an epub file contents 14 | //! 15 | //! # Examples 16 | //! 17 | //! ## Opening 18 | //! 19 | //! ``` 20 | //! use epub::doc::EpubDoc; 21 | //! let doc = EpubDoc::new("test.epub"); 22 | //! assert!(doc.is_ok()); 23 | //! let doc = doc.unwrap(); 24 | //! 25 | //! ``` 26 | //! 27 | //! ## Getting doc metadata 28 | //! 29 | //! Metadata is a [`HashMap`](std::collections::HashMap) storing all metadata defined in the epub 30 | //! 31 | //! ``` 32 | //! # use epub::doc::EpubDoc; 33 | //! # let doc = EpubDoc::new("test.epub"); 34 | //! # let doc = doc.unwrap(); 35 | //! let language = doc.mdata("language"); 36 | //! assert_eq!(language.unwrap().value, "es"); 37 | //! ``` 38 | //! 39 | //! ## Accessing resources 40 | //! 41 | //! In the resources var is stored each resource defined 42 | //! 
in the epub indexed by the id and with the full internal 43 | //! path and mimetype. It's a `HashMap` 44 | //! where `a` is the resource id, `b` is the resource full path and 45 | //! `c` is the resource mimetype 46 | //! 47 | //! ``` 48 | //! # use epub::doc::EpubDoc; 49 | //! # use std::path::Path; 50 | //! # let doc = EpubDoc::new("test.epub"); 51 | //! # let doc = doc.unwrap(); 52 | //! assert_eq!(23, doc.resources.len()); 53 | //! let tpage = doc.resources.get("titlepage.xhtml"); 54 | //! assert_eq!(tpage.unwrap().path, Path::new("OEBPS/Text/titlepage.xhtml")); 55 | //! assert_eq!(tpage.unwrap().mime, "application/xhtml+xml"); 56 | //! ``` 57 | //! 58 | //! ## Navigating using the spine 59 | //! 60 | //! Spine is a `Vec` storing the epub spine as resources ids 61 | //! 62 | //! ``` 63 | //! # use epub::doc::EpubDoc; 64 | //! # let doc = EpubDoc::new("test.epub"); 65 | //! # let doc = doc.unwrap(); 66 | //! assert_eq!(17, doc.spine.len()); 67 | //! assert_eq!("titlepage.xhtml", doc.spine[0].idref); 68 | //! ``` 69 | //! 70 | //! ## Navigation using the doc internal state 71 | //! 72 | //! ``` 73 | //! use epub::doc::EpubDoc; 74 | //! let doc = EpubDoc::new("test.epub"); 75 | //! let mut doc = doc.unwrap(); 76 | //! assert_eq!(0, doc.get_current_page()); 77 | //! assert_eq!("application/xhtml+xml", doc.get_current_mime().unwrap()); 78 | //! 79 | //! doc.go_next(); 80 | //! assert_eq!("000.xhtml", doc.get_current_id().unwrap()); 81 | //! doc.go_next(); 82 | //! assert_eq!("001.xhtml", doc.get_current_id().unwrap()); 83 | //! doc.go_prev(); 84 | //! assert_eq!("000.xhtml", doc.get_current_id().unwrap()); 85 | //! 86 | //! doc.set_current_page(2); 87 | //! assert_eq!("001.xhtml", doc.get_current_id().unwrap()); 88 | //! assert_eq!(2, doc.get_current_page()); 89 | //! assert!(!doc.set_current_page(50)); 90 | //! 91 | //! // doc.get_current() will return a Vec with the current page content 92 | //! 
// doc.get_current_str() will return a String with the current page content 93 | //! ``` 94 | //! 95 | //! ## Getting the cover 96 | //! 97 | //! ```ignore 98 | //! use std::fs; 99 | //! use std::io::Write; 100 | //! use epub::doc::EpubDoc; 101 | //! 102 | //! let doc = EpubDoc::new("test.epub"); 103 | //! assert!(doc.is_ok()); 104 | //! let mut doc = doc.unwrap(); 105 | //! 106 | //! let cover_data = doc.get_cover().unwrap(); 107 | //! 108 | //! let f = fs::File::create("/tmp/cover.png"); 109 | //! assert!(f.is_ok()); 110 | //! let mut f = f.unwrap(); 111 | //! let resp = f.write_all(&cover_data); 112 | //! ``` 113 | 114 | mod xmlutils; 115 | 116 | pub mod archive; 117 | pub mod doc; 118 | -------------------------------------------------------------------------------- /src/xmlutils.rs: -------------------------------------------------------------------------------- 1 | use std::cell::RefCell; 2 | use std::rc::Rc; 3 | use std::rc::Weak; 4 | use xml::attribute::OwnedAttribute; 5 | use xml::reader::Error as ReaderError; 6 | use xml::reader::EventReader; 7 | use xml::reader::ParserConfig; 8 | 9 | use xml::reader::XmlEvent as ReaderEvent; 10 | use xml::writer::XmlEvent as WriterEvent; 11 | 12 | use std::fmt; 13 | use xml::writer::EmitterConfig; 14 | use xml::writer::Error as EmitterError; 15 | 16 | use std::borrow::Cow; 17 | 18 | // Using RefCell because we need to edit the children vec during the parsing. 19 | // Using rc because a Node will be referenced by its parent and by its childs. 
20 | type ChildNodeRef = Rc>; 21 | type ParentNodeRef = Weak>; 22 | 23 | #[derive(Debug, thiserror::Error)] 24 | pub enum XMLError { 25 | #[error("XML Reader Error: {0}")] 26 | Reader(#[from] ReaderError), 27 | #[error("XML Writer Error: {0}")] 28 | Emitter(#[from] EmitterError), 29 | #[error("Attribute Not Found: {0}")] 30 | AttrNotFound(String), 31 | #[error("Invalid State; this is a bug")] 32 | InvalidState, 33 | #[error("No XML Elements Found")] 34 | NoElements, 35 | #[error("XML content is empty")] 36 | NoContent, 37 | } 38 | 39 | pub struct XMLReader<'a> { 40 | reader: EventReader<&'a [u8]>, 41 | } 42 | 43 | impl<'a> XMLReader<'a> { 44 | pub fn parse(content: &[u8]) -> Result, XMLError> { 45 | // The operations below require at least 4 bytes to not panic 46 | if content.is_empty() || content.len() < 4 { 47 | return Err(XMLError::NoContent); 48 | } 49 | 50 | let content_str; 51 | //If there is a UTF-8 BOM marker, ignore it 52 | let content_slice = if content[0..3] == [0xefu8, 0xbbu8, 0xbfu8] { 53 | &content[3..] 54 | } else if content[0..2] == [0xfeu8, 0xffu8] || content[0..2] == [0xffu8, 0xfeu8] { 55 | //handle utf-16 56 | let (big_byte, small_byte) = if content[0] == 0xfeu8 { 57 | (1, 0) //big endian utf-16 58 | } else { 59 | (0, 1) //little endian utf-16 60 | }; 61 | let content_u16: Vec = content[2..] 
62 | .chunks_exact(2) 63 | .into_iter() 64 | .map(|a| u16::from_ne_bytes([a[big_byte], a[small_byte]])) 65 | .collect(); 66 | content_str = String::from_utf16_lossy(content_u16.as_slice()); 67 | content_str.as_bytes() 68 | } else { 69 | content 70 | }; 71 | 72 | let reader = XMLReader { 73 | reader: ParserConfig::new() 74 | .add_entity("nbsp", " ") 75 | .add_entity("copy", "©") 76 | .add_entity("reg", "®") 77 | .create_reader(content_slice), 78 | }; 79 | 80 | reader.parse_xml() 81 | } 82 | 83 | fn parse_xml(self) -> Result, XMLError> { 84 | let mut root: Option = None; 85 | let mut parents: Vec = vec![]; 86 | 87 | for e in self.reader { 88 | match e { 89 | Ok(ReaderEvent::StartElement { 90 | name, 91 | attributes, 92 | namespace, 93 | }) => { 94 | let node = XMLNode { 95 | name, 96 | attrs: attributes, 97 | namespace, 98 | parent: None, 99 | text: None, 100 | cdata: None, 101 | children: vec![], 102 | }; 103 | let arnode = Rc::new(RefCell::new(node)); 104 | 105 | { 106 | let current = parents.last(); 107 | if let Some(c) = current { 108 | c.borrow_mut().children.push(arnode.clone()); 109 | arnode.borrow_mut().parent = Some(Rc::downgrade(c)); 110 | } 111 | } 112 | parents.push(arnode.clone()); 113 | 114 | if root.is_none() { 115 | root = Some(arnode.clone()); 116 | } 117 | } 118 | Ok(ReaderEvent::EndElement { .. 
}) => { 119 | if !parents.is_empty() { 120 | parents.pop(); 121 | } 122 | } 123 | Ok(ReaderEvent::Characters(text)) => { 124 | let current = parents.last(); 125 | if let Some(c) = current { 126 | c.borrow_mut().text = Some(text); 127 | } 128 | } 129 | Ok(ReaderEvent::CData(text)) => { 130 | let current = parents.last(); 131 | if let Some(c) = current { 132 | c.borrow_mut().cdata = Some(text); 133 | } 134 | } 135 | _ => continue, 136 | } 137 | } 138 | 139 | if let Some(r) = root { 140 | let a = Rc::try_unwrap(r); 141 | match a { 142 | Ok(n) => return Ok(n), 143 | Err(_) => return Err(XMLError::InvalidState), 144 | } 145 | } 146 | Err(XMLError::NoElements) 147 | } 148 | } 149 | 150 | #[derive(Debug)] 151 | pub struct XMLNode { 152 | pub name: xml::name::OwnedName, 153 | pub attrs: Vec, 154 | pub namespace: xml::namespace::Namespace, 155 | pub text: Option, 156 | pub cdata: Option, 157 | pub parent: Option, 158 | pub children: Vec, 159 | } 160 | 161 | impl XMLNode { 162 | pub fn get_attr(&self, name: &str) -> Option { 163 | self.attrs 164 | .iter() 165 | .find(|a| a.name.local_name == name) 166 | .map(|a| a.value.clone()) 167 | } 168 | 169 | pub fn find(&self, tag: &str) -> Option { 170 | for r in &self.children { 171 | let c = r.borrow(); 172 | if c.name.local_name == tag { 173 | return Some(r.clone()); 174 | } else if let Some(n) = c.find(tag) { 175 | return Some(n); 176 | } 177 | } 178 | 179 | None 180 | } 181 | } 182 | 183 | impl fmt::Display for XMLNode { 184 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 185 | let childs: String = self.children.iter().fold(String::new(), |sum, x| { 186 | format!("{}{}\n\t", sum, *x.borrow()) 187 | }); 188 | let attrs: String = self 189 | .attrs 190 | .iter() 191 | .fold(String::new(), |sum, x| sum + &x.name.local_name + ", "); 192 | 193 | let t = self.text.as_ref(); 194 | let mut text = String::new(); 195 | if let Some(t) = t { 196 | text.clone_from(t); 197 | } 198 | 199 | write!( 200 | f, 201 | "<{} 
[{}]>\n\t{}{}", 202 | self.name.local_name, attrs, childs, text 203 | ) 204 | } 205 | } 206 | 207 | pub fn replace_attrs( 208 | xmldoc: &[u8], 209 | closure: F, 210 | extra_css: &[String], 211 | ) -> Result, XMLError> 212 | where 213 | F: Fn(&str, &str, &str) -> String, 214 | { 215 | let mut b = Vec::new(); 216 | 217 | { 218 | let reader = ParserConfig::new() 219 | .add_entity("nbsp", " ") 220 | .add_entity("copy", "©") 221 | .add_entity("reg", "®") 222 | .create_reader(xmldoc); 223 | let mut writer = EmitterConfig::default() 224 | .perform_indent(true) 225 | .create_writer(&mut b); 226 | 227 | for e in reader { 228 | match e? { 229 | ev @ ReaderEvent::StartElement { .. } => { 230 | let mut attrs: Vec = vec![]; 231 | 232 | if let Some(WriterEvent::StartElement { 233 | name, 234 | attributes, 235 | namespace, 236 | }) = ev.as_writer_event() 237 | { 238 | for i in 0..attributes.len() { 239 | let mut attr = attributes[i].to_owned(); 240 | let repl = closure(name.local_name, &attr.name.local_name, &attr.value); 241 | attr.value = repl; 242 | attrs.push(attr); 243 | } 244 | 245 | let w = WriterEvent::StartElement { 246 | name, 247 | attributes: Cow::Owned( 248 | attrs.iter().map(OwnedAttribute::borrow).collect(), 249 | ), 250 | //attributes: attributes, 251 | namespace, 252 | }; 253 | writer.write(w)?; 254 | } 255 | } 256 | ReaderEvent::EndElement { name: n } => { 257 | if n.local_name.to_lowercase() == "head" && !extra_css.is_empty() { 258 | // injecting here the extra css 259 | let mut allcss = extra_css.concat(); 260 | allcss = format!("*/ {} /*", allcss); 261 | 262 | writer.write(WriterEvent::start_element("style"))?; 263 | writer.write("/*")?; 264 | writer.write(WriterEvent::cdata(&allcss))?; 265 | writer.write("*/")?; 266 | writer.write(WriterEvent::end_element())?; 267 | } 268 | writer.write(WriterEvent::end_element())?; 269 | } 270 | ev => { 271 | if let Some(e) = ev.as_writer_event() { 272 | writer.write(e)?; 273 | } 274 | } 275 | } 276 | } 277 | } 278 | 279 | 
Ok(b) 280 | } 281 | -------------------------------------------------------------------------------- /test.epub: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/danigm/epub-rs/27eeef34a8987dbbd86260deff994cfde34a4846/test.epub -------------------------------------------------------------------------------- /tests/archive.rs: -------------------------------------------------------------------------------- 1 | use epub::archive::EpubArchive; 2 | use std::fs; 3 | use std::io::Write; 4 | 5 | #[test] 6 | fn archive_open() { 7 | let archive = EpubArchive::new("test.epub"); 8 | assert!(archive.is_ok()); 9 | let archive = archive.unwrap(); 10 | assert_eq!("test.epub", archive.path.display().to_string()); 11 | assert_eq!(32, archive.files.len()); 12 | } 13 | 14 | #[test] 15 | fn archive_entry() { 16 | let archive = EpubArchive::new("test.epub"); 17 | assert!(archive.is_ok()); 18 | let mut archive = archive.unwrap(); 19 | let content = archive.get_entry("META-INF/container.xml"); 20 | assert!(content.is_ok()); 21 | } 22 | 23 | #[test] 24 | fn archive_entry_percent_encoding() { 25 | let archive = EpubArchive::new("test.epub"); 26 | assert!(archive.is_ok()); 27 | let mut archive = archive.unwrap(); 28 | let content = archive.get_entry("a%20%25%20encoded%20item.xml"); 29 | assert!(content.is_ok()); 30 | let content = archive.get_entry("a%20normal%20item.xml"); 31 | assert!(content.is_ok()); 32 | } 33 | 34 | #[test] 35 | fn archive_root_file() { 36 | let archive = EpubArchive::new("test.epub"); 37 | assert!(archive.is_ok()); 38 | let mut archive = archive.unwrap(); 39 | let content = archive.get_entry("META-INF/container.xml"); 40 | let root = archive.get_container_file(); 41 | assert!(content.is_ok() && root.is_ok()); 42 | assert_eq!(content.unwrap(), root.unwrap()); 43 | } 44 | 45 | #[test] 46 | #[ignore] 47 | fn archive_bin_entry() { 48 | let archive = EpubArchive::new("test.epub"); 49 | 
assert!(archive.is_ok()); 50 | let mut archive = archive.unwrap(); 51 | let content = archive.get_entry("OEBPS/Images/portada.png"); 52 | assert!(content.is_ok()); 53 | 54 | let content = content.unwrap(); 55 | let f = fs::File::create("cover.png"); 56 | assert!(f.is_ok()); 57 | let mut f = f.unwrap(); 58 | let resp = f.write_all(&content); 59 | assert!(resp.is_ok()); 60 | } 61 | -------------------------------------------------------------------------------- /tests/doc.rs: -------------------------------------------------------------------------------- 1 | use epub::doc::EpubDoc; 2 | use epub::doc::EpubVersion; 3 | use epub::doc::MetadataItem; 4 | use std::path::Path; 5 | 6 | #[test] 7 | #[cfg(feature = "mock")] 8 | fn doc_mock() { 9 | let doc = EpubDoc::mock(); 10 | assert!(doc.is_ok()); 11 | } 12 | 13 | #[test] 14 | fn doc_open() { 15 | let doc = EpubDoc::new("test.epub"); 16 | assert!(doc.is_ok()); 17 | let doc = doc.unwrap(); 18 | let doc2 = EpubDoc::new("tests/docs/Metamorphosis-jackson.epub").unwrap(); 19 | assert_eq!(Path::new("OEBPS"), doc.root_base); 20 | assert_eq!(Path::new("OEBPS/content.opf"), doc.root_file); 21 | 22 | assert_eq!(23, doc.resources.len()); 23 | { 24 | let tpage = doc.resources.get("titlepage.xhtml"); 25 | assert_eq!(tpage.unwrap().path, Path::new("OEBPS/Text/titlepage.xhtml")); 26 | } 27 | 28 | { 29 | assert_eq!(17, doc.spine.len()); 30 | assert_eq!("titlepage.xhtml", doc.spine[0].idref); 31 | } 32 | 33 | { 34 | let unique_identifier = doc.unique_identifier.clone(); 35 | assert_eq!( 36 | unique_identifier.unwrap(), 37 | "urn:uuid:09132750-3601-4d19-b3a4-55fdf8639849" 38 | ); 39 | } 40 | 41 | { 42 | let identifier = doc.mdata("identifier").unwrap(); 43 | let scheme = identifier.refinement("scheme").unwrap(); 44 | assert_eq!(scheme.value, "UUID"); 45 | } 46 | 47 | { 48 | let title = doc.get_title().unwrap_or_default(); 49 | assert_eq!(title, "Todo es mío"); 50 | } 51 | 52 | { 53 | let creator = doc.mdata("creator").unwrap(); 54 | 
assert_eq!(creator.value, "Daniel Garcia"); 55 | let role = creator.refinement("role").unwrap(); 56 | assert_eq!(role.value, "aut"); 57 | } 58 | 59 | { 60 | let cover = doc.get_cover_id(); 61 | assert_eq!(cover, Some("portada.png".into())); 62 | } 63 | 64 | { 65 | let modified = doc.mdata("dcterms:modified"); 66 | assert_eq!(modified.unwrap().value, "2015-08-10T18:12:03Z"); 67 | } 68 | 69 | { 70 | let release_identifier = doc.get_release_identifier(); 71 | assert_eq!( 72 | release_identifier.unwrap(), 73 | "urn:uuid:09132750-3601-4d19-b3a4-55fdf8639849@2015-08-10T18:12:03Z" 74 | ); 75 | } 76 | 77 | { 78 | let unique_identifier = doc2.unique_identifier.clone(); 79 | assert_eq!( 80 | "http://metamorphosiskafka.pressbooks.com", 81 | unique_identifier.unwrap() 82 | ); 83 | } 84 | 85 | { 86 | let release_identifier = doc2.get_release_identifier(); 87 | assert_eq!(None, release_identifier); 88 | } 89 | } 90 | 91 | #[test] 92 | fn doc_open_epub3() { 93 | let doc = EpubDoc::new("tests/docs/fatbf.epub"); 94 | assert!(doc.is_ok()); 95 | let doc = doc.unwrap(); 96 | 97 | { 98 | // Test refinements 99 | let mut iter = doc.metadata.iter(); 100 | let finder = |item: &&MetadataItem| item.property == "identifier"; 101 | 102 | let identifier = iter.find(finder).unwrap(); 103 | assert!(identifier.refined.is_empty()); 104 | 105 | let identifier = iter.find(finder).unwrap(); 106 | let ident_type = identifier.refinement("identifier-type").unwrap(); 107 | assert_eq!(ident_type.scheme, Some("onix:codelist5".to_string())); 108 | assert_eq!(ident_type.value, "15"); 109 | } 110 | 111 | { 112 | // Test cover 113 | let cover_mime = doc.get_cover_id().and_then(|id| doc.get_resource_mime(&id)); 114 | assert_eq!(cover_mime, Some("image/jpeg".to_string())); 115 | } 116 | 117 | { 118 | // Test nav 119 | let nav_mime = doc.get_nav_id().and_then(|id| doc.get_resource_mime(&id)); 120 | assert_eq!(nav_mime, Some("application/xhtml+xml".to_string())); 121 | } 122 | } 123 | 124 | #[test] 125 | fn 
toc_test() { 126 | let doc = EpubDoc::new("test.epub"); 127 | assert!(doc.is_ok()); 128 | let doc = doc.unwrap(); 129 | 130 | assert!(!doc.toc.is_empty()); 131 | for nav in doc.toc.iter() { 132 | let chapter = doc.resource_uri_to_chapter(&nav.content); 133 | assert!(chapter.is_some()); 134 | assert_eq!(nav.play_order, chapter.unwrap()); 135 | } 136 | } 137 | 138 | #[test] 139 | fn toc_title_test() { 140 | let doc = EpubDoc::new("test.epub"); 141 | assert!(doc.is_ok()); 142 | let doc = doc.unwrap(); 143 | 144 | assert!(doc.toc_title == "Todo es mío"); 145 | } 146 | 147 | #[test] 148 | fn version_test() { 149 | let doc = EpubDoc::new("test.epub"); 150 | assert!(doc.is_ok()); 151 | let doc = doc.unwrap(); 152 | 153 | assert!(doc.version == EpubVersion::Version2_0); 154 | assert!(doc.version < EpubVersion::Version3_0); 155 | } 156 | -------------------------------------------------------------------------------- /tests/docs/Metamorphosis-jackson.epub: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/danigm/epub-rs/27eeef34a8987dbbd86260deff994cfde34a4846/tests/docs/Metamorphosis-jackson.epub -------------------------------------------------------------------------------- /tests/docs/book2.epub: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/danigm/epub-rs/27eeef34a8987dbbd86260deff994cfde34a4846/tests/docs/book2.epub -------------------------------------------------------------------------------- /tests/docs/fatbf.epub: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/danigm/epub-rs/27eeef34a8987dbbd86260deff994cfde34a4846/tests/docs/fatbf.epub -------------------------------------------------------------------------------- /tests/read.rs: -------------------------------------------------------------------------------- 1 | use epub::doc::EpubDoc; 2 | 3 | #[test] 4 | fn 
read_doc() { 5 | let input_file = "tests/docs/Metamorphosis-jackson.epub"; 6 | let doc = EpubDoc::new(input_file); 7 | assert!(doc.is_ok()); 8 | let mut doc = doc.unwrap(); 9 | 10 | if let Some(title) = doc.get_title() { 11 | println!("Book title: {}", title); 12 | } else { 13 | println!("Book title not found"); 14 | } 15 | println!("Num Pages: {}\n", doc.get_num_pages()); 16 | 17 | { 18 | println!("resources:\n"); 19 | for (k, v) in doc.resources.iter() { 20 | println!("{}: {}\n * {}\n", k, v.mime, v.path.display()); 21 | } 22 | println!(); 23 | } 24 | 25 | while doc.go_next() { 26 | println!("ID: {}", doc.get_current_id().unwrap()); 27 | let current = doc.get_current_str(); 28 | match current { 29 | Some((v, m)) => println!("Value {:?}, Mime {:?}\n", v, m), 30 | None => println!("Not Found\n"), 31 | } 32 | } 33 | } 34 | 35 | #[test] 36 | fn bad_epub() { 37 | //book2.epub has a opf encoded in UTF-16 38 | //It also has malformed toc, manifest and guide entries, as well as multiple metadata entries 39 | let input_file = "tests/docs/book2.epub"; 40 | let doc = EpubDoc::new(input_file); 41 | assert!(doc.is_ok()); 42 | let doc = doc.unwrap(); 43 | let titles: Vec = doc 44 | .metadata 45 | .iter() 46 | .filter_map(|d| { 47 | if d.property == "title" { 48 | Some(d.value.clone()) 49 | } else { 50 | None 51 | } 52 | }) 53 | .collect(); 54 | if !titles.is_empty() { 55 | assert_eq!( 56 | titles, 57 | vec!["Metamorphosis ".to_string(), "Metamorphosis2 ".to_string()] 58 | ); 59 | println!("Book title: {:#?}", titles); 60 | } else { 61 | println!("Book title not found"); 62 | } 63 | } 64 | --------------------------------------------------------------------------------