├── .github └── workflows │ ├── ci.yml │ └── pylint.yml ├── .gitignore ├── LICENSE ├── MAINTAINERS ├── Makefile ├── README.md ├── cabarchive ├── __init__.py ├── archive.py ├── cli.py ├── errors.py ├── file.py ├── parser.py ├── py.typed ├── test_misc.py ├── utils.py └── writer.py ├── data ├── .gitignore ├── compressed.cab ├── ddf-fixed.cab ├── example.jpg ├── large-compressed.cab ├── large.cab ├── multi-folder-compressed.cab ├── multi-folder.cab ├── random.bin ├── setup.ddf ├── simple.cab ├── test.txt └── utf8.cab ├── docs ├── .gitignore ├── Makefile ├── requirements.txt └── source │ ├── conf.py │ └── index.rst ├── mypy.ini ├── pylintrc ├── setup.cfg └── setup.py /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions 2 | name: python-cabarchive 3 | 4 | on: 5 | push: 6 | branches: [ master ] 7 | pull_request: 8 | branches: [ master ] 9 | 10 | jobs: 11 | build: 12 | runs-on: ubuntu-latest 13 | strategy: 14 | matrix: 15 | python-version: [3.8, 3.11] 16 | steps: 17 | - uses: actions/checkout@v2 18 | - name: Set up Python ${{ matrix.python-version }} 19 | uses: actions/setup-python@v1 20 | with: 21 | python-version: ${{ matrix.python-version }} 22 | - name: Install dependencies 23 | run: | 24 | sudo apt-get install -y \ 25 | cabextract 26 | - name: Lint with flake8 27 | run: | 28 | pip install flake8 29 | # stop the build if there are Python syntax errors or undefined names 30 | flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics 31 | - name: Test with pytest 32 | run: | 33 | pip install pytest 34 | pytest 35 | -------------------------------------------------------------------------------- /.github/workflows/pylint.yml: -------------------------------------------------------------------------------- 1 | name: Pylint 2 | 3 | on: [push] 4 | 5 | jobs: 6 | build: 7 | runs-on: ubuntu-latest 8 | strategy: 9 | matrix: 10 | python-version: ["3.8", "3.9", "3.10"] 11 | steps: 12 | - uses: actions/checkout@v4 13 | - name: Set up Python ${{ matrix.python-version }} 14 | uses: actions/setup-python@v3 15 | with: 16 | python-version: ${{ matrix.python-version }} 17 | - name: Install dependencies 18 | run: | 19 | python -m pip install --upgrade pip 20 | pip install pylint 21 | - name: Analysing the code with pylint 22 | run: | 23 | pylint --rcfile pylintrc cabarchive/*.py *.py 24 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *.pyi 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | 27 | # PyInstaller 28 | # Usually these files are written by a python script from a template 29 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 30 | *.manifest 31 | *.spec 32 | 33 | # Installer logs 34 | pip-log.txt 35 | pip-delete-this-directory.txt 36 | 37 | # Unit test / coverage reports 38 | htmlcov/ 39 | .tox/ 40 | .coverage 41 | .coverage.* 42 | .cache 43 | nosetests.xml 44 | coverage.xml 45 | *,cover 46 | 47 | # Translations 48 | *.mo 49 | *.pot 50 | 51 | # Django stuff: 52 | *.log 53 | 54 | # Sphinx documentation 55 | docs/_build/ 56 | 57 | # PyBuilder 58 | target/ 59 | /tmp/ 60 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | GNU LESSER GENERAL PUBLIC LICENSE 2 | Version 2.1, February 1999 3 | 4 | Copyright (C) 1991, 1999 Free Software Foundation, Inc. 5 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 6 | Everyone is permitted to copy and distribute verbatim copies 7 | of this license document, but changing it is not allowed. 8 | 9 | (This is the first released version of the Lesser GPL. It also counts 10 | as the successor of the GNU Library Public License, version 2, hence 11 | the version number 2.1.) 12 | 13 | Preamble 14 | 15 | The licenses for most software are designed to take away your 16 | freedom to share and change it. By contrast, the GNU General Public 17 | Licenses are intended to guarantee your freedom to share and change 18 | free software--to make sure the software is free for all its users. 19 | 20 | This license, the Lesser General Public License, applies to some 21 | specially designated software packages--typically libraries--of the 22 | Free Software Foundation and other authors who decide to use it. You 23 | can use it too, but we suggest you first think carefully about whether 24 | this license or the ordinary General Public License is the better 25 | strategy to use in any particular case, based on the explanations below. 26 | 27 | When we speak of free software, we are referring to freedom of use, 28 | not price. Our General Public Licenses are designed to make sure that 29 | you have the freedom to distribute copies of free software (and charge 30 | for this service if you wish); that you receive source code or can get 31 | it if you want it; that you can change the software and use pieces of 32 | it in new free programs; and that you are informed that you can do 33 | these things. 34 | 35 | To protect your rights, we need to make restrictions that forbid 36 | distributors to deny you these rights or to ask you to surrender these 37 | rights. These restrictions translate to certain responsibilities for 38 | you if you distribute copies of the library or if you modify it. 39 | 40 | For example, if you distribute copies of the library, whether gratis 41 | or for a fee, you must give the recipients all the rights that we gave 42 | you. You must make sure that they, too, receive or can get the source 43 | code. If you link other code with the library, you must provide 44 | complete object files to the recipients, so that they can relink them 45 | with the library after making changes to the library and recompiling 46 | it. And you must show them these terms so they know their rights. 47 | 48 | We protect your rights with a two-step method: (1) we copyright the 49 | library, and (2) we offer you this license, which gives you legal 50 | permission to copy, distribute and/or modify the library. 51 | 52 | To protect each distributor, we want to make it very clear that 53 | there is no warranty for the free library. Also, if the library is 54 | modified by someone else and passed on, the recipients should know 55 | that what they have is not the original version, so that the original 56 | author's reputation will not be affected by problems that might be 57 | introduced by others. 58 | 59 | Finally, software patents pose a constant threat to the existence of 60 | any free program. We wish to make sure that a company cannot 61 | effectively restrict the users of a free program by obtaining a 62 | restrictive license from a patent holder. Therefore, we insist that 63 | any patent license obtained for a version of the library must be 64 | consistent with the full freedom of use specified in this license. 65 | 66 | Most GNU software, including some libraries, is covered by the 67 | ordinary GNU General Public License. This license, the GNU Lesser 68 | General Public License, applies to certain designated libraries, and 69 | is quite different from the ordinary General Public License. We use 70 | this license for certain libraries in order to permit linking those 71 | libraries into non-free programs. 72 | 73 | When a program is linked with a library, whether statically or using 74 | a shared library, the combination of the two is legally speaking a 75 | combined work, a derivative of the original library. The ordinary 76 | General Public License therefore permits such linking only if the 77 | entire combination fits its criteria of freedom. The Lesser General 78 | Public License permits more lax criteria for linking other code with 79 | the library. 80 | 81 | We call this license the "Lesser" General Public License because it 82 | does Less to protect the user's freedom than the ordinary General 83 | Public License. It also provides other free software developers Less 84 | of an advantage over competing non-free programs. These disadvantages 85 | are the reason we use the ordinary General Public License for many 86 | libraries. However, the Lesser license provides advantages in certain 87 | special circumstances. 88 | 89 | For example, on rare occasions, there may be a special need to 90 | encourage the widest possible use of a certain library, so that it becomes 91 | a de-facto standard. To achieve this, non-free programs must be 92 | allowed to use the library. A more frequent case is that a free 93 | library does the same job as widely used non-free libraries. In this 94 | case, there is little to gain by limiting the free library to free 95 | software only, so we use the Lesser General Public License. 96 | 97 | In other cases, permission to use a particular library in non-free 98 | programs enables a greater number of people to use a large body of 99 | free software. For example, permission to use the GNU C Library in 100 | non-free programs enables many more people to use the whole GNU 101 | operating system, as well as its variant, the GNU/Linux operating 102 | system. 103 | 104 | Although the Lesser General Public License is Less protective of the 105 | users' freedom, it does ensure that the user of a program that is 106 | linked with the Library has the freedom and the wherewithal to run 107 | that program using a modified version of the Library. 108 | 109 | The precise terms and conditions for copying, distribution and 110 | modification follow. Pay close attention to the difference between a 111 | "work based on the library" and a "work that uses the library". The 112 | former contains code derived from the library, whereas the latter must 113 | be combined with the library in order to run. 114 | 115 | GNU LESSER GENERAL PUBLIC LICENSE 116 | TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION 117 | 118 | 0. This License Agreement applies to any software library or other 119 | program which contains a notice placed by the copyright holder or 120 | other authorized party saying it may be distributed under the terms of 121 | this Lesser General Public License (also called "this License"). 122 | Each licensee is addressed as "you". 123 | 124 | A "library" means a collection of software functions and/or data 125 | prepared so as to be conveniently linked with application programs 126 | (which use some of those functions and data) to form executables. 127 | 128 | The "Library", below, refers to any such software library or work 129 | which has been distributed under these terms. A "work based on the 130 | Library" means either the Library or any derivative work under 131 | copyright law: that is to say, a work containing the Library or a 132 | portion of it, either verbatim or with modifications and/or translated 133 | straightforwardly into another language. (Hereinafter, translation is 134 | included without limitation in the term "modification".) 135 | 136 | "Source code" for a work means the preferred form of the work for 137 | making modifications to it. For a library, complete source code means 138 | all the source code for all modules it contains, plus any associated 139 | interface definition files, plus the scripts used to control compilation 140 | and installation of the library. 141 | 142 | Activities other than copying, distribution and modification are not 143 | covered by this License; they are outside its scope. The act of 144 | running a program using the Library is not restricted, and output from 145 | such a program is covered only if its contents constitute a work based 146 | on the Library (independent of the use of the Library in a tool for 147 | writing it). Whether that is true depends on what the Library does 148 | and what the program that uses the Library does. 149 | 150 | 1. You may copy and distribute verbatim copies of the Library's 151 | complete source code as you receive it, in any medium, provided that 152 | you conspicuously and appropriately publish on each copy an 153 | appropriate copyright notice and disclaimer of warranty; keep intact 154 | all the notices that refer to this License and to the absence of any 155 | warranty; and distribute a copy of this License along with the 156 | Library. 157 | 158 | You may charge a fee for the physical act of transferring a copy, 159 | and you may at your option offer warranty protection in exchange for a 160 | fee. 161 | 162 | 2. You may modify your copy or copies of the Library or any portion 163 | of it, thus forming a work based on the Library, and copy and 164 | distribute such modifications or work under the terms of Section 1 165 | above, provided that you also meet all of these conditions: 166 | 167 | a) The modified work must itself be a software library. 168 | 169 | b) You must cause the files modified to carry prominent notices 170 | stating that you changed the files and the date of any change. 171 | 172 | c) You must cause the whole of the work to be licensed at no 173 | charge to all third parties under the terms of this License. 174 | 175 | d) If a facility in the modified Library refers to a function or a 176 | table of data to be supplied by an application program that uses 177 | the facility, other than as an argument passed when the facility 178 | is invoked, then you must make a good faith effort to ensure that, 179 | in the event an application does not supply such function or 180 | table, the facility still operates, and performs whatever part of 181 | its purpose remains meaningful. 182 | 183 | (For example, a function in a library to compute square roots has 184 | a purpose that is entirely well-defined independent of the 185 | application. Therefore, Subsection 2d requires that any 186 | application-supplied function or table used by this function must 187 | be optional: if the application does not supply it, the square 188 | root function must still compute square roots.) 189 | 190 | These requirements apply to the modified work as a whole. If 191 | identifiable sections of that work are not derived from the Library, 192 | and can be reasonably considered independent and separate works in 193 | themselves, then this License, and its terms, do not apply to those 194 | sections when you distribute them as separate works. But when you 195 | distribute the same sections as part of a whole which is a work based 196 | on the Library, the distribution of the whole must be on the terms of 197 | this License, whose permissions for other licensees extend to the 198 | entire whole, and thus to each and every part regardless of who wrote 199 | it. 200 | 201 | Thus, it is not the intent of this section to claim rights or contest 202 | your rights to work written entirely by you; rather, the intent is to 203 | exercise the right to control the distribution of derivative or 204 | collective works based on the Library. 205 | 206 | In addition, mere aggregation of another work not based on the Library 207 | with the Library (or with a work based on the Library) on a volume of 208 | a storage or distribution medium does not bring the other work under 209 | the scope of this License. 210 | 211 | 3. You may opt to apply the terms of the ordinary GNU General Public 212 | License instead of this License to a given copy of the Library. To do 213 | this, you must alter all the notices that refer to this License, so 214 | that they refer to the ordinary GNU General Public License, version 2, 215 | instead of to this License. (If a newer version than version 2 of the 216 | ordinary GNU General Public License has appeared, then you can specify 217 | that version instead if you wish.) Do not make any other change in 218 | these notices. 219 | 220 | Once this change is made in a given copy, it is irreversible for 221 | that copy, so the ordinary GNU General Public License applies to all 222 | subsequent copies and derivative works made from that copy. 223 | 224 | This option is useful when you wish to copy part of the code of 225 | the Library into a program that is not a library. 226 | 227 | 4. You may copy and distribute the Library (or a portion or 228 | derivative of it, under Section 2) in object code or executable form 229 | under the terms of Sections 1 and 2 above provided that you accompany 230 | it with the complete corresponding machine-readable source code, which 231 | must be distributed under the terms of Sections 1 and 2 above on a 232 | medium customarily used for software interchange. 233 | 234 | If distribution of object code is made by offering access to copy 235 | from a designated place, then offering equivalent access to copy the 236 | source code from the same place satisfies the requirement to 237 | distribute the source code, even though third parties are not 238 | compelled to copy the source along with the object code. 239 | 240 | 5. A program that contains no derivative of any portion of the 241 | Library, but is designed to work with the Library by being compiled or 242 | linked with it, is called a "work that uses the Library". Such a 243 | work, in isolation, is not a derivative work of the Library, and 244 | therefore falls outside the scope of this License. 245 | 246 | However, linking a "work that uses the Library" with the Library 247 | creates an executable that is a derivative of the Library (because it 248 | contains portions of the Library), rather than a "work that uses the 249 | library". The executable is therefore covered by this License. 250 | Section 6 states terms for distribution of such executables. 251 | 252 | When a "work that uses the Library" uses material from a header file 253 | that is part of the Library, the object code for the work may be a 254 | derivative work of the Library even though the source code is not. 255 | Whether this is true is especially significant if the work can be 256 | linked without the Library, or if the work is itself a library. The 257 | threshold for this to be true is not precisely defined by law. 258 | 259 | If such an object file uses only numerical parameters, data 260 | structure layouts and accessors, and small macros and small inline 261 | functions (ten lines or less in length), then the use of the object 262 | file is unrestricted, regardless of whether it is legally a derivative 263 | work. (Executables containing this object code plus portions of the 264 | Library will still fall under Section 6.) 265 | 266 | Otherwise, if the work is a derivative of the Library, you may 267 | distribute the object code for the work under the terms of Section 6. 268 | Any executables containing that work also fall under Section 6, 269 | whether or not they are linked directly with the Library itself. 270 | 271 | 6. As an exception to the Sections above, you may also combine or 272 | link a "work that uses the Library" with the Library to produce a 273 | work containing portions of the Library, and distribute that work 274 | under terms of your choice, provided that the terms permit 275 | modification of the work for the customer's own use and reverse 276 | engineering for debugging such modifications. 277 | 278 | You must give prominent notice with each copy of the work that the 279 | Library is used in it and that the Library and its use are covered by 280 | this License. You must supply a copy of this License. If the work 281 | during execution displays copyright notices, you must include the 282 | copyright notice for the Library among them, as well as a reference 283 | directing the user to the copy of this License. Also, you must do one 284 | of these things: 285 | 286 | a) Accompany the work with the complete corresponding 287 | machine-readable source code for the Library including whatever 288 | changes were used in the work (which must be distributed under 289 | Sections 1 and 2 above); and, if the work is an executable linked 290 | with the Library, with the complete machine-readable "work that 291 | uses the Library", as object code and/or source code, so that the 292 | user can modify the Library and then relink to produce a modified 293 | executable containing the modified Library. (It is understood 294 | that the user who changes the contents of definitions files in the 295 | Library will not necessarily be able to recompile the application 296 | to use the modified definitions.) 297 | 298 | b) Use a suitable shared library mechanism for linking with the 299 | Library. A suitable mechanism is one that (1) uses at run time a 300 | copy of the library already present on the user's computer system, 301 | rather than copying library functions into the executable, and (2) 302 | will operate properly with a modified version of the library, if 303 | the user installs one, as long as the modified version is 304 | interface-compatible with the version that the work was made with. 305 | 306 | c) Accompany the work with a written offer, valid for at 307 | least three years, to give the same user the materials 308 | specified in Subsection 6a, above, for a charge no more 309 | than the cost of performing this distribution. 310 | 311 | d) If distribution of the work is made by offering access to copy 312 | from a designated place, offer equivalent access to copy the above 313 | specified materials from the same place. 314 | 315 | e) Verify that the user has already received a copy of these 316 | materials or that you have already sent this user a copy. 317 | 318 | For an executable, the required form of the "work that uses the 319 | Library" must include any data and utility programs needed for 320 | reproducing the executable from it. However, as a special exception, 321 | the materials to be distributed need not include anything that is 322 | normally distributed (in either source or binary form) with the major 323 | components (compiler, kernel, and so on) of the operating system on 324 | which the executable runs, unless that component itself accompanies 325 | the executable. 326 | 327 | It may happen that this requirement contradicts the license 328 | restrictions of other proprietary libraries that do not normally 329 | accompany the operating system. Such a contradiction means you cannot 330 | use both them and the Library together in an executable that you 331 | distribute. 332 | 333 | 7. You may place library facilities that are a work based on the 334 | Library side-by-side in a single library together with other library 335 | facilities not covered by this License, and distribute such a combined 336 | library, provided that the separate distribution of the work based on 337 | the Library and of the other library facilities is otherwise 338 | permitted, and provided that you do these two things: 339 | 340 | a) Accompany the combined library with a copy of the same work 341 | based on the Library, uncombined with any other library 342 | facilities. This must be distributed under the terms of the 343 | Sections above. 344 | 345 | b) Give prominent notice with the combined library of the fact 346 | that part of it is a work based on the Library, and explaining 347 | where to find the accompanying uncombined form of the same work. 348 | 349 | 8. You may not copy, modify, sublicense, link with, or distribute 350 | the Library except as expressly provided under this License. Any 351 | attempt otherwise to copy, modify, sublicense, link with, or 352 | distribute the Library is void, and will automatically terminate your 353 | rights under this License. However, parties who have received copies, 354 | or rights, from you under this License will not have their licenses 355 | terminated so long as such parties remain in full compliance. 356 | 357 | 9. You are not required to accept this License, since you have not 358 | signed it. However, nothing else grants you permission to modify or 359 | distribute the Library or its derivative works. These actions are 360 | prohibited by law if you do not accept this License. Therefore, by 361 | modifying or distributing the Library (or any work based on the 362 | Library), you indicate your acceptance of this License to do so, and 363 | all its terms and conditions for copying, distributing or modifying 364 | the Library or works based on it. 365 | 366 | 10. Each time you redistribute the Library (or any work based on the 367 | Library), the recipient automatically receives a license from the 368 | original licensor to copy, distribute, link with or modify the Library 369 | subject to these terms and conditions. You may not impose any further 370 | restrictions on the recipients' exercise of the rights granted herein. 371 | You are not responsible for enforcing compliance by third parties with 372 | this License. 373 | 374 | 11. If, as a consequence of a court judgment or allegation of patent 375 | infringement or for any other reason (not limited to patent issues), 376 | conditions are imposed on you (whether by court order, agreement or 377 | otherwise) that contradict the conditions of this License, they do not 378 | excuse you from the conditions of this License. If you cannot 379 | distribute so as to satisfy simultaneously your obligations under this 380 | License and any other pertinent obligations, then as a consequence you 381 | may not distribute the Library at all. For example, if a patent 382 | license would not permit royalty-free redistribution of the Library by 383 | all those who receive copies directly or indirectly through you, then 384 | the only way you could satisfy both it and this License would be to 385 | refrain entirely from distribution of the Library. 386 | 387 | If any portion of this section is held invalid or unenforceable under any 388 | particular circumstance, the balance of the section is intended to apply, 389 | and the section as a whole is intended to apply in other circumstances. 390 | 391 | It is not the purpose of this section to induce you to infringe any 392 | patents or other property right claims or to contest validity of any 393 | such claims; this section has the sole purpose of protecting the 394 | integrity of the free software distribution system which is 395 | implemented by public license practices. Many people have made 396 | generous contributions to the wide range of software distributed 397 | through that system in reliance on consistent application of that 398 | system; it is up to the author/donor to decide if he or she is willing 399 | to distribute software through any other system and a licensee cannot 400 | impose that choice. 401 | 402 | This section is intended to make thoroughly clear what is believed to 403 | be a consequence of the rest of this License. 404 | 405 | 12. If the distribution and/or use of the Library is restricted in 406 | certain countries either by patents or by copyrighted interfaces, the 407 | original copyright holder who places the Library under this License may add 408 | an explicit geographical distribution limitation excluding those countries, 409 | so that distribution is permitted only in or among countries not thus 410 | excluded. In such case, this License incorporates the limitation as if 411 | written in the body of this License. 412 | 413 | 13. The Free Software Foundation may publish revised and/or new 414 | versions of the Lesser General Public License from time to time. 415 | Such new versions will be similar in spirit to the present version, 416 | but may differ in detail to address new problems or concerns. 417 | 418 | Each version is given a distinguishing version number. If the Library 419 | specifies a version number of this License which applies to it and 420 | "any later version", you have the option of following the terms and 421 | conditions either of that version or of any later version published by 422 | the Free Software Foundation. If the Library does not specify a 423 | license version number, you may choose any version ever published by 424 | the Free Software Foundation. 425 | 426 | 14. If you wish to incorporate parts of the Library into other free 427 | programs whose distribution conditions are incompatible with these, 428 | write to the author to ask for permission. For software which is 429 | copyrighted by the Free Software Foundation, write to the Free 430 | Software Foundation; we sometimes make exceptions for this. Our 431 | decision will be guided by the two goals of preserving the free status 432 | of all derivatives of our free software and of promoting the sharing 433 | and reuse of software generally. 434 | 435 | NO WARRANTY 436 | 437 | 15. BECAUSE THE LIBRARY IS LICENSED FREE OF CHARGE, THERE IS NO 438 | WARRANTY FOR THE LIBRARY, TO THE EXTENT PERMITTED BY APPLICABLE LAW. 439 | EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR 440 | OTHER PARTIES PROVIDE THE LIBRARY "AS IS" WITHOUT WARRANTY OF ANY 441 | KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE 442 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 443 | PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE 444 | LIBRARY IS WITH YOU. SHOULD THE LIBRARY PROVE DEFECTIVE, YOU ASSUME 445 | THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 446 | 447 | 16. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN 448 | WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY 449 | AND/OR REDISTRIBUTE THE LIBRARY AS PERMITTED ABOVE, BE LIABLE TO YOU 450 | FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR 451 | CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE 452 | LIBRARY (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING 453 | RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A 454 | FAILURE OF THE LIBRARY TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF 455 | SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH 456 | DAMAGES. 457 | 458 | END OF TERMS AND CONDITIONS 459 | 460 | How to Apply These Terms to Your New Libraries 461 | 462 | If you develop a new library, and you want it to be of the greatest 463 | possible use to the public, we recommend making it free software that 464 | everyone can redistribute and change. You can do so by permitting 465 | redistribution under these terms (or, alternatively, under the terms of the 466 | ordinary General Public License). 467 | 468 | To apply these terms, attach the following notices to the library. It is 469 | safest to attach them to the start of each source file to most effectively 470 | convey the exclusion of warranty; and each file should have at least the 471 | "copyright" line and a pointer to where the full notice is found. 472 | 473 | {description} 474 | Copyright (C) {year} {fullname} 475 | 476 | This library is free software; you can redistribute it and/or 477 | modify it under the terms of the GNU Lesser General Public 478 | License as published by the Free Software Foundation; either 479 | version 2.1 of the License, or (at your option) any later version. 480 | 481 | This library is distributed in the hope that it will be useful, 482 | but WITHOUT ANY WARRANTY; without even the implied warranty of 483 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 484 | Lesser General Public License for more details. 485 | 486 | You should have received a copy of the GNU Lesser General Public 487 | License along with this library; if not, write to the Free Software 488 | Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 489 | USA 490 | 491 | Also add information on how to contact you by electronic and paper mail. 492 | 493 | You should also get your employer (if you work as a programmer) or your 494 | school, if any, to sign a "copyright disclaimer" for the library, if 495 | necessary. Here is a sample; alter the names: 496 | 497 | Yoyodyne, Inc., hereby disclaims all copyright interest in the 498 | library `Frob' (a library for tweaking knobs) written by James Random 499 | Hacker. 500 | 501 | {signature of Ty Coon}, 1 April 1990 502 | Ty Coon, President of Vice 503 | 504 | That's all there is to it! 505 | 506 | -------------------------------------------------------------------------------- /MAINTAINERS: -------------------------------------------------------------------------------- 1 | Richard Hughes 2 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2019 Richard Hughes 2 | # SPDX-License-Identifier: GPL-2.0+ 3 | 4 | VENV=./env 5 | PYTHON=$(VENV)/bin/python 6 | PYTEST=$(VENV)/bin/pytest 7 | PYLINT=$(VENV)/bin/pylint 8 | MYPY=$(VENV)/bin/mypy 9 | CODESPELL=$(VENV)/bin/codespell 10 | PIP=$(VENV)/bin/pip 11 | BLACK=$(VENV)/bin/black 12 | STUBGEN=$(VENV)/bin/stubgen 13 | 14 | setup: 15 | virtualenv ./env 16 | 17 | $(PYTEST): 18 | $(PIP) install pytest-cov pylint 19 | 20 | $(MYPY): 21 | $(PIP) install mypy 22 | 23 | $(STUBGEN): 24 | $(PIP) install stubgen 25 | 26 | $(BLACK): 27 | $(PIP) install black 28 | 29 | check: $(PYTEST) $(MYPY) 30 | $(MYPY) cabarchive 31 | $(PYTEST) cabarchive 32 | $(PYLINT) --rcfile pylintrc cabarchive/*.py *.py 33 | 34 | blacken: $(BLACK) 35 | find cabarchive -name '*.py' -exec $(BLACK) {} \; 36 | 37 | pkg: $(STUBGEN) 38 | $(STUBGEN) --output . --package cabarchive 39 | $(PYTHON) setup.py sdist bdist_wheel 40 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # python-cabarchive 2 | 3 | Contributors welcome, either adding new functionality or fixing bugs. 4 | 5 | Documentation available here: https://python-cabarchive.readthedocs.io/ 6 | 7 | See also: https://msdn.microsoft.com/en-us/library/bb417343.aspx 8 | 9 | # Release Process 10 | 11 | These notes are probably only for the maintainer of this module! 12 | 13 | make pkg 14 | ./env/bin/twine upload dist/* 15 | -------------------------------------------------------------------------------- /cabarchive/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # 4 | # Copyright (C) 2015-2020 Richard Hughes 5 | # 6 | # SPDX-License-Identifier: LGPL-2.1+ 7 | 8 | from cabarchive.file import CabFile 9 | from cabarchive.archive import CabArchive 10 | from cabarchive.errors import CorruptionError, NotSupportedError 11 | -------------------------------------------------------------------------------- /cabarchive/archive.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # 4 | # Copyright (C) 2015-2020 Richard Hughes 5 | # 6 | # SPDX-License-Identifier: LGPL-2.1+ 7 | # 8 | # pylint: disable=protected-access 9 | 10 | import fnmatch 11 | 12 | from typing import Optional, List 13 | 14 | from cabarchive.file import CabFile 15 | from cabarchive.parser import CabArchiveParser 16 | from cabarchive.writer import CabArchiveWriter 17 | 18 | 19 | class CabArchive(dict): 20 | """This instance allows parsing or writing a MS Cabinet archive. 21 | 22 | You can treat the CabArchive instance like a dictionary when reading 23 | and writing archives. 24 | 25 | For instance, loading an archive: 26 | 27 | .. code-block:: python 28 | 29 | with open("test.cab", "rb") as f: 30 | arc = CabArchive(f.read()) 31 | cff = arc["test.txt"] 32 | print("filename", cff.filename) # "test.txt" 33 | print("contents", cff.buf) # b"test123" 34 | print("created", cff.date.year) # 2015 35 | for fn in arc: 36 | print(fn) # "test.txt" 37 | 38 | ...or creating and saving an archive: 39 | 40 | .. code-block:: python 41 | 42 | arc = CabArchive() 43 | arc["test.txt"] = CabFile("test123".encode()) 44 | with open("test.cab", "wb") as f: 45 | f.write(arc.save()) 46 | """ 47 | 48 | def __init__(self, buf: Optional[bytes] = None, flattern: bool = False): 49 | """Creates a CabArchive instance. 50 | 51 | Args: 52 | self: A CabArchive instance. 53 | buf: Binary blob loaded from disk. 54 | flattern: Disregard archive directory structure wen loading. 55 | 56 | Raises: 57 | CorruptionError: The cab file was invalid or corrupt. 58 | NotSupportedError: The format was not supported, e.g. unknown compression. 59 | """ 60 | dict.__init__(self) 61 | 62 | self.set_id: int = 0 #: The "Set ID" used for multi-file archives 63 | 64 | # load archive 65 | if buf: 66 | CabArchiveParser(self, flattern=flattern).parse(buf) 67 | 68 | def __setitem__(self, key: str, val: CabFile) -> None: 69 | assert isinstance(key, str) 70 | assert isinstance(val, CabFile) 71 | val.filename = key 72 | dict.__setitem__(self, key, val) 73 | 74 | def parse(self, buf: bytes) -> None: 75 | """Parse .cab binary data 76 | 77 | Args: 78 | self: A CabArchive instance. 79 | bytes: Binary blob loaded from disk. 80 | 81 | Raises: 82 | CorruptionError: The cab file was invalid or corrupt. 83 | NotSupportedError: The format was not supported, e.g. unknown compression. 84 | 85 | """ 86 | CabArchiveParser(self).parse(buf) 87 | 88 | def find_file(self, glob: str) -> Optional[CabFile]: 89 | """Gets a file from the archive using a glob. 90 | 91 | Args: 92 | self: A CabArchive instance. 93 | glob: File glob, e.g. ``*.txt`` 94 | Returns: 95 | The first CabFile that matches the filename glob, or None. 96 | """ 97 | for fn in self: 98 | if fnmatch.fnmatch(fn, glob): 99 | return self[fn] 100 | return None 101 | 102 | def find_files(self, glob: str) -> List[CabFile]: 103 | """Gets files from the archive using a glob. 104 | 105 | Args: 106 | self: A CabArchive instance. 107 | glob: File glob, e.g. ``*.txt`` 108 | Returns: 109 | All CabFile object that matches the filename glob, or None. 110 | """ 111 | arr = [] 112 | for fn in self: 113 | if fnmatch.fnmatch(fn, glob): 114 | arr.append(self[fn]) 115 | return arr 116 | 117 | def save(self, compress: bool = False, sort: bool = True) -> bytes: 118 | """Returns cabinet file data, optionally compressed 119 | 120 | Args: 121 | self: A CabArchive instance. 122 | compress: If the binary data should be compressed. 123 | sort: If the file lists should be sorted in a predictable order 124 | Returns: 125 | The blob of memory that can be written to disk. 126 | """ 127 | return CabArchiveWriter(self, compress=compress, sort=sort).write() 128 | 129 | @property 130 | def size(self) -> int: 131 | """Returns cabinet uncompressed data size""" 132 | return sum(len(cffile) for cffile in self.values()) 133 | 134 | def __repr__(self) -> str: 135 | return f"CabArchive({[str(self[cabfile]) for cabfile in self]})" 136 | -------------------------------------------------------------------------------- /cabarchive/cli.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # 4 | # Copyright (C) 2020 Richard Hughes 5 | # 6 | # SPDX-License-Identifier: LGPL-2.1+ 7 | # 8 | # pylint: disable=wrong-import-position 9 | 10 | import sys 11 | import os 12 | import argparse 13 | 14 | sys.path.append(os.path.realpath(".")) 15 | 16 | from cabarchive import CabArchive, CabFile, NotSupportedError 17 | 18 | 19 | def main(): 20 | parser = argparse.ArgumentParser(description="Process cabinet archives.") 21 | parser.add_argument( 22 | "--decompress", 23 | action="store_true", 24 | help="decompress the archives", 25 | default=False, 26 | ) 27 | parser.add_argument( 28 | "--create", 29 | action="store_true", 30 | help="create an archive", 31 | default=False, 32 | ) 33 | parser.add_argument( 34 | "--info", 35 | action="store_true", 36 | help="Show the files inside the archive", 37 | default=True, 38 | ) 39 | parser.add_argument( 40 | "--outdir", 41 | type=str, 42 | help="Specify the output directory for decompression", 43 | default=".", 44 | ) 45 | 46 | if len(sys.argv) == 1: 47 | print("No input files given") 48 | return 1 49 | 50 | args, argv = parser.parse_known_args() 51 | if args.decompress: 52 | for fn in argv: 53 | arc = CabArchive() 54 | try: 55 | with open(fn, "rb") as f: 56 | arc.parse(f.read()) 57 | except NotSupportedError as e: 58 | print(f"Failed to parse: {str(e)}") 59 | return 1 60 | print(f"Parsing {fn}:") 61 | if args.info: 62 | for fn in arc: 63 | print(fn) 64 | for fn in arc: 65 | path = os.path.join(args.outdir, fn) 66 | os.makedirs(os.path.dirname(path), exist_ok=True) 67 | with open(path, "wb") as f: 68 | print(f"Writing {fn}:") 69 | f.write(arc[fn].buf) 70 | elif args.create: 71 | arc = CabArchive() 72 | try: 73 | print(f"Creating {argv[0]}:") 74 | except IndexError: 75 | print("Expected: ARCHIVE [FILE]...") 76 | return 1 77 | for fn in argv[1:]: 78 | with open(fn, "rb") as f: 79 | arc[os.path.basename(fn)] = CabFile(buf=f.read()) 80 | with open(argv[0], "wb") as f: 81 | f.write(arc.save()) 82 | 83 | return 0 84 | 85 | 86 | if __name__ == "__main__": 87 | main() 88 | -------------------------------------------------------------------------------- /cabarchive/errors.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # 4 | # Copyright (C) 2015-2020 Richard Hughes 5 | # 6 | # SPDX-License-Identifier: LGPL-2.1+ 7 | 8 | 9 | class CorruptionError(Exception): 10 | pass 11 | 12 | 13 | class NotSupportedError(Exception): 14 | pass 15 | -------------------------------------------------------------------------------- /cabarchive/file.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # 4 | # Copyright (C) 2015-2020 Richard Hughes 5 | # 6 | # SPDX-License-Identifier: LGPL-2.1+ 7 | 8 | import datetime 9 | 10 | from typing import Optional 11 | 12 | 13 | def _is_ascii(text: str) -> bool: 14 | """Check if a string is ASCII only""" 15 | if not text: 16 | return False 17 | return all(ord(c) < 128 for c in text) 18 | 19 | 20 | class CabFile: 21 | """An object representing a file in a Cab archive 22 | 23 | Any number of CabFile instances can be stored in a CabArchive. 24 | A new instance can be created with just the data bytes or with an additional 25 | ``mtime``. If the modification time is not set then the current date and time 26 | is used, which may be unhelpful if you require a reproducable builds. 27 | 28 | .. code-block:: python 29 | 30 | cff = CabFile(b"test123") 31 | 32 | """ 33 | 34 | def __init__( 35 | self, 36 | buf: Optional[bytes] = None, 37 | filename: Optional[str] = None, 38 | mtime: Optional[datetime.datetime] = None, 39 | ): 40 | self.filename = filename #: filename to use in the archive 41 | self.buf = buf #: bytes to use for the file contents 42 | self.date: Optional[datetime.date] #: date the file was created 43 | self.time: Optional[datetime.time] #: time the file was created 44 | if mtime: 45 | self.date = mtime.date() 46 | self.time = mtime.time() 47 | else: 48 | self.date = datetime.date.today() 49 | self.time = datetime.datetime.now().time() 50 | self.is_readonly = False #: set if file is read-only 51 | self.is_hidden = False #: set if file is hidden 52 | self.is_system = False #: set if file is a system file 53 | self.is_arch = False #: set if file modified since last backup 54 | self.is_exec = False #: set if file is executable 55 | 56 | def __len__(self) -> int: 57 | if not self.buf: 58 | return 0 59 | return len(self.buf) 60 | 61 | @property 62 | def filename(self) -> Optional[str]: 63 | return self._filename 64 | 65 | @filename.setter 66 | def filename(self, filename: str) -> None: 67 | self.is_name_utf8 = not _is_ascii(filename) 68 | self._filename = filename 69 | 70 | @property 71 | def _filename_win32(self) -> Optional[str]: 72 | return self._filename.replace("/", "\\") 73 | 74 | def _attr_encode(self) -> int: 75 | """Get attributes on the file""" 76 | attr = 0x00 77 | if self.is_readonly: 78 | attr += 0x01 79 | if self.is_hidden: 80 | attr += 0x02 81 | if self.is_system: 82 | attr += 0x04 83 | if self.is_arch: 84 | attr += 0x20 85 | if self.is_exec: 86 | attr += 0x40 87 | if self.is_name_utf8: 88 | attr += 0x80 89 | return attr 90 | 91 | def _attr_decode(self, attr: int) -> None: 92 | """Set attributes on the file""" 93 | self.is_readonly = bool(attr & 0x01) 94 | self.is_hidden = bool(attr & 0x02) 95 | self.is_system = bool(attr & 0x04) 96 | self.is_arch = bool(attr & 0x20) 97 | self.is_exec = bool(attr & 0x40) 98 | self.is_name_utf8 = bool(attr & 0x80) 99 | 100 | def _date_decode(self, val: int) -> None: 101 | """Decode the MSCAB 32-bit date format""" 102 | try: 103 | self.date = datetime.date( 104 | 1980 + ((val & 0xFE00) >> 9), (val & 0x01E0) >> 5, val & 0x001F 105 | ) 106 | except ValueError as _: 107 | self.date = None 108 | 109 | def _time_decode(self, val: int) -> None: 110 | """Decode the MSCAB 32-bit time format""" 111 | try: 112 | self.time = datetime.time( 113 | (val & 0xF800) >> 11, (val & 0x07E0) >> 5, (val & 0x001F) * 2 114 | ) 115 | except ValueError as _: 116 | self.time = None 117 | 118 | def _date_encode(self) -> int: 119 | """Encode the MSCAB 32-bit date format""" 120 | if not self.date or self.date.year < 1980: 121 | return 0 122 | return ((self.date.year - 1980) << 9) + (self.date.month << 5) + self.date.day 123 | 124 | def _time_encode(self) -> int: 125 | """Encode the MSCAB 32-bit time format""" 126 | if not self.time: 127 | return 0 128 | return ( 129 | (self.time.hour << 11) + (self.time.minute << 5) + int(self.time.second / 2) 130 | ) 131 | 132 | def __repr__(self) -> str: 133 | return f"CabFile({self.filename}:{len(self):x})" 134 | -------------------------------------------------------------------------------- /cabarchive/parser.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # 4 | # Copyright (C) 2015-2020 Richard Hughes 5 | # 6 | # SPDX-License-Identifier: LGPL-2.1+ 7 | # 8 | # pylint: disable=protected-access,too-few-public-methods 9 | 10 | from typing import List, Optional, TYPE_CHECKING 11 | import struct 12 | import zlib 13 | import ntpath 14 | 15 | from cabarchive.file import CabFile 16 | from cabarchive.utils import FMT_CFFOLDER, FMT_CFHEADER_RESERVE, _checksum_compute 17 | from cabarchive.errors import CorruptionError, NotSupportedError 18 | 19 | if TYPE_CHECKING: 20 | from cabarchive.archive import CabArchive 21 | 22 | COMPRESSION_MASK_TYPE = 0x000F 23 | COMPRESSION_TYPE_NONE = 0x0000 24 | COMPRESSION_TYPE_MSZIP = 0x0001 25 | COMPRESSION_TYPE_QUANTUM = 0x0002 26 | COMPRESSION_TYPE_LZX = 0x0003 27 | 28 | 29 | class CabArchiveParser: 30 | def __init__(self, cfarchive: "CabArchive", flattern: bool = False): 31 | self.cfarchive: "CabArchive" = cfarchive 32 | self.flattern: bool = flattern 33 | self._folder_data: List[bytes] = [] 34 | self._buf: bytes = b"" 35 | self._header_reserved: bytes = b"" 36 | self._zdict: Optional[bytes] = None 37 | self._rsvd_block: int = 0 38 | self._ndatabsz: int = 0 39 | 40 | def parse_cffile(self, offset: int) -> int: 41 | """Parse a CFFILE entry""" 42 | fmt = " None: 85 | """Parse a CFFOLDER entry""" 86 | fmt = " int: 117 | """Parse a CFDATA entry""" 118 | fmt = " None: 166 | # used as internal state 167 | self._buf = buf 168 | if self._zdict is None: 169 | self._zdict = b"" 170 | 171 | offset: int = 0 172 | 173 | # read the file header 174 | fmt = "<4s" # signature 175 | fmt += "xxxx" # reserved1 176 | fmt += "I" # size 177 | fmt += "xxxx" # reserved2 178 | fmt += "I" # offset to CFFILE 179 | fmt += "xxxx" # reserved3 180 | fmt += "BB" # version minor, major 181 | fmt += "H" # no of CFFOLDERs 182 | fmt += "H" # no of CFFILEs 183 | fmt += "H" # flags 184 | fmt += "H" # setID 185 | fmt += "H" # cnt of cabs in set 186 | try: 187 | ( 188 | signature, 189 | size, 190 | off_cffile, 191 | version_minor, 192 | version_major, 193 | nr_folders, 194 | nr_files, 195 | flags, 196 | set_id, 197 | idx_cabinet, 198 | ) = struct.unpack_from(fmt, self._buf, 0) 199 | except struct.error as e: 200 | raise CorruptionError from e 201 | offset += struct.calcsize(fmt) 202 | 203 | # check magic bytes 204 | if signature != b"MSCF": 205 | raise NotSupportedError("Data is not application/vnd.ms-cab-compressed") 206 | 207 | # check size matches 208 | if size > len(self._buf): 209 | raise CorruptionError( 210 | "File size 0x{:x} does not match header 0x{:x} (delta 0x{:x})".format( 211 | len(self._buf), size, len(self._buf) - size 212 | ) 213 | ) 214 | 215 | # check version 216 | if version_major != 1 or version_minor != 3: 217 | raise NotSupportedError( 218 | f"Version {version_major}.{version_minor} not supported" 219 | ) 220 | 221 | # chained cabs not supported 222 | if idx_cabinet != 0: 223 | raise NotSupportedError("Chained cab file not supported") 224 | 225 | # verify we actually have data 226 | if nr_files == 0: 227 | raise CorruptionError("The cab file is empty") 228 | 229 | # verify we got complete data 230 | if off_cffile > len(self._buf): 231 | raise CorruptionError("Cab file corrupt") 232 | 233 | # reserved sizes 234 | if flags & 0x0004: 235 | try: 236 | (rsvd_hdr, rsvd_folder, rsvd_block) = struct.unpack_from( 237 | FMT_CFHEADER_RESERVE, self._buf, offset 238 | ) 239 | except struct.error as e: 240 | raise CorruptionError from e 241 | offset += struct.calcsize(FMT_CFHEADER_RESERVE) 242 | self._header_reserved = buf[offset : offset + rsvd_hdr] 243 | offset += rsvd_hdr 244 | self._rsvd_block = rsvd_block 245 | else: 246 | rsvd_folder = 0 247 | self._rsvd_block = 0 248 | 249 | # read this so we can do round-trip 250 | self.cfarchive.set_id = set_id 251 | 252 | # if the only folder is >= 2GB then CFFOLDER.ndatab will overflow 253 | if len(self._buf) >= 0x8000 * 0xFFFF and nr_folders == 1: 254 | self._ndatabsz = len(self._buf) 255 | 256 | # parse CFFOLDER 257 | for i in range(nr_folders): 258 | self.parse_cffolder(i, offset) 259 | offset += struct.calcsize(FMT_CFFOLDER) + rsvd_folder 260 | 261 | # parse CFFILEs 262 | for i in range(0, nr_files): 263 | off_cffile += self.parse_cffile(off_cffile) 264 | 265 | # allow reuse 266 | self._zdict = None 267 | -------------------------------------------------------------------------------- /cabarchive/py.typed: -------------------------------------------------------------------------------- 1 | # Marker file for PEP 561. The mypy package uses inline types. 2 | -------------------------------------------------------------------------------- /cabarchive/test_misc.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # 4 | # Copyright (C) 2015-2020 Richard Hughes 5 | # 6 | # SPDX-License-Identifier: LGPL-2.1+ 7 | # 8 | # pylint: disable=wrong-import-position 9 | 10 | import os 11 | import sys 12 | import unittest 13 | import datetime 14 | import subprocess 15 | import time 16 | import hashlib 17 | 18 | # allows us to run this from the project root 19 | sys.path.append(os.path.realpath(".")) 20 | 21 | from cabarchive import CabArchive, CabFile, CorruptionError 22 | from cabarchive.utils import _checksum_compute 23 | 24 | 25 | def _check_range(data: bytes, expected: bytes) -> None: 26 | assert data 27 | assert expected 28 | failures: int = 0 29 | if len(data) != len(expected): 30 | print(f"different sizes, got {len(data)} expected {len(expected)}") 31 | failures += 1 32 | for i in range(len(data)): 33 | if data[i] != expected[i]: 34 | print(f"@0x{i:02x} got 0x{data[i]:02x} expected 0x{expected[i]:02x}") 35 | failures += 1 36 | if failures > 10: 37 | print("More than 10 failures, giving up...") 38 | break 39 | if failures: 40 | raise ValueError("Data is not the same") 41 | 42 | 43 | class TestInfParser(unittest.TestCase): 44 | def test_checksums(self): 45 | # test checksum function 46 | csum = _checksum_compute(b"hello123") 47 | self.assertEqual(csum, 0x5F5E5407) 48 | csum = _checksum_compute(b"hello") 49 | self.assertEqual(csum, 0x6C6C6507) 50 | 51 | # measure speed 52 | start = time.time() 53 | with open("data/random.bin", "rb") as f: 54 | csum = _checksum_compute(f.read()) 55 | print(f"profile checksum: {(time.time() - start) * 1000:f}ms") 56 | 57 | def test_create_compressed(self): 58 | cabarchive = CabArchive() 59 | 60 | # make predictable 61 | dt_epoch = datetime.datetime.fromtimestamp(0, datetime.timezone.utc) 62 | cabarchive["README.txt"] = CabFile(b"foofoofoofoofoofoofoofoo", mtime=dt_epoch) 63 | cabarchive["firmware.bin"] = CabFile( 64 | b"barbarbarbarbarbarbarbar", mtime=dt_epoch 65 | ) 66 | buf = cabarchive.save(compress=True) 67 | self.assertEqual(len(buf), 122) 68 | self.assertEqual( 69 | hashlib.sha1(buf).hexdigest(), "74e94703c403aa93b16d01b088eb52e3a9c73288" 70 | ) 71 | 72 | def test_values(self): 73 | # parse junk 74 | with self.assertRaises(CorruptionError): 75 | CabArchive().parse(b"hello") 76 | try: 77 | self.assertEqual(subprocess.call(["cabextract", "--test", "hello"]), 1) 78 | except FileNotFoundError as _: 79 | pass 80 | 81 | def test_simple(self): 82 | with open("data/simple.cab", "rb") as f: 83 | old = f.read() 84 | arc = CabArchive() 85 | arc.parse(old) 86 | cff = arc["test.txt"] 87 | self.assertEqual(cff.filename, "test.txt") 88 | self.assertEqual(cff.buf, b"test123") 89 | self.assertEqual(len(cff.buf), 7) 90 | self.assertEqual(cff.date.year, 2015) 91 | _check_range(arc.save(), old) 92 | 93 | def test_compressed(self): 94 | with open("data/compressed.cab", "rb") as f: 95 | old = f.read() 96 | arc = CabArchive() 97 | arc.parse(old) 98 | cff = arc.find_file("*.txt") 99 | self.assertEqual(cff.buf, b"test123") 100 | _check_range(arc.save(compress=True), old) 101 | 102 | def test_utf8(self): 103 | with open("data/utf8.cab", "rb") as f: 104 | old = f.read() 105 | arc = CabArchive() 106 | arc.parse(old) 107 | cff = arc.find_file("tést.dat") 108 | self.assertEqual(cff.filename, "tést.dat") 109 | self.assertEqual(cff.buf, "tést123".encode()) 110 | self.assertEqual(len(cff.buf), 8) 111 | self.assertEqual(cff.date.year, 2015) 112 | _check_range(arc.save(), old) 113 | 114 | def test_large(self): 115 | with open("data/large.cab", "rb") as f: 116 | old = f.read() 117 | arc = CabArchive() 118 | arc.parse(old) 119 | cff = arc.find_files("random.bin")[0] 120 | self.assertEqual(len(cff.buf), 0xFFFFF) 121 | self.assertEqual( 122 | hashlib.sha1(cff.buf).hexdigest(), 123 | "8497fe89c41871e3cbd7955e13321e056dfbd170", 124 | ) 125 | _check_range(arc.save(), old) 126 | 127 | def test_large_compressed(self): 128 | with open("data/large-compressed.cab", "rb") as f: 129 | old = f.read() 130 | arc = CabArchive() 131 | arc.parse(old) 132 | cff = arc.find_files("random.bin")[0] 133 | self.assertEqual(len(cff.buf), 0xFFFFF) 134 | self.assertEqual( 135 | hashlib.sha1(cff.buf).hexdigest(), 136 | "8497fe89c41871e3cbd7955e13321e056dfbd170", 137 | ) 138 | 139 | def test_multi_folder(self): 140 | # open a folder with multiple folders 141 | arc = CabArchive() 142 | with open("data/multi-folder.cab", "rb") as f: 143 | arc.parse(f.read()) 144 | self.assertEqual(len(arc), 2) 145 | cff = arc.find_file("*.txt") 146 | self.assertEqual(cff.buf, b"test123") 147 | 148 | def test_ddf_fixed(self): 149 | arc = CabArchive() 150 | with open("data/ddf-fixed.cab", "rb") as f: 151 | arc.parse(f.read()) 152 | self.assertEqual(len(arc), 2) 153 | cff = arc.find_file("*.txt") 154 | self.assertEqual(cff.buf, b"test123") 155 | 156 | def test_zdict(self): 157 | # parse multi folder compressed archive that saves zdict 158 | arc = CabArchive() 159 | with open("data/multi-folder-compressed.cab", "rb") as f: 160 | arc.parse(f.read()) 161 | cff = arc["test\\example.jpg"] 162 | self.assertEqual( 163 | hashlib.sha1(cff.buf).hexdigest(), 164 | "60880cf6f2a93616ba8d965bfbca72a56fb736bb", 165 | ) 166 | 167 | def test_create(self): 168 | # create new archive 169 | arc = CabArchive() 170 | arc.set_id = 0x0622 171 | 172 | # first example 173 | cff = CabFile() 174 | cff.buf = ( 175 | b"#include \r\n\r\nvoid main(void)\r\n" 176 | b'{\r\n printf("Hello, world!\\n");\r\n}\r\n' 177 | ) 178 | cff.date = datetime.date(1997, 3, 12) 179 | cff.time = datetime.time(11, 13, 52) 180 | cff.is_arch = True 181 | arc["hello.c"] = cff 182 | 183 | # second example 184 | cff = CabFile() 185 | cff.buf = ( 186 | b"#include \r\n\r\nvoid main(void)\r\n" 187 | b'{\r\n printf("Welcome!\\n");\r\n}\r\n\r\n' 188 | ) 189 | cff.date = datetime.date(1997, 3, 12) 190 | cff.time = datetime.time(11, 15, 14) 191 | cff.is_arch = True 192 | arc["welcome.c"] = cff 193 | 194 | # verify 195 | data = arc.save(False) 196 | with open("/tmp/test.cab", "wb") as f: 197 | f.write(data) 198 | expected = ( 199 | b"\x4D\x53\x43\x46\x00\x00\x00\x00\xFD\x00\x00\x00\x00\x00\x00\x00" 200 | b"\x2C\x00\x00\x00\x00\x00\x00\x00\x03\x01\x01\x00\x02\x00\x00\x00" 201 | b"\x22\x06\x00\x00\x5E\x00\x00\x00\x01\x00\x00\x00\x4D\x00\x00\x00" 202 | b"\x00\x00\x00\x00\x00\x00\x6C\x22\xBA\x59\x20\x00\x68\x65\x6C\x6C" 203 | b"\x6F\x2E\x63\x00\x4A\x00\x00\x00\x4D\x00\x00\x00\x00\x00\x6C\x22" 204 | b"\xE7\x59\x20\x00\x77\x65\x6C\x63\x6F\x6D\x65\x2E\x63\x00\xBD\x5A" 205 | b"\xA6\x30\x97\x00\x97\x00\x23\x69\x6E\x63\x6C\x75\x64\x65\x20\x3C" 206 | b"\x73\x74\x64\x69\x6F\x2E\x68\x3E\x0D\x0A\x0D\x0A\x76\x6F\x69\x64" 207 | b"\x20\x6D\x61\x69\x6E\x28\x76\x6F\x69\x64\x29\x0D\x0A\x7B\x0D\x0A" 208 | b"\x20\x20\x20\x20\x70\x72\x69\x6E\x74\x66\x28\x22\x48\x65\x6C\x6C" 209 | b"\x6F\x2C\x20\x77\x6F\x72\x6C\x64\x21\x5C\x6E\x22\x29\x3B\x0D\x0A" 210 | b"\x7D\x0D\x0A\x23\x69\x6E\x63\x6C\x75\x64\x65\x20\x3C\x73\x74\x64" 211 | b"\x69\x6F\x2E\x68\x3E\x0D\x0A\x0D\x0A\x76\x6F\x69\x64\x20\x6D\x61" 212 | b"\x69\x6E\x28\x76\x6F\x69\x64\x29\x0D\x0A\x7B\x0D\x0A\x20\x20\x20" 213 | b"\x20\x70\x72\x69\x6E\x74\x66\x28\x22\x57\x65\x6C\x63\x6F\x6D\x65" 214 | b"\x21\x5C\x6E\x22\x29\x3B\x0D\x0A\x7D\x0D\x0A\x0D\x0A" 215 | ) 216 | _check_range(data, expected) 217 | 218 | # use cabextract to test validity 219 | try: 220 | self.assertEqual( 221 | subprocess.call(["cabextract", "--test", "/tmp/test.cab"]), 0 222 | ) 223 | except FileNotFoundError as _: 224 | pass 225 | 226 | # check we can parse what we just created 227 | arc = CabArchive() 228 | with open("/tmp/test.cab", "rb") as f: 229 | arc.parse(f.read()) 230 | 231 | # add an extra file 232 | arc["test.inf"] = CabFile(b"$CHICAGO$") 233 | 234 | # save with compression 235 | with open("/tmp/test.cab", "wb") as f: 236 | f.write(arc.save(True)) 237 | 238 | # use cabextract to test validity 239 | try: 240 | self.assertEqual( 241 | subprocess.call(["cabextract", "--test", "/tmp/test.cab"]), 0 242 | ) 243 | except FileNotFoundError as _: 244 | pass 245 | 246 | 247 | if __name__ == "__main__": 248 | unittest.main() 249 | -------------------------------------------------------------------------------- /cabarchive/utils.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # 4 | # Copyright (C) 2015-2020 Richard Hughes 5 | # 6 | # SPDX-License-Identifier: LGPL-2.1+ 7 | # 8 | # pylint: disable=protected-access,too-few-public-methods 9 | 10 | import struct 11 | 12 | from typing import List 13 | 14 | FMT_CFHEADER = "<4sxxxxIxxxxIxxxxBBHHHHH" 15 | FMT_CFHEADER_RESERVE = " List[bytes]: 22 | """Split up a bytestream into chunks""" 23 | arrs = [] 24 | for i in range(0, len(arr), size): 25 | arrs.append(arr[i : i + size]) 26 | return arrs 27 | 28 | 29 | def _checksum_compute(buf: bytes, seed: int = 0) -> int: 30 | """Compute the MS cabinet checksum""" 31 | csum: int = seed 32 | for offset in range(0, len(buf), 4): 33 | try: 34 | (ul,) = struct.unpack_from(" 5 | # 6 | # SPDX-License-Identifier: LGPL-2.1+ 7 | # 8 | # pylint: disable=protected-access,too-few-public-methods 9 | 10 | from typing import List, TYPE_CHECKING 11 | import struct 12 | import zlib 13 | 14 | from cabarchive.file import CabFile 15 | from cabarchive.utils import ( 16 | FMT_CFHEADER, 17 | FMT_CFFOLDER, 18 | FMT_CFFILE, 19 | FMT_CFDATA, 20 | _chunkify, 21 | _checksum_compute, 22 | ) 23 | 24 | if TYPE_CHECKING: 25 | from cabarchive.archive import CabArchive 26 | 27 | 28 | class CabArchiveWriter: 29 | def __init__( 30 | self, cfarchive: "CabArchive", compress: bool = False, sort: bool = True 31 | ) -> None: 32 | self.cfarchive: "CabArchive" = cfarchive 33 | self.compress: bool = compress 34 | self.sort: bool = sort 35 | 36 | def write(self) -> bytes: 37 | # sort files before export 38 | cffiles: List[CabFile] = [] 39 | if self.sort: 40 | for fn in sorted(self.cfarchive.keys()): 41 | cffiles.append(self.cfarchive[fn]) 42 | else: 43 | cffiles.extend(self.cfarchive.values()) 44 | 45 | # create linear CFDATA block 46 | if len(cffiles) > 1: 47 | cfdata_linear = bytes() 48 | for f in cffiles: 49 | if f.buf: 50 | cfdata_linear += f.buf 51 | else: 52 | cfdata_linear = cffiles[0].buf or bytes() 53 | 54 | # _chunkify and compress with a fixed size 55 | chunks = _chunkify(cfdata_linear, 0x8000) 56 | if self.compress: 57 | chunks_zlib = [] 58 | for chunk in chunks: 59 | compressobj = zlib.compressobj(9, zlib.DEFLATED, -zlib.MAX_WBITS) 60 | chunk_zlib = b"CK" 61 | chunk_zlib += compressobj.compress(chunk) 62 | chunk_zlib += compressobj.flush() 63 | chunks_zlib.append(chunk_zlib) 64 | else: 65 | chunks_zlib = chunks 66 | 67 | # create header 68 | archive_size = struct.calcsize(FMT_CFHEADER) 69 | archive_size += struct.calcsize(FMT_CFFOLDER) 70 | for f in cffiles: 71 | if not f._filename_win32: 72 | continue 73 | archive_size += ( 74 | struct.calcsize(FMT_CFFILE) + len(f._filename_win32.encode()) + 1 75 | ) 76 | for chunk in chunks_zlib: 77 | archive_size += struct.calcsize(FMT_CFDATA) + len(chunk) 78 | offset = struct.calcsize(FMT_CFHEADER) 79 | offset += struct.calcsize(FMT_CFFOLDER) 80 | data = struct.pack( 81 | FMT_CFHEADER, 82 | b"MSCF", # signature 83 | archive_size, # complete size 84 | offset, # offset to CFFILE 85 | 3, 86 | 1, # ver minor major 87 | 1, # no of CFFOLDERs 88 | len(self.cfarchive), # no of CFFILEs 89 | 0, # flags 90 | self.cfarchive.set_id, # setID 91 | 0, 92 | ) # cnt of cabs in set 93 | 94 | # create folder 95 | for f in cffiles: 96 | if not f._filename_win32: 97 | continue 98 | offset += struct.calcsize(FMT_CFFILE) 99 | offset += len(f._filename_win32.encode()) + 1 100 | data += struct.pack( 101 | FMT_CFFOLDER, 102 | offset, # offset to CFDATA 103 | min(len(chunks), 0xFFFF), # number of CFDATA blocks 104 | self.compress, 105 | ) # compression type 106 | 107 | # create each CFFILE 108 | index_into = 0 109 | for f in cffiles: 110 | if not f._filename_win32: 111 | continue 112 | data += struct.pack( 113 | FMT_CFFILE, 114 | len(f), # uncompressed size 115 | index_into, # uncompressed offset 116 | 0, # index into CFFOLDER 117 | f._date_encode(), # date 118 | f._time_encode(), # time 119 | f._attr_encode(), 120 | ) # attribs 121 | data += f._filename_win32.encode() + b"\0" 122 | index_into += len(f) 123 | 124 | # create each CFDATA 125 | for i in range(0, len(chunks)): 126 | chunk = chunks[i] 127 | chunk_zlib = chunks_zlib[i] 128 | 129 | # first do the 'checksum' on the data, then the partial 130 | # header. slightly crazy, but anyway 131 | checksum = _checksum_compute(chunk_zlib) 132 | hdr = struct.pack("