├── .gitignore ├── LICENSE ├── README.md ├── deptest ├── deptest.pdf ├── deptest.pdf.log └── deptest.psogood.pdf ├── docker ├── .dockerignore ├── Dockerfile └── build_docker.sh ├── docker_extraimgopt ├── .dockerignore ├── Dockerfile └── build_docker.sh ├── extra ├── debian │ ├── changelog │ ├── compat │ ├── control │ ├── copyright │ ├── dirs │ ├── rules │ ├── source │ │ └── format │ └── watch ├── dvipdfmx_fontfix.py ├── dvipdfmx_ndl14_extra.map ├── pdftex_ndl14_extra.map ├── pdftex_ndl14_extraflag.map ├── pts-graphics-helper.sty └── small.pdf ├── info.txt ├── latest_libexec.txt ├── lib ├── __init__.py └── pdfsizeopt │ ├── __init__.py │ ├── cff.py │ ├── float_util.py │ ├── main.py │ ├── pdfsizeopt_argparse.py │ ├── pdfsizeopt_argparse.version │ └── psproc.py ├── lint.sh ├── mksingle.py ├── pdfsizeopt ├── pdfsizeopt.single ├── pdfsizeopt_test.py ├── pts_pdfsizeopt2009 ├── README.txt ├── ltugbib.bst ├── ltugboat.cls ├── ltugproc.cls ├── pts_pdfsizeopt2009.bib └── pts_pdfsizeopt2009.tex ├── pts_pdfsizeopt2009_talk ├── compile.sh ├── pdfsizeopt_charts.key ├── pdfsizeopt_charts.pdf ├── pdfsizeopt_charts_ps2pdf.pdf └── pts_pdfsizeopt2009_talk.tex └── win32port ├── README.txt └── pdfsizeopt_winstub.c /.gitignore: -------------------------------------------------------------------------------- 1 | *~ 2 | *.pyc 3 | Multivalent.jar 4 | MultivalentCompress.jar 5 | pdfsizeopt_libexec/ 6 | pdfsizeopt_libexec_linux-v2.tar.gz 7 | pdfsizeopt.exe 8 | pdfsizeopt_win32exec-v*.zip 9 | pdfsizeopt_win32exec/ 10 | docker/busybox 11 | docker/pdfsizeopt.single 12 | docker/pdfsizeopt_libexec_linux.tar.gz 13 | docker_extraimgopt/pdfsizeopt_libexec_extraimgopt_linux.tar.gz 14 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | GNU GENERAL PUBLIC LICENSE 2 | Version 2, June 1991 3 | 4 | Copyright (C) 1989, 1991 Free Software Foundation, Inc., 5 | 51 Franklin Street, 
Fifth Floor, Boston, MA 02110-1301 USA 6 | Everyone is permitted to copy and distribute verbatim copies 7 | of this license document, but changing it is not allowed. 8 | 9 | Preamble 10 | 11 | The licenses for most software are designed to take away your 12 | freedom to share and change it. By contrast, the GNU General Public 13 | License is intended to guarantee your freedom to share and change free 14 | software--to make sure the software is free for all its users. This 15 | General Public License applies to most of the Free Software 16 | Foundation's software and to any other program whose authors commit to 17 | using it. (Some other Free Software Foundation software is covered by 18 | the GNU Lesser General Public License instead.) You can apply it to 19 | your programs, too. 20 | 21 | When we speak of free software, we are referring to freedom, not 22 | price. Our General Public Licenses are designed to make sure that you 23 | have the freedom to distribute copies of free software (and charge for 24 | this service if you wish), that you receive source code or can get it 25 | if you want it, that you can change the software or use pieces of it 26 | in new free programs; and that you know you can do these things. 27 | 28 | To protect your rights, we need to make restrictions that forbid 29 | anyone to deny you these rights or to ask you to surrender the rights. 30 | These restrictions translate to certain responsibilities for you if you 31 | distribute copies of the software, or if you modify it. 32 | 33 | For example, if you distribute copies of such a program, whether 34 | gratis or for a fee, you must give the recipients all the rights that 35 | you have. You must make sure that they, too, receive or can get the 36 | source code. And you must show them these terms so they know their 37 | rights. 
38 | 39 | We protect your rights with two steps: (1) copyright the software, and 40 | (2) offer you this license which gives you legal permission to copy, 41 | distribute and/or modify the software. 42 | 43 | Also, for each author's protection and ours, we want to make certain 44 | that everyone understands that there is no warranty for this free 45 | software. If the software is modified by someone else and passed on, we 46 | want its recipients to know that what they have is not the original, so 47 | that any problems introduced by others will not reflect on the original 48 | authors' reputations. 49 | 50 | Finally, any free program is threatened constantly by software 51 | patents. We wish to avoid the danger that redistributors of a free 52 | program will individually obtain patent licenses, in effect making the 53 | program proprietary. To prevent this, we have made it clear that any 54 | patent must be licensed for everyone's free use or not licensed at all. 55 | 56 | The precise terms and conditions for copying, distribution and 57 | modification follow. 58 | 59 | GNU GENERAL PUBLIC LICENSE 60 | TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION 61 | 62 | 0. This License applies to any program or other work which contains 63 | a notice placed by the copyright holder saying it may be distributed 64 | under the terms of this General Public License. The "Program", below, 65 | refers to any such program or work, and a "work based on the Program" 66 | means either the Program or any derivative work under copyright law: 67 | that is to say, a work containing the Program or a portion of it, 68 | either verbatim or with modifications and/or translated into another 69 | language. (Hereinafter, translation is included without limitation in 70 | the term "modification".) Each licensee is addressed as "you". 71 | 72 | Activities other than copying, distribution and modification are not 73 | covered by this License; they are outside its scope. 
The act of 74 | running the Program is not restricted, and the output from the Program 75 | is covered only if its contents constitute a work based on the 76 | Program (independent of having been made by running the Program). 77 | Whether that is true depends on what the Program does. 78 | 79 | 1. You may copy and distribute verbatim copies of the Program's 80 | source code as you receive it, in any medium, provided that you 81 | conspicuously and appropriately publish on each copy an appropriate 82 | copyright notice and disclaimer of warranty; keep intact all the 83 | notices that refer to this License and to the absence of any warranty; 84 | and give any other recipients of the Program a copy of this License 85 | along with the Program. 86 | 87 | You may charge a fee for the physical act of transferring a copy, and 88 | you may at your option offer warranty protection in exchange for a fee. 89 | 90 | 2. You may modify your copy or copies of the Program or any portion 91 | of it, thus forming a work based on the Program, and copy and 92 | distribute such modifications or work under the terms of Section 1 93 | above, provided that you also meet all of these conditions: 94 | 95 | a) You must cause the modified files to carry prominent notices 96 | stating that you changed the files and the date of any change. 97 | 98 | b) You must cause any work that you distribute or publish, that in 99 | whole or in part contains or is derived from the Program or any 100 | part thereof, to be licensed as a whole at no charge to all third 101 | parties under the terms of this License. 
102 | 103 | c) If the modified program normally reads commands interactively 104 | when run, you must cause it, when started running for such 105 | interactive use in the most ordinary way, to print or display an 106 | announcement including an appropriate copyright notice and a 107 | notice that there is no warranty (or else, saying that you provide 108 | a warranty) and that users may redistribute the program under 109 | these conditions, and telling the user how to view a copy of this 110 | License. (Exception: if the Program itself is interactive but 111 | does not normally print such an announcement, your work based on 112 | the Program is not required to print an announcement.) 113 | 114 | These requirements apply to the modified work as a whole. If 115 | identifiable sections of that work are not derived from the Program, 116 | and can be reasonably considered independent and separate works in 117 | themselves, then this License, and its terms, do not apply to those 118 | sections when you distribute them as separate works. But when you 119 | distribute the same sections as part of a whole which is a work based 120 | on the Program, the distribution of the whole must be on the terms of 121 | this License, whose permissions for other licensees extend to the 122 | entire whole, and thus to each and every part regardless of who wrote it. 123 | 124 | Thus, it is not the intent of this section to claim rights or contest 125 | your rights to work written entirely by you; rather, the intent is to 126 | exercise the right to control the distribution of derivative or 127 | collective works based on the Program. 128 | 129 | In addition, mere aggregation of another work not based on the Program 130 | with the Program (or with a work based on the Program) on a volume of 131 | a storage or distribution medium does not bring the other work under 132 | the scope of this License. 133 | 134 | 3. 
You may copy and distribute the Program (or a work based on it, 135 | under Section 2) in object code or executable form under the terms of 136 | Sections 1 and 2 above provided that you also do one of the following: 137 | 138 | a) Accompany it with the complete corresponding machine-readable 139 | source code, which must be distributed under the terms of Sections 140 | 1 and 2 above on a medium customarily used for software interchange; or, 141 | 142 | b) Accompany it with a written offer, valid for at least three 143 | years, to give any third party, for a charge no more than your 144 | cost of physically performing source distribution, a complete 145 | machine-readable copy of the corresponding source code, to be 146 | distributed under the terms of Sections 1 and 2 above on a medium 147 | customarily used for software interchange; or, 148 | 149 | c) Accompany it with the information you received as to the offer 150 | to distribute corresponding source code. (This alternative is 151 | allowed only for noncommercial distribution and only if you 152 | received the program in object code or executable form with such 153 | an offer, in accord with Subsection b above.) 154 | 155 | The source code for a work means the preferred form of the work for 156 | making modifications to it. For an executable work, complete source 157 | code means all the source code for all modules it contains, plus any 158 | associated interface definition files, plus the scripts used to 159 | control compilation and installation of the executable. However, as a 160 | special exception, the source code distributed need not include 161 | anything that is normally distributed (in either source or binary 162 | form) with the major components (compiler, kernel, and so on) of the 163 | operating system on which the executable runs, unless that component 164 | itself accompanies the executable. 
165 | 166 | If distribution of executable or object code is made by offering 167 | access to copy from a designated place, then offering equivalent 168 | access to copy the source code from the same place counts as 169 | distribution of the source code, even though third parties are not 170 | compelled to copy the source along with the object code. 171 | 172 | 4. You may not copy, modify, sublicense, or distribute the Program 173 | except as expressly provided under this License. Any attempt 174 | otherwise to copy, modify, sublicense or distribute the Program is 175 | void, and will automatically terminate your rights under this License. 176 | However, parties who have received copies, or rights, from you under 177 | this License will not have their licenses terminated so long as such 178 | parties remain in full compliance. 179 | 180 | 5. You are not required to accept this License, since you have not 181 | signed it. However, nothing else grants you permission to modify or 182 | distribute the Program or its derivative works. These actions are 183 | prohibited by law if you do not accept this License. Therefore, by 184 | modifying or distributing the Program (or any work based on the 185 | Program), you indicate your acceptance of this License to do so, and 186 | all its terms and conditions for copying, distributing or modifying 187 | the Program or works based on it. 188 | 189 | 6. Each time you redistribute the Program (or any work based on the 190 | Program), the recipient automatically receives a license from the 191 | original licensor to copy, distribute or modify the Program subject to 192 | these terms and conditions. You may not impose any further 193 | restrictions on the recipients' exercise of the rights granted herein. 194 | You are not responsible for enforcing compliance by third parties to 195 | this License. 196 | 197 | 7. 
If, as a consequence of a court judgment or allegation of patent 198 | infringement or for any other reason (not limited to patent issues), 199 | conditions are imposed on you (whether by court order, agreement or 200 | otherwise) that contradict the conditions of this License, they do not 201 | excuse you from the conditions of this License. If you cannot 202 | distribute so as to satisfy simultaneously your obligations under this 203 | License and any other pertinent obligations, then as a consequence you 204 | may not distribute the Program at all. For example, if a patent 205 | license would not permit royalty-free redistribution of the Program by 206 | all those who receive copies directly or indirectly through you, then 207 | the only way you could satisfy both it and this License would be to 208 | refrain entirely from distribution of the Program. 209 | 210 | If any portion of this section is held invalid or unenforceable under 211 | any particular circumstance, the balance of the section is intended to 212 | apply and the section as a whole is intended to apply in other 213 | circumstances. 214 | 215 | It is not the purpose of this section to induce you to infringe any 216 | patents or other property right claims or to contest validity of any 217 | such claims; this section has the sole purpose of protecting the 218 | integrity of the free software distribution system, which is 219 | implemented by public license practices. Many people have made 220 | generous contributions to the wide range of software distributed 221 | through that system in reliance on consistent application of that 222 | system; it is up to the author/donor to decide if he or she is willing 223 | to distribute software through any other system and a licensee cannot 224 | impose that choice. 225 | 226 | This section is intended to make thoroughly clear what is believed to 227 | be a consequence of the rest of this License. 228 | 229 | 8. 
If the distribution and/or use of the Program is restricted in 230 | certain countries either by patents or by copyrighted interfaces, the 231 | original copyright holder who places the Program under this License 232 | may add an explicit geographical distribution limitation excluding 233 | those countries, so that distribution is permitted only in or among 234 | countries not thus excluded. In such case, this License incorporates 235 | the limitation as if written in the body of this License. 236 | 237 | 9. The Free Software Foundation may publish revised and/or new versions 238 | of the General Public License from time to time. Such new versions will 239 | be similar in spirit to the present version, but may differ in detail to 240 | address new problems or concerns. 241 | 242 | Each version is given a distinguishing version number. If the Program 243 | specifies a version number of this License which applies to it and "any 244 | later version", you have the option of following the terms and conditions 245 | either of that version or of any later version published by the Free 246 | Software Foundation. If the Program does not specify a version number of 247 | this License, you may choose any version ever published by the Free Software 248 | Foundation. 249 | 250 | 10. If you wish to incorporate parts of the Program into other free 251 | programs whose distribution conditions are different, write to the author 252 | to ask for permission. For software which is copyrighted by the Free 253 | Software Foundation, write to the Free Software Foundation; we sometimes 254 | make exceptions for this. Our decision will be guided by the two goals 255 | of preserving the free status of all derivatives of our free software and 256 | of promoting the sharing and reuse of software generally. 257 | 258 | NO WARRANTY 259 | 260 | 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY 261 | FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. 
EXCEPT WHEN 262 | OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES 263 | PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED 264 | OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 265 | MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS 266 | TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE 267 | PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, 268 | REPAIR OR CORRECTION. 269 | 270 | 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING 271 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR 272 | REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, 273 | INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING 274 | OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED 275 | TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY 276 | YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER 277 | PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE 278 | POSSIBILITY OF SUCH DAMAGES. 279 | 280 | END OF TERMS AND CONDITIONS 281 | 282 | How to Apply These Terms to Your New Programs 283 | 284 | If you develop a new program, and you want it to be of the greatest 285 | possible use to the public, the best way to achieve this is to make it 286 | free software which everyone can redistribute and change under these terms. 287 | 288 | To do so, attach the following notices to the program. It is safest 289 | to attach them to the start of each source file to most effectively 290 | convey the exclusion of warranty; and each file should have at least 291 | the "copyright" line and a pointer to where the full notice is found. 
292 | 293 | 294 | Copyright (C) 295 | 296 | This program is free software; you can redistribute it and/or modify 297 | it under the terms of the GNU General Public License as published by 298 | the Free Software Foundation; either version 2 of the License, or 299 | (at your option) any later version. 300 | 301 | This program is distributed in the hope that it will be useful, 302 | but WITHOUT ANY WARRANTY; without even the implied warranty of 303 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 304 | GNU General Public License for more details. 305 | 306 | You should have received a copy of the GNU General Public License along 307 | with this program; if not, write to the Free Software Foundation, Inc., 308 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 309 | 310 | Also add information on how to contact you by electronic and paper mail. 311 | 312 | If the program is interactive, make it output a short notice like this 313 | when it starts in an interactive mode: 314 | 315 | Gnomovision version 69, Copyright (C) year name of author 316 | Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. 317 | This is free software, and you are welcome to redistribute it 318 | under certain conditions; type `show c' for details. 319 | 320 | The hypothetical commands `show w' and `show c' should show the appropriate 321 | parts of the General Public License. Of course, the commands you use may 322 | be called something other than `show w' and `show c'; they could even be 323 | mouse-clicks or menu items--whatever suits your program. 324 | 325 | You should also get your employer (if you work as a programmer) or your 326 | school, if any, to sign a "copyright disclaimer" for the program, if 327 | necessary. Here is a sample; alter the names: 328 | 329 | Yoyodyne, Inc., hereby disclaims all copyright interest in the program 330 | `Gnomovision' (which makes passes at compilers) written by James Hacker. 
331 | 332 | , 1 April 1989 333 | Ty Coon, President of Vice 334 | 335 | This General Public License does not permit incorporating your program into 336 | proprietary programs. If your program is a subroutine library, you may 337 | consider it more useful to permit linking proprietary applications with the 338 | library. If this is what you want to do, use the GNU Lesser General 339 | Public License instead of this License. 340 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # README for pdfsizeopt 2 | 3 | pdfsizeopt is a program for converting large PDF files to small ones, 4 | without decreasing visual quality or removing interactive features (such as 5 | hyperlinks). More specifically, pdfsizeopt is a free, cross-platform 6 | command-line application (for Linux, Windows, macOS and Unix) and a 7 | collection of best practices to optimize the size of PDF files, with focus 8 | on PDFs created from TeX and LaTeX documents. pdfsizeopt is written in 9 | Python, so it is a bit slow, but it offloads some of the heavy work to its 10 | faster (C, C++ and Java) dependencies. 11 | 12 | Doesn't pdfsizeopt work with your PDF? 13 | Report the issue here: https://github.com/pts/pdfsizeopt/issues 14 | 15 | Send donations to the author of pdfsizeopt: 16 | https://flattr.com/submit/auto?user_id=pts&url=https://github.com/pts/pdfsizeopt 17 | 18 | ## Getting started: how to run pdfsizeopt 19 | 20 | If it is your first time trying pdfsizeopt, follow these instructions. 21 | (This section was updated on 2023-02-15.) 22 | 23 | It's easy to install and run pdfsizeopt on modern Linux and Windows systems 24 | with an x86 processor. If you have such a system, jump directly to one of 25 | the following sections (*Installation instructions and usage on Linux* or 26 | *Installation instructions and usage on Windows*). It will take less than 27 | 5 minutes.
28 | 29 | It's easy to install and run pdfsizeopt on a Mac (both Intel x86 processors 30 | and ARM processors with Apple Silicon are supported). If you have such a 31 | system, jump directly to the section *Installation instructions and usage 32 | on macOS* (*not* using Docker). It will take less than 5 minutes. 33 | 34 | Alternatively (but not recommended because it's slower), it's possible to 35 | run pdfsizeopt within Docker on the following systems: Linux amd64, macOS 36 | 64-bit Intel x86 (amd64, x86_64), macOS 64-bit ARM (Apple Silicon, e.g. M1 37 | or M2 chip). After that, jump directly to the section *Installation 38 | instructions and usage with Docker on Linux and macOS*. That last step will 39 | take less than 5 minutes. 40 | 41 | If you are using an operating system other than Linux, Windows or macOS (on 42 | a computer with Intel processor), the easiest way to try pdfsizeopt is 43 | borrowing a friend's computer with Linux, Windows or macOS, or renting a 44 | Linux VM in the cloud. The reason why it's difficult to run pdfsizeopt on 45 | other kinds of systems is because pdfsizeopt has some required dependencies, 46 | some of them are old versions (e.g. Python 2.4--2.7, Ghostscript 9.05), so 47 | you'll have to compile the right versions of the dependencies first, which 48 | may take several hours and lots of frustrating trial-and-error even for 49 | experienced hackers. 50 | 51 | It's technically possible to port pdfsizeopt to other systems (and make it 52 | easy to install), but the author of pdfsizeopt doesn't have the free time to 53 | create and maintain such a port. As an FYI, see 54 | https://github.com/pts/pdfsizeopt/issues/154 about porting to Apple Silicon. 55 | 56 | ## Installation instructions and usage on Linux 57 | 58 | There is no installer, you need to run some commands in the command line to 59 | download and install. pdfsizeopt is a command-line only application, there 60 | is no GUI. 
61 | 62 | To install pdfsizeopt on a Linux system (with architecture i386 or amd64), 63 | open a terminal window and run these commands (without the leading `$`): 64 | 65 | ``` 66 | $ mkdir ~/pdfsizeopt 67 | $ cd ~/pdfsizeopt 68 | $ wget -O pdfsizeopt_libexec_linux.tar.gz https://github.com/pts/pdfsizeopt/releases/download/2023-04-18/pdfsizeopt_libexec_linux-v9.tar.gz 69 | $ tar xzvf pdfsizeopt_libexec_linux.tar.gz 70 | $ rm -f pdfsizeopt_libexec_linux.tar.gz 71 | $ wget -O pdfsizeopt.single https://raw.githubusercontent.com/pts/pdfsizeopt/master/pdfsizeopt.single 72 | $ chmod +x pdfsizeopt.single 73 | $ ln -s pdfsizeopt.single pdfsizeopt 74 | ``` 75 | 76 | To optimize a PDF, run the following command: 77 | 78 | ``` 79 | ~/pdfsizeopt/pdfsizeopt input.pdf output.pdf 80 | ``` 81 | 82 | If the input PDF has many images or large images, pdfsizeopt can be very 83 | slow. You can speed it up by disabling pngout, the slowest image optimization 84 | method, like this: 85 | 86 | ``` 87 | ~/pdfsizeopt/pdfsizeopt --use-pngout=no input.pdf output.pdf 88 | ``` 89 | 90 | pdfsizeopt creates lots of temporary files (psotmp.*) in the output 91 | directory, but it also cleans up after itself. 92 | 93 | It's possible to optimize a PDF outside the current directory. To do that, 94 | specify the pathname (including the directory name) in the command-line. 95 | 96 | Please note that the commands above download all dependencies (including 97 | Python and Ghostscript) as well. It's possible to install some of the 98 | dependencies with your package manager, but these steps are considered 99 | alternative and more complicated, and thus are not covered here. 100 | 101 | Please note that pdfsizeopt works perfectly on any x86 and amd64 Linux 102 | system. There is no restriction on the libc, Linux distribution etc. 
because 103 | pdfsizeopt uses only its statically linked x86 executables, and it doesn't 104 | use any external commands (other than pdfsizeopt, pdfsizeopt.single and 105 | pdfsizeopt_libexec/*) on the system. pdfsizeopt also works perfectly on x86 106 | FreeBSD systems with the Linux emulation layer enabled. 107 | 108 | To avoid typing ~/pdfsizeopt/pdfsizeopt, add "$HOME/pdfsizeopt" to your PATH 109 | (probably in your ~/.bashrc), open a new terminal window, and the 110 | command pdfsizeopt will work from any directory. 111 | 112 | You can also put pdfsizeopt to a directory other than ~/pdfsizeopt , as you 113 | like. 114 | 115 | Additionally, you can install some extra image optimizers (see more in the 116 | *Image optimizers* section below): 117 | 118 | ``` 119 | $ cd ~/pdfsizeopt 120 | $ wget -O pdfsizeopt_libexec_extraimgopt_linux-v3.tar.gz https://github.com/pts/pdfsizeopt/releases/download/2017-01-24/pdfsizeopt_libexec_extraimgopt_linux-v3.tar.gz 121 | $ tar xzvf pdfsizeopt_libexec_extraimgopt_linux-v3.tar.gz 122 | $ rm -f pdfsizeopt_libexec_extraimgopt_linux-v3.tar.gz 123 | ``` 124 | 125 | ## Installation instructions and usage on Windows 126 | 127 | There is no installer, you need to run some commands in the command line 128 | (black Command Prompt window) to download and install. pdfsizeopt is a 129 | command-line only application, there is no GUI. 130 | 131 | Create folder C:\pdfsizeopt, download 132 | https://github.com/pts/pdfsizeopt/releases/download/2023-04-18/pdfsizeopt_win32exec-v9.zip 133 | , and extract its contents to the folder C:\pdfsizeopt, so that the file 134 | C:\pdfsizeopt\pdfsizeopt.exe exists. 135 | 136 | Download 137 | https://raw.githubusercontent.com/pts/pdfsizeopt/master/pdfsizeopt.single 138 | and save it to C:\pdfsizeopt, as C:\pdfsizeopt\pdfsizeopt.single .
139 | 140 | To optimize a PDF, run the following command: 141 | 142 | ``` 143 | C:\pdfsizeopt\pdfsizeopt input.pdf output.pdf 144 | ``` 145 | 146 | in the command line, which is a black Command Prompt window, you can start 147 | it by Start menu / Run / cmd.exe, or finding Command Prompt in the start 148 | menu. 149 | 150 | (Press Tab to get filename completion while typing.) 151 | 152 | Since you have to type the input filename as a full pathname, it's 153 | recommended to create a directory with a short name (e.g. C:\pdfs), and copy 154 | the input PDF there first. 155 | 156 | If the input PDF has many images or large images, pdfsizeopt can be very 157 | slow. You can speed it up by disabling pngout, the slowest image optimization 158 | method, like this: 159 | 160 | ``` 161 | C:\pdfsizeopt\pdfsizeopt --use-pngout=no input.pdf output.pdf 162 | ``` 163 | 164 | To avoid typing C:\pdfsizeopt\pdfsizeopt, add C:\pdfsizeopt to (the end of) 165 | the system PATH, open a new Command Prompt window, and the command 166 | `pdfsizeopt` will work from any directory. 167 | 168 | Depending on your environment, filenames with 169 | accented characters may not work in the Windows version of pdfsizeopt. To 170 | play it safe, make sure your input and output files have names with letters, 171 | numbers, underscore (_), dash (-), dot (.) and plus (+). The backslash (\) 172 | and the slash (/) are both OK as the directory separator. 173 | 174 | Spaces in filenames and pathnames should work, but you need to put double 175 | quotes (") around the name. 176 | 177 | Filenames with some punctuation characters (such as double quote ("), 178 | question mark (?) and asterisk (*)) and nonprintable characters (such as 179 | newline) will not work on Windows. This is because Windows doesn't support 180 | these characters ([\x00..\x1f\"*:<>?|\x7f]) in filenames at all, and it uses 181 | / and \\ as directory separator.
182 | 183 | You can also put pdfsizeopt to a directory other than C:\pdfsizeopt , but it 184 | won't work if there is whitespace or there are accented characters in any of 185 | the folder names. 186 | 187 | Please note that pdfsizeopt works perfectly in Wine (tested with wine-1.2 on 188 | Ubuntu Lucid and wine-1.6.2 on Ubuntu Trusty), but it's a bit slower than 189 | running it natively (as a Linux or Unix program). 190 | 191 | ## Installation instructions and usage with Docker on Linux and macOS 192 | 193 | These instructions work on the following systems: Linux amd64, macOS 64-bit 194 | Intel x86 (amd64, x86_64), macOS 64-bit ARM (Apple Silicon, e.g. M1 or M2 195 | chip). The version of Linux or macOS doesn't matter (old systems such as 196 | macOS Leopard 10.5 also work), as long as it has Docker installed and 197 | working. 198 | 199 | The programs in the Docker image ptspts/pdfsizeopt are compiled for Linux 200 | i386 (32-bit Intel x86), and these binaries happen to work in all platforms 201 | mentioned above, even with Apple Silicon. (Tested on 2023-02-21.) 202 | 203 | There is no installer, you need to run some commands in the command line to 204 | download and install. pdfsizeopt is a command-line only application, there 205 | is no GUI. 206 | 207 | First, check that you have Docker installed properly by running this 208 | command and checking for the OK at the end: 209 | 210 | ``` 211 | docker version && echo OK 212 | ``` 213 | 214 | If you don't get OK, because the `docker` command was not found, then Docker 215 | is not installed to your computer. Installation instructions (on 2023-02-22): 216 | 217 | * To install Docker on Linux, you have two options: Docker Engine 218 | (https://docs.docker.com/engine/install/ , within the Server section) or 219 | Docker Desktop (https://docs.docker.com/desktop/install/linux-install/). Any 220 | of them would work.
221 | 222 | * To install Docker on macOS, install Docker Desktop 223 | (https://docs.docker.com/desktop/install/mac-install/). 224 | 225 | Then (on macOS), add the `docker` command to your PATH by running the 226 | following command (copy-paste it, don't type, to avoid typos): 227 | 228 | ``` 229 | (echo; echo 'export PATH="/Applications/Docker.app/Contents/Resources/bin:$PATH"') >>~/.profile 230 | ``` 231 | 232 | Then (on macOS), close the Terminal app, and open it again (so that 233 | changes to ~/.profile take effect). 234 | 235 | * After the installation, retry the `docker version` command above. 236 | 237 | Remove any previous Docker images of pdfsizeopt: 238 | 239 | ``` 240 | docker image rm ptspts/pdfsizeopt 241 | ``` 242 | 243 | Do a test optimization run, which exercises all dependencies of pdfsizeopt: 244 | 245 | ``` 246 | curl -L -o deptest.pdf https://github.com/pts/pdfsizeopt/raw/master/deptest/deptest.pdf 247 | docker run -v "$PWD:/workdir" -u "$(id -u):$(id -g)" --rm -it ptspts/pdfsizeopt pdfsizeopt deptest.pdf 248 | ``` 249 | 250 | If you get a (harmless) warning message like 251 | 252 | ``` 253 | WARNING: The requested image's platform (linux/amd64) does not match the detected host platform (linux/arm64/v8) and no specific platform was requested 254 | ``` 255 | 256 | , and you don't want to get it again, then add `--platform linux/amd64` after the `-it`: 257 | 258 | ``` 259 | docker run -v "$PWD:/workdir" -u "$(id -u):$(id -g)" --rm -it --platform linux/amd64 ptspts/pdfsizeopt pdfsizeopt deptest.pdf 260 | ``` 261 | 262 | To optimize a PDF, run this command: 263 | 264 | ``` 265 | docker run -v "$PWD:/workdir" -u "$(id -u):$(id -g)" --rm -it ptspts/pdfsizeopt pdfsizeopt input.pdf output.pdf 266 | ``` 267 | 268 | If the input PDF has many images or large images, pdfsizeopt can be very 269 | slow. 
You can speed it up by disabling pngout, the slowest image optimization 270 | method, like this: 271 | 272 | ``` 273 | docker run -v "$PWD:/workdir" -u "$(id -u):$(id -g)" --rm -it ptspts/pdfsizeopt pdfsizeopt --use-pngout=no input.pdf output.pdf 274 | ``` 275 | 276 | pdfsizeopt creates lots of temporary files (psotmp.*) in the output 277 | directory, but it also cleans up after itself. 278 | 279 | It's possible to optimize a PDF outside the current directory. To do that, 280 | specify the pathname (including the directory name) in the command-line. 281 | 282 | To avoid typing a long command, run 283 | 284 | ``` 285 | (echo '#! /bin/sh'; echo 'exec docker run -v "$PWD:/workdir" -u "$(id -u):$(id -g)" --rm -it ptspts/pdfsizeopt pdfsizeopt "$@"') >pdfsizeopt && chmod 755 pdfsizeopt 286 | ``` 287 | 288 | , and then copy the pdfsizeopt script to your PATH, then open a new terminal 289 | window, and now this command will also work to optimize a PDF: 290 | 291 | ``` 292 | pdfsizeopt input.pdf output.pdf 293 | ``` 294 | 295 | Please note that the ptspts/pdfsizeopt Docker image is updated very rarely. 296 | To use a more up-to-date version of pdfsizeopt, run these commands to download: 297 | 298 | ``` 299 | curl -L -o pdfsizeopt.single https://raw.githubusercontent.com/pts/pdfsizeopt/master/pdfsizeopt.single 300 | chmod +x pdfsizeopt.single 301 | ``` 302 | 303 | Then run this command to optimize a PDF: 304 | 305 | ``` 306 | docker run -v "$PWD:/workdir" -u "$(id -u):$(id -g)" --rm -it ptspts/pdfsizeopt ./pdfsizeopt.single --use-pngout=no input.pdf output.pdf 307 | ``` 308 | 309 | If you want to have extra image optimizers included on Linux, use 310 | ptspts/pdfsizeopt-with-extraimgopt instead of ptspts/pdfsizeopt in the 311 | commands above. 
Example: 312 | 313 | ``` 314 | docker run -v "$PWD:/workdir" -u "$(id -u):$(id -g)" --rm -it ptspts/pdfsizeopt-with-extraimgopt pdfsizeopt --use-image-optimizer=sam2p,jbig2,pngout,zopflipng,optipng,advpng,ECT input.pdf output.pdf 315 | ``` 316 | 317 | ## Installation instructions and usage on macOS 318 | 319 | These instructions work on Macs with macOS Catalina 10.15 (and even 320 | older, maybe macOS Snow Leopard 10.6) -- macOS Ventura 13 (and even newer), 321 | having a 64-bit ARM processor (Apple Silicon) or a 64-bit Intel x86 322 | (x86_64, amd64) processor. The programs are compiled for 64-bit Intel x86 323 | processors, and they work on 64-bit ARM processors as well, using the 324 | Rosetta 2 emulation in macOS. These instructions were tested and known to 325 | work on macOS Ventura 13.3, both with 64-bit Intel x86 (x86_64, amd64) 326 | processor and Apple Silicon (ARM processor). 327 | 328 | If you have an older Mac running Mac OS X Leopard 10.5 -- macOS Mojave 329 | 10.14, follow the section *Installation instructions and usage on older 330 | macOS* instead. 331 | 332 | These instructions are not tested yet. See 333 | https://github.com/pts/pdfsizeopt/issues/154 for progress updates. 334 | 335 | There is no installer, you need to run some commands in the command line to 336 | download and install. pdfsizeopt is a command-line only application, there 337 | is no GUI. 
338 | 339 | To install pdfsizeopt on a macOS system, open a terminal window and run 340 | these commands (without the leading `$`): 341 | 342 | ``` 343 | $ mkdir ~/pdfsizeopt 344 | $ cd ~/pdfsizeopt 345 | $ curl -L -o pdfsizeopt_libexec_darwin.tar.gz https://github.com/pts/pdfsizeopt/releases/download/2023-04-18/pdfsizeopt_libexec_darwinc64-v9.tar.gz 346 | $ tar xzvf pdfsizeopt_libexec_darwin.tar.gz 347 | $ rm -f pdfsizeopt_libexec_darwin.tar.gz 348 | $ curl -L -o pdfsizeopt.single https://raw.githubusercontent.com/pts/pdfsizeopt/master/pdfsizeopt.single 349 | $ chmod +x pdfsizeopt.single 350 | $ ln -s pdfsizeopt.single pdfsizeopt 351 | ``` 352 | 353 | Do a test optimization run, which exercises all dependencies of pdfsizeopt: 354 | 355 | ``` 356 | $ curl -L -o deptest.pdf https://github.com/pts/pdfsizeopt/raw/master/deptest/deptest.pdf 357 | $ ~/pdfsizeopt/pdfsizeopt deptest.pdf 358 | ``` 359 | 360 | ... and open (view) deptest.pdf and the corresponding optimized 361 | deptest.pso.pdf . 362 | 363 | To optimize a PDF, run the following command: 364 | 365 | ``` 366 | ~/pdfsizeopt/pdfsizeopt input.pdf output.pdf 367 | ``` 368 | 369 | If the input PDF has many images or large images, pdfsizeopt can be very 370 | slow. You can speed it up by disabling pngout, the slowest image optimization 371 | method, like this: 372 | 373 | ``` 374 | ~/pdfsizeopt/pdfsizeopt --use-pngout=no input.pdf output.pdf 375 | ``` 376 | 377 | Also, if you have an 32-bit Mac, then the pngout bundled with pdfsizeopt 378 | won't work (because it needs a 64-bit Mac), so you have to force 379 | --use-pngout=no . See the section *Image optimizers* for alternatives of 380 | pngout. 381 | 382 | pdfsizeopt creates lots of temporary files (psotmp.*) in the output 383 | directory, but it also cleans up after itself. 384 | 385 | It's possible to optimize a PDF outside the current directory. To do that, 386 | specify the pathname (including the directory name) in the command-line. 
387 | 388 | Please note that the commands above download most dependencies (including 389 | Ghostscript, but excluding Python) as well. Everything should work as 390 | instructed above, out of the box. If you are experiencing problems, please 391 | report an issue on https://github.com/pts/pdfsizeopt/issues . 392 | 393 | To avoid typing ~/pdfsizeopt/pdfsizeopt, add "$HOME/pdfsizeopt" to your PATH 394 | (probably in your ~/.bashrc), open a new terminal window, and the 395 | command pdfsizeopt will work from any directory. 396 | 397 | You can also put pdfsizeopt to a directory other than ~/pdfsizeopt , as you 398 | like. 399 | 400 | ## Installation instructions and usage on older macOS 401 | 402 | These instructions should work on older Macs running Mac OS X Leopard 10.5 403 | -- macOS Mojave 10.14, and having a 32-bit or 64-bit Intel x86 processor. The 404 | programs are compiled for 32-bit Intel x86 (i386) processor (and also work 405 | on a 64-bit Intel processor with macOS Mojave 10.14 or earlier), except for 406 | the pngout tool, which needs at least Mac OS X Snow Leopard 10.6 and a 407 | 64-bit Intel processor. 408 | 409 | There is no installer, you need to run some commands in the command line to 410 | download and install. pdfsizeopt is a command-line only application, there 411 | is no GUI. 
412 | 413 | To install pdfsizeopt on an older macOS system, open a terminal window and 414 | run these commands (without the leading `$`): 415 | 416 | ``` 417 | $ mkdir ~/pdfsizeopt 418 | $ cd ~/pdfsizeopt 419 | $ curl -L -o pdfsizeopt_libexec_darwin.tar.gz https://github.com/pts/pdfsizeopt/releases/download/2023-04-18/pdfsizeopt_libexec_darwin-v9.tar.gz 420 | $ tar xzvf pdfsizeopt_libexec_darwin.tar.gz 421 | $ rm -f pdfsizeopt_libexec_darwin.tar.gz 422 | $ curl -L -o pdfsizeopt.single https://raw.githubusercontent.com/pts/pdfsizeopt/master/pdfsizeopt.single 423 | $ chmod +x pdfsizeopt.single 424 | $ ln -s pdfsizeopt.single pdfsizeopt 425 | ``` 426 | 427 | Do a test optimization run, which exercises all dependencies of pdfsizeopt: 428 | 429 | ``` 430 | $ curl -L -o deptest.pdf https://github.com/pts/pdfsizeopt/raw/master/deptest/deptest.pdf 431 | $ ~/pdfsizeopt/pdfsizeopt deptest.pdf 432 | ``` 433 | 434 | ... and open (view) deptest.pdf and the corresponding optimized 435 | deptest.pso.pdf . 436 | 437 | To optimize a PDF, run the following command: 438 | 439 | ``` 440 | ~/pdfsizeopt/pdfsizeopt input.pdf output.pdf 441 | ``` 442 | 443 | If the input PDF has many images or large images, pdfsizeopt can be very 444 | slow. You can speed it up by disabling pngout, the slowest image optimization 445 | method, like this: 446 | 447 | ``` 448 | ~/pdfsizeopt/pdfsizeopt --use-pngout=no input.pdf output.pdf 449 | ``` 450 | 451 | Also, if you have a Mac with a 32-bit Intel x86 processor, then the pngout 452 | bundled with pdfsizeopt won't work (because it needs a 64-bit processor), so 453 | you have to force --use-pngout=no . See the section *Image optimizers* for 454 | alternatives of pngout. 455 | 456 | pdfsizeopt creates lots of temporary files (psotmp.*) in the output 457 | directory, but it also cleans up after itself. 458 | 459 | It's possible to optimize a PDF outside the current directory. 
To do that, 460 | specify the pathname (including the directory name) in the command-line. 461 | 462 | Please note that the commands above download most dependencies (including 463 | Ghostscript, but excluding Python) as well. Everything should work as 464 | instructed above, out of the box. If you are experiencing problems, please 465 | report an issue on https://github.com/pts/pdfsizeopt/issues . 466 | 467 | To avoid typing ~/pdfsizeopt/pdfsizeopt, add "$HOME/pdfsizeopt" to your PATH 468 | (probably in your ~/.bashrc), open a new terminal window, and the 469 | command pdfsizeopt will work from any directory. 470 | 471 | You can also put pdfsizeopt to a directory other than ~/pdfsizeopt , as you 472 | like. 473 | 474 | ## Installation instructions and usage on FreeBSD 475 | 476 | There is no installer, you need to run some commands in the command line to 477 | download and install. pdfsizeopt is a command-line only application, there 478 | is no GUI. 479 | 480 | pdfsizeopt works perfectly on x86 FreeBSD systems with the Linux 481 | emulation layer enabled. So, enable the Linux emulation layer on your 482 | FreeBSD system, and then follow the 483 | *Installation instructions and usage on Linux*. 484 | 485 | Alternatively, you can follow the 486 | *Installation instructions and usage on generic Unix*, but that needs much 487 | more work on your part (and it's inconvenient and error-prone), because you 488 | need to install many dependencies separately, possibly compiling some of 489 | them from source. 490 | 491 | ## Installation instructions and usage on generic Unix 492 | 493 | Doing this is increasingly hard in 2023, because pdfsizeopt needs Python 494 | 2.4--2.7 and Ghostscript 9.05, both very old, and thus hard to install to a 495 | modern system. 496 | 497 | There is no installer, you need to run some commands in the command line 498 | (black Command Prompt window) to download and install. pdfsizeopt is a 499 | command-line only application, there is no GUI.
500 | 501 | pdfsizeopt is a Python script. It works with Python 2.4, 2.5, 2.6 and 2.7 502 | (but it doesn't work with Python 3.x). So please install Python first. 503 | 504 | Create a new directory named pdfsizeopt, and download this link there: 505 | https://raw.githubusercontent.com/pts/pdfsizeopt/master/pdfsizeopt.single 506 | 507 | Rename it to pdfsizeopt and make it executable by running the following 508 | commands (without the leading `$`): 509 | 510 | ``` 511 | $ cd pdfsizeopt 512 | $ mv pdfsizeopt.single pdfsizeopt 513 | $ chmod +x pdfsizeopt 514 | ``` 515 | 516 | If your Python executable is not /usr/bin/python, then edit the first line 517 | (starting with `#!`) in the pdfsizeopt script accordingly. 518 | 519 | Try it with: 520 | 521 | ``` 522 | $ ./pdfsizeopt --version 523 | info: This is pdfsizeopt ZIP rUNKNOWN size=105366. 524 | ``` 525 | 526 | pdfsizeopt has many dependencies. For full functionality, you need all of 527 | them. Install as many as you can, and put them to the PATH. 528 | 529 | Dependencies: 530 | 531 | * Python (command: python). Version 2.4, 2.5, 2.6 and 2.7 work (3.x doesn't 532 | work). 533 | * Ghostscript (command: gs): Version 9.05 is recommended, 8.50 should also 534 | work, and some early 9.x versions such as 9.14.1 also work. The most 535 | recent versions don't work, especially for font optimization. 536 | * jbig2 (command: jbig2): Install from source: 537 | https://github.com/pts/pdfsizeopt-jbig2 538 | If you are unable to install, use pdfsizeopt --use-jbig2=no . 539 | * pngout (command: pngout): Download binaries from here: 540 | http://www.jonof.id.au/kenutils Source code is not available. 541 | If you are unable to install, use pdfsizeopt --use-pngout=no . 542 | * imgdataopt (command: imgdataopt): Install from source: 543 | https://github.com/pts/imgdataopt 544 | To make pdfsizeopt able to use it, copy the `imgdataopt` program file as `sam2p` 545 | (e.g. /usr/local/bin/sam2p) to your PATH.
546 | If you are unable to install it, use pdfsizeopt --do-optimize-images=no . 547 | Some Linux distributions have sam2p binaries, but they tend to be too old. 548 | Alternatively, sam2p >=0.49.3 + png22pnm also works instead of imgdataopt, 549 | but imgdataopt is easier to install. 550 | * The Multivalent PDF compressor (written in Java) is an optional dependency 551 | of pdfsizeopt, turned off by default. Don't bother installing it. 552 | 553 | After installation, use pdfsizeopt as: 554 | 555 | ``` 556 | $ ./pdfsizeopt input.pdf output.pdf 557 | ``` 558 | 559 | You can add the directory containing pdfsizeopt to the PATH, so the 560 | command `pdfsizeopt` will work from any directory. 561 | 562 | ## Image optimizers 563 | 564 | pdfsizeopt can use the following external tools to make images in embedded 565 | PDF files smaller: 566 | 567 | * sam2p (used by default, cannot be disabled) 568 | * jbig2 (used by default, disable with --use-jbig2=no) 569 | * pngout (used by default, disable with --use-pngout=no) 570 | * zopflipng (not enabled by default) 571 | * optipng (not enabled by default) 572 | * advpng (not enabled by default) 573 | * ECT (not enabled by default) 574 | 575 | To enable or disable any image optimizer, specify all image optimizers you 576 | want to be enabled like this: --use-image-optimizer=optipng,jbig2 . This 577 | will also disable the default pngout. 578 | 579 | You can also specify custom image optimizer command patterns by specifying 580 | separate, additional --use-image-optimizer= flags, like this: 581 | 582 | ``` 583 | --use-image-optimizer="optipng %(sourcefnq)s -o6 -fix -force %(optipng_gray_flags)s-out %(targetfnq)s" 584 | ``` 585 | 586 | You always have to specify %(targetfnq)s in the command pattern. 587 | 588 | Specify --do-debug-image-optimizers=yes to see which image optimizers are 589 | enabled (and their full command-line) for the current run.
590 | 591 | At startup, pdfsizeopt checks that the requested image optimizers are 592 | available (as program files), and fails if some of them are missing. To 593 | ignore those which are missing, specify --do-require-image-optimizers=no . 594 | 595 | It's your (the user's) responsibility to install the image optimizers and 596 | add them to the PATH. If you follow the installation instructions for 597 | Windows and Linux above, the default image optimizers (sam2p, jbig2 and 598 | pngout) will be installed for you. For Linux, there are also installation 599 | instructions above for extra image optimizers (zopflipng, optipng, advpng 600 | and ECT). 601 | 602 | ## Troubleshooting 603 | 604 | ### 1. pdfsizeopt fails for some fonts. 605 | 606 | Specify --do-unify-fonts=no and --do-regenerate-all-fonts=no . 607 | 608 | If it still fails, specify --do-optimize-fonts=no . 609 | 610 | In either case, please report it on https://github.com/pts/pdfsizeopt/issues 611 | 612 | ### 2. pdfsizeopt fails for some images. 613 | 614 | Specify --do-optimize-images=no . 615 | 616 | Please report it on https://github.com/pts/pdfsizeopt/issues 617 | 618 | ### 3. pdfsizeopt is too slow processing images. 619 | 620 | Specify --use-pngout=no . This disables pngout, which is the slowest 621 | optimization step for images. 622 | 623 | ### 4. pdfsizeopt fails without creating the output PDF. 624 | 625 | Please report it on https://github.com/pts/pdfsizeopt/issues , attaching the 626 | input PDF file and the console output of pdfsizeopt. Your report is very 627 | much appreciated. 628 | 629 | If pdfsizeopt exits with an uncaught exception, it may leave some temporary 630 | files (psotmp.*) behind in the current directory. You can remove these files. 631 | 632 | Please note that pdfsizeopt is not resilient in processing corrupt PDF 633 | files (i.e. those which are not compliant to the PDF standard). 
So if 634 | pdfsizeopt fails, then the reason may be a bug in pdfsizeopt or a corrupt 635 | PDF input file. Nevertheless, please report an issue (see above). 636 | 637 | ### 5. The output PDF of pdfsizeopt doesn't look the same as the input PDF. 638 | 639 | Please report it on https://github.com/pts/pdfsizeopt/issues , attaching the 640 | input PDF file and the output PDF file (.pso.pdf) and the console output of 641 | pdfsizeopt. Your report is very much appreciated. 642 | 643 | ### 6. pdfsizeopt is unable to find some input files on Windows. 644 | 645 | This may happen if the filename or the full pathname contains any character 646 | other than the ASCII letters (a-z and A-Z), digits (0-9), underscore (_), 647 | ASCII dash (-), plus (+), dot (.), backslash (\) or slash (/). Typically 648 | these characters don't work: 649 | 650 | * spaces and tabs: This is easy to fix, just wrap the filename in double 651 | quotes ("), the usual way. 652 | * double quotes ("): This can't happen, filenames on Windows are not allowed 653 | to contain double quotes. If you need to pass a non-filename argument with 654 | a double quote in it to pdfsizeopt, do this. Wrap the argument in double 655 | quotes ("), replace all double quotes (") with \", and (in parallel to the 656 | previous replacement) replace a sequence of 657 | backslashes (\) and a double quote (") immediately following them by 658 | duplicating the backslashes and replacing the double quote (") with \". 659 | This sounds complicated, but this is the usual way for other programs as 660 | well, see https://stackoverflow.com/a/4094897/97248 . 661 | * newlines and other non-space whitespace: This won't 662 | work, the Windows Command Prompt (cmd.exe) doesn't allow these characters in 663 | command-line arguments. Also Windows doesn't allow them in filenames. 664 | * accented characters (such as á and ő).
These characters won't work (or they 665 | may work for only some characters, depending on the active code page) in 666 | the PDF filename specified in the commandline, or in the full pathname of 667 | pdfsizeopt (so don't install pdfsizeopt to C:\bőr, it won't work). 668 | 669 | Accented characters (outside the active code page) will not work in the 670 | full pathname of pdfsizeopt (such as C:\bőr\pdfsizeopt.exe). That's 671 | because Python is unable to call external programs (os.system, os.popen, 672 | os.spawnl and subprocess.call) with accented characters in their name, 673 | because it uses the single-byte API. 674 | 675 | * anything which is not ASCII printable (code between 33 and 126, 676 | inclusive): If not covered above, this may not work. See the description 677 | of accented characters. 678 | 679 | If some filenames still don't work, the workarounds are: 680 | 681 | * renaming or copying the file (and folders) in Windows Explorer, and passing 682 | the renamed file to pdfsizeopt 683 | * using pdfsizeopt on a Unix system (e.g. Linux, FreeBSD, macOS) instead 684 | 685 | Accented characters in PDF filename could be made to work the following way (as 686 | a future improvement work to pdfsizeopt): 687 | 688 | * pdfsizeopt.exe should call the 16-bit API (GetCommandLineW) instead of 689 | the single-byte API (GetCommandLineA) to get the arguments 690 | * pdfsizeopt.exe should escape the non-ASCII characters in the arguments 691 | (e.g. as U+12AB) 692 | * pdfsizeopt.exe should run pdfsizeopt.single like this: 693 | 694 | .../pdfsizeopt_win32exec/pdfsizeopt_python.exe .../pdfsizeopt.single --args-u+ ... 695 | 696 | * pdfsizeopt Python code should recognize --args-u+, and when finding the 697 | filename, it should convert it to unicode (by keeping ASCII except for 698 | U+12AB), and it should pass the unicode-typed value to open(...). Such an 699 | open(...) works in Python 2.6 on Windows.
700 | * When displaying filenames, pdfsizeopt Python code should still display the 701 | ASCII with the U+12AB escaping. Thus the win32console module is not 702 | needed. Thus filenames will be displayed legibly but incorrectly (not 703 | copy-pasteably) in the Command Prompt window. 704 | 705 | * No escaping is needed in command lines of helper programs (e.g. gs, 706 | sam2p), because it's all ASCII, because filenames are autogenerated 707 | temporary file names, which are all ASCII, and path to pdfsizeopt itself 708 | is required to be ASCII. 709 | 710 | Accented characters in the pathname of pdfsizeopt.single can be made to work 711 | this way (as a future improvement work to pdfsizeopt): 712 | 713 | * Do the accented characters in the filename above first. 714 | * pdfsizeopt.exe should use wgetcwd to get the current directory. 715 | * pdfsizeopt.exe should use wchdir to change to the directory of 716 | pdfsizeopt.single . 717 | * pdfsizeopt.exe should prepend the directories pdfsizeopt_win32exec and 718 | pdfsizeopt_win32exec/pdfsizeopt_gswin to the PATH, using wputenv. 719 | * pdfsizeopt.exe should run pdfsizeopt.single like this: 720 | 721 | ``` 722 | pdfsizeopt_python.exe pdfsizeopt.single --args-u+ --cwd=... ... 723 | ``` 724 | 725 | , where the value of --cwd= is the escaped (U+12AB) version of the 726 | result of wgetcwd. 727 | 728 | * pdfsizeopt Python code should prepend the value of --cwd=... to the input 729 | filename if it's relative. 730 | * pdfsizeopt Python code shouldn't modify the PATH if --cwd=... is present. 731 | (Does this environment variable propagation work in Python 2.6? Let's try!) 732 | * It's still true that 733 | no escaping is needed in command lines of external programs (e.g. gs, 734 | sam2p), because it's all ASCII, because temporary file names are all ASCII, 735 | and path to pdfsizeopt itself is required to be ASCII. Escaping is needed 736 | if the pathname of the temporary directory (TEMP variable) needs escaping.
737 | 738 | ### 7. Error on Windows: The application failed to initialize properly (0xc0000034). Click on OK to terminate the application. 739 | 740 | This error has happened on a Windows XP system. The solution: download 741 | msvcr90.dll (or find it somewhere already on your system), and copy it into 742 | pdfsizeopt_win32exec (next to python26.dll). Any version of msvcr90.dll will 743 | work: 744 | 745 | * msvcr90.dll 9.0.21022.8 (655872 bytes) 746 | * msvcr90.dll 9.0.30729.6161 (653136 bytes) 747 | * msvcr90.dll 9.0.30729.9247 (653968 bytes) 748 | 749 | ### 8. Error on Windows: The system cannot execute the specified command. 750 | 751 | This error has happened on a Windows XP system when the file 752 | Microsoft.VC90.CRT.manifest was missing from the pdfsizeopt_win32exec 753 | directory. The solution: reinstall pdfsizeopt, the directory 754 | pdfsizeopt_win32exec in the newest version has that file. 755 | 756 | ### 9. Ghostscript errors with Type1CParser and Type1CConverter 757 | 758 | Please install pdfsizeopt by following the installation instructions on 759 | https://github.com/pts/pdfsizeopt . By doing so, pdfsizeopt will use 760 | Ghostscript 9.05 bundled with it, and it will work. 761 | 762 | ## More documentation 763 | 764 | * https://github.com/pts/pdfsizeopt/releases/download/docs-v1/pts_pdfsizeopt2009.psom.pdf 765 | White paper on EuroTex 2009. 766 | * https://github.com/pts/pdfsizeopt/releases/download/docs-v1/pts_pdfsizeopt2009_talk.psom.pdf 767 | Conference talk slides on EuroTex 2009.
768 | 769 | 770 | -------------------------------------------------------------------------------- /deptest/deptest.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pts/pdfsizeopt/2bab16031dad854e42c2910859564d9a962bc16c/deptest/deptest.pdf -------------------------------------------------------------------------------- /deptest/deptest.pdf.log: -------------------------------------------------------------------------------- 1 | deptest.pdf.log conveniently uses most external dependencies of pdfsizeopt: 2 | 3 | * Ghostscript with Type1CConverter 4 | * Ghostscript with Type1CParser 5 | * Ghostscript with Type1CGenerator 6 | * Ghostscript with ImageRenderer 7 | * Ghostscript for stream decompression 8 | * sam2p for image compression 9 | * png22pnm for PDF decompression by sam2p 10 | * jbig2 for image compression 11 | * pngout for image compression 12 | 13 | Not used here: 14 | 15 | * qpdf for decrypting 16 | * Multivalent for additional compression 17 | 18 | Log generated with: 19 | 20 | $ dfsizeopt deptest.pdf >>deptest.pdf.log 2>&1 21 | 22 | info: This is pdfsizeopt rUNKNOWN size=357293. 
23 | info: prepending to PATH: ../pdfsizeopt_libexec 24 | info: loading PDF from: deptest.pdf 25 | info: loaded PDF of 36080 bytes 26 | info: separated to 25 objs + xref + trailer 27 | info: found 2 Type1 fonts loaded 28 | info: writing Type1CConverter (31666 font bytes) to: psotmp.6460.conv.tmp.ps 29 | info: using Ghostscript ../pdfsizeopt_libexec/gs: GPL Ghostscript 9.05 (2012-02-08) 30 | info: executing Type1CConverter with Ghostscript: gs -q -P- -dNOPAUSE -dBATCH -sDEVICE=pdfwrite -dPDFSETTINGS=/printer -dColorConversionStrategy=/LeaveColorUnchanged -sOutputFile=psotmp.6460.conv.tmp.pdf -f psotmp.6460.conv.tmp.ps 31 | Type1CConverter: using interpreter GPL Ghostscript 905 20120208 32 | Type1CConverter: converting font /JVWJQI+LMRoman10-Regular to /Obj0000000010 33 | Type1CConverter: converting font /MFCZJC+LMRoman10-Regular to /Obj0000000022 34 | Type1CConverter: all OK 35 | info: loading PDF from: psotmp.6460.conv.tmp.pdf 36 | info: loaded PDF of 4387 bytes 37 | info: separated to 16 objs + xref + trailer 38 | info: found 2 fonts in GS output 39 | info: optimized total Type1 font size 31658 to Type1C font size 902 (3%) 40 | info: optimized Type1 font XObject 10,13: new size=658 (4%) 41 | info: optimized Type1 font XObject 22,25: new size=708 (4%) 42 | info: found 2 Type1C fonts loaded 43 | info: writing Type1CParser (891 font bytes) to: psotmp.6460.conv.parse.tmp.ps 44 | info: executing Type1CParser with Ghostscript: gs -q -P- -dNOPAUSE -dBATCH -sDEVICE=nullpage -sDataFile=psotmp.6460.conv.parsedata.tmp.ps -f psotmp.6460.conv.parse.tmp.ps 45 | Type1CParser: using interpreter GPL Ghostscript 905 20120208 46 | Type1CParser: all OK 47 | info: parsed 2 Type1C fonts 48 | info: merged fonts ['/JVWJQI+LMRoman10-Regular', '/MFCZJC+LMRoman10-Regular'], reduced char count from 4 to 3 (75%) 49 | info: writing Type1CGenerator (1448 bytes in 1 fonts) to: psotmp.6460.conv.gen.tmp.ps 50 | info: executing Type1CGenerator with Ghostscript: gs -q -P- -dNOPAUSE -dBATCH 
-sDEVICE=pdfwrite -dPDFSETTINGS=/printer -dColorConversionStrategy=/LeaveColorUnchanged -sOutputFile=psotmp.6460.conv.gen.tmp.pdf -f psotmp.6460.conv.gen.tmp.ps 51 | Type1CGenerator: using interpreter GPL Ghostscript 905 20120208 52 | Type1CGenerator: all OK 53 | info: loading PDF from: psotmp.6460.conv.gen.tmp.pdf 54 | info: loaded PDF of 3526 bytes 55 | info: separated to 14 objs + xref + trailer 56 | info: found 1 fonts loaded 57 | info: optimized Type1C fonts to form 1314 bytes to 756 bytes (58%) 58 | info: uninlined 1 images, saved -81 bytes 59 | info: will optimize image XObject 9; orig width=100 height=100 colorspace=/DeviceGray bpc=1 inv=False filter=/FlateDecode dp=1 size=400 gs_device=pngmono 60 | info: saving PNG to psotmp.6460.img-9.parse.png 61 | info: written 226 bytes to PNG 62 | info: will optimize image XObject 26; orig width=100 height=100 colorspace=/DeviceGray bpc=1 inv=False filter=/CCITTFaxDecode dp=1 size=895 gs_device=pngmono 63 | info: optimizing 2 images of 1295 bytes in total 64 | info: writing ImageRenderer (920 image bytes) to: psotmp.6460.conv.pngmono.tmp.ps 65 | info: executing ImageRenderer with Ghostscript: gs -q -P- -dNOPAUSE -dBATCH -sDEVICE=pngmono -sOutputFile='psotmp.6460.img-%04d.pngmono.tmp.png' -f psotmp.6460.conv.pngmono.tmp.ps 66 | ImageRenderer: rendering image XObject 26 width=100 height=100 bpc=1 colorspace=/DeviceGray filter=/CCITTFaxDecode decodeparms=<< /Columns 100 /K 0 >> device=pngmono 67 | ImageRenderer: all OK 68 | info: loading image from: psotmp.6460.img-0001.pngmono.tmp.png 69 | info: loaded PNG IDAT of 171 bytes 70 | info: executing image converter sam2p_np: sam2p -pdf:2 -c zip:1:9 -s Gray1:Indexed1:Gray2:Indexed2:Rgb1:Gray4:Indexed4:Rgb2:Gray8:Indexed8:Rgb4:Rgb8:stop -- psotmp.6460.img-9.parse.png psotmp.6460.img-9.sam2p-np.pdf 71 | This is sam2p 0.49. 72 | Available Loaders: PS PDF JAI PNG JPEG TIFF PNM BMP GIF LBM XPM PCX TGA. 
73 | Available Appliers: XWD Meta Empty BMP PNG TIFF6 TIFF6-JAI JPEG-JAI JPEG PNM GIF89a+LZW XPM PSL1C PSL23+PDF PSL2+PDF-JAI P-TrOpBb. 74 | sam2p: Notice: PNM: loaded alpha, but no transparent pixels 75 | sam2p: Notice: job: read InputFile: psotmp.6460.img-9.parse.png 76 | sam2p: Notice: writeTTT: using template: p02 77 | sam2p: Notice: applyProfile: applied OutputRule #0 78 | sam2p: Notice: job: written OutputFile: psotmp.6460.img-9.sam2p-np.pdf 79 | Success. 80 | info: loading image from: psotmp.6460.img-9.sam2p-np.pdf 81 | info: loading PDF from: psotmp.6460.img-9.sam2p-np.pdf 82 | info: loaded PDF of 874 bytes 83 | info: separated to 5 objs + xref + trailer 84 | info: loaded PNG IDAT of 167 bytes 85 | info: executing image converter sam2p_pr: sam2p -c zip:15:9 -- psotmp.6460.img-9.parse.png psotmp.6460.img-9.sam2p-pr.png 86 | This is sam2p 0.49. 87 | Available Loaders: PS PDF JAI PNG JPEG TIFF PNM BMP GIF LBM XPM PCX TGA. 88 | Available Appliers: XWD Meta Empty BMP PNG TIFF6 TIFF6-JAI JPEG-JAI JPEG PNM GIF89a+LZW XPM PSL1C PSL23+PDF PSL2+PDF-JAI P-TrOpBb. 89 | sam2p: Notice: PNM: loaded alpha, but no transparent pixels 90 | sam2p: Notice: job: read InputFile: psotmp.6460.img-9.parse.png 91 | sam2p: Notice: applyProfile: applied OutputRule #2 92 | sam2p: Notice: job: written OutputFile: psotmp.6460.img-9.sam2p-pr.png 93 | Success. 
94 | info: loading image from: psotmp.6460.img-9.sam2p-pr.png 95 | info: loaded PNG IDAT of 226 bytes 96 | info: executing image converter jbig2: jbig2 -p psotmp.6460.img-9.sam2p-pr.png >psotmp.6460.img-9.jbig2 97 | info: executing image converter pngout: pngout -force psotmp.6460.img-9.sam2p-pr.png psotmp.6460.img-9.pngout.png 98 | In: psotmp.6460.img-9.sam2p-pr.png /c0 /f5 /d1 In: 283 bytes 99 | Out: psotmp.6460.img-9.pngout.png /c3 /f0 /d1, 2 colors 0% 1% 2% 3% 4% 5% 6% 7% 8% 9% 10% 11% 12% 13% 14% 15% 16% 17% 18% 19% 20% 21% 22% 23% 24% 25% 26% 27% 28% 29% 30% 31% 32% 33% 34% 35% 36% 37% 38% 39% 40% 41% 42% 43% 44% 45% 46% 47% 48% 49% 50% 51% 52% 53% 54% 55% 56% 57% 58% 59% 60% 61% 62% 63% 64% 65% 66% 67% 68% 69% 70% 71% 72% 73% 74% 75% 76% 77% 78% 79% 80% 81% 82% 83% 84% 85% 86% 87% 88% 89% 90% 91% 92% 93% 94% 95% 96% 97% 98% 99% 0% 1% 2% 3% 4% 5% 6% 7% 8% 9% 10% 11% 12% 13% 14% 15% 16% 17% 18% 19% 20% 21% 22% 23% 24% 25% 26% 27% 28% 29% 30% 31% 32% 33% 34% 35% 36% 37% 38% 39% 40% 41% 42% 43% 44% 45% 46% 47% 48% 49% 50% 51% 52% 53% 54% 55% 56% 57% 58% 59% 60% 61% 62% 63% 64% 65% 66% 67% 68% 69% 70% 71% 72% 73% 74% 75% 76% 77% 78% 79% 80% 81% 82% 83% 84% 85% 86% 87% 88% 89% 90% 91% 92% 93% 94% 95% 96% 97% 98% 99% Out: 291 -68.3= 222.6 0% 1% 2% 3% 4% 5% 6% 7% 8% 9% 10% 11% 12% 13% 14% 15% 16% 17% 18% 19% 20% 21% 22% 23% 24% 25% 26% 27% 28% 29% 30% 31% 32% 33% 34% 35% 36% 37% 38% 39% 40% 41% 42% 43% 44% 45% 46% 47% 48% 49% 50% 51% 52% 53% 54% 55% 56% 57% 58% 59% 60% 61% 62% 63% 64% 65% 66% 67% 68% 69% 70% 71% 72% 73% 74% 75% 76% 77% 78% 79% 80% 81% 82% 83% 84% 85% 86% 87% 88% 89% 90% 91% 92% 93% 94% 95% 96% 97% 98% 99% Out: 238 -0.8= 237.1 0% 1% 2% 3% 4% 5% 6% 7% 8% 9% 10% 11% 12% 13% 14% 15% 16% 17% 18% 19% 20% 21% 22% 23% 24% 25% 26% 27% 28% 29% 30% 31% 32% 33% 34% 35% 36% 37% 38% 39% 40% 41% 42% 43% 44% 45% 46% 47% 48% 49% 50% 51% 52% 53% 54% 55% 56% 57% 58% 59% 60% 61% 62% 63% 64% 65% 66% 67% 68% 69% 70% 71% 72% 73% 74% 75% 76% 77% 78% 79% 80% 81% 82% 83% 84% 
85% 86% 87% 88% 89% 90% 91% 92% 93% 94% 95% 96% 97% 98% 99% Out: 238 bytes 100 | Chg: -45 bytes ( 84% of original) 101 | info: loading image from: psotmp.6460.img-9.pngout.png 102 | info: loaded PNG IDAT of 163 bytes and PLTE of 6 bytes 103 | info: optimized image XObject 9 file_name=psotmp.6460.img-9.sam2p-np.pdf size=329 (82%) methods=sam2p_np:329,jbig2:342,pngout:385,parse:391,#orig:400,sam2p_pr:448 104 | info: executing image converter sam2p_np: sam2p -pdf:2 -c zip:1:9 -s Gray1:Indexed1:Gray2:Indexed2:Rgb1:Gray4:Indexed4:Rgb2:Gray8:Indexed8:Rgb4:Rgb8:stop -- psotmp.6460.img-0001.pngmono.tmp.png psotmp.6460.img-26.sam2p-np.pdf 105 | This is sam2p 0.49. 106 | Available Loaders: PS PDF JAI PNG JPEG TIFF PNM BMP GIF LBM XPM PCX TGA. 107 | Available Appliers: XWD Meta Empty BMP PNG TIFF6 TIFF6-JAI JPEG-JAI JPEG PNM GIF89a+LZW XPM PSL1C PSL23+PDF PSL2+PDF-JAI P-TrOpBb. 108 | sam2p: Notice: PNM: loaded alpha, but no transparent pixels 109 | sam2p: Notice: job: read InputFile: psotmp.6460.img-0001.pngmono.tmp.png 110 | sam2p: Notice: writeTTT: using template: p02 111 | sam2p: Notice: applyProfile: applied OutputRule #0 112 | sam2p: Notice: job: written OutputFile: psotmp.6460.img-26.sam2p-np.pdf 113 | Success. 
114 | info: loading image from: psotmp.6460.img-26.sam2p-np.pdf 115 | info: loading PDF from: psotmp.6460.img-26.sam2p-np.pdf 116 | info: loaded PDF of 874 bytes 117 | info: separated to 5 objs + xref + trailer 118 | info: loaded PNG IDAT of 167 bytes 119 | info: using already processed image for obj 26 120 | info: optimized image XObject 26 file_name=psotmp.6460.img-9.sam2p-np.pdf size=329 (37%) methods=#prev-processed-best:329,sam2p_np:329,gs:393,#orig:895 121 | info: saved 637 bytes (49%) on optimizable images 122 | info: decompressing 120 bytes with Ghostscript /Filter/LZWDecode 123 | info: optimized 5 streams, kept 2 #orig, 2 uncompressed, 1 zip 124 | info: eliminated 4 duplicate objs 125 | info: eliminated 4 unused objs in 4 classes 126 | info: compressed 2 streams, kept 0 of them uncompressed 127 | info: saving PDF with 18 objs to: deptest.pso.pdf 128 | info: trying 3 jobs and using the smallest 129 | info: generated object stream of 595 bytes in 13 objects (22%) 130 | info: job original generated 2232 bytes (6%) 131 | info: job xrefstm generated 2892 bytes (8%) 132 | info: job nostm generated 3189 bytes (9%) 133 | info: jobs result: original=2232 xrefstm=2892 nostm=3189 134 | info: generated 2232 bytes (6%) 135 | -------------------------------------------------------------------------------- /deptest/deptest.psogood.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pts/pdfsizeopt/2bab16031dad854e42c2910859564d9a962bc16c/deptest/deptest.psogood.pdf -------------------------------------------------------------------------------- /docker/.dockerignore: -------------------------------------------------------------------------------- 1 | *~ 2 | *.tmp 3 | *.tar.gz 4 | build_docker.sh 5 | -------------------------------------------------------------------------------- /docker/Dockerfile: -------------------------------------------------------------------------------- 1 | # by pts@fazekas.hu at 
Wed Oct 11 15:24:03 CEST 2017 2 | # 3 | # Run: 4 | # 5 | # $ docker run -v "$PWD:/workdir" -u "$(id -u):$(id -g)" --rm -it ptspts/pdfsizeopt pdfsizeopt input.pdf output.pdf 6 | # 7 | # Building in a separate `context' directory so that only a few bytes have 8 | # to be sent to the Docker daemon. 9 | # 10 | 11 | FROM scratch 12 | MAINTAINER pts@fazekas.hu 13 | LABEL version=1 14 | CMD ["sh"] 15 | ADD busybox /bin/ 16 | #RUN ["busybox", "chmod", "755", "/bin/busybox"] 17 | RUN ["busybox", "ln", "-s", "/", "/usr"] 18 | RUN ["busybox", "--install", "-s"] 19 | 20 | ADD pdfsizeopt_libexec/gs pdfsizeopt_libexec/jbig2 pdfsizeopt_libexec/sam2p pdfsizeopt_libexec/pngout pdfsizeopt_libexec/python /bin/ 21 | #RUN cd /bin && chmod 755 gs jbig2 png22pnm sam2p pngout python 22 | # Run this ADD last, to improve caching. 23 | ADD pdfsizeopt.single /bin/pdfsizeopt 24 | #RUN cd /bin && chmod 755 pdfsizeopt 25 | WORKDIR /workdir 26 | -------------------------------------------------------------------------------- /docker/build_docker.sh: -------------------------------------------------------------------------------- 1 | #! /bin/bash -- 2 | # by pts@fazekas.hu at Wed Oct 11 15:24:03 CEST 2017 3 | # 4 | 5 | set -ex 6 | cd "${0%/*}" 7 | test -f ../pdfsizeopt.single 8 | if ! test -f pdfsizeopt_libexec_linux.tar.gz; then 9 | wget -nv -O pdfsizeopt_libexec_linux.tar.gz.tmp https://github.com/pts/pdfsizeopt/releases/download/2023-04-18/pdfsizeopt_libexec_linux-v9.tar.gz 10 | rm -f pdfsizeopt_libexec_linux.tar.gz 11 | mv pdfsizeopt_libexec_linux.tar.gz.tmp pdfsizeopt_libexec_linux.tar.gz 12 | fi 13 | if ! test -f busybox; then 14 | wget -nv -O busybox.tmp https://github.com/pts/pdfsizeopt/releases/download/2017-10-11b/busybox 15 | chmod 755 busybox.tmp 16 | rm -f busybox 17 | mv busybox.tmp busybox 18 | fi 19 | # Doing these chmods early makes the image half as large. 
#! /bin/bash --
# by pts@fazekas.hu at Mon Nov 27 20:28:23 CET 2017
#
# Builds the ptspts/pdfsizeopt-with-extraimgopt Docker image.
#
# Steps: download the extraimgopt libexec tarball (unless a copy is already
# present next to this script), unpack it to pdfsizeopt_libexec/, make the
# extra image optimizer binaries executable, run `docker build' in this
# directory, and remove the unpacked directory again.
#

# -e: abort on the first failing command; -x: echo each command.
set -ex
# Run everything from the directory containing this script.
cd "${0%/*}"
test -f ../pdfsizeopt.single
if ! test -f pdfsizeopt_libexec_extraimgopt_linux.tar.gz; then
  # Download to a temporary name first, so that an interrupted download is
  # never mistaken for a complete tarball on the next run.
  wget -nv -O pdfsizeopt_libexec_extraimgopt_linux.tar.gz.tmp https://github.com/pts/pdfsizeopt/releases/download/2017-01-24/pdfsizeopt_libexec_extraimgopt_linux-v3.tar.gz
  rm -f pdfsizeopt_libexec_extraimgopt_linux.tar.gz
  mv pdfsizeopt_libexec_extraimgopt_linux.tar.gz.tmp pdfsizeopt_libexec_extraimgopt_linux.tar.gz
fi
rm -rf pdfsizeopt_libexec
tar xzvf pdfsizeopt_libexec_extraimgopt_linux.tar.gz
# Doing these chmods early makes the image half as large.
# (This used to be `echo chmod ...', which only printed the command.)
chmod 755 pdfsizeopt_libexec/ECT pdfsizeopt_libexec/advpng pdfsizeopt_libexec/optipng pdfsizeopt_libexec/zopflipng
# Reads Dockerfile.
docker build -t ptspts/pdfsizeopt-with-extraimgopt .
rm -rf pdfsizeopt_libexec
# Pushing is disabled on purpose (`:' is a no-op); run it manually.
: docker push ptspts/pdfsizeopt-with-extraimgopt

: build_docker.sh OK.
(Closes: #nnn) 4 | 5 | -- Rogério Brito Fri, 15 Feb 2013 22:27:05 -0200 6 | -------------------------------------------------------------------------------- /extra/debian/compat: -------------------------------------------------------------------------------- 1 | 9 2 | -------------------------------------------------------------------------------- /extra/debian/control: -------------------------------------------------------------------------------- 1 | Source: pdfsizeopt 2 | Priority: optional 3 | Section: text 4 | Maintainer: Rogério Brito 5 | Homepage: https://code.google.com/p/pdfsizeopt/ 6 | Build-Depends: 7 | debhelper (>= 9), 8 | python (<< 3), 9 | python (>= 2.6) 10 | Standards-Version: 3.9.3 11 | 12 | Package: pdfsizeopt 13 | Architecture: all 14 | Depends: 15 | ghostscript, 16 | png22pnm | netpbm, 17 | python (>= 2.6), 18 | sam2p (>= 0.49), 19 | ${misc:Depends} 20 | Recommends: 21 | jbig2enc 22 | Suggests: 23 | multivalent, 24 | pngout 25 | Description: size optimizer for PDF files 26 | pdfsizeopt is a program for converting large PDF files to small ones. More 27 | specifically, pdfsizeopt is a command-line application and a collection of 28 | best practices to optimize the size of PDF files, with focus on PDFs 29 | created from TeX and LaTeX documents. 30 | -------------------------------------------------------------------------------- /extra/debian/copyright: -------------------------------------------------------------------------------- 1 | Format: http://www.debian.org/doc/packaging-manuals/copyright-format/1.0/ 2 | Upstream-Name: pdfsizeopt 3 | Upstream-Contact: https://code.google.com/p/pdfsizeopt 4 | Source: http://pdfsizeopt.googlecode.com/svn/trunk/ 5 | 6 | Files: * 7 | Copyright: © 2009-2012 Peter Szabo 8 | License: GPL-2+ 9 | 10 | Files: debian/* 11 | Copyright: © 2013, Rogério Theodoro de Brito . 
12 | License: GPL-2+ 13 | 14 | License: GPL-2+ 15 | This package is free software; you can redistribute it and/or modify 16 | it under the terms of the GNU General Public License as published by 17 | the Free Software Foundation; either version 2 of the License, or 18 | (at your option) any later version. 19 | . 20 | This package is distributed in the hope that it will be useful, 21 | but WITHOUT ANY WARRANTY; without even the implied warranty of 22 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 23 | GNU General Public License for more details. 24 | . 25 | You should have received a copy of the GNU General Public License 26 | along with this program. If not, see 27 | . 28 | On Debian systems, the complete text of the GNU General 29 | Public License version 2 can be found in "/usr/share/common-licenses/GPL-2". 30 | -------------------------------------------------------------------------------- /extra/debian/dirs: -------------------------------------------------------------------------------- 1 | usr/bin 2 | -------------------------------------------------------------------------------- /extra/debian/rules: -------------------------------------------------------------------------------- 1 | #!/usr/bin/make -f 2 | %: 3 | dh $@ 4 | 5 | override_dh_install: 6 | cp pdfsizeopt.py debian/pdfsizeopt/usr/bin/pdfsizeopt 7 | -------------------------------------------------------------------------------- /extra/debian/source/format: -------------------------------------------------------------------------------- 1 | 3.0 (quilt) 2 | -------------------------------------------------------------------------------- /extra/debian/watch: -------------------------------------------------------------------------------- 1 | version=3 2 | http://code.google.com/p/pdfsizeopt/downloads/list?can=1 .*/pdfsizeopt-(\d[\d.]*)\.(?:zip|tgz|tbz2|txz|tar\.gz|tar\.bz2|tar\.xz) 3 | -------------------------------------------------------------------------------- 
#! /usr/bin/python2.4
# by pts@fazekas.hu at Tue Jul 21 16:14:10 CEST 2009

import re
import sys
import os


def main(argv):
  """Runs dvipdfmx with a font map generated for re-encoded base fonts.

  Collects font map names from the `f' lines of dvipdfmx.cfg (located with
  kpsewhich) and from the leading -f flags in argv, converts those map lines
  which describe a re-encoded font (`... "... ReEncodeFont" <NAME.enc') to
  dvipdfmx syntax, writes the result to dvipdfmx_base.map in the current
  directory, and then replaces the current process with
  `dvipdfmx -f dvipdfmx_base.map' followed by all the original arguments.

  Args:
    argv: Command-line arguments, argv[0] being the program name.
  Returns:
    Doesn't return if dvipdfmx could be started (os.execlp).
  Raises:
    AssertionError: If a map name contains a character unsafe for the
      kpsewhich shell command, or a map file is not found.
    IOError: If a file can't be opened.
  """
  map_list = []

  cfg_kname = (os.popen('kpsewhich --progname=dvipdfmx dvipdfmx.cfg')
               .read().rstrip('\n'))
  cfg_file = open(cfg_kname)
  try:
    for cfg_line in cfg_file:
      cfg_items = cfg_line.strip().split(None, 1)
      if len(cfg_items) == 2 and cfg_items[0] == 'f':
        map_list.append(cfg_items[1])
  finally:
    cfg_file.close()

  # Collect the leading `-f NAME' and `-fNAME' flags.
  i = 1
  while i < len(argv):
    if argv[i] == '-f' and i < len(argv) - 1:
      map_list.append(argv[i + 1])
      i += 2
    elif argv[i].startswith('-f') and len(argv[i]) > 2:
      map_list.append(argv[i][2:])
      i += 1  # Without this the loop never terminated for `-fNAME'.
    else:
      # First non-map argument (or a trailing bare `-f', which dvipdfmx
      # itself will complain about).
      break

  f = open('dvipdfmx_base.map', 'w')
  try:
    for map_name in map_list:
      # The name is interpolated to a double-quoted shell word below, so
      # reject everything the shell would still interpret there.
      assert '$' not in map_name
      assert '"' not in map_name
      assert '\\' not in map_name
      assert '%' not in map_name
      assert '`' not in map_name
      map_kname = (os.popen('kpsewhich "%s"' % map_name)
                   .read().rstrip('\n'))
      assert map_kname, 'font map not found: %s' % map_name

      map_file = open(map_kname)
      try:
        for map_line in map_file:
          # A to-be-reencoded base font. Example:
          # ptmr8r Times-Roman "TeXBase1Encoding ReEncodeFont" <8r.enc
          match = re.match(r'\s*([^%\s]\S*)\s+(\S+)\s+(?:\d+\s+)?"([^"]*)"\s+'
                           r'<(\S+)[.]enc\s*\Z', map_line)
          if not match:
            continue
          tex_font_name = match.group(1)
          ps_font_name = match.group(2)
          # Normalize whitespace, and pad with spaces so that each
          # instruction can be found as a ` VALUE Keyword ' substring.
          ps_instructions = ' %s ' % re.sub(r'\s+', ' ',
                                            match.group(3).strip())
          enc_file_name = match.group(4)
          dvipdfm_instructions = []
          # TODO(pts): Obey the order
          # re.search (not re.match), because the instruction can be
          # anywhere within the quoted string, not only at its beginning.
          match = re.search(r' (\S+) SlantFont ', ps_instructions)
          if match:
            dvipdfm_instructions.append(' -s %s' % match.group(1))
          match = re.search(r' (\S+) ExtendFont ', ps_instructions)
          if match:
            dvipdfm_instructions.append(' -e %s' % match.group(1))
          f.write('%s %s %s%s\n' %
                  (tex_font_name, enc_file_name, ps_font_name,
                   ' '.join(dvipdfm_instructions)))
      finally:
        map_file.close()
  finally:
    f.close()

  args = ['dvipdfmx', '-f', 'dvipdfmx_base.map'] + argv[1:]
  # Flush before exec, otherwise buffered output would be lost.
  sys.stdout.flush()
  sys.stderr.flush()
  os.execlp(args[0], *args)

if __name__ == '__main__':
  sys.exit(main(sys.argv) or 0)
texnansi Helvetica-Bold 21 | phvbo8r 8r Helvetica-BoldOblique 22 | phvbo8y texnansi Helvetica-BoldOblique 23 | phvr8r 8r Helvetica 24 | phvr8y texnansi Helvetica 25 | phvro8r 8r Helvetica-Oblique 26 | phvro8y texnansi Helvetica-Oblique 27 | ptmb8r 8r Times-Bold 28 | ptmb8y texnansi Times-Bold 29 | ptmbi8r 8r Times-BoldItalic 30 | ptmbi8y texnansi Times-BoldItalic 31 | ptmr8r 8r Times-Roman 32 | ptmr8y texnansi Times-Roman 33 | ptmri8r 8r Times-Italic 34 | ptmri8y texnansi Times-Italic 35 | rtxphvb 8r Helvetica-Bold 36 | rtxphvr 8r Helvetica 37 | rtxptmb 8r Times-Bold 38 | rtxptmbi 8r Times-BoldItalic 39 | rtxptmr 8r Times-Roman 40 | rtxptmri 8r Times-Italic 41 | tib texnansi Times-Bold 42 | tibi texnansi Times-BoldItalic 43 | tii texnansi Times-Italic 44 | tir texnansi Times-Roman 45 | tmbi108r 8r Times-BoldItalic 46 | tmrb108r 8r Times-Bold 47 | tmri108r 8r Times-Italic 48 | tmrm108r 8r Times-Roman 49 | -------------------------------------------------------------------------------- /extra/pdftex_ndl14_extra.map: -------------------------------------------------------------------------------- 1 | % Usage: pdflatex "\pdfmapfile{=pdftex_ndl14_extra.map}\input" doc.tex 2 | % Please consider using pdftex_ndl14_extraflag.map instead, which contains 3 | % extra information (font flags) for the PDF viewer. 
4 | cob Courier-Bold "TeXnANSIEncoding ReEncodeFont" 0 19 | \PassOptionsToPackage{pdftex}{graphicx} 20 | \PassOptionsToPackage{pdftex}{hyperref} 21 | \else 22 | \PassOptionsToPackage{dvipdfm}{graphicx} 23 | \PassOptionsToPackage{dvipdfm}{hyperref} 24 | \fi 25 | 26 | \expandafter\ifx\csname ver@graphics.sty\endcsname\relax\else 27 | \PackageError{pts-graphics-helper}{Please load graphics-helper.sty before graphics.sty}\@ehc 28 | \fi 29 | \expandafter\ifx\csname ver@hyperref.sty\endcsname\relax\else 30 | \PackageError{pts-graphics-helper}{Please load graphics-helper.sty before hyperref.sty}\@ehc 31 | \fi 32 | 33 | \let\includegraphicsmps\@undefined 34 | 35 | %** Usage: like \includegraphics 36 | %** Example: \includgraphicsmps[width=5cm]{mpspic.6} 37 | \newcommand\includegraphicsmps[2][]{% 38 | \begingroup 39 | \ifx\Ginclude@mps\@undefined % for dvipdfm 40 | \@namedef{Gin@rule@*}##1{{eps}{\Gin@ext}{##1}}% 41 | \else \@namedef{Gin@rule@*}##1{{mps}{\Gin@ext}{##1}}\fi 42 | \includegraphics[#1]{#2}% 43 | \endgroup 44 | } 45 | 46 | % Like \@for, but splits on `/'. Based on \@for defined in latex.ltx. 
47 | \def\@forslashnoop#1\@@#2#3{} 48 | \long\def\@forslash#1:=#2\do#3{% 49 | \expandafter\def\expandafter\@forslashtmp\expandafter{#2}% 50 | \ifx\@forslashtmp\@empty \else 51 | \expandafter\@forslashloop#2/\@nil/\@nil\@@#1{#3}\fi} 52 | \long\def\@forslashloop#1/#2/#3\@@#4#5{\def#4{#1}\ifx #4\@nnil \else 53 | #5\def#4{#2}\ifx #4\@nnil \else#5\@iforslashloop #3\@@#4{#5}\fi\fi} 54 | \long\def\@iforslashloop#1/#2\@@#3#4{\def#3{#1}\ifx #3\@nnil 55 | \expandafter\@forslashnoop \else 56 | #4\relax\expandafter\@iforslashloop\fi#2\@@#3{#4}} 57 | 58 | \def\pgh@@mediabox{MediaBox} 59 | \edef\pgh@@mediabox{\expandafter\strip@prefix\meaning\pgh@@mediabox}% 60 | \def\pgh@@mediaboxspc{MediaBox } 61 | \edef\pgh@@mediaboxspc{\expandafter\strip@prefix\meaning\pgh@@mediaboxspc}% 62 | 63 | \def\pgh@find@mediabox#1[#2]#3\\{% 64 | \def\reserved@a{#1}% 65 | \ifx\reserved@a\pgh@@mediabox 66 | \Gin@bboxtrue 67 | \Gread@false 68 | \gdef\@gtempa{#2 }% 69 | \else\ifx\reserved@a\pgh@@mediaboxspc 70 | \Gin@bboxtrue 71 | \Gread@false 72 | \gdef\@gtempa{#2 }% 73 | \fi\fi 74 | } 75 | 76 | %** @param #1 a line read from the .pdf file 77 | \long\def\pgh@find@bb#1\\{% 78 | \@forslash\reserved@a:=-#1\do{% 79 | \ifGread@\expandafter\pgh@find@mediabox\reserved@a[]\\\fi 80 | }% 81 | } 82 | 83 | %** @param #1 filename.pdf 84 | \def\pgh@readepspdfbb#1{% 85 | \begingroup 86 | \Gread@true 87 | \def\Gread@true{% 88 | \@tempcnta=0 89 | \loop\ifnum\@tempcnta<256 90 | \catcode\@tempcnta=12 91 | \advance\@tempcnta\@ne 92 | \repeat 93 | \catcode0=10 \catcode9=10 \catcode10=10 94 | \catcode11=10 \catcode12=10 \catcode13=10 \catcode32=10 95 | \catcode\string`\%=14 % comment 96 | % TeX's eyes always split the line at character 10. 97 | % So we cannot detect a MediaBox if it contains a newline. 
98 | }% 99 | \let\Gread@find@bb\pgh@find@bb 100 | \Gread@eps{#1}% 101 | \endgroup 102 | \expandafter\Gread@parse@bb\@gtempa\\% parse again, after \endgroup above 103 | } 104 | 105 | \def\pgh@@dvipdfmdef{dvipdfm.def} 106 | \edef\pgh@@dvipdfmdef{\expandafter\strip@prefix\meaning\pgh@@dvipdfmdef}% 107 | 108 | \def\pgh@fixpdfbbread{% 109 | \ifnum\pdfoutput>0 \else 110 | \edef\reserved@a{\expandafter\strip@prefix\meaning\Gin@driver}% 111 | \ifx\reserved@a\pgh@@dvipdfmdef 112 | % Find the PDF bounding box (first occurrence of /MediaBox in file) 113 | % without a .bb file. 114 | \@namedef{Gin@rule@.pdf}##1{{epspdfbb}{\Gin@ext}{##1}}% 115 | \def\Ginclude@epspdfbb{\Ginclude@eps}% 116 | \def\Gread@epspdfbb{\pgh@readepspdfbb}% 117 | \fi 118 | \fi 119 | } 120 | 121 | \AtBeginDocument\pgh@fixpdfbbread 122 | 123 | \endinput 124 | -------------------------------------------------------------------------------- /extra/small.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pts/pdfsizeopt/2bab16031dad854e42c2910859564d9a962bc16c/extra/small.pdf -------------------------------------------------------------------------------- /latest_libexec.txt: -------------------------------------------------------------------------------- 1 | # This config file contains the latest pdfsizeopt libexec download URLs in a 2 | # machine-readable format. It is useful for distribution package maintainers. 3 | # For the same URLs in a human-readable format and installation instructions, 4 | # see README.md . 
"""Floating point number utilities."""


def _SplitFloatRepr(r):
  """Splits the string form of a finite float to sign, digits and exponent.

  Args:
    r: repr(f) or a '%g'-formatted string of a finite float f.
  Returns:
    Tuple (m, r, es, e0, i), where m is '-' or '' (the sign), r is the input
    without the leading '-', es is r split on 'e' (the mantissa and
    optionally the exponent), e0 contains the digits of the mantissa with
    the dot, the trailing zeros and then the leading zeros removed (it is
    empty for zero), and i is 0 if the mantissa has no dot, otherwise minus
    the number of digits kept after the dot.
  """
  m = '-' * r.startswith('-')
  r = r.lstrip('-')
  es = r.split('e')
  assert len(es) in (1, 2)
  e0 = es[0].rstrip('0')
  assert e0[0] in '0123456789', e0  # Can start with 0, e.g. '0.00123'.
  i = e0.find('.') + 1
  if i > 0:  # Remove the dot, remember how many digits followed it.
    e0 = e0[:i - 1] + e0[i:]
    i = (i - 1) - len(e0)
  e0 = e0.lstrip('0')
  return m, r, es, e0, i


def FormatFloatShort(f, is_int_ok=False):
  """Formats a float accurately as a string as short as possible.

  Args:
    f: A value of type float.
    is_int_ok: If false, int(result) will raise a ValueError, because the result
      always contains 'e' or '.'. If true, it's OK for the result to contain
      only digits and '-' (and thus parse as an int).
  Returns:
    An str, for which float(result) == f, and len(result) is as short as
    possible.
  Raises:
    TypeError: If f is not a float.
  """
  if not isinstance(f, float):
    raise TypeError('float expected, got %s' % type(f).__name__)
  r = repr(f)
  if r[-1] not in '0123456789':  # 'inf', '-inf', 'nan' etc.
    return r
  m, r, es, e0, i = _SplitFloatRepr(r)
  if not e0:
    if is_int_ok:
      return m + '0'  # It's important that '-0' is different from '0'.
    return m + '0.'
  nd = len(e0)
  assert 1 <= nd <= 17, (r, es, nd)

  # Try to format it with 2 fewer digits, then with 1 fewer digit. This
  # helps if repr(f) has more digits than needed for an exact round-trip.
  rr = ''
  for fewer in (2, 1):
    if nd > fewer and not rr:
      rr = '%%.%dg' % (nd - fewer) % f
      if float(rr) != f or len(rr) >= len(r):
        rr = ''
  if rr:  # A shorter formatting has succeeded.
    m, r, es, e0, i = _SplitFloatRepr(rr)
    nd = len(e0)
    assert 1 <= nd <= 17, (r, es, nd)

  assert i <= 0, (r, es, e0, i)
  if len(es) > 1:
    i += int(es[1])
  if e0.endswith('0'):  # Move trailing zeros to the exponent.
    j = len(e0)
    e0 = e0.rstrip('0')
    i += j - len(e0)
  # Now e0 is the formatted significand (as an integer, no dots, the dot is
  # implied in the end), i is the exponent, m is the sign.

  if i > 1 or i < -2 -len(e0):
    if i == 2 and is_int_ok:
      return '%s%s00' % (m, e0)
    return '%s%se%d' % (m, e0, i)
  elif 1 <= -i <= len(e0):
    return '%s%s.%s' % (m, e0[:i], e0[i:])
  elif 0 <= i <= 1:  # Number of '0's added below: 0 or 1.
    return '%s%s%s%s' % (m, e0, '0' * i, '.' * (not is_int_ok))
  else:  # Number of '0's added below: 1 or 2.
    return '%s.%s%s' % (m, '0' * (-i - len(e0)), e0)
35 | % 36 | % According to https://github.com/pts/pdfsizeopt/issues/32, such a filter 37 | % incorrectly produces an empty output if applied directly after 38 | % `/ReusableStreamDecode filter' for some input, in Ghostscript 9.05 and 9.10. 39 | /NeedsFilterInBetween { 40 | /Filter .knownget not {null} if 41 | dup type /arraytype eq {dup length 0 eq {pop null} {0 get} ifelse} if 42 | /JBIG2Decode eq 43 | } def 44 | 45 | /ReadStreamFile { % ReadStreamFile 46 | % Reading to a string would fail for >65535 bytes (this is the maximum 47 | % string size in PostScript) 48 | %string currentfile exch readstring 49 | %not{/ReadStreamData /invalidfileaccess signalerror}if 50 | currentfile 51 | 1 index /Length get () /SubFileDecode filter 52 | << /CloseSource true /Intent 0 >> /ReusableStreamDecode filter 53 | %dup 0 setfileposition % by default 54 | 1 index NeedsFilterInBetween { 55 | % As a workaround, add a no-op filter between /ReusableStreamDecode and 56 | % /JBIG2Decode. 57 | << /CloseSource true >> 2 index /Length get () /SubFileDecode filter 58 | } if 59 | 60 | currentfile SkipWhitespaceRead 61 | (.) dup 0 3 index put exch pop % Convert char to 1-char string. 62 | currentfile 8 string readstring 63 | not{/ReadEndStream /invalidfileaccess signalerror}if 64 | concatstrings % concat (e) and (ndstream) 65 | (endstream) ne{/CompareEndStream /invalidfileaccess signalerror}if 66 | currentfile ReadWhitespaceChar pop 67 | currentfile 6 string readstring 68 | not{/ReadEndObj /invalidfileaccess signalerror}if 69 | (endobj) ne{/CompareEndObj /invalidfileaccess signalerror}if 70 | currentfile ReadWhitespaceChar pop 71 | } bind def 72 | 73 | /Map { % Map 74 | [ 3 1 roll forall ] 75 | } bind def 76 | 77 | % GetFilterAndDecodeParms 78 | % 79 | % Ghostscript 8.61 (or earlier) raises `/typecheck in --.reusablestreamdecode--' 80 | % if /Filter is not an array. 
For testing: pdf.a9p4/lme_v6.a9p4.pdf 81 | % 82 | % Ghostscript 8.61 (or earlier) raises `/typecheck in --.reusablestreamdecode--' 83 | % if /DecodeParms is not an array. 84 | % 85 | % Ghostscript 8.61 (or earlier) raises ``/undefined in --filter--'' 86 | % if there is a null in the DecodeParms. 87 | % 88 | % We add `/PDFRules true' for /ASCII85Decode, see pdf_base.ps why it's needed. 89 | /GetFilterAndDecodeParms { 90 | dup /Filter .knownget not {null} if 91 | dup null eq {pop []} if 92 | dup type /arraytype ne {1 array dup 3 -1 roll 0 exch put} if 93 | 1 index /DecodeParms .knownget not {null} if 94 | dup null eq {pop []} if 95 | dup type /arraytype ne {1 array dup 3 -1 roll 0 exch put} if 96 | 3 -1 roll pop % pop 97 | % stack: 98 | 1 index {type /nametype ne { 99 | pop pop /FilterNotName /invalidfileaccess signalerror} if} forall 100 | dup length 0 eq { % If is empty, fill it up with nulls. 101 | pop dup length mark exch 1 exch 1 exch {pop null} for 102 | counttomark array astore exch pop } if 103 | dup length 2 index length ne 104 | {pop pop 105 | /FilterLengthNeDecodeParmsLength /invalidfileaccess signalerror} if 106 | % Convert null in to << >>. 107 | [exch {dup null eq {pop 0 dict} if} forall] 108 | dup {type /dicttype ne { 109 | pop pop /DecodeParmNotDict /invalidfileaccess signalerror} if} forall 110 | % Add `/PDFRules true' for /ASCII85Decode, see pdf_base.ps. 111 | dup length 1 sub 0 exch 1 exch { 112 | 1 index exch dup 4 index exch get 1 index 4 index exch get 113 | % stack: *2 114 | exch /ASCII85Decode eq { 115 | dup length 1 add dict copy dup /PDFRules true put 116 | } if 117 | % stack: *2 118 | put % We've created our own above, we can mutate it. 119 | } for 120 | % stack: 121 | } def 122 | 123 | % DecompressStreamFile 124 | % 125 | /DecompressStreamFileWithReusableStreamDecode { 126 | exch 127 | % TODO(pts): Give these parameters to the /ReusableStreamDecode in 128 | % ReadStreamFile. 
129 | 5 dict begin 130 | /Intent 2 def % sequential access 131 | /CloseSource true def 132 | dup GetFilterAndDecodeParms 133 | /DecodeParms exch def /Filter exch def 134 | exch currentdict end 135 | % stack: 136 | /ReusableStreamDecode filter 137 | } bind def 138 | 139 | % DecompressStreamFile 140 | % 141 | % 142 | % Same as DecompressStreamFileWithReusableStreamDecode, but we don't use 143 | % /ReusableStreamDecode, because that would raise errors quickly 144 | % (at `filter' time) with corrupt or incomplete input. Instead of that, we 145 | % set up a filter chain. 146 | /DecompressStreamFileWithIndividualFilters { 147 | exch dup GetFilterAndDecodeParms 148 | % stack: 149 | 4 -1 roll 150 | % stack: 151 | 1 index length 1 sub 0 exch 1 exch { 152 | dup 3 index exch get exch 153 | 4 index exch get exch 154 | % stack: 155 | % 156 | exch filter 157 | } for 158 | % stack: 159 | exch pop exch pop 160 | % stack: 161 | } bind def 162 | 163 | /obj { % obj - 164 | pop 165 | save exch 166 | /_ObjNumber exch def 167 | % TODO(pts): Read here (not with `token', but recursively), so 168 | % don't redefine `stream'. 169 | } bind def 170 | 171 | % Sort an array, from Ghostscript's prfont.ps. 172 | /Sort { % Sort 173 | % Heapsort (algorithm 5.2.3H, Knuth vol. 2, p. 146), 174 | % modified for 0-origin indexing. 
*/ 175 | 10 dict begin 176 | /LT exch def 177 | /recs exch def 178 | /N recs length def 179 | N 1 gt { 180 | /l N 2 idiv def 181 | /r N 1 sub def { 182 | l 0 gt { 183 | /l l 1 sub def 184 | /R recs l get def 185 | } { 186 | /R recs r get def 187 | recs r recs 0 get put 188 | /r r 1 sub def 189 | r 0 eq { recs 0 R put exit } if 190 | } ifelse 191 | /j l def { 192 | /i j def 193 | /j j dup add 1 add def 194 | j r lt { 195 | recs j get recs j 1 add get LT { /j j 1 add def } if 196 | } if 197 | j r gt { recs i R put exit } if 198 | R recs j get LT not { recs i R put exit } if 199 | recs i recs j get put 200 | } loop 201 | } loop 202 | } if recs end 203 | } bind def 204 | 205 | /NameSort { 206 | {dup length string cvs exch dup length string cvs gt} Sort 207 | } bind def 208 | 209 | % Find an item in an array (using `eq' -- so the executable bit is discarded, 210 | % i.e. /foo and foo are equal). The index -1 is returned if item not found. 211 | /FindItem { % FindItem 212 | exch dup 0 exch 213 | { 3 index eq { exit } if 1 add } forall 214 | exch length 1 index eq { pop -1 } if exch pop 215 | } bind def 216 | 217 | /_S1 1 string def 218 | 219 | % Like `glyphshow' but uses `show' if the glyph name is in /Encoding. 220 | % This is useful because gs -sDEVICE=pdfwrite autodetects the /Encoding of 221 | % the emitted CFF fonts if /glyphshow is used, possibly emitting two CFF 222 | % fonts 223 | % if there is a character position conflict (e.g. /G and /Phi). No such 224 | % splitting happens with if `show' is used instead of `glyphshow'. 225 | % Stack use: <_EncodingDict> GlyphShowWithEncodingDict - 226 | /GlyphShowWithEncodingDict { 227 | 1 index .knownget { 228 | dup 255 gt { 229 | % This long /Encoding was created by `[exch {pop} forall] NameSort' 230 | % below. 
231 | pop 232 | (warning: using glyphshow for glyph encoded above 255: /) print dup = 233 | glyphshow 234 | } { 235 | _S1 exch 0 exch put _S1 show 236 | pop % pop the glyph name 237 | } ifelse 238 | } { 239 | (warning: using glyphshow for unencoded glyph: /) print dup = 240 | glyphshow 241 | } ifelse 242 | } bind def 243 | 244 | % A version of findfont which: 245 | % 246 | % * doesn't try to load fonts from dict 247 | % * doesn't use the Fontmap 248 | % * doesn't do font substitution 249 | % * doesn't do font aliasing 250 | % 251 | % See also gs_fonts.gs 252 | % 253 | % TryFindFont true 254 | % TryFindFont 255 | /TryFindFont { 256 | .FontDirectory 1 index .fontknownget { 257 | exch pop true 258 | } { 259 | pop false 260 | } ifelse 261 | } bind def 262 | 263 | % 264 | ''' 265 | 266 | TYPE1C_CONVERTER = r''' 267 | % 268 | % PDF Type1 font extraction and typesetter procset 269 | % by pts@fazekas.hu at Sun Mar 29 11:19:06 CEST 2009 270 | 271 | << 272 | /CompatibilityLevel 1.4 273 | /SubsetFonts false % GS ignores this for some fonts, no problem. 274 | /EmbedAllFonts true 275 | /Optimize true 276 | >> setdistillerparams 277 | .setpdfwrite 278 | 279 | /eexec { 280 | 1 index /FontName get userdict exch 281 | /_OrigFontName exch put eexec 282 | } bind def 283 | 284 | /stream { % stream - 285 | ReadStreamFile DecompressStreamFileWithReusableStreamDecode 286 | % 287 | exch pop 288 | % stack: (containing a Type1 font program) 289 | % Undefine all fonts before running our font program. 290 | systemdict /FontDirectory get {pop undefinefont} forall 291 | % Push a copy of userdict: userdict-copy. 292 | userdict dup length dict copy 293 | % .loadfont never leaves junk on the stack. 
294 | % .loadfont is better than `cvx exec', because .loadfont can load PFB fonts 295 | % (in addition to PFA fonts), 296 | % while `cvx exec' fails for PFB fonts with something like: 297 | % /syntaxerror in (bin obj seq, type=128, elements=1, size=59650, non-zero unused field) 298 | exch dup .loadfont closefile 299 | dup /_OrigFontName _OrigFontName put % Add to userdict-copy. 300 | % Copy from userdict-copy back to userdict. 301 | userdict dup {pop 1 index exch undef} forall copy pop 302 | systemdict /FontDirectory get 303 | dup length 0 eq {/NoFontDefined /invalidfileaccess signalerror} if 304 | _OrigFontName null eq { 305 | % /MultipleFontsDefined can happen, the eexec part of some Type 1 font 306 | % programs call `definefont' multiple times, e.g. for /Helvetica and 307 | % /Helvetica-Oblique. 308 | dup length 1 gt {/MultipleFontsDefined /invalidfileaccess signalerror} if 309 | dup length === 310 | [exch {pop} forall] 0 get % Convert FontDirectory to the name of our font 311 | dup /_OrigFontName exch def 312 | } { 313 | _OrigFontName known not {/FontNotFound /invalidaccess signalerror} if 314 | _OrigFontName 315 | } ifelse 316 | % stack: 317 | TryFindFont not { /FontNotInFindfont /invalidaccess signalerror} if 318 | dup length dict copy 319 | % Let the font name be /Obj68 etc. 320 | dup /FullName _ObjNumber 10 string cvs 321 | % pad to 10 digits for object unification in FixFontNameInType1C. 322 | dup (0000000000) exch length neg 10 add 0 exch 323 | getinterval exch concatstrings 324 | (Obj) exch concatstrings put 325 | dup dup /FullName get cvn /FontName exch put 326 | 327 | % We want to make sure that: 328 | % 329 | % S1. All glyphs in /CharStrings are part of the /Encoding array. This is 330 | % needed for Ghostscript 8.54, which would sometimes generate two (or 331 | % more?) PDF font objects if not all glyphs are encoded. 332 | % 333 | % S2. All non-/.notdef elements of the /Encoding array remain unchanged. 
334 | % This is needed because Adobe Acrobat uses the /Encoding in the CFF 335 | % if /BaseEncoding was not specified in the /Type/Encoding for 336 | % /Type/Font. This is according to pdf_reference_1.7.pdf. (xpdf and 337 | % evince use /BaseEncoding/StandardEncoding.) 338 | % 339 | % To do this, we first check that all glyphs in /CharStrings are part of 340 | % /Encoding. If not, we extend /Encoding to 256 elements (by adding 341 | % /.notdef{}s), and we start replacing /.notdef{}s at the end of /Encoding 342 | % by the missing keys from /CharStrings. 343 | 344 | % stack: 345 | % As a workaround for `S1' above, we skip a font with too many 346 | % /CharStrings. 347 | dup /CharStrings get length 256 lt { 348 | (obj encoding ) print _ObjNumber ===only ( ) print 349 | dup /Encoding .knownget not {[]} if === 350 | 351 | % Create /Encoding from sorted keys of /CharStrings. 352 | [1 index /CharStrings get {pop} forall] NameSort 353 | % Pad it to size 256. 354 | dup length 256 lt { [exch aload length 1 255 {pop/.notdef} for] } if 355 | 1 index exch /Encoding exch put 356 | 357 | dup /Encoding get << exch -1 exch { exch 1 add dup } forall pop >> 358 | % _EncodingDict maps glyph names in the /Encoding to their last encoded 359 | % value. Example: << /space 32 /A 65 >> 360 | /_EncodingDict exch def 361 | 362 | %dup /FID undef % undef not needed. 363 | % We have to unset /OrigFont (for Ghostscript 8.61) and /.OrigFont 364 | % (for GhostScript 8.54) here, because otherwise Ghostscript would put 365 | % the /FontName defined there to the PDF object /Type/FontDescriptor , thus 366 | % preventing us from identifying the output font by input object number. 
367 | dup /OrigFont undef % undef is OK even if /OrigFont doesn't exist 368 | dup /.OrigFont undef % undef is OK even if /.OrigFont doesn't exist 369 | dup /FontName get exch definefont 370 | % stack: 371 | (Type1CConverter: converting font /) print 372 | _OrigFontName =only 373 | ( to /) print 374 | dup /FontName get =only 375 | (\n) print flush 376 | dup /FontName get dup length string cvs 377 | systemdict /FontDirectory get { % Undefine all fonts except for 378 | pop dup 379 | dup length string cvs 2 index eq % Need cvs for eq comparison. 380 | {pop} {undefinefont} ifelse 381 | } forall 382 | pop % 383 | %systemdict /FontDirectory get {pop ===} forall 384 | 385 | dup setfont 386 | % TODO(pts): Check for embedding the base 14 fonts. 387 | % 388 | % * It is not enough to show only a few glyphs, because Ghostscript 389 | % sometimes ignores /SubsetFonts=false . 390 | % * 200 200 moveto is needed here, otherwise some characters would be too 391 | % far to the right so Ghostscript 8.61 would crop them from the page and 392 | % wouldn't include them to the fonts. 393 | % * We have to make sure that all glyphs are on the page -- otherwise 394 | % Ghostscript 8.61 becomes too smart by clipping the page and not embedding 395 | % the outliers. 396 | % * Using `show' instead of `glyphshow' to prevent Ghostscript from 397 | % splitting the output CFF font to two (or more) on auto-guessed 398 | % Encoding position conflict (such as /G and /Phi). 
399 | dup /CharStrings get [exch {pop} forall] NameSort { 400 | newpath 200 200 moveto 401 | _EncodingDict GlyphShowWithEncodingDict 402 | } forall 403 | } { 404 | (skipping big-CharStrings font obj ) print _ObjNumber === flush 405 | } ifelse 406 | currentdict /_EncodingDict undef 407 | pop % 408 | restore 409 | } bind def 410 | % 411 | 412 | (Type1CConverter: using interpreter ) print 413 | product =only ( ) print 414 | revision =only ( ) print % 854 means version 8.54 415 | revisiondate =only (\n) print 416 | ''' 417 | 418 | TYPE1C_PARSER = r''' 419 | % 420 | % Type1C font (CFF) parser procset 421 | % by pts@fazekas.hu at Tue May 19 22:46:15 CEST 2009 422 | 423 | % keys to omit from the font dictionary dump 424 | /OMIT << /FontName 1 /FID 1 /.OrigFont 1 425 | /OrigFont 1 /FAPI 1 >> def 426 | 427 | /_DataFile DataFile (w) file def % -sDataFile=... on the command line 428 | 429 | % Dump the specified value to the specified stream in a parsable form. 430 | % Dumps strings as hex (<...>). Dumps all arrays as [...], never {...}. The 431 | % motivation is to dump quickly, and read it back from Python quickly. Since 432 | % PdfObj.CompressValue called from PdfObj.ParseValueRecursive is slow on 433 | % (...) strings, we dump strings as <...>. 434 | /Dump { % Dump - 435 | dup type /dicttype eq { 436 | 1 index (<<) writestring 437 | { exch 2 index exch Dump 438 | 1 index ( ) writestring 439 | 1 index exch Dump 440 | dup ( ) writestring 441 | } forall 442 | (>>) writestring 443 | } { 444 | dup type /arraytype eq { 445 | 1 index ([) writestring 446 | { 1 index exch Dump 447 | dup ( ) writestring 448 | } forall 449 | (]) writestring 450 | } { 451 | dup type /stringtype eq { 452 | 1 index (<) writestring 453 | 1 index exch writehexstring 454 | (>) writestring 455 | } { 456 | write===only % Emits 0.0 for a float 0. 
457 | } ifelse 458 | } ifelse 459 | } ifelse 460 | } bind def 461 | 462 | % /LoadCff { 463 | % /FontSetInit /ProcSet findresource begin //true //false ReadData } bind def 464 | % but some autodetection of `//false'' above based on the Ghostscript version: 465 | % Since gs 8.64: 466 | % pdfdict /readType1C get --> 467 | % {1 --index-- --exch-- PDFfile --fileposition-- 3 1 --roll-- --dup-- true 468 | % resolvestream --dup-- readfontfilter 3 --index-- /FontDescriptor oget 469 | % /FontName oget 1 --index-- /FontSetInit /ProcSet --findresource-- --begin-- 470 | % true false ReadData {--exch-- --pop-- --exit--} --forall-- 7 1 --roll-- 471 | % --closefile-- --closefile-- --pop-- PDFfile 3 -1 --roll-- 472 | % --setfileposition-- --pop-- --pop--} 473 | % Till gs 8.61: 474 | % GS_PDF_ProcSet /FRD get --> 475 | % {/FontSetInit /ProcSet findresource begin //true ReadData} 476 | GS_PDF_ProcSet /FRD .knownget not { pdfdict /readType1C get } if 477 | dup /FontSetInit FindItem 478 | dup 0 lt { /MissingFontSetInit /invalidfileaccess signalerror } if 479 | 1 index /ReadData FindItem 480 | dup 0 lt { /MissingReadData /invalidfileaccess signalerror } if 481 | 1 index sub 1 add getinterval 482 | cvx bind /LoadCff exch def 483 | % Now we have one of these: 484 | % /LoadCff { /FontSetInit /ProcSet findresource begin //true ReadData 485 | % pop } bind def % gs 8.62 or earlier 486 | % /LoadCff { /FontSetInit /ProcSet findresource begin //true //false ReadData 487 | % pop } bind def % gs 8.63 or later 488 | 489 | /stream { % stream - 490 | ReadStreamFile DecompressStreamFileWithReusableStreamDecode 491 | % 492 | systemdict /FontDirectory get {pop undefinefont} forall 493 | % CFF font loading can fail 2 ways: either LoadCff fails (caught by 494 | % `stopped'), or LoadCff succeeds and TryFindFont isn't able to find the font. 495 | 496 | /_MarkerCDS countdictstack def 497 | <<>> dup /_Marker exch def % eq will compare it by reference. 
498 | 1 index /MY exch { LoadCff } stopped 499 | 500 | % Now clean up the stack and the dict stack. 501 | % 502 | % * If there was an error (stopped returns true), 503 | % the stack looks like: _Marker false true, and the dict 504 | % stack contains 2 extra dicts. 505 | % * If no error with gs >=8.63, the stack looks like: false, 506 | % pushed by ReadData. 507 | % * If no error with gs <=8.62, the stack looks like: false. 508 | {_Marker eq {exit} if} loop % Pop _Marker and everything on top. 509 | _MarkerCDS 1 add 1 countdictstack {pop end} for % Pop from the dictstack. 510 | 511 | closefile % Is this needed? 512 | % 513 | pop 514 | _DataFile _ObjNumber write===only 515 | _DataFile ( <<\n) writestring 516 | /MY TryFindFont { % This can fail if the font data is corrupt. 517 | dup /FontType get 2 ne {/NotType2Font /invalidfileaccess signalerror} if 518 | % SUXX: the CFF /FontName got lost (overwritten by /MY above) 519 | { 520 | exch dup OMIT exch known not 521 | { _DataFile exch write===only 522 | _DataFile ( ) writestring 523 | _DataFile exch Dump 524 | _DataFile (\n) writestring} {pop pop} ifelse 525 | } forall 526 | } if 527 | _DataFile (>>\n) writestring 528 | systemdict /FontDirectory get {pop undefinefont} forall 529 | restore % save created by /obj 530 | } bind def 531 | % 532 | 533 | (Type1CParser: using interpreter ) print 534 | product =only ( ) print 535 | revision =only ( ) print % 854 means version 8.54 536 | revisiondate =only (\n) print 537 | ''' 538 | 539 | TYPE1C_GENERATOR = r''' 540 | % 541 | % PDF Type1 font extraction and typesetter procset 542 | % by pts@fazekas.hu at Sun Mar 29 11:19:06 CEST 2009 543 | 544 | << 545 | /CompatibilityLevel 1.4 546 | /SubsetFonts false % GS ignores this for some fonts, no problem. 547 | /EmbedAllFonts true 548 | /Optimize true 549 | >> setdistillerparams 550 | .setpdfwrite 551 | 552 | /endobj { % endobj - 553 | % Undefine all fonts before running our font program. 
554 | systemdict /FontDirectory get {pop undefinefont} forall 555 | /_FontName _ObjNumber 10 string cvs 556 | % pad to 10 digits for object unification in FixFontNameInType1C. 557 | dup (0000000000) exch length neg 10 add 0 exch 558 | getinterval exch concatstrings 559 | (Obj) exch concatstrings cvn def 560 | dup /FontName _FontName put 561 | 562 | % Replace the /Encoding array with the glyph names in /CharStrings, padded 563 | % with /.notdef{}s. This hack is needed for Ghostscript 8.54, which would 564 | % sometimes generate two (or more?) PDF font objects if not all characters 565 | % are encoded. 566 | % TODO(pts): What if /Encoding longer than 256? 567 | dup /CharStrings get 568 | [exch {pop} forall] NameSort 569 | [exch aload length 1 255 {pop/.notdef} for] 570 | 1 index exch /Encoding exch put 571 | 572 | % Regenerate _EncodingDict, now with /.notdef 573 | dup /Encoding .knownget not {[]} if 574 | << exch -1 exch { dup null eq { pop /.notdef } if 575 | exch 1 add dup } forall pop >> 576 | /_EncodingDict exch def 577 | 578 | _FontName exch definefont % includes findfont 579 | % TODO: (Type1Generator: ...) print 580 | dup setfont 581 | % * It is not enough to show only a few glyphs, because Ghostscript 582 | % sometimes ignores /SubsetFonts=false . 583 | % * 200 200 moveto is needed here, otherwise some characters would be too 584 | % far to the right so Ghostscript 8.61 would crop them from the page and 585 | % wouldn't include them to the fonts. 586 | % * We have to make sure that all glyphs are on the page -- otherwise 587 | % Ghostscript 8.61 becomes too smart by clipping the page and not embedding 588 | % the outliers. 589 | % * Using `show' instead of `glyphshow' to prevent Ghostscript from 590 | % splitting the output CFF font to two (or more) on auto-guessed 591 | % Encoding position conflict (such as /G and /Phi). 
592 | dup /CharStrings get [exch {pop} forall] NameSort { 593 | newpath 200 200 moveto 594 | _EncodingDict GlyphShowWithEncodingDict 595 | } forall 596 | currentdict /_EncodingDict undef 597 | %dup /CharStrings get {pop dup === glyphshow} forall 598 | %dup /CharStrings get [ exch {pop} forall ] 0 get glyphshow 599 | pop % 600 | %showpage % not needed 601 | restore 602 | } bind def 603 | 604 | (Type1CGenerator: using interpreter ) print 605 | product =only ( ) print 606 | revision =only ( ) print % 854 means version 8.54 607 | revisiondate =only (\n) print 608 | % 609 | 610 | ''' 611 | 612 | IMAGE_RENDERER = r''' 613 | % 614 | % PDF image renderer procset 615 | % Sun Apr 5 15:58:02 CEST 2009 616 | 617 | /stream { % stream - 618 | ReadStreamFile 619 | % stack: 620 | 621 | 1 index 622 | (ImageRenderer: rendering image XObject ) print _ObjNumber =only 623 | ( width=) print dup /Width get =only 624 | ( height=) print dup /Height get =only 625 | ( bpc=) print dup /BitsPerComponent get =only 626 | ( colorspace=) print dup /ColorSpace get 627 | % Show [/Indexed /DeviceRGB] instead of longer array. 628 | dup type /arraytype eq {dup length 2 gt {0 2 getinterval}if }if ===only 629 | ( filter=) print dup /Filter .knownget not {null} if ===only 630 | ( decodeparms=) print dup /DecodeParms .knownget not {null} if ===only 631 | ( device=) print currentpagedevice 632 | /OutputDevice get dup length string cvs print 633 | (\n) print flush 634 | pop 635 | % stack: 636 | DecompressStreamFileWithIndividualFilters 637 | % stack: (containing image /DataSource) 638 | 639 | 9 dict begin % Image dictionary 640 | /DataSource exch def 641 | % Stack: 642 | dup /BitsPerComponent get /BitsPerComponent exch def 643 | dup /Width get /Width exch def 644 | dup /Height get /Height exch def 645 | dup /Decode .knownget {/Decode exch def} if 646 | % We cannot affect the file name of -sOutputFile=%d.png , doing a 647 | % ``<< /PageCount ... >> setpagedevice'' has no effect. 
648 | % It's OK to change /PageSize for each page. 649 | << /PageSize [Width Height] >> setpagedevice 650 | % This must come after setpagedevice to take effect. 651 | dup /ColorSpace get setcolorspace 652 | /ImageType 1 def 653 | dup /Height get [1 0 0 -1 0 0] exch 5 exch 3 copy put pop pop 654 | /ImageMatrix exch def 655 | DataSource 656 | currentdict end 657 | % Stack: 658 | % This renders the partial image in case of /ioerror from a filter. 659 | {image} stopped {(ImageRenderer: warning: corrupt image data\n)print flush}if 660 | showpage 661 | closefile 662 | % Stack: 663 | pop restore 664 | } bind def 665 | % 666 | 667 | ''' 668 | -------------------------------------------------------------------------------- /lint.sh: -------------------------------------------------------------------------------- 1 | #! /bin/sh 2 | # by pts@fazekas.hu at Tue Oct 8 14:51:23 CEST 2013 3 | 4 | PYTHON_FILES="$(find pdfsizeopt_test.py lib extra -name '*.py' | 5 | grep -v '^lib/pdfsizeopt/pdfsizeopt_argparse[.]py$')" 6 | if ! test "$PYTHON_FILES"; then 7 | echo "No Python source files found." >&2 8 | exit 2 9 | fi 10 | #echo "$PYTHON_FILES" 11 | pep8 $PYTHON_FILES | grep -vE '[0-9]: E(111|203) ' | tee pep8.out || : 12 | if test -s pep8.out; then 13 | echo "Found pep8 warnings, see above." >&2 14 | exit 2 15 | fi 16 | echo "No lint warnings." >&2 17 | -------------------------------------------------------------------------------- /mksingle.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/python 2 | # by pts@fazekas.hu at Fri Sep 1 16:34:46 CEST 2017 3 | 4 | """Build single-file script for Unix: pdfsizeopt.single.""" 5 | 6 | import cStringIO 7 | import os 8 | import os.path 9 | import re 10 | import subprocess 11 | import sys 12 | import time 13 | import token 14 | import tokenize 15 | import zipfile 16 | 17 | 18 | def Minify(source, output_func): 19 | """Minifies Python (2.4, 2.5, 2.6 or 2.7) source code. 
20 | 21 | This function was tested and it works identically (consistently) in Python 22 | 2.4, 2.5, 2.6 and 2.7. 23 | 24 | The output will end with a newline, unless empty. 25 | 26 | This function does this: 27 | 28 | * Removes comments. 29 | * Compresses indentation to 1 space at a time. 30 | * Removes empty lines and consecutive duplicate newlines. 31 | * Removes newlines within expressions. 32 | * Removes unnecessary whitespace within a line (e.g. '1 + 2' to '1+2'). 33 | * Removes strings at the beginning of the expression (including docstring). 34 | * Removes even the first comment line with '-*- coding '... . 35 | 36 | This function doesn't do these: 37 | 38 | * Removing the final newline ('\\n'). 39 | * Shortening the names of local variables. 40 | * Making string literals shorter by better escaping etc. 41 | * Compressing compound statements to 1 line, e.g. 42 | 'if x:\\ny=5\\n' to 'if x:y=5\\n'. 43 | * Removing unnecessary parentheses, e.g. '1+(2*3)' to '1+2*3'. 44 | * Constant folding, e.g. '1+(2*3)' to '7'. 45 | * Concatenation of string literals, e.g. '"a"+"b"' to '"ab"', or 46 | '"a""b"' to '"ab"'. 47 | * Separating expressions with ';' instead of newline + indent. 48 | * Any obfuscation. 49 | * Any general compression (such as Flate, LZMA, bzip2). 50 | 51 | Args: 52 | source: Python source code to minify. Can be str, buffer (or anything 53 | convertible to a buffer, e.g. bytearray), a readline method of a 54 | file-object or an iterable of line strs. 55 | output_func: Function which will be called with str arguments for each 56 | output piece. 
57 | """ 58 | if isinstance(source, unicode): 59 | raise TypeError 60 | try: 61 | buf = buffer(source) 62 | except TypeError: 63 | buf = None 64 | if buf is not None: 65 | import cStringIO 66 | # This also works, except it's different at the end of the partial line: 67 | # source = iter(line + '\n' for line in str(buf).splitlines()).next 68 | source = cStringIO.StringIO(buf).readline 69 | elif not callable(source): 70 | # Treat source as an iterable of lines. Add trailing '\n' if needed. 71 | source = iter( 72 | line + '\n' * (not line.endswith('\n')) for line in source).next 73 | 74 | _COMMENT, _NL = tokenize.COMMENT, tokenize.NL 75 | _NAME, _NUMBER, _STRING = token.NAME, token.NUMBER, token.STRING 76 | _NEWLINE, _INDENT, _DEDENT = token.NEWLINE, token.INDENT, token.DEDENT 77 | _COMMENT_OR_NL = (_COMMENT, _NL) 78 | _NAME_OR_NUMBER = (_NAME, _NUMBER) 79 | 80 | i = 0 # Indentation. 81 | is_at_bol = is_at_bof = 1 # Beginning of line and file. 82 | is_empty_indent = 0 83 | pt, ps = -1, '' # Previous token. 84 | # There are small differences in tokenize.generate_tokens in Python 85 | # versions, but they don't affect us, so we don't care: 86 | # * In Python <=2.4, the final DEDENTs and ENDMARKER are not yielded. 87 | # * In Python <=2.5, the COMMENT ts contains the '\n', and a separate 88 | # NL is not generated. 89 | for tt, ts, _, _, _ in tokenize.generate_tokens(source): 90 | if tt == _INDENT: 91 | i += 1 92 | is_empty_indent = 1 93 | elif tt == _DEDENT: 94 | if is_empty_indent: 95 | output_func(' ' * i) # TODO(pts): Merge with previous line. 96 | output_func('pass\n') 97 | is_empty_indent = 0 98 | i -= 1 99 | elif tt == _NEWLINE: 100 | if not is_at_bol: 101 | output_func('\n') 102 | is_at_bol, pt, ps = 1, -1, '' 103 | elif (tt == _STRING and is_at_bol or # Module-level docstring etc. 
104 | tt in _COMMENT_OR_NL): 105 | pass 106 | else: 107 | if is_at_bol: 108 | output_func(' ' * i) 109 | is_at_bol = is_at_bof = 0 110 | if pt in _NAME_OR_NUMBER and (tt in _NAME_OR_NUMBER or 111 | (tt == _STRING and ts[0] in 'rb')): 112 | output_func(' ') 113 | output_func(ts) 114 | pt, ps, is_empty_indent = tt, ts, 0 115 | if is_empty_indent: 116 | output_func(' ' * i) 117 | output_func('pass\n') 118 | 119 | 120 | # We could support \r and \t outside strings, Minify would remove them. 121 | UNSUPPORTED_CHARS_RE = re.compile(r'[^\na -~]+') 122 | 123 | 124 | def MinifyFile(file_name, code_orig): 125 | i = code_orig.find('\n') 126 | if i >= 0: 127 | line1 = code_orig[:i] 128 | if '-*- coding: ' in line1: 129 | # We could support them by keeping this comment, but instead we opt 130 | # for fully ASCII Python input files. 131 | raise ValueError('-*- coding declarations not supported.') 132 | match = UNSUPPORTED_CHARS_RE.search(code_orig) 133 | if match: 134 | raise ValueError('Unsupported chars in source: %r' % match.group(0)) 135 | compile(code_orig, file_name, 'exec') # Check for syntax errors. 136 | output = [] 137 | Minify(code_orig, output.append) 138 | code_mini = ''.join(output) 139 | compile(code_mini, file_name, 'exec') # Check for syntax errors. 140 | return code_mini 141 | 142 | # It's OK that this doesn't support the full PostScript syntax, it's enough to 143 | # support whatever PostScript procsets in pdfsizeopt have. 144 | # 145 | # This doesn't support string literals with unescaped nested parens, e.g. 146 | # '(())'. 147 | # 148 | # This doesn't support <0a> hex string literals or ASCII85 string literals. 149 | POSTSCRIPT_TOKEN_RE = re.compile( 150 | r'%[^\r\n]*|' # Comment. 151 | r'[\0\t\n\r\f ]+|' # Whitespace. 152 | r'(\((?:[^()\\]+|(?s)\\.)*\))|' # 1: String literal. 153 | r'(<<|>>|[{}\[\]])|' # 2: Token which stops the previous token. 154 | r'([^\0\t\n\r\f %(){}<>\[\]]+)|' # 3. Multi-character token, '/' included. 155 | r'(?s)(.)') # 4. 
Anything else we don't recognize. 156 | 157 | 158 | def MinifyPostScript(pscode): 159 | output = [' '] # Sentinel for output[-1][-1]. 160 | for match in POSTSCRIPT_TOKEN_RE.finditer(pscode): 161 | if match.group(1): 162 | output.append(match.group(1)) 163 | elif match.group(2): 164 | output.append(match.group(2)) 165 | elif match.group(3): 166 | t = match.group(3) 167 | if t[0] != '/' and output[-1][-1] not in ')<>{}[]': 168 | output.append(' ') 169 | output.append(t) 170 | elif match.group(4): 171 | i = match.start() 172 | raise ValueError('Unknown PostScript syntax: %r' % pscode[i : i + 20]) 173 | output[0] = '' # Remove sentinel. 174 | return ''.join(output) 175 | 176 | 177 | def MinifyPostScriptProcsets(file_name, code_orig): 178 | code_obj = compile(code_orig, file_name, 'exec') 179 | globals_dict = {} 180 | exec code_obj in globals_dict 181 | for name in sorted(globals_dict): 182 | if name.startswith('__'): 183 | del globals_dict[name] 184 | names, pscodes = [], [] 185 | for name, pscode in sorted(globals_dict.iteritems()): 186 | names.append(name) 187 | if not isinstance(pscode, str): 188 | raise ValueError('Expected pscode as str, got: %r' % type(pscode)) 189 | pscode = MinifyPostScript(pscode) 190 | if '%%' in pscode: 191 | raise ValueError('Unexpected %% in minified pscode.') 192 | pscodes.append(pscode) 193 | if not pscodes: 194 | return '' 195 | pscodes_str = '\n%%'.join(pscodes) 196 | assert "'''" not in pscodes_str 197 | return "%s=r'''%s\n'''.split('%%%%')" % (','.join(names), pscodes_str) 198 | 199 | 200 | # We need a file other than __main__.py, because 'import __main__' in 201 | # SCRIPT_PREFIX is a no-op, and it doesn't load __main__.py. 
202 | M_PY_CODE = r''' 203 | import sys 204 | 205 | if not ((2, 4) <= sys.version_info[:2] < (3, 0)): 206 | sys.stderr.write( 207 | 'fatal: Python version 2.4, 2.5, 2.6 or 2.7 needed for: %s\n' % sys.path[0]) 208 | sys.exit(1) 209 | 210 | from pdfsizeopt import main 211 | sys.exit(main.main(sys.argv, zip_file=sys.path[0])) 212 | '''.strip() 213 | 214 | 215 | SCRIPT_PREFIX = r'''#!/bin/sh -- 216 | # 217 | # pdfsizeopt: PDF file size optimizer (single-file script for Unix) 218 | # 219 | # You need Python 2.4, 2.5, 2.6 or 2.7 to run this script. The shell script 220 | # below tries to find such an interpreter and then runs it. 221 | # 222 | # If you have Python 2.6 or Python 2.7, you can also run it directly with 223 | # Python, otherwise you have to run it as a shell script. 224 | # 225 | 226 | P="$(readlink "$0" 2>/dev/null)" 227 | test "$P" && test "${P#/}" = "$P" && P="${0%/*}/$P" 228 | test "$P" || P="$0" 229 | Q="${P%/*}"/pdfsizeopt_libexec/python 230 | test -f "$Q" && exec "$Q" -E -- "$P" ${1+"$@"} 231 | type python2.7 >/dev/null 2>&1 && exec python2.7 -- "$P" ${1+"$@"} 232 | type python2.6 >/dev/null 2>&1 && exec python2.6 -- "$P" ${1+"$@"} 233 | type python2.5 >/dev/null 2>&1 && exec python2.5 -c"import sys;del sys.argv[0];sys.path[0]=sys.argv[0];import m" "$P" ${1+"$@"} 234 | type python2.4 >/dev/null 2>&1 && exec python2.4 -c"import sys;del sys.argv[0];sys.path[0]=sys.argv[0];import m" "$P" ${1+"$@"} 235 | exec python -c"import sys;del sys.argv[0];sys.path[0]=sys.argv[0];import m" "$P" ${1+"$@"} 236 | exit 1 237 | 238 | ''' 239 | 240 | def new_zipinfo(file_name, file_mtime, permission_bits=0644): 241 | zipinfo = zipfile.ZipInfo(file_name, file_mtime) 242 | zipinfo.external_attr = (0100000 | (permission_bits & 07777)) << 16 243 | return zipinfo 244 | 245 | 246 | def main(argv): 247 | os.chdir(os.path.dirname(__file__)) 248 | assert os.path.isfile('lib/pdfsizeopt/main.py') 249 | zip_output_file_name = 't.zip' 250 | single_output_file_name = 
'pdfsizeopt.single' 251 | try: 252 | os.remove(zip_output_file_name) 253 | except OSError: 254 | pass 255 | 256 | zf = zipfile.ZipFile(zip_output_file_name, 'w', zipfile.ZIP_DEFLATED) 257 | time_now = time.localtime()[:6] 258 | try: 259 | for file_name in ( 260 | # 'pdfsizeopt/pdfsizeopt_pargparse.py', # Not needed. 261 | 'pdfsizeopt/__init__.py', 262 | 'pdfsizeopt/cff.py', 263 | 'pdfsizeopt/float_util.py', 264 | 'pdfsizeopt/main.py'): 265 | code_orig = open('lib/' + file_name, 'rb').read() 266 | # The zip(1) command also uses localtime. The ZIP file format doesn't 267 | # store the time zone. 268 | file_mtime = time.localtime(os.stat('lib/' + file_name).st_mtime)[:6] 269 | code_mini = MinifyFile(file_name, code_orig) 270 | # Compression effort doesn't matter, we run advzip below anyway. 271 | zf.writestr(new_zipinfo(file_name, file_mtime), code_mini) 272 | del code_orig, code_mini # Save memory. 273 | 274 | # TODO(pts): Can we use `-m m'? Does it work in Python 2.0, 2.1, 2.2 and 275 | # 2.3? (So that we'd reach the proper error message.) 
276 | zf.writestr(new_zipinfo('m.py', time_now), 277 | MinifyFile('m.py', M_PY_CODE)) 278 | 279 | zf.writestr(new_zipinfo('__main__.py', time_now), 280 | 'import m') 281 | 282 | file_name = 'pdfsizeopt/psproc.py' 283 | code_orig = open('lib/' + file_name, 'rb').read() 284 | file_mtime = time.localtime(os.stat('lib/' + file_name).st_mtime)[:6] 285 | code_mini = MinifyPostScriptProcsets(file_name, code_orig) 286 | zf.writestr(new_zipinfo(file_name, file_mtime), code_mini) 287 | finally: 288 | zf.close() 289 | 290 | subprocess.check_call(('advzip', '-qz4', '--', zip_output_file_name)) 291 | 292 | f = open(zip_output_file_name, 'rb') 293 | try: 294 | data = f.read() 295 | finally: 296 | f.close() 297 | os.remove(zip_output_file_name) 298 | 299 | f = open(single_output_file_name, 'wb') 300 | try: 301 | f.write(SCRIPT_PREFIX) 302 | f.write(data) 303 | finally: 304 | f.close() 305 | 306 | os.chmod(single_output_file_name, 0755) 307 | 308 | # Size reductions of pdfsizeopt.single: 309 | # 310 | # * 115100 bytes: mksingle.sh, before this script. 311 | # * 68591 bytes: Python minification, advzip, SCRIPT_PREFIX improvements. 312 | # * 63989 bytes: PostScript minification. 
313 | print >>sys.stderr, 'info: created %s (%d bytes)' % ( 314 | single_output_file_name, os.stat(single_output_file_name).st_size) 315 | 316 | if __name__ == '__main__': 317 | sys.exit(main(sys.argv)) 318 | -------------------------------------------------------------------------------- /pdfsizeopt: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | """:" # pdfsizeopt: PDF file size optimizer 4 | 5 | P="$(readlink "$0" 2>/dev/null)" 6 | test "$P" && test "${P#/}" = "$P" && P="${0%/*}/$P" 7 | test "$P" || P="$0" 8 | Q="${P%/*}"/pdfsizeopt_libexec/python 9 | test -f "$Q" && exec "$Q" -E -- "$P" ${1+"$@"} 10 | type -p python2.7 >/dev/null 2>&1 && exec python2.7 -- "$P" ${1+"$@"} 11 | type -p python2.6 >/dev/null 2>&1 && exec python2.6 -- "$P" ${1+"$@"} 12 | type -p python2.5 >/dev/null 2>&1 && exec python2.5 -- "$P" ${1+"$@"} 13 | type -p python2.4 >/dev/null 2>&1 && exec python2.4 -- "$P" ${1+"$@"} 14 | exec python -- "$P" ${1+"$@"}; exit 1 15 | 16 | This is a Python 2.x script, it works with Python 2.4, 2.5, 2.6 and 2.7. It 17 | doesn't work with Python 3.x. Feel free to replace the #! line with 18 | `#! /usr/bin/python', `#! /usr/bin/env python' or whatever suits you best. 
19 | """ 20 | 21 | import os 22 | import os.path 23 | import sys 24 | 25 | if not ((2, 4) <= sys.version_info[:2] < (3, 0)): 26 | sys.stderr.write( 27 | 'fatal: Python version 2.4, 2.5, 2.6 or 2.7 needed for: %s\n' % __file__) 28 | sys.exit(1) 29 | 30 | script_dir = os.path.dirname(__file__) 31 | try: 32 | __file__ = os.path.join(script_dir, os.readlink(__file__)) 33 | script_dir = os.path.dirname(__file__) 34 | except (OSError, AttributeError, NotImplementedError): 35 | pass 36 | if os.path.isfile(os.path.join( 37 | script_dir, 'lib', 'pdfsizeopt', 'main.py')): 38 | sys.path[0] = os.path.join(script_dir, 'lib') 39 | 40 | from pdfsizeopt import main 41 | sys.exit(main.main(sys.argv, script_dir=script_dir)) 42 | -------------------------------------------------------------------------------- /pdfsizeopt.single: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pts/pdfsizeopt/2bab16031dad854e42c2910859564d9a962bc16c/pdfsizeopt.single -------------------------------------------------------------------------------- /pts_pdfsizeopt2009/README.txt: -------------------------------------------------------------------------------- 1 | The article was compiled on Linux with TeX Live 2008. No overfull 2 | or underfull \hbox or \vbox or undefined reference or any other warning 3 | should be reported. Compilation instructions: 4 | 5 | pdflatex pts_pdfsizeopt2009 6 | bibtex pts_pdfsizeopt2009 7 | pdflatex pts_pdfsizeopt2009 8 | pdflatex pts_pdfsizeopt2009 9 | 10 | I haven't optimized for float placement or page breaks. Doing so may reduce 11 | the page count from 21 to 20. 12 | -------------------------------------------------------------------------------- /pts_pdfsizeopt2009/ltugbib.bst: -------------------------------------------------------------------------------- 1 | % This is the file `ltugbib.bst', used by the harvardcite option 2 | % to the ltug* LaTeX classes. 
3 | % 4 | % \iffalse (this is a meta-comment (so they say)) 5 | % Copyright 1995,1996,1997,1998,2000,2004,2006,2007,2009 TeX Users Group. 6 | % 7 | % It may be distributed and/or modified under the 8 | % conditions of the LaTeX Project Public License, either version 1.3 9 | % of this license or (at your option) any later version. 10 | % The latest version of this license is in 11 | % http://www.latex-project.org/lppl.txt 12 | % and version 1.3 or later is part of all distributions of LaTeX 13 | % version 2003/12/01 or later. 14 | % 15 | % This file has the LPPL maintenance status "maintained". 16 | % 17 | % The Current Maintainer of this work is the TeX Users Group 18 | % (http://tug.org/TUGboat). 19 | % 20 | % The list of all files belonging to this package is given in the file 21 | % `manifest.txt'. 22 | % 23 | % The list of derived (unpacked) files belonging to the distribution 24 | % and covered by LPPL is defined by the unpacking scripts (with 25 | % extension .ins) which are part of the distribution. 26 | % \fi 27 | % 28 | % 7-Jun-04: (all related code marked OP) handle a title ending in, e.g., a 29 | % question-mark; otherwise the same as the (CTAN-time) 9-Apr-02 version: 30 | % 31 | % This file used to be generated from tugboat.dtx, but is now distributed 32 | % as a stand-alone file. 
33 | % 34 | ENTRY 35 | { address 36 | author 37 | booktitle 38 | chapter 39 | edition 40 | editor 41 | howpublished 42 | institution 43 | journal 44 | key 45 | month 46 | note 47 | number 48 | organization 49 | pages 50 | publisher 51 | school 52 | series 53 | title 54 | type 55 | volume 56 | year 57 | } 58 | {} 59 | { label extra.label sort.label long.label short.label } 60 | 61 | INTEGERS { output.state before.all mid.sentence after.sentence after.block 62 | suppress.period } % OP: after quoted title (trivalued) 63 | 64 | FUNCTION {init.state.consts} 65 | { #0 'before.all := 66 | #1 'mid.sentence := 67 | #2 'after.sentence := 68 | #3 'after.block := 69 | #0 'suppress.period := % OP: initialize to FALSE 70 | } 71 | 72 | STRINGS { s t } 73 | 74 | FUNCTION {output.nonnull} 75 | { 's := 76 | output.state mid.sentence = 77 | { ", " * write$ } 78 | { output.state after.block = 79 | { suppress.period #2 = % OP: check for suppression this time 80 | { #0 'suppress.period := } % suppress, reset to FALSE 81 | 'add.period$ 82 | if$ 83 | write$ 84 | newline$ 85 | "\newblock " write$ 86 | } 87 | { output.state before.all = 88 | 'write$ 89 | { add.period$ " " * write$ } 90 | if$ 91 | } 92 | if$ 93 | mid.sentence 'output.state := 94 | } 95 | if$ 96 | s 97 | } 98 | 99 | FUNCTION {output} 100 | { duplicate$ empty$ 101 | 'pop$ 102 | 'output.nonnull 103 | if$ 104 | } 105 | 106 | FUNCTION {output.check} 107 | { 't := 108 | duplicate$ empty$ 109 | { pop$ "empty " t * " in " * cite$ * warning$ } 110 | 'output.nonnull 111 | if$ 112 | } 113 | 114 | FUNCTION {output.bibitem} 115 | { newline$ 116 | "\bibitem[" write$ 117 | "\protect\citeauthoryear{" long.label * "}{" * write$ 118 | short.label write$ 119 | "}{" year duplicate$ empty$ 120 | { pop$ "????" 
} 121 | 'skip$ 122 | if$ 123 | * extra.label * "}]{" * write$ 124 | cite$ write$ 125 | "}" write$ 126 | newline$ 127 | "" 128 | before.all 'output.state := 129 | } 130 | 131 | FUNCTION {fin.entry} 132 | { add.period$ 133 | write$ 134 | newline$ 135 | } 136 | 137 | FUNCTION {new.block} 138 | { output.state before.all = 139 | 'skip$ 140 | { after.block 'output.state := } 141 | if$ 142 | suppress.period #1 = % OP: check to suppress period 143 | { #2 'suppress.period := } % for current (1st) output 144 | 'skip$ 145 | if$ 146 | } 147 | 148 | FUNCTION {new.sentence} 149 | { output.state after.block = 150 | 'skip$ 151 | { output.state before.all = 152 | 'skip$ 153 | { after.sentence 'output.state := } 154 | if$ 155 | } 156 | if$ 157 | } 158 | 159 | FUNCTION {not} 160 | { { #0 } 161 | { #1 } 162 | if$ 163 | } 164 | 165 | FUNCTION {and} 166 | { 'skip$ 167 | { pop$ #0 } 168 | if$ 169 | } 170 | 171 | FUNCTION {or} 172 | { { pop$ #1 } 173 | 'skip$ 174 | if$ 175 | } 176 | 177 | FUNCTION {new.block.checkb} 178 | { empty$ 179 | swap$ empty$ 180 | and 181 | 'skip$ 182 | 'new.block 183 | if$ 184 | } 185 | 186 | FUNCTION {field.or.null} 187 | { duplicate$ empty$ 188 | { pop$ "" } 189 | 'skip$ 190 | if$ 191 | } 192 | 193 | FUNCTION {emphasize} 194 | { duplicate$ empty$ 195 | { pop$ "" } 196 | { "{\em " swap$ * "}" * } 197 | if$ 198 | } 199 | 200 | FUNCTION {bolden} 201 | { duplicate$ empty$ 202 | { pop$ "" } 203 | { "{\bf " swap$ * "}" * } 204 | if$ 205 | } 206 | INTEGERS { nameptr namesleft numnames } 207 | 208 | FUNCTION {format.names} 209 | { 's := 210 | #1 'nameptr := 211 | s num.names$ 'numnames := 212 | numnames 'namesleft := 213 | { namesleft #0 > } 214 | { s nameptr 215 | nameptr #1 = 216 | { "{vv~}{ll}{, jj}{, ff}" format.name$ } 217 | { "{f.~}{vv~}{ll}{, jj}" format.name$ } 218 | if$ 219 | 't := 220 | nameptr #1 > 221 | { 222 | namesleft #1 > 223 | { ", " * t * } 224 | { 225 | numnames #1 > 226 | { "," * } 227 | 'skip$ 228 | if$ 229 | t "others" = 230 | { " \etal{}" * } 231 
| { " and " * t * } 232 | if$ 233 | } 234 | if$ 235 | } 236 | 't 237 | if$ 238 | nameptr #1 + 'nameptr := 239 | namesleft #1 - 'namesleft := 240 | } 241 | while$ 242 | } 243 | 244 | FUNCTION {format.names.ed} 245 | { 's := 246 | #1 'nameptr := 247 | s num.names$ 'numnames := 248 | numnames 'namesleft := 249 | { namesleft #0 > } 250 | { s nameptr 251 | "{f.~}{vv~}{ll}{, jj}" 252 | format.name$ 't := 253 | nameptr #1 > 254 | { 255 | namesleft #1 > 256 | { ", " * t * } 257 | { 258 | numnames #2 > 259 | { "," * } 260 | 'skip$ 261 | if$ 262 | t "others" = 263 | { " \etal{}" * } 264 | { " and " * t * } 265 | if$ 266 | } 267 | if$ 268 | } 269 | 't 270 | if$ 271 | nameptr #1 + 'nameptr := 272 | namesleft #1 - 'namesleft := 273 | } 274 | while$ 275 | } 276 | 277 | FUNCTION {format.key} 278 | { empty$ 279 | { key field.or.null } 280 | { "" } 281 | if$ 282 | } 283 | 284 | FUNCTION {format.authors} 285 | { author empty$ 286 | { "" } 287 | { author format.names } 288 | if$ 289 | } 290 | 291 | FUNCTION {format.editors} 292 | { editor empty$ 293 | { "" } 294 | { editor format.names 295 | editor num.names$ #1 > 296 | { ", editors" * } 297 | { ", editor" * } 298 | if$ 299 | } 300 | if$ 301 | } 302 | 303 | FUNCTION {format.in.editors} 304 | { editor empty$ 305 | { "" } 306 | { editor format.names.ed 307 | } 308 | if$ 309 | } 310 | 311 | FUNCTION {quote.period.spacing} % OP: reduce space between quotes and 312 | { "" % period; remove negative kern by 313 | } % replacing this simply with "" 314 | 315 | FUNCTION {format.title} % OP: a period follows the 316 | { title empty$ % quotation marks in this style 317 | { "" } 318 | { title 319 | "``" swap$ * 320 | duplicate$ "''" * swap$ % OP: create the quoted title 321 | duplicate$ add.period$ = % see if title needs period 322 | { #1 'suppress.period := } % no: suppress next (2nd) output 323 | { quote.period.spacing * } % yes: add desired spacing 324 | if$ 325 | } 326 | if$ 327 | } 328 | 329 | FUNCTION {n.dashify} 330 | { 't := 331 | "" 332 
| { t empty$ not } 333 | { t #1 #1 substring$ "-" = 334 | { t #1 #2 substring$ "--" = not 335 | { "--" * 336 | t #2 global.max$ substring$ 't := 337 | } 338 | { { t #1 #1 substring$ "-" = } 339 | { "-" * 340 | t #2 global.max$ substring$ 't := 341 | } 342 | while$ 343 | } 344 | if$ 345 | } 346 | { t #1 #1 substring$ * 347 | t #2 global.max$ substring$ 't := 348 | } 349 | if$ 350 | } 351 | while$ 352 | } 353 | 354 | FUNCTION {word.in} 355 | { "In " } 356 | 357 | FUNCTION {format.date} 358 | { year duplicate$ empty$ 359 | { "empty year in " cite$ * "; set to ????" * warning$ 360 | pop$ "????" } 361 | 'skip$ 362 | if$ 363 | "\UseExtraLabel{" * extra.label * "}" * 364 | } 365 | 366 | FUNCTION {format.btitle} 367 | { title emphasize 368 | } 369 | 370 | FUNCTION {tie.or.space.connect} 371 | { duplicate$ text.length$ #3 < 372 | { "~" } 373 | { " " } 374 | if$ 375 | swap$ * * 376 | } 377 | 378 | FUNCTION {either.or.check} 379 | { empty$ 380 | 'pop$ 381 | { "can't use both " swap$ * " fields in " * cite$ * warning$ } 382 | if$ 383 | } 384 | 385 | FUNCTION {format.bvolume} 386 | { volume empty$ 387 | { "" } 388 | { "volume" volume tie.or.space.connect 389 | series empty$ 390 | 'skip$ 391 | { " of " * series emphasize * } 392 | if$ 393 | "volume and number" number either.or.check 394 | } 395 | if$ 396 | } 397 | 398 | FUNCTION {format.number.series} 399 | { volume empty$ 400 | { number empty$ 401 | { series field.or.null } 402 | { output.state mid.sentence = 403 | { "number" } 404 | { "Number" } 405 | if$ 406 | number tie.or.space.connect 407 | series empty$ 408 | { "there's a number but no series in " cite$ * warning$ } 409 | { " in " * series * } 410 | if$ 411 | } 412 | if$ 413 | } 414 | { "" } 415 | if$ 416 | } 417 | 418 | FUNCTION {format.edition} 419 | { edition empty$ 420 | { "" } 421 | { output.state mid.sentence = 422 | { edition "l" change.case$ " edition" * } 423 | { edition "t" change.case$ " edition" * } 424 | if$ 425 | } 426 | if$ 427 | } 428 | 429 | INTEGERS { 
multiresult } 430 | 431 | FUNCTION {multi.page.check} 432 | { 't := 433 | #0 'multiresult := 434 | { multiresult not 435 | t empty$ not 436 | and 437 | } 438 | { t #1 #1 substring$ 439 | duplicate$ "-" = 440 | swap$ duplicate$ "," = 441 | swap$ "+" = 442 | or or 443 | { #1 'multiresult := } 444 | { t #2 global.max$ substring$ 't := } 445 | if$ 446 | } 447 | while$ 448 | multiresult 449 | } 450 | 451 | FUNCTION {format.pages} 452 | { pages empty$ 453 | { "" } 454 | { pages multi.page.check 455 | { "pages" pages n.dashify tie.or.space.connect } 456 | { "page" pages tie.or.space.connect } 457 | if$ 458 | } 459 | if$ 460 | } 461 | 462 | FUNCTION {format.vol.num.pages} 463 | { volume field.or.null 464 | bolden 465 | number empty$ 466 | 'skip$ 467 | { "(" number * ")" * * 468 | volume empty$ 469 | { "there's a number but no volume in " cite$ * warning$ } 470 | 'skip$ 471 | if$ 472 | } 473 | if$ 474 | pages empty$ 475 | 'skip$ 476 | { duplicate$ empty$ 477 | { pop$ format.pages } 478 | { ", " * pages n.dashify * } 479 | if$ 480 | } 481 | if$ 482 | } 483 | 484 | FUNCTION {format.chapter.pages} 485 | { chapter empty$ 486 | 'format.pages 487 | { type empty$ 488 | { "chapter" } 489 | { type "l" change.case$ } 490 | if$ 491 | chapter tie.or.space.connect 492 | pages empty$ 493 | 'skip$ 494 | { ", " * format.pages * } 495 | if$ 496 | } 497 | if$ 498 | } 499 | 500 | FUNCTION {format.in.ed.booktitle} 501 | { booktitle empty$ 502 | { "" } 503 | { editor empty$ 504 | { word.in booktitle emphasize * } 505 | { word.in booktitle emphasize * ", edited by " * 506 | format.in.editors * } 507 | if$ 508 | } 509 | if$ 510 | } 511 | 512 | FUNCTION {format.thesis.type} 513 | { type empty$ 514 | 'skip$ 515 | { pop$ 516 | type "t" change.case$ 517 | } 518 | if$ 519 | } 520 | 521 | FUNCTION {format.tr.number} 522 | { type empty$ 523 | { "Technical Report" } 524 | 'type 525 | if$ 526 | number empty$ 527 | { "t" change.case$ } 528 | { number tie.or.space.connect } 529 | if$ 530 | } 531 | 532 | 
FUNCTION {format.article.crossref} 533 | { 534 | word.in 535 | "\cite{" * crossref * "}" * 536 | } 537 | 538 | FUNCTION {format.book.crossref} 539 | { volume empty$ 540 | { "empty volume in " cite$ * "'s crossref of " * crossref * warning$ 541 | word.in 542 | } 543 | { "Volume" volume tie.or.space.connect 544 | " of " * 545 | } 546 | if$ 547 | "\cite{" * crossref * "}" * 548 | } 549 | 550 | FUNCTION {format.incoll.inproc.crossref} 551 | { 552 | word.in 553 | "\cite{" * crossref * "}" * 554 | } 555 | 556 | FUNCTION {article} 557 | { output.bibitem 558 | format.authors "author" output.check 559 | author format.key output 560 | new.block 561 | format.title "title" output.check 562 | new.block 563 | crossref missing$ 564 | { journal emphasize "journal" output.check 565 | " " * before.all 'output.state := % OP: sic (preexisting kludge) 566 | format.vol.num.pages output 567 | format.date "year" output.check 568 | } 569 | { format.article.crossref output.nonnull 570 | format.pages output 571 | } 572 | if$ 573 | new.block 574 | note output 575 | fin.entry 576 | } 577 | 578 | FUNCTION {book} 579 | { output.bibitem 580 | author empty$ 581 | { format.editors "author and editor" output.check 582 | editor format.key output 583 | } 584 | { format.authors output.nonnull 585 | crossref missing$ 586 | { "author and editor" editor either.or.check } 587 | 'skip$ 588 | if$ 589 | } 590 | if$ 591 | new.block 592 | format.btitle "title" output.check 593 | crossref missing$ 594 | { format.bvolume output 595 | new.block 596 | format.number.series output 597 | new.sentence 598 | publisher "publisher" output.check 599 | address output 600 | } 601 | { 602 | new.block 603 | format.book.crossref output.nonnull 604 | } 605 | if$ 606 | format.edition output 607 | format.date "year" output.check 608 | new.block 609 | note output 610 | fin.entry 611 | } 612 | 613 | FUNCTION {booklet} 614 | { output.bibitem 615 | format.authors output 616 | author format.key output 617 | new.block 618 | format.title 
"title" output.check 619 | new.block 620 | howpublished output 621 | address output 622 | format.date "year" output.check 623 | new.block 624 | note output 625 | fin.entry 626 | } 627 | 628 | FUNCTION {inbook} 629 | { output.bibitem 630 | author empty$ 631 | { format.editors "author and editor" output.check 632 | editor format.key output 633 | } 634 | { format.authors output.nonnull 635 | crossref missing$ 636 | { "author and editor" editor either.or.check } 637 | 'skip$ 638 | if$ 639 | } 640 | if$ 641 | new.block 642 | format.btitle "title" output.check 643 | crossref missing$ 644 | { format.bvolume output 645 | format.chapter.pages "chapter and pages" output.check 646 | new.block 647 | format.number.series output 648 | new.sentence 649 | publisher "publisher" output.check 650 | address output 651 | } 652 | { format.chapter.pages "chapter and pages" output.check 653 | new.block 654 | format.book.crossref output.nonnull 655 | } 656 | if$ 657 | format.edition output 658 | format.date "year" output.check 659 | new.block 660 | note output 661 | fin.entry 662 | } 663 | 664 | FUNCTION {incollection} 665 | { output.bibitem 666 | format.authors "author" output.check 667 | author format.key output 668 | new.block 669 | format.title "title" output.check 670 | new.block 671 | crossref missing$ 672 | { format.in.ed.booktitle "booktitle" output.check 673 | format.bvolume output 674 | format.number.series output 675 | format.chapter.pages output 676 | new.sentence 677 | publisher "publisher" output.check 678 | address output 679 | format.edition output 680 | format.date "year" output.check 681 | } 682 | { format.incoll.inproc.crossref output.nonnull 683 | format.chapter.pages output 684 | } 685 | if$ 686 | new.block 687 | note output 688 | fin.entry 689 | } 690 | 691 | FUNCTION {inproceedings} 692 | { output.bibitem 693 | format.authors "author" output.check 694 | author format.key output 695 | new.block 696 | format.title "title" output.check 697 | new.block 698 | crossref 
missing$ 699 | { format.in.ed.booktitle "booktitle" output.check 700 | format.bvolume output 701 | format.number.series output 702 | format.pages output 703 | address output 704 | new.sentence 705 | organization output 706 | publisher output 707 | format.date "year" output.check 708 | } 709 | { format.incoll.inproc.crossref output.nonnull 710 | format.pages output 711 | } 712 | if$ 713 | new.block 714 | note output 715 | fin.entry 716 | } 717 | 718 | FUNCTION {conference} { inproceedings } 719 | 720 | FUNCTION {manual} 721 | { output.bibitem 722 | format.authors output 723 | author format.key output 724 | new.block 725 | format.btitle "title" output.check 726 | organization address new.block.checkb 727 | organization output 728 | address output 729 | format.edition output 730 | format.date "year" output.check 731 | new.block 732 | note output 733 | fin.entry 734 | } 735 | 736 | FUNCTION {mastersthesis} 737 | { output.bibitem 738 | format.authors "author" output.check 739 | author format.key output 740 | new.block 741 | format.btitle "title" output.check 742 | new.block 743 | "Master's thesis" format.thesis.type output.nonnull 744 | school "school" output.check 745 | address output 746 | format.date "year" output.check 747 | new.block 748 | note output 749 | fin.entry 750 | } 751 | 752 | FUNCTION {misc} 753 | { output.bibitem 754 | format.authors output 755 | author format.key output 756 | new.block 757 | format.title output 758 | new.block 759 | howpublished output 760 | format.date "year" output.check 761 | new.block 762 | note output 763 | fin.entry 764 | } 765 | 766 | FUNCTION {phdthesis} 767 | { output.bibitem 768 | format.authors "author" output.check 769 | author format.key output 770 | new.block 771 | format.btitle "title" output.check 772 | new.block 773 | "Ph.D.\ thesis" format.thesis.type output.nonnull 774 | school "school" output.check 775 | address output 776 | format.date "year" output.check 777 | new.block 778 | note output 779 | fin.entry 780 | } 
781 | 782 | FUNCTION {proceedings} 783 | { output.bibitem 784 | format.editors output 785 | editor format.key output 786 | new.block 787 | format.btitle "title" output.check 788 | format.bvolume output 789 | format.number.series output 790 | address output 791 | new.sentence 792 | organization output 793 | publisher output 794 | format.date "year" output.check 795 | new.block 796 | note output 797 | fin.entry 798 | } 799 | 800 | FUNCTION {techreport} 801 | { output.bibitem 802 | format.authors "author" output.check 803 | author format.key output 804 | new.block 805 | format.title "title" output.check 806 | new.block 807 | format.tr.number output.nonnull 808 | institution "institution" output.check 809 | address output 810 | format.date "year" output.check 811 | new.block 812 | note output 813 | fin.entry 814 | } 815 | 816 | FUNCTION {unpublished} 817 | { output.bibitem 818 | format.authors "author" output.check 819 | author format.key output 820 | new.block 821 | format.title "title" output.check 822 | new.block 823 | note "note" output.check 824 | fin.entry 825 | } 826 | 827 | FUNCTION {default.type} { misc } 828 | 829 | MACRO {jan} {"January"} 830 | 831 | MACRO {feb} {"February"} 832 | 833 | MACRO {mar} {"March"} 834 | 835 | MACRO {apr} {"April"} 836 | 837 | MACRO {may} {"May"} 838 | 839 | MACRO {jun} {"June"} 840 | 841 | MACRO {jul} {"July"} 842 | 843 | MACRO {aug} {"August"} 844 | 845 | MACRO {sep} {"September"} 846 | 847 | MACRO {oct} {"October"} 848 | 849 | MACRO {nov} {"November"} 850 | 851 | MACRO {dec} {"December"} 852 | 853 | READ 854 | 855 | FUNCTION {sortify} 856 | { purify$ 857 | "l" change.case$ 858 | } 859 | 860 | INTEGERS { len } 861 | 862 | FUNCTION {chop.word} 863 | { 's := 864 | 'len := 865 | s #1 len substring$ = 866 | { s len #1 + global.max$ substring$ } 867 | 's 868 | if$ 869 | } 870 | 871 | FUNCTION {format.lab.names} 872 | { 's := 873 | s #1 "{vv~}{ll}" format.name$ 874 | s num.names$ duplicate$ 875 | #2 > 876 | { pop$ % 2nd number of names 
877 | " \etal{}" * 878 | } 879 | { #2 < 880 | 'skip$ 881 | { s #2 "{ff }{vv }{ll}{ jj}" format.name$ "others" = 882 | { " \etal{}" * } 883 | { " and " * s #2 "{vv~}{ll}" format.name$ * } 884 | if$ 885 | } 886 | if$ 887 | } 888 | if$ 889 | } 890 | 891 | FUNCTION {format.long.lab.names} 892 | { 's := 893 | #1 'nameptr := 894 | s num.names$ 'numnames := 895 | numnames 'namesleft := 896 | { namesleft #0 > } 897 | { s nameptr 898 | "{vv~}{ll}" format.name$ 't := 899 | nameptr #1 > 900 | { 901 | namesleft #1 > 902 | { ", " * t * } 903 | { 904 | numnames #2 > 905 | { "," * } 906 | 'skip$ 907 | if$ 908 | t "others" = 909 | { " \etal{}" * } 910 | { " and " * t * } 911 | if$ 912 | } 913 | if$ 914 | } 915 | 't 916 | if$ 917 | nameptr #1 + 'nameptr := 918 | namesleft #1 - 'namesleft := 919 | } 920 | while$ 921 | } 922 | 923 | FUNCTION {author.key} 924 | { author empty$ 925 | { "{" 926 | key empty$ 927 | { cite$ #1 #3 substring$ } 928 | 'key 929 | if$ * 930 | "}" * 931 | } 932 | 'author 933 | if$ 934 | } 935 | 936 | FUNCTION {author.editor.key} 937 | { author empty$ 938 | { editor empty$ 939 | { "{" 940 | key empty$ 941 | { cite$ #1 #3 substring$ } 942 | 'key 943 | if$ * 944 | "}" * 945 | } 946 | 'editor 947 | if$ 948 | } 949 | 'author 950 | if$ 951 | } 952 | 953 | FUNCTION {editor.key} 954 | { editor empty$ 955 | { "{" 956 | key empty$ 957 | { cite$ #1 #3 substring$ } 958 | 'key 959 | if$ * 960 | "}" * 961 | } 962 | 'editor 963 | if$ 964 | } 965 | 966 | FUNCTION {maybe.other.name.field} 967 | { swap$ % [stack: other.field name] 968 | duplicate$ empty$ % is the name empty? 
969 | { pop$ % discard it [stack: other.field] 970 | #1 swap$ 'skip$ if$ % try the other one 971 | } 972 | { swap$ pop$ % discard other.field 973 | } 974 | if$ 975 | } 976 | 977 | FUNCTION {calc.label.names} 978 | { type$ "proceedings" = 979 | 'editor 980 | 'author 981 | if$ 982 | 'editor maybe.other.name.field 983 | 'organization maybe.other.name.field 984 | 'key maybe.other.name.field 985 | { "Cannot create a label name in " cite$ * warning$ 986 | cite$ #1 #3 substring$ } maybe.other.name.field 987 | } 988 | 989 | FUNCTION {calc.short.label} 990 | { calc.label.names format.lab.names 991 | 'short.label := 992 | } 993 | 994 | FUNCTION {calc.label} 995 | { calc.short.label 996 | short.label ", " * 997 | year duplicate$ empty$ 998 | { pop$ "????" } 999 | { purify$ #-1 #4 substring$ } 1000 | if$ 1001 | * 1002 | 'label := 1003 | } 1004 | 1005 | FUNCTION {calc.long.label} 1006 | { calc.label.names format.long.lab.names 1007 | 'long.label := 1008 | } 1009 | 1010 | FUNCTION {sort.format.names} 1011 | { 's := 1012 | #1 'nameptr := 1013 | "" 1014 | s num.names$ 'numnames := 1015 | numnames 'namesleft := 1016 | { namesleft #0 > } 1017 | { nameptr #1 > 1018 | { " " * } 1019 | 'skip$ 1020 | if$ 1021 | s nameptr 1022 | "{vv{ } }{ll{ }}{ f{ }}{ jj{ }}" 1023 | format.name$ 't := 1024 | nameptr numnames = t "others" = and 1025 | { "et al" * } 1026 | { t sortify * } 1027 | if$ 1028 | nameptr #1 + 'nameptr := 1029 | namesleft #1 - 'namesleft := 1030 | } 1031 | while$ 1032 | } 1033 | 1034 | FUNCTION {sort.format.title} 1035 | { 't := 1036 | "A " #2 1037 | "An " #3 1038 | "The " #4 t chop.word 1039 | chop.word 1040 | chop.word 1041 | sortify 1042 | #1 global.max$ substring$ 1043 | } 1044 | 1045 | FUNCTION {author.sort} 1046 | { author empty$ 1047 | { key empty$ 1048 | { "to sort, need author or key in " cite$ * warning$ 1049 | "" 1050 | } 1051 | { key sortify } 1052 | if$ 1053 | } 1054 | { author sort.format.names } 1055 | if$ 1056 | } 1057 | 1058 | FUNCTION {author.editor.sort} 1059 
| { author empty$ 1060 | { editor empty$ 1061 | { key empty$ 1062 | { "to sort, need author, editor, or key in " cite$ * warning$ 1063 | "" 1064 | } 1065 | { key sortify } 1066 | if$ 1067 | } 1068 | { editor sort.format.names } 1069 | if$ 1070 | } 1071 | { author sort.format.names } 1072 | if$ 1073 | } 1074 | 1075 | FUNCTION {editor.sort} 1076 | { editor empty$ 1077 | { key empty$ 1078 | { "to sort, need editor or key in " cite$ * warning$ 1079 | "" 1080 | } 1081 | { key sortify } 1082 | if$ 1083 | } 1084 | { editor sort.format.names } 1085 | if$ 1086 | } 1087 | 1088 | FUNCTION {presort} 1089 | { calc.label 1090 | label sortify 1091 | " " 1092 | * 1093 | type$ "book" = 1094 | type$ "inbook" = 1095 | or 1096 | 'author.editor.sort 1097 | { type$ "proceedings" = 1098 | 'editor.sort 1099 | 'author.sort 1100 | if$ 1101 | } 1102 | if$ 1103 | #1 entry.max$ substring$ 1104 | 'sort.label := 1105 | sort.label 1106 | * 1107 | " " 1108 | * 1109 | title field.or.null 1110 | sort.format.title 1111 | * 1112 | #1 entry.max$ substring$ 1113 | 'sort.key$ := 1114 | } 1115 | 1116 | ITERATE {presort} 1117 | 1118 | SORT 1119 | 1120 | STRINGS { last.label next.extra } 1121 | 1122 | INTEGERS { last.extra.num } 1123 | 1124 | FUNCTION {initialize.extra.label.stuff} 1125 | { #0 int.to.chr$ 'last.label := 1126 | "" 'next.extra := 1127 | #0 'last.extra.num := 1128 | } 1129 | 1130 | FUNCTION {forward.pass} 1131 | { last.label label = 1132 | { last.extra.num #1 + 'last.extra.num := 1133 | last.extra.num int.to.chr$ 'extra.label := 1134 | } 1135 | { "a" chr.to.int$ 'last.extra.num := 1136 | "" 'extra.label := 1137 | label 'last.label := 1138 | } 1139 | if$ 1140 | } 1141 | 1142 | FUNCTION {reverse.pass} 1143 | { next.extra "b" = 1144 | { "a" 'extra.label := } 1145 | 'skip$ 1146 | if$ 1147 | extra.label 'next.extra := 1148 | } 1149 | 1150 | EXECUTE {initialize.extra.label.stuff} 1151 | 1152 | ITERATE {forward.pass} 1153 | 1154 | REVERSE {reverse.pass} 1155 | 1156 | FUNCTION {bib.sort.order} 1157 | 
{ sort.label 1158 | " " 1159 | * 1160 | year field.or.null sortify 1161 | * 1162 | " " 1163 | * 1164 | title field.or.null 1165 | sort.format.title 1166 | * 1167 | #1 entry.max$ substring$ 1168 | 'sort.key$ := 1169 | calc.long.label 1170 | } 1171 | 1172 | ITERATE {bib.sort.order} 1173 | 1174 | SORT 1175 | 1176 | FUNCTION {begin.bib} 1177 | { preamble$ empty$ 1178 | 'skip$ 1179 | { preamble$ write$ newline$ } 1180 | if$ 1181 | "\begin{thebibliography}{}" write$ newline$ 1182 | } 1183 | 1184 | EXECUTE {begin.bib} 1185 | 1186 | EXECUTE {init.state.consts} 1187 | 1188 | ITERATE {call.type$} 1189 | 1190 | FUNCTION {end.bib} 1191 | { newline$ 1192 | "\end{thebibliography}" write$ newline$ 1193 | } 1194 | 1195 | EXECUTE {end.bib} 1196 | %% 1197 | %% 1198 | %% End of file `ltugbib.bst'. 1199 | -------------------------------------------------------------------------------- /pts_pdfsizeopt2009/ltugproc.cls: -------------------------------------------------------------------------------- 1 | %% 2 | %% This is file `ltugproc.cls', 3 | %% generated with the docstrip utility. 4 | %% 5 | %% The original source files were: 6 | %% 7 | %% tugboat.dtx (with options: `ltugproccls') 8 | %% 9 | %% This is a generated file. 10 | %% 11 | %% Copyright 1994,1995,1996,2001,2005,2006,2009 TeX Users Group. 12 | %% 13 | %% It may be distributed and/or modified under the 14 | %% conditions of the LaTeX Project Public License, either version 1.3 15 | %% of this license or (at your option) any later version. 16 | %% The latest version of this license is in 17 | %% http://www.latex-project.org/lppl.txt 18 | %% and version 1.3 or later is part of all distributions of LaTeX 19 | %% version 2003/12/01 or later. 20 | %% 21 | %% This file has the LPPL maintenance status "maintained". 22 | %% 23 | %% The Current Maintainer of this work is the TeX Users Group 24 | %% (http://tug.org/TUGboat). 25 | %% 26 | %% The list of all files belonging to the distribution is given in the file 27 | %% `manifest.txt'. 
28 | %% 29 | %% The list of derived (unpacked) files belonging to the distribution 30 | %% and covered by LPPL is defined by the unpacking scripts (with 31 | %% extension .ins) which are part of the distribution. 32 | %% 33 | \NeedsTeXFormat{LaTeX2e}[1994/12/01] 34 | \ProvidesClass {ltugproc} 35 | [2009/06/21 v2.6 36 | TUG conference proceedings class% 37 | ] 38 | \def\@tugclass{ltugproc} 39 | \newif\if@proctw@column \@proctw@columntrue 40 | \DeclareOption{onecolumn}{\@proctw@columnfalse} 41 | \newif\if@proc@sober 42 | \newif\if@proc@numerable 43 | \DeclareOption{tug95}{% 44 | \@proc@soberfalse 45 | \@proc@numerablefalse 46 | } 47 | \DeclareOption{tug96}{% 48 | \@proc@sobertrue 49 | \@proc@numerablefalse 50 | } 51 | \DeclareOption{tug97}{% 52 | \@proc@sobertrue 53 | \@proc@numerabletrue 54 | } 55 | \DeclareOption{tug2002}{% 56 | \@proc@sobertrue 57 | \@proc@numerabletrue 58 | \let\if@proc@numbersec\iftrue 59 | \PassOptionsToClass{numbersec}{ltugboat}% 60 | } 61 | \DeclareOption{numbersec}{\let\if@proc@numbersec\iftrue 62 | \PassOptionsToClass{numbersec}{ltugboat}% 63 | } 64 | \DeclareOption{nonumber}{\let\if@proc@numbersec\iffalse 65 | \PassOptionsToClass{nonumber}{ltugboat}% 66 | } 67 | \newif\ifTB@title 68 | \DeclareOption{title}{\TB@titletrue} 69 | \DeclareOption{notitle}{\TB@titlefalse 70 | \AtBeginDocument{\stepcounter{page}}} 71 | \DeclareOption{tugproc}{% 72 | \ClassWarning{\@tugclass}{Option \CurrentOption\space ignored}% 73 | } 74 | \DeclareOption*{\PassOptionsToClass{\CurrentOption}{ltugboat}} 75 | \InputIfFileExists{\@tugclass.cfg}{\ClassInfo{ltugproc}% 76 | {Loading ltugproc configuration information}}{} 77 | \@ifundefined{TUGprocExtraOptions}% 78 | {\let\TUGprocExtraOptions\@empty}% 79 | {\edef\TUGprocExtraOptions{,\TUGprocExtraOptions}} 80 | \@tempcnta\year 81 | \ifnum\@tempcnta<2000 82 | \divide\@tempcnta by100 83 | \multiply\@tempcnta by100 84 | \advance\@tempcnta-\year 85 | \@tempcnta-\@tempcnta 86 | \fi 87 | 
\edef\@tempa{\noexpand\providecommand\noexpand\tugProcYear 88 | {\ifnum10>\@tempcnta0\fi\the\@tempcnta}} 89 | \@tempa 90 | \ClassInfo{ltugproc}{Class believes year is 91 | \expandafter\ifnum\tugProcYear<2000 19\fi\tugProcYear 92 | \@gobble} 93 | \expandafter\ifx\csname ds@tug\tugProcYear\endcsname\relax 94 | \def\tugProcYear{2002}\fi 95 | \ExecuteOptions{tug\tugProcYear,title\TUGprocExtraOptions} 96 | \ProcessOptions 97 | \if@proc@numbersec 98 | \if@proc@numerable 99 | \else 100 | \ClassWarning{\@tugclass}{This year's proceedings may not have 101 | numbered sections}% 102 | \fi 103 | \fi 104 | \LoadClass[\if@proc@numbersec numbersec\else nonumber\fi]{ltugboat} 105 | \def\maketitle{% 106 | \begingroup 107 | \ifshortAuthor\else 108 | \global\let\rhAuthor\@empty 109 | \def\g@addto@rhAuthor##1{% 110 | \begingroup 111 | \toks@\expandafter{\rhAuthor}% 112 | \let\thanks\@gobble 113 | \protected@xdef\rhAuthor{\the\toks@##1}% 114 | \endgroup 115 | }% 116 | \@getauthorlist\g@addto@rhAuthor 117 | \fi 118 | \ifTB@title 119 | \setcounter{footnote}{0}% 120 | \renewcommand\thefootnote{\@fnsymbol\c@footnote}% 121 | \if@proctw@column 122 | \twocolumn[\@maketitle]% 123 | \else 124 | \onecolumn 125 | \global\@topnum\z@ 126 | \@maketitle 127 | \fi 128 | \@thanks 129 | \thispagestyle{TBproctitle} 130 | \fi 131 | \endgroup 132 | \TB@madetitletrue 133 | } 134 | \newif\ifTB@madetitle \TB@madetitlefalse 135 | \def\@TB@test@document{% 136 | \edef\@tempa{\the\everypar} 137 | \def \@tempb{\@nodocument} 138 | \ifx \@tempa\@tempb 139 | \@nodocument 140 | \fi 141 | } 142 | \def\AUTHORfont {\large\rmfamily\mdseries\upshape} 143 | \def\TITLEfont {\Large\rmfamily\mdseries\upshape} 144 | \def\addressfont{\small\rmfamily\mdseries\upshape} 145 | \def\netaddrfont{\small\ttfamily\mdseries\upshape} 146 | \newskip\aboveauthorskip \aboveauthorskip=18\p@ \@plus4\p@ 147 | \newskip\belowauthorskip \belowauthorskip=\aboveauthorskip 148 | \newskip\belowabstractskip \belowabstractskip=14\p@ \@plus3\p@ 
\@minus2\p@ 149 | \def\@maketitle{% 150 | {\parskip\z@ 151 | \frenchspacing 152 | \TITLEfont\raggedright\noindent\@title\par 153 | \count@=0 154 | \loop 155 | \ifnum\count@<\authornumber 156 | \vskip\aboveauthorskip 157 | \advance\count@\@ne 158 | {\AUTHORfont\theauthor{\number\count@}\endgraf}% 159 | \addressfont\theaddress{\number\count@}\endgraf 160 | {% 161 | \allowhyphens 162 | \hangindent1.5pc 163 | \netaddrfont\thenetaddress{\number\count@}\endgraf 164 | \hangindent1.5pc 165 | \thePersonalURL{\number\count@}\endgraf 166 | }% 167 | \repeat 168 | \vskip\belowauthorskip}% 169 | \if@abstract 170 | \centerline{\bfseries Abstract}% 171 | \vskip.5\baselineskip\rmfamily 172 | \list{}{\listparindent20\p@ 173 | \itemindent\z@ \leftmargin\tubfullpageindent 174 | \rightmargin\leftmargin \parsep \z@}\item[]\ignorespaces 175 | \the\abstract@toks 176 | \endlist\global\@ignoretrue 177 | \fi 178 | \vskip\belowabstractskip 179 | \global\@afterindentfalse\aftergroup\@afterheading 180 | } 181 | \newtoks\abstract@toks \abstract@toks{} 182 | \let\if@abstract\iffalse 183 | \def\abstract{% 184 | \ifTB@madetitle 185 | \TBWarning{abstract environment after \string\maketitle} 186 | \fi 187 | \def\@abstract@{abstract}% 188 | \ifx\@currenvir\@abstract@ 189 | \else 190 | \TBError{\string\abstract\space is illegal:% 191 | \MessageBreak 192 | use \string\begin{\@abstract@} instead}% 193 | {\@abstract@\space may only be used as an environment} 194 | \fi 195 | \global\let\if@abstract\iftrue 196 | {\ifnum0=`}\fi 197 | \@abstract@getbody} 198 | \let\endabstract\relax 199 | \long\def\@abstract@getbody#1\end{% 200 | \global\abstract@toks\expandafter{\the\abstract@toks#1}% 201 | \@abstract@findend} 202 | \def\@abstract@findend#1{% 203 | \def\@tempa{#1}% 204 | \ifx\@tempa\@abstract@ 205 | \expandafter\@abstract@end 206 | \else 207 | \def\@tempb{document}% 208 | \ifx\@tempa\@tempb 209 | \TBError{\string\begin{\@abstract@} 210 | ended by \string\end{\@tempb}}% 211 | {You've forgotten 
\string\end{\@abstract@}} 212 | \else 213 | \global\abstract@toks\expandafter{\the\abstract@toks\end{#1}}% 214 | \expandafter\expandafter\expandafter\@abstract@getbody 215 | \fi 216 | \fi} 217 | \def\@abstract@end{\ifnum0=`{\fi}% 218 | \expandafter\end\expandafter{\@abstract@}} 219 | \renewcommand{\makesignature}{\TBWarning 220 | {\string\makesignature\space is invalid in proceedings issues}} 221 | \def\ps@TBproctitle{\let\@oddhead\MakeRegistrationMarks 222 | \let\@evenhead\MakeRegistrationMarks 223 | \TB@definefeet 224 | } 225 | \def\ps@TBproc{% 226 | \def\@oddhead{\MakeRegistrationMarks 227 | {% 228 | \hfil 229 | \def\\{\unskip\ \ignorespaces}% 230 | \rmfamily\rhTitle 231 | }% 232 | }% 233 | \def\@evenhead{\MakeRegistrationMarks 234 | {% 235 | \def\\{\unskip\ \ignorespaces}% 236 | \rmfamily\rhAuthor 237 | \hfil 238 | }% 239 | }% 240 | \TB@definefeet 241 | } 242 | 243 | \advance\footskip8\p@ % for deeper running feet 244 | 245 | \def\dopagecommands{\csname @@pagecommands\number\c@page\endcsname} 246 | \def\setpagecommands#1#2{\expandafter\def\csname @@pagecommands#1\endcsname 247 | {#2}} 248 | \def\TB@definefeet{% 249 | \def\@oddfoot{\ifpreprint\pfoottext\hfil\Now\hfil\thepage 250 | \else\rfoottext\hfil\thepage\fi\dopagecommands}% 251 | \def\@evenfoot{\ifpreprint\thepage\hfil\Now\hfil\pfoottext 252 | \else\thepage\hfil\rfoottext\fi\dopagecommands}% 253 | } 254 | 255 | \def\pfoottext{{\smc Preprint}: Proceedings of the \volyr{} Annual Meeting} 256 | \def\rfoottext{\normalfont\TUB, \volx\Dash 257 | {Proceedings of the \volyr{} Annual Meeting}} 258 | 259 | \pagestyle{TBproc} 260 | \if@proc@numbersec 261 | \else 262 | \setcounter{secnumdepth}{0} 263 | \fi 264 | \if@proc@numbersec 265 | \else 266 | \if@proc@sober 267 | \def\section 268 | {\TB@nolimelabel 269 | \TB@startsection{{section}% 270 | 1% 271 | \z@% 272 | {-8\p@\@plus-2\p@\@minus-2\p@}% 273 | {6\p@}% 274 | {\normalsize\bfseries\raggedright}}} 275 | \else 276 | \def\section 277 | {\TB@nolimelabel 278 | 
\TB@startsection{{section}% 279 | 1% 280 | \z@% 281 | {-8\p@\@plus-2\p@\@minus-2\p@}% 282 | {6\p@}% 283 | {\large\bfseries\raggedright}}} 284 | \fi 285 | \def\subsection 286 | {\TB@nolimelabel 287 | \TB@startsection{{subsection}% 288 | 2% 289 | \z@% 290 | {6\p@\@plus 2\p@\@minus2\p@}% 291 | {-5\p@\@plus -\fontdimen3\the\font}% 292 | {\normalsize\bfseries}}} 293 | \def\subsubsection 294 | {\TB@nolimelabel 295 | \TB@startsection{{subsubsection}% 296 | 3% 297 | \parindent% 298 | \z@% 299 | {-5\p@\@plus -\fontdimen3\the\font}% 300 | {\normalsize\bfseries}}} 301 | \fi 302 | \endinput 303 | %% 304 | %% End of file `ltugproc.cls'. 305 | -------------------------------------------------------------------------------- /pts_pdfsizeopt2009/pts_pdfsizeopt2009.bib: -------------------------------------------------------------------------------- 1 | % 2 | % by pts at fazekas.hu at Wed Jul 29 12:23:18 CEST 2009 3 | % 4 | 5 | @inproceedings{pdf-diet, 6 | title={Two Diet Plans for Fat {PDF}}, 7 | author={Thomas A. Phelps and Robert Wilensky}, 8 | booktitle={Proceedings of ACM Symposium on Document Engineering}, 9 | ignored-official-url={http://portal.acm.org/citation.cfm?doid=958220.958253}, 10 | year=2003, 11 | month=nov, 12 | note={URL \url{http://multivalent.sourceforge.net/Research/TwoDietPlans.pdf}}, 13 | } 14 | 15 | @misc{pdf-iso, 16 | title={{ISO} 32000-1:2008 {Document} management -- 17 | {Portable} document format -- Part 1: {PDF} 1.7}, 18 | note={URL \url{http://www.iso.org/iso/iso_catalogue/catalogue_tc/catalogue_detail.htm?csnumber=51502}}, 19 | year=2008, 20 | key={ISO 32000-1:2008}, 21 | done=1, 22 | } 23 | 24 | @misc{multivalent-compact, 25 | title={Compact {PDF} Specification}, 26 | author={Thomas A. 
Phelps}, 27 | year=2004, 28 | month=mar, 29 | note={URL \url{http://multivalent.sourceforge.net/Research/CompactPDF.html}}, 30 | } 31 | 32 | @inproceedings{multivalent-archive, 33 | title={A No-Compromises Architecture for Digital Document Preservation}, 34 | author={Thomas A. Phelps and P.B. Watry}, 35 | booktitle={Proceedings of European Conference on Digital Libraries}, 36 | year=2005, 37 | month=sep, 38 | note={URL \url{http://multivalent.sourceforge.net/Research/Live.pdf}}, 39 | } 40 | 41 | @misc{pdfcreator, 42 | title={{PDFCreator}, a free tool to create {PDF} files from nearly any 43 | {Windows} application}, 44 | note={URL \url{http://www.pdfforge.org/products/pdfcreator}}, 45 | key={pdfcreator}, 46 | } 47 | 48 | @misc{ps2pdf, 49 | title={ps2pdf, a {PostScript-to-PDF} converter}, 50 | note={URL \url{http://pages.cs.wisc.edu/~ghost/doc/svn/Ps2pdf.htm}}, 51 | key={ps2pdf}, 52 | } 53 | 54 | @book{pdfref, 55 | author={Adobe}, 56 | title={{PDF} Reference, {Adobe} {Portable} {Document} {Format} Version 1.7}, 57 | publisher={Adobe}, 58 | year=2006, 59 | month=nov, 60 | edition=6, 61 | note={URL \url{http://www.adobe.com/devnet/acrobat/pdfs/pdf_reference_1-7.pdf}}, 62 | key={adobe pdfref}, 63 | } 64 | 65 | @misc{dvips, 66 | author={Tomas Rokicki}, 67 | title={Dvips: A {DVI-to-PostScript} Translator}, 68 | year=2007, 69 | month=jan, 70 | edition={5.96}, 71 | note={URL \url{http://www.ctan.org/tex-archive/info/doc-k/dvips.pdf}}, 72 | } 73 | 74 | @manual{pdftex, 75 | author={H{\`a}n Th{\^e\llap{\raise 0.5ex\hbox{\'{}}}} Th{\`a}nh and Sebastian Rahtz and Hans Hagen and others}, 76 | title={The {pdf\TeX} manual}, 77 | year=2007, 78 | month="25~" # jan, 79 | edition={1.671}, 80 | note={URL \url{http://www.ctan.org/get/systems/pdftex/pdftex-a.pdf}}, 81 | key={Han The Thanh}, 82 | } 83 | 84 | @manual{tikz, 85 | author={Till Tantau ed.}, 86 | title={The {TikZ} and {PGF} Packages}, 87 | year=2008, 88 | month="20~" # feb, 89 | edition={2.00}, 90 | organization={Institute 
f{\"ur} Theoretische Informatik, Universit{\"a}t zu L{\"u}beck}, 91 | note={URL \url{http://www.ctan.org/tex-archive/graphics/pgf/base/doc/generic/pgf/pgfmanual.pdf}}, 92 | } 93 | 94 | @misc{textext, 95 | author={Pauli Virtanen}, 96 | title={{TexText}, an {Inkscape} extension for adding {\LaTeX} markup}, 97 | year=2009, 98 | month="6~" # feb, 99 | note={URL \url{http://www.elisanet.fi/ptvirtan/software/textext/}}, 100 | } 101 | 102 | @inproceedings{sam2p-article, 103 | author={P{\'e}ter Szab{\'o}}, 104 | title={Inserting figures into {\TeX} documents}, 105 | booktitle={{EuroBacho\TeX}}, 106 | year=2002, 107 | note={URL \url{http://www.inf.bme.hu/~pts/sam2p/sam2p_article.pdf}}, 108 | } 109 | 110 | @misc{sam2p, 111 | author={P{\'e}ter Szab{\'o}}, 112 | title={sam2p, a pixel image converter 113 | which can generate small PostScript and {PDF}}, 114 | note={URL \url{http://www.inf.bme.hu/~pts/sam2p/}}, 115 | } 116 | 117 | @misc{gimp, 118 | title={GIMP, the {GNU} {Image} {Manipulation} {Program}}, 119 | note={URL \url{http://www.gimp.org/}}, 120 | key={gimp}, 121 | } 122 | 123 | @manual{cff, 124 | author={Adobe}, 125 | title={The {Compact} {Font} {Format} Specification}, 126 | year=2003, 127 | month="4~" # dec, 128 | edition={1.0}, 129 | note={URL \url{http://www.adobe.com/devnet/font/pdfs/5176.CFF.pdf}}, 130 | } 131 | 132 | @misc{kzip, 133 | author={Ken Silverman}, 134 | title={{KZIP,} a {PKZIP-compatible} compressor focusing on space over speed}, 135 | note={URL \url{http://advsys.net/ken/utils.htm#kzip}}, 136 | key={kzip}, 137 | } 138 | 139 | @misc{pngout, 140 | author={Ken Silverman}, 141 | title={{PNGOUT,} a lossless {PNG} size optimizer}, 142 | year=2009, 143 | note={URL \url{http://advsys.net/ken/utils.htm#pngout}}, 144 | } 145 | 146 | @misc{pngout-linux, 147 | author={Jonathon Fowler}, 148 | title={{PNGOUT} port for Unix systems}, 149 | year=2007, 150 | note={URL \url{http://www.jonof.id.au/kenutils}}, 151 | } 152 | 153 | @manual{beamer, 154 | author={Till 
Tantau}, 155 | title={The beamer class}, 156 | edition={3.07}, 157 | year=2007, 158 | month="11~" # mar, 159 | note={URL \url{http://www.ctan.org/tex-archive/macros/latex/contrib/beamer/doc/beameruserguide.pdf}}, 160 | } 161 | 162 | @misc{codemantra, 163 | title={{codeMantra} {Universal} {PDF}, a {PDF} generator}, 164 | note={URL \url{http://www.codemantra.com/universalpdf.htm}}, 165 | key={codemantra universal pdf}, 166 | } 167 | 168 | @misc{pdf-concatenate, 169 | author={Matthew Skala}, 170 | title={How to concatenate {PDFs} without pain}, 171 | year=2008, 172 | month="13~" # may, 173 | note={URL \url{http://ansuz.sooke.bc.ca/software/pdf-append.php}}, 174 | } 175 | 176 | @misc{djvu-tutorial, 177 | title={DjVu: A Tutorial}, 178 | note={URL \url{http://www.djvuzone.org/support/tutorial/chapter-intro.html}}, 179 | key={djvu a tutorial}, 180 | } 181 | 182 | @manual{djvu3spec, 183 | author={Lizardtech}, 184 | title={DjVu Reference}, 185 | year=2005, 186 | month=nov, 187 | edition={DjVu v3}, 188 | note={URL \url{http://djvu.org/docs/DjVu3Spec.djvu}}, 189 | } 190 | 191 | @misc{multivalent, 192 | title={Multivalent, digital documents research and development}, 193 | note={URL \url{http://multivalent.sourceforge.net/}}, 194 | key={multivalent}, 195 | } 196 | 197 | @misc{multivalent-download, 198 | title={Multivalent, download location}, 199 | note={URL \url{http://sourceforge.net/projects/multivalent/files/}}, 200 | key={multivalent download}, 201 | } 202 | 203 | @misc{multivalent-compress-tool, 204 | author={Thomas A. 
Phelps}, 205 | title={Compress, the {Multivalent} PDF compression tool}, 206 | note={URL \url{http://multivalent.sourceforge.net/Tools/pdf/Compress.html}}, 207 | } 208 | 209 | @misc{dvipdfmx, 210 | title={{DVIPDFMx,} an extended {DVI-to-PDF} translator}, 211 | note={URL \url{http://project.ktug.or.kr/dvipdfmx/}}, 212 | key={dvipdfmx}, 213 | } 214 | 215 | @misc{pdfsizeopt-extra, 216 | author={P{\'e}ter Szab{\'o}}, 217 | title={Extra files related to {PDF} generation and {PDF} size optimization}, 218 | note={URL \url{http://code.google.com/p/pdfsizeopt/source/browse/#svn/trunk/extra}}, 219 | } 220 | 221 | @misc{pstricks-pdfoutput, 222 | title={Workarounds for {PDF} output with the pstricks {\LaTeX} package}, 223 | note={URL \url{http://tug.org/PSTricks/main.cgi?file=pdf/pdfoutput}}, 224 | key={PDF output with pstricks}, 225 | } 226 | 227 | @misc{pdfenhancer, 228 | title={{PDF} {Enhancer,} a PDF converter, concatenator and optimizer}, 229 | note={URL \url{http://www.apagoinc.com/prod_home.php?prod_id=2}}, 230 | key={PDF Enchancer}, 231 | } 232 | 233 | @misc{example-pdfs, 234 | author={P{\'e}ter Szab{\'o}}, 235 | title={{PDF} files pdfsizeopt.py was tested with}, 236 | note={URL \url{http://code.google.com/p/pdfsizeopt/wiki/ExamplePDFsToOptimize}}, 237 | } 238 | 239 | @misc{pdfsizeopt, 240 | author={P{\'e}ter Szab{\'o}}, 241 | title={pdfsizeopt.py, a PDF file size optimizer (project page)}, 242 | note={URL \url{http://code.google.com/p/pdfsizeopt}}, 243 | } 244 | 245 | @misc{jbig2enc, 246 | author={Adam Langley}, 247 | title={jbig2enc, a {JBIG2} encoder (project page)}, 248 | note={URL \url{http://github.com/agl/jbig2enc/tree/master}}, 249 | } 250 | 251 | @misc{pdfsizeopt-install, 252 | author={P{\'e}ter Szab{\'o}}, 253 | title={Installation instructions for pdfsizeopt.py}, 254 | note={URL \url{http://code.google.com/p/pdfsizeopt/wiki/InstallationInstructions}}, 255 | } 256 | 257 | @misc{png-recompressors, 258 | title={List of {PNG} recompressors}, 259 | note={URL 
\url{http://en.wikipedia.org/wiki/OptiPNG#See_also}}, 260 | key={List of PNG recompressors}, 261 | } 262 | 263 | @unpublished{creative-pdf, 264 | author={Ralf Koening}, 265 | title={Creative use of {PDF} files in {\LaTeX} environments}, 266 | year=2004, 267 | month="18~" # jun, 268 | note={URL \url{http://www.tu-chemnitz.de/urz/anwendungen/tex/stammtisch/chronik/pdf_interna.pdf}}, 269 | } 270 | 271 | @unpublished{optipng-article, 272 | author={Cosmin Tru{\c t}a}, 273 | title={A guide to {PNG} optimization}, 274 | year=2008, 275 | month="10~" # may, 276 | note={URL \url{http://optipng.sourceforge.net/pngtech/optipng.html}}, 277 | } 278 | 279 | @misc{optipng, 280 | author={Cosmin Tru{\c t}a}, 281 | title={{OptiPNG,} Advanced {PNG} Optimizer (project page)}, 282 | year=2009, 283 | month="9~" # jun, 284 | note={URL \url{http://optipng.sourceforge.net/}}, 285 | } 286 | 287 | @unpublished{pdftweak, 288 | author={Andy King}, 289 | title={Optimize {PDF} Files}, 290 | year=2006, 291 | month="25~" # sep, 292 | note={URL \url{http://websiteoptimization.com/speed/tweak/pdf/}}, 293 | } 294 | 295 | @misc{packjpg, 296 | author={Matthias Stirner and Gerhard Seelmann}, 297 | title={{packJPG,} a lossless compressor for {JPEG} images (project page)}, 298 | year=2007, 299 | month="21~" # nov, 300 | note={URL \url{http://www.elektronik.htw-aalen.de/packjpg/}}, 301 | } 302 | 303 | @misc{compressing-your, 304 | author={VeryPDF.com}, 305 | title={Compressing your {PDF} files}, 306 | year=2006, 307 | month="13~" # jul, 308 | note={URL \url{http://www.verypdf.com/pdfinfoeditor/compression.htm}}, 309 | } 310 | 311 | @manual{pdfcompress, 312 | author={VeryPDF.com}, 313 | title={{PDF} {Compress} Command Line User Manual}, 314 | year=2006, 315 | month="13~" # jul, 316 | note={URL \url{http://www.verypdf.com/pdfinfoeditor/pdfcompress.htm}}, 317 | } 318 | 319 | @misc{pdfenhancer-features, 320 | author={Apago}, 321 | title={Which Features Are In What {PDF} {Enhancer} Edition?}, 322 | year=2009, 323 
| month="29~" # jul, 324 | note={URL \url{http://www.apagoinc.com/prod_feat.php?feat_id=30&feat_disp_order=7&prod_id=2}}, 325 | } 326 | 327 | @misc{pdfdb, 328 | editor={Michael Still}, 329 | title={{PDF} Database}, 330 | year=2005, 331 | month="20~" # apr, 332 | note={URL \url{http://www.stillhq.com/pdfdb/db.html}}, 333 | key={PDF database}, 334 | } 335 | 336 | @misc{pdf-bestpractices, 337 | author={Shlomo Perets}, 338 | title={Best Practices {\#1}: Reducing the size of your {PDFs}}, 339 | year=2001, 340 | month="7~" # aug, 341 | note={URL \url{http://www.planetpdf.com/creative/article.asp?ContentID=6568}}, 342 | } 343 | 344 | @misc{acrobatpro, 345 | author={Adobe}, 346 | title={{Adobe} {Acrobat} {Pro} 9 (project page)}, 347 | note={URL \url{http://www.adobe.com/products/acrobatpro/}}, 348 | } 349 | -------------------------------------------------------------------------------- /pts_pdfsizeopt2009_talk/compile.sh: -------------------------------------------------------------------------------- 1 | #! 
/bin/bash 2 | set -ex 3 | pdflatex pts_pdfsizeopt2009_talk.tex 4 | pdflatex pts_pdfsizeopt2009_talk.tex 5 | ../pdfsizeopt.py pts_pdfsizeopt2009_talk.pdf 6 | cp pts_pdfsizeopt2009_talk.pdf ~/Dropbox.pts/Dropbox/Public/ 7 | -------------------------------------------------------------------------------- /pts_pdfsizeopt2009_talk/pdfsizeopt_charts.key: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pts/pdfsizeopt/2bab16031dad854e42c2910859564d9a962bc16c/pts_pdfsizeopt2009_talk/pdfsizeopt_charts.key -------------------------------------------------------------------------------- /pts_pdfsizeopt2009_talk/pdfsizeopt_charts.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pts/pdfsizeopt/2bab16031dad854e42c2910859564d9a962bc16c/pts_pdfsizeopt2009_talk/pdfsizeopt_charts.pdf -------------------------------------------------------------------------------- /pts_pdfsizeopt2009_talk/pdfsizeopt_charts_ps2pdf.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pts/pdfsizeopt/2bab16031dad854e42c2910859564d9a962bc16c/pts_pdfsizeopt2009_talk/pdfsizeopt_charts_ps2pdf.pdf -------------------------------------------------------------------------------- /pts_pdfsizeopt2009_talk/pts_pdfsizeopt2009_talk.tex: -------------------------------------------------------------------------------- 1 | % by pts@fazekas.hu at Fri Aug 28 09:29:30 CEST 2009 2 | % 3 | % Imp: more visual effects (cliparts etc.) 4 | % Imp: no colors in the document size chart 5 | % Imp: add file sizes to the bottom of graph 2 6 | % 7 | % SUXX: pdflatex cannot embed (missing glyph) a Keynote chart with an ff 8 | % ligature in the caption. Solution: ZERO-WIDH-JOINER unicode char. 
9 | \documentclass{beamer} 10 | \usepackage{beamerthemesplit} 11 | \usepackage{lmodern} 12 | \usepackage{t1enc} 13 | \usepackage{graphicx} 14 | \usepackage{ulem} 15 | \usepackage{mflogo} 16 | 17 | 18 | \definecolor{GoogleBlue}{RGB}{0,102,204} 19 | \definecolor{GoogleRed}{RGB}{255,0,0} 20 | \definecolor{GoogleYellow}{RGB}{255,204,0} 21 | \definecolor{GoogleGreen}{RGB}{0,153,0} 22 | \RequirePackage{xspace} 23 | \newcommand\Google{% The Googley \Google :) 24 | \texorpdfstring{% Color changes do not like to be in bookmarks. 25 | {\color{GoogleBlue}G}% G 26 | {\color{GoogleRed}o}% o 27 | {\color{GoogleYellow}o}% o :) 28 | {\color{GoogleBlue}g}% g 29 | {\color{GoogleGreen}l}% l 30 | {\color{GoogleRed}e}% e 31 | }{Google}% Colorless text used instead when colors won't work. 32 | \xspace} 33 | 34 | \definecolor{c-purple}{rgb}{0.4353,0.2392,0.4745} 35 | \definecolor{c-red}{rgb}{0.7373,0.1765,0.1882} 36 | \definecolor{c-yellow}{rgb}{0.9059,0.6314,0.2392} 37 | \definecolor{c-green}{rgb}{0.3647,0.5882,0.2824} 38 | \definecolor{c-blue}{rgb}{0.1804,0.3412,0.5490} 39 | 40 | \title{Optimizing PDF output size of \TeX{} documents} 41 | \subtitle{\ldots{} and PDF files created by other means as well} 42 | \author[P\'eter Szab\'o]{P\'eter Szab\'o\texorpdfstring{\\}{ -- }\textbf{\Google}} 43 | \date{2009-09-01\par\medskip EuroTeX\,2009\\ The Hague, The Netherlands} 44 | 45 | %** Create a colorful full square with the specified color. 46 | \def\colorsquare#1{{\color{#1}\vrule width.8em height.8em depth0pt}} 47 | 48 | %** Put content to a frame with raggedbottom vertical alignment. 49 | %** This has a hard-coded height for the useful content, depends on the 50 | %** number of sections as well. 51 | %** @example 52 | %** \frame{ 53 | %** \nocenterframetitle{...} 54 | %** \nocenter{ 55 | %** ... 
56 | %** } 57 | %** } 58 | \def\nocenter#1{% 59 | \hrule height0pt 60 | \vbox to200pt{\vsize=200pt{\ignorespaces#1}\vfil}% 61 | } 62 | 63 | \begin{document} 64 | 65 | \frame{\titlepage} 66 | 67 | % !! 68 | %\section[Outline]{} 69 | \section{Why and how to optimize?} 70 | 71 | \frame{\frametitle{Outline}\tableofcontents} 72 | 73 | \subsection{Introduction} 74 | 75 | \def\coloremph#1{{\usebeamercolor[fg]{description item}#1}} 76 | 77 | \frame{ 78 | \frametitle{Why create small PDF files?} 79 | \begin{itemize} 80 | \advance\itemsep1em 81 | \item speed up \coloremph{downloads} (and also reduce download costs) 82 | \item reduce \coloremph{storage} costs 83 | \begin{itemize} 84 | \item for publishers, book shops, libraries and print shops 85 | \item save money everywhere the same PDF is stored 86 | \end{itemize} 87 | \item use the capacity of \coloremph{e-book readers} more effectively 88 | \end{itemize} 89 | } 90 | 91 | \def\conclusionbody{ 92 | \begin{itemize} 93 | \advance\itemsep10pt 94 | \item \coloremph{generate quickly,} optimize later 95 | \item \coloremph{no dvips} if possible 96 | \item find the \coloremph{culprit} (fonts, images or drawing instructions) 97 | \item \coloremph{simple techniques} yield the most size reduction 98 | \item optimizing \coloremph{drawing} instructions is hard (and costs money) 99 | \end{itemize} 100 | } 101 | 102 | \frame{ 103 | \frametitle{To be concluded} 104 | \conclusionbody 105 | } 106 | 107 | \frame{ 108 | \frametitle{Our PDF optimization approach} 109 | Steps: 110 | \begin{enumerate} 111 | \item \coloremph{generate} the PDF as usual, adjusting only a few, crucial settings 112 | \item \coloremph{repeat} if necessary 113 | \item once the final PDF is ready, \coloremph{optimize} it automatically with one 114 | or more optimizers 115 | \end{enumerate} 116 | Do not: 117 | \begin{itemize} 118 | \item try to improve or fine-tune \coloremph{every PDF creator software} 119 | \item \coloremph{lose information} (printable or interactive) 
120 | while optimizing 121 | \item use a more compact output 122 | \coloremph{file format} (such as Multivalent compact PDF) 123 | \item \coloremph{render} vector graphics 124 | \end{itemize} 125 | } 126 | 127 | \frame{ 128 | \frametitle{Proposed workflow} 129 | \begin{enumerate} 130 | \advance\itemsep1em 131 | \item follow the best practices for choosing and configuring the 132 | \coloremph{\TeX{} driver} (pdf\TeX{}, dvipdfm or 133 | \sout{dvips} + ps2pdf) 134 | \item if affordable, run commercial optimizer \coloremph{PDF Enhancer} or 135 | \coloremph{Adobe Acrobat} to optimize content streams 136 | \item run our new optimizer called \coloremph{pdfsizeopt.py} 137 | mainly to optimize images and Type\,1 fonts\par 138 | \url{http://code.google.com/p/pdfsizeopt/} 139 | \item use \coloremph{Multivalent} \texttt{tool.pdf.Compress} to do the 140 | rest of the optimization (done automatically by pdfsizeopt.py) 141 | \end{enumerate} 142 | } 143 | 144 | \subsection{Optimization techniques} 145 | 146 | \frame{ 147 | \frametitle{Local techniques are the most effective} 148 | \begin{itemize} 149 | \advance\itemsep1em 150 | \item remove extra \coloremph{whitespace} and comments 151 | \item serialize \coloremph{strings} more effectively 152 | \item compress streams with \coloremph{high-effort ZIP} 153 | (no RLE, LZW and fax anymore) 154 | \item use \coloremph{cross-reference streams} (with the $y$ predictor) 155 | \item use \coloremph{object streams} 156 | \end{itemize} 157 | } 158 | 159 | \frame{ 160 | \frametitle{Techniques if data types are known} 161 | \begin{itemize} 162 | \advance\itemsep1em 163 | \item get rid of explicitly specified \coloremph{default} values 164 | \item remove keys \coloremph{ignored} by the PDF specification 165 | \item remove page \coloremph{thumbnails} 166 | \item \coloremph{flatten} the page structure 167 | \item \coloremph{inline} indirect references 168 | (unless long and there are multiple referrers) 169 | \end{itemize} 170 | } 171 | 172 | 
\frame{ 173 | \frametitle{Get rid of duplicate and unused data} 174 | \begin{itemize} 175 | \advance\itemsep1em 176 | \item get rid of \coloremph{unused objects} (pages, images, anchors etc.) 177 | \item compact the \coloremph{cross-reference} tables 178 | \item find \coloremph{duplicate or equivalent} objects, and keep only one copy 179 | \item convert some \coloremph{inline images} to objects to help deduplication 180 | \item \coloremph{split} some large arrays and dictionaries to help 181 | deduplication 182 | \end{itemize} 183 | } 184 | 185 | \frame{ 186 | \frametitle{Font optimization techniques} 187 | \begin{itemize} 188 | \advance\itemsep1em 189 | \item convert Type\,1 fonts to \coloremph{CFF} (Type\,1C, Type\,2) 190 | \item \coloremph{subset fonts} 191 | \item \coloremph{unify subsets} of the same font 192 | \end{itemize} 193 | } 194 | 195 | \frame{ 196 | \frametitle{Image optimization techniques} 197 | \begin{itemize} 198 | \advance\itemsep1em 199 | \item use \coloremph{grayscale or a palette} instead of RGB or CMYK 200 | \item use the smallest \coloremph{bit depth} 201 | \item get rid of image \coloremph{duplicates based on pixel colors} 202 | \item compress with \coloremph{multiple settings} 203 | (ZIP, ZIP with predictor, JBIG2 or 204 | combinations) and pick the smallest output 205 | \item compress with \coloremph{high effort} (e.g. 
slow ZIP with PNGOUT) 206 | \end{itemize} 207 | } 208 | 209 | \frame{ 210 | \frametitle{Advanced content stream techniques} 211 | If you can calculate on-the-paper bounding boxes, then 212 | \begin{itemize} 213 | \advance\itemsep.25em 214 | \item get rid of \coloremph{objects outside the paper} (and then resubset fonts) 215 | \item get rid of \coloremph{parts of image data} outside the paper 216 | \item do not draw an object if it's \coloremph{covered} 217 | \item \coloremph{clip} vector graphics to the paper rectangle 218 | \end{itemize} 219 | \medskip 220 | Others: 221 | \begin{itemize} 222 | \advance\itemsep.25em 223 | \item \coloremph{flatten} form XObjects, and rebuild them if necessary 224 | \item reorganize \coloremph{graphic-state changing} instructions 225 | \item unify small adjacent images to a large image 226 | \item separate an image for better compressibility 227 | \end{itemize} 228 | } 229 | 230 | \subsection{For \TeX{} documents} 231 | 232 | \frame{ 233 | \frametitle{Drivers: dvipdfm(x) $<$ pdf\TeX{} $\ll$ dvips } 234 | \begin{itemize} 235 | \advance\itemsep1em 236 | \item output of dvips is \coloremph{$>$50\% larger} than any of the other 237 | drivers 238 | \item dvips output optimized is \coloremph{$>$70\% larger} 239 | \item only dvips supports \coloremph{psfrag} and \coloremph{pstricks} 240 | \item design vector graphics with \coloremph{TikZ or \MP{}} 241 | (with appropriate helpers) for 242 | vector graphics instead of \coloremph{pstricks} 243 | \end{itemize} 244 | } 245 | 246 | \frame{ 247 | \frametitle{Manual setup for small PDF from \TeX{}} 248 | \begin{itemize} 249 | \advance\itemsep1em 250 | \item get rid of \coloremph{complex graphics} 251 | \item reduce \coloremph{image resolution} (300\,DPI or 600\,DPI): 252 | no need for a higher resolution than the printer's for the scaled image 253 | \item choose the \coloremph{JPEG quality} 254 | \item optimize poorly exported images with \coloremph{sam2p} 255 | \item embed \coloremph{vector fonts} 
256 | \item \coloremph{subset fonts} (on by default for \TeX{} text) 257 | \end{itemize} 258 | } 259 | 260 | \section{Effectiveness measurements} 261 | 262 | \subsection{Input PDF files} 263 | 264 | \frame{ 265 | \frametitle{Input PDF files} 266 | \begin{description} 267 | \item[cff] \textit{CFF reference}; 62 pages; by \emph{FrameMaker $+$ 268 | Distiller} 269 | \item[beamer] first beamer.cls example; 75 slide-steps; by pdf\TeX{} 270 | \item[eu2006] proceedings; 126 pages; by pdf\TeX{} $+$ concat 271 | \item[inkscape] \textit{Inkscape manual}; 341 pages; by \emph{CodeMantra} 272 | \item[lme2006] proceedings in Hungarian; 240 pages; by dvips $+$ ps2pdf $+$ concat 273 | \item[pdfref] \textit{PDF\,1.7 reference} 1310 pages; by \emph{FrameMaker $+$ Distiller} 274 | \item[pgf2] \textit{TikZ manual} 560 pages; by pdf\TeX{} 275 | \item[texbook] \textit{The \TeX{}book} 494 pages; by pdf\TeX{} 276 | \item[tuzv] mini novel in Hungarian; 20 pages; by dvipdfm 277 | \end{description} 278 | } 279 | 280 | \frame{ 281 | \frametitle{Input PDF sizes} 282 | \nocenter{% 283 | %\vbox to 0pt{}% 284 | %\nointerlineskip 285 | \noindent\hfil 286 | \includegraphics[height=\vsize]{pdfsizeopt_charts_ps2pdf.pdf} 287 | }} 288 | 289 | \frame{ 290 | \frametitle{PDF features measured} 291 | \begin{description} 292 | \item[xref \colorsquare{c-purple}] cross-reference table containing the document offsets 293 | \item[other \colorsquare{c-red}] hyperlinks, anchors, page structure, section structure 294 | (outlines), submittable forms, and other metadata 295 | \item[image \colorsquare{c-yellow}] embedded pixel images (XObject and inline) 296 | \item[font \colorsquare{c-green}] embedded vector font data 297 | \item[contents \colorsquare{c-blue}] vector graphics, text, colors, patterns 298 | etc., including content streams and form XObjects 299 | \end{description} 300 | } 301 | 302 | \frame{ 303 | \frametitle{Input PDF feature distribution} 304 | \nocenter{% 305 | \noindent\hfil 306 | 
\includegraphics[height=\vsize,page=2]{pdfsizeopt_charts_ps2pdf.pdf} 307 | }} 308 | 309 | \frame{ 310 | \frametitle{Optimizing tools measured} 311 | \begin{itemize} 312 | \advance\itemsep1em 313 | \item input PDF files were optimized using \coloremph{pdfsizeopt.py} 314 | (calling \coloremph{Multivalent} in its last step) 315 | \item \coloremph{further reductions} are possible (mostly in content 316 | streams) with Adobe Acrobat and PDF Enhancer (see in the paper) 317 | \item \coloremph{no information was removed} or harmed 318 | \end{itemize} 319 | } 320 | 321 | \subsection{Optimization effectiveness charts by feature} 322 | 323 | %\frame{ 324 | %\frametitle{Image optimization effectiveness (byte sizes)} 325 | %\nocenter{% 326 | %\noindent\hfil 327 | %\includegraphics[height=\vsize,page=3]{pdfsizeopt_charts_ps2pdf.pdf} 328 | %}} 329 | 330 | \frame{ 331 | \frametitle{Vector graphics and text optimization effectiveness} 332 | \nocenter{% 333 | \noindent\hfil 334 | \includegraphics[height=\vsize,page=4]{pdfsizeopt_charts_ps2pdf.pdf} 335 | }} 336 | 337 | \frame{ 338 | \frametitle{Embedded font optimization effectiveness} 339 | \nocenter{% 340 | \noindent\hfil 341 | \includegraphics[height=\vsize,page=5]{pdfsizeopt_charts_ps2pdf.pdf} 342 | }} 343 | 344 | \frame{ 345 | \frametitle{Pixel image optimization effectiveness} 346 | \nocenter{% 347 | \noindent\hfil 348 | \includegraphics[height=\vsize,page=6]{pdfsizeopt_charts_ps2pdf.pdf} 349 | }} 350 | 351 | \frame{ 352 | \frametitle{Other data optimization effectiveness} 353 | \nocenter{% 354 | \noindent\hfil 355 | \includegraphics[height=\vsize,page=7]{pdfsizeopt_charts_ps2pdf.pdf} 356 | }} 357 | 358 | \frame{ 359 | \frametitle{Cross-reference optimization effectiveness} 360 | \nocenter{% 361 | \noindent\hfil 362 | \includegraphics[height=\vsize,page=8]{pdfsizeopt_charts_ps2pdf.pdf} 363 | }} 364 | 365 | \section{Conclusion} 366 | 367 | \frame{ 368 | \frametitle{Related work} 369 | \begin{itemize} 370 | \advance\itemsep.5em 
371 | \item PDF optimization \coloremph{articles} (mostly lossy) 372 | \item \coloremph{PNG optimizers} 373 | \item other \coloremph{PDF optimizers:} Multivalent, Adobe Acrobat, PDF Enhancer 374 | \item the \coloremph{PDF Database} 375 | \item \coloremph{DjVu}: at 600\,DPI, 300\% of a text-only PDF; smaller than a PDF for 376 | images 377 | \item \coloremph{compact PDF} (30\% to 60\% of normal PDF) 378 | \end{itemize} 379 | } 380 | 381 | \frame{ 382 | \frametitle{Future work} 383 | \begin{itemize} 384 | \advance\itemsep1em 385 | \item get rid of heavy \coloremph{dependencies} (Python, Java, Ghostscript) 386 | \par $\to$ C++ and Lua from the ground up 387 | \item fix \coloremph{shortcuts} 388 | \begin{itemize} 389 | \item support CMYK and other color spaces 390 | \item better find mergeable CFF fonts 391 | \item recognize all inline images 392 | \end{itemize} 393 | \item add \coloremph{test} PDF files (possibly from the PDF database) 394 | \item add \coloremph{concatenation} support for collections 395 | \end{itemize} 396 | } 397 | 398 | \frame{ 399 | \frametitle{Conclusion} 400 | \conclusionbody 401 | } 402 | 403 | %\subsection{Overview of the Beamer Class} 404 | %\frame 405 | %{ 406 | % \frametitle{Features of the Beamer Class} 407 | % 408 | % \begin{itemize} 409 | % \item<1-> Normal LaTeX class. 410 | % \item<2-> Easy overlays. 411 | % \item<3-> No external programs needed. 412 | % \end{itemize} 413 | %} 414 | 415 | \frame{ 416 | \thispagestyle{empty} 417 | %\frametitle{hello} 418 | \noindent\hfill{\fontsize{150}{150}\selectfont?}\hfill\null\par 419 | } 420 | 421 | \frame{\thispagestyle{empty}} 422 | 423 | \end{document} 424 | -------------------------------------------------------------------------------- /win32port/README.txt: -------------------------------------------------------------------------------- 1 | This directory contains sources for some binary dependencies of pdfsizeopt. 
2 | 3 | See also: http://pdfsizeopt.googlecode.com/files/pdfsizeopt_win32bin.zip 4 | See also: http://ghostscript.googlecode.com/files/gs902w32.exe 5 | -------------------------------------------------------------------------------- /win32port/pdfsizeopt_winstub.c: -------------------------------------------------------------------------------- 1 | /* 2 | * pdfsizeopt_winstub.c: Start Python on Win32. 3 | * by pts@fazekas.hu at Wed Jun 27 14:07:04 CEST 2012 4 | * 5 | * Compile with: i686-w64-mingw32-gcc -mconsole -s -Os -W -Wall -Wextra -o ../pdfsizeopt.exe pdfsizeopt_winstub.c 6 | */ 7 | 8 | #include 9 | #include /* PATH_MAX is 259. */ 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | 16 | /* Shorter than , GetCommandLine() is defined in . */ 17 | #include 18 | #include 19 | #include 20 | 21 | #define FILE_SEP '\\' 22 | #define PATH_SEP ';' 23 | 24 | #define CMDLINE_MAX 16384 25 | 26 | static char is_in_dir(const char *file, const char *p, const char *q) { 27 | struct stat st; 28 | char pathname[PATH_MAX + 1]; 29 | int len = (q - p) + 1 + strlen(file); 30 | if (len > PATH_MAX) len = PATH_MAX; 31 | strncpy(pathname, p, q - p); 32 | if (len > q - p) { 33 | pathname[q - p] = FILE_SEP; 34 | strncpy(pathname + (q - p) + 1, file, len - (q - p) - 1); 35 | } else { 36 | if (q - p < PATH_MAX) pathname[q - p] = '\0'; 37 | } 38 | pathname[len] = '\0'; 39 | return 0 == stat(pathname, &st) && S_ISREG(st.st_mode); 40 | } 41 | 42 | static char is_file(const char *pathname) { 43 | struct stat st; 44 | return 0 == stat(pathname, &st) && S_ISREG(st.st_mode); 45 | } 46 | 47 | static void find_on_path(const char *prog, char *dir_out) { 48 | const char *path = getenv("PATH"), *p, *q; 49 | char is_qq; 50 | if (path != NULL || *path != '\0') { 51 | p = path; 52 | while (*p != '\0') { 53 | while (*p == PATH_SEP) { 54 | ++p; 55 | } 56 | if (*p == '\0') break; 57 | if ((is_qq = (*p == '"'))) { 58 | /* This is probably not the right way of removing "s, 59 | * but it 
happens to work with "...";... At least filenames can't 60 | * contain ", so the \..." unescaping hell won't happen for valid 61 | * filenames. 62 | */ 63 | q = ++p; 64 | while (*q != '\0' && *q != '"') ++q; 65 | } else { 66 | q = p; 67 | while (*q != '\0' && *q != PATH_SEP) ++q; 68 | } 69 | if (is_in_dir(prog, p, q)) { 70 | if (q - p > PATH_MAX) { 71 | q = p + PATH_MAX; 72 | } 73 | memcpy(dir_out, p, q - p); 74 | dir_out[q - p] = '\0'; 75 | return; 76 | } 77 | p = q; 78 | if (is_qq && *p == '"') ++p; 79 | } 80 | } 81 | *dir_out = '\0'; /* Not found. */ 82 | } 83 | 84 | /* Appends zero-terminated input p to out...outend, returns the position in 85 | * out after the output. Truncates silently. 86 | */ 87 | static char *add_verbatimz(const char *p, char *out, const char *outend) { 88 | for (; *p != '\0' && out != outend; *out++ = *p++) {} 89 | return out; 90 | } 91 | 92 | /* Escapes a command-line argument between p and pend to out, returns the 93 | * position in out after the output. Truncates silently. 94 | * 95 | * Does the escaping according to inverse of the rules defined in 96 | * 97 | * * https://stackoverflow.com/a/4094897/97248 98 | * * https://msdn.microsoft.com/en-us/library/a1y7w461.aspx 99 | * 100 | * The following characters need escaping: 101 | * space, tab, %, ", <, >, &, |. 102 | * If the input contains none of these, the output is same as the 103 | * input. 
Otherwise, the output looks like "...", and within the "s: 104 | * 105 | * * \s not followed by a " are kept intact 106 | * * \s followed by a " are doubled and the " is escaped as \" 107 | * * " (not preceded by a \) is escaped as \" 108 | * * anything else is kept intact 109 | */ 110 | static char *add_escaped(const char *p, const char *pend, 111 | char *out, const char *outend) { 112 | register char c; 113 | const char *q = p; 114 | while (q != pend && (c = *q) != ' ' && c != '\t' && c != '%' && 115 | c != '"' && c != '<' && c != '>' && c != '&' && c != '|') { 116 | ++q; 117 | } 118 | if (q == pend && p != pend) { /* No need for escaping. */ 119 | for (; p != pend && out != outend; *out++ = *p++) {} 120 | } else { 121 | if (out != outend) *out++ = '"'; 122 | while (p != pend) { 123 | if ((c = *p) == '"') { /* Escape " as \" */ 124 | do_qq: 125 | if (out != outend) *out++ = '\\'; 126 | goto do_verbatim; 127 | } else if (c == '\\') { 128 | for (q = p; q != pend && *q == '\\'; ++q) {} 129 | if (q == pend || *q != '"') { /* Copy verbatim. */ 130 | for (; p != q && out != outend; *out++ = *p++) {} 131 | if (out == outend) break; 132 | } else { 133 | for (; p != q; ++p) { 134 | if (out != outend) *out++ = '\\'; 135 | if (out != outend) *out++ = '\\'; 136 | } 137 | goto do_qq; 138 | } 139 | } else { /* Copy character verbatim. */ 140 | do_verbatim: 141 | if (out != outend) *out++ = *p; 142 | ++p; 143 | } 144 | } 145 | if (out != outend) *out++ = '"'; 146 | } 147 | return out; 148 | } 149 | 150 | /* Skips a single command-line argument in the beginning of p. 151 | * 152 | * Does the parsing according to the rules defined in 153 | * 154 | * * https://stackoverflow.com/a/4094897/97248 155 | * * https://msdn.microsoft.com/en-us/library/a1y7w461.aspx 156 | * 157 | * Most of this is unnecessary for skipping filenames, because filenames 158 | * cannot contain \\ or \". 
159 | */ 160 | const char *skip_arg(const char *p) { 161 | register char c; 162 | const char *q; 163 | while ((c = *p) == ' ' || c == '\t') ++p; 164 | for (;;) { 165 | while ((c = *p++) != ' ' && c != '\t' && c != '\0' && c != '"' && 166 | c != '\\') {} 167 | if (c == ' ' || c == '\t' || c == '\0') break; 168 | if (c == '"') { 169 | do_qq: 170 | for (;;) { 171 | for (; (c = *p) != '\0' && c != '\\' && c != '"'; ++p) {} 172 | if (c == '\0') goto at_end; 173 | if (c == '"') { ++p; break; } 174 | q = ++p; /* Skip over the '\\'. */ 175 | for (; *p == '\\'; ++p) {} 176 | if (*p == '"') { 177 | ++p; 178 | if ((q - p) % 2 == 0) break; 179 | } 180 | } 181 | } else { /* c == '\\'. */ 182 | q = p; 183 | for (; *p == '\\'; ++p) {} 184 | if (*p == '"') { 185 | ++p; 186 | if ((q - p) % 2 == 0) goto do_qq; 187 | } 188 | } 189 | } 190 | while ((c = *p) == ' ' || c == '\t') ++p; 191 | at_end: 192 | return p; 193 | } 194 | 195 | /* FILE_SEP inlined here. */ 196 | static const char python_exe[] = "pdfsizeopt_win32exec\\pdfsizeopt_python.exe"; 197 | static const char pdfsizeopt_py0[] = "pdfsizeopt"; 198 | static const char pdfsizeopt_py1[] = "pdfsizeopt.single"; 199 | 200 | int main(int argc, char **argv) { 201 | char python_bin[PATH_MAX + 1], argv0_bin[PATH_MAX + 1], *p, *q; 202 | char prog_py[PATH_MAX + 2]; 203 | char cmdline[CMDLINE_MAX], *cp, *cend = cmdline + sizeof(cmdline); 204 | const char *cmdline_argv[2]; 205 | int i; 206 | (void)argc; 207 | p = argv[0]; 208 | q = NULL; 209 | while (*p != '\0') { 210 | if (*p++ == FILE_SEP) q = p; 211 | } 212 | if (q == NULL) { /* Try to find argv[0] on $PATH. 
*/ 213 | p = argv[0]; 214 | strncpy(argv0_bin, p, sizeof argv0_bin); 215 | q = NULL; 216 | while (*p != '\0') { 217 | if (*p++ == '.') q = p; 218 | } 219 | if (q == NULL) { 220 | argv0_bin[sizeof argv0_bin - 5] = '\0'; 221 | strcat(argv0_bin, ".exe"); 222 | } else { 223 | argv0_bin[sizeof argv0_bin - 1] = '\0'; 224 | } 225 | find_on_path(argv0_bin, python_bin); 226 | } else { /* Put dirname(argv[0]) to python_bin. */ 227 | p = argv[0]; 228 | --q; 229 | if (q - p > PATH_MAX) { 230 | q = p + PATH_MAX; 231 | } 232 | strncpy(python_bin, p, q - p); 233 | python_bin[q - p] = '\0'; 234 | } 235 | 236 | if (python_bin[0] == '\0') { 237 | python_bin[0] = '.'; 238 | python_bin[1] = '\0'; 239 | } 240 | p = python_bin + strlen(python_bin); 241 | /* Remove trailing backslashes from python_bin. */ 242 | while (p != python_bin && p[-1] == FILE_SEP) --p; 243 | *p = '\0'; 244 | 245 | strcpy(prog_py, python_bin); 246 | 247 | i = strlen(python_bin); 248 | if (i + strlen(python_exe) > PATH_MAX) { 249 | i = PATH_MAX - strlen(python_exe); 250 | } 251 | python_bin[i] = FILE_SEP; 252 | strcpy(python_bin + i + 1, python_exe); 253 | 254 | i = strlen(prog_py); 255 | if (i + strlen(pdfsizeopt_py0) > PATH_MAX) { 256 | i = PATH_MAX - strlen(pdfsizeopt_py0); 257 | } 258 | if (i + strlen(pdfsizeopt_py1) > PATH_MAX) { 259 | i = PATH_MAX - strlen(pdfsizeopt_py1); 260 | } 261 | prog_py[i++] = FILE_SEP; 262 | strcpy(prog_py + i, pdfsizeopt_py0); 263 | if (!is_file(prog_py)) { 264 | strcpy(prog_py + i, pdfsizeopt_py1); 265 | if (!is_file(prog_py)) { 266 | fprintf(stderr, "error: Python script missing: %s\n", prog_py); 267 | return 121; 268 | } 269 | } 270 | 271 | cp = cmdline; 272 | cp = add_verbatimz("python ", cp, cend); 273 | /* strcat(prog_py, " \"foo\\\\bar\\\\\"baz?"); */ 274 | cp = add_escaped(prog_py, prog_py + strlen(prog_py), cp, cend); 275 | if (cp != cend) *cp++ = ' '; 276 | /* For debugging: we could have 2 skip_arg(...) calls. 
*/ 277 | cp = add_verbatimz(skip_arg(GetCommandLine()), cp, cend); 278 | if (cp == cend) { 279 | fprintf(stderr, "error: output command-line too long\n"); 280 | return 122; 281 | } 282 | if (cp[-1] == ' ') --cp; 283 | *cp = '\0'; 284 | /* printf("cmdline=(%s)\n", cmdline); return 0; */ 285 | 286 | cmdline_argv[0] = cmdline; 287 | cmdline_argv[1] = NULL; 288 | 289 | /* execv(...) and P_OVERLAY don't work well in wine-1.2.2 and Windows XP, 290 | * because they make this process return before the started process finishes. 291 | * 292 | * It's undocumented, but spawvn just joins cmdline_argv with spaces, 293 | * which is not what we would want with argv (because it's already split 294 | * and unescaped). So we use GetCommandLine() in cmdline_argv, which is 295 | * escaped. 296 | * 297 | * Casting to (void*) needed to avoid different warnings (different 298 | * declarations) with i586-mingw32msvc-gcc and i686-w64-mingw32-gcc. 299 | */ 300 | i = spawnv(P_WAIT, python_bin, (void*)cmdline_argv); 301 | if (i < 0) { 302 | fprintf(stderr, "error: could not start %s: %s\n", 303 | python_bin, strerror(errno)); 304 | return 120; 305 | } 306 | return i; 307 | } 308 | --------------------------------------------------------------------------------