├── requirements.txt
├── PKG-INFO
├── .gitignore
├── setup.py
├── README.md
├── README
├── LICENSE
└── chm2pdf


/requirements.txt:
--------------------------------------------------------------------------------
1 | pychm
2 | sgmllib3k
3 | 


--------------------------------------------------------------------------------
/PKG-INFO:
--------------------------------------------------------------------------------
 1 | Metadata-Version: 1.0
 2 | Name: chm2pdf
 3 | Version: 0.9.1
 4 | Summary: A script to convert CHM files into PDF
 5 | Home-page: http://code.google.com/p/chm2pdf/
 6 | Author: Massimo Sandal, Chris Karakas
 7 | Author-email: devicerandom@gmail.com, chris@karakas-online.de
 8 | License: GPL v.2.
 9 | Description: A script to convert CHM files into PDF. Requires chmlib, pychm, htmldoc.
10 | Platform: UNKNOWN
11 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | *.py[cod]
 2 | 
 3 | # C extensions
 4 | *.so
 5 | 
 6 | # Packages
 7 | *.egg
 8 | *.egg-info
 9 | dist
10 | build
11 | eggs
12 | parts
13 | bin
14 | var
15 | sdist
16 | develop-eggs
17 | .installed.cfg
18 | lib
19 | lib64
20 | __pycache__
21 | 
22 | # Installer logs
23 | pip-log.txt
24 | 
25 | # Unit test / coverage reports
26 | .coverage
27 | .tox
28 | nosetests.xml
29 | 
30 | # Translations
31 | *.mo
32 | 
33 | # Mr Developer
34 | .mr.developer.cfg
35 | .project
36 | .pydevproject
37 | 


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | from distutils.core import setup
 4 | 
 5 | setup(name = "chm2pdf",
 6 |     version = "0.9",
 7 |     description = "A script to convert CHM files into PDF",
 8 |     author = "Massimo Sandal, Chris Karakas, Suleyman Poyraz",
 9 |     author_email = "devicerandom@gmail.com, chris@karakas-online.de",
10 |     url = "http://code.google.com/p/chm2pdf/",
11 |     scripts = ["chm2pdf"],
12 |     long_description = "A script to convert CHM files into PDF. Requires chmlib, pychm, htmldoc."
13 |     )
14 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | chm2pdf
 2 | =======
 3 | A Python script that converts a CHM file into a single PDF file.
 4 | 
 5 | (c) 2007 Massimo Sandal <devicerandom@gmail.com>
 6 | 
 7 | (c) 2007-2008 Chris Karakas <chris@karakas-online.de> and <http://www.karakas-online.de>
 8 | 
 9 | Usage:
10 |   `chm2pdf [options] input_filename [output_filename]`
11 | 
12 | For all options, see
13 |   `chm2pdf --help`
14 | 
15 | RECOMMENDED READING:
16 |  - http://www.karakas-online.de/forum/viewtopic.php?t=10275
17 |  - http://www.karakas-online.de/forum/viewtopic.php?t=10969
18 | 
19 | Installation:
20 |  - download the .tar.gz
21 |  - unzip it: "tar -xzvf chm2pdf-a.b.c.tar.gz"
22 |  - enter the newly created directory
23 |  - acquire root privileges
24 |  - type "python setup.py install"
25 | 
26 | Requires:
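27 |  - python (on Python 3, also the sgmllib3k package listed in requirements.txt)
28 |  - chmlib. NOTE: chmlib *must* be configured with ./configure --enable-examples, because chm2pdf runs the example programs (such as enum_chmLib) that this option builds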
29 |  - pychm
30 |  - htmldoc
31 |  - BeautifulSoup (optional)
32 | 
33 | All of these should be in your Linux/Unix distribution repository :)
34 | 


--------------------------------------------------------------------------------
/README:
--------------------------------------------------------------------------------
 1 | CHM2PDF
 2 | 
 3 | (c) 2007 Massimo Sandal
 4 | (c) 2007-2008 Chris Karakas <http://www.karakas-online.de>
 5 | (c) 2020 Suleyman Poyraz <zaryob.dev@gmail.com>
 6 | 
 7 | A Python script that converts a CHM file into a single PDF file.
 8 | 
 9 | Usage:
10 | chm2pdf [options] input_filename [output_filename]
11 | 
12 | See
13 | 
14 | chm2pdf --help
15 | 
16 | for all options.
17 | 
18 | RECOMMENDED READING:
19 |     - http://www.karakas-online.de/forum/viewtopic.php?t=10275
20 |     - http://www.karakas-online.de/forum/viewtopic.php?t=10969
21 | 
22 | 
23 | Installation:
24 | - download the .tar.gz
25 | - unzip it: "tar -xzvf chm2pdf-a.b.c.tar.gz"
26 | - enter the newly created directory
27 | - acquire root privileges
28 | - type "python setup.py install"
29 | 
30 | Requires:
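31 |     - python (on Python 3, also the sgmllib3k package listed in requirements.txt)
32 |     - chmlib
33 |       NOTE: chmlib *must* be configured with ./configure --enable-examples, because chm2pdf runs the example programs (such as enum_chmLib) that this option builds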
34 |     - pychm
35 |     - htmldoc
36 | 
37 | Optional:
38 |     - BeautifulSoup
39 | 
40 | All of these should be in your Linux/Unix distribution repository :)
41 | 
42 | To contact Massimo: devicerandom@gmail.com
43 | To contact Chris: chris@karakas-online.de
44 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
  1 | GNU GENERAL PUBLIC LICENSE
  2 |                        Version 2, June 1991
  3 | 
  4 |  Copyright (C) 1989, 1991 Free Software Foundation, Inc., <http://fsf.org/>
  5 |  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  6 |  Everyone is permitted to copy and distribute verbatim copies
  7 |  of this license document, but changing it is not allowed.
  8 | 
  9 |                             Preamble
 10 | 
 11 |   The licenses for most software are designed to take away your
 12 | freedom to share and change it.  By contrast, the GNU General Public
 13 | License is intended to guarantee your freedom to share and change free
 14 | software--to make sure the software is free for all its users.  This
 15 | General Public License applies to most of the Free Software
 16 | Foundation's software and to any other program whose authors commit to
 17 | using it.  (Some other Free Software Foundation software is covered by
 18 | the GNU Lesser General Public License instead.)  You can apply it to
 19 | your programs, too.
 20 | 
 21 |   When we speak of free software, we are referring to freedom, not
 22 | price.  Our General Public Licenses are designed to make sure that you
 23 | have the freedom to distribute copies of free software (and charge for
 24 | this service if you wish), that you receive source code or can get it
 25 | if you want it, that you can change the software or use pieces of it
 26 | in new free programs; and that you know you can do these things.
 27 | 
 28 |   To protect your rights, we need to make restrictions that forbid
 29 | anyone to deny you these rights or to ask you to surrender the rights.
 30 | These restrictions translate to certain responsibilities for you if you
 31 | distribute copies of the software, or if you modify it.
 32 | 
 33 |   For example, if you distribute copies of such a program, whether
 34 | gratis or for a fee, you must give the recipients all the rights that
 35 | you have.  You must make sure that they, too, receive or can get the
 36 | source code.  And you must show them these terms so they know their
 37 | rights.
 38 | 
 39 |   We protect your rights with two steps: (1) copyright the software, and
 40 | (2) offer you this license which gives you legal permission to copy,
 41 | distribute and/or modify the software.
 42 | 
 43 |   Also, for each author's protection and ours, we want to make certain
 44 | that everyone understands that there is no warranty for this free
 45 | software.  If the software is modified by someone else and passed on, we
 46 | want its recipients to know that what they have is not the original, so
 47 | that any problems introduced by others will not reflect on the original
 48 | authors' reputations.
 49 | 
 50 |   Finally, any free program is threatened constantly by software
 51 | patents.  We wish to avoid the danger that redistributors of a free
 52 | program will individually obtain patent licenses, in effect making the
 53 | program proprietary.  To prevent this, we have made it clear that any
 54 | patent must be licensed for everyone's free use or not licensed at all.
 55 | 
 56 |   The precise terms and conditions for copying, distribution and
 57 | modification follow.
 58 | 
 59 |                     GNU GENERAL PUBLIC LICENSE
 60 |    TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
 61 | 
 62 |   0. This License applies to any program or other work which contains
 63 | a notice placed by the copyright holder saying it may be distributed
 64 | under the terms of this General Public License.  The "Program", below,
 65 | refers to any such program or work, and a "work based on the Program"
 66 | means either the Program or any derivative work under copyright law:
 67 | that is to say, a work containing the Program or a portion of it,
 68 | either verbatim or with modifications and/or translated into another
 69 | language.  (Hereinafter, translation is included without limitation in
 70 | the term "modification".)  Each licensee is addressed as "you".
 71 | 
 72 | Activities other than copying, distribution and modification are not
 73 | covered by this License; they are outside its scope.  The act of
 74 | running the Program is not restricted, and the output from the Program
 75 | is covered only if its contents constitute a work based on the
 76 | Program (independent of having been made by running the Program).
 77 | Whether that is true depends on what the Program does.
 78 | 
 79 |   1. You may copy and distribute verbatim copies of the Program's
 80 | source code as you receive it, in any medium, provided that you
 81 | conspicuously and appropriately publish on each copy an appropriate
 82 | copyright notice and disclaimer of warranty; keep intact all the
 83 | notices that refer to this License and to the absence of any warranty;
 84 | and give any other recipients of the Program a copy of this License
 85 | along with the Program.
 86 | 
 87 | You may charge a fee for the physical act of transferring a copy, and
 88 | you may at your option offer warranty protection in exchange for a fee.
 89 | 
 90 |   2. You may modify your copy or copies of the Program or any portion
 91 | of it, thus forming a work based on the Program, and copy and
 92 | distribute such modifications or work under the terms of Section 1
 93 | above, provided that you also meet all of these conditions:
 94 | 
 95 |     a) You must cause the modified files to carry prominent notices
 96 |     stating that you changed the files and the date of any change.
 97 | 
 98 |     b) You must cause any work that you distribute or publish, that in
 99 |     whole or in part contains or is derived from the Program or any
100 |     part thereof, to be licensed as a whole at no charge to all third
101 |     parties under the terms of this License.
102 | 
103 |     c) If the modified program normally reads commands interactively
104 |     when run, you must cause it, when started running for such
105 |     interactive use in the most ordinary way, to print or display an
106 |     announcement including an appropriate copyright notice and a
107 |     notice that there is no warranty (or else, saying that you provide
108 |     a warranty) and that users may redistribute the program under
109 |     these conditions, and telling the user how to view a copy of this
110 |     License.  (Exception: if the Program itself is interactive but
111 |     does not normally print such an announcement, your work based on
112 |     the Program is not required to print an announcement.)
113 | 
114 | These requirements apply to the modified work as a whole.  If
115 | identifiable sections of that work are not derived from the Program,
116 | and can be reasonably considered independent and separate works in
117 | themselves, then this License, and its terms, do not apply to those
118 | sections when you distribute them as separate works.  But when you
119 | distribute the same sections as part of a whole which is a work based
120 | on the Program, the distribution of the whole must be on the terms of
121 | this License, whose permissions for other licensees extend to the
122 | entire whole, and thus to each and every part regardless of who wrote it.
123 | 
124 | Thus, it is not the intent of this section to claim rights or contest
125 | your rights to work written entirely by you; rather, the intent is to
126 | exercise the right to control the distribution of derivative or
127 | collective works based on the Program.
128 | 
129 | In addition, mere aggregation of another work not based on the Program
130 | with the Program (or with a work based on the Program) on a volume of
131 | a storage or distribution medium does not bring the other work under
132 | the scope of this License.
133 | 
134 |   3. You may copy and distribute the Program (or a work based on it,
135 | under Section 2) in object code or executable form under the terms of
136 | Sections 1 and 2 above provided that you also do one of the following:
137 | 
138 |     a) Accompany it with the complete corresponding machine-readable
139 |     source code, which must be distributed under the terms of Sections
140 |     1 and 2 above on a medium customarily used for software interchange; or,
141 | 
142 |     b) Accompany it with a written offer, valid for at least three
143 |     years, to give any third party, for a charge no more than your
144 |     cost of physically performing source distribution, a complete
145 |     machine-readable copy of the corresponding source code, to be
146 |     distributed under the terms of Sections 1 and 2 above on a medium
147 |     customarily used for software interchange; or,
148 | 
149 |     c) Accompany it with the information you received as to the offer
150 |     to distribute corresponding source code.  (This alternative is
151 |     allowed only for noncommercial distribution and only if you
152 |     received the program in object code or executable form with such
153 |     an offer, in accord with Subsection b above.)
154 | 
155 | The source code for a work means the preferred form of the work for
156 | making modifications to it.  For an executable work, complete source
157 | code means all the source code for all modules it contains, plus any
158 | associated interface definition files, plus the scripts used to
159 | control compilation and installation of the executable.  However, as a
160 | special exception, the source code distributed need not include
161 | anything that is normally distributed (in either source or binary
162 | form) with the major components (compiler, kernel, and so on) of the
163 | operating system on which the executable runs, unless that component
164 | itself accompanies the executable.
165 | 
166 | If distribution of executable or object code is made by offering
167 | access to copy from a designated place, then offering equivalent
168 | access to copy the source code from the same place counts as
169 | distribution of the source code, even though third parties are not
170 | compelled to copy the source along with the object code.
171 | 
172 |   4. You may not copy, modify, sublicense, or distribute the Program
173 | except as expressly provided under this License.  Any attempt
174 | otherwise to copy, modify, sublicense or distribute the Program is
175 | void, and will automatically terminate your rights under this License.
176 | However, parties who have received copies, or rights, from you under
177 | this License will not have their licenses terminated so long as such
178 | parties remain in full compliance.
179 | 
180 |   5. You are not required to accept this License, since you have not
181 | signed it.  However, nothing else grants you permission to modify or
182 | distribute the Program or its derivative works.  These actions are
183 | prohibited by law if you do not accept this License.  Therefore, by
184 | modifying or distributing the Program (or any work based on the
185 | Program), you indicate your acceptance of this License to do so, and
186 | all its terms and conditions for copying, distributing or modifying
187 | the Program or works based on it.
188 | 
189 |   6. Each time you redistribute the Program (or any work based on the
190 | Program), the recipient automatically receives a license from the
191 | original licensor to copy, distribute or modify the Program subject to
192 | these terms and conditions.  You may not impose any further
193 | restrictions on the recipients' exercise of the rights granted herein.
194 | You are not responsible for enforcing compliance by third parties to
195 | this License.
196 | 
197 |   7. If, as a consequence of a court judgment or allegation of patent
198 | infringement or for any other reason (not limited to patent issues),
199 | conditions are imposed on you (whether by court order, agreement or
200 | otherwise) that contradict the conditions of this License, they do not
201 | excuse you from the conditions of this License.  If you cannot
202 | distribute so as to satisfy simultaneously your obligations under this
203 | License and any other pertinent obligations, then as a consequence you
204 | may not distribute the Program at all.  For example, if a patent
205 | license would not permit royalty-free redistribution of the Program by
206 | all those who receive copies directly or indirectly through you, then
207 | the only way you could satisfy both it and this License would be to
208 | refrain entirely from distribution of the Program.
209 | 
210 | If any portion of this section is held invalid or unenforceable under
211 | any particular circumstance, the balance of the section is intended to
212 | apply and the section as a whole is intended to apply in other
213 | circumstances.
214 | 
215 | It is not the purpose of this section to induce you to infringe any
216 | patents or other property right claims or to contest validity of any
217 | such claims; this section has the sole purpose of protecting the
218 | integrity of the free software distribution system, which is
219 | implemented by public license practices.  Many people have made
220 | generous contributions to the wide range of software distributed
221 | through that system in reliance on consistent application of that
222 | system; it is up to the author/donor to decide if he or she is willing
223 | to distribute software through any other system and a licensee cannot
224 | impose that choice.
225 | 
226 | This section is intended to make thoroughly clear what is believed to
227 | be a consequence of the rest of this License.
228 | 
229 |   8. If the distribution and/or use of the Program is restricted in
230 | certain countries either by patents or by copyrighted interfaces, the
231 | original copyright holder who places the Program under this License
232 | may add an explicit geographical distribution limitation excluding
233 | those countries, so that distribution is permitted only in or among
234 | countries not thus excluded.  In such case, this License incorporates
235 | the limitation as if written in the body of this License.
236 | 
237 |   9. The Free Software Foundation may publish revised and/or new versions
238 | of the General Public License from time to time.  Such new versions will
239 | be similar in spirit to the present version, but may differ in detail to
240 | address new problems or concerns.
241 | 
242 | Each version is given a distinguishing version number.  If the Program
243 | specifies a version number of this License which applies to it and "any
244 | later version", you have the option of following the terms and conditions
245 | either of that version or of any later version published by the Free
246 | Software Foundation.  If the Program does not specify a version number of
247 | this License, you may choose any version ever published by the Free Software
248 | Foundation.
249 | 
250 |   10. If you wish to incorporate parts of the Program into other free
251 | programs whose distribution conditions are different, write to the author
252 | to ask for permission.  For software which is copyrighted by the Free
253 | Software Foundation, write to the Free Software Foundation; we sometimes
254 | make exceptions for this.  Our decision will be guided by the two goals
255 | of preserving the free status of all derivatives of our free software and
256 | of promoting the sharing and reuse of software generally.
257 | 
258 |                             NO WARRANTY
259 | 
260 |   11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
261 | FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW.  EXCEPT WHEN
262 | OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
263 | PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
264 | OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
265 | MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.  THE ENTIRE RISK AS
266 | TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU.  SHOULD THE
267 | PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
268 | REPAIR OR CORRECTION.
269 | 
270 |   12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
271 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
272 | REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
273 | INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
274 | OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
275 | TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
276 | YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
277 | PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
278 | POSSIBILITY OF SUCH DAMAGES.
279 | 
280 |                      END OF TERMS AND CONDITIONS
281 | 
282 |             How to Apply These Terms to Your New Programs
283 | 
284 |   If you develop a new program, and you want it to be of the greatest
285 | possible use to the public, the best way to achieve this is to make it
286 | free software which everyone can redistribute and change under these terms.
287 | 
288 |   To do so, attach the following notices to the program.  It is safest
289 | to attach them to the start of each source file to most effectively
290 | convey the exclusion of warranty; and each file should have at least
291 | the "copyright" line and a pointer to where the full notice is found.
292 | 
293 |     {description}
294 |     Copyright (C) {year}  {fullname}
295 | 
296 |     This program is free software; you can redistribute it and/or modify
297 |     it under the terms of the GNU General Public License as published by
298 |     the Free Software Foundation; either version 2 of the License, or
299 |     (at your option) any later version.
300 | 
301 |     This program is distributed in the hope that it will be useful,
302 |     but WITHOUT ANY WARRANTY; without even the implied warranty of
303 |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
304 |     GNU General Public License for more details.
305 | 
306 |     You should have received a copy of the GNU General Public License along
307 |     with this program; if not, write to the Free Software Foundation, Inc.,
308 |     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
309 | 
310 | Also add information on how to contact you by electronic and paper mail.
311 | 
312 | If the program is interactive, make it output a short notice like this
313 | when it starts in an interactive mode:
314 | 
315 |     Gnomovision version 69, Copyright (C) year name of author
316 |     Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
317 |     This is free software, and you are welcome to redistribute it
318 |     under certain conditions; type `show c' for details.
319 | 
320 | The hypothetical commands `show w' and `show c' should show the appropriate
321 | parts of the General Public License.  Of course, the commands you use may
322 | be called something other than `show w' and `show c'; they could even be
323 | mouse-clicks or menu items--whatever suits your program.
324 | 
325 | You should also get your employer (if you work as a programmer) or your
326 | school, if any, to sign a "copyright disclaimer" for the program, if
327 | necessary.  Here is a sample; alter the names:
328 | 
329 |   Yoyodyne, Inc., hereby disclaims all copyright interest in the program
330 |   `Gnomovision' (which makes passes at compilers) written by James Hacker.
331 | 
332 |   {signature of Ty Coon}, 1 April 1989
333 |   Ty Coon, President of Vice
334 | 
335 | This General Public License does not permit incorporating your program into
336 | proprietary programs.  If your program is a subroutine library, you may
337 | consider it more useful to permit linking proprietary applications with the
338 | library.  If this is what you want to do, use the GNU Lesser General
339 | Public License instead of this License.
340 | 


--------------------------------------------------------------------------------
/chm2pdf:
--------------------------------------------------------------------------------
   1 | #!/usr/bin/env python
   2 | '''
   3 | CHM2PDF v. 0.9.2
   4 | http://code.google.com/p/chm2pdf
   5 | 
   6 | A script that converts a CHM compiled HTML file into a single PDF file.
   7 | (c) 2007 Massimo Sandal
   8 | (c) 2007 Chris Karakas <http://www.karakas-online.de>
   9 | (c) 2020 Suleyman Poyraz <zaryob.dev@gmail.com>
  10 | 
  11 |     This program is free software: you can redistribute it and/or modify
  12 | 
  13 |     it under the terms of the GNU General Public License as published by
  14 |     the Free Software Foundation, either version 2 of the License, or
  15 |     (at your option) any later version.
  16 | 
  17 |     This program is distributed in the hope that it will be useful,
  18 |     but WITHOUT ANY WARRANTY; without even the implied warranty of
  19 |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  20 |     GNU General Public License for more details.
  21 | 
  22 |     You should have received a copy of the GNU General Public License
  23 |     along with this program.  If not, see <http://www.gnu.org/licenses/>.
  24 | '''
  25 | 
  26 | 
  27 | from __future__ import print_function    # (at top of module)
  28 | 
  29 | import chm.chm as chm
  30 | import sys
  31 | import sgmllib
  32 | import os, os.path
  33 | import re, glob
  34 | import getopt
  35 | # from BeautifulSoup import BeautifulSoup
  36 | 
  37 | global version
  38 | 
  39 | global CHM2PDF_TEMP_WORK_DIR #where conversions etc. take place
  40 | global CHM2PDF_TEMP_ORIG_DIR #where the chm file is exploded
  41 | global CHM2PDF_WORK_DIR
  42 | global CHM2PDF_ORIG_DIR
  43 | 
  44 | global filename #the input filename
  45 | 
  46 | version = '0.9.2'
  47 | CHM2PDF_TEMP_WORK_DIR='/tmp/chm2pdf/work'
  48 | CHM2PDF_TEMP_ORIG_DIR='/tmp/chm2pdf/orig'
  49 | 
  50 | # YOU DON'T NEED TO CHANGE ANYTHING BELOW THIS LINE!
  51 | 
  52 | 
  53 | class PageLister(sgmllib.SGMLParser):
  54 |     '''
  55 |     parser of the chm.chm GetTopicsTree() method that retrieves the URL of the HTML
  56 |     page embedded in the CHM file.
  57 |     '''
  58 |     def reset(self):
  59 |         sgmllib.SGMLParser.reset(self)
  60 |         self.pages=[]
  61 | 
  62 |     def start_param(self,attrs):
  63 |        urlparam_flag=False
  64 |        for key,value in attrs:
  65 |            if key=='name' and value=='Local':
  66 |                urlparam_flag=True
  67 |            if urlparam_flag and key=='value':
  68 |                self.pages.append('/'+value)
  69 | 
  70 | class ImageCatcher(sgmllib.SGMLParser):
  71 |     '''
  72 |     finds image urls in the current html page, so to take them out from the chm file.
  73 |     '''
  74 |     def reset(self):
  75 |         sgmllib.SGMLParser.reset(self)
  76 |         self.imgurls=[]
  77 | 
  78 |     def start_img(self,attrs):
  79 |         for key,value in attrs:
  80 |             if key=='src' or key=='SRC':
  81 |                 # Avoid duplicates in the list of image URLs.
  82 |                 if not self.imgurls.count(value):
  83 |                     self.imgurls.append(value)
  84 | 
  85 | class CssCatcher(sgmllib.SGMLParser):
  86 |     '''
  87 |     finds CSS urls in the current html page, so to take them out from the chm file.
  88 |     '''
  89 |     def reset(self):
  90 |         sgmllib.SGMLParser.reset(self)
  91 |         self.cssurls=[]
  92 | 
  93 |     def start_link(self,attrs):
  94 |         for key,value in attrs:
  95 |             if key=='href' or key=='HREF':
  96 |                 # Avoid duplicates in the list of CSS URLs.
  97 |                 if not self.cssurls.count(value):
  98 |                     self.cssurls.append(value)
  99 | 
 100 | def get_html_list(cfile):
 101 |     '''
 102 |     retrieves the list of HTML files contained into the CHM file, **in order** (that's the important bit).
 103 |     (actually performed by the PageLister class)
 104 |     '''
 105 |     if sys.version_info[0] == 2:
 106 |         topicstree=cfile.GetTopicsTree()
 107 |     else:
 108 |         topicstree=cfile.GetTopicsTree().decode("utf-8")
 109 |     #print(type(topicstree))
 110 | 
 111 |     lister=PageLister()
 112 |     lister.feed(topicstree)
 113 |     #print 'lister pages',lister.pages
 114 |     return lister.pages
 115 | 
 116 | def get_objective_urls_list(filename):
 117 |     '''
 118 |     takes the list of files inside the chm archive, with the correct urls of each one.
 119 |     '''
 120 | 
 121 |     os.system('enum_chmLib '+filename+' > '+CHM2PDF_WORK_DIR+'/urlslist.txt')
 122 |     if sys.version_info[0] == 2:
 123 |         flist=open(CHM2PDF_WORK_DIR+'/urlslist.txt','rU')
 124 |     else:
 125 |         flist=open(CHM2PDF_WORK_DIR+'/urlslist.txt','r', newline=None)
 126 | 
 127 |     urls_list=[]
 128 |     for line in flist.readlines()[3:]:
 129 |         #print 'line',line
 130 |         spline=line.split()
 131 |         urls_list.append(spline[5])
 132 |     flist.close()
 133 |     # os.remove(CHM2PDF_WORK_DIR+'/urlslist.txt')
 134 | 
 135 |     return urls_list
 136 | 
 137 | 
 138 | def correct_file(input_file, output_file, html_list, objective_urls, options):
 139 | 
 140 |     # Correct image links in file
 141 |     if sys.version_info[0] == 2:
 142 |         pf=open(input_file,'rU')
 143 |     else:
 144 |         pf=open(input_file,'r', newline=None)
 145 | 
 146 |     page=pf.read()
 147 |     pf.close()
 148 | 
 149 |     # Correct the HTML markup of the page, if the --beautifulsoup was passed.
 150 |     if options['beautifulsoup']=='--beautifulsoup':
 151 |         from BeautifulSoup import BeautifulSoup, Tag
 152 |         soup = BeautifulSoup(page)
 153 |         page = str(soup)
 154 | 
 155 |     image_catcher=ImageCatcher()
 156 |     image_catcher.feed(page)
 157 | 
 158 |     css_catcher=CssCatcher()
 159 |     css_catcher.feed(page)
 160 | 
 161 |     # We substitute the image URLs of input_file with the *actual* URLs on the CHM2PDF_ORIG_DIR directory
 162 |     for iurl in image_catcher.imgurls:
 163 |         # print 'iurl = '  + iurl
 164 | 
 165 |         img_filename = ''
 166 |         for item in objective_urls:
 167 |             if iurl in item:
 168 |                 img_filename=CHM2PDF_ORIG_DIR+item
 169 |                 if ';' in img_filename: #hack to get rid of mysterious ; in filenames and urls...
 170 |                     img_filename=img_filename.split(';')[0]
 171 |         # substitute the new image filenames - but only if an img_filename was found!
 172 |         if img_filename:
 173 |             page=re.sub(iurl,img_filename,page)
 174 | 
 175 | 
 176 |     # We substitute the CSS URLs of input_file with the *actual* URLs on the CHM2PDF_ORIG_DIR directory
 177 |     for curl in css_catcher.cssurls:
 178 |         # print 'curl = '  + curl
 179 | 
 180 |         css_filename = ''
 181 |         for item in objective_urls:
 182 |             if curl in item:
 183 |                 css_filename=CHM2PDF_ORIG_DIR+item
 184 |                 if ';' in css_filename: #hack to get rid of mysterious ; in filenames and urls...
 185 |                     css_filename=img_filename.split(';')[0]
 186 |         # substitute the new image filenames - but only if a css_filename was found!
 187 |         if css_filename:
 188 |             page=re.sub(curl,css_filename,page)
 189 | 
 190 |     # Fontsize hack:
 191 |     # Since htmldoc ignores the --fontsize option, we have to do something about it...
 192 |     # If --fontsize xxx was given on the command line,
 193 |     # insert <font> and </font> tags between <p> and </p>.
 194 |     # While doing so, use xxx as the value of the size attribute of the font tag.
 195 |     if options['fontsize']:
 196 |         # page=re.sub('(<p[^>]*>)','\\1' + os.linesep + '<font size="' + options['fontsize'] + '">' + os.linesep, page)
 197 |         # page=re.sub('(<\/p[^>]*>)', os.linesep + '</font>' + os.linesep + '\\1' + os.linesep, page)
 198 | 
 199 |         # page=re.sub('(<body[^>]*>)','\\1<basefont size="' + options['fontsize'] + '">' + '<font size="' + options['fontsize'] + '" face="' + options['bodyfont'] + '">',page)
 200 |         # page=re.sub('(<\/body[^>]*>)','</font>' + '</basefont>\\1',page)
 201 | 
 202 |         # page=re.sub('(<table[^>]*>)','\\1' + os.linesep + '<font size="' + options['fontsize'] + '">' + os.linesep, page)
 203 |         # page=re.sub('(<\/table[^>]*>)', os.linesep + '</font>' + os.linesep + '\\1' + os.linesep, page)
 204 | 
 205 |         # page=re.sub('(<body[^>]*>)','\\1' + '<basefont size="4"> <font size="' + options['fontsize'] + '" face="' + options['bodyfont'] + '">',page)
 206 |         page=re.sub('(<body[^>]*>)','\\1' + '<font size="' + options['fontsize'] + '" face="' + options['bodyfont'] + '">',page)
 207 |         page=re.sub('(<\/body[^>]*>)','</font>' + '\\1',page)
 208 | 
 209 |     # Delete unwanted HTML elements.
 210 |     page=re.sub('<div .*teamlib\.gif.*\/div>','', page)
 211 |     page=re.sub('<a href.*next\.gif[^>]*><\/a>','' ,page)
 212 |     page=re.sub('<a href.*previous\.gif[^>]*><\/a>','', page)
 213 |     page=re.sub('<a href.*prev\.gif[^>]*><\/a>','', page)
 214 |     page=re.sub('"[^"]*previous\.gif"','""', page)
 215 |     page=re.sub('"[^"]*prev\.gif"','""', page)
 216 |     page=re.sub('"[^"]*next\.gif"','""', page)
 217 | 
 218 |     # Delete CSS markup (<link rel="stylesheet"...)
 219 |     # Currently, htmldoc chokes on CSS. In some distant, bright future things will be different, but until then...
 220 |     # I know, it is silly to try to correct the CSS URLs as above, only to delete them here, just a few lines later.
 221 |     # But this is the right way to proceed - when htmldoc comes with CSS support we will only have
 222 |     # to remove the following one line and we will take full advantage of CSS, since the corrected CSS URLs will
 223 |     # already be there. ;-)
 224 |     page=re.sub('<link *rel="[Ss][Tt][Yy][Ll][Ee][Ss][Hh][Ee][Ee][Tt]"[^>]*>','', page)
 225 | 
 226 |     # Change font size between <pre> and </pre> to the value given by the '--prefontsize' option.
 227 |     if options['prefontsize'] != '':
 228 |         page=re.sub('<pre>','<pre><font size="' + options['prefontsize'] + '">', page)
 229 |         page=re.sub('<\/pre>','</font></pre>', page)
 230 | 
 231 | 
 232 |     # Correct the HTML markup of the page, if the --beautifulsoup was passed.
 233 |     if options['beautifulsoup']=='--beautifulsoup':
 234 |         # from BeautifulSoup import BeautifulSoup
 235 |         soup = BeautifulSoup(page)
 236 |         # page = soup.prettify()
 237 | 
 238 |         # top_vevel_table = soup.table
 239 |         # top_vevel_table.extract()
 240 |         # top_vevel_table['width'] = options['browserwidth']
 241 | 
 242 | #         tables=soup.findAll('table')
 243 | #         for table in tables:
 244 | #             # table['cellpadding'] = '0.15in'
 245 | #             table['border'] = '1'
 246 | #             table['width'] = '95%'
 247 | #             # table['width'] = options['browserwidth']
 248 | 
 249 | #             trs=table.findAll('tr')
 250 | #             for tr in trs:
 251 | #                 tds=tr.findAll('td')
 252 | #                 for td in tds:
 253 | #                     # print td.find(text=True)
 254 | #                     # td['cellpadding'] = '0.15in'
 255 | #                     # td['nowrap'] = 'false'
 256 | #                     # if td.content != None:
 257 | #                     for tag in td.contents:
 258 | #                         if tag.__class__.__name__ == 'NavigableString' and (tag == "\n" or tag == ''):
 259 | #                             print 'The following is of class ' + tag.__class__.__name__ + ' and is empty: '
 260 | #                             print '-->' + tag + '<--'
 261 | #                             tag.extract
 262 | #                             # print td.content
 263 | #                             # td.string = re.sub('^[\n\t\r ]*$', '', td.string)
 264 | #                             # if td.string == '':
 265 | #                             #     td.extract()
 266 | 
 267 | #         pixelimgs=soup.findAll('img', {"width" : "1", "height" : "1"})
 268 | #         for pixelimg in pixelimgs:
 269 | #             pixelimg.extract()
 270 | 
 271 | #         divs=soup.findAll('div')
 272 | #         for div in divs:
 273 | #             if div.string == None or div.string == os.linesep:
 274 | #                 div.extract()
 275 | #             # else:
 276 | #             #     print '-->"' + div.string + '"'
 277 | 
 278 | 
 279 |         pres = soup.findAll('pre')
 280 |         line_regex = re.compile(r'^.*$', re.MULTILINE)
 281 |         for pre in pres:
 282 | 
 283 |             pre['width'] = '50'
 284 | 
 285 | #             font_tag = Tag(BeautifulSoup(), 'font')
 286 | #             font_tag['size'] = '-1'
 287 | #             pre.insert(0, font_tag)
 288 | 
 289 |             texts_with_linesep = pre.findAll(text=line_regex, recursive=True)
 290 |             for text in texts_with_linesep:
 291 |                 newtext=re.sub(os.linesep,'<br>',text.string)
 292 | #                 print "#########################################################################"
 293 | #                 print text
 294 | #                 print "#########################################################################"
 295 | #                 print
 296 | #                 print
 297 |                 text.replaceWith(newtext)
 298 | 
 299 | 
 300 |         # page = soup.prettify()
 301 |         page = str(soup)
 302 | 
 303 | 
 304 |     f=open(output_file,'w')
 305 |     f.write(page)
 306 |     f.close()
 307 | 
 308 | 
 309 | def convert_to_pdf(cfile, filename, outputfilename, options):
 310 |     '''
 311 |     Performs actual converting.
 312 |     '''
 313 | 
 314 |     # Extract the files from the CHM archive and correct them ONLY IF '--dontextract' was NOT given!
 315 |     # ########################### File extraction and correction: START ############################
 316 |     #
 317 |     if options['dontextract'] == '':
 318 | 
 319 |         try:
 320 |             os.mkdir(CHM2PDF_TEMP_WORK_DIR)
 321 |         except OSError: # The directory already exists.
 322 |             pass
 323 | 
 324 |         try:
 325 |             os.mkdir(CHM2PDF_TEMP_ORIG_DIR)
 326 |         except OSError: # The directory already exists.
 327 |             pass
 328 | 
 329 |         try:
 330 |             os.mkdir(CHM2PDF_ORIG_DIR)
 331 |         except OSError: # The directory already exists.
 332 |             pass
 333 | 
 334 |         try:
 335 |             os.mkdir(CHM2PDF_WORK_DIR)
 336 |         except OSError: # The directory already exists.
 337 |             pass
 338 | 
 339 |     # Compute filenames and lists. This is needed no matter if '--dontextract' was given or not!
 340 | 
 341 |     html_list=get_html_list(cfile)
 342 |     objective_urls=get_objective_urls_list(filename)
 343 | 
 344 |     # print 'objective_urls'
 345 |     # print '=============='
 346 |     # print objective_urls
 347 |     # print
 348 |     # print 'html_list'
 349 |     # print '========='
 350 |     # print html_list
 351 | 
 352 |     true_html_list=[] #Should mostly coincide with html_list, but...
 353 | 
 354 |     input_titlefile = ''
 355 |     output_titlefile = ''
 356 |     for html_file in html_list:
 357 |         for item in objective_urls:
 358 |             if html_file in item:
 359 |                 true_html_list.append(CHM2PDF_ORIG_DIR+item)
 360 |             if not options['titlefile']=='' and options['titlefile'] in item:
 361 |                 input_titlefile = CHM2PDF_ORIG_DIR+item
 362 |                 output_titlefile = CHM2PDF_WORK_DIR + os.sep + options['titlefile']
 363 | 
 364 |     if not options['titlefile']=='' and not output_titlefile:
 365 |         print('### WARNING: ' + options['titlefile'] + ' not found inside ' + filename + ' - possible spelling error.')
 366 |         print('### You can check it if you do  \'' + sys.argv[0] + ' --extract-only\',')
 367 |         print('### then have a look at the files in  ' + CHM2PDF_ORIG_DIR + '.')
 368 |         print('### Option \'--titlefile ' + options['titlefile'] + '\' ignored')
 369 |         options['titlefile'] = ''
 370 | 
 371 | 
 372 |     # Process toc file. This depends on the '--dontextract' option.
 373 | 
 374 |     if options['dontextract'] == '':
 375 |         # Correct image links in toc file.
 376 |         if not options['titlefile']=='' and os.path.exists(input_titlefile):
 377 |             correct_file(input_titlefile, output_titlefile, html_list, objective_urls, options)
 378 | 
 379 | 
 380 |     # Now process the rest of HTML files.
 381 | 
 382 |     # Compute some lists. Again, this is independent of the '--dontextract' option.
 383 | 
 384 |     c=0
 385 |     htmlout_filename_list=''
 386 |     htmlout_filenames = []
 387 |     if output_titlefile:
 388 |         htmlout_filenames.append(output_titlefile)
 389 |     match_strings = []
 390 |     replace_strings = []
 391 |     replace_garbled_strings = []
 392 |     for url in html_list:
 393 |         c+=1
 394 |         page_filename=CHM2PDF_ORIG_DIR + url
 395 |         # Some names contain a '%20' (an HTML code for a space). We substitute with a "real space"
 396 |         # otherwise a 'File not found' error will occur.
 397 |         page_filename = re.sub('%20',' ',page_filename)
 398 | 
 399 |         if options['verbose']=='--verbose' and options['verbositylevel']=='high' and options['dontextract'] == '':
 400 |             print("Correcting " + page_filename)
 401 | 
 402 | 
 403 |         if os.path.exists(page_filename) and (options['titlefile'] == '' or not options['titlefile'] in url):
 404 |             htmlout_filename=CHM2PDF_WORK_DIR+'/temp'+'%(#)04d' %{"#":c}+'.html'
 405 |             htmlout_filename_list+=' '+ htmlout_filename
 406 |             htmlout_filenames.append(htmlout_filename)
 407 | 
 408 |             if options['dontextract'] == '':
 409 |                 # Correct image links in file page_filename.
 410 |                 correct_file(page_filename, htmlout_filename, html_list, objective_urls, options)
 411 | 
 412 |             # Escape slashes in url.
 413 |             url_filename_escaped = re.sub('/', '\/', os.path.basename(url))
 414 |             # Escape dots in url.
 415 |             url_filename_escaped = re.sub('\.', '\.', url_filename_escaped)
 416 |             # Escape slashes in htmlout_filename.
 417 |             htmlout_filename_escaped = re.sub('/', '\/', os.path.basename(htmlout_filename))
 418 |             # Compute a "garbled" htmlout_filename, where dots are simply replaced with underscores.
 419 |             htmlout_filename_escaped_garbled = re.sub('\.', '_', htmlout_filename_escaped)
 420 | 
 421 |             # Build a list for each of the three strings (the original URL, the output filename and the garbled one).
 422 |             # The idea is that we want to replace the match_strings with the corresponding replace_garbled_strings first.
 423 |             # Then, in a second pass, we will replace the garbled strings with the "real" replace_strings.
 424 |             # This trick is necessary to avoid problems in cases where the original URLs look like
 425 |             #
 426 |             # 0001.html, 0002.html, 0003.html...
 427 |             #
 428 |             # and we want to replace as follows:
 429 |             #
 430 |             # toc.html	-> temp0001.html
 431 |             # 0001.html	-> temp0002.html
 432 |             # 0002.html -> temp0003.html
 433 |             # 0003.html -> temp0004.html
 434 |             #
 435 |             # If we try it "directly", i.e. without the "garbled" names first, we will end up changing:
 436 |             #
 437 |             # tol.html	-> temp0001.html -> temptemp0002.html -> temptemptemp0003.html ...
 438 |             # 0001.html	-> temp0002.html -> temptemp0003.html -> temptemptemp0004.html ...
 439 |             # ...
 440 |             #
 441 |             # which is not what we want.
 442 |             match_strings.append(url_filename_escaped)
 443 |             replace_strings.append(htmlout_filename_escaped)
 444 |             replace_garbled_strings.append(htmlout_filename_escaped_garbled)
 445 | 
 446 |     # Now we've got the lists computed. We proceed with the actual correction,
 447 |     # which IS dependent on the '--dontextract' option:
 448 | 
 449 |     if options['dontextract'] == '':
 450 |         # Correct links to files in the local collection.
 451 |         if options['verbose']=='--verbose' and options['verbositylevel']=='low':
 452 |             print('Correcting links in the HTML files...')
 453 | 
 454 |         if options['verbose']=='--verbose' and options['verbositylevel']=='high':
 455 |             print('############### 1st pass ###############')
 456 |         for match_string in  match_strings:
 457 |             replace_string = replace_garbled_strings[match_strings.index(match_string)]
 458 |             if options['verbose']=='--verbose' and options['verbositylevel']=='high':
 459 |                 print("match " + match_string + ' ' + "and replace it with " + replace_string)
 460 |         if options['verbose']=='--verbose' and options['verbositylevel']=='high':
 461 |             print()
 462 | 
 463 |         if options['verbose']=='--verbose' and options['verbositylevel']=='high':
 464 |             print('############### 2nd pass ###############')
 465 |         for match_string in  replace_garbled_strings:
 466 |             replace_string = replace_strings[replace_garbled_strings.index(match_string)]
 467 |             if options['verbose']=='--verbose' and options['verbositylevel']=='high':
 468 |                 print("match " + match_string + ' ' + "and replace it with " + replace_string)
 469 |         if options['verbose']=='--verbose' and options['verbositylevel']=='high':
 470 |             print()
 471 | 
 472 |         for filename in htmlout_filenames:
 473 | 
 474 |             if sys.version_info[0] == 2:
 475 |                 pf=open(filename,'rU')
 476 |             else:
 477 |                 pf=open(filename, 'r', newline=None)
 478 | 
 479 |             page=pf.read()
 480 |             pf.close()
 481 | 
 482 |             # Substitutions in 1st pass: we replace the original filenames with their corresponding "garbled" equivalents.
 483 |             for match_string in  match_strings:
 484 |                 replace_string = replace_garbled_strings[match_strings.index(match_string)]
 485 |                 page = re.sub(match_string, replace_string, page)
 486 | 
 487 | 
 488 |             # Substitutuions in the 2nd pass: we replace the garbled filenames with the correct ones.
 489 |             for match_string in  replace_garbled_strings:
 490 |                 replace_string = replace_strings[replace_garbled_strings.index(match_string)]
 491 |                 page = re.sub(match_string, replace_string, page)
 492 | 
 493 |             # Replace links of the form "somefile.html#894" with "somefile0206.html"
 494 |             # The following will match anchors like '<a href="temp0206.html#894"' and will store the 'temp0206.html' in backreference 1.
 495 |             # The replace string will then replace it with '<a href="temp0206.html"', i.e. it will take away the '#894' part.
 496 |             # This is because the numbers after the '#' are often wrong or non-existent. It is better to link to an existing
 497 |             # chapter than to a non-existent part of an existing chapter.
 498 |             page = re.sub('<a href="([^#]*)#[^"]*"', '<a href="\\1"', page)
 499 | 
 500 |             pf=open(filename,'w')
 501 |             pf.write(page)
 502 |             pf.close
 503 | 
 504 |     # Here ends the extraction and correction of the HTML files which, as said above,
 505 |     # will take place ONLY IF '--dontextract' was NOT given.
 506 |     # If '--dontextract' was given, only the file lists like htmlout_filename_list
 507 |     # were computed above, but no file extraction or correction took place.
 508 |     #
 509 |     # ########################### File extraction and correction: END   ############################
 510 | 
 511 |     # Stop here if '--extract-only' was given...
 512 |     if options['extract-only'] == '--extract-only':
 513 |         return
 514 | 
 515 |     # ...otherwise continue with PDF generation.
 516 | 
 517 |     if options['verbose']=='--verbose' and options['verbositylevel']=='low':
 518 |         print('Producing the PDF from the '+str(c)+' individual HTML files...')
 519 | 
 520 |     htmldoc_opts = ''
 521 |     # print options
 522 |     for key in list(options):
 523 |         value = options[key]
 524 |         if not value == '':
 525 |             if   key=='bodycolor': htmldoc_opts += ' --bodycolor ' + value
 526 |             elif key=='bodyfont': htmldoc_opts += ' --bodyfont ' + value
 527 |             elif key=='bodyimage': htmldoc_opts += ' --bodyimage ' + value
 528 |             elif key=='book': htmldoc_opts += ' ' + value
 529 |             elif key=='bottom': htmldoc_opts += ' --bottom ' + value
 530 |             elif key=='browserwidth': htmldoc_opts += ' --browserwidth ' + value
 531 |             elif key=='charset': htmldoc_opts += ' --charset ' + value
 532 |             elif key=='color': htmldoc_opts += ' ' + value
 533 |             elif key=='compression': htmldoc_opts += ' --compression=' + value
 534 |             elif key=='continuous': htmldoc_opts += ' ' + value
 535 |             elif key=='cookies': htmldoc_opts += ' --cookies ' + value
 536 |             elif key=='datadir': htmldoc_opts += ' --datadir ' + value
 537 |             elif key=='duplex': htmldoc_opts += ' ' + value
 538 |             elif key=='effectduration': htmldoc_opts += ' --effectduration ' + value
 539 |             elif key=='embedfonts': htmldoc_opts += ' ' + value
 540 |             elif key=='encryption': htmldoc_opts += ' ' + value
 541 |             elif key=='firstpage': htmldoc_opts += ' --firstpage ' + value
 542 |             elif key=='fontsize': htmldoc_opts += ' --fontsize ' + value
 543 |             elif key=='fontspacing': htmldoc_opts += ' --fontspacing ' + value
 544 |             elif key=='footer': htmldoc_opts += ' --footer ' + value
 545 |             elif key=='format': htmldoc_opts += ' --format ' + value
 546 |             elif key=='gray': htmldoc_opts += ' ' + value
 547 |             elif key=='header': htmldoc_opts += ' --header ' + value
 548 |             elif key=='header1': htmldoc_opts += ' --header1 ' + value
 549 |             elif key=='headfootfont': htmldoc_opts += ' --headfootfont ' + value
 550 |             elif key=='headfootsize': htmldoc_opts += ' --headfootsize ' + value
 551 |             elif key=='headingfont': htmldoc_opts += ' --headingfont ' + value
 552 |             elif key=='help': htmldoc_opts += ' ' + value
 553 |             elif key=='hfimage0': htmldoc_opts += ' --hfimage0 ' + value
 554 |             elif key=='hfimage1': htmldoc_opts += ' --hfimage1 ' + value
 555 |             elif key=='hfimage2': htmldoc_opts += ' --hfimage2 ' + value
 556 |             elif key=='hfimage3': htmldoc_opts += ' --hfimage3 ' + value
 557 |             elif key=='hfimage4': htmldoc_opts += ' --hfimage4 ' + value
 558 |             elif key=='hfimage5': htmldoc_opts += ' --hfimage5 ' + value
 559 |             elif key=='hfimage6': htmldoc_opts += ' --hfimage6 ' + value
 560 |             elif key=='hfimage7': htmldoc_opts += ' --hfimage7 ' + value
 561 |             elif key=='hfimage8': htmldoc_opts += ' --hfimage8 ' + value
 562 |             elif key=='hfimage9': htmldoc_opts += ' --hfimage9 ' + value
 563 |             elif key=='jpeg': htmldoc_opts += ' --jpeg=' + value
 564 |             elif key=='landscape': htmldoc_opts += ' ' + value
 565 |             elif key=='left': htmldoc_opts += ' --left ' + value
 566 |             elif key=='linkcolor': htmldoc_opts += ' --linkcolor ' + value
 567 |             elif key=='links': htmldoc_opts += ' ' + value
 568 |             elif key=='linkstyle': htmldoc_opts += ' --linkstyle ' + value
 569 |             elif key=='logoimage': htmldoc_opts += ' --logoimage ' + value
 570 |             elif key=='logoimage': htmldoc_opts += ' --logoimage ' + value
 571 |             elif key=='no-compression': htmldoc_opts += ' ' + value
 572 |             elif key=='no-duplex': htmldoc_opts += ' ' + value
 573 |             elif key=='no-embedfonts': htmldoc_opts += ' ' + value
 574 |             elif key=='no-encryption': htmldoc_opts += ' ' + value
 575 |             elif key=='no-links': htmldoc_opts += ' ' + value
 576 |             elif key=='no-localfiles': htmldoc_opts += ' ' + value
 577 |             elif key=='no-numbered': htmldoc_opts += ' ' + value
 578 |             elif key=='no-overflow': htmldoc_opts += ' ' + value
 579 |             elif key=='no-strict': htmldoc_opts += ' ' + value
 580 |             elif key=='no-title': htmldoc_opts += ' ' + value
 581 |             elif key=='no-toc': htmldoc_opts += ' ' + value
 582 |             elif key=='numbered': htmldoc_opts += ' ' + value
 583 |             elif key=='nup': htmldoc_opts += ' --nup ' + value
 584 |             elif key=='outfile': htmldoc_opts += ' --outfile ' + value
 585 |             elif key=='overflow': htmldoc_opts += ' ' + value
 586 |             elif key=='owner-password': htmldoc_opts += ' --owner-password ' + value
 587 |             elif key=='pageduration': htmldoc_opts += ' --pageduration ' + value
 588 |             elif key=='pageeffect': htmldoc_opts += ' --pageeffect ' + value
 589 |             elif key=='pagelayout': htmldoc_opts += ' --pagelayout ' + value
 590 |             elif key=='pagemode': htmldoc_opts += ' --pagemode ' + value
 591 |             elif key=='path': htmldoc_opts += ' --path ' + value
 592 |             elif key=='permissions': htmldoc_opts += ' --permissions ' + value
 593 |             elif key=='portrait': htmldoc_opts += ' ' + value
 594 |             elif key=='quiet': htmldoc_opts += ' ' + value
 595 |             elif key=='right': htmldoc_opts += ' --right ' + value
 596 |             elif key=='size': htmldoc_opts += ' --size ' + value
 597 |             elif key=='strict': htmldoc_opts += ' ' + value
 598 |             elif key=='textcolor': htmldoc_opts += ' --textcolor ' + value
 599 |             elif key=='textfont': htmldoc_opts += ' --textfont ' + value
 600 |             elif key=='title': htmldoc_opts += ' ' + value
 601 |             elif key=='titlefile': htmldoc_opts += ' --titlefile ' + output_titlefile
 602 |             elif key=='titleimage': htmldoc_opts += ' --titleimage ' + value
 603 |             elif key=='tocfooter': htmldoc_opts += ' --tocfooter ' + value
 604 |             elif key=='tocheader': htmldoc_opts += ' --tocheader ' + value
 605 |             elif key=='toclevels': htmldoc_opts += ' --toclevels ' + value
 606 |             elif key=='toctitle': htmldoc_opts += ' --toctitle ' + value
 607 |             elif key=='top': htmldoc_opts += ' --top ' + value
 608 |             elif key=='user-password': htmldoc_opts += ' --user-password ' + value
 609 |             elif key=='version': htmldoc_opts += ' ' + value
 610 |             elif key=='webpage': htmldoc_opts += ' ' + value
 611 | 
 612 |     if options['verbose']=='--verbose' and options['verbositylevel']=='high':
 613 |         print('htmldoc' + htmldoc_opts + ' ' + htmlout_filename_list + " -f "+ outputfilename + " > /dev/null")
 614 |     exit_value=os.system ('htmldoc' + htmldoc_opts + ' ' + htmlout_filename_list + " -f "+ outputfilename + " > /dev/null")
 615 | 
 616 |     if exit_value != 0:
 617 |         print('Something wrong happened when launching htmldoc.')
 618 |         print('exit value: ',exit_value)
 619 |         print('Check if output exists or if it is good.')
 620 |     else:
 621 |         print('Written file ' + outputfilename)
 622 |     print('Done.')
 623 | 
 624 | def usage (name):
 625 |     print('Usage:')
 626 |     print("\t%s [options] input_filename [output_filename]" % name)
 627 |     print()
 628 |     print('Options:')
 629 |     print()
 630 |     print('\t--beautifulsoup\n\t\tCorrect the HTML code of the pages before converting them to PDF. The BeautifulSoup package (see http://www.crummy.com/software/BeautifulSoup/) needs to be installed.')
 631 |     print('\t--bodycolor color\n\t\tSpecifies the background color for all pages.')
 632 |     print('\t--bodyfont {courier,helvetica,monospace,sans,serif,times}. Default is times.')
 633 |     print('\t--bodyimage filename.{bmp,gif,jpg,png}')
 634 |     print('\t--book\n\t\tSpecifies that the HTML sources are structured (headings, chapters, etc.).')
 635 |     print('\t--bottom margin{in,cm,mm}\n\t\tSpecifies the bottom margin in points (no suffix or ##pt), inches  (##in),  centimeters  (##cm),  or millimeters (##mm).')
 636 |     print('\t--browserwidth pixels\n\t\tSee http://www.htmldoc.org/newsgroups.php?ghtmldoc.general+v:3465')
 637 |     print('\t--charset {cp-874...1258,iso-8859-1...8859-15,koi8-r}\n\t\tSpecifies the ISO character set to use for the output.')
 638 |     print('\t--color\n\t\tSpecifies that PDF output should be in color.')
 639 |     print('\t--compression[=level]\n\t\t')
 640 |     print('\t--continuous\n\t\tSpecifies  that  the  HTML  sources are unstructured (plain web pages).\n\t\tNo page breaks are inserted between each file or URL in the output.')
 641 |     print('\t--cookies \'name="value with space"; name=value\'\n\t\t')
 642 |     print('\t--datadir directory\n\t\tSpecifies the  location  of  the  HTMLDOC  data  files,  usually  /usr/share/htmldoc  or  C:\Program Files\HTMLDOC ')
 643 |     print("\t--dontextract \n\t\tIf given, %s will not extract the HTML files from the given CHM file, but will use previously extracted copies from the temporary directory " %name + '(i.e. ' + CHM2PDF_TEMP_ORIG_DIR + ' and ' + CHM2PDF_TEMP_WORK_DIR + '). Usually you will use this option after you have used the \'--extract-only\' option to extract the files in order to correct them manually (in ' + CHM2PDF_TEMP_WORK_DIR + '). After the correction, a call with \'--dontextract\' will not overwrite your changes, but will use the corrected files instead.')
 644 |     print('\t--duplex\n\t\tSpecifies that the output should be formatted for double-sided printing.')
 645 |     print('\t--effectduration {0.1..10.0}\n\t\tSpecifies the duration in seconds of PDF page transition effects.')
 646 |     print('\t--embedfonts\n\t\tSpecifies that fonts should be embedded in PDF output.')
 647 |     print('\t--encryption\n\t\tEnables encryption of PDF files.')
 648 |     print('\t--extract-only\n\t\tExtract the HTML files from the CHM file and stop.\n\t\tThe extracted files will be found in CHM2PDF_WORK_DIR/input_filename_without_extension.')
 649 |     print('\t--firstpage {p1,toc,c1}\n\t\t')
 650 |     print('\t--fontsize {4.0..24.0}\n\t\tSpecifies the default font size for body text.')
 651 |     print('\t--fontspacing {1.0..3.0}\n\t\tSpecifies  the  default  line  spacing  for body text.\n\t\tThe line spacing is a multiplier for the font size, so a value of 1.2 \n\t\twill provide an additional 20% of space between the lines.')
 652 |     print('\t--footer fff\n\t\t')
 653 |     print('\t{--format, -t} {pdf11,pdf12,pdf13,pdf14}\n\t\tSpecifies the output format: pdf11\n\t\tpdf11 (PDF 1.1/Acrobat 2.0), pdf12 (PDF 1.2/Acrobat 3.0), \n\t\tpdf or pdf13 (PDF  1.3/Acrobat  4.0),  or  pdf14 (PDF 1.4/Acrobat 5.0)')
 654 |     print('\t--gray\n\t\t')
 655 |     print('\t--header fff\n\t\t')
 656 |     print('\t--header1 fff\n\t\t')
 657 |     print('\t--headfootfont {courier{-bold,-oblique,-boldoblique}, \n\t\thelvetica{-bold,-oblique,-boldoblique}, \n\t\tmonospace{-bold,-oblique,-boldoblique}, \n\t\tsans{-bold,-oblique,-boldoblique}, \n\t\tserif{-bold,-italic,-bolditalic}, \n\t\ttimes{-roman,-bold,-italic,-bolditalic}} \n\t\t\tSets the font to use on headers and footers.')
 658 |     print('\t--headfootsize {6.0..24.0}\n\t\tSets the size of the font to use on headers and footers.')
 659 |     print('\t--headingfont {courier,helvetica,monospace,sans,serif,times}\n\t\tSets the typeface to use for headings.')
 660 |     print('\t--help\n\t\tDisplays a summary of command-line options.')
 661 |     print('\t--hfimage0 filename.{bmp,gif,jpg,png}\n\t\t ')
 662 |     print('\t--hfimage1 filename.{bmp,gif,jpg,png}\n\t\t ')
 663 |     print('\t--hfimage2 filename.{bmp,gif,jpg,png}\n\t\t ')
 664 |     print('\t--hfimage3 filename.{bmp,gif,jpg,png}\n\t\t ')
 665 |     print('\t--hfimage4 filename.{bmp,gif,jpg,png}\n\t\t ')
 666 |     print('\t--hfimage5 filename.{bmp,gif,jpg,png}\n\t\t ')
 667 |     print('\t--hfimage6 filename.{bmp,gif,jpg,png}\n\t\t ')
 668 |     print('\t--hfimage7 filename.{bmp,gif,jpg,png}\n\t\t ')
 669 |     print('\t--hfimage8 filename.{bmp,gif,jpg,png}\n\t\t ')
 670 |     print('\t--hfimage9 filename.{bmp,gif,jpg,png}\n\t\t ')
 671 |     print('\t--jpeg quality\n\t\tSets the JPEG compression level to use for large images. A value of 0 disables JPEG compression.')
 672 |     print('\t--landscape\n\t\t')
 673 |     print('\t--left margin{in,cm,mm}\n\t\tSpecifies the left margin in points (no suffix or ##pt), inches (##in), centimeters (##cm), or  millimeters (##mm).')
 674 |     print('\t--linkcolor color\n\t\tSets the color of links. You can use well-known color names like blue, or the usual #RRGGBB notation.')
 675 |     print('\t--links\n\t\tEnables generation of links in PDF files (default).')
 676 |     print('\t--linkstyle {plain,underline}\n\t\tSets the style of links.')
 677 |     print('\t--logoimage filename.{bmp,gif,jpg,png}\n\t\tSpecifies an image to be used as a logo in the header or footer in a PDF document.')
 678 |     print('\t--logoimage filename.{bmp,gif,jpg,png}\n\t\tNote that you need to use the --header and/or --footer options with the l parameter.')
 679 |     print('\t--no-compression\n\t\tDisables compression of PDF file.')
 680 |     print('\t--no-duplex\n\t\tDisables double-sided printing.')
 681 |     print('\t--no-embedfonts\n\t\tSpecifies that fonts should not be embedded in PDF and PostScript output.')
 682 |     print('\t--no-encryption\n\t\tDisables document encryption.')
 683 |     print('\t--no-links\n\t\tDisables generation of links in a PDF document. ')
 684 |     print('\t--no-localfiles\n\t\t')
 685 |     print('\t--no-numbered\n\t\tDisables automatic heading numbering.')
 686 |     print('\t--no-overflow\n\t\t')
 687 |     print('\t--no-strict\n\t\tDisables strict HTML input checking.')
 688 |     print('\t--no-title\n\t\tDisables generation of a title page.')
 689 |     print('\t--no-toc\n\t\tDisables generation of a table of contents.')
 690 |     print('\t--numbered\n\t\tNumbers all headings in a document.')
 691 |     print('\t--nup {1,2,4,6,9,16}\n\t\tSets  the  number of pages that are placed on each output page.  Valid values are 1, 2, 4, 6, 9, and 16.')
 692 |     print('\t{--outfile, -f} filename{.pdf}\n\t\tSpecifies the name of the output file. If no ending is given, ".pdf" is used.')
 693 |     print('\t--overflow\n\t\t')
 694 |     print('\t--owner-password password\n\t\tSets the owner password for encrypted PDF files.')
 695 |     print('\t--pageduration {1.0..60.0}\n\t\tSets the view duration of a page in a PDF document.')
 696 |     print('\t--pageeffect {none,bi,bo,d,gd,gdr,gr,hb,hsi,hso,vb,vsi,vso,wd,wl,wr,wu}\n\t\tSpecifies the page transition effect for all pages; this attribute is ignored by all Adobe PDF viewers.. ')
 697 |     print('\t--pagelayout {single,one,twoleft,tworight}\n\t\tSpecifies the initial layout of pages for a PDF file.')
 698 |     print('\t--pagemode {document,outline,fullscreen}\n\t\tSpecifies the initial viewing mode for a PDF file.')
 699 |     print('\t--path "dir1;dir2;dir3;...;dirN"\n\t\tSpecifies a search path for files in a document.')
 700 |     print('\t--permissions {all,annotate,copy,modify,print,no-annotate,no-copy,no-modify,no-print,none}\n\t\tSpecifies document permissions for encrypted PDF files. Separate multiple permissions with commas. ')
 701 |     print('\t--portrait\n\t\t')
 702 |     print('\t--prefontsize {4.0..24.0, small, normal, big, huge, +1, +2, -1, -2,...whatever}\n\t\tSpecifies the font size for text inside <pre> tags. Whatever you pass here will be used as font size inside preformatted pieces of text (usually code snippets, console output, program messages...). Effective only together with the \'--beautifulsoup\' option.')
 703 |     print('\t--quiet\n\t\tSuppresses all messages, even error messages.')
 704 |     print('\t--right margin{in,cm,mm}\n\t\tSpecifies the right margin in points (no suffix or ##pt), inches (##in), centimeters (##cm), or millimeters (##mm).')
 705 |     print('\t--size {letter,a4,WxH{in,cm,mm},etc}\n\t\tSpecifies the page size using a standard name or in points (no suffix or ##x##pt), inches (##x##in),\n\t\tcentimeters (##x##cm), or millimeters (##x##mm). The standard sizes that  are  currently  recognized\n\t\tare "letter" (8.5x11in), "legal" (8.5x14in), "a4" (210x297mm), and "universal" (8.27x11in).')
 706 |     print('\t--strict\n\t\tEnables strict HTML input checking.')
 707 |     print('\t--textcolor color\n\t\tSpecifies the default color of all text.')
 708 |     print('\t--textfont {courier,helvetica,monospace,sans,serif,times}\n\t\t')
 709 |     print('\t--title\n\t\tEnables the generation of a title page.')
 710 |     print('\t--titlefile filename.{htm,html,shtml}\n\t\tSpecifies  the  file to use for the title page. If the file is an image then the title page\n\t\tis automatically generated using the document meta data and image title.')
 711 |     print('\t--titleimage filename.{bmp,gif,jpg,png}\n\t\tSpecifies  the  image to use for the title page. The title page is automatically \n\t\tgenerated using the document meta data and title image.')
 712 |     print('\t--tocfooter fff\n\t\tSets the page footer to use on table-of-contents pages. See below for the format of fff.')
 713 |     print('\t--tocheader fff\n\t\tSets the page header to use on table-of-contents pages. See below for the format of fff.')
 714 |     print('\t--toclevels levels\n\t\tSets the number of levels in the table-of-contents.')
 715 |     print('\t--toctitle string\n\t\tSets the title for the table-of-contents.')
 716 |     print('\t--top margin{in,cm,mm}\n\t\tSpecifies the top margin in points (no suffix or ##pt), inches (##in), centimeters (##cm),  or  millimeters (##mm).')
 717 |     print('\t--user-password password\n\t\tSpecifies the user password for encryption of PDF files.')
 718 |     print('\t--verbose\n\t\tDisplays various messages indicating the progress of the conversion process. The verbosity depends on --verbositylevel. If --verbose is not set, almost no messages will be printed. The default is \'not set\', so if you want the minimum amount of messages, just don\' use this option.')
 719 |     print('\t--verbositylevel\n\t\tSpecifies the verbosity level of the messages. Can be either \'low\' or \'high\'. Default is low. The --verbose option must be set, otherwise this option will have no effect.')
 720 |     print('\t--version\n\t\tDisplays the current version number.')
 721 |     print('\t--webpage\n\t\tSpecifies  that  the  HTML  sources  are  unstructured  (plain web pages).\n\t\tA page break is inserted between each file or URL in the output.')
 722 |     print()
 723 |     print('\tfff\n\t\tHeading format string; each \'f\' can be one of:')
 724 |     print()
 725 |     print('\t\t\t. = blank')
 726 |     print('\t\t\t/ = n/N arabic page numbers (1/3, 2/3, 3/3)')
 727 |     print('\t\t\t: = c/C arabic chapter page numbers (1/2, 2/2, 1/4, 2/4, ...)')
 728 |     print('\t\t\t1 = arabic numbers (1, 2, 3, ...)')
 729 |     print('\t\t\ta = lowercase letters')
 730 |     print('\t\t\tA = uppercase letters')
 731 |     print('\t\t\tc = current chapter heading')
 732 |     print('\t\t\tC = current chapter page number (arabic)')
 733 |     print('\t\t\td = current date')
 734 |     print('\t\t\tD = current date and time')
 735 |     print('\t\t\th = current heading')
 736 |     print('\t\t\ti = lowercase roman numerals')
 737 |     print('\t\t\tI = uppercase roman numerals')
 738 |     print('\t\t\tl = logo image')
 739 |     print('\t\t\tt = title text')
 740 |     print('\t\t\tT = current time')
 741 |     print()
 742 |     print('### See also: http://www.karakas-online.de/forum/viewtopic.php?t=10275')
 743 | 
 744 | def split(path):
 745 |     if path[-1] == os.sep :
 746 |         path, fname = path[:-1], ''
 747 |     else:
 748 |         path, fname = os.path.split( path )
 749 |         fname, ext = os.path.splitext( fname )
 750 |     return ( path, fname, ext[1:] )
 751 | 
 752 | 
 753 | 
 754 | def main(argv):
 755 | 
 756 |     global CHM2PDF_WORK_DIR
 757 |     global CHM2PDF_ORIG_DIR
 758 | 
 759 |     # Defaults
 760 |     options={}
 761 |     options['beautifulsoup'] = ''
 762 |     options['bodycolor'] = ''
 763 |     options['bodyfont'] = 'times'
 764 |     options['bodyimage'] = ''
 765 |     options['book'] = ''
 766 |     options['bottom'] = ''
 767 |     options['browserwidth'] = ''
 768 |     options['charset'] = ''
 769 |     options['color'] = ''
 770 |     options['compression'] = ''
 771 |     options['continuous'] = ''
 772 |     options['cookies'] = ''
 773 |     options['datadir'] = ''
 774 |     options['dontextract'] = ''
 775 |     options['duplex'] = '--duplex'
 776 |     options['effectduration'] = ''
 777 |     options['embedfonts'] = '--embedfonts'
 778 |     options['encryption'] = ''
 779 |     options['extract-only'] = ''
 780 |     options['firstpage'] = ''
 781 |     options['fontsize'] = ''
 782 |     options['fontspacing'] = ''
 783 |     options['footer'] = '\'c C\''
 784 |     options['format'] = '\'pdf14\''
 785 |     options['gray'] = ''
 786 |     options['header'] = '\'c C\''
 787 |     options['header1'] = ''
 788 |     options['headfootfont'] = ''
 789 |     options['headfootsize'] = ''
 790 |     options['headingfont'] = ''
 791 |     options['help'] = ''
 792 |     options['hfimage0'] = ''
 793 |     options['hfimage1'] = ''
 794 |     options['hfimage2'] = ''
 795 |     options['hfimage3'] = ''
 796 |     options['hfimage4'] = ''
 797 |     options['hfimage5'] = ''
 798 |     options['hfimage6'] = ''
 799 |     options['hfimage7'] = ''
 800 |     options['hfimage8'] = ''
 801 |     options['hfimage9'] = ''
 802 |     options['jpeg'] = '\'100\''
 803 |     options['landscape'] = ''
 804 |     options['left'] = ''
 805 |     options['linkcolor'] = '\'blue\''
 806 |     options['links'] = ''
 807 |     options['linkstyle'] = '\'plain\''
 808 |     options['logoimage'] = ''
 809 |     options['logoimage'] = ''
 810 |     options['no-compression'] = ''
 811 |     options['no-duplex'] = ''
 812 |     options['no-embedfonts'] = ''
 813 |     options['no-encryption'] = ''
 814 |     options['no-links'] = ''
 815 |     options['no-localfiles'] = ''
 816 |     options['no-numbered'] = ''
 817 |     options['no-overflow'] = ''
 818 |     options['no-strict'] = ''
 819 |     options['no-title'] = ''
 820 |     options['no-toc'] = ''
 821 |     options['numbered'] = ''
 822 |     options['nup'] = ''
 823 |     options['outfile'] = ''
 824 |     options['overflow'] = ''
 825 |     options['owner-password'] = ''
 826 |     options['pageduration'] = ''
 827 |     options['pageeffect'] = ''
 828 |     options['pagelayout'] = ''
 829 |     options['pagemode'] = ''
 830 |     options['path'] = ''
 831 |     options['permissions'] = ''
 832 |     options['portrait'] = ''
 833 |     options['prefontsize'] = ''
 834 |     options['quiet'] = ''
 835 |     options['right'] = ''
 836 |     options['size'] = '\'a4\''
 837 |     options['strict'] = ''
 838 |     options['textcolor'] = ''
 839 |     options['textfont'] = ''
 840 |     options['title'] = ''
 841 |     options['titlefile'] = ''
 842 |     options['titleimage'] = ''
 843 |     options['tocfooter'] = ''
 844 |     options['tocheader'] = ''
 845 |     options['toclevels'] = ''
 846 |     options['toctitle'] = ''
 847 |     options['top'] = ''
 848 |     options['user-password'] = ''
 849 |     options['verbose'] = ''
 850 |     options['verbositylevel'] = 'low'
 851 |     options['version'] = ''
 852 |     options['webpage'] = ''
 853 | 
 854 |     try:
 855 |         opts, args = getopt.getopt(sys.argv[1:], "f:t:v:",
 856 |                      [
 857 |                       "beautifulsoup",
 858 |                       "bodycolor=",
 859 |                       "bodyfont=",
 860 |                       "bodyimage=",
 861 |                       "book",
 862 |                       "bottom=",
 863 |                       "browserwidth=",
 864 |                       "charset=",
 865 |                       "color",
 866 |                       "compression=",
 867 |                       "continuous",
 868 |                       "cookies=",
 869 |                       "datadir=",
 870 |                       "dontextract",
 871 |                       "duplex",
 872 |                       "effectduration=",
 873 |                       "embedfonts",
 874 |                       "encryption",
 875 |                       "extract-only",
 876 |                       "firstpage=",
 877 |                       "fontsize=",
 878 |                       "fontspacing=",
 879 |                       "footer=",
 880 |                       "format=",
 881 |                       "gray",
 882 |                       "header=",
 883 |                       "header1=",
 884 |                       "headfootfont=",
 885 |                       "headfootsize=",
 886 |                       "headingfont=",
 887 |                       "help",
 888 |                       "hfimage0=",
 889 |                       "hfimage1=",
 890 |                       "hfimage2=",
 891 |                       "hfimage3=",
 892 |                       "hfimage4=",
 893 |                       "hfimage5=",
 894 |                       "hfimage6=",
 895 |                       "hfimage7=",
 896 |                       "hfimage8=",
 897 |                       "hfimage9=",
 898 |                       "jpeg=",
 899 |                       "landscape",
 900 |                       "left=",
 901 |                       "linkcolor=",
 902 |                       "links",
 903 |                       "linkstyle=",
 904 |                       "logoimage=",
 905 |                       "logoimage=",
 906 |                       "no-compression",
 907 |                       "no-duplex",
 908 |                       "no-embedfonts",
 909 |                       "no-encryption",
 910 |                       "no-links",
 911 |                       "no-localfiles",
 912 |                       "no-numbered",
 913 |                       "no-overflow",
 914 |                       "no-strict",
 915 |                       "no-title",
 916 |                       "no-toc",
 917 |                       "numbered",
 918 |                       "nup=",
 919 |                       "outfile=",
 920 |                       "overflow",
 921 |                       "owner-password=",
 922 |                       "pageduration=",
 923 |                       "pageeffect=",
 924 |                       "pagelayout=",
 925 |                       "pagemode=",
 926 |                       "path=",
 927 |                       "permissions=",
 928 |                       "portrait",
 929 |                       "prefontsize=",
 930 |                       "quiet",
 931 |                       "right=",
 932 |                       "size=",
 933 |                       "strict",
 934 |                       "textcolor=",
 935 |                       "textfont=",
 936 |                       "title",
 937 |                       "titlefile=",
 938 |                       "titleimage=",
 939 |                       "tocfooter=",
 940 |                       "tocheader=",
 941 |                       "toclevels=",
 942 |                       "toctitle=",
 943 |                       "top=",
 944 |                       "user-password=",
 945 |                       "verbose",
 946 |                       "verbositylevel=",
 947 |                       "version",
 948 |                       "webpage"
 949 |                      ])
 950 |     except getopt.GetoptError:
 951 |         usage(sys.argv[0])
 952 |         sys.exit(1)
 953 | 
 954 |     for o, a in opts:
 955 |         if   o == '--beautifulsoup': options['beautifulsoup'] = '--beautifulsoup'
 956 |         elif o == '--bodycolor': options['bodycolor'] = a
 957 |         elif o == '--bodyfont': options['bodyfont'] = a
 958 |         elif o == '--bodyimage': options['bodyimage'] = a
 959 |         elif o == '--book': options['book'] = '--book'
 960 |         elif o == '--bottom': options['bottom'] = a
 961 |         elif o == '--browserwidth': options['browserwidth'] = a
 962 |         elif o == '--charset': options['charset'] = a
 963 |         elif o == '--color': options['color'] = '--color'
 964 |         elif o == '--compression': options['compression'] = a
 965 |         elif o == '--continuous': options['continuous'] = '--continuous'
 966 |         elif o == '--cookies': options['cookies'] = a
 967 |         elif o == '--datadir': options['datadir'] = a
 968 |         elif o == '--dontextract': options['dontextract'] = '--dontextract'
 969 |         elif o == '--duplex': options['duplex'] = '--duplex'
 970 |         elif o == '--effectduration': options['effectduration'] = a
 971 |         elif o == '--embedfonts': options['embedfonts'] = '--embedfonts'
 972 |         elif o == '--encryption': options['encryption'] = '--encryption'
 973 |         elif o == '--extract-only': options['extract-only'] = '--extract-only'
 974 |         elif o == '--firstpage': options['firstpage'] = a
 975 |         elif o == '--fontsize': options['fontsize'] = a
 976 |         elif o == '--fontspacing': options['fontspacing'] = a
 977 |         elif o == '--footer': options['footer'] = a
 978 |         elif o in ('-t', '--format'): options['format'] = a
 979 |         elif o == '--gray': options['gray'] = '--gray'
 980 |         elif o == '--header': options['header'] = a
 981 |         elif o == '--header1': options['header1'] = a
 982 |         elif o == '--headfootfont': options['headfootfont'] = a
 983 |         elif o == '--headfootsize': options['headfootsize'] = a
 984 |         elif o == '--headingfont': options['headingfont'] = a
 985 |         elif o == '--help': options['help'] = '--help'
 986 |         elif o == '--hfimage0': options['hfimage0'] = a
 987 |         elif o == '--hfimage1': options['hfimage1'] = a
 988 |         elif o == '--hfimage2': options['hfimage2'] = a
 989 |         elif o == '--hfimage3': options['hfimage3'] = a
 990 |         elif o == '--hfimage4': options['hfimage4'] = a
 991 |         elif o == '--hfimage5': options['hfimage5'] = a
 992 |         elif o == '--hfimage6': options['hfimage6'] = a
 993 |         elif o == '--hfimage7': options['hfimage7'] = a
 994 |         elif o == '--hfimage8': options['hfimage8'] = a
 995 |         elif o == '--hfimage9': options['hfimage9'] = a
 996 |         elif o == '--jpeg': options['jpeg'] = a
 997 |         elif o == '--landscape': options['landscape'] = '--landscape'
 998 |         elif o == '--left': options['left'] = a
 999 |         elif o == '--linkcolor': options['linkcolor'] = a
1000 |         elif o == '--links': options['links'] = '--links'
1001 |         elif o == '--linkstyle': options['linkstyle'] = a
1002 |         elif o == '--logoimage': options['logoimage'] = a
1003 |         elif o == '--logoimage': options['logoimage'] = a
1004 |         elif o == '--no-compression': options['no-compression'] = '--no-compression'
1005 |         elif o == '--no-duplex': options['no-duplex'] = '--no-duplex'
1006 |         elif o == '--no-embedfonts': options['no-embedfonts'] = '--no-embedfonts'
1007 |         elif o == '--no-encryption': options['no-encryption'] = '--no-encryption'
1008 |         elif o == '--no-links': options['no-links'] = '--no-links'
1009 |         elif o == '--no-localfiles': options['no-localfiles'] = '--no-localfiles'
1010 |         elif o == '--no-numbered': options['no-numbered'] = '--no-numbered'
1011 |         elif o == '--no-overflow': options['no-overflow'] = '--no-overflow'
1012 |         elif o == '--no-strict': options['no-strict'] = '--no-strict'
1013 |         elif o == '--no-title': options['no-title'] = '--no-title'
1014 |         elif o == '--no-toc': options['no-toc'] = '--no-toc'
1015 |         elif o == '--numbered': options['numbered'] = '--numbered'
1016 |         elif o == '--nup': options['nup'] = a
1017 |         elif o in ('-f', '--outfile'): options['outfile'] = a
1018 |         elif o == '--overflow': options['overflow'] = '--overflow'
1019 |         elif o == '--owner-password': options['owner-password'] = a
1020 |         elif o == '--pageduration': options['pageduration'] = a
1021 |         elif o == '--pageeffect': options['pageeffect'] = a
1022 |         elif o == '--pagelayout': options['pagelayout'] = a
1023 |         elif o == '--pagemode': options['pagemode'] = a
1024 |         elif o == '--path': options['path'] = a
1025 |         elif o == '--permissions': options['permissions'] = a
1026 |         elif o == '--portrait': options['portrait'] = '--portrait'
1027 |         elif o == '--prefontsize': options['prefontsize'] = a
1028 |         elif o == '--quiet': options['quiet'] = '--quiet'
1029 |         elif o == '--right': options['right'] = a
1030 |         elif o == '--size': options['size'] = a
1031 |         elif o == '--strict': options['strict'] = '--strict'
1032 |         elif o == '--textcolor': options['textcolor'] = a
1033 |         elif o == '--textfont': options['textfont'] = a
1034 |         elif o == '--title': options['title'] = '--title'
1035 |         elif o == '--titlefile': options['titlefile'] = a
1036 |         elif o == '--titleimage': options['titleimage'] = a
1037 |         elif o == '--tocfooter': options['tocfooter'] = a
1038 |         elif o == '--tocheader': options['tocheader'] = a
1039 |         elif o == '--toclevels': options['toclevels'] = a
1040 |         elif o == '--toctitle': options['toctitle'] = a
1041 |         elif o == '--top': options['top'] = a
1042 |         elif o == '--user-password': options['user-password'] = a
1043 |         elif o in ('-v', '--verbose'): options['verbose'] = '--verbose'
1044 |         elif o == '--verbositylevel': options['verbositylevel'] = a
1045 |         elif o == '--version':
1046 |             print(sys.argv[0] + ' version ' + version)
1047 |             print('This is free software; see the source for copying conditions.  There is NO')
1048 |             print('warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.')
1049 |             return
1050 |         elif o == '--webpage': options['webpage'] = '--webpage'
1051 | 
1052 |     # Option validation checks
1053 |     #
1054 |     # Only one of '--extract-only' and '--dontextract' may be given!
1055 |     if options['dontextract'] == '--dontextract' and options['extract-only'] == '--extract-only':
1056 |         usage(sys.argv[0])
1057 |         print()
1058 |         print('### Either \'--dontextract\' or \'extract-only\' may be given!')
1059 |         print('### Only one of the two options can be present, not both!')
1060 |         print('### See above or try \'' + sys.argv[0] + ' --help | less\' to view the help contents in less.')
1061 |         return
1062 |     #
1063 |     # One of '--book' or '--webpage' MUST be given!
1064 |     if options['extract-only'] == '' and ((options['book'] == '' and options['webpage'] == '' and options['continuous'] == '') or
1065 |                                           (options['book'] == '--book' and options['webpage'] == '--webpage') or
1066 |                                           (options['book'] == '--book' and options['continuous'] == '--continuous') or
1067 |                                           (options['webpage'] == '--webpage' and options['continuous'] == '--continuous')):
1068 |         usage(sys.argv[0])
1069 |         print()
1070 |         print('### Either \'--book\' or \'--webpage\' or \'--continuous\' MUST be given!')
1071 |         print('### Only one of the three options can be present, not two, or even all three of them!')
1072 |         print('### See above or try \'' + sys.argv[0] + ' --help | less\' to view the help contents in less.')
1073 |         return
1074 | 
1075 |     if len(args)==0:
1076 |         usage(sys.argv[0])
1077 |         return
1078 |     elif len(args)==1:
1079 |         filename = args[0]
1080 |         dirname, basename, suffix = split(filename)
1081 |         if dirname:
1082 |             outputfilename = dirname + os.sep + basename +'.pdf'
1083 |         else:
1084 |             outputfilename = dirname + basename +'.pdf'
1085 |         # print 'outputfilename = ' + outputfilename
1086 | 
1087 |     elif len(args)==2:
1088 |         filename = args[0]
1089 |         dirname, basename, suffix = split(filename)
1090 |         outputfilename = args[1]
1091 |         # print 'outputfilename = ' + outputfilename
1092 |     else:
1093 |         usage(sys.argv[0])
1094 |         return
1095 | 
1096 |     CHM2PDF_WORK_DIR = CHM2PDF_TEMP_WORK_DIR + os.sep + basename
1097 |     CHM2PDF_ORIG_DIR = CHM2PDF_TEMP_ORIG_DIR + os.sep + basename
1098 | 
1099 |     if options['verbose']=='--verbose' and options['verbositylevel']=='low':
1100 |         print('CHM2PDF_WORK_DIR = ' + CHM2PDF_WORK_DIR)
1101 |         print('CHM2PDF_ORIG_DIR = ' + CHM2PDF_ORIG_DIR)
1102 | 
1103 |     if not os.path.exists(filename):
1104 |         print('CHM file "' + filename + '" not found!')
1105 |         return
1106 | 
1107 |     #remove temporary files
1108 |     if options['dontextract'] == '':
1109 |         if options['verbose']=='--verbose' and options['verbositylevel']=='high':
1110 |             print('Removing any previous temporary files...')
1111 |         os.system('rm -r '+CHM2PDF_ORIG_DIR+'/*')
1112 |         os.system('rm -r '+CHM2PDF_WORK_DIR+'/*')
1113 | 
1114 |     cfile = chm.CHMFile()
1115 |     cfile.LoadCHM(filename)
1116 | 
1117 |     if options['dontextract'] == '--dontextract':
1118 |         if options['verbose'] == '--verbose':
1119 |             print('\'--dontextract\' option was given. No files will be extracted from CHM.')
1120 |             print('Will use the files in ' + CHM2PDF_ORIG_DIR + ' and ' + CHM2PDF_WORK_DIR + '.')
1121 |     else:
1122 |         if options['verbose'] == '--verbose' and options['verbositylevel'] == 'high':
1123 |             os.system('extract_chmLib ' + filename + ' ' + CHM2PDF_ORIG_DIR)
1124 |         else:
1125 |             os.system('extract_chmLib ' + filename + ' ' + CHM2PDF_ORIG_DIR + '&> /dev/null')
1126 | 
1127 |     convert_to_pdf(cfile, filename, outputfilename, options)
1128 | 
1129 | 
# Run the converter only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == '__main__':
    main(sys.argv)
1132 | 


--------------------------------------------------------------------------------