├── README.md └── doc2pdf.py /README.md: -------------------------------------------------------------------------------- 1 | # doc2pdf 2 | --- 3 | This is a quick-and-dirty script that converts Windows .doc files to PDF files. It is the first Python code I have written. Fortunately, it works fine. 4 | 5 | It is implemented with the Microsoft Office COM interfaces, which are called through **Python 6 | for Windows Extensions**. You can get it from 7 | [**HERE**](http://sourceforge.net/projects/pywin32/files%2Fpywin32). 8 | 9 | This program accepts a directory path and converts all the .doc files in that path 10 | to .pdf file format. It works recursively, so it will also convert all .doc files in 11 | its sub-directories. 12 | ``` bash 13 | $ doc2pdf.py [source-directory-path] 14 | 15 | $ doc2pdf.py [source-doc-file-path] 16 | 17 | $ doc2pdf.py [source-doc-file-path] [target-pdf-file-path] 18 | ``` 19 | 20 | I completed this code by learning from other people's work. The program contains some code from 21 | [**HERE**](http://blog.csdn.net/rumswell/article/details/7434302). Thanks for 22 | your blog article and source code. 23 | -------------------------------------------------------------------------------- /doc2pdf.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | #-*- coding:utf-8 -*- 3 | 4 | # doc2pdf.py: python script to convert doc to pdf with bookmarks! 
# Requires Office 2007 SP2
# Requires python for win32 extension


import os
import sys

from win32com.client import Dispatch, constants, gencache

# One Word instance is started when this module is loaded and shared by every
# conversion; the script entry point at the bottom shuts it down.
app = Dispatch("Word.Application")
# set UI un-visible, no warning
app.Visible = False
app.DisplayAlerts = False

# Name of the per-directory output folder created by walk_directory().
_OUTPUT_DIR_NAME = "doc2pdf"

_USAGE = (
    "Usage:\n"
    "  doc2pdf.py [source-directory-path]\n"
    "  doc2pdf.py [source-doc-file-path]\n"
    "  doc2pdf.py [source-doc-file-path] [target-pdf-file-path]\n"
)


def doc2pdf(input, output):
    """Convert one .doc file to a PDF file using the shared Word instance.

    The document is opened read-only and exported with markup included and
    with PDF bookmarks created from the document headings.

    Args:
        input: Path of the source .doc file.
        output: Path of the PDF file to write.

    Returns:
        0 if the export succeeded, 1 if opening or exporting raised an error.
    """
    try:
        doc = app.Documents.Open(input, ReadOnly=1)
        doc.ExportAsFixedFormat(
            output,
            constants.wdExportFormatPDF,
            Item=constants.wdExportDocumentWithMarkup,
            CreateBookmarks=constants.wdExportCreateHeadingBookmarks)
        return 0
    except Exception as exc:
        # Catch Exception rather than everything, so Ctrl-C and SystemExit
        # still stop the program, and report why this file was skipped.
        sys.stderr.write("doc2pdf: failed to convert %s: %s\n" % (input, exc))
        return 1
    finally:
        # Runs on success and on failure, so no document stays open in the
        # shared Word instance; changes are discarded.
        app.Documents.Close(constants.wdDoNotSaveChanges)


def GenerateSupport():
    """Make sure the generated Python COM support module for Word is available.

    Enables python COM support for Word 2007. The same module is generated by:
    makepy.py -i "Microsoft Word 12.0 Object Library"
    """
    gencache.EnsureModule('{00020905-0000-0000-C000-000000000046}', 0, 8, 4)


def walk_directory(directory):
    """Convert every .doc file under `directory`, including sub-directories.

    The PDFs for the documents of a directory are written to a "doc2pdf"
    folder inside that same directory.

    Args:
        directory: Root directory to scan.

    Returns:
        The number of files that were converted successfully.
    """
    total = 0
    # convert_one_file() hands absolute paths to Word; do the same here so a
    # relative directory argument is handled the same way.
    directory = os.path.abspath(directory)
    for root, dirs, files in os.walk(directory):
        # Compare the real extension, case-insensitively: "REPORT.DOC" is a
        # document, a file that is merely named "doc" is not.
        doc_names = [name for name in files
                     if os.path.splitext(name)[1].lower() == ".doc"]
        if not doc_names:
            continue

        pdf_file_dir = os.path.join(root, _OUTPUT_DIR_NAME)
        created = not os.path.isdir(pdf_file_dir)
        if created:
            os.mkdir(pdf_file_dir)

        count = 0
        for name in doc_names:
            # splitext keeps dots inside the base name ("a.b.doc" -> "a.b.pdf").
            pdf_name = os.path.splitext(name)[0] + ".pdf"
            pdf_file = os.path.join(pdf_file_dir, pdf_name)
            doc_file = os.path.join(root, name)
            if not doc2pdf(doc_file, pdf_file):
                count += 1

        if count == 0 and created:
            # Only remove a folder this run created; an existing one may hold
            # PDFs from an earlier run.
            try:
                os.rmdir(pdf_file_dir)
            except OSError:
                # Something was left behind in the folder; keep it.
                pass
        total += count
    return total


def convert_one_file(filepath, target=None):
    """Convert a single .doc file to PDF.

    Args:
        filepath: Path of the source .doc file (relative or absolute).
        target: Optional path of the PDF to write. When omitted, the PDF is
            written next to the source file with the extension replaced by
            ".pdf".

    Returns:
        The result of doc2pdf(): 0 on success, 1 on failure.
    """
    path = os.path.abspath(filepath)
    if target:
        return doc2pdf(path, os.path.abspath(target))
    return doc2pdf(path, os.path.splitext(path)[0] + ".pdf")


def main():
    """Dispatch on the command-line arguments and run the conversion.

    Returns:
        The value to use as the process exit status:
        - one directory argument: the number of converted files, as
          returned by walk_directory();
        - one or two file arguments: 0 on success, 1 on failure;
        - anything else: 2, after printing the usage text to stderr.

    NOTE(review): in directory mode the exit status is a count, not a
    success flag. This is kept as-is for backward compatibility; confirm
    whether any caller depends on it before changing it.
    """
    GenerateSupport()
    args = sys.argv[1:]
    if len(args) == 1:
        if os.path.isdir(args[0]):
            return walk_directory(args[0])
        if os.path.isfile(args[0]):
            return convert_one_file(args[0])
    elif len(args) == 2 and os.path.isfile(args[0]):
        return convert_one_file(args[0], args[1])

    sys.stderr.write(_USAGE)
    return 2


if __name__ == '__main__':
    try:
        rc = main()
    finally:
        # Always shut down the hidden Word process, even if main() raised.
        app.Quit(constants.wdDoNotSaveChanges)
    sys.exit(rc)