├── README.md
└── html2dash.py

/README.md:
--------------------------------------------------------------------------------
# html2Dash

html2Dash is a Documentation Set generator intended to be used with the [Dash.app](http://kapeli.com/dash/) API browser for OS X or one of its many clones. html2Dash works just like [doc2dash](https://github.com/hynek/doc2dash), but it generates a docset from any HTML documentation.

If you’ve never heard of Dash.app, you’re missing out: together with html2Dash it puts all your API documentation at your fingertips!

Third-party library required:

    beautifulsoup4==4.3.2

It requires Python 3; tested on OS X 10.9.

# How to Use

The usage is as simple as:

    $ ./html2dash.py SOURCE

html2dash will create a new directory called `<name>.docset` in the current working directory (or in the directory given with `-d`) containing a Dash.app-compatible docset. When finished, the docset can be added to Dash.app.

**Options and Arguments**

The full usage is:

    $ ./html2dash.py [OPTIONS] SOURCE

The `SOURCE` is a directory containing the HTML documents you would like to convert.

Valid `OPTIONS` are the following:

* -n, --name

    Name the docset explicitly instead of letting html2dash guess the name from the directory name of the source.

* -d PATH, --destination PATH

    Put the resulting docset into PATH. Default is the current working directory.

* -i FILENAME, --icon FILENAME

    Add the PNG icon FILENAME to the docset; it is used within Dash.app to represent the docset.

* -p INDEX_PAGE, --index-page INDEX_PAGE

    Set the file that is shown when the docset is opened within Dash.app.

* -h, --help

    Show a brief usage summary and exit.

Dependencies:

* BeautifulSoup (beautifulsoup4) HTML parsing library

# Demo

Generate the docset for requests (`requests.docset`):

    $ ./html2dash.py -n requests -i ~/Documents/requests-sidebar.png ~/Documents/requests
    Create the Docset Folder!
    Copy the HTML Documentation!
    Create the SQLite Index
    Create the Info.plist File
    Create the Icon for the Docset!
    Generate Docset Successfully!
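The generated search index is a plain SQLite database (`Contents/Resources/docSet.dsidx`) with a single `searchIndex` table of `(name, type, path)` rows. A minimal sketch for inspecting the index of the demo docset above, assuming it was generated into the current directory:

    import sqlite3

    # Open the search index inside the generated docset.
    db = sqlite3.connect("requests.docset/Contents/Resources/docSet.dsidx")

    # Each row maps an entry name to its type and the HTML page it links to.
    for name, entry_type, path in db.execute(
            "SELECT name, type, path FROM searchIndex LIMIT 5"):
        print(name, entry_type, path)

    db.close()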
--------------------------------------------------------------------------------

/html2dash.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import argparse
import os
import re
import shutil
import sqlite3

from bs4 import BeautifulSoup


def update_db(name, path):
    """Insert an entry into the search index unless its name or path is already indexed."""
    try:
        cur.execute("SELECT rowid FROM searchIndex WHERE path = ?", (path,))
        dbpath = cur.fetchone()
        cur.execute("SELECT rowid FROM searchIndex WHERE name = ?", (name,))
        dbname = cur.fetchone()

        if dbpath is None and dbname is None:
            cur.execute('INSERT OR IGNORE INTO searchIndex(name, type, path)\
                        VALUES (?,?,?)', (name, "Section", path))
    except Exception:
        pass


def add_urls():
    """Walk the links on the index page and add them to the search index."""
    with open(os.path.join(docset_path, 'index.html'), encoding="utf-8") as f:
        index_page = f.read()
    soup = BeautifulSoup(index_page, "html.parser")
    href_pattern = re.compile('.*')
    for tag in soup.find_all('a', {'href': href_pattern}):
        name = tag.text.strip()
        if len(name) > 0:
            path = tag.attrs['href'].strip()
            # Skip links that point back to the index page itself.
            if path.split('#')[0] != 'index.html':
                update_db(name, path)


def add_infoplist(info_path, index_page):
    name = docset_name.split('.')[0]
    info = """<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
    <key>CFBundleIdentifier</key>
    <string>{0}</string>
    <key>CFBundleName</key>
    <string>{1}</string>
    <key>DashDocSetFamily</key>
    <string>{2}</string>
    <key>DocSetPlatformFamily</key>
    <string>{2}</string>
    <key>isDashDocset</key>
    <true/>
    <key>isJavaScriptEnabled</key>
    <true/>
    <key>dashIndexFilePath</key>
    <string>{3}</string>
</dict>
</plist>
""".format(name, name, name, index_page)

    try:
        with open(info_path, 'w', encoding="utf-8") as f:
            f.write(info)
        print("Create the Info.plist File")
    except Exception as e:
        print("**Error**: Create the Info.plist File Failed...")
        print(e)
        clear_trash()
        exit(2)


def clear_trash():
    """Remove the partially generated docset after a failure."""
    try:
        shutil.rmtree(docset_name)
        print("Clear generated useless files!")
    except Exception:
        print("**Error**: Clear trash failed...")


if __name__ == "__main__":

    parser = argparse.ArgumentParser()

    parser.add_argument('-n', '--name',
                        help='Name the docset explicitly')
    parser.add_argument('-d', '--destination',
                        dest='path',
                        default='',
                        help='Put the resulting docset into PATH')
    parser.add_argument('-i', '--icon',
                        dest='filename',
                        help='Add PNG icon FILENAME to docset')
    parser.add_argument('-p', '--index-page',
                        help='Set the file that is shown')
    parser.add_argument('SOURCE',
                        help='Directory containing the HTML documents')

    results = parser.parse_args()

    source_dir = results.SOURCE
    if source_dir[-1] == "/":
        source_dir = results.SOURCE[:-1]

    if not os.path.exists(source_dir):
        print(source_dir + " does not exist!")
        exit(2)

    dir_name = os.path.basename(source_dir)
    if not results.name:
        docset_name = dir_name + ".docset"
    else:
        docset_name = results.name + ".docset"

    # Build the paths that make up the docset directory structure.
    doc_path = docset_name + "/Contents/Resources/Documents"
    dsidx_path = docset_name + "/Contents/Resources/docSet.dsidx"
    icon = docset_name + "/icon.png"
    info = docset_name + "/Contents/Info.plist"

    destpath = results.path
    if results.path and results.path[-1] != "/":
        destpath += "/"
    docset_path = destpath + doc_path
    sqlite_path = destpath + dsidx_path
    info_path = destpath + info
    icon_path = destpath + icon
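    # For reference, the layout produced below (relative to the destination
    # directory) is the standard Dash docset structure:
    #
    #   <name>.docset/
    #       icon.png                  (optional, added with -i)
    #       Contents/
    #           Info.plist
    #           Resources/
    #               docSet.dsidx      (SQLite search index)
    #               Documents/        (copied HTML documentation)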

    if not os.path.exists(docset_path):
        os.makedirs(docset_path)
        print("Create the Docset Folder!")
    else:
        print("Docset Folder already exists!")

    # Copy the HTML documentation into the docset.
    for subdir in os.listdir(source_dir):
        try:
            shutil.copytree(os.path.join(source_dir, subdir), os.path.join(docset_path, subdir))
        except NotADirectoryError as not_dir:
            # Plain files are copied one by one.
            try:
                shutil.copy(os.path.join(source_dir, subdir), os.path.join(docset_path, subdir))
            except Exception:
                clear_trash()
                raise RuntimeError("**Error**: Copy HTML Documents Failed...") from not_dir
    print("Copy the HTML Documentation!")

    # Create and connect to the SQLite index.
    try:
        db = sqlite3.connect(sqlite_path)
        cur = db.cursor()
    except Exception:
        print("**Error**: Create SQLite Index Failed...")
        clear_trash()
        exit(2)

    cur.execute('DROP TABLE IF EXISTS searchIndex;')
    cur.execute('CREATE TABLE searchIndex(id INTEGER PRIMARY KEY,\
                name TEXT,\
                type TEXT,\
                path TEXT);')
    cur.execute('CREATE UNIQUE INDEX anchor ON searchIndex (name, type, path);')
    print("Create the SQLite Index")

    add_urls()
    db.commit()
    db.close()

    # Create the Info.plist file.
    if not results.index_page:
        index_page = "index.html"
    else:
        index_page = results.index_page

    add_infoplist(info_path, index_page)

    # Add the icon file if one was given.
    icon_filename = results.filename
    if icon_filename:
        if icon_filename[-4:] == ".png" and os.path.isfile(icon_filename):
            try:
                shutil.copy(icon_filename, icon_path)
                print("Create the Icon for the Docset!")
            except Exception:
                print("**Error**: Copy Icon file failed...")
                clear_trash()
                exit(2)
        else:
            print("**Error**: Icon file should be a valid PNG image...")
            clear_trash()
            exit(2)

    print("Generate Docset Successfully!")
--------------------------------------------------------------------------------