├── README.md
└── html2dash.py
/README.md:
--------------------------------------------------------------------------------
1 | # html2Dash
2 |
3 | html2Dash is a Documentation Set generator intended to be used with the [Dash.app](http://kapeli.com/dash/) API browser for OS X or one of its many clones. html2Dash is just like [doc2dash](https://github.com/hynek/doc2dash) but generates a docset from any HTML documentation.
4 |
5 | If you’ve never heard of Dash.app, you’re missing out: together with html2Dash it’s all your API documentation at your fingertips!
6 |
7 | Third-party library required:
8 |
9 | beautifulsoup4==4.3.2
10 |
11 | It’s tested on Python 2.7, OS X 10.9.
12 |
13 | # How to Use
14 |
15 | The usage is as simple as:
16 |
17 |     $ ./html2dash.py SOURCE
18 |
19 | html2dash will create a new directory called `<name>.docset` in the current working directory (or in the directory given with `-d`) containing a Dash.app-compatible docset. When finished, the docset can be added to Dash.app.
20 |
21 | **Options and Arguments**
22 |
23 | The full usage is:
24 |
25 |     $ ./html2dash.py [OPTIONS] SOURCE
26 |
27 | The `SOURCE` is a directory containing the HTML documents you would like to convert.
28 |
29 | Valid `OPTIONS` are the following:
30 |
31 | * -n, --name
32 |
33 |   Name the docset explicitly instead of letting html2dash guess the correct name from the directory name of the source.
34 |
35 | * -d PATH, --destination PATH
36 |
37 |   Put the resulting docset into PATH. Default is the current working directory.
38 |
39 | * -i FILENAME, --icon FILENAME
40 |
41 | Add PNG icon FILENAME to docset that is used within Dash.app to represent the docset.
42 |
43 | * -p INDEX_PAGE, --index-page INDEX_PAGE
44 |
45 | Set the file that is shown when the docset is clicked within Dash.app.
46 |
47 | * -h, --help
48 |
49 | Show a brief usage summary and exit.
50 |
51 | DEPENDENCIES:
52 |
53 | * BeautifulSoup HTML parsing library
54 |
55 | # Demo
56 |
57 | Generate the Docset for requests: `requests.docset`. Command:
58 |
59 | $ ./html2dash.py -n requests -i ~/Documents/requests-sidebar.png ~/Documents/requests
60 | Create the Docset Folder!
61 | Copy the HTML Documentation!
62 | Create the SQLite Index
63 | Create the Info.plist File
64 | Create the Icon for the Docset!
65 | Generate Docset Successfully!
66 |
67 |
68 |
--------------------------------------------------------------------------------
/html2dash.py:
--------------------------------------------------------------------------------
1 | #! /usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 |
4 | import argparse
5 | import os
6 | import re
7 | import sqlite3
8 | import subprocess
9 | import shutil
10 |
11 | from bs4 import BeautifulSoup
12 |
13 |
def update_db(name, path):
    """Insert one search-index entry (name, "Section", path).

    The entry is skipped when either the name or the path is already
    present in the index.  Uses the module-level SQLite cursor ``cur``
    created in ``__main__``.

    Indexing is best-effort: database errors are reported but do not
    abort the run.
    """
    try:
        cur.execute("SELECT rowid FROM searchIndex WHERE path = ?", (path,))
        path_seen = cur.fetchone() is not None
        cur.execute("SELECT rowid FROM searchIndex WHERE name = ?", (name,))
        name_seen = cur.fetchone() is not None

        if not path_seen and not name_seen:
            cur.execute('INSERT OR IGNORE INTO searchIndex(name, type, path)\
                        VALUES (?,?,?)', (name, "Section", path))
    except sqlite3.Error as e:
        # Was a bare `except: pass`; narrow the catch and surface the
        # reason instead of silently swallowing every error.
        print("**Warning**: skipped index entry %r: %s" % (name, e))
28 |
29 |
def add_urls():
    """Index every named link found in the docset's index.html.

    Parses ``<docset_path>/index.html`` and calls :func:`update_db` for
    each ``<a href=...>`` tag whose text is non-empty, except links whose
    target page is index.html itself (anchors into other pages are kept).
    """
    index_file = os.path.join(docset_path, 'index.html')
    # `with` closes the file handle (the original leaked it).
    with open(index_file, encoding="utf-8") as fp:
        soup = BeautifulSoup(fp.read(), "html.parser")
    href_pattern = re.compile('.*')  # was bound to `any`, shadowing the builtin
    for tag in soup.find_all('a', {'href': href_pattern}):
        name = tag.text.strip()
        if name:
            path = tag.attrs['href'].strip()
            # Bug fix: the original compared against the *string*
            # ('index.html'), i.e. a substring test, so e.g. href "index"
            # was wrongly skipped.  Compare against a real tuple.
            if path.split('#')[0] not in ('index.html',):
                update_db(name, path)
40 |
41 |
def add_infoplist(info_path, index_page):
    """Write the docset's Info.plist metadata file.

    ``info_path`` is the destination file; ``index_page`` is the page
    Dash.app opens when the docset is selected.  The bundle name and
    identifier are derived from the module-level ``docset_name`` set in
    ``__main__``.  On failure the partially built docset is removed and
    the process exits with status 2.
    """
    name = docset_name.split('.')[0]
    # NOTE(review): the original template string had lost its XML markup;
    # this reconstructs a standard Dash Info.plist around the same
    # substituted values.  DocSetPlatformFamily was hard-coded to
    # "requests" (a leftover from the README demo) and now uses the
    # docset name as well.
    info = """<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
    <key>CFBundleIdentifier</key>
    <string>{0}</string>
    <key>CFBundleName</key>
    <string>{1}</string>
    <key>DashDocSetFamily</key>
    <string>{2}</string>
    <key>DocSetPlatformFamily</key>
    <string>{1}</string>
    <key>isDashDocset</key>
    <true/>
    <key>isJavaScriptEnabled</key>
    <true/>
    <key>dashIndexFilePath</key>
    <string>{3}</string>
</dict>
</plist>
""".format(name, name, name, index_page)

    try:
        # `with` guarantees the file is flushed and closed.
        with open(info_path, 'w', encoding="utf-8") as plist_file:
            plist_file.write(info)
        print("Create the Info.plist File")
    except OSError as e:
        print("**Error**: Create the Info.plist File Failed...")
        print(e)
        clear_trash()
        exit(2)
75 |
76 |
def clear_trash():
    """Best-effort removal of the partially generated docset directory.

    Deletes the module-level ``docset_name`` tree.  Failures are reported
    (with the reason) but never raised, since this is only cleanup.
    """
    try:
        shutil.rmtree(docset_name)
        print("Clear generated useless files!")
    except OSError as e:
        # Was a bare `except`; report why the cleanup failed.
        print("**Error**: Clear trash failed...")
        print(e)
84 |
85 |
if __name__ == "__main__":

    parser = argparse.ArgumentParser()

    parser.add_argument('-n', '--name',
                        help='Name the docset explicitly')
    parser.add_argument('-d', '--destination',
                        dest='path',
                        default='',
                        help='Put the resulting docset into PATH')
    parser.add_argument('-i', '--icon',
                        dest='filename',
                        help='Add PNG icon FILENAME to docset')
    parser.add_argument('-p', '--index-page',
                        help='Set the file that is shown')
    parser.add_argument('SOURCE',
                        help='Directory containing the HTML documents')

    results = parser.parse_args()

    # Strip trailing slashes so os.path.basename() yields the directory
    # name (the original indexed source_dir[-1], which crashes on "").
    source_dir = results.SOURCE.rstrip("/")

    if not os.path.exists(source_dir):
        print(source_dir + " does not exist!")
        exit(2)

    # The docset is named after the source directory unless -n overrides it.
    dir_name = os.path.basename(source_dir)
    docset_name = (results.name or dir_name) + ".docset"

    # Layout of a Dash docset bundle, relative to the destination.
    doc_path = docset_name + "/Contents/Resources/Documents"
    dsidx_path = docset_name + "/Contents/Resources/docSet.dsidx"
    icon = docset_name + "/icon.png"
    info = docset_name + "/Contents/info.plist"

    # os.path.join handles the empty default and any trailing slash,
    # replacing the manual "/" concatenation.
    docset_path = os.path.join(results.path, doc_path)
    sqlite_path = os.path.join(results.path, dsidx_path)
    info_path = os.path.join(results.path, info)
    icon_path = os.path.join(results.path, icon)

    if not os.path.exists(docset_path):
        os.makedirs(docset_path)
        print("Create the Docset Folder!")
    else:
        print("Docset Folder already exist!")

    # Copy the HTML documentation into the docset bundle.
    try:
        for entry in os.listdir(source_dir):
            src = os.path.join(source_dir, entry)
            dst = os.path.join(docset_path, entry)
            if os.path.isdir(src):
                shutil.copytree(src, dst)
            else:
                shutil.copy(src, dst)
        print("Copy the HTML Documentation!")
    except OSError as copy_error:
        clear_trash()
        # Bug fix: the original `raise "..."` raised a str, which is
        # itself a TypeError in Python 3; raise a real exception.
        raise RuntimeError("**Error**: Copy Html Documents Failed...") from copy_error

    # Create and connect to the SQLite search index.
    try:
        db = sqlite3.connect(sqlite_path)
        cur = db.cursor()
    except sqlite3.Error as e:
        print("**Error**: Create SQLite Index Failed...")
        print(e)
        clear_trash()
        exit(2)

    # IF EXISTS replaces the original's DROP wrapped in try/except.
    cur.execute('DROP TABLE IF EXISTS searchIndex;')
    cur.execute('CREATE TABLE searchIndex(id INTEGER PRIMARY KEY,\
                name TEXT,\
                type TEXT,\
                path TEXT);')
    cur.execute('CREATE UNIQUE INDEX anchor ON searchIndex (name, type, path);')
    print("Create the SQLite Index")

    add_urls()
    db.commit()
    db.close()

    # Create the Info.plist file; default landing page is index.html.
    index_page = results.index_page or "index.html"
    add_infoplist(info_path, index_page)

    # Add the icon file if one was given; it must be an existing PNG.
    icon_filename = results.filename
    if icon_filename:
        if icon_filename.endswith(".png") and os.path.isfile(icon_filename):
            try:
                # shutil.copy replaces the non-portable `cp` subprocess.
                shutil.copy(icon_filename, icon_path)
                print("Create the Icon for the Docset!")
            except OSError as e:
                print("**Error**: Copy Icon file failed...")
                print(e)
                clear_trash()
                exit(2)
        else:
            print("**Error**: Icon file should be a valid PNG image...")
            clear_trash()
            exit(2)

    print("Generate Docset Successfully!")
205 |
--------------------------------------------------------------------------------