├── README.md
└── html2dash.py

/README.md:
--------------------------------------------------------------------------------
# html2Dash

html2Dash is a Documentation Set generator intended to be used with the [Dash.app](http://kapeli.com/dash/) API browser for OS X or one of its many clones. html2Dash works just like [doc2dash](https://github.com/hynek/doc2dash), but it generates a docset from any HTML documentation.

If you’ve never heard of Dash.app, you’re missing out: together with html2Dash it puts all your API documentation at your fingertips!

Third-party library required:

    beautifulsoup4==4.3.2

It requires Python 3; tested on OS X 10.9.

# How to Use

The usage is as simple as:

    $ ./html2dash.py SOURCE

html2dash will create a new directory called `<name>.docset` in the current working directory (or in the directory given with `-d`) containing a Dash.app-compatible docset. When finished, the docset can be added to Dash.app.

**Options and Arguments**

The full usage is:

    $ ./html2dash.py [OPTIONS] SOURCE

The `SOURCE` is a directory containing the HTML documents you would like to convert.

Valid `OPTIONS` are the following:

* -n, --name

    Name the docset explicitly instead of letting html2dash guess the name from the directory name of the source.

* -d PATH, --destination PATH

    Put the resulting docset into PATH. Default is the current working directory.

* -i FILENAME, --icon FILENAME

    Add the PNG icon FILENAME to the docset; it is used within Dash.app to represent the docset.

* -p INDEX_PAGE, --index-page INDEX_PAGE

    Set the file that is shown when the docset is opened within Dash.app.

* -h, --help

    Show a brief usage summary and exit.

Dependencies:

* BeautifulSoup (beautifulsoup4) HTML parsing library

# Demo

Generate the docset for requests (`requests.docset`):

    $ ./html2dash.py -n requests -i ~/Documents/requests-sidebar.png ~/Documents/requests
    Create the Docset Folder!
    Copy the HTML Documentation!
    Create the SQLite Index
    Create the Info.plist File
    Create the Icon for the Docset!
    Generate Docset Successfully!
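The generated search index is a plain SQLite database (`Contents/Resources/docSet.dsidx`) with a single `searchIndex` table of `(name, type, path)` rows. A minimal sketch for inspecting the index of the demo docset above, assuming it was generated into the current directory:

    import sqlite3

    # Open the search index inside the generated docset.
    db = sqlite3.connect("requests.docset/Contents/Resources/docSet.dsidx")

    # Each row maps an entry name to its type and the HTML page it links to.
    for name, entry_type, path in db.execute(
            "SELECT name, type, path FROM searchIndex LIMIT 5"):
        print(name, entry_type, path)

    db.close()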
--------------------------------------------------------------------------------

/html2dash.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import argparse
import os
import re
import shutil
import sqlite3

from bs4 import BeautifulSoup


def update_db(name, path):
    """Insert an entry into the search index unless its name or path is already indexed."""
    try:
        cur.execute("SELECT rowid FROM searchIndex WHERE path = ?", (path,))
        dbpath = cur.fetchone()
        cur.execute("SELECT rowid FROM searchIndex WHERE name = ?", (name,))
        dbname = cur.fetchone()

        if dbpath is None and dbname is None:
            cur.execute('INSERT OR IGNORE INTO searchIndex(name, type, path)\
                        VALUES (?,?,?)', (name, "Section", path))
    except Exception:
        pass


def add_urls():
    """Walk the links on the index page and add them to the search index."""
    with open(os.path.join(docset_path, 'index.html'), encoding="utf-8") as f:
        index_page = f.read()
    soup = BeautifulSoup(index_page, "html.parser")
    href_pattern = re.compile('.*')
    for tag in soup.find_all('a', {'href': href_pattern}):
        name = tag.text.strip()
        if len(name) > 0:
            path = tag.attrs['href'].strip()
            # Skip links that point back to the index page itself.
            if path.split('#')[0] != 'index.html':
                update_db(name, path)


def add_infoplist(info_path, index_page):
    name = docset_name.split('.')[0]
    info = """<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
    <key>CFBundleIdentifier</key>
    <string>{0}</string>
    <key>CFBundleName</key>
    <string>{1}</string>
    <key>DashDocSetFamily</key>
    <string>{2}</string>
    <key>DocSetPlatformFamily</key>
    <string>{2}</string>
    <key>isDashDocset</key>
    <true/>
    <key>isJavaScriptEnabled</key>
    <true/>
    <key>dashIndexFilePath</key>
    <string>{3}</string>
</dict>
</plist>
""".format(name, name, name, index_page)

    try:
        with open(info_path, 'w', encoding="utf-8") as f:
            f.write(info)
        print("Create the Info.plist File")
    except Exception as e:
        print("**Error**: Create the Info.plist File Failed...")
        print(e)
        clear_trash()
        exit(2)


def clear_trash():
    """Remove the partially generated docset after a failure."""
    try:
        shutil.rmtree(docset_name)
        print("Clear generated useless files!")
    except Exception:
        print("**Error**: Clear trash failed...")


if __name__ == "__main__":

    parser = argparse.ArgumentParser()

    parser.add_argument('-n', '--name',
                        help='Name the docset explicitly')
    parser.add_argument('-d', '--destination',
                        dest='path',
                        default='',
                        help='Put the resulting docset into PATH')
    parser.add_argument('-i', '--icon',
                        dest='filename',
                        help='Add PNG icon FILENAME to docset')
    parser.add_argument('-p', '--index-page',
                        help='Set the file that is shown')
    parser.add_argument('SOURCE',
                        help='Directory containing the HTML documents')

    results = parser.parse_args()

    source_dir = results.SOURCE
    if source_dir[-1] == "/":
        source_dir = results.SOURCE[:-1]

    if not os.path.exists(source_dir):
        print(source_dir + " does not exist!")
        exit(2)

    dir_name = os.path.basename(source_dir)
    if not results.name:
        docset_name = dir_name + ".docset"
    else:
        docset_name = results.name + ".docset"

    # Build the paths that make up the docset directory structure.
    doc_path = docset_name + "/Contents/Resources/Documents"
    dsidx_path = docset_name + "/Contents/Resources/docSet.dsidx"
    icon = docset_name + "/icon.png"
    info = docset_name + "/Contents/Info.plist"

    destpath = results.path
    if results.path and results.path[-1] != "/":
        destpath += "/"
    docset_path = destpath + doc_path
    sqlite_path = destpath + dsidx_path
    info_path = destpath + info
    icon_path = destpath + icon
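    # For reference, the layout produced below (relative to the destination
    # directory) is the standard Dash docset structure:
    #
    #   <name>.docset/
    #       icon.png                  (optional, added with -i)
    #       Contents/
    #           Info.plist
    #           Resources/
    #               docSet.dsidx      (SQLite search index)
    #               Documents/        (copied HTML documentation)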

    if not os.path.exists(docset_path):
        os.makedirs(docset_path)
        print("Create the Docset Folder!")
    else:
        print("Docset Folder already exists!")

    # Copy the HTML documentation into the docset.
    for subdir in os.listdir(source_dir):
        try:
            shutil.copytree(os.path.join(source_dir, subdir), os.path.join(docset_path, subdir))
        except NotADirectoryError as not_dir:
            # Plain files are copied one by one.
            try:
                shutil.copy(os.path.join(source_dir, subdir), os.path.join(docset_path, subdir))
            except Exception:
                clear_trash()
                raise RuntimeError("**Error**: Copy HTML Documents Failed...") from not_dir
    print("Copy the HTML Documentation!")

    # Create and connect to the SQLite index.
    try:
        db = sqlite3.connect(sqlite_path)
        cur = db.cursor()
    except Exception:
        print("**Error**: Create SQLite Index Failed...")
        clear_trash()
        exit(2)

    cur.execute('DROP TABLE IF EXISTS searchIndex;')
    cur.execute('CREATE TABLE searchIndex(id INTEGER PRIMARY KEY,\
                name TEXT,\
                type TEXT,\
                path TEXT);')
    cur.execute('CREATE UNIQUE INDEX anchor ON searchIndex (name, type, path);')
    print("Create the SQLite Index")

    add_urls()
    db.commit()
    db.close()

    # Create the Info.plist file.
    if not results.index_page:
        index_page = "index.html"
    else:
        index_page = results.index_page

    add_infoplist(info_path, index_page)

    # Add the icon file if one was given.
    icon_filename = results.filename
    if icon_filename:
        if icon_filename[-4:] == ".png" and os.path.isfile(icon_filename):
            try:
                shutil.copy(icon_filename, icon_path)
                print("Create the Icon for the Docset!")
            except Exception:
                print("**Error**: Copy Icon file failed...")
                clear_trash()
                exit(2)
        else:
            print("**Error**: Icon file should be a valid PNG image...")
            clear_trash()
            exit(2)

    print("Generate Docset Successfully!")
--------------------------------------------------------------------------------