├── __init__.py └── icons └── paste.png /__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # PasteHTML 4 | # Let you paste richtext to anki, with formatting preserved. 5 | # 6 | # v3. rewrite for Anki 2.1 (Michael Goerz) 7 | # v2. bugfix, embedded images are now supported 8 | # v1. Initial release 9 | # 10 | 11 | import sys 12 | from html.parser import HTMLParser 13 | import re 14 | import cgi 15 | import urllib.request, urllib.error, urllib.parse 16 | import os 17 | 18 | from aqt.editor import Editor, EditorWebView 19 | from aqt.qt import ( 20 | Qt, 21 | QClipboard, 22 | QWebEngineView, 23 | QDialog, 24 | QImage, 25 | QLabel, 26 | QVBoxLayout, 27 | QMimeData, 28 | ) 29 | from anki.utils import namedtmp 30 | from aqt.utils import tooltip 31 | from anki.lang import _ 32 | from anki.hooks import wrap, addHook 33 | 34 | ADDON_PATH = os.path.dirname(__file__) 35 | ICONS_PATH = os.path.join(ADDON_PATH, "icons") 36 | 37 | # Tags that don't have ending tags. 
_voidElements = {
    'area', 'base', 'basefont', 'bgsound', 'br', 'col',
    'command', 'embed', 'frame', 'hr', 'image', 'img', 'input', 'isindex',
    'keygen', 'link', 'menuitem', 'meta', 'nextid', 'param', 'source',
    'track', 'wbr',
}


# Tags that should be allowed. Text nested inside any tag that is NOT listed
# here is discarded by the cleaner, so every heading level must be present.
_allowedTags = {
    # Paragraph-related elements ('h6' added: HTML defines h1 through h6)
    'p', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'blockquote', 'pre',

    # Useful inline elements
    'img', 'a', 'span', 'br', 'code',
    'b', 'em', 'i', 'u', 'strong',

    # Lists
    'ul', 'ol', 'li',

    # Useful layout elements
    'div', 'table', 'tr', 'td', 'thead', 'th', 'tbody',
}


# Tags that should be ignored: They are valid, but shouldn't be present in
# the output html
_ignoredTags = {
    'html', 'body',
}


# Allowed attributes
_allowedAttributes = {
    'style',

    'src', 'alt',  # img
    'href', 'title',  # a href

    'colspan', 'rowspan',  # table
}


# Allowed CSS styles
_allowedStyles = {
    # General text attributes
    'font-weight', 'color', 'background-color', 'font-style',

    # Block attributes
    'text-align', 'valign',

    # Table attributes ('background-color' is already listed above; a set
    # literal must not repeat a member)
    'background',
}


# CleanHTML overrides some default styles: for the tags below these
# declarations replace/extend whatever style the pasted element carried.
_overrideStyles = {
    'table': {
        'box-sizing': 'border-box',
        'width': '100%',
        'margin': '.5em',
        'border-collapse': 'collapse',
        'outline': '1px solid black',
    },

    'th': {
        'position': 'relative',
        'border': '1px solid black',
        'padding': '.4em',
        'font-size': '1.2em',
    },

    'td': {
        'position': 'relative',
        'border': '1px solid black',
        'padding': '.2em',
    },

    'div': {
        'padding': '.2em',
    },
}


##############################################
# Main implementation
##############################################


def _escapeHtml(value, quote):
    """Return *value* with HTML special characters escaped.

    :param value: text to escape; ``None`` (which HTMLParser reports for
        valueless attributes such as ``<img alt>``) is treated as ``''``.
    :param quote: when true, ``"`` and ``'`` are escaped as well so the
        result is safe inside a quoted attribute value.
    """
    # Function-scope import: ``cgi.escape`` was removed in Python 3.8 and the
    # module header only imports ``html.parser``, not ``html`` itself.
    from html import escape

    return escape(value or '', quote=quote)


class NonFormatTagCleaner(HTMLParser):
    """HTML filter that keeps only whitelisted tags, attributes and styles.

    Feed HTML with :meth:`feed` and collect the cleaned markup with
    :meth:`flush`.  Everything nested inside a tag that is not in
    ``_allowedTags`` is discarded.  Images referenced by ``<img src>`` are
    fetched with ``downloadMedia`` and stored through ``SaveImageToMedia``.
    """

    def __init__(self, editorWebView):
        # convert_charrefs=True: text and attribute values arrive with
        # character references already decoded, so they are re-escaped on
        # output (see handle_data / handle_starttag).
        HTMLParser.__init__(self, convert_charrefs=True)
        # Number of currently open tags that are not in _allowedTags; output
        # is suppressed while this is non-zero.
        self.nonAllowedTagCountInStack = 0
        self.output = []
        # Currently open (non-void) tags, innermost last.
        self.tagStack = []
        # Set when the end tags seen do not match the open-tag stack.
        self.parseError = False
        self.editorWebView = editorWebView
        self.editor = editorWebView.editor

    def writeData(self, data):
        """Append *data* to the output unless inside a non-allowed tag."""
        if self.nonAllowedTagCountInStack == 0:
            self.output.append(data)

    def handle_starttag(self, tag, attrs):
        """Emit a sanitised start tag for *tag* (or nothing if filtered)."""
        if tag not in _voidElements:
            self.tagStack.append(tag)
            if tag not in _allowedTags:
                self.nonAllowedTagCountInStack += 1
        elif tag not in _allowedTags:
            # Void elements never reach the stack, so without this check
            # tags such as <base>, <link> or <meta> would bypass the
            # whitelist and be written to the output.
            return

        # Nothing is written while suppressed, so skip the attribute work
        # (and in particular the image download) as well.
        if self.nonAllowedTagCountInStack or tag in _ignoredTags:
            return

        # Parse attributes
        attrDict = {k: v for k, v in attrs if k in _allowedAttributes}

        # Parse styles (the attribute value is None for a bare ``style``)
        styleDict = {}
        for k, v in _styleRegex.findall(attrDict.pop('style', None) or ''):
            k = k.lower()
            if k in _allowedStyles:
                styleDict[k] = v

        # Override styles
        styleDict.update(_overrideStyles.get(tag, {}))

        if styleDict:
            attrDict['style'] = ''.join(
                "%s:%s;" % (k, v) for k, v in styleDict.items()
            )

        # Special cure for images: Download web images
        if tag == 'img' and attrDict.get('src'):
            imageUrl = attrDict['src']

            imageData = downloadMedia(imageUrl, self.editor)
            fname = ""
            if imageData:
                fname = SaveImageToMedia(imageData, self.editor)
            if fname:
                attrDict['src'] = fname
            else:
                # Keep the original src rather than replacing it with ''.
                tooltip("Failed to download %s" % imageUrl)

        attrStr = ''.join(
            ' %s="%s"' % (k, _escapeHtml(v, True))
            for k, v in attrDict.items()
        )

        # Write to stack
        self.writeData("<%s%s>" % (tag, attrStr))

    def handle_endtag(self, tag):
        """Close *tag*, unwinding any tags left open inside it."""
        # Do nothing for void elements
        if tag in _voidElements:
            return

        # A stray end tag must not unwind (and thereby corrupt) the stack.
        if tag not in self.tagStack:
            self.parseError = True
            return

        while True:
            popped = self.tagStack.pop()
            # Every non-allowed tag that leaves the stack releases one level
            # of output suppression, including tags closed implicitly.
            if popped not in _allowedTags:
                self.nonAllowedTagCountInStack -= 1
            if popped == tag:
                break
            self.parseError = True

        if tag in _allowedTags and tag not in _ignoredTags:
            self.writeData("</%s>" % tag)

    def handle_data(self, data):
        """Write text content, re-escaping the characters HTMLParser decoded."""
        self.writeData(_escapeHtml(data, False))

    def flush(self):
        """Return everything written so far as a single string."""
        return ''.join(self.output)


# One ``property: value`` declaration.  Surrounding whitespace is not
# captured and the final declaration does not need a trailing ';'.
_styleRegex = re.compile(r'\s*([^:;]+?)\s*:\s*([^;]+?)\s*(?:;|$)')
# Ignored tags are legal containers: their content is kept even though the
# tags themselves are never written.
_allowedTags |= _ignoredTags


def SaveImageToMedia(imageData, editor):
    """Store raw image bytes in the collection's media folder.

    The image is written to a temporary file as PNG when the profile option
    ``pastePNG`` is set, otherwise as JPEG, and then added to the media
    collection.

    :param imageData: encoded image bytes, passed to ``QImage.fromData``.
    :param editor: the editor whose ``mw`` provides profile and collection.
    :return: the name returned by ``media.addFile``, or ``""`` when no
        image file could be written (e.g. invalid image data).
    """
    im = QImage.fromData(imageData)
    uname = namedtmp("pasteHTML-%d" % im.cacheKey())

    if editor.mw.pm.profile.get("pastePNG", False):
        ext, quality = ".png", 50
    else:
        ext, quality = ".jpg", 80
    path = uname + ext
    im.save(path, None, quality)

    # invalid image?
    if not os.path.exists(path):
        return ""

    return editor.mw.col.media.addFile(path)


def cleanTag(data, editorWebView):
    """Return *data* (HTML) reduced to whitelisted markup.

    Lines consisting only of whitespace are removed from the result.
    """
    parser = NonFormatTagCleaner(editorWebView)
    parser.feed(data)
    data = parser.flush()
    return re.sub(r'^\s*\n', '', data, flags=re.M)


def downloadMedia(url, editor):
    """Return the bytes behind *url*, or ``None`` if they cannot be read.

    ``file://`` URLs are read directly from disk.  Anything else is fetched
    with ``urllib`` while a small dialog shows the URL and, when the server
    reports a ``Content-Length``, the progress.

    :param url: address of the media file.
    :param editor: editor providing the parent window and the Qt app.
    :return: ``bytes`` on success, ``None`` on failure.
    """
    # Local file : just read the file content
    if url.startswith("file://"):
        path = url[7:]
        # Try the literal path first, then the percent-decoded form
        # (e.g. ``%20`` for a space).
        candidates = [path]
        decoded = urllib.parse.unquote(path)
        if decoded != path:
            candidates.append(decoded)
        for candidate in candidates:
            # On windows, paths tend to be prefixed by file:/// rather than
            # file://, so we remove the redundant slash.  The drive may be
            # followed by either kind of slash.
            if re.match(r'^/[A-Za-z]:[\\/]', candidate):
                candidate = candidate[1:]
            try:
                with open(candidate, 'rb') as f:
                    return f.read()
            except OSError:
                continue
        # A bare filesystem path is not something urlopen can fetch.
        return None

    app = editor.mw.app

    # Show download dialog
    d = QDialog(editor.parentWindow)
    d.setWindowTitle("Downloading media (0.0%)")
    d.setWindowModality(Qt.WindowModal)
    vbox = QVBoxLayout()
    label = QLabel(url)
    label.setWordWrap(True)
    vbox.addWidget(label)
    d.setLayout(vbox)
    d.show()

    # Download chunk by chunk for progress bar
    try:
        with urllib.request.urlopen(url) as response:
            # The header is optional; without it no percentage is shown.
            try:
                totSize = int(response.info().get('Content-Length', '').strip())
            except ValueError:
                totSize = 0
            currentRead = 0
            chunk_size = 16384
            chunks = []

            while True:
                chunk = response.read(chunk_size)
                if not chunk:
                    break
                chunks.append(chunk)
                currentRead += len(chunk)

                if totSize > 0:
                    d.setWindowTitle(
                        "Downloading media (%.1f%%)" %
                        (currentRead * 100.0 / totSize)
                    )
                app.processEvents()

        # response.read() yields bytes, so the chunks are joined as bytes.
        return b''.join(chunks)

    except (OSError, ValueError):
        # OSError covers urllib.error.URLError and socket failures;
        # ValueError is raised by urlopen for an unknown URL type.
        return None

    finally:
        d.close()
        del d


# Hook functions for EditorWebView


def buttonSetup(buttons, editor):
    """Add the "paste formatted HTML" button to the editor button list.

    :param buttons: list of buttons built so far; the new one is appended.
    :param editor: the editor the button is created on.
    :return: *buttons*, including the new button.
    """
    icon = os.path.join(ICONS_PATH, 'paste.png')
    # Only the tooltip text differs per platform; the key sequence itself
    # stays 'Ctrl+Alt+V' (Qt's QKeySequence maps Ctrl to the Command key on
    # macOS).
    if sys.platform == 'darwin':  # macos
        shortcut = "Cmd+Alt+V"
    else:
        shortcut = "Ctrl+Alt+V"
    b = editor.addButton(
        icon=icon, cmd='pasteHTML',
        func=lambda editor: onHtmlCopy(editor.web),
        tip='Paste formatted HTML (%s)' % shortcut,
        keys='Ctrl+Alt+V')
    buttons.append(b)
    return buttons


def onHtmlCopy(editor_web_view):
    """Clean the HTML on the clipboard and paste it into the editor.

    Does nothing when the clipboard holds no HTML.  If cleaning fails for
    any reason the original clipboard HTML is pasted unchanged.
    """
    mode = QClipboard.Clipboard

    clip = editor_web_view.editor.mw.app.clipboard()
    mime = clip.mimeData(mode=mode)

    if not mime.hasHtml():
        return

    try:
        newHtml = cleanTag(mime.html(), editor_web_view)
    except Exception:
        # Deliberate best effort: fall back to the raw HTML, but let
        # KeyboardInterrupt / SystemExit propagate.
        newHtml = mime.html()

    newMime = QMimeData()
    newMime.setHtml(newHtml)
    clip.setMimeData(newMime, mode=mode)
    editor_web_view.editor.doPaste(newHtml, internal=True)


addHook("setupEditorButtons", buttonSetup)