└── autoload └── wwwrenderer.vim /autoload/wwwrenderer.vim: -------------------------------------------------------------------------------- 1 | "File: wwwrenderer.vim 2 | "Last Change: 27-Jul-2012. 3 | "Version: 0.01 4 | " 5 | " *wwwrenderer.vim* www renderer for vim 6 | " 7 | " Author: Yasuhiro Matsumoto 8 | " WebSite: http://mattn.kaoriya.net/ 9 | " Repository: http://github.com/mattn/wwwrenderer-vim 10 | " License: BSD style license 11 | " =============================================================================== 12 | " CONTENTS *wwwrenderer-contents* 13 | " Introduction |wwwrenderer-intro| 14 | " Install |wwwrenderer-install| 15 | " Usage |wwwrenderer-usage| 16 | " Todo |wwwrenderer-todo| 17 | " 18 | " INTRODUCTION *wwwrenderer-intro* 19 | " 20 | " This is a Vim script that works like a world wide web browser. 21 | " 22 | " INSTALL *wwwrenderer-install* 23 | " 24 | " Copy wwwrenderer.vim to the rtp/autoload directory. 25 | " This script requires the curl command and webapi-vim. 26 | " 27 | " see: http://github.com/mattn/webapi-vim 28 | " 29 | " USAGE *wwwrenderer-usage* 30 | " 31 | " This is a utility function. Call it as shown below. 32 | " 33 | " > 34 | " echo wwwrenderer#render('http://eow.alc.co.jp/fusion/UTF-8/', 35 | " ['div', {'class': 'sas'}], ['div', {'id': 'resultsList'}]) 36 | " < 37 | " 38 | " The first parameter is the URL to get. 39 | " The second and following parameters are scraping options. 40 | " 41 | " > 42 | " ['div', {'class': 'sas'}] 43 | " < 44 | " 45 | " The first element of the array is the tag name, and the second one is a dictionary object that 46 | " specifies attributes. The line above means div[@class=sas] in XPath terms. 47 | " 48 | " ============================================================================== 49 | " TODO *wwwrenderer-todo* 50 | " * form/input/text ? 51 | " ============================================================================== 52 | " vim:tw=78:ts=8:ft=help:norl:noet:fen:fdl=0: 53 | " ExportDoc: wwwrenderer.txt:5:-1 54 | " 55 | function! 
s:render(dom, pre, extra) 56 | let dom = a:dom 57 | if type(dom) == 0 || type(dom) == 1 || type(dom) == 5 58 | let html = webapi#html#decodeEntityReference(dom) 59 | let html = substitute(html, '\r', '', 'g') 60 | if a:pre == 0 61 | let html = substitute(html, '\n\+\s*', '', 'g') 62 | endif 63 | let html = substitute(html, '\t', ' ', 'g') 64 | return html 65 | elseif type(dom) == 3 66 | let html = "" 67 | for d in dom 68 | let html .= s:render(d, a:pre, a:extra) 69 | unlet d 70 | endfor 71 | return html 72 | elseif type(dom) == 4 73 | if empty(dom) 74 | return "" 75 | endif 76 | if dom.name != "script" && dom.name != "style" && dom.name != "head" 77 | let html = s:render(dom.child, a:pre || dom.name == "pre", a:extra) 78 | if dom.name =~ "^h[1-6]$" || dom.name == "br" || dom.name == "dt" || dom.name == "dl" || dom.name == "li" || dom.name == "p" || dom.name == "div" 79 | let html = "\n".html."\n" 80 | endif 81 | if dom.name == "pre" || dom.name == "blockquote" 82 | let html = "\n ".substitute(html, '\n', '\n ', 'g')."\n" 83 | endif 84 | if type(a:extra) == 3 && dom.name == "a" 85 | let lines = split(html, "\n", 1) 86 | let y = len(lines) 87 | let x = len(lines[-1]) 88 | call add(a:extra, {"x": x, "y": y, "node": dom}) 89 | endif 90 | return html 91 | endif 92 | return "" 93 | endif 94 | endfunction 95 | 96 | function! wwwrenderer#render_dom(dom) 97 | return s:render(a:dom, 0, 0) 98 | endfunction 99 | 100 | function! wwwrenderer#render(url, ...) 101 | let scrape = a:000 102 | let res = webapi#http#get(a:url) 103 | let enc = "utf-8" 104 | let mx = '.*charset="\?\([^;]\+\)' 105 | for h in res.header 106 | if h =~ "^Content-Type" 107 | let tmp = matchlist(h, mx) 108 | if len(tmp) 109 | let enc = tolower(tmp[1]) 110 | endif 111 | endif 112 | endfor 113 | if res.content !~ '^\s*= &columns 166 | let indent = matchstr(lines[idx], '^\s*') 167 | let [line, ins] = ['', ''] 168 | for c in split(lines[idx], '\zs') 169 | if strdisplaywidth(line . 
c) >= &columns 170 | let ins .= line . "\n" 171 | let line = indent 172 | endif 173 | let line .= c 174 | endfor 175 | let ins .= line . "\n" 176 | if idx == 0 177 | let lines = split(ins, "\n") + lines[idx+1:] 178 | else 179 | let lines = lines[0: idx-1] + split(ins, "\n") + lines[idx+1:] 180 | endif 181 | endif 182 | let idx += 1 183 | endwhile 184 | return join(lines, "\n") 185 | endfunction 186 | --------------------------------------------------------------------------------